├── inference ├── utils │ └── docker.sh ├── README.md └── infer.py ├── int8 └── calibration │ ├── test_images │ └── mug.jpg │ ├── caches │ ├── vgg16.cache │ ├── inception_v1.cache │ ├── resnet50.cache │ └── mobilenet_v2.cache │ ├── processing.py │ ├── SimpleCalibrator.py │ ├── README.md │ ├── ImagenetCalibrator.py │ ├── onnx_to_tensorrt.py │ └── labels │ └── imagenet1k_labels.txt ├── .gitignore ├── README.md ├── .github ├── workflows │ └── pythonapp.yml └── ISSUE_TEMPLATE │ └── bug_report.md ├── onnx ├── checker.py ├── pytorch │ └── alexnet_onnx.py └── trtexec │ └── alexnet.sh ├── OSS ├── README.md └── build_OSS.sh ├── plugins ├── CustomIPluginV2 │ ├── Makefile │ ├── CustomPlugin.h │ ├── test_plugin.py │ ├── README.md │ └── CustomPlugin.cpp ├── README.md └── list_plugins.py ├── uff └── parse_uff_metadata.py ├── network └── dump_network.py └── LICENSE /inference/utils/docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | nvidia-docker run -it -v `pwd`:/mnt -w /mnt nvcr.io/nvidia/tensorrt:20.06-py3 3 | -------------------------------------------------------------------------------- /int8/calibration/test_images/mug.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmccorm4/tensorrt-utils/HEAD/int8/calibration/test_images/mug.jpg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ONNX Models 2 | *.tar.gz 3 | *.onnx 4 | *.npz 5 | resnet50/ 6 | 7 | # TensorRT Engines 8 | *.plan 9 | *.engine 10 | 11 | # Plugins 12 | *.d 13 | *.o 14 | *.so 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TensorRT Utils 2 | 3 | This repository contains useful scripts and code references I use or encounter 4 | when working with TensorRT. 5 | 6 | The master branch is currently targeted at TensorRT 7.1+ ([NGC](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt) 20.06+). 7 | 8 | For earlier TensorRT versions, please see the other [tags](https://github.com/rmccorm4/tensorrt-utils/tags). 9 | -------------------------------------------------------------------------------- /.github/workflows/pythonapp.yml: -------------------------------------------------------------------------------- 1 | name: Python application 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Set up Python 3.7 13 | uses: actions/setup-python@v1 14 | with: 15 | python-version: 3.7 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | - name: Lint with flake8 20 | run: | 21 | pip install flake8 22 | # stop the build if there are Python syntax errors or undefined names 23 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 24 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 25 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 26 | -------------------------------------------------------------------------------- /onnx/checker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2020 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument("model", type=str, metavar="ONNX_MODEL", help="ONNX model to check.") 20 | args = parser.parse_args() 21 | 22 | import onnx 23 | model = onnx.load(args.model) 24 | onnx.checker.check_model(model) 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Description 11 | 12 | 13 | 14 | 15 | ## Environment 16 | 17 | **TensorRT Version**: 18 | **GPU Type**: 19 | **Nvidia Driver Version**: 20 | **CUDA Version**: 21 | **CUDNN Version**: 22 | **Operating System + Version**: 23 | **Python Version (if applicable)**: 24 | **TensorFlow Version (if applicable)**: 25 | **PyTorch Version (if applicable)**: 26 | **Baremetal or Container (if container which image + tag)**: 27 | 28 | 29 | ## Relevant Files 30 | 31 | 32 | 33 | 34 | ## Steps To Reproduce 35 | 36 | 44 | 45 | -------------------------------------------------------------------------------- /OSS/README.md: -------------------------------------------------------------------------------- 1 | # OSS 2 | 3 | To build the [TensorRT OSS Components](https://github.com/NVIDIA/TensorRT) **interactively** inside of a TensorRT NGC Container, 4 | you can run the following [build_OSS.sh](build_OSS.sh) script inside of the container: 5 | 6 | ```bash 7 | # Pull and Run TensorRT Container from NGC 8 | docker run --runtime=nvidia -it nvcr.io/nvidia/tensorrt:19.10-py3 9 | 10 | # Build OSS Components from https://github.com/NVIDIA/TensorRT 11 | wget https://raw.githubusercontent.com/rmccorm4/tensorrt-utils/master/OSS/build_OSS.sh 12 | source build_OSS.sh 13 | 14 | # NOTE: If you try to run the script with "./build_OSS.sh" instead of 15 | # "source build_OSS.sh", environment variables such as LD_LIBRARY_PATH and 16 | # TRT_LIB_DIR won't persist outside of the script, and you may run into problems. 17 | # 18 | # You can also manually set these environment variables yourself in that case. 
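# For example, a minimal manual setup (paths taken from build_OSS.sh below; adjust them
# if your container layout differs):
#   export TRT_RELEASE=/usr/src/tensorrt
#   export TRT_LIB_DIR=$TRT_RELEASE/lib
#   export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TRT_LIB_DIR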
19 | ``` 20 | 21 | Alternatively, you can take the commands from [build_OSS.sh](build_OSS.sh) and 22 | create a custom `Dockerfile` that pulls `FROM: nvcr.io/nvidia/tensorrt:19.10-py3` 23 | and then proceeds to run the build commands, similar to: https://github.com/NVIDIA/TensorRT/blob/master/docker/ubuntu-18.04.Dockerfile 24 | -------------------------------------------------------------------------------- /plugins/CustomIPluginV2/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | CUDA_PATH = /usr/local/cuda 16 | TRT_RELEASE = /usr/src/tensorrt 17 | GCC = g++ 18 | NVCC = $(CUDA_PATH)/bin/nvcc 19 | CCFLAGS = -g -std=c++11 -DNDEBUG 20 | INCLUDES += -I. -I$(CUDA_PATH)/include -I${TRT_RELEASE}/include 21 | LDFLAGS := -L${CUDA_PATH}/lib64 -L${TRT_RELEASE}/lib $(LDFLAGS) 22 | LDFLAGS += -lnvinfer -lcudart 23 | 24 | SO = CustomPlugin.so 25 | OBJ = $(shell find . -name '*.o') 26 | D = $(shell find . -name '*.d') 27 | DEP = $(OBJ:.o=.d) 28 | 29 | all: $(SO) $(CUDA_BIN) 30 | 31 | CustomPlugin.so: CustomPlugin.o 32 | 33 | -include $(DEP) 34 | 35 | clean: 36 | rm -rf $(SO) $(CUDA_BIN) $(OBJ) $(D) 37 | 38 | %.o: %.cpp 39 | $(GCC) $(CCFLAGS) -fPIC -MD -MP $(INCLUDES) -o $@ -c $< 40 | 41 | %.o: %.cu 42 | $(NVCC) $(CCFLAGS) -M -MT $@ $(INCLUDES) -o $(@:.o=.d) $< 43 | $(NVCC) $(CCFLAGS) $(INCLUDES) -Xcompiler -fPIC -o $@ -c $< 44 | 45 | $(SO): 46 | $(GCC) $(CCFLAGS) -shared -o $@ $+ $(LDFLAGS) 47 | -------------------------------------------------------------------------------- /int8/calibration/caches/vgg16.cache: -------------------------------------------------------------------------------- 1 | TRT-6001-EntropyCalibration2 2 | data: 3caa54fc 3 | vgg0_conv0_fwd: 3da9d57a 4 | vgg0_relu0_fwd: 3da986a4 5 | vgg0_conv1_fwd: 3e5a0c01 6 | vgg0_relu1_fwd: 3e185001 7 | vgg0_pool0_fwd: 3e185001 8 | vgg0_conv2_fwd: 3ea7f920 9 | vgg0_relu2_fwd: 3e509071 10 | vgg0_conv3_fwd: 3ead0503 11 | vgg0_relu3_fwd: 3e8edaa5 12 | vgg0_pool1_fwd: 3e8edaa5 13 | vgg0_conv4_fwd: 3f074c91 14 | vgg0_relu4_fwd: 3eceae4b 15 | vgg0_conv5_fwd: 3f1988f4 16 | vgg0_relu5_fwd: 3ee96826 17 | vgg0_conv6_fwd: 3f1e9c4b 18 | vgg0_relu6_fwd: 3f0a2710 19 | vgg0_pool2_fwd: 3f0a2710 20 | vgg0_conv7_fwd: 3f56751e 21 | vgg0_relu7_fwd: 3f20ddc6 22 | vgg0_conv8_fwd: 3f430a56 23 | vgg0_relu8_fwd: 3f05c536 24 | vgg0_conv9_fwd: 3f1dfee3 25 | vgg0_relu9_fwd: 3ef5c08b 26 | vgg0_pool3_fwd: 3ef5c08b 27 | vgg0_conv10_fwd: 3f0fb18d 28 | vgg0_relu10_fwd: 3eeb08cd 29 | vgg0_conv11_fwd: 3f0bb6af 30 | vgg0_relu11_fwd: 3ebae263 31 | vgg0_conv12_fwd: 3f094719 32 | vgg0_relu12_fwd: 3ea3b38b 33 | vgg0_pool4_fwd: 3ea3b38b 34 | flatten_60: 3ea3846e 35 | (Unnamed Layer* 32) [Constant]_output: 3a3f0b34 36 | (Unnamed Layer* 33) [Constant]_output: 3975ac8b 37 | (Unnamed Layer* 34) [Matrix Multiply]_output: 3e8096ba 38 | vgg0_dense0_fwd: 3e8b3679 39 | vgg0_dense0_relu_fwd: 
3e1aa1dd 40 | flatten_65: 3e1aa1dd 41 | (Unnamed Layer* 38) [Constant]_output: 3a8e0898 42 | (Unnamed Layer* 39) [Constant]_output: 39e44a25 43 | (Unnamed Layer* 40) [Matrix Multiply]_output: 3dd5d813 44 | vgg0_dense1_fwd: 3de2b7b4 45 | vgg0_dense1_relu_fwd: 3da062e2 46 | flatten_70: 3da062e2 47 | (Unnamed Layer* 44) [Constant]_output: 3a5610af 48 | (Unnamed Layer* 45) [Constant]_output: 3a4ca905 49 | (Unnamed Layer* 46) [Matrix Multiply]_output: 3e951e45 50 | vgg0_dense2_fwd: 3e77b398 51 | -------------------------------------------------------------------------------- /onnx/pytorch/alexnet_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2020 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse 18 | import torch 19 | import torchvision 20 | 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument("--opset", type=int, default=11, help="ONNX opset version to generate models with.") 23 | args = parser.parse_args() 24 | 25 | dummy_input = torch.randn(10, 3, 224, 224, device='cuda') 26 | model = torchvision.models.alexnet(pretrained=True).cuda() 27 | 28 | input_names = [ "actual_input_1" ] #+ [ "learned_%d" % i for i in range(16) ] 29 | output_names = [ "output1" ] 30 | 31 | # Fixed Shape 32 | torch.onnx.export(model, dummy_input, "alexnet_fixed.onnx", verbose=True, opset_version=args.opset, 33 | input_names=input_names, output_names=output_names) 34 | 35 | # Dynamic Shape 36 | dynamic_axes = {"actual_input_1":{0:"batch_size"}, "output1":{0:"batch_size"}} 37 | print(dynamic_axes) 38 | torch.onnx.export(model, dummy_input, "alexnet_dynamic.onnx", verbose=True, opset_version=args.opset, 39 | input_names=input_names, output_names=output_names, 40 | dynamic_axes=dynamic_axes) 41 | -------------------------------------------------------------------------------- /OSS/build_OSS.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Convenience script for building TensorRT OSS components inside 4 | # of a TensorRT/TensorFlow container from [NGC](ngc.nvidia.com) 5 | TAG=${1:-"master"} 6 | ROOT=`pwd` 7 | 8 | # Download OSS Components 9 | git clone -b ${TAG} https://github.com/nvidia/TensorRT TensorRT 10 | cd TensorRT 11 | git submodule update --init --recursive 12 | export TRT_SOURCE=`pwd` 13 | 14 | # Get most up to date ONNX parser if using OSS TensorRT master branch 15 | if [[ ${TAG} -eq "master" ]]; then 16 | pushd parsers/onnx && git checkout master && git pull && popd 17 | fi 18 | 19 | # Install required libraries 20 | apt-get update && apt-get install -y --no-install-recommends \ 21 | libcurl4-openssl-dev \ 22 | wget \ 23 | zlib1g-dev \ 24 | git \ 25 | pkg-config \ 26 | figlet 27 | 28 | # Install CMake >= 3.13 29 | pushd /tmp 30 | wget https://github.com/Kitware/CMake/releases/download/v3.14.4/cmake-3.14.4-Linux-x86_64.sh 31 | chmod +x cmake-3.14.4-Linux-x86_64.sh 32 | 
./cmake-3.14.4-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir --skip-license 33 | rm ./cmake-3.14.4-Linux-x86_64.sh 34 | popd 35 | 36 | # Necessary in the nvcr.io/nvidia/tensorflow:19.10-py3 container due to some PATH/cmake issues 37 | # Shouldn't be necessary in nvcr.io/nvidia/tensorrt:19.10-py3 container 38 | export CMAKE_ROOT=/usr/share/cmake-3.14 39 | source ~/.bashrc 40 | 41 | # Set relevant env variables relative to NGC container paths 42 | export TRT_RELEASE=/usr/src/tensorrt 43 | export TRT_LIB_DIR=$TRT_RELEASE/lib 44 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TRT_LIB_DIR 45 | 46 | # Generate Makefiles and build 47 | cd $TRT_SOURCE 48 | mkdir -p build && cd build 49 | cmake .. -DTRT_LIB_DIR=$TRT_RELEASE/lib -DTRT_BIN_DIR=`pwd`/out 50 | make -j$(nproc) 51 | 52 | # Install OSS Components 53 | make install && figlet "Success" || figlet "Failed" 54 | cd ${ROOT} 55 | -------------------------------------------------------------------------------- /onnx/trtexec/alexnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2020 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Download sample pytorch->onnx script 18 | wget https://gist.githubusercontent.com/rmccorm4/b72abac18aed6be4c1725db18eba4930/raw/3919c883b97a231877b454dae695fe074a1acdff/alexnet_onnx.py 19 | # Install dependencies 20 | python3 -m pip install torch==1.5.1 torchvision==0.6.1 onnx==1.6 21 | # Export sample Alexnet model to ONNX with a dynamic batch dimension 22 | python3 alexnet_onnx.py --opset=11 23 | 24 | # (1) Fixed-shape ONNX model example 25 | 26 | # Create TensorRT engine from fixed-shape ONNX model 27 | trtexec --explicitBatch \ 28 | --onnx=alexnet_fixed.onnx \ 29 | --saveEngine=alexnet_fixed.engine 30 | 31 | # (2) Dynamic-shape ONNX model example 32 | 33 | # Emulate "maxBatchSize" behavior from implicit batch engines by setting 34 | # an optimization profile with min=(1, *shape), opt=max=(maxBatchSize, *shape) 35 | MAX_BATCH_SIZE=32 36 | INPUT_NAME="actual_input_1" 37 | 38 | # Convert dynamic batch ONNX model to TRT Engine with optimization profile defined 39 | # --minShapes: kMIN shape 40 | # --optShapes: kOPT shape 41 | # --maxShapes: kMAX shape 42 | # --shapes: # Inference shape - this is like context.set_binding_shape(0, shape) 43 | trtexec --explicitBatch --onnx=alexnet_dynamic.onnx \ 44 | --minShapes=${INPUT_NAME}:1x3x224x224 \ 45 | --optShapes=${INPUT_NAME}:${MAX_BATCH_SIZE}x3x224x224 \ 46 | --maxShapes=${INPUT_NAME}:${MAX_BATCH_SIZE}x3x224x224 \ 47 | --shapes=${INPUT_NAME}:1x3x224x224 \ 48 | --saveEngine=alexnet_dynamic.engine 49 | -------------------------------------------------------------------------------- /uff/parse_uff_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2020 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | 
# you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import warnings 18 | # Filter numpy warnings 19 | warnings.filterwarnings("ignore") 20 | 21 | import tensorflow as tf 22 | # Filter tensorflow warnings 23 | tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) 24 | from uff.model import uff_pb2 25 | 26 | 27 | def parse_uff_metadata(filename): 28 | """ 29 | Parses a UFF file and returns its input/output metadata as a dictionary. 30 | 31 | Args: 32 | filename (str): Path to the UFF file to parse. 33 | 34 | Returns: 35 | Dict: A dictionary of the UFF model's inputs and outputs. 36 | 37 | >>> parse_uff_metadata('model.uff') 38 | {'Inputs': [{'name': 'input_image', 'shape': [1, 224, 224, 3]}], 39 | 'Outputs': [{'name': 'MarkOutput_0'}]} 40 | """ 41 | with open(filename, "rb") as f: 42 | model = uff_pb2.MetaGraph() 43 | # Protobuf API 44 | model.ParseFromString(f.read()) 45 | 46 | inputs, outputs = [], [] 47 | for graph in model.graphs: 48 | for node in graph.nodes: 49 | if node.operation == "Input": 50 | shape = list(node.fields["shape"].i_list.val) 51 | inputs.append({"name": node.id, "shape": shape}) 52 | 53 | elif node.operation == "MarkOutput": 54 | outputs.append({"name": node.id}) 55 | 56 | metadata = {"Inputs": inputs, "Outputs": outputs} 57 | return metadata 58 | 59 | 60 | if __name__ == '__main__': 61 | import argparse 62 | from pprint import pprint 63 | parser = argparse.ArgumentParser("Parse a UFF file and return its metadata.") 64 | parser.add_argument("--uff", required=True, type=str, help="UFF file to parse.") 65 | args = parser.parse_args() 66 | 67 | metadata = parse_uff_metadata(args.uff) 68 | pprint(metadata) 69 | -------------------------------------------------------------------------------- /network/dump_network.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | 17 | # Reference: https://devtalk.nvidia.com/default/topic/1064669/tensorrt/troubleshooting-suggestions-for-onnx-v-tensorrt-discrepancies/post/5392296/#5392296 18 | def dump_network(network, filename): 19 | """Dumps TensorRT parsed network attributes to a JSON file. 20 | 21 | Parameters 22 | ---------- 23 | network: tensorrt.INetworkDefinition 24 | Network created from parsing original model (ONNX, etc.) 25 | 26 | filename: str 27 | Filename to dump the network info to in JSON format. 
28 | 29 | Example 30 | ------- 31 | with trt.Builder(TRT_LOGGER) as builder, builder.create_network(), trt.OnnxParser(network, TRT_LOGGER) as parser: 32 | ... 33 | 34 | # Fill network attributes with information by parsing model 35 | with open("model.onnx", "rb") as f: 36 | parser.parse(f.read()) 37 | 38 | dump_network(network, "network.json") 39 | """ 40 | 41 | network_description = {} 42 | 43 | for i in range(network.num_layers): 44 | layer = network.get_layer(i) 45 | network_description[i] = { 46 | a: getattr(layer, a).__repr__() for a in 47 | ('name', 'type', 'precision', 'precision_is_set', 'num_inputs', 48 | 'num_outputs') 49 | } 50 | network_description[i]['inputs'] = { 51 | i: { 52 | a: getattr(layer.get_input(i), a).__repr__() for a in 53 | ('name', 'shape', 'dtype') 54 | } for i in range(layer.num_inputs) 55 | } 56 | network_description[i]['outputs'] = { 57 | i: { 58 | a: getattr(layer.get_output(i), a).__repr__() for a in 59 | ('name', 'shape', 'dtype') 60 | } for i in range(layer.num_outputs) 61 | } 62 | 63 | with open(filename, 'w') as fp: 64 | print("Writing {:}".format(filename)) 65 | json.dump(network_description, fp, indent=4, sort_keys=True) 66 | 67 | 68 | -------------------------------------------------------------------------------- /plugins/CustomIPluginV2/CustomPlugin.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef CUSTOM_PLUGIN_H 18 | #define CUSTOM_PLUGIN_H 19 | 20 | #include "NvInferPlugin.h" 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | using namespace nvinfer1; 27 | using namespace std; 28 | 29 | class CustomPlugin : public IPluginV2 30 | { 31 | public: 32 | CustomPlugin(const string name); 33 | 34 | CustomPlugin() = delete; 35 | 36 | int getNbOutputs() const override; 37 | 38 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) override; 39 | 40 | int initialize() override; 41 | 42 | void terminate() override; 43 | 44 | size_t getWorkspaceSize(int batchSize) const override; 45 | 46 | int enqueue(int batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 47 | 48 | size_t getSerializationSize() const override; 49 | 50 | void serialize(void *buffer) const override; 51 | 52 | void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override; 53 | 54 | bool supportsFormat(DataType type, PluginFormat format) const override; 55 | 56 | const char *getPluginType() const override; 57 | 58 | const char *getPluginVersion() const override; 59 | 60 | void destroy() override; 61 | 62 | IPluginV2 *clone() const override; 63 | 64 | void setPluginNamespace(const char *pluginNamespace) override; 65 | 66 | const char *getPluginNamespace() const override; 67 | 68 | private: 69 | const string mLayerName; 70 | string mNamespace; 71 | }; 72 | 73 | class CustomPluginCreator : public IPluginCreator 74 | { 75 | public: 76 | CustomPluginCreator(); 77 | 78 | const char *getPluginName() const override; 79 | 80 | const char *getPluginVersion() const override; 81 | 82 | const PluginFieldCollection *getFieldNames() override; 83 | 84 | IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) override; 85 | 86 | IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) override; 87 | 88 | void setPluginNamespace(const char *pluginNamespace) override; 89 | 90 | const char *getPluginNamespace() const override; 91 | private: 92 | static PluginFieldCollection mFC; 93 | static vector mPluginAttributes; 94 | string mNamespace; 95 | }; 96 | 97 | #endif /* CustomPlugin.h */ 98 | -------------------------------------------------------------------------------- /plugins/README.md: -------------------------------------------------------------------------------- 1 | # Plugins 2 | 3 | This directory contains some tools and samples related to 4 | TensorRT Plugins. 5 | 6 | ## Listing Registered Plugins 7 | 8 | ``` 9 | $ python list_plugins.py -h 10 | usage: list_plugins.py [-h] [-p [PLUGINS [PLUGINS ...]]] 11 | 12 | Script to list registered TensorRT plugins. Can optionally load custom plugin 13 | libraries. 14 | 15 | optional arguments: 16 | -h, --help show this help message and exit 17 | -p [PLUGINS [PLUGINS ...]], --plugins [PLUGINS [PLUGINS ...]] 18 | Path to a plugin (.so) library file. Accepts multiple 19 | arguments. 20 | ``` 21 | 22 | Default `TRT_RELEASE` plugins: 23 | ``` 24 | $ python list_plugins.py 25 | 2020-03-13 22:12:35 - __main__ - INFO - Registering plugins... 26 | 2020-03-13 22:12:35 - __main__ - INFO - Registered Plugin Names: 27 | ['RnRes2Br2bBr2c_TRT', 28 | 'RnRes2Br1Br2c_TRT', 29 | 'CgPersistentLSTMPlugin_TRT', 30 | 'SingleStepLSTMPlugin', 31 | ... 
32 | 'SpecialSlice_TRT', 33 | 'InstanceNormalization_TRT'] 34 | ``` 35 | 36 | ## Registering OSS Plugins 37 | 38 | When building the OSS components, the default plugin library may be overwritten 39 | by the OSS plugin library, or the OSS library may take precedence over it. As long as you have both .so 40 | library files, you can load both: 41 | 42 | Build OSS Components: 43 | ``` 44 | $ wget https://raw.githubusercontent.com/rmccorm4/tensorrt-utils/master/OSS/build_OSS.sh 45 | $ source build_OSS.sh 46 | ``` 47 | 48 | Load and list multiple plugin libraries: 49 | ``` 50 | $ python list_plugins.py --plugins TensorRT/build/out/libnvinfer_plugin.so 51 | 2020-03-13 22:18:51 - __main__ - INFO - Loading plugin library: /mnt/TensorRT/build/out/libnvinfer_plugin.so 52 | 2020-03-13 22:18:51 - __main__ - INFO - Registering plugins... 53 | 2020-03-13 22:18:51 - __main__ - INFO - Registered Plugin Names: 54 | ['RnRes2Br2bBr2c_TRT', 55 | 'RnRes2Br1Br2c_TRT', 56 | 'CgPersistentLSTMPlugin_TRT', 57 | 'SingleStepLSTMPlugin', 58 | 'CustomEmbLayerNormPluginDynamic', <------ OSS Plugins added 59 | 'CustomFCPluginDynamic', <------ OSS Plugins added 60 | 'CustomGeluPluginDynamic', <------ OSS Plugins added 61 | 'CustomQKVToContextPluginDynamic', <------ OSS Plugins added 62 | 'CustomSkipLayerNormPluginDynamic', <------ OSS Plugins added 63 | ... 64 | 'SpecialSlice_TRT', 65 | 'InstanceNormalization_TRT'] 66 | ``` 67 | 68 | ## Registering Custom Plugins 69 | 70 | The same concept as for the OSS plugins above applies; here is an explicit example to be extra clear: 71 | ``` 72 | $ pushd CustomIPluginV2/ 73 | $ make 74 | g++ -g -std=c++11 -DNDEBUG -fPIC -MD -MP -I. -I/usr/local/cuda/include -I/usr/src/tensorrt/include -o CustomPlugin.o -c CustomPlugin.cpp 75 | g++ -g -std=c++11 -DNDEBUG -shared -o CustomPlugin.so CustomPlugin.o -L/usr/local/cuda/lib64 -L/usr/src/tensorrt/lib -lnvinfer -lcudart 76 | 77 | $ popd 78 | $ python list_plugins.py --plugins CustomIPluginV2/CustomPlugin.so 79 | 2020-03-13 22:16:06 - __main__ - INFO - Loading plugin library: CustomIPluginV2/CustomPlugin.so 80 | Plugin attribute number: 5 81 | 2020-03-13 22:16:06 - __main__ - INFO - Registering plugins... 82 | 2020-03-13 22:16:06 - __main__ - INFO - Registered Plugin Names: 83 | ['RnRes2Br2bBr2c_TRT', 84 | 'RnRes2Br1Br2c_TRT', 85 | 'CgPersistentLSTMPlugin_TRT', 86 | 'SingleStepLSTMPlugin', 87 | 'CustomPlugin', <------------- Custom plugin registered 88 | ... 89 | 'SpecialSlice_TRT', 90 | 'InstanceNormalization_TRT'] 91 | ``` 92 | -------------------------------------------------------------------------------- /plugins/list_plugins.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2020 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import ctypes 18 | import logging 19 | import tensorrt as trt 20 | 21 | logging.basicConfig(level=logging.DEBUG, 22 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 23 | datefmt="%Y-%m-%d %H:%M:%S") 24 | logger = logging.getLogger(__name__) 25 | TRT_LOGGER = trt.Logger(trt.Logger.INFO) 26 | 27 | # https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/python_api/infer/Plugin/IPluginCreator.html 28 | def get_all_plugin_details(plugin_registry): 29 | details = {} 30 | 31 | for c in plugin_registry.plugin_creator_list: 32 | details[c.name] = {} 33 | details[c.name]["tensorrt_version"] = c.tensorrt_version 34 | details[c.name]["plugin_version"] = c.plugin_version 35 | details[c.name]["plugin_namespace"] = c.plugin_namespace 36 | details[c.name]["PluginFields"] = [] 37 | 38 | plugin_field_collection = c.field_names 39 | if plugin_field_collection: 40 | for i, x in enumerate(list(plugin_field_collection)): 41 | pfd = { 42 | "name": x.name, 43 | "data": x.data, 44 | "type": str(x.type), 45 | "size": x.size 46 | } 47 | details[c.name]["PluginFields"].append(pfd) 48 | 49 | return details 50 | 51 | 52 | def get_all_plugin_names(plugin_registry): 53 | return [c.name for c in plugin_registry.plugin_creator_list] 54 | 55 | 56 | # Example usage: 57 | # (1) python list_plugins.py 58 | # (2) python list_plugins.py --plugins CustomIPluginV2/CustomPlugin.so 59 | # (3) python list_plugins.py --plugins CustomIPluginV2/CustomPlugin.so /mnt/TensorRT/build/out/libnvinfer_plugin.so 60 | if __name__ == "__main__": 61 | import argparse 62 | parser = argparse.ArgumentParser(description="Script to list registered TensorRT plugins. Can optionally load custom plugin libraries.") 63 | parser.add_argument("-p", "--plugins", nargs="*", default=[], help="Path to a plugin (.so) library file. Accepts multiple arguments.") 64 | args = parser.parse_args() 65 | 66 | for plugin_library in args.plugins: 67 | # Example default plugin library: "/usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so" 68 | logger.info("Loading plugin library: {}".format(plugin_library)) 69 | ctypes.CDLL(plugin_library, mode=ctypes.RTLD_GLOBAL) 70 | 71 | logger.info("Registering plugins...") 72 | # Register the plugins loaded from libraries 73 | trt.init_libnvinfer_plugins(TRT_LOGGER, "") 74 | 75 | # Get plugin registry to view the registered plugins 76 | plugin_registry = trt.get_plugin_registry() 77 | 78 | from pprint import pprint 79 | #logger.info("Registered Plugin Details:") 80 | #pprint(get_all_plugin_details(plugin_registry)) 81 | 82 | logger.info("Registered Plugin Names:") 83 | pprint(get_all_plugin_names(plugin_registry)) 84 | -------------------------------------------------------------------------------- /plugins/CustomIPluginV2/test_plugin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2020 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import os 18 | import ctypes 19 | import logging 20 | 21 | import numpy as np 22 | import tensorrt as trt 23 | 24 | logging.basicConfig(level=logging.DEBUG, 25 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 26 | datefmt="%Y-%m-%d %H:%M:%S") 27 | logger = logging.getLogger(__name__) 28 | TRT_LOGGER = trt.Logger(trt.Logger.INFO) 29 | 30 | def get_plugin_creator_by_name(plugin_registry, plugin_name): 31 | plugin_creator_list = plugin_registry.plugin_creator_list 32 | for c in plugin_creator_list: 33 | if c.name == plugin_name: 34 | return c 35 | 36 | if __name__ == '__main__': 37 | # Load our CustomPlugin library 38 | plugin_library = os.path.join(".", "CustomPlugin.so") 39 | logger.info("Loading plugin library: {}".format(plugin_library)) 40 | ctypes.cdll.LoadLibrary(plugin_library) 41 | 42 | # Initialize/Register plugins 43 | logger.info("Initializing plugin registry") 44 | trt.init_libnvinfer_plugins(TRT_LOGGER, "") 45 | plugin_registry = trt.get_plugin_registry() 46 | 47 | # List all registered plugins. Should see our CustomPlugin in this list. 48 | logger.info("Registered Plugins:") 49 | print("\n".join([c.name for c in plugin_registry.plugin_creator_list])) 50 | 51 | # Get plugin creator for our custom plugin. 52 | plugin_name = "CustomPlugin" 53 | logger.info("Looking up IPluginCreator for {}".format(plugin_name)) 54 | plugin_creator = get_plugin_creator_by_name(plugin_registry, plugin_name) 55 | if not plugin_creator: 56 | raise Exception("[{}] IPluginCreator not found.".format(plugin_name)) 57 | 58 | # Add our custom plugin to a network, and build a TensorRT engine from it. 59 | with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network: 60 | data_shape = (3, 64, 64) 61 | data = network.add_input("data", trt.DataType.FLOAT, data_shape) 62 | 63 | logger.info("Creating PluginFields for {} plugin".format(plugin_name)) 64 | plugin_fields = [trt.PluginField("var{}".format(i), np.array([i], dtype=np.int32), trt.PluginFieldType.INT32) for i in range(5)] 65 | logger.info("Creating PluginFieldCollection for {} plugin".format(plugin_name)) 66 | plugin_field_collection = trt.PluginFieldCollection(plugin_fields) 67 | logger.info("Creating {} plugin from PluginFieldCollection".format(plugin_name)) 68 | customPlugin = plugin_creator.create_plugin(plugin_name, plugin_field_collection) 69 | 70 | logger.info("Adding {} plugin to network.".format(plugin_name)) 71 | out = network.add_plugin_v2([data], customPlugin) 72 | network.mark_output(out.get_output(0)) 73 | 74 | # Serialize our engine for future use 75 | logger.info("Building engine...") 76 | with builder.build_cuda_engine(network) as engine: 77 | filename = "custom_plugin.engine" 78 | with open(filename, "wb") as f: 79 | f.write(engine.serialize()) 80 | logger.info("Serialized engine file written to {}".format(filename)) 81 | 82 | # TODO: Add inference example 83 | # x = np.ones(data_shape, dtype=np.float32) 84 | 85 | -------------------------------------------------------------------------------- /plugins/CustomIPluginV2/README.md: -------------------------------------------------------------------------------- 1 | # IPluginV2 Dummy Example 2 | 3 | This directory contains a minimal example of creating, building, 4 | serializing, and de-serializing a TensorRT engine using a custom 5 | TensorRT Plugin using the IPluginV2 interface. 6 | 7 | The goal is to fill any potential gaps in documentation and make 8 | it more clear how to use plugins in TensorRT. 
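As a quick preview of the Python side, de-serializing the resulting engine mirrors the `trtexec` flow shown in the Usage section below: the plugin library must be loaded and registered before the engine is read back. A minimal sketch (assuming the `CustomPlugin.so` and `custom_plugin.engine` files produced by the steps below):

```
import ctypes
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.INFO)

# Load and register the custom plugin *before* deserializing the engine,
# otherwise TensorRT cannot resolve the plugin layer.
ctypes.CDLL("./CustomPlugin.so", mode=ctypes.RTLD_GLOBAL)
trt.init_libnvinfer_plugins(TRT_LOGGER, "")

with open("custom_plugin.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
```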
9 | 10 | 11 | ## Disclaimer 12 | 13 | The Plugin interface evolves and gets more features with each release. 14 | `IPluginV2` is a rather old interface from TensorRT 5, but still works in TensorRT 6 and 7. 15 | 16 | Make sure to check out the 17 | [documentation](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#plugin-api-desc) 18 | for more details on each. 19 | 20 | I generally follow these rough guidelines: 21 | * **TensorRT 5**: `IPluginV2` 22 | * **TensorRT 6**: Prefer `IPluginV2Ext`, but `IPluginV2` is still supported. 23 | * **TensorRT 7**: Prefer `IPluginV2DynamicExt` or `IPluginV2IOExt`, but 24 | `IPluginV2` and `IPluginV2Ext` are still supported. 25 | 26 | ## Usage 27 | 28 | ``` 29 | # (Optional) Start a TensorRT 7 Container 30 | 31 | 32 | # Build the plugin library .so 33 | make 34 | 35 | # Create a plugin object, build an engine, and serialize it 36 | python test_plugin.py 37 | 38 | # Deserialize the engine and run inference on it with random data 39 | trtexec --loadEngine=custom_plugin.engine --plugins=./CustomPlugin.so 40 | ``` 41 | 42 | > NOTE: You must load a plugin's library before attempting to deserialize it. 43 | In other words, the above `trtexec` command will fail without 44 | `--plugins=./CustomPlugin.so` 45 | 46 | ## Example Outputs 47 | 48 | Building the plugin library: 49 | ``` 50 | $ make 51 | g++ -g -std=c++11 -DNDEBUG -fPIC -MD -MP -I. -I/usr/local/cuda/include -I/usr/src/tensorrt/include -o CustomPlugin.o -c CustomPlugin.cpp 52 | g++ -g -std=c++11 -DNDEBUG -shared -o CustomPlugin.so CustomPlugin.o -L/usr/local/cuda/lib64 -L/usr/src/tensorrt/lib -lnvinfer -lcudart 53 | ``` 54 | 55 | Creating, building, and serializing an engine with the plugin: 56 | ``` 57 | root@028772c8fd4f:/mnt/CustomIPluginV2# python test_plugin.py 58 | 2020-03-13 21:53:39 - __main__ - INFO - Loading plugin library: ./CustomPlugin.so 59 | Plugin attribute number: 5 60 | 2020-03-13 21:53:39 - __main__ - INFO - Initializing plugin registry 61 | 2020-03-13 21:53:39 - __main__ - INFO - Registered Plugins: 62 | ... 63 | CustomPlugin 64 | ... 65 | 2020-03-13 21:53:39 - __main__ - INFO - Looking up IPluginCreator for CustomPlugin 66 | 2020-03-13 21:53:40 - __main__ - INFO - Creating PluginFields for CustomPlugin plugin 67 | 2020-03-13 21:53:40 - __main__ - INFO - Creating PluginFieldCollection for CustomPlugin plugin 68 | 2020-03-13 21:53:40 - __main__ - INFO - Creating CustomPlugin plugin from PluginFieldCollection 69 | var0: 0 70 | var1: 1 71 | var2: 2 72 | var3: 3 73 | var4: 4 74 | 2020-03-13 21:53:40 - __main__ - INFO - Adding CustomPlugin plugin to network. 75 | 2020-03-13 21:53:40 - __main__ - INFO - Building engine... 76 | [TensorRT] INFO: Detected 1 inputs and 1 output network tensors. 77 | 2020-03-13 21:53:41 - __main__ - INFO - Serialized engine file written to custom_plugin.engine 78 | ``` 79 | 80 | Loading the engine and doing inference: 81 | ``` 82 | $ trtexec --loadEngine=custom_plugin.engine --plugins=./CustomPlugin.so 83 | ... 84 | [03/13/2020-21:36:01] [I] Plugins: ./CustomPlugin.so 85 | ... 86 | [03/13/2020-21:36:01] [I] Loading supplied plugin library: ./CustomPlugin.so 87 | ... 
88 | [03/13/2020-21:36:06] [I] GPU Compute 89 | [03/13/2020-21:36:06] [I] min: 0.00292969 ms 90 | [03/13/2020-21:36:06] [I] max: 0.022583 ms 91 | [03/13/2020-21:36:06] [I] mean: 0.00384302 ms 92 | [03/13/2020-21:36:06] [I] median: 0.00408936 ms 93 | [03/13/2020-21:36:06] [I] percentile: 0.00415039 ms at 99% 94 | [03/13/2020-21:36:06] [I] total compute time: 0.29489 s 95 | &&&& PASSED TensorRT.trtexec # trtexec --loadEngine=custom_plugin.engine --plugins=./CustomPlugin.so 96 | ``` 97 | -------------------------------------------------------------------------------- /int8/calibration/processing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | 17 | import numpy as np 18 | from PIL import Image 19 | 20 | 21 | logging.basicConfig(level=logging.DEBUG, 22 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 23 | datefmt="%Y-%m-%d %H:%M:%S") 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | def preprocess_imagenet(image, channels=3, height=224, width=224): 28 | """Pre-processing for Imagenet-based Image Classification Models: 29 | resnet50, vgg16, mobilenet, etc. (Doesn't seem to work for Inception) 30 | 31 | Parameters 32 | ---------- 33 | image: PIL.Image 34 | The image resulting from PIL.Image.open(filename) to preprocess 35 | channels: int 36 | The number of channels the image has (Usually 1 or 3) 37 | height: int 38 | The desired height of the image (usually 224 for Imagenet data) 39 | width: int 40 | The desired width of the image (usually 224 for Imagenet data) 41 | 42 | Returns 43 | ------- 44 | img_data: numpy array 45 | The preprocessed image data in the form of a numpy array 46 | 47 | """ 48 | # Get the image in CHW format 49 | resized_image = image.resize((width, height), Image.ANTIALIAS) 50 | img_data = np.asarray(resized_image).astype(np.float32) 51 | 52 | if len(img_data.shape) == 2: 53 | # For images without a channel dimension, we stack 54 | img_data = np.stack([img_data] * 3) 55 | logger.debug("Received grayscale image. Reshaped to {:}".format(img_data.shape)) 56 | else: 57 | img_data = img_data.transpose([2, 0, 1]) 58 | 59 | mean_vec = np.array([0.485, 0.456, 0.406]) 60 | stddev_vec = np.array([0.229, 0.224, 0.225]) 61 | assert img_data.shape[0] == channels 62 | 63 | for i in range(img_data.shape[0]): 64 | # Scale each pixel to [0, 1] and normalize per channel. 65 | img_data[i, :, :] = (img_data[i, :, :] / 255 - mean_vec[i]) / stddev_vec[i] 66 | 67 | return img_data 68 | 69 | 70 | def preprocess_inception(image, channels=3, height=224, width=224): 71 | """Pre-processing for InceptionV1. Inception expects different pre-processing 72 | than {resnet50, vgg16, mobilenet}. This may not be totally correct, 73 | but it worked for some simple test images. 
74 | 75 | Parameters 76 | ---------- 77 | image: PIL.Image 78 | The image resulting from PIL.Image.open(filename) to preprocess 79 | channels: int 80 | The number of channels the image has (Usually 1 or 3) 81 | height: int 82 | The desired height of the image (usually 224 for Imagenet data) 83 | width: int 84 | The desired width of the image (usually 224 for Imagenet data) 85 | 86 | Returns 87 | ------- 88 | img_data: numpy array 89 | The preprocessed image data in the form of a numpy array 90 | 91 | """ 92 | # Get the image in CHW format 93 | resized_image = image.resize((width, height), Image.BILINEAR) 94 | img_data = np.asarray(resized_image).astype(np.float32) 95 | 96 | if len(img_data.shape) == 2: 97 | # For images without a channel dimension, we stack 98 | img_data = np.stack([img_data] * 3) 99 | logger.debug("Received grayscale image. Reshaped to {:}".format(img_data.shape)) 100 | else: 101 | img_data = img_data.transpose([2, 0, 1]) 102 | 103 | return img_data 104 | -------------------------------------------------------------------------------- /int8/calibration/SimpleCalibrator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import numpy as np 4 | import tensorrt as trt 5 | import pycuda.driver as cuda 6 | import pycuda.autoinit 7 | 8 | logging.basicConfig(level=logging.DEBUG, 9 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 10 | datefmt="%Y-%m-%d %H:%M:%S") 11 | logger = logging.getLogger(__name__) 12 | 13 | class SimpleCalibrator(trt.IInt8EntropyCalibrator2): 14 | def __init__(self, network, config): 15 | super().__init__() 16 | 17 | # TODO: Not sure of difference between get_batch_size and what's returned in get_batch ? 18 | # Notes: 19 | # get_batch_size() is required to return non-null value 20 | # get_batch_size() can return 0 with seemingly no consequence with/without calibration cache 21 | # get_batch_size() can return -1 with seemingly no consequence with/without calibration cache 22 | # get_batch() seems to do the work, as long as get_batch_size doesn't throw an error 23 | self.batch_size = -1 24 | self.shapes = [] 25 | self.device_inputs = None 26 | num_calibration_samples = 1000 27 | self.iterator = (i for i in range(num_calibration_samples)) 28 | self.cache_file = "simple_calibration.cache" 29 | self.network = network 30 | self.calib_profile = config.get_calibration_profile() 31 | 32 | def get_batch(self, input_names, p_str=None): 33 | try: 34 | # Use iterator here to avoid having to pass input names to constructor 35 | next(self.iterator) 36 | if not self.shapes: 37 | self.set_shapes(input_names) 38 | 39 | if not self.device_inputs: 40 | self.device_inputs = [cuda.mem_alloc(np.zeros(s, dtype=np.float32).nbytes) for s in self.shapes] 41 | 42 | if not self.batch_size: 43 | # Get batch size from first input in calibration shapes. 
Assumes batch sizes 44 | # are the same for every input 45 | self.batch_size = self.shapes[0][0] 46 | 47 | batches = [np.random.random(s).astype(np.float32) for s in self.shapes] 48 | for i in range(len(batches)): 49 | cuda.memcpy_htod(self.device_inputs[i], batches[i]) 50 | 51 | return [int(d) for d in self.device_inputs] 52 | except StopIteration: 53 | return None 54 | 55 | def get_batch_size(self): 56 | return self.batch_size 57 | 58 | def set_shapes(self, input_names): 59 | if self.calib_profile: 60 | self.shapes = [self.calib_profile.get_shape(name) for name in input_names] 61 | else: 62 | self.shapes = [] 63 | # This assumes order of input_names matches the network input indices 64 | for i, name in enumerate(input_names): 65 | shape = self.network.get_input(i).shape 66 | _shape = [] 67 | found_dynamic = False 68 | # Replace any dynamic dimensions with ones if any 69 | for dim in shape: 70 | if dim < 0: 71 | dim = 1 72 | found_dynamic = True 73 | 74 | _shape.append(dim) 75 | 76 | _shape = tuple(_shape) 77 | if found_dynamic: 78 | logger.warning("[{}] has dynamic shape: {}. Set to {} instead.".format(name, shape, _shape)) 79 | 80 | self.shapes.append(_shape) 81 | 82 | def read_calibration_cache(self): 83 | # If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None. 84 | if os.path.exists(self.cache_file): 85 | with open(self.cache_file, "rb") as f: 86 | logger.info("Using calibration cache to save time: {:}".format(self.cache_file)) 87 | return f.read() 88 | 89 | def write_calibration_cache(self, cache): 90 | with open(self.cache_file, "wb") as f: 91 | logger.info("Caching calibration data for future use: {:}".format(self.cache_file)) 92 | f.write(cache) 93 | -------------------------------------------------------------------------------- /int8/calibration/caches/inception_v1.cache: -------------------------------------------------------------------------------- 1 | TRT-6001-EntropyCalibration2 2 | data_0: 4000890a 3 | conv1/7x7_s2_1: 4156d4ee 4 | conv1/7x7_s2_2: 4156d4ee 5 | pool1/3x3_s2_1: 4156d4ee 6 | pool1/norm1_1: 3f966694 7 | conv2/3x3_reduce_1: 40531d41 8 | conv2/3x3_reduce_2: 401afafe 9 | conv2/3x3_1: 40ab3b0d 10 | conv2/3x3_2: 40864cce 11 | conv2/norm2_1: 3f8be311 12 | pool2/3x3_s2_1: 3f8be311 13 | inception_3a/1x1_1: 4043f896 14 | inception_3a/1x1_2: 4095ca78 15 | inception_3a/3x3_reduce_1: 4026793f 16 | inception_3a/3x3_reduce_2: 4028ef9d 17 | inception_3a/3x3_1: 409c6007 18 | inception_3a/3x3_2: 4095ca78 19 | inception_3a/5x5_reduce_1: 403a1a1f 20 | inception_3a/5x5_reduce_2: 40482ad9 21 | inception_3a/5x5_1: 40851b0c 22 | inception_3a/5x5_2: 4095ca78 23 | inception_3a/pool_1: 3f8be311 24 | inception_3a/pool_proj_1: 4091939e 25 | inception_3a/pool_proj_2: 4095ca78 26 | inception_3a/output_1: 4095ca78 27 | inception_3b/1x1_1: 403d5428 28 | inception_3b/1x1_2: 40644790 29 | inception_3b/3x3_reduce_1: 40542e9a 30 | inception_3b/3x3_reduce_2: 405a452e 31 | inception_3b/3x3_1: 4062d3c4 32 | inception_3b/3x3_2: 40644790 33 | inception_3b/5x5_reduce_1: 407f323f 34 | inception_3b/5x5_reduce_2: 40824e73 35 | inception_3b/5x5_1: 40be5714 36 | inception_3b/5x5_2: 40644790 37 | inception_3b/pool_1: 4095ca78 38 | inception_3b/pool_proj_1: 40a1ea9b 39 | inception_3b/pool_proj_2: 40644790 40 | inception_3b/output_1: 40644790 41 | pool3/3x3_s2_1: 40644790 42 | inception_4a/1x1_1: 4049c022 43 | inception_4a/1x1_2: 405eeea7 44 | inception_4a/3x3_reduce_1: 406c2e30 45 | inception_4a/3x3_reduce_2: 4080ab0e 46 | inception_4a/3x3_1: 4090d9a8 47 | 
inception_4a/3x3_2: 405eeea7 48 | inception_4a/5x5_reduce_1: 40611015 49 | inception_4a/5x5_reduce_2: 40611015 50 | inception_4a/5x5_1: 406a8ffc 51 | inception_4a/5x5_2: 405eeea7 52 | inception_4a/pool_1: 40644790 53 | inception_4a/pool_proj_1: 40948e26 54 | inception_4a/pool_proj_2: 405eeea7 55 | inception_4a/output_1: 405eeea7 56 | inception_4b/1x1_1: 3ffd9f86 57 | inception_4b/1x1_2: 401727d7 58 | inception_4b/3x3_reduce_1: 40049b24 59 | inception_4b/3x3_reduce_2: 401d035a 60 | inception_4b/3x3_1: 3ff59d91 61 | inception_4b/3x3_2: 401727d7 62 | inception_4b/5x5_reduce_1: 402db153 63 | inception_4b/5x5_reduce_2: 402e5e41 64 | inception_4b/5x5_1: 402e0b6b 65 | inception_4b/5x5_2: 401727d7 66 | inception_4b/pool_1: 405eeea7 67 | inception_4b/pool_proj_1: 404773ae 68 | inception_4b/pool_proj_2: 401727d7 69 | inception_4b/output_1: 401727d7 70 | inception_4c/1x1_1: 3feede53 71 | inception_4c/1x1_2: 400a031f 72 | inception_4c/3x3_reduce_1: 400b4652 73 | inception_4c/3x3_reduce_2: 400cf856 74 | inception_4c/3x3_1: 400d8c53 75 | inception_4c/3x3_2: 400a031f 76 | inception_4c/5x5_reduce_1: 402307bd 77 | inception_4c/5x5_reduce_2: 4026d7a2 78 | inception_4c/5x5_1: 4027de49 79 | inception_4c/5x5_2: 400a031f 80 | inception_4c/pool_1: 401727d7 81 | inception_4c/pool_proj_1: 4021fba5 82 | inception_4c/pool_proj_2: 400a031f 83 | inception_4c/output_1: 400a031f 84 | inception_4d/1x1_1: 3fd00f7e 85 | inception_4d/1x1_2: 3fe642a2 86 | inception_4d/3x3_reduce_1: 40000041 87 | inception_4d/3x3_reduce_2: 400e2d06 88 | inception_4d/3x3_1: 400d0762 89 | inception_4d/3x3_2: 3fe642a2 90 | inception_4d/5x5_reduce_1: 3fc8d056 91 | inception_4d/5x5_reduce_2: 3fef36e0 92 | inception_4d/5x5_1: 40047a2f 93 | inception_4d/5x5_2: 3fe642a2 94 | inception_4d/pool_1: 400a031f 95 | inception_4d/pool_proj_1: 40149438 96 | inception_4d/pool_proj_2: 3fe642a2 97 | inception_4d/output_1: 3fe642a2 98 | inception_4e/1x1_1: 3f5b2097 99 | inception_4e/1x1_2: 3f849c92 100 | inception_4e/3x3_reduce_1: 3f5633bd 101 | inception_4e/3x3_reduce_2: 3f6a3ad3 102 | inception_4e/3x3_1: 3f6fe552 103 | inception_4e/3x3_2: 3f849c92 104 | inception_4e/5x5_reduce_1: 3fafab34 105 | inception_4e/5x5_reduce_2: 3fb8b359 106 | inception_4e/5x5_1: 3fa8c3bd 107 | inception_4e/5x5_2: 3f849c92 108 | inception_4e/pool_1: 3fe642a2 109 | inception_4e/pool_proj_1: 3fbccee9 110 | inception_4e/pool_proj_2: 3f849c92 111 | inception_4e/output_1: 3f849c92 112 | pool4/3x3_s2_1: 3f849c92 113 | inception_5a/1x1_1: 3f57d6e4 114 | inception_5a/1x1_2: 3f58482d 115 | inception_5a/3x3_reduce_1: 3f6ef654 116 | inception_5a/3x3_reduce_2: 3f80e39a 117 | inception_5a/3x3_1: 3f733ce2 118 | inception_5a/3x3_2: 3f58482d 119 | inception_5a/5x5_reduce_1: 3fa70d16 120 | inception_5a/5x5_reduce_2: 3fac44d9 121 | inception_5a/5x5_1: 3f87ce74 122 | inception_5a/5x5_2: 3f58482d 123 | inception_5a/pool_1: 3f849c92 124 | inception_5a/pool_proj_1: 3f9c3113 125 | inception_5a/pool_proj_2: 3f58482d 126 | inception_5a/output_1: 3f58482d 127 | inception_5b/1x1_1: 3eb75b24 128 | inception_5b/1x1_2: 3eaf7ad1 129 | inception_5b/3x3_reduce_1: 3f07a433 130 | inception_5b/3x3_reduce_2: 3f0df6c8 131 | inception_5b/3x3_1: 3eb34a5c 132 | inception_5b/3x3_2: 3eaf7ad1 133 | inception_5b/5x5_reduce_1: 3f0aaebb 134 | inception_5b/5x5_reduce_2: 3f0aaebb 135 | inception_5b/5x5_1: 3e9cbebd 136 | inception_5b/5x5_2: 3eaf7ad1 137 | inception_5b/pool_1: 3f58482d 138 | inception_5b/pool_proj_1: 3f0e8bad 139 | inception_5b/pool_proj_2: 3eaf7ad1 140 | inception_5b/output_1: 3eaf7ad1 141 | (Unnamed Layer* 138) 
[Pooling]_output: 3eaf7ad1 142 | pool5/7x7_s1_1: 3dc5a2da 143 | pool5/7x7_s1_2: 3dc5a2da 144 | OC2_DUMMY_0: 3dc5a2da 145 | (Unnamed Layer* 143) [Constant]_output: 3c6b9c60 146 | (Unnamed Layer* 144) [Constant]_output: 3a87a23a 147 | (Unnamed Layer* 145) [Matrix Multiply]_output: 3e7e39b8 148 | loss3/classifier_1: 3e52cb66 149 | prob_1: 3abdbb77 150 | -------------------------------------------------------------------------------- /inference/README.md: -------------------------------------------------------------------------------- 1 | # TensorRT Inference Example (Python API) 2 | 3 | A generic inference script example to handle TensorRT engines created from both fixed-shape 4 | and dynamic-shape ONNX models. 5 | 6 | This script will detect the input/output names/shapes and 7 | generate random inputs to do inference on. It will also dump a lot of relevant metadata 8 | and information that happens along the way to clarify the steps that happen when doing 9 | inference with TensorRT. 10 | 11 | ## Setup 12 | 13 | 1. (Optional) Start a TensorRT 7.1 Docker container using the NGC 20.06 TensorRT Release: 14 | ``` 15 | # Mount current directory to /mnt and use /mnt as workspace inside container 16 | nvidia-docker run -it -v `pwd`:/mnt -w /mnt nvcr.io/nvidia/tensorrt:20.06-py3 17 | ``` 18 | 19 | 2. (Optional) Download and run a sample script to: 20 | * Generate a fixed-shape Alexnet ONNX model 21 | * Generate a dynamic-shape Alexnet ONNX model 22 | 23 | ``` 24 | # Download sample pytorch->onnx script 25 | wget https://gist.githubusercontent.com/rmccorm4/b72abac18aed6be4c1725db18eba4930/raw/3919c883b97a231877b454dae695fe074a1acdff/alexnet_onnx.py 26 | # Install dependencies 27 | python3 -m pip install torch==1.5.1 torchvision==0.6.1 onnx==1.6 28 | # Export sample Alexnet model to ONNX with a dynamic batch dimension 29 | python3 alexnet_onnx.py --opset=11 30 | ``` 31 | 32 | 3. (Optional) Use `trtexec` to: 33 | * Create a TensorRT engine from the **fixed-shape** Alexnet ONNX model in previous step 34 | 35 | ``` 36 | # Create TensorRT engine from fixed-shape ONNX model 37 | trtexec --explicitBatch \ 38 | --onnx=alexnet_fixed.onnx \ 39 | --saveEngine=alexnet_fixed.engine 40 | ``` 41 | 42 | 4. (Optional) Use `trtexec` to: 43 | * Create a TensorRT engine from the **dynamic-shape** Alexnet ONNX model in previous step 44 | 45 | ``` 46 | # Emulate "maxBatchSize" behavior from implicit batch engines by setting 47 | # an optimization profile with min=(1, *shape), opt=max=(maxBatchSize, *shape) 48 | MAX_BATCH_SIZE=32 49 | INPUT_NAME="actual_input_1" 50 | 51 | # Convert dynamic batch ONNX model to TRT Engine with optimization profile defined 52 | # --minShapes: kMIN shape 53 | # --optShapes: kOPT shape 54 | # --maxShapes: kMAX shape 55 | # --shapes: # Inference shape - this is like context.set_binding_shape(0, shape) 56 | trtexec --onnx=alexnet_dynamic.onnx \ 57 | --explicitBatch \ 58 | --minShapes=${INPUT_NAME}:1x3x224x224 \ 59 | --optShapes=${INPUT_NAME}:${MAX_BATCH_SIZE}x3x224x224 \ 60 | --maxShapes=${INPUT_NAME}:${MAX_BATCH_SIZE}x3x224x224 \ 61 | --shapes=${INPUT_NAME}:1x3x224x224 \ 62 | --saveEngine=alexnet_dynamic.engine 63 | ``` 64 | 65 | ## Inference 66 | 67 | 1. Run a generic python script that can handle both fixed-shape and dynamic-shape 68 | TensorRT engines. 
69 | * This script generates random input data 70 | * For dynamic shape engines, this script defaults to input shapes of the engine's 71 | first optimization profile's (`context.active_optimization_profile=0`) kOPT shape 72 | 73 | > **NOTE**: This script is not meant for peak performance, but is meant to serve as a detailed 74 | example to demonstrate all of the moving parts involved in inference, especially for 75 | dynamic shape engines. 76 | 77 | ### Fixed-shape Engine Example 78 | 79 | ``` 80 | root@49b19ca81d38:/mnt# python3 infer.py -e alexnet_fixed.engine 81 | Loaded engine: alexnet_fixed.engine 82 | Active Optimization Profile: 0 83 | Engine/Binding Metadata 84 | Number of optimization profiles: 1 85 | Number of bindings per profile: 2 86 | First binding for profile: 0 87 | Last binding for profile: 0 88 | Generating Random Inputs 89 | Input [actual_input_1] shape: (10, 3, 224, 224) 90 | Input Metadata 91 | Number of Inputs: 1 92 | Input Bindings for Profile 0: [0] 93 | Input names: ['actual_input_1'] 94 | Input shapes: [(10, 3, 224, 224)] 95 | Output Metadata 96 | Number of Outputs: 1 97 | Output names: ['output1'] 98 | Output shapes: [(10, 1000)] 99 | Output Bindings for Profile 0: [1] 100 | Inference Outputs: [array([[-1.9423429 , -0.31228915, -0.11592922, ..., -1.8848201 , 101 | -1.967107 , 1.77118 ], 102 | [-2.067131 , -0.1413779 , -0.10807458, ..., -1.8306588 , 103 | -2.0896611 , 1.8038476 ], 104 | [-1.9210347 , -0.59420735, -0.46031308, ..., -1.8159304 , 105 | -1.953881 , 1.7182006 ], 106 | ..., 107 | [-2.091892 , -0.24702555, 0.01917603, ..., -2.0384629 , 108 | -2.075258 , 1.877627 ], 109 | [-2.1854897 , -0.48040774, -0.19558612, ..., -1.9172117 , 110 | -2.1862085 , 1.877256 ], 111 | [-2.2054253 , -0.29715982, -0.15480372, ..., -1.5839869 , 112 | -1.941939 , 1.9311339 ]], dtype=float32)] 113 | ``` 114 | 115 | ### Dynamic-shape Engine Example 116 | 117 | ``` 118 | root@49b19ca81d38:/mnt# python3 infer.py -e alexnet_dynamic.engine 119 | Loaded engine: alexnet_dynamic.engine 120 | Active Optimization Profile: 0 121 | Engine/Binding Metadata 122 | Number of optimization profiles: 1 123 | Number of bindings per profile: 2 124 | First binding for profile: 0 125 | Last binding for profile: 0 126 | Generating Random Inputs 127 | Input [actual_input_1] shape: (-1, 3, 224, 224) 128 | Profile Shapes for [actual_input_1]: [kMIN (1, 3, 224, 224) | kOPT (32, 3, 224, 224) | kMAX (32, 3, 224, 224)] 129 | Input [actual_input_1] shape was dynamic, setting inference shape to (32, 3, 224, 224) 130 | Input Metadata 131 | Number of Inputs: 1 132 | Input Bindings for Profile 0: [0] 133 | Input names: ['actual_input_1'] 134 | Input shapes: [(32, 3, 224, 224)] 135 | Output Metadata 136 | Number of Outputs: 1 137 | Output names: ['output1'] 138 | Output shapes: [(32, 1000)] 139 | Output Bindings for Profile 0: [1] 140 | Inference Outputs: [array([[-2.1023765 , -0.3775048 , -0.17939475, ..., -1.8248225 , 141 | -2.042846 , 2.0328057 ], 142 | [-1.737236 , -0.54789805, -0.33429265, ..., -1.7502917 , 143 | -2.173114 , 1.699208 ], 144 | [-1.9979393 , -0.467514 , -0.3265229 , ..., -1.8016856 , 145 | -2.025305 , 1.6176162 ], 146 | ..., 147 | [-1.9945843 , -0.42376897, 0.11915839, ..., -1.7103177 , 148 | -2.0614984 , 1.7928884 ], 149 | [-2.180842 , -0.52559745, -0.12792507, ..., -1.9101417 , 150 | -2.132482 , 1.8081576 ], 151 | [-2.011236 , -0.29210696, -0.2629762 , ..., -1.8188174 , 152 | -2.0216613 , 1.884306 ]], dtype=float32)] 153 | ``` 154 | 
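### Core API Calls (Simplified Sketch)

For orientation, the essential TensorRT Python API calls behind the dynamic-shape example above boil down to roughly the following. This is a simplified illustration (single optimization profile, random inputs, synchronous execution), not a replacement for `infer.py`; the engine filename is assumed to be the `alexnet_dynamic.engine` built earlier.

```
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.INFO)

with open("alexnet_dynamic.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

with engine.create_execution_context() as context:
    context.active_optimization_profile = 0

    # Dynamic dimensions stay at -1 until an explicit inference shape is set.
    context.set_binding_shape(0, (32, 3, 224, 224))

    # Allocate host/device buffers now that every binding shape is resolved.
    host_bufs, device_bufs = [], []
    for i in range(engine.num_bindings):
        shape = tuple(context.get_binding_shape(i))
        dtype = trt.nptype(engine.get_binding_dtype(i))
        host = np.random.random(shape).astype(dtype) if engine.binding_is_input(i) \
            else np.empty(shape, dtype=dtype)
        host_bufs.append(host)
        device_bufs.append(cuda.mem_alloc(host.nbytes))

    # Copy inputs to the device, run inference, then copy outputs back.
    for i in range(engine.num_bindings):
        if engine.binding_is_input(i):
            cuda.memcpy_htod(device_bufs[i], host_bufs[i])

    context.execute_v2([int(d) for d in device_bufs])

    for i in range(engine.num_bindings):
        if not engine.binding_is_input(i):
            cuda.memcpy_dtoh(host_bufs[i], device_bufs[i])
            print(host_bufs[i])
```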
-------------------------------------------------------------------------------- /int8/calibration/caches/resnet50.cache: -------------------------------------------------------------------------------- 1 | TRT-6001-EntropyCalibration2 2 | gpu_0/data_0: 3caa54fc 3 | gpu_0/conv1_1: 3dd3d571 4 | gpu_0/res_conv1_bn_1: 3dc52e66 5 | gpu_0/res_conv1_bn_2: 3dc52e66 6 | gpu_0/pool1_1: 3dc52e66 7 | gpu_0/res2_0_branch2a_1: 3e34884f 8 | gpu_0/res2_0_branch2a_bn_1: 3de20a64 9 | gpu_0/res2_0_branch2a_bn_2: 3d92b4f6 10 | gpu_0/res2_0_branch2b_1: 3dbff08f 11 | gpu_0/res2_0_branch2b_bn_1: 3d99010d 12 | gpu_0/res2_0_branch2b_bn_2: 3d99010d 13 | gpu_0/res2_0_branch2c_1: 3d1bd61c 14 | gpu_0/res2_0_branch2c_bn_1: 3d5f6e51 15 | gpu_0/res2_0_branch1_1: 3de8621e 16 | gpu_0/res2_0_branch1_bn_1: 3deaf47e 17 | gpu_0/res2_0_branch2c_bn_2: 3dfb9d4f 18 | gpu_0/res2_0_branch2c_bn_3: 3d99d186 19 | gpu_0/res2_1_branch2a_1: 3d89c258 20 | gpu_0/res2_1_branch2a_bn_1: 3db24847 21 | gpu_0/res2_1_branch2a_bn_2: 3d756f0c 22 | gpu_0/res2_1_branch2b_1: 3d78a42a 23 | gpu_0/res2_1_branch2b_bn_1: 3dbcdc50 24 | gpu_0/res2_1_branch2b_bn_2: 3dae3f35 25 | gpu_0/res2_1_branch2c_1: 3cba6310 26 | gpu_0/res2_1_branch2c_bn_1: 3d3a4210 27 | gpu_0/res2_1_branch2c_bn_2: 3d6d245b 28 | gpu_0/res2_1_branch2c_bn_3: 3d5aea38 29 | gpu_0/res2_2_branch2a_1: 3d5b0e5b 30 | gpu_0/res2_2_branch2a_bn_1: 3d8e7607 31 | gpu_0/res2_2_branch2a_bn_2: 3d8e5793 32 | gpu_0/res2_2_branch2b_1: 3d7e5704 33 | gpu_0/res2_2_branch2b_bn_1: 3dbca5f3 34 | gpu_0/res2_2_branch2b_bn_2: 3d97cba2 35 | gpu_0/res2_2_branch2c_1: 3c978836 36 | gpu_0/res2_2_branch2c_bn_1: 3d4460aa 37 | gpu_0/res2_2_branch2c_bn_2: 3d82e037 38 | gpu_0/res2_2_branch2c_bn_3: 3d627bc2 39 | gpu_0/res3_0_branch2a_1: 3db06913 40 | gpu_0/res3_0_branch2a_bn_1: 3daf9020 41 | gpu_0/res3_0_branch2a_bn_2: 3d88cdcf 42 | gpu_0/res3_0_branch2b_1: 3d88c4a1 43 | gpu_0/res3_0_branch2b_bn_1: 3d9b8f30 44 | gpu_0/res3_0_branch2b_bn_2: 3da2356f 45 | gpu_0/res3_0_branch2c_1: 3cd1f299 46 | gpu_0/res3_0_branch2c_bn_1: 3d9df2ed 47 | gpu_0/res3_0_branch1_1: 3d489a87 48 | gpu_0/res3_0_branch1_bn_1: 3d9c3972 49 | gpu_0/res3_0_branch2c_bn_2: 3dbd8d2c 50 | gpu_0/res3_0_branch2c_bn_3: 3da8c445 51 | gpu_0/res3_1_branch2a_1: 3da47581 52 | gpu_0/res3_1_branch2a_bn_1: 3d8138fc 53 | gpu_0/res3_1_branch2a_bn_2: 3d620173 54 | gpu_0/res3_1_branch2b_1: 3da5b629 55 | gpu_0/res3_1_branch2b_bn_1: 3db284c5 56 | gpu_0/res3_1_branch2b_bn_2: 3d9cb488 57 | gpu_0/res3_1_branch2c_1: 3ccdfafb 58 | gpu_0/res3_1_branch2c_bn_1: 3d7f5b9c 59 | gpu_0/res3_1_branch2c_bn_2: 3d7f5b9c 60 | gpu_0/res3_1_branch2c_bn_3: 3d868670 61 | gpu_0/res3_2_branch2a_1: 3d6799c4 62 | gpu_0/res3_2_branch2a_bn_1: 3d8188d0 63 | gpu_0/res3_2_branch2a_bn_2: 3d901632 64 | gpu_0/res3_2_branch2b_1: 3dc018bf 65 | gpu_0/res3_2_branch2b_bn_1: 3dadd1aa 66 | gpu_0/res3_2_branch2b_bn_2: 3d8c52a1 67 | gpu_0/res3_2_branch2c_1: 3c9057af 68 | gpu_0/res3_2_branch2c_bn_1: 3d356146 69 | gpu_0/res3_2_branch2c_bn_2: 3d81d931 70 | gpu_0/res3_2_branch2c_bn_3: 3d81d931 71 | gpu_0/res3_3_branch2a_1: 3d6794aa 72 | gpu_0/res3_3_branch2a_bn_1: 3d8f6c73 73 | gpu_0/res3_3_branch2a_bn_2: 3d91e512 74 | gpu_0/res3_3_branch2b_1: 3d628623 75 | gpu_0/res3_3_branch2b_bn_1: 3db151d0 76 | gpu_0/res3_3_branch2b_bn_2: 3d8bd820 77 | gpu_0/res3_3_branch2c_1: 3c729ccc 78 | gpu_0/res3_3_branch2c_bn_1: 3d1bae10 79 | gpu_0/res3_3_branch2c_bn_2: 3d592b6e 80 | gpu_0/res3_3_branch2c_bn_3: 3d592b6e 81 | gpu_0/res4_0_branch2a_1: 3d5a694c 82 | gpu_0/res4_0_branch2a_bn_1: 3da40ec3 83 | gpu_0/res4_0_branch2a_bn_2: 3d8f97fb 84 | 
gpu_0/res4_0_branch2b_1: 3d409525 85 | gpu_0/res4_0_branch2b_bn_1: 3d8edb39 86 | gpu_0/res4_0_branch2b_bn_2: 3d8edb39 87 | gpu_0/res4_0_branch2c_1: 3cd32b34 88 | gpu_0/res4_0_branch2c_bn_1: 3d627088 89 | gpu_0/res4_0_branch1_1: 3d0ea51a 90 | gpu_0/res4_0_branch1_bn_1: 3d64d306 91 | gpu_0/res4_0_branch2c_bn_2: 3da784d6 92 | gpu_0/res4_0_branch2c_bn_3: 3da784d6 93 | gpu_0/res4_1_branch2a_1: 3da1f886 94 | gpu_0/res4_1_branch2a_bn_1: 3d8f102d 95 | gpu_0/res4_1_branch2a_bn_2: 3d9f8a5f 96 | gpu_0/res4_1_branch2b_1: 3d974e1c 97 | gpu_0/res4_1_branch2b_bn_1: 3d938004 98 | gpu_0/res4_1_branch2b_bn_2: 3d4b8b39 99 | gpu_0/res4_1_branch2c_1: 3c80df9a 100 | gpu_0/res4_1_branch2c_bn_1: 3d462692 101 | gpu_0/res4_1_branch2c_bn_2: 3d8536b9 102 | gpu_0/res4_1_branch2c_bn_3: 3d9146df 103 | gpu_0/res4_2_branch2a_1: 3d3c6903 104 | gpu_0/res4_2_branch2a_bn_1: 3d7d1941 105 | gpu_0/res4_2_branch2a_bn_2: 3d687fae 106 | gpu_0/res4_2_branch2b_1: 3d57e1eb 107 | gpu_0/res4_2_branch2b_bn_1: 3dce180b 108 | gpu_0/res4_2_branch2b_bn_2: 3d5a35d6 109 | gpu_0/res4_2_branch2c_1: 3c5ac789 110 | gpu_0/res4_2_branch2c_bn_1: 3d57ab7b 111 | gpu_0/res4_2_branch2c_bn_2: 3d89cedf 112 | gpu_0/res4_2_branch2c_bn_3: 3d8819a4 113 | gpu_0/res4_3_branch2a_1: 3d2b76af 114 | gpu_0/res4_3_branch2a_bn_1: 3d64423d 115 | gpu_0/res4_3_branch2a_bn_2: 3d8c0e5c 116 | gpu_0/res4_3_branch2b_1: 3d432fb1 117 | gpu_0/res4_3_branch2b_bn_1: 3d9163e3 118 | gpu_0/res4_3_branch2b_bn_2: 3d5b48f2 119 | gpu_0/res4_3_branch2c_1: 3c4c34c8 120 | gpu_0/res4_3_branch2c_bn_1: 3d5fcbe5 121 | gpu_0/res4_3_branch2c_bn_2: 3d670d89 122 | gpu_0/res4_3_branch2c_bn_3: 3d670d89 123 | gpu_0/res4_4_branch2a_1: 3d23715a 124 | gpu_0/res4_4_branch2a_bn_1: 3d6192e5 125 | gpu_0/res4_4_branch2a_bn_2: 3d61ed9e 126 | gpu_0/res4_4_branch2b_1: 3d0bcd2e 127 | gpu_0/res4_4_branch2b_bn_1: 3d85947d 128 | gpu_0/res4_4_branch2b_bn_2: 3d6767da 129 | gpu_0/res4_4_branch2c_1: 3ca5a193 130 | gpu_0/res4_4_branch2c_bn_1: 3d420299 131 | gpu_0/res4_4_branch2c_bn_2: 3d8066bd 132 | gpu_0/res4_4_branch2c_bn_3: 3d8066bd 133 | gpu_0/res4_5_branch2a_1: 3d4445de 134 | gpu_0/res4_5_branch2a_bn_1: 3d86d185 135 | gpu_0/res4_5_branch2a_bn_2: 3d91b080 136 | gpu_0/res4_5_branch2b_1: 3d19c208 137 | gpu_0/res4_5_branch2b_bn_1: 3da49c13 138 | gpu_0/res4_5_branch2b_bn_2: 3da32bc6 139 | gpu_0/res4_5_branch2c_1: 3cae5e52 140 | gpu_0/res4_5_branch2c_bn_1: 3d568070 141 | gpu_0/res4_5_branch2c_bn_2: 3d878535 142 | gpu_0/res4_5_branch2c_bn_3: 3d960b69 143 | gpu_0/res5_0_branch2a_1: 3ceda51d 144 | gpu_0/res5_0_branch2a_bn_1: 3d802ccc 145 | gpu_0/res5_0_branch2a_bn_2: 3d6aae0b 146 | gpu_0/res5_0_branch2b_1: 3cedf750 147 | gpu_0/res5_0_branch2b_bn_1: 3d8dcf87 148 | gpu_0/res5_0_branch2b_bn_2: 3d7ae9a2 149 | gpu_0/res5_0_branch2c_1: 3caf1f62 150 | gpu_0/res5_0_branch2c_bn_1: 3e2fe88b 151 | gpu_0/res5_0_branch1_1: 3c9dd71d 152 | gpu_0/res5_0_branch1_bn_1: 3e085b0c 153 | gpu_0/res5_0_branch2c_bn_2: 3e869a50 154 | gpu_0/res5_0_branch2c_bn_3: 3e902de5 155 | gpu_0/res5_1_branch2a_1: 3e191e69 156 | gpu_0/res5_1_branch2a_bn_1: 3d90a061 157 | gpu_0/res5_1_branch2a_bn_2: 3d609a83 158 | gpu_0/res5_1_branch2b_1: 3d1e2db4 159 | gpu_0/res5_1_branch2b_bn_1: 3d74d4a1 160 | gpu_0/res5_1_branch2b_bn_2: 3d2a48e3 161 | gpu_0/res5_1_branch2c_1: 3c4aceed 162 | gpu_0/res5_1_branch2c_bn_1: 3dc2f2c7 163 | gpu_0/res5_1_branch2c_bn_2: 3e6587d1 164 | gpu_0/res5_1_branch2c_bn_3: 3e925ab7 165 | gpu_0/res5_2_branch2a_1: 3e3357a0 166 | gpu_0/res5_2_branch2a_bn_1: 3d9339c8 167 | gpu_0/res5_2_branch2a_bn_2: 3d6b6029 168 | gpu_0/res5_2_branch2b_1: 3ce5ba60 169 | 
gpu_0/res5_2_branch2b_bn_1: 3d866f78 170 | gpu_0/res5_2_branch2b_bn_2: 3d54df46 171 | gpu_0/res5_2_branch2c_1: 3c08540c 172 | gpu_0/res5_2_branch2c_bn_1: 3e110a69 173 | gpu_0/res5_2_branch2c_bn_2: 3e750bf1 174 | gpu_0/res5_2_branch2c_bn_3: 3ec805a6 175 | gpu_0/pool5_1: 3ec805a6 176 | OC2_DUMMY_0: 3d9942c2 177 | (Unnamed Layer* 174) [Constant]_output: 3996cbd8 178 | (Unnamed Layer* 175) [Constant]_output: 3b64f8bc 179 | (Unnamed Layer* 176) [Matrix Multiply]_output: 3e5b5330 180 | gpu_0/pred_1: 3e526e4b 181 | gpu_0/softmax_1: 3afb76dc 182 | -------------------------------------------------------------------------------- /int8/calibration/README.md: -------------------------------------------------------------------------------- 1 | # ImageNet Models: ONNX -> TensorRT 2 | 3 | This directory contains some helper scripts for creating TensorRT engines from 4 | various ONNX classification models based on Imagenet data using the Python API. 5 | 6 | These scripts were last tested using the 7 | [NGC TensorRT Container Version 20.06-py3](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt). 8 | You can see the corresponding framework versions for this container [here](https://docs.nvidia.com/deeplearning/sdk/tensorrt-container-release-notes/rel_20.06.html#rel_20.06). 9 | 10 | ## Quickstart 11 | 12 | > **NOTE**: This INT8 example is only valid for **fixed-shape** ONNX models at the moment. 13 | > 14 | > With the release of TensorRT 7.1, 15 | INT8 Calibration on **dynamic-shape** models is now supported, however this example has not been updated 16 | to reflect that yet. For more details on INT8 Calibration for **dynamic-shape** models, please 17 | see the [documentation](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#int8-calib-dynamic-shapes). 18 | 19 | ### 1. Start TensorRT Container with current directory mounted. 20 | 21 | ```bash 22 | docker run -it --runtime=nvidia -v ${PWD}:/mnt --workdir=/mnt nvcr.io/nvidia/tensorrt:20.06-py3 23 | ``` 24 | 25 | ### 2. Download Resnet50 ONNX model from [ONNX Model Zoo](https://github.com/onnx/models/tree/master/vision/classification). 26 | 27 | ```bash 28 | wget https://s3.amazonaws.com/download.onnx/models/opset_8/resnet50.tar.gz 29 | tar -xvzf resnet50.tar.gz 30 | ``` 31 | 32 | ### 3. Convert ONNX model to TensorRT 33 | 34 | See `./onnx_to_tensorrt.py -h` for full list of command line arguments. 35 | 36 | Also see `trtexec` if you're only interested in FP32/FP16 and not INT8 engines. 37 | 38 | **FP32** 39 | ```bash 40 | # OR trtexec --explicitBatch --onnx=resnet50/model.onnx --saveEngine=resnet50.fp32.engine 41 | ./onnx_to_tensorrt.py --explicit-batch --onnx resnet50/model.onnx -o resnet50.fp32.engine 42 | ``` 43 | 44 | **FP16** 45 | ```bash 46 | # OR trtexec --explicitBatch --onnx=resnet50/model.onnx --fp16 --saveEngine=resnet50.fp16.engine 47 | ./onnx_to_tensorrt.py --explicit-batch --onnx resnet50/model.onnx -o resnet50.fp16.engine --fp16 48 | ``` 49 | 50 | **INT8** 51 | 52 | For simplicity, we can use an existing calibration cache from [caches/resnet50.cache](caches/resnet50.cache): 53 | ```bash 54 | ./onnx_to_tensorrt.py --explicit-batch \ 55 | --onnx resnet50/model.onnx \ 56 | --fp16 \ 57 | --int8 \ 58 | --calibration-cache="caches/resnet50.cache" \ 59 | -o resnet50.int8.engine 60 | ``` 61 | 62 | See the [INT8 Calibration](#int8-calibration) section below for details on calibration 63 | using your own model or different data, where you don't have an existing calibration cache 64 | or want to create a new one. 
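As an aside, the calibration cache referenced above is a plain-text file: after the `TRT-...-EntropyCalibration2` header, each line maps a tensor name to a hex value, which appears to be the IEEE-754 (float32) encoding of that tensor's calibration scale. The short sketch below decodes a few entries under that assumption; the cache path matches the one used above.

```python
# Minimal sketch: decode the per-tensor values stored in a TensorRT
# entropy-calibration cache. The "name: hex" layout matches
# caches/resnet50.cache; interpreting the hex as a big-endian float32
# scale is an assumption about the cache format.
import struct

with open("caches/resnet50.cache") as f:
    lines = f.read().splitlines()

print(lines[0])  # header, e.g. "TRT-6001-EntropyCalibration2"
for line in lines[1:6]:
    name, hex_value = line.split(":")
    scale = struct.unpack(">f", bytes.fromhex(hex_value.strip()))[0]
    print("{:40s} {:.6f}".format(name.strip(), scale))
```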
65 | 66 | ## INT8 Calibration 67 | 68 | See [ImagenetCalibrator.py](ImagenetCalibrator.py) for a reference implementation 69 | of TensorRT's [IInt8EntropyCalibrator2](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/python_api/infer/Int8/EntropyCalibrator2.html). 70 | 71 | This class can be tweaked to work for other kinds of models, inputs, etc. 72 | 73 | In the [Quickstart](#quickstart) section above, we made use of a pre-existing cache, 74 | [caches/resnet50.cache](caches/resnet50.cache), to save time for the sake of an example. 75 | 76 | However, to calibrate using different data or a different model, you can do so with the `--calibration-data` argument. 77 | 78 | * This requires that you've mounted a dataset, such as Imagenet, to use for calibration. 79 | * Add something like `-v /imagenet:/imagenet` to your Docker command in Step (1) 80 | to mount a dataset found locally at `/imagenet`. 81 | * You can specify your own `preprocess_func` by defining it inside of `processing.py` and 82 | passing the function name as a `--preprocess_func` command-line argument 83 | * By default, `preprocess_imagenet` is used. 84 | * For `InceptionV1` for example, you can pass `--preprocess_func=preprocess_inception` 85 | instead of `preprocess_imagenet`. See [processing.py](processing.py). 86 | 87 | ```bash 88 | # Path to dataset to use for calibration. 89 | # **Not necessary if you already have a calibration cache from a previous run. 90 | CALIBRATION_DATA="/imagenet" 91 | 92 | # Truncate calibration images to a random sample of this amount if more are found. 93 | # **Not necessary if you already have a calibration cache from a previous run. 94 | MAX_CALIBRATION_SIZE=512 95 | 96 | # Calibration cache to be used instead of calibration data if it already exists, 97 | # or the cache will be created from the calibration data if it doesn't exist. 98 | CACHE_FILENAME="caches/custom.cache" 99 | 100 | # Any function name defined in `processing.py` 101 | PREPROCESS_FUNC="preprocess_imagenet" 102 | 103 | # Path to ONNX model 104 | ONNX_MODEL="resnet50/model.onnx" 105 | 106 | # Path to write TensorRT engine to 107 | OUTPUT="resnet50.int8.engine" 108 | 109 | # Creates an int8 engine from your ONNX model, creating ${CACHE_FILENAME} based 110 | # on your ${CALIBRATION_DATA}, unless ${CACHE_FILENAME} already exists, then 111 | # it will simply use that instead. 112 | python3 onnx_to_tensorrt.py --fp16 --int8 -v \ 113 | --max-calibration-size=${MAX_CALIBRATION_SIZE} \ 114 | --calibration-data=${CALIBRATION_DATA} \ 115 | --calibration-cache=${CACHE_FILENAME} \ 116 | --preprocess_func=${PREPROCESS_FUNC} \ 117 | --explicit-batch \ 118 | --onnx ${ONNX_MODEL} -o ${OUTPUT} 119 | 120 | ``` 121 | 122 | ### Pre-processing 123 | 124 | In order to calibrate your model correctly, you should `pre-process` your data the same way 125 | that you would during inference. You can pass in a `preprocess_func` to the constructor 126 | of `ImagenetCalibrator(..., preprocess_func=<func>, ...)`, where `<func>` 127 | is a pre-processing function defined inside of 128 | `processing.py` (when using `onnx_to_tensorrt.py`, it is selected by name via the `--preprocess_func` argument). You can add your own pre-processing functions to `processing.py` and pass 129 | the function name accordingly. 
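For illustration, a custom pre-processing function might look like the following sketch. The function name and normalization below are made up; only the call signature matters, since `ImagenetCalibrator` invokes it as `preprocess_func(image, *input_shape)` and expects an array of shape `input_shape` back.

```python
# Hypothetical pre-processing function that could be added to processing.py.
# Called by ImagenetCalibrator as preprocess_func(image, channels, height, width).
import numpy as np
from PIL import Image


def preprocess_custom(image: Image.Image, channels: int = 3, height: int = 224, width: int = 224) -> np.ndarray:
    image = image.convert("RGB").resize((width, height), Image.BILINEAR)
    array = np.asarray(image, dtype=np.float32) / 255.0  # HWC, values in [0, 1]
    array = (array - 0.5) / 0.5                          # illustrative normalization
    return array.transpose(2, 0, 1)                      # CHW, shape (channels, height, width)
```

It could then be selected with `--preprocess_func=preprocess_custom` on the command line.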
130 | 131 | 132 | ## ONNX Models 133 | 134 | ### ONNX Model Zoo 135 | 136 | To name a few models taken from the [ONNX Model Zoo](https://github.com/onnx/models/tree/master/vision/classification) 137 | that should work pretty well with these scripts: 138 | * [ResNet50](https://s3.amazonaws.com/download.onnx/models/opset_8/resnet50.tar.gz) 139 | * [MobileNetV2](https://s3.amazonaws.com/onnx-model-zoo/mobilenet/mobilenetv2-1.0/mobilenetv2-1.0.tar.gz) 140 | * [InceptionV1](https://s3.amazonaws.com/download.onnx/models/opset_8/inception_v1.tar.gz) 141 | * [VGG16](https://s3.amazonaws.com/onnx-model-zoo/vgg/vgg16/vgg16.tar.gz) 142 | 143 | > **NOTE**: For creating FP32/FP16 engines, **it will likely be much simpler to just use `trtexec --onnx=model.onnx ...`**. 144 | > These `python` scripts are mostly just convenient for doing `INT8` calibration, because `trtexec` currently just creates 145 | > `INT8` engines for the sake of benchmarking, and doesn't preserve the accuracy of the model. The FP32/FP16 options just 146 | > come along for free. 147 | 148 | ### TensorFlow Models 149 | 150 | To play with these scripts using a TensorFlow model, you'll first need to convert the model to ONNX. 151 | 152 | One of the best tools for doing that currently is [tf2onnx](https://github.com/onnx/tensorflow-onnx). 153 | Please see their documentation for more details. 154 | 155 | Additionally, if you don't want to go from TF -> ONNX -> TRT, you can also try the built-in TF-TRT library 156 | that comes with TensorFlow: https://github.com/tensorflow/tensorrt 157 | 158 | ### PyTorch Models 159 | 160 | To play with these scripts using a PyTorch model, you'll first need to convert the model to ONNX. 161 | 162 | PyTorch has built-in capabilities for exporting models to ONNX format. Please see their documentation for more details: 163 | https://pytorch.org/docs/stable/onnx.html 164 | 165 | Similarly to TF-TRT, there is an ongoing effort for PyTorch here called `torch2trt`: https://github.com/NVIDIA-AI-IOT/torch2trt 166 | -------------------------------------------------------------------------------- /int8/calibration/ImagenetCalibrator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 NVIDIA Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | import sys 17 | import glob 18 | import random 19 | import logging 20 | 21 | import numpy as np 22 | from PIL import Image 23 | import tensorrt as trt 24 | import pycuda.driver as cuda 25 | import pycuda.autoinit 26 | 27 | logging.basicConfig(level=logging.DEBUG, 28 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 29 | datefmt="%Y-%m-%d %H:%M:%S") 30 | logger = logging.getLogger(__name__) 31 | 32 | def get_int8_calibrator(calib_cache, calib_data, max_calib_size, preprocess_func_name, calib_batch_size): 33 | # Use calibration cache if it exists 34 | if os.path.exists(calib_cache): 35 | logger.info("Skipping calibration files, using calibration cache: {:}".format(calib_cache)) 36 | calib_files = [] 37 | # Use calibration files from validation dataset if no cache exists 38 | else: 39 | if not calib_data: 40 | raise ValueError("ERROR: Int8 mode requested, but no calibration data provided. Please provide --calibration-data /path/to/calibration/files") 41 | 42 | calib_files = get_calibration_files(calib_data, max_calib_size) 43 | 44 | # Choose pre-processing function for INT8 calibration 45 | import processing 46 | if preprocess_func_name is not None: 47 | preprocess_func = getattr(processing, preprocess_func_name) 48 | else: 49 | preprocess_func = processing.preprocess_imagenet 50 | 51 | int8_calibrator = ImagenetCalibrator(calibration_files=calib_files, 52 | batch_size=calib_batch_size, 53 | cache_file=calib_cache, 54 | preprocess_func=preprocess_func) 55 | return int8_calibrator 56 | 57 | 58 | def get_calibration_files(calibration_data, max_calibration_size=None, allowed_extensions=(".jpeg", ".jpg", ".png")): 59 | """Returns a list of all filenames ending with `allowed_extensions` found in the `calibration_data` directory. 60 | 61 | Parameters 62 | ---------- 63 | calibration_data: str 64 | Path to directory containing desired files. 65 | max_calibration_size: int 66 | Max number of files to use for calibration. If calibration_data contains more than this number, 67 | a random sample of size max_calibration_size will be returned instead. If None, all samples will be used. 68 | 69 | Returns 70 | ------- 71 | calibration_files: List[str] 72 | List of filenames contained in the `calibration_data` directory ending with `allowed_extensions`. 73 | """ 74 | 75 | logger.info("Collecting calibration files from: {:}".format(calibration_data)) 76 | calibration_files = [path for path in glob.iglob(os.path.join(calibration_data, "**"), recursive=True) 77 | if os.path.isfile(path) and path.lower().endswith(allowed_extensions)] 78 | logger.info("Number of Calibration Files found: {:}".format(len(calibration_files))) 79 | 80 | if len(calibration_files) == 0: 81 | raise Exception("ERROR: Calibration data path [{:}] contains no files!".format(calibration_data)) 82 | 83 | if max_calibration_size: 84 | if len(calibration_files) > max_calibration_size: 85 | logger.warning("Capping number of calibration images to max_calibration_size: {:}".format(max_calibration_size)) 86 | random.seed(42) # Set seed for reproducibility 87 | calibration_files = random.sample(calibration_files, max_calibration_size) 88 | 89 | return calibration_files 90 | 91 | 92 | # https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/python_api/infer/Int8/EntropyCalibrator2.html 93 | class ImagenetCalibrator(trt.IInt8EntropyCalibrator2): 94 | """INT8 Calibrator Class for Imagenet-based Image Classification Models. 
95 | 96 | Parameters 97 | ---------- 98 | calibration_files: List[str] 99 | List of image filenames to use for INT8 Calibration 100 | batch_size: int 101 | Number of images to pass through in one batch during calibration 102 | input_shape: Tuple[int] 103 | Tuple of integers defining the shape of input to the model (Default: (3, 224, 224)) 104 | cache_file: str 105 | Name of file to read/write calibration cache from/to. 106 | preprocess_func: function -> numpy.ndarray 107 | Pre-processing function to run on calibration data. This should match the pre-processing 108 | done at inference time. In general, this function should return a numpy array of 109 | shape `input_shape`. 110 | """ 111 | 112 | def __init__(self, calibration_files=[], batch_size=32, input_shape=(3, 224, 224), 113 | cache_file="calibration.cache", preprocess_func=None): 114 | super().__init__() 115 | self.input_shape = input_shape 116 | self.cache_file = cache_file 117 | self.batch_size = batch_size 118 | self.batch = np.zeros((self.batch_size, *self.input_shape), dtype=np.float32) 119 | self.device_input = cuda.mem_alloc(self.batch.nbytes) 120 | 121 | self.files = calibration_files 122 | # Pad the list so it is a multiple of batch_size 123 | if len(self.files) % self.batch_size != 0: 124 | logger.info("Padding # calibration files to be a multiple of batch_size {:}".format(self.batch_size)) 125 | self.files += calibration_files[(len(calibration_files) % self.batch_size):self.batch_size] 126 | 127 | self.batches = self.load_batches() 128 | 129 | if preprocess_func is None: 130 | logger.error("No preprocess_func defined! Please provide one to the constructor.") 131 | sys.exit(1) 132 | else: 133 | self.preprocess_func = preprocess_func 134 | 135 | def load_batches(self): 136 | # Populates a persistent self.batch buffer with images. 137 | for index in range(0, len(self.files), self.batch_size): 138 | for offset in range(self.batch_size): 139 | image = Image.open(self.files[index + offset]) 140 | self.batch[offset] = self.preprocess_func(image, *self.input_shape) 141 | logger.info("Calibration images pre-processed: {:}/{:}".format(index+self.batch_size, len(self.files))) 142 | yield self.batch 143 | 144 | def get_batch_size(self): 145 | return self.batch_size 146 | 147 | def get_batch(self, names): 148 | try: 149 | # Assume self.batches is a generator that provides batch data. 150 | batch = next(self.batches) 151 | # Assume that self.device_input is a device buffer allocated by the constructor. 152 | cuda.memcpy_htod(self.device_input, batch) 153 | return [int(self.device_input)] 154 | except StopIteration: 155 | # When we're out of batches, we return either [] or None. 156 | # This signals to TensorRT that there is no calibration data remaining. 157 | return None 158 | 159 | def read_calibration_cache(self): 160 | # If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None. 
161 | if os.path.exists(self.cache_file): 162 | with open(self.cache_file, "rb") as f: 163 | logger.info("Using calibration cache to save time: {:}".format(self.cache_file)) 164 | return f.read() 165 | 166 | def write_calibration_cache(self, cache): 167 | with open(self.cache_file, "wb") as f: 168 | logger.info("Caching calibration data for future use: {:}".format(self.cache_file)) 169 | f.write(cache) 170 | -------------------------------------------------------------------------------- /inference/infer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2020 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse 18 | from typing import Tuple, List 19 | 20 | import numpy as np 21 | import pycuda.driver as cuda 22 | import pycuda.autoinit 23 | import tensorrt as trt 24 | 25 | TRT_LOGGER = trt.Logger(trt.Logger.WARNING) 26 | 27 | 28 | def is_fixed(shape: Tuple[int]): 29 | return not is_dynamic(shape) 30 | 31 | 32 | def is_dynamic(shape: Tuple[int]): 33 | return any(dim is None or dim < 0 for dim in shape) 34 | 35 | 36 | def setup_binding_shapes( 37 | engine: trt.ICudaEngine, 38 | context: trt.IExecutionContext, 39 | host_inputs: List[np.ndarray], 40 | input_binding_idxs: List[int], 41 | output_binding_idxs: List[int], 42 | ): 43 | # Explicitly set the dynamic input shapes, so the dynamic output 44 | # shapes can be computed internally 45 | for host_input, binding_index in zip(host_inputs, input_binding_idxs): 46 | context.set_binding_shape(binding_index, host_input.shape) 47 | 48 | assert context.all_binding_shapes_specified 49 | 50 | host_outputs = [] 51 | device_outputs = [] 52 | for binding_index in output_binding_idxs: 53 | output_shape = context.get_binding_shape(binding_index) 54 | # Allocate buffers to hold output results after copying back to host 55 | buffer = np.empty(output_shape, dtype=np.float32) 56 | host_outputs.append(buffer) 57 | # Allocate output buffers on device 58 | device_outputs.append(cuda.mem_alloc(buffer.nbytes)) 59 | 60 | return host_outputs, device_outputs 61 | 62 | 63 | def get_binding_idxs(engine: trt.ICudaEngine, profile_index: int): 64 | # Calculate start/end binding indices for current context's profile 65 | num_bindings_per_profile = engine.num_bindings // engine.num_optimization_profiles 66 | start_binding = profile_index * num_bindings_per_profile 67 | end_binding = start_binding + num_bindings_per_profile 68 | print("Engine/Binding Metadata") 69 | print("\tNumber of optimization profiles: {}".format(engine.num_optimization_profiles)) 70 | print("\tNumber of bindings per profile: {}".format(num_bindings_per_profile)) 71 | print("\tFirst binding for profile {}: {}".format(profile_index, start_binding)) 72 | print("\tLast binding for profile {}: {}".format(profile_index, end_binding-1)) 73 | 74 | # Separate input and output binding indices for convenience 75 | input_binding_idxs = [] 76 | 
output_binding_idxs = [] 77 | for binding_index in range(start_binding, end_binding): 78 | if engine.binding_is_input(binding_index): 79 | input_binding_idxs.append(binding_index) 80 | else: 81 | output_binding_idxs.append(binding_index) 82 | 83 | return input_binding_idxs, output_binding_idxs 84 | 85 | 86 | def load_engine(filename: str): 87 | # Load serialized engine file into memory 88 | with open(filename, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: 89 | return runtime.deserialize_cuda_engine(f.read()) 90 | 91 | 92 | def get_random_inputs( 93 | engine: trt.ICudaEngine, 94 | context: trt.IExecutionContext, 95 | input_binding_idxs: List[int], 96 | seed: int = 42, 97 | ): 98 | # Input data for inference 99 | host_inputs = [] 100 | print("Generating Random Inputs") 101 | print("\tUsing random seed: {}".format(seed)) 102 | np.random.seed(seed) 103 | for binding_index in input_binding_idxs: 104 | # If input shape is fixed, we'll just use it 105 | input_shape = context.get_binding_shape(binding_index) 106 | input_name = engine.get_binding_name(binding_index) 107 | print("\tInput [{}] shape: {}".format(input_name, input_shape)) 108 | # If input shape is dynamic, we'll arbitrarily select one of the 109 | # the min/opt/max shapes from our optimization profile 110 | if is_dynamic(input_shape): 111 | profile_index = context.active_optimization_profile 112 | profile_shapes = engine.get_profile_shape(profile_index, binding_index) 113 | print("\tProfile Shapes for [{}]: [kMIN {} | kOPT {} | kMAX {}]".format(input_name, *profile_shapes)) 114 | # 0=min, 1=opt, 2=max, or choose any shape, (min <= shape <= max) 115 | input_shape = profile_shapes[1] 116 | print("\tInput [{}] shape was dynamic, setting inference shape to {}".format(input_name, input_shape)) 117 | 118 | host_inputs.append(np.random.random(input_shape).astype(np.float32)) 119 | 120 | return host_inputs 121 | 122 | 123 | def main(): 124 | parser = argparse.ArgumentParser() 125 | parser.add_argument("-e", "--engine", required=True, type=str, 126 | help="Path to TensorRT engine file.") 127 | parser.add_argument("-s", "--seed", type=int, default=42, 128 | help="Random seed for reproducibility.") 129 | args = parser.parse_args() 130 | 131 | # Load a serialized engine into memory 132 | engine = load_engine(args.engine) 133 | print("Loaded engine: {}".format(args.engine)) 134 | 135 | # Create context, this can be re-used 136 | context = engine.create_execution_context() 137 | # Profile 0 (first profile) is used by default 138 | context.active_optimization_profile = 0 139 | print("Active Optimization Profile: {}".format(context.active_optimization_profile)) 140 | 141 | # These binding_idxs can change if either the context or the 142 | # active_optimization_profile are changed 143 | input_binding_idxs, output_binding_idxs = get_binding_idxs( 144 | engine, context.active_optimization_profile 145 | ) 146 | input_names = [engine.get_binding_name(binding_idx) for binding_idx in input_binding_idxs] 147 | 148 | # Generate random inputs based on profile shapes 149 | host_inputs = get_random_inputs(engine, context, input_binding_idxs, seed=args.seed) 150 | 151 | # Allocate device memory for inputs. 
This can be easily re-used if the 152 | # input shapes don't change 153 | device_inputs = [cuda.mem_alloc(h_input.nbytes) for h_input in host_inputs] 154 | # Copy host inputs to device, this needs to be done for each new input 155 | for h_input, d_input in zip(host_inputs, device_inputs): 156 | cuda.memcpy_htod(d_input, h_input) 157 | 158 | print("Input Metadata") 159 | print("\tNumber of Inputs: {}".format(len(input_binding_idxs))) 160 | print("\tInput Bindings for Profile {}: {}".format(context.active_optimization_profile, input_binding_idxs)) 161 | print("\tInput names: {}".format(input_names)) 162 | print("\tInput shapes: {}".format([inp.shape for inp in host_inputs])) 163 | 164 | # This needs to be called everytime your input shapes change 165 | # If your inputs are always the same shape (same batch size, etc.), 166 | # then you will only need to call this once 167 | host_outputs, device_outputs = setup_binding_shapes( 168 | engine, context, host_inputs, input_binding_idxs, output_binding_idxs, 169 | ) 170 | output_names = [engine.get_binding_name(binding_idx) for binding_idx in output_binding_idxs] 171 | 172 | print("Output Metadata") 173 | print("\tNumber of Outputs: {}".format(len(output_binding_idxs))) 174 | print("\tOutput names: {}".format(output_names)) 175 | print("\tOutput shapes: {}".format([out.shape for out in host_outputs])) 176 | print("\tOutput Bindings for Profile {}: {}".format(context.active_optimization_profile, output_binding_idxs)) 177 | 178 | # Bindings are a list of device pointers for inputs and outputs 179 | bindings = device_inputs + device_outputs 180 | 181 | # Inference 182 | context.execute_v2(bindings) 183 | 184 | # Copy outputs back to host to view results 185 | for h_output, d_output in zip(host_outputs, device_outputs): 186 | cuda.memcpy_dtoh(h_output, d_output) 187 | 188 | # View outputs 189 | print("Inference Outputs:", host_outputs) 190 | 191 | # Cleanup (Can also use context managers instead) 192 | del context 193 | del engine 194 | 195 | if __name__ == "__main__": 196 | main() 197 | -------------------------------------------------------------------------------- /int8/calibration/caches/mobilenet_v2.cache: -------------------------------------------------------------------------------- 1 | TRT-6001-EntropyCalibration2 2 | data: 3caa54fc 3 | mobilenetv20_features_conv0_fwd: 3e0263e1 4 | mobilenetv20_features_batchnorm0_fwd: 3ca3d874 5 | mobilenetv20_features_relu0_fwd: 3cd360c7 6 | mobilenetv20_features_linearbottleneck0_conv0_fwd: 3cdba0a1 7 | mobilenetv20_features_linearbottleneck0_batchnorm0_fwd: 3c456aef 8 | mobilenetv20_features_linearbottleneck0_relu0_fwd: 3c456aef 9 | mobilenetv20_features_linearbottleneck0_conv1_fwd: 3dca58e0 10 | mobilenetv20_features_linearbottleneck0_batchnorm1_fwd: 3ca9d0bd 11 | mobilenetv20_features_linearbottleneck0_relu1_fwd: 3ca9d0bd 12 | mobilenetv20_features_linearbottleneck0_conv2_fwd: 3d0dd74c 13 | mobilenetv20_features_linearbottleneck0_batchnorm2_fwd: 3d1acfcc 14 | mobilenetv20_features_linearbottleneck1_conv0_fwd: 3d04e554 15 | mobilenetv20_features_linearbottleneck1_batchnorm0_fwd: 3cae4213 16 | mobilenetv20_features_linearbottleneck1_relu0_fwd: 3ca7bd8e 17 | mobilenetv20_features_linearbottleneck1_conv1_fwd: 3dbe01ec 18 | mobilenetv20_features_linearbottleneck1_batchnorm1_fwd: 3c979a26 19 | mobilenetv20_features_linearbottleneck1_relu1_fwd: 3c9f655b 20 | mobilenetv20_features_linearbottleneck1_conv2_fwd: 3d078250 21 | mobilenetv20_features_linearbottleneck1_batchnorm2_fwd: 3d21eda0 22 | 
mobilenetv20_features_linearbottleneck2_conv0_fwd: 3d064f87 23 | mobilenetv20_features_linearbottleneck2_batchnorm0_fwd: 3c87e5fa 24 | mobilenetv20_features_linearbottleneck2_relu0_fwd: 3c3469fd 25 | mobilenetv20_features_linearbottleneck2_conv1_fwd: 3c6be04d 26 | mobilenetv20_features_linearbottleneck2_batchnorm1_fwd: 3c54960c 27 | mobilenetv20_features_linearbottleneck2_relu1_fwd: 3c32683a 28 | mobilenetv20_features_linearbottleneck2_conv2_fwd: 3c50a992 29 | mobilenetv20_features_linearbottleneck2_batchnorm2_fwd: 3cb9006e 30 | mobilenetv20_features_linearbottleneck2_elemwise_add0: 3d31e02f 31 | mobilenetv20_features_linearbottleneck3_conv0_fwd: 3d247409 32 | mobilenetv20_features_linearbottleneck3_batchnorm0_fwd: 3c87bc3b 33 | mobilenetv20_features_linearbottleneck3_relu0_fwd: 3c3d0e73 34 | mobilenetv20_features_linearbottleneck3_conv1_fwd: 3d67f7c9 35 | mobilenetv20_features_linearbottleneck3_batchnorm1_fwd: 3c931c78 36 | mobilenetv20_features_linearbottleneck3_relu1_fwd: 3c778713 37 | mobilenetv20_features_linearbottleneck3_conv2_fwd: 3cddf4c7 38 | mobilenetv20_features_linearbottleneck3_batchnorm2_fwd: 3d0ba453 39 | mobilenetv20_features_linearbottleneck4_conv0_fwd: 3cad490d 40 | mobilenetv20_features_linearbottleneck4_batchnorm0_fwd: 3c1fa789 41 | mobilenetv20_features_linearbottleneck4_relu0_fwd: 3bf3d464 42 | mobilenetv20_features_linearbottleneck4_conv1_fwd: 3b3fbcb2 43 | mobilenetv20_features_linearbottleneck4_batchnorm1_fwd: 3c947b56 44 | mobilenetv20_features_linearbottleneck4_relu1_fwd: 3c36a119 45 | mobilenetv20_features_linearbottleneck4_conv2_fwd: 3c37e198 46 | mobilenetv20_features_linearbottleneck4_batchnorm2_fwd: 3cb7da02 47 | mobilenetv20_features_linearbottleneck4_elemwise_add0: 3d17a5b0 48 | mobilenetv20_features_linearbottleneck5_conv0_fwd: 3ca8fb87 49 | mobilenetv20_features_linearbottleneck5_batchnorm0_fwd: 3c238022 50 | mobilenetv20_features_linearbottleneck5_relu0_fwd: 3c08f6b2 51 | mobilenetv20_features_linearbottleneck5_conv1_fwd: 3b0d0eae 52 | mobilenetv20_features_linearbottleneck5_batchnorm1_fwd: 3c1cef0d 53 | mobilenetv20_features_linearbottleneck5_relu1_fwd: 3c2b191c 54 | mobilenetv20_features_linearbottleneck5_conv2_fwd: 3c0dacda 55 | mobilenetv20_features_linearbottleneck5_batchnorm2_fwd: 3c78218d 56 | mobilenetv20_features_linearbottleneck5_elemwise_add0: 3d0ca22a 57 | mobilenetv20_features_linearbottleneck6_conv0_fwd: 3cf1f356 58 | mobilenetv20_features_linearbottleneck6_batchnorm0_fwd: 3c3e5cb4 59 | mobilenetv20_features_linearbottleneck6_relu0_fwd: 3c2cbd95 60 | mobilenetv20_features_linearbottleneck6_conv1_fwd: 3b9a44e8 61 | mobilenetv20_features_linearbottleneck6_batchnorm1_fwd: 3caab9af 62 | mobilenetv20_features_linearbottleneck6_relu1_fwd: 3c8f1bfe 63 | mobilenetv20_features_linearbottleneck6_conv2_fwd: 3caca27f 64 | mobilenetv20_features_linearbottleneck6_batchnorm2_fwd: 3c76b8dc 65 | mobilenetv20_features_linearbottleneck7_conv0_fwd: 3c35943f 66 | mobilenetv20_features_linearbottleneck7_batchnorm0_fwd: 3c0d7003 67 | mobilenetv20_features_linearbottleneck7_relu0_fwd: 3c09e036 68 | mobilenetv20_features_linearbottleneck7_conv1_fwd: 3b3e01e3 69 | mobilenetv20_features_linearbottleneck7_batchnorm1_fwd: 3c552d2e 70 | mobilenetv20_features_linearbottleneck7_relu1_fwd: 3c5636be 71 | mobilenetv20_features_linearbottleneck7_conv2_fwd: 3c2be3a8 72 | mobilenetv20_features_linearbottleneck7_batchnorm2_fwd: 3c5e77c5 73 | mobilenetv20_features_linearbottleneck7_elemwise_add0: 3cac58b7 74 | mobilenetv20_features_linearbottleneck8_conv0_fwd: 3c7b36d7 75 | 
mobilenetv20_features_linearbottleneck8_batchnorm0_fwd: 3c0fa2bc 76 | mobilenetv20_features_linearbottleneck8_relu0_fwd: 3c0fa2bc 77 | mobilenetv20_features_linearbottleneck8_conv1_fwd: 3b372615 78 | mobilenetv20_features_linearbottleneck8_batchnorm1_fwd: 3c52ca8d 79 | mobilenetv20_features_linearbottleneck8_relu1_fwd: 3c39f223 80 | mobilenetv20_features_linearbottleneck8_conv2_fwd: 3c2590c8 81 | mobilenetv20_features_linearbottleneck8_batchnorm2_fwd: 3c86a503 82 | mobilenetv20_features_linearbottleneck8_elemwise_add0: 3ce0a53a 83 | mobilenetv20_features_linearbottleneck9_conv0_fwd: 3c9a6891 84 | mobilenetv20_features_linearbottleneck9_batchnorm0_fwd: 3c10dd86 85 | mobilenetv20_features_linearbottleneck9_relu0_fwd: 3c14b45a 86 | mobilenetv20_features_linearbottleneck9_conv1_fwd: 3b38c374 87 | mobilenetv20_features_linearbottleneck9_batchnorm1_fwd: 3c948a4d 88 | mobilenetv20_features_linearbottleneck9_relu1_fwd: 3c30c232 89 | mobilenetv20_features_linearbottleneck9_conv2_fwd: 3c50ef1a 90 | mobilenetv20_features_linearbottleneck9_batchnorm2_fwd: 3d022211 91 | mobilenetv20_features_linearbottleneck9_elemwise_add0: 3d3e79ce 92 | mobilenetv20_features_linearbottleneck10_conv0_fwd: 3d08dcbb 93 | mobilenetv20_features_linearbottleneck10_batchnorm0_fwd: 3c1d579d 94 | mobilenetv20_features_linearbottleneck10_relu0_fwd: 3c2c8cd0 95 | mobilenetv20_features_linearbottleneck10_conv1_fwd: 3cf744da 96 | mobilenetv20_features_linearbottleneck10_batchnorm1_fwd: 3cc060e5 97 | mobilenetv20_features_linearbottleneck10_relu1_fwd: 3c9aec9d 98 | mobilenetv20_features_linearbottleneck10_conv2_fwd: 3ce3b8c4 99 | mobilenetv20_features_linearbottleneck10_batchnorm2_fwd: 3cf1fb80 100 | mobilenetv20_features_linearbottleneck11_conv0_fwd: 3c78b3c9 101 | mobilenetv20_features_linearbottleneck11_batchnorm0_fwd: 3c42ff6e 102 | mobilenetv20_features_linearbottleneck11_relu0_fwd: 3c4383aa 103 | mobilenetv20_features_linearbottleneck11_conv1_fwd: 3b11c825 104 | mobilenetv20_features_linearbottleneck11_batchnorm1_fwd: 3c5f1105 105 | mobilenetv20_features_linearbottleneck11_relu1_fwd: 3c51220c 106 | mobilenetv20_features_linearbottleneck11_conv2_fwd: 3c47783d 107 | mobilenetv20_features_linearbottleneck11_batchnorm2_fwd: 3c88d64f 108 | mobilenetv20_features_linearbottleneck11_elemwise_add0: 3d037990 109 | mobilenetv20_features_linearbottleneck12_conv0_fwd: 3cda9d75 110 | mobilenetv20_features_linearbottleneck12_batchnorm0_fwd: 3c4fe1ee 111 | mobilenetv20_features_linearbottleneck12_relu0_fwd: 3c69cefe 112 | mobilenetv20_features_linearbottleneck12_conv1_fwd: 3af15cb1 113 | mobilenetv20_features_linearbottleneck12_batchnorm1_fwd: 3c945906 114 | mobilenetv20_features_linearbottleneck12_relu1_fwd: 3c9d5f6a 115 | mobilenetv20_features_linearbottleneck12_conv2_fwd: 3cc385ed 116 | mobilenetv20_features_linearbottleneck12_batchnorm2_fwd: 3d6a8190 117 | mobilenetv20_features_linearbottleneck12_elemwise_add0: 3d981f37 118 | mobilenetv20_features_linearbottleneck13_conv0_fwd: 3d876e75 119 | mobilenetv20_features_linearbottleneck13_batchnorm0_fwd: 3cbab9b1 120 | mobilenetv20_features_linearbottleneck13_relu0_fwd: 3ceb7faa 121 | mobilenetv20_features_linearbottleneck13_conv1_fwd: 3d4003da 122 | mobilenetv20_features_linearbottleneck13_batchnorm1_fwd: 3cfb5783 123 | mobilenetv20_features_linearbottleneck13_relu1_fwd: 3cfb5783 124 | mobilenetv20_features_linearbottleneck13_conv2_fwd: 3d244ef2 125 | mobilenetv20_features_linearbottleneck13_batchnorm2_fwd: 3cf407c5 126 | mobilenetv20_features_linearbottleneck14_conv0_fwd: 3c82378c 127 | 
mobilenetv20_features_linearbottleneck14_batchnorm0_fwd: 3c827cc6 128 | mobilenetv20_features_linearbottleneck14_relu0_fwd: 3c5dc7da 129 | mobilenetv20_features_linearbottleneck14_conv1_fwd: 3b29c7bb 130 | mobilenetv20_features_linearbottleneck14_batchnorm1_fwd: 3d24b6b1 131 | mobilenetv20_features_linearbottleneck14_relu1_fwd: 3d24b6b1 132 | mobilenetv20_features_linearbottleneck14_conv2_fwd: 3ceaf7f1 133 | mobilenetv20_features_linearbottleneck14_batchnorm2_fwd: 3cd0d888 134 | mobilenetv20_features_linearbottleneck14_elemwise_add0: 3d48a66b 135 | mobilenetv20_features_linearbottleneck15_conv0_fwd: 3cee1fe0 136 | mobilenetv20_features_linearbottleneck15_batchnorm0_fwd: 3c8bda88 137 | mobilenetv20_features_linearbottleneck15_relu0_fwd: 3c8bda88 138 | mobilenetv20_features_linearbottleneck15_conv1_fwd: 3bcd0791 139 | mobilenetv20_features_linearbottleneck15_batchnorm1_fwd: 3ceb3d13 140 | mobilenetv20_features_linearbottleneck15_relu1_fwd: 3d057a63 141 | mobilenetv20_features_linearbottleneck15_conv2_fwd: 3cbdb735 142 | mobilenetv20_features_linearbottleneck15_batchnorm2_fwd: 3d207c2d 143 | mobilenetv20_features_linearbottleneck15_elemwise_add0: 3d50d260 144 | mobilenetv20_features_linearbottleneck16_conv0_fwd: 3da4cd0b 145 | mobilenetv20_features_linearbottleneck16_batchnorm0_fwd: 3c91514d 146 | mobilenetv20_features_linearbottleneck16_relu0_fwd: 3bde46b2 147 | mobilenetv20_features_linearbottleneck16_conv1_fwd: 3a9327dc 148 | mobilenetv20_features_linearbottleneck16_batchnorm1_fwd: 3b83e5fd 149 | mobilenetv20_features_linearbottleneck16_relu1_fwd: 3b7b0508 150 | mobilenetv20_features_linearbottleneck16_conv2_fwd: 3bc14785 151 | mobilenetv20_features_linearbottleneck16_batchnorm2_fwd: 3c61c5eb 152 | mobilenetv20_features_conv1_fwd: 3c2ebfd9 153 | mobilenetv20_features_batchnorm1_fwd: 3de6f342 154 | mobilenetv20_features_relu1_fwd: 3df98145 155 | mobilenetv20_features_pool0_fwd: 3df98145 156 | mobilenetv20_output_pred_fwd: 3e6c0f9b 157 | mobilenetv20_output_flatten0_reshape0: 3e6c0f9b 158 | -------------------------------------------------------------------------------- /int8/calibration/onnx_to_tensorrt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2019 NVIDIA Corporation 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import os 18 | import sys 19 | import glob 20 | import math 21 | import logging 22 | import argparse 23 | 24 | import tensorrt as trt 25 | 26 | TRT_LOGGER = trt.Logger() 27 | logging.basicConfig(level=logging.DEBUG, 28 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 29 | datefmt="%Y-%m-%d %H:%M:%S") 30 | logger = logging.getLogger(__name__) 31 | 32 | 33 | def add_profiles(config, inputs, opt_profiles): 34 | logger.debug("=== Optimization Profiles ===") 35 | for i, profile in enumerate(opt_profiles): 36 | for inp in inputs: 37 | _min, _opt, _max = profile.get_shape(inp.name) 38 | logger.debug("{} - OptProfile {} - Min {} Opt {} Max {}".format(inp.name, i, _min, _opt, _max)) 39 | config.add_optimization_profile(profile) 40 | 41 | 42 | def mark_outputs(network): 43 | # Mark last layer's outputs if not already marked 44 | # NOTE: This may not be correct in all cases 45 | last_layer = network.get_layer(network.num_layers-1) 46 | if not last_layer.num_outputs: 47 | logger.error("Last layer contains no outputs.") 48 | return 49 | 50 | for i in range(last_layer.num_outputs): 51 | network.mark_output(last_layer.get_output(i)) 52 | 53 | 54 | def check_network(network): 55 | if not network.num_outputs: 56 | logger.warning("No output nodes found, marking last layer's outputs as network outputs. Correct this if wrong.") 57 | mark_outputs(network) 58 | 59 | inputs = [network.get_input(i) for i in range(network.num_inputs)] 60 | outputs = [network.get_output(i) for i in range(network.num_outputs)] 61 | max_len = max([len(inp.name) for inp in inputs] + [len(out.name) for out in outputs]) 62 | 63 | logger.debug("=== Network Description ===") 64 | for i, inp in enumerate(inputs): 65 | logger.debug("Input {0} | Name: {1:{2}} | Shape: {3}".format(i, inp.name, max_len, inp.shape)) 66 | for i, out in enumerate(outputs): 67 | logger.debug("Output {0} | Name: {1:{2}} | Shape: {3}".format(i, out.name, max_len, out.shape)) 68 | 69 | 70 | def get_batch_sizes(max_batch_size): 71 | # Returns powers of 2, up to and including max_batch_size 72 | max_exponent = math.log2(max_batch_size) 73 | for i in range(int(max_exponent)+1): 74 | batch_size = 2**i 75 | yield batch_size 76 | 77 | if max_batch_size != batch_size: 78 | yield max_batch_size 79 | 80 | 81 | # TODO: This only covers dynamic shape for batch size, not dynamic shape for other dimensions 82 | def create_optimization_profiles(builder, inputs, batch_sizes=[1,8,16,32,64]): 83 | # Check if all inputs are fixed explicit batch to create a single profile and avoid duplicates 84 | if all([inp.shape[0] > -1 for inp in inputs]): 85 | profile = builder.create_optimization_profile() 86 | for inp in inputs: 87 | fbs, shape = inp.shape[0], inp.shape[1:] 88 | profile.set_shape(inp.name, min=(fbs, *shape), opt=(fbs, *shape), max=(fbs, *shape)) 89 | return [profile] 90 | 91 | # Otherwise for mixed fixed+dynamic explicit batch inputs, create several profiles 92 | profiles = {} 93 | for bs in batch_sizes: 94 | if not profiles.get(bs): 95 | profiles[bs] = builder.create_optimization_profile() 96 | 97 | for inp in inputs: 98 | shape = inp.shape[1:] 99 | # Check if fixed explicit batch 100 | if inp.shape[0] > -1: 101 | bs = inp.shape[0] 102 | 103 | profiles[bs].set_shape(inp.name, min=(bs, *shape), opt=(bs, *shape), max=(bs, *shape)) 104 | 105 | return list(profiles.values()) 106 | 107 | def main(): 108 | parser = argparse.ArgumentParser(description="Creates a TensorRT engine from the provided ONNX file.\n") 109 | parser.add_argument("--onnx", 
required=True, help="The ONNX model file to convert to TensorRT") 110 | parser.add_argument("-o", "--output", type=str, default="model.engine", help="The path at which to write the engine") 111 | parser.add_argument("-b", "--max-batch-size", type=int, default=32, help="The max batch size for the TensorRT engine input") 112 | parser.add_argument("-v", "--verbosity", action="count", help="Verbosity for logging. (None) for ERROR, (-v) for INFO/WARNING/ERROR, (-vv) for VERBOSE.") 113 | parser.add_argument("--explicit-batch", action='store_true', help="Set trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH.") 114 | parser.add_argument("--explicit-precision", action='store_true', help="Set trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION.") 115 | parser.add_argument("--gpu-fallback", action='store_true', help="Set trt.BuilderFlag.GPU_FALLBACK.") 116 | parser.add_argument("--refittable", action='store_true', help="Set trt.BuilderFlag.REFIT.") 117 | parser.add_argument("--debug", action='store_true', help="Set trt.BuilderFlag.DEBUG.") 118 | parser.add_argument("--strict-types", action='store_true', help="Set trt.BuilderFlag.STRICT_TYPES.") 119 | parser.add_argument("--fp16", action="store_true", help="Attempt to use FP16 kernels when possible.") 120 | parser.add_argument("--int8", action="store_true", help="Attempt to use INT8 kernels when possible. This should generally be used in addition to the --fp16 flag. \ 121 | ONLY SUPPORTS RESNET-LIKE MODELS SUCH AS RESNET50/VGG16/INCEPTION/etc.") 122 | parser.add_argument("--calibration-cache", help="(INT8 ONLY) The path to read/write from calibration cache.", default="calibration.cache") 123 | parser.add_argument("--calibration-data", help="(INT8 ONLY) The directory containing {*.jpg, *.jpeg, *.png} files to use for calibration. 
(ex: Imagenet Validation Set)", default=None) 124 | parser.add_argument("--calibration-batch-size", help="(INT8 ONLY) The batch size to use during calibration.", type=int, default=32) 125 | parser.add_argument("--max-calibration-size", help="(INT8 ONLY) The max number of data to calibrate on from --calibration-data.", type=int, default=512) 126 | parser.add_argument("-p", "--preprocess_func", type=str, default=None, help="(INT8 ONLY) Function defined in 'processing.py' to use for pre-processing calibration data.") 127 | parser.add_argument("-s", "--simple", action="store_true", help="Use SimpleCalibrator with random data instead of ImagenetCalibrator for INT8 calibration.") 128 | args, _ = parser.parse_known_args() 129 | 130 | # Adjust logging verbosity 131 | if args.verbosity is None: 132 | TRT_LOGGER.min_severity = trt.Logger.Severity.ERROR 133 | # -v 134 | elif args.verbosity == 1: 135 | TRT_LOGGER.min_severity = trt.Logger.Severity.INFO 136 | # -vv 137 | else: 138 | TRT_LOGGER.min_severity = trt.Logger.Severity.VERBOSE 139 | logger.info("TRT_LOGGER Verbosity: {:}".format(TRT_LOGGER.min_severity)) 140 | 141 | # Network flags 142 | network_flags = 0 143 | if args.explicit_batch: 144 | network_flags |= 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) 145 | if args.explicit_precision: 146 | network_flags |= 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION) 147 | 148 | builder_flag_map = { 149 | 'gpu_fallback': trt.BuilderFlag.GPU_FALLBACK, 150 | 'refittable': trt.BuilderFlag.REFIT, 151 | 'debug': trt.BuilderFlag.DEBUG, 152 | 'strict_types': trt.BuilderFlag.STRICT_TYPES, 153 | 'fp16': trt.BuilderFlag.FP16, 154 | 'int8': trt.BuilderFlag.INT8, 155 | } 156 | 157 | # Building engine 158 | with trt.Builder(TRT_LOGGER) as builder, \ 159 | builder.create_network(network_flags) as network, \ 160 | builder.create_builder_config() as config, \ 161 | trt.OnnxParser(network, TRT_LOGGER) as parser: 162 | 163 | config.max_workspace_size = 2**30 # 1GiB 164 | 165 | # Set Builder Config Flags 166 | for flag in builder_flag_map: 167 | if getattr(args, flag): 168 | logger.info("Setting {}".format(builder_flag_map[flag])) 169 | config.set_flag(builder_flag_map[flag]) 170 | 171 | # Fill network atrributes with information by parsing model 172 | with open(args.onnx, "rb") as f: 173 | if not parser.parse(f.read()): 174 | print('ERROR: Failed to parse the ONNX file: {}'.format(args.onnx)) 175 | for error in range(parser.num_errors): 176 | print(parser.get_error(error)) 177 | sys.exit(1) 178 | 179 | # Display network info and check certain properties 180 | check_network(network) 181 | 182 | if args.explicit_batch: 183 | # Add optimization profiles 184 | batch_sizes = [1, 8, 16, 32, 64] 185 | inputs = [network.get_input(i) for i in range(network.num_inputs)] 186 | opt_profiles = create_optimization_profiles(builder, inputs, batch_sizes) 187 | add_profiles(config, inputs, opt_profiles) 188 | # Implicit Batch Network 189 | else: 190 | builder.max_batch_size = args.max_batch_size 191 | opt_profiles = [] 192 | 193 | # Precision flags 194 | if args.fp16 and not builder.platform_has_fast_fp16: 195 | logger.warning("FP16 not supported on this platform.") 196 | 197 | if args.int8 and not builder.platform_has_fast_int8: 198 | logger.warning("INT8 not supported on this platform.") 199 | 200 | if args.int8: 201 | if args.simple: 202 | from SimpleCalibrator import SimpleCalibrator # local module 203 | config.int8_calibrator = SimpleCalibrator(network, config) 204 | else: 205 | from ImagenetCalibrator 
import ImagenetCalibrator, get_int8_calibrator # local module 206 | config.int8_calibrator = get_int8_calibrator(args.calibration_cache, 207 | args.calibration_data, 208 | args.max_calibration_size, 209 | args.preprocess_func, 210 | args.calibration_batch_size) 211 | 212 | logger.info("Building Engine...") 213 | with builder.build_engine(network, config) as engine, open(args.output, "wb") as f: 214 | logger.info("Serializing engine to file: {:}".format(args.output)) 215 | f.write(engine.serialize()) 216 | 217 | if __name__ == "__main__": 218 | main() 219 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | Copyright 2020 NVIDIA Corporation 180 | 181 | Licensed under the Apache License, Version 2.0 (the "License"); 182 | you may not use this file except in compliance with the License. 183 | You may obtain a copy of the License at 184 | 185 | http://www.apache.org/licenses/LICENSE-2.0 186 | 187 | Unless required by applicable law or agreed to in writing, software 188 | distributed under the License is distributed on an "AS IS" BASIS, 189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 190 | See the License for the specific language governing permissions and 191 | limitations under the License. 192 | 193 | -------------------------------------------------------------------------------- /plugins/CustomIPluginV2/CustomPlugin.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "CustomPlugin.h" 18 | #include "NvInfer.h" 19 | 20 | #include <iostream> 21 | #include <string> 22 | #include <vector> 23 | 24 | using namespace nvinfer1; 25 | using namespace std; 26 | 27 | namespace 28 | { 29 | static const char *CUSTOM_PLUGIN_NAME{"CustomPlugin"}; 30 | static const char *CUSTOM_PLUGIN_VERSION{"1"}; 31 | } 32 | 33 | PluginFieldCollection CustomPluginCreator::mFC{}; 34 | vector<PluginField> CustomPluginCreator::mPluginAttributes; 35 | 36 | CustomPlugin::CustomPlugin(const string name) :mLayerName(name) {} 37 | 38 | 39 | /* ----- Custom Plugin ----- */ 40 | 41 | 42 | /* 43 | * Description 44 | * Initialize the layer for execution. 45 | * 46 | * This is called when the engine is created. 47 | * 48 | * Returns 49 | * 0 for success, else non-zero (which will cause engine termination). 50 | */ 51 | int CustomPlugin::initialize() 52 | { 53 | return 0; 54 | } 55 | 56 | 57 | /* 58 | * Description 59 | * Release resources acquired during plugin layer initialization. 60 | * 61 | * This is called when the engine is destroyed. 62 | */ 63 | void CustomPlugin::terminate() {} 64 | 65 | 66 | /* 67 | * Description 68 | * Get the number of outputs from the layer. 69 | * 70 | * This function is called by the implementations of INetworkDefinition and 71 | * IBuilder. In particular, it is called prior to any call to initialize(). 72 | * 73 | * Returns 74 | * The number of outputs. 75 | */ 76 | int CustomPlugin::getNbOutputs() const 77 | { 78 | return 1; 79 | } 80 | 81 | 82 | /* 83 | * Description 84 | * Get the dimensions of an output tensor. 85 | * 86 | * This function is called by the implementations of INetworkDefinition and 87 | * IBuilder. In particular, it is called prior to any call to initialize(). 88 | * 89 | * Parameters 90 | * index The index of the output tensor. 91 | * inputs The input tensors. 92 | * nbInputDims The number of input tensors. 93 | * 94 | * Returns 95 | * The output dimensions of a layer. 96 | */ 97 | Dims CustomPlugin::getOutputDimensions(int index, const Dims *inputs, int nbInputDims) 98 | { 99 | // TODO: Add a meaningful output 100 | Dims output; 101 | return output; 102 | } 103 | 104 | 105 | /* 106 | * Description 107 | * Execute the layer asynchronously. 108 | * 109 | * This is called manually by the user for async inference. 110 | * 111 | * Parameters 112 | * batchSize The number of inputs in the batch. 113 | * inputs The memory for the input tensors. 114 | * outputs The memory for the output tensors. 115 | * workspace Workspace for execution. 116 | * stream The stream in which to execute the kernels. 117 | * 118 | * Returns 119 | * 0 for success, else non-zero (which will cause engine termination). 120 | */ 121 | int CustomPlugin::enqueue(int batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) 122 | { 123 | // 0 for success, else non-zero (which will cause engine termination). 124 | return -1; // TODO: Inference not implemented for this example yet. 125 | } 126 | 127 | 128 | /* 129 | * Description 130 | * Find the workspace size required by the layer. 131 | * 132 | * This function is called during engine startup, after initialize(). 133 | * 134 | * The workspace size returned should be sufficient for any batch size up 135 | * to the maximum. 136 | * 137 | * Returns 138 | * The workspace size. 139 | */ 140 | size_t CustomPlugin::getWorkspaceSize(int batchSize) const 141 | { 142 | return 0; 143 | } 144 | 145 | 146 | /* 147 | * Description 148 | * Configure the layer.
149 | * 150 | * The dimensions passed here do not include the outermost batch size 151 | * (i.e. for 2-D image networks, they will be 3-D CHW dimensions). 152 | * 153 | * This function is called by the builder prior to initialize(). 154 | * 155 | * It provides an opportunity for the layer to make algorithm choices on 156 | * the basis of its weights, dimensions, and maximum batch size. 157 | * 158 | * Parameters 159 | * inputDims The input tensor dimensions. 160 | * nbInputs The number of inputs. 161 | * outputDims The output tensor dimensions. 162 | * nbOutputs The number of outputs. 163 | * type The data type selected for the engine. 164 | * format The format selected for the engine. 165 | * maxBatchSize The maximum batch size. 166 | * 167 | * Warning 168 | * For the format field, the values PluginFormat::kCHW4, 169 | * PluginFormat::kCHW16, and PluginFormat::kCHW32 will not be passed in, 170 | * this is to keep backward compatibility with TensorRT 5.x series. Use 171 | * PluginV2IOExt or PluginV2DynamicExt for other PluginFormats. 172 | */ 173 | void CustomPlugin::configureWithFormat(const Dims *inputs, int nbInputs, const Dims *outputs, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) 174 | { 175 | // TODO: Add meaningful example or link to one. 176 | } 177 | 178 | 179 | /* 180 | * Description 181 | * Check format support. 182 | * 183 | * This function is called by the implementations of INetworkDefinition, 184 | * IBuilder, and safe::ICudaEngine/ICudaEngine. In particular, it is called 185 | * when creating an engine and when deserializing an engine. 186 | * 187 | * Parameters 188 | * type DataType requested. 189 | * format PluginFormat requested. 190 | * 191 | * Returns 192 | * true if the plugin supports the type-format combination, false otherwise. 193 | * 194 | * Warning 195 | * For the format field, the values PluginFormat::kCHW4, 196 | * PluginFormat::kCHW16, and PluginFormat::kCHW32 will not be passed in, 197 | * this is to keep backward compatibility with TensorRT 5.x series. Use 198 | * PluginV2IOExt or PluginV2DynamicExt for other PluginFormats. 199 | */ 200 | bool CustomPlugin::supportsFormat(DataType type, PluginFormat format) const 201 | { 202 | return true; 203 | } 204 | 205 | 206 | /* 207 | * Description 208 | * Find the size of the serialization buffer required. 209 | * 210 | * Returns 211 | * The size of the serialization buffer. 212 | */ 213 | size_t CustomPlugin::getSerializationSize() const 214 | { 215 | return sizeof(mLayerName); 216 | } 217 | 218 | 219 | /* 220 | * Description 221 | * Serialize the layer. Useful for saving engines for future use. 222 | * 223 | * Parameters 224 | * buffer A pointer to a buffer to serialize data. Size of buffer must 225 | * be equal to value returned by getSerializationSize. 226 | */ 227 | void CustomPlugin::serialize(void *buffer) const {} 228 | 229 | 230 | /* 231 | * Description 232 | * Destroy the plugin object. This will be called when the network, builder or engine is destroyed. 233 | */ 234 | void CustomPlugin::destroy() { delete this; } 235 | 236 | 237 | /* 238 | * Description 239 | * Clone the plugin object. This copies over internal plugin parameters and 240 | * returns a new plugin object with these parameters. 241 | * 242 | * Returns 243 | * A Plugin object. 244 | */ 245 | IPluginV2 *CustomPlugin::clone() const 246 | { 247 | return new CustomPlugin(mLayerName); 248 | } 249 | 250 | 251 | /* 252 | * Description 253 | * Set the namespace that this plugin object belongs to. 
Ideally, all plugin 254 | * objects from the same plugin library should have the same namespace. 255 | */ 256 | void CustomPlugin::setPluginNamespace(const char *libNamespace) 257 | { 258 | mNamespace = libNamespace; 259 | } 260 | 261 | 262 | /* 263 | * Description 264 | * Should match the plugin namespace returned by the corresponding 265 | * plugin creator. 266 | * 267 | * Returns 268 | * The namespace of the plugin object. 269 | */ 270 | const char *CustomPlugin::getPluginNamespace() const 271 | { 272 | return mNamespace.c_str(); 273 | } 274 | 275 | 276 | /* 277 | * Description 278 | * Should match the plugin name returned by the corresponding 279 | * plugin creator. 280 | * 281 | * Returns 282 | * The Plugin type. 283 | */ 284 | const char *CustomPlugin::getPluginType() const 285 | { 286 | return CUSTOM_PLUGIN_NAME; 287 | } 288 | 289 | 290 | /* 291 | * Description 292 | * Should match the plugin version returned by the corresponding 293 | * plugin creator. 294 | * 295 | * Returns 296 | * The Plugin version. 297 | */ 298 | const char *CustomPlugin::getPluginVersion() const 299 | { 300 | return CUSTOM_PLUGIN_VERSION; 301 | } 302 | 303 | 304 | /* ----- Custom IPluginCreator ----- */ 305 | 306 | 307 | /* 308 | * Description 309 | * Custom IPluginCreator Constructor 310 | */ 311 | CustomPluginCreator::CustomPluginCreator() 312 | { 313 | mPluginAttributes.emplace_back(PluginField("var1", nullptr, PluginFieldType::kINT32, 1)); 314 | mPluginAttributes.emplace_back(PluginField("var2", nullptr, PluginFieldType::kINT32, 1)); 315 | mPluginAttributes.emplace_back(PluginField("var3", nullptr, PluginFieldType::kINT32, 1)); 316 | mPluginAttributes.emplace_back(PluginField("var4", nullptr, PluginFieldType::kINT32, 1)); 317 | mPluginAttributes.emplace_back(PluginField("var5", nullptr, PluginFieldType::kINT32, 1)); 318 | 319 | // Fill PluginFieldCollection with PluginField arguments metadata 320 | mFC.nbFields = mPluginAttributes.size(); 321 | cout << "Plugin attribute number: " << mPluginAttributes.size() << endl; 322 | mFC.fields = mPluginAttributes.data(); 323 | } 324 | 325 | 326 | /* 327 | * Returns 328 | * A Plugin object, or nullptr in case of error. 329 | */ 330 | IPluginV2 *CustomPluginCreator::createPlugin(const char *name, const PluginFieldCollection *fc) 331 | { 332 | const PluginField *fields = fc->fields; 333 | 334 | // TODO: Do something more meaningful here 335 | for (int i = 0; i < fc->nbFields; i++) 336 | { 337 | const char *name = fields[i].name; 338 | auto data = fields[i].data; 339 | cout << name << ": " << static_cast<int>(*(static_cast<const int *>(data))) << endl; 340 | } 341 | 342 | return new CustomPlugin(name); 343 | } 344 | 345 | 346 | /* 347 | * Description 348 | * Called during deserialization of plugin layer. 349 | * 350 | * Returns 351 | * A Plugin object. 352 | */ 353 | IPluginV2 *CustomPluginCreator::deserializePlugin(const char *name, const void *serialData, size_t serialLength) 354 | { 355 | return new CustomPlugin(name); 356 | } 357 | 358 | 359 | /* 360 | * Description 361 | * Set the namespace of the plugin creator based on the plugin library it 362 | * belongs to. This can be set while registering the plugin creator. 363 | */ 364 | void CustomPluginCreator::setPluginNamespace(const char *pluginNamespace) 365 | { 366 | mNamespace = pluginNamespace; 367 | } 368 | 369 | 370 | /* 371 | * Description 372 | * Should match the plugin namespace returned by the corresponding plugin. 373 | * 374 | * Returns 375 | * The namespace of the plugin object.
376 | */ 377 | const char *CustomPluginCreator::getPluginNamespace() const 378 | { 379 | return mNamespace.c_str(); 380 | } 381 | 382 | 383 | /* 384 | * Description 385 | * Should match the plugin name returned by the corresponding plugin. 386 | * 387 | * Returns 388 | * The Plugin name 389 | */ 390 | const char *CustomPluginCreator::getPluginName() const 391 | { 392 | return CUSTOM_PLUGIN_NAME; 393 | } 394 | 395 | 396 | /* 397 | * Description 398 | * Should match the plugin version returned by the corresponding plugin. 399 | * 400 | * Returns 401 | * The Plugin version. 402 | */ 403 | const char *CustomPluginCreator::getPluginVersion() const 404 | { 405 | return CUSTOM_PLUGIN_VERSION; 406 | } 407 | 408 | 409 | /* 410 | * Description 411 | * Return a PluginFieldCollection (list) of PluginField objects that needs 412 | * to be passed to createPlugin. 413 | * 414 | * Returns 415 | * A PluginFieldCollection object. 416 | */ 417 | const PluginFieldCollection *CustomPluginCreator::getFieldNames() 418 | { 419 | return &mFC; 420 | } 421 | 422 | 423 | /* 424 | * TensorRT also provides the ability to register a plugin by calling 425 | * REGISTER_TENSORRT_PLUGIN(pluginCreator) which statically registers the 426 | * Plugin Creator to the Plugin Registry. During runtime, the Plugin Registry 427 | * can be queried using the extern function getPluginRegistry(). The Plugin 428 | * Registry stores a pointer to all the registered Plugin Creators and can be 429 | * used to look up a specific Plugin Creator based on the plugin name and version. 430 | */ 431 | REGISTER_TENSORRT_PLUGIN(CustomPluginCreator); 432 | -------------------------------------------------------------------------------- /int8/calibration/labels/imagenet1k_labels.txt: -------------------------------------------------------------------------------- 1 | tench 2 | goldfish 3 | great white shark 4 | tiger shark 5 | hammerhead 6 | electric ray 7 | stingray 8 | cock 9 | hen 10 | ostrich 11 | brambling 12 | goldfinch 13 | house finch 14 | junco 15 | indigo bunting 16 | robin 17 | bulbul 18 | jay 19 | magpie 20 | chickadee 21 | water ouzel 22 | kite 23 | bald eagle 24 | vulture 25 | great grey owl 26 | European fire salamander 27 | common newt 28 | eft 29 | spotted salamander 30 | axolotl 31 | bullfrog 32 | tree frog 33 | tailed frog 34 | loggerhead 35 | leatherback turtle 36 | mud turtle 37 | terrapin 38 | box turtle 39 | banded gecko 40 | common iguana 41 | American chameleon 42 | whiptail 43 | agama 44 | frilled lizard 45 | alligator lizard 46 | Gila monster 47 | green lizard 48 | African chameleon 49 | Komodo dragon 50 | African crocodile 51 | American alligator 52 | triceratops 53 | thunder snake 54 | ringneck snake 55 | hognose snake 56 | green snake 57 | king snake 58 | garter snake 59 | water snake 60 | vine snake 61 | night snake 62 | boa constrictor 63 | rock python 64 | Indian cobra 65 | green mamba 66 | sea snake 67 | horned viper 68 | diamondback 69 | sidewinder 70 | trilobite 71 | harvestman 72 | scorpion 73 | black and gold garden spider 74 | barn spider 75 | garden spider 76 | black widow 77 | tarantula 78 | wolf spider 79 | tick 80 | centipede 81 | black grouse 82 | ptarmigan 83 | ruffed grouse 84 | prairie chicken 85 | peacock 86 | quail 87 | partridge 88 | African grey 89 | macaw 90 | sulphur-crested cockatoo 91 | lorikeet 92 | coucal 93 | bee eater 94 | hornbill 95 | hummingbird 96 | jacamar 97 | toucan 98 | drake 99 | red-breasted merganser 100 | goose 101 | black swan 102 | tusker 103 | echidna 104 | platypus 105 | wallaby 
106 | koala 107 | wombat 108 | jellyfish 109 | sea anemone 110 | brain coral 111 | flatworm 112 | nematode 113 | conch 114 | snail 115 | slug 116 | sea slug 117 | chiton 118 | chambered nautilus 119 | Dungeness crab 120 | rock crab 121 | fiddler crab 122 | king crab 123 | American lobster 124 | spiny lobster 125 | crayfish 126 | hermit crab 127 | isopod 128 | white stork 129 | black stork 130 | spoonbill 131 | flamingo 132 | little blue heron 133 | American egret 134 | bittern 135 | crane 136 | limpkin 137 | European gallinule 138 | American coot 139 | bustard 140 | ruddy turnstone 141 | red-backed sandpiper 142 | redshank 143 | dowitcher 144 | oystercatcher 145 | pelican 146 | king penguin 147 | albatross 148 | grey whale 149 | killer whale 150 | dugong 151 | sea lion 152 | Chihuahua 153 | Japanese spaniel 154 | Maltese dog 155 | Pekinese 156 | Shih-Tzu 157 | Blenheim spaniel 158 | papillon 159 | toy terrier 160 | Rhodesian ridgeback 161 | Afghan hound 162 | basset 163 | beagle 164 | bloodhound 165 | bluetick 166 | black-and-tan coonhound 167 | Walker hound 168 | English foxhound 169 | redbone 170 | borzoi 171 | Irish wolfhound 172 | Italian greyhound 173 | whippet 174 | Ibizan hound 175 | Norwegian elkhound 176 | otterhound 177 | Saluki 178 | Scottish deerhound 179 | Weimaraner 180 | Staffordshire bullterrier 181 | American Staffordshire terrier 182 | Bedlington terrier 183 | Border terrier 184 | Kerry blue terrier 185 | Irish terrier 186 | Norfolk terrier 187 | Norwich terrier 188 | Yorkshire terrier 189 | wire-haired fox terrier 190 | Lakeland terrier 191 | Sealyham terrier 192 | Airedale 193 | cairn 194 | Australian terrier 195 | Dandie Dinmont 196 | Boston bull 197 | miniature schnauzer 198 | giant schnauzer 199 | standard schnauzer 200 | Scotch terrier 201 | Tibetan terrier 202 | silky terrier 203 | soft-coated wheaten terrier 204 | West Highland white terrier 205 | Lhasa 206 | flat-coated retriever 207 | curly-coated retriever 208 | golden retriever 209 | Labrador retriever 210 | Chesapeake Bay retriever 211 | German short-haired pointer 212 | vizsla 213 | English setter 214 | Irish setter 215 | Gordon setter 216 | Brittany spaniel 217 | clumber 218 | English springer 219 | Welsh springer spaniel 220 | cocker spaniel 221 | Sussex spaniel 222 | Irish water spaniel 223 | kuvasz 224 | schipperke 225 | groenendael 226 | malinois 227 | briard 228 | kelpie 229 | komondor 230 | Old English sheepdog 231 | Shetland sheepdog 232 | collie 233 | Border collie 234 | Bouvier des Flandres 235 | Rottweiler 236 | German shepherd 237 | Doberman 238 | miniature pinscher 239 | Greater Swiss Mountain dog 240 | Bernese mountain dog 241 | Appenzeller 242 | EntleBucher 243 | boxer 244 | bull mastiff 245 | Tibetan mastiff 246 | French bulldog 247 | Great Dane 248 | Saint Bernard 249 | Eskimo dog 250 | malamute 251 | Siberian husky 252 | dalmatian 253 | affenpinscher 254 | basenji 255 | pug 256 | Leonberg 257 | Newfoundland 258 | Great Pyrenees 259 | Samoyed 260 | Pomeranian 261 | chow 262 | keeshond 263 | Brabancon griffon 264 | Pembroke 265 | Cardigan 266 | toy poodle 267 | miniature poodle 268 | standard poodle 269 | Mexican hairless 270 | timber wolf 271 | white wolf 272 | red wolf 273 | coyote 274 | dingo 275 | dhole 276 | African hunting dog 277 | hyena 278 | red fox 279 | kit fox 280 | Arctic fox 281 | grey fox 282 | tabby 283 | tiger cat 284 | Persian cat 285 | Siamese cat 286 | Egyptian cat 287 | cougar 288 | lynx 289 | leopard 290 | snow leopard 291 | jaguar 292 | lion 293 | tiger 294 | cheetah 
295 | brown bear 296 | American black bear 297 | ice bear 298 | sloth bear 299 | mongoose 300 | meerkat 301 | tiger beetle 302 | ladybug 303 | ground beetle 304 | long-horned beetle 305 | leaf beetle 306 | dung beetle 307 | rhinoceros beetle 308 | weevil 309 | fly 310 | bee 311 | ant 312 | grasshopper 313 | cricket 314 | walking stick 315 | cockroach 316 | mantis 317 | cicada 318 | leafhopper 319 | lacewing 320 | dragonfly 321 | damselfly 322 | admiral 323 | ringlet 324 | monarch 325 | cabbage butterfly 326 | sulphur butterfly 327 | lycaenid 328 | starfish 329 | sea urchin 330 | sea cucumber 331 | wood rabbit 332 | hare 333 | Angora 334 | hamster 335 | porcupine 336 | fox squirrel 337 | marmot 338 | beaver 339 | guinea pig 340 | sorrel 341 | zebra 342 | hog 343 | wild boar 344 | warthog 345 | hippopotamus 346 | ox 347 | water buffalo 348 | bison 349 | ram 350 | bighorn 351 | ibex 352 | hartebeest 353 | impala 354 | gazelle 355 | Arabian camel 356 | llama 357 | weasel 358 | mink 359 | polecat 360 | black-footed ferret 361 | otter 362 | skunk 363 | badger 364 | armadillo 365 | three-toed sloth 366 | orangutan 367 | gorilla 368 | chimpanzee 369 | gibbon 370 | siamang 371 | guenon 372 | patas 373 | baboon 374 | macaque 375 | langur 376 | colobus 377 | proboscis monkey 378 | marmoset 379 | capuchin 380 | howler monkey 381 | titi 382 | spider monkey 383 | squirrel monkey 384 | Madagascar cat 385 | indri 386 | Indian elephant 387 | African elephant 388 | lesser panda 389 | giant panda 390 | barracouta 391 | eel 392 | coho 393 | rock beauty 394 | anemone fish 395 | sturgeon 396 | gar 397 | lionfish 398 | puffer 399 | abacus 400 | abaya 401 | academic gown 402 | accordion 403 | acoustic guitar 404 | aircraft carrier 405 | airliner 406 | airship 407 | altar 408 | ambulance 409 | amphibian 410 | analog clock 411 | apiary 412 | apron 413 | ashcan 414 | assault rifle 415 | backpack 416 | bakery 417 | balance beam 418 | balloon 419 | ballpoint 420 | Band Aid 421 | banjo 422 | bannister 423 | barbell 424 | barber chair 425 | barbershop 426 | barn 427 | barometer 428 | barrel 429 | barrow 430 | baseball 431 | basketball 432 | bassinet 433 | bassoon 434 | bathing cap 435 | bath towel 436 | bathtub 437 | beach wagon 438 | beacon 439 | beaker 440 | bearskin 441 | beer bottle 442 | beer glass 443 | bell cote 444 | bib 445 | bicycle-built-for-two 446 | bikini 447 | binder 448 | binoculars 449 | birdhouse 450 | boathouse 451 | bobsled 452 | bolo tie 453 | bonnet 454 | bookcase 455 | bookshop 456 | bottlecap 457 | bow 458 | bow tie 459 | brass 460 | brassiere 461 | breakwater 462 | breastplate 463 | broom 464 | bucket 465 | buckle 466 | bulletproof vest 467 | bullet train 468 | butcher shop 469 | cab 470 | caldron 471 | candle 472 | cannon 473 | canoe 474 | can opener 475 | cardigan 476 | car mirror 477 | carousel 478 | carpenter's kit 479 | carton 480 | car wheel 481 | cash machine 482 | cassette 483 | cassette player 484 | castle 485 | catamaran 486 | CD player 487 | cello 488 | cellular telephone 489 | chain 490 | chainlink fence 491 | chain mail 492 | chain saw 493 | chest 494 | chiffonier 495 | chime 496 | china cabinet 497 | Christmas stocking 498 | church 499 | cinema 500 | cleaver 501 | cliff dwelling 502 | cloak 503 | clog 504 | cocktail shaker 505 | coffee mug 506 | coffeepot 507 | coil 508 | combination lock 509 | computer keyboard 510 | confectionery 511 | container ship 512 | convertible 513 | corkscrew 514 | cornet 515 | cowboy boot 516 | cowboy hat 517 | cradle 518 | crane 519 | crash helmet 520 | 
crate 521 | crib 522 | Crock Pot 523 | croquet ball 524 | crutch 525 | cuirass 526 | dam 527 | desk 528 | desktop computer 529 | dial telephone 530 | diaper 531 | digital clock 532 | digital watch 533 | dining table 534 | dishrag 535 | dishwasher 536 | disk brake 537 | dock 538 | dogsled 539 | dome 540 | doormat 541 | drilling platform 542 | drum 543 | drumstick 544 | dumbbell 545 | Dutch oven 546 | electric fan 547 | electric guitar 548 | electric locomotive 549 | entertainment center 550 | envelope 551 | espresso maker 552 | face powder 553 | feather boa 554 | file 555 | fireboat 556 | fire engine 557 | fire screen 558 | flagpole 559 | flute 560 | folding chair 561 | football helmet 562 | forklift 563 | fountain 564 | fountain pen 565 | four-poster 566 | freight car 567 | French horn 568 | frying pan 569 | fur coat 570 | garbage truck 571 | gasmask 572 | gas pump 573 | goblet 574 | go-kart 575 | golf ball 576 | golfcart 577 | gondola 578 | gong 579 | gown 580 | grand piano 581 | greenhouse 582 | grille 583 | grocery store 584 | guillotine 585 | hair slide 586 | hair spray 587 | half track 588 | hammer 589 | hamper 590 | hand blower 591 | hand-held computer 592 | handkerchief 593 | hard disc 594 | harmonica 595 | harp 596 | harvester 597 | hatchet 598 | holster 599 | home theater 600 | honeycomb 601 | hook 602 | hoopskirt 603 | horizontal bar 604 | horse cart 605 | hourglass 606 | iPod 607 | iron 608 | jack-o'-lantern 609 | jean 610 | jeep 611 | jersey 612 | jigsaw puzzle 613 | jinrikisha 614 | joystick 615 | kimono 616 | knee pad 617 | knot 618 | lab coat 619 | ladle 620 | lampshade 621 | laptop 622 | lawn mower 623 | lens cap 624 | letter opener 625 | library 626 | lifeboat 627 | lighter 628 | limousine 629 | liner 630 | lipstick 631 | Loafer 632 | lotion 633 | loudspeaker 634 | loupe 635 | lumbermill 636 | magnetic compass 637 | mailbag 638 | mailbox 639 | maillot 640 | maillot 641 | manhole cover 642 | maraca 643 | marimba 644 | mask 645 | matchstick 646 | maypole 647 | maze 648 | measuring cup 649 | medicine chest 650 | megalith 651 | microphone 652 | microwave 653 | military uniform 654 | milk can 655 | minibus 656 | miniskirt 657 | minivan 658 | missile 659 | mitten 660 | mixing bowl 661 | mobile home 662 | Model T 663 | modem 664 | monastery 665 | monitor 666 | moped 667 | mortar 668 | mortarboard 669 | mosque 670 | mosquito net 671 | motor scooter 672 | mountain bike 673 | mountain tent 674 | mouse 675 | mousetrap 676 | moving van 677 | muzzle 678 | nail 679 | neck brace 680 | necklace 681 | nipple 682 | notebook 683 | obelisk 684 | oboe 685 | ocarina 686 | odometer 687 | oil filter 688 | organ 689 | oscilloscope 690 | overskirt 691 | oxcart 692 | oxygen mask 693 | packet 694 | paddle 695 | paddlewheel 696 | padlock 697 | paintbrush 698 | pajama 699 | palace 700 | panpipe 701 | paper towel 702 | parachute 703 | parallel bars 704 | park bench 705 | parking meter 706 | passenger car 707 | patio 708 | pay-phone 709 | pedestal 710 | pencil box 711 | pencil sharpener 712 | perfume 713 | Petri dish 714 | photocopier 715 | pick 716 | pickelhaube 717 | picket fence 718 | pickup 719 | pier 720 | piggy bank 721 | pill bottle 722 | pillow 723 | ping-pong ball 724 | pinwheel 725 | pirate 726 | pitcher 727 | plane 728 | planetarium 729 | plastic bag 730 | plate rack 731 | plow 732 | plunger 733 | Polaroid camera 734 | pole 735 | police van 736 | poncho 737 | pool table 738 | pop bottle 739 | pot 740 | potter's wheel 741 | power drill 742 | prayer rug 743 | printer 744 | prison 745 | 
projectile 746 | projector 747 | puck 748 | punching bag 749 | purse 750 | quill 751 | quilt 752 | racer 753 | racket 754 | radiator 755 | radio 756 | radio telescope 757 | rain barrel 758 | recreational vehicle 759 | reel 760 | reflex camera 761 | refrigerator 762 | remote control 763 | restaurant 764 | revolver 765 | rifle 766 | rocking chair 767 | rotisserie 768 | rubber eraser 769 | rugby ball 770 | rule 771 | running shoe 772 | safe 773 | safety pin 774 | saltshaker 775 | sandal 776 | sarong 777 | sax 778 | scabbard 779 | scale 780 | school bus 781 | schooner 782 | scoreboard 783 | screen 784 | screw 785 | screwdriver 786 | seat belt 787 | sewing machine 788 | shield 789 | shoe shop 790 | shoji 791 | shopping basket 792 | shopping cart 793 | shovel 794 | shower cap 795 | shower curtain 796 | ski 797 | ski mask 798 | sleeping bag 799 | slide rule 800 | sliding door 801 | slot 802 | snorkel 803 | snowmobile 804 | snowplow 805 | soap dispenser 806 | soccer ball 807 | sock 808 | solar dish 809 | sombrero 810 | soup bowl 811 | space bar 812 | space heater 813 | space shuttle 814 | spatula 815 | speedboat 816 | spider web 817 | spindle 818 | sports car 819 | spotlight 820 | stage 821 | steam locomotive 822 | steel arch bridge 823 | steel drum 824 | stethoscope 825 | stole 826 | stone wall 827 | stopwatch 828 | stove 829 | strainer 830 | streetcar 831 | stretcher 832 | studio couch 833 | stupa 834 | submarine 835 | suit 836 | sundial 837 | sunglass 838 | sunglasses 839 | sunscreen 840 | suspension bridge 841 | swab 842 | sweatshirt 843 | swimming trunks 844 | swing 845 | switch 846 | syringe 847 | table lamp 848 | tank 849 | tape player 850 | teapot 851 | teddy 852 | television 853 | tennis ball 854 | thatch 855 | theater curtain 856 | thimble 857 | thresher 858 | throne 859 | tile roof 860 | toaster 861 | tobacco shop 862 | toilet seat 863 | torch 864 | totem pole 865 | tow truck 866 | toyshop 867 | tractor 868 | trailer truck 869 | tray 870 | trench coat 871 | tricycle 872 | trimaran 873 | tripod 874 | triumphal arch 875 | trolleybus 876 | trombone 877 | tub 878 | turnstile 879 | typewriter keyboard 880 | umbrella 881 | unicycle 882 | upright 883 | vacuum 884 | vase 885 | vault 886 | velvet 887 | vending machine 888 | vestment 889 | viaduct 890 | violin 891 | volleyball 892 | waffle iron 893 | wall clock 894 | wallet 895 | wardrobe 896 | warplane 897 | washbasin 898 | washer 899 | water bottle 900 | water jug 901 | water tower 902 | whiskey jug 903 | whistle 904 | wig 905 | window screen 906 | window shade 907 | Windsor tie 908 | wine bottle 909 | wing 910 | wok 911 | wooden spoon 912 | wool 913 | worm fence 914 | wreck 915 | yawl 916 | yurt 917 | web site 918 | comic book 919 | crossword puzzle 920 | street sign 921 | traffic light 922 | book jacket 923 | menu 924 | plate 925 | guacamole 926 | consomme 927 | hot pot 928 | trifle 929 | ice cream 930 | ice lolly 931 | French loaf 932 | bagel 933 | pretzel 934 | cheeseburger 935 | hotdog 936 | mashed potato 937 | head cabbage 938 | broccoli 939 | cauliflower 940 | zucchini 941 | spaghetti squash 942 | acorn squash 943 | butternut squash 944 | cucumber 945 | artichoke 946 | bell pepper 947 | cardoon 948 | mushroom 949 | Granny Smith 950 | strawberry 951 | orange 952 | lemon 953 | fig 954 | pineapple 955 | banana 956 | jackfruit 957 | custard apple 958 | pomegranate 959 | hay 960 | carbonara 961 | chocolate sauce 962 | dough 963 | meat loaf 964 | pizza 965 | potpie 966 | burrito 967 | red wine 968 | espresso 969 | cup 970 | eggnog 971 | alp 
972 | bubble 973 | cliff 974 | coral reef 975 | geyser 976 | lakeside 977 | promontory 978 | sandbar 979 | seashore 980 | valley 981 | volcano 982 | ballplayer 983 | groom 984 | scuba diver 985 | rapeseed 986 | daisy 987 | yellow lady's slipper 988 | corn 989 | acorn 990 | hip 991 | buckeye 992 | coral fungus 993 | agaric 994 | gyromitra 995 | stinkhorn 996 | earthstar 997 | hen-of-the-woods 998 | bolete 999 | ear 1000 | toilet tissue 1001 | --------------------------------------------------------------------------------