├── .gitignore
├── LICENSE.md
├── README.md
├── benchmarks
    ├── JETSON_NANO.md
    └── JETSON_XAVIER.md
├── build.py
├── examples
    ├── .gitignore
    ├── pg_mbv2_to_trt.py
    └── pg_resnet50_to_trt.py
├── log.md
├── notebooks
    ├── image_classification
    │   ├── conversion.ipynb
    │   ├── imagenet_labels.json
    │   └── live_demo.ipynb
    └── image_segmentation
    │   └── conversion.ipynb
├── pilgrim_trt_plugins
    ├── .gitignore
    ├── CMakeLists.txt
    ├── LICENSE
    ├── README.md
    ├── build.sh
    ├── include
    │   ├── amir_cuda_util
    │   │   ├── cuda_util.h
    │   │   └── cudnn_util.h
    │   └── plugin
    │   │   ├── adaptivePoolPlugin
    │   │       └── adaptivePoolPlugin.h
    │   │   ├── amirInferPlugin.h
    │   │   ├── batchedNMSPlugin
    │   │       └── batchedNMSPlugin.h
    │   │   ├── carafeFeatureReassemblePlugin
    │   │       └── carafeFeatureReassemblePlugin.h
    │   │   ├── deformableConvPlugin
    │   │       ├── deformableConvPlugin.h
    │   │       └── modulatedDeformableConvPlugin.h
    │   │   ├── deformablePoolPlugin
    │   │       └── deformablePoolPlugin.h
    │   │   ├── delta2bboxPlugin
    │   │       └── delta2bboxPlugin.h
    │   │   ├── exViewPlugin
    │   │       └── exViewPlugin.h
    │   │   ├── gridAnchorDynamicPlugin
    │   │       └── gridAnchorDynamicPlugin.h
    │   │   ├── gridSamplePlugin
    │   │       └── gridSamplePlugin.h
    │   │   ├── groupNormPlugin
    │   │       └── groupNormPlugin.h
    │   │   ├── layerNormPlugin
    │   │       └── layerNormPlugin.h
    │   │   ├── meshGridPlugin
    │   │       └── meshGridPlugin.h
    │   │   ├── repeatDimsPlugin
    │   │       └── repeatDimsPlugin.h
    │   │   ├── roiExtractorPlugin
    │   │       └── roiExtractorPlugin.h
    │   │   ├── roiPoolPlugin
    │   │       └── roiPoolPlugin.h
    │   │   ├── torchCumMaxMinPlugin
    │   │       └── torchCumMaxMinPlugin.h
    │   │   ├── torchCumPlugin
    │   │       └── torchCumPlugin.h
    │   │   ├── torchFlipPlugin
    │   │       └── torchFlipPlugin.h
    │   │   ├── torchGatherPlugin
    │   │       └── torchGatherPlugin.h
    │   │   └── torchNMSPlugin
    │   │       └── torchNMSPlugin.h
    └── src
    │   ├── CMakeLists.txt
    │   ├── amir_cuda_util
    │       ├── CMakeLists.txt
    │       ├── memcpyPermute.cu
    │       ├── reduceUtils.cuh
    │       ├── repeatDims.cu
    │       └── tensorMeanVar.cu
    │   └── plugin
    │       ├── CMakeLists.txt
    │       ├── InferPlugin.cpp
    │       ├── NvDsInferParseMmdet.cpp
    │       ├── adaptivePoolPlugin
    │           ├── CMakeLists.txt
    │           ├── adaptivePoolPlugin.cpp
    │           ├── adaptive_pool.cu
    │           └── adaptive_pool.h
    │       ├── batchedNMSPlugin
    │           ├── CMakeLists.txt
    │           ├── allClassNMS.cu
    │           ├── batchedNMSInference.cpp
    │           ├── batchedNMSInference.h
    │           ├── batchedNMSPlugin.cpp
    │           ├── batchedNMSPlugin.h
    │           ├── bboxUtils.h
    │           ├── common.cu
    │           ├── common.h
    │           ├── cub_helper.h
    │           ├── gatherNMSOutputs.cu
    │           ├── gatherNMSOutputs.h
    │           ├── kernel.cpp
    │           ├── kernel.h
    │           ├── nmsHelper.cpp
    │           ├── nmsUtils.h
    │           ├── permuteData.cu
    │           ├── plugin.h
    │           ├── sortScoresPerClass.cu
    │           └── sortScoresPerImage.cu
    │       ├── carafeFeatureReassemblePlugin
    │           ├── CMakeLists.txt
    │           ├── carafeFeatureReassemblePlugin.cpp
    │           ├── carafe_cuda.h
    │           └── carafe_cuda_kernel.cu
    │       ├── common
    │           ├── amirCommon.h
    │           ├── common.h
    │           ├── logger.cpp
    │           ├── logger.h
    │           ├── logging.h
    │           └── serialize.hpp
    │       ├── deformableConvPlugin
    │           ├── CMakeLists.txt
    │           ├── deform_conv_cuda.cpp
    │           ├── deform_conv_cuda.h
    │           ├── deform_conv_cuda_kernel.cu
    │           ├── deformableConvPlugin.cpp
    │           └── modulatedDeformableConvPlugin.cpp
    │       ├── deformablePoolPlugin
    │           ├── CMakeLists.txt
    │           ├── deform_roi_pool.cu
    │           ├── deform_roi_pool.h
    │           ├── deform_roi_pool_cuda_kernel.cuh
    │           └── deformablePoolPlugin.cpp
    │       ├── delta2bboxPlugin
    │           ├── CMakeLists.txt
    │           ├── delta2bbox.cu
    │           ├── delta2bbox.h
    │           └── delta2bboxPlugin.cpp
    │       ├── exViewPlugin
    │           ├── CMakeLists.txt
    │           ├── exViewPlugin.cpp
    │           ├── expressionParser.cpp
    │           └── expressionParser.h
    │       ├── gridAnchorDynamicPlugin
    │           ├── CMakeLists.txt
    │           ├── gridAnchorDynamicPlugin.cpp
    │           ├── grid_anchor_dynamic.cu
    │           └── grid_anchor_dynamic.h
    │       ├── gridSamplePlugin
    │           ├── CMakeLists.txt
    │           ├── gridSamplePlugin.cpp
    │           ├── grid_sample.cu
    │           └── grid_sample.h
    │       ├── groupNormPlugin
    │           ├── CMakeLists.txt
    │           ├── groupNormPlugin.cpp
    │           ├── group_norm.h
    │           └── group_norm_kernel.cu
    │       ├── layerNormPlugin
    │           ├── CMakeLists.txt
    │           ├── layerNormPlugin.cpp
    │           ├── layer_norm.h
    │           └── layer_norm_kernel.cu
    │       ├── meshGridPlugin
    │           ├── CMakeLists.txt
    │           ├── meshGridPlugin.cpp
    │           ├── mesh_grid.cu
    │           └── mesh_grid.h
    │       ├── repeatDimsPlugin
    │           ├── CMakeLists.txt
    │           └── repeatDimsPlugin.cpp
    │       ├── roiExtractorPlugin
    │           ├── CMakeLists.txt
    │           ├── roiExtractorPlugin.cpp
    │           ├── roi_extractor.h
    │           └── roi_extractor_kernel.cu
    │       ├── roiPoolPlugin
    │           ├── CMakeLists.txt
    │           ├── roiPoolPlugin.cpp
    │           ├── roi_pool.h
    │           └── roi_pool_kernel.cu
    │       ├── torchCumMaxMinPlugin
    │           ├── CMakeLists.txt
    │           ├── torchCumMaxMinPlugin.cpp
    │           ├── torch_cum_maxmin.cu
    │           └── torch_cum_maxmin.h
    │       ├── torchCumPlugin
    │           ├── CMakeLists.txt
    │           ├── torchCumPlugin.cpp
    │           ├── torch_cum.cu
    │           └── torch_cum.h
    │       ├── torchFlipPlugin
    │           ├── CMakeLists.txt
    │           ├── torchFlipPlugin.cpp
    │           ├── torch_flip.cu
    │           └── torch_flip.h
    │       ├── torchGatherPlugin
    │           ├── CMakeLists.txt
    │           ├── torchGatherPlugin.cpp
    │           ├── torch_gather.cu
    │           └── torch_gather.h
    │       └── torchNMSPlugin
    │           ├── CMakeLists.txt
    │           ├── bboxUtils.h
    │           ├── torchNMSPlugin.cpp
    │           ├── torch_nms.cu
    │           └── torch_nms.h
├── setup.py
├── test.sh
├── torch2trt_dynamic.egg-info
    ├── PKG-INFO
    ├── SOURCES.txt
    ├── dependency_links.txt
    └── top_level.txt
└── torch2trt_dynamic
    ├── __init__.py
    ├── calibration.py
    ├── converters
        ├── AdaptiveAvgPool2d.py
        ├── AdaptiveMaxPool2d.py
        ├── BatchNorm1d.py
        ├── BatchNorm2d.py
        ├── Conv1d.py
        ├── Conv2d.py
        ├── ConvTranspose1d.py
        ├── ConvTranspose2d.py
        ├── GroupNorm.py
        ├── Identity.py
        ├── LayerNorm.py
        ├── Linear.py
        ├── LogSoftmax.py
        ├── ReLU.py
        ├── ReLU6.py
        ├── __init__.py
        ├── activation.py
        ├── adaptive_avg_pool2d.py
        ├── adaptive_max_pool2d.py
        ├── add.py
        ├── addcmul.py
        ├── arange.py
        ├── argmax.py
        ├── argmin.py
        ├── avg_pool2d.py
        ├── cast_type.py
        ├── cat.py
        ├── chunk.py
        ├── clamp.py
        ├── conv2d.py
        ├── cummax.py
        ├── cummin.py
        ├── cumprod.py
        ├── cumsum.py
        ├── deform_conv2d.py
        ├── div.py
        ├── dummy_converters.py
        ├── expand.py
        ├── expand_as.py
        ├── exview.py
        ├── flatten.py
        ├── flip.py
        ├── floor_divide.py
        ├── full_like.py
        ├── gather.py
        ├── getitem.py
        ├── grid_sample.py
        ├── identity.py
        ├── index_select.py
        ├── instance_norm.py
        ├── interpolate
        │   ├── __init__.py
        │   ├── interpolate.cpp
        │   ├── interpolate.proto
        │   └── interpolate.py
        ├── interpolate_custom.py
        ├── linear.py
        ├── linspace.py
        ├── logical.py
        ├── masked_fill.py
        ├── matmul.py
        ├── max.py
        ├── max_pool2d.py
        ├── mean.py
        ├── meshgrid.py
        ├── min.py
        ├── mod.py
        ├── mul.py
        ├── narrow.py
        ├── new_ones.py
        ├── new_zeros.py
        ├── nms.py
        ├── normalize.py
        ├── ones.py
        ├── ones_like.py
        ├── pad.py
        ├── permute.py
        ├── pixel_shuffle.py
        ├── pow.py
        ├── prelu.py
        ├── prod.py
        ├── relu.py
        ├── relu6.py
        ├── repeat.py
        ├── roi_align.py
        ├── roi_pool.py
        ├── sigmoid.py
        ├── size.py
        ├── softmax.py
        ├── split.py
        ├── squeeze.py
        ├── stack.py
        ├── std.py
        ├── sub.py
        ├── sum.py
        ├── t.py
        ├── tanh.py
        ├── to.py
        ├── topk.py
        ├── transpose.py
        ├── unary.py
        ├── unsqueeze.py
        ├── view.py
        ├── view_as.py
        ├── zeros.py
        └── zeros_like.py
    ├── module_test.py
    ├── plugins
        ├── __init__.py
        ├── create_adaptivepool_plugin.py
        ├── create_dcn_plugin.py
        ├── create_exview_plugin.py
        ├── create_gridsample_plugin.py
        ├── create_groupnorm_plugin.py
        ├── create_layernorm_plugin.py
        ├── create_meshgrid_plugin.py
        ├── create_nms_plugin.py
        ├── create_repeatdim_plugin.py
        ├── create_roiextractor_plugin.py
        ├── create_roipool_plugin.py
        ├── create_torchcum_plugin.py
        ├── create_torchcummaxmin_plugin.py
        ├── create_torchflip_plugin.py
        ├── create_torchgather_plugin.py
        └── globals.py
    ├── shape_converter.py
    ├── test.py
    ├── tests
        ├── __init__.py
        └── torchvision
        │   ├── __init__.py
        │   ├── classification.py
        │   ├── save_load.py
        │   └── segmentation.py
    ├── torch2trt_dynamic.py
    └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | .ninja_deps
 2 | .ninja_log
 3 | build.ninja
 4 | tags
 5 | *.o
 6 | *.pb.o
 7 | torch2trt.egg-info
 8 | build/
 9 | dist/
10 | __pycache__/
11 | *.so
12 | *.pb.h
13 | *.pb.cc
14 | *_pb2.py
15 | *.pyc
16 | *.ipynb_checkpoints
17 | *.pth
18 | torch2trt_dynamic.egg-info/
19 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Pilgrim Project
 2 | 
 3 | This project is forked from torch2trt and torch2trt_dynamic. Its aim is to provide a way to convert PyTorch models directly to TensorRT engines.
 4 | 
 5 | This can speed up your project if your model is built on PyTorch. We have tested several models, all of which work fine, and we will add more tests and plugins to support more complicated models.
 6 | 
 7 | The reasons why we do not go through ONNX are:
 8 | 
 9 | - ONNX is an additional intermediate layer that is not strictly necessary;
10 | - it is not easy to maintain a custom plugin in both the ONNX converter itself and the ONNX-to-TensorRT converter (onnx2trt).
11 | 
12 | ## Usage
13 | 
14 | Pilgrim is at an early stage. The target models on our list are:
15 | 
16 | - [x] mobilenetv3;
17 | - [x] resnet50;
18 | - [ ] yolov3;
19 | - [ ] yolov5;
20 | - [ ] MaskRCNN
21 | - [ ] more...
22 | 
23 | You can find example conversions in the `examples` folder. To install Pilgrim, simply run:
24 | 
25 | ```
26 | sudo python3 setup.py build develop
27 | ```
28 | 
29 | For highly complicated models such as FasterRCNN, MaskRCNN, YoloV5 and CenterNet-DCN, you will also need to build the plugins:
30 | 
31 | ```
32 | cd pilgrim_trt_plugins
33 | ./build.sh
34 | ```
35 | 
36 | The plugins are updated very frequently, so please make sure your repo is up to date.
37 | 
38 | 
39 | 
40 | ## TODO
41 | 
42 | - [ ] Try converting FasterRCNN model to TensorRT with pilgrim tool;
43 | - [ ] Try converting YoloV5 model to tensorrt with pilgrim tool;
44 | - [ ] Try converting CenterNet-DCN to TensorRT with the pilgrim tool (this will use the DCN plugin, mapping the PyTorch op directly to a TensorRT plugin without any ONNX dependency);
45 | 
46 | ## Copyright
47 | 
48 | Copyright belongs to NVIDIA and all related authors.
49 | 


--------------------------------------------------------------------------------
/benchmarks/JETSON_NANO.md:
--------------------------------------------------------------------------------
 1 | | Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) |
 2 | |------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------|
 3 | | torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.29E-05 | 46.4 | 69.9 | 22.1 | 14.7 |
 4 | | torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.20E-02 | 44 | 137 | 24.2 | 7.6 |
 5 | | torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 76.6 | 248 | 14 | 4.34 |
 6 | | torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.86E-03 | 29.4 | 90.2 | 34.7 | 11.4 |
 7 | | torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.56E-01 | 15.5 | 50.7 | 64.8 | 20.2 |
 8 | | torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.45E-02 | 12.4 | 34.2 | 81.7 | 29.8 |
 9 | | torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.01E+03 | 7.18 | 19.9 | 141 | 51.1 |
10 | | torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 4.96 | 14.1 | 204 | 72.3 |
11 | | torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.42E-03 | 11.5 | 41.9 | 84.5 | 24.8 |
12 | | torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.86E-03 | 8.25 | 33.2 | 118 | 31.2 |
13 | | torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.42E-03 | 6.84 | 25.4 | 141 | 40.8 |
14 | | torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.15E-03 | 4.71 | 15.6 | 247 | 65.8 |
15 | | torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.51E-04 | 8.9 | 18.3 | 114 | 55.1 |
16 | | torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.07E-04 | 6.53 | 14.7 | 156 | 68.7 |
17 | | torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.58E-04 | 5.09 | 11.9 | 201 | 85.1 |
18 | | torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.81E-04 | 8.74 | 18.4 | 117 | 54.8 |
19 | | torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.19E-04 | 6.31 | 14.8 | 162 | 68.5 |
20 | | torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 4.96 | 12 | 207 | 84.3 |
21 | 


--------------------------------------------------------------------------------
/build.py:
--------------------------------------------------------------------------------
 1 | import imp
 2 | import subprocess
 3 | import os
 4 | from string import Template
 5 | 
 6 | PLUGINS = [
 7 |     'interpolate',
 8 | ]
 9 | 
10 | BASE_FOLDER = 'torch2trt_dynamic/converters'
11 | 
12 | NINJA_TEMPLATE = Template((
13 |     "rule link\n"
14 |     "  command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -L$trt_lib_dir -lc10 -lc10_cuda -ltorch -lcudart -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer\n"
15 |     "rule protoc\n"
16 |     "  command = protoc $$in --cpp_out=. --python_out=.\n"
17 |     "rule cxx\n"
18 |     "  command = g++ -c -fPIC $$in -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -I. -std=c++11 -I$trt_inc_dir\n"
19 | ))
20 | 
21 | PLUGIN_TEMPLATE = Template((
22 |     "build $plugin_dir/$plugin.pb.h $plugin_dir/$plugin.pb.cc $plugin_dir/${plugin}_pb2.py: protoc $plugin_dir/$plugin.proto\n"
23 |     "build $plugin.pb.o: cxx $plugin_dir/$plugin.pb.cc\n"
24 |     "build $plugin.o: cxx $plugin_dir/$plugin.cpp\n"
25 | ))
26 | 
27 | 
28 | def build(cuda_dir="/usr/local/cuda",
29 |           torch_dir=imp.find_module('torch')[1],
30 |           trt_inc_dir="/usr/include/aarch64-linux-gnu",
31 |           trt_lib_dir="/usr/lib/aarch64-linux-gnu"):
32 | 
33 |     global PLUGINS, BASE_FOLDER, NINJA_TEMPLATE, PLUGIN_TEMPLATE
34 | 
35 |     NINJA_STR = NINJA_TEMPLATE.substitute({
36 |         'torch_dir': torch_dir,
37 |         'cuda_dir': cuda_dir,
38 |         'trt_inc_dir': trt_inc_dir,
39 |         'trt_lib_dir': trt_lib_dir,
40 |     })
41 | 
42 | 
43 |     plugin_o_files = []
44 |     for plugin in PLUGINS:
45 |         NINJA_STR += \
46 |             PLUGIN_TEMPLATE.substitute({
47 |                 'plugin': plugin,
48 |                 'plugin_dir': os.path.join(BASE_FOLDER, plugin),
49 |             })
50 |         plugin_o_files += [plugin + '.pb.o', plugin + '.o']
51 | 
52 |     NINJA_STR += Template((
53 |         "build torch2trt_dynamic/libtorch2trt_dynamic.so: link $o_files\n"
54 |     )).substitute({'o_files': ' '.join(plugin_o_files)})
55 | 
56 |     with open('build.ninja', 'w') as f:
57 |         f.write(NINJA_STR)
58 | 
59 |     subprocess.call(['ninja'])
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     build()
64 | 


--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | *.trt
2 | 


--------------------------------------------------------------------------------
/examples/pg_mbv2_to_trt.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 
 3 | this using Pilgrim convert MobileNetV3 to TensorRT engine
 4 | 
 5 | """
 6 | from torch2trt_dynamic.torch2trt import torch2trt
 7 | import torch
 8 | from torch import nn
 9 | from torchvision.models.resnet import resnet50
10 | from torchvision.models.mobilenet import mobilenet_v2
11 | 
12 | # create some regular pytorch model...
13 | model = mobilenet_v2().cuda().eval()
14 | 
15 | # create example data
16 | x = torch.ones((1, 3, 224, 224)).cuda()
17 | 
18 | # convert to TensorRT feeding sample data as input
19 | opt_shape_param = [
20 |     [
21 |         [1, 3, 128, 128],   # min
22 |         [1, 3, 256, 256],   # opt
23 |         [1, 3, 512, 512]    # max
24 |     ]
25 | ]
26 | model_trt = torch2trt(model, [x], fp16_mode=False)
27 | 
28 | print('serialize engine...')
29 | engine_path = 'mbv2.trt'
30 | with open(engine_path, "wb") as f:
31 |     f.write(model_trt.engine.serialize())
32 | 
33 | print('Done.')


--------------------------------------------------------------------------------
/examples/pg_resnet50_to_trt.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 
 3 | Convert a torchvision ResNet-50 to a TensorRT engine using Pilgrim.
 4 | 
 5 | """
 6 | from torch2trt_dynamic.torch2trt import torch2trt
 7 | import torch
 8 | from torch import nn
 9 | from torchvision.models.resnet import resnet50
10 | from torchvision.models.mobilenet import mobilenet_v2
11 | 
12 | # instantiate ResNet-50 on the GPU in eval mode
13 | model = resnet50().cuda().eval()
14 | 
15 | # create example input data on the GPU
16 | x = torch.ones((1, 3, 224, 224)).cuda()
17 | 
18 | # convert to TensorRT feeding sample data as input, with min / opt / max shapes for the single input
19 | opt_shape_param = [
20 |     [
21 |         [1, 3, 128, 128],   # min
22 |         [1, 3, 256, 256],   # opt
23 |         [1, 3, 512, 512]    # max
24 |     ]
25 | ]
26 | model_trt = torch2trt(model, [x], fp16_mode=False, opt_shape_param=opt_shape_param)
27 | 
28 | print('serialize engine...')
29 | engine_path = 'resnet50.trt'
30 | with open(engine_path, "wb") as f:
31 |     f.write(model_trt.engine.serialize())
32 | 
33 | print('Done.')


--------------------------------------------------------------------------------
/log.md:
--------------------------------------------------------------------------------
1 | - **2020.09.29**:
2 | 
3 |   Added an example that converts ResNet to a TensorRT engine using Pilgrim. However, we still need to simplify the way those plugins are found.
4 | 
5 |   But it actually works! Next, let's see how to make it work for other deployment models as well, such as DCN.


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/.gitignore:
--------------------------------------------------------------------------------
1 | /build/
2 | 
3 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 q.yao
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/README.md:
--------------------------------------------------------------------------------
 1 | # Amirstan_plugin
 2 | 
 3 | Amirstan plugin contains some useful TensorRT plugins.
 4 | These plugins are used to support other projects such as:
 5 | 
 6 | https://github.com/grimoire/torch2trt_dynamic 
 7 | 
 8 | https://github.com/grimoire/mmdetection-to-tensorrt
 9 | 
10 | 
11 | ## Requirement
12 | 
13 | - Tensorrt >= 7.0.0.11
14 | - cub >= 1.8.0
15 | 
16 | ## Installation
17 | 
18 | - Install cub: https://nvlabs.github.io/cub/
19 | - Install tensorrt7: https://developer.nvidia.com/tensorrt
20 | 
21 | ```shell
22 | git clone https://github.com/grimoire/amirstan_plugin.git
23 | cd amirstan_plugin
24 | mkdir build
25 | cd build
26 | cmake -DCUB_ROOT_DIR=<path_to_cub> -DTENSORRT_DIR=<path_to_tensorrt> ..
27 | make -j10
28 | ```
29 | 
30 | Set the environment variable (in ~/.bashrc):
31 | 
32 | ```shell
33 | export AMIRSTAN_LIBRARY_PATH=<amirstan_plugin_root>/build/lib
34 | ```
35 | 
36 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Configure and build the TensorRT plugins into ./build.
 3 | # Set TENSORRT_DIR to override the default TensorRT location ($HOME/TensorRT).
 4 | set -e  # stop at the first failing step instead of continuing in the wrong directory
 5 | 
 6 | mkdir -p build  # -p: do not fail when the build directory already exists
 7 | cd build
 8 | # "~" is not expanded inside -DVAR=~/path, so pass an absolute path via $HOME.
 9 | cmake -DTENSORRT_DIR="${TENSORRT_DIR:-$HOME/TensorRT}" ..
10 | make -j7
11 | cd ..
12 | P=$(pwd)
13 | 
14 | echo 'Now pls add this variable to your ~/.bashrc or ~/.zshrc'
15 | echo "echo 'export PILGRIM_TRT_PLUGINS_LIB=$P/build/lib' >> ~/.zshrc"
16 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/include/amir_cuda_util/cuda_util.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <stddef.h>
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | 
 6 | #include <algorithm>
 7 | 
 8 | #include <cuda_runtime.h>
 9 | 
10 | namespace amirstan
11 | {
12 | namespace cuda
13 | {
14 | 
15 | // Iterates i over [0, n): starts at this thread's global index and advances
16 | // by the total number of threads in the launch.
17 | #define CUDA_KERNEL_LOOP(i, n)                                 \
18 |     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
19 |         i += blockDim.x * gridDim.x)
20 | 
21 | // Reports the last CUDA error (if any) on stderr and terminates the process
22 | // with a failure status, so that callers and scripts can detect the error.
23 | #define cudaCheckError() { \
24 |   cudaError_t e=cudaGetLastError(); \
25 |   if(e!=cudaSuccess) { \
26 |   fprintf(stderr,"Cuda failure %s:%d: '%s'\n",__FILE__,__LINE__,cudaGetErrorString(e)); \
27 |   exit(EXIT_FAILURE); \
28 |   } \
29 |   }
30 |   
31 | const int CUDA_NUM_THREADS = 512;
32 | const int CUDA_WARP_SIZE=32;
33 | const int CUDA_NUM_WARP=CUDA_NUM_THREADS/CUDA_WARP_SIZE;
34 | const int kMaxGridNum = 65535;
35 | 
36 | // Number of CUDA_NUM_THREADS-sized blocks needed to cover N elements,
37 | // capped at kMaxGridNum.
38 | inline int GET_BLOCKS(const int N)
39 | {
40 |   return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS);
41 | }
42 | 
43 | // Fixed-capacity (8 entries) array of int values plus a dimension count.
44 | struct TensorSize{
45 |     int size[8];
46 |     int dims;
47 | };
48 | 
49 | // Same layout as TensorSize but with size_t entries.
50 | struct TensorStride{
51 |     size_t size[8];
52 |     int dims;
53 | };
54 | 
55 | // Copies src into dst so that dimension i of dst is dimension permute[i] of
56 | // src. src_size and permute each hold src_dim entries.
57 | template <class value_type>
58 | void memcpyPermute(value_type *dst,const value_type *src, int *src_size, int *permute, int src_dim, cudaStream_t stream=0);
59 | 
60 | // NOTE(review): the three declarations below are defined outside this
61 | // header; confirm their exact contracts against their implementations.
62 | template <typename T>
63 | void tensorMean(T *dst, T *src, int* src_size, bool *reduce_dims, int dims, cudaStream_t stream=0, void* workspace=nullptr);
64 | 
65 | template <typename T>
66 | void tensorMeanVar(T *mean_dst, T* var_dst,const T *src, int* src_size, bool *reduce_dims, int dims, cudaStream_t stream=0, void* workspace=nullptr);
67 | 
68 | template<typename T>
69 | void repeat_dims(T* dst, const T* src,const int *input_size, const int *repeatDims, int dims, cudaStream_t stream=0);
70 | } // namespace cuda
71 | 
72 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/include/amir_cuda_util/cudnn_util.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <cudnn.h>
 3 | 
 4 | namespace amirstan
 5 | {
 6 | namespace cudnn
 7 | {
 8 | // Declaration only; the definition is not part of this header.
 9 | // NOTE(review): width precedes height in the parameter list - confirm that
10 | // call sites pass them in this order.
11 | template <class T>
12 | void cudnnBatchNormTrain(cudnnHandle_t handle,
13 |                          const T &input,
14 |                          int batch_size,
15 |                          int channels,
16 |                          int width,
17 |                          int height,
18 |                          const T &weight, const T &bias,
19 |                          const T &running_mean, const T &running_var,
20 |                          T exponentialAverageFactor,
21 |                          T epsilon,
22 |                          T &result_mean, T &result_var);
23 | }
24 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/include/plugin/amirInferPlugin.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | extern "C"
4 | {
5 |     // bool initLibAmirstanInferPlugins(void *logger, const char *libNamespace);
6 |     // Declared with C linkage; presumably the library's plugin-registration entry point - confirm in InferPlugin.cpp.
7 |     bool initLibAmirstanInferPlugins();
8 | } // extern "C"


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory (plugin)
2 | add_subdirectory (amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/amir_cuda_util/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # NOTE(review): enable_language(CUDA) below needs a newer CMake than the minimum declared here - confirm the intended minimum version.
 2 | cmake_minimum_required(VERSION 2.8.11)
 3 | 
 4 | #find_package(CUDA REQUIRED)
 5 | enable_language(CUDA)
 6 | 
 7 | # Ask nvcc to pass -fPIC to the host compiler.
 8 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
 9 | 
10 | # Headers are collected recursively; sources only from this directory.
11 | file(GLOB_RECURSE AMIR_CUDA_UTIL_HEADS *.h *.hpp *.cuh)
12 | file(GLOB AMIR_CUDA_UTIL_SOURCES *.cpp *.cu)
13 | 
14 | source_group("Include" FILES ${AMIR_CUDA_UTIL_HEADS})
15 | source_group("Source" FILES ${AMIR_CUDA_UTIL_SOURCES})
16 | 
17 | # NOTE(review): cuda_add_library and CUDA_NVCC_FLAGS belong to the FindCUDA module, whose find_package call is commented out above; presumably a parent CMakeLists loads it - confirm.
18 | cuda_add_library(amir_cuda_util STATIC ${AMIR_CUDA_UTIL_HEADS} ${AMIR_CUDA_UTIL_SOURCES})
19 | target_link_libraries(amir_cuda_util ${CUDA_LIBRARY})
20 | # target_link_libraries(amir_cuda_util ${CUDA_npp_LIBRARY})


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/amir_cuda_util/memcpyPermute.cu:
--------------------------------------------------------------------------------
 1 | #include <algorithm>
 2 | #include <limits.h>
 3 | #include <stdio.h>
 4 | #include <string.h>
 5 | #include <iostream>
 6 | 
 7 | #include "amir_cuda_util/cuda_util.h"
 8 | 
 9 | 
10 | namespace amirstan
11 | {
12 | namespace cuda
13 | {
14 | 
15 | // Capacity of the fixed-size arrays in TensorSize (declared in cuda_util.h).
16 | static const int kMaxPermuteDims = 8;
17 | 
18 | // Writes dst[index] = src[permuted index] for every index in [0, n).
19 | // The flat destination index is split into per-dimension coordinates using
20 | // dst_stride; each coordinate is then weighted by the source stride of the
21 | // axis it was taken from (permute[i]).
22 | template <class value_type>
23 | __global__ void copy_permute_kernel(value_type *dst, const value_type *src, int n, 
24 |   TensorSize ts_src_stride, TensorSize ts_dst_stride, TensorSize ts_permute, int src_dim)
25 | {
26 |   int* src_stride = &(ts_src_stride.size[0]);
27 |   int* dst_stride = &(ts_dst_stride.size[0]);
28 |   int* permute = &(ts_permute.size[0]);
29 |   CUDA_KERNEL_LOOP(index, n)
30 |   { 
31 |       size_t dst_index = index;
32 |       size_t src_index = 0;
33 |       for (int i = 0; i < src_dim; ++i)
34 |       {
35 |           int dim_index = dst_index / dst_stride[i];
36 |           dst_index = dst_index % dst_stride[i];
37 |           src_index += dim_index * src_stride[permute[i]];
38 |       }
39 |       dst[index] = src[src_index];
40 |   }
41 | }
42 | 
43 | // Launches copy_permute_kernel on `stream` so that dimension i of dst is
44 | // dimension permute[i] of src. src_size and permute are host arrays holding
45 | // src_dim entries each; dst and src are dereferenced by the kernel.
46 | // No synchronization is performed here. Requests that cannot be represented
47 | // (src_dim outside 1..8, or more elements than fit in an int) are reported
48 | // on stderr and skipped; an empty tensor is a silent no-op.
49 | template <class value_type>
50 | void memcpyPermute(value_type *dst,const value_type *src, int *src_size, int *permute, int src_dim, cudaStream_t stream)
51 | {
52 |   // The stride/permute arrays hold kMaxPermuteDims entries; any other
53 |   // dimension count would index them out of bounds.
54 |   if (src_dim < 1 || src_dim > kMaxPermuteDims)
55 |   {
56 |     fprintf(stderr, "memcpyPermute: unsupported dimension count %d (expected 1..%d)\n", src_dim, kMaxPermuteDims);
57 |     return;
58 |   }
59 | 
60 |   TensorSize ts_permute;
61 |   TensorSize ts_src_stride;
62 |   TensorSize ts_dst_stride;
63 |   memcpy(&(ts_permute.size[0]), permute, src_dim * sizeof(int));
64 | 
65 |   int *src_stride = &(ts_src_stride.size[0]);
66 |   int *dst_stride = &(ts_dst_stride.size[0]);
67 |   int dst_size[kMaxPermuteDims];
68 |   for (int i = 0; i < src_dim; ++i)
69 |   {
70 |     dst_size[i] = src_size[permute[i]];
71 |   }
72 | 
73 |   // Contiguous strides: innermost dimension has stride 1.
74 |   size_t copy_size = 1;
75 |   src_stride[src_dim - 1] = 1;
76 |   dst_stride[src_dim - 1] = 1;
77 |   for (int i = src_dim - 1; i >= 0; --i)
78 |   {
79 |     copy_size *= dst_size[i];
80 |     if (i < src_dim - 1)
81 |     {
82 |       src_stride[i] = src_stride[i + 1] * src_size[i + 1];
83 |       dst_stride[i] = dst_stride[i + 1] * dst_size[i + 1];
84 |     }
85 |   }
86 | 
87 |   // Nothing to copy; launching zero blocks would be an invalid configuration.
88 |   if (copy_size == 0) return;
89 |   // The kernel indexes elements with int, so larger tensors cannot be handled.
90 |   if (copy_size > static_cast<size_t>(INT_MAX))
91 |   {
92 |     fprintf(stderr, "memcpyPermute: tensor with %zu elements is too large\n", copy_size);
93 |     return;
94 |   }
95 |   const int n = static_cast<int>(copy_size);
96 | 
97 |   copy_permute_kernel<value_type><<<GET_BLOCKS(n), CUDA_NUM_THREADS, 0, stream>>>
98 |   (dst, src, n, ts_src_stride, ts_dst_stride, ts_permute, src_dim);
99 | }
100 | 
101 | template void memcpyPermute<float>(float *dst,const float *src, int *src_size, int *permute, int src_dim, cudaStream_t stream);
102 | 
103 | } // namespace cuda
104 | 
105 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Builds the amirstan_plugin shared library from InferPlugin.cpp and the logger,
 2 | # linked against one <name>_static library per plugin subdirectory.
 3 | add_custom_target(plugin)
 4 | 
 5 | set(TARGET_NAME amirstan_plugin)
 6 | set(SHARED_TARGET ${TARGET_NAME})
 7 | set(STATIC_TARGET ${TARGET_NAME}_static)
 8 | set(TARGET_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 9 | 
10 | enable_language(CUDA)
11 | 
12 | include_directories(common)
13 | 
14 | # Each entry names a subdirectory that defines a <entry>_static target.
15 | set(PLUGIN_LIST
16 |     deformableConvPlugin
17 |     groupNormPlugin
18 |     repeatDimsPlugin
19 |     exViewPlugin
20 |     layerNormPlugin
21 |     gridAnchorDynamicPlugin
22 |     roiExtractorPlugin
23 |     delta2bboxPlugin
24 |     batchedNMSPlugin
25 |     torchGatherPlugin
26 |     deformablePoolPlugin
27 |     adaptivePoolPlugin
28 |     meshGridPlugin
29 |     carafeFeatureReassemblePlugin
30 |     gridSamplePlugin
31 |     torchFlipPlugin
32 |     torchCumMaxMinPlugin
33 |     torchCumPlugin
34 |     torchNMSPlugin
35 |     roiPoolPlugin
36 | )
37 | 
38 | set(INFER_PLUGIN_SRC InferPlugin.cpp common/logger.cpp)
39 | set(INFER_PLUGIN_LIB ${TENSORRT_LIBRARY})
40 | 
41 | # Configure every plugin and queue its static library for the final link.
42 | foreach(PLUGIN_NAME IN LISTS PLUGIN_LIST)
43 |     add_subdirectory(${PLUGIN_NAME})
44 |     list(APPEND INFER_PLUGIN_LIB ${PLUGIN_NAME}_static)
45 | endforeach()
46 | 
47 | # Optional DeepStream support adds one parser source and one extra library.
48 | if(WITH_DEEPSTREAM)
49 |     message(STATUS "Adding NvDsInferParseMmdet")
50 |     list(APPEND INFER_PLUGIN_LIB ${TENSORRT_LIBRARY_INFER_PLUGIN})
51 |     list(APPEND INFER_PLUGIN_SRC NvDsInferParseMmdet.cpp)
52 | endif()
53 | 
54 | cuda_add_library(${SHARED_TARGET} SHARED ${INFER_PLUGIN_SRC})
55 | target_link_libraries(${SHARED_TARGET} ${INFER_PLUGIN_LIB})
56 | target_include_directories(${SHARED_TARGET} PRIVATE ${PROJECT_SOURCE_DIR}/common)
57 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/InferPlugin.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "plugin/amirInferPlugin.h"
 3 | #include "plugin/deformableConvPlugin/deformableConvPlugin.h"
 4 | #include "plugin/deformableConvPlugin/modulatedDeformableConvPlugin.h"
 5 | #include "plugin/deformablePoolPlugin/deformablePoolPlugin.h"
 6 | #include "plugin/groupNormPlugin/groupNormPlugin.h"
 7 | #include "plugin/repeatDimsPlugin/repeatDimsPlugin.h"
 8 | #include "plugin/exViewPlugin/exViewPlugin.h"
 9 | #include "plugin/layerNormPlugin/layerNormPlugin.h"
10 | #include "plugin/gridAnchorDynamicPlugin/gridAnchorDynamicPlugin.h"
11 | #include "plugin/roiExtractorPlugin/roiExtractorPlugin.h"
12 | #include "plugin/delta2bboxPlugin/delta2bboxPlugin.h"
13 | #include "plugin/batchedNMSPlugin/batchedNMSPlugin.h"
14 | #include "plugin/torchGatherPlugin/torchGatherPlugin.h"
15 | #include "plugin/adaptivePoolPlugin/adaptivePoolPlugin.h"
16 | #include "plugin/meshGridPlugin/meshGridPlugin.h"
17 | #include "plugin/carafeFeatureReassemblePlugin/carafeFeatureReassemblePlugin.h"
18 | #include "plugin/gridSamplePlugin/gridSamplePlugin.h"
19 | #include "plugin/torchFlipPlugin/torchFlipPlugin.h"
20 | #include "plugin/torchCumMaxMinPlugin/torchCumMaxMinPlugin.h"
21 | #include "plugin/torchCumPlugin/torchCumPlugin.h"
22 | #include "plugin/torchNMSPlugin/torchNMSPlugin.h"
23 | #include "plugin/roiPoolPlugin/roiPoolPlugin.h"
24 | 
25 | 
26 | extern "C"
27 | {
28 |     // C-linkage entry point; it does no work itself and always returns true.
29 |     bool initLibAmirstanInferPlugins(){
30 |         return true;
31 |     }
32 | } // extern "C"
33 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/adaptivePoolPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the adaptivePoolPlugin sources in this directory.
 2 | cmake_minimum_required(VERSION 2.8.11)
 3 | 
 4 | set(TARGET_NAME adaptivePoolPlugin)
 5 | set(SHARED_TARGET ${TARGET_NAME})
 6 | set(STATIC_TARGET ${TARGET_NAME}_static)
 7 | 
 8 | # find_package(CUDA) is not called here, so cuda_add_library must already be available.
 9 | enable_language(CUDA)
10 | 
11 | # Forward -fPIC to the host compiler through nvcc.
12 | list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC)
13 | 
14 | # Headers are collected recursively, sources only from this directory.
15 | file(GLOB_RECURSE ADAPTIVE_POOL_PLUGIN_HEADS *.h *.hpp *.cuh)
16 | file(GLOB ADAPTIVE_POOL_PLUGIN_SOURCES *.cpp *.cu)
17 | 
18 | source_group("Include" FILES ${ADAPTIVE_POOL_PLUGIN_HEADS})
19 | source_group("Source" FILES ${ADAPTIVE_POOL_PLUGIN_SOURCES})
20 | 
21 | cuda_add_library(${STATIC_TARGET} STATIC
22 |     ${ADAPTIVE_POOL_PLUGIN_HEADS}
23 |     ${ADAPTIVE_POOL_PLUGIN_SOURCES})
24 | target_link_libraries(${STATIC_TARGET}
25 |     ${CUDA_LIBRARY}
26 |     ${TENSORRT_LIBRARY}
27 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/adaptivePoolPlugin/adaptive_pool.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | namespace amirstan
 4 | {
 5 | namespace plugin
 6 | {
 7 | // Pooling mode passed to adaptive_pool; the numeric values are fixed explicitly.
 8 | enum PoolType{
 9 |     MAX=0,
10 |     AVERAGE=1
11 | };
12 | // Declaration only; presumably defined in this plugin's CUDA source — TODO confirm.
13 | template <typename T>
14 | void adaptive_pool(T *output, const T* input, // result buffer and read-only input
15 |                     int* input_dims, int* output_dims, int nb_dims, // presumably nb_dims entries in each dims array — verify
16 |                     int nb_reduce_dims, // NOTE(review): likely the number of pooled axes — confirm in the definition
17 |                     PoolType pool_type,
18 |                     cudaStream_t stream);
19 | // NOTE(review): cudaStream_t is used above, yet this header has no #include of its own.
20 | }
21 | }


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the batchedNMSPlugin sources in this directory.
 2 | cmake_minimum_required(VERSION 2.8.11)
 3 | 
 4 | set(TARGET_NAME batchedNMSPlugin)
 5 | set(SHARED_TARGET ${TARGET_NAME})
 6 | set(STATIC_TARGET ${TARGET_NAME}_static)
 7 | 
 8 | # find_package(CUDA) is not called here, so cuda_add_library must already be available.
 9 | enable_language(CUDA)
10 | 
11 | # Forward -fPIC to the host compiler through nvcc.
12 | list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC)
13 | 
14 | # Headers are collected recursively, sources only from this directory.
15 | file(GLOB_RECURSE BATCHED_NMS_PLUGIN_HEADS *.h *.hpp *.cuh)
16 | file(GLOB BATCHED_NMS_PLUGIN_SOURCES *.cpp *.cu)
17 | 
18 | source_group("Include" FILES ${BATCHED_NMS_PLUGIN_HEADS})
19 | source_group("Source" FILES ${BATCHED_NMS_PLUGIN_SOURCES})
20 | 
21 | cuda_add_library(${STATIC_TARGET} STATIC
22 |     ${BATCHED_NMS_PLUGIN_HEADS}
23 |     ${BATCHED_NMS_PLUGIN_SOURCES})
24 | target_link_libraries(${STATIC_TARGET}
25 |     ${CUDA_LIBRARY}
26 |     ${TENSORRT_LIBRARY}
27 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/batchedNMSInference.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef TRT_BATCHED_NMS_INFERENCE_H
17 | #define TRT_BATCHED_NMS_INFERENCE_H
18 | #include "plugin.h"
19 | 
20 | using namespace nvinfer1;
21 | using namespace nvinfer1::plugin;
22 | // NMS entry point (declaration only). Defaults: isNormalized = true, confSigmoid = false, clipBoxes = true.
23 | pluginStatus_t nmsInference(cudaStream_t stream, int N, int boxesSize, int scoresSize, bool shareLocation,
24 |     int backgroundLabelId, int numPredsPerClass, int numClasses, int topK, int keepTopK, float scoreThreshold,
25 |     float iouThreshold, DataType DT_BBOX, const void* locData, DataType DT_SCORE, const void* confData, void* keepCount,
26 |     void* nmsedBoxes, void* nmsedScores, void* nmsedClasses, void* workspace, bool isNormalized = true,
27 |     bool confSigmoid = false, bool clipBoxes = true);
28 | #endif
29 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/bboxUtils.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef TRT_BBOX_UTILS_H
17 | #define TRT_BBOX_UTILS_H
18 | 
19 | #include "plugin.h"
20 | 
21 | using namespace nvinfer1;
22 | using namespace nvinfer1::plugin;
23 | 
24 | template <typename T> // T is the scalar type of the four coordinates
25 | struct Bbox
26 | {
27 |     T xmin, ymin, xmax, ymax; // presumably the min/max corners of an axis-aligned box — TODO confirm
28 |     Bbox(T xmin, T ymin, T xmax, T ymax) // member-wise initialising constructor
29 |         : xmin(xmin)
30 |         , ymin(ymin)
31 |         , xmax(xmax)
32 |         , ymax(ymax)
33 |     {
34 |     }
35 |     Bbox() = default; // keeps the type default-constructible
36 | };
37 | 
38 | template <typename T> // T is the type of the confidence score
39 | struct BboxInfo
40 | {
41 |     T conf_score;
42 |     int label;
43 |     int bbox_idx; // NOTE(review): presumably an index into a separate box array — confirm at the call sites
44 |     bool kept; // NOTE(review): presumably marks boxes that survive suppression — confirm
45 |     BboxInfo(T conf_score, int label, int bbox_idx, bool kept) // member-wise initialising constructor
46 |         : conf_score(conf_score)
47 |         , label(label)
48 |         , bbox_idx(bbox_idx)
49 |         , kept(kept)
50 |     {
51 |     }
52 |     BboxInfo() = default; // keeps the type default-constructible
53 | };
54 | 
55 | template <typename TFloat> // strict ordering on the xmin coordinate only
56 | bool operator<(const Bbox<TFloat>& lhs, const Bbox<TFloat>& rhs)
57 | {
58 |     return lhs.xmin < rhs.xmin; // Bbox has no x1 member; xmin is its first coordinate
59 | }
60 | // Exact member-wise equality of all four coordinates (Bbox has no x1/y1/x2/y2 members).
61 | template <typename TFloat>
62 | bool operator==(const Bbox<TFloat>& lhs, const Bbox<TFloat>& rhs)
63 | {
64 |     return lhs.xmin == rhs.xmin && lhs.ymin == rhs.ymin && lhs.xmax == rhs.xmax && lhs.ymax == rhs.ymax;
65 | }
66 | // }}}
67 | // NOTE(review): declarations only; the definitions are not part of this header.
68 | int8_t* alignPtr(int8_t* ptr, uintptr_t to);
69 | // NOTE(review): presumably returns the slot following previousWorkspaceSize bytes at ptr — confirm
70 | int8_t* nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize);
71 | // NOTE(review): presumably the size in bytes of one element of dtype — confirm
72 | size_t dataTypeSize(DataType dtype);
73 | // NOTE(review): looks like it fills d_offsets for num_segments segments spaced by offset — confirm
74 | void setUniformOffsets(cudaStream_t stream, int num_segments, int offset, int* d_offsets);
75 | 
76 | #endif
77 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/cub_helper.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include "kernel.h"
17 | // Returns the temporary-storage byte count that cub reports for a descending
18 | // segmented radix sort of (KeyT, ValueT) pairs. Every buffer argument is null,
19 | // so the call is used purely as a size query.
20 | template <typename KeyT, typename ValueT>
21 | size_t cubSortPairsWorkspaceSize(int num_items, int num_segments)
22 | {
23 |     void* noStorage = nullptr;
24 |     const KeyT* keysIn = nullptr;
25 |     KeyT* keysOut = nullptr;
26 |     const ValueT* valuesIn = nullptr;
27 |     ValueT* valuesOut = nullptr;
28 |     const int* segmentBegin = nullptr;
29 |     const int* segmentEnd = nullptr;
30 | 
31 |     size_t workspaceBytes = 0;
32 |     cub::DeviceSegmentedRadixSort::SortPairsDescending(noStorage, workspaceBytes, keysIn, keysOut, valuesIn,
33 |         valuesOut, num_items, num_segments, segmentBegin, segmentEnd);
34 |     return workspaceBytes;
35 | }
36 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/gatherNMSOutputs.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef TRT_BATCHED_NMS_HELPER_H
17 | #define TRT_BATCHED_NMS_HELPER_H
18 | #include "plugin.h"
19 | using namespace nvinfer1;
20 | using namespace nvinfer1::plugin;
21 | // Declaration only; returns a pluginStatus_t. clipBoxes defaults to true.
22 | pluginStatus_t gatherNMSOutputs(cudaStream_t stream, bool shareLocation, int numImages, int numPredsPerClass,
23 |     int numClasses, int topK, int keepTopK, DataType DT_BBOX, DataType DT_SCORE, const void* indices,
24 |     const void* scores, const void* bboxData, void* keepCount, void* nmsedBoxes, void* nmsedScores, void* nmsedClasses,
25 |     bool clipBoxes = true);
26 | 
27 | #endif
28 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/kernel.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include "kernel.h"
17 | #include "plugin.h"
18 | 
19 | // Collects the sizes of the seven buffers the detection pipeline needs and
20 | // returns what calculateTotalWorkspaceSize reports for them. The pre-NMS and
21 | // post-NMS sizes each appear twice, and the last entry is the larger of the
22 | // per-class and per-image sort workspaces.
23 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass,
24 |     int topK, DataType DT_BBOX, DataType DT_SCORE)
25 | {
26 |     const int bufferCount = 7;
27 |     size_t bufferSizes[bufferCount];
28 |     int slot = 0;
29 | 
30 |     bufferSizes[slot++] = detectionForwardBBoxDataSize(N, C1, DT_BBOX);
31 |     bufferSizes[slot++] = detectionForwardBBoxPermuteSize(shareLocation, N, C1, DT_BBOX);
32 |     bufferSizes[slot++] = detectionForwardPreNMSSize(N, C2);
33 |     bufferSizes[slot++] = detectionForwardPreNMSSize(N, C2);
34 |     bufferSizes[slot++] = detectionForwardPostNMSSize(N, numClasses, topK);
35 |     bufferSizes[slot++] = detectionForwardPostNMSSize(N, numClasses, topK);
36 |     bufferSizes[slot++] = std::max(sortScoresPerClassWorkspaceSize(N, numClasses, numPredsPerClass, DT_SCORE),
37 |         sortScoresPerImageWorkspaceSize(N, numClasses * topK, DT_SCORE));
38 | 
39 |     return calculateTotalWorkspaceSize(bufferSizes, bufferCount);
40 | }
41 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/nmsHelper.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #include "plugin.h"
17 | #include <algorithm>
18 | 
19 | using namespace nvinfer1;
20 | using namespace nvinfer1::plugin;
21 | 
22 | // Bytes needed for N * C1 FP32 bounding-box values.
23 | // Prints a message and returns (size_t) -1 for any other DataType.
24 | size_t detectionForwardBBoxDataSize(int N, int C1, DataType DT_BBOX)
25 | {
26 |     if (DT_BBOX == DataType::kFLOAT)
27 |     {
28 |         // Widen first: N * C1 evaluated in int can overflow before the sizeof scaling.
29 |         return static_cast<size_t>(N) * C1 * sizeof(float);
30 |     }
31 | 
32 |     printf("Only FP32 type bounding boxes are supported.\n");
33 |     return (size_t) -1;
34 | }
35 | 
36 | // Bytes needed for the permuted copy of the boxes: nothing when locations are
37 | // shared, otherwise N * C1 FP32 values. Returns (size_t) -1 for non-FP32 types.
38 | size_t detectionForwardBBoxPermuteSize(bool shareLocation, int N, int C1, DataType DT_BBOX)
39 | {
40 |     if (DT_BBOX == DataType::kFLOAT)
41 |     {
42 |         // Same widening as above so the product is formed in size_t.
43 |         return shareLocation ? 0 : static_cast<size_t>(N) * C1 * sizeof(float);
44 |     }
45 |     printf("Only FP32 type bounding boxes are supported.\n");
46 |     return (size_t) -1;
47 | }
48 | 
49 | // Bytes for N * C2 four-byte entries.
50 | size_t detectionForwardPreNMSSize(int N, int C2)
51 | {
52 |     // Presumably the buffer may hold either float or int entries, hence the size check.
53 |     ASSERT(sizeof(float) == sizeof(int));
54 |     return static_cast<size_t>(N) * C2 * sizeof(float);
55 | }
56 | 
57 | // Bytes for N * numClasses * topK four-byte entries.
58 | size_t detectionForwardPostNMSSize(int N, int numClasses, int topK)
59 | {
60 |     // Presumably the buffer may hold either float or int entries, hence the size check.
61 |     ASSERT(sizeof(float) == sizeof(int));
62 |     // A three-way int product overflows easily; form it in size_t instead.
63 |     return static_cast<size_t>(N) * numClasses * topK * sizeof(float);
64 | }
65 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/nmsUtils.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef TRT_NMS_UTILS_H
17 | #define TRT_NMS_UTILS_H
18 | 
19 | #include "plugin.h"
20 | 
21 | using namespace nvinfer1;
22 | using namespace nvinfer1::plugin;
23 | // Workspace size for detection inference with the given problem sizes and data types (declaration only).
24 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass,
25 |     int topK, DataType DT_BBOX, DataType DT_SCORE);
26 | #endif
27 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/carafeFeatureReassemblePlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the carafeFeatureReassemblePlugin sources in this directory.
 2 | cmake_minimum_required(VERSION 2.8.11)
 3 | 
 4 | set(TARGET_NAME carafeFeatureReassemblePlugin)
 5 | set(SHARED_TARGET ${TARGET_NAME})
 6 | set(STATIC_TARGET ${TARGET_NAME}_static)
 7 | 
 8 | # find_package(CUDA) is not called here, so cuda_add_library must already be available.
 9 | enable_language(CUDA)
10 | 
11 | # Forward -fPIC to the host compiler through nvcc.
12 | list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC)
13 | 
14 | # Headers are collected recursively, sources only from this directory.
15 | file(GLOB_RECURSE CARAFE_PLUGIN_HEADS *.h *.hpp *.cuh)
16 | file(GLOB CARAFE_PLUGIN_SOURCES *.cpp *.cu)
17 | 
18 | source_group("Include" FILES ${CARAFE_PLUGIN_HEADS})
19 | source_group("Source" FILES ${CARAFE_PLUGIN_SOURCES})
20 | 
21 | cuda_add_library(${STATIC_TARGET} STATIC
22 |     ${CARAFE_PLUGIN_HEADS}
23 |     ${CARAFE_PLUGIN_SOURCES})
24 | # NPP is not linked: target_link_libraries(dcn_plugin ${CUDA_npp_LIBRARY}) stays disabled.
25 | target_link_libraries(${STATIC_TARGET}
26 |     ${CUDA_LIBRARY}
27 |     ${TENSORRT_LIBRARY}
28 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/carafeFeatureReassemblePlugin/carafe_cuda.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | namespace amirstan
 4 | {
 5 | namespace plugin
 6 | {
 7 |     template <class T> // T: element type shared by every buffer argument
 8 |     int CARAFEForwardLaucher(const T* features, const T* masks, // read-only inputs
 9 |                          const int kernel_size, const int group_size,
10 |                          const int scale_factor, const int batch_size,
11 |                          const int channels, const int input_height,
12 |                          const int input_width, const int output_height,
13 |                          const int output_width, const int mask_channels,
14 |                          T* rfeatures, T* routput, // NOTE(review): these look like scratch buffers — confirm in the definition
15 |                          T* rmasks, T* output,
16 |                          cudaStream_t stream); // declaration only; the meaning of the int result is not visible here
17 | }
18 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/common/logger.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "logger.h"
18 | #include "logging.h"
19 | // Shared logger constructed at kINFO severity, plus one stream consumer per log level built from it.
20 | Logger gLogger{Logger::Severity::kINFO};
21 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
22 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
23 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
24 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
25 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};
26 | // Forwards `severity` to gLogger and to each of the five stream consumers.
27 | void setReportableSeverity(Logger::Severity severity)
28 | {
29 |     gLogger.setReportableSeverity(severity);
30 |     gLogVerbose.setReportableSeverity(severity);
31 |     gLogInfo.setReportableSeverity(severity);
32 |     gLogWarning.setReportableSeverity(severity);
33 |     gLogError.setReportableSeverity(severity);
34 |     gLogFatal.setReportableSeverity(severity);
35 | }
36 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/common/logger.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #ifndef LOGGER_H
18 | #define LOGGER_H
19 | 
20 | #include "logging.h"
21 | // Logging globals; their definitions live in logger.cpp.
22 | extern Logger gLogger;
23 | extern LogStreamConsumer gLogVerbose;
24 | extern LogStreamConsumer gLogInfo;
25 | extern LogStreamConsumer gLogWarning;
26 | extern LogStreamConsumer gLogError;
27 | extern LogStreamConsumer gLogFatal;
28 | // Applies `severity` to gLogger and to every stream consumer declared above.
29 | void setReportableSeverity(Logger::Severity severity);
30 | 
31 | #endif // LOGGER_H
32 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/deformableConvPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the deformableConvPlugin sources in this directory.
 2 | cmake_minimum_required(VERSION 2.8.11)
 3 | 
 4 | set(TARGET_NAME deformableConvPlugin)
 5 | set(SHARED_TARGET ${TARGET_NAME})
 6 | set(STATIC_TARGET ${TARGET_NAME}_static)
 7 | 
 8 | # find_package(CUDA) is not called here, so cuda_add_library must already be available.
 9 | enable_language(CUDA)
10 | 
11 | # Forward -fPIC to the host compiler through nvcc.
12 | list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC)
13 | 
14 | # Headers are collected recursively, sources only from this directory.
15 | file(GLOB_RECURSE DEFORMABLE_CONV_PLUGIN_HEADS *.h *.hpp *.cuh)
16 | file(GLOB DEFORMABLE_CONV_PLUGIN_SOURCES *.cpp *.cu)
17 | 
18 | source_group("Include" FILES ${DEFORMABLE_CONV_PLUGIN_HEADS})
19 | source_group("Source" FILES ${DEFORMABLE_CONV_PLUGIN_SOURCES})
20 | 
21 | cuda_add_library(${STATIC_TARGET} STATIC
22 |     ${DEFORMABLE_CONV_PLUGIN_HEADS}
23 |     ${DEFORMABLE_CONV_PLUGIN_SOURCES})
24 | # NPP is not linked: target_link_libraries(dcn_plugin ${CUDA_npp_LIBRARY}) stays disabled.
25 | target_link_libraries(${STATIC_TARGET}
26 |     ${CUDA_LIBRARY}
27 |     ${TENSORRT_LIBRARY}
28 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/deformableConvPlugin/deform_conv_cuda.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cublas_v2.h>
 4 | 
 5 | typedef struct _DCN_PARAMS // parameter bundle taken by both forward functions declared in this header
 6 | {
 7 |     cublasHandle_t cublas_handle; // the only member without a default initialiser
 8 |     int batchSize = 1;
 9 |     int inputChannel = 1;
10 |     int inputW = 256; // input width and height both default to 256
11 |     int inputH = 256;
12 |     int outputChannel = 1;
13 |     int kernelW = 3; // kernel defaults to 3 x 3
14 |     int kernelH = 3;
15 |     int strideW = 1;
16 |     int strideH = 1;
17 |     int padW = 0;
18 |     int padH = 0;
19 |     int dilationW = 1;
20 |     int dilationH = 1;
21 |     int group = 1;
22 |     int deformable_group = 1;
23 |     int im2col_step = 64; // NOTE(review): presumably the batch chunk handled per im2col pass — confirm
24 | } DCN_PARAMS;
25 | // Deformable convolution forward pass (declaration only); stream defaults to 0.
26 | int deform_conv_forward_cuda(float *input, float *weight, float *bias, float *offset,
27 |                              float *output, void* workspace,
28 |                              const DCN_PARAMS &dcn_params,
29 |                              cudaStream_t stream = 0);
30 | // Modulated variant: takes an additional mask buffer and returns nothing.
31 | // Declaration only; stream defaults to 0.
32 | void modulated_deform_conv_cuda_forward(
33 |     float* input, float* weight, float* bias,
34 |     float* offset, float* mask, float* output, 
35 |     void *workspace, const DCN_PARAMS &dcn_params, cudaStream_t stream=0);


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/deformablePoolPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the deformablePoolPlugin sources in this directory.
 2 | cmake_minimum_required(VERSION 2.8.11)
 3 | 
 4 | set(TARGET_NAME deformablePoolPlugin)
 5 | set(SHARED_TARGET ${TARGET_NAME})
 6 | set(STATIC_TARGET ${TARGET_NAME}_static)
 7 | 
 8 | # find_package(CUDA) is not called here, so cuda_add_library must already be available.
 9 | enable_language(CUDA)
10 | 
11 | # Forward -fPIC to the host compiler through nvcc.
12 | list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC)
13 | 
14 | # Headers are collected recursively, sources only from this directory.
15 | file(GLOB_RECURSE DEFORMABLE_POOL_PLUGIN_HEADS *.h *.hpp *.cuh)
16 | file(GLOB DEFORMABLE_POOL_PLUGIN_SOURCES *.cpp *.cu)
17 | 
18 | source_group("Include" FILES ${DEFORMABLE_POOL_PLUGIN_HEADS})
19 | source_group("Source" FILES ${DEFORMABLE_POOL_PLUGIN_SOURCES})
20 | 
21 | cuda_add_library(${STATIC_TARGET} STATIC
22 |     ${DEFORMABLE_POOL_PLUGIN_HEADS}
23 |     ${DEFORMABLE_POOL_PLUGIN_SOURCES})
24 | target_link_libraries(${STATIC_TARGET}
25 |     ${CUDA_LIBRARY}
26 |     ${TENSORRT_LIBRARY}
27 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/deformablePoolPlugin/deform_roi_pool.cu:
--------------------------------------------------------------------------------
 1 | #include "deform_roi_pool_cuda_kernel.cuh"
 2 | #include "deform_roi_pool.h"
 3 | 
 4 | #include "amir_cuda_util/cuda_util.h"
 5 | 
 6 | 
 7 | 
 8 | namespace amirstan
 9 | {
10 | namespace plugin
11 | {
12 |     using namespace amirstan::cuda;
13 | 
14 |     // Launches deform_roi_pool_forward_cuda_kernel on `stream`, sized by
15 |     // output_size; spatial_scale and gamma are converted to scalar_t first.
16 |     template <typename scalar_t>
17 |     void DeformRoIPoolForwardCUDAKernelLauncher(scalar_t* input, scalar_t* rois,
18 |         scalar_t* offset, scalar_t* output,
19 |         int pooled_height, int pooled_width,
20 |         int output_size, int channels, int height, int width,
21 |         float spatial_scale,
22 |         int sampling_ratio, float gamma, cudaStream_t stream) {
23 |         const scalar_t typed_scale = static_cast<scalar_t>(spatial_scale);
24 |         const scalar_t typed_gamma = static_cast<scalar_t>(gamma);
25 | 
26 |         deform_roi_pool_forward_cuda_kernel<scalar_t>
27 |             <<<GET_BLOCKS(output_size), CUDA_NUM_THREADS, 0, stream>>>(
28 |                 output_size, input, rois, offset, output,
29 |                 pooled_height, pooled_width,
30 |                 typed_scale, sampling_ratio, typed_gamma,
31 |                 channels, height, width);
32 |     }
33 | 
34 |     // Float-only entry point declared in deform_roi_pool.h; it forwards every
35 |     // argument unchanged to the templated launcher.
36 |     void deform_roi_pool_forward(float* input, float* rois, float* offset,
37 |                                 float* output, int pooled_height, int pooled_width,
38 |                                 int output_size, int channels, int height, int width,
39 |                                 float spatial_scale, int sampling_ratio,
40 |                                 float gamma,
41 |                                 cudaStream_t stream){
42 |         DeformRoIPoolForwardCUDAKernelLauncher<float>(
43 |             input, rois, offset, output,
44 |             pooled_height, pooled_width,
45 |             output_size, channels, height, width,
46 |             spatial_scale, sampling_ratio, gamma,
47 |             stream);
48 |     }
49 | 
50 | }
51 | }


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/deformablePoolPlugin/deform_roi_pool.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cuda_runtime.h>
 4 | 
 5 | 
 6 | 
 7 | namespace amirstan
 8 | {
 9 | namespace plugin
10 | {
11 | // Float deformable RoI pooling forward pass on `stream`; defined in deform_roi_pool.cu.
12 | void deform_roi_pool_forward(float* input, float* rois, float* offset,
13 |                              float* output, int pooled_height, int pooled_width,
14 |                             int output_size, int channels, int height, int width,
15 |                              float spatial_scale, int sampling_ratio,
16 |                              float gamma,
17 |                             cudaStream_t stream);
18 | 
19 | }
20 | }


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/delta2bboxPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the delta2bboxPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME delta2bboxPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/delta2bboxPlugin/delta2bbox.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <cuda_runtime.h>
 3 | 
 4 | 
 5 | 
 6 | namespace amirstan
 7 | {
 8 | namespace plugin
 9 | {
10 | 
11 |     // Declaration of the templated delta2bbox host routine; its definition is
12 |     // not part of this header. Judging by the parameter names only (unverified):
13 |     // in_cls / in_bbox / anchor are inputs, out_cls / out_bbox receive results,
14 |     // and the work is issued on `stream`.
15 |     // NOTE(review): `use_segmoid_cls` looks like a misspelling of "sigmoid"; the
16 |     // name is kept exactly as declared.
17 |     template<typename T>
18 |     void delta2bbox(
19 |         T* out_cls, T* out_bbox,
20 |         const T* in_cls, const T* in_bbox, const T* anchor,
21 |         const int* clip_range,
22 |         int batch_size, int num_bbox, int num_outbbox,
23 |         int num_classes, int num_ratios,
24 |         bool use_segmoid_cls,
25 |         float* mean, float* std,
26 |         cudaStream_t stream);
27 | 
28 | } // namespace plugin
29 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/exViewPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the exViewPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME exViewPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/exViewPlugin/expressionParser.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <string>
 3 | #include "NvInferPlugin.h"
 4 | 
 5 | namespace amirstan
 6 | {
 7 | namespace plugin
 8 | {
 9 | 
10 | // Declaration only. Takes an expression string, the input dimension
11 | // expressions and a TensorRT expression builder, and returns a pointer to an
12 | // IDimensionExpr. The accepted expression grammar is not visible from this
13 | // header.
14 | const nvinfer1::IDimensionExpr* parse_expression(
15 |     const std::string& exp,
16 |     const nvinfer1::DimsExprs *inputs,
17 |     nvinfer1::IExprBuilder &exprBuilder);
18 | 
19 | } // namespace plugin
20 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/gridAnchorDynamicPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the gridAnchorDynamicPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME gridAnchorDynamicPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)
32 | # Disabled link against the CUDA npp library, kept for reference:
33 | # target_link_libraries(dcn_plugin ${CUDA_npp_LIBRARY})


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/gridAnchorDynamicPlugin/grid_anchor_dynamic.cu:
--------------------------------------------------------------------------------
 1 | #include <cmath>
 2 | #include <algorithm>
 3 | #include <stdio.h>
 4 | #include "grid_anchor_dynamic.h"
 5 | #include "amir_cuda_util/cuda_util.h"
 6 | 
 7 | namespace amirstan
 8 | {
 9 | namespace plugin
10 | {
11 |     using namespace amirstan::cuda;
12 | 
13 |     // Writes one 4-value anchor per flat index i < width*height*num_base_anchor.
14 |     // i is split into (y, x, base_id) with base_id varying fastest; values 0 and 2
15 |     // of the selected base anchor are shifted by x*stride, values 1 and 3 by
16 |     // y*stride.
17 |     template <typename T>
18 |     __global__ void grid_anchor_dynamic_kernel(T* output, const T *base_anchor,
19 |         int width, int height,
20 |         int stride, int num_base_anchor){
21 |         const int anchors_per_row = width*num_base_anchor;
22 |         CUDA_KERNEL_LOOP(i, width*height*num_base_anchor){
23 |             const int base_id = i%num_base_anchor;
24 |             const int x = (i%anchors_per_row)/num_base_anchor;
25 |             const int y = i/anchors_per_row;
26 | 
27 |             const int shift_x = x*stride;
28 |             const int shift_y = y*stride;
29 |             const T* base = base_anchor + base_id*4;
30 |             T* anchor = output + i*4;
31 | 
32 |             anchor[0] = base[0] + shift_x;
33 |             anchor[1] = base[1] + shift_y;
34 |             anchor[2] = base[2] + shift_x;
35 |             anchor[3] = base[3] + shift_y;
36 |         }
37 |     }
38 | 
39 |     // Host wrapper: launches the kernel above on `stream`, sizing the grid from
40 |     // the total anchor count num_base_anchor*height*width.
41 |     template <typename T>
42 |     void grid_anchor_dynamic(T *output, const T* base_anchor,
43 |                             int width, int height,
44 |                             int stride,
45 |                             int num_base_anchor,
46 |                             cudaStream_t stream){
47 | 
48 |         size_t num_anchors = num_base_anchor*height*width;
49 |         grid_anchor_dynamic_kernel<T>
50 |             <<<GET_BLOCKS(num_anchors), CUDA_NUM_THREADS, 0, stream>>>(
51 |                 output, base_anchor, width, height, stride, num_base_anchor);
52 |     }
53 | 
54 |     // Only the float instantiation is emitted from this translation unit.
55 |     template void grid_anchor_dynamic<float>(float *output, const float* base_anchor,
56 |         int width, int height,
57 |         int stride,
58 |         int num_base_anchor,
59 |         cudaStream_t stream);
60 | 
61 | } // namespace plugin
62 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/gridAnchorDynamicPlugin/grid_anchor_dynamic.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // cudaStream_t is used in the declaration below; include its definition so the
 4 | // header compiles regardless of what the including file pulled in first.
 5 | #include <cuda_runtime.h>
 6 | 
 7 | namespace amirstan
 8 | {
 9 | namespace plugin
10 | {
11 | 
12 | // Host wrapper that launches the grid-anchor kernel on `stream`.
13 | // The kernel writes width*height*num_base_anchor anchors of 4 values each to
14 | // `output`, reading 4 values per base anchor from `base_anchor`. Both pointers
15 | // are passed to a kernel launch, so they are presumably device memory -- TODO
16 | // confirm with the caller. Only the float instantiation is provided by
17 | // grid_anchor_dynamic.cu.
18 | template <typename T>
19 | void grid_anchor_dynamic(T *output, const T* base_anchor,
20 |                         int width, int height,
21 |                         int stride,
22 |                         int num_base_anchor,
23 |                         cudaStream_t stream);
24 | 
25 | } // namespace plugin
26 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/gridSamplePlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the gridSamplePlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME gridSamplePlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/gridSamplePlugin/grid_sample.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // cudaStream_t is used in the declaration below; include its definition so the
 4 | // header compiles regardless of what the including file pulled in first.
 5 | #include <cuda_runtime.h>
 6 | 
 7 | namespace amirstan
 8 | {
 9 | namespace plugin
10 | {
11 | 
12 |   // Interpolation selector accepted by grid_sample.
13 |   enum class GridSamplerInterpolation {Bilinear, Nearest};
14 |   // Padding selector accepted by grid_sample.
15 |   enum class GridSamplerPadding {Zeros, Border, Reflection};
16 | 
17 |     // Declaration of the templated grid_sample host routine; the definition is
18 |     // not part of this header. Judging by the names only (unverified): the
19 |     // *_dims arrays each hold nb_dims extents describing output, input and
20 |     // grid, and the work is issued on `stream`.
21 |     template <typename T>
22 |     void grid_sample(T *output, const T* input, const T* grid,
23 |                     int* output_dims, int* input_dims, int *grid_dims, int nb_dims,
24 |                     GridSamplerInterpolation interp, GridSamplerPadding padding,
25 |                     bool align_corners,
26 |                     cudaStream_t stream);
27 | 
28 | } // namespace plugin
29 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/groupNormPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the groupNormPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME groupNormPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/groupNormPlugin/group_norm.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <cuda_runtime.h>
 3 | 
 4 | 
 5 | 
 6 | namespace amirstan
 7 | {
 8 | namespace plugin
 9 | {
10 |     // Host entry point for group normalization, issued on `stream`.
11 |     // `workspace` is carved up by the implementation in group_norm_kernel.cu:
12 |     // batch_size*num_groups means of type T first, the same number of variances
13 |     // next, and the remainder handed to the reduction helper as scratch.
14 |     // `weight` and `bias` are float regardless of T and are indexed per channel.
15 |     // WH is presumably the product of the spatial extents -- TODO confirm.
16 |     template<typename T>
17 |     void compute_group_norm(
18 |         T* output, const T* input,
19 |         int batch_size, int num_groups, int num_channels, int WH,
20 |         T eps,
21 |         const float* weight, const float* bias,
22 |         cudaStream_t stream, void* workspace);
23 | } // namespace plugin
24 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/groupNormPlugin/group_norm_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include <cmath>
 2 | #include <algorithm>
 3 | #include <stdio.h>
 4 | #include <cuda_fp16.h>
 5 | 
 6 | #include "group_norm.h"
 7 | #include "amir_cuda_util/cuda_util.h"
 8 | 
 9 | namespace amirstan
10 | {
11 | namespace plugin
12 | {
13 |     using namespace amirstan::cuda;
14 | 
15 |     // Element-wise normalization step. For each flat index i < input_size:
16 |     //   - the statistics slot is i / (num_channels*WH/num_groups),
17 |     //   - the affine slot is (i % (num_channels*WH)) / WH,
18 |     // and output[i] = (input[i]-mean)/sqrt(var+eps) * weight + bias.
19 |     // batch_size is accepted but not read inside the kernel.
20 |     template<typename T>
21 |     __global__ void group_norm_kernel(T* output,const T* input, size_t input_size,
22 |         int batch_size, int num_groups, int num_channels, int WH,
23 |         T eps,
24 |         T * mean,  T* var, const float* weight,const float* bias){
25 |         CUDA_KERNEL_LOOP(i, input_size) {
26 |             const int channel = (i%(num_channels*WH))/WH;
27 |             const int group = i/(num_channels*WH/num_groups);
28 | 
29 |             const T centered = input[i] - mean[group];
30 |             const T denom = sqrt(var[group]+eps);
31 |             const T normalized = centered/denom;
32 | 
33 |             output[i] = normalized*T(weight[channel]) + T(bias[channel]);
34 |         }
35 |     }
36 | 
37 |     // Host driver: computes per-(batch, group) mean and variance into the
38 |     // workspace via amirstan::cuda::tensorMeanVar, then launches the kernel
39 |     // above over all batch_size*num_channels*WH elements on `stream`.
40 |     template<typename T>
41 |     void compute_group_norm(T* output, const T* input,
42 |         int batch_size, int num_groups, int num_channels, int WH,
43 |          T eps,
44 |         const float* weight,const float* bias,  cudaStream_t stream, void* workspace){
45 |         // Workspace layout: [mean | var | scratch for tensorMeanVar].
46 |         T* mean = static_cast<T*>(workspace);
47 |         T* var = mean + batch_size*num_groups;
48 |         T* scratch = var + batch_size*num_groups;
49 | 
50 |         // View the input as 2-D and reduce over the second axis only.
51 |         int stats_shape[2] = {batch_size*num_groups, num_channels*WH/num_groups};
52 |         bool reduce_axis[2] = {false,true};
53 |         amirstan::cuda::tensorMeanVar<T>(mean, var, input,
54 |             &stats_shape[0], &reduce_axis[0], 2,
55 |             stream, static_cast<void*>(scratch));
56 | 
57 |         size_t input_size = batch_size * num_channels * WH;
58 | 
59 |         group_norm_kernel<T>
60 |             <<<GET_BLOCKS(input_size), CUDA_NUM_THREADS, 0, stream>>>(
61 |                 output, input, input_size,
62 |                 batch_size, num_groups, num_channels, WH,
63 |                 eps,
64 |                 mean, var, weight, bias);
65 |     }
66 | 
67 |     // Only the float instantiation is emitted; the half one below is disabled.
68 |     template void compute_group_norm<float>(float* output, const float* input,
69 |         int batch_size, int num_groups, int num_channels,  int WH,
70 |         float eps,
71 |          const float* weight,const float* bias,  cudaStream_t stream, void* workspace);
72 | 
73 | 
74 |     // template void compute_group_norm<half>(half* output, const half* input,
75 |     //     int batch_size, int num_groups, int num_channels,  int WH,
76 |     //     half eps,
77 |     //     const float* weight,const float* bias,  cudaStream_t stream, void* workspace);
78 | } // namespace plugin
79 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/layerNormPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the layerNormPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME layerNormPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/layerNormPlugin/layer_norm.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <cuda_runtime.h>
 3 | 
 4 | 
 5 | 
 6 | namespace amirstan
 7 | {
 8 | namespace plugin
 9 | {
10 |     // Host entry point for layer normalization, issued on `stream`.
11 |     // The implementation in layer_norm_kernel.cu treats the input as
12 |     // norm_size rows of layer_size elements: each row gets its own mean and
13 |     // variance, while `weight` and `bias` are indexed by position within a row.
14 |     // `workspace` holds norm_size means, then norm_size variances, then scratch
15 |     // for the reduction helper.
16 |     template<typename T>
17 |     void compute_layer_norm(
18 |         T* output, const T* input,
19 |         int norm_size, int layer_size,
20 |         T eps,
21 |         const T* weight, const T* bias,
22 |         cudaStream_t stream, void* workspace);
23 | } // namespace plugin
24 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/layerNormPlugin/layer_norm_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include <cmath>
 2 | #include <algorithm>
 3 | #include <stdio.h>
 4 | #include <cuda_fp16.h>
 5 | #include "layer_norm.h"
 6 | #include "amir_cuda_util/cuda_util.h"
 7 | 
 8 | namespace amirstan
 9 | {
10 | namespace plugin
11 | {
12 | 
13 | 
14 |     using namespace amirstan::cuda;
15 | 
16 |     // Element-wise normalization step. For each flat index i < input_size the
17 |     // row is i / layer_size and the column is i % layer_size;
18 |     // output[i] = (input[i]-mean[row])/sqrt(var[row]+eps) * weight[col] + bias[col].
19 |     // norm_size is accepted but not read inside the kernel.
20 |     template<typename T>
21 |     __global__ void layer_norm_kernel(T* output,const T* input, size_t input_size,
22 |         int norm_size, int layer_size,
23 |         T eps,
24 |         T * mean,  T * var, const T* weight,const T* bias){
25 |         CUDA_KERNEL_LOOP(i, input_size) {
26 |             const int col = i%layer_size;
27 |             const int row = i/layer_size;
28 | 
29 |             const T centered = input[i] - mean[row];
30 |             const T denom = sqrt(var[row]+eps);
31 |             const T normalized = centered/denom;
32 | 
33 |             output[i] = normalized*weight[col] + bias[col];
34 |         }
35 |     }
36 | 
37 |     // Host driver: computes per-row mean and variance into the workspace via
38 |     // amirstan::cuda::tensorMeanVar, then launches the kernel above over all
39 |     // norm_size*layer_size elements on `stream`.
40 |     template<typename T>
41 |     void compute_layer_norm(T* output, const T* input,
42 |         int norm_size, int layer_size,
43 |          T eps,
44 |         const T* weight,const T* bias,  cudaStream_t stream, void* workspace){
45 |         // Workspace layout: [mean | var | scratch for tensorMeanVar].
46 |         T* mean = static_cast<T*>(workspace);
47 |         T* var = mean + norm_size;
48 |         T* scratch = var + norm_size;
49 | 
50 |         // View the input as 2-D and reduce over the second axis only.
51 |         int stats_shape[2] = {norm_size, layer_size};
52 |         bool reduce_axis[2] = {false,true};
53 |         amirstan::cuda::tensorMeanVar<T>(mean, var, input,
54 |             &stats_shape[0], &reduce_axis[0], 2,
55 |             stream, static_cast<void*>(scratch));
56 | 
57 |         size_t input_size = norm_size * layer_size;
58 | 
59 |         layer_norm_kernel<T>
60 |             <<<GET_BLOCKS(input_size), CUDA_NUM_THREADS, 0, stream>>>(
61 |                 output, input, input_size,
62 |                 norm_size, layer_size,
63 |                 eps,
64 |                 mean, var, weight, bias);
65 |     }
66 | 
67 |     // Only the float instantiation is emitted; the half one below is disabled.
68 |     template void compute_layer_norm<float>(float* output, const float* input,
69 |         int norm_size, int layer_size,
70 |         float eps,
71 |          const float* weight,const float* bias,  cudaStream_t stream, void* workspace);
72 | 
73 | 
74 |     // template void compute_layer_norm<half>(half* output, const half* input,
75 |     //         int norm_size, int layer_size,
76 |     //         half eps,
77 |     //          const half* weight,const half* bias,  cudaStream_t stream, void* workspace);
78 | } // namespace plugin
79 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/meshGridPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the meshGridPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME meshGridPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/meshGridPlugin/mesh_grid.cu:
--------------------------------------------------------------------------------
 1 | #include <cmath>
 2 | #include <algorithm>
 3 | #include <stdio.h>
 4 | #include <cuda_fp16.h>
 5 | 
 6 | #include "mesh_grid.h"
 7 | #include "amir_cuda_util/cuda_util.h"
 8 | 
 9 | namespace amirstan
10 | {
11 | namespace plugin
12 | {
13 |     using namespace amirstan::cuda;
14 | 
15 | 
16 |     // Fills output[i], for every i < N, with
17 |     //   start + ((i % pre_stride) / post_stride) * stride
18 |     // evaluated in float and then converted to T.
19 |     template <typename T>
20 |     __global__ void arange_mesh_grid_kernel(T* output,
21 |                                         size_t pre_stride, size_t post_stride,
22 |                                         float start, float stride, size_t N){
23 | 
24 |         CUDA_KERNEL_LOOP(i, N){
25 |             const size_t slot = (i%pre_stride)/post_stride;
26 | 
27 |             const T element = start + slot * stride;
28 |             output[i] = element;
29 |         }
30 |     }
31 | 
32 | 
33 |     // Host wrapper. post_stride is the product of the extents after slice_dim,
34 |     // pre_stride additionally includes the extent of slice_dim itself, and N is
35 |     // the product of all nb_dims extents. The kernel above is then launched on
36 |     // `stream` over N elements.
37 |     template <typename T>
38 |     void arange_mesh_grid(T *output,
39 |                             const int* output_dims, int nb_dims,
40 |                             int slice_dim, float start, float stride,
41 |                             cudaStream_t stream){
42 | 
43 |         size_t post_stride = 1;
44 |         for(int d=slice_dim+1; d<nb_dims; ++d){
45 |             post_stride*=output_dims[d];
46 |         }
47 |         size_t pre_stride = post_stride*output_dims[slice_dim];
48 | 
49 |         size_t N = 1;
50 |         for(int d=0; d<nb_dims; ++d){
51 |             N*=output_dims[d];
52 |         }
53 | 
54 |         arange_mesh_grid_kernel<T>
55 |             <<<GET_BLOCKS(N), CUDA_NUM_THREADS, 0, stream>>>(
56 |                 output, pre_stride, post_stride, start, stride, N);
57 | 
58 |     }
59 | 
60 |     // Explicit instantiations: float, int and half outputs.
61 |     template void arange_mesh_grid<float>(float *output,
62 |                                         const int* output_dims, int nb_dims,
63 |                                         int slice_dim, float start, float stride,
64 |                                         cudaStream_t stream);
65 | 
66 |     template void arange_mesh_grid<int>(int *output,
67 |                                         const int* output_dims, int nb_dims,
68 |                                         int slice_dim, float start, float stride,
69 |                                         cudaStream_t stream);
70 | 
71 |     template void arange_mesh_grid<half>(half *output,
72 |                                         const int* output_dims, int nb_dims,
73 |                                         int slice_dim, float start, float stride,
74 |                                         cudaStream_t stream);
75 | 
76 | } // namespace plugin
77 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/meshGridPlugin/mesh_grid.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // cudaStream_t is used in the declaration below; include its definition so the
 4 | // header compiles regardless of what the including file pulled in first.
 5 | #include <cuda_runtime.h>
 6 | 
 7 | namespace amirstan
 8 | {
 9 | namespace plugin
10 | {
11 | 
12 | // Host wrapper that launches the mesh-grid fill kernel on `stream`.
13 | // `output_dims` holds nb_dims extents; every output element receives
14 | // start + k*stride, where k is its coordinate along `slice_dim`.
15 | // mesh_grid.cu instantiates this for float, int and half outputs.
16 | // `output` is passed to a kernel launch, so it is presumably device memory --
17 | // TODO confirm with the caller.
18 | template <typename T>
19 | void arange_mesh_grid(T *output,
20 |                         const int* output_dims, int nb_dims,
21 |                         int slice_dim, float start, float stride,
22 |                         cudaStream_t stream);
23 | 
24 | } // namespace plugin
25 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/repeatDimsPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the repeatDimsPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME repeatDimsPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/roiExtractorPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the roiExtractorPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME roiExtractorPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/roiExtractorPlugin/roi_extractor.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <cuda_runtime.h>
 3 | 
 4 | 
 5 | 
 6 | namespace amirstan
 7 | {
 8 | namespace plugin
 9 | {
10 | 
11 |     // Declaration of the templated roi_extractor host routine; the definition
12 |     // is not part of this header. Judging by the names only (unverified):
13 |     // `feats` is an array of num_feats feature-map pointers, and `h`, `w` and
14 |     // `strides` hold one entry per feature map. Work is issued on `stream`.
15 |     template<typename T>
16 |     void roi_extractor(
17 |         T* output,
18 |         const T* rois, int num_rois,
19 |         const void *const *feats, int num_feats,
20 |         int n, int c,
21 |         int *h, int *w,
22 |         float *strides,
23 |         int out_size,
24 |         int sample_num,
25 |         float roi_scale_factor,
26 |         int finest_scale,
27 |         bool aligned,
28 |         cudaStream_t stream);
29 | 
30 | } // namespace plugin
31 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/roiPoolPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the roiPoolPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME roiPoolPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/roiPoolPlugin/roi_pool.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <cuda_runtime.h>
 3 | 
 4 | 
 5 | 
 6 | namespace amirstan
 7 | {
 8 | namespace plugin
 9 | {
10 | 
11 |     // Declaration of the templated roi_pool host routine; the definition is not
12 |     // part of this header. Judging by the names only (unverified): `feats` is an
13 |     // array of num_feats feature-map pointers, and `h`, `w` and `strides` hold
14 |     // one entry per feature map. Work is issued on `stream`.
15 |     template<typename T>
16 |     void roi_pool(
17 |         T* output,
18 |         const T* rois, int num_rois,
19 |         const void *const *feats, int num_feats,
20 |         int n, int c,
21 |         int *h, int *w,
22 |         float *strides,
23 |         int out_size,
24 |         float roi_scale_factor,
25 |         int finest_scale,
26 |         cudaStream_t stream);
27 | 
28 | } // namespace plugin
29 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchCumMaxMinPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the torchCumMaxMinPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME torchCumMaxMinPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchCumMaxMinPlugin/torch_cum_maxmin.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // cudaStream_t is used in the declaration below; include its definition so the
 4 | // header compiles regardless of what the including file pulled in first.
 5 | #include <cuda_runtime.h>
 6 | 
 7 | namespace amirstan
 8 | {
 9 | namespace plugin
10 | {
11 | 
12 |     // Declaration of the templated torch_cum_maxmin host routine; the
13 |     // definition is not part of this header. Judging by the names only
14 |     // (unverified): `input_dims` holds nb_dims extents, `cum_dim` selects the
15 |     // axis, `cum_type` selects between max and min, and `index` receives the
16 |     // matching positions alongside `output`. Work is issued on `stream`.
17 |     template <typename T>
18 |     void torch_cum_maxmin(T *output, int *index, const T* input,
19 |                     int* input_dims, int nb_dims,
20 |                     int cum_dim, int cum_type,
21 |                     cudaStream_t stream);
22 | 
23 | } // namespace plugin
24 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchCumPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Static library target for the torchCumPlugin sources in this directory.
 2 | # NOTE(review): enable_language(CUDA) is documented for CMake >= 3.8, newer than
 3 | # the minimum declared here -- confirm the intended floor.
 4 | cmake_minimum_required(VERSION 2.8.11)
 5 | 
 6 | set(TARGET_NAME torchCumPlugin)
 7 | set(SHARED_TARGET ${TARGET_NAME})
 8 | set(STATIC_TARGET ${TARGET_NAME}_static)
 9 | 
10 | # cuda_add_library() is presumably provided by a parent scope, since the
11 | # find_package call stays disabled here -- TODO confirm.
12 | #find_package(CUDA REQUIRED)
13 | enable_language(CUDA)
14 | 
15 | # Hand -fPIC to the host compiler via nvcc.
16 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
17 | 
18 | # Headers are globbed recursively; sources only from this directory.
19 | file(GLOB_RECURSE PLUGIN_HEADER_FILES *.h *.hpp *.cuh)
20 | file(GLOB PLUGIN_SOURCE_FILES *.cpp *.cu)
21 | 
22 | source_group("Include" FILES ${PLUGIN_HEADER_FILES})
23 | source_group("Source" FILES ${PLUGIN_SOURCE_FILES})
24 | 
25 | cuda_add_library(${STATIC_TARGET} STATIC
26 |     ${PLUGIN_HEADER_FILES}
27 |     ${PLUGIN_SOURCE_FILES})
28 | target_link_libraries(${STATIC_TARGET}
29 |     ${CUDA_LIBRARY}
30 |     ${TENSORRT_LIBRARY}
31 |     amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchCumPlugin/torch_cum.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // cudaStream_t is used in the declaration below; include its definition so the
 4 | // header compiles regardless of what the including file pulled in first.
 5 | #include <cuda_runtime.h>
 6 | 
 7 | namespace amirstan
 8 | {
 9 | namespace plugin
10 | {
11 | 
12 |     // Declaration of the templated torch_cum host routine; the definition is
13 |     // not part of this header. Judging by the names only (unverified):
14 |     // `input_dims` holds nb_dims extents, `cum_dim` selects the axis and
15 |     // `cum_type` selects which cumulative operation is applied. Work is issued
16 |     // on `stream`.
17 |     template <typename T>
18 |     void torch_cum(T *output, const T* input,
19 |                     int* input_dims, int nb_dims,
20 |                     int cum_dim, int cum_type,
21 |                     cudaStream_t stream);
22 | 
23 | } // namespace plugin
24 | } // namespace amirstan


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchFlipPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8.11)
 2 | 
 3 | set(TARGET_NAME torchFlipPlugin)
 4 | set(SHARED_TARGET ${TARGET_NAME})
 5 | set(STATIC_TARGET ${TARGET_NAME}_static)
 6 | #find_package(CUDA REQUIRED)
 7 | enable_language(CUDA)
 8 | 
 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
10 | 
11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh)
12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu)
13 | 
14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS})
15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES})
16 | 
17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES})
18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY})
19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY})
20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchFlipPlugin/torch_flip.h:
--------------------------------------------------------------------------------
#pragma once

namespace amirstan
{
namespace plugin
{

    /**
     * Declaration of the templated torch_flip launcher; the definition is not
     * part of this header.
     *
     * @tparam T           element type of the input and output buffers
     * @param output       destination buffer
     * @param input        source buffer (read-only)
     * @param input_dims   array describing the input dimensions
     * @param nb_dims      number of entries in input_dims (presumably; confirm
     *                     against the implementation)
     * @param flip_dims    array of dimensions to flip (inferred from the
     *                     name; TODO confirm)
     * @param nb_flip_dims number of entries in flip_dims (presumably)
     * @param stream       CUDA stream handed to the implementation
     */
    template <typename T>
    void torch_flip(T *output, const T* input,
                    int* input_dims, int nb_dims,
                    int* flip_dims, int nb_flip_dims,
                    cudaStream_t stream);

}
}


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchGatherPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8.11)
 2 | 
 3 | set(TARGET_NAME torchGatherPlugin)
 4 | set(SHARED_TARGET ${TARGET_NAME})
 5 | set(STATIC_TARGET ${TARGET_NAME}_static)
 6 | #find_package(CUDA REQUIRED)
 7 | enable_language(CUDA)
 8 | 
 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
10 | 
11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh)
12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu)
13 | 
14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS})
15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES})
16 | 
17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES})
18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY})
19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY})
20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchGatherPlugin/torch_gather.h:
--------------------------------------------------------------------------------
#pragma once

namespace amirstan
{
namespace plugin
{

/**
 * Declaration of the templated torch_gather launcher; the definition is not
 * part of this header.
 *
 * @tparam T         element type of the input and output buffers
 * @param output     destination buffer
 * @param input      source buffer (read-only)
 * @param index      integer index buffer (read-only)
 * @param dim        dimension the gather operates along (inferred from the
 *                   name; TODO confirm)
 * @param input_dims array describing the input dimensions
 * @param index_dims array describing the index dimensions
 * @param nb_dims    number of entries in the dims arrays (presumably shared
 *                   by both; confirm against the implementation)
 * @param stream     CUDA stream handed to the implementation
 */
template <typename T>
void torch_gather(T *output, const T* input, const int* index, 
                        int dim, int* input_dims, int *index_dims, int nb_dims,
                        cudaStream_t stream);

}
}


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchNMSPlugin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8.11)
 2 | 
 3 | set(TARGET_NAME torchNMSPlugin)
 4 | set(SHARED_TARGET ${TARGET_NAME})
 5 | set(STATIC_TARGET ${TARGET_NAME}_static)
 6 | #find_package(CUDA REQUIRED)
 7 | enable_language(CUDA)
 8 | 
 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC)
10 | 
11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh)
12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu)
13 | 
14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS})
15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES})
16 | 
17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES})
18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY})
19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY})
20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util)


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchNMSPlugin/bboxUtils.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #ifndef TRT_BBOX_UTILS_H
17 | #define TRT_BBOX_UTILS_H
18 | 
19 | #include "plugin.h"
20 | 
21 | using namespace nvinfer1;
22 | using namespace nvinfer1::plugin;
23 | 
/**
 * Axis-aligned bounding box stored as its two corner coordinates.
 *
 * @tparam T coordinate type.
 */
template <typename T>
struct Bbox
{
    T xmin, ymin, xmax, ymax;
    Bbox(T xmin, T ymin, T xmax, T ymax)
        : xmin(xmin)
        , ymin(ymin)
        , xmax(xmax)
        , ymax(ymax)
    {
    }
    Bbox() = default;
};

/**
 * Per-box bookkeeping record: a confidence score, a label, the index of the
 * box it refers to and a "kept" flag.
 *
 * @tparam T type of the confidence score.
 */
template <typename T>
struct BboxInfo
{
    T conf_score;
    int label;
    int bbox_idx;
    bool kept;
    BboxInfo(T conf_score, int label, int bbox_idx, bool kept)
        : conf_score(conf_score)
        , label(label)
        , bbox_idx(bbox_idx)
        , kept(kept)
    {
    }
    BboxInfo() = default;
};

/**
 * Orders boxes by their xmin coordinate.
 *
 * The previous body read a member named `x1`, which Bbox does not declare,
 * so any instantiation of this operator failed to compile.
 */
template <typename TFloat>
bool operator<(const Bbox<TFloat>& lhs, const Bbox<TFloat>& rhs)
{
    return lhs.xmin < rhs.xmin;
}

/**
 * Two boxes are equal when all four corner coordinates compare equal.
 *
 * Uses the members Bbox actually declares (xmin/ymin/xmax/ymax) instead of
 * the non-existent x1/y1/x2/y2.
 */
template <typename TFloat>
bool operator==(const Bbox<TFloat>& lhs, const Bbox<TFloat>& rhs)
{
    return lhs.xmin == rhs.xmin && lhs.ymin == rhs.ymin
        && lhs.xmax == rhs.xmax && lhs.ymax == rhs.ymax;
}
66 | // }}}
67 | 
// Helper prototypes; the definitions live outside this header.

// Presumably returns `ptr` adjusted to an alignment of `to` bytes
// (inferred from the name; confirm against the definition).
int8_t* alignPtr(int8_t* ptr, uintptr_t to);

// Presumably returns the start of the workspace region that follows one of
// `previousWorkspaceSize` bytes beginning at `ptr` (TODO confirm).
int8_t* nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize);

// Presumably the size in bytes of one element of `dtype` (TODO confirm).
size_t dataTypeSize(DataType dtype);

// NOTE(review): looks like it fills `d_offsets` with evenly spaced segment
// offsets on `stream`; verify against the implementation.
void setUniformOffsets(cudaStream_t stream, int num_segments, int offset, int* d_offsets);
75 | 
76 | #endif
77 | 


--------------------------------------------------------------------------------
/pilgrim_trt_plugins/src/plugin/torchNMSPlugin/torch_nms.h:
--------------------------------------------------------------------------------
#pragma once

namespace amirstan
{
namespace plugin
{

    /**
     * Declaration only. Presumably returns the number of workspace bytes
     * torch_nms needs for `num_boxes` boxes of element type T (inferred from
     * the name; confirm against the implementation).
     */
    template <typename T>
    size_t nms_workspace_size(int num_boxes);

    /**
     * Declaration of the templated torch_nms launcher; the definition is not
     * part of this header.
     *
     * @tparam T            element type of the box and score buffers
     * @param output        integer output buffer
     * @param bboxes        box buffer (read-only); layout is not visible here
     * @param scores        score buffer (read-only)
     * @param num_boxes     number of boxes
     * @param iou_threshold IoU threshold passed to the implementation
     * @param workspace     scratch buffer; presumably sized with
     *                      nms_workspace_size (TODO confirm)
     * @param stream        CUDA stream handed to the implementation
     */
    template <typename T>
    void torch_nms(int *output, const T* bboxes, const T* scores,
                    int num_boxes, float iou_threshold, void* workspace,
                    cudaStream_t stream);

}
}


--------------------------------------------------------------------------------
/torch2trt_dynamic.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.0
 2 | Name: torch2trt-dynamic
 3 | Version: 0.2.0
 4 | Summary: An easy to use PyTorch to TensorRT converter with dynamic shape support
 5 | Home-page: UNKNOWN
 6 | Author: UNKNOWN
 7 | Author-email: UNKNOWN
 8 | License: UNKNOWN
 9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | torch2trt_dynamic
2 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/__init__.py:
--------------------------------------------------------------------------------
 1 | from .torch2trt_dynamic import *
 2 | from .converters import *
 3 | import tensorrt as trt
 4 | 
 5 | 
 6 | def load_plugins():
 7 |     import os
 8 |     import ctypes
 9 |     ctypes.CDLL(os.path.join(os.path.dirname(__file__), 'libtorch2trt_dynamic.so'))
10 |     
11 |     registry = trt.get_plugin_registry()
12 |     torch2trt_creators = [c for c in registry.plugin_creator_list if c.plugin_namespace == 'torch2trt_dynamic']
13 |     for c in torch2trt_creators:
14 |         registry.register_creator(c, 'torch2trt_dynamic')
15 | 
16 | 
# Try to load the plugins at import time; record whether that worked.
# Only OSError is treated as "plugins unavailable".
try:
    load_plugins()
except OSError:
    PLUGINS_LOADED = False
else:
    PLUGINS_LOADED = True
22 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/AdaptiveAvgPool2d.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.nn.AdaptiveAvgPool2d.forward')
 6 | def convert_AdaptiveAvgPool2d(ctx):
 7 |     module = ctx.method_args[0]
 8 |     input = ctx.method_args[1]
 9 |     output = ctx.method_return
10 | 
11 |     input_trt = add_missing_trt_tensors(ctx.network, [input])[0]
12 | 
13 |     output_size = module.output_size
14 |     if not isinstance(output_size, tuple):
15 |         output_size = (output_size, ) * 2
16 | 
17 |     stride = (input_trt.shape[-2] // output_size[-2],
18 |               input_trt.shape[-1] // output_size[-1])
19 | 
20 |     kernel_size = stride
21 |     layer = ctx.network.add_pooling(
22 |         input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size)
23 |     layer.stride = stride
24 | 
25 |     output._trt = layer.get_output(0)
26 | 
27 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_AdaptiveAvgPool2d_1x1():
    """Return an ``AdaptiveAvgPool2d`` module with a (1, 1) output size."""
    return torch.nn.AdaptiveAvgPool2d(output_size=(1, 1))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_AdaptiveAvgPool2d_2x2():
    """Return an ``AdaptiveAvgPool2d`` module with a (2, 2) output size."""
    return torch.nn.AdaptiveAvgPool2d(output_size=(2, 2))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_AdaptiveAvgPool2d_3x3():
    """Return an ``AdaptiveAvgPool2d`` module with a (3, 3) output size."""
    return torch.nn.AdaptiveAvgPool2d(output_size=(3, 3))
41 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/AdaptiveMaxPool2d.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | from ..module_test import add_module_test
 3 | from .adaptive_max_pool2d import convert_adaptive_max_pool2d
 4 | 
 5 | @tensorrt_converter('torch.nn.AdaptiveMaxPool2d.forward')
 6 | def convert_AdaptiveMaxPool2d(ctx):
 7 |     ctx.method_args = (ctx.method_args[1], ctx.method_args[0].output_size)
 8 |     convert_adaptive_max_pool2d(ctx)
 9 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_AdaptiveMaxPool2d_1x1():
    """Return an ``AdaptiveMaxPool2d`` module with a (1, 1) output size."""
    return torch.nn.AdaptiveMaxPool2d(output_size=(1, 1))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_AdaptiveMaxPool2d_2x2():
    """Return an ``AdaptiveMaxPool2d`` module with a (2, 2) output size."""
    return torch.nn.AdaptiveMaxPool2d(output_size=(2, 2))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_AdaptiveMaxPool2d_3x3():
    """Return an ``AdaptiveMaxPool2d`` module with a (3, 3) output size."""
    return torch.nn.AdaptiveMaxPool2d(output_size=(3, 3))
23 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/BatchNorm1d.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.nn.BatchNorm1d.forward')
 6 | def convert_BatchNorm1d(ctx):
 7 | 
 8 |     module = ctx.method_args[0]
 9 |     input = ctx.method_args[1]
10 |     input_trt = trt_(ctx.network, input)
11 |     output = ctx.method_return
12 |     
13 |     scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps)
14 |     bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale
15 |     power = np.ones_like(scale)
16 |     
17 |     # reshape to 2D
18 |     input_shape_trt = ctx.network.add_shape(input_trt).get_output(0)
19 |     one_trt = trt_(ctx.network, torch.tensor([1],dtype=torch.int32).to(input.device))
20 |     if len(input.shape)==2:
21 |         new_input_shape_trt = ctx.network.add_concatenation([input_shape_trt, one_trt, one_trt]).get_output(0)
22 |     else:
23 |         new_input_shape_trt = ctx.network.add_concatenation([input_shape_trt, one_trt]).get_output(0)
24 |     layer = ctx.network.add_shuffle(input_trt)
25 |     layer.set_input(1, new_input_shape_trt)
26 | 
27 |     layer = ctx.network.add_scale(layer.get_output(0), trt.ScaleMode.CHANNEL, bias, scale, power)
28 | 
29 |     # reshape back to 1D
30 |     conv_out_trt = layer.get_output(0)
31 |     layer = ctx.network.add_shuffle(conv_out_trt)
32 |     layer.set_input(1, input_shape_trt)
33 |     
34 |     output._trt = layer.get_output(0)
35 | 
36 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)])
def test_BatchNorm1d_basic():
    """Return a ``BatchNorm1d`` module with 10 features."""
    return torch.nn.BatchNorm1d(num_features=10)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/BatchNorm2d.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | 
 4 | @tensorrt_converter('torch.nn.BatchNorm2d.forward')
 5 | def convert_BatchNorm2d(ctx):
 6 |     module = ctx.method_args[0]
 7 |     input = ctx.method_args[1]
 8 |     input_trt = trt_(ctx.network, input)
 9 |     output = ctx.method_return
10 |     
11 |     scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps)
12 |     bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale
13 |     power = np.ones_like(scale)
14 | 
15 |     layer = ctx.network.add_scale(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power)    
16 | 
17 | 
18 |     output._trt = layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/Conv2d.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | @tensorrt_converter('torch.nn.Conv2d.forward')
 5 | def convert_Conv2d(ctx):
 6 |     module = ctx.method_args[0]
 7 |     input = ctx.method_args[1]
 8 |     input_trt = trt_(ctx.network, input)
 9 |     output = ctx.method_return
10 | 
11 |     kernel_size = module.kernel_size
12 |     if not isinstance(kernel_size, tuple):
13 |         kernel_size = (kernel_size, ) * 2
14 | 
15 |     stride = module.stride
16 |     if not isinstance(stride, tuple):
17 |         stride = (stride, ) * 2
18 | 
19 |     padding = module.padding
20 |     if not isinstance(padding, tuple):
21 |         padding = (padding, ) * 2
22 | 
23 |     dilation = module.dilation
24 |     if not isinstance(dilation, tuple):
25 |         dilation = (dilation, ) * 2
26 | 
27 |     kernel = module.weight.detach().cpu().numpy()
28 |     
29 |     bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
30 |     if module.bias is not None:
31 |         bias = module.bias.detach().cpu().numpy()
32 | 
33 |     layer = ctx.network.add_convolution(
34 |         input=input_trt,
35 |         num_output_maps=module.out_channels,
36 |         kernel_shape=kernel_size,
37 |         kernel=kernel,
38 |         bias=bias)
39 |     layer.stride = stride
40 |     layer.padding = padding
41 |     layer.dilation = dilation
42 | 
43 |     if module.groups is not None:
44 |         layer.num_groups = module.groups
45 | 
46 |     output._trt = layer.get_output(0)
47 | 
48 | 
49 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)])
def test_Conv2d_basic():
    """Return a 1x1 ``Conv2d`` with stride 1 and no padding."""
    return torch.nn.Conv2d(
        in_channels=10, out_channels=5, kernel_size=1, stride=1, padding=0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)])
def test_Conv2d_stride2():
    """Return a 1x1 ``Conv2d`` with stride 2 and no padding."""
    return torch.nn.Conv2d(
        in_channels=10, out_channels=5, kernel_size=1, stride=2, padding=0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)])
def test_Conv2d_kernel3():
    """Return a 3x3 ``Conv2d`` with stride 2 and padding 1."""
    return torch.nn.Conv2d(
        in_channels=10, out_channels=5, kernel_size=3, stride=2, padding=1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)])
def test_Conv2d_dilation2():
    """Return a 3x3 ``Conv2d`` with stride 1, padding 1 and dilation 2."""
    return torch.nn.Conv2d(
        in_channels=10, out_channels=5, kernel_size=3, stride=1, padding=1,
        dilation=2)
68 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/ConvTranspose1d.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | 
 4 | @tensorrt_converter('torch.nn.ConvTranspose1d.forward')
 5 | def convert_ConvTranspose1d(ctx):
 6 |     module = ctx.method_args[0]
 7 |     input = ctx.method_args[1]
 8 |     input_trt = trt_(ctx.network, input)
 9 |     output = ctx.method_return
10 | 
11 |     kernel_size = module.kernel_size
12 |     if not isinstance(kernel_size, tuple):
13 |         kernel_size = (kernel_size, 1)
14 |     else:
15 |         kernel_size = kernel_size + (1,)
16 | 
17 |     stride = module.stride
18 |     if not isinstance(stride, tuple):
19 |         stride = (stride, 1)
20 |     else:
21 |         stride = stride + (1,)
22 | 
23 |     padding = module.padding
24 |     if not isinstance(padding, tuple):
25 |         padding = (padding, 0)
26 |     else:
27 |         padding = padding + (0,)
28 |         
29 |     kernel = module.weight.detach().cpu().numpy()[..., None]
30 |     
31 |     bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
32 |     if module.bias is not None:
33 |         bias = module.bias.detach().cpu().numpy()[..., None]
34 | 
35 |     # unsqueeze(3)
36 |     layer = ctx.network.add_shuffle(input_trt)
37 |     layer.reshape_dims = (0,0,0,1)
38 |     input_trt = layer.get_output(0)
39 | 
40 |     # deconv
41 |     layer = ctx.network.add_deconvolution(
42 |         input=input_trt,
43 |         num_output_maps=module.out_channels,
44 |         kernel_shape=kernel_size,
45 |         kernel=kernel,
46 |         bias=bias)
47 |     layer.stride = stride
48 |     layer.padding = padding
49 |     
50 |     if module.groups is not None:
51 |         layer.num_groups = module.groups
52 | 
53 |     output_trt = layer.get_output(0)
54 | 
55 |     # squeeze(3)
56 |     layer = ctx.network.add_shuffle(output_trt)
57 |     layer.reshape_dims = (0,0,0)
58 |     output_trt = layer.get_output(0)
59 | 
60 |     output._trt = output_trt


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/ConvTranspose2d.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | 
 4 | @tensorrt_converter('torch.nn.ConvTranspose2d.forward')
 5 | def convert_ConvTranspose2d(ctx):
 6 |     module = ctx.method_args[0]
 7 |     input = ctx.method_args[1]
 8 |     input_trt = trt_(ctx.network, input)
 9 |     output = ctx.method_return
10 | 
11 |     kernel_size = module.kernel_size
12 |     if not isinstance(kernel_size, tuple):
13 |         kernel_size = (kernel_size, ) * 2
14 | 
15 |     stride = module.stride
16 |     if not isinstance(stride, tuple):
17 |         stride = (stride, ) * 2
18 | 
19 |     padding = module.padding
20 |     if not isinstance(padding, tuple):
21 |         padding = (padding, ) * 2
22 |         
23 |     kernel = module.weight.detach().cpu().numpy()
24 |     
25 |     bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
26 |     if module.bias is not None:
27 |         bias = module.bias.detach().cpu().numpy()
28 | 
29 |     layer = ctx.network.add_deconvolution(
30 |         input=input_trt,
31 |         num_output_maps=module.out_channels,
32 |         kernel_shape=kernel_size,
33 |         kernel=kernel,
34 |         bias=bias)
35 |     layer.stride = stride
36 |     layer.padding = padding
37 |     
38 |     if module.groups is not None:
39 |         layer.num_groups = module.groups
40 | 
41 |     output._trt = layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/Identity.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | 
 3 | 
 4 | @tensorrt_converter('torch.nn.Dropout.forward')
 5 | @tensorrt_converter('torch.nn.Dropout2d.forward')
 6 | @tensorrt_converter('torch.nn.Dropout3d.forward')
 7 | def convert_Identity(ctx):
 8 |     input = ctx.method_args[1]
 9 |     input_trt = trt_(ctx.network, input)
10 |     output = ctx.method_return
11 |     output._trt = input_trt


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/Linear.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | from ..module_test import add_module_test
 3 | import torch
 4 | 
 5 | 
 6 | @tensorrt_converter('torch.nn.Linear.forward')
 7 | def convert_Linear(ctx):
 8 |     module = ctx.method_args[0]
 9 |     input = ctx.method_args[1]
10 |     input_trt = trt_(ctx.network, input)
11 |     output = ctx.method_return
12 | 
13 |     ### reshape to ...xNx1x1
14 |     layer = ctx.network.add_shuffle(input_trt)
15 |     layer.reshape_dims = (0,)*len(input_trt.shape) + (1, 1) 
16 | 
17 |     ### add fully connected
18 |     bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
19 |     if module.bias is not None:
20 |         bias = module.bias.detach().cpu().numpy()
21 |     
22 |     layer = ctx.network.add_convolution(
23 |         input=layer.get_output(0),
24 |         num_output_maps=module.out_features,
25 |         kernel_shape=(1, 1),
26 |         kernel=module.weight.detach().cpu().numpy(),
27 |         bias=bias)
28 | 
29 |     # layer = ctx.network.add_fully_connected(
30 |     #     input=layer.get_output(0),
31 |     #     # input=input_trt,
32 |     #     num_outputs=module.out_features,
33 |     #     kernel=module.weight.detach().cpu().numpy(),
34 |     #     bias=bias)
35 | 
36 |     ### reshape back to N
37 |     layer = ctx.network.add_shuffle(layer.get_output(0))
38 |     # # layer.reshape_dims = tuple(output.shape[1:])
39 |     layer.reshape_dims = (0,)*len(input_trt.shape)
40 | 
41 |     output._trt = layer.get_output(0)
42 | 
43 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)])
def test_Linear_basic():
    """Return a ``Linear`` module mapping 10 features to 5, with bias."""
    return torch.nn.Linear(in_features=10, out_features=5)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)])
def test_Linear_no_bias():
    """Return a ``Linear`` module mapping 10 features to 5, without bias."""
    return torch.nn.Linear(in_features=10, out_features=5, bias=False)
56 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/LogSoftmax.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | 
 3 | 
 4 | @tensorrt_converter('torch.nn.LogSoftmax.forward')
 5 | def convert_LogSoftmax(ctx):
 6 |     input = ctx.method_args[1]
 7 |     input_trt = trt_(ctx.network, input)
 8 |     output = ctx.method_return
 9 |     layer = ctx.network.add_softmax(input=input_trt)
10 |     layer = ctx.network.add_unary(input=layer.get_output(0),
11 |             op=trt.UnaryOperation.LOG)
12 |     output._trt = layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/ReLU.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | 
 4 | @tensorrt_converter('torch.nn.ReLU.forward')
 5 | def convert_ReLU(ctx):
 6 |     input = ctx.method_args[1]
 7 |     input_trt = trt_(ctx.network, input)
 8 |     output = ctx.method_return
 9 |     layer = ctx.network.add_activation(
10 |         input=input_trt, type=trt.ActivationType.RELU)
11 |     output._trt = layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/ReLU6.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.nn.ReLU6.forward')
 6 | def convert_ReLU6(ctx):
 7 |     input = ctx.method_args[1]
 8 |     output = ctx.method_return
 9 |     
10 |     input_trt, trt_6 = trt_(ctx.network, input, 6.)
11 | 
12 |     layer = ctx.network.add_activation(
13 |         input=input_trt, type=trt.ActivationType.RELU)
14 |     layer = ctx.network.add_elementwise(
15 |         layer.get_output(0), trt_6, trt.ElementWiseOperation.MIN)
16 | 
17 |     output._trt = layer.get_output(0)
18 |     
19 |     
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)])
def test_relu6_basic():
    """Return a default-constructed ``ReLU6`` module."""
    relu6 = torch.nn.ReLU6()
    return relu6


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/adaptive_avg_pool2d.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | from torch2trt_dynamic.plugins import create_adaptivepool_plugin
 4 | 
 5 | 
 6 | # @tensorrt_converter('torch.nn.functional.adaptive_avg_pool2d')
 7 | # def convert_adaptive_avg_pool2d(ctx):
 8 | #     input = ctx.method_args[0]
 9 | #     output_size = get_arg(ctx, 'output_size', pos=1, default=0)
10 | #     output = ctx.method_return
11 | #     input_trt = trt_(ctx.network, input)
12 | 
13 | #     if isinstance(output_size, int):
14 | #         output_size = (output_size, output_size)
15 |     
16 | #     output_size = tuple([-1 if not o else o for o in output_size])
17 | 
18 | #     plugin = create_adaptivepool_plugin("adaptive_avg_pool2d_"+str(id(input)),
19 | #                                         output_size=output_size,
20 | #                                         pooling_type=trt.PoolingType.AVERAGE)
21 |             
22 | #     layer = ctx.network.add_plugin_v2(
23 | #         inputs=[input_trt], plugin=plugin)
24 | 
25 | #     output._trt = layer.get_output(0)
26 | 
@tensorrt_converter('torch.nn.functional.adaptive_avg_pool2d')
def convert_adaptive_avg_pool2d(ctx):
    """Convert ``F.adaptive_avg_pool2d`` via the module-style converter.

    Rewrites ``ctx.method_args`` to ``(AdaptiveAvgPool2d(output_size), input)``
    and delegates to ``convert_AdaptiveAvgPool2d``. ``output_size`` is read
    from either the positional or the keyword arguments.
    """
    # This module never brings convert_AdaptiveAvgPool2d into scope at file
    # level, so calling it used to raise NameError; import it locally.
    from torch2trt_dynamic.converters.AdaptiveAvgPool2d import (
        convert_AdaptiveAvgPool2d)

    input = ctx.method_args[0]
    output_size = get_arg(ctx, 'output_size', pos=1, default=None)

    ctx.method_args = (torch.nn.AdaptiveAvgPool2d(output_size), input)
    convert_AdaptiveAvgPool2d(ctx)
31 | 
32 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/adaptive_max_pool2d.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | from torch2trt_dynamic.plugins import create_adaptivepool_plugin
 4 | 
 5 | 
 6 | @tensorrt_converter('torch.nn.functional.adaptive_max_pool2d')
 7 | def convert_adaptive_max_pool2d(ctx):
 8 |     input = ctx.method_args[0]
 9 |     output_size = get_arg(ctx, 'output_size', pos=1, default=0)
10 |     output = ctx.method_return
11 |     input_trt = trt_(ctx.network, input)
12 | 
13 |     if isinstance(output_size, int):
14 |         output_size = (output_size, output_size)
15 |     
16 |     output_size = tuple([-1 if not o else o for o in output_size])
17 | 
18 |     plugin = create_adaptivepool_plugin("adaptive_max_pool2d_"+str(id(input)),
19 |                                         output_size=output_size,
20 |                                         pooling_type=trt.PoolingType.MAX)
21 | 
22 |     layer = ctx.network.add_plugin_v2(
23 |         inputs=[input_trt], plugin=plugin)
24 | 
25 |     output._trt = layer.get_output(0)
26 | 
27 | ### old version
28 | # @tensorrt_converter('torch.nn.functional.adaptive_max_pool2d')
29 | # def convert_adaptive_max_pool2d(ctx):
30 | #     input = ctx.method_args[0]
31 | #     output = ctx.method_return
32 | #     input_trt = trt_(ctx.network, input)
33 | 
34 | #     output_size = ctx.method_args[1]
35 | #     if isinstance(output_size, int):
36 | #         output_size = (output_size, ) * 2
37 | 
38 | #     if output_size[0]==1 and output_size[1] == 1:
39 | #         shape_length = len(input.shape)
40 | #         axes = (1<<(shape_length-1)) + (1<<(shape_length-2))
41 | #         keepdim = True
42 | #         layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.MAX, axes, keepdim)
43 | #         output._trt = layer.get_output(0)
44 | #     else:
45 | #         stride = (input._trt.shape[-2] // output_size[-2], input._trt.shape[-1] // output_size[-1])
46 | 
47 | #         kernel_size = stride
48 | #         layer = ctx.network.add_pooling(
49 | #             input=input._trt, type=trt.PoolingType.MAX, window_size=kernel_size)
50 | #         layer.stride = stride
51 | 
52 | #         output._trt = layer.get_output(0)
53 | 
54 |     
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_adaptive_max_pool2d_1x1():
    """Return an ``AdaptiveMaxPool2d`` module with a (1, 1) output size."""
    return torch.nn.AdaptiveMaxPool2d(output_size=(1, 1))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_adaptive_max_pool2d_2x2():
    """Return an ``AdaptiveMaxPool2d`` module with a (2, 2) output size."""
    return torch.nn.AdaptiveMaxPool2d(output_size=(2, 2))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_adaptive_max_pool2d_3x3():
    """Return an ``AdaptiveMaxPool2d`` module with a (3, 3) output size."""
    return torch.nn.AdaptiveMaxPool2d(output_size=(3, 3))
68 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/add.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.add')
 6 | @tensorrt_converter('torch.Tensor.__iadd__')
 7 | @tensorrt_converter('torch.Tensor.__add__')
 8 | @tensorrt_converter('torch.Tensor.__radd__')
 9 | def convert_add(ctx):
10 |     input_a = ctx.method_args[0]
11 |     input_b = ctx.method_args[1]
12 |     input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
13 |     output = ctx.method_return
14 |     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM)
15 |     output._trt = layer.get_output(0)
16 |     
17 | 
class Add(torch.nn.Module):
    """Test module that adds its two inputs with the ``+`` operator."""

    def forward(self, x, y):
        total = x + y
        return total
24 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_add_basic():
    """Return the module exercising the ``x + y`` operator form."""
    module = Add()
    return module
28 | 
29 | 
class IAdd(torch.nn.Module):
    """Test module that adds ``y`` into ``x`` in place with ``+=``."""

    def forward(self, x, y):
        # The in-place operator is deliberate: it is the form under test.
        x += y
        return x
37 | 
38 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_add_iadd():
    """Return the module exercising the in-place ``x += y`` form."""
    module = IAdd()
    return module
42 | 
43 | 
class TorchAdd(torch.nn.Module):
    """Test module that adds its two inputs through ``torch.add``."""

    def forward(self, x, y):
        total = torch.add(x, y)
        return total
50 | 
51 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_add_torchadd():
    """Return the module exercising the ``torch.add(x, y)`` form."""
    module = TorchAdd()
    return module
55 | 
56 | 
class RAddInt(torch.nn.Module):
    """Test module that adds the integer 1 on the left of its input."""

    def forward(self, x):
        # The scalar stays on the left so the reflected add is used.
        shifted = 1 + x
        return shifted
63 | 
64 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_add_radd_int():
    """Return the module exercising ``1 + x`` with an integer scalar."""
    module = RAddInt()
    return module
68 | 
69 | 
class RAddFloat(torch.nn.Module):
    """Test module that adds the float 1.0 on the left of its input."""

    def forward(self, x):
        # The scalar stays on the left so the reflected add is used.
        shifted = 1.0 + x
        return shifted
76 | 
77 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_add_radd_float():
    """Return the module exercising ``1.0 + x`` with a float scalar."""
    module = RAddFloat()
    return module


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/argmax.py:
--------------------------------------------------------------------------------
 1 | import tensorrt as trt
 2 | from torch2trt_dynamic.torch2trt_dynamic import *
 3 | from torch2trt_dynamic.module_test import add_module_test
 4 | from .flatten import *
 5 | from .topk import *
 6 | from .squeeze import *
 7 | 
 8 | 
@tensorrt_converter('torch.Tensor.argmax')
@tensorrt_converter('torch.argmax')
def convert_argmax(ctx):
    """Convert argmax by chaining the flatten, topk and squeeze converters.

    The converter temporarily rewrites ``ctx.method_args`` and
    ``ctx.method_return`` so the sibling converters can be reused, then
    restores the original positional arguments before returning.
    ``ctx.method_kwargs`` is left untouched throughout.
    """
    
    old_args = ctx.method_args
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=None)
    keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)

    output = ctx.method_return

    # No dim given: flatten the input first and reduce along axis 0.
    if dim is None:
        input_flatten = input.flatten()
        ctx.method_args = [input]
        ctx.method_return = input_flatten
        convert_flatten(ctx)
        input = ctx.method_return
        dim = 0
    
    # argmax is expressed as the index output of topk with k=1.
    topk_output = input.topk(1, dim)
    topk_input = [input, 1, dim]
    ctx.method_args = topk_input
    ctx.method_return = topk_output
    convert_topk(ctx)
    # Element 1 of the topk result is the indices tensor.
    topk_index = ctx.method_return[1]


    output._trt = topk_index._trt
    ctx.method_return = output

    # Without keepdim, squeeze away the size-1 reduced axis; a 1-D result is
    # left as is.
    if not keepdim and topk_index.shape[dim]==1 and len(topk_index.shape)>1:
        ctx.method_args = [topk_index, dim]
        ctx.method_return = output
        convert_squeeze(ctx)
    # Restore the positional arguments seen by the caller.
    ctx.method_args = old_args    
47 | 
48 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/argmin.py:
--------------------------------------------------------------------------------
 1 | import tensorrt as trt
 2 | from torch2trt_dynamic.torch2trt_dynamic import *
 3 | from torch2trt_dynamic.module_test import add_module_test
 4 | from .flatten import *
 5 | from .topk import *
 6 | from .squeeze import *
 7 | 
 8 | 
@tensorrt_converter('torch.Tensor.argmin')
@tensorrt_converter('torch.argmin')
def convert_argmin(ctx):
    """Convert argmin by chaining the flatten, topk and squeeze converters.

    The converter temporarily rewrites ``ctx.method_args`` and
    ``ctx.method_return`` so the sibling converters can be reused, then
    restores the original positional arguments before returning.
    ``ctx.method_kwargs`` is left untouched throughout.
    """
    
    old_args = ctx.method_args
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=None)
    keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)

    output = ctx.method_return

    # No dim given: flatten the input first and reduce along axis 0.
    if dim is None:
        input_flatten = input.flatten()
        ctx.method_args = [input]
        ctx.method_return = input_flatten
        convert_flatten(ctx)
        input = ctx.method_return
        dim = 0
    
    # argmin is expressed as the index output of topk with k=1 and
    # largest=False (the trailing False in the argument list).
    topk_output = input.topk(1, dim, largest=False)
    topk_input = [input, 1, dim, False]
    ctx.method_args = topk_input
    ctx.method_return = topk_output
    convert_topk(ctx)
    # Element 1 of the topk result is the indices tensor.
    topk_index = ctx.method_return[1]


    output._trt = topk_index._trt
    ctx.method_return = output

    # Without keepdim, squeeze away the size-1 reduced axis; a 1-D result is
    # left as is.
    if not keepdim and topk_index.shape[dim]==1 and len(topk_index.shape)>1:
        ctx.method_args = [topk_index, dim]
        ctx.method_return = output
        convert_squeeze(ctx)
    # Restore the positional arguments seen by the caller.
    ctx.method_args = old_args
47 | 
48 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/avg_pool2d.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.nn.functional.avg_pool2d')
 6 | def convert_avg_pool2d(ctx):
 7 |     # parse args
 8 |     input = get_arg(ctx, 'input', pos=0, default=None)
 9 |     kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None)
10 |     stride = get_arg(ctx, 'stride', pos=2, default=None)
11 |     padding = get_arg(ctx, 'padding', pos=3, default=0)
12 |     ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=False)
13 |     count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True)
14 |     
15 |     # get input trt tensor (or create constant if it doesn't exist)
16 |     input_trt = trt_(ctx.network, input)
17 |     
18 |     output = ctx.method_return
19 | 
20 |     # get kernel size
21 |     if not isinstance(kernel_size, tuple):
22 |         kernel_size = (kernel_size, ) * 2
23 | 
24 |     # get stride
25 |     if not isinstance(stride, tuple):
26 |         stride = (stride, ) * 2
27 | 
28 |     # get padding
29 |     if not isinstance(padding, tuple):
30 |         padding = (padding, ) * 2
31 | 
32 |     layer = ctx.network.add_pooling(
33 |         input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size)
34 |     
35 |     layer.stride = stride
36 |     layer.padding = padding
37 |     layer.average_count_excludes_padding = not count_include_pad
38 |     
39 |     if ceil_mode:
40 |         layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP
41 | 
42 |     output._trt = layer.get_output(0)
43 |     
44 |     
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)])
def test_avg_pool2d_without_ceil_mode():
    """Return a 3x3, stride-2, padding-1 AvgPool2d with ceil_mode off."""
    pool = torch.nn.AvgPool2d(
        kernel_size=3, stride=2, padding=1, ceil_mode=False)
    return pool
49 | 
50 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)])
def test_avg_pool2d_with_ceil_mode():
    """Return a 3x3, stride-2, padding-1 AvgPool2d with ceil_mode on."""
    # TRT does not support ceil_mode=True && count_include_pad=True
    pool = torch.nn.AvgPool2d(
        kernel_size=3,
        stride=2,
        padding=1,
        ceil_mode=True,
        count_include_pad=False)
    return pool
55 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/cast_type.py:
--------------------------------------------------------------------------------
 1 | import tensorrt as trt
 2 | from torch2trt_dynamic.torch2trt_dynamic import *
 3 | 
 4 | 
 5 | def convert_type(ctx, data_type):
 6 |     input = ctx.method_args[0]
 7 |     output = ctx.method_return
 8 | 
 9 |     input_trt = trt_(ctx.network, input)
10 | 
11 |     layer = ctx.network.add_identity(input_trt)
12 |     layer.set_output_type(0, data_type)
13 |     output._trt = layer.get_output(0)
14 | 
15 | 
@tensorrt_converter('torch.Tensor.long')
@tensorrt_converter('torch.Tensor.int')
def convert_int(ctx):
    """Convert ``Tensor.int`` / ``Tensor.long`` to a cast to TensorRT INT32.

    Both methods map to INT32 here. The cast is emitted exactly once; a
    duplicated call previously added a second identity layer whose sibling
    was left unused in the network.
    """
    convert_type(ctx, trt.DataType.INT32)
21 | 
@tensorrt_converter('torch.Tensor.float')
def convert_float(ctx):
    """Convert ``Tensor.float`` to a cast to TensorRT FLOAT.

    The cast is emitted exactly once; a duplicated call previously added a
    second identity layer whose sibling was left unused in the network.
    """
    convert_type(ctx, trt.DataType.FLOAT)
26 | 
27 | # @tensorrt_converter('torch.Tensor.char')
28 | # def convert_char(ctx):
29 | #     convert_type(ctx, trt.DataType.CHAR)
30 | 
31 | 
32 | # @tensorrt_converter('torch.Tensor.half')
33 | # def convert_half(ctx):
34 | #     convert_type(ctx, trt.DataType.HALF)
35 | 
36 | 
@tensorrt_converter('torch.Tensor.bool')
def convert_bool(ctx):
    """Convert ``Tensor.bool`` to a cast to TensorRT BOOL."""
    target_type = trt.DataType.BOOL
    convert_type(ctx, target_type)
40 | 
41 | 
42 | 
@tensorrt_converter('torch.Tensor.type_as')
def convert_type_as(ctx):
    """Convert ``Tensor.type_as`` to a cast to the other tensor's dtype.

    The target type is the dtype of the second argument's TensorRT tensor;
    the cast is an identity layer with that output type.
    """
    source, reference = ctx.method_args[0], ctx.method_args[1]
    result = ctx.method_return

    source_trt = trt_(ctx.network, source)
    reference_trt = trt_(ctx.network, reference)

    cast_layer = ctx.network.add_identity(source_trt)
    cast_layer.set_output_type(0, reference_trt.dtype)
    result._trt = cast_layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/cat.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | 
 4 | @tensorrt_converter('torch.cat')
 5 | def convert_cat(ctx):
 6 |     inputs = ctx.method_args[0]
 7 | 
 8 |     dim = get_arg(ctx, 'dim', pos=1, default=0)
 9 |     if dim<0:
10 |         dim = len(inputs[0].shape)+dim
11 | 
12 |     output = ctx.method_return
13 |     trt_inputs = [trt_(ctx.network, i) for i in inputs]
14 | 
15 |     layer = ctx.network.add_concatenation(inputs=trt_inputs)
16 | 
17 |     layer.axis = dim
18 |     output._trt = layer.get_output(0)
19 | 
20 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/conv2d.py:
--------------------------------------------------------------------------------
 1 | ### copy from https://github.com/yuzhiyiliu/torch2trt/blob/origin/torch.nn.functional.conv2d_support/torch2trt/converters/conv2d.py
 2 | 
 3 | 
 4 | from torch2trt_dynamic.torch2trt_dynamic import *
 5 | from .Conv2d import *
 6 | 
 7 | @tensorrt_converter('torch.nn.functional.conv2d')
 8 | def convert_conv2d(ctx):
 9 |     weight = get_arg(ctx, 'weight', pos=1, default=None)
10 |     bias = get_arg(ctx, 'bias', pos=2, default=None)
11 |     in_channels  = weight.size()[1]
12 |     out_channels = weight.size()[0]
13 |     kernel_size  = tuple(weight.size()[2:4])
14 |     stride       = get_arg(ctx, 'stride', pos=3, default=None)
15 |     padding      = get_arg(ctx, 'padding', pos=4, default=None)
16 |     dilation     = get_arg(ctx, 'dilation', pos=5, default=None)
17 |     groups       = get_arg(ctx, 'groups', pos=6, default=None)
18 |     need_bias = False if bias is None else True
19 | 
20 |     module = torch.nn.Conv2d(
21 |         in_channels=in_channels,
22 |         out_channels=out_channels,
23 |         kernel_size=kernel_size,
24 |         stride=stride,
25 |         padding=padding,
26 |         dilation=dilation,
27 |         groups=groups,
28 |         bias=need_bias)
29 |     module.weight = weight
30 |     module.bias = bias
31 | 
32 |     ctx.method_args = (module, ctx.method_args[0])
33 |     convert_Conv2d(ctx)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/cummax.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | from torch2trt_dynamic.plugins import *
 4 | 
 5 | @tensorrt_converter('torch.cummax')
 6 | @tensorrt_converter('torch.Tensor.cummax')
 7 | def convert_cummax(ctx):
 8 |     input = ctx.method_args[0]
 9 |     dim = get_arg(ctx, 'dim', pos=1, default=0)
10 |     cum_type = 0
11 | 
12 |     if dim<0:
13 |         dim = len(input.shape)+dim
14 | 
15 |     input_trt = trt_(ctx.network, input)
16 |     output = ctx.method_return
17 | 
18 |     plugin = create_torchcummaxmin_plugin("cummax_" + str(id(input)),
19 |                                             dim=dim,
20 |                                             cum_type=cum_type
21 |                                             )
22 |     
23 |     custom_layer = ctx.network.add_plugin_v2(
24 |         inputs=[input_trt], plugin=plugin)
25 | 
26 |     output[0]._trt = custom_layer.get_output(0)
27 |     output[1]._trt = custom_layer.get_output(1)
28 | 
29 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/cummin.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | from ..plugins import *
 4 | 
 5 | @tensorrt_converter('torch.cummin')
 6 | @tensorrt_converter('torch.Tensor.cummin')
 7 | def convert_cummin(ctx):
 8 |     input = ctx.method_args[0]
 9 |     dim = get_arg(ctx, 'dim', pos=1, default=0)
10 |     cum_type = 1
11 | 
12 |     if dim<0:
13 |         dim = len(input.shape)+dim
14 |     input_trt = trt_(ctx.network, input)
15 |     output = ctx.method_return
16 | 
17 |     plugin = create_torchcummaxmin_plugin("cummin_" + str(id(input)),
18 |                                             dim=dim,
19 |                                             cum_type=cum_type
20 |                                             )
21 |     
22 |     custom_layer = ctx.network.add_plugin_v2(
23 |         inputs=[input_trt], plugin=plugin)
24 | 
25 |     output[0]._trt = custom_layer.get_output(0)
26 |     output[1]._trt = custom_layer.get_output(1)
27 | 
28 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/cumprod.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | from ..plugins import *
 4 | 
 5 | @tensorrt_converter('torch.cumprod')
 6 | @tensorrt_converter('torch.Tensor.cumprod')
 7 | def convert_cumprod(ctx):
 8 |     input = ctx.method_args[0]
 9 |     dim = get_arg(ctx, 'dim', pos=1, default=0)
10 |     cum_type = 1
11 | 
12 |     if dim<0:
13 |         dim = len(input.shape)+dim
14 |     input_trt = trt_(ctx.network, input)
15 |     output = ctx.method_return
16 | 
17 |     plugin = create_torchcum_plugin("cumprod_" + str(id(input)),
18 |                                             dim=dim,
19 |                                             cum_type=cum_type
20 |                                             )
21 |     
22 |     custom_layer = ctx.network.add_plugin_v2(
23 |         inputs=[input_trt], plugin=plugin)
24 | 
25 |     output._trt = custom_layer.get_output(0)
26 | 
27 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/cumsum.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | 
 3 | from ..plugins import *
 4 | 
 5 | @tensorrt_converter('torch.cumsum')
 6 | @tensorrt_converter('torch.Tensor.cumsum')
 7 | def convert_cumsum(ctx):
 8 |     input = ctx.method_args[0]
 9 |     dim = get_arg(ctx, 'dim', pos=1, default=0)
10 |     cum_type = 0
11 | 
12 |     if dim<0:
13 |         dim = len(input.shape)+dim
14 |     input_trt = trt_(ctx.network, input)
15 |     output = ctx.method_return
16 | 
17 |     plugin = create_torchcum_plugin("cumsum_" + str(id(input)),
18 |                                             dim=dim,
19 |                                             cum_type=cum_type
20 |                                             )
21 |     
22 |     custom_layer = ctx.network.add_plugin_v2(
23 |         inputs=[input_trt], plugin=plugin)
24 | 
25 |     output._trt = custom_layer.get_output(0)
26 | 
27 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/deform_conv2d.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | from .Conv2d import convert_Conv2d
 3 | 
 4 | from ..plugins import *
 5 | import torchvision.ops
 6 | 
 7 | @tensorrt_converter('torchvision.ops.deform_conv.deform_conv2d')
 8 | def convert_deform_conv2d(ctx):
 9 | 
10 |     input = get_arg(ctx, 'input', pos=0, default=None)
11 |     offset = get_arg(ctx, 'offset', pos=1, default=None)
12 |     weight = get_arg(ctx, 'weight', pos=2, default=None)
13 |     bias = get_arg(ctx, 'bias', pos=3, default=None)
14 |     stride = get_arg(ctx, 'stride', pos=4, default=1)
15 |     padding = get_arg(ctx, 'padding', pos=5, default=0)
16 |     dilation = get_arg(ctx, 'dilation', pos=6, default=1)
17 |     # groups = get_arg(ctx, 'groups', pos=6, default=1)
18 |     # deform_groups = get_arg(ctx, 'deform_groups', pos=7, default=1)
19 |     groups=1
20 | 
21 |     output = ctx.method_return
22 | 
23 |     input_trt = trt_(ctx.network, input)
24 |     offset_trt = trt_(ctx.network, offset)
25 | 
26 |     kernel_size = weight.shape[2]
27 |     if not isinstance(kernel_size, tuple):
28 |         kernel_size = (kernel_size, ) * 2
29 | 
30 |     if not isinstance(stride, tuple):
31 |         stride = (stride, ) * 2
32 | 
33 |     if not isinstance(padding, tuple):
34 |         padding = (padding, ) * 2
35 | 
36 |     if not isinstance(dilation, tuple):
37 |         dilation = (dilation, ) * 2
38 | 
39 |     deform_groups=int(offset.shape[1]//(2*kernel_size[0]*kernel_size[1]))
40 | 
41 |     kernel = weight.detach().cpu().numpy()
42 |     out_channels = output.shape[1]
43 |     
44 |     bias = bias.detach().cpu().numpy()
45 | 
46 |     plugin = create_dcn_plugin("dcn_" + str(id(input)),
47 |                                out_channels=out_channels,
48 |                                kernel_size=kernel_size,
49 |                                W=kernel,
50 |                                B=bias,
51 |                                padding=padding,
52 |                                stride=stride,
53 |                                dilation=dilation,
54 |                                deformable_group=deform_groups,
55 |                                group=groups
56 |                                )
57 |                                
58 |     custom_layer = ctx.network.add_plugin_v2(
59 |         inputs=[input_trt, offset_trt], plugin=plugin)
60 |     
61 |     output._trt = custom_layer.get_output(0)
62 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/dummy_converters.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | 
 3 | 
 4 | def is_private(method):
 5 |     method = method.split('.')[-1]  # remove prefix
 6 |     return method[0] == '_' and method[1] is not '_'
 7 | 
 8 | def is_function_type(method):
 9 |     fntype =  eval(method + '.__class__.__name__')
10 |     return fntype == 'function' or fntype == 'builtin_function_or_method' or fntype == 'method_descriptor'
11 | 
def get_methods(namespace):
    """List the fully qualified, non-private function-like members of a namespace.

    ``namespace`` is a dotted name that is evaluated with ``eval``; each
    attribute from ``dir`` is kept when it is not private and is a
    function type.
    """
    candidates = (namespace + '.' + member for member in dir(eval(namespace)))
    return [
        full_name
        for full_name in candidates
        if not is_private(full_name) and is_function_type(full_name)
    ]
19 | 
20 | 
# Function-like members collected from torch, torch.Tensor and
# torch.nn.functional, in that order.
TORCH_METHODS = [
    method_name
    for namespace in ('torch', 'torch.Tensor', 'torch.nn.functional')
    for method_name in get_methods(namespace)
]
25 | 
26 | 
# Register a warning-only placeholder (is_real=False) for every collected method.
for method in TORCH_METHODS:

    @tensorrt_converter(method, is_real=False)
    def warn_method(ctx):
        """Print a warning naming the method that was encountered."""
        message = 'Warning: Encountered known unsupported method %s' % ctx.method_str
        print(message)
32 |         
33 | 
@tensorrt_converter('torch.Tensor.dim', is_real=False)
@tensorrt_converter('torch.Tensor.size', is_real=False)
def dont_warn(ctx):
    """Do nothing: a silent placeholder registered for ``dim`` and ``size``."""
    return None


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/expand.py:
--------------------------------------------------------------------------------
 1 | import tensorrt as trt
 2 | from ..torch2trt_dynamic import *
 3 | from ..module_test import add_module_test
 4 | from .repeat import *
 5 | from .exview import convert_exview
 6 | 
 7 | 
 8 | @tensorrt_converter('torch.Tensor.expand')
 9 | def convert_expand(ctx):
10 |     
11 |     old_args = ctx.method_args
12 |     input = ctx.method_args[0]
13 |     if isinstance(ctx.method_args[1:], int):
14 |         sizes = ctx.method_args[1:]
15 |     else:
16 |         sizes = ctx.method_args[1]
17 | 
18 |     output = ctx.method_return
19 | 
20 |     repeat_shape = []
21 |     for i in range(output.dim()):
22 |         if i < output.dim()-input.dim():
23 |             repeat_shape.append(output.shape[i])
24 |         else:
25 |             repeat_shape.append(output.shape[i]//input.shape[i+input.dim()-output.dim()])
26 |     
27 |     ctx.method_args = [input]+repeat_shape
28 |     ctx.method_return = output
29 |     convert_repeat(ctx)
30 |     ctx.method_args=old_args
31 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/expand_as.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | 
 3 | from ..plugins import *
 4 | 
 5 | @tensorrt_converter('torch.Tensor.expand_as')
 6 | def convert_expand_as(ctx):
 7 |     input = ctx.method_args[0]
 8 |     other = get_arg(ctx, 'other', pos=1, default=None)
 9 | 
10 |     input_trt = trt_(ctx.network, input)
11 |     other_trt = trt_(ctx.network, other)
12 |     output = ctx.method_return
13 | 
14 |     plugin = create_repeat_plugin("repeat_" + str(id(input)),
15 |                                   repeat_shape=[]
16 |                                   )
17 |                                   
18 |     custom_layer = ctx.network.add_plugin_v2(
19 |         inputs=[input_trt, other_trt], plugin=plugin)
20 | 
21 |     output._trt = custom_layer.get_output(0)
22 |     


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/flip.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | from ..plugins import *
 3 | 
 4 | @tensorrt_converter('torch.flip')
 5 | @tensorrt_converter('torch.Tensor.flip')
 6 | def convert_flip(ctx):
 7 |     input = ctx.method_args[0]
 8 |     dims = get_arg(ctx, 'dims', pos=1, default=0)
 9 |     if isinstance(dims, int):
10 |         dims = [dims]
11 |     
12 |     dims = [len(input.shape)+dim if dim<0 else dim for dim in dims]
13 | 
14 |     input_trt = trt_(ctx.network, input)
15 |     output = ctx.method_return
16 | 
17 |     plugin = create_torchflip_plugin("flip_" + str(id(input)),
18 |                                   dims=dims
19 |                                   )
20 |     
21 |     custom_layer = ctx.network.add_plugin_v2(
22 |         inputs=[input_trt], plugin=plugin)
23 | 
24 |     output._trt = custom_layer.get_output(0)
25 | 
26 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/floor_divide.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | from ..module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.floor_divide')
 6 | @tensorrt_converter('torch.Tensor.floor_divide')
 7 | @tensorrt_converter('torch.Tensor.floor_divide_')
 8 | @tensorrt_converter('torch.Tensor.__floordiv__')
 9 | @tensorrt_converter('torch.Tensor.__ifloordiv__')
10 | def convert_floor_div(ctx):
11 |     input_a = ctx.method_args[0]
12 |     input_b = ctx.method_args[1]
13 |     input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
14 |     output = ctx.method_return
15 |     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.FLOOR_DIV)
16 |     output._trt = layer.get_output(0)
17 | 
18 | 
@tensorrt_converter('torch.Tensor.__rfloordiv__')
def convert_rfloor_div(ctx):
    """Emit a TensorRT element-wise FLOOR_DIV layer for the reflected form.

    The operands are swapped: the second call argument is the dividend and
    the first is the divisor.
    """
    divisor, dividend = ctx.method_args[0], ctx.method_args[1]
    dividend_trt, divisor_trt = trt_(ctx.network, dividend, divisor)
    result = ctx.method_return
    div_layer = ctx.network.add_elementwise(
        dividend_trt, divisor_trt, trt.ElementWiseOperation.FLOOR_DIV)
    result._trt = div_layer.get_output(0)
27 |     
28 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/full_like.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | from .mul import convert_mul
 3 | from .add import convert_add
 4 | from .cast_type import *
 5 | 
 6 | @tensorrt_converter('torch.full_like')
 7 | def convert_full_like(ctx):
 8 |     input = ctx.method_args[0]
 9 |     fill_value = get_arg(ctx, "fill_value", pos=1, default=0)
10 |     dtype = get_arg(ctx, 'dtype', pos=3, default=torch.float32)
11 |     output = ctx.method_return
12 |     input_trt = trt_(ctx.network, input)
13 | 
14 |     old_method_args = ctx.method_args
15 |     old_method_kwargs = ctx.method_kwargs
16 | 
17 |     # mul zero
18 |     input_mul_zero = input*0
19 |     ctx.method_args = [input, 0]
20 |     ctx.method_kwargs = {}
21 |     ctx.method_return = input_mul_zero
22 |     convert_mul(ctx)
23 | 
24 |     # add fill_value
25 |     input_add_one = input_mul_zero+fill_value
26 |     ctx.method_args = [input_mul_zero, fill_value]
27 |     ctx.method_kwargs = {}
28 |     ctx.method_return = input_add_one
29 |     convert_add(ctx)
30 | 
31 |     convert_type_func = None
32 |     if dtype==torch.float32:
33 |         convert_type_func = convert_float
34 |     elif dtype==torch.int32 or dtype==torch.long:
35 |         convert_type_func = convert_int
36 |     elif dtype==torch.bool:
37 |         convert_type_func = convert_bool
38 |     else:
39 |         print("unsupported convert type:{}".format(dtype))
40 |     
41 |     if convert_type_func is not None:
42 |         input_as_type = input_add_one.to(dtype)
43 |         ctx.method_args = [input_add_one, dtype]
44 |         ctx.method_return = input_as_type
45 |         convert_type_func(ctx)
46 |         ctx.method_args = [input_as_type, 0]
47 |         ctx.method_kwargs = {}
48 |         ctx.method_return = output
49 |         convert_add(ctx)
50 | 
51 |     ctx.method_args = old_method_args
52 |     ctx.method_kwargs = old_method_kwargs
53 |     ctx.method_return = output


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/gather.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | from ..plugins import *
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.Tensor.gather')
 6 | @tensorrt_converter('torch.gather')
 7 | def convert_gather(ctx):
 8 |     inputs = ctx.method_args[0]
 9 |     dim = get_arg(ctx, 'dim', pos=1, default=0)
10 |     index = get_arg(ctx, 'index', pos=2, default=None)
11 |     output = ctx.method_return
12 |     
13 |     inputs_trt = trt_(ctx.network, inputs)
14 |     index_trt = trt_(ctx.network, index)
15 | 
16 |     plugin = create_torchgather_plugin("torch_gather_"+str(id(inputs)),
17 |                                         dim=dim)
18 |             
19 |     layer = ctx.network.add_plugin_v2(
20 |         inputs=[inputs_trt, index_trt], plugin=plugin)
21 | 
22 |     output._trt = layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/grid_sample.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | from ..plugins import *
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.nn.functional.grid_sample')
 6 | def convert_grid_sample(ctx):
 7 |     input = ctx.method_args[0]
 8 |     grid = get_arg(ctx, 'grid', pos=1, default=None)
 9 |     mode = get_arg(ctx, 'mode', pos=2, default='bilinear')
10 |     padding_mode = get_arg(ctx, 'padding_mode', pos=3, default='zeros')
11 |     align_corners = get_arg(ctx, 'align_corners', pos=4, default=False)
12 | 
13 |     output = ctx.method_return
14 |     
15 |     input_trt = trt_(ctx.network, input)
16 |     grid_trt = trt_(ctx.network, grid)
17 | 
18 |     if mode == 'bilinear':
19 |         mode = trt.ResizeMode.LINEAR
20 |     elif mode == 'nearest':
21 |         mode = trt.ResizeMode.NEAREST
22 |     
23 |     if padding_mode == 'zeros':
24 |         padding_mode = 0
25 |     elif padding_mode == 'border':
26 |         padding_mode = 1
27 |     elif padding_mode == 'reflection':
28 |         padding_mode = 2
29 | 
30 |     plugin = create_gridsample_plugin("torch_gridsample_"+str(id(input)),
31 |                                         mode=mode,
32 |                                         padding_mode=padding_mode,
33 |                                         align_corners=align_corners)
34 |             
35 |     layer = ctx.network.add_plugin_v2(
36 |         inputs=[input_trt, grid_trt], plugin=plugin)
37 | 
38 |     output._trt = layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/identity.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | 
 3 | 
 4 | @tensorrt_converter('torch.Tensor.cuda')
 5 | @tensorrt_converter('torch.Tensor.detach')
 6 | @tensorrt_converter('torch.Tensor.contiguous')
 7 | @tensorrt_converter('torch.nn.functional.dropout')
 8 | @tensorrt_converter('torch.nn.functional.dropout2d')
 9 | @tensorrt_converter('torch.nn.functional.dropout3d')
10 | def convert_identity(ctx):
11 |     input = ctx.method_args[0]
12 |     input_trt = trt_(ctx.network, input)
13 |     output = ctx.method_return
14 |     output._trt = input_trt
15 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/index_select.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | import tensorrt as trt
 3 | 
 4 | @tensorrt_converter('torch.index_select')
 5 | @tensorrt_converter('torch.Tensor.index_select')
 6 | def convert_index_select(ctx):
 7 |     input = ctx.method_args[0]
 8 |     dim = get_arg(ctx, 'dim', pos=1, default=None)
 9 |     index = get_arg(ctx, 'index', pos=2, default=None)
10 |     
11 |     input_trt = trt_(ctx.network, input)
12 |     index_trt = trt_(ctx.network, index)
13 |     output = ctx.method_return
14 | 
15 |     layer = ctx.network.add_gather(input_trt, index_trt, dim)
16 |     output._trt = layer.get_output(0)
17 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/interpolate/__init__.py:
--------------------------------------------------------------------------------
1 | from .interpolate import *
2 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/interpolate/interpolate.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | 
 4 | package torch2trt;
 5 | 
// Element data type tag; used by the `dtype` field of interpolate_Message.
enum DataTypeMessage {
  kFloat = 0;
  kHalf = 1;
  kInt8 = 2;
  kInt32 = 3;
}
12 | 
13 | 
// Parameters of an interpolate operation.
// NOTE(review): presumably serialized for the interpolate plugin; confirm
// against the code that consumes this message.
message interpolate_Message {
  // User-provided settings.
  repeated int64 size = 1;
  string mode = 2;
  bool align_corners = 3;
  
  // below params are configured by TRT and not set by user
  DataTypeMessage dtype = 4;
  repeated int64 input_size = 5;
  repeated int64 output_size = 6;
}
24 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/linear.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | from ..module_test import add_module_test
 3 | import torch
 4 | from .t import convert_t
 5 | from .matmul import convert_matmul
 6 | from .sum import convert_sum
 7 | 
 8 | 
 9 | @tensorrt_converter('torch.nn.functional.linear')
10 | def convert_linear(ctx):
11 |     old_method_args = ctx.method_args
12 |     old_method_kwargs = ctx.method_kwargs
13 | 
14 |     input = ctx.method_args[0]
15 |     weight = get_arg(ctx, 'weight', pos=1, default=None)
16 |     bias = get_arg(ctx, 'bias', pos=2, default=None)
17 |     output = ctx.method_return
18 | 
19 |     # transpose weight
20 |     weight_transpose = weight.t()
21 |     ctx.method_args = [weight]
22 |     ctx.method_kwargs = {}
23 |     ctx.method_return = weight_transpose
24 |     convert_t(ctx)
25 | 
26 |     # matmul
27 |     matmul_output = input.matmul(weight_transpose)
28 |     ctx.method_args = [input, weight]
29 |     ctx.method_kwargs = {}
30 |     ctx.method_return = matmul_output
31 |     convert_matmul(ctx)
32 | 
33 |     # add bias
34 |     if bias is not None:
35 |         add_bias_output = matmul_output + bias
36 |         ctx.method_args = [matmul_output, bias]
37 |         ctx.method_return = add_bias_output
38 |         convert_sum(ctx)
39 |         output._trt = add_bias_output._trt
40 |     else:
41 |         output._trt = matmul_output._trt
42 | 
43 | 
44 |     ctx.method_args = old_method_args
45 |     ctx.method_kwargs = old_method_kwargs
46 |     ctx.method_return = output
47 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/linspace.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | 
 3 | @tensorrt_converter('torch.linspace')
 4 | def convert_linspace(ctx):
 5 |     start = get_arg(ctx, 'start', pos=0, default=0)
 6 |     end = get_arg(ctx, 'end', pos=1, default=1)
 7 |     steps = get_arg(ctx, 'steps', pos=2, default=2)
 8 |     dtype = get_arg(ctx, 'dtype', pos=4, default=None)
 9 |     
10 |     output = ctx.method_return
11 |     dtype = output.dtype
12 |     if dtype==torch.int64:
13 |         dtype = torch.int32
14 | 
15 |     # check const
16 |     is_const = True
17 |     is_const = False if hasattr(start, '_trt') or hasattr(end, '_trt') or hasattr(steps, '_trt') else is_const
18 | 
19 |     if is_const:
20 |         # create const value
21 |         output_trt = trt_(ctx.network, output)
22 |     
23 |     else:
24 |         ## create fill
25 | 
26 |         # compute shape
27 |         start_trt = trt_(ctx.network, start)
28 |         end_trt = trt_(ctx.network, end)
29 |         steps_trt = trt_(ctx.network, steps)
30 | 
31 |         length_trt = steps_trt
32 | 
33 |         # to float
34 |         one_trt = trt_(ctx.network, torch.tensor([1], dtype=torch.float32))
35 |         start_trt = trt_cast(ctx.network, start_trt, trt.DataType.FLOAT)
36 |         end_trt = trt_cast(ctx.network, end_trt, trt.DataType.FLOAT)
37 |         steps_trt = trt_cast(ctx.network, steps_trt, trt.DataType.FLOAT)
38 |         
39 |         # length = (end - start + step - 1) // step
40 |         step_trt = ctx.network.add_elementwise(end_trt, start_trt, trt.ElementWiseOperation.SUB).get_output(0)
41 |         step_div_trt = ctx.network.add_elementwise(steps_trt, one_trt, trt.ElementWiseOperation.SUB).get_output(0)
42 |         step_trt = ctx.network.add_elementwise(step_trt, step_div_trt, trt.ElementWiseOperation.DIV).get_output(0)
43 | 
44 |         # start rank 0
45 |         layer = ctx.network.add_shuffle(start_trt)
46 |         layer.reshape_dims = tuple()
47 |         start_trt = layer.get_output(0)
48 | 
49 |         layer = ctx.network.add_fill(output.shape, trt.FillOperation.LINSPACE)
50 |         layer.set_input(0, length_trt)
51 |         layer.set_input(1, start_trt)
52 |         layer.set_input(2, step_trt)
53 |         output_trt = layer.get_output(0)
54 | 
55 |     # cast data type
56 |     data_type = torch_dtype_to_trt(dtype)
57 | 
58 |     if data_type is not None:
59 |         layer = ctx.network.add_identity(output_trt)
60 |         layer.set_output_type(0, data_type)
61 |         output_trt = layer.get_output(0)
62 | 
63 |     output._trt = output_trt
64 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/masked_fill.py:
--------------------------------------------------------------------------------
 1 | from ..torch2trt_dynamic import *
 2 | from ..module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.masked_fill', is_real=False)
 6 | @tensorrt_converter('torch.Tensor.masked_fill', is_real=False)
 7 | @tensorrt_converter('torch.Tensor.masked_fill_', is_real=False)
 8 | def convert_masked_fill(ctx):
 9 |     input = ctx.method_args[0]
10 |     mask = get_arg(ctx, 'mask', pos=1, default=None)
11 |     value = get_arg(ctx, 'value', pos=2, default=0)
12 |     output = ctx.method_return
13 | 
14 |     float_mask = mask.type_as(input)
15 |     result = input*(1-float_mask)+value*float_mask
16 | 
17 |     output._trt = result._trt
18 |     ctx.method_return = output


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/max_pool2d.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.nn.functional.max_pool2d')
 6 | def convert_max_pool2d(ctx):
 7 |     # parse args
 8 |     input = get_arg(ctx, 'input', pos=0, default=None)
 9 |     kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None)
10 |     stride = get_arg(ctx, 'stride', pos=2, default=None)
11 |     padding = get_arg(ctx, 'padding', pos=3, default=0)
12 |     dilation = get_arg(ctx, 'dilation', pos=4, default=1)
13 |     ceil_mode = get_arg(ctx, 'ceil_mode', pos=5, default=False)
14 |     
15 |     # get input trt tensor (or create constant if it doesn't exist)
16 |     input_trt = trt_(ctx.network, input)
17 |     
18 |     output = ctx.method_return
19 | 
20 |     # get kernel size
21 |     if not isinstance(kernel_size, tuple):
22 |         kernel_size = (kernel_size, ) * 2
23 | 
24 |     # get stride
25 |     if not isinstance(stride, tuple):
26 |         stride = (stride, ) * 2
27 | 
28 |     # get padding
29 |     if not isinstance(padding, tuple):
30 |         padding = (padding, ) * 2
31 | 
32 |     layer = ctx.network.add_pooling(
33 |         input=input_trt, type=trt.PoolingType.MAX, window_size=kernel_size)
34 |     
35 |     layer.stride = stride
36 |     layer.padding = padding
37 |     
38 |     if ceil_mode:
39 |         layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP
40 | 
41 |     output._trt = layer.get_output(0)
42 |     
43 |     
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)])
def test_MaxPool2d_without_ceil_mode():
    """Return a 3x3, stride-2, padding-1 max-pool module with ceil_mode off."""
    pool = torch.nn.MaxPool2d(
        kernel_size=3, stride=2, padding=1, ceil_mode=False)
    return pool
48 | 
49 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)])
def test_MaxPool2d_with_ceil_mode():
    """Return a 3x3, stride-2, padding-1 max-pool module with ceil_mode on."""
    pool = torch.nn.MaxPool2d(
        kernel_size=3, stride=2, padding=1, ceil_mode=True)
    return pool


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/mean.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.mean')
 6 | @tensorrt_converter('torch.Tensor.mean')
 7 | def convert_mean(ctx):
 8 |     input = ctx.method_args[0]
 9 |     input_trt = trt_(ctx.network, input)
10 |     output = ctx.method_return
11 |     dim = get_arg(ctx, 'dim', pos=1, default=None)
12 |     keep_dims = get_arg(ctx, 'keepdim', pos=2, default=False)
13 |     
14 |     # get dims from args or kwargs
15 |     if dim is None:
16 |         dim = tuple(range(len(input.shape)))
17 |         
18 |     # convert list to tuple
19 |     if isinstance(dim, list):
20 |         dim = tuple(dim)
21 |         
22 |     if not isinstance(dim, tuple):
23 |         dim = (dim, )
24 | 
25 |     dim = tuple([d if d>=0 else len(input.shape)+d for d in dim])
26 |         
27 |     # create axes bitmask for reduce layer
28 |     axes = 0
29 |     for d in dim:
30 |         axes |= 1<<d
31 | 
32 |     layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.AVG, axes, keep_dims)
33 |     output._trt = layer.get_output(0)
34 | 
35 |     
class Mean(torch.nn.Module):
    """Module that applies ``Tensor.mean`` with a fixed dim and keepdim."""

    def __init__(self, dim, keepdim):
        super().__init__()
        self.keepdim = keepdim
        self.dim = dim

    def forward(self, x):
        reduced = x.mean(self.dim, self.keepdim)
        return reduced
43 |     
44 |     
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_mean_channel():
    """Return a module reducing over dim 1 without keeping the dim."""
    module = Mean(1, False)
    return module
50 | 
51 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_mean_tuple():
    """Return a module reducing over dims (1, 2) without keeping them."""
    module = Mean((1, 2), False)
    return module
56 | 
57 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_mean_keepdim():
    """Return a module reducing over dim 1 while keeping the dim."""
    module = Mean(1, True)
    return module


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/meshgrid.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | from torch2trt_dynamic.plugins import create_meshgrid_plugin
 4 | 
 5 | 
 6 | @tensorrt_converter("torch.meshgrid")
 7 | def convert_meshgrid(ctx):
 8 |     input_list = ctx.method_args
 9 |     output = ctx.method_return
10 | 
11 |     input_list_trt = [trt_(ctx.network, input_tensor) for input_tensor in input_list]
12 | 
13 |     num_inputs = len(input_list)
14 | 
15 | 
16 |     plugin = create_meshgrid_plugin("adaptive_meshgrid_"+str(id(input)),
17 |                                         num_inputs=num_inputs)
18 |             
19 |     layer = ctx.network.add_plugin_v2(
20 |         inputs=input_list_trt, plugin=plugin)
21 | 
22 |     for idx, out in enumerate(output):
23 |         out._trt = layer.get_output(idx)
24 | 
25 | 
26 | # from .repeat import convert_repeat
27 | # from .view import convert_view
28 | # @tensorrt_converter('torch.meshgrid')
29 | # def convert_meshgrid(ctx):
30 | 
31 | #     input_list = ctx.method_args
32 | #     output = ctx.method_return
33 | 
34 | #     num_inputs = len(input_list)
35 | 
36 | #     for i in range(num_inputs):
37 | #         tmp_in = input_list[i]
38 | #         tmp_out = output[i]
39 | 
40 | #         shape = [1]*num_inputs
41 | #         shape[i]=-1
42 | #         tmp_in_view = tmp_in.view(*shape)
43 | #         ctx.method_args = [tmp_in, *shape]
44 | #         ctx.method_return = tmp_in_view
45 | #         convert_view(ctx)
46 | 
47 | #         repeat_shape = [input.view(-1).shape[0] for input in input_list]
48 | #         repeat_shape[i] = 1
49 | #         ctx.method_args = [tmp_in_view, *repeat_shape]
50 | #         ctx.method_return = tmp_out
51 | #         convert_repeat(ctx)
52 | 
53 | #     ctx.method_args = input_list
54 | #     ctx.method_return = output
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/mod.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.Tensor.__mod__')
 6 | def convert_mod(ctx):
 7 |     input_a = ctx.method_args[0]
 8 |     input_b = ctx.method_args[1]
 9 |     input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
10 |     output = ctx.method_return
11 | 
12 |     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.FLOOR_DIV)
13 |     floor_div_trt = layer.get_output(0)
14 | 
15 |     layer = ctx.network.add_elementwise(input_b_trt, floor_div_trt, trt.ElementWiseOperation.PROD)
16 |     prod_trt = layer.get_output(0)
17 | 
18 |     layer = ctx.network.add_elementwise(input_a_trt, prod_trt, trt.ElementWiseOperation.SUB)
19 |     output._trt = layer.get_output(0)
20 |     


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/mul.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.mul')
 6 | @tensorrt_converter('torch.Tensor.__imul__')
 7 | @tensorrt_converter('torch.Tensor.__mul__')
 8 | @tensorrt_converter('torch.Tensor.__rmul__')
 9 | def convert_mul(ctx):
10 |     input_a = ctx.method_args[0]
11 |     input_b = ctx.method_args[1]
12 |     input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
13 |     output = ctx.method_return
14 |     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD)
15 |     output._trt = layer.get_output(0)
16 | 
17 | 
class Mul(torch.nn.Module):
    """Module computing ``x * y`` with the ``*`` operator."""

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        product = x * y
        return product
24 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_mul_basic():
    """Return the operator-based multiplication module."""
    module = Mul()
    return module
28 | 
29 | 
class IMul(torch.nn.Module):
    """Module computing the in-place product ``x *= y`` and returning ``x``."""

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        # in-place on purpose: this is the ``__imul__`` path
        x *= y
        return x
37 | 
38 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_mul_imul():
    """Return the in-place multiplication module."""
    module = IMul()
    return module
42 | 
43 | 
class TorchMul(torch.nn.Module):
    """Module computing the product through the ``torch.mul`` function."""

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        product = torch.mul(x, y)
        return product
50 |     
51 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_mul_torchmul():
    """Return the ``torch.mul``-based multiplication module."""
    module = TorchMul()
    return module
55 | 
56 | 
class RMulInt(torch.nn.Module):
    """Module computing ``10 * x`` with the Python int on the left."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        scaled = 10 * x
        return scaled
63 | 
64 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_rmul_int():
    """Return the int-scalar reflected multiplication module."""
    module = RMulInt()
    return module
68 | 
69 | 
class RMulFloat(torch.nn.Module):
    """Module computing ``10.0 * x`` with the Python float on the left."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        scaled = 10.0 * x
        return scaled
76 | 
77 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_rmul_float():
    """Return the float-scalar reflected multiplication module."""
    module = RMulFloat()
    return module


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/narrow.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.plugins import *
 3 | from .size import get_intwarper_trt
 4 | 
 5 | 
 6 | @tensorrt_converter('torch.Tensor.narrow')
 7 | @tensorrt_converter('torch.narrow')
 8 | def convert_narrow(ctx):
 9 |     input = ctx.method_args[0]
10 |     if 'dim' in ctx.method_kwargs:
11 |         dim = ctx.method_kwargs['dim']
12 |     elif 'dimension' in ctx.method_kwargs:
13 |         dim = ctx.method_kwargs['dimension']
14 |     else:
15 |         dim = ctx.method_args[1]
16 |     input_dim = input.dim()
17 |     if dim<0:
18 |         dim = dim+input_dim
19 | 
20 |     start = get_arg(ctx, 'start', pos=2, default=None)
21 |     length = get_arg(ctx, 'length', pos=3, default=None)
22 | 
23 |     
24 |     output = ctx.method_return
25 |     
26 |     input_trt = trt_(ctx.network, input)
27 | 
28 |     input_shape_trt = tensor_trt_get_shape_trt(ctx.network, input_trt)
29 |     start_trt = get_intwarper_trt(start, ctx)
30 |     length_trt = get_intwarper_trt(length, ctx)
31 |     stride_trt = trt_(ctx.network, torch.ones([input_dim]).int())
32 |     if dim!=0:
33 |         start_pre_trt = trt_(ctx.network, torch.zeros([dim,]).int())
34 |         start_trt = ctx.network.add_concatenation([start_pre_trt, start_trt]).get_output(0)
35 |         length_pre_trt = slice_shape_trt(ctx.network, input_shape_trt, 0, dim)
36 |         length_trt = ctx.network.add_concatenation([length_pre_trt, length_trt]).get_output(0)
37 |     if dim<input_dim-1:
38 |         start_post_trt = trt_(ctx.network, torch.zeros([input_dim-dim - 1]).int())
39 | 
40 |         start_trt = ctx.network.add_concatenation([start_trt, start_post_trt]).get_output(0)
41 |         length_post_trt = slice_shape_trt(ctx.network, input_shape_trt, dim+1)
42 |         length_trt = ctx.network.add_concatenation([length_trt, length_post_trt]).get_output(0)
43 |     
44 |     layer = ctx.network.add_slice(input_trt, [0]*input_dim, [1]*input_dim, [1]*input_dim)
45 |     layer.set_input(1, start_trt)
46 |     layer.set_input(2, length_trt)
47 |     layer.set_input(3, stride_trt)
48 |     output._trt = layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/new_ones.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | 
 4 | @tensorrt_converter('torch.Tensor.new_ones')
 5 | def convert_new_ones(ctx):
 6 |     input = ctx.method_args[0]
 7 |     size = get_arg(ctx, 'size', pos=1, default=None)
 8 |     dtype = get_arg(ctx, 'dtype', pos=2, default=input.dtype)
 9 | 
10 |     output = ctx.method_return
11 | 
12 |     if isinstance(size, int):
13 |         size = (size, )
14 | 
15 |     # check const
16 |     is_const = True
17 |     for s in size:
18 |         if hasattr(s,'_trt'):
19 |             is_const = False
20 |             break
21 | 
22 |     if is_const:
23 |         # create const value
24 |         output_trt = trt_(ctx.network, output)
25 |     
26 |     else:
27 |         # create fill
28 |         trt_size = []
29 |         for s in size:
30 |             if hasattr(s, '_trt'):
31 |                 trt_size.append(s._trt)
32 |             else:
33 |                 trt_size.append(trt_(ctx.network, s))
34 |         
35 |         trt_size = ctx.network.add_concatenation(trt_size).get_output(0)
36 | 
37 |         layer = ctx.network.add_fill(size, trt.FillOperation.RANDOM_UNIFORM)
38 |         layer.set_input(0, trt_size)
39 |         layer.set_input(1, trt_(ctx.network, input.new_tensor(1)))
40 |         layer.set_input(2, trt_(ctx.network, input.new_tensor(1)))
41 | 
42 |         output_trt = layer.get_output(0)
43 | 
44 | 
45 |     data_type = None
46 |     if dtype==torch.float32:
47 |         data_type = trt.DataType.FLOAT
48 |     elif dtype==torch.int32 or dtype==torch.long:
49 |         data_type = trt.DataType.INT32
50 |     elif dtype==torch.bool:
51 |         data_type = trt.DataType.BOOL
52 |     else:
53 |         print("unsupported convert type:{}".format(dtype))
54 |     
55 |     if data_type is not None:
56 |         layer = ctx.network.add_identity(output_trt)
57 |         layer.set_output_type(0, data_type)
58 |         output_trt = layer.get_output(0)
59 | 
60 |     output._trt = output_trt
61 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/new_zeros.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | 
 4 | @tensorrt_converter('torch.Tensor.new_zeros')
 5 | def convert_new_zeros(ctx):
 6 |     input = ctx.method_args[0]
 7 |     size = get_arg(ctx, 'size', pos=1, default=None)
 8 |     dtype = get_arg(ctx, 'dtype', pos=2, default=input.dtype)
 9 | 
10 |     output = ctx.method_return
11 | 
12 |     if isinstance(size, int):
13 |         size = (size, )
14 | 
15 |     # check const
16 |     is_const = True
17 |     for s in size:
18 |         if hasattr(s,'_trt'):
19 |             is_const = False
20 |             break
21 | 
22 |     if is_const:
23 |         # create const value
24 |         output_trt = trt_(ctx.network, output)
25 |     
26 |     else:
27 |         # create fill
28 |         trt_size = []
29 |         for s in size:
30 |             if hasattr(s, '_trt'):
31 |                 trt_size.append(s._trt)
32 |             else:
33 |                 trt_size.append(trt_(ctx.network, s))
34 |         
35 |         trt_size = ctx.network.add_concatenation(trt_size).get_output(0)
36 | 
37 |         layer = ctx.network.add_fill(size, trt.FillOperation.RANDOM_UNIFORM)
38 |         layer.set_input(0, trt_size)
39 |         layer.set_input(1, trt_(ctx.network, input.new_tensor(0)))
40 |         layer.set_input(2, trt_(ctx.network, input.new_tensor(0)))
41 | 
42 |         output_trt = layer.get_output(0)
43 | 
44 | 
45 |     data_type = None
46 |     if dtype==torch.float32:
47 |         data_type = trt.DataType.FLOAT
48 |     elif dtype==torch.int32 or dtype==torch.long:
49 |         data_type = trt.DataType.INT32
50 |     elif dtype==torch.bool:
51 |         data_type = trt.DataType.BOOL
52 |     else:
53 |         print("unsupported convert type:{}".format(dtype))
54 |     
55 |     if data_type is not None:
56 |         layer = ctx.network.add_identity(output_trt)
57 |         layer.set_output_type(0, data_type)
58 |         output_trt = layer.get_output(0)
59 | 
60 |     output._trt = output_trt
61 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/nms.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.plugins import *
 3 | import torchvision.ops
 4 | 
 5 | 
 6 | @tensorrt_converter('torchvision.ops.nms')
 7 | def convert_nms(ctx):
 8 | 
 9 |     boxes = get_arg(ctx, 'boxes', pos=0, default=None)
10 |     scores = get_arg(ctx, 'scores', pos=1, default=None)
11 |     iou_threshold = get_arg(ctx, 'iou_threshold', pos=2, default=0.7)
12 | 
13 |     output = ctx.method_return
14 | 
15 |     boxes_trt = trt_(ctx.network, boxes)
16 |     scores_trt = trt_(ctx.network, scores)
17 | 
18 | 
19 |     plugin = create_nms_plugin("nms_" + str(id(boxes)),
20 |                                iou_threshold=iou_threshold
21 |                                )
22 | 
23 |     custom_layer = ctx.network.add_plugin_v2(
24 |         inputs=[boxes_trt, scores_trt], plugin=plugin)
25 |     
26 |     output._trt = custom_layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/ones.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from collections.abc import Iterable
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.ones')
 6 | def convert_ones(ctx):
 7 |     size = ctx.method_args[0]
 8 |     if not isinstance(size, Iterable):
 9 |         size = ctx.method_args
10 |     dtype = torch.float32
11 |     if "dtype" in ctx.method_kwargs:
12 |         dtype = ctx.method_kwargs["dtype"]
13 |     output = ctx.method_return
14 | 
15 |     if isinstance(size, int):
16 |         size = (size, )
17 | 
18 |     # check const
19 |     is_const = True
20 |     for s in size:
21 |         if hasattr(s, '_trt'):
22 |             is_const = False
23 |             break
24 | 
25 |     if is_const:
26 |         # create const value
27 |         output_trt = trt_(ctx.network, output)
28 | 
29 |     else:
30 |         # create fill
31 |         trt_size = []
32 |         for s in size:
33 |             if hasattr(s, '_trt'):
34 |                 trt_size.append(s._trt)
35 |             else:
36 |                 trt_size.append(trt_(ctx.network, s))
37 | 
38 |         trt_size = ctx.network.add_concatenation(trt_size).get_output(0)
39 | 
40 |         layer = ctx.network.add_fill(size, trt.FillOperation.RANDOM_UNIFORM)
41 |         layer.set_input(0, trt_size)
42 |         layer.set_input(
43 |             1, trt_(ctx.network,
44 |                     torch.tensor(1., dtype=dtype).cuda()))
45 |         layer.set_input(
46 |             2, trt_(ctx.network,
47 |                     torch.tensor(1., dtype=dtype).cuda()))
48 | 
49 |         output_trt = layer.get_output(0)
50 | 
51 |     data_type = None
52 |     if dtype == torch.float32:
53 |         data_type = trt.DataType.FLOAT
54 |     elif dtype == torch.int32 or dtype == torch.long:
55 |         data_type = trt.DataType.INT32
56 |     elif dtype == torch.bool:
57 |         data_type = trt.DataType.BOOL
58 |     else:
59 |         print("unsupported convert type:{}".format(dtype))
60 | 
61 |     if data_type is not None:
62 |         layer = ctx.network.add_identity(output_trt)
63 |         layer.set_output_type(0, data_type)
64 |         output_trt = layer.get_output(0)
65 | 
66 |     output._trt = output_trt
67 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/ones_like.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from .mul import convert_mul
 3 | from .add import convert_add
 4 | from .cast_type import *
 5 | 
 6 | @tensorrt_converter('torch.ones_like')
 7 | def convert_ones_like(ctx):
 8 |     input = ctx.method_args[0]
 9 |     dtype = get_arg(ctx, 'dtype', pos=1, default=torch.float32)
10 |     output = ctx.method_return
11 |     input_trt = trt_(ctx.network, input)
12 | 
13 |     old_method_args = ctx.method_args
14 |     old_method_kwargs = ctx.method_kwargs
15 | 
16 |     # mul zero
17 |     input_mul_zero = input*0
18 |     ctx.method_args = [input, 0]
19 |     ctx.method_kwargs = {}
20 |     ctx.method_return = input_mul_zero
21 |     convert_mul(ctx)
22 | 
23 |     # add one
24 |     input_add_one = input_mul_zero+1
25 |     ctx.method_args = [input_mul_zero, 1]
26 |     ctx.method_kwargs = {}
27 |     ctx.method_return = input_add_one
28 |     convert_add(ctx)
29 | 
30 |     convert_type_func = None
31 |     if dtype==torch.float32:
32 |         convert_type_func = convert_float
33 |     elif dtype==torch.int32 or dtype==torch.long:
34 |         convert_type_func = convert_int
35 |     elif dtype==torch.bool:
36 |         convert_type_func = convert_bool
37 |     else:
38 |         print("unsupported convert type:{}".format(dtype))
39 |     
40 |     if convert_type_func is not None:
41 |         input_as_type = input_add_one.to(dtype)
42 |         ctx.method_args = [input_add_one, dtype]
43 |         ctx.method_return = input_as_type
44 |         convert_type_func(ctx)
45 |         ctx.method_args = [input_as_type, 0]
46 |         ctx.method_kwargs = {}
47 |         ctx.method_return = output
48 |         convert_add(ctx)
49 | 
50 |     ctx.method_args = old_method_args
51 |     ctx.method_kwargs = old_method_kwargs
52 |     ctx.method_return = output


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/pad.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.nn.functional.pad')
 6 | def convert_pad(ctx):
 7 |     input = ctx.method_args[0]
 8 |     input_trt = trt_(ctx.network, input)
 9 |     output = ctx.method_return
10 |     
11 |     pad = get_arg(ctx, 'pad', pos=1, default=[0,0,0,0])
12 |     pre_padding = (pad[2], pad[0])
13 |     post_padding = (pad[3], pad[1])
14 |     
15 |     # mode / value are ignored since not supported by TensorRT
16 |     
17 |     layer = ctx.network.add_padding(input_trt, pre_padding, post_padding)
18 |     output._trt = layer.get_output(0)
19 |     
20 | 
class Pad(torch.nn.Module):
    """Module applying ``torch.nn.functional.pad`` with a fixed ``pad`` spec."""

    def __init__(self, pad):
        super().__init__()
        self.pad = pad

    def forward(self, x):
        padded = torch.nn.functional.pad(x, self.pad)
        return padded
29 |     
30 |     
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_pad_basic():
    """Return a module padding with (1, 2, 3, 4)."""
    module = Pad((1, 2, 3, 4))
    return module


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/permute.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.Tensor.permute')
 6 | def convert_permute(ctx):
 7 |     input = ctx.method_args[0]
 8 |     input_trt = trt_(ctx.network, input)
 9 |     output = ctx.method_return
10 |     
11 |     # permutation -1 because TRT does not include batch dim
12 |     if isinstance(ctx.method_args[1], int):
13 |         permutation = tuple(ctx.method_args[1:])  # handle permute(a, b, c)
14 |     else:
15 |         permutation = tuple(ctx.method_args[1])   # handle permute([a, b, c])
16 | 
17 |         
18 |     # assert(permutation[0] == 0)  # cannot move batch dim
19 |     
20 |     # trt_permutation = tuple([p - 1 for p in permutation])[1:]
21 |     
22 |     trt_permutation = permutation
23 |     
24 |     layer = ctx.network.add_shuffle(input_trt)
25 |     layer.second_transpose = tuple(trt_permutation)
26 |     
27 |     output._trt = layer.get_output(0)
28 | 
29 | 
class Permute(torch.nn.Module):
    """Module applying ``permute`` with fixed arguments, then ``contiguous``."""

    def __init__(self, *args):
        super().__init__()
        self.args = args

    def forward(self, x):
        permuted = x.permute(*self.args)
        return permuted.contiguous()
36 | 
37 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)])
def test_permute_2d_0123():
    """Return a module with the identity permutation on 4 dims."""
    module = Permute(0, 1, 2, 3)
    return module
41 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)])
def test_permute_2d_0312():
    """Build a ``Permute`` module with the order ``(0, 3, 1, 2)``."""
    order = (0, 3, 1, 2)
    return Permute(*order)
45 | 
46 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)])
def test_permute_3d_01234():
    """Build a ``Permute`` module that keeps a 5-D input in its original order."""
    order = (0, 1, 2, 3, 4)
    return Permute(*order)
50 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)])
def test_permute_3d_04132():
    """Build a ``Permute`` module with the order ``(0, 4, 1, 3, 2)``."""
    order = (0, 4, 1, 3, 2)
    return Permute(*order)
54 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)])
def test_permute_list():
    """Build a ``Permute`` module whose order is passed as a single list."""
    order = [0, 4, 1, 3, 2]
    return Permute(order)
58 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5, 6)])
def test_permute_tuple():
    """Build a ``Permute`` module whose order is passed as a single tuple."""
    order = (0, 4, 1, 3, 2)
    return Permute(order)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/prelu.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.nn.functional.prelu')
 6 | def convert_prelu(ctx):
 7 |     input = get_arg(ctx, 'input', pos=0, default=None)
 8 |     weight = get_arg(ctx, 'weight', pos=1, default=None)
 9 |     output = ctx.method_return
10 |     
11 |     weight_shape = [1] * len(input.shape)
12 |     weight_shape[1] = weight.numel()
13 |     
14 |     input_trt = trt_(ctx.network, input)
15 |     
16 |    
17 |     # y = prelu(x) = relu(x) - alpha * relu(-x)
18 |     weight_trt = ctx.network.add_constant(weight_shape, -weight.detach().view(weight_shape).cpu().numpy()).get_output(0) # detach so considered leaf
19 |     
20 |     # x >= 0
21 |     a = ctx.network.add_activation(input_trt, trt.ActivationType.RELU).get_output(0)
22 |     
23 |     # x <= 0
24 |     b = ctx.network.add_unary(input_trt, trt.UnaryOperation.NEG).get_output(0)
25 |     b = ctx.network.add_activation(b, trt.ActivationType.RELU).get_output(0)
26 |     b = ctx.network.add_elementwise(b, weight_trt, trt.ElementWiseOperation.PROD).get_output(0)
27 |     
28 |     # y = a + b
29 |     y = ctx.network.add_elementwise(a, b, trt.ElementWiseOperation.SUM)
30 |     
31 |     output._trt = y.get_output(0)
32 | 
33 |     
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)])
def test_prelu_scalar():
    """Build a default-constructed ``torch.nn.PReLU`` module."""
    prelu = torch.nn.PReLU()
    return prelu
39 | 
40 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)])
def test_prelu_vector():
    """Build a five-parameter ``torch.nn.PReLU`` with randomly drawn weights."""
    prelu = torch.nn.PReLU(5)
    # randn so each channel gets a different slope
    slopes = torch.randn(5)
    prelu.weight = torch.nn.Parameter(slopes)
    return prelu


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/prod.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | from .unary import UnaryModule
 4 |     
 5 | 
 6 | @tensorrt_converter('torch.prod')
 7 | @tensorrt_converter('torch.Tensor.prod')
 8 | def convert_prod(ctx):
 9 |     input = ctx.method_args[0]
10 |     dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim)))
11 |     keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
12 |     input_trt= trt_(ctx.network, input)
13 |     output = ctx.method_return
14 |     layer = ctx.network.add_reduce(input_trt,  trt.ReduceOperation.PROD, torch_dim_to_trt_axes(dim), keepdim)
15 |     output._trt = layer.get_output(0)
16 |         
17 |         
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_prod_reduce_all():
    """Build a ``UnaryModule`` that calls ``torch.prod`` without a ``dim``."""
    def reduce_all(x):
        return torch.prod(x)

    return UnaryModule(reduce_all)
22 | 
23 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_prod_reduce_dim1():
    """Build a ``UnaryModule`` that calls ``torch.prod`` over dimension 1."""
    def reduce_dim1(x):
        return torch.prod(x, 1)

    return UnaryModule(reduce_dim1)
28 | 
29 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_prod_reduce_dim22():
    """Build a ``UnaryModule`` that calls ``torch.prod`` over dimension 2."""
    def reduce_dim2(x):
        return torch.prod(x, 2)

    return UnaryModule(reduce_dim2)
33 | 
34 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_prod_reduce_dim1_keepdim():
    """Build a ``UnaryModule`` calling ``torch.prod`` over dim 1 with ``keepdim=True``."""
    def reduce_dim1_keepdim(x):
        return torch.prod(x, 1, keepdim=True)

    return UnaryModule(reduce_dim1_keepdim)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/relu.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from .ReLU import *
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.relu')
 6 | @tensorrt_converter('torch.relu_')
 7 | @tensorrt_converter('torch.nn.functional.relu')
 8 | @tensorrt_converter('torch.nn.functional.relu_')
 9 | def convert_relu(ctx):
10 |     ctx.method_args = (torch.nn.ReLU(),) + ctx.method_args
11 |     convert_ReLU(ctx)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/relu6.py:
--------------------------------------------------------------------------------
1 | from torch2trt_dynamic.torch2trt_dynamic import *
2 | from .ReLU6 import *
3 | 
4 | 
@tensorrt_converter('torch.nn.functional.relu6')
def convert_relu6(ctx):
    """Convert ``F.relu6`` by delegating to ``convert_ReLU6``.

    The call is re-framed as a module call: a fresh ``torch.nn.ReLU6`` is
    placed in front of the arguments and the input tensor follows it, whether
    it was originally passed positionally or as ``input=``.  ``ctx.method_args``
    is restored afterwards, even if the delegate raises.
    """
    input = get_arg(ctx, 'input', pos=0, default=None)
    original_args = ctx.method_args
    # tuple(...) keeps the concatenation valid when method_args is a list.
    ctx.method_args = (torch.nn.ReLU6(), input) + tuple(original_args[1:])
    try:
        convert_ReLU6(ctx)
    finally:
        ctx.method_args = original_args


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/repeat.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | 
 3 | from torch2trt_dynamic.plugins import *
 4 | 
 5 | @tensorrt_converter('torch.Tensor.repeat')
 6 | def convert_repeat(ctx):
 7 |     input = ctx.method_args[0]
 8 |     shape = ctx.method_args[1]
 9 |     if isinstance(shape, int):
10 |         shape = ctx.method_args[1:]
11 | 
12 |     input_trt = trt_(ctx.network, input)
13 |     output = ctx.method_return
14 | 
15 |     plugin = create_repeat_plugin("repeat_" + str(id(input)),
16 |                                   repeat_shape=shape
17 |                                   )
18 |     
19 |     custom_layer = ctx.network.add_plugin_v2(
20 |         inputs=[input_trt], plugin=plugin)
21 | 
22 |     output._trt = custom_layer.get_output(0)
23 | 
24 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/roi_align.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.plugins import *
 3 | import torchvision.ops
 4 | 
 5 | 
 6 | @tensorrt_converter('torchvision.ops.roi_align')
 7 | def convert_roi_align(ctx):
 8 | 
 9 |     input = get_arg(ctx, 'input', pos=0, default=None)
10 |     boxes = get_arg(ctx, 'boxes', pos=1, default=None)
11 |     output_size = get_arg(ctx, 'output_size', pos=2, default=7)
12 |     spatial_scale = get_arg(ctx, 'spatial_scale', pos=3, default=1.)
13 |     sampling_ratio = get_arg(ctx, 'sampling_ratio', pos=4, default=-1)
14 |     aligned = get_arg(ctx, 'aligned', pos=5, default=False)
15 | 
16 |     output = ctx.method_return
17 | 
18 |     input_trt = trt_(ctx.network, input)
19 |     boxes_offset_trt, boxes_trt = trt_(ctx.network, 0.5/spatial_scale, boxes)
20 | 
21 |     plugin = create_roiextractor_plugin("roi_align_" + str(id(boxes)),
22 |                                         out_size = output_size,
23 |                                         sample_num = sampling_ratio,
24 |                                         featmap_strides = [1./spatial_scale],
25 |                                         roi_scale_factor = 1.,
26 |                                         finest_scale = 56,
27 |                                         aligned = 1 if aligned else 0
28 |                                )
29 | 
30 |     custom_layer = ctx.network.add_plugin_v2(
31 |         inputs=[boxes_trt, input_trt], plugin=plugin)
32 |     
33 |     output._trt = custom_layer.get_output(0)
34 | 
35 | 
@tensorrt_converter('torchvision.ops.RoIAlign.forward')
def convert_RoiAlign(ctx):
    """Convert ``RoIAlign.forward`` by delegating to ``convert_roi_align``.

    The module's ``output_size``, ``spatial_scale``, ``sampling_ratio`` and
    ``aligned`` settings are combined with the call's ``input`` and ``boxes``
    into the positional layout of the functional op.  ``ctx.method_args`` and
    ``ctx.method_kwargs`` are swapped for the duration of the delegate call
    and always restored, including when the delegate raises.
    """
    module = ctx.method_args[0]
    input = get_arg(ctx, 'input', pos=1, default=None)
    boxes = get_arg(ctx, 'boxes', pos=2, default=None)

    output_size = module.output_size
    spatial_scale = module.spatial_scale
    sampling_ratio = module.sampling_ratio
    # Modules without an ``aligned`` attribute fall back to the functional
    # converter's own default of False.
    aligned = getattr(module, 'aligned', False)

    old_method_args = ctx.method_args
    old_method_kwargs = ctx.method_kwargs
    ctx.method_args = [input, boxes, output_size, spatial_scale, sampling_ratio, aligned]
    ctx.method_kwargs = {}
    try:
        convert_roi_align(ctx)
    finally:
        ctx.method_args = old_method_args
        ctx.method_kwargs = old_method_kwargs
56 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/roi_pool.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.plugins import *
 3 | import torchvision.ops
 4 | 
 5 | 
 6 | @tensorrt_converter('torchvision.ops.roi_pool')
 7 | def convert_roi_pool(ctx):
 8 |     input = get_arg(ctx, 'input', pos=0, default=None)
 9 |     boxes = get_arg(ctx, 'boxes', pos=1, default=None)
10 |     output_size = get_arg(ctx, 'output_size', pos=2, default=7)
11 |     spatial_scale = get_arg(ctx, 'spatial_scale', pos=3, default=1.)
12 | 
13 |     output = ctx.method_return
14 | 
15 |     input_trt = trt_(ctx.network, input)
16 |     boxes_trt = trt_(ctx.network, boxes)
17 | 
18 |     plugin = create_roipool_plugin("roi_pool_" + str(id(boxes)),
19 |                                         out_size = output_size,
20 |                                         featmap_strides = [1./spatial_scale],
21 |                                         roi_scale_factor = -1,
22 |                                         finest_scale = 56)
23 | 
24 |     custom_layer = ctx.network.add_plugin_v2(
25 |         inputs=[boxes_trt, input_trt], plugin=plugin)
26 |     
27 |     output._trt = custom_layer.get_output(0)
28 | 
29 | 
@tensorrt_converter('torchvision.ops.RoIPool.forward')
def convert_RoIPool(ctx):
    """Convert ``RoIPool.forward`` by delegating to ``convert_roi_pool``.

    The module's ``output_size`` and ``spatial_scale`` are combined with the
    call's ``input`` and ``boxes`` into the positional layout of the
    functional op; the original ctx arguments are put back afterwards.
    """
    module = ctx.method_args[0]
    input = get_arg(ctx, 'input', pos=1, default=None)
    boxes = get_arg(ctx, 'boxes', pos=2, default=None)

    functional_args = [input, boxes, module.output_size, module.spatial_scale]

    # Temporarily present the call as torchvision.ops.roi_pool(...).
    saved_args, saved_kwargs = ctx.method_args, ctx.method_kwargs
    ctx.method_args = functional_args
    ctx.method_kwargs = {}
    convert_roi_pool(ctx)
    ctx.method_args, ctx.method_kwargs = saved_args, saved_kwargs
48 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/sigmoid.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.nn.functional.sigmoid')
 6 | @tensorrt_converter('torch.sigmoid')
 7 | @tensorrt_converter('torch.Tensor.sigmoid')
 8 | def convert_sigmoid(ctx):
 9 |     input = ctx.method_args[0]
10 |     input_trt = trt_(ctx.network, input)
11 |     output = ctx.method_return
12 |     
13 |     layer = ctx.network.add_activation(input_trt, trt.ActivationType.SIGMOID)
14 |     output._trt = layer.get_output(0)
15 |     
16 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_sigmoid_basic():
    """Build a ``torch.nn.Sigmoid`` module."""
    sigmoid = torch.nn.Sigmoid()
    return sigmoid


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/softmax.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.Tensor.softmax')
 6 | @tensorrt_converter('torch.softmax')
 7 | @tensorrt_converter('torch.nn.functional.softmax')
 8 | def convert_softmax(ctx):
 9 | 
10 |     input = ctx.method_args[0]
11 |     input_trt = trt_(ctx.network, input)
12 |     output = ctx.method_return
13 | 
14 |     # get dims from args or kwargs
15 |     dim = get_arg(ctx, 'dim', pos=1, default=None)
16 |     if dim is None:
17 |         dim = -1
18 |     if dim<0:
19 |         dim = len(input.shape)+dim
20 | 
21 |     # axes = 1 << (dim - 1)
22 |     axes = 1<<dim
23 | 
24 |     layer = ctx.network.add_softmax(input=input_trt)
25 |     layer.axes = axes
26 | 
27 |     output._trt = layer.get_output(0)
28 | 
29 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_softmax_module():
    """Build a ``torch.nn.Softmax`` module over dimension 1."""
    softmax = torch.nn.Softmax(1)
    return softmax
34 | 
35 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_softmax_module_dim2():
    """Build a ``torch.nn.Softmax`` module over dimension 2."""
    softmax = torch.nn.Softmax(2)
    return softmax
39 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/split.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.split')
 6 | @tensorrt_converter('torch.Tensor.split')
 7 | def convert_split(ctx):
 8 |     input = get_arg(ctx, 'input', 0, None)
 9 |     input_trt = trt_(ctx.network, input)
10 |     # we don't need to parse split/chunk (arg 1)
11 |     # since we infer size from output tensors
12 |     dim = get_arg(ctx, 'dim', 2, 0)
13 |     
14 |     outputs = ctx.method_return
15 |     
16 |     assert(dim >= 1)
17 |     
18 |     start = [0] * len(input.shape) # exclude batch
19 |     stride = [1] * len(start)
20 |     offset = 0
21 |     trt_dim = dim
22 |     
23 |     # add slice layers
24 |     for i, output in enumerate(outputs):
25 |         shape = list(output.shape)
26 |         start[trt_dim] = offset
27 |         layer = ctx.network.add_slice(input_trt, start=start, shape=shape, stride=stride)
28 |         output._trt = layer.get_output(0)
29 |         offset = offset + shape[trt_dim]
30 |         
31 | 
class TorchSplit(torch.nn.Module):
    """Module that calls ``torch.split`` with arguments fixed at construction."""

    def __init__(self, *args, **kwargs):
        """Keep the positional and keyword arguments for later forwarding."""
        super().__init__()
        self.args = args
        self.kwargs = kwargs

    def forward(self, x):
        """Return ``torch.split(x, ...)`` using the stored arguments."""
        pieces = torch.split(x, *self.args, **self.kwargs)
        return pieces
41 |     
42 |     
class TensorSplit(torch.nn.Module):
    """Module that calls ``Tensor.split`` with arguments fixed at construction."""

    def __init__(self, *args, **kwargs):
        """Keep the positional and keyword arguments for later forwarding."""
        super().__init__()
        self.args = args
        self.kwargs = kwargs

    def forward(self, x):
        """Return ``x.split(...)`` using the stored arguments."""
        pieces = x.split(*self.args, **self.kwargs)
        return pieces
52 |     
53 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_torch_split_1_1():
    """Build a ``TorchSplit`` module with the positional arguments ``(1, 1)``."""
    split_args = (1, 1)
    return TorchSplit(*split_args)
58 | 
59 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_torch_split_2_1():
    """Build a ``TorchSplit`` module with the positional arguments ``(2, 1)``."""
    split_args = (2, 1)
    return TorchSplit(*split_args)
64 | 
65 |     
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_torch_split_3_1():
    """Build a ``TorchSplit`` module with the positional arguments ``(3, 1)``."""
    split_args = (3, 1)
    return TorchSplit(*split_args)
70 | 
71 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_torch_split_3_2():
    """Build a ``TorchSplit`` module with the positional arguments ``(3, 2)``."""
    split_args = (3, 2)
    return TorchSplit(*split_args)
76 | 
77 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_tensor_split_3_2():
    """Build a ``TensorSplit`` module with the positional arguments ``(3, 2)``."""
    split_args = (3, 2)
    return TensorSplit(*split_args)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/squeeze.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | from .identity import *
 4 | 
 5 | 
 6 | @tensorrt_converter('torch.Tensor.squeeze')
 7 | @tensorrt_converter('torch.squeeze')
 8 | def convert_squeeze(ctx):
 9 |         
10 |     input = ctx.method_args[0]
11 |     dim = get_arg(ctx, 'dim', pos=1, default=None)
12 |     if dim is None:
13 |         dim = list(filter(lambda x:input.shape[x]==1, range(len(input.shape))))
14 |     else:
15 |         if input.shape[dim]!=1:
16 |             ctx.method_args = [input]
17 |             convert_identity(ctx)
18 |             return
19 |         if dim <0:
20 |             dim = len(input.shape)+dim
21 |         dim = [dim]
22 |     input_trt = trt_(ctx.network, input)
23 |     shape_trt = ctx.network.add_shape(input_trt).get_output(0)
24 |     output = ctx.method_return
25 | 
26 |     reverse_dim = list(filter(lambda x: x not in dim, range(len(input.shape))))
27 |     reverse_dim_trt = trt_(ctx.network, torch.tensor(reverse_dim,dtype=torch.int32).to(input.device))
28 | 
29 |     new_shape_trt = ctx.network.add_gather(shape_trt, reverse_dim_trt, 0).get_output(0)
30 | 
31 |     layer = ctx.network.add_shuffle(input_trt)
32 |     layer.set_input(1, new_shape_trt)
33 |     output._trt = layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/stack.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from .cat import *
 3 | from .unsqueeze import *
 4 | 
 5 | 
 6 | @tensorrt_converter('torch.stack')
 7 | def convert_stack(ctx):
 8 |     inputs = ctx.method_args[0]
 9 |     dim = get_arg(ctx, 'dim', pos=1, default=0)
10 |     output = ctx.method_return
11 | 
12 |     unsqueeze_inputs = []
13 |     for input in inputs:
14 |         unsqueeze_input = input.unsqueeze(dim=dim)
15 |         ctx.method_args = (input, dim)
16 |         ctx.method_return = unsqueeze_input
17 |         convert_unsqueeze(ctx)
18 |         unsqueeze_inputs.append(unsqueeze_input)
19 |     
20 |     ctx.method_args = (unsqueeze_inputs, dim)
21 |     ctx.method_return = output
22 | 
23 |     convert_cat(ctx)
24 | 
25 | 
26 | # @tensorrt_converter('torch.stack')
27 | # def convert_stack(ctx):
28 | #     support_dynamic_shape = False
29 | #     if hasattr(ctx, "support_dynamic_shape"):
30 | #         support_dynamic_shape = ctx.support_dynamic_shape
31 |         
32 | #     inputs = ctx.method_args[0]
33 | 
34 | #     if 'dim' in ctx.method_kwargs:
35 | #         dim = ctx.method_kwargs['dim']
36 | #     else:
37 | #         dim = ctx.method_args[1]
38 | 
39 | #     output = ctx.method_return
40 | #     trt_inputs = [trt_(ctx.network, i) for i in inputs]
41 | 
42 | #     if dim==-1:
43 | #         dim = len(inputs[0].shape)
44 | #     shape = inputs[0].shape[:dim] + (1,) + inputs[0].shape[dim:]
45 | #     shape = tuple(shape)
46 | #     reshaped_trt_inputs = []
47 | #     for trt_input in trt_inputs:
48 | #         layer = ctx.network.add_shuffle(trt_input)
49 | #         layer.reshape_dims = shape
50 | #         reshaped_trt_inputs.append(layer.get_output(0))
51 | 
52 | #     layer = ctx.network.add_concatenation(inputs=reshaped_trt_inputs)
53 | 
54 | #     if support_dynamic_shape:
55 | #         layer.axis = dim
56 | #     else:
57 | #         layer.axis = dim - 1
58 | #     output._trt = layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/sub.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.sub')
 6 | @tensorrt_converter('torch.Tensor.__isub__')
 7 | @tensorrt_converter('torch.Tensor.__sub__')
 8 | def convert_sub(ctx):
 9 |     input_a = ctx.method_args[0]
10 |     input_b = ctx.method_args[1]
11 |     input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
12 |     output = ctx.method_return
13 |     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUB)
14 |     output._trt = layer.get_output(0)
15 | 
16 |     
@tensorrt_converter('torch.Tensor.__rsub__')
def convert_sub(ctx):
    """Convert reflected subtraction (``other - tensor``) into a SUB layer.

    ``__rsub__`` receives the tensor first and the left-hand operand second,
    so the two arguments are swapped before building the layer.

    NOTE(review): this definition reuses the name ``convert_sub`` and so
    rebinds the module-level name from the converter defined above it.
    """
    minuend, subtrahend = ctx.method_args[1], ctx.method_args[0]  # flipped for rsub
    minuend_trt, subtrahend_trt = trt_(ctx.network, minuend, subtrahend)
    result = ctx.method_return
    sub_layer = ctx.network.add_elementwise(
        minuend_trt, subtrahend_trt, trt.ElementWiseOperation.SUB)
    result._trt = sub_layer.get_output(0)
25 |     
26 | 
class Sub(torch.nn.Module):
    """Module computing ``x - y`` with the binary minus operator."""

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        """Return the difference of the two inputs."""
        difference = x - y
        return difference
33 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_sub_basic():
    """Build a ``Sub`` module."""
    module = Sub()
    return module
37 | 
38 | 
class ISub(torch.nn.Module):
    """Module computing ``x -= y`` with the in-place minus operator."""

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        """Subtract ``y`` from ``x`` in place and return ``x``."""
        # Keep the augmented assignment: it is the operator under test.
        x -= y
        return x
46 | 
47 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_sub_isub():
    """Build an ``ISub`` module."""
    module = ISub()
    return module
51 | 
52 | 
class TorchSub(torch.nn.Module):
    """Module computing the difference through the ``torch.sub`` function."""

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        """Return ``torch.sub(x, y)``."""
        difference = torch.sub(x, y)
        return difference
59 | 
60 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_torch_sub():
    """Build a ``TorchSub`` module."""
    module = TorchSub()
    return module
64 | 
65 | 
class RSubInt(torch.nn.Module):
    """Module computing ``1 - x`` with an integer left-hand operand."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return the input subtracted from the integer 1."""
        result = 1 - x
        return result
72 | 
73 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_rsub_int():
    """Build an ``RSubInt`` module."""
    module = RSubInt()
    return module
77 | 
78 | 
class RSubFloat(torch.nn.Module):
    """Module computing ``1.0 - x`` with a float left-hand operand."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return the input subtracted from the float 1.0."""
        result = 1.0 - x
        return result
85 | 
86 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_rsub_float():
    """Build an ``RSubFloat`` module."""
    module = RSubFloat()
    return module


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/sum.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | from .unary import UnaryModule
 4 |     
 5 | 
 6 | @tensorrt_converter('torch.sum')
 7 | @tensorrt_converter('torch.Tensor.sum')
 8 | def convert_sum(ctx):
 9 |     input = ctx.method_args[0]
10 |     dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim)))
11 |     keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)
12 |     input_trt= trt_(ctx.network, input)
13 |     output = ctx.method_return
14 |     layer = ctx.network.add_reduce(input_trt,  trt.ReduceOperation.SUM, torch_dim_to_trt_axes(dim), keepdim)
15 |     output._trt = layer.get_output(0)
16 |         
17 |         
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_sum_reduce_all():
    """Build a ``UnaryModule`` that calls ``torch.sum`` without a ``dim``."""
    def reduce_all(x):
        return torch.sum(x)

    return UnaryModule(reduce_all)
22 | 
23 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_sum_reduce_dim1():
    """Build a ``UnaryModule`` that calls ``torch.sum`` over dimension 1."""
    def reduce_dim1(x):
        return torch.sum(x, 1)

    return UnaryModule(reduce_dim1)
28 | 
29 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_sum_reduce_dim22():
    """Build a ``UnaryModule`` that calls ``torch.sum`` over dimension 2."""
    def reduce_dim2(x):
        return torch.sum(x, 2)

    return UnaryModule(reduce_dim2)
33 | 
34 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
def test_sum_reduce_dim1_keepdim():
    """Build a ``UnaryModule`` calling ``torch.sum`` over dim 1 with ``keepdim=True``."""
    def reduce_dim1_keepdim(x):
        return torch.sum(x, 1, keepdim=True)

    return UnaryModule(reduce_dim1_keepdim)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/t.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | from .transpose import convert_transpose
 4 | 
 5 | 
 6 | @tensorrt_converter('torch.Tensor.t')
 7 | def convert_t(ctx):
 8 |     input = ctx.method_args[0]
 9 |     input_trt = trt_(ctx.network, input)
10 |     output = ctx.method_return
11 |     # permutation -1 because TRT does not include batch dim
12 | 
13 |     if len(input.shape)==1:
14 |         layer = ctx.network.add_identity(input_trt)
15 |         output._trt = layer.get_output(0)
16 |     else:
17 |         ctx.method_args = [input, 1, 0]
18 |         ctx.method_kwargs = {}
19 |         convert_transpose(ctx)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/tanh.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.nn.functional.tanh')
 6 | @tensorrt_converter('torch.tanh')
 7 | def convert_tanh(ctx):
 8 |     input = ctx.method_args[0]
 9 |     input_trt = trt_(ctx.network, input)
10 |     output = ctx.method_return
11 |     
12 |     layer = ctx.network.add_activation(input_trt, trt.ActivationType.TANH)
13 |     output._trt = layer.get_output(0)
14 |     
15 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_tanh_basic():
    """Build a ``torch.nn.Tanh`` module."""
    tanh = torch.nn.Tanh()
    return tanh


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/to.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from .cast_type import *
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.Tensor.to')
 6 | def convert_Tensor_to(ctx):
 7 |     input = ctx.method_args[0]
 8 |     output = ctx.method_return
 9 | 
10 |     input_trt = trt_(ctx.network, input)
11 |     if output.dtype == input.dtype:
12 |         output._trt = input_trt
13 |     else:
14 |         data_type = output.dtype
15 |         if data_type == torch.int64:
16 |             data_type = torch.int32
17 |         
18 |         output_trt = trt_cast(ctx.network, input_trt, data_type)
19 |         output._trt = output_trt


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/transpose.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.transpose')
 6 | def convert_transpose(ctx):
 7 |     input = ctx.method_args[0]
 8 |     input_trt = trt_(ctx.network, input)
 9 |     output = ctx.method_return
10 |     # permutation -1 because TRT does not include batch dim
11 | 
12 |     permutation = list(range(len(input.shape)))
13 |     dim0 = ctx.method_args[1]
14 |     dim1 = ctx.method_args[2]
15 |     permutation[dim0] = dim1
16 |     permutation[dim1] = dim0
17 |     layer = ctx.network.add_shuffle(input_trt)
18 |     layer.second_transpose = tuple(permutation)
19 |     output._trt = layer.get_output(0)
20 | 
21 | 
class Transpose(torch.nn.Module):
    """Module that swaps two fixed dimensions of its input."""

    def __init__(self, dim0, dim1):
        """Remember the two dimensions to exchange."""
        super().__init__()
        self.dim0, self.dim1 = dim0, dim1

    def forward(self, x):
        """Return a contiguous copy of ``x`` with ``dim0`` and ``dim1`` swapped."""
        swapped = torch.transpose(x, self.dim0, self.dim1)
        return swapped.contiguous()
29 | 
30 | 
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
def test_transpose_12():
    """Build a ``Transpose`` module that swaps dimensions 1 and 2."""
    dims = (1, 2)
    return Transpose(*dims)
35 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/unsqueeze.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.Tensor.unsqueeze')
 6 | @tensorrt_converter('torch.unsqueeze')
 7 | def convert_unsqueeze(ctx):
 8 |         
 9 |     input = ctx.method_args[0]
10 |     dim = get_arg(ctx, 'dim', pos=1, default=None)
11 |     if dim<0:
12 |         dim = len(input.shape)+dim+1
13 |     input_trt = trt_(ctx.network, input)
14 |     shape_trt = ctx.network.add_shape(input_trt).get_output(0)
15 |     unsqueeze_trt = trt_(ctx.network, input.new_ones((1),dtype=torch.int32))
16 |     output = ctx.method_return
17 | 
18 |     shape1_trt = None
19 |     shape2_trt = None
20 |     if dim == 0:
21 |         shape2_trt = shape_trt
22 |     elif dim == len(input.shape):
23 |         shape1_trt = shape_trt
24 |     else:
25 |         slice1_start = [0]
26 |         slice1_size = [dim]
27 |         slice1_stride = [1]
28 |         shape1_trt = ctx.network.add_slice(shape_trt, slice1_start, slice1_size, slice1_stride).get_output(0)
29 |         slice2_start = [dim]
30 |         slice2_size = [len(input.shape)-dim]
31 |         slice2_stride = [1]
32 |         shape2_trt = ctx.network.add_slice(shape_trt, slice2_start, slice2_size, slice2_stride).get_output(0)
33 | 
34 |     if shape1_trt == None:
35 |         new_shape_trt = ctx.network.add_concatenation([unsqueeze_trt, shape2_trt]).get_output(0)
36 |     elif shape2_trt == None:
37 |         new_shape_trt = ctx.network.add_concatenation([shape1_trt, unsqueeze_trt]).get_output(0)
38 |     else:
39 |         new_shape_trt = ctx.network.add_concatenation([shape1_trt, unsqueeze_trt, shape2_trt]).get_output(0)
40 | 
41 |     layer = ctx.network.add_shuffle(input_trt)
42 |     layer.set_input(1, new_shape_trt)
43 |     output._trt = layer.get_output(0)


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/view_as.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from torch2trt_dynamic.module_test import add_module_test
 3 | 
 4 | 
 5 | @tensorrt_converter('torch.Tensor.view_as')
 6 | def convert_view_as(ctx):
 7 |         
 8 |     input = ctx.method_args[0]
 9 |     other = get_arg(ctx, 'other', pos=1, default=None)
10 |     input_trt = trt_(ctx.network, input)
11 |     other_trt = trt_(ctx.network, other)
12 |     output = ctx.method_return
13 | 
14 |     shape_trt = ctx.network.add_shape(other_trt).get_output(0)
15 |     
16 |     layer = ctx.network.add_shuffle(input_trt)
17 |     layer.set_input(1, shape_trt)
18 |     output._trt = layer.get_output(0)
19 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/zeros.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from collections.abc import Iterable
 3 | 
 4 | @tensorrt_converter('torch.zeros')
 5 | def convert_zeros(ctx):
 6 |     size = ctx.method_args[0]
 7 |     if not isinstance(size, Iterable):
 8 |         size = ctx.method_args
 9 |     dtype = torch.float32
10 |     if "dtype" in ctx.method_kwargs:
11 |         dtype = ctx.method_kwargs["dtype"]
12 |     output = ctx.method_return
13 | 
14 |     if isinstance(size, int):
15 |         size = (size, )
16 | 
17 |     # check const
18 |     is_const = True
19 |     for s in size:
20 |         if hasattr(s,'_trt'):
21 |             is_const = False
22 |             break
23 | 
24 |     if is_const:
25 |         # create const value
26 |         output_trt = trt_(ctx.network, output)
27 |     
28 |     else:
29 |         # create fill
30 |         trt_size = []
31 |         for s in size:
32 |             if hasattr(s, '_trt'):
33 |                 trt_size.append(s._trt)
34 |             else:
35 |                 trt_size.append(trt_(ctx.network, s))
36 |         
37 |         trt_size = ctx.network.add_concatenation(trt_size).get_output(0)
38 | 
39 |         layer = ctx.network.add_fill(size, trt.FillOperation.RANDOM_UNIFORM)
40 |         layer.set_input(0, trt_size)
41 |         layer.set_input(1, trt_(ctx.network, torch.tensor(0., dtype=dtype).cuda()))
42 |         layer.set_input(2, trt_(ctx.network, torch.tensor(0., dtype=dtype).cuda()))
43 | 
44 |         output_trt = layer.get_output(0)
45 | 
46 | 
47 |     data_type = None
48 |     if dtype==torch.float32:
49 |         data_type = trt.DataType.FLOAT
50 |     elif dtype==torch.int32 or dtype==torch.long:
51 |         data_type = trt.DataType.INT32
52 |     elif dtype==torch.bool:
53 |         data_type = trt.DataType.BOOL
54 |     else:
55 |         print("unsupported convert type:{}".format(dtype))
56 |     
57 |     if data_type is not None:
58 |         layer = ctx.network.add_identity(output_trt)
59 |         layer.set_output_type(0, data_type)
60 |         output_trt = layer.get_output(0)
61 | 
62 |     output._trt = output_trt
63 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/converters/zeros_like.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic.torch2trt_dynamic import *
 2 | from .mul import convert_mul
 3 | from .add import convert_add
 4 | from .cast_type import *
 5 | 
 6 | 
@tensorrt_converter('torch.zeros_like')
def convert_zeros_like(ctx):
    """Convert ``torch.zeros_like`` by chaining existing converters.

    The conversion is expressed as ``(input * 0).to(dtype) + 0``: the context
    is temporarily rewritten so that ``convert_mul``, one of the type-cast
    converters and ``convert_add`` can be reused, and the original
    ``method_args`` / ``method_kwargs`` / ``method_return`` are put back
    before returning.

    For a dtype other than float32, int32/long or bool a message is printed
    and neither the cast nor the final ``convert_add`` step runs.

    NOTE(review): a missing ``dtype`` falls back to ``torch.float32`` here;
    confirm this is intended for non-float inputs.
    """
    input = ctx.method_args[0]
    dtype = get_arg(ctx, 'dtype', pos=1, default=torch.float32)
    output = ctx.method_return
    input_trt = trt_(ctx.network, input)

    # Saved so the context can be restored after the nested converter calls.
    old_method_args = ctx.method_args
    old_method_kwargs = ctx.method_kwargs

    # Step 1: multiply the input by zero through the mul converter.
    input_mul_zero = input*0
    ctx.method_args = [input, 0]
    ctx.method_kwargs = {}
    ctx.method_return = input_mul_zero
    convert_mul(ctx)

    # Step 2: pick the cast converter matching the requested dtype.
    convert_type_func = None
    if dtype==torch.float32:
        convert_type_func = convert_float
    elif dtype==torch.int32 or dtype==torch.long:
        convert_type_func = convert_int
    elif dtype==torch.bool:
        convert_type_func = convert_bool
    else:
        print("unsupported convert type:{}".format(dtype))

    if convert_type_func is not None:
        input_as_type = input_mul_zero.to(dtype)
        ctx.method_args = [input_mul_zero, dtype]
        ctx.method_return = input_as_type
        convert_type_func(ctx)
        # Step 3: add zero with `output` as the return value, so the add
        # converter is the one that processes the real output tensor.
        ctx.method_args = [input_as_type, 0]
        ctx.method_kwargs = {}
        ctx.method_return = output
        convert_add(ctx)

    # Restore the context exactly as it was on entry.
    ctx.method_args = old_method_args
    ctx.method_kwargs = old_method_kwargs
    ctx.method_return = output


--------------------------------------------------------------------------------
/torch2trt_dynamic/module_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torchvision
 3 | 
 4 | 
 5 | class ModuleTest(object):
 6 |     def __init__(self, module_fn, dtype, device, input_shapes, **torch2trt_kwargs):
 7 |         self.module_fn = module_fn
 8 |         self.dtype = dtype
 9 |         self.device = device
10 |         self.input_shapes = input_shapes
11 |         self.torch2trt_kwargs = torch2trt_kwargs
12 |         
13 |     def module_name(self):
14 |         return self.module_fn.__module__ + '.' + self.module_fn.__name__
15 | 
16 | 
# Registry of every ModuleTest created through add_module_test.
MODULE_TESTS = []


def add_module_test(dtype, device, input_shapes, **torch2trt_kwargs):
    """Return a decorator that registers the decorated factory as a test.

    The decorator appends a ``ModuleTest`` built from the factory and the
    given settings to ``MODULE_TESTS`` and returns the factory unchanged.
    """
    def register_module_test(module):
        entry = ModuleTest(module, dtype, device, input_shapes, **torch2trt_kwargs)
        # In-place append keeps the same list object, as ``+=`` did.
        MODULE_TESTS.append(entry)
        return module

    return register_module_test


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/__init__.py:
--------------------------------------------------------------------------------
 1 | from .create_groupnorm_plugin import create_groupnorm_plugin
 2 | from .create_repeatdim_plugin import create_repeat_plugin
 3 | from .create_exview_plugin import create_exview_plugin
 4 | from .create_layernorm_plugin import create_layernorm_plugin
 5 | from .create_torchgather_plugin import create_torchgather_plugin
 6 | from .create_adaptivepool_plugin import create_adaptivepool_plugin
 7 | from .create_meshgrid_plugin import create_meshgrid_plugin
 8 | from .create_gridsample_plugin import create_gridsample_plugin
 9 | from .create_torchflip_plugin import create_torchflip_plugin
10 | from .create_torchcummaxmin_plugin import create_torchcummaxmin_plugin
11 | from .create_torchcum_plugin import create_torchcum_plugin
12 | from .create_dcn_plugin import create_dcn_plugin
13 | from .create_nms_plugin import create_nms_plugin
14 | from .create_roiextractor_plugin import create_roiextractor_plugin
15 | from .create_roipool_plugin import create_roipool_plugin


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_adaptivepool_plugin.py:
--------------------------------------------------------------------------------
 1 | import tensorrt as trt
 2 | import numpy as np
 3 | 
 4 | import os
 5 | import os.path as osp
 6 | from .globals import dir_path
 7 | import ctypes
 8 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 9 | 
10 | 
11 | 
def create_adaptivepool_plugin(layer_name,
                                output_size,
                                pooling_type):
    """Create an ``AdaptivePoolPluginDynamic`` plugin.

    Args:
        layer_name: name handed to the creator's ``create_plugin``.
        output_size: values stored as the int32 ``output_size`` field.
        pooling_type: converted with ``int()`` and stored as the int32
            ``pooling_type`` field.

    Returns:
        The object returned by the plugin creator.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('AdaptivePoolPluginDynamic', '1', '')

    # The field objects stay referenced by this list until the plugin has
    # been created.
    fields = [
        trt.PluginField("output_size",
                        np.array(output_size, dtype=np.int32),
                        trt.PluginFieldType.INT32),
        trt.PluginField("pooling_type",
                        np.array([int(pooling_type)], dtype=np.int32),
                        trt.PluginFieldType.INT32),
    ]

    pfc = trt.PluginFieldCollection()
    for field in fields:
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_exview_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import os
 4 | import os.path as osp
 5 | # dir_path = os.path.dirname(os.path.realpath(__file__))
 6 | from .globals import dir_path
 7 | import ctypes
 8 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 9 | 
10 | import tensorrt as trt
11 | 
12 | 
def create_exview_plugin(layer_name,
                            expr_list):
    """Create an ``ExViewPluginDynamic`` plugin.

    Args:
        layer_name: name handed to the creator's ``create_plugin``.
        expr_list: strings joined with ``;`` and stored, one code point per
            uint8 element, as the ``dim_expression`` CHAR field.

    Returns:
        The object returned by the plugin creator.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('ExViewPluginDynamic', '1', '')

    joined_expr = ';'.join(expr_list)
    expr_codes = np.array([ord(ch) for ch in joined_expr], np.uint8)
    expr_field = trt.PluginField(
        "dim_expression", expr_codes, trt.PluginFieldType.CHAR)

    pfc = trt.PluginFieldCollection()
    pfc.append(expr_field)

    return creator.create_plugin(layer_name, pfc)
27 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_gridsample_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import os
 4 | import os.path as osp
 5 | # dir_path = os.path.dirname(os.path.realpath(__file__))
 6 | from .globals import dir_path
 7 | import ctypes
 8 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 9 | 
10 | import tensorrt as trt
11 | 
12 | 
def create_gridsample_plugin(layer_name,
                            mode,
                            padding_mode,
                            align_corners):
    """Create a ``GridSamplePluginDynamic`` plugin.

    ``mode``, ``padding_mode`` and ``align_corners`` are each stored as a
    one-element int32 field of the same name.

    Returns:
        The object returned by the plugin creator.
    """
    def int32_field(field_name, value):
        # Wrap a single value as a one-element int32 plugin field.
        return trt.PluginField(field_name,
                               np.array([value], dtype=np.int32),
                               trt.PluginFieldType.INT32)

    creator = trt.get_plugin_registry().get_plugin_creator(
        'GridSamplePluginDynamic', '1', '')

    # Kept in a list so every field stays referenced until the plugin exists.
    fields = [
        int32_field("mode", mode),
        int32_field("padding_mode", padding_mode),
        int32_field("align_corners", align_corners),
    ]

    pfc = trt.PluginFieldCollection()
    for field in fields:
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_groupnorm_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | # import pyamirstan_plugin as pyamir
 4 | 
 5 | import os
 6 | import os.path as osp
 7 | # dir_path = os.path.dirname(os.path.realpath(__file__))
 8 | from .globals import dir_path
 9 | import ctypes
10 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
11 | 
12 | import tensorrt as trt
13 | 
14 | 
def create_groupnorm_plugin(layer_name,
                            num_groups,
                            num_channels,
                            W,
                            B,
                            eps=1e-5,
                            type_id=trt.DataType.FLOAT):
    """Create a ``GroupNormPluginDynamic`` plugin.

    Args:
        layer_name: name handed to the creator's ``create_plugin``.
        num_groups: stored as a one-element int32 field.
        num_channels: stored as a one-element int32 field.
        W: passed through unchanged as the FLOAT32 field ``W``.
        B: passed through unchanged as the FLOAT32 field ``B``.
        eps: stored as a one-element float32 field.
        type_id: stored as a one-element int32 field.

    Returns:
        The object returned by the plugin creator.
    """
    int32_kind = trt.PluginFieldType.INT32
    float32_kind = trt.PluginFieldType.FLOAT32

    creator = trt.get_plugin_registry().get_plugin_creator(
        'GroupNormPluginDynamic', '1', '')

    # Field order matches the order in which they are appended below; the
    # list keeps every field referenced until the plugin has been created.
    fields = [
        trt.PluginField("num_groups",
                        np.array([num_groups], dtype=np.int32), int32_kind),
        trt.PluginField("num_channels",
                        np.array([num_channels], dtype=np.int32), int32_kind),
        trt.PluginField("eps",
                        np.array([eps], dtype=np.float32), float32_kind),
        trt.PluginField("W", W, float32_kind),
        trt.PluginField("B", B, float32_kind),
        trt.PluginField("type_id",
                        np.array([type_id], dtype=np.int32), int32_kind),
    ]

    pfc = trt.PluginFieldCollection()
    for field in fields:
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)
50 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_layernorm_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | # import pyamirstan_plugin as pyamir
 4 | 
 5 | import os
 6 | import os.path as osp
 7 | # dir_path = os.path.dirname(os.path.realpath(__file__))
 8 | from .globals import dir_path
 9 | import ctypes
10 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
11 | 
12 | import tensorrt as trt
13 | 
14 | 
def create_layernorm_plugin(layer_name,
                            normalized_shape,
                            W,
                            B,
                            eps=1e-5,
                            type_id=trt.DataType.FLOAT):
    """Create a ``LayerNormPluginDynamic`` plugin.

    Args:
        layer_name: name handed to the creator's ``create_plugin``.
        normalized_shape: values stored as the int32 ``normalized_shape``
            field.
        W: passed through unchanged as the FLOAT32 field ``W``.
        B: passed through unchanged as the FLOAT32 field ``B``.
        eps: stored as a one-element float32 field.
        type_id: stored as a one-element int32 field.

    Returns:
        The object returned by the plugin creator.
    """
    int32_kind = trt.PluginFieldType.INT32
    float32_kind = trt.PluginFieldType.FLOAT32

    creator = trt.get_plugin_registry().get_plugin_creator(
        'LayerNormPluginDynamic', '1', '')

    # The list keeps every field referenced until the plugin has been created.
    fields = [
        trt.PluginField("normalized_shape",
                        np.array(normalized_shape, dtype=np.int32),
                        int32_kind),
        trt.PluginField("eps",
                        np.array([eps], dtype=np.float32), float32_kind),
        trt.PluginField("W", W, float32_kind),
        trt.PluginField("B", B, float32_kind),
        trt.PluginField("type_id",
                        np.array([type_id], dtype=np.int32), int32_kind),
    ]

    pfc = trt.PluginFieldCollection()
    for field in fields:
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)
45 | 


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_meshgrid_plugin.py:
--------------------------------------------------------------------------------
 1 | import tensorrt as trt
 2 | import numpy as np
 3 | 
 4 | import os
 5 | import os.path as osp
 6 | from .globals import dir_path
 7 | import ctypes
 8 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 9 | 
10 | 
11 | 
def create_meshgrid_plugin(layer_name,
                                num_inputs,
                                slice_dims = [2, 3],
                                starts = [0., 0.],
                                strides = [1., 1.]):
    """Create a ``MeshGridPluginDynamic`` plugin.

    Args:
        layer_name: name handed to the creator's ``create_plugin``.
        num_inputs: converted with ``int()`` and stored as a one-element
            int32 field.
        slice_dims: values stored as the int32 ``slice_dims`` field.
        starts: values stored as the float32 ``starts`` field.
        strides: values stored as the float32 ``strides`` field.

    The list defaults are only read, never modified.

    Returns:
        The object returned by the plugin creator.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('MeshGridPluginDynamic', '1', '')

    # The list keeps every field referenced until the plugin has been created.
    fields = [
        trt.PluginField("num_inputs",
                        np.array([int(num_inputs)], dtype=np.int32),
                        trt.PluginFieldType.INT32),
        trt.PluginField("slice_dims",
                        np.array(slice_dims, dtype=np.int32),
                        trt.PluginFieldType.INT32),
        trt.PluginField("starts",
                        np.array(starts, dtype=np.float32),
                        trt.PluginFieldType.FLOAT32),
        trt.PluginField("strides",
                        np.array(strides, dtype=np.float32),
                        trt.PluginFieldType.FLOAT32),
    ]

    pfc = trt.PluginFieldCollection()
    for field in fields:
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_nms_plugin.py:
--------------------------------------------------------------------------------
 1 | import tensorrt as trt
 2 | import numpy as np
 3 | 
 4 | import os
 5 | import os.path as osp
 6 | from .globals import dir_path
 7 | import ctypes
 8 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 9 | 
10 | 
11 | 
def create_nms_plugin(layer_name,
                      iou_threshold):
    """Create a ``TorchNMSPluginDynamic`` plugin.

    Args:
        layer_name: name handed to the creator's ``create_plugin``.
        iou_threshold: stored as a one-element float32 field.

    Returns:
        The object returned by the plugin creator.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('TorchNMSPluginDynamic', '1', '')

    threshold_data = np.array([iou_threshold], dtype=np.float32)
    threshold_field = trt.PluginField(
        "iou_threshold", threshold_data, trt.PluginFieldType.FLOAT32)

    pfc = trt.PluginFieldCollection()
    pfc.append(threshold_field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_repeatdim_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import os
 4 | import os.path as osp
 5 | # dir_path = os.path.dirname(os.path.realpath(__file__))
 6 | from .globals import dir_path
 7 | import ctypes
 8 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 9 | 
10 | import tensorrt as trt
11 | 
12 | 
def create_repeat_plugin(layer_name,
                            repeat_shape,
                            type_id=trt.DataType.FLOAT):
    """Create a ``RepeatDimsPluginDynamic`` plugin.

    Args:
        layer_name: name handed to the creator's ``create_plugin``.
        repeat_shape: values stored as the int32 ``repeat_dims`` field.
        type_id: stored as a one-element int32 field.

    Returns:
        The object returned by the plugin creator.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('RepeatDimsPluginDynamic', '1', '')

    # The list keeps both fields referenced until the plugin has been created.
    fields = [
        trt.PluginField("repeat_dims",
                        np.array(repeat_shape, dtype=np.int32),
                        trt.PluginFieldType.INT32),
        trt.PluginField("type_id",
                        np.array([type_id], dtype=np.int32),
                        trt.PluginFieldType.INT32),
    ]

    pfc = trt.PluginFieldCollection()
    for field in fields:
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_roiextractor_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import os
 4 | import os.path as osp
 5 | from .globals import dir_path
 6 | import ctypes
 7 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 8 | 
 9 | import tensorrt as trt
10 | import torchvision.ops
11 | 
12 | 
def create_roiextractor_plugin(layer_name,
                                out_size,
                                sample_num,
                                featmap_strides,
                                roi_scale_factor,
                                finest_scale,
                                aligned):
    """Create a ``RoiExtractorPluginDynamic`` plugin.

    ``out_size``, ``sample_num``, ``finest_scale`` and ``aligned`` are stored
    as one-element int32 fields, ``roi_scale_factor`` as a one-element
    float32 field, and ``featmap_strides`` as a float32 array field.

    Returns:
        The object returned by the plugin creator.
    """
    def int32_field(field_name, value):
        # Wrap a single value as a one-element int32 plugin field.
        return trt.PluginField(field_name,
                               np.array([value], dtype=np.int32),
                               trt.PluginFieldType.INT32)

    creator = trt.get_plugin_registry().get_plugin_creator(
        'RoiExtractorPluginDynamic', '1', '')

    # The list keeps every field referenced until the plugin has been created.
    fields = [
        int32_field("out_size", out_size),
        int32_field("sample_num", sample_num),
        trt.PluginField("featmap_strides",
                        np.array(featmap_strides).astype(np.float32),
                        trt.PluginFieldType.FLOAT32),
        trt.PluginField("roi_scale_factor",
                        np.array([roi_scale_factor], dtype=np.float32),
                        trt.PluginFieldType.FLOAT32),
        int32_field("finest_scale", finest_scale),
        int32_field("aligned", aligned),
    ]

    pfc = trt.PluginFieldCollection()
    for field in fields:
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_roipool_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import os
 4 | import os.path as osp
 5 | from .globals import dir_path
 6 | import ctypes
 7 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 8 | 
 9 | import tensorrt as trt
10 | import torchvision.ops
11 | 
12 | 
def create_roipool_plugin(layer_name,
                            out_size,
                            featmap_strides,
                            roi_scale_factor,
                            finest_scale):
    """Create a ``RoiPoolPluginDynamic`` plugin.

    ``out_size`` and ``finest_scale`` are stored as one-element int32
    fields, ``roi_scale_factor`` as a one-element float32 field, and
    ``featmap_strides`` as a float32 array field.

    Returns:
        The object returned by the plugin creator.
    """
    int32_kind = trt.PluginFieldType.INT32
    float32_kind = trt.PluginFieldType.FLOAT32

    creator = trt.get_plugin_registry().get_plugin_creator(
        'RoiPoolPluginDynamic', '1', '')

    # The list keeps every field referenced until the plugin has been created.
    fields = [
        trt.PluginField("out_size",
                        np.array([out_size], dtype=np.int32), int32_kind),
        trt.PluginField("featmap_strides",
                        np.array(featmap_strides).astype(np.float32),
                        float32_kind),
        trt.PluginField("roi_scale_factor",
                        np.array([roi_scale_factor], dtype=np.float32),
                        float32_kind),
        trt.PluginField("finest_scale",
                        np.array([finest_scale], dtype=np.int32), int32_kind),
    ]

    pfc = trt.PluginFieldCollection()
    for field in fields:
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_torchcum_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import os
 4 | import os.path as osp
 5 | from .globals import dir_path
 6 | import ctypes
 7 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 8 | 
 9 | import tensorrt as trt
10 | 
11 | 
def create_torchcum_plugin(layer_name,
                                dim,
                                cum_type):
    """Create a ``TorchCumPluginDynamic`` plugin.

    ``dim`` and ``cum_type`` are each stored as a one-element int32 field of
    the same name.

    Returns:
        The object returned by the plugin creator.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('TorchCumPluginDynamic', '1', '')

    # The list keeps both fields referenced until the plugin has been created.
    fields = [
        trt.PluginField(field_name,
                        np.array([value], dtype=np.int32),
                        trt.PluginFieldType.INT32)
        for field_name, value in (("dim", dim), ("cum_type", cum_type))
    ]

    pfc = trt.PluginFieldCollection()
    for field in fields:
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_torchcummaxmin_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import os
 4 | import os.path as osp
 5 | from .globals import dir_path
 6 | import ctypes
 7 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 8 | 
 9 | import tensorrt as trt
10 | 
11 | 
def create_torchcummaxmin_plugin(layer_name,
                                dim,
                                cum_type):
    """Create a ``TorchCumMaxMinPluginDynamic`` plugin.

    ``dim`` and ``cum_type`` are each stored as a one-element int32 field of
    the same name.

    Returns:
        The object returned by the plugin creator.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator(
        'TorchCumMaxMinPluginDynamic', '1', '')

    # The list keeps both fields referenced until the plugin has been created.
    fields = [
        trt.PluginField(field_name,
                        np.array([value], dtype=np.int32),
                        trt.PluginFieldType.INT32)
        for field_name, value in (("dim", dim), ("cum_type", cum_type))
    ]

    pfc = trt.PluginFieldCollection()
    for field in fields:
        pfc.append(field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_torchflip_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import os
 4 | import os.path as osp
 5 | from .globals import dir_path
 6 | import ctypes
 7 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 8 | 
 9 | import tensorrt as trt
10 | 
11 | 
def create_torchflip_plugin(layer_name,
                            dims):
    """Create a ``TorchFlipPluginDynamic`` plugin.

    Args:
        layer_name: name handed to the creator's ``create_plugin``.
        dims: values stored as the int32 ``dims`` field.

    Returns:
        The object returned by the plugin creator.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('TorchFlipPluginDynamic', '1', '')

    dims_data = np.array(dims, dtype=np.int32)
    dims_field = trt.PluginField(
        "dims", dims_data, trt.PluginFieldType.INT32)

    pfc = trt.PluginFieldCollection()
    pfc.append(dims_field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/create_torchgather_plugin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import os
 4 | import os.path as osp
 5 | # dir_path = os.path.dirname(os.path.realpath(__file__))
 6 | from .globals import dir_path
 7 | import ctypes
 8 | ctypes.CDLL(osp.join(dir_path, "libamirstan_plugin.so"))
 9 | 
10 | import tensorrt as trt
11 | 
12 | 
def create_torchgather_plugin(layer_name,
                            dim):
    """Create a ``TorchGatherPluginDynamic`` plugin.

    Args:
        layer_name: name handed to the creator's ``create_plugin``.
        dim: stored as a one-element int32 field.

    Returns:
        The object returned by the plugin creator.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('TorchGatherPluginDynamic', '1', '')

    dim_data = np.array([dim], dtype=np.int32)
    dim_field = trt.PluginField("dim", dim_data, trt.PluginFieldType.INT32)

    pfc = trt.PluginFieldCollection()
    pfc.append(dim_field)

    return creator.create_plugin(layer_name, pfc)


--------------------------------------------------------------------------------
/torch2trt_dynamic/plugins/globals.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
# Directory searched for the plugin shared library. Lookup order:
#   1. ~/space/trt_plugin/build/lib/ when that path exists,
#   2. the PILGRIM_TRT_PLUGINS_LIB environment variable when it is set,
#   3. the directory containing this file.
dir_path = osp.join(osp.expanduser('~'), "space/trt_plugin/build/lib/")

if not osp.exists(dir_path):
    dir_path = os.environ.get("PILGRIM_TRT_PLUGINS_LIB")
    if dir_path is None:
        dir_path = osp.dirname(osp.realpath(__file__))


--------------------------------------------------------------------------------
/torch2trt_dynamic/shape_converter.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def get_tensor_shape(self):
 5 |     return self.size()
 6 | 
 7 | 
 8 | old_get_attribute = torch.Tensor.__getattribute__
 9 | def new_getattribute__(self, name):
10 |     if name is 'shape':
11 |         return get_tensor_shape(self)
12 |     else:
13 |         return old_get_attribute(self, name)
14 | 
15 | class ShapeConverter:
16 |     def __init__(self):
17 |         pass
18 | 
19 |     def __enter__(self):
20 |         torch.Tensor.__getattribute__ = new_getattribute__
21 | 
22 |     def __exit__(self, type, val, tb):
23 |         torch.Tensor.__getattribute__ = old_get_attribute


--------------------------------------------------------------------------------
/torch2trt_dynamic/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/pilgrim_torch2trt/27a8e6a195cbc3a83b16483ec4c0930da4aa77e6/torch2trt_dynamic/tests/__init__.py


--------------------------------------------------------------------------------
/torch2trt_dynamic/tests/torchvision/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/pilgrim_torch2trt/27a8e6a195cbc3a83b16483ec4c0930da4aa77e6/torch2trt_dynamic/tests/torchvision/__init__.py


--------------------------------------------------------------------------------
/torch2trt_dynamic/tests/torchvision/save_load.py:
--------------------------------------------------------------------------------
 1 | from torch2trt_dynamic import *
 2 | import torchvision
 3 | import torch
 4 | from .segmentation import deeplabv3_resnet50
 5 | 
 6 | 
 7 | if __name__ == '__main__':
 8 |     model = deeplabv3_resnet50().cuda().eval().half()
 9 |     data = torch.randn((1, 3, 224, 224)).cuda().half()
10 |     
11 |     print('Running torch2trt...')
12 |     model_trt = torch2trt_dynamic(model, [data], fp16_mode=True, max_workspace_size=1<<25)
13 | 
14 |     print('Saving model...')
15 |     torch.save(model_trt.state_dict(), '.test_model.pth')
16 | 
17 |     print('Loading model...')
18 |     model_trt_2 = TRTModule()
19 |     model_trt_2.load_state_dict(torch.load('.test_model.pth'))
20 | 
21 |     assert(model_trt_2.engine is not None)
22 |     
23 |     print(torch.max(torch.abs(model_trt_2(data) - model(data))))
24 |     print(torch.max(torch.abs(model_trt_2(data) - model_trt(data))))


--------------------------------------------------------------------------------
/torch2trt_dynamic/tests/torchvision/segmentation.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torchvision
 3 | from torch2trt_dynamic.module_test import add_module_test
 4 | 
 5 | 
 6 | class ModelWrapper(torch.nn.Module):
 7 |     def __init__(self, model):
 8 |         super(ModelWrapper, self).__init__()
 9 |         self.model = model
10 |     def forward(self, x):
11 |         return self.model(x)['out']
12 |     
13 | 
@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True)
def deeplabv3_resnet50():
    """Return an untrained torchvision DeepLabV3-ResNet50 wrapped in ModelWrapper."""
    return ModelWrapper(
        torchvision.models.segmentation.deeplabv3_resnet50(pretrained=False))
19 | 
20 | 
@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True)
def deeplabv3_resnet101():
    """Return an untrained torchvision DeepLabV3-ResNet101 wrapped in ModelWrapper."""
    return ModelWrapper(
        torchvision.models.segmentation.deeplabv3_resnet101(pretrained=False))
26 | 
27 | 
@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True)
def fcn_resnet50():
    """Return an untrained torchvision FCN-ResNet50 wrapped in ModelWrapper."""
    return ModelWrapper(
        torchvision.models.segmentation.fcn_resnet50(pretrained=False))
33 | 
34 | 
@add_module_test(torch.float16, torch.device('cuda'), [(1, 3, 224, 224)], fp16_mode=True)
def fcn_resnet101():
    """Return an untrained torchvision FCN-ResNet101 wrapped in ModelWrapper."""
    return ModelWrapper(
        torchvision.models.segmentation.fcn_resnet101(pretrained=False))


--------------------------------------------------------------------------------
/torch2trt_dynamic/utils.py:
--------------------------------------------------------------------------------
 1 | import graphviz
 2 | 
 3 | 
 4 | def trt_network_to_dot_graph(network):
 5 |     dot = graphviz.Digraph(comment='Network')
 6 |     
 7 |     # add nodes (layers)
 8 |     for i in range(network.num_layers):
 9 |         layer = network.get_layer(i)
10 |         dot.node(layer.name)
11 |         
12 |     # add nodes (inputs)
13 |     for i in range(network.num_inputs):
14 |         dot.node(network.get_input(i).name)
15 |         
16 |     # add nodes (outputs)
17 |     for i in range(network.num_outputs):
18 |         dot.node(network.get_output(i).name)
19 |         
20 |     # add layer->layer edges
21 |     for a in range(network.num_layers):
22 |         layer_a = network.get_layer(a)
23 |         
24 |         for b in range(network.num_layers):
25 |             layer_b = network.get_layer(b)
26 |             
27 |             for i in range(layer_a.num_outputs):
28 |                 output_i = layer_a.get_output(i)
29 |                 
30 |                 for j in range(layer_b.num_inputs):
31 |                     input_j = layer_b.get_input(j)
32 |                     
33 |                     if output_i == input_j:
34 |                         dot.edge(layer_a.name, layer_b.name, label=str(input_j.shape))
35 |       
36 |     # add input->layer edges
37 |     for i in range(network.num_inputs):
38 |         input_i = network.get_input(i)
39 |         
40 |         for b in range(network.num_layers):
41 |             layer_b = network.get_layer(b)
42 |             
43 |             for j in range(layer_b.num_inputs):
44 |                 input_j = layer_b.get_input(j)
45 | 
46 |                 if input_i == input_j:
47 |                     dot.edge(input_i.name, layer_b.name, label=str(input_j.shape))
48 |                     
49 |     # add layer->output edges
50 |     for i in range(network.num_outputs):
51 |         input_i = network.get_output(i)
52 |         
53 |         for b in range(network.num_layers):
54 |             layer_b = network.get_layer(b)
55 |             
56 |             for j in range(layer_b.num_outputs):
57 |                 input_j = layer_b.get_output(j)
58 | 
59 |                 if input_i == input_j:
60 |                     dot.edge(layer_b.name, input_i.name, label=str(input_j.shape))
61 |                     
62 |     return dot


--------------------------------------------------------------------------------