├── .gitignore ├── LICENSE.md ├── README.md ├── benchmarks ├── JETSON_NANO.md └── JETSON_XAVIER.md ├── build.py ├── examples ├── .gitignore ├── pg_mbv2_to_trt.py └── pg_resnet50_to_trt.py ├── log.md ├── notebooks ├── image_classification │ ├── conversion.ipynb │ ├── imagenet_labels.json │ └── live_demo.ipynb └── image_segmentation │ └── conversion.ipynb ├── pilgrim_trt_plugins ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── build.sh ├── include │ ├── amir_cuda_util │ │ ├── cuda_util.h │ │ └── cudnn_util.h │ └── plugin │ │ ├── adaptivePoolPlugin │ │ └── adaptivePoolPlugin.h │ │ ├── amirInferPlugin.h │ │ ├── batchedNMSPlugin │ │ └── batchedNMSPlugin.h │ │ ├── carafeFeatureReassemblePlugin │ │ └── carafeFeatureReassemblePlugin.h │ │ ├── deformableConvPlugin │ │ ├── deformableConvPlugin.h │ │ └── modulatedDeformableConvPlugin.h │ │ ├── deformablePoolPlugin │ │ └── deformablePoolPlugin.h │ │ ├── delta2bboxPlugin │ │ └── delta2bboxPlugin.h │ │ ├── exViewPlugin │ │ └── exViewPlugin.h │ │ ├── gridAnchorDynamicPlugin │ │ └── gridAnchorDynamicPlugin.h │ │ ├── gridSamplePlugin │ │ └── gridSamplePlugin.h │ │ ├── groupNormPlugin │ │ └── groupNormPlugin.h │ │ ├── layerNormPlugin │ │ └── layerNormPlugin.h │ │ ├── meshGridPlugin │ │ └── meshGridPlugin.h │ │ ├── repeatDimsPlugin │ │ └── repeatDimsPlugin.h │ │ ├── roiExtractorPlugin │ │ └── roiExtractorPlugin.h │ │ ├── roiPoolPlugin │ │ └── roiPoolPlugin.h │ │ ├── torchCumMaxMinPlugin │ │ └── torchCumMaxMinPlugin.h │ │ ├── torchCumPlugin │ │ └── torchCumPlugin.h │ │ ├── torchFlipPlugin │ │ └── torchFlipPlugin.h │ │ ├── torchGatherPlugin │ │ └── torchGatherPlugin.h │ │ └── torchNMSPlugin │ │ └── torchNMSPlugin.h └── src │ ├── CMakeLists.txt │ ├── amir_cuda_util │ ├── CMakeLists.txt │ ├── memcpyPermute.cu │ ├── reduceUtils.cuh │ ├── repeatDims.cu │ └── tensorMeanVar.cu │ └── plugin │ ├── CMakeLists.txt │ ├── InferPlugin.cpp │ ├── NvDsInferParseMmdet.cpp │ ├── adaptivePoolPlugin │ ├── CMakeLists.txt │ ├── 
adaptivePoolPlugin.cpp │ ├── adaptive_pool.cu │ └── adaptive_pool.h │ ├── batchedNMSPlugin │ ├── CMakeLists.txt │ ├── allClassNMS.cu │ ├── batchedNMSInference.cpp │ ├── batchedNMSInference.h │ ├── batchedNMSPlugin.cpp │ ├── batchedNMSPlugin.h │ ├── bboxUtils.h │ ├── common.cu │ ├── common.h │ ├── cub_helper.h │ ├── gatherNMSOutputs.cu │ ├── gatherNMSOutputs.h │ ├── kernel.cpp │ ├── kernel.h │ ├── nmsHelper.cpp │ ├── nmsUtils.h │ ├── permuteData.cu │ ├── plugin.h │ ├── sortScoresPerClass.cu │ └── sortScoresPerImage.cu │ ├── carafeFeatureReassemblePlugin │ ├── CMakeLists.txt │ ├── carafeFeatureReassemblePlugin.cpp │ ├── carafe_cuda.h │ └── carafe_cuda_kernel.cu │ ├── common │ ├── amirCommon.h │ ├── common.h │ ├── logger.cpp │ ├── logger.h │ ├── logging.h │ └── serialize.hpp │ ├── deformableConvPlugin │ ├── CMakeLists.txt │ ├── deform_conv_cuda.cpp │ ├── deform_conv_cuda.h │ ├── deform_conv_cuda_kernel.cu │ ├── deformableConvPlugin.cpp │ └── modulatedDeformableConvPlugin.cpp │ ├── deformablePoolPlugin │ ├── CMakeLists.txt │ ├── deform_roi_pool.cu │ ├── deform_roi_pool.h │ ├── deform_roi_pool_cuda_kernel.cuh │ └── deformablePoolPlugin.cpp │ ├── delta2bboxPlugin │ ├── CMakeLists.txt │ ├── delta2bbox.cu │ ├── delta2bbox.h │ └── delta2bboxPlugin.cpp │ ├── exViewPlugin │ ├── CMakeLists.txt │ ├── exViewPlugin.cpp │ ├── expressionParser.cpp │ └── expressionParser.h │ ├── gridAnchorDynamicPlugin │ ├── CMakeLists.txt │ ├── gridAnchorDynamicPlugin.cpp │ ├── grid_anchor_dynamic.cu │ └── grid_anchor_dynamic.h │ ├── gridSamplePlugin │ ├── CMakeLists.txt │ ├── gridSamplePlugin.cpp │ ├── grid_sample.cu │ └── grid_sample.h │ ├── groupNormPlugin │ ├── CMakeLists.txt │ ├── groupNormPlugin.cpp │ ├── group_norm.h │ └── group_norm_kernel.cu │ ├── layerNormPlugin │ ├── CMakeLists.txt │ ├── layerNormPlugin.cpp │ ├── layer_norm.h │ └── layer_norm_kernel.cu │ ├── meshGridPlugin │ ├── CMakeLists.txt │ ├── meshGridPlugin.cpp │ ├── mesh_grid.cu │ └── mesh_grid.h │ ├── repeatDimsPlugin │ ├── 
CMakeLists.txt │ └── repeatDimsPlugin.cpp │ ├── roiExtractorPlugin │ ├── CMakeLists.txt │ ├── roiExtractorPlugin.cpp │ ├── roi_extractor.h │ └── roi_extractor_kernel.cu │ ├── roiPoolPlugin │ ├── CMakeLists.txt │ ├── roiPoolPlugin.cpp │ ├── roi_pool.h │ └── roi_pool_kernel.cu │ ├── torchCumMaxMinPlugin │ ├── CMakeLists.txt │ ├── torchCumMaxMinPlugin.cpp │ ├── torch_cum_maxmin.cu │ └── torch_cum_maxmin.h │ ├── torchCumPlugin │ ├── CMakeLists.txt │ ├── torchCumPlugin.cpp │ ├── torch_cum.cu │ └── torch_cum.h │ ├── torchFlipPlugin │ ├── CMakeLists.txt │ ├── torchFlipPlugin.cpp │ ├── torch_flip.cu │ └── torch_flip.h │ ├── torchGatherPlugin │ ├── CMakeLists.txt │ ├── torchGatherPlugin.cpp │ ├── torch_gather.cu │ └── torch_gather.h │ └── torchNMSPlugin │ ├── CMakeLists.txt │ ├── bboxUtils.h │ ├── torchNMSPlugin.cpp │ ├── torch_nms.cu │ └── torch_nms.h ├── setup.py ├── test.sh ├── torch2trt_dynamic.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt └── top_level.txt └── torch2trt_dynamic ├── __init__.py ├── calibration.py ├── converters ├── AdaptiveAvgPool2d.py ├── AdaptiveMaxPool2d.py ├── BatchNorm1d.py ├── BatchNorm2d.py ├── Conv1d.py ├── Conv2d.py ├── ConvTranspose1d.py ├── ConvTranspose2d.py ├── GroupNorm.py ├── Identity.py ├── LayerNorm.py ├── Linear.py ├── LogSoftmax.py ├── ReLU.py ├── ReLU6.py ├── __init__.py ├── activation.py ├── adaptive_avg_pool2d.py ├── adaptive_max_pool2d.py ├── add.py ├── addcmul.py ├── arange.py ├── argmax.py ├── argmin.py ├── avg_pool2d.py ├── cast_type.py ├── cat.py ├── chunk.py ├── clamp.py ├── conv2d.py ├── cummax.py ├── cummin.py ├── cumprod.py ├── cumsum.py ├── deform_conv2d.py ├── div.py ├── dummy_converters.py ├── expand.py ├── expand_as.py ├── exview.py ├── flatten.py ├── flip.py ├── floor_divide.py ├── full_like.py ├── gather.py ├── getitem.py ├── grid_sample.py ├── identity.py ├── index_select.py ├── instance_norm.py ├── interpolate │ ├── __init__.py │ ├── interpolate.cpp │ ├── interpolate.proto │ └── interpolate.py ├── 
interpolate_custom.py ├── linear.py ├── linspace.py ├── logical.py ├── masked_fill.py ├── matmul.py ├── max.py ├── max_pool2d.py ├── mean.py ├── meshgrid.py ├── min.py ├── mod.py ├── mul.py ├── narrow.py ├── new_ones.py ├── new_zeros.py ├── nms.py ├── normalize.py ├── ones.py ├── ones_like.py ├── pad.py ├── permute.py ├── pixel_shuffle.py ├── pow.py ├── prelu.py ├── prod.py ├── relu.py ├── relu6.py ├── repeat.py ├── roi_align.py ├── roi_pool.py ├── sigmoid.py ├── size.py ├── softmax.py ├── split.py ├── squeeze.py ├── stack.py ├── std.py ├── sub.py ├── sum.py ├── t.py ├── tanh.py ├── to.py ├── topk.py ├── transpose.py ├── unary.py ├── unsqueeze.py ├── view.py ├── view_as.py ├── zeros.py └── zeros_like.py ├── module_test.py ├── plugins ├── __init__.py ├── create_adaptivepool_plugin.py ├── create_dcn_plugin.py ├── create_exview_plugin.py ├── create_gridsample_plugin.py ├── create_groupnorm_plugin.py ├── create_layernorm_plugin.py ├── create_meshgrid_plugin.py ├── create_nms_plugin.py ├── create_repeatdim_plugin.py ├── create_roiextractor_plugin.py ├── create_roipool_plugin.py ├── create_torchcum_plugin.py ├── create_torchcummaxmin_plugin.py ├── create_torchflip_plugin.py ├── create_torchgather_plugin.py └── globals.py ├── shape_converter.py ├── test.py ├── tests ├── __init__.py └── torchvision │ ├── __init__.py │ ├── classification.py │ ├── save_load.py │ └── segmentation.py ├── torch2trt_dynamic.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .ninja_deps 2 | .ninja_log 3 | build.ninja 4 | tags 5 | *.o 6 | *.pb.o 7 | torch2trt.egg-info 8 | build/ 9 | dist/ 10 | __pycache__/ 11 | *.so 12 | *.pb.h 13 | *.pb.cc 14 | *_pb2.py 15 | *.pyc 16 | *.ipynb_checkpoints 17 | *.pth 18 | torch2trt_dynamic.egg-info/ 19 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019, 
NVIDIA CORPORATION. All rights reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pilgrim Project 2 | 3 | This project is forked from torch2trt and torch2trt_dynamic. Its aim is to provide a way to convert PyTorch models directly to TensorRT engines. 4 | 5 | This can speed up your project if your model is built on PyTorch. We have tested several models and they all work fine, and we will add more tests and plugins to support more complicated models. 6 | 7 | The reasons why we do not go through ONNX are: 8 | 9 | - ONNX is an extra intermediate format that is not strictly necessary; 10 | - it is not easy to maintain an ONNX plugin in both the ONNX converter itself and the ONNX-to-TensorRT converter (onnx2trt). 
11 | 12 | ## Usage 13 | 14 | Pilgrim is at an early stage. The target models on our list are: 15 | 16 | - [x] mobilenetv3; 17 | - [x] resnet50; 18 | - [ ] yolov3; 19 | - [ ] yolov5; 20 | - [ ] MaskRCNN 21 | - [ ] more... 22 | 23 | You can find example models under the examples folder. To install, simply run: 24 | 25 | ``` 26 | sudo python3 setup.py build develop 27 | ``` 28 | 29 | For highly complicated models, such as FasterRCNN, MaskRCNN, YoloV5 and Centernet-DCN, you will also need to build the plugins: 30 | 31 | ``` 32 | cd pilgrim_trt_plugins 33 | ./build.sh 34 | ``` 35 | 36 | The plugins are updated very frequently, so please make sure your repo is up to date. 37 | 38 | 39 | 40 | ## TODO 41 | 42 | - [ ] Try converting a FasterRCNN model to TensorRT with the pilgrim tool; 43 | - [ ] Try converting a YoloV5 model to TensorRT with the pilgrim tool; 44 | - [ ] Try converting CenterNet-DCN to TensorRT with the pilgrim tool (this will invoke the DCN plugin, mapping the PyTorch plugin directly to a TensorRT plugin without any ONNX dependency); 45 | 46 | ## Copyright 47 | 48 | Copyright belongs to NVIDIA and all related authors. 
49 | -------------------------------------------------------------------------------- /benchmarks/JETSON_NANO.md: -------------------------------------------------------------------------------- 1 | | Name | Data Type | Input Shapes | torch2trt kwargs | Max Error | Throughput (PyTorch) | Throughput (TensorRT) | Latency (PyTorch) | Latency (TensorRT) | 2 | |------|-----------|--------------|------------------|-----------|----------------------|-----------------------|-------------------|--------------------| 3 | | torchvision.models.alexnet.alexnet | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 2.29E-05 | 46.4 | 69.9 | 22.1 | 14.7 | 4 | | torchvision.models.squeezenet.squeezenet1_0 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.20E-02 | 44 | 137 | 24.2 | 7.6 | 5 | | torchvision.models.squeezenet.squeezenet1_1 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 76.6 | 248 | 14 | 4.34 | 6 | | torchvision.models.resnet.resnet18 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.86E-03 | 29.4 | 90.2 | 34.7 | 11.4 | 7 | | torchvision.models.resnet.resnet34 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.56E-01 | 15.5 | 50.7 | 64.8 | 20.2 | 8 | | torchvision.models.resnet.resnet50 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 6.45E-02 | 12.4 | 34.2 | 81.7 | 29.8 | 9 | | torchvision.models.resnet.resnet101 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 1.01E+03 | 7.18 | 19.9 | 141 | 51.1 | 10 | | torchvision.models.resnet.resnet152 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 0.00E+00 | 4.96 | 14.1 | 204 | 72.3 | 11 | | torchvision.models.densenet.densenet121 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.42E-03 | 11.5 | 41.9 | 84.5 | 24.8 | 12 | | torchvision.models.densenet.densenet169 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.86E-03 | 8.25 | 33.2 | 118 | 31.2 | 13 | | torchvision.models.densenet.densenet201 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.42E-03 | 
6.84 | 25.4 | 141 | 40.8 | 14 | | torchvision.models.densenet.densenet161 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.15E-03 | 4.71 | 15.6 | 247 | 65.8 | 15 | | torchvision.models.vgg.vgg11 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.51E-04 | 8.9 | 18.3 | 114 | 55.1 | 16 | | torchvision.models.vgg.vgg13 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.07E-04 | 6.53 | 14.7 | 156 | 68.7 | 17 | | torchvision.models.vgg.vgg16 | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 4.58E-04 | 5.09 | 11.9 | 201 | 85.1 | 18 | | torchvision.models.vgg.vgg11_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 3.81E-04 | 8.74 | 18.4 | 117 | 54.8 | 19 | | torchvision.models.vgg.vgg13_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 5.19E-04 | 6.31 | 14.8 | 162 | 68.5 | 20 | | torchvision.models.vgg.vgg16_bn | float16 | [(1, 3, 224, 224)] | {'fp16_mode': True} | 9.77E-04 | 4.96 | 12 | 207 | 84.3 | 21 | -------------------------------------------------------------------------------- /build.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import subprocess 3 | import os 4 | from string import Template 5 | 6 | PLUGINS = [ 7 | 'interpolate', 8 | ] 9 | 10 | BASE_FOLDER = 'torch2trt_dynamic/converters' 11 | 12 | NINJA_TEMPLATE = Template(( 13 | "rule link\n" 14 | " command = g++ -shared -o $$out $$in -L$torch_dir/lib -L$cuda_dir/lib64 -L$trt_lib_dir -lc10 -lc10_cuda -ltorch -lcudart -lprotobuf -lprotobuf-lite -pthread -lpthread -lnvinfer\n" 15 | "rule protoc\n" 16 | " command = protoc $$in --cpp_out=. --python_out=.\n" 17 | "rule cxx\n" 18 | " command = g++ -c -fPIC $$in -I$cuda_dir/include -I$torch_dir/include -I$torch_dir/include/torch/csrc/api/include -I. 
def build(cuda_dir="/usr/local/cuda",
          torch_dir=imp.find_module('torch')[1],
          trt_inc_dir="/usr/include/aarch64-linux-gnu",
          trt_lib_dir="/usr/lib/aarch64-linux-gnu"):
    """Write ``build.ninja`` for the converter plugins and run ``ninja``.

    The ninja file is assembled from three pieces: the shared rules from
    ``NINJA_TEMPLATE``, one set of build statements per entry of ``PLUGINS``
    (from ``PLUGIN_TEMPLATE``), and a final statement linking every object
    file into ``torch2trt_dynamic/libtorch2trt_dynamic.so``.

    Args:
        cuda_dir: CUDA toolkit root, substituted for ``$cuda_dir``.
        torch_dir: Installed ``torch`` package directory, substituted for
            ``$torch_dir``. The default is resolved once, when this function
            is defined.
        trt_inc_dir: TensorRT include directory (``$trt_inc_dir``).
        trt_lib_dir: TensorRT library directory (``$trt_lib_dir``).

    The file is written to ``build.ninja`` in the current working directory.
    The exit status of ``ninja`` is not inspected, so a failed build does not
    raise here.
    """
    # Shared rules first; per-plugin statements are appended in PLUGINS order.
    sections = [
        NINJA_TEMPLATE.substitute(
            torch_dir=torch_dir,
            cuda_dir=cuda_dir,
            trt_inc_dir=trt_inc_dir,
            trt_lib_dir=trt_lib_dir,
        )
    ]

    object_files = []
    for name in PLUGINS:
        sections.append(
            PLUGIN_TEMPLATE.substitute(
                plugin=name,
                plugin_dir=os.path.join(BASE_FOLDER, name),
            )
        )
        # Each plugin yields a protobuf object and a plugin object.
        object_files.extend((name + '.pb.o', name + '.o'))

    link_statement = Template(
        "build torch2trt_dynamic/libtorch2trt_dynamic.so: link $o_files\n"
    )
    sections.append(link_statement.substitute(o_files=' '.join(object_files)))

    with open('build.ninja', 'w') as ninja_file:
        ninja_file.write(''.join(sections))

    subprocess.call(['ninja'])
"""
Example: convert torchvision's MobileNetV2 to a TensorRT engine with Pilgrim.

The serialized engine is written to ``mbv2.trt`` in the working directory.
"""
from torch2trt_dynamic.torch2trt import torch2trt
import torch
from torch import nn
from torchvision.models.resnet import resnet50
from torchvision.models.mobilenet import mobilenet_v2

# Build the network (no pretrained weights requested), move it to the GPU
# and switch it to inference mode.
network = mobilenet_v2()
network = network.cuda().eval()

# Example input used to trace the network during conversion.
input_shape = (1, 3, 224, 224)
example_input = torch.ones(input_shape).cuda()

# Shape range (min / opt / max) for the single network input.
# NOTE(review): this profile is defined but not passed to torch2trt below,
# unlike in pg_resnet50_to_trt.py -- confirm whether that is intentional.
min_shape = [1, 3, 128, 128]
opt_shape = [1, 3, 256, 256]
max_shape = [1, 3, 512, 512]
opt_shape_param = [[min_shape, opt_shape, max_shape]]

# Convert to TensorRT, feeding the example input.
model_trt = torch2trt(network, [example_input], fp16_mode=False)

print('serialize engine...')
engine_path = 'mbv2.trt'
serialized_engine = model_trt.engine.serialize()
with open(engine_path, "wb") as engine_file:
    engine_file.write(serialized_engine)

print('Done.')
"""
Example: convert torchvision's ResNet-50 to a TensorRT engine with Pilgrim.

The serialized engine is written to ``resnet50.trt`` in the working directory.
"""
from torch2trt_dynamic.torch2trt import torch2trt
import torch
from torch import nn
from torchvision.models.resnet import resnet50
from torchvision.models.mobilenet import mobilenet_v2

# Build the network (no pretrained weights requested), move it to the GPU
# and switch it to inference mode.
network = resnet50()
network = network.cuda().eval()

# Example input used to trace the network during conversion.
input_shape = (1, 3, 224, 224)
example_input = torch.ones(input_shape).cuda()

# Shape range (min / opt / max) for the single network input.
min_shape = [1, 3, 128, 128]
opt_shape = [1, 3, 256, 256]
max_shape = [1, 3, 512, 512]
shape_profile = [[min_shape, opt_shape, max_shape]]

# Convert to TensorRT, feeding the example input and the shape range.
model_trt = torch2trt(
    network,
    [example_input],
    fp16_mode=False,
    opt_shape_param=shape_profile,
)

print('serialize engine...')
engine_path = 'resnet50.trt'
serialized_engine = model_trt.engine.serialize()
with open(engine_path, "wb") as engine_file:
    engine_file.write(serialized_engine)

print('Done.')
-------------------------------------------------------------------------------- /pilgrim_trt_plugins/.gitignore: -------------------------------------------------------------------------------- 1 | /build/ 2 | 3 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 q.yao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/README.md: -------------------------------------------------------------------------------- 1 | # Amirstan_plugin 2 | 3 | Amirstan plugin contain some useful tensorrt plugin. 
4 | These plugins are used to support other projects such as 5 | 6 | https://github.com/grimoire/torch2trt_dynamic 7 | 8 | https://github.com/grimoire/mmdetection-to-tensorrt 9 | 10 | 11 | ## Requirements 12 | 13 | - TensorRT >= 7.0.0.11 14 | - cub >= 1.8.0 15 | 16 | ## Installation 17 | 18 | - Install cub: https://nvlabs.github.io/cub/ 19 | - Install TensorRT 7: https://developer.nvidia.com/tensorrt 20 | 21 | ```shell 22 | git clone https://github.com/grimoire/amirstan_plugin.git 23 | cd amirstan_plugin 24 | mkdir build 25 | cd build 26 | cmake -DCUB_ROOT_DIR=/path/to/cub -DTENSORRT_DIR=/path/to/TensorRT .. 27 | make -j10 28 | ``` 29 | 30 | Set the environment variable (in ~/.bashrc): 31 | 32 | ```shell 33 | export AMIRSTAN_LIBRARY_PATH=/path/to/amirstan_plugin/build/lib 34 | ``` 35 | 36 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/build.sh: -------------------------------------------------------------------------------- 1 | mkdir build 2 | cd build 3 | cmake -DTENSORRT_DIR=~/TensorRT .. 4 | make -j7 5 | cd .. 
namespace amirstan
{
namespace cuda
{

// Grid-stride loop: each thread starts at its global 1-D index and advances
// by the total number of launched threads until it reaches n.
#define CUDA_KERNEL_LOOP(i, n) \
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
         i += blockDim.x * gridDim.x)

// Checks cudaGetLastError(); on failure prints file, line and the CUDA error
// string, then terminates the process.
// NOTE(review): the process exits with status 0 even though this is an error
// path -- confirm whether a non-zero status was intended.
#define cudaCheckError() { \
    cudaError_t e=cudaGetLastError(); \
    if(e!=cudaSuccess) { \
        printf("Cuda failure %s:%d: '%s'\n",__FILE__,__LINE__,cudaGetErrorString(e)); \
        exit(0); \
    } \
}

// Threads per block assumed by GET_BLOCKS.
const int CUDA_NUM_THREADS = 512;
const int CUDA_WARP_SIZE=32;
// Warps per block; the float quotient (512 / 32.0f) is truncated back to int.
const int CUDA_NUM_WARP=CUDA_NUM_THREADS/float(CUDA_WARP_SIZE);
// Upper bound on the block count returned by GET_BLOCKS.
const int kMaxGridNum = 65535;

// Returns ceil(N / CUDA_NUM_THREADS), capped at kMaxGridNum.
inline int GET_BLOCKS(const int N)
{
    return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS);
}

// Fixed-capacity (8 entries) array of int values plus a dimension count.
struct TensorSize{
    int size[8];
    int dims;
};

// Same layout as TensorSize but with size_t entries.
struct TensorStride{
    size_t size[8];
    int dims;
};

// Permuted copy of src into dst. src_size holds the src_dim extents of src
// and permute holds, for each destination axis, the source axis it reads.
template
void memcpyPermute(value_type *dst,const value_type *src, int *src_size, int *permute, int src_dim, cudaStream_t stream=0);

// NOTE(review): presumably reduces src to its mean over the axes flagged in
// reduce_dims -- the definition is not visible here; confirm.
template
void tensorMean(T *dst, T *src, int* src_size, bool *reduce_dims, int dims, cudaStream_t stream=0, void* workspace=nullptr);

// NOTE(review): presumably as tensorMean, additionally writing the variance
// to var_dst -- the definition is not visible here; confirm.
template
void tensorMeanVar(T *mean_dst, T* var_dst,const T *src, int* src_size, bool *reduce_dims, int dims, cudaStream_t stream=0, void* workspace=nullptr);

// NOTE(review): presumably tiles src along each axis according to repeatDims
// -- the definition is not visible here; confirm.
template
void repeat_dims(T* dst, const T* src,const int *input_size, const int *repeatDims, int dims, cudaStream_t stream=0);
} // namespace cuda

} // namespace amirstan
-------------------------------------------------------------------------------- /pilgrim_trt_plugins/include/amir_cuda_util/cudnn_util.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace amirstan 4 | { 5 | namespace cudnn 6 | { 7 | template 8 | void cudnnBatchNormTrain(cudnnHandle_t handle, 9 | const T &input, 10 | int batch_size, 11 | int channels, 12 | int width, 13 | int height, 14 | const T &weight, const T &bias, 15 | const T &running_mean, const T &running_var, 16 | T exponentialAverageFactor, 17 | T epsilon, 18 | T &result_mean, T &result_var); 19 | } 20 | } // namespace amirstan -------------------------------------------------------------------------------- /pilgrim_trt_plugins/include/plugin/amirInferPlugin.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern "C" 4 | { 5 | // bool initLibAmirstanInferPlugins(void *logger, const char *libNamespace); 6 | 7 | bool initLibAmirstanInferPlugins(); 8 | } // extern "C" -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory (plugin) 2 | add_subdirectory (amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/amir_cuda_util/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | #find_package(CUDA REQUIRED) 4 | enable_language(CUDA) 5 | 6 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 7 | 8 | file(GLOB_RECURSE AMIR_CUDA_UTIL_HEADS *.h *.hpp *.cuh) 9 | file(GLOB AMIR_CUDA_UTIL_SOURCES *.cpp *.cu) 10 | 11 | source_group("Include" FILES ${AMIR_CUDA_UTIL_HEADS}) 12 | source_group("Source" FILES ${AMIR_CUDA_UTIL_SOURCES}) 13 | 14 | 
namespace amirstan
{
namespace cuda
{


// Copies n elements from src to dst while permuting axes.
// For every destination linear index the kernel peels off one coordinate per
// axis using the destination strides, and accumulates the matching source
// offset using the stride of the source axis that permute[i] names.
// The three TensorSize arguments are passed by value.
template
__global__ void copy_permute_kernel(value_type *dst, const value_type *src, int n,
                                    TensorSize ts_src_stride, TensorSize ts_dst_stride, TensorSize ts_permute, int src_dim)
{
    int* src_stride = &(ts_src_stride.size[0]);
    int* dst_stride = &(ts_dst_stride.size[0]);
    int* permute = &(ts_permute.size[0]);
    CUDA_KERNEL_LOOP(index, n)
    {
        // dst_index holds the part of the linear index not yet decomposed.
        size_t dst_index = index;
        size_t src_index = 0;
        for (int i = 0; i < src_dim; ++i)
        {
            // Coordinate along destination axis i ...
            int dim_index = dst_index / dst_stride[i];
            dst_index = dst_index % dst_stride[i];
            // ... is the coordinate along source axis permute[i].
            src_index += dim_index * src_stride[permute[i]];
        }
        dst[index] = src[src_index];
    }
}

// Host entry point: computes strides for the source and for the permuted
// destination, then launches copy_permute_kernel on `stream`.
//   dst, src : buffers dereferenced by the kernel
//   src_size : extents of src, read on the host (src_dim entries)
//   permute  : for each destination axis, the source axis it takes its
//              extent and stride from; read on the host (src_dim entries)
// NOTE(review): the temporaries are TensorSize objects, whose arrays hold 8
// ints, and index src_dim - 1 is written unconditionally, so this appears to
// require 1 <= src_dim <= 8 -- nothing here checks it; confirm with callers.
template
void memcpyPermute(value_type *dst,const value_type *src, int *src_size, int *permute, int src_dim, cudaStream_t stream)
{
    size_t copy_size = 1;
    TensorSize ts_permute;
    memcpy(&(ts_permute.size[0]), permute, src_dim *sizeof(int));

    TensorSize ts_src_stride;
    TensorSize ts_dst_stride;
    TensorSize ts_dst_size;
    int *src_stride = &(ts_src_stride.size[0]);
    int *dst_stride = &(ts_dst_stride.size[0]);
    int *dst_size = &(ts_dst_size.size[0]);
    // The innermost axis has stride 1 in both layouts.
    src_stride[src_dim - 1] = 1;
    dst_stride[src_dim - 1] = 1;

    // Destination extents, and source strides built from the innermost axis
    // outwards.
    for (int i = src_dim - 1; i >= 0; --i)
    {
        dst_size[i] = src_size[permute[i]];
        if (i < src_dim - 1)
        {
            src_stride[i] = src_stride[i + 1] * src_size[i + 1];
        }
    }

    // Total element count and destination strides, built the same way.
    for (int i = src_dim - 1; i >= 0; --i)
    {
        copy_size *= dst_size[i];
        if (i < src_dim - 1)
        {
            dst_stride[i] = dst_stride[i + 1] * dst_size[i + 1];
        }
    }

    // copy_size is a size_t here but is received as int n by the kernel.
    copy_permute_kernel<<>>
        (dst, src, copy_size,
         ts_src_stride, ts_dst_stride, ts_permute, src_dim);

}

// Explicit instantiation for float.
template void memcpyPermute(float *dst,const float *src, int *src_size, int *permute, int src_dim, cudaStream_t stream);

} // namespace cuda

} // namespace amirstan
extern "C"
{

    // Entry point exported with C linkage. It does no work and always
    // returns true.
    // NOTE(review): presumably it exists so that callers can force this
    // library to load, letting the plugin headers included by this file
    // register their plugins -- confirm against those headers.
    bool initLibAmirstanInferPlugins(){
        return true;
    }
} // extern "C"
| set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | 7 | #find_package(CUDA REQUIRED) 8 | enable_language(CUDA) 9 | 10 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 11 | 12 | file(GLOB_RECURSE PLUGIN_HEADS *.h *.hpp *.cuh) 13 | file(GLOB PLUGIN_SOURCE *.cpp *.cu) 14 | 15 | source_group("Include" FILES ${PLUGIN_HEADS}) 16 | source_group("Source" FILES ${PLUGIN_SOURCE}) 17 | 18 | cuda_add_library(${STATIC_TARGET} STATIC ${PLUGIN_HEADS} ${PLUGIN_SOURCE}) 19 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 21 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/batchedNMSInference.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #ifndef TRT_BATCHED_NMS_INFERENCE_H 17 | #define TRT_BATCHED_NMS_INFERENCE_H 18 | #include "plugin.h" 19 | 20 | using namespace nvinfer1; 21 | using namespace nvinfer1::plugin; 22 | 23 | pluginStatus_t nmsInference(cudaStream_t stream, int N, int boxesSize, int scoresSize, bool shareLocation, 24 | int backgroundLabelId, int numPredsPerClass, int numClasses, int topK, int keepTopK, float scoreThreshold, 25 | float iouThreshold, DataType DT_BBOX, const void* locData, DataType DT_SCORE, const void* confData, void* keepCount, 26 | void* nmsedBoxes, void* nmsedScores, void* nmsedClasses, void* workspace, bool isNormalized = true, 27 | bool confSigmoid = false, bool clipBoxes = true); 28 | #endif 29 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/bboxUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #ifndef TRT_BBOX_UTILS_H 17 | #define TRT_BBOX_UTILS_H 18 | 19 | #include "plugin.h" 20 | 21 | using namespace nvinfer1; 22 | using namespace nvinfer1::plugin; 23 | /* Bounding box held as four coordinates of type T: xmin, ymin, xmax, ymax. */ 24 | template <typename T> 25 | struct Bbox 26 | { 27 | T xmin, ymin, xmax, ymax; 28 | Bbox(T xmin, T ymin, T xmax, T ymax) 29 | : xmin(xmin) 30 | , ymin(ymin) 31 | , xmax(xmax) 32 | , ymax(ymax) 33 | { 34 | } 35 | Bbox() = default; 36 | }; 37 | /* Record pairing a confidence score with a label, a box index and a kept flag. */ 38 | template <typename T> 39 | struct BboxInfo 40 | { 41 | T conf_score; 42 | int label; 43 | int bbox_idx; 44 | bool kept; 45 | BboxInfo(T conf_score, int label, int bbox_idx, bool kept) 46 | : conf_score(conf_score) 47 | , label(label) 48 | , bbox_idx(bbox_idx) 49 | , kept(kept) 50 | { 51 | } 52 | BboxInfo() = default; 53 | }; 54 | /* Orders boxes by xmin alone; the other three coordinates are ignored. Uses the members Bbox actually declares (the former x1 does not exist). */ 55 | template <typename T> 56 | bool operator<(const Bbox<T>& lhs, const Bbox<T>& rhs) 57 | { 58 | return lhs.xmin < rhs.xmin; 59 | } 60 | /* Boxes compare equal only when all four coordinates match. */ 61 | template <typename T> 62 | bool operator==(const Bbox<T>& lhs, const Bbox<T>& rhs) 63 | { 64 | return lhs.xmin == rhs.xmin && lhs.ymin == rhs.ymin && lhs.xmax == rhs.xmax && lhs.ymax == rhs.ymax; 65 | } 66 | // }}} 67 | 68 | int8_t* alignPtr(int8_t* ptr, uintptr_t to); 69 | 70 | int8_t* nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize); 71 | 72 | size_t dataTypeSize(DataType dtype); 73 | 74 | void setUniformOffsets(cudaStream_t stream, int num_segments, int offset, int* d_offsets); 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/cub_helper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | #include "kernel.h" 18 | /* Returns the temporary-storage size, in bytes, that cub::DeviceSegmentedRadixSort::SortPairsDescending reports for num_items items split into num_segments segments. KeyT and ValueT are the key and value element types of the sort. The include guard above keeps this definition from being repeated when the header is included more than once. */ 19 | template <typename KeyT, typename ValueT> 20 | size_t cubSortPairsWorkspaceSize(int num_items, int num_segments) 21 | { 22 | size_t temp_storage_bytes = 0; 23 | /* Size query only: per the CUB documentation, a null temp-storage pointer makes the call write the required size into temp_storage_bytes without sorting. */ 24 | cub::DeviceSegmentedRadixSort::SortPairsDescending((void*) NULL, temp_storage_bytes, (const KeyT*) NULL, 25 | (KeyT*) NULL, (const ValueT*) NULL, (ValueT*) NULL, 26 | num_items, // # items 27 | num_segments, // # segments 28 | (const int*) NULL, (const int*) NULL); 29 | return temp_storage_bytes; 30 | } 31 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/gatherNMSOutputs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | #ifndef TRT_BATCHED_NMS_HELPER_H 17 | #define TRT_BATCHED_NMS_HELPER_H 18 | #include "plugin.h" 19 | using namespace nvinfer1; 20 | using namespace nvinfer1::plugin; 21 | 22 | pluginStatus_t gatherNMSOutputs(cudaStream_t stream, bool shareLocation, int numImages, int numPredsPerClass, 23 | int numClasses, int topK, int keepTopK, DataType DT_BBOX, DataType DT_SCORE, const void* indices, 24 | const void* scores, const void* bboxData, void* keepCount, void* nmsedBoxes, void* nmsedScores, void* nmsedClasses, 25 | bool clipBoxes = true); 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/kernel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include "kernel.h" 17 | #include "plugin.h" 18 | /* Computes the total workspace size for detection inference: fills seven per-region sizes from the helper size functions and hands the list to calculateTotalWorkspaceSize. NOTE(review): detectionForwardBBoxDataSize and detectionForwardBBoxPermuteSize return (size_t)-1 for non-FP32 boxes and that value is passed on unchecked here - confirm callers only use kFLOAT. */ 19 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 20 | int topK, DataType DT_BBOX, DataType DT_SCORE) 21 | { 22 | size_t wss[7]; /* one size per workspace region */ 23 | wss[0] = detectionForwardBBoxDataSize(N, C1, DT_BBOX); 24 | wss[1] = detectionForwardBBoxPermuteSize(shareLocation, N, C1, DT_BBOX); 25 | wss[2] = detectionForwardPreNMSSize(N, C2); 26 | wss[3] = detectionForwardPreNMSSize(N, C2); /* second region of the same size as wss[2] */ 27 | wss[4] = detectionForwardPostNMSSize(N, numClasses, topK); 28 | wss[5] = detectionForwardPostNMSSize(N, numClasses, topK); /* second region of the same size as wss[4] */ 29 | wss[6] = std::max(sortScoresPerClassWorkspaceSize(N, numClasses, numPredsPerClass, DT_SCORE), 30 | sortScoresPerImageWorkspaceSize(N, numClasses * topK, DT_SCORE)); /* larger of the two sort requests; presumably both sorts reuse this region - TODO confirm */ 31 | return calculateTotalWorkspaceSize(wss, 7); 32 | } 33 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/nmsHelper.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | #include "plugin.h" 17 | #include 18 | /* Every size below widens its first factor to size_t before multiplying, so a large N / C / topK product can no longer overflow int. */ 19 | using namespace nvinfer1; 20 | using namespace nvinfer1::plugin; 21 | /* Bytes needed for N * C1 FP32 values; prints a message and returns (size_t)-1 for any other DT_BBOX. */ 22 | size_t detectionForwardBBoxDataSize(int N, int C1, DataType DT_BBOX) 23 | { 24 | if (DT_BBOX == DataType::kFLOAT) 25 | { 26 | return static_cast<size_t>(N) * C1 * sizeof(float); 27 | } 28 | 29 | printf("Only FP32 type bounding boxes are supported.\n"); 30 | return (size_t) -1; 31 | } 32 | /* Bytes needed for N * C1 FP32 values, or 0 when shareLocation is true; prints a message and returns (size_t)-1 for any other DT_BBOX. */ 33 | size_t detectionForwardBBoxPermuteSize(bool shareLocation, int N, int C1, DataType DT_BBOX) 34 | { 35 | if (DT_BBOX == DataType::kFLOAT) 36 | { 37 | return shareLocation ? 0 : static_cast<size_t>(N) * C1 * sizeof(float); 38 | } 39 | printf("Only FP32 type bounding boxes are supported.\n"); 40 | return (size_t) -1; 41 | } 42 | /* Bytes needed for N * C2 elements of sizeof(float) bytes each; the ASSERT pins sizeof(float) == sizeof(int). */ 43 | size_t detectionForwardPreNMSSize(int N, int C2) 44 | { 45 | ASSERT(sizeof(float) == sizeof(int)); 46 | return static_cast<size_t>(N) * C2 * sizeof(float); 47 | } 48 | /* Bytes needed for N * numClasses * topK elements of sizeof(float) bytes each. */ 49 | size_t detectionForwardPostNMSSize(int N, int numClasses, int topK) 50 | { 51 | ASSERT(sizeof(float) == sizeof(int)); 52 | return static_cast<size_t>(N) * numClasses * topK * sizeof(float); 53 | } 54 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/batchedNMSPlugin/nmsUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | #ifndef TRT_NMS_UTILS_H 17 | #define TRT_NMS_UTILS_H 18 | 19 | #include "plugin.h" 20 | 21 | using namespace nvinfer1; 22 | using namespace nvinfer1::plugin; 23 | 24 | size_t detectionInferenceWorkspaceSize(bool shareLocation, int N, int C1, int C2, int numClasses, int numPredsPerClass, 25 | int topK, DataType DT_BBOX, DataType DT_SCORE); 26 | #endif 27 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/carafeFeatureReassemblePlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME carafeFeatureReassemblePlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | 7 | #find_package(CUDA REQUIRED) 8 | enable_language(CUDA) 9 | 10 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 11 | 12 | file(GLOB_RECURSE DCN_PLUGIN_HEADS *.h *.hpp *.cuh) 13 | file(GLOB DCN_PLUGIN_SOURCES *.cpp *.cu) 14 | 15 | source_group("Include" FILES ${DCN_PLUGIN_HEADS}) 16 | source_group("Source" FILES ${DCN_PLUGIN_SOURCES}) 17 | 18 | cuda_add_library(${STATIC_TARGET} STATIC ${DCN_PLUGIN_HEADS} ${DCN_PLUGIN_SOURCES}) 19 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 21 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) 22 | # target_link_libraries(dcn_plugin ${CUDA_npp_LIBRARY}) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/carafeFeatureReassemblePlugin/carafe_cuda.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace amirstan 4 | { 5 | namespace plugin 6 | { 7 | template 8 | int CARAFEForwardLaucher(const T* features, const T* masks, 9 | const int kernel_size, const int group_size, 10 | const int scale_factor, const int batch_size, 11 | const 
int channels, const int input_height, 12 | const int input_width, const int output_height, 13 | const int output_width, const int mask_channels, 14 | T* rfeatures, T* routput, 15 | T* rmasks, T* output, 16 | cudaStream_t stream); 17 | } 18 | } // namespace amirstan -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/common/logger.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "logger.h" 18 | #include "logging.h" 19 | 20 | Logger gLogger{Logger::Severity::kINFO}; 21 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; 22 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; 23 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; 24 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; 25 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; 26 | 27 | void setReportableSeverity(Logger::Severity severity) 28 | { 29 | gLogger.setReportableSeverity(severity); 30 | gLogVerbose.setReportableSeverity(severity); 31 | gLogInfo.setReportableSeverity(severity); 32 | gLogWarning.setReportableSeverity(severity); 33 | gLogError.setReportableSeverity(severity); 34 | gLogFatal.setReportableSeverity(severity); 35 | } 36 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/common/logger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef LOGGER_H 18 | #define LOGGER_H 19 | 20 | #include "logging.h" 21 | 22 | extern Logger gLogger; 23 | extern LogStreamConsumer gLogVerbose; 24 | extern LogStreamConsumer gLogInfo; 25 | extern LogStreamConsumer gLogWarning; 26 | extern LogStreamConsumer gLogError; 27 | extern LogStreamConsumer gLogFatal; 28 | 29 | void setReportableSeverity(Logger::Severity severity); 30 | 31 | #endif // LOGGER_H 32 | -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/deformableConvPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME deformableConvPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | 7 | #find_package(CUDA REQUIRED) 8 | enable_language(CUDA) 9 | 10 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 11 | 12 | file(GLOB_RECURSE DCN_PLUGIN_HEADS *.h *.hpp *.cuh) 13 | file(GLOB DCN_PLUGIN_SOURCES *.cpp *.cu) 14 | 15 | source_group("Include" FILES ${DCN_PLUGIN_HEADS}) 16 | source_group("Source" FILES ${DCN_PLUGIN_SOURCES}) 17 | 18 | cuda_add_library(${STATIC_TARGET} STATIC ${DCN_PLUGIN_HEADS} ${DCN_PLUGIN_SOURCES}) 19 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 21 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) 22 | # target_link_libraries(dcn_plugin ${CUDA_npp_LIBRARY}) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/deformableConvPlugin/deform_conv_cuda.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | typedef struct _DCN_PARAMS 6 | { 7 | cublasHandle_t cublas_handle; 8 | int batchSize = 1; 9 | int inputChannel = 1; 10 | int inputW = 256; 11 | int inputH = 256; 12 | int outputChannel = 1; 
13 | int kernelW = 3; 14 | int kernelH = 3; 15 | int strideW = 1; 16 | int strideH = 1; 17 | int padW = 0; 18 | int padH = 0; 19 | int dilationW = 1; 20 | int dilationH = 1; 21 | int group = 1; 22 | int deformable_group = 1; 23 | int im2col_step = 64; 24 | } DCN_PARAMS; 25 | 26 | int deform_conv_forward_cuda(float *input, float *weight, float *bias, float *offset, 27 | float *output, void* workspace, 28 | const DCN_PARAMS &dcn_params, 29 | cudaStream_t stream = 0); 30 | 31 | 32 | void modulated_deform_conv_cuda_forward( 33 | float* input, float* weight, float* bias, 34 | float* offset, float* mask, float* output, 35 | void *workspace, const DCN_PARAMS &dcn_params, cudaStream_t stream=0); -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/deformablePoolPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME deformablePoolPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | 7 | #find_package(CUDA REQUIRED) 8 | enable_language(CUDA) 9 | 10 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 11 | 12 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 13 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 14 | 15 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 16 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 17 | 18 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 19 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 21 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/deformablePoolPlugin/deform_roi_pool.cu: 
-------------------------------------------------------------------------------- 1 | #include "deform_roi_pool_cuda_kernel.cuh" 2 | #include "deform_roi_pool.h" 3 | 4 | #include "amir_cuda_util/cuda_util.h" 5 | 6 | 7 | 8 | namespace amirstan 9 | { 10 | namespace plugin 11 | { 12 | using namespace amirstan::cuda; 13 | template 14 | void DeformRoIPoolForwardCUDAKernelLauncher(scalar_t* input, scalar_t* rois, 15 | scalar_t* offset, scalar_t* output, 16 | int pooled_height, int pooled_width, 17 | int output_size, int channels, int height, int width, 18 | float spatial_scale, 19 | int sampling_ratio, float gamma, cudaStream_t stream) { 20 | 21 | deform_roi_pool_forward_cuda_kernel 22 | <<>>( 23 | output_size, input, 24 | rois, offset, 25 | output, pooled_height, pooled_width, 26 | static_cast(spatial_scale), sampling_ratio, 27 | static_cast(gamma), channels, height, width); 28 | 29 | } 30 | 31 | void deform_roi_pool_forward(float* input, float* rois, float* offset, 32 | float* output, int pooled_height, int pooled_width, 33 | int output_size, int channels, int height, int width, 34 | float spatial_scale, int sampling_ratio, 35 | float gamma, 36 | cudaStream_t stream){ 37 | DeformRoIPoolForwardCUDAKernelLauncher(input, rois, offset, output, 38 | pooled_height, pooled_width, 39 | output_size, channels, height, width, 40 | spatial_scale, sampling_ratio, gamma, 41 | stream); 42 | } 43 | 44 | } 45 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/deformablePoolPlugin/deform_roi_pool.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | 6 | 7 | namespace amirstan 8 | { 9 | namespace plugin 10 | { 11 | 12 | void deform_roi_pool_forward(float* input, float* rois, float* offset, 13 | float* output, int pooled_height, int pooled_width, 14 | int output_size, int channels, int height, int width, 15 | float spatial_scale, int sampling_ratio, 16 | 
float gamma, 17 | cudaStream_t stream); 18 | 19 | } 20 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/delta2bboxPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME delta2bboxPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | 7 | #find_package(CUDA REQUIRED) 8 | enable_language(CUDA) 9 | 10 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 11 | 12 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 13 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 14 | 15 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 16 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 17 | 18 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 19 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 21 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/delta2bboxPlugin/delta2bbox.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | 5 | 6 | namespace amirstan 7 | { 8 | namespace plugin 9 | { 10 | 11 | template 12 | void delta2bbox(T* out_cls, T* out_bbox, 13 | const T* in_cls, const T* in_bbox, const T* anchor, const int* clip_range, 14 | int batch_size, int num_bbox, int num_outbbox, int num_classes, int num_ratios, 15 | bool use_segmoid_cls, 16 | float* mean, float* std, 17 | cudaStream_t stream); 18 | 19 | } 20 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/exViewPlugin/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME exViewPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | #find_package(CUDA REQUIRED) 7 | enable_language(CUDA) 8 | 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 10 | 11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 13 | 14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 16 | 17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/exViewPlugin/expressionParser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "NvInferPlugin.h" 4 | 5 | namespace amirstan 6 | { 7 | namespace plugin 8 | { 9 | 10 | const nvinfer1::IDimensionExpr* parse_expression(const std::string& exp, const nvinfer1::DimsExprs *inputs, nvinfer1::IExprBuilder &exprBuilder); 11 | 12 | } 13 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/gridAnchorDynamicPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME gridAnchorDynamicPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | 7 | #find_package(CUDA REQUIRED) 8 | enable_language(CUDA) 9 | 10 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 11 | 12 
| file(GLOB_RECURSE DCN_PLUGIN_HEADS *.h *.hpp *.cuh) 13 | file(GLOB DCN_PLUGIN_SOURCES *.cpp *.cu) 14 | 15 | source_group("Include" FILES ${DCN_PLUGIN_HEADS}) 16 | source_group("Source" FILES ${DCN_PLUGIN_SOURCES}) 17 | 18 | cuda_add_library(${STATIC_TARGET} STATIC ${DCN_PLUGIN_HEADS} ${DCN_PLUGIN_SOURCES}) 19 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 21 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) 22 | # target_link_libraries(dcn_plugin ${CUDA_npp_LIBRARY}) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/gridAnchorDynamicPlugin/grid_anchor_dynamic.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "grid_anchor_dynamic.h" 5 | #include "amir_cuda_util/cuda_util.h" 6 | 7 | namespace amirstan 8 | { 9 | namespace plugin 10 | { 11 | using namespace amirstan::cuda; 12 | template 13 | __global__ void grid_anchor_dynamic_kernel(T* output, const T *base_anchor, 14 | int width, int height, 15 | int stride, int num_base_anchor){ 16 | CUDA_KERNEL_LOOP(i, width*height*num_base_anchor){ 17 | const int y = i/(width*num_base_anchor); 18 | const int x = (i%(width*num_base_anchor))/num_base_anchor; 19 | const int base_id = i%num_base_anchor; 20 | 21 | output[i*4 + 0] = base_anchor[base_id*4 + 0] + x*stride; 22 | output[i*4 + 1] = base_anchor[base_id*4 + 1] + y*stride; 23 | output[i*4 + 2] = base_anchor[base_id*4 + 2] + x*stride; 24 | output[i*4 + 3] = base_anchor[base_id*4 + 3] + y*stride; 25 | } 26 | } 27 | 28 | template 29 | void grid_anchor_dynamic(T *output, const T* base_anchor, 30 | int width, int height, 31 | int stride, 32 | int num_base_anchor, 33 | cudaStream_t stream){ 34 | 35 | size_t input_size = num_base_anchor*height*width; 36 | grid_anchor_dynamic_kernel<<>>(output, base_anchor, 37 | width, height, stride, num_base_anchor); 38 
| } 39 | 40 | template void grid_anchor_dynamic(float *output, const float* base_anchor, 41 | int width, int height, 42 | int stride, 43 | int num_base_anchor, 44 | cudaStream_t stream); 45 | 46 | } 47 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/gridAnchorDynamicPlugin/grid_anchor_dynamic.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace amirstan 4 | { 5 | namespace plugin 6 | { 7 | 8 | template 9 | void grid_anchor_dynamic(T *output, const T* base_anchor, 10 | int width, int height, 11 | int stride, 12 | int num_base_anchor, 13 | cudaStream_t stream); 14 | 15 | } 16 | } // namespace amirstan -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/gridSamplePlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME gridSamplePlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | #find_package(CUDA REQUIRED) 7 | enable_language(CUDA) 8 | 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 10 | 11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 13 | 14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 16 | 17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/gridSamplePlugin/grid_sample.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace amirstan 4 | { 5 | namespace plugin 6 | { 7 | 8 | enum class GridSamplerInterpolation {Bilinear, Nearest}; 9 | enum class GridSamplerPadding {Zeros, Border, Reflection}; 10 | 11 | template 12 | void grid_sample(T *output, const T* input, const T* grid, 13 | int* output_dims, int* input_dims, int *grid_dims, int nb_dims, 14 | GridSamplerInterpolation interp, GridSamplerPadding padding, 15 | bool align_corners, 16 | cudaStream_t stream); 17 | 18 | } 19 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/groupNormPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME groupNormPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | 7 | #find_package(CUDA REQUIRED) 8 | enable_language(CUDA) 9 | 10 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 11 | 12 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 13 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 14 | 15 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 16 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 17 | 18 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 19 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 21 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/groupNormPlugin/group_norm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | 5 | 6 | namespace amirstan 7 | { 8 | namespace plugin 9 | { 10 | template 11 | void 
compute_group_norm(T* output, const T* input, 12 | int batch_size, int num_groups, int num_channels, int WH, 13 | T eps, 14 | const float* weight,const float* bias, cudaStream_t stream, void* workspace); 15 | } 16 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/groupNormPlugin/group_norm_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "group_norm.h" 7 | #include "amir_cuda_util/cuda_util.h" 8 | 9 | namespace amirstan 10 | { 11 | namespace plugin 12 | { 13 | using namespace amirstan::cuda; 14 | template 15 | __global__ void group_norm_kernel(T* output,const T* input, size_t input_size, 16 | int batch_size, int num_groups, int num_channels, int WH, 17 | T eps, 18 | T * mean, T* var, const float* weight,const float* bias){ 19 | CUDA_KERNEL_LOOP(i, input_size) { 20 | const int mean_var_index = i/(num_channels*WH/num_groups); 21 | const int axpy_index = (i%(num_channels*WH))/WH; 22 | T ret = (input[i]- mean[mean_var_index])/sqrt(var[mean_var_index]+eps); 23 | ret = ret*T(weight[axpy_index]) + T(bias[axpy_index]); 24 | output[i] = ret; 25 | } 26 | } 27 | 28 | template 29 | void compute_group_norm(T* output, const T* input, 30 | int batch_size, int num_groups, int num_channels, int WH, 31 | T eps, 32 | const float* weight,const float* bias, cudaStream_t stream, void* workspace){ 33 | T* mean = (T*)workspace; 34 | T* var = mean + batch_size*num_groups; 35 | int mean_var_shape[2] = {batch_size*num_groups, num_channels*WH/num_groups}; 36 | bool mean_var_reduce_dims[2] = {false,true}; 37 | 38 | amirstan::cuda::tensorMeanVar(mean,var, input, 39 | &mean_var_shape[0], &mean_var_reduce_dims[0] , 2, 40 | stream, (void*)(var+batch_size*num_groups)); 41 | 42 | size_t input_size = batch_size * num_channels * WH; 43 | 44 | group_norm_kernel<<>>(output, input, input_size, 45 | batch_size, num_groups, 
num_channels, WH, 46 | eps, 47 | mean, var, weight, bias); 48 | 49 | } 50 | 51 | template void compute_group_norm(float* output, const float* input, 52 | int batch_size, int num_groups, int num_channels, int WH, 53 | float eps, 54 | const float* weight,const float* bias, cudaStream_t stream, void* workspace); 55 | 56 | 57 | // template void compute_group_norm(half* output, const half* input, 58 | // int batch_size, int num_groups, int num_channels, int WH, 59 | // half eps, 60 | // const float* weight,const float* bias, cudaStream_t stream, void* workspace); 61 | } 62 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/layerNormPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME layerNormPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | 7 | #find_package(CUDA REQUIRED) 8 | enable_language(CUDA) 9 | 10 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 11 | 12 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 13 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 14 | 15 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 16 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 17 | 18 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 19 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 21 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/layerNormPlugin/layer_norm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | 5 | 6 | namespace amirstan 7 | { 8 | namespace plugin 9 | { 10 | template 11 | 
void compute_layer_norm(T* output, const T* input, 12 | int norm_size, int layer_size, 13 | T eps, 14 | const T* weight,const T* bias, cudaStream_t stream, void* workspace); 15 | } 16 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/layerNormPlugin/layer_norm_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "layer_norm.h" 6 | #include "amir_cuda_util/cuda_util.h" 7 | 8 | namespace amirstan 9 | { 10 | namespace plugin 11 | { 12 | 13 | 14 | using namespace amirstan::cuda; 15 | template 16 | __global__ void layer_norm_kernel(T* output,const T* input, size_t input_size, 17 | int norm_size, int layer_size, 18 | T eps, 19 | T * mean, T * var, const T* weight,const T* bias){ 20 | CUDA_KERNEL_LOOP(i, input_size) { 21 | const int mean_var_index = i/layer_size; 22 | const int axpy_index = i%layer_size; 23 | T ret = (input[i]- mean[mean_var_index])/sqrt(var[mean_var_index]+eps); 24 | ret = ret*weight[axpy_index] + bias[axpy_index]; 25 | output[i] = ret; 26 | } 27 | } 28 | 29 | template 30 | void compute_layer_norm(T* output, const T* input, 31 | int norm_size, int layer_size, 32 | T eps, 33 | const T* weight,const T* bias, cudaStream_t stream, void* workspace){ 34 | T* mean = (T*)workspace; 35 | T* var = mean + norm_size; 36 | 37 | int mean_var_shape[2] = {norm_size, layer_size}; 38 | bool mean_var_reduce_dims[2] = {false,true}; 39 | 40 | amirstan::cuda::tensorMeanVar(mean, var, input, 41 | &mean_var_shape[0], &mean_var_reduce_dims[0] , 2, 42 | stream, (void*)(var+norm_size)); 43 | 44 | size_t input_size = norm_size * layer_size; 45 | 46 | layer_norm_kernel<<>>(output, input, input_size, 47 | norm_size, layer_size, 48 | eps, 49 | mean, var, weight, bias); 50 | 51 | } 52 | 53 | template void compute_layer_norm(float* output, const float* input, 54 | int norm_size, int layer_size, 55 | float eps, 56 | 
const float* weight,const float* bias, cudaStream_t stream, void* workspace); 57 | 58 | 59 | // template void compute_layer_norm(half* output, const half* input, 60 | // int norm_size, int layer_size, 61 | // half eps, 62 | // const half* weight,const half* bias, cudaStream_t stream, void* workspace); 63 | } 64 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/meshGridPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME meshGridPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | #find_package(CUDA REQUIRED) 7 | enable_language(CUDA) 8 | 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 10 | 11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 13 | 14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 16 | 17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/meshGridPlugin/mesh_grid.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "mesh_grid.h" 7 | #include "amir_cuda_util/cuda_util.h" 8 | 9 | namespace amirstan 10 | { 11 | namespace plugin 12 | { 13 | using namespace amirstan::cuda; 14 | 15 | 16 | template 17 | __global__ void arange_mesh_grid_kernel(T* output, 18 | size_t pre_stride, size_t post_stride, 19 | float start, float stride, size_t 
N){ 20 | 21 | CUDA_KERNEL_LOOP(i, N){ 22 | const size_t index = (i%pre_stride)/post_stride; 23 | 24 | const T value = start + index * (stride); 25 | output[i] = value; 26 | } 27 | } 28 | 29 | 30 | template 31 | void arange_mesh_grid(T *output, 32 | const int* output_dims, int nb_dims, 33 | int slice_dim, float start, float stride, 34 | cudaStream_t stream){ 35 | 36 | size_t post_stride = 1; 37 | int i=nb_dims-1; 38 | for(i=nb_dims-1; i>slice_dim; --i){ 39 | post_stride*=output_dims[i]; 40 | } 41 | size_t pre_stride = post_stride*output_dims[slice_dim]; 42 | 43 | size_t N = 1; 44 | for(i=0; i<<>>(output, 49 | pre_stride, post_stride, 50 | start, stride, N); 51 | 52 | } 53 | 54 | template void arange_mesh_grid(float *output, 55 | const int* output_dims, int nb_dims, 56 | int slice_dim, float start, float stride, 57 | cudaStream_t stream); 58 | 59 | template void arange_mesh_grid(int *output, 60 | const int* output_dims, int nb_dims, 61 | int slice_dim, float start, float stride, 62 | cudaStream_t stream); 63 | 64 | template void arange_mesh_grid(half *output, 65 | const int* output_dims, int nb_dims, 66 | int slice_dim, float start, float stride, 67 | cudaStream_t stream); 68 | 69 | } 70 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/meshGridPlugin/mesh_grid.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace amirstan 4 | { 5 | namespace plugin 6 | { 7 | 8 | template 9 | void arange_mesh_grid(T *output, 10 | const int* output_dims, int nb_dims, 11 | int slice_dim, float start, float stride, 12 | cudaStream_t stream); 13 | 14 | } 15 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/repeatDimsPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME 
repeatDimsPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | #find_package(CUDA REQUIRED) 7 | enable_language(CUDA) 8 | 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 10 | 11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 13 | 14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 16 | 17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/roiExtractorPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME roiExtractorPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | 7 | #find_package(CUDA REQUIRED) 8 | enable_language(CUDA) 9 | 10 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 11 | 12 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 13 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 14 | 15 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 16 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 17 | 18 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 19 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 21 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- 
/pilgrim_trt_plugins/src/plugin/roiExtractorPlugin/roi_extractor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | 5 | 6 | namespace amirstan 7 | { 8 | namespace plugin 9 | { 10 | 11 | template 12 | void roi_extractor(T* output, 13 | const T* rois, int num_rois, 14 | const void *const *feats, int num_feats, 15 | int n, 16 | int c, 17 | int *h, 18 | int *w, 19 | float *strides, 20 | int out_size, 21 | int sample_num, 22 | float roi_scale_factor, 23 | int finest_scale, 24 | bool aligned, 25 | cudaStream_t stream); 26 | 27 | } 28 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/roiPoolPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME roiPoolPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | 7 | #find_package(CUDA REQUIRED) 8 | enable_language(CUDA) 9 | 10 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 11 | 12 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 13 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 14 | 15 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 16 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 17 | 18 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 19 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 21 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/roiPoolPlugin/roi_pool.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | 5 | 6 | namespace amirstan 7 | { 8 | namespace plugin 9 | { 
10 | 11 | template 12 | void roi_pool(T* output, 13 | const T* rois, int num_rois, 14 | const void *const *feats, int num_feats, 15 | int n, 16 | int c, 17 | int *h, 18 | int *w, 19 | float *strides, 20 | int out_size, 21 | float roi_scale_factor, 22 | int finest_scale, 23 | cudaStream_t stream); 24 | 25 | } 26 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/torchCumMaxMinPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME torchCumMaxMinPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | #find_package(CUDA REQUIRED) 7 | enable_language(CUDA) 8 | 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 10 | 11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 13 | 14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 16 | 17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/torchCumMaxMinPlugin/torch_cum_maxmin.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace amirstan 4 | { 5 | namespace plugin 6 | { 7 | 8 | template 9 | void torch_cum_maxmin(T *output, int *index, const T* input, 10 | int* input_dims, int nb_dims, 11 | int cum_dim, int cum_type, 12 | cudaStream_t stream); 13 | 14 | } 15 | } -------------------------------------------------------------------------------- 
/pilgrim_trt_plugins/src/plugin/torchCumPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME torchCumPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | #find_package(CUDA REQUIRED) 7 | enable_language(CUDA) 8 | 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 10 | 11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 13 | 14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 16 | 17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/torchCumPlugin/torch_cum.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace amirstan 4 | { 5 | namespace plugin 6 | { 7 | 8 | template 9 | void torch_cum(T *output, const T* input, 10 | int* input_dims, int nb_dims, 11 | int cum_dim, int cum_type, 12 | cudaStream_t stream); 13 | 14 | } 15 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/torchFlipPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME torchFlipPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | #find_package(CUDA REQUIRED) 7 | enable_language(CUDA) 8 | 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 10 | 11 | 
file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 13 | 14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 16 | 17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/torchFlipPlugin/torch_flip.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace amirstan 4 | { 5 | namespace plugin 6 | { 7 | 8 | template 9 | void torch_flip(T *output, const T* input, 10 | int* input_dims, int nb_dims, 11 | int* flip_dims, int nb_flip_dims, 12 | cudaStream_t stream); 13 | 14 | } 15 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/torchGatherPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME torchGatherPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | #find_package(CUDA REQUIRED) 7 | enable_language(CUDA) 8 | 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 10 | 11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 13 | 14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 16 | 17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 19 | 
target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/torchGatherPlugin/torch_gather.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace amirstan 4 | { 5 | namespace plugin 6 | { 7 | 8 | template 9 | void torch_gather(T *output, const T* input, const int* index, 10 | int dim, int* input_dims, int *index_dims, int nb_dims, 11 | cudaStream_t stream); 12 | 13 | } 14 | } -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/torchNMSPlugin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(TARGET_NAME torchNMSPlugin) 4 | set(SHARED_TARGET ${TARGET_NAME}) 5 | set(STATIC_TARGET ${TARGET_NAME}_static) 6 | #find_package(CUDA REQUIRED) 7 | enable_language(CUDA) 8 | 9 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options -fPIC) 10 | 11 | file(GLOB_RECURSE GROUP_NORM_PLUGIN_HEADS *.h *.hpp *.cuh) 12 | file(GLOB GROUP_NORM_PLUGIN_SOURCES *.cpp *.cu) 13 | 14 | source_group("Include" FILES ${GROUP_NORM_PLUGIN_HEADS}) 15 | source_group("Source" FILES ${GROUP_NORM_PLUGIN_SOURCES}) 16 | 17 | cuda_add_library(${STATIC_TARGET} STATIC ${GROUP_NORM_PLUGIN_HEADS} ${GROUP_NORM_PLUGIN_SOURCES}) 18 | target_link_libraries(${STATIC_TARGET} ${CUDA_LIBRARY}) 19 | target_link_libraries(${STATIC_TARGET} ${TENSORRT_LIBRARY}) 20 | target_link_libraries(${STATIC_TARGET} amir_cuda_util) -------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/torchNMSPlugin/bboxUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef TRT_BBOX_UTILS_H 17 | #define TRT_BBOX_UTILS_H 18 | 19 | #include "plugin.h" 20 | 21 | using namespace nvinfer1; 22 | using namespace nvinfer1::plugin; 23 | 24 | template 25 | struct Bbox 26 | { 27 | T xmin, ymin, xmax, ymax; 28 | Bbox(T xmin, T ymin, T xmax, T ymax) 29 | : xmin(xmin) 30 | , ymin(ymin) 31 | , xmax(xmax) 32 | , ymax(ymax) 33 | { 34 | } 35 | Bbox() = default; 36 | }; 37 | 38 | template 39 | struct BboxInfo 40 | { 41 | T conf_score; 42 | int label; 43 | int bbox_idx; 44 | bool kept; 45 | BboxInfo(T conf_score, int label, int bbox_idx, bool kept) 46 | : conf_score(conf_score) 47 | , label(label) 48 | , bbox_idx(bbox_idx) 49 | , kept(kept) 50 | { 51 | } 52 | BboxInfo() = default; 53 | }; 54 | 55 | template 56 | bool operator<(const Bbox& lhs, const Bbox& rhs) 57 | { 58 | return lhs.x1 < rhs.x1; 59 | } 60 | 61 | template 62 | bool operator==(const Bbox& lhs, const Bbox& rhs) 63 | { 64 | return lhs.x1 == rhs.x1 && lhs.y1 == rhs.y1 && lhs.x2 == rhs.x2 && lhs.y2 == rhs.y2; 65 | } 66 | // }}} 67 | 68 | int8_t* alignPtr(int8_t* ptr, uintptr_t to); 69 | 70 | int8_t* nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize); 71 | 72 | size_t dataTypeSize(DataType dtype); 73 | 74 | void setUniformOffsets(cudaStream_t stream, int num_segments, int offset, int* d_offsets); 75 | 76 | #endif 77 | 
-------------------------------------------------------------------------------- /pilgrim_trt_plugins/src/plugin/torchNMSPlugin/torch_nms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace amirstan 4 | { 5 | namespace plugin 6 | { 7 | 8 | template 9 | size_t nms_workspace_size(int num_boxes); 10 | 11 | template 12 | void torch_nms(int *output, const T* bboxes, const T* scores, 13 | int num_boxes, float iou_threshold, void* workspace, 14 | cudaStream_t stream); 15 | 16 | } 17 | } -------------------------------------------------------------------------------- /torch2trt_dynamic.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: torch2trt-dynamic 3 | Version: 0.2.0 4 | Summary: An easy to use PyTorch to TensorRT converter with dynamic shape support 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /torch2trt_dynamic.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /torch2trt_dynamic.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | torch2trt_dynamic 2 | -------------------------------------------------------------------------------- /torch2trt_dynamic/__init__.py: -------------------------------------------------------------------------------- 1 | from .torch2trt_dynamic import * 2 | from .converters import * 3 | import tensorrt as trt 4 | 5 | 6 | def load_plugins(): 7 | import os 8 | import ctypes 9 | ctypes.CDLL(os.path.join(os.path.dirname(__file__), 'libtorch2trt_dynamic.so')) 10 | 11 | registry = trt.get_plugin_registry() 12 | 
torch2trt_creators = [c for c in registry.plugin_creator_list if c.plugin_namespace == 'torch2trt_dynamic'] 13 | for c in torch2trt_creators: 14 | registry.register_creator(c, 'torch2trt_dynamic') 15 | 16 | 17 | try: 18 | load_plugins() 19 | PLUGINS_LOADED = True 20 | except OSError: 21 | PLUGINS_LOADED = False 22 | -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/AdaptiveAvgPool2d.py: -------------------------------------------------------------------------------- 1 | from torch2trt_dynamic.torch2trt import * 2 | from torch2trt_dynamic.module_test import add_module_test 3 | 4 | 5 | @tensorrt_converter('torch.nn.AdaptiveAvgPool2d.forward') 6 | def convert_AdaptiveAvgPool2d(ctx): 7 | module = ctx.method_args[0] 8 | input = ctx.method_args[1] 9 | output = ctx.method_return 10 | 11 | input_trt = add_missing_trt_tensors(ctx.network, [input])[0] 12 | 13 | output_size = module.output_size 14 | if not isinstance(output_size, tuple): 15 | output_size = (output_size, ) * 2 16 | 17 | stride = (input_trt.shape[-2] // output_size[-2], 18 | input_trt.shape[-1] // output_size[-1]) 19 | 20 | kernel_size = stride 21 | layer = ctx.network.add_pooling( 22 | input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size) 23 | layer.stride = stride 24 | 25 | output._trt = layer.get_output(0) 26 | 27 | 28 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) 29 | def test_AdaptiveAvgPool2d_1x1(): 30 | return torch.nn.AdaptiveAvgPool2d((1, 1)) 31 | 32 | 33 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) 34 | def test_AdaptiveAvgPool2d_2x2(): 35 | return torch.nn.AdaptiveAvgPool2d((2, 2)) 36 | 37 | 38 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) 39 | def test_AdaptiveAvgPool2d_3x3(): 40 | return torch.nn.AdaptiveAvgPool2d((3, 3)) 41 | -------------------------------------------------------------------------------- 
/torch2trt_dynamic/converters/AdaptiveMaxPool2d.py: -------------------------------------------------------------------------------- 1 | from ..torch2trt_dynamic import * 2 | from ..module_test import add_module_test 3 | from .adaptive_max_pool2d import convert_adaptive_max_pool2d 4 | 5 | @tensorrt_converter('torch.nn.AdaptiveMaxPool2d.forward') 6 | def convert_AdaptiveMaxPool2d(ctx): 7 | ctx.method_args = (ctx.method_args[1], ctx.method_args[0].output_size) 8 | convert_adaptive_max_pool2d(ctx) 9 | 10 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) 11 | def test_AdaptiveMaxPool2d_1x1(): 12 | return torch.nn.AdaptiveMaxPool2d((1, 1)) 13 | 14 | 15 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) 16 | def test_AdaptiveMaxPool2d_2x2(): 17 | return torch.nn.AdaptiveMaxPool2d((2, 2)) 18 | 19 | 20 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) 21 | def test_AdaptiveMaxPool2d_3x3(): 22 | return torch.nn.AdaptiveMaxPool2d((3, 3)) 23 | -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/BatchNorm1d.py: -------------------------------------------------------------------------------- 1 | from torch2trt_dynamic.torch2trt_dynamic import * 2 | from torch2trt_dynamic.module_test import add_module_test 3 | 4 | 5 | @tensorrt_converter('torch.nn.BatchNorm1d.forward') 6 | def convert_BatchNorm1d(ctx): 7 | 8 | module = ctx.method_args[0] 9 | input = ctx.method_args[1] 10 | input_trt = trt_(ctx.network, input) 11 | output = ctx.method_return 12 | 13 | scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) 14 | bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale 15 | power = np.ones_like(scale) 16 | 17 | # reshape to 2D 18 | input_shape_trt = ctx.network.add_shape(input_trt).get_output(0) 19 | one_trt = trt_(ctx.network, 
torch.tensor([1],dtype=torch.int32).to(input.device)) 20 | if len(input.shape)==2: 21 | new_input_shape_trt = ctx.network.add_concatenation([input_shape_trt, one_trt, one_trt]).get_output(0) 22 | else: 23 | new_input_shape_trt = ctx.network.add_concatenation([input_shape_trt, one_trt]).get_output(0) 24 | layer = ctx.network.add_shuffle(input_trt) 25 | layer.set_input(1, new_input_shape_trt) 26 | 27 | layer = ctx.network.add_scale(layer.get_output(0), trt.ScaleMode.CHANNEL, bias, scale, power) 28 | 29 | # reshape back to 1D 30 | conv_out_trt = layer.get_output(0) 31 | layer = ctx.network.add_shuffle(conv_out_trt) 32 | layer.set_input(1, input_shape_trt) 33 | 34 | output._trt = layer.get_output(0) 35 | 36 | 37 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 10)]) 38 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)]) 39 | def test_BatchNorm1d_basic(): 40 | return torch.nn.BatchNorm1d(10) -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/BatchNorm2d.py: -------------------------------------------------------------------------------- 1 | from torch2trt_dynamic.torch2trt_dynamic import * 2 | 3 | 4 | @tensorrt_converter('torch.nn.BatchNorm2d.forward') 5 | def convert_BatchNorm2d(ctx): 6 | module = ctx.method_args[0] 7 | input = ctx.method_args[1] 8 | input_trt = trt_(ctx.network, input) 9 | output = ctx.method_return 10 | 11 | scale = module.weight.detach().cpu().numpy() / np.sqrt(module.running_var.detach().cpu().numpy() + module.eps) 12 | bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale 13 | power = np.ones_like(scale) 14 | 15 | layer = ctx.network.add_scale(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power) 16 | 17 | 18 | output._trt = layer.get_output(0) -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/Conv2d.py: 
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *
from torch2trt_dynamic.module_test import add_module_test


@tensorrt_converter('torch.nn.Conv2d.forward')
def convert_Conv2d(ctx):
    """Convert ``torch.nn.Conv2d.forward`` into a TensorRT convolution layer.

    Reads the module from ``ctx.method_args[0]`` and the input tensor from
    ``ctx.method_args[1]``; stores the resulting TensorRT tensor on
    ``ctx.method_return._trt``.
    """
    module = ctx.method_args[0]
    input = ctx.method_args[1]
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    # Scalar hyper-parameters are expanded to (h, w) pairs.
    kernel_size = module.kernel_size
    if not isinstance(kernel_size, tuple):
        kernel_size = (kernel_size, ) * 2

    stride = module.stride
    if not isinstance(stride, tuple):
        stride = (stride, ) * 2

    padding = module.padding
    if not isinstance(padding, tuple):
        padding = (padding, ) * 2

    dilation = module.dilation
    if not isinstance(dilation, tuple):
        dilation = (dilation, ) * 2

    kernel = module.weight.detach().cpu().numpy()

    # Empty weights stand in for "no bias".
    bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
    if module.bias is not None:
        bias = module.bias.detach().cpu().numpy()

    layer = ctx.network.add_convolution(
        input=input_trt,
        num_output_maps=module.out_channels,
        kernel_shape=kernel_size,
        kernel=kernel,
        bias=bias)
    layer.stride = stride
    layer.padding = padding
    layer.dilation = dilation

    if module.groups is not None:
        layer.num_groups = module.groups

    output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)])
def test_Conv2d_basic():
    """1x1 convolution, stride 1, no padding."""
    return torch.nn.Conv2d(10, 5, kernel_size=1, stride=1, padding=0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)])
def test_Conv2d_stride2():
    """1x1 convolution with stride 2."""
    return torch.nn.Conv2d(10, 5, kernel_size=1, stride=2, padding=0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)])
def test_Conv2d_kernel3():
    """3x3 convolution with stride 2 and padding 1."""
    return torch.nn.Conv2d(10, 5, kernel_size=3, stride=2, padding=1)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224, 224)])
def test_Conv2d_dilation2():
    """3x3 convolution with dilation 2."""
    return torch.nn.Conv2d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/ConvTranspose1d.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *


@tensorrt_converter('torch.nn.ConvTranspose1d.forward')
def convert_ConvTranspose1d(ctx):
    """Convert ``torch.nn.ConvTranspose1d.forward`` via a 2-D deconvolution.

    The input is reshaped with a trailing unit dimension, run through
    ``add_deconvolution`` with a ``(k, 1)`` kernel, and reshaped back.
    """
    module = ctx.method_args[0]
    input = ctx.method_args[1]
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    # Append the dummy second spatial dimension to every hyper-parameter.
    kernel_size = module.kernel_size
    if not isinstance(kernel_size, tuple):
        kernel_size = (kernel_size, 1)
    else:
        kernel_size = kernel_size + (1,)

    stride = module.stride
    if not isinstance(stride, tuple):
        stride = (stride, 1)
    else:
        stride = stride + (1,)

    padding = module.padding
    if not isinstance(padding, tuple):
        padding = (padding, 0)
    else:
        padding = padding + (0,)

    kernel = module.weight.detach().cpu().numpy()[..., None]

    bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
    if module.bias is not None:
        bias = module.bias.detach().cpu().numpy()[..., None]

    # unsqueeze(3)
    layer = ctx.network.add_shuffle(input_trt)
    layer.reshape_dims = (0, 0, 0, 1)
    input_trt = layer.get_output(0)

    # deconv
    layer = ctx.network.add_deconvolution(
        input=input_trt,
        num_output_maps=module.out_channels,
        kernel_shape=kernel_size,
        kernel=kernel,
        bias=bias)
    layer.stride = stride
    layer.padding = padding

    if module.groups is not None:
        layer.num_groups = module.groups

    output_trt = layer.get_output(0)

    # squeeze(3)
    layer = ctx.network.add_shuffle(output_trt)
    layer.reshape_dims = (0, 0, 0)
    output_trt = layer.get_output(0)

    output._trt = output_trt

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/ConvTranspose2d.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *


@tensorrt_converter('torch.nn.ConvTranspose2d.forward')
def convert_ConvTranspose2d(ctx):
    """Convert ``torch.nn.ConvTranspose2d.forward`` into a TensorRT deconvolution."""
    module = ctx.method_args[0]
    input = ctx.method_args[1]
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    kernel_size = module.kernel_size
    if not isinstance(kernel_size, tuple):
        kernel_size = (kernel_size, ) * 2

    stride = module.stride
    if not isinstance(stride, tuple):
        stride = (stride, ) * 2

    padding = module.padding
    if not isinstance(padding, tuple):
        padding = (padding, ) * 2

    kernel = module.weight.detach().cpu().numpy()

    bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
    if module.bias is not None:
        bias = module.bias.detach().cpu().numpy()

    layer = ctx.network.add_deconvolution(
        input=input_trt,
        num_output_maps=module.out_channels,
        kernel_shape=kernel_size,
        kernel=kernel,
        bias=bias)
    layer.stride = stride
    layer.padding = padding

    if module.groups is not None:
        layer.num_groups = module.groups

    output._trt = layer.get_output(0)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/Identity.py:
# --------------------------------------------------------------------------------
from ..torch2trt_dynamic import *


@tensorrt_converter('torch.nn.Dropout.forward')
@tensorrt_converter('torch.nn.Dropout2d.forward')
@tensorrt_converter('torch.nn.Dropout3d.forward')
def convert_Identity(ctx):
    """Pass the input TensorRT tensor straight through (dropout variants)."""
    input = ctx.method_args[1]
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return
    output._trt = input_trt

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/Linear.py:
# --------------------------------------------------------------------------------
from ..torch2trt_dynamic import *
from ..module_test import add_module_test
import torch


@tensorrt_converter('torch.nn.Linear.forward')
def convert_Linear(ctx):
    """Convert ``torch.nn.Linear.forward`` using a 1x1 convolution.

    The input is reshaped to ``...xNx1x1``, convolved with the weight matrix,
    and reshaped back to the input rank.
    """
    module = ctx.method_args[0]
    input = ctx.method_args[1]
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    ### reshape to ...xNx1x1
    layer = ctx.network.add_shuffle(input_trt)
    layer.reshape_dims = (0,) * len(input_trt.shape) + (1, 1)

    ### add fully connected
    bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
    if module.bias is not None:
        bias = module.bias.detach().cpu().numpy()

    layer = ctx.network.add_convolution(
        input=layer.get_output(0),
        num_output_maps=module.out_features,
        kernel_shape=(1, 1),
        kernel=module.weight.detach().cpu().numpy(),
        bias=bias)

    # layer = ctx.network.add_fully_connected(
    #     input=layer.get_output(0),
    #     # input=input_trt,
    #     num_outputs=module.out_features,
    #     kernel=module.weight.detach().cpu().numpy(),
    #     bias=bias)

    ### reshape back to N
    layer = ctx.network.add_shuffle(layer.get_output(0))
    # # layer.reshape_dims = tuple(output.shape[1:])
    layer.reshape_dims = (0,) * len(input_trt.shape)

    output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)])
def test_Linear_basic():
    """Linear layer with bias on 2-D, 3-D and 4-D inputs."""
    return torch.nn.Linear(10, 5)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 10)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 10)])
def test_Linear_no_bias():
    """Linear layer without bias on 2-D, 3-D and 4-D inputs."""
    return torch.nn.Linear(10, 5, bias=False)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/LogSoftmax.py:
# --------------------------------------------------------------------------------
from ..torch2trt_dynamic import *


@tensorrt_converter('torch.nn.LogSoftmax.forward')
def convert_LogSoftmax(ctx):
    """Convert ``torch.nn.LogSoftmax.forward`` as softmax followed by log.

    The softmax axis is taken from ``module.dim``. Previously it was never
    set, so the layer always used TensorRT's default axis regardless of the
    module's configuration.
    """
    module = ctx.method_args[0]
    input = ctx.method_args[1]
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    layer = ctx.network.add_softmax(input=input_trt)

    # ``axes`` is a bitmask over dimensions; leave the TensorRT default in
    # place only when the module itself does not name a dimension.
    dim = getattr(module, 'dim', None)
    if dim is not None:
        if dim < 0:
            dim = len(input.shape) + dim
        layer.axes = 1 << dim

    layer = ctx.network.add_unary(input=layer.get_output(0),
                                  op=trt.UnaryOperation.LOG)
    output._trt = layer.get_output(0)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/ReLU.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *


@tensorrt_converter('torch.nn.ReLU.forward')
def convert_ReLU(ctx):
    """Convert ``torch.nn.ReLU.forward`` into a TensorRT RELU activation."""
    input = ctx.method_args[1]
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return
    layer = ctx.network.add_activation(
        input=input_trt, type=trt.ActivationType.RELU)
    output._trt = layer.get_output(0)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/ReLU6.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *
from torch2trt_dynamic.module_test import add_module_test


@tensorrt_converter('torch.nn.ReLU6.forward')
def convert_ReLU6(ctx):
    """Convert ``torch.nn.ReLU6.forward`` as ``min(relu(x), 6)``."""
    input = ctx.method_args[1]
    output = ctx.method_return

    input_trt, trt_6 = trt_(ctx.network, input, 6.)

    layer = ctx.network.add_activation(
        input=input_trt, type=trt.ActivationType.RELU)
    layer = ctx.network.add_elementwise(
        layer.get_output(0), trt_6, trt.ElementWiseOperation.MIN)

    output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 5)])
def test_relu6_basic():
    """ReLU6 on a small 4-D input."""
    return torch.nn.ReLU6()

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/adaptive_avg_pool2d.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *
from torch2trt_dynamic.module_test import add_module_test
from torch2trt_dynamic.plugins import create_adaptivepool_plugin


# @tensorrt_converter('torch.nn.functional.adaptive_avg_pool2d')
# def convert_adaptive_avg_pool2d(ctx):
#     input = ctx.method_args[0]
#     output_size = get_arg(ctx, 'output_size', pos=1, default=0)
#     output = ctx.method_return
#     input_trt = trt_(ctx.network, input)

#     if isinstance(output_size, int):
#         output_size = (output_size, output_size)

#     output_size = tuple([-1 if not o else o for o in output_size])

#     plugin = create_adaptivepool_plugin("adaptive_avg_pool2d_"+str(id(input)),
#                                         output_size=output_size,
#                                         pooling_type=trt.PoolingType.AVERAGE)

#     layer = ctx.network.add_plugin_v2(
#         inputs=[input_trt], plugin=plugin)

#     output._trt = layer.get_output(0)

@tensorrt_converter('torch.nn.functional.adaptive_avg_pool2d')
def convert_adaptive_avg_pool2d(ctx):
    """Convert the functional form by delegating to the module converter.

    Rewrites ``ctx.method_args`` to ``(AdaptiveAvgPool2d(output_size), input)``
    and calls ``convert_AdaptiveAvgPool2d``. ``output_size`` is looked up with
    ``get_arg`` so that keyword calls work as well as positional ones.
    """
    input = get_arg(ctx, 'input', pos=0, default=None)
    output_size = get_arg(ctx, 'output_size', pos=1, default=0)
    ctx.method_args = (torch.nn.AdaptiveAvgPool2d(output_size), input)
    # NOTE(review): convert_AdaptiveAvgPool2d is not imported in this file's
    # visible imports; presumably it comes from a star import - confirm.
    convert_AdaptiveAvgPool2d(ctx)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/adaptive_max_pool2d.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *
from torch2trt_dynamic.module_test import add_module_test
from torch2trt_dynamic.plugins import create_adaptivepool_plugin


@tensorrt_converter('torch.nn.functional.adaptive_max_pool2d')
def convert_adaptive_max_pool2d(ctx):
    """Convert ``adaptive_max_pool2d`` using the adaptive-pool plugin.

    Falsy entries of ``output_size`` (``None`` or 0) are passed to the plugin
    as ``-1``.
    """
    input = ctx.method_args[0]
    output_size = get_arg(ctx, 'output_size', pos=1, default=0)
    output = ctx.method_return
    input_trt = trt_(ctx.network, input)

    if isinstance(output_size, int):
        output_size = (output_size, output_size)

    output_size = tuple([-1 if not o else o for o in output_size])

    plugin = create_adaptivepool_plugin("adaptive_max_pool2d_"+str(id(input)),
                                        output_size=output_size,
                                        pooling_type=trt.PoolingType.MAX)

    layer = ctx.network.add_plugin_v2(
        inputs=[input_trt], plugin=plugin)

    output._trt = layer.get_output(0)

### old version
# @tensorrt_converter('torch.nn.functional.adaptive_max_pool2d')
# def convert_adaptive_max_pool2d(ctx):
#     input = ctx.method_args[0]
#     output = ctx.method_return
#     input_trt = trt_(ctx.network, input)

#     output_size = ctx.method_args[1]
#     if isinstance(output_size, int):
#         output_size = (output_size, ) * 2

#     if output_size[0]==1 and output_size[1] == 1:
#         shape_length = len(input.shape)
#         axes = (1<<(shape_length-1)) + (1<<(shape_length-2))
#         keepdim = True
#         layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.MAX, axes, keepdim)
#         output._trt = layer.get_output(0)
#     else:
#         stride = (input._trt.shape[-2] // output_size[-2], input._trt.shape[-1] // output_size[-1])

#         kernel_size = stride
#         layer = ctx.network.add_pooling(
#             input=input._trt, type=trt.PoolingType.MAX, window_size=kernel_size)
#         layer.stride = stride

#         output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_adaptive_max_pool2d_1x1():
    """Adaptive max pool to a 1x1 output."""
    return torch.nn.AdaptiveMaxPool2d((1, 1))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_adaptive_max_pool2d_2x2():
    """Adaptive max pool to a 2x2 output."""
    return torch.nn.AdaptiveMaxPool2d((2, 2))


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_adaptive_max_pool2d_3x3():
    """Adaptive max pool to a 3x3 output."""
    return torch.nn.AdaptiveMaxPool2d((3, 3))

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/add.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *
from torch2trt_dynamic.module_test import add_module_test


@tensorrt_converter('torch.add')
@tensorrt_converter('torch.Tensor.__iadd__')
@tensorrt_converter('torch.Tensor.__add__')
@tensorrt_converter('torch.Tensor.__radd__')
def convert_add(ctx):
    """Convert addition into a TensorRT elementwise SUM layer."""
    input_a = ctx.method_args[0]
    input_b = ctx.method_args[1]
    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
    output = ctx.method_return
    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM)
    output._trt = layer.get_output(0)


class Add(torch.nn.Module):
    """Test module: ``x + y``."""

    def __init__(self):
        super(Add, self).__init__()

    def forward(self, x, y):
        return x + y


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_add_basic():
    return Add()


class IAdd(torch.nn.Module):
    """Test module: in-place ``x += y``."""

    def __init__(self):
        super(IAdd, self).__init__()

    def forward(self, x, y):
        x += y
        return x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_add_iadd():
    return IAdd()


class TorchAdd(torch.nn.Module):
    """Test module: ``torch.add(x, y)``."""

    def __init__(self):
        super(TorchAdd, self).__init__()

    def forward(self, x, y):
        return torch.add(x, y)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)])
def test_add_torchadd():
    return TorchAdd()


class RAddInt(torch.nn.Module):
    """Test module: reflected add with an int, ``1 + x``."""

    def __init__(self):
        super(RAddInt, self).__init__()

    def forward(self, x):
        return 1 + x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_add_radd_int():
    return RAddInt()


class RAddFloat(torch.nn.Module):
    """Test module: reflected add with a float, ``1.0 + x``."""

    def __init__(self):
        super(RAddFloat, self).__init__()

    def forward(self, x):
        return 1.0 + x


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)])
def test_add_radd_float():
    return RAddFloat()

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/argmax.py:
# --------------------------------------------------------------------------------
import tensorrt as trt
from torch2trt_dynamic.torch2trt_dynamic import *
from torch2trt_dynamic.module_test import add_module_test
from .flatten import *
from .topk import *
from .squeeze import *


@tensorrt_converter('torch.Tensor.argmax')
@tensorrt_converter('torch.argmax')
def convert_argmax(ctx):
    """Convert ``argmax`` by chaining the flatten, topk and squeeze converters.

    ``ctx.method_args`` / ``ctx.method_return`` are temporarily rewritten to
    drive the delegate converters; ``ctx.method_args`` is restored at the end.
    """
    old_args = ctx.method_args
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=None)
    keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)

    output = ctx.method_return

    # dim is None: operate on the flattened tensor along axis 0
    if dim is None:
        input_flatten = input.flatten()
        ctx.method_args = [input]
        ctx.method_return = input_flatten
        convert_flatten(ctx)
        input = ctx.method_return
        dim = 0

    # topk with k=1; the index output is the argmax
    topk_output = input.topk(1, dim)
    topk_input = [input, 1, dim]
    ctx.method_args = topk_input
    ctx.method_return = topk_output
    convert_topk(ctx)
    topk_index = ctx.method_return[1]

    output._trt = topk_index._trt
    ctx.method_return = output

    # keepdim: drop the size-1 reduced axis unless the caller asked to keep it
    if not keepdim and topk_index.shape[dim] == 1 and len(topk_index.shape) > 1:
        ctx.method_args = [topk_index, dim]
        ctx.method_return = output
        convert_squeeze(ctx)
    ctx.method_args = old_args

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/argmin.py:
# --------------------------------------------------------------------------------
import tensorrt as trt
from torch2trt_dynamic.torch2trt_dynamic import *
from torch2trt_dynamic.module_test import add_module_test
from .flatten import *
from .topk import *
from .squeeze import *


@tensorrt_converter('torch.Tensor.argmin')
@tensorrt_converter('torch.argmin')
def convert_argmin(ctx):
    """Convert ``argmin`` by chaining the flatten, topk and squeeze converters.

    Same flow as ``argmax`` but the topk step is invoked with
    ``largest=False``.
    """
    old_args = ctx.method_args
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=None)
    keepdim = get_arg(ctx, 'keepdim', pos=2, default=False)

    output = ctx.method_return

    # dim is None: operate on the flattened tensor along axis 0
    if dim is None:
        input_flatten = input.flatten()
        ctx.method_args = [input]
        ctx.method_return = input_flatten
        convert_flatten(ctx)
        input = ctx.method_return
        dim = 0

    # topk with k=1 and largest=False; the index output is the argmin
    topk_output = input.topk(1, dim, largest=False)
    topk_input = [input, 1, dim, False]
    ctx.method_args = topk_input
    ctx.method_return = topk_output
    convert_topk(ctx)
    topk_index = ctx.method_return[1]

    output._trt = topk_index._trt
    ctx.method_return = output

    # keepdim: drop the size-1 reduced axis unless the caller asked to keep it
    if not keepdim and topk_index.shape[dim] == 1 and len(topk_index.shape) > 1:
        ctx.method_args = [topk_index, dim]
        ctx.method_return = output
        convert_squeeze(ctx)
    ctx.method_args = old_args

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/avg_pool2d.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *
from torch2trt_dynamic.module_test import add_module_test


@tensorrt_converter('torch.nn.functional.avg_pool2d')
def convert_avg_pool2d(ctx):
    """Convert ``torch.nn.functional.avg_pool2d`` into a TensorRT pooling layer.

    ``stride=None`` (the PyTorch default) is resolved to ``kernel_size``, and
    list-valued sizes are accepted alongside ints and tuples.
    """
    # parse args
    input = get_arg(ctx, 'input', pos=0, default=None)
    kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None)
    stride = get_arg(ctx, 'stride', pos=2, default=None)
    padding = get_arg(ctx, 'padding', pos=3, default=0)
    ceil_mode = get_arg(ctx, 'ceil_mode', pos=4, default=False)
    count_include_pad = get_arg(ctx, 'count_include_pad', pos=5, default=True)

    # get input trt tensor (or create constant if it doesn't exist)
    input_trt = trt_(ctx.network, input)

    output = ctx.method_return

    # get kernel size
    if isinstance(kernel_size, (tuple, list)):
        kernel_size = tuple(kernel_size)
    else:
        kernel_size = (kernel_size, ) * 2

    # get stride; PyTorch treats a missing stride as "same as kernel_size"
    if stride is None:
        stride = kernel_size
    elif isinstance(stride, (tuple, list)):
        stride = tuple(stride)
    else:
        stride = (stride, ) * 2

    # get padding
    if isinstance(padding, (tuple, list)):
        padding = tuple(padding)
    else:
        padding = (padding, ) * 2

    layer = ctx.network.add_pooling(
        input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size)

    layer.stride = stride
    layer.padding = padding
    layer.average_count_excludes_padding = not count_include_pad

    if ceil_mode:
        layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP

    output._trt = layer.get_output(0)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)])
def test_avg_pool2d_without_ceil_mode():
    """3x3 average pool, stride 2, padding 1, floor rounding."""
    return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)


@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 4, 6)])
@add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 5, 7)])
def test_avg_pool2d_with_ceil_mode():
    """3x3 average pool, stride 2, padding 1, ceil rounding."""
    # TRT does not support ceil_mode=True && count_include_pad=True
    return torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True, count_include_pad=False)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/cast_type.py:
# --------------------------------------------------------------------------------
import tensorrt as trt
from torch2trt_dynamic.torch2trt_dynamic import *


def convert_type(ctx, data_type):
    """Cast ``ctx.method_args[0]`` to ``data_type`` through an identity layer.

    Args:
        ctx: conversion context; the result is stored on
            ``ctx.method_return._trt``.
        data_type: the ``trt.DataType`` requested for the layer output.
    """
    input = ctx.method_args[0]
    output = ctx.method_return

    input_trt = trt_(ctx.network, input)

    layer = ctx.network.add_identity(input_trt)
    layer.set_output_type(0, data_type)
    output._trt = layer.get_output(0)


@tensorrt_converter('torch.Tensor.long')
@tensorrt_converter('torch.Tensor.int')
def convert_int(ctx):
    """Convert ``Tensor.int()`` / ``Tensor.long()`` to an INT32 cast."""
    # A single call is enough; a second one only added an unused identity
    # layer to the network.
    convert_type(ctx, trt.DataType.INT32)


@tensorrt_converter('torch.Tensor.float')
def convert_float(ctx):
    """Convert ``Tensor.float()`` to a FLOAT cast."""
    convert_type(ctx, trt.DataType.FLOAT)

# @tensorrt_converter('torch.Tensor.char')
# def convert_char(ctx):
#     convert_type(ctx, trt.DataType.CHAR)


# @tensorrt_converter('torch.Tensor.half')
# def convert_half(ctx):
#     convert_type(ctx, trt.DataType.HALF)


@tensorrt_converter('torch.Tensor.bool')
def convert_bool(ctx):
    """Convert ``Tensor.bool()`` to a BOOL cast."""
    convert_type(ctx, trt.DataType.BOOL)


@tensorrt_converter('torch.Tensor.type_as')
def convert_type_as(ctx):
    """Convert ``Tensor.type_as(other)`` by casting to ``other``'s TRT dtype."""
    input = ctx.method_args[0]
    other = ctx.method_args[1]
    output = ctx.method_return

    input_trt = trt_(ctx.network, input)
    other_trt = trt_(ctx.network, other)

    layer = ctx.network.add_identity(input_trt)
    layer.set_output_type(0, other_trt.dtype)
    output._trt = layer.get_output(0)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/cat.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *


@tensorrt_converter('torch.cat')
def convert_cat(ctx):
    """Convert ``torch.cat`` into a TensorRT concatenation layer."""
    inputs = ctx.method_args[0]

    dim = get_arg(ctx, 'dim', pos=1, default=0)
    if dim < 0:
        # normalise a negative axis against the rank of the first input
        dim = len(inputs[0].shape) + dim

    output = ctx.method_return
    trt_inputs = [trt_(ctx.network, i) for i in inputs]

    layer = ctx.network.add_concatenation(inputs=trt_inputs)

    layer.axis = dim
    output._trt = layer.get_output(0)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/conv2d.py:
# --------------------------------------------------------------------------------
### copy from https://github.com/yuzhiyiliu/torch2trt/blob/origin/torch.nn.functional.conv2d_support/torch2trt/converters/conv2d.py


from torch2trt_dynamic.torch2trt_dynamic import *
from .Conv2d import *


def _as_parameter(tensor):
    """Return ``tensor`` as an ``nn.Parameter`` so it can be assigned to a module."""
    if isinstance(tensor, torch.nn.Parameter):
        return tensor
    return torch.nn.Parameter(tensor.detach(), requires_grad=False)


@tensorrt_converter('torch.nn.functional.conv2d')
def convert_conv2d(ctx):
    """Convert ``torch.nn.functional.conv2d`` by delegating to ``convert_Conv2d``.

    A temporary ``torch.nn.Conv2d`` carrying the call's weight, bias and
    hyper-parameters is built and placed in ``ctx.method_args`` in front of
    the input tensor.
    """
    weight = get_arg(ctx, 'weight', pos=1, default=None)
    bias = get_arg(ctx, 'bias', pos=2, default=None)
    # Defaults mirror torch.nn.functional.conv2d; ``None`` is not accepted by
    # the torch.nn.Conv2d constructor.
    stride = get_arg(ctx, 'stride', pos=3, default=1)
    padding = get_arg(ctx, 'padding', pos=4, default=0)
    dilation = get_arg(ctx, 'dilation', pos=5, default=1)
    groups = get_arg(ctx, 'groups', pos=6, default=1)

    out_channels = weight.size()[0]
    # weight is (out_channels, in_channels // groups, kH, kW)
    in_channels = weight.size()[1] * groups
    kernel_size = tuple(weight.size()[2:4])
    need_bias = bias is not None

    module = torch.nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=need_bias)
    # nn.Module only accepts Parameter (or None) for registered parameters.
    module.weight = _as_parameter(weight)
    module.bias = _as_parameter(bias) if need_bias else None

    ctx.method_args = (module, ctx.method_args[0])
    convert_Conv2d(ctx)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/cummax.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *

from torch2trt_dynamic.plugins import *


@tensorrt_converter('torch.cummax')
@tensorrt_converter('torch.Tensor.cummax')
def convert_cummax(ctx):
    """Convert ``cummax`` using the cum-max/min plugin with ``cum_type=0``.

    The plugin's two outputs are attached to the two elements of the
    returned (values, indices) pair.
    """
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=0)
    cum_type = 0

    if dim < 0:
        dim = len(input.shape) + dim

    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    plugin = create_torchcummaxmin_plugin("cummax_" + str(id(input)),
                                          dim=dim,
                                          cum_type=cum_type)

    custom_layer = ctx.network.add_plugin_v2(
        inputs=[input_trt], plugin=plugin)

    output[0]._trt = custom_layer.get_output(0)
    output[1]._trt = custom_layer.get_output(1)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/cummin.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *

from ..plugins import *


@tensorrt_converter('torch.cummin')
@tensorrt_converter('torch.Tensor.cummin')
def convert_cummin(ctx):
    """Convert ``cummin`` using the cum-max/min plugin with ``cum_type=1``."""
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=0)
    cum_type = 1

    if dim < 0:
        dim = len(input.shape) + dim
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    plugin = create_torchcummaxmin_plugin("cummin_" + str(id(input)),
                                          dim=dim,
                                          cum_type=cum_type)

    custom_layer = ctx.network.add_plugin_v2(
        inputs=[input_trt], plugin=plugin)

    output[0]._trt = custom_layer.get_output(0)
    output[1]._trt = custom_layer.get_output(1)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/cumprod.py:
# --------------------------------------------------------------------------------
from torch2trt_dynamic.torch2trt_dynamic import *

from ..plugins import *


@tensorrt_converter('torch.cumprod')
@tensorrt_converter('torch.Tensor.cumprod')
def convert_cumprod(ctx):
    """Convert ``cumprod`` using the cumulative plugin with ``cum_type=1``."""
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=0)
    cum_type = 1

    if dim < 0:
        dim = len(input.shape) + dim
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    plugin = create_torchcum_plugin("cumprod_" + str(id(input)),
                                    dim=dim,
                                    cum_type=cum_type)

    custom_layer = ctx.network.add_plugin_v2(
        inputs=[input_trt], plugin=plugin)

    output._trt = custom_layer.get_output(0)

# --------------------------------------------------------------------------------
# /torch2trt_dynamic/converters/cumsum.py:
# --------------------------------------------------------------------------------
from ..torch2trt_dynamic import *

from ..plugins import *


@tensorrt_converter('torch.cumsum')
@tensorrt_converter('torch.Tensor.cumsum')
def convert_cumsum(ctx):
    """Convert ``cumsum`` using the cumulative plugin with ``cum_type=0``."""
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=0)
    cum_type = 0

    if dim < 0:
        dim = len(input.shape) + dim
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    plugin = create_torchcum_plugin("cumsum_" + str(id(input)),
                                    dim=dim,
                                    cum_type=cum_type)

    custom_layer = ctx.network.add_plugin_v2(
        inputs=[input_trt], plugin=plugin)

    output._trt = custom_layer.get_output(0)

-------------------------------------------------------------------------------- /torch2trt_dynamic/converters/deform_conv2d.py: -------------------------------------------------------------------------------- 1 | from ..torch2trt_dynamic import * 2 | from .Conv2d import convert_Conv2d 3 | 4 | from ..plugins import * 5 | import torchvision.ops 6 | 7 | @tensorrt_converter('torchvision.ops.deform_conv.deform_conv2d') 8 | def convert_deform_conv2d(ctx): 9 | 10 | input = get_arg(ctx, 'input', pos=0, default=None) 11 | offset = get_arg(ctx, 'offset', pos=1, default=None) 12 | weight = get_arg(ctx, 'weight', pos=2, default=None) 13 | bias = get_arg(ctx, 'bias', pos=3, default=None) 14 | stride = get_arg(ctx, 'stride', pos=4, default=1) 15 | padding = get_arg(ctx, 'padding', pos=5, default=0) 16 | dilation = get_arg(ctx, 'dilation', pos=6, default=1) 17 | # groups = get_arg(ctx, 'groups', pos=6, default=1) 18 | # deform_groups = get_arg(ctx, 'deform_groups', pos=7, default=1) 19 | groups=1 20 | 21 | output = ctx.method_return 22 | 23 | input_trt = trt_(ctx.network, input) 24 | offset_trt = trt_(ctx.network, offset) 25 | 26 | kernel_size = weight.shape[2] 27 | if not isinstance(kernel_size, tuple): 28 | kernel_size = (kernel_size, ) * 2 29 | 30 | if not isinstance(stride, tuple): 31 | stride = (stride, ) * 2 32 | 33 | if not isinstance(padding, tuple): 34 | padding = (padding, ) * 2 35 | 36 | if not isinstance(dilation, tuple): 37 | dilation = (dilation, ) * 2 38 | 39 | deform_groups=int(offset.shape[1]//(2*kernel_size[0]*kernel_size[1])) 40 | 41 | kernel = weight.detach().cpu().numpy() 42 | out_channels = output.shape[1] 43 | 44 | bias = bias.detach().cpu().numpy() 45 | 46 | plugin = create_dcn_plugin("dcn_" + str(id(input)), 47 | out_channels=out_channels, 48 | kernel_size=kernel_size, 49 | W=kernel, 50 | B=bias, 51 | padding=padding, 52 | stride=stride, 53 | dilation=dilation, 54 | deformable_group=deform_groups, 55 | group=groups 56 | ) 57 | 58 | custom_layer = 
ctx.network.add_plugin_v2( 59 | inputs=[input_trt, offset_trt], plugin=plugin) 60 | 61 | output._trt = custom_layer.get_output(0) 62 | -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/dummy_converters.py: -------------------------------------------------------------------------------- 1 | from ..torch2trt_dynamic import * 2 | 3 | 4 | def is_private(method): 5 | method = method.split('.')[-1] # remove prefix 6 | return method[0] == '_' and method[1] is not '_' 7 | 8 | def is_function_type(method): 9 | fntype = eval(method + '.__class__.__name__') 10 | return fntype == 'function' or fntype == 'builtin_function_or_method' or fntype == 'method_descriptor' 11 | 12 | def get_methods(namespace): 13 | methods = [] 14 | for method in dir(eval(namespace)): 15 | full_method = namespace + '.' + method 16 | if not is_private(full_method) and is_function_type(full_method): 17 | methods.append(full_method) 18 | return methods 19 | 20 | 21 | TORCH_METHODS = [] 22 | TORCH_METHODS += get_methods('torch') 23 | TORCH_METHODS += get_methods('torch.Tensor') 24 | TORCH_METHODS += get_methods('torch.nn.functional') 25 | 26 | 27 | for method in TORCH_METHODS: 28 | 29 | @tensorrt_converter(method, is_real=False) 30 | def warn_method(ctx): 31 | print('Warning: Encountered known unsupported method %s' % ctx.method_str) 32 | 33 | 34 | @tensorrt_converter('torch.Tensor.dim', is_real=False) 35 | @tensorrt_converter('torch.Tensor.size', is_real=False) 36 | def dont_warn(ctx): 37 | pass -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/expand.py: -------------------------------------------------------------------------------- 1 | import tensorrt as trt 2 | from ..torch2trt_dynamic import * 3 | from ..module_test import add_module_test 4 | from .repeat import * 5 | from .exview import convert_exview 6 | 7 | 8 | @tensorrt_converter('torch.Tensor.expand') 9 | def 
convert_expand(ctx): 10 | 11 | old_args = ctx.method_args 12 | input = ctx.method_args[0] 13 | if isinstance(ctx.method_args[1:], int): 14 | sizes = ctx.method_args[1:] 15 | else: 16 | sizes = ctx.method_args[1] 17 | 18 | output = ctx.method_return 19 | 20 | repeat_shape = [] 21 | for i in range(output.dim()): 22 | if i < output.dim()-input.dim(): 23 | repeat_shape.append(output.shape[i]) 24 | else: 25 | repeat_shape.append(output.shape[i]//input.shape[i+input.dim()-output.dim()]) 26 | 27 | ctx.method_args = [input]+repeat_shape 28 | ctx.method_return = output 29 | convert_repeat(ctx) 30 | ctx.method_args=old_args 31 | -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/expand_as.py: -------------------------------------------------------------------------------- 1 | from ..torch2trt_dynamic import * 2 | 3 | from ..plugins import * 4 | 5 | @tensorrt_converter('torch.Tensor.expand_as') 6 | def convert_expand_as(ctx): 7 | input = ctx.method_args[0] 8 | other = get_arg(ctx, 'other', pos=1, default=None) 9 | 10 | input_trt = trt_(ctx.network, input) 11 | other_trt = trt_(ctx.network, other) 12 | output = ctx.method_return 13 | 14 | plugin = create_repeat_plugin("repeat_" + str(id(input)), 15 | repeat_shape=[] 16 | ) 17 | 18 | custom_layer = ctx.network.add_plugin_v2( 19 | inputs=[input_trt, other_trt], plugin=plugin) 20 | 21 | output._trt = custom_layer.get_output(0) 22 | -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/flip.py: -------------------------------------------------------------------------------- 1 | from ..torch2trt_dynamic import * 2 | from ..plugins import * 3 | 4 | @tensorrt_converter('torch.flip') 5 | @tensorrt_converter('torch.Tensor.flip') 6 | def convert_flip(ctx): 7 | input = ctx.method_args[0] 8 | dims = get_arg(ctx, 'dims', pos=1, default=0) 9 | if isinstance(dims, int): 10 | dims = [dims] 11 | 12 | dims = [len(input.shape)+dim 
if dim<0 else dim for dim in dims] 13 | 14 | input_trt = trt_(ctx.network, input) 15 | output = ctx.method_return 16 | 17 | plugin = create_torchflip_plugin("flip_" + str(id(input)), 18 | dims=dims 19 | ) 20 | 21 | custom_layer = ctx.network.add_plugin_v2( 22 | inputs=[input_trt], plugin=plugin) 23 | 24 | output._trt = custom_layer.get_output(0) 25 | 26 | -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/floor_divide.py: -------------------------------------------------------------------------------- 1 | from ..torch2trt_dynamic import * 2 | from ..module_test import add_module_test 3 | 4 | 5 | @tensorrt_converter('torch.floor_divide') 6 | @tensorrt_converter('torch.Tensor.floor_divide') 7 | @tensorrt_converter('torch.Tensor.floor_divide_') 8 | @tensorrt_converter('torch.Tensor.__floordiv__') 9 | @tensorrt_converter('torch.Tensor.__ifloordiv__') 10 | def convert_floor_div(ctx): 11 | input_a = ctx.method_args[0] 12 | input_b = ctx.method_args[1] 13 | input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) 14 | output = ctx.method_return 15 | layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.FLOOR_DIV) 16 | output._trt = layer.get_output(0) 17 | 18 | 19 | @tensorrt_converter('torch.Tensor.__rfloordiv__') 20 | def convert_rfloor_div(ctx): 21 | input_a = ctx.method_args[1] # inputs switched for rdiv 22 | input_b = ctx.method_args[0] 23 | input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b) 24 | output = ctx.method_return 25 | layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.FLOOR_DIV) 26 | output._trt = layer.get_output(0) 27 | 28 | -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/full_like.py: -------------------------------------------------------------------------------- 1 | from ..torch2trt_dynamic import * 2 | from .mul import convert_mul 3 | from .add 
@tensorrt_converter('torch.full_like')
def convert_full_like(ctx):
    """Convert ``torch.full_like`` by chaining existing converters.

    The network computes ``input * 0 + fill_value`` and then casts the result
    to the requested dtype.  ``ctx.method_args``, ``ctx.method_kwargs`` and
    ``ctx.method_return`` are temporarily re-pointed at each intermediate
    operation and are always restored before returning, even on error.
    """
    input = ctx.method_args[0]
    fill_value = get_arg(ctx, "fill_value", pos=1, default=0)
    output = ctx.method_return

    # torch.full_like defaults to the dtype of ``input`` (not float32); the
    # tensor PyTorch actually returned carries the effective dtype.
    dtype = get_arg(ctx, 'dtype', pos=3, default=None)
    if dtype is None:
        dtype = output.dtype

    old_method_args = ctx.method_args
    old_method_kwargs = ctx.method_kwargs

    try:
        # mul zero
        input_mul_zero = input*0
        ctx.method_args = [input, 0]
        ctx.method_kwargs = {}
        ctx.method_return = input_mul_zero
        convert_mul(ctx)

        # add fill_value
        input_add_one = input_mul_zero+fill_value
        ctx.method_args = [input_mul_zero, fill_value]
        ctx.method_kwargs = {}
        ctx.method_return = input_add_one
        convert_add(ctx)

        convert_type_func = None
        if dtype==torch.float32:
            convert_type_func = convert_float
        elif dtype==torch.int32 or dtype==torch.long:
            convert_type_func = convert_int
        elif dtype==torch.bool:
            convert_type_func = convert_bool
        else:
            print("unsupported convert type:{}".format(dtype))

        if convert_type_func is not None:
            input_as_type = input_add_one.to(dtype)
            ctx.method_args = [input_add_one, dtype]
            ctx.method_return = input_as_type
            convert_type_func(ctx)
            # adding 0 attaches the cast result to the real output tensor
            ctx.method_args = [input_as_type, 0]
            ctx.method_kwargs = {}
            ctx.method_return = output
            convert_add(ctx)
        else:
            # No cast converter for this dtype: expose the uncast result so
            # that ``output`` still owns a TensorRT tensor for downstream
            # converters instead of being left without ``_trt``.
            output._trt = input_add_one._trt
    finally:
        ctx.method_args = old_method_args
        ctx.method_kwargs = old_method_kwargs
        ctx.method_return = output
@tensorrt_converter('torch.nn.functional.grid_sample')
def convert_grid_sample(ctx):
    """Map ``F.grid_sample`` onto the grid-sample plugin.

    String options are translated to the values handed to the plugin; any
    value that is not one of the known strings is forwarded unchanged.
    """
    feature = ctx.method_args[0]
    grid = get_arg(ctx, 'grid', pos=1, default=None)
    mode = get_arg(ctx, 'mode', pos=2, default='bilinear')
    padding_mode = get_arg(ctx, 'padding_mode', pos=3, default='zeros')
    align_corners = get_arg(ctx, 'align_corners', pos=4, default=False)

    result = ctx.method_return

    feature_trt = trt_(ctx.network, feature)
    grid_trt = trt_(ctx.network, grid)

    # interpolation mode -> TensorRT resize mode
    mode_table = {
        'bilinear': trt.ResizeMode.LINEAR,
        'nearest': trt.ResizeMode.NEAREST,
    }
    mode = mode_table.get(mode, mode)

    # padding mode -> integer code passed to the plugin
    padding_table = {'zeros': 0, 'border': 1, 'reflection': 2}
    padding_mode = padding_table.get(padding_mode, padding_mode)

    plugin = create_gridsample_plugin(
        "torch_gridsample_"+str(id(feature)),
        mode=mode,
        padding_mode=padding_mode,
        align_corners=align_corners)

    sample_layer = ctx.network.add_plugin_v2(
        inputs=[feature_trt, grid_trt], plugin=plugin)

    result._trt = sample_layer.get_output(0)
@tensorrt_converter('torch.index_select')
@tensorrt_converter('torch.Tensor.index_select')
def convert_index_select(ctx):
    """Convert ``index_select`` into a TensorRT gather layer.

    ``dim`` may be negative in PyTorch (counted from the last dimension); it
    is normalised to a non-negative axis before being given to the gather
    layer.
    """
    input = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=None)
    index = get_arg(ctx, 'index', pos=2, default=None)

    # PyTorch accepts dim in [-ndim, ndim); the gather axis is an absolute
    # position, so wrap negative values around.
    if dim is not None and dim < 0:
        dim += len(input.shape)

    input_trt = trt_(ctx.network, input)
    index_trt = trt_(ctx.network, index)
    output = ctx.method_return

    layer = ctx.network.add_gather(input_trt, index_trt, dim)
    output._trt = layer.get_output(0)
@tensorrt_converter('torch.nn.functional.linear')
def convert_linear(ctx):
    """Convert ``F.linear`` as ``input @ weight.t() (+ bias)``.

    The transpose and the matmul are delegated to ``convert_t`` and
    ``convert_matmul`` by temporarily re-pointing ``ctx`` at each intermediate
    operation.  The bias is added with an elementwise SUM layer.  ``ctx`` is
    always restored before returning, even if a sub-conversion raises.
    """
    old_method_args = ctx.method_args
    old_method_kwargs = ctx.method_kwargs

    input = ctx.method_args[0]
    weight = get_arg(ctx, 'weight', pos=1, default=None)
    bias = get_arg(ctx, 'bias', pos=2, default=None)
    output = ctx.method_return

    try:
        # transpose weight
        weight_transpose = weight.t()
        ctx.method_args = [weight]
        ctx.method_kwargs = {}
        ctx.method_return = weight_transpose
        convert_t(ctx)

        # matmul: must consume the *transposed* weight produced above,
        # otherwise the network multiplies by the untransposed matrix.
        matmul_output = input.matmul(weight_transpose)
        ctx.method_args = [input, weight_transpose]
        ctx.method_kwargs = {}
        ctx.method_return = matmul_output
        convert_matmul(ctx)

        # add bias
        if bias is not None:
            # An elementwise add, not a reduction: ``convert_sum`` would
            # interpret ``bias`` as the ``dim`` argument of ``torch.sum``.
            # NOTE(review): relies on trt_ aligning operand ranks, as the
            # other elementwise converters in this package do - confirm.
            matmul_trt, bias_trt = trt_(ctx.network, matmul_output, bias)
            layer = ctx.network.add_elementwise(
                matmul_trt, bias_trt, trt.ElementWiseOperation.SUM)
            output._trt = layer.get_output(0)
        else:
            output._trt = matmul_output._trt
    finally:
        ctx.method_args = old_method_args
        ctx.method_kwargs = old_method_kwargs
        ctx.method_return = output
-------------------------------------------------------------------------------- /torch2trt_dynamic/converters/linspace.py: -------------------------------------------------------------------------------- 1 | from ..torch2trt_dynamic import * 2 | 3 | @tensorrt_converter('torch.linspace') 4 | def convert_linspace(ctx): 5 | start = get_arg(ctx, 'start', pos=0, default=0) 6 | end = get_arg(ctx, 'end', pos=1, default=1) 7 | steps = get_arg(ctx, 'steps', pos=2, default=2) 8 | dtype = get_arg(ctx, 'dtype', pos=4, default=None) 9 | 10 | output = ctx.method_return 11 | dtype = output.dtype 12 | if dtype==torch.int64: 13 | dtype = torch.int32 14 | 15 | # check const 16 | is_const = True 17 | is_const = False if hasattr(start, '_trt') or hasattr(end, '_trt') or hasattr(steps, '_trt') else is_const 18 | 19 | if is_const: 20 | # create const value 21 | output_trt = trt_(ctx.network, output) 22 | 23 | else: 24 | ## create fill 25 | 26 | # compute shape 27 | start_trt = trt_(ctx.network, start) 28 | end_trt = trt_(ctx.network, end) 29 | steps_trt = trt_(ctx.network, steps) 30 | 31 | length_trt = steps_trt 32 | 33 | # to float 34 | one_trt = trt_(ctx.network, torch.tensor([1], dtype=torch.float32)) 35 | start_trt = trt_cast(ctx.network, start_trt, trt.DataType.FLOAT) 36 | end_trt = trt_cast(ctx.network, end_trt, trt.DataType.FLOAT) 37 | steps_trt = trt_cast(ctx.network, steps_trt, trt.DataType.FLOAT) 38 | 39 | # length = (end - start + step - 1) // step 40 | step_trt = ctx.network.add_elementwise(end_trt, start_trt, trt.ElementWiseOperation.SUB).get_output(0) 41 | step_div_trt = ctx.network.add_elementwise(steps_trt, one_trt, trt.ElementWiseOperation.SUB).get_output(0) 42 | step_trt = ctx.network.add_elementwise(step_trt, step_div_trt, trt.ElementWiseOperation.DIV).get_output(0) 43 | 44 | # start rank 0 45 | layer = ctx.network.add_shuffle(start_trt) 46 | layer.reshape_dims = tuple() 47 | start_trt = layer.get_output(0) 48 | 49 | layer = ctx.network.add_fill(output.shape, 
@tensorrt_converter('torch.nn.functional.max_pool2d')
def convert_max_pool2d(ctx):
    """Convert ``F.max_pool2d`` into a TensorRT MAX pooling layer.

    ``kernel_size``, ``stride`` and ``padding`` may each be a single int or a
    2-element tuple/list.  A missing ``stride`` falls back to ``kernel_size``,
    matching PyTorch.  ``dilation`` is parsed but not applied to the layer.
    """

    def _pair(value):
        # Normalise an int or a 2-element sequence to a tuple of two values.
        if isinstance(value, (tuple, list)):
            return tuple(value)
        return (value, ) * 2

    # parse args
    input = get_arg(ctx, 'input', pos=0, default=None)
    kernel_size = get_arg(ctx, 'kernel_size', pos=1, default=None)
    stride = get_arg(ctx, 'stride', pos=2, default=None)
    padding = get_arg(ctx, 'padding', pos=3, default=0)
    dilation = get_arg(ctx, 'dilation', pos=4, default=1)  # unused, see docstring
    ceil_mode = get_arg(ctx, 'ceil_mode', pos=5, default=False)

    # get input trt tensor (or create constant if it doesn't exist)
    input_trt = trt_(ctx.network, input)

    output = ctx.method_return

    kernel_size = _pair(kernel_size)

    # PyTorch treats an omitted (None / empty) stride as "same as kernel".
    if stride is None or (isinstance(stride, (tuple, list)) and not stride):
        stride = kernel_size
    else:
        stride = _pair(stride)

    padding = _pair(padding)

    layer = ctx.network.add_pooling(
        input=input_trt, type=trt.PoolingType.MAX, window_size=kernel_size)

    layer.stride = stride
    layer.padding = padding

    if ceil_mode:
        layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP

    output._trt = layer.get_output(0)
'dim', pos=1, default=None) 12 | keep_dims = get_arg(ctx, 'keepdim', pos=2, default=False) 13 | 14 | # get dims from args or kwargs 15 | if dim is None: 16 | dim = tuple(range(len(input.shape))) 17 | 18 | # convert list to tuple 19 | if isinstance(dim, list): 20 | dim = tuple(dim) 21 | 22 | if not isinstance(dim, tuple): 23 | dim = (dim, ) 24 | 25 | dim = tuple([d if d>=0 else len(input.shape)+d for d in dim]) 26 | 27 | # create axes bitmask for reduce layer 28 | axes = 0 29 | for d in dim: 30 | axes |= 1<= 0 21 | a = ctx.network.add_activation(input_trt, trt.ActivationType.RELU).get_output(0) 22 | 23 | # x <= 0 24 | b = ctx.network.add_unary(input_trt, trt.UnaryOperation.NEG).get_output(0) 25 | b = ctx.network.add_activation(b, trt.ActivationType.RELU).get_output(0) 26 | b = ctx.network.add_elementwise(b, weight_trt, trt.ElementWiseOperation.PROD).get_output(0) 27 | 28 | # y = a + b 29 | y = ctx.network.add_elementwise(a, b, trt.ElementWiseOperation.SUM) 30 | 31 | output._trt = y.get_output(0) 32 | 33 | 34 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 5)]) 35 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) 36 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)]) 37 | def test_prelu_scalar(): 38 | return torch.nn.PReLU() 39 | 40 | 41 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 5)]) 42 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3)]) 43 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 5, 3, 3)]) 44 | def test_prelu_vector(): 45 | m = torch.nn.PReLU(5) 46 | m.weight = torch.nn.Parameter(torch.randn(5)) # randn so each channel different 47 | return m -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/prod.py: -------------------------------------------------------------------------------- 1 | from torch2trt_dynamic.torch2trt_dynamic import * 2 | from torch2trt_dynamic.module_test import add_module_test 3 | 
@tensorrt_converter('torch.prod')
@tensorrt_converter('torch.Tensor.prod')
def convert_prod(ctx):
    """Convert ``prod`` into a TensorRT PROD reduce layer.

    When ``dim`` is not supplied, every dimension from index 1 onward is
    reduced.
    """
    tensor = ctx.method_args[0]
    fallback_dims = tuple(range(1, tensor.ndim))
    reduce_dims = get_arg(ctx, 'dim', pos=1, default=fallback_dims)
    keep = get_arg(ctx, 'keepdim', pos=2, default=False)

    tensor_trt = trt_(ctx.network, tensor)
    result = ctx.method_return

    reduce_layer = ctx.network.add_reduce(
        tensor_trt,
        trt.ReduceOperation.PROD,
        torch_dim_to_trt_axes(reduce_dims),
        keep)
    result._trt = reduce_layer.get_output(0)
@tensorrt_converter('torch.Tensor.repeat')
def convert_repeat(ctx):
    """Convert ``Tensor.repeat`` via the repeat plugin.

    The repeat counts may arrive either as one sequence argument or as
    separate int arguments; in the latter case all remaining positional
    arguments are used.
    """
    tensor = ctx.method_args[0]
    first_repeat_arg = ctx.method_args[1]
    if isinstance(first_repeat_arg, int):
        repeats = ctx.method_args[1:]
    else:
        repeats = first_repeat_arg

    tensor_trt = trt_(ctx.network, tensor)
    result = ctx.method_return

    plugin = create_repeat_plugin("repeat_" + str(id(tensor)),
                                  repeat_shape=repeats)

    repeat_layer = ctx.network.add_plugin_v2(
        inputs=[tensor_trt], plugin=plugin)

    result._trt = repeat_layer.get_output(0)
@tensorrt_converter('torchvision.ops.RoIAlign.forward')
def convert_RoiAlign(ctx):
    """Convert ``RoIAlign.forward`` by delegating to ``convert_roi_align``.

    The module's configuration is flattened into the positional argument list
    of the functional converter; ``ctx`` arguments are put back afterwards.
    """
    module = ctx.method_args[0]
    features = get_arg(ctx, 'input', pos=1, default=None)
    rois = get_arg(ctx, 'boxes', pos=2, default=None)

    saved_args, saved_kwargs = ctx.method_args, ctx.method_kwargs

    # positional layout: (input, boxes, output_size, spatial_scale,
    # sampling_ratio, aligned)
    ctx.method_args = [
        features,
        rois,
        module.output_size,
        module.spatial_scale,
        module.sampling_ratio,
        module.aligned,
    ]
    ctx.method_kwargs = {}
    convert_roi_align(ctx)

    ctx.method_args = saved_args
    ctx.method_kwargs = saved_kwargs
@tensorrt_converter('torchvision.ops.RoIPool.forward')
def convert_RoIPool(ctx):
    """Convert ``RoIPool.forward`` by delegating to ``convert_roi_pool``.

    The module's configuration is flattened into the positional argument list
    of the functional converter; ``ctx`` arguments are put back afterwards.
    """
    module = ctx.method_args[0]
    features = get_arg(ctx, 'input', pos=1, default=None)
    rois = get_arg(ctx, 'boxes', pos=2, default=None)

    saved_args, saved_kwargs = ctx.method_args, ctx.method_kwargs

    # positional layout: (input, boxes, output_size, spatial_scale)
    ctx.method_args = [
        features,
        rois,
        module.output_size,
        module.spatial_scale,
    ]
    ctx.method_kwargs = {}
    convert_roi_pool(ctx)

    ctx.method_args = saved_args
    ctx.method_kwargs = saved_kwargs
class TorchSplit(torch.nn.Module):
    """Test module that applies ``torch.split`` with fixed extra arguments."""

    def __init__(self, *args, **kwargs):
        super().__init__()
        # arguments forwarded to torch.split after the input tensor
        self.args, self.kwargs = args, kwargs

    def forward(self, x):
        split_args, split_kwargs = self.args, self.kwargs
        return torch.split(x, *split_args, **split_kwargs)
forward(self, x): 51 | return x.split(*self.args, **self.kwargs) 52 | 53 | 54 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) 55 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) 56 | def test_torch_split_1_1(): 57 | return TorchSplit(1, 1) 58 | 59 | 60 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) 61 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) 62 | def test_torch_split_2_1(): 63 | return TorchSplit(2, 1) 64 | 65 | 66 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) 67 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) 68 | def test_torch_split_3_1(): 69 | return TorchSplit(3, 1) 70 | 71 | 72 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)]) 73 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) 74 | def test_torch_split_3_2(): 75 | return TorchSplit(3, 2) 76 | 77 | 78 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)]) 79 | def test_tensor_split_3_2(): 80 | return TensorSplit(3, 2) -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/squeeze.py: -------------------------------------------------------------------------------- 1 | from torch2trt_dynamic.torch2trt_dynamic import * 2 | from torch2trt_dynamic.module_test import add_module_test 3 | from .identity import * 4 | 5 | 6 | @tensorrt_converter('torch.Tensor.squeeze') 7 | @tensorrt_converter('torch.squeeze') 8 | def convert_squeeze(ctx): 9 | 10 | input = ctx.method_args[0] 11 | dim = get_arg(ctx, 'dim', pos=1, default=None) 12 | if dim is None: 13 | dim = list(filter(lambda x:input.shape[x]==1, range(len(input.shape)))) 14 | else: 15 | if input.shape[dim]!=1: 16 | ctx.method_args = [input] 17 | convert_identity(ctx) 18 | return 19 | if dim <0: 20 | dim = len(input.shape)+dim 21 | dim = [dim] 22 | input_trt = trt_(ctx.network, input) 23 | shape_trt = 
@tensorrt_converter('torch.stack')
def convert_stack(ctx):
    """Convert ``torch.stack`` as unsqueeze-each-input followed by ``cat``.

    Each input is unsqueezed at ``dim`` through ``convert_unsqueeze`` and the
    results are concatenated along ``dim`` through ``convert_cat``.  ``ctx``
    is re-pointed at each intermediate operation and restored on exit, like
    the other composite converters in this package.
    """
    inputs = ctx.method_args[0]
    dim = get_arg(ctx, 'dim', pos=1, default=0)
    output = ctx.method_return

    old_method_args = ctx.method_args
    old_method_kwargs = ctx.method_kwargs

    try:
        # ``dim`` is passed positionally to the sub-converters below
        ctx.method_kwargs = {}

        unsqueeze_inputs = []
        for input in inputs:
            unsqueeze_input = input.unsqueeze(dim=dim)
            ctx.method_args = (input, dim)
            ctx.method_return = unsqueeze_input
            convert_unsqueeze(ctx)
            unsqueeze_inputs.append(unsqueeze_input)

        ctx.method_args = (unsqueeze_inputs, dim)
        ctx.method_return = output

        convert_cat(ctx)
    finally:
        ctx.method_args = old_method_args
        ctx.method_kwargs = old_method_kwargs
        ctx.method_return = output
@tensorrt_converter('torch.sub')
@tensorrt_converter('torch.Tensor.__isub__')
@tensorrt_converter('torch.Tensor.__sub__')
def convert_sub(ctx):
    """Emit a SUB elementwise layer computing ``args[0] - args[1]``."""
    input_a = ctx.method_args[0]
    input_b = ctx.method_args[1]
    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
    output = ctx.method_return
    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUB)
    output._trt = layer.get_output(0)


# Named ``convert_rsub`` (not ``convert_sub``) so that it does not rebind the
# module-level ``convert_sub`` to the reversed-operand implementation.
@tensorrt_converter('torch.Tensor.__rsub__')
def convert_rsub(ctx):
    """Emit a SUB elementwise layer for the reflected operator.

    Computes ``args[1] - args[0]``: the operands are flipped relative to
    ``convert_sub``.
    """
    input_a = ctx.method_args[1]
    input_b = ctx.method_args[0]  # flipped for rsub
    input_a_trt, input_b_trt = trt_(ctx.network, input_a, input_b)
    output = ctx.method_return
    layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUB)
    output._trt = layer.get_output(0)
| super(ISub, self).__init__() 42 | 43 | def forward(self, x, y): 44 | x -= y 45 | return x 46 | 47 | 48 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) 49 | def test_sub_isub(): 50 | return ISub() 51 | 52 | 53 | class TorchSub(torch.nn.Module): 54 | def __init__(self): 55 | super(TorchSub, self).__init__() 56 | 57 | def forward(self, x, y): 58 | return torch.sub(x, y) 59 | 60 | 61 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224), (1, 3, 224, 224)]) 62 | def test_torch_sub(): 63 | return TorchSub() 64 | 65 | 66 | class RSubInt(torch.nn.Module): 67 | def __init__(self): 68 | super(RSubInt, self).__init__() 69 | 70 | def forward(self, x): 71 | return 1 - x 72 | 73 | 74 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) 75 | def test_rsub_int(): 76 | return RSubInt() 77 | 78 | 79 | class RSubFloat(torch.nn.Module): 80 | def __init__(self): 81 | super(RSubFloat, self).__init__() 82 | 83 | def forward(self, x): 84 | return 1.0 - x 85 | 86 | 87 | @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 224, 224)]) 88 | def test_rsub_float(): 89 | return RSubFloat() -------------------------------------------------------------------------------- /torch2trt_dynamic/converters/sum.py: -------------------------------------------------------------------------------- 1 | from torch2trt_dynamic.torch2trt_dynamic import * 2 | from torch2trt_dynamic.module_test import add_module_test 3 | from .unary import UnaryModule 4 | 5 | 6 | @tensorrt_converter('torch.sum') 7 | @tensorrt_converter('torch.Tensor.sum') 8 | def convert_sum(ctx): 9 | input = ctx.method_args[0] 10 | dim = get_arg(ctx, 'dim', pos=1, default=tuple(range(1, input.ndim))) 11 | keepdim = get_arg(ctx, 'keepdim', pos=2, default=False) 12 | input_trt= trt_(ctx.network, input) 13 | output = ctx.method_return 14 | layer = ctx.network.add_reduce(input_trt, trt.ReduceOperation.SUM, torch_dim_to_trt_axes(dim), keepdim) 15 | 
@tensorrt_converter('torch.Tensor.t')
def convert_t(ctx):
    """Convert ``Tensor.t``.

    A 1-D tensor is passed through an identity layer; anything else is
    handled as a transpose of dimensions 1 and 0 via ``convert_transpose``.
    """
    tensor = ctx.method_args[0]
    tensor_trt = trt_(ctx.network, tensor)
    result = ctx.method_return

    if len(tensor.shape) != 1:
        # delegate: swap dims 1 and 0
        ctx.method_args = [tensor, 1, 0]
        ctx.method_kwargs = {}
        convert_transpose(ctx)
        return

    identity_layer = ctx.network.add_identity(tensor_trt)
    result._trt = identity_layer.get_output(0)
@tensorrt_converter('torch.Tensor.to')
def convert_Tensor_to(ctx):
    """Convert ``torch.Tensor.to``.

    When the call did not change the dtype, the input's TensorRT tensor is
    reused for the output. Otherwise the input is cast with ``trt_cast`` to
    the output dtype, with ``torch.int64`` replaced by ``torch.int32``.
    """
    source = ctx.method_args[0]
    result = ctx.method_return
    source_trt = trt_(ctx.network, source)

    # Same dtype (e.g. only the device changed): no cast layer needed.
    if result.dtype == source.dtype:
        result._trt = source_trt
        return

    # int64 targets are narrowed to int32 before casting.
    target_dtype = torch.int32 if result.dtype == torch.int64 else result.dtype
    result._trt = trt_cast(ctx.network, source_trt, target_dtype)
class Transpose(torch.nn.Module):
    """Module that swaps two dimensions of its input and returns a contiguous tensor."""

    def __init__(self, dim0, dim1):
        super().__init__()
        # The pair of dimensions handed to torch.transpose in forward().
        self.dim0, self.dim1 = dim0, dim1

    def forward(self, x):
        """Return ``torch.transpose(x, dim0, dim1)`` made contiguous."""
        swapped = torch.transpose(x, self.dim0, self.dim1)
        return swapped.contiguous()
@tensorrt_converter('torch.Tensor.view_as')
def convert_view_as(ctx):
    """Convert ``torch.Tensor.view_as``.

    Adds a shape layer on ``other`` and feeds its output as the second input
    of a shuffle layer applied to the input tensor, so the reshape target is
    taken from ``other`` at runtime. The shuffle output is attached to
    ``ctx.method_return._trt``.
    """
    source = ctx.method_args[0]
    reference = get_arg(ctx, 'other', pos=1, default=None)
    source_trt = trt_(ctx.network, source)
    reference_trt = trt_(ctx.network, reference)
    result = ctx.method_return

    # Runtime shape of `other`.
    shape_layer = ctx.network.add_shape(reference_trt)
    target_shape_trt = shape_layer.get_output(0)

    # Shuffle whose reshape dimensions come from input index 1.
    reshape_layer = ctx.network.add_shuffle(source_trt)
    reshape_layer.set_input(1, target_shape_trt)
    result._trt = reshape_layer.get_output(0)
@tensorrt_converter('torch.zeros_like')
def convert_zeros_like(ctx):
    """Convert ``torch.zeros_like``.

    The zeros are built from existing converters so the result follows the
    input's shape: ``input * 0`` (``convert_mul``), an optional dtype cast
    (``convert_float`` / ``convert_int`` / ``convert_bool``), then ``+ 0``
    (``convert_add``), which binds the result to ``ctx.method_return``.

    When no ``dtype`` is given, the input's dtype is used, matching
    ``torch.zeros_like``. If a cast is needed but the dtype is unsupported, a
    message is printed and the output is left without a ``_trt`` tensor, as
    before. The context's ``method_args``, ``method_kwargs`` and
    ``method_return`` are always restored on exit.
    """
    input = ctx.method_args[0]
    output = ctx.method_return

    dtype = get_arg(ctx, 'dtype', pos=1, default=None)
    if dtype is None:
        # torch.zeros_like defaults to the dtype of its input.
        dtype = input.dtype

    # Result unused here; the call is kept from the original in case trt_
    # registers the input with the network -- TODO confirm it can be dropped.
    trt_(ctx.network, input)

    cast_converters = {
        torch.float32: convert_float,
        torch.int32: convert_int,
        torch.int64: convert_int,
        torch.bool: convert_bool,
    }

    old_method_args = ctx.method_args
    old_method_kwargs = ctx.method_kwargs
    try:
        # input * 0 gives zeros with the input's shape.
        input_mul_zero = input * 0
        ctx.method_args = [input, 0]
        ctx.method_kwargs = {}
        ctx.method_return = input_mul_zero
        convert_mul(ctx)

        if dtype == input.dtype:
            # Already the requested dtype: no cast required.
            input_as_type = input_mul_zero
        else:
            convert_type_func = cast_converters.get(dtype)
            if convert_type_func is None:
                print("unsupported convert type:{}".format(dtype))
                input_as_type = None
            else:
                input_as_type = input_mul_zero.to(dtype)
                ctx.method_args = [input_mul_zero, dtype]
                ctx.method_return = input_as_type
                convert_type_func(ctx)

        if input_as_type is not None:
            # Adding 0 attaches the final tensor to the real output.
            ctx.method_args = [input_as_type, 0]
            ctx.method_kwargs = {}
            ctx.method_return = output
            convert_add(ctx)
    finally:
        ctx.method_args = old_method_args
        ctx.method_kwargs = old_method_kwargs
        ctx.method_return = output
def add_module_test(dtype, device, input_shapes, **torch2trt_kwargs):
    """Return a decorator that records the decorated callable in ``MODULE_TESTS``.

    Each use appends one ``ModuleTest`` built from the decorated callable and
    the arguments given here. The callable itself is returned unchanged.
    """
    def register_module_test(module):
        entry = ModuleTest(module, dtype, device, input_shapes, **torch2trt_kwargs)
        # In-place append on the module-level list.
        MODULE_TESTS.append(entry)
        return module

    return register_module_test
def create_exview_plugin(layer_name, expr_list):
    """Create an 'ExViewPluginDynamic' plugin (version '1').

    The entries of ``expr_list`` are joined with ';' and passed to the plugin
    as a uint8 array of character codes in the "dim_expression" CHAR field.

    Returns whatever ``creator.create_plugin(layer_name, fields)`` returns.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('ExViewPluginDynamic', '1', '')

    joined_expr = ';'.join(expr_list)
    char_codes = np.array([ord(ch) for ch in joined_expr], np.uint8)
    dim_expression_field = trt.PluginField(
        "dim_expression", char_codes, trt.PluginFieldType.CHAR)

    fields = trt.PluginFieldCollection()
    fields.append(dim_expression_field)

    return creator.create_plugin(layer_name, fields)
def create_groupnorm_plugin(layer_name,
                            num_groups,
                            num_channels,
                            W,
                            B,
                            eps=1e-5,
                            type_id=trt.DataType.FLOAT):
    """Create a 'GroupNormPluginDynamic' plugin (version '1').

    Fields are added in this order: "num_groups", "num_channels" (int32),
    "eps" (float32), "W" and "B" (passed through unchanged as FLOAT32
    fields) and "type_id" (int32).

    Returns whatever ``creator.create_plugin(layer_name, pfc)`` returns.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('GroupNormPluginDynamic', '1', '')

    int_type = trt.PluginFieldType.INT32
    float_type = trt.PluginFieldType.FLOAT32

    # The list keeps every field referenced until create_plugin has run,
    # just as the individual locals did before.
    plugin_fields = [
        trt.PluginField("num_groups", np.array([num_groups], dtype=np.int32), int_type),
        trt.PluginField("num_channels", np.array([num_channels], dtype=np.int32), int_type),
        trt.PluginField("eps", np.array([eps], dtype=np.float32), float_type),
        trt.PluginField("W", W, float_type),
        trt.PluginField("B", B, float_type),
        trt.PluginField("type_id", np.array([type_id], dtype=np.int32), int_type),
    ]

    pfc = trt.PluginFieldCollection()
    for plugin_field in plugin_fields:
        pfc.append(plugin_field)

    return creator.create_plugin(layer_name, pfc)
def create_meshgrid_plugin(layer_name,
                           num_inputs,
                           slice_dims=(2, 3),
                           starts=(0., 0.),
                           strides=(1., 1.)):
    """Create a 'MeshGridPluginDynamic' plugin (version '1').

    Args:
        layer_name: name passed to ``creator.create_plugin``.
        num_inputs: converted with ``int()`` and stored as an int32 field.
        slice_dims: sequence stored as the int32 "slice_dims" field.
        starts: sequence stored as the float32 "starts" field.
        strides: sequence stored as the float32 "strides" field.

    The sequence defaults are tuples rather than lists so no mutable object
    is shared between calls; lists are still accepted because every value
    goes through ``np.array``.

    Returns whatever ``creator.create_plugin(layer_name, pfc)`` returns.
    """
    creator = trt.get_plugin_registry().get_plugin_creator(
        'MeshGridPluginDynamic', '1', '')

    pfc = trt.PluginFieldCollection()

    pf_num_inputs = trt.PluginField("num_inputs", np.array(
        [int(num_inputs)], dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_num_inputs)

    pf_slice_dims = trt.PluginField("slice_dims", np.array(
        slice_dims, dtype=np.int32), trt.PluginFieldType.INT32)
    pfc.append(pf_slice_dims)

    pf_starts = trt.PluginField("starts", np.array(
        starts, dtype=np.float32), trt.PluginFieldType.FLOAT32)
    pfc.append(pf_starts)

    pf_strides = trt.PluginField("strides", np.array(
        strides, dtype=np.float32), trt.PluginFieldType.FLOAT32)
    pfc.append(pf_strides)

    return creator.create_plugin(layer_name, pfc)
def create_repeat_plugin(layer_name,
                         repeat_shape,
                         type_id=trt.DataType.FLOAT):
    """Create a 'RepeatDimsPluginDynamic' plugin (version '1').

    ``repeat_shape`` is stored as the int32 "repeat_dims" field and
    ``type_id`` as a one-element int32 "type_id" field.

    Returns whatever ``creator.create_plugin(layer_name, pfc)`` returns.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('RepeatDimsPluginDynamic', '1', '')

    int_type = trt.PluginFieldType.INT32
    # Held in a list so the fields stay referenced until create_plugin runs.
    plugin_fields = [
        trt.PluginField("repeat_dims", np.array(repeat_shape, dtype=np.int32), int_type),
        trt.PluginField("type_id", np.array([type_id], dtype=np.int32), int_type),
    ]

    pfc = trt.PluginFieldCollection()
    for plugin_field in plugin_fields:
        pfc.append(plugin_field)

    return creator.create_plugin(layer_name, pfc)
def create_roiextractor_plugin(layer_name,
                               out_size,
                               sample_num,
                               featmap_strides,
                               roi_scale_factor,
                               finest_scale,
                               aligned):
    """Create a 'RoiExtractorPluginDynamic' plugin (version '1').

    Fields are added in this order: "out_size", "sample_num" (int32),
    "featmap_strides" (array converted to float32), "roi_scale_factor"
    (float32), "finest_scale" and "aligned" (int32).

    Returns whatever ``creator.create_plugin(layer_name, pfc)`` returns.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('RoiExtractorPluginDynamic', '1', '')

    int_type = trt.PluginFieldType.INT32
    float_type = trt.PluginFieldType.FLOAT32

    def int_field(field_name, value):
        # One-element int32 field.
        return trt.PluginField(field_name, np.array([value], dtype=np.int32), int_type)

    # Held in a list so the fields stay referenced until create_plugin runs.
    plugin_fields = [
        int_field("out_size", out_size),
        int_field("sample_num", sample_num),
        trt.PluginField("featmap_strides",
                        np.array(featmap_strides).astype(np.float32), float_type),
        trt.PluginField("roi_scale_factor",
                        np.array([roi_scale_factor], dtype=np.float32), float_type),
        int_field("finest_scale", finest_scale),
        int_field("aligned", aligned),
    ]

    pfc = trt.PluginFieldCollection()
    for plugin_field in plugin_fields:
        pfc.append(plugin_field)

    return creator.create_plugin(layer_name, pfc)
def create_torchcum_plugin(layer_name, dim, cum_type):
    """Create a 'TorchCumPluginDynamic' plugin (version '1').

    ``dim`` and ``cum_type`` are each stored as a one-element int32 field
    named "dim" and "cum_type" respectively.

    Returns whatever ``creator.create_plugin(layer_name, pfc)`` returns.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('TorchCumPluginDynamic', '1', '')

    # Held in a list so the fields stay referenced until create_plugin runs.
    plugin_fields = [
        trt.PluginField(field_name, np.array([value], dtype=np.int32),
                        trt.PluginFieldType.INT32)
        for field_name, value in (("dim", dim), ("cum_type", cum_type))
    ]

    pfc = trt.PluginFieldCollection()
    for plugin_field in plugin_fields:
        pfc.append(plugin_field)

    return creator.create_plugin(layer_name, pfc)
def create_torchcummaxmin_plugin(layer_name, dim, cum_type):
    """Create a 'TorchCumMaxMinPluginDynamic' plugin (version '1').

    ``dim`` and ``cum_type`` are each stored as a one-element int32 field
    named "dim" and "cum_type" respectively.

    Returns whatever ``creator.create_plugin(layer_name, pfc)`` returns.
    """
    registry = trt.get_plugin_registry()
    creator = registry.get_plugin_creator('TorchCumMaxMinPluginDynamic', '1', '')

    int_type = trt.PluginFieldType.INT32
    dim_field = trt.PluginField("dim", np.array([dim], dtype=np.int32), int_type)
    cum_type_field = trt.PluginField(
        "cum_type", np.array([cum_type], dtype=np.int32), int_type)

    pfc = trt.PluginFieldCollection()
    for plugin_field in (dim_field, cum_type_field):
        pfc.append(plugin_field)

    return creator.create_plugin(layer_name, pfc)
def get_tensor_shape(self):
    """Return the tensor's shape by calling ``self.size()``."""
    return self.size()


# The attribute hook in place at import time, kept so it can be restored.
old_get_attribute = torch.Tensor.__getattribute__


def new_getattribute__(self, name):
    """Replacement for ``torch.Tensor.__getattribute__``.

    Reads of ``shape`` are answered with ``get_tensor_shape(self)`` (that is,
    ``self.size()``). Every other attribute goes to the original hook.
    """
    # Compare by value: `is` on a str only works when both strings happen to
    # be interned, and is a SyntaxWarning on Python 3.8+.
    if name == 'shape':
        return get_tensor_shape(self)
    return old_get_attribute(self, name)


class ShapeConverter:
    """Context manager that reroutes ``Tensor.shape`` to ``Tensor.size()``.

    On entry ``torch.Tensor.__getattribute__`` is replaced with
    ``new_getattribute__``; on exit, including after an exception, the hook
    captured at import time is put back.
    """

    def __init__(self):
        pass

    def __enter__(self):
        torch.Tensor.__getattribute__ = new_getattribute__
        return self

    def __exit__(self, type, val, tb):
        torch.Tensor.__getattribute__ = old_get_attribute
if __name__ == '__main__':
    # Half-precision DeepLabV3 model and a matching random input on the GPU.
    reference_model = deeplabv3_resnet50().cuda().eval().half()
    sample = torch.randn((1, 3, 224, 224)).cuda().half()

    print('Running torch2trt...')
    converted = torch2trt_dynamic(
        reference_model, [sample], fp16_mode=True, max_workspace_size=1 << 25)

    print('Saving model...')
    checkpoint_path = '.test_model.pth'
    torch.save(converted.state_dict(), checkpoint_path)

    print('Loading model...')
    reloaded = TRTModule()
    reloaded.load_state_dict(torch.load(checkpoint_path))

    assert reloaded.engine is not None

    # Largest absolute difference: reloaded engine vs. the PyTorch model ...
    error_vs_torch = torch.max(torch.abs(reloaded(sample) - reference_model(sample)))
    print(error_vs_torch)
    # ... and reloaded engine vs. the freshly converted one.
    error_vs_converted = torch.max(torch.abs(reloaded(sample) - converted(sample)))
    print(error_vs_converted)
def trt_network_to_dot_graph(network):
    """Build a ``graphviz.Digraph`` describing ``network``.

    One node is added per layer, per network input and per network output,
    each named after the object's ``name``. An edge labelled with the
    tensor's shape is added wherever

    * an output tensor of one layer equals an input tensor of another layer,
    * a network input equals a layer input, or
    * a network output equals a layer output.

    Returns the populated ``Digraph``.
    """
    dot = graphviz.Digraph(comment='Network')

    layers = [network.get_layer(idx) for idx in range(network.num_layers)]
    net_inputs = [network.get_input(idx) for idx in range(network.num_inputs)]
    net_outputs = [network.get_output(idx) for idx in range(network.num_outputs)]

    def tensors_in(layer):
        return [layer.get_input(idx) for idx in range(layer.num_inputs)]

    def tensors_out(layer):
        return [layer.get_output(idx) for idx in range(layer.num_outputs)]

    # Nodes: layers first, then network inputs, then network outputs.
    for named in layers + net_inputs + net_outputs:
        dot.node(named.name)

    # layer -> layer edges
    for producer in layers:
        for consumer in layers:
            for produced in tensors_out(producer):
                for consumed in tensors_in(consumer):
                    if produced == consumed:
                        dot.edge(producer.name, consumer.name,
                                 label=str(consumed.shape))

    # input -> layer edges
    for source in net_inputs:
        for consumer in layers:
            for consumed in tensors_in(consumer):
                if source == consumed:
                    dot.edge(source.name, consumer.name,
                             label=str(consumed.shape))

    # layer -> output edges
    for sink in net_outputs:
        for producer in layers:
            for produced in tensors_out(producer):
                if sink == produced:
                    dot.edge(producer.name, sink.name,
                             label=str(produced.shape))

    return dot