├── .cm ├── alias-a-soft ├── alias-u-5e1100048ab875d7 ├── alias-a-package ├── alias-a-program ├── alias-a-script ├── alias-u-1dc07ee0f4742028 ├── alias-u-84e27ad9dd12e734 └── alias-u-b0ac08fe1d3c2615 ├── soft ├── lib.tensorrt │ ├── .cm │ │ ├── desc.json │ │ ├── meta.json │ │ └── info.json │ └── customize.py ├── model.tensorrt │ ├── .cm │ │ ├── desc.json │ │ ├── meta.json │ │ └── info.json │ ├── customize.py │ ├── README.md │ └── README_object_detection.md ├── lib.jetson-inference │ ├── .cm │ │ ├── desc.json │ │ ├── meta.json │ │ └── info.json │ └── customize.py ├── lib.python.tensorrt │ └── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json ├── plugin.tensorrt.nms │ ├── .cm │ │ ├── desc.json │ │ ├── meta.json │ │ └── info.json │ ├── README.md │ └── customize.py ├── .cm │ ├── alias-a-lib.tensorrt │ ├── alias-a-model.tensorrt │ ├── alias-u-63257b98ef23405c │ ├── alias-u-ce8554cdf428aa14 │ ├── alias-a-lib.jetson-inference │ ├── alias-a-lib.python.tensorrt │ ├── alias-a-plugin.tensorrt.nms │ ├── alias-u-428380f09d31096d │ ├── alias-u-589e881dee2c8b2e │ ├── alias-u-7929205eba0ebed0 │ ├── alias-a-lib.python.tensorrt-helper │ └── alias-u-ef3af426cb59aaed └── lib.python.tensorrt-helper │ ├── .cm │ ├── desc.json │ ├── info.json │ └── meta.json │ └── tensorrt_helper │ └── __init__.py ├── program ├── tensorrt-test │ ├── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json │ └── tensorrt-test.cpp ├── tensorrt-time │ ├── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json │ └── postprocess.py ├── .cm │ ├── alias-a-tensorrt-test │ ├── alias-a-tensorrt-time │ ├── alias-u-5d4e5a43da89baca │ ├── alias-u-5e23a492b9138354 │ ├── alias-a-image-classification-tensorrt-py │ ├── alias-a-object-detection-tensorrt-py │ ├── alias-u-adedfdb48403db8a │ └── alias-u-b244e68887347d16 ├── object-detection-tensorrt-py │ ├── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json │ └── tensorrt_detect_preprocessed.py └── image-classification-tensorrt-py │ ├── .cm │ ├── desc.json 
│ ├── info.json │ └── meta.json │ ├── README.md │ └── tensorrt_classify_preprocessed.py ├── package ├── lib-python-tensorrt │ └── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json ├── plugin-tensorrt-nms-download │ ├── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json │ └── README.md ├── .cm │ ├── alias-a-lib-python-tensorrt │ ├── alias-u-d74743c03f4acea8 │ ├── alias-a-model-tensorrt-convert-from-onnx │ ├── alias-a-model-tensorrt-convert-from-tf │ ├── alias-a-plugin-tensorrt-nms-download │ ├── alias-u-0b374ca0d97941fc │ ├── alias-u-162af6920fe54fd9 │ ├── alias-u-2a9f94abcb4462b7 │ ├── alias-a-model-tensorrt-convert-from-caffe │ ├── alias-a-model-tensorrt-download-for.gtx1080 │ ├── alias-a-model-tensorrt-download-for.xavier │ ├── alias-u-7457196d83d72398 │ ├── alias-u-9604ef2ad744d3c2 │ ├── alias-u-cb4c7c3239643219 │ ├── alias-a-lib-jetson-inference-dividiti-master-cuda │ ├── alias-a-lib-jetson-inference-dusty-nv-master-cuda │ ├── alias-u-046ad1842c84fe38 │ ├── alias-u-0ba9e4ed581ffa1e │ ├── alias-a-caffemodel-deepscale-squeezenet-1.0-explicit-window-global-pooling │ ├── alias-a-caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling │ ├── alias-u-28370e3cdc413088 │ └── alias-u-a41361f20a5741f3 ├── model-tensorrt-convert-from-caffe │ ├── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json │ ├── install.sh │ └── caffe2tensorrt_model_converter.py ├── model-tensorrt-convert-from-onnx │ ├── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json │ ├── install.sh │ └── onnx2tensorrt_model_converter.py ├── model-tensorrt-convert-from-tf │ ├── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json │ ├── install.sh │ └── tf2tensorrt_model_converter.py ├── model-tensorrt-download-for.xavier │ ├── .cm │ │ ├── desc.json │ │ └── info.json │ └── copy │ │ └── coco_flatlabels.txt ├── model-tensorrt-download-for.gtx1080 │ ├── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json │ └── copy │ │ └── coco_flatlabels.txt ├── 
lib-jetson-inference-dividiti-master-cuda │ └── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json ├── lib-jetson-inference-dusty-nv-master-cuda │ ├── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json │ ├── install.sh │ └── CMakeLists.txt ├── caffemodel-deepscale-squeezenet-1.0-explicit-window-global-pooling │ ├── .cm │ │ ├── desc.json │ │ ├── info.json │ │ └── meta.json │ ├── solver.prototxt │ ├── LICENSE │ └── README.md └── caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling │ ├── .cm │ ├── desc.json │ ├── info.json │ └── meta.json │ ├── solver.prototxt │ ├── LICENSE │ └── README.md ├── script ├── explore-accuracy │ ├── .cm │ │ ├── desc.json │ │ ├── meta.json │ │ └── info.json │ ├── _clean_experiment_entries.sh │ └── explore-accuracy.py ├── .cm │ ├── alias-a-explore-accuracy │ ├── alias-u-eac6bcaec7cab0ba │ ├── alias-a-explore-batch-size-libs-models │ ├── alias-a-image-classification-tensorrt-py │ ├── alias-u-3b88317cc2c488c5 │ └── alias-u-457db05c346fff37 ├── explore-batch-size-libs-models │ ├── .cm │ │ ├── desc.json │ │ ├── meta.json │ │ └── info.json │ ├── _clean_experiment_entries.sh │ ├── benchmark.nvidia-tx1.py │ └── benchmark.nvidia-gtx1080.py └── image-classification-tensorrt-py │ ├── .cm │ ├── desc.json │ ├── meta.json │ └── info.json │ └── run.sh ├── .gitignore ├── COPYRIGHT.txt ├── AUTHORS ├── .ckr.json ├── LICENSE.txt └── README.md /.cm/alias-a-soft: -------------------------------------------------------------------------------- 1 | 5e1100048ab875d7 2 | -------------------------------------------------------------------------------- /.cm/alias-u-5e1100048ab875d7: -------------------------------------------------------------------------------- 1 | soft 2 | -------------------------------------------------------------------------------- /soft/lib.tensorrt/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | 
-------------------------------------------------------------------------------- /.cm/alias-a-package: -------------------------------------------------------------------------------- 1 | 1dc07ee0f4742028 2 | -------------------------------------------------------------------------------- /.cm/alias-a-program: -------------------------------------------------------------------------------- 1 | b0ac08fe1d3c2615 2 | -------------------------------------------------------------------------------- /.cm/alias-a-script: -------------------------------------------------------------------------------- 1 | 84e27ad9dd12e734 2 | -------------------------------------------------------------------------------- /.cm/alias-u-1dc07ee0f4742028: -------------------------------------------------------------------------------- 1 | package 2 | -------------------------------------------------------------------------------- /.cm/alias-u-84e27ad9dd12e734: -------------------------------------------------------------------------------- 1 | script 2 | -------------------------------------------------------------------------------- /.cm/alias-u-b0ac08fe1d3c2615: -------------------------------------------------------------------------------- 1 | program 2 | -------------------------------------------------------------------------------- /program/tensorrt-test/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /program/tensorrt-time/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /soft/model.tensorrt/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- 
/package/lib-python-tensorrt/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /script/explore-accuracy/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /script/explore-accuracy/.cm/meta.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /soft/lib.jetson-inference/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /soft/lib.python.tensorrt/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /soft/plugin.tensorrt.nms/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-a-lib.tensorrt: -------------------------------------------------------------------------------- 1 | 63257b98ef23405c 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-a-model.tensorrt: -------------------------------------------------------------------------------- 1 | ce8554cdf428aa14 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-u-63257b98ef23405c: -------------------------------------------------------------------------------- 1 | lib.tensorrt 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-u-ce8554cdf428aa14: 
-------------------------------------------------------------------------------- 1 | model.tensorrt 2 | -------------------------------------------------------------------------------- /soft/lib.python.tensorrt-helper/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /package/plugin-tensorrt-nms-download/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /program/.cm/alias-a-tensorrt-test: -------------------------------------------------------------------------------- 1 | 5e23a492b9138354 2 | -------------------------------------------------------------------------------- /program/.cm/alias-a-tensorrt-time: -------------------------------------------------------------------------------- 1 | 5d4e5a43da89baca 2 | -------------------------------------------------------------------------------- /program/.cm/alias-u-5d4e5a43da89baca: -------------------------------------------------------------------------------- 1 | tensorrt-time 2 | -------------------------------------------------------------------------------- /program/.cm/alias-u-5e23a492b9138354: -------------------------------------------------------------------------------- 1 | tensorrt-test 2 | -------------------------------------------------------------------------------- /program/object-detection-tensorrt-py/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /script/.cm/alias-a-explore-accuracy: -------------------------------------------------------------------------------- 1 | eac6bcaec7cab0ba 2 | -------------------------------------------------------------------------------- 
/script/.cm/alias-u-eac6bcaec7cab0ba: -------------------------------------------------------------------------------- 1 | explore-accuracy 2 | -------------------------------------------------------------------------------- /script/explore-batch-size-libs-models/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /script/explore-batch-size-libs-models/.cm/meta.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-a-lib.jetson-inference: -------------------------------------------------------------------------------- 1 | 428380f09d31096d 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-a-lib.python.tensorrt: -------------------------------------------------------------------------------- 1 | 589e881dee2c8b2e 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-a-plugin.tensorrt.nms: -------------------------------------------------------------------------------- 1 | 7929205eba0ebed0 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-u-428380f09d31096d: -------------------------------------------------------------------------------- 1 | lib.jetson-inference 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-u-589e881dee2c8b2e: -------------------------------------------------------------------------------- 1 | lib.python.tensorrt 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-u-7929205eba0ebed0: -------------------------------------------------------------------------------- 1 | plugin.tensorrt.nms 2 | 
-------------------------------------------------------------------------------- /package/.cm/alias-a-lib-python-tensorrt: -------------------------------------------------------------------------------- 1 | d74743c03f4acea8 2 | -------------------------------------------------------------------------------- /package/.cm/alias-u-d74743c03f4acea8: -------------------------------------------------------------------------------- 1 | lib-python-tensorrt 2 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-caffe/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-onnx/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-tf/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /package/model-tensorrt-download-for.xavier/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /program/image-classification-tensorrt-py/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /script/image-classification-tensorrt-py/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /script/image-classification-tensorrt-py/.cm/meta.json: 
-------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /package/model-tensorrt-download-for.gtx1080/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-a-lib.python.tensorrt-helper: -------------------------------------------------------------------------------- 1 | ef3af426cb59aaed 2 | -------------------------------------------------------------------------------- /soft/.cm/alias-u-ef3af426cb59aaed: -------------------------------------------------------------------------------- 1 | lib.python.tensorrt-helper 2 | -------------------------------------------------------------------------------- /package/.cm/alias-a-model-tensorrt-convert-from-onnx: -------------------------------------------------------------------------------- 1 | 2a9f94abcb4462b7 2 | -------------------------------------------------------------------------------- /package/.cm/alias-a-model-tensorrt-convert-from-tf: -------------------------------------------------------------------------------- 1 | 0b374ca0d97941fc 2 | -------------------------------------------------------------------------------- /package/.cm/alias-a-plugin-tensorrt-nms-download: -------------------------------------------------------------------------------- 1 | 162af6920fe54fd9 2 | -------------------------------------------------------------------------------- /package/.cm/alias-u-0b374ca0d97941fc: -------------------------------------------------------------------------------- 1 | model-tensorrt-convert-from-tf 2 | -------------------------------------------------------------------------------- /package/.cm/alias-u-162af6920fe54fd9: -------------------------------------------------------------------------------- 1 | plugin-tensorrt-nms-download 2 | 
-------------------------------------------------------------------------------- /package/.cm/alias-u-2a9f94abcb4462b7: -------------------------------------------------------------------------------- 1 | model-tensorrt-convert-from-onnx 2 | -------------------------------------------------------------------------------- /package/lib-jetson-inference-dividiti-master-cuda/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /package/lib-jetson-inference-dusty-nv-master-cuda/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /program/.cm/alias-a-image-classification-tensorrt-py: -------------------------------------------------------------------------------- 1 | b244e68887347d16 2 | -------------------------------------------------------------------------------- /program/.cm/alias-a-object-detection-tensorrt-py: -------------------------------------------------------------------------------- 1 | adedfdb48403db8a 2 | -------------------------------------------------------------------------------- /program/.cm/alias-u-adedfdb48403db8a: -------------------------------------------------------------------------------- 1 | object-detection-tensorrt-py 2 | -------------------------------------------------------------------------------- /program/.cm/alias-u-b244e68887347d16: -------------------------------------------------------------------------------- 1 | image-classification-tensorrt-py 2 | -------------------------------------------------------------------------------- /script/.cm/alias-a-explore-batch-size-libs-models: -------------------------------------------------------------------------------- 1 | 457db05c346fff37 2 | -------------------------------------------------------------------------------- 
/script/.cm/alias-a-image-classification-tensorrt-py: -------------------------------------------------------------------------------- 1 | 3b88317cc2c488c5 2 | -------------------------------------------------------------------------------- /script/.cm/alias-u-3b88317cc2c488c5: -------------------------------------------------------------------------------- 1 | image-classification-tensorrt-py 2 | -------------------------------------------------------------------------------- /script/.cm/alias-u-457db05c346fff37: -------------------------------------------------------------------------------- 1 | explore-batch-size-libs-models 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *tmp/ 3 | *.pyc 4 | *_tmp.json 5 | .ipynb_checkpoints/ 6 | -------------------------------------------------------------------------------- /package/.cm/alias-a-model-tensorrt-convert-from-caffe: -------------------------------------------------------------------------------- 1 | 7457196d83d72398 2 | -------------------------------------------------------------------------------- /package/.cm/alias-a-model-tensorrt-download-for.gtx1080: -------------------------------------------------------------------------------- 1 | 9604ef2ad744d3c2 2 | -------------------------------------------------------------------------------- /package/.cm/alias-a-model-tensorrt-download-for.xavier: -------------------------------------------------------------------------------- 1 | cb4c7c3239643219 2 | -------------------------------------------------------------------------------- /package/.cm/alias-u-7457196d83d72398: -------------------------------------------------------------------------------- 1 | model-tensorrt-convert-from-caffe 2 | -------------------------------------------------------------------------------- /package/.cm/alias-u-9604ef2ad744d3c2: 
-------------------------------------------------------------------------------- 1 | model-tensorrt-download-for.gtx1080 2 | -------------------------------------------------------------------------------- /package/.cm/alias-u-cb4c7c3239643219: -------------------------------------------------------------------------------- 1 | model-tensorrt-download-for.xavier 2 | -------------------------------------------------------------------------------- /package/.cm/alias-a-lib-jetson-inference-dividiti-master-cuda: -------------------------------------------------------------------------------- 1 | 046ad1842c84fe38 2 | -------------------------------------------------------------------------------- /package/.cm/alias-a-lib-jetson-inference-dusty-nv-master-cuda: -------------------------------------------------------------------------------- 1 | 0ba9e4ed581ffa1e 2 | -------------------------------------------------------------------------------- /package/.cm/alias-u-046ad1842c84fe38: -------------------------------------------------------------------------------- 1 | lib-jetson-inference-dividiti-master-cuda 2 | -------------------------------------------------------------------------------- /package/.cm/alias-u-0ba9e4ed581ffa1e: -------------------------------------------------------------------------------- 1 | lib-jetson-inference-dusty-nv-master-cuda 2 | -------------------------------------------------------------------------------- /package/caffemodel-deepscale-squeezenet-1.0-explicit-window-global-pooling/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /package/caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling/.cm/desc.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /COPYRIGHT.txt: 
-------------------------------------------------------------------------------- 1 | Copyright (c) 2016 cTuning foundation. 2 | Copyright (c) 2019-2020 dividiti Limited. 3 | -------------------------------------------------------------------------------- /package/.cm/alias-a-caffemodel-deepscale-squeezenet-1.0-explicit-window-global-pooling: -------------------------------------------------------------------------------- 1 | a41361f20a5741f3 2 | -------------------------------------------------------------------------------- /package/.cm/alias-a-caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling: -------------------------------------------------------------------------------- 1 | 28370e3cdc413088 2 | -------------------------------------------------------------------------------- /package/.cm/alias-u-28370e3cdc413088: -------------------------------------------------------------------------------- 1 | caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling 2 | -------------------------------------------------------------------------------- /package/.cm/alias-u-a41361f20a5741f3: -------------------------------------------------------------------------------- 1 | caffemodel-deepscale-squeezenet-1.0-explicit-window-global-pooling 2 | -------------------------------------------------------------------------------- /script/explore-batch-size-libs-models/_clean_experiment_entries.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ck rm local:experiment:*tensorrt-1.0.0 3 | -------------------------------------------------------------------------------- /script/explore-accuracy/_clean_experiment_entries.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ck rm local:experiment:imagenet-val-accuracy-*-tensorrt-1.0.0 3 | -------------------------------------------------------------------------------- 
/package/plugin-tensorrt-nms-download/README.md: -------------------------------------------------------------------------------- 1 | Initiate downloading and installation of the NMS plugin by hand, if necessary: 2 | 3 | ```bash 4 | ck install package --tags=trt,plugin,nms,downloaded 5 | ``` 6 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-onnx/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $CK_ENV_COMPILER_PYTHON_FILE $PACKAGE_DIR/onnx2tensorrt_model_converter.py "${CK_ENV_ONNX_MODEL_ONNX_FILEPATH}" "${INSTALL_DIR}/${PACKAGE_NAME}" --output_data_type "${ML_MODEL_DATA_TYPE}" --max_batch_size "${ML_MODEL_MAX_BATCH_SIZE}" 4 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-caffe/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $CK_ENV_COMPILER_PYTHON_FILE $PACKAGE_DIR/caffe2tensorrt_model_converter.py "${CK_ENV_MODEL_CAFFE_WEIGHTS}" "${CK_ENV_MODEL_CAFFE_DEPLOY}" "${INSTALL_DIR}/${PACKAGE_NAME}" --output_data_type "${ML_MODEL_DATA_TYPE}" --max_batch_size "${ML_MODEL_MAX_BATCH_SIZE}" 4 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | ======================================================================= 2 | N: Anton Lokhmotov 3 | E: anton@dividiti.com 4 | H: https://www.hipeac.net/~anton 5 | O: dividiti, UK 6 | C: 7 | W: 8 | 9 | ======================================================================= 10 | N: Daniil Efremov 11 | E: daniil.efremov@xored.com 12 | H: 13 | O: Xored 14 | C: 15 | W: 16 | 17 | -------------------------------------------------------------------------------- /soft/plugin.tensorrt.nms/README.md: 
-------------------------------------------------------------------------------- 1 | Assuming you have generated the NMS plugin on the system, 2 | 3 | register it directly: 4 | 5 | ```bash 6 | ck detect soft --tags=tensorrt,plugin,nms --full_path=/datasets/xavier-zenodo/libnmsoptplugin.so 7 | ``` 8 | 9 | or search for it: 10 | 11 | ```bash 12 | ck detect soft --tags=tensorrt,plugin,nms --search_dirs=/datasets 13 | ``` 14 | -------------------------------------------------------------------------------- /package/plugin-tensorrt-nms-download/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "162af6920fe54fd9", 3 | "backup_module_uid": "1dc07ee0f4742028", 4 | "backup_module_uoa": "package", 5 | "control": { 6 | "engine": "CK", 7 | "iso_datetime": "2020-03-19T12:54:05.042443", 8 | "version": [ 9 | "1", 10 | "12", 11 | "2" 12 | ] 13 | }, 14 | "data_name": "plugin-tensorrt-nms-download" 15 | } 16 | -------------------------------------------------------------------------------- /program/object-detection-tensorrt-py/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "adedfdb48403db8a", 3 | "backup_module_uid": "b0ac08fe1d3c2615", 4 | "backup_module_uoa": "program", 5 | "control": { 6 | "engine": "CK", 7 | "iso_datetime": "2020-02-13T14:36:56.352728", 8 | "version": [ 9 | "1", 10 | "12", 11 | "2" 12 | ] 13 | }, 14 | "data_name": "object-detection-tensorrt-py" 15 | } 16 | -------------------------------------------------------------------------------- /script/image-classification-tensorrt-py/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "3b88317cc2c488c5", 3 | "backup_module_uid": "84e27ad9dd12e734", 4 | "backup_module_uoa": "script", 5 | "control": { 6 | "engine": "CK", 7 | "iso_datetime": "2019-11-20T12:33:18.324436", 8 | "version": [ 9 | "1", 10 | 
"11", 11 | "4" 12 | ] 13 | }, 14 | "data_name": "image-classification-tensorrt-py" 15 | } 16 | -------------------------------------------------------------------------------- /package/model-tensorrt-download-for.xavier/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "cb4c7c3239643219", 3 | "backup_module_uid": "1dc07ee0f4742028", 4 | "backup_module_uoa": "package", 5 | "control": { 6 | "engine": "CK", 7 | "iso_datetime": "2020-03-18T18:19:52.072937", 8 | "version": [ 9 | "1", 10 | "12", 11 | "2" 12 | ] 13 | }, 14 | "data_name": "model-tensorrt-download-for.xavier" 15 | } 16 | -------------------------------------------------------------------------------- /soft/model.tensorrt/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "auto_detect": "yes", 3 | "customize": { 4 | "check_that_exists": "yes", 5 | "ck_version": 10, 6 | "env_prefix": "CK_ENV_TENSORRT_MODEL", 7 | "soft_file_universal": "*.trt" 8 | }, 9 | "soft_name": "TensorRT model", 10 | "tags": [ 11 | "tensorrt", 12 | "trt", 13 | "model" 14 | ], 15 | "template": "yes", 16 | "template_type": "TensorRT model" 17 | } 18 | -------------------------------------------------------------------------------- /package/model-tensorrt-download-for.gtx1080/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "9604ef2ad744d3c2", 3 | "backup_module_uid": "1dc07ee0f4742028", 4 | "backup_module_uoa": "package", 5 | "control": { 6 | "engine": "CK", 7 | "iso_datetime": "2020-03-30T13:31:42.990896", 8 | "version": [ 9 | "1", 10 | "12", 11 | "2" 12 | ] 13 | }, 14 | "data_name": "model-tensorrt-download-for.gtx1080" 15 | } 16 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-onnx/.cm/info.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "2a9f94abcb4462b7", 3 | "backup_module_uid": "1dc07ee0f4742028", 4 | "backup_module_uoa": "package", 5 | "control": { 6 | "engine": "CK", 7 | "iso_datetime": "2019-11-13T14:47:27.210263", 8 | "version": [ 9 | "1", 10 | "11", 11 | "4", 12 | "1" 13 | ] 14 | }, 15 | "data_name": "model-tensorrt-convert-from-onnx" 16 | } 17 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-tf/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "0b374ca0d97941fc", 3 | "backup_module_uid": "1dc07ee0f4742028", 4 | "backup_module_uoa": "package", 5 | "control": { 6 | "engine": "CK", 7 | "iso_datetime": "2019-12-02T13:13:46.100191", 8 | "version": [ 9 | "1", 10 | "11", 11 | "4", 12 | "1" 13 | ] 14 | }, 15 | "data_name": "model-tensorrt-convert-from-tf" 16 | } 17 | -------------------------------------------------------------------------------- /program/image-classification-tensorrt-py/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "b244e68887347d16", 3 | "backup_module_uid": "b0ac08fe1d3c2615", 4 | "backup_module_uoa": "program", 5 | "control": { 6 | "engine": "CK", 7 | "iso_datetime": "2019-11-05T17:09:45.359346", 8 | "version": [ 9 | "1", 10 | "11", 11 | "4", 12 | "1" 13 | ] 14 | }, 15 | "data_name": "image-classification-tensorrt-py" 16 | } 17 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-caffe/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "7457196d83d72398", 3 | "backup_module_uid": "1dc07ee0f4742028", 4 | "backup_module_uoa": "package", 5 | "control": { 6 | "engine": "CK", 7 | "iso_datetime": 
"2019-11-05T14:39:47.527941", 8 | "version": [ 9 | "1", 10 | "11", 11 | "4", 12 | "1" 13 | ] 14 | }, 15 | "data_name": "model-tensorrt-convert-from-caffe" 16 | } 17 | -------------------------------------------------------------------------------- /soft/lib.tensorrt/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "auto_detect": "yes", 3 | "customize": { 4 | "check_that_exists": "yes", 5 | "env_prefix": "CK_ENV_LIB_TENSORRT", 6 | "limit_recursion_dir_search": { 7 | "linux": 3 8 | }, 9 | "soft_file": { 10 | "linux": "libnvinfer.so" 11 | } 12 | }, 13 | "soft_name": "TensorRT engine", 14 | "tags": [ 15 | "lib", 16 | "tensorrt", 17 | "static", 18 | "inference" 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /soft/plugin.tensorrt.nms/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "auto_detect": "yes", 3 | "customize": { 4 | "env_prefix": "CK_ENV_TENSORRT_PLUGIN", 5 | "limit_recursion_dir_search": { 6 | "linux": 3, 7 | "win": 3 8 | }, 9 | "soft_file_universal": "libnmsoptplugin.so", 10 | "version": "0.5" 11 | }, 12 | "deps": {}, 13 | "soft_name": "TensorRT NMS plugin", 14 | "tags": [ 15 | "tensorrt", 16 | "trt", 17 | "plugin", 18 | "nms" 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /soft/lib.jetson-inference/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "auto_detect": "yes", 3 | "customize": { 4 | "check_that_exists": "yes", 5 | "env_prefix": "CK_ENV_LIB_JETSON_INFERENCE", 6 | "limit_recursion_dir_search": { 7 | "linux": 2 8 | }, 9 | "soft_file": { 10 | "linux": "libjetson-inference.so" 11 | } 12 | }, 13 | "soft_name": "Jetson-inference library", 14 | "tags": [ 15 | "lib", 16 | "jetson", 17 | "inference", 18 | "jetson-inference" 19 | ] 20 | } 21 | 
-------------------------------------------------------------------------------- /.ckr.json: -------------------------------------------------------------------------------- 1 | { 2 | "data_uid": "3abe4a16a4fb17d7", 3 | "data_name": "ck-tensorrt", 4 | "dict": { 5 | "url": "http://github.com/dividiti/ck-tensorrt", 6 | "shared": "git", 7 | "repo_deps": [ 8 | { 9 | "repo_uoa": "ck-crowdtuning" 10 | }, 11 | { 12 | "repo_uoa": "ck-mlperf" 13 | }, 14 | { 15 | "repo_uoa": "ck-caffe", 16 | "repo_url": "https://github.com/dividiti/ck-caffe" 17 | } 18 | ] 19 | }, 20 | "data_alias": "ck-tensorrt", 21 | "data_uoa": "ck-tensorrt" 22 | } 23 | -------------------------------------------------------------------------------- /package/caffemodel-deepscale-squeezenet-1.0-explicit-window-global-pooling/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "a41361f20a5741f3", 3 | "backup_module_uid": "1dc07ee0f4742028", 4 | "backup_module_uoa": "package", 5 | "control": { 6 | "author": "DeepScale", 7 | "author_webpage": "http://deepscale.ai", 8 | "engine": "CK", 9 | "iso_datetime": "2017-01-17T16:12:05.235256", 10 | "license": "BSD", 11 | "version": [ 12 | "1", 13 | "8", 14 | "6", 15 | "1" 16 | ] 17 | }, 18 | "data_name": "caffemodel-deepscale-squeezenet-1.0-explicit-window-global-pooling" 19 | } 20 | -------------------------------------------------------------------------------- /package/caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "28370e3cdc413088", 3 | "backup_module_uid": "1dc07ee0f4742028", 4 | "backup_module_uoa": "package", 5 | "control": { 6 | "author": "DeepScale", 7 | "author_webpage": "http://deepscale.ai", 8 | "engine": "CK", 9 | "iso_datetime": "2017-01-17T16:24:52.276169", 10 | "license": "BSD", 11 | "version": [ 12 | "1", 13 | "8", 14 | "6", 15 | "1" 16 | 
] 17 | }, 18 | "data_name": "caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling" 19 | } 20 | -------------------------------------------------------------------------------- /soft/plugin.tensorrt.nms/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "7929205eba0ebed0", 3 | "backup_module_uid": "5e1100048ab875d7", 4 | "backup_module_uoa": "soft", 5 | "control": { 6 | "author": "cTuning foundation", 7 | "author_email": "admin@cTuning.org", 8 | "author_webpage": "http://cTuning.org", 9 | "copyright": "See CK COPYRIGHT.txt for copyright details", 10 | "engine": "CK", 11 | "iso_datetime": "2020-03-19T11:05:13.355243", 12 | "license": "See CK LICENSE.txt for licensing details", 13 | "version": [ 14 | "1", 15 | "12", 16 | "2" 17 | ] 18 | }, 19 | "data_name": "plugin.tensorrt.nms" 20 | } 21 | -------------------------------------------------------------------------------- /soft/lib.python.tensorrt-helper/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "ef3af426cb59aaed", 3 | "backup_module_uid": "5e1100048ab875d7", 4 | "backup_module_uoa": "soft", 5 | "control": { 6 | "author": "cTuning foundation", 7 | "author_email": "admin@cTuning.org", 8 | "author_webpage": "http://cTuning.org", 9 | "copyright": "See CK COPYRIGHT.txt for copyright details", 10 | "engine": "CK", 11 | "iso_datetime": "2020-06-05T15:19:58.588975", 12 | "license": "See CK LICENSE.txt for licensing details", 13 | "version": [ 14 | "1", 15 | "15", 16 | "0" 17 | ] 18 | }, 19 | "data_name": "lib.python.tensorrt-helper" 20 | } 21 | -------------------------------------------------------------------------------- /soft/lib.tensorrt/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "63257b98ef23405c", 3 | "backup_module_uid": "5e1100048ab875d7", 4 | "backup_module_uoa": 
"soft", 5 | "control": { 6 | "author": "cTuning foundation", 7 | "author_email": "admin@cTuning.org", 8 | "author_webpage": "http://cTuning.org", 9 | "copyright": "See CK COPYRIGHT.txt for copyright details", 10 | "engine": "CK", 11 | "iso_datetime": "2017-01-09T10:54:53.564115", 12 | "license": "See CK LICENSE.txt for licensing details", 13 | "version": [ 14 | "1", 15 | "8", 16 | "6", 17 | "1" 18 | ] 19 | }, 20 | "data_name": "lib.tensorrt" 21 | } 22 | -------------------------------------------------------------------------------- /program/tensorrt-test/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "5e23a492b9138354", 3 | "backup_module_uid": "b0ac08fe1d3c2615", 4 | "backup_module_uoa": "program", 5 | "control": { 6 | "author": "cTuning foundation", 7 | "author_email": "admin@cTuning.org", 8 | "author_webpage": "http://cTuning.org", 9 | "copyright": "See CK COPYRIGHT.txt for copyright details", 10 | "engine": "CK", 11 | "iso_datetime": "2016-12-14T18:58:52.738627", 12 | "license": "See CK LICENSE.txt for licensing details", 13 | "version": [ 14 | "1", 15 | "8", 16 | "4", 17 | "2" 18 | ] 19 | }, 20 | "data_name": "tensorrt-test" 21 | } 22 | -------------------------------------------------------------------------------- /program/tensorrt-time/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "5d4e5a43da89baca", 3 | "backup_module_uid": "b0ac08fe1d3c2615", 4 | "backup_module_uoa": "program", 5 | "control": { 6 | "author": "cTuning foundation", 7 | "author_email": "admin@cTuning.org", 8 | "author_webpage": "http://cTuning.org", 9 | "copyright": "See CK COPYRIGHT.txt for copyright details", 10 | "engine": "CK", 11 | "iso_datetime": "2017-01-03T12:16:16.101706", 12 | "license": "See CK LICENSE.txt for licensing details", 13 | "version": [ 14 | "1", 15 | "8", 16 | "5", 17 | "3" 18 | ] 19 | }, 20 | "data_name": 
"tensorrt-time" 21 | } 22 | -------------------------------------------------------------------------------- /soft/model.tensorrt/.cm/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "ce8554cdf428aa14", 3 | "backup_module_uid": "5e1100048ab875d7", 4 | "backup_module_uoa": "soft", 5 | "control": { 6 | "author": "cTuning foundation", 7 | "author_email": "admin@cTuning.org", 8 | "author_webpage": "http://cTuning.org", 9 | "copyright": "See CK COPYRIGHT.txt for copyright details", 10 | "engine": "CK", 11 | "iso_datetime": "2019-11-05T15:04:25.971019", 12 | "license": "See CK LICENSE.txt for licensing details", 13 | "version": [ 14 | "1", 15 | "11", 16 | "4", 17 | "1" 18 | ] 19 | }, 20 | "data_name": "model.tensorrt" 21 | } 22 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-tf/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | read -d '' CMD <` 24 | test_initialization: false 25 | average_loss: 40 26 | -------------------------------------------------------------------------------- /package/caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling/solver.prototxt: -------------------------------------------------------------------------------- 1 | # please cite: 2 | # @article{SqueezeNet, 3 | # Author = {Forrest N. Iandola and Matthew W. Moskewicz and Khalid Ashraf and Song Han and William J. 
Dally and Kurt Keutzer}, 4 | # Title = {SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and $<$1MB model size}, 5 | # Journal = {arXiv:1602.07360}, 6 | # Year = {2016} 7 | # } 8 | 9 | test_iter: 2000 #not subject to iter_size 10 | test_interval: 1000 11 | base_lr: 0.04 12 | display: 40 13 | max_iter: 170000 14 | iter_size: 16 #global batch size = batch_size * iter_size 15 | lr_policy: "poly" 16 | power: 1.0 #linearly decrease LR 17 | momentum: 0.9 18 | weight_decay: 0.0002 19 | snapshot: 1000 20 | snapshot_prefix: "train" 21 | solver_mode: GPU 22 | random_seed: 42 23 | net: "train_val.prototxt" #we typically do `cd SqueezeNet_v1.0; caffe train ` 24 | test_initialization: false 25 | average_loss: 40 26 | -------------------------------------------------------------------------------- /package/plugin-tensorrt-nms-download/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "check_exit_status": "yes", 3 | "customize": { 4 | "extra_dir": "", 5 | "install_env": { 6 | "PACKAGE_SKIP_CLEAN_PACKAGE": "YES", 7 | "PACKAGE_SKIP_LINUX_MAKE": "YES", 8 | "PACKAGE_URL": "https://zenodo.org/record/3716059/files", 9 | "PACKAGE_NAME": "libnmsoptplugin.so", 10 | "PACKAGE_WGET": "YES", 11 | "PACKAGE_VERSION": "0.5.mlperf" 12 | }, 13 | "no_os_in_suggested_path": "yes", 14 | "no_ver_in_suggested_path": "yes", 15 | "skip_file_check": "no", 16 | "version": "0.5" 17 | }, 18 | "end_full_path_universal": "libnmsoptplugin.so", 19 | "process_script": "install", 20 | "soft_uoa": "7929205eba0ebed0", 21 | "suggested_path": "plugin-tensorrt-nms-downloaded", 22 | "tags": [ 23 | "plugin", 24 | "trt", 25 | "tensorrt", 26 | "nms", 27 | "converted-by.nvidia", 28 | "for.xavier", 29 | "downloaded" 30 | ], 31 | "use_scripts_from_another_entry": { 32 | "data_uoa": "download-and-install-package", 33 | "module_uoa": "script" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- 
/package/model-tensorrt-download-for.gtx1080/copy/coco_flatlabels.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | 12 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | 26 27 | backpack 28 | umbrella 29 | 29 30 | 30 31 | handbag 32 | tie 33 | suitcase 34 | frisbee 35 | skis 36 | snowboard 37 | sports ball 38 | kite 39 | baseball bat 40 | baseball glove 41 | skateboard 42 | surfboard 43 | tennis racket 44 | bottle 45 | 45 46 | wine glass 47 | cup 48 | fork 49 | knife 50 | spoon 51 | bowl 52 | banana 53 | apple 54 | sandwich 55 | orange 56 | broccoli 57 | carrot 58 | hot dog 59 | pizza 60 | donut 61 | cake 62 | chair 63 | couch 64 | potted plant 65 | bed 66 | 66 67 | dining table 68 | 68 69 | 69 70 | toilet 71 | 71 72 | tv 73 | laptop 74 | mouse 75 | remote 76 | keyboard 77 | cell phone 78 | microwave 79 | oven 80 | toaster 81 | sink 82 | refrigerator 83 | 83 84 | book 85 | clock 86 | vase 87 | scissors 88 | teddy bear 89 | hair drier 90 | toothbrush 91 | -------------------------------------------------------------------------------- /package/model-tensorrt-download-for.xavier/copy/coco_flatlabels.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | 12 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | 26 27 | backpack 28 | umbrella 29 | 29 30 | 30 31 | handbag 32 | tie 33 | suitcase 34 | frisbee 35 | skis 36 | snowboard 37 | sports ball 38 | kite 39 | baseball bat 40 | baseball glove 41 | skateboard 42 | surfboard 43 
| tennis racket 44 | bottle 45 | 45 46 | wine glass 47 | cup 48 | fork 49 | knife 50 | spoon 51 | bowl 52 | banana 53 | apple 54 | sandwich 55 | orange 56 | broccoli 57 | carrot 58 | hot dog 59 | pizza 60 | donut 61 | cake 62 | chair 63 | couch 64 | potted plant 65 | bed 66 | 66 67 | dining table 68 | 68 69 | 69 70 | toilet 71 | 71 72 | tv 73 | laptop 74 | mouse 75 | remote 76 | keyboard 77 | cell phone 78 | microwave 79 | oven 80 | toaster 81 | sink 82 | refrigerator 83 | 83 84 | book 85 | clock 86 | vase 87 | scissors 88 | teddy bear 89 | hair drier 90 | toothbrush 91 | -------------------------------------------------------------------------------- /soft/lib.python.tensorrt/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "auto_detect": "yes", 3 | "customize": { 4 | "check_that_exists": "yes", 5 | "detect_version_externally": "yes", 6 | "env_prefix": "CK_ENV_PYTHON_PACKAGE_TENSORRT", 7 | "limit_recursion_dir_search": { 8 | "linux": 5, 9 | "win": 5 10 | }, 11 | "soft_file_universal": "tensorrt$#sep#$__init__.py", 12 | "version_recursive_import": "yes", 13 | "version_variable_name": "__version__" 14 | }, 15 | "deps": { 16 | "tensorrt-library": { 17 | "local": "yes", 18 | "name": "TensorRT library", 19 | "sort": 10, 20 | "tags": "lib,tensorrt,inference" 21 | }, 22 | "python": { 23 | "add_to_path": "yes", 24 | "local": "yes", 25 | "name": "Python interpreter", 26 | "sort": 20, 27 | "tags": "compiler,python", 28 | "version_from": [ 3, 5, 0 ] 29 | } 30 | }, 31 | "soft_name": "Python TensorRT library", 32 | "tags": [ 33 | "lib", 34 | "python-package", 35 | "tensorrt", 36 | "trt" 37 | ], 38 | "use_customize_script_from_another_entry": { 39 | "data_uoa": "4460bdb0ade2a3df", 40 | "module_uoa": "soft" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /package/caffemodel-deepscale-squeezenet-1.0-explicit-window-global-pooling/LICENSE: 
-------------------------------------------------------------------------------- 1 | BSD LICENSE. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted 4 | provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions 7 | and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions 10 | and the following disclaimer in the documentation and/or other materials provided with the 11 | distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR 14 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 16 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 17 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 18 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 19 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF 20 | THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 21 | -------------------------------------------------------------------------------- /package/caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling/LICENSE: -------------------------------------------------------------------------------- 1 | BSD LICENSE. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted 4 | provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions 7 | and the following disclaimer. 8 | 9 | 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions 10 | and the following disclaimer in the documentation and/or other materials provided with the 11 | distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR 14 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 16 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 17 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 18 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 19 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF 20 | THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
21 | -------------------------------------------------------------------------------- /package/lib-jetson-inference-dividiti-master-cuda/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "check_exit_status": "yes", 3 | "customize": { 4 | "extra_dir": "", 5 | "git_src_dir": "src", 6 | "install_env": { 7 | "JETSON_BRANCH": "master", 8 | "JETSON_URL": "http://github.com/ctuning/jetson-inference", 9 | "NVCCFLAGS": "-Wno-deprecated-gpu-targets" 10 | }, 11 | "skip_file_check": "yes", 12 | "use_git_revision": "yes", 13 | "version": "trunk" 14 | }, 15 | "deps": { 16 | "compiler": { 17 | "local": "yes", 18 | "name": "C++ compiler", 19 | "sort": 10, 20 | "tags": "compiler,lang-cpp" 21 | }, 22 | "compiler-extra": { 23 | "add_to_path": "yes", 24 | "local": "yes", 25 | "name": "CUDA compiler", 26 | "sort": 20, 27 | "tags": "compiler,lang-c-cuda" 28 | }, 29 | "cmake": { 30 | "local": "yes", 31 | "name": "cmake", 32 | "sort": 30, 33 | "tags": "tool,cmake" 34 | } 35 | }, 36 | "end_full_path": { 37 | "linux": "lib/libjetson-inference.so" 38 | }, 39 | "need_cpu_info": "yes", 40 | "only_for_host_os_tags": [ 41 | "linux" 42 | ], 43 | "only_for_target_os_tags": [ 44 | "linux" 45 | ], 46 | "process_script": "install", 47 | "soft_uoa": "428380f09d31096d", 48 | "suggested_path": "lib-jetson-inference-dividiti", 49 | "tags": [ 50 | "lib", 51 | "jetson-inference", 52 | "dividiti", 53 | "vmaster", 54 | "vcuda" 55 | ], 56 | "use_scripts_from_another_entry": { 57 | "data_uoa": "0ba9e4ed581ffa1e" 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /package/lib-jetson-inference-dusty-nv-master-cuda/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "check_exit_status": "yes", 3 | "customize": { 4 | "extra_dir": "", 5 | "git_src_dir": "src", 6 | "install_env": { 7 | "JETSON_BRANCH": "master", 8 | "JETSON_URL": 
"https://github.com/dusty-nv/jetson-inference", 9 | "NVCCFLAGS": "-Wno-deprecated-gpu-targets" 10 | }, 11 | "skip_file_check": "yes", 12 | "use_git_revision": "yes", 13 | "version": "trunk" 14 | }, 15 | "deps": { 16 | "compiler": { 17 | "local": "yes", 18 | "name": "C++ compiler", 19 | "sort": 10, 20 | "tags": "compiler,lang-cpp" 21 | }, 22 | "compiler-extra": { 23 | "add_to_path": "yes", 24 | "local": "yes", 25 | "name": "CUDA compiler", 26 | "sort": 20, 27 | "tags": "compiler,lang-c-cuda" 28 | }, 29 | "cmake": { 30 | "local": "yes", 31 | "name": "cmake", 32 | "sort": 30, 33 | "tags": "tool,cmake" 34 | }, 35 | "tensorrt": { 36 | "local": "yes", 37 | "name": "TensorRT engine", 38 | "sort": 40, 39 | "tags": "lib,tensorrt" 40 | } 41 | }, 42 | "end_full_path": { 43 | "linux": "lib/libjetson-inference.so" 44 | }, 45 | "need_cpu_info": "yes", 46 | "only_for_host_os_tags": [ 47 | "linux" 48 | ], 49 | "only_for_target_os_tags": [ 50 | "linux" 51 | ], 52 | "process_script": "install", 53 | "soft_uoa": "428380f09d31096d", 54 | "suggested_path": "lib-jetson-inference-nvidia", 55 | "tags": [ 56 | "lib", 57 | "jetson-inference", 58 | "dusty-nv", 59 | "nvidia", 60 | "vmaster", 61 | "vcuda" 62 | ] 63 | } 64 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 cTuning foundation . 2 | Copyright (c) 2019-2020 dividiti Limited. 3 | All rights reserved 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. 
Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the cTuning foundation 16 | nor the names of its contributors may be used to endorse 17 | or promote products derived from this software without 18 | specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 21 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 27 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /program/image-classification-tensorrt-py/README.md: -------------------------------------------------------------------------------- 1 | # Image Classification - TensorRT-Python program 2 | 3 | The instructions below have been tested on a Jetson TX1 board with JetPack 4.2.2 installed via the NVIDIA SDK Manager. 4 | 5 | ## Convert TF model to ONNX model 6 | 7 | When installing a Jetpack via the NVIDIA SDK Manager, tick the TensorFlow option. 8 | For JetPack 4.2.2, this installs TensorFlow 1.14.0. 
9 | 10 | ### Detect TensorFlow 11 | ``` 12 | $ ck detect soft:lib.tensorflow --full_path=/usr/local/lib/python3.6/dist-packages/tensorflow/__init__.py 13 | ``` 14 | 15 | ### Install ONNX from source (with the ProtoBuf compiler dependency) 16 | ``` 17 | $ ck install package --tags=lib,python-package,onnx,from-source 18 | ``` 19 | 20 | ### Install TF-to-ONNX converter (of a known good version) 21 | ``` 22 | $ ck install package --tags=lib,python-package,tf2onnx --force_version=1.5.1 23 | ``` 24 | **NB:** Both 1.5.2. and 1.5.3 can be installed but fail to convert ResNet to ONNX on TX1. 25 | 26 | ### Convert TF to ONNX 27 | ``` 28 | $ ck install package --tags=model,resnet,onnx,converted-from-tf 29 | ``` 30 | 31 | ### Convert ONNX to TensorRT 32 | 33 | When converting an ONNX model to TensorRT, you can select the numerical data type (`fp32` or `fp16`) 34 | and the maximum batch size (currently `1 .. 20`). 35 | 36 | #### `precision=fp32`, `max_batch_size=1` 37 | ``` 38 | $ ck install package --tags=model,resnet,tensorrt,converted-from-onnx 39 | ``` 40 | 41 | #### `precision=fp16`, `max_batch_size=1` 42 | ``` 43 | $ ck install package --tags=model,resnet,tensorrt,converted-from-onnx,fp16 44 | ``` 45 | 46 | #### `precision=fp32`, `max_batch_size=2` 47 | ``` 48 | $ ck install package --tags=model,resnet,tensorrt,converted-from-onnx,fp32,maxbatch.2 49 | ``` 50 | 51 | #### `precision=fp16`, `max_batch_size=2` 52 | ``` 53 | $ ck install package --tags=model,resnet,tensorrt,converted-from-onnx,fp16,maxbatch.2 54 | ``` 55 | -------------------------------------------------------------------------------- /soft/plugin.tensorrt.nms/customize.py: -------------------------------------------------------------------------------- 1 | # 2 | # Collective Knowledge (individual environment - setup) 3 | # 4 | # See CK LICENSE.txt for licensing details 5 | # See CK COPYRIGHT.txt for copyright details 6 | # 7 | # Developer: Leo Gordon, leo@dividiti.com 8 | # 9 | # An example of a minimalistic 
soft CK entry: just record the path and move on 10 | 11 | 12 | ############################################################################## 13 | # setup environment setup 14 | 15 | def setup(i): 16 | """ 17 | Input: { 18 | cfg - meta of this soft entry 19 | self_cfg - meta of module soft 20 | ck_kernel - import CK kernel module (to reuse functions) 21 | 22 | host_os_uoa - host OS UOA 23 | host_os_uid - host OS UID 24 | host_os_dict - host OS meta 25 | 26 | target_os_uoa - target OS UOA 27 | target_os_uid - target OS UID 28 | target_os_dict - target OS meta 29 | 30 | target_device_id - target device ID (if via ADB) 31 | 32 | tags - list of tags used to search this entry 33 | 34 | env - updated environment vars from meta 35 | customize - updated customize vars from meta 36 | 37 | deps - resolved dependencies for this soft 38 | 39 | interactive - if 'yes', can ask questions, otherwise quiet 40 | } 41 | 42 | Output: { 43 | return - return code = 0, if successful 44 | > 0, if error 45 | (error) - error text if return > 0 46 | 47 | bat - prepared string for bat file 48 | } 49 | 50 | """ 51 | 52 | env = i.get('env', {}) 53 | cus = i.get('customize',{}) 54 | env_prefix = cus.get('env_prefix','') 55 | full_path = cus.get('full_path','') 56 | 57 | env[env_prefix + '_PATH'] = full_path 58 | 59 | return {'return':0, 'bat':''} 60 | 61 | -------------------------------------------------------------------------------- /package/caffemodel-deepscale-squeezenet-1.0-explicit-window-global-pooling/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "check_exit_status": "yes", 3 | "customize": { 4 | "extra_dir": "", 5 | "file_with_weights": "squeezenet_v1.0.caffemodel", 6 | "force_ask_path": "yes", 7 | "install_env": { 8 | "MODEL_FILE": "squeezenet_v1.0.caffemodel", 9 | "MODEL_HASH_CALCULATOR": "sha1sum", 10 | "MODEL_HASH_REF": "579d0beb658e43c45937bf8bb5e4034fea4e1f69", 11 | "MODEL_URL": 
"https://github.com/DeepScale/SqueezeNet/raw/master/SqueezeNet_v1.0/squeezenet_v1.0.caffemodel" 12 | }, 13 | "no_os_in_suggested_path": "yes", 14 | "no_ver_in_suggested_path": "yes", 15 | "params": { 16 | "deploy": { 17 | "substitute": { 18 | "batch_size": 10 19 | }, 20 | "template": "deploy.prototxt" 21 | }, 22 | "train": { 23 | "substitute": { 24 | "train_batch_size": 32, 25 | "val_batch_size": 25 26 | }, 27 | "template": "train_val.prototxt" 28 | }, 29 | "val": { 30 | "accuracy_layers": [ 31 | "accuracy", 32 | "accuracy_top5" 33 | ], 34 | "substitute": { 35 | "train_batch_size": 32, 36 | "val_batch_size": 25 37 | }, 38 | "template": "train_val.prototxt" 39 | } 40 | }, 41 | "skip_file_check": "yes", 42 | "version": "1.0" 43 | }, 44 | "end_full_path": { 45 | "linux": "squeezenet_v1.0.caffemodel", 46 | "win": "squeezenet_v1.0.caffemodel" 47 | }, 48 | "features": { 49 | "accuracy": 0.576801, 50 | "accuracy_top5": 0.803903 51 | }, 52 | "only_for_host_os_tags": [ 53 | "windows", 54 | "linux" 55 | ], 56 | "only_for_target_os_tags": [ 57 | "windows", 58 | "linux" 59 | ], 60 | "package_extra_name": " (deepscale, squeezenet, 1.0)", 61 | "process_script": "download", 62 | "soft_uoa": "e2400aabc11d6cd1", 63 | "suggested_path": "caffemodel-deepscale-squeezenet-1.0", 64 | "tags": [ 65 | "caffe", 66 | "weights", 67 | "caffemodel", 68 | "deepscale", 69 | "squeezenet", 70 | "v1.0", 71 | "v1", 72 | "explicit-window-global-pooling" 73 | ], 74 | "use_scripts_from_another_entry": { 75 | "data_uoa": "download-caffemodel", 76 | "module_uoa": "script", 77 | "repo_uoa": "ck-caffe" 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /package/caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "check_exit_status": "yes", 3 | "customize": { 4 | "extra_dir": "", 5 | "file_with_weights": "squeezenet_v1.1.caffemodel", 6 | 
"force_ask_path": "yes", 7 | "install_env": { 8 | "MODEL_FILE": "squeezenet_v1.1.caffemodel", 9 | "MODEL_HASH_CALCULATOR": "sha1sum", 10 | "MODEL_HASH_REF": "3397f026368a45ae236403ccc81cfcbe8ebe1bd0", 11 | "MODEL_URL": "https://github.com/DeepScale/SqueezeNet/raw/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel" 12 | }, 13 | "no_os_in_suggested_path": "yes", 14 | "no_ver_in_suggested_path": "yes", 15 | "params": { 16 | "deploy": { 17 | "substitute": { 18 | "batch_size": 10 19 | }, 20 | "template": "deploy.prototxt" 21 | }, 22 | "train": { 23 | "substitute": { 24 | "train_batch_size": 32, 25 | "val_batch_size": 25 26 | }, 27 | "template": "train_val.prototxt" 28 | }, 29 | "val": { 30 | "accuracy_layers": [ 31 | "accuracy", 32 | "accuracy_top5", 33 | "loss" 34 | ], 35 | "substitute": { 36 | "train_batch_size": 32, 37 | "val_batch_size": 25 38 | }, 39 | "template": "train_val.prototxt" 40 | } 41 | }, 42 | "skip_file_check": "yes", 43 | "version": "1.1" 44 | }, 45 | "end_full_path": { 46 | "linux": "squeezenet_v1.1.caffemodel", 47 | "win": "squeezenet_v1.1.caffemodel" 48 | }, 49 | "features": { 50 | "accuracy": 0.58388, 51 | "accuracy_top5": 0.810123 52 | }, 53 | "only_for_host_os_tags": [ 54 | "windows", 55 | "linux" 56 | ], 57 | "only_for_target_os_tags": [ 58 | "windows", 59 | "linux" 60 | ], 61 | "package_extra_name": " (deepscale, squeezenet, 1.1)", 62 | "process_script": "download", 63 | "soft_uoa": "e2400aabc11d6cd1", 64 | "suggested_path": "caffemodel-deepscale-squeezenet-1.1", 65 | "tags": [ 66 | "caffe", 67 | "weights", 68 | "caffemodel", 69 | "deepscale", 70 | "squeezenet", 71 | "v1.1", 72 | "v1", 73 | "explicit-window-global-pooling" 74 | ], 75 | "use_scripts_from_another_entry": { 76 | "data_uoa": "download-caffemodel", 77 | "module_uoa": "script", 78 | "repo_uoa": "ck-caffe" 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-caffe/.cm/meta.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "check_exit_status": "yes", 3 | "customize": { 4 | "install_env": { 5 | "PACKAGE_NAME": "converted_model.trt", 6 | "PACKAGE_VERSION": "1" 7 | }, 8 | "no_os_in_suggested_path": "yes", 9 | "no_ver_in_suggested_path": "yes", 10 | "package_name": "TensorRT-from-Caffe model", 11 | "skip_file_check": "yes" 12 | }, 13 | "deps": { 14 | "lib-python-tensorrt": { 15 | "local": "yes", 16 | "name": "Python TensorRT library", 17 | "sort": 20, 18 | "tags": "lib,python-package,tensorrt" 19 | }, 20 | "model-source": { 21 | "local": "yes", 22 | "name": "Original Caffe model", 23 | "sort": 10, 24 | "tags": "caffe,model", 25 | "update_tags_if_env" : { 26 | "image-classification,mobilenet": [ { "_MODEL_TO_CONVERT": "mobilenet_v1_1.0_224" } ], 27 | "image-classification,resnet": [ { "_MODEL_TO_CONVERT": "resnet50_v1" } ] 28 | } 29 | } 30 | }, 31 | "end_full_path_universal": "converted_model.trt", 32 | "process_script": "install", 33 | "soft_uoa": "model.tensorrt", 34 | "suggested_path": "model-tensorrt-converted-from-caffe", 35 | "tags": [ 36 | "model", 37 | "image-classification", 38 | "tensorrt", 39 | "trt", 40 | "converted", 41 | "converted-from-caffe" 42 | ], 43 | "variations": { 44 | "maxbatch.1": { 45 | "on_by_default": "yes", 46 | "extra_env": { 47 | "ML_MODEL_MAX_BATCH_SIZE": "1" 48 | } 49 | }, 50 | "maxbatch.5": { 51 | "extra_env": { 52 | "ML_MODEL_MAX_BATCH_SIZE": "5" 53 | } 54 | }, 55 | "maxbatch.10": { 56 | "extra_env": { 57 | "ML_MODEL_MAX_BATCH_SIZE": "10" 58 | } 59 | }, 60 | "fp32": { 61 | "on_by_default": "yes", 62 | "extra_env": { 63 | "ML_MODEL_DATA_TYPE": "fp32", 64 | "ML_MODEL_INPUT_DATA_TYPE": "float32" 65 | } 66 | }, 67 | "fp16": { 68 | "extra_env": { 69 | "ML_MODEL_DATA_TYPE": "fp16", 70 | "ML_MODEL_INPUT_DATA_TYPE": "float32" 71 | } 72 | }, 73 | "mobilenet": { 74 | "on_by_default": "yes", 75 | "extra_env": { 76 | "_MODEL_TO_CONVERT": "mobilenet_v1_1.0_224", 77 | 
def ck_postprocess(i):
    """Convert the raw output of the tensorrt-time program to the CK format.

    Loads 'profiler.json' from the current directory, adds the optional
    per-layer keys used for compatibility with CK-Caffe, aggregates the
    per-layer timings and saves the result to 'results.json'.

    Input:  {
              ck_kernel  - CK kernel module (its load_json_file and
                           save_json_to_file functions are used)
              (env)      - run environment; CK_CAFFE_MODEL is recorded if set
              (run_time) - accepted for interface compatibility, not used
              (deps)     - accepted for interface compatibility, not used
            }

    Output: {
              return  - return code = 0, if successful
                                    > 0, if error
              (error) - error text if return > 0
            }
    """
    ck = i['ck_kernel']
    env = i.get('env', {})

    d = {}

    # Collect env vars of interest.
    d['REAL_ENV_CK_CAFFE_MODEL'] = env.get('CK_CAFFE_MODEL', '')

    # Load tensorrt-time profiling output.
    # TODO: Read from program meta run_vars['CK_TENSORRT_CJSON_PATH'].
    r = ck.load_json_file({'json_file': 'profiler.json'})
    if r['return'] > 0:
        return r

    # Report malformed profiler output as a regular CK error instead of
    # crashing the post-processing step with a bare KeyError.
    per_layer_info = r.get('dict', {}).get('per_layer_info')
    if per_layer_info is None:
        return {'return': 1,
                'error': "no 'per_layer_info' found in tensorrt-time output (profiler.json)!"}

    # Update layer info similarly to Caffe output.
    time_fw_ms = 0.0
    for layer_info in per_layer_info:
        try:
            time_ms = layer_info['time_ms']
            label = '%02d: %s' % (layer_info['index'], layer_info['name'])
        except KeyError as e:
            return {'return': 1,
                    'error': 'missing key %s in per-layer info of tensorrt-time output!' % e}

        time_fw_ms += time_ms
        # Update optional keys for compatibility with CK-Caffe.
        layer_info['time_s'] = time_ms * 1e-3
        layer_info['label'] = label
        layer_info['timestamp'] = '0101 00:00:00.000000'  # FIXME: Add proper timestamp.
        layer_info['direction'] = 'forward'
    d['per_layer_info'] = per_layer_info

    # Execution time (ms). Only the forward pass is profiled.
    d['time_fw_ms'] = time_fw_ms
    d['time_bw_ms'] = 0.0
    d['time_fwbw_ms'] = d['time_fw_ms'] + d['time_bw_ms']
    d['time_total_ms'] = d['time_fwbw_ms']
    d['time_total_ms_kernel_0'] = d['time_total_ms']
    # Execution time (s).
    for key in ('time_fw', 'time_bw', 'time_fwbw', 'time_total'):
        d[key + '_s'] = d[key + '_ms'] * 1e-3
    d['time_total_s_kernel_0'] = d['time_total_ms_kernel_0'] * 1e-3

    # FIXME: Add memory consumption.
    memory_bytes = 0
    d['memory_bytes'] = memory_bytes
    d['memory_kbytes'] = memory_bytes * 1e-3
    d['memory_mbytes'] = memory_bytes * 1e-6

    # Built-in CK keys.
    d['execution_time'] = d['time_total_s']
    d['post_processed'] = 'yes'

    r = ck.save_json_to_file({'json_file': 'results.json', 'dict': d})
    if r['return'] > 0:
        return r

    return {'return': 0}

# Do not add anything here!
"force_target_as_host": "yes", 34 | "local": "yes", 35 | "name": "ImageNet dataset (preprocessed subset)", 36 | "sort": 20, 37 | "tags": "dataset,imagenet,preprocessed" 38 | }, 39 | "imagenet-helper": { 40 | "local": "yes", 41 | "name": "Python ImageNet helper functions and metadata", 42 | "sort": 35, 43 | "tags": "lib,python-package,imagenet-helper" 44 | }, 45 | "lib-python-numpy": { 46 | "local": "yes", 47 | "name": "Python NumPy library", 48 | "sort": 40, 49 | "tags": "lib,python-package,numpy" 50 | }, 51 | "lib-python-pycuda": { 52 | "local": "yes", 53 | "name": "Python PyCUDA library", 54 | "sort": 50, 55 | "tags": "lib,python-package,pycuda" 56 | }, 57 | "lib-python-tensorrt": { 58 | "local": "yes", 59 | "name": "Python TensorRT library", 60 | "sort": 60, 61 | "tags": "lib,python-package,tensorrt" 62 | }, 63 | "tensorrt-helper": { 64 | "local": "yes", 65 | "name": "Python TensorRT helper functions and metadata", 66 | "sort": 70, 67 | "tags": "lib,python-package,tensorrt-helper" 68 | }, 69 | "weights": { 70 | "local": "yes", 71 | "name": "TensorRT model", 72 | "sort": 30, 73 | "tags": "tensorrt,model,image-classification" 74 | } 75 | }, 76 | "run_vars": { 77 | "CK_BATCH_COUNT": 1, 78 | "CK_BATCH_SIZE": 1, 79 | "CK_RESULTS_DIR": "predictions", 80 | "CK_SILENT_MODE": 0, 81 | "CK_SKIP_IMAGES": 0 82 | }, 83 | "tags": [ 84 | "image-classification", 85 | "tensorrt", 86 | "trt", 87 | "standalone", 88 | "lang-python" 89 | ] 90 | } 91 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Collective Knowledge repository for collaboratively benchmarking and optimising embedded deep vision runtime library for Jetson TX1 2 | 3 | *This fork is maintained by [dividiti Limited](https://dividiti.com).* 4 | 5 | [![compatibility](https://github.com/ctuning/ck-guide-images/blob/master/ck-compatible.svg)](https://github.com/ctuning/ck) 6 | 
[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 7 | 8 | ## Introduction 9 | 10 | [CK-TensorRT](https://github.com/ctuning/ck-tensorrt) is an open framework for 11 | collaborative and reproducible optimisation of convolutional neural networks for Jetson TX1 12 | based on the [Collective Knowledge](http://cknowledge.org) framework. 13 | It's based on the [Deep Inference](https://github.com/dusty-nv/jetson-inference) framework from 14 | Dustin Franklin (a [Jetson developer @ NVIDIA](https://github.com/dusty-nv)). 15 | In essence, CK-TensorRT is simply a suite of convenient wrappers with unified JSON API 16 | for customizable building, evaluating and multi-objective optimisation 17 | of Jetson Inference runtime library for Jetson TX1. 18 | 19 | ## Authors/contributors 20 | 21 | * Anton Lokhmotov, [dividiti](http://dividiti.com) 22 | * Daniil Efremov, [xored](http://xored.com) 23 | 24 | ## Quick installation on Ubuntu 25 | 26 | TBD 27 | 28 | ### Installing general dependencies 29 | 30 | ``` 31 | $ sudo apt install coreutils \ 32 | build-essential \ 33 | make \ 34 | cmake \ 35 | wget \ 36 | git \ 37 | python \ 38 | python-pip 39 | ``` 40 | 41 | ### Installing CK-TensorRT dependencies 42 | ``` 43 | $ sudo apt install libqt4-dev \ 44 | libglew-dev \ 45 | libgstreamer1.0-dev 46 | ``` 47 | 48 | ### Installing CK 49 | 50 | ``` 51 | $ sudo pip install ck 52 | $ ck version 53 | ``` 54 | 55 | ### Installing CK-TensorRT repository 56 | 57 | ``` 58 | $ ck pull repo:ck-tensorrt 59 | ``` 60 | 61 | ### Building CK-TensorRT and all dependencies via CK 62 | 63 | The first time you run a TensorRT program (e.g. 
`tensorrt-test`), CK will 64 | build and install all missing dependencies on your machine, 65 | download the required data sets and start the benchmark: 66 | 67 | ``` 68 | $ ck run program:tensorrt-test 69 | ``` 70 | 71 | ## Related projects and initiatives 72 | 73 | We are working with the community to unify and crowdsource performance analysis 74 | and tuning of various DNN frameworks (or any realistic workload) 75 | using the Collective Knowledge Technology: 76 | * [Open repository of AI, ML, and systems knowledge](https://cKnowledge.io) 77 | * [CK-Caffe](https://github.com/dividiti/ck-caffe) 78 | * [CK-Caffe2](https://github.com/ctuning/ck-caffe2) 79 | * [Android app for DNN crowd-benchmarking and crowd-tuning]( https://cKnowledge.org/android-apps.html ) 80 | -------------------------------------------------------------------------------- /package/model-tensorrt-download-for.gtx1080/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "check_exit_status": "yes", 3 | "customize": { 4 | "extra_dir": "", 5 | "install_env": { 6 | "ML_MODEL_COLOUR_CHANNELS_BGR": "NO", 7 | "ML_MODEL_DATA_LAYOUT": "NCHW", 8 | "ML_MODEL_DATA_TYPE": "int8", 9 | "ML_MODEL_INPUT_DATA_TYPE": "int8", 10 | "PACKAGE_SKIP_CLEAN_PACKAGE": "YES", 11 | "PACKAGE_SKIP_LINUX_MAKE": "YES", 12 | "PACKAGE_URL": "https://www.dropbox.com/s/f3v6mxi721b5lti/", 13 | "PACKAGE_WGET": "YES" 14 | }, 15 | "no_os_in_suggested_path": "yes", 16 | "no_ver_in_suggested_path": "yes", 17 | "skip_file_check": "no" 18 | }, 19 | "end_full_path_universal": [ 20 | "mobilenet-MultiStream-dla-b26-int8.plan", 21 | "mobilenet-MultiStream-gpu-b250-int8.plan", 22 | "resnet-MultiStream-dla-b15-int8.plan" 23 | ], 24 | "process_script": "install", 25 | "soft_uoa": "ce8554cdf428aa14", 26 | "suggested_path": "model-tensorrt-downloaded-for.gtx1080", 27 | "tags": [ 28 | "image-classification", 29 | "int8", 30 | "linear", 31 | "mlperf", 32 | "model", 33 | "trt", 34 | "tensorrt", 35 | 
"tensorrt.6", 36 | "converted-by.nvidia", 37 | "for.gtx1080", 38 | "nchw", 39 | "rgb", 40 | "downloaded" 41 | ], 42 | "use_scripts_from_another_entry": { 43 | "data_uoa": "download-and-install-package", 44 | "module_uoa": "script" 45 | }, 46 | "variations": { 47 | "mobilenet": { 48 | "extra_env": { 49 | "ML_MODEL_GIVEN_CHANNEL_MEANS": "128 128 128", 50 | "ML_MODEL_IMAGE_HEIGHT": 224, 51 | "ML_MODEL_IMAGE_WIDTH": 224, 52 | "ML_MODEL_MAX_BATCH_SIZE": 250, 53 | "ML_MODEL_NORMALIZE_DATA": "NO", 54 | "ML_MODEL_SUBTRACT_MEAN": "YES", 55 | "PACKAGE_URL": "https://www.dropbox.com/s/9glkuj23hedhhlv/", 56 | "PACKAGE_NAME": "mobilenet-MultiStream-gpu-b250-int8.plan", 57 | "PACKAGE_VERSION": "0.5.mobilenet_b250_int8" 58 | }, 59 | "extra_tags": [ 60 | "image-classification", 61 | "maxbatch.250", 62 | "side.224" 63 | ], 64 | "on_by_default": "no" 65 | }, 66 | "resnet": { 67 | "extra_env": { 68 | "ML_MODEL_GIVEN_CHANNEL_MEANS": "123.68 116.78 103.94", 69 | "ML_MODEL_IMAGE_HEIGHT": 224, 70 | "ML_MODEL_IMAGE_WIDTH": 224, 71 | "ML_MODEL_MAX_BATCH_SIZE": 15, 72 | "ML_MODEL_NORMALIZE_DATA": "NO", 73 | "ML_MODEL_SUBTRACT_MEAN": "YES", 74 | "PACKAGE_URL": "https://www.dropbox.com/s/f3v6mxi721b5lti/", 75 | "PACKAGE_NAME": "resnet-MultiStream-dla-b15-int8.plan", 76 | "PACKAGE_VERSION": "0.5.resnet_b15_int8" 77 | }, 78 | "extra_tags": [ 79 | "image-classification", 80 | "resnet50", 81 | "maxbatch.15", 82 | "side.224" 83 | ], 84 | "on_by_default": "yes" 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /soft/model.tensorrt/customize.py: -------------------------------------------------------------------------------- 1 | # 2 | # Collective Knowledge (individual environment - setup) 3 | # 4 | # See CK LICENSE.txt for licensing details 5 | # See CK COPYRIGHT.txt for copyright details 6 | # 7 | # Developer: Leo Gordon, dividiti 8 | # 9 | 10 | import os 11 | 12 | 
############################################################################## 13 | # setup environment setup 14 | 15 | def setup(i): 16 | """ 17 | Input: { 18 | cfg - meta of this soft entry 19 | self_cfg - meta of module soft 20 | ck_kernel - import CK kernel module (to reuse functions) 21 | 22 | host_os_uoa - host OS UOA 23 | host_os_uid - host OS UID 24 | host_os_dict - host OS meta 25 | 26 | target_os_uoa - target OS UOA 27 | target_os_uid - target OS UID 28 | target_os_dict - target OS meta 29 | 30 | target_device_id - target device ID (if via ADB) 31 | 32 | tags - list of tags used to search this entry 33 | 34 | env - updated environment vars from meta 35 | customize - updated customize vars from meta 36 | 37 | deps - resolved dependencies for this soft 38 | 39 | interactive - if 'yes', can ask questions, otherwise quiet 40 | } 41 | 42 | Output: { 43 | return - return code = 0, if successful 44 | > 0, if error 45 | (error) - error text if return > 0 46 | 47 | bat - prepared string for bat file 48 | } 49 | 50 | """ 51 | 52 | import os 53 | 54 | ck = i['ck_kernel'] 55 | cus = i.get('customize',{}) 56 | full_path = cus.get('full_path','') 57 | env = i['env'] 58 | install_root = os.path.dirname(full_path) 59 | install_env = cus.get('install_env', {}) 60 | env_prefix = cus['env_prefix'] 61 | 62 | env[env_prefix + '_ROOT'] = install_root 63 | env[env_prefix + '_FILENAME'] = full_path 64 | 65 | # This group should end with _FILE prefix e.g. 
FLATLABELS_FILE 66 | # This suffix will be cut off and prefixed by cus['env_prefix'] 67 | # so we'll get vars like CK_ENV_TENSORRT_MODEL_FLATLABELS_FILE 68 | for varname in install_env.keys(): 69 | if varname.endswith('_FILE'): 70 | file_path = os.path.join(install_root, install_env[varname]) 71 | if os.path.exists(file_path): 72 | env[env_prefix + '_' + varname] = file_path 73 | 74 | # Just copy those without any change in the name: 75 | # 76 | for varname in install_env.keys(): 77 | if varname.startswith('ML_MODEL_'): 78 | env[varname] = install_env[varname] 79 | 80 | return {'return':0, 'bat':''} 81 | -------------------------------------------------------------------------------- /soft/model.tensorrt/README.md: -------------------------------------------------------------------------------- 1 | Usage examples: 2 | 3 | ```bash 4 | ck detect soft:model.tensorrt --full_path=/full/path/to/ResNet50_model_fp32.trt \ 5 | --extra_tags=fp32,resnet,resnet50,image-classification,converted-from-caffe \ 6 | --cus.version=resnet_caffe_fp32 \ 7 | --ienv.ML_MODEL_COLOUR_CHANNELS_BGR=YES \ 8 | --ienv.ML_MODEL_IMAGE_HEIGHT=224 \ 9 | --ienv.ML_MODEL_IMAGE_WIDTH=224 \ 10 | --ienv.ML_MODEL_INPUT_DATA_TYPE=float32 \ 11 | --ienv.ML_MODEL_DATA_TYPE=fp32 \ 12 | --ienv.ML_MODEL_DATA_LAYOUT=NCHW \ 13 | --ienv.ML_MODEL_NORMALIZE_DATA=NO \ 14 | --ienv.ML_MODEL_SUBTRACT_MEAN=YES \ 15 | --ienv.ML_MODEL_GIVEN_CHANNEL_MEANS="123.68 116.78 103.94" 16 | ``` 17 | 18 | ```bash 19 | ck detect soft:model.tensorrt --full_path=/full/path/to/resnet-MultiStream-dla-b15-int8.plan \ 20 | --extra_tags=maxbatch.15,int8,resnet,resnet50,dla,image-classification,converted-by-nvidia \ 21 | --cus.version=resnet_nvidia_int8 \ 22 | --ienv.ML_MODEL_COLOUR_CHANNELS_BGR=NO \ 23 | --ienv.ML_MODEL_IMAGE_HEIGHT=224 \ 24 | --ienv.ML_MODEL_IMAGE_WIDTH=224 \ 25 | --ienv.ML_MODEL_INPUT_DATA_TYPE=int8 \ 26 | --ienv.ML_MODEL_DATA_TYPE=int8 \ 27 | --ienv.ML_MODEL_DATA_LAYOUT=NCHW \ 28 | --ienv.ML_MODEL_NORMALIZE_DATA=NO \ 29 | 
--ienv.ML_MODEL_SUBTRACT_MEAN=YES \ 30 | --ienv.ML_MODEL_GIVEN_CHANNEL_MEANS="123.68 116.78 103.94" \ 31 | --ienv.ML_MODEL_MAX_BATCH_SIZE=15 32 | ``` 33 | 34 | ```bash 35 | ck detect soft:model.tensorrt --full_path=/full/path/to/mobilenet-MultiStream-gpu-b250-int8.plan \ 36 | --extra_tags=maxbatch.250,int8,mobilenet,gpu,image-classification,converted-by-nvidia \ 37 | --cus.version=mobilenet_nvidia_int8 \ 38 | --ienv.ML_MODEL_COLOUR_CHANNELS_BGR=NO \ 39 | --ienv.ML_MODEL_IMAGE_HEIGHT=224 \ 40 | --ienv.ML_MODEL_IMAGE_WIDTH=224 \ 41 | --ienv.ML_MODEL_INPUT_DATA_TYPE=int8 \ 42 | --ienv.ML_MODEL_DATA_TYPE=int8 \ 43 | --ienv.ML_MODEL_DATA_LAYOUT=NCHW \ 44 | --ienv.ML_MODEL_NORMALIZE_DATA=NO \ 45 | --ienv.ML_MODEL_SUBTRACT_MEAN=YES \ 46 | --ienv.ML_MODEL_GIVEN_CHANNEL_MEANS="128 128 128" \ 47 | --ienv.ML_MODEL_MAX_BATCH_SIZE=250 48 | ``` 49 | 50 | ```bash 51 | ck detect soft:model.tensorrt --full_path=/full/path/to/ssd-mobilenet-MultiStream-b20-fp16.plan \ 52 | --extra_tags=maxbatch.20,fp16,ssd-mobilenet,object-detection,converted-by-nvidia \ 53 | --cus.version=ssd-mobilenet_nvidia_fp16 \ 54 | --ienv.ML_MODEL_COLOUR_CHANNELS_BGR=NO \ 55 | --ienv.ML_MODEL_IMAGE_HEIGHT=300 \ 56 | --ienv.ML_MODEL_IMAGE_WIDTH=300 \ 57 | --ienv.ML_MODEL_INPUT_DATA_TYPE=float32 \ 58 | --ienv.ML_MODEL_DATA_TYPE=fp16 \ 59 | --ienv.ML_MODEL_DATA_LAYOUT=NCHW \ 60 | --ienv.ML_MODEL_NORMALIZE_DATA=YES \ 61 | --ienv.ML_MODEL_SUBTRACT_MEAN=NO \ 62 | --ienv.ML_MODEL_MAX_BATCH_SIZE=20 \ 63 | --ienv.ML_MODEL_CLASS_LABELS=/full/path/to/coco_class_labels.txt \ 64 | --ienv.ML_MODEL_TENSORRT_PLUGIN=/full/path/to/libnmsoptplugin.so 65 | ``` 66 | 67 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-caffe/caffe2tensorrt_model_converter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ This is a standalone script for converting Caffe model files into TensorRT model files 
def convert_caffe_model_to_trt(caffe_weights_file, caffe_deploy_file, trt_model_filename,
                               output_tensor_name, output_data_type, max_workspace_size, max_batch_size):
    """Convert a pair of (caffe_weights_file, caffe_deploy_file) into a trt_model_file.

    Args:
        caffe_weights_file: path to the Caffe weights file.
        caffe_deploy_file: path to the Caffe deploy file.
        trt_model_filename: path the serialized TensorRT engine is written to.
        output_tensor_name: name of the tensor to mark as the network output.
        output_data_type: 'fp16' enables the builder's fp16 mode; any other
            value leaves the builder in its default (fp32) mode.
        max_workspace_size: value assigned to the builder's max_workspace_size.
        max_batch_size: value assigned to the builder's max_batch_size.

    Raises:
        RuntimeError: if the Caffe model cannot be parsed, the output tensor
            cannot be found, or the engine cannot be built.
        OSError: if the engine file cannot be written.
    """

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:

        if output_data_type == 'fp16':
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')

            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))
        else:
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size = max_workspace_size
        builder.max_batch_size = max_batch_size

        # NOTE(review): parse(), find() and build_cuda_engine() are assumed to
        # signal failure by returning None rather than raising - confirm
        # against the TensorRT version in use. Checking each result gives a
        # precise error instead of an AttributeError further down.
        model_tensors = parser.parse(deploy=caffe_deploy_file, model=caffe_weights_file,
                                     network=network, dtype=trt.float32)
        if model_tensors is None:
            raise RuntimeError('cannot parse Caffe model ({}, {})'.format(caffe_deploy_file, caffe_weights_file))

        output_tensor = model_tensors.find(output_tensor_name)
        if output_tensor is None:
            raise RuntimeError('cannot find output tensor "{}" in the Caffe model'.format(output_tensor_name))
        network.mark_output(output_tensor)

        trt_model_object = builder.build_cuda_engine(network)
        if trt_model_object is None:
            raise RuntimeError('cannot build TensorRT engine for file {}'.format(trt_model_filename))

        # Failures are deliberately not swallowed here: the calling process
        # must exit with a non-zero status when no model file is produced.
        serialized_trt_model = trt_model_object.serialize()
        with open(trt_model_filename, "wb") as trt_model_file:
            trt_model_file.write(serialized_trt_model)


def main(argv=None):
    """Parse the command line and feed the conversion function.

    Args:
        argv: optional list of command-line arguments; when None, argparse
            falls back to sys.argv[1:].
    """

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('caffe_weights_file', type=str, help='Caffe model weights file')
    arg_parser.add_argument('caffe_deploy_file', type=str, help='Caffe model deploy file')
    arg_parser.add_argument('trt_model_filename', type=str, help='TensorRT model file')
    arg_parser.add_argument('--output_tensor_name', type=str, default='prob', help='Output tensor name')
    arg_parser.add_argument('--output_data_type', type=str, default='fp32', help='Model data type')
    arg_parser.add_argument('--max_workspace_size', type=int, default=(1 << 30), help='Builder workspace size')
    arg_parser.add_argument('--max_batch_size', type=int, default=1, help='Builder batch size')
    args = arg_parser.parse_args(argv)

    convert_caffe_model_to_trt(args.caffe_weights_file, args.caffe_deploy_file, args.trt_model_filename,
                               args.output_tensor_name, args.output_data_type,
                               args.max_workspace_size, args.max_batch_size)


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
"Caffe model (net and weights)", 50 | "sort": 40, 51 | "tags": "caffemodel" 52 | } 53 | }, 54 | "compiler_add_include_as_env_from_deps": [ 55 | "CK_ENV_LIB_STDCPP_INCLUDE", 56 | "CK_ENV_LIB_STDCPP_INCLUDE_EXTRA", 57 | "CK_ENV_LIB_JETSON_INFERENCE_INCLUDE", 58 | "CK_ENV_LIB_TENSORRT_INCLUDE", 59 | "CK_ENV_LIB_TENSORRT_INCLUDE2", 60 | "CK_ENV_COMPILER_CUDA_INCLUDE" 61 | ], 62 | "compiler_env": "CK_CXX", 63 | "compiler_flags_as_env": "$<>$", 64 | "data_name": "tensorrt-time", 65 | "extra_ld_vars": "$<>$/libcudart.so -lnvinfer -lnvcaffe_parser", 66 | "main_language": "cpp", 67 | "print_files_after_run": [], 68 | "process_in_tmp": "yes", 69 | "program": "yes", 70 | "run_cmds": { 71 | "default": { 72 | "ignore_return_code": "no", 73 | "run_time": { 74 | "fine_grain_timer_file": "results.json", 75 | "params": { 76 | "caffemodel_key": "deploy" 77 | }, 78 | "post_process_via_ck": "yes", 79 | "post_process_cmds": [ 80 | "python $#src_path_local#$postprocess.py" 81 | ], 82 | "pre_process_via_ck": { 83 | "data_uoa": "569404c41618603a", 84 | "script_name": "preprocess" 85 | }, 86 | "run_cmd_main": "$#BIN_FILE#$", 87 | "run_cmd_out1": "stdout.log", 88 | "run_cmd_out2": "stderr.log", 89 | "run_correctness_output_files": [ 90 | ], 91 | "run_output_files": [ 92 | "results.json" 93 | ] 94 | } 95 | } 96 | }, 97 | "run_vars": { 98 | "CK_CAFFE_BATCH_SIZE": 2, 99 | "CK_TENSORRT_CJSON_PATH": "profiler.json" 100 | }, 101 | "skip_bin_ext": "yes", 102 | "source_files": [ 103 | "tensorrt-time.cpp" 104 | ], 105 | "tags": [ 106 | "tensorrt-time", 107 | "demo" 108 | ], 109 | "target_file": "tensorrt-time", 110 | "version": "1.0.0" 111 | } 112 | -------------------------------------------------------------------------------- /program/object-detection-tensorrt-py/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "adedfdb48403db8a", 3 | "build_compiler_vars": {}, 4 | "data_name": "object-detection-tensorrt-py", 5 | 
"main_language": "python", 6 | "no_compile": "yes", 7 | "no_target_file": "yes", 8 | "process_in_tmp": "yes", 9 | "program": "yes", 10 | "run_cmds": { 11 | "default": { 12 | "ignore_return_code": "no", 13 | "run_time": { 14 | "fine_grain_timer_file": "tmp-ck-timer.json", 15 | "post_process_via_ck": { 16 | "data_uoa": "24c98b0cee248d93", 17 | "module_uoa": "script", 18 | "script_name": "iniless_postprocess" 19 | }, 20 | "run_cmd_main": "$<>$ ../tensorrt_detect_preprocessed.py", 21 | "run_make_directories": [ 22 | "detections", 23 | "results" 24 | ], 25 | "run_output_files": [ 26 | "detections", 27 | "tmp-ck-timer.json" 28 | ] 29 | } 30 | } 31 | }, 32 | "run_deps": { 33 | "tool-coco": { 34 | "local": "yes", 35 | "name": "Python API for COCO", 36 | "sort": 10, 37 | "tags": "tool,coco" 38 | }, 39 | "dataset": { 40 | "force_target_as_host": "yes", 41 | "local": "yes", 42 | "name": "Preprocessed subset of COCO dataset", 43 | "sort": 20, 44 | "tags": "dataset,preprocessed,object-detection" 45 | }, 46 | "coco-helper": { 47 | "local": "yes", 48 | "name": "Python COCO helper functions and metadata", 49 | "sort": 35, 50 | "tags": "lib,python-package,coco-helper" 51 | }, 52 | "lib-python-numpy": { 53 | "local": "yes", 54 | "name": "Python NumPy library", 55 | "sort": 40, 56 | "tags": "lib,python-package,numpy" 57 | }, 58 | "lib-python-matplotlib": { 59 | "local": "yes", 60 | "name": "Python Matplotlib library", 61 | "sort": 45, 62 | "tags": "lib,python-package,matplotlib" 63 | }, 64 | "lib-python-pycuda": { 65 | "local": "yes", 66 | "name": "Python PyCUDA library", 67 | "sort": 50, 68 | "tags": "lib,python-package,pycuda" 69 | }, 70 | "lib-python-tensorrt": { 71 | "local": "yes", 72 | "name": "Python TensorRT library", 73 | "sort": 60, 74 | "tags": "lib,python-package,tensorrt" 75 | }, 76 | "tensorrt-helper": { 77 | "local": "yes", 78 | "name": "Python TensorRT helper functions and metadata", 79 | "sort": 70, 80 | "tags": "lib,python-package,tensorrt-helper" 81 | }, 82 | 
"plugin-nms": { 83 | "local": "yes", 84 | "name": "TensorRT NMS plugin", 85 | "sort": 35, 86 | "tags": "tensorrt,plugin,nms" 87 | }, 88 | "weights": { 89 | "local": "yes", 90 | "name": "TensorRT model", 91 | "sort": 30, 92 | "tags": "tensorrt,model,object-detection" 93 | } 94 | }, 95 | "run_vars": { 96 | "CK_ANNOTATIONS_OUT_DIR": "annotations", 97 | "CK_DETECTIONS_OUT_DIR": "detections", 98 | "CK_DETECTION_THRESHOLD": 0.0, 99 | "CK_PREPROCESSED_OUT_DIR": "preprocessed", 100 | "CK_RESULTS_OUT_DIR": "results", 101 | "CK_BATCH_COUNT": 1, 102 | "CK_BATCH_SIZE": 1, 103 | "CK_SILENT_MODE": 0, 104 | "CK_SKIP_IMAGES": 0, 105 | "CK_TIMER_FILE": "tmp-ck-timer.json" 106 | }, 107 | "tags": [ 108 | "object-detection", 109 | "tensorrt", 110 | "trt", 111 | "standalone", 112 | "lang-python" 113 | ] 114 | } 115 | -------------------------------------------------------------------------------- /soft/lib.jetson-inference/customize.py: -------------------------------------------------------------------------------- 1 | # 2 | # Collective Knowledge (individual environment - setup) 3 | # 4 | # See CK LICENSE.txt for licensing details 5 | # See CK COPYRIGHT.txt for copyright details 6 | # 7 | # Developer: Grigori Fursin, Grigori.Fursin@cTuning.org, http://fursin.net 8 | # 9 | 10 | import os 11 | 12 | ############################################################################## 13 | # get version from path 14 | 15 | def version_cmd(i): 16 | return {'return':0, 'cmd':'', 'version':'trunk'} 17 | 18 | ############################################################################## 19 | # setup environment setup 20 | 21 | def setup(i): 22 | """ 23 | Input: { 24 | cfg - meta of this soft entry 25 | self_cfg - meta of module soft 26 | ck_kernel - import CK kernel module (to reuse functions) 27 | 28 | host_os_uoa - host OS UOA 29 | host_os_uid - host OS UID 30 | host_os_dict - host OS meta 31 | 32 | target_os_uoa - target OS UOA 33 | target_os_uid - target OS UID 34 | target_os_dict - target OS 
meta 35 | 36 | target_device_id - target device ID (if via ADB) 37 | 38 | tags - list of tags used to search this entry 39 | 40 | env - updated environment vars from meta 41 | customize - updated customize vars from meta 42 | 43 | deps - resolved dependencies for this soft 44 | 45 | interactive - if 'yes', can ask questions, otherwise quiet 46 | } 47 | 48 | Output: { 49 | return - return code = 0, if successful 50 | > 0, if error 51 | (error) - error text if return > 0 52 | 53 | bat - prepared string for bat file 54 | } 55 | 56 | """ 57 | 58 | # Get variables 59 | ck=i['ck_kernel'] 60 | s='' 61 | 62 | iv=i.get('interactive','') 63 | 64 | cus=i.get('customize',{}) 65 | fp=cus.get('full_path','') 66 | 67 | p1=os.path.dirname(fp) 68 | pi=os.path.dirname(p1) 69 | 70 | hosd=i['host_os_dict'] 71 | tosd=i['target_os_dict'] 72 | 73 | # Check platform 74 | hplat=hosd.get('ck_name','') 75 | 76 | hproc=hosd.get('processor','') 77 | tproc=tosd.get('processor','') 78 | 79 | remote=tosd.get('remote','') 80 | tbits=tosd.get('bits','') 81 | 82 | env=i['env'] 83 | 84 | found=False 85 | while True: 86 | if os.path.isdir(os.path.join(pi,'lib')): 87 | found=True 88 | break 89 | pix=os.path.dirname(pi) 90 | if pix==pi: 91 | break 92 | pi=pix 93 | 94 | if not found: 95 | return {'return':1, 'error':'can\'t find installation root dir'} 96 | 97 | ep=cus['env_prefix'] 98 | env[ep]=pi 99 | 100 | ############################################################ 101 | # Setting environment depending on the platform 102 | if hplat=='win': 103 | # TBD 104 | return {'return':1, 'error':'not yet supported ...'} 105 | 106 | cus['dynamic_lib']=os.path.basename(fp) 107 | env[ep+'_DYNAMIC_NAME']=cus.get('dynamic_lib','') 108 | 109 | cus['path_lib']=os.path.join(pi,'lib') 110 | cus['path_include']=os.path.join(pi,'include') 111 | 112 | r = ck.access({'action': 'lib_path_export_script', 'module_uoa': 'os', 'host_os_dict': hosd, 113 | 'lib_path': cus.get('path_lib','')}) 114 | if r['return']>0: return r 115 
| s += r['script'] 116 | 117 | return {'return':0, 'bat':s} 118 | -------------------------------------------------------------------------------- /package/lib-python-tensorrt/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "check_exit_status": "yes", 3 | "customize": { 4 | "comment": "As of this update, TensorRT has to be first downloaded as a wheel, then installed from it", 5 | "install_env": { 6 | "PIP_INSTALL_OPTIONS": "--ignore-installed" 7 | } 8 | }, 9 | "deps": { 10 | "python": { 11 | "add_to_path": "yes", 12 | "local": "yes", 13 | "name": "Python interpreter", 14 | "sort": 10, 15 | "tags": "compiler,python", 16 | "update_tags_if_env" : { 17 | "v3.6": [ { "_CP": "cp36" } ], 18 | "v3.7": [ { "_CP": "cp37" } ], 19 | "v3.8": [ { "_CP": "cp38" } ] 20 | } 21 | }, 22 | "cuda": { 23 | "add_to_path": "no", 24 | "local": "yes", 25 | "name": "CUDA runtime", 26 | "sort": 30, 27 | "tags": "compiler,cuda", 28 | "update_tags_if_env" : { 29 | "v10.1": [ { "_CUDA_VER": "v10.1" } ], 30 | "v10.2": [ { "_CUDA_VER": "v10.2" } ], 31 | "v11.0": [ { "_CUDA_VER": "v11.0" } ], 32 | "v11.1": [ { "_CUDA_VER": "v11.1" } ] 33 | } 34 | }, 35 | "cudnn": { 36 | "add_to_path": "no", 37 | "local": "yes", 38 | "name": "cuDNN library", 39 | "sort": 40, 40 | "tags": "lib,cudnn", 41 | "update_tags_if_env" : { 42 | "v0": [ { "_CUDNN_VER": ".." } ] 43 | }, 44 | "comment": "FIXME: CK currently detects the version of cuDNN 8 as '..'." 
45 | } 46 | }, 47 | "end_full_path_universal": "build$#sep#$tensorrt$#sep#$__init__.py", 48 | "only_for_host_os_tags": [], 49 | "only_for_target_os_tags": [ 50 | "linux" 51 | ], 52 | "process_script": "install", 53 | "soft_uoa": "589e881dee2c8b2e", 54 | "suggested_path": "lib-python-tensorrt", 55 | "tags": [ 56 | "lib", 57 | "python-package", 58 | "tensorrt", 59 | "trt" 60 | ], 61 | "template": "yes", 62 | "template_type": "Python library", 63 | "use_scripts_from_another_entry": { 64 | "data_uoa": "e4d8d3d2c4c67902", 65 | "module_uoa": "package" 66 | }, 67 | "variations": { 68 | "tensorrt-5.1.5.0-cp36": { 69 | "extra_env": { 70 | "_CP": "cp36", 71 | "PYTHON_PACKAGE_NAME": "/usr/local/tensorrt-5.1.5.0/python/tensorrt-5.1.5.0-cp36-none-linux_x86_64.whl" 72 | } 73 | }, 74 | "tensorrt-6.0.1.5-cp36": { 75 | "on_by_default": "yes", 76 | "extra_env": { 77 | "_CP": "cp36", 78 | "PYTHON_PACKAGE_NAME": "/usr/local/tensorrt-6.0.1.5/python/tensorrt-6.0.1.5-cp36-none-linux_x86_64.whl" 79 | } 80 | }, 81 | "tensorrt-7.0.0.11-cp36": { 82 | "on_by_default": "no", 83 | "extra_env": { 84 | "_CP": "cp36", 85 | "PYTHON_PACKAGE_NAME": "/usr/local/tensorrt-7.0.0.11/python/tensorrt-7.0.0.11-cp36-none-linux_x86_64.whl" 86 | } 87 | }, 88 | "tensorrt-6.0.1.8-cp37": { 89 | "on_by_default": "no", 90 | "extra_env": { 91 | "_CP": "cp37", 92 | "PYTHON_PACKAGE_NAME": "/data/tensorrt-6/python/tensorrt-6.0.1.8-cp37-none-linux_x86_64.whl" 93 | } 94 | }, 95 | "tensorrt-7.0.0.11-cp37": { 96 | "on_by_default": "no", 97 | "extra_env": { 98 | "_CP": "cp37", 99 | "PYTHON_PACKAGE_NAME": "/data/tensorrt-7/python/tensorrt-7.0.0.11-cp37-none-linux_x86_64.whl" 100 | } 101 | }, 102 | "tensorrt-7.2.1.6-cp37": { 103 | "on_by_default": "no", 104 | "extra_env": { 105 | "_CUDA_VER": "v11.1", 106 | "_CUDNN_VER": "..", 107 | "_CP": "cp37", 108 | "PYTHON_PACKAGE_NAME": "/usr/local/tensorrt-7.2.1.6.cuda-11.1.cudnn-8.0/python/tensorrt-7.2.1.6-cp37-none-linux_x86_64.whl" 109 | } 110 | } 111 | } 112 | } 113 | 
#! /bin/bash

#
# Installation script for jetson-inference.
#
# See CK LICENSE.txt for licensing details.
# See CK COPYRIGHT.txt for copyright details.
#
# Developer(s):
# - Daniil Efremov, daniil.efremov@xored.com, 2016
# - Anton Lokhmotov, anton@dividiti.com, 2016
#

# Variables this script reads from the environment:
#   PACKAGE_DIR, ORIGINAL_PACKAGE_DIR, INSTALL_DIR,
#   JETSON_URL, JETSON_BRANCH,
#   CK_CXX, CK_CC, CK_ENV_CMAKE_BUILD_TYPE, CK_ENV_LIB_TENSORRT_INCLUDE
# (presumably all provided by CK when it runs the package installation).

# Refuse to continue without an installation directory: the cleaning step below
# would otherwise expand to 'rm -rf /*'.
if [ -z "${INSTALL_DIR}" ] ; then
  echo "Error: INSTALL_DIR is not set!"
  exit 1
fi

export JETSON_PKG_DIR=${PACKAGE_DIR}
export JETSON_SRC_DIR=${INSTALL_DIR}/src
export JETSON_BLD_DIR=${INSTALL_DIR}/bld

#export CUDA_CUDART_LIBRARY=/usr/local/cuda
#export CUDA_TOOLKIT_INCLUDE=/usr/local/cuda/include
#export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda
#export PATH=$PATH:/usr/local/cuda/bin
#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib:/usr/local/lib
#export CPLUS_INCLUDE_PATH=/usr/local/cuda/include

# NOTE(review): this unconditionally overrides any CUDA path resolved elsewhere.
export CK_ENV_COMPILER_CUDA=/usr/local/cuda

################################################################################
echo "Cleaning dir '${INSTALL_DIR}'"
rm -rf "${INSTALL_DIR:?}"/*

################################################################################
echo "Creating dir '${JETSON_SRC_DIR}'"
mkdir -p "${JETSON_SRC_DIR}"
if [ "${?}" != "0" ] ; then
  echo "Error: Creating dir '${JETSON_SRC_DIR}' failed!"
  exit 1
fi

echo "Creating dir '${JETSON_BLD_DIR}'"
mkdir -p "${JETSON_BLD_DIR}"
if [ "${?}" != "0" ] ; then
  echo "Error: Creating dir '${JETSON_BLD_DIR}' failed!"
  exit 1
fi

################################################################################
echo ""
echo "Cloning jetson-inference from '${JETSON_URL}' ..."

echo "git clone ${JETSON_URL} --no-checkout"
git clone "${JETSON_URL}" --no-checkout "${JETSON_SRC_DIR}"
if [ "${?}" != "0" ] ; then
  echo "Error: Cloning jetson-inference from '${JETSON_URL}' failed!"
  exit 1
fi

################################################################################
echo ""
echo "Checking out the '${JETSON_BRANCH}' branch of jetson-inference ..."

# Stop if the directory change fails, so that git never runs in the wrong place.
cd "${JETSON_SRC_DIR}" || exit 1
git checkout "${JETSON_BRANCH}"
if [ "${?}" != "0" ] ; then
  echo "Error: Checking out the '${JETSON_BRANCH}' branch of jetson-inference failed!"
  exit 1
fi

################################################################################
echo ""
echo "Configuring jetson-inference in '${JETSON_BLD_DIR}' ..."

# Replace the upstream CMakeLists.txt with the one shipped with this package.
cp "${ORIGINAL_PACKAGE_DIR}/CMakeLists.txt" "${JETSON_SRC_DIR}/CMakeLists.txt"
if [ "${?}" != "0" ] ; then
  echo "Error: Copying CMakeLists.txt from '${ORIGINAL_PACKAGE_DIR}' failed!"
  exit 1
fi

# -DBUILD_DEPS=NO # YES - apt update/install, download nets, etc.

cd "${JETSON_BLD_DIR}" || exit 1
cmake "${JETSON_SRC_DIR}" \
  -DCMAKE_BUILD_TYPE=${CK_ENV_CMAKE_BUILD_TYPE:-Release} \
  -DCUDA_TOOLKIT_ROOT_DIR="${CK_ENV_COMPILER_CUDA}" \
  -DCMAKE_CXX_COMPILER="${CK_CXX}" \
  -DCMAKE_C_COMPILER="${CK_CC}" \
  -DBUILD_DEPS=NO \
  -DGIE_PATH="${CK_ENV_LIB_TENSORRT_INCLUDE}/../" \
  -DNV_TENSORRT_MAJOR=2

if [ "${?}" != "0" ] ; then
  echo "Error: Configuring jetson-inference failed!"
  exit 1
fi

################################################################################
echo ""
echo "Building jetson-inference in '${JETSON_BLD_DIR}' ..."

cd "${JETSON_BLD_DIR}" || exit 1
make imagenet-console
if [ "${?}" != "0" ] ; then
  echo "Error: Building jetson-inference failed!"
  exit 1
fi

################################################################################
echo ""
echo "Installing jetson-inference in '${INSTALL_DIR}' ..."

# The CMake project writes its outputs under a directory named after the machine architecture.
cp -r "${JETSON_BLD_DIR}/$(uname -m)"/* "${INSTALL_DIR}"
if [ "${?}" != "0" ] ; then
  echo "Error: Installing jetson-inference failed!"
  exit 1
fi

################################################################################
echo ""
echo "Installed jetson-inference in '${INSTALL_DIR}'."
exit 0
After `solver.prototxt>iter_size` iterations, the gradients are summed and the model is updated. Mathematically, the batch size is `batch_size * iter_size`. In the included prototxt files, we have set `(batch_size=32, iter_size=16)`, but any combination of batch_size and iter_size that multiply to 512 will produce equivalent results. In fact, with the same random number generator seed, the model will be fully reproducible if trained multiple times. Finally, note that in Caffe `iter_size` is applied while training on the training set but not while testing on the test set. 24 | 25 | 3. **Implementing Fire modules.** In the paper, we describe the `expand` portion of the Fire layer as a collection of 1x1 and 3x3 filters. Caffe does not natively support a convolution layer that has multiple filter sizes. To work around this, we implement `expand1x1` and `expand3x3` layers and concatenate the results together in the channel dimension. 26 | 27 | 4. **The SqueezeNet team has released a few variants of SqueezeNet**. Each of these includes pretrained models, and the non-compressed versions include training protocols, too. 28 | 29 | SqueezeNet v1.0 (in this repo), the base model described in our SqueezeNet paper. 30 | 31 | [Compressed SqueezeNet v1.0](https://github.com/songhan/SqueezeNet_compressed), as described in the SqueezeNet paper. 32 | 33 | [SqueezeNet v1.0 with Residual Connections](https://github.com/songhan/SqueezeNet-Residual), which delivers higher accuracy without increasing the model size. 34 | 35 | [SqueezeNet v1.0 with Dense→Sparse→Dense (DSD) Training](https://github.com/songhan/SqueezeNet-DSD-Training), which delivers higher accuracy without increasing the model size. 36 | 37 | SqueezeNet v1.1 (in this repo), which requires 2.4x less computation than SqueezeNet v1.0 without diminishing accuracy. 38 | 39 | 5.
def convert_tf_model_to_trt(tf_model_filename, trt_model_filename,
        model_data_layout, input_layer_name, input_height, input_width,
        output_layer_name, output_data_type, max_workspace_size, max_batch_size):
    """Convert a frozen TensorFlow model into a serialized TensorRT engine file.

    The TensorFlow model is first converted to UFF, then parsed into a TensorRT
    network, built into an engine and written to ``trt_model_filename``.

    Args:
        tf_model_filename: path of the frozen TensorFlow model to read.
        trt_model_filename: path of the TensorRT engine file to write.
        model_data_layout: 'NHWC' registers the input as (height, width, 3);
            any other value registers it as (3, height, width) in NCHW order.
        input_layer_name: name of the input layer to register with the parser.
        input_height: input image height.
        input_width: input image width.
        output_layer_name: name of the output layer to register with the parser.
        output_data_type: 'fp32' keeps the default precision; any other value
            enables fp16 mode.
        max_workspace_size: value assigned to the builder's max_workspace_size.
        max_batch_size: value assigned to the builder's max_batch_size.

    Raises:
        RuntimeError: if the UFF model cannot be parsed, the engine cannot be
            built, or the engine cannot be serialized or written.
    """

    uff_model = uff.from_tensorflow_frozen_model(tf_model_filename)

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:

        # The number of channels (3) is hard-coded for both layouts.
        if model_data_layout == 'NHWC':
            parser.register_input(input_layer_name, [input_height, input_width, 3], trt.UffInputOrder.NHWC)
        else:
            parser.register_input(input_layer_name, [3, input_height, input_width], trt.UffInputOrder.NCHW)

        parser.register_output(output_layer_name)

        if not parser.parse_buffer(uff_model, network):
            raise RuntimeError("UFF model parsing (originally from {}) failed. Error: {}".format(tf_model_filename, parser.get_error(0).desc()))

        if output_data_type == 'fp32':
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))
        else:
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')

            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size = max_workspace_size
        builder.max_batch_size = max_batch_size

        trt_model_object = builder.build_cuda_engine(network)

        # Report a failed build as such, instead of letting it surface below as
        # a misleading "cannot serialize or write" error.
        if trt_model_object is None:
            raise RuntimeError('Cannot build TensorRT engine from {}.'.format(tf_model_filename))

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception as err:
            # Keep the RuntimeError contract, but chain the underlying cause.
            raise RuntimeError('Cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename)) from err
def convert_onnx_model_to_trt(onnx_model_filename, trt_model_filename,
        input_tensor_name, output_tensor_name,
        output_data_type, max_workspace_size, max_batch_size):
    """Convert an ONNX model file into a serialized TensorRT engine file.

    Args:
        onnx_model_filename: path of the ONNX model to read.
        trt_model_filename: path of the TensorRT engine file to write.
        input_tensor_name: name of the input tensor; used for the optimization
            profile when the TensorRT major version is 7 or higher.
        output_tensor_name: accepted for interface compatibility; not used here.
        output_data_type: 'fp32' keeps full precision; any other value requests fp16.
        max_workspace_size: builder workspace size.
        max_batch_size: builder batch size; also the optimum and maximum batch
            dimension of the optimization profile.

    Raises:
        RuntimeError: if the ONNX model cannot be parsed, the engine cannot be
            built, or the engine cannot be serialized or written.
    """

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    TRT_VERSION_MAJOR = int(trt.__version__.split('.')[0])

    use_fp16 = output_data_type != 'fp32'

    with trt.Builder(TRT_LOGGER) as builder:
        if TRT_VERSION_MAJOR >= 7:
            flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)) | (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
            network = builder.create_network(flag)
        else:
            network = builder.create_network()
        parser = trt.OnnxParser(network, TRT_LOGGER)

        if not use_fp16:
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))
            builder.fp16_mode = False
        else:
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')

            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size = max_workspace_size
        builder.max_batch_size = max_batch_size

        with open(onnx_model_filename, 'rb') as onnx_model_file:
            onnx_model = onnx_model_file.read()

        if not parser.parse(onnx_model):
            raise RuntimeError("Onnx model parsing from {} failed. Error: {}".format(onnx_model_filename, parser.get_error(0).desc()))

        if TRT_VERSION_MAJOR >= 7:
            # Create an optimization profile (see Section 7.2 of https://docs.nvidia.com/deeplearning/sdk/pdf/TensorRT-Developer-Guide.pdf).
            profile = builder.create_optimization_profile()
            # FIXME: Hardcoded for ImageNet (3x224x224). The minimum batch is 1; the optimum and maximum batch are both max_batch_size.
            profile.set_shape(input_tensor_name, (1, 3, 224, 224), (max_batch_size, 3, 224, 224), (max_batch_size, 3, 224, 224))

            config = builder.create_builder_config()
            # The engine below is built from this config, so the workspace size
            # and precision must be set on it; the builder attributes assigned
            # above only affect the legacy build_cuda_engine() path.
            config.max_workspace_size = max_workspace_size
            if use_fp16:
                config.set_flag(trt.BuilderFlag.FP16)
            config.add_optimization_profile(profile)

            trt_model_object = builder.build_engine(network, config)
        else:
            trt_model_object = builder.build_cuda_engine(network)

        # Report a failed build as such, instead of letting it surface below as
        # a misleading "cannot serialize or write" error.
        if trt_model_object is None:
            raise RuntimeError('Cannot build TensorRT engine from {}.'.format(onnx_model_filename))

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception as err:
            # Keep the RuntimeError contract, but chain the underlying cause.
            raise RuntimeError('Cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename)) from err
tensor type') 78 | arg_parser.add_argument('--output_data_type', type=str, default='fp32', help='Model data type') 79 | arg_parser.add_argument('--max_workspace_size', type=int, default=(1<<30), help='Builder workspace size') 80 | arg_parser.add_argument('--max_batch_size', type=int, default=1, help='Builder batch size') 81 | args = arg_parser.parse_args() 82 | 83 | convert_onnx_model_to_trt( args.onnx_model_file, args.trt_model_filename, 84 | args.input_tensor_name, args.output_tensor_name, 85 | args.output_data_type, args.max_workspace_size, args.max_batch_size ) 86 | 87 | main() 88 | 89 | -------------------------------------------------------------------------------- /package/caffemodel-deepscale-squeezenet-1.1-explicit-window-global-pooling/README.md: -------------------------------------------------------------------------------- 1 | 2 | The Caffe-compatible files that you are probably looking for: 3 | 4 | SqueezeNet_v1.0/train_val.prototxt #model architecture 5 | SqueezeNet_v1.0/solver.prototxt #additional training details (learning rate schedule, etc.) 6 | SqueezeNet_v1.0/squeezenet_v1.0.caffemodel #pretrained model parameters 7 | 8 | If you find SqueezeNet useful in your research, please consider citing the [SqueezeNet paper](http://arxiv.org/abs/1602.07360): 9 | 10 | @article{SqueezeNet, 11 | Author = {Forrest N. Iandola and Matthew W. Moskewicz and Khalid Ashraf and Song Han and William J. Dally and Kurt Keutzer}, 12 | Title = {SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and $<$1MB model size}, 13 | Journal = {arXiv:1602.07360}, 14 | Year = {2016} 15 | } 16 | 17 | 18 | Helpful hints: 19 | 20 | 1. **Getting the SqueezeNet model:** `git clone `. 21 | In this repository, we include Caffe-compatible files for the model architecture, the solver configuration, and the pretrained model (4.8MB uncompressed). 22 | 23 | 2. **Batch size.** We have experimented with batch sizes ranging from 32 to 1024. In this repo, our default batch size is 512. 
If implemented naively on a single GPU, a batch size this large may result in running out of memory. An effective workaround is to use hierarchical batching (sometimes called "delayed batching"). Caffe supports hierarchical batching by doing `train_val.prototxt>batch_size` training samples concurrently in memory. After `solver.prototxt>iter_size` iterations, the gradients are summed and the model is updated. Mathematically, the batch size is `batch_size * iter_size`. In the included prototxt files, we have set `(batch_size=32, iter_size=16)`, but any combination of batch_size and iter_size that multiply to 512 will produce equivalent results. In fact, with the same random number generator seed, the model will be fully reproducible if trained multiple times. Finally, note that in Caffe `iter_size` is applied while training on the training set but not while testing on the test set. 24 | 25 | 3. **Implementing Fire modules.** In the paper, we describe the `expand` portion of the Fire layer as a collection of 1x1 and 3x3 filters. Caffe does not natively support a convolution layer that has multiple filter sizes. To work around this, we implement `expand1x1` and `expand3x3` layers and concatenate the results together in the channel dimension. 26 | 27 | 4. **The SqueezeNet team has released a few variants of SqueezeNet**. Each of these includes pretrained models, and the non-compressed versions include training protocols, too. 28 | 29 | SqueezeNet v1.0 (in this repo), the base model described in our SqueezeNet paper. 30 | 31 | [Compressed SqueezeNet v1.0](https://github.com/songhan/SqueezeNet_compressed), as described in the SqueezeNet paper. 32 | 33 | [SqueezeNet v1.0 with Residual Connections](https://github.com/songhan/SqueezeNet-Residual), which delivers higher accuracy without increasing the model size.

cmake_minimum_required(VERSION 2.8)
project(jetson-inference)

# setup tensorRT flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")	# -std=gnu++11
set(BUILD_DEPS "YES" CACHE BOOL "If YES, will install dependencies into sandbox.  Automatically reset to NO after dependencies are installed.")


# if this is the first time running cmake, perform pre-build dependency install script (or if the user manually triggers re-building the dependencies)
if( ${BUILD_DEPS} )
	message("Launching pre-build dependency installer script...")

	execute_process(COMMAND sh ../CMakePreBuild.sh
				WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
				RESULT_VARIABLE PREBUILD_SCRIPT_RESULT)

	# Surface a failed (or unlaunchable) installer script instead of silently ignoring it.
	# Kept as a warning so that configuration still proceeds as before.
	if( NOT PREBUILD_SCRIPT_RESULT EQUAL 0 )
		message(WARNING "Pre-build dependency installer script failed: ${PREBUILD_SCRIPT_RESULT}")
	endif()

	set(BUILD_DEPS "NO" CACHE BOOL "If YES, will install dependencies into sandbox.  Automatically reset to NO after dependencies are installed." FORCE)
	message("Finished installing dependencies")
endif()


# Qt is used to load images (installed by ubuntu-desktop)
find_package(Qt4 REQUIRED)
include(${QT_USE_FILE})
add_definitions(${QT_DEFINITIONS})


# setup CUDA
# REQUIRED: cuda_add_library() below is only defined once the CUDA package has been found.
find_package(CUDA REQUIRED)

set(
	CUDA_NVCC_FLAGS
	${CUDA_NVCC_FLAGS};
	-O3
	-gencode arch=compute_53,code=sm_53
	-gencode arch=compute_61,code=sm_61
	-gencode arch=compute_62,code=sm_62
)


# setup project output paths
set(PROJECT_OUTPUT_DIR  ${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR})
set(PROJECT_INCLUDE_DIR ${PROJECT_OUTPUT_DIR}/include)

file(MAKE_DIRECTORY ${PROJECT_INCLUDE_DIR})
file(MAKE_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)

message("-- system arch:  ${CMAKE_SYSTEM_PROCESSOR}")
message("-- output path:  ${PROJECT_OUTPUT_DIR}")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)


# build C/C++ interface
# GIE_PATH is not set in this file; it has to be passed in on the cmake command line (-DGIE_PATH=...).
include_directories(${PROJECT_INCLUDE_DIR} ${GIE_PATH}/include)
include_directories(/usr/include/gstreamer-1.0 /usr/lib/aarch64-linux-gnu/gstreamer-1.0/include /usr/include/glib-2.0 /usr/include/libxml2 /usr/lib/aarch64-linux-gnu/glib-2.0/include/)

file(GLOB inferenceSources *.cpp *.cu util/*.cpp util/cuda/*.cu util/display/*.cpp)
file(GLOB inferenceIncludes *.h util/*.h util/camera/*.h util/cuda/*.h util/display/*.h)

cuda_add_library(jetson-inference SHARED ${inferenceSources})
#target_link_libraries(jetson-inference nvcaffe_parser nvinfer Qt4::QtGui GL GLEW gstreamer-1.0 gstapp-1.0)		# gstreamer-0.10 gstbase-0.10 gstapp-0.10
target_link_libraries(jetson-inference nvcaffe_parser nvinfer Qt4::QtGui GL GLEW gstreamer-1.0)


# transfer all headers to the include directory
foreach(include ${inferenceIncludes})
	message("-- Copying ${include}")
	configure_file(${include} ${PROJECT_INCLUDE_DIR} COPYONLY)
endforeach()


# create symbolic link for network data
execute_process( COMMAND "${CMAKE_COMMAND}" "-E" "create_symlink" "${PROJECT_SOURCE_DIR}/data/networks" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/networks" )


# copy image data
file(GLOB imageData ${PROJECT_SOURCE_DIR}/data/images/*)

foreach(image ${imageData})
	message("-- Copying ${image}")
	file(COPY ${image} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
	#configure_file(${include} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COPYONLY)
endforeach()


# copy tools
file(COPY "tools/segnet-batch.sh" DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})


# build samples & utilities
add_subdirectory(imagenet-console)
add_subdirectory(imagenet-camera)

add_subdirectory(detectnet-console)
add_subdirectory(detectnet-camera)

add_subdirectory(segnet-console)
add_subdirectory(segnet-camera)

#add_subdirectory(util/camera/gst-camera)
add_subdirectory(util/camera/v4l2-console)
add_subdirectory(util/camera/v4l2-display)

add_subdirectory(docs)


# install
foreach(include ${inferenceIncludes})
	install(FILES "${include}" DESTINATION include/jetson-inference)
endforeach()

# install the shared library
install(TARGETS jetson-inference DESTINATION lib/jetson-inference EXPORT jetson-inferenceConfig)

# install the cmake project, for importing
install(EXPORT jetson-inferenceConfig DESTINATION share/jetson-inference/cmake)

50 | for implementation in ${implementations[@]}; do 51 | if [ "${implementation}" != "${implementation_tensorrt}" ]; then 52 | echo "ERROR: Unsupported implementation '${implementation}'!" 53 | exit 1 54 | fi 55 | 56 | # Iterate for each model. 57 | for i in $(seq 1 ${#models[@]}); do 58 | # Configure the model. 59 | model=${models[${i}-1]} 60 | model_tags=${models_tags[${i}-1]} 61 | model_preprocessing_tags=${models_preprocessing_tags[${i}-1]} 62 | 63 | # Iterate for each data type. 64 | for data_type in ${data_types[@]}; do 65 | 66 | # Iterate for each mode. 67 | for mode in ${modes[@]}; do 68 | # TODO: Use the maximum batch size for accuracy experiments. 69 | if [ "${mode}" == "accuracy" ]; then continue; fi 70 | 71 | # Iterate for each max batch size. 72 | for max_batch_size in ${max_batch_sizes[@]}; do 73 | 74 | # Iterate for each batch size up to max batch size. 75 | for batch_size in $(seq 1 ${max_batch_size}); do 76 | 77 | # Configure record settings. 78 | record_uoa="${task}.${device}.${library}.${model}.${data_type}.max-batch-${max_batch_size}.batch-${batch_size}.${mode}" 79 | record_tags="${task},${device},${library},${model},${data_type},max-batch-${max_batch_size},batch-${batch_size},${mode}" 80 | if [ "${mode}" = "accuracy" ]; then 81 | # Get substring after "preprocessed," to end. 82 | preprocessing="${model_preprocessing_tags##*preprocessed,}" 83 | record_uoa+=".${preprocessing}" 84 | record_tags+=",${preprocessing}" 85 | fi 86 | 87 | echo "[`date`] Experiment #"${experiment_id}": ${record_uoa} ..." 88 | experiment_id=$((${experiment_id}+1)) 89 | 90 | # Skip automatically if experiment record already exists. 91 | record_dir=$(ck list local:experiment:${record_uoa}) 92 | if [ "${record_dir}" != "" ]; then 93 | echo "[`date`] - skipping ..." 94 | echo 95 | continue 96 | fi 97 | 98 | # Skip manually. 99 | if [ "${implementation}" != "${implementation_tensorrt}" ] ; then 100 | echo "[`date`] - skipping ..." 
101 | echo 102 | continue 103 | fi 104 | 105 | # Run (but before that print the exact command we are about to run). 106 | echo "[`date`] - running ..." 107 | read -d '' CMD </lib/libnvinfer.so". 19 | libinfer_dir=os.path.dirname(libinfer_so) 20 | if os.path.basename(libinfer_dir)=='lib': 21 | lib_dir=libinfer_dir 22 | arch_os_name='' 23 | else: 24 | lib_dir=os.path.dirname(libinfer_dir) 25 | arch_os_name=os.path.basename(libinfer_dir) 26 | root_dir=os.path.dirname(lib_dir) 27 | 28 | # Undetected version: 0.0.0 29 | major='0'; minor='0'; patch='0' 30 | 31 | nvinferversion_h=os.path.join(root_dir, 'include', arch_os_name, 'NvInferVersion.h') 32 | nvinfer_h=os.path.join(root_dir, 'include', arch_os_name, 'NvInfer.h') 33 | version_file_path = None 34 | if os.path.exists(nvinferversion_h): 35 | # TensorRT v5-6. 36 | version_file_path = nvinferversion_h 37 | elif os.path.exists(nvinfer_h): 38 | # TensorRT v1-5 (?). 39 | version_file_path = nvinfer_h 40 | if version_file_path: 41 | with open(version_file_path, 'r') as version_file: 42 | lines=version_file.readlines() 43 | for line in lines: 44 | if line.startswith('#define NV_TENSORRT_MAJOR'): major=line.split()[2] 45 | if line.startswith('#define NV_TENSORRT_MINOR'): minor=line.split()[2] 46 | if line.startswith('#define NV_TENSORRT_PATCH'): patch=line.split()[2] 47 | 48 | version='%s.%s.%s' % (major,minor,patch) 49 | return {'return':0, 'cmd':'', 'version':version} 50 | 51 | ############################################################################## 52 | # setup environment setup 53 | 54 | def setup(i): 55 | """ 56 | Input: { 57 | cfg - meta of this soft entry 58 | self_cfg - meta of module soft 59 | ck_kernel - import CK kernel module (to reuse functions) 60 | 61 | host_os_uoa - host OS UOA 62 | host_os_uid - host OS UID 63 | host_os_dict - host OS meta 64 | 65 | target_os_uoa - target OS UOA 66 | target_os_uid - target OS UID 67 | target_os_dict - target OS meta 68 | 69 | target_device_id - target device ID (if via 
ADB) 70 | 71 | tags - list of tags used to search this entry 72 | 73 | env - updated environment vars from meta 74 | customize - updated customize vars from meta 75 | 76 | deps - resolved dependencies for this soft 77 | 78 | interactive - if 'yes', can ask questions, otherwise quiet 79 | } 80 | 81 | Output: { 82 | return - return code = 0, if successful 83 | > 0, if error 84 | (error) - error text if return > 0 85 | 86 | bat - prepared string for bat file 87 | } 88 | 89 | """ 90 | 91 | # Get variables 92 | ck=i['ck_kernel'] 93 | s='' 94 | 95 | iv=i.get('interactive','') 96 | 97 | cus=i.get('customize',{}) 98 | 99 | hosd=i['host_os_dict'] 100 | tosd=i['target_os_dict'] 101 | 102 | # Check platform 103 | hplat=hosd.get('ck_name','') 104 | 105 | hproc=hosd.get('processor','') 106 | tproc=tosd.get('processor','') 107 | 108 | remote=tosd.get('remote','') 109 | tbits=tosd.get('bits','') 110 | 111 | # Paths. 112 | fp=cus.get('full_path','') 113 | path_lib=os.path.dirname(fp) 114 | if not os.path.isdir(path_lib): 115 | return {'return':1, 'error':'can\'t find installation lib dir'} 116 | 117 | path_include=path_lib.replace('lib','include') 118 | if not os.path.isdir(path_include): 119 | return {'return':1, 'error':'can\'t find installation include dir'} 120 | 121 | path_bin=path_lib.replace('lib', 'bin') 122 | env=i['env'] 123 | 124 | ep=cus['env_prefix'] 125 | env[ep]=path_lib 126 | 127 | ############################################################ 128 | # Setting environment depending on the platform 129 | if hplat=='win': 130 | # TBD 131 | return {'return':1, 'error':'not yet supported ...'} 132 | 133 | cus['dynamic_lib']=os.path.basename(fp) 134 | env[ep+'_DYNAMIC_NAME']=cus.get('dynamic_lib','') 135 | 136 | cus['path_lib']=path_lib 137 | cus['path_include']=path_include 138 | cus['path_bin']=path_bin 139 | 140 | r = ck.access({'action': 'lib_path_export_script', 'module_uoa': 'os', 'host_os_dict': hosd, 141 | 'lib_path': cus.get('path_lib','')}) 142 | if 
r['return']>0: return r 143 | s += r['script'] 144 | s += 'PATH={}:$PATH\n\n'.format(path_bin) 145 | 146 | return {'return':0, 'bat':s} 147 | -------------------------------------------------------------------------------- /program/tensorrt-test/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "backup_data_uid": "5e23a492b9138354", 3 | "build_compiler_vars": { 4 | "CK_TENSORRT_ENABLE_PROFILER": "1" 5 | }, 6 | "compile_deps": { 7 | "compiler": { 8 | "local": "yes", 9 | "name": "C++ compiler", 10 | "sort": 10, 11 | "tags": "compiler,lang-cpp" 12 | }, 13 | "cuda-compiler": { 14 | "local": "yes", 15 | "name": "CUDA compiler", 16 | "sort": 20, 17 | "tags": "compiler,lang-c-cuda" 18 | }, 19 | "lib-tensorrt": { 20 | "local": "yes", 21 | "name": "TensorRT engine", 22 | "sort": 25, 23 | "tags": "lib,tensorrt" 24 | }, 25 | "lib-jetson-inference": { 26 | "local": "yes", 27 | "name": "Jetson Inference library", 28 | "sort": 30, 29 | "tags": "lib,jetson-inference" 30 | } 31 | }, 32 | "run_deps": { 33 | "caffemodel": { 34 | "force_target_as_host": "yes", 35 | "local": "yes", 36 | "name": "Caffe model (net and weights)", 37 | "sort": 40, 38 | "tags": "caffemodel" 39 | }, 40 | "dataset-imagenet-aux": { 41 | "force_target_as_host": "yes", 42 | "local": "yes", 43 | "name": "ImageNet dataset (aux)", 44 | "sort": 50, 45 | "tags": "dataset,imagenet,aux" 46 | }, 47 | "dataset-imagenet-val": { 48 | "force_target_as_host": "yes", 49 | "local": "yes", 50 | "name": "ImageNet dataset (val)", 51 | "only_for_cmd": [ 52 | "imagenet-val", 53 | "imagenet-val-debug" 54 | ], 55 | "sort": 60, 56 | "tags": "dataset,imagenet,val" 57 | } 58 | }, 59 | "compiler_add_include_as_env_from_deps": [ 60 | "CK_ENV_LIB_STDCPP_INCLUDE", 61 | "CK_ENV_LIB_STDCPP_INCLUDE_EXTRA", 62 | "CK_ENV_LIB_JETSON_INFERENCE_INCLUDE", 63 | "CK_ENV_COMPILER_CUDA_INCLUDE" 64 | ], 65 | "compiler_env": "CK_CXX", 66 | "compiler_flags_as_env": "$<>$", 67 | "data_name": 
"tensorrt-test", 68 | "extra_ld_vars": "$<>$/libcudart.so", 69 | "main_language": "cpp", 70 | "print_files_after_run": [], 71 | "process_in_tmp": "yes", 72 | "program": "yes", 73 | "run_cmds": { 74 | "default": { 75 | "dataset_tags": [ 76 | "dataset", 77 | "jpeg" 78 | ], 79 | "ignore_return_code": "no", 80 | "run_time": { 81 | "fine_grain_timer_file": "results.json", 82 | "params": { 83 | "caffemodel_key": "deploy", 84 | "classification": "yes", 85 | "debug": "yes" 86 | }, 87 | "post_process_via_ck": "yes", 88 | "post_process_cmds": [ 89 | "python $#src_path_local#$postprocess.py" 90 | ], 91 | "pre_process_via_ck": { 92 | "data_uoa": "569404c41618603a", 93 | "script_name": "preprocess" 94 | }, 95 | "run_cmd_main": "$#BIN_FILE#$ $#dataset_path#$$#dataset_filename#$", 96 | "run_cmd_out1": "stdout.log", 97 | "run_cmd_out2": "stderr.log", 98 | "run_correctness_output_files": [ 99 | ], 100 | "run_output_files": [ 101 | "stdout.log", 102 | "stderr.log", 103 | "results.json" 104 | ] 105 | } 106 | }, 107 | "imagenet-val": { 108 | "ignore_return_code": "no", 109 | "run_time": { 110 | "fine_grain_timer_file": "results.json", 111 | "params": { 112 | "caffemodel_key": "deploy", 113 | "classification": "yes" 114 | }, 115 | "post_process_via_ck": "yes", 116 | "post_process_cmds": [ 117 | "python $#src_path_local#$postprocess.py" 118 | ], 119 | "pre_process_via_ck": { 120 | "data_uoa": "569404c41618603a", 121 | "script_name": "preprocess" 122 | }, 123 | "run_cmd_main": "$#BIN_FILE#$", 124 | "run_cmd_out1": "stdout.log", 125 | "run_cmd_out2": "stderr.log", 126 | "run_correctness_output_files": [ 127 | ], 128 | "run_output_files": [ 129 | "stdout.log", 130 | "stderr.log", 131 | "results.json" 132 | ] 133 | } 134 | }, 135 | "imagenet-val-debug": { 136 | "ignore_return_code": "no", 137 | "run_time": { 138 | "fine_grain_timer_file": "results.json", 139 | "params": { 140 | "caffemodel_key": "deploy", 141 | "classification": "yes", 142 | "debug": "yes" 143 | }, 144 | 
"post_process_via_ck": "yes", 145 | "post_process_cmds": [ 146 | "python $#src_path_local#$postprocess.py" 147 | ], 148 | "pre_process_via_ck": { 149 | "data_uoa": "569404c41618603a", 150 | "script_name": "preprocess" 151 | }, 152 | "run_cmd_main": "$#BIN_FILE#$", 153 | "run_cmd_out1": "stdout.log", 154 | "run_cmd_out2": "stderr.log", 155 | "run_set_env2": { 156 | "CK_TENSORRT_MAX_NUM_IMAGES": "1000" 157 | }, 158 | "run_correctness_output_files": [ 159 | ], 160 | "run_output_files": [ 161 | "stdout.log", 162 | "stderr.log", 163 | "results.json" 164 | ] 165 | } 166 | } 167 | }, 168 | "run_vars": { 169 | "CK_TENSORRT_MAX_NUM_IMAGES": "10" 170 | }, 171 | "skip_bin_ext": "yes", 172 | "source_files": [ 173 | "tensorrt-test.cpp" 174 | ], 175 | "tags": [ 176 | "tensorrt-test", 177 | "demo" 178 | ], 179 | "target_file": "tensorrt-test", 180 | "version": "1.0.0" 181 | } 182 | -------------------------------------------------------------------------------- /soft/model.tensorrt/README_object_detection.md: -------------------------------------------------------------------------------- 1 | Specific usage examples: 2 | 3 | ```bash 4 | ck detect soft:model.tensorrt --full_path=/datasets/tensorrt_plans_for_Xavier/ssd-small/MultiStream/ssd-small-MultiStream-gpu-b20-fp32.plan \ 5 | --extra_tags=maxbatch.20,fp32,ssd-mobilenet,gpu,object-detection,converted-by-nvidia \ 6 | --cus.version=ssd-mobilenet_nvidia_fp32 \ 7 | --ienv.ML_MODEL_CLASS_LABELS=/datasets/tensorrt_plans_for_Xavier/flatlabels.txt \ 8 | --ienv.ML_MODEL_COLOUR_CHANNELS_BGR=NO \ 9 | --ienv.ML_MODEL_IMAGE_HEIGHT=300 \ 10 | --ienv.ML_MODEL_IMAGE_WIDTH=300 \ 11 | --ienv.ML_MODEL_INPUT_DATA_TYPE=float32 \ 12 | --ienv.ML_MODEL_DATA_TYPE=float32 \ 13 | --ienv.ML_MODEL_DATA_LAYOUT=NCHW \ 14 | --ienv.ML_MODEL_NORMALIZE_DATA=YES \ 15 | --ienv.ML_MODEL_SUBTRACT_MEAN=NO \ 16 | --ienv.ML_MODEL_MAX_PREDICTIONS=100 \ 17 | --ienv.ML_MODEL_MAX_BATCH_SIZE=20 18 | ``` 19 | 20 | ```bash 21 | ck detect soft:model.tensorrt 
--full_path=/datasets/tensorrt_plans_for_Xavier/ssd-small/MultiStream/ssd-small-MultiStream-gpu-b20-int8_linear.plan \ 22 | --extra_tags=maxbatch.20,int8,linear,ssd-mobilenet,gpu,object-detection,converted-by-nvidia \ 23 | --cus.version=ssd-mobilenet_nvidia_int8_linear \ 24 | --ienv.ML_MODEL_CLASS_LABELS=/datasets/tensorrt_plans_for_Xavier/flatlabels.txt \ 25 | --ienv.ML_MODEL_COLOUR_CHANNELS_BGR=NO \ 26 | --ienv.ML_MODEL_IMAGE_HEIGHT=300 \ 27 | --ienv.ML_MODEL_IMAGE_WIDTH=300 \ 28 | --ienv.ML_MODEL_INPUT_DATA_TYPE=int8 \ 29 | --ienv.ML_MODEL_DATA_TYPE=int8 \ 30 | --ienv.ML_MODEL_DATA_LAYOUT=NCHW \ 31 | --ienv.ML_MODEL_NORMALIZE_DATA=NO \ 32 | --ienv.ML_MODEL_SUBTRACT_MEAN=YES \ 33 | --ienv.ML_MODEL_MAX_PREDICTIONS=100 \ 34 | --ienv.ML_MODEL_GIVEN_CHANNEL_MEANS="128 128 128" \ 35 | --ienv.ML_MODEL_MAX_BATCH_SIZE=20 36 | ``` 37 | 38 | 39 | ```bash 40 | ck detect soft:model.tensorrt --full_path=/datasets/tensorrt_plans_for_Xavier/ssd-large/MultiStream/ssd-large-MultiStream-gpu-b2-int8.plan \ 41 | --extra_tags=maxbatch.2,int8,ssd-resnet,object-detection,converted-by-nvidia \ 42 | --cus.version=ssd-resnet_nvidia_int8 \ 43 | --ienv.ML_MODEL_COLOUR_CHANNELS_BGR=NO \ 44 | --ienv.ML_MODEL_IMAGE_HEIGHT=1200 \ 45 | --ienv.ML_MODEL_IMAGE_WIDTH=1200 \ 46 | --ienv.ML_MODEL_INPUT_DATA_TYPE=int8 \ 47 | --ienv.ML_MODEL_DATA_TYPE=int8 \ 48 | --ienv.ML_MODEL_DATA_LAYOUT=NCHW \ 49 | --ienv.ML_MODEL_NORMALIZE_DATA=NO \ 50 | --ienv.ML_MODEL_SUBTRACT_MEAN=YES \ 51 | --ienv.ML_MODEL_GIVEN_CHANNEL_MEANS="123.68 116.78 103.94" \ 52 | --ienv.ML_MODEL_MAX_BATCH_SIZE=2 \ 53 | --ienv.ML_MODEL_MAX_PREDICTIONS=200 \ 54 | --ienv.ML_MODEL_CLASS_LABELS=/datasets/tensorrt_plans_for_Xavier/flatlabels.txt \ 55 | --ienv.ML_MODEL_SKIPS_ORIGINAL_DATASET_CLASSES=12,26,29,30,45,66,68,69,71,83 \ 56 | --ienv.ML_MODEL_TENSORRT_PLUGIN=/datasets/tensorrt_plans_for_Xavier/libnmsoptplugin.so 57 | ``` 58 | 59 | ```bash 60 | ck detect soft:model.tensorrt 
--full_path=/datasets/tensorrt_plans_for_Xavier/ssd-large-MultiStream-gpu-b2-fp16.plan \ 61 | --extra_tags=maxbatch.2,fp16,ssd-resnet,object-detection,converted-by-nvidia \ 62 | --cus.version=ssd-resnet_nvidia_fp16 \ 63 | --ienv.ML_MODEL_COLOUR_CHANNELS_BGR=NO \ 64 | --ienv.ML_MODEL_IMAGE_HEIGHT=1200 \ 65 | --ienv.ML_MODEL_IMAGE_WIDTH=1200 \ 66 | --ienv.ML_MODEL_INPUT_DATA_TYPE=float32 \ 67 | --ienv.ML_MODEL_DATA_TYPE=float16 \ 68 | --ienv.ML_MODEL_DATA_LAYOUT=NCHW \ 69 | --ienv.ML_MODEL_NORMALIZE_DATA=YES \ 70 | --ienv.ML_MODEL_NORMALIZE_LOWER=0.0 \ 71 | --ienv.ML_MODEL_NORMALIZE_UPPER=1.0 \ 72 | --ienv.ML_MODEL_SUBTRACT_MEAN=YES \ 73 | --ienv.ML_MODEL_GIVEN_CHANNEL_MEANS="0.485 0.456 0.406" \ 74 | --ienv.ML_MODEL_GIVEN_CHANNEL_STDS="0.229 0.224 0.225" \ 75 | --ienv.ML_MODEL_MAX_BATCH_SIZE=2 \ 76 | --ienv.ML_MODEL_MAX_PREDICTIONS=200 \ 77 | --ienv.ML_MODEL_CLASS_LABELS=/datasets/tensorrt_plans_for_Xavier/flatlabels.txt \ 78 | --ienv.ML_MODEL_SKIPS_ORIGINAL_DATASET_CLASSES=12,26,29,30,45,66,68,69,71,83 \ 79 | --ienv.ML_MODEL_TENSORRT_PLUGIN=/datasets/tensorrt_plans_for_Xavier/libnmsoptplugin.so 80 | ``` 81 | 82 | ```bash 83 | ck detect soft:model.tensorrt --full_path=/datasets/tensorrt_plans_for_Xavier/ssd-large-MultiStream-gpu-b2-fp32.plan \ 84 | --extra_tags=maxbatch.2,fp32,ssd-resnet,object-detection,converted-by-nvidia \ 85 | --cus.version=ssd-resnet_nvidia_fp32 \ 86 | --ienv.ML_MODEL_COLOUR_CHANNELS_BGR=NO \ 87 | --ienv.ML_MODEL_IMAGE_HEIGHT=1200 \ 88 | --ienv.ML_MODEL_IMAGE_WIDTH=1200 \ 89 | --ienv.ML_MODEL_INPUT_DATA_TYPE=float32 \ 90 | --ienv.ML_MODEL_DATA_TYPE=float32 \ 91 | --ienv.ML_MODEL_DATA_LAYOUT=NCHW \ 92 | --ienv.ML_MODEL_NORMALIZE_DATA=YES \ 93 | --ienv.ML_MODEL_NORMALIZE_LOWER=0.0 \ 94 | --ienv.ML_MODEL_NORMALIZE_UPPER=1.0 \ 95 | --ienv.ML_MODEL_SUBTRACT_MEAN=YES \ 96 | --ienv.ML_MODEL_GIVEN_CHANNEL_MEANS="0.485 0.456 0.406" \ 97 | --ienv.ML_MODEL_GIVEN_CHANNEL_STDS="0.229 0.224 0.225" \ 98 | --ienv.ML_MODEL_MAX_BATCH_SIZE=2 \ 99 | 
--ienv.ML_MODEL_MAX_PREDICTIONS=200 \ 100 | --ienv.ML_MODEL_CLASS_LABELS=/datasets/tensorrt_plans_for_Xavier/flatlabels.txt \ 101 | --ienv.ML_MODEL_SKIPS_ORIGINAL_DATASET_CLASSES=12,26,29,30,45,66,68,69,71,83 \ 102 | --ienv.ML_MODEL_TENSORRT_PLUGIN=/datasets/tensorrt_plans_for_Xavier/libnmsoptplugin.so 103 | ``` 104 | 105 | -------------------------------------------------------------------------------- /package/model-tensorrt-convert-from-tf/.cm/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "check_exit_status": "yes", 3 | "customize": { 4 | "install_env": { 5 | "PACKAGE_NAME": "converted_model.trt", 6 | "PACKAGE_VERSION": "1" 7 | }, 8 | "no_os_in_suggested_path": "yes", 9 | "no_ver_in_suggested_path": "yes", 10 | "package_name": "TensorRT-from-TF model", 11 | "skip_file_check": "yes" 12 | }, 13 | "deps": { 14 | "lib-python-tensorrt": { 15 | "local": "yes", 16 | "name": "Python TensorRT library", 17 | "sort": 20, 18 | "tags": "lib,python-package,tensorrt" 19 | }, 20 | "lib-python-tensorflow": { 21 | "local": "yes", 22 | "name": "Python TensorFlow library", 23 | "sort": 30, 24 | "tags": "lib,tensorflow,vprebuilt" 25 | }, 26 | "model-source": { 27 | "local": "yes", 28 | "name": "Original TensorFlow model", 29 | "sort": 10, 30 | "tags": "tf,model", 31 | "update_tags_if_env": { 32 | "image-classification,mobilenet,non-quantized,nhwc": [ 33 | { 34 | "_MODEL_TO_CONVERT": "mobilenet" 35 | } 36 | ], 37 | "image-classification,mobilenet,quantized,nhwc": [ 38 | { 39 | "_MODEL_TO_CONVERT": "mobilenet_quant" 40 | } 41 | ], 42 | "image-classification,resnet": [ 43 | { 44 | "_MODEL_TO_CONVERT": "resnet50" 45 | } 46 | ] 47 | } 48 | } 49 | }, 50 | "end_full_path_universal": "converted_model.trt", 51 | "process_script": "install", 52 | "soft_uoa": "model.tensorrt", 53 | "suggested_path": "model-tensorrt-converted-from-tf", 54 | "tags": [ 55 | "model", 56 | "image-classification", 57 | "tensorrt", 58 | "trt", 59 | "converted", 
60 | "converted-from-tf" 61 | ], 62 | "variations": { 63 | "fp16": { 64 | "extra_env": { 65 | "ML_MODEL_DATA_TYPE": "fp16", 66 | "ML_MODEL_INPUT_DATA_TYPE": "float32" 67 | } 68 | }, 69 | "fp32": { 70 | "extra_env": { 71 | "ML_MODEL_DATA_TYPE": "fp32", 72 | "ML_MODEL_INPUT_DATA_TYPE": "float32" 73 | }, 74 | "on_by_default": "yes" 75 | }, 76 | "maxbatch.1": { 77 | "extra_env": { 78 | "ML_MODEL_MAX_BATCH_SIZE": "1" 79 | }, 80 | "on_by_default": "yes" 81 | }, 82 | "maxbatch.10": { 83 | "extra_env": { 84 | "ML_MODEL_MAX_BATCH_SIZE": "10" 85 | } 86 | }, 87 | "maxbatch.11": { 88 | "extra_env": { 89 | "ML_MODEL_MAX_BATCH_SIZE": "11" 90 | } 91 | }, 92 | "maxbatch.12": { 93 | "extra_env": { 94 | "ML_MODEL_MAX_BATCH_SIZE": "12" 95 | } 96 | }, 97 | "maxbatch.13": { 98 | "extra_env": { 99 | "ML_MODEL_MAX_BATCH_SIZE": "13" 100 | } 101 | }, 102 | "maxbatch.14": { 103 | "extra_env": { 104 | "ML_MODEL_MAX_BATCH_SIZE": "14" 105 | } 106 | }, 107 | "maxbatch.15": { 108 | "extra_env": { 109 | "ML_MODEL_MAX_BATCH_SIZE": "15" 110 | } 111 | }, 112 | "maxbatch.16": { 113 | "extra_env": { 114 | "ML_MODEL_MAX_BATCH_SIZE": "16" 115 | } 116 | }, 117 | "maxbatch.17": { 118 | "extra_env": { 119 | "ML_MODEL_MAX_BATCH_SIZE": "17" 120 | } 121 | }, 122 | "maxbatch.18": { 123 | "extra_env": { 124 | "ML_MODEL_MAX_BATCH_SIZE": "18" 125 | } 126 | }, 127 | "maxbatch.19": { 128 | "extra_env": { 129 | "ML_MODEL_MAX_BATCH_SIZE": "19" 130 | } 131 | }, 132 | "maxbatch.2": { 133 | "extra_env": { 134 | "ML_MODEL_MAX_BATCH_SIZE": "2" 135 | } 136 | }, 137 | "maxbatch.20": { 138 | "extra_env": { 139 | "ML_MODEL_MAX_BATCH_SIZE": "20" 140 | } 141 | }, 142 | "maxbatch.3": { 143 | "extra_env": { 144 | "ML_MODEL_MAX_BATCH_SIZE": "3" 145 | } 146 | }, 147 | "maxbatch.4": { 148 | "extra_env": { 149 | "ML_MODEL_MAX_BATCH_SIZE": "4" 150 | } 151 | }, 152 | "maxbatch.5": { 153 | "extra_env": { 154 | "ML_MODEL_MAX_BATCH_SIZE": "5" 155 | } 156 | }, 157 | "maxbatch.6": { 158 | "extra_env": { 159 | "ML_MODEL_MAX_BATCH_SIZE": "6" 
160 | } 161 | }, 162 | "maxbatch.7": { 163 | "extra_env": { 164 | "ML_MODEL_MAX_BATCH_SIZE": "7" 165 | } 166 | }, 167 | "maxbatch.8": { 168 | "extra_env": { 169 | "ML_MODEL_MAX_BATCH_SIZE": "8" 170 | } 171 | }, 172 | "maxbatch.9": { 173 | "extra_env": { 174 | "ML_MODEL_MAX_BATCH_SIZE": "9" 175 | } 176 | }, 177 | "mobilenet": { 178 | "extra_customize": { 179 | "package_name": "TensorRT-from-TF model (MobileNet)" 180 | }, 181 | "extra_env": { 182 | "ML_MODEL_DATA_LAYOUT": "NHWC", 183 | "ML_MODEL_NORMALIZE_DATA": "YES", 184 | "ML_MODEL_SUBTRACT_MEAN": "NO", 185 | "_MODEL_TO_CONVERT": "mobilenet" 186 | } 187 | }, 188 | "mobilenet-quant": { 189 | "extra_customize": { 190 | "package_name": "TensorRT-from-TF model (MobileNet quantized)" 191 | }, 192 | "extra_env": { 193 | "ML_MODEL_DATA_LAYOUT": "NHWC", 194 | "ML_MODEL_NORMALIZE_DATA": "NO", 195 | "ML_MODEL_SUBTRACT_MEAN": "YES", 196 | "_MODEL_TO_CONVERT": "mobilenet_quant" 197 | } 198 | }, 199 | "resnet": { 200 | "extra_customize": { 201 | "package_name": "TensorRT-from-TF model (ResNet50)" 202 | }, 203 | "extra_env": { 204 | "ML_MODEL_DATA_LAYOUT": "NHWC", 205 | "ML_MODEL_GIVEN_CHANNEL_MEANS": "123.68 116.78 103.94", 206 | "ML_MODEL_NORMALIZE_DATA": "NO", 207 | "ML_MODEL_SUBTRACT_MEAN": "YES", 208 | "_MODEL_TO_CONVERT": "resnet50" 209 | }, 210 | "on_by_default": "yes" 211 | } 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /program/image-classification-tensorrt-py/tensorrt_classify_preprocessed.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import time 5 | import os 6 | import shutil 7 | import numpy as np 8 | 9 | from imagenet_helper import (load_preprocessed_batch, image_list, class_labels, 10 | MODEL_DATA_LAYOUT, MODEL_COLOURS_BGR, MODEL_INPUT_DATA_TYPE, MODEL_DATA_TYPE, MODEL_USE_DLA, 11 | MODEL_IMAGE_WIDTH, MODEL_IMAGE_HEIGHT, MODEL_IMAGE_CHANNELS, 12 | IMAGE_DIR, 
def main():
    """Classify preprocessed images in batches and record the results.

    Recreates RESULTS_DIR, initializes the TensorRT predictor, then for each of
    BATCH_COUNT batches loads BATCH_SIZE preprocessed images, runs inference and
    writes one '<image name>.txt' file per image containing one score per line.
    Timing statistics are written to 'tmp-ck-timer.json' in the current
    directory.

    Raises:
        RuntimeError: if CK_RESULTS_DIR is not set.
    """
    setup_time_begin = time.time()

    # os.getenv() has no default for CK_RESULTS_DIR; fail with a clear message
    # instead of an obscure TypeError from os.path.isdir(None).
    if not RESULTS_DIR:
        raise RuntimeError('CK_RESULTS_DIR environment variable must be set')

    # Cleanup results directory
    if os.path.isdir(RESULTS_DIR):
        shutil.rmtree(RESULTS_DIR)
    os.makedirs(RESULTS_DIR)

    pycuda_context, max_batch_size, input_volume, output_volume, num_layers = initialize_predictor()

    # From here on the CUDA context is ours: always pop it, even on failure.
    try:
        num_classes = len(class_labels)

        print('Images dir: ' + IMAGE_DIR)
        print('Image list file: ' + IMAGE_LIST_FILE)
        print('Batch size: {}'.format(BATCH_SIZE))
        print('Batch count: {}'.format(BATCH_COUNT))
        print('Results dir: ' + RESULTS_DIR)
        print('Normalize: {}'.format(MODEL_NORMALIZE_DATA))
        print('Subtract mean: {}'.format(SUBTRACT_MEAN))
        print('Per-channel means to subtract: {}'.format(GIVEN_CHANNEL_MEANS))

        print("Data layout: {}".format(MODEL_DATA_LAYOUT) )
        print("DLA mode used: {}".format(MODEL_USE_DLA) )
        print('Model image height: {}'.format(MODEL_IMAGE_HEIGHT))
        print('Model image width: {}'.format(MODEL_IMAGE_WIDTH))
        print('Model image channels: {}'.format(MODEL_IMAGE_CHANNELS))
        print('Model input data type: {}'.format(MODEL_INPUT_DATA_TYPE))
        print('Model (internal) data type: {}'.format(MODEL_DATA_TYPE))
        print('Model BGR colours: {}'.format(MODEL_COLOURS_BGR))
        print('Model max_batch_size: {}'.format(max_batch_size))
        print('Model output volume (number of outputs per one prediction): {}'.format(output_volume))
        print('Model num_layers: {}'.format(num_layers))
        print('Number of class_labels: {}'.format(num_classes))
        print("")

        setup_time = time.time() - setup_time_begin

        # Run batched mode
        test_time_begin = time.time()
        image_index = 0
        total_load_time = 0
        total_classification_time = 0
        first_classification_time = 0
        images_loaded = 0

        for batch_index in range(BATCH_COUNT):
            batch_number = batch_index+1

            begin_time = time.time()
            batch_data, image_index = load_preprocessed_batch(image_list, image_index)

            load_time = time.time() - begin_time
            total_load_time += load_time
            images_loaded += BATCH_SIZE

            trimmed_batch_results, inference_time_s = inference_for_given_batch(batch_data)

            print("[batch {} of {}] loading={:.2f} ms, inference={:.2f} ms".format(
                batch_number, BATCH_COUNT, load_time*1000, inference_time_s*1000))

            total_classification_time += inference_time_s
            # Remember first batch prediction time
            if batch_index == 0:
                first_classification_time = inference_time_s

            # Process results
            for index_in_batch in range(BATCH_SIZE):
                one_batch_result = trimmed_batch_results[index_in_batch]
                if output_volume==1:
                    # The model emits only the winning class index: expand it
                    # into a one-hot vector so the output format stays uniform.
                    arg_max = int(one_batch_result[0])
                    softmax_vector = [0]*arg_max + [1] + [0]*(num_classes-arg_max-1)
                else:
                    softmax_vector = one_batch_result[-num_classes:]    # skipping the background class on the left (if present)
                global_index = batch_index * BATCH_SIZE + index_in_batch
                res_file = os.path.join(RESULTS_DIR, image_list[global_index])
                with open(res_file + '.txt', 'w') as f:
                    f.writelines('{}\n'.format(prob) for prob in softmax_vector)
    finally:
        pycuda_context.pop()

    test_time = time.time() - test_time_begin

    # The first batch is excluded from the average when there are several
    # batches (its time is tracked separately above).
    if BATCH_COUNT > 1:
        timed_total = total_classification_time - first_classification_time
        timed_images = images_loaded - BATCH_SIZE
    else:
        timed_total = total_classification_time
        timed_images = images_loaded

    # Guard the divisions: BATCH_COUNT=0 (or a zero measured time) must not crash the report.
    avg_classification_time = timed_total / timed_images if timed_images > 0 else 0.0
    avg_load_time = total_load_time / images_loaded if images_loaded > 0 else 0.0
    avg_fps = 1.0 / avg_classification_time if avg_classification_time > 0 else 0.0

    # Store benchmarking results:
    output_dict = {
        'setup_time_s': setup_time,
        'test_time_s': test_time,
        'images_load_time_total_s': total_load_time,
        'images_load_time_avg_s': avg_load_time,
        'prediction_time_total_s': total_classification_time,
        'prediction_time_avg_s': avg_classification_time,

        'avg_time_ms': avg_classification_time * 1000,
        'avg_fps': avg_fps,
        'batch_time_ms': avg_classification_time * 1000 * BATCH_SIZE,
        'batch_size': BATCH_SIZE,
    }
    with open('tmp-ck-timer.json', 'w') as out_file:
        json.dump(output_dict, out_file, indent=4, sort_keys=True)
def initialize_predictor():
    """Load the serialized TensorRT engine and allocate its I/O buffers.

    Creates a pycuda context, deserializes the engine stored at MODEL_PATH,
    allocates one device buffer per binding (sized for max_batch_size) plus a
    page-locked host buffer per output, and stores the resulting state in
    module-level globals for inference_for_given_batch().

    Returns:
        Tuple (pycuda_context, max_batch_size, input_volume, output_volume,
        num_layers). The caller is responsible for popping pycuda_context.

    Raises:
        RuntimeError: if the model cannot be loaded, if BATCH_SIZE is not
            supported or exceeds the engine's max_batch_size, or if no
            input/output binding could be identified. The pycuda context is
            popped before raising.
    """
    global pycuda_context
    global d_inputs, h_d_outputs, h_output, model_bindings, cuda_stream
    global input_volume, output_volume
    global trt_context
    global BATCH_SIZE
    global max_batch_size
    global trt_version

    # Parsed before the context is created so that a parsing problem is neither
    # misreported as a corrupted model file nor leaves a context on the stack.
    trt_version = [ int(v) for v in trt.__version__.split('.') ]

    # Load the TensorRT model from file
    pycuda_context = pycuda.tools.make_default_context()

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    try:
        trt.init_libnvinfer_plugins(TRT_LOGGER, "")
        with open(MODEL_PATH, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            serialized_engine = f.read()
            trt_engine = runtime.deserialize_cuda_engine(serialized_engine)
        # NOTE(review): deserialization is expected to hand back None (rather
        # than raise) on a bad plan file - confirm against the TensorRT docs.
        if trt_engine is None:
            raise ValueError('engine deserialization returned None')
        print('[TensorRT v{}.{}] successfully loaded'.format(trt_version[0], trt_version[1]))
    except Exception as err:
        # 'except Exception' (not a bare except) lets KeyboardInterrupt/SystemExit through.
        pycuda_context.pop()
        raise RuntimeError('TensorRT model file {} is not found or corrupted'.format(MODEL_PATH)) from err

    max_batch_size = trt_engine.max_batch_size

    if trt_version[0] >= 7 and BATCH_SIZE>1:
        pycuda_context.pop()
        raise RuntimeError("Desired batch_size ({}) is not yet supported in TensorRT {}".format(BATCH_SIZE,trt_version[0]))

    if BATCH_SIZE>max_batch_size:
        pycuda_context.pop()
        raise RuntimeError("Desired batch_size ({}) exceeds max_batch_size of the model ({})".format(BATCH_SIZE,max_batch_size))

    trt_context = trt_engine.create_execution_context()

    d_inputs, h_d_outputs, model_bindings = [], [], []
    model_input_shape, model_output_shape = None, None
    for interface_layer in trt_engine:
        idx     = trt_engine.get_binding_index(interface_layer)
        dtype   = trt_engine.get_binding_dtype(interface_layer)
        # Built as a list because the CHW4 branch below modifies one dimension
        # in place (the original tuple made that assignment raise TypeError).
        shape   = [abs(dim) for dim in trt_engine.get_binding_shape(interface_layer)]
        fmt     = trt_engine.get_binding_format(idx) if trt_version[0] >= 6 else None

        if fmt and fmt == trt.TensorFormat.CHW4 and trt_engine.binding_is_input(interface_layer):
            # Round the third-from-last dimension up to a multiple of 4.
            shape[-3] = ((shape[-3] - 1) // 4 + 1) * 4
        shape   = tuple(shape)
        size    = trt.volume(shape) * max_batch_size

        dev_mem = cuda.mem_alloc(size * dtype.itemsize)
        model_bindings.append( int(dev_mem) )

        if trt_engine.binding_is_input(interface_layer):
            if trt_version[0] >= 6:
                trt_context.set_binding_shape(idx, shape)
            interface_type = 'Input'
            d_inputs.append(dev_mem)
            model_input_shape = shape
        else:
            interface_type = 'Output'
            host_mem = cuda.pagelocked_empty(size, trt.nptype(dtype))
            h_d_outputs.append({ 'host_mem': host_mem, 'dev_mem': dev_mem })
            # With no explicit output layer name, the last output binding wins.
            if MODEL_SOFTMAX_LAYER=='' or interface_layer == MODEL_SOFTMAX_LAYER:
                model_output_shape = shape
                h_output = host_mem

        print("{} layer {}: dtype={}, shape={}, elements_per_max_batch={}".format(interface_type, interface_layer, dtype, shape, size))

    if model_input_shape is None or model_output_shape is None:
        # Previously this surfaced as a NameError further down and leaked the context.
        pycuda_context.pop()
        raise RuntimeError("Could not identify the input and/or output layer ({!r}) among the model bindings".format(MODEL_SOFTMAX_LAYER))

    cuda_stream     = cuda.Stream()
    input_volume    = trt.volume(model_input_shape)     # total number of monochromatic subpixels (before batching)
    output_volume   = trt.volume(model_output_shape)    # total number of elements in one image prediction (before batching)
    num_layers      = trt_engine.num_layers

    return pycuda_context, max_batch_size, input_volume, output_volume, num_layers


def inference_for_given_batch(batch_data):
    """Run one batch through the engine prepared by initialize_predictor().

    Args:
        batch_data: array-like whose first dimension is the batch; it is
            flattened and copied to the first input binding.

    Returns:
        Tuple (trimmed_batch_results, inference_time_s): a list with one
        flat output slice per input item, and the wall-clock time in seconds
        covering the host-to-device copy, execution and device-to-host copies.
    """
    global d_inputs, h_d_outputs, h_output, model_bindings, cuda_stream
    global trt_context
    global max_batch_size
    global trt_version

    actual_batch_size  = len(batch_data)
    if MODEL_USE_DLA and max_batch_size>actual_batch_size:
        # In DLA mode the batch is zero-padded up to max_batch_size. Only the
        # batch axis is padded, whatever the rank of the data.
        pad_widths = [(0, max_batch_size-actual_batch_size)] + [(0,0)] * (np.ndim(batch_data)-1)
        batch_data = np.pad(batch_data, pad_widths, 'constant')
        pseudo_batch_size = max_batch_size
    else:
        pseudo_batch_size = actual_batch_size

    flat_batch  = np.ravel(batch_data)

    begin_inference_timestamp   = time.time()

    cuda.memcpy_htod_async(d_inputs[0], flat_batch, cuda_stream)  # assuming one input layer for image classification
    if trt_version[0] >= 7:
        trt_context.execute_async_v2(bindings=model_bindings, stream_handle=cuda_stream.handle)
    else:
        trt_context.execute_async(bindings=model_bindings, batch_size=pseudo_batch_size, stream_handle=cuda_stream.handle)

    for output in h_d_outputs:
        cuda.memcpy_dtoh_async(output['host_mem'], output['dev_mem'], cuda_stream)
    cuda_stream.synchronize()

    inference_time_s            = time.time() - begin_inference_timestamp

    ## first dimension contains actual_batch_size vectors, further format depends on the task:
    #
    # h_output holds max_batch_size equally sized slices; keep only the ones
    # that correspond to real (non-padding) inputs.
    trimmed_batch_results = np.split(h_output, max_batch_size)[:actual_batch_size]

    return trimmed_batch_results, inference_time_s
"image-classification,resnet": [ { "_MODEL_TO_CONVERT": "tf2onnx_resnet50" } ], 33 | "image-classification,mobilenet,non-quantized,nchw": [ { "_MODEL_TO_CONVERT": "tf2onnx_mobilenet" } ], 34 | "image-classification,mobilenet,quantized,nchw": [ { "_MODEL_TO_CONVERT": "tf2onnx_mobilenet_quant" } ] 35 | } 36 | } 37 | }, 38 | "end_full_path_universal": "converted_model.trt", 39 | "process_script": "install", 40 | "soft_uoa": "model.tensorrt", 41 | "suggested_path": "model-tensorrt-converted-from-onnx", 42 | "tags": [ 43 | "model", 44 | "image-classification", 45 | "tensorrt", 46 | "trt", 47 | "converted", 48 | "converted-from-onnx" 49 | ], 50 | "variations": { 51 | "maxbatch.1": { 52 | "on_by_default": "yes", 53 | "extra_env": { 54 | "ML_MODEL_MAX_BATCH_SIZE": "1" 55 | } 56 | }, 57 | "maxbatch.2": { 58 | "extra_env": { 59 | "ML_MODEL_MAX_BATCH_SIZE": "2" 60 | } 61 | }, 62 | "maxbatch.3": { 63 | "extra_env": { 64 | "ML_MODEL_MAX_BATCH_SIZE": "3" 65 | } 66 | }, 67 | "maxbatch.4": { 68 | "extra_env": { 69 | "ML_MODEL_MAX_BATCH_SIZE": "4" 70 | } 71 | }, 72 | "maxbatch.5": { 73 | "extra_env": { 74 | "ML_MODEL_MAX_BATCH_SIZE": "5" 75 | } 76 | }, 77 | "maxbatch.6": { 78 | "extra_env": { 79 | "ML_MODEL_MAX_BATCH_SIZE": "6" 80 | } 81 | }, 82 | "maxbatch.7": { 83 | "extra_env": { 84 | "ML_MODEL_MAX_BATCH_SIZE": "7" 85 | } 86 | }, 87 | "maxbatch.8": { 88 | "extra_env": { 89 | "ML_MODEL_MAX_BATCH_SIZE": "8" 90 | } 91 | }, 92 | "maxbatch.9": { 93 | "extra_env": { 94 | "ML_MODEL_MAX_BATCH_SIZE": "9" 95 | } 96 | }, 97 | "maxbatch.10": { 98 | "extra_env": { 99 | "ML_MODEL_MAX_BATCH_SIZE": "10" 100 | } 101 | }, 102 | "maxbatch.11": { 103 | "extra_env": { 104 | "ML_MODEL_MAX_BATCH_SIZE": "11" 105 | } 106 | }, 107 | "maxbatch.12": { 108 | "extra_env": { 109 | "ML_MODEL_MAX_BATCH_SIZE": "12" 110 | } 111 | }, 112 | "maxbatch.13": { 113 | "extra_env": { 114 | "ML_MODEL_MAX_BATCH_SIZE": "13" 115 | } 116 | }, 117 | "maxbatch.14": { 118 | "extra_env": { 119 | "ML_MODEL_MAX_BATCH_SIZE": "14" 
120 | } 121 | }, 122 | "maxbatch.15": { 123 | "extra_env": { 124 | "ML_MODEL_MAX_BATCH_SIZE": "15" 125 | } 126 | }, 127 | "maxbatch.16": { 128 | "extra_env": { 129 | "ML_MODEL_MAX_BATCH_SIZE": "16" 130 | } 131 | }, 132 | "maxbatch.17": { 133 | "extra_env": { 134 | "ML_MODEL_MAX_BATCH_SIZE": "17" 135 | } 136 | }, 137 | "maxbatch.18": { 138 | "extra_env": { 139 | "ML_MODEL_MAX_BATCH_SIZE": "18" 140 | } 141 | }, 142 | "maxbatch.19": { 143 | "extra_env": { 144 | "ML_MODEL_MAX_BATCH_SIZE": "19" 145 | } 146 | }, 147 | "maxbatch.20": { 148 | "extra_env": { 149 | "ML_MODEL_MAX_BATCH_SIZE": "20" 150 | } 151 | }, 152 | "maxbatch.21": { 153 | "extra_env": { 154 | "ML_MODEL_MAX_BATCH_SIZE": "21" 155 | } 156 | }, 157 | "maxbatch.22": { 158 | "extra_env": { 159 | "ML_MODEL_MAX_BATCH_SIZE": "22" 160 | } 161 | }, 162 | "maxbatch.23": { 163 | "extra_env": { 164 | "ML_MODEL_MAX_BATCH_SIZE": "23" 165 | } 166 | }, 167 | "maxbatch.24": { 168 | "extra_env": { 169 | "ML_MODEL_MAX_BATCH_SIZE": "24" 170 | } 171 | }, 172 | "maxbatch.25": { 173 | "extra_env": { 174 | "ML_MODEL_MAX_BATCH_SIZE": "25" 175 | } 176 | }, 177 | "maxbatch.26": { 178 | "extra_env": { 179 | "ML_MODEL_MAX_BATCH_SIZE": "26" 180 | } 181 | }, 182 | "maxbatch.27": { 183 | "extra_env": { 184 | "ML_MODEL_MAX_BATCH_SIZE": "27" 185 | } 186 | }, 187 | "maxbatch.28": { 188 | "extra_env": { 189 | "ML_MODEL_MAX_BATCH_SIZE": "28" 190 | } 191 | }, 192 | "maxbatch.29": { 193 | "extra_env": { 194 | "ML_MODEL_MAX_BATCH_SIZE": "29" 195 | } 196 | }, 197 | "maxbatch.30": { 198 | "extra_env": { 199 | "ML_MODEL_MAX_BATCH_SIZE": "30" 200 | } 201 | }, 202 | "maxbatch.31": { 203 | "extra_env": { 204 | "ML_MODEL_MAX_BATCH_SIZE": "31" 205 | } 206 | }, 207 | "maxbatch.32": { 208 | "extra_env": { 209 | "ML_MODEL_MAX_BATCH_SIZE": "32" 210 | } 211 | }, 212 | "fp16": { 213 | "extra_env": { 214 | "ML_MODEL_DATA_TYPE": "fp16", 215 | "ML_MODEL_INPUT_DATA_TYPE": "float32" 216 | } 217 | }, 218 | "fp32": { 219 | "on_by_default": "yes", 220 | "extra_env": { 
221 | "ML_MODEL_DATA_TYPE": "fp32", 222 | "ML_MODEL_INPUT_DATA_TYPE": "float32" 223 | } 224 | }, 225 | "resnet": { 226 | "on_by_default": "yes", 227 | "extra_customize": { 228 | "package_name": "TensorRT-from-ONNX model (ResNet50)" 229 | }, 230 | "extra_env": { 231 | "ML_MODEL_DATA_LAYOUT": "NCHW", 232 | "ML_MODEL_NORMALIZE_DATA": "NO", 233 | "ML_MODEL_SUBTRACT_MEAN": "YES", 234 | "ML_MODEL_GIVEN_CHANNEL_MEANS": "123.68 116.78 103.94", 235 | "_MODEL_TO_CONVERT": "tf2onnx_resnet50" 236 | } 237 | }, 238 | "mobilenet": { 239 | "extra_customize": { 240 | "package_name": "TensorRT-from-ONNX model (MobileNet)" 241 | }, 242 | "extra_env": { 243 | "ML_MODEL_DATA_LAYOUT": "NCHW", 244 | "ML_MODEL_NORMALIZE_DATA": "YES", 245 | "ML_MODEL_SUBTRACT_MEAN": "NO", 246 | "_MODEL_TO_CONVERT": "tf2onnx_mobilenet" 247 | } 248 | }, 249 | "mobilenet-quant": { 250 | "extra_customize": { 251 | "package_name": "TensorRT-from-ONNX model (MobileNet quantized)" 252 | }, 253 | "extra_env": { 254 | "ML_MODEL_DATA_LAYOUT": "NCHW", 255 | "ML_MODEL_NORMALIZE_DATA": "NO", 256 | "ML_MODEL_SUBTRACT_MEAN": "YES", 257 | "_MODEL_TO_CONVERT": "tf2onnx_mobilenet_quant" 258 | } 259 | } 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /program/object-detection-tensorrt-py/tensorrt_detect_preprocessed.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import time 5 | import os 6 | import shutil 7 | import numpy as np 8 | 9 | from coco_helper import (load_preprocessed_batch, image_filenames, original_w_h, 10 | class_labels, num_classes, bg_class_offset, class_map, 11 | MODEL_DATA_LAYOUT, MODEL_COLOURS_BGR, MODEL_INPUT_DATA_TYPE, MODEL_DATA_TYPE, MODEL_USE_DLA, 12 | MODEL_IMAGE_WIDTH, MODEL_IMAGE_HEIGHT, MODEL_IMAGE_CHANNELS, 13 | IMAGE_DIR, IMAGE_LIST_FILE, MODEL_NORMALIZE_DATA, SUBTRACT_MEAN, GIVEN_CHANNEL_MEANS, BATCH_SIZE, BATCH_COUNT) 14 | 15 | from tensorrt_helper 
from tensorrt_helper import (initialize_predictor, inference_for_given_batch)


## Post-detection filtering by confidence score:
#
SCORE_THRESHOLD = float(os.getenv('CK_DETECTION_THRESHOLD', 0.0))


## Model properties:
#
MODEL_MAX_PREDICTIONS = int(os.getenv('ML_MODEL_MAX_PREDICTIONS', 100))


## Writing the results out:
#
CUR_DIR = os.getcwd()
DETECTIONS_OUT_DIR = os.path.join(CUR_DIR, os.environ['CK_DETECTIONS_OUT_DIR'])
ANNOTATIONS_OUT_DIR = os.path.join(CUR_DIR, os.environ['CK_ANNOTATIONS_OUT_DIR'])
RESULTS_OUT_DIR = os.path.join(CUR_DIR, os.environ['CK_RESULTS_OUT_DIR'])
FULL_REPORT = os.getenv('CK_SILENT_MODE', '0') in ('NO', 'no', 'OFF', 'off', '0')


def main():
    """Run batched object detection and write detections plus timing results.

    For each of BATCH_COUNT batches, a preprocessed batch is loaded, passed to
    inference_for_given_batch(), and one '<image name>.txt' file per image is
    written into DETECTIONS_OUT_DIR. The first line of each file holds the
    original image width and height; every following line holds one detection
    whose confidence is at least SCORE_THRESHOLD. Timing statistics are dumped
    to 'tmp-ck-timer.json' in the current directory.
    """
    setup_time_begin = time.time()

    # Start from an empty detections directory. makedirs() also creates
    # missing parent directories, which os.mkdir() would fail on.
    if os.path.isdir(DETECTIONS_OUT_DIR):
        shutil.rmtree(DETECTIONS_OUT_DIR)
    os.makedirs(DETECTIONS_OUT_DIR)

    pycuda_context, max_batch_size, input_volume, output_volume, num_layers = initialize_predictor()

    print('Images dir: ' + IMAGE_DIR)
    print('Image list file: ' + IMAGE_LIST_FILE)
    print('Batch size: {}'.format(BATCH_SIZE))
    print('Batch count: {}'.format(BATCH_COUNT))
    print('Detections dir: ' + DETECTIONS_OUT_DIR)
    print('Normalize: {}'.format(MODEL_NORMALIZE_DATA))
    print('Subtract mean: {}'.format(SUBTRACT_MEAN))
    print('Per-channel means to subtract: {}'.format(GIVEN_CHANNEL_MEANS))

    print("Data layout: {}".format(MODEL_DATA_LAYOUT))
    print("DLA mode used: {}".format(MODEL_USE_DLA))
    print('Model image height: {}'.format(MODEL_IMAGE_HEIGHT))
    print('Model image width: {}'.format(MODEL_IMAGE_WIDTH))
    print('Model image channels: {}'.format(MODEL_IMAGE_CHANNELS))
    print('Model input data type: {}'.format(MODEL_INPUT_DATA_TYPE))
    print('Model (internal) data type: {}'.format(MODEL_DATA_TYPE))
    print('Model BGR colours: {}'.format(MODEL_COLOURS_BGR))
    print('Model max_batch_size: {}'.format(max_batch_size))
    print('Model output volume (number of outputs per one prediction): {}'.format(output_volume))
    print('Model num_layers: {}'.format(num_layers))
    print('Number of class_labels: {}'.format(num_classes))
    print('Post-detection confidence score threshold: {}'.format(SCORE_THRESHOLD))
    print("")

    setup_time = time.time() - setup_time_begin

    # Run batched mode
    test_time_begin = time.time()
    total_load_time = 0
    next_batch_offset = 0
    total_inference_time = 0
    first_inference_time = 0
    images_loaded = 0

    try:
        for batch_index in range(BATCH_COUNT):
            batch_number = batch_index + 1

            begin_time = time.time()
            current_batch_offset = next_batch_offset
            batch_data, next_batch_offset = load_preprocessed_batch(image_filenames, current_batch_offset)

            load_time = time.time() - begin_time
            total_load_time += load_time
            images_loaded += BATCH_SIZE

            trimmed_batch_results, inference_time_s = inference_for_given_batch(batch_data)

            print("[batch {} of {}] loading={:.2f} ms, inference={:.2f} ms".format(
                batch_number, BATCH_COUNT, load_time*1000, inference_time_s*1000))

            total_inference_time += inference_time_s
            # Remember inference_time for the first batch; it is excluded from
            # the average below whenever more than one batch was run.
            if batch_index == 0:
                first_inference_time = inference_time_s

            # Process results
            for index_in_batch in range(BATCH_SIZE):
                single_image_predictions = trimmed_batch_results[index_in_batch]
                # The element following MODEL_MAX_PREDICTIONS rows of 7 values is
                # reinterpreted bitwise as an int32 holding the number of boxes.
                num_boxes = single_image_predictions[MODEL_MAX_PREDICTIONS*7].view('int32')
                global_image_index = current_batch_offset + index_in_batch
                width_orig, height_orig = original_w_h[global_image_index]

                filename_orig = image_filenames[global_image_index]
                detections_filename = os.path.splitext(filename_orig)[0] + '.txt'
                detections_filepath = os.path.join(DETECTIONS_OUT_DIR, detections_filename)

                with open(detections_filepath, 'w') as det_file:
                    det_file.write('{:d} {:d}\n'.format(width_orig, height_orig))

                    for row in range(num_boxes):
                        # Each row: image id, box corners (scaled below by the
                        # original image size), confidence and class number.
                        (_image_id, ymin, xmin, ymax, xmax, confidence, class_number) = \
                            single_image_predictions[row*7:(row+1)*7]

                        if confidence < SCORE_THRESHOLD:
                            continue

                        class_number = int(class_number)
                        if class_map:
                            class_number = class_map[class_number]

                        x1 = xmin * width_orig
                        y1 = ymin * height_orig
                        x2 = xmax * width_orig
                        y2 = ymax * height_orig
                        class_label = class_labels[class_number - bg_class_offset]
                        det_file.write('{:.2f} {:.2f} {:.2f} {:.2f} {:.3f} {} {}\n'.format(
                            x1, y1, x2, y2, confidence, class_number, class_label))
    finally:
        # Release the context even when loading or inference raised.
        pycuda_context.pop()

    test_time = time.time() - test_time_begin

    if BATCH_COUNT > 1:
        timed_inference = total_inference_time - first_inference_time
        timed_images = images_loaded - BATCH_SIZE
    else:
        timed_inference = total_inference_time
        timed_images = images_loaded

    # Guard every division: with no images (or a zero measured time) report 0.0
    # instead of raising ZeroDivisionError after all the work has been done.
    avg_inference_time = timed_inference / timed_images if timed_images > 0 else 0.0
    avg_load_time = total_load_time / images_loaded if images_loaded > 0 else 0.0
    avg_fps = 1.0 / avg_inference_time if avg_inference_time > 0 else 0.0

    # Store benchmarking results:
    output_dict = {
        'run_time_state': {
            'setup_time_s': setup_time,
            'test_time_s': test_time,
            'images_load_time_total_s': total_load_time,
            'images_load_time_avg_s': avg_load_time,
            'prediction_time_total_s': total_inference_time,
            'prediction_time_avg_s': avg_inference_time,

            'avg_time_ms': avg_inference_time * 1000,
            'avg_fps': avg_fps,
            'batch_time_ms': avg_inference_time * 1000 * BATCH_SIZE,
            'batch_size': BATCH_SIZE,
        }
    }
    with open('tmp-ck-timer.json', 'w') as out_file:
        json.dump(output_dict, out_file, indent=4, sort_keys=True)


if __name__ == '__main__':
    main()
# Source file: script/explore-accuracy/explore-accuracy.py
import ck.kernel as ck
import copy
import re
import json


def do(i):
    """Record ImageNet validation accuracy experiments for the 'tensorrt-test' program.

    Resolves all registered TensorRT engines and Caffe models, prepares the
    program pipeline once, and then runs a CK 'autotune' action (looping over
    CK_TENSORRT_ENABLE_FP16 = 0..1) for every selected model.

    Args:
        i: input dictionary; not read by this function.

    Returns:
        A CK-style dictionary: {'return': 0} on success, otherwise a dictionary
        with 'return' > 0 (and 'error'), either produced here or propagated
        from a failed ck.access() call.
    """
    # Named group 'tags' is read back via match.group('tags') below.
    model_name_re = re.compile(r'Caffe model \(net and weights\) \((?P<tags>.*)\)')

    # Detect basic platform info.
    r = ck.access({'action':'detect',
                   'module_uoa':'platform',
                   'out':'out'})
    if r['return']>0: return r

    # Host and target OS params.
    hos = r['host_os_uoa']
    tos = r['os_uoa']
    tdid = r['device_id']

    # Fix cmd key here since it may be used to get extra run-time deps.
    cmd_key = 'imagenet-val'

    # Load TensorRT-test program meta and desc to check deps.
    rx = ck.access({'action':'load',
                    'module_uoa':'program',
                    'data_uoa':'tensorrt-test'})
    if rx['return']>0: return rx
    mm = rx['dict']

    # Get compile-time and run-time deps.
    cdeps = mm.get('compile_deps',{})
    rdeps = mm.get('run_deps',{})

    # Merge rdeps with cdeps for setting up the pipeline (which uses
    # common deps), but tag them as "for_run_time".
    for k in rdeps:
        cdeps[k] = rdeps[k]
        cdeps[k]['for_run_time'] = 'yes'

    # Limit the number of images (50,000 by default).
    max_num_images = 10

    # TensorRT engines.
    depl = copy.deepcopy(cdeps['lib-tensorrt'])

    r = ck.access({'action':'resolve',
                   'module_uoa':'env',
                   'host_os':hos,
                   'target_os':tos,
                   'device_id':tdid,
                   'deps':{'lib-tensorrt':copy.deepcopy(depl)}})
    if r['return']>0: return r

    udepl = r['deps']['lib-tensorrt'].get('choices',[]) # All UOAs of env for TensorRT engines.
    if not udepl:
        return {'return':1, 'error':'no registered TensorRT engines'}

    # Caffe models.
    depm = copy.deepcopy(rdeps['caffemodel'])

    r = ck.access({'action':'resolve',
                   'module_uoa':'env',
                   'host_os':hos,
                   'target_os':tos,
                   'device_id':tdid,
                   'deps':{'caffemodel':copy.deepcopy(depm)}})
    if r['return']>0: return r

    udepm = r['deps']['caffemodel'].get('choices',[]) # All UOAs of env for Caffe models.
    if not udepm:
        return {'return':1, 'error':'no registered Caffe models'}

    # Prepare pipeline.
    cdeps['lib-tensorrt']['uoa'] = udepl[0]
    cdeps['caffemodel']['uoa'] = udepm[0]

    ii = {'action':'pipeline',
          'prepare':'yes',

          'repo_uoa':'ck-tensorrt',
          'module_uoa':'program',
          'data_uoa':'tensorrt-test',
          'cmd_key':cmd_key,

          'dependencies': cdeps,

          'env':{
              'CK_TENSORRT_MAX_NUM_IMAGES': max_num_images
          },

          'no_compiler_description':'yes',
          'compile_only_once':'yes',

          'cpu_freq':'max',
          'gpu_freq':'max',

          'flags':'-O3',

          'speed':'no',
          'energy':'no',

          'no_state_check':'yes',
          'skip_calibration':'yes',

          'skip_print_timers':'yes',
          'out':'con',
         }

    r = ck.access(ii)
    if r['return']>0: return r

    if r.get('fail','')=='yes':
        return {'return':10, 'error':'pipeline failed ('+r.get('fail_reason','')+')'}

    if r.get('ready','')!='yes':
        return {'return':11, 'error':'pipeline not ready'}

    # Clean pipeline: drop the status keys before reusing it as a template.
    for key in ('ready', 'fail', 'return'):
        if key in r: del(r[key])

    pipeline = copy.deepcopy(r)

    # For each TensorRT engine.
    for lib_uoa in udepl:
        # Load TensorRT engine.
        r = ck.access({'action':'load',
                       'module_uoa':'env',
                       'data_uoa':lib_uoa})
        if r['return']>0: return r
        # NOTE(review): the name is hard-coded rather than taken from the loaded
        # env entry, and lib_uoa is not put into the pipeline deps below, so
        # every engine iteration uses the same record name - confirm intent.
        lib_name = 'tensorrt-1.0.0'
        lib_tags = lib_name
        # Skip some libs with "in [..]" or "not in [..]".
        if lib_name in []: continue

        # For each Caffe model.
        for model_uoa in udepm:
            # Load Caffe model.
            r = ck.access({'action':'load',
                           'module_uoa':'env',
                           'data_uoa':model_uoa})
            if r['return']>0: return r
            # Get the tags from e.g. 'Caffe model (net and weights) (deepscale, squeezenet, 1.1)'
            model_name = r['data_name']
            match = model_name_re.match(model_name)
            if match:
                model_tags = match.group('tags').replace(' ', '').replace(',', '-')
            else:
                # Name does not follow the expected pattern: fall back to the
                # env entry's own tags instead of failing on a None match.
                model_tags = '-'.join(r.get('dict',{}).get('tags',[]))
            # Skip some models with "in [..]" or "not in [..]".
            if model_tags not in ['nvidia-googlenet']: continue

            record_repo = 'local'
            record_uoa = 'imagenet-val-accuracy-'+model_tags+'-'+lib_tags

            # Prepare pipeline.
            ck.out('---------------------------------------------------------------------------------------')
            ck.out('%s - %s' % (lib_name, lib_uoa))
            ck.out('%s - %s' % (model_name, model_uoa))
            ck.out('Experiment - %s:%s' % (record_repo, record_uoa))

            # Prepare autotuning input.
            cpipeline = copy.deepcopy(pipeline)

            # Reset deps and change UOA.
            new_deps = {'caffemodel':copy.deepcopy(depm)}
            new_deps['caffemodel']['uoa'] = model_uoa

            r = ck.access({'action':'resolve',
                           'module_uoa':'env',
                           'host_os':hos,
                           'target_os':tos,
                           'device_id':tdid,
                           'deps':new_deps})
            if r['return']>0: return r

            cpipeline['dependencies'].update(new_deps)

            ii = {'action':'autotune',

                  'module_uoa':'pipeline',
                  'data_uoa':'program',

                  'choices_order':[
                      [
                          '##choices#env#CK_TENSORRT_ENABLE_FP16'
                      ]
                  ],
                  'choices_selection':[
                      {'type':'loop', 'start':0, 'stop':1, 'step':1, 'default':1}
                  ],

                  'features_keys_to_process':['##choices#env#CK_TENSORRT_ENABLE_FP16', '##choices#env#CK_TENSORRT_MAX_NUM_IMAGES'],

                  'process_multi_keys':['##characteristics#compile#*'],

                  'iterations':-1,
                  'repetitions':1,

                  'record':'yes',
                  'record_failed':'yes',
                  'record_params':{
                      'search_point_by_features':'yes'
                  },
                  'record_repo':record_repo,
                  'record_uoa':record_uoa,

                  'tags':['accuracy', 'imagenet-val', model_tags, lib_tags],

                  'pipeline':cpipeline,
                  'out':'con'}

            r = ck.access(ii)
            if r['return']>0: return r

            if r.get('fail','')=='yes':
                return {'return':10, 'error':'pipeline failed ('+r.get('fail_reason','')+')'}

    return {'return':0}


r=do({})
if r['return']>0: ck.err(r)
"imageNet.h" 10 | 11 | #include "loadImage.h" 12 | #include "cudaFont.h" 13 | 14 | #include 15 | 16 | #include 17 | 18 | #define DEFAULT_BATCH_SIZE 1 19 | 20 | int classifyImageRGBA(imageNet* net, const char* imgPath) 21 | { 22 | int exit_status = EXIT_SUCCESS; 23 | 24 | // Load image from disk. 25 | float* imgCPU = NULL; 26 | float* imgCUDA = NULL; 27 | int imgWidth = 0; 28 | int imgHeight = 0; 29 | 30 | if( !loadImageRGBA(imgPath, (float4**)&imgCPU, (float4**)&imgCUDA, &imgWidth, &imgHeight) ) 31 | { 32 | printf("[tensorrt-test] Failed to load image '%s'\n", imgPath); 33 | exit_status = EXIT_FAILURE; 34 | } 35 | else 36 | { 37 | // Classify image. 38 | float confidence = 0.0f; 39 | const int imgClass = net->Classify(imgCUDA, imgWidth, imgHeight, &confidence); 40 | 41 | if( imgClass < 0 ) 42 | { 43 | printf("[tensorrt-test] Failed to classify '%s' (result=%i)\n", imgPath, imgClass); 44 | exit_status = EXIT_FAILURE; 45 | } 46 | else 47 | { 48 | printf("[tensorrt-test] '%s' -> %2.5f%% class #%i (%s)\n", imgPath, confidence * 100.0f, imgClass, net->GetClassDesc(imgClass)); 49 | } 50 | CUDA(cudaFreeHost(imgCPU)); 51 | } 52 | 53 | return exit_status; 54 | } 55 | 56 | 57 | // Main entry point. 58 | int main( int argc, char** argv ) 59 | { 60 | int exit_status = EXIT_SUCCESS; 61 | 62 | // Print environment variables set by CK. 63 | printf("\n[tensorrt-test] CK settings detected:\n"); 64 | 65 | const char * caffe_model_var = "CK_CAFFE_MODEL"; 66 | const char * caffe_model_val = getenv(caffe_model_var); 67 | printf(" %s=\"%s\"\n", caffe_model_var, 68 | caffe_model_val ? caffe_model_val : "?"); 69 | 70 | const char * caffe_weights_var = "CK_ENV_MODEL_CAFFE_WEIGHTS"; 71 | const char * caffe_weights_val = getenv(caffe_weights_var); 72 | printf(" %s=\"%s\"\n", caffe_weights_var, 73 | caffe_weights_val ? 
caffe_weights_val : "?"); 74 | 75 | const char * imagenet_val_dir_var = "CK_ENV_DATASET_IMAGENET_VAL"; 76 | const char * imagenet_val_dir_val = getenv(imagenet_val_dir_var); 77 | printf(" %s=\"%s\"\n", imagenet_val_dir_var, 78 | imagenet_val_dir_val ? imagenet_val_dir_val : "?"); 79 | 80 | const char * imagenet_mean_bin_var = "CK_CAFFE_IMAGENET_MEAN_BIN"; 81 | const char * imagenet_mean_bin_val = getenv(imagenet_mean_bin_var); 82 | printf(" %s=\"%s\"\n", imagenet_mean_bin_var, 83 | imagenet_mean_bin_val ? imagenet_mean_bin_val : "?"); 84 | 85 | const char * imagenet_synset_words_txt_var = "CK_CAFFE_IMAGENET_SYNSET_WORDS_TXT"; 86 | const char * imagenet_synset_words_txt_val = getenv(imagenet_synset_words_txt_var); 87 | printf(" %s=\"%s\"\n", imagenet_synset_words_txt_var, 88 | imagenet_synset_words_txt_val ? imagenet_synset_words_txt_val : "?"); 89 | 90 | const char * imagenet_val_txt_var = "CK_CAFFE_IMAGENET_VAL_TXT"; 91 | const char * imagenet_val_txt_val = getenv(imagenet_val_txt_var); 92 | printf(" %s=\"%s\"\n", imagenet_val_txt_var, 93 | imagenet_val_txt_val ? imagenet_val_txt_val : "?"); 94 | 95 | const char * tensorrt_max_num_images_var = "CK_TENSORRT_MAX_NUM_IMAGES"; 96 | const char * tensorrt_max_num_images_val = getenv(tensorrt_max_num_images_var); 97 | printf(" %s=\"%s\"\n", tensorrt_max_num_images_var, 98 | tensorrt_max_num_images_val ? tensorrt_max_num_images_val : "?"); 99 | 100 | const char * tensorrt_enable_fp16_var = "CK_TENSORRT_ENABLE_FP16"; 101 | const char * tensorrt_enable_fp16_val = getenv(tensorrt_enable_fp16_var); 102 | printf(" %s=\"%s\"\n", tensorrt_enable_fp16_var, 103 | tensorrt_enable_fp16_val ? tensorrt_enable_fp16_val : "?"); 104 | 105 | // Print configuration variables inferred. 106 | printf("\n[tensorrt-test] TensorRT settings inferred:\n"); 107 | const size_t tensorrt_max_num_images = tensorrt_max_num_images_val ? 
atoi(tensorrt_max_num_images_val) : 50000; 108 | printf(" TENSORRT_MAX_NUM_IMAGES=%ld\n", tensorrt_max_num_images); 109 | 110 | const bool tensorrt_enable_fp16 = tensorrt_enable_fp16_val ? (bool)atoi(tensorrt_enable_fp16_val) : true; 111 | printf(" TENSORRT_ENABLE_FP16=%d\n", tensorrt_enable_fp16); 112 | 113 | // for classification default batch size is 1 114 | const uint32_t maxBatchSize = DEFAULT_BATCH_SIZE; 115 | 116 | // Print command line arguments. 117 | printf("\n[tensorrt-test] Command line arguments (%i):", argc); 118 | for( int i = 0; i < argc; ++i ) 119 | printf("\n [%i] %s", i, argv[i]); 120 | printf("\n"); 121 | 122 | // Clean possibly cached TensorRT model. 123 | printf("\n[tensorrt-test] Cleaning TensorRT model cache..."); 124 | { 125 | const char* cache_ext = "tensorcache"; 126 | char* cache_path = (char*) malloc(strlen(caffe_weights_val) + strlen(cache_ext) + 2); 127 | sprintf(cache_path, "%s.%s", caffe_weights_val, cache_ext); 128 | printf("\n[tensorrt-test] - file \'%s\' removed ", cache_path); 129 | int status = remove(cache_path); 130 | if (0 == status) 131 | { 132 | printf("successfully!\n"); 133 | } 134 | else 135 | { 136 | printf("unsuccessfully!\n"); 137 | } 138 | free(cache_path); 139 | } 140 | 141 | printf("\n[tensorrt-test] Start imageNet::Create..."); 142 | // Create an imageNet object. 143 | imageNet* net = imageNet::Create( 144 | caffe_model_val, 145 | caffe_weights_val, 146 | imagenet_mean_bin_val, 147 | imagenet_synset_words_txt_val, 148 | "data", "prob", 149 | maxBatchSize 150 | ); 151 | 152 | #if( 1 == CK_TENSORRT_ENABLE_PROFILER ) 153 | net->EnableProfiler(); 154 | #endif 155 | 156 | if( !net ) 157 | { 158 | printf("\n[tensorrt-test] Failed to create ImageNet classifier\n"); 159 | return EXIT_FAILURE; 160 | } 161 | 162 | // Classify a single image or all images in $CK_ENV_DATASET_IMAGENET_VAL. 
163 | if( argc == 2 ) 164 | { 165 | const char* imgPath = argv[1]; 166 | exit_status = classifyImageRGBA(net, imgPath); 167 | } 168 | else if( argc == 1 ) 169 | { 170 | DIR* dir; 171 | struct dirent* ent; 172 | if( (dir = opendir(imagenet_val_dir_val)) ) 173 | { 174 | const char* sample_imagenet_val_file = "ILSVRC2012_val_00002212.JPEG"; // 00002212 with AlexNet: top1="no", top5="yes" 175 | char* imagenet_val_path = (char*) malloc(strlen(imagenet_val_dir_val) + strlen(sample_imagenet_val_file) + 2); 176 | size_t num_images = 0; 177 | 178 | printf("\n[tensorrt-test] Scanning directory: %s\n", imagenet_val_path); 179 | while( (ent = readdir(dir)) && (num_images < tensorrt_max_num_images) ) 180 | { 181 | const char* imagenet_val_file = ent->d_name; 182 | if( strlen(imagenet_val_file) < strlen(sample_imagenet_val_file) ) 183 | { 184 | // Skip '.' and '..'. 185 | continue; 186 | } 187 | printf("\n[tensorrt-test] Classifying image #%ld out of %ld\n", num_images+1, tensorrt_max_num_images); 188 | sprintf(imagenet_val_path, "%s/%s", imagenet_val_dir_val, imagenet_val_file); 189 | exit_status = classifyImageRGBA(net, imagenet_val_path); 190 | if (exit_status == EXIT_FAILURE) 191 | { 192 | return exit_status; 193 | } 194 | num_images++; 195 | } 196 | closedir(dir); 197 | free(imagenet_val_path); 198 | } 199 | else 200 | { 201 | printf("\n[tensorrt-test] Failed to open directory \'%s\'\n", imagenet_val_dir_var); 202 | exit_status = EXIT_FAILURE; 203 | } 204 | } 205 | else 206 | { 207 | printf("\n[tensorrt-test] Usage: %s [path]", argv[0]); 208 | printf(" (by default, all files in \'%s\' dir)\n", imagenet_val_dir_val); 209 | exit_status = EXIT_FAILURE; 210 | } 211 | 212 | printf("\n[tensorrt-test] Shutting down...\n"); 213 | delete net; 214 | 215 | return exit_status; 216 | } 217 | -------------------------------------------------------------------------------- /script/explore-batch-size-libs-models/benchmark.nvidia-tx1.py: 
#! /usr/bin/python
# Source file: script/explore-batch-size-libs-models/benchmark.nvidia-tx1.py
import ck.kernel as ck
import copy
import re
import json

platform_tags='nvidia-tx1'

# Floating-point precision iteration parameters.
fp={
  'start':0,
  'stop':1,
  'step':1,
  'default':1
}
# Batch size iteration parameters.
bs={
  'start':1,
  'stop':8,
  'step':1,
  'default':1
}
# Number of statistical repetitions.
num_repetitions=3


def do(i):
    """Benchmark 'tensorrt-time' over TensorRT engines, Caffe models, FP16 and batch size.

    Resolves all registered TensorRT engines and Caffe models, prepares the
    program pipeline once, and then runs a CK 'autotune' action for every
    selected (engine, model) pair, looping CK_TENSORRT_ENABLE_FP16 over the
    module-level `fp` range and CK_CAFFE_BATCH_SIZE over the `bs` range, with
    `num_repetitions` repetitions.

    Args:
        i: input dictionary; not read by this function.

    Returns:
        A CK-style dictionary: {'return': 0} on success, otherwise a dictionary
        with 'return' > 0 (and 'error'), either produced here or propagated
        from a failed ck.access() call.
    """
    # Named group 'tags' is read back via match.group('tags') below.
    model_name_re = re.compile(r'Caffe model \(net and weights\) \((?P<tags>.*)\)')

    # Detect basic platform info.
    r = ck.access({'action':'detect',
                   'module_uoa':'platform',
                   'out':'out'})
    if r['return']>0: return r

    # Host and target OS params.
    hos = r['host_os_uoa']
    tos = r['os_uoa']
    tdid = r['device_id']

    # Fix cmd key here since it may be used to get extra run-time deps.
    cmd_key = 'default'

    # Load TensorRT-time program meta and desc to check deps.
    rx = ck.access({'action':'load',
                    'module_uoa':'program',
                    'data_uoa':'tensorrt-time'})
    if rx['return']>0: return rx
    mm = rx['dict']

    # Get compile-time and run-time deps.
    cdeps = mm.get('compile_deps',{})
    rdeps = mm.get('run_deps',{})

    # Merge rdeps with cdeps for setting up the pipeline (which uses
    # common deps), but tag them as "for_run_time".
    for k in rdeps:
        cdeps[k] = rdeps[k]
        cdeps[k]['for_run_time'] = 'yes'

    # TensorRT engines.
    depl = copy.deepcopy(cdeps['lib-tensorrt'])

    r = ck.access({'action':'resolve',
                   'module_uoa':'env',
                   'host_os':hos,
                   'target_os':tos,
                   'device_id':tdid,
                   'deps':{'lib-tensorrt':copy.deepcopy(depl)}})
    if r['return']>0: return r

    udepl = r['deps']['lib-tensorrt'].get('choices',[]) # All UOAs of env for TensorRT engines.
    if not udepl:
        return {'return':1, 'error':'no registered TensorRT engines'}

    # Caffe models.
    depm = copy.deepcopy(rdeps['caffemodel'])

    r = ck.access({'action':'resolve',
                   'module_uoa':'env',
                   'host_os':hos,
                   'target_os':tos,
                   'device_id':tdid,
                   'deps':{'caffemodel':copy.deepcopy(depm)}})
    if r['return']>0: return r

    udepm = r['deps']['caffemodel'].get('choices',[]) # All UOAs of env for Caffe models.
    if not udepm:
        return {'return':1, 'error':'no registered Caffe models'}

    # Prepare pipeline.
    cdeps['lib-tensorrt']['uoa'] = udepl[0]
    cdeps['caffemodel']['uoa'] = udepm[0]

    ii = {'action':'pipeline',
          'prepare':'yes',

          'repo_uoa':'ck-tensorrt',
          'module_uoa':'program',
          'data_uoa':'tensorrt-time',
          'cmd_key':cmd_key,

          'dependencies': cdeps,

          'no_compiler_description':'yes',
          'compile_only_once':'yes',

          'cpu_freq':'max',
          'gpu_freq':'max',

          'flags':'-O3',

          'speed':'no',
          'energy':'no',

          'no_state_check':'yes',
          'skip_calibration':'yes',

          'skip_print_timers':'yes',
          'out':'con',
         }

    r = ck.access(ii)
    if r['return']>0: return r

    if r.get('fail','')=='yes':
        return {'return':10, 'error':'pipeline failed ('+r.get('fail_reason','')+')'}

    if r.get('ready','')!='yes':
        return {'return':11, 'error':'pipeline not ready'}

    # Clean pipeline: drop the status keys before reusing it as a template.
    for key in ('ready', 'fail', 'return'):
        if key in r: del(r[key])

    pipeline = copy.deepcopy(r)

    # For each TensorRT engine.
    for lib_uoa in udepl:
        # Load TensorRT engine.
        r = ck.access({'action':'load',
                       'module_uoa':'env',
                       'data_uoa':lib_uoa})
        if r['return']>0: return r
        # Get the lib name e.g. 'tensorrt-3.0.4'.
        lib_version = r['dict']['customize']['version']
        lib_name = 'tensorrt-%s'%lib_version
        lib_tags = lib_name
        # Skip some libs with "in [..]" or "not in [..]".
        if lib_name in []: continue

        # For each Caffe model.
        for model_uoa in udepm:
            # Load Caffe model.
            r = ck.access({'action':'load',
                           'module_uoa':'env',
                           'data_uoa':model_uoa})
            if r['return']>0: return r
            # Get the tags from e.g. 'Caffe model (net and weights) (deepscale, squeezenet, 1.1)'
            model_name = r['data_name']
            match = model_name_re.match(model_name)
            if match:
                model_tags = match.group('tags').replace(' ', '').replace(',', '-')
            else:
                # Name does not follow the pattern: join the env entry's tags.
                model_tags = '-'.join(r['dict']['tags'])

            # Skip some models with "in [..]" or "not in [..]".
            if model_tags not in ['bvlc-alexnet','bvlc-googlenet','deepscale-squeezenet-1.1']: continue

            record_repo = 'local'
            record_uoa = model_tags+'-'+lib_tags

            # Prepare pipeline.
            ck.out('---------------------------------------------------------------------------------------')
            ck.out('%s - %s' % (lib_name, lib_uoa))
            ck.out('%s - %s' % (model_name, model_uoa))
            ck.out('Experiment - %s:%s' % (record_repo, record_uoa))

            # Prepare autotuning input.
            cpipeline = copy.deepcopy(pipeline)

            # Reset deps and change UOA.
            new_deps = {'lib-tensorrt':copy.deepcopy(depl),
                        'caffemodel':copy.deepcopy(depm)}

            new_deps['lib-tensorrt']['uoa'] = lib_uoa
            new_deps['caffemodel']['uoa'] = model_uoa

            r = ck.access({'action':'resolve',
                           'module_uoa':'env',
                           'host_os':hos,
                           'target_os':tos,
                           'device_id':tdid,
                           'deps':new_deps})
            if r['return']>0: return r

            cpipeline['dependencies'].update(new_deps)

            ii = {'action':'autotune',

                  'module_uoa':'pipeline',
                  'data_uoa':'program',

                  'choices_order':[
                      [
                          '##choices#env#CK_TENSORRT_ENABLE_FP16'
                      ],
                      [
                          '##choices#env#CK_CAFFE_BATCH_SIZE'
                      ]
                  ],
                  'choices_selection':[
                      {'type':'loop', 'start':fp['start'], 'stop':fp['stop'], 'step':fp['step'], 'default':fp['default']},
                      {'type':'loop', 'start':bs['start'], 'stop':bs['stop'], 'step':bs['step'], 'default':bs['default']}
                  ],

                  'features_keys_to_process':[
                      '##choices#env#CK_TENSORRT_ENABLE_FP16',
                      '##choices#env#CK_CAFFE_BATCH_SIZE'
                  ],

                  'iterations':-1,
                  'repetitions':num_repetitions,

                  'record':'yes',
                  'record_failed':'yes',
                  'record_params':{
                      'search_point_by_features':'yes'
                  },
                  'record_repo':record_repo,
                  'record_uoa':record_uoa,

                  'tags':['explore-batch-size-libs-models', platform_tags, model_tags, lib_tags],

                  'pipeline':cpipeline,
                  'out':'con'}

            r = ck.access(ii)
            if r['return']>0: return r

            if r.get('fail','')=='yes':
                return {'return':10, 'error':'pipeline failed ('+r.get('fail_reason','')+')'}

    return {'return':0}


r=do({})
if r['return']>0: ck.err(r)
#! /usr/bin/python
# Source file: script/explore-batch-size-libs-models/benchmark.nvidia-gtx1080.py
import ck.kernel as ck
import copy
import re
import json

platform_tags='nvidia-gtx1080'

# Floating-point precision iteration parameters.
fp={
  'start':0,
  'stop':0,
  'step':1,
  'default':0
}
# Batch size iteration parameters.
bs={
  'start':1,
  'stop':16,
  'step':1,
  'default':1
}
# Number of statistical repetitions.
num_repetitions=3


def do(i):
    """Benchmark 'tensorrt-time' over TensorRT engines, Caffe models, FP16 and batch size.

    Resolves all registered TensorRT engines and Caffe models, prepares the
    program pipeline once, and then runs a CK 'autotune' action for every
    selected (engine, model) pair, looping CK_TENSORRT_ENABLE_FP16 over the
    module-level `fp` range and CK_CAFFE_BATCH_SIZE over the `bs` range, with
    `num_repetitions` repetitions.

    Args:
        i: input dictionary; not read by this function.

    Returns:
        A CK-style dictionary: {'return': 0} on success, otherwise a dictionary
        with 'return' > 0 (and 'error'), either produced here or propagated
        from a failed ck.access() call.
    """
    # Named group 'tags' is read back via match.group('tags') below.
    model_name_re = re.compile(r'Caffe model \(net and weights\) \((?P<tags>.*)\)')

    # Detect basic platform info.
    r = ck.access({'action':'detect',
                   'module_uoa':'platform',
                   'out':'out'})
    if r['return']>0: return r

    # Host and target OS params.
    hos = r['host_os_uoa']
    tos = r['os_uoa']
    tdid = r['device_id']

    # Fix cmd key here since it may be used to get extra run-time deps.
    cmd_key = 'default'

    # Load TensorRT-time program meta and desc to check deps.
    rx = ck.access({'action':'load',
                    'module_uoa':'program',
                    'data_uoa':'tensorrt-time'})
    if rx['return']>0: return rx
    mm = rx['dict']

    # Get compile-time and run-time deps.
    cdeps = mm.get('compile_deps',{})
    rdeps = mm.get('run_deps',{})

    # Merge rdeps with cdeps for setting up the pipeline (which uses
    # common deps), but tag them as "for_run_time".
    for k in rdeps:
        cdeps[k] = rdeps[k]
        cdeps[k]['for_run_time'] = 'yes'

    # TensorRT engines.
    depl = copy.deepcopy(cdeps['lib-tensorrt'])

    r = ck.access({'action':'resolve',
                   'module_uoa':'env',
                   'host_os':hos,
                   'target_os':tos,
                   'device_id':tdid,
                   'deps':{'lib-tensorrt':copy.deepcopy(depl)}})
    if r['return']>0: return r

    udepl = r['deps']['lib-tensorrt'].get('choices',[]) # All UOAs of env for TensorRT engines.
    if not udepl:
        return {'return':1, 'error':'no registered TensorRT engines'}

    # Caffe models.
    depm = copy.deepcopy(rdeps['caffemodel'])

    r = ck.access({'action':'resolve',
                   'module_uoa':'env',
                   'host_os':hos,
                   'target_os':tos,
                   'device_id':tdid,
                   'deps':{'caffemodel':copy.deepcopy(depm)}})
    if r['return']>0: return r

    udepm = r['deps']['caffemodel'].get('choices',[]) # All UOAs of env for Caffe models.
    if not udepm:
        return {'return':1, 'error':'no registered Caffe models'}

    # Prepare pipeline.
    cdeps['lib-tensorrt']['uoa'] = udepl[0]
    cdeps['caffemodel']['uoa'] = udepm[0]

    ii = {'action':'pipeline',
          'prepare':'yes',

          'repo_uoa':'ck-tensorrt',
          'module_uoa':'program',
          'data_uoa':'tensorrt-time',
          'cmd_key':cmd_key,

          'dependencies': cdeps,

          'no_compiler_description':'yes',
          'compile_only_once':'yes',

          'cpu_freq':'max',
          'gpu_freq':'max',

          'flags':'-O3',

          'speed':'no',
          'energy':'no',

          'no_state_check':'yes',
          'skip_calibration':'yes',

          'skip_print_timers':'yes',
          'out':'con',
         }

    r = ck.access(ii)
    if r['return']>0: return r

    if r.get('fail','')=='yes':
        return {'return':10, 'error':'pipeline failed ('+r.get('fail_reason','')+')'}

    if r.get('ready','')!='yes':
        return {'return':11, 'error':'pipeline not ready'}

    # Clean pipeline: drop the status keys before reusing it as a template.
    for key in ('ready', 'fail', 'return'):
        if key in r: del(r[key])

    pipeline = copy.deepcopy(r)

    # For each TensorRT engine.
    for lib_uoa in udepl:
        # Load TensorRT engine.
        r = ck.access({'action':'load',
                       'module_uoa':'env',
                       'data_uoa':lib_uoa})
        if r['return']>0: return r
        # Get the lib name e.g. 'tensorrt-3.0.4'.
        lib_version = r['dict']['customize']['version']
        lib_name = 'tensorrt-%s'%lib_version
        lib_tags = lib_name
        # Skip some libs with "in [..]" or "not in [..]".
        if lib_name in []: continue

        # For each Caffe model.
        for model_uoa in udepm:
            # Load Caffe model.
            r = ck.access({'action':'load',
                           'module_uoa':'env',
                           'data_uoa':model_uoa})
            if r['return']>0: return r
            # Get the tags from e.g. 'Caffe model (net and weights) (deepscale, squeezenet, 1.1)'
            model_name = r['data_name']
            match = model_name_re.match(model_name)
            if match:
                model_tags = match.group('tags').replace(' ', '').replace(',', '-')
            else:
                # Name does not follow the pattern: join the env entry's tags.
                model_tags = '-'.join(r['dict']['tags'])

            # Skip some models with "in [..]" or "not in [..]".
            if model_tags not in ['bvlc-alexnet','bvlc-googlenet','deepscale-squeezenet-1.1']: continue

            record_repo = 'local'
            record_uoa = model_tags+'-'+lib_tags

            # Prepare pipeline.
            ck.out('---------------------------------------------------------------------------------------')
            ck.out('%s - %s' % (lib_name, lib_uoa))
            ck.out('%s - %s' % (model_name, model_uoa))
            ck.out('Experiment - %s:%s' % (record_repo, record_uoa))

            # Prepare autotuning input.
            cpipeline = copy.deepcopy(pipeline)

            # Reset deps and change UOA.
            new_deps = {'lib-tensorrt':copy.deepcopy(depl),
                        'caffemodel':copy.deepcopy(depm)}

            new_deps['lib-tensorrt']['uoa'] = lib_uoa
            new_deps['caffemodel']['uoa'] = model_uoa

            r = ck.access({'action':'resolve',
                           'module_uoa':'env',
                           'host_os':hos,
                           'target_os':tos,
                           'device_id':tdid,
                           'deps':new_deps})
            if r['return']>0: return r

            cpipeline['dependencies'].update(new_deps)

            ii = {'action':'autotune',

                  'module_uoa':'pipeline',
                  'data_uoa':'program',

                  'choices_order':[
                      [
                          '##choices#env#CK_TENSORRT_ENABLE_FP16'
                      ],
                      [
                          '##choices#env#CK_CAFFE_BATCH_SIZE'
                      ]
                  ],
                  'choices_selection':[
                      {'type':'loop', 'start':fp['start'], 'stop':fp['stop'], 'step':fp['step'], 'default':fp['default']},
                      {'type':'loop', 'start':bs['start'], 'stop':bs['stop'], 'step':bs['step'], 'default':bs['default']}
                  ],

                  'features_keys_to_process':[
                      '##choices#env#CK_TENSORRT_ENABLE_FP16',
                      '##choices#env#CK_CAFFE_BATCH_SIZE'
                  ],

                  'iterations':-1,
                  'repetitions':num_repetitions,

                  'record':'yes',
                  'record_failed':'yes',
                  'record_params':{
                      'search_point_by_features':'yes'
                  },
                  'record_repo':record_repo,
                  'record_uoa':record_uoa,

                  'tags':['explore-batch-size-libs-models', platform_tags, model_tags, lib_tags],

                  'pipeline':cpipeline,
                  'out':'con'}

            r = ck.access(ii)
            if r['return']>0: return r

            if r.get('fail','')=='yes':
                return {'return':10, 'error':'pipeline failed ('+r.get('fail_reason','')+')'}

    return {'return':0}


r=do({})
if r['return']>0: ck.err(r)