├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake ├── FindCUDNN.cmake ├── getCudaArch.cu └── tkDNNConfig.cmake ├── demo ├── config.yaml ├── config_tetra.yaml ├── demo │ ├── demo.cpp │ ├── demo3D.cpp │ ├── demoDepth.cpp │ ├── demoTracker.cpp │ ├── map.cpp │ └── seg_demo.cpp ├── demoConfig.yaml └── yolo_test.mp4 ├── docker ├── Dockerfile ├── Dockerfile.base ├── README.md ├── assets │ ├── entrypoint_setup.sh │ └── terminator_config └── docker_launch.sh ├── docs ├── README_2d3dtracking.md ├── README_depth.md ├── README_seg.md ├── demo.md ├── exporting_weights.md ├── mAP_demo.md └── windows.md ├── include └── tkDNN │ ├── BoundingBox.h │ ├── CenterTrack.h │ ├── CenternetDetection.h │ ├── CenternetDetection3D.h │ ├── DarknetParser.h │ ├── DepthNN.h │ ├── DetectionNN.h │ ├── DetectionNN3D.h │ ├── ImuOdom.h │ ├── Int8BatchStream.h │ ├── Int8Calibrator.h │ ├── Layer.h │ ├── MobilenetDetection.h │ ├── Network.h │ ├── NetworkRT.h │ ├── NetworkViz.h │ ├── SegmentationNN.h │ ├── TrackingNN.h │ ├── Yolo3Detection.h │ ├── demo_utils.h │ ├── evaluation.h │ ├── kernels.h │ ├── kernelsThrust.h │ ├── pluginsRT │ ├── ActivationLeakyRT.h │ ├── ActivationLogisticRT.h │ ├── ActivationMishRT.h │ ├── ActivationReLUCeilingRT.h │ ├── ActivationSigmoidRT.h │ ├── ConstantPaddingRT.h │ ├── DeformableConvRT.h │ ├── FlattenConcatRT.h │ ├── Int8Calibrator.h │ ├── MaxPoolingFixedSizeRT.h │ ├── ReflectionPadding.h │ ├── RegionRT.h │ ├── ReorgRT.h │ ├── ReshapeRT.h │ ├── ResizeLayerRT.h │ ├── RouteRT.h │ ├── ShortcutRT.h │ ├── UpsampleRT.h │ └── YoloRT.h │ ├── test.h │ ├── tkdnn.h │ └── utils.h ├── scripts ├── checkExecTimes.py ├── download_validation.py ├── download_validation.sh ├── install_OpenCV4.sh ├── test_all_tests.sh └── test_inference.sh ├── src ├── Activation.cpp ├── BoundingBox.cpp ├── CenterTrack.cpp ├── CenternetDetection.cpp ├── CenternetDetection3D.cpp ├── Conv2d.cpp ├── DarknetParser.cpp ├── DeformConv2d.cpp ├── Dense.cpp ├── Flatten.cpp ├── Int8BatchStream.cpp ├── Int8Calibrator.cpp ├── LSTM.cpp ├── Layer.cpp ├── LayerWgs.cpp ├── MobilenetDetection.cpp ├── MulAdd.cpp ├── Network.cpp ├── NetworkRT.cpp ├── NetworkViz.cpp ├── Padding.cpp ├── Pooling.cpp ├── Region.cpp ├── Reorg.cpp ├── Reshape.cpp ├── Resize.cpp ├── Route.cpp ├── Shortcut.cpp ├── Softmax.cpp ├── Upsample.cpp ├── Yolo.cpp ├── Yolo3Detection.cpp ├── demo_utils.cpp ├── evaluation.cpp ├── kernels │ ├── activation_elu.cu │ ├── activation_leaky.cu │ ├── activation_logistic.cu │ ├── activation_mish.cu │ ├── activation_relu_ceiling.cu │ ├── activation_sigmoid.cu │ ├── convert.cu │ ├── deformable_conv.cu │ ├── fill.cu │ ├── normalize.cu │ ├── padding.cu │ ├── pooling.cu │ ├── postprocessing.cu │ ├── reorg.cu │ ├── resize.cu │ ├── scaladd.cu │ ├── shortcut.cu │ ├── softmax.cu │ └── upsample.cu ├── pluginsRT │ ├── ActivationLeakyRT.cpp │ ├── ActivationLogisticRT.cpp │ ├── ActivationMishRT.cpp │ ├── ActivationReLUCeilingRT.cpp │ ├── ConstantPaddingRT.cpp │ ├── DeformableConvRT.cpp │ ├── FlattenConcatRT.cpp │ ├── MaxPoolingSizeRT.cpp │ ├── ReflectionPadding.cpp │ ├── RegionRT.cpp │ ├── ReorgRT.cpp │ ├── ReshapeRT.cpp │ ├── ResizeLayerRT.cpp │ ├── RouteRT.cpp │ ├── ShortcutRT.cpp │ ├── UpsampleRT.cpp │ └── YoloRT.cpp └── utils.cpp └── tests ├── backbones ├── dla34 │ ├── dla34.cpp │ ├── dla34_weightsexporter.py │ └── env_dla34.yml └── resnet101 │ ├── env_resnet101.yml │ ├── resnet101.cpp │ └── resnet101_weightsexporter.py ├── centernet ├── dla34_cnet │ └── dla34_cnet.cpp ├── dla34_cnet3d │ └── dla34_cnet3d.cpp └── resnet101_cnet │ └── resnet101_cnet.cpp ├── centertrack └── dla34_ctrack │ └── dla34_ctrack.cpp ├── darknet ├── cfg │ ├── csresnext50-panet-spp.cfg │ ├── csresnext50-panet-spp_berkeley.cfg │ ├── yolo2.cfg │ ├── yolo2_voc.cfg │ ├── yolo2tiny.cfg │ ├── yolo3.cfg │ ├── yolo3_512.cfg │ ├── yolo3_berkeley.cfg │ ├── yolo3_coco4.cfg │ ├── yolo3_flir.cfg │ ├── yolo3tiny.cfg │ ├── yolo3tiny_512.cfg │ ├── yolo4-csp.cfg │ ├── yolo4-csp_crowd.cfg │ ├── yolo4.cfg │ ├── yolo4_320.cfg │ ├── yolo4_320_coco2.cfg │ ├── yolo4_512.cfg │ ├── yolo4_608.cfg │ ├── yolo4_berkeley.cfg │ ├── yolo4_mmr.cfg │ ├── yolo4tiny.cfg │ ├── yolo4tiny_512.cfg │ └── yolo4x.cfg ├── csresnext50-panet-spp.cpp ├── csresnext50-panet-spp_berkeley.cpp ├── names │ ├── berkeley.names │ ├── coco.names │ ├── coco2.names │ ├── coco4.names │ ├── crowdhuman.names │ ├── flir.names │ ├── mmr.names │ └── voc.names ├── viz_yolo3.cpp ├── yolo2.cpp ├── yolo2_voc.cpp ├── yolo2tiny.cpp ├── yolo3.cpp ├── yolo3_512.cpp ├── yolo3_berkeley.cpp ├── yolo3_coco4.cpp ├── yolo3_flir.cpp ├── yolo3tiny.cpp ├── yolo3tiny_512.cpp ├── yolo4-csp.cpp ├── yolo4-csp_crowd.cpp ├── yolo4.cpp ├── yolo4_320.cpp ├── yolo4_320_coco2.cpp ├── yolo4_512.cpp ├── yolo4_608.cpp ├── yolo4_berkeley.cpp ├── yolo4_berkeley_f1.cpp ├── yolo4_mmr.cpp ├── yolo4tiny.cpp ├── yolo4tiny_512.cpp └── yolo4x.cpp ├── exporters ├── caffe_weights_exporter.py └── keras_weights_exporter.py ├── imuodom ├── imuodom.cpp └── infer.py ├── mnist ├── mnist_model.py ├── test_mnist.cpp └── test_mnistRT.cpp ├── mobilenet ├── bdd-mobilenetv2ssd │ └── bdd-mobilenetv2ssd.cpp ├── mobilenetv2ssd │ └── mobilenetv2ssd.cpp └── mobilenetv2ssd512 │ └── mobilenetv2ssd512.cpp ├── monodepth2 ├── monodepth2_1024.cpp └── monodepth2_640.cpp ├── shelfnet ├── shelfnet.cpp ├── shelfnet_berkeley.cpp ├── shelfnet_coco.cpp └── shelfnet_mapillary.cpp ├── simple ├── test_model.py └── test_simple.cpp └── test_rtinference └── rtinference.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | build/ 3 | .vscode/ 4 | *.bin 5 | *.pyc 6 | *.prototxt 7 | *.caffemodel 8 | *.h5 9 | *.tar.gz 10 | *.weights 11 | .idea/ 12 | *.hdf5 13 | *.pk 14 | *.table 15 | cmake-build-release/ 16 | demo/COCO_val2017 17 | demo/BDD100K_val 18 | /.vs 19 | cmake-build-minsizerel/* 20 | scripts/COCO_val2017/* 21 | scripts/COCO_val2017.zip 22 | scripts/all_labels.txt 23 | /cmake/cuda_script 24 | /cmake-build-debug/ 25 | -------------------------------------------------------------------------------- /cmake/FindCUDNN.cmake: -------------------------------------------------------------------------------- 1 | # find the library 2 | if(CUDA_FOUND) 3 | find_cuda_helper_libs(cudnn) 4 | set(CUDNN_LIBRARY ${CUDA_cudnn_LIBRARY} CACHE FILEPATH "location of the cuDNN library") 5 | unset(CUDA_cudnn_LIBRARY CACHE) 6 | 7 | find_cuda_helper_libs(nvinfer) 8 | set(NVINFER_LIBRARY ${CUDA_nvinfer_LIBRARY} CACHE FILEPATH "location of the nvinfer library") 9 | unset(CUDA_nvinfer_LIBRARY CACHE) 10 | endif() 11 | 12 | # find the include 13 | if(CUDNN_LIBRARY) 14 | find_path(CUDNN_INCLUDE_DIR 15 | cudnn.h 16 | PATHS ${CUDA_TOOLKIT_INCLUDE} 17 | DOC "location of cudnn.h" 18 | NO_DEFAULT_PATH 19 | ) 20 | 21 | if(NOT CUDNN_INCLUDE_DIR) 22 | find_path(CUDNN_INCLUDE_DIR 23 | cudnn.h 24 | DOC "location of cudnn.h" 25 | ) 26 | endif() 27 | 28 | message("-- Found CUDNN: " ${CUDNN_LIBRARY}) 29 | message("-- Found CUDNN include: " ${CUDNN_INCLUDE_DIR}) 30 | endif() 31 | 32 | if(NVINFER_LIBRARY) 33 | find_path(NVINFER_INCLUDE_DIR 34 | NvInfer.h 35 | PATHS ${CUDA_TOOLKIT_INCLUDE} 36 | DOC "location of NvInfer.h" 37 | NO_DEFAULT_PATH 38 | ) 39 | 40 | if(NOT NVINFER_INCLUDE_DIR) 41 | find_path(NVINFER_INCLUDE_DIR 42 | NvInfer.h 43 | DOC "location of NvInfer.h" 44 | ) 45 | endif() 46 | 47 | message("-- Found NVINFER: " ${NVINFER_LIBRARY}) 48 | message("-- Found NVINFER include: " ${NVINFER_INCLUDE_DIR}) 49 | endif() 50 | 51 | 52 | include(FindPackageHandleStandardArgs) 53 | find_package_handle_standard_args(CUDNN 54 | FOUND_VAR CUDNN_FOUND 55 | REQUIRED_VARS 56 | CUDNN_LIBRARY 57 | CUDNN_INCLUDE_DIR 58 | VERSION_VAR CUDNN_VERSION 59 | ) 60 | 61 | if(CUDNN_FOUND) 62 | set(CUDNN_LIBRARIES ${CUDNN_LIBRARY} ${NVINFER_LIBRARY}) 63 | set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR} ${NVINFER_INCLUDE_DIR}) 64 | endif() 65 | 66 | set(CUDNN_FOUND true) -------------------------------------------------------------------------------- /cmake/getCudaArch.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char **argv){ 4 | cudaDeviceProp dP; 5 | float min_cc = 5.0; 6 | 7 | int rc = cudaGetDeviceProperties(&dP, 0); 8 | if(rc != cudaSuccess) { 9 | cudaError_t error = cudaGetLastError(); 10 | printf("CUDA error: %s", cudaGetErrorString(error)); 11 | return rc; /* Failure */ 12 | } 13 | if((dP.major+(dP.minor/10)) < min_cc) { 14 | printf("Min Compute Capability of %2.1f required: %d.%d found\n Not Building CUDA Code", min_cc, dP.major, dP.minor); 15 | return 1; /* Failure */ 16 | } else { 17 | printf("-arch=sm_%d%d", dP.major, dP.minor); 18 | return 0; /* Success */ 19 | } 20 | } -------------------------------------------------------------------------------- /cmake/tkDNNConfig.cmake: -------------------------------------------------------------------------------- 1 | message("-- Found tkDNN") 2 | set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}) 3 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++11 -fPIC") 4 | 5 | find_package(CUDA REQUIRED) 6 | find_package(OpenCV REQUIRED) 7 | find_package(CUDNN REQUIRED) 8 | 9 | set(tkDNN_INCLUDE_DIRS 10 | ${CUDA_INCLUDE_DIRS} 11 | ${OPENCV_INCLUDE_DIRS} 12 | ${CUDNN_INCLUDE_DIRS} 13 | ) 14 | 15 | set(tkDNN_LIBRARIES 16 | tkDNN 17 | kernels 18 | ${CUDA_LIBRARIES} 19 | ${CUDA_CUBLAS_LIBRARIES} 20 | ${CUDNN_LIBRARIES} 21 | ${OpenCV_LIBS} 22 | ) 23 | 24 | set(tkDNN_FOUND true) 25 | -------------------------------------------------------------------------------- /demo/config.yaml: -------------------------------------------------------------------------------- 1 | classes : 80 #number of classes 2 | map_points : 101 #number of recall points (0 for all, 101 for COCO, 11 PascalVOC) 3 | map_levels : 10 #number of IoU step for the AP 4 | map_step : 0.05 #step of IoU 5 | IoU_thresh : 0.5 #starting IoU threshold 6 | conf_thresh : 0.001 #threshold on the condifence of the bbox 7 | verbose : false #print on screen information 8 | -------------------------------------------------------------------------------- /demo/config_tetra.yaml: -------------------------------------------------------------------------------- 1 | classes : 3 #number of classes 2 | map_points : 101 #number of recall points (0 for all, 101 for COCO, 11 PascalVOC) 3 | map_levels : 10 #number of IoU step for the AP 4 | map_step : 0.05 #step of IoU 5 | IoU_thresh : 0.5 #starting IoU threshold 6 | conf_thresh : 0.0 #threshold on the condifence of the bbox 7 | verbose : false #print on screen information 8 | -------------------------------------------------------------------------------- /demo/demo/demoDepth.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include /* srand, rand */ 4 | //#include 5 | #include 6 | 7 | #include "tkDNN/DepthNN.h" 8 | 9 | bool gRun; 10 | 11 | void sig_handler(int signo) { 12 | std::cout<<"request gateway stop\n"; 13 | gRun = false; 14 | } 15 | 16 | int main(int argc, char *argv[]) { 17 | 18 | signal(SIGINT, sig_handler); 19 | 20 | std::string net = "monodepth2_fp32.rt"; 21 | if(argc > 1) 22 | net = argv[1]; 23 | #ifdef __linux__ 24 | std::string input = "../demo/yolo_test.mp4"; 25 | #elif _WIN32 26 | std::string input = "..\\..\\..\\demo\\yolo_test.mp4"; 27 | #endif 28 | if(argc > 2) 29 | input = argv[2]; 30 | bool show = true; 31 | if(argc > 3) 32 | show = atoi(argv[3]); 33 | bool save = true; 34 | if(argc > 4) 35 | save = atoi(argv[4]); 36 | 37 | std::cout <<"Net settings - net: "<< net 38 | <<"\n"; 39 | std::cout <<"Demo settings - input: "<< input 40 | <<", show: "<< show 41 | <<", save: "<< save<<"\n\n"; 42 | 43 | tk::dnn::DepthNN depthNN; 44 | 45 | // create depth network 46 | int n_batch = 1; 47 | depthNN.init(net, n_batch); 48 | 49 | // open video stream 50 | cv::VideoCapture cap(input); 51 | if(!cap.isOpened()) 52 | gRun = false; 53 | else 54 | std::cout<<"camera started\n"; 55 | 56 | cv::VideoWriter resultVideo; 57 | if(save) { 58 | int w = depthNN.output_w; 59 | int h = depthNN.output_h; 60 | resultVideo.open("result.mp4", cv::VideoWriter::fourcc('M','P','4','V'), 30, cv::Size(w, h)); 61 | } 62 | 63 | if(show) 64 | cv::namedWindow("depth", cv::WINDOW_NORMAL); 65 | 66 | cv::Mat frame; 67 | std::vector batch_frame; 68 | std::vector batch_dnn_input; 69 | 70 | // start detection loop 71 | gRun = true; 72 | while(gRun) { 73 | batch_dnn_input.clear(); 74 | batch_frame.clear(); 75 | 76 | //read frame 77 | cap >> frame; 78 | if(!frame.data) 79 | break; 80 | batch_frame.push_back(frame); 81 | batch_dnn_input.push_back(frame.clone()); 82 | 83 | //inference 84 | depthNN.update(batch_dnn_input, 1); 85 | if(show){ 86 | cv::imshow("depth", depthNN.depthMats[0]); 87 | cv::waitKey(1); 88 | 89 | } 90 | 91 | if(save) 92 | resultVideo << depthNN.depthMats[0]; 93 | } 94 | 95 | std::cout<<"detection end\n"; 96 | 97 | double mean = 0; 98 | std::cout< 17 | ``` 18 | where 19 | * `````` is the rt file generated by a test 20 | * ```<``` is the path to a video file or a camera input 21 | * `````` if set to 0 the demo will not show the visualization, it will otherwise (default=1) 22 | * `````` if set to 1 the demo will save the video into result.mp4, it won't otherwise (default=1) 23 | 24 | NB) By default it is used FP32 inference 25 | 26 | 27 | ![demo](https://user-images.githubusercontent.com/11939259/160845358-0d6ab15d-c5f4-46ae-b9da-bfaf3903389d.gif "Results on yolo_test.mp4") 28 | 29 | 30 | 54 | 55 | -------------------------------------------------------------------------------- /docs/mAP_demo.md: -------------------------------------------------------------------------------- 1 | # Run the mAP demo 2 | 3 | To compute mAP, precision, recall and f1score to evaluate 2D object detectors, run the map_demo. 4 | 5 | A validation set is needed. 6 | To download COCO_val2017 (80 classes) run (form the root folder): 7 | ``` 8 | bash scripts/download_validation.sh COCO 9 | ``` 10 | To download Berkeley_val (10 classes) run (form the root folder): 11 | ``` 12 | bash scripts/download_validation.sh BDD 13 | ``` 14 | 15 | To compute the map, the following parameters are needed: 16 | ``` 17 | ./map_demo 18 | ``` 19 | where 20 | * ``````: rt file of a chosen network on which compute the mAP. 21 | * ``````: type of network. Right now only y(yolo), c(centernet) and m(mobilenet) are allowed 22 | * ``````: path to a text file containing all the paths of the ground-truth labels. It is important that all the labels of the ground-truth are in a folder called 'labels'. In the folder containing the folder 'labels' there should be also a folder 'images', containing all the ground-truth images having the same same as the labels. To better understand, if there is a label path/to/labels/000001.txt there should be a corresponding image path/to/images/000001.jpg. 23 | * ``````: path to a yaml file with the parameters needed for the mAP computation, similar to demo/config.yaml 24 | 25 | Example: 26 | 27 | ``` 28 | cd build 29 | ./map_demo dla34_cnet_FP32.rt c ../demo/COCO_val2017/all_labels.txt ../demo/config.yaml 30 | ``` 31 | 32 | This demo also creates a json file named ```net_name_COCO_res.json``` containing all the detections computed. The detections are in COCO format, the correct format to submit the results to [CodaLab COCO detection challenge](https://competitions.codalab.org/competitions/20794#participate). -------------------------------------------------------------------------------- /include/tkDNN/BoundingBox.h: -------------------------------------------------------------------------------- 1 | #ifndef BOUNDINGBOX_H 2 | #define BOUNDINGBOX_H 3 | 4 | #include 5 | #include "tkdnn.h" 6 | 7 | namespace tk { namespace dnn { 8 | class BoundingBox : public tk::dnn::box 9 | { 10 | float overlap(const float p1, const float l1, const float p2, const float l22); 11 | float boxesIntersection(const BoundingBox &b); 12 | float boxesUnion(const BoundingBox &b); 13 | 14 | public: 15 | 16 | int uniqueTruthIndex = -1; 17 | int truthFlag = 0; 18 | float maxIoU = 0; 19 | 20 | float IoU(const BoundingBox &b); 21 | void clear(); 22 | 23 | friend std::ostream& operator<<(std::ostream& os, const BoundingBox& bb); 24 | }; 25 | 26 | std::ostream& operator<<(std::ostream& os, const BoundingBox& bb); 27 | bool boxComparison (const BoundingBox& a,const BoundingBox& b) ; 28 | 29 | }} 30 | #endif /*BOUNDINGBOX_H*/ 31 | 32 | -------------------------------------------------------------------------------- /include/tkDNN/CenternetDetection.h: -------------------------------------------------------------------------------- 1 | #ifndef CENTERNETDETECTION_H 2 | #define CENTERNETDETECTION_H 3 | 4 | #include "kernels.h" 5 | #include 6 | #include "opencv2/opencv.hpp" 7 | #include 8 | #include 9 | #include // std::iota 10 | #include // std::sort 11 | 12 | #include "DetectionNN.h" 13 | 14 | #include "kernelsThrust.h" 15 | 16 | 17 | namespace tk { namespace dnn { 18 | 19 | class CenternetDetection : public DetectionNN 20 | { 21 | private: 22 | tk::dnn::dataDim_t dim; 23 | tk::dnn::dataDim_t dim2; 24 | tk::dnn::dataDim_t dim_hm; 25 | tk::dnn::dataDim_t dim_wh; 26 | tk::dnn::dataDim_t dim_reg; 27 | float *topk_scores; 28 | int *topk_inds_; 29 | float *topk_ys_; 30 | float *topk_xs_; 31 | int *ids_d, *ids_, *ids_2, *ids_2d; 32 | 33 | float *scores, *scores_d; 34 | int *clses, *clses_d; 35 | int *topk_inds_d; 36 | float *topk_ys_d; 37 | float *topk_xs_d; 38 | int *inttopk_xs_d, *inttopk_ys_d; 39 | 40 | 41 | float *bbx0, *bby0, *bbx1, *bby1; 42 | float *bbx0_d, *bby0_d, *bbx1_d, *bby1_d; 43 | 44 | float *target_coords; 45 | 46 | #ifdef OPENCV_CUDACONTRIB 47 | float *mean_d; 48 | float *stddev_d; 49 | #else 50 | cv::Vec mean; 51 | cv::Vec stddev; 52 | dnnType *input; 53 | #endif 54 | 55 | float *d_ptrs; 56 | 57 | cv::Mat src; 58 | cv::Mat dst; 59 | cv::Mat dst2; 60 | cv::Mat trans, trans2; 61 | //processing 62 | float toll = 0.000001; 63 | int K = 100; 64 | int width = 128;//56; // TODO 65 | 66 | // pointer used in the kernels 67 | float *src_out; 68 | int *ids_out; 69 | 70 | struct threshold op; 71 | 72 | public: 73 | CenternetDetection() {}; 74 | ~CenternetDetection() {}; 75 | 76 | bool init(const std::string& tensor_path, const int n_classes=80, const int n_batches=1, const float conf_thresh=0.3); 77 | void preprocess(cv::Mat &frame, const int bi=0); 78 | void postprocess(const int bi=0,const bool mAP=false); 79 | }; 80 | 81 | 82 | } // namespace dnn 83 | } // namespace tk 84 | 85 | 86 | #endif /*CENTERNETDETECTION_H*/ -------------------------------------------------------------------------------- /include/tkDNN/CenternetDetection3D.h: -------------------------------------------------------------------------------- 1 | #ifndef CENTERNETDETECTION3D_H 2 | #define CENTERNETDETECTION3D_H 3 | 4 | #include "kernels.h" 5 | #include 6 | #include "opencv2/opencv.hpp" 7 | #include 8 | #include 9 | #include // std::iota 10 | #include // std::sort 11 | 12 | #ifdef _WIN32 13 | #define _USE_MATH_DEFINES 14 | #include 15 | #endif 16 | 17 | #include "DetectionNN3D.h" 18 | 19 | #include "kernelsThrust.h" 20 | 21 | 22 | namespace tk { namespace dnn { 23 | 24 | class CenternetDetection3D : public DetectionNN3D 25 | { 26 | private: 27 | tk::dnn::dataDim_t dim; 28 | tk::dnn::dataDim_t dim2; 29 | tk::dnn::dataDim_t dim_hm; 30 | tk::dnn::dataDim_t dim_wh; 31 | tk::dnn::dataDim_t dim_reg; 32 | tk::dnn::dataDim_t dim_dep; 33 | tk::dnn::dataDim_t dim_rot; 34 | tk::dnn::dataDim_t dim_dim; 35 | 36 | std::vector inputCalibs; 37 | float *topk_scores; 38 | int *topk_inds_; 39 | float *topk_ys_; 40 | float *topk_xs_; 41 | int *ids_d, *ids_; 42 | 43 | float *ones; 44 | 45 | float *scores, *scores_d; 46 | int *clses, *clses_d; 47 | int *topk_inds_d; 48 | float *topk_ys_d; 49 | float *topk_xs_d; 50 | int *inttopk_xs_d, *inttopk_ys_d; 51 | 52 | float *xs, *ys; 53 | 54 | float *dep, *rot, *dim_, *wh; 55 | float *dep_d, *rot_d, *dim_d, *wh_d; 56 | 57 | float *target_coords; 58 | 59 | #ifdef OPENCV_CUDACONTRIB 60 | float *mean_d; 61 | float *stddev_d; 62 | #else 63 | cv::Vec mean; 64 | cv::Vec stddev; 65 | dnnType *input; 66 | #endif 67 | cv::Mat r; 68 | float *d_ptrs; 69 | 70 | cv::Size sz_old; 71 | 72 | cv::Mat src; 73 | cv::Mat dst; 74 | cv::Mat dst2; 75 | cv::Mat trans, trans2; 76 | std::vector calibs; 77 | 78 | //processing 79 | int K = 100; 80 | int width = 128;//56; // TODO 81 | 82 | // pointer used in the kernels 83 | float *srcOut; 84 | int *idsOut; 85 | 86 | struct threshold op; 87 | cv::Mat corners, pts3DHomo; 88 | 89 | std::vector> faceId; 90 | 91 | public: 92 | CenternetDetection3D() {}; 93 | ~CenternetDetection3D() {}; 94 | 95 | bool init(const std::string& tensor_path, const int n_classes=3, const int n_batches=1, const float conf_thresh=0.3, const std::vector& k_calibs=std::vector()); 96 | void preprocess(cv::Mat &frame, const int bi=0); 97 | void postprocess(const int bi=0,const bool mAP=false); 98 | void draw(std::vector& frames); 99 | }; 100 | 101 | 102 | } // namespace dnn 103 | } // namespace tk 104 | 105 | 106 | #endif /*CENTERNETDETECTION_H*/ -------------------------------------------------------------------------------- /include/tkDNN/DarknetParser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "tkDNN/tkdnn.h" 4 | 5 | namespace tk { namespace dnn { 6 | 7 | struct darknetFields_t{ 8 | std::string type = ""; 9 | int width = 0; 10 | int height = 0; 11 | int channels = 3; 12 | int batch_normalize=0; 13 | int groups = 1; 14 | int group_id = 0; 15 | int filters=1; 16 | int size_x=1; 17 | int size_y=1; 18 | int stride_x=1; 19 | int stride_y=1; 20 | int padding_x = 0; 21 | int padding_y = 0; 22 | int n_mask = 0; 23 | int classes = 20; 24 | int num = 1; 25 | int pad = 0; 26 | int coords = 4; 27 | int nms_kind = 0; 28 | int new_coords= 0; 29 | float scale_xy = 1; 30 | float nms_thresh = 0.45; 31 | std::vector layers; 32 | std::string activation = "linear"; 33 | 34 | friend std::ostream& operator<<(std::ostream& os, const darknetFields_t& f){ 35 | os << f.width << " " << f.height << " " << f.channels << " " << f.batch_normalize<< " " << f.filters << " " << f.activation<< " " << f.scale_xy; 36 | return os; 37 | } 38 | }; 39 | 40 | std::string darknetParseType(const std::string& line); 41 | bool divideNameAndValue(const std::string& line, std::string&name, std::string& value); 42 | std::vector fromStringToIntVec(const std::string& line, const char delimiter); 43 | 44 | bool darknetParseFields(const std::string& line, darknetFields_t& fields); 45 | tk::dnn::Network *darknetAddNet(darknetFields_t &fields); 46 | void darknetAddLayer(tk::dnn::Network *net, darknetFields_t &f, std::string wgs_path, 47 | std::vector &netLayers, const std::vector& names); 48 | std::vector darknetReadNames(const std::string& names_file); 49 | tk::dnn::Network* darknetParser(const std::string& cfg_file, const std::string& wgs_path, const std::string& names_file); 50 | void loadYoloInfo(const std::string &cfg_file,int lineNo,std::vector &mask,std::vector &anchors,int &num,int &classes,float &nms_thresh,int &nms_kind,int &coords); 51 | void loadYoloInitInfo(int &channels,int &width,int &height,const std::string &cfg_file); 52 | std::vector noYolosLine(const std::string &cfg_file); 53 | 54 | }} 55 | -------------------------------------------------------------------------------- /include/tkDNN/Int8BatchStream.h: -------------------------------------------------------------------------------- 1 | #ifndef INT8BATCHSTREAM_H 2 | #define INT8BATCHSTREAM_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #ifdef __linux__ 16 | #include 17 | #endif 18 | 19 | #include 20 | 21 | #include "NvInfer.h" 22 | #include "utils.h" 23 | #include "tkdnn.h" 24 | 25 | /* 26 | * BatchStream implements the stream for the INT8 calibrator. 27 | * It reads the two files .txt with the list of image file names 28 | * and the list of label file names. 29 | * It then iterates on images and labels. 30 | */ 31 | class BatchStream { 32 | public: 33 | BatchStream(tk::dnn::dataDim_t dim, int batchSize, int maxBatches, const std::string& fileimglist, const std::string& filelabellist); 34 | virtual ~BatchStream() { } 35 | void reset(int firstBatch); 36 | bool next(); 37 | void skip(int skipCount); 38 | float *getBatch() { return mBatch.data(); } 39 | float *getLabels() { return mLabels.data(); } 40 | int getBatchesRead() const { return mBatchCount; } 41 | int getBatchSize() const { return mBatchSize; } 42 | nvinfer1::Dims4 getDims() const { return mDims; } 43 | float* getFileBatch() { return &mFileBatch[0]; } 44 | float* getFileLabels() { return &mFileLabels[0]; } 45 | void readInListFile(const std::string& dataFilePath, std::vector& mListIn); 46 | void readCVimage(std::string inputFileName, std::vector& res, bool fixshape = true); 47 | void readLabels(std::string inputFileName ,std::vector& ris); 48 | bool update(); 49 | 50 | private: 51 | int mBatchSize{ 0 }; 52 | int mMaxBatches{ 0 }; 53 | int mBatchCount{ 0 }; 54 | int mFileCount{ 0 }; 55 | int mFileBatchPos{ 0 }; 56 | int mImageSize{ 0 }; 57 | 58 | nvinfer1::Dims4 mDims; 59 | std::vector mBatch; 60 | std::vector mLabels; 61 | std::vector mFileBatch; 62 | std::vector mFileLabels; 63 | 64 | int mHeight; 65 | int mWidth; 66 | std::string mFileImgList; 67 | std::vector mListImg; 68 | std::string mFileLabelList; 69 | std::vector mListLabel; 70 | }; 71 | 72 | #endif //INT8BATCHSTREAM -------------------------------------------------------------------------------- /include/tkDNN/Int8Calibrator.h: -------------------------------------------------------------------------------- 1 | #ifndef INT8CALIBRATOR_H 2 | #define INT8CALIBRATOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "NvInfer.h" 12 | 13 | #include 14 | #include 15 | 16 | #include "Int8BatchStream.h" 17 | 18 | #include "tkdnn.h" 19 | #include "utils.h" 20 | 21 | /* 22 | * Int8EntropyCalibrator implements the INT8 calibrator to achieve the 23 | * INT8 quantization. It uses a BatchStream stream to scroll through 24 | * images data. It also implements the calibration cache, a way to 25 | * save the calibration process results to reduce the running time: 26 | * the calibration process takes a long time. 27 | */ 28 | class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator { 29 | public: 30 | Int8EntropyCalibrator(BatchStream& stream, int firstBatch, const std::string& calibTableFilePath, 31 | const std::string& inputBlobName, bool readCache = true); 32 | virtual ~Int8EntropyCalibrator() { checkCuda(cudaFree(mDeviceInput)); } 33 | int getBatchSize() const NOEXCEPT override { return mStream.getBatchSize(); } 34 | bool getBatch(void* bindings[], const char* names[], int nbBindings) NOEXCEPT override; 35 | const void* readCalibrationCache(size_t& length) NOEXCEPT override; 36 | void writeCalibrationCache(const void* cache, size_t length) NOEXCEPT override; 37 | 38 | private: 39 | BatchStream mStream; 40 | const std::string mCalibTableFilePath{ nullptr }; 41 | const std::string mInputBlobName; 42 | bool mReadCache{ true }; 43 | 44 | size_t mInputCount; 45 | void* mDeviceInput{ nullptr }; 46 | std::vector mCalibrationCache; 47 | }; 48 | 49 | #endif //INT8CALIBRATOR_H -------------------------------------------------------------------------------- /include/tkDNN/MobilenetDetection.h: -------------------------------------------------------------------------------- 1 | #ifndef MOBILENETDETECTION_H 2 | #define MOBILENETDETECTION_H 3 | 4 | #include 5 | #include "opencv2/opencv.hpp" 6 | 7 | #include "DetectionNN.h" 8 | 9 | #define N_COORDS 4 10 | #define N_SSDSPEC 6 11 | 12 | namespace tk { namespace dnn { 13 | 14 | struct SSDSpec 15 | { 16 | int featureSize = 0; 17 | int shrinkage = 0; 18 | int boxWidth = 0; 19 | int boxHeight = 0; 20 | int ratio1 = 0; 21 | int ratio2 = 0; 22 | 23 | SSDSpec() {} 24 | SSDSpec(int feature_size, int shrinkage, int box_width, int box_height, int ratio1, int ratio2) : 25 | featureSize(feature_size), shrinkage(shrinkage), boxWidth(box_width), 26 | boxHeight(box_height), ratio1(ratio1), ratio2(ratio2) {} 27 | void setAll(int feature_size, int shrinkage, int box_width, int box_height, int ratio1, int ratio2) 28 | { 29 | this->featureSize = feature_size; 30 | this->shrinkage = shrinkage; 31 | this->boxWidth = box_width; 32 | this->boxHeight = box_height; 33 | this->ratio1 = ratio1; 34 | this->ratio2 = ratio2; 35 | } 36 | void print() 37 | { 38 | std::cout << "fsize: " << featureSize << "\tshrinkage: " << shrinkage << 39 | "\t box W:" << boxWidth << "\tbox H: " << boxHeight << 40 | "\t x ratio:" << ratio1 << "\t y ratio:" << ratio2 << std::endl; 41 | } 42 | }; 43 | 44 | class MobilenetDetection : public DetectionNN 45 | { 46 | private: 47 | float IoUThreshold = 0.45; 48 | float centerVariance = 0.1; 49 | float sizeVariance = 0.2; 50 | int imageSize; 51 | 52 | float *priors = nullptr; 53 | int nPriors = 0; 54 | float *locations_h, *confidences_h; 55 | 56 | 57 | 58 | void generate_ssd_priors(const SSDSpec *specs, const int n_specs, bool clamp = true); 59 | void convert_locatios_to_boxes_and_center(); 60 | float iou(const tk::dnn::box &a, const tk::dnn::box &b); 61 | 62 | 63 | 64 | public: 65 | MobilenetDetection() {}; 66 | ~MobilenetDetection() {}; 67 | 68 | bool init(const std::string& tensor_path,const int n_classes, const int n_batches=1, const float conf_thresh=0.3); 69 | void preprocess(cv::Mat &frame, const int bi=0); 70 | void postprocess(const int bi=0,const bool mAP=false); 71 | }; 72 | 73 | 74 | } // namespace dnn 75 | } // namespace tk 76 | 77 | #endif /*MOBILENETDETECTION_H*/ -------------------------------------------------------------------------------- /include/tkDNN/Network.h: -------------------------------------------------------------------------------- 1 | #ifndef NETWORK_H 2 | #define NETWORK_H 3 | 4 | #include 5 | #include "utils.h" 6 | 7 | namespace tk { namespace dnn { 8 | 9 | /** 10 | Data representation between layers 11 | n = batch size 12 | c = channels 13 | h = height (lines) 14 | w = width (rows) 15 | l = length (3rd dimension) 16 | */ 17 | struct dataDim_t { 18 | 19 | int n, c, h, w, l; 20 | 21 | dataDim_t() : n(1), c(1), h(1), w(1), l(1) {}; 22 | 23 | dataDim_t(int _n, int _c, int _h, int _w, int _l = 1) : 24 | n(_n), c(_c), h(_h), w(_w), l(_l) {}; 25 | 26 | void print() { 27 | std::cout<<"Data dim: "< 3 | #include 4 | #include "tkdnn.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | cv::Mat vizFloat2colorMap(cv::Mat map, double min=0, double max=0, int classes=19); 9 | cv::Mat vizData2Mat(dnnType *dataInput, tk::dnn::dataDim_t dim, int img_h, int img_w, double min=0, double max=0, int classes=0); 10 | cv::Mat vizLayer2Mat(tk::dnn::Network *net, int layer, int imgdim = 1000); 11 | 12 | }} 13 | -------------------------------------------------------------------------------- /include/tkDNN/Yolo3Detection.h: -------------------------------------------------------------------------------- 1 | #ifndef Yolo3Detection_H 2 | #define Yolo3Detection_H 3 | #include 4 | #include "opencv2/opencv.hpp" 5 | 6 | #include "DetectionNN.h" 7 | #include "DarknetParser.h" 8 | 9 | namespace tk { namespace dnn { 10 | class Yolo3Detection : public DetectionNN 11 | { 12 | private: 13 | int num = 0; 14 | int nMasks = 0; 15 | int nDets = 0; 16 | tk::dnn::Yolo::detection *dets = nullptr; 17 | tk::dnn::Yolo* yolo[3]; 18 | 19 | tk::dnn::Yolo* getYoloLayer(int n=0); 20 | 21 | cv::Mat bgr_h; 22 | 23 | public: 24 | Yolo3Detection() {}; 25 | ~Yolo3Detection() {}; 26 | 27 | bool init(const std::string& tensor_path, const int n_classes=80, const int n_batches=1, const float conf_thresh=0.3); 28 | void preprocess(cv::Mat &frame, const int bi=0); 29 | void postprocess(const int bi=0,const bool mAP=false); 30 | }; 31 | 32 | 33 | } // namespace dnn 34 | } // namespace tk 35 | 36 | #endif /* Yolo3Detection_H*/ 37 | -------------------------------------------------------------------------------- /include/tkDNN/demo_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef DEMO_UTILS_H 2 | #define DEMO_UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #ifdef __linux__ 11 | #include 12 | #endif 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | 21 | void readCalibrationMatrix(const std::string& path, cv::Mat& calib_mat); 22 | 23 | #endif //DEMO_UTILS_H -------------------------------------------------------------------------------- /include/tkDNN/kernels.h: -------------------------------------------------------------------------------- 1 | #ifndef KERNELS_H 2 | #define KERNELS_H 3 | 4 | #include "utils.h" 5 | 6 | void activationELUForward(dnnType *srcData, dnnType *dstData, int size, cudaStream_t stream = cudaStream_t(0)); 7 | void activationLEAKYForward(dnnType *srcData, dnnType *dstData, int size, float slope, cudaStream_t stream = cudaStream_t(0)); 8 | void activationReLUCeilingForward(dnnType *srcData, dnnType *dstData, int size, const float ceiling, cudaStream_t stream = cudaStream_t(0)); 9 | void activationLOGISTICForward(dnnType *srcData, dnnType *dstData, int size, cudaStream_t stream = cudaStream_t(0)); 10 | void activationSIGMOIDForward(dnnType *srcData, dnnType *dstData, int size, cudaStream_t stream = cudaStream_t(0)); 11 | void activationMishForward(dnnType* srcData, dnnType* dstData, int size, cudaStream_t stream= cudaStream_t(0)); 12 | 13 | void fill(dnnType *data, int size, dnnType val, cudaStream_t stream = cudaStream_t(0)); 14 | 15 | void resizeForward(dnnType *srcData, dnnType *dstData, int n, int i_c, int i_h, int i_w, 16 | int o_c, int o_h, int o_w, cudaStream_t stream = cudaStream_t(0)); 17 | 18 | void reorgForward(dnnType *srcData, dnnType *dstData, 19 | int n, int c, int h, int w, int stride, cudaStream_t stream = cudaStream_t(0)); 20 | 21 | void MaxPoolingForward(dnnType *srcData, dnnType *dstData, int n, int c, int h, int w, int stride_x, int stride_y, int size, int padding, cudaStream_t stream = cudaStream_t(0)); 22 | 23 | void softmaxForward(float *input, int n, int batch, int batch_offset, 24 | int groups, int group_offset, int stride, float temp, float *output, cudaStream_t stream = cudaStream_t(0)); 25 | 26 | void shortcutForward(dnnType *srcData, dnnType *dstData, int n1, int c1, int h1, int w1, int s1, 27 | int n2, int c2, int h2, int w2, int s2, bool mul, 28 | cudaStream_t stream = cudaStream_t(0)); 29 | 30 | void upsampleForward(dnnType *srcData, dnnType *dstData, 31 | int n, int c, int h, int w, int s, int forward, float scale, 32 | cudaStream_t stream = cudaStream_t(0)); 33 | 34 | void float2half(float *srcData, __half *dstData, int size, const cudaStream_t stream = cudaStream_t(0)); 35 | 36 | void dcnV2CudaForward(cublasStatus_t stat, cublasHandle_t handle, 37 | float *input, float *weight, 38 | float *bias, float *ones, 39 | float *offset, float *mask, 40 | float *output, float *columns, 41 | int kernel_h, int kernel_w, 42 | const int stride_h, const int stride_w, 43 | const int pad_h, const int pad_w, 44 | const int dilation_h, const int dilation_w, 45 | const int deformable_group, const int batch_id, 46 | const int in_n, const int in_c, const int in_h, const int in_w, 47 | const int out_n, const int out_c, const int out_h, const int out_w, 48 | const int dst_dim, cudaStream_t stream = cudaStream_t(0)); 49 | 50 | void scalAdd(dnnType* dstData, int size, float alpha, float beta, int inc, cudaStream_t stream = cudaStream_t(0)); 51 | 52 | void reflection_pad2d_out_forward(int32_t pad_h,int32_t pad_w,float *srcData,float *dstData,int32_t input_h,int32_t input_w,int32_t plane_dim,int32_t n_batch,cudaStream_t cudaStream = cudaStream_t(0)); 53 | 54 | void constant_pad2d_forward(dnnType *srcData,dnnType *dstData,int32_t input_h,int32_t input_w,int32_t output_h, 55 | int32_t output_w,int32_t c,int32_t n,int32_t padT,int32_t padL,dnnType constant,cudaStream_t cudaStream = cudaStream_t(0)); 56 | 57 | 58 | #endif //KERNELS_H 59 | -------------------------------------------------------------------------------- /include/tkDNN/kernelsThrust.h: -------------------------------------------------------------------------------- 1 | #ifndef KERNELSTHRUST_H 2 | #define KERNELSTHRUST_H 3 | 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | #include "tkdnn.h" 17 | 18 | struct threshold : public thrust::binary_function 19 | { 20 | __host__ __device__ 21 | float operator()(float x, float y) { 22 | double toll = 1e-6; 23 | if(fabsf(x-y)>toll) 24 | return 0.0f; 25 | else 26 | return x; 27 | } 28 | }; 29 | 30 | void sort(dnnType *src_begin, dnnType *src_end, int *idsrc); 31 | void topk(dnnType *src_begin, int *idsrc, int K, float *topk_scores, 32 | int *topk_inds, float *topk_ys, float *topk_xs); 33 | // void sortAndTopKonDevice(dnnType *src_begin, int *idsrc, float *topk_scores, int *topk_inds, float *topk_ys, float *topk_xs, const int size, const int K, const int n_classes); 34 | void normalize(float *bgr, const int ch, const int h, const int w, const float *mean, const float *stddev); 35 | void transformDep(float *src_begin, float *src_end, float *dst_begin, float *dst_end); 36 | void subtractWithThreshold(dnnType *src_begin, dnnType *src_end, dnnType *src2_begin, dnnType *src_out, struct threshold op); 37 | void topKxyclasses(int *ids_begin, int *ids_end, const int K, const int size, const int wh, int *clses, int *xs, int *ys); 38 | void topKxyAddOffset(int * ids_begin, const int K, const int size, int *intxs_begin, int *intys_begin, 39 | float *xs_begin, float *ys_begin, dnnType *src_begin, float *src_out, int *ids_out); 40 | void bboxes(int * ids_begin, const int K, const int size, float *xs_begin, float *ys_begin, 41 | dnnType *src_begin, float *bbx0, float *bbx1, float *bby0, float *bby1, float *src_out, int *ids_out); 42 | void getRecordsFromTopKId(int * ids_begin, const int K, const int ch, const int size, dnnType *src_begin, float *src_out, int *ids_out); 43 | 44 | void maxElem(dnnType *src_begin, dnnType *dst_begin, const int c, const int h, const int w); 45 | 46 | #endif //KERNELSTHRUST_H -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/ActivationLeakyRT.h: -------------------------------------------------------------------------------- 1 | #include "NvInfer.h" 2 | #include "../kernels.h" 3 | #include 4 | #include 5 | 6 | namespace nvinfer1 { 7 | class ActivationLeakyRT : public IPluginV2 { 8 | 9 | public: 10 | explicit ActivationLeakyRT(float s); 11 | 12 | ActivationLeakyRT(const void *data, size_t length); 13 | 14 | ~ActivationLeakyRT(); 15 | 16 | int getNbOutputs() const NOEXCEPT override; 17 | 18 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override; 19 | 20 | void 21 | configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type, 22 | PluginFormat format, int maxBatchSize) NOEXCEPT override; 23 | 24 | int initialize() NOEXCEPT override; 25 | 26 | void terminate() NOEXCEPT override {} 27 | 28 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override; 29 | 30 | #if NV_TENSORRT_MAJOR > 7 31 | int enqueue(int batchSize, void const *const *inputs, void *const *outputs, void *workspace, 32 | cudaStream_t stream) NOEXCEPT override; 33 | #elif NV_TENSORRT_MAJOR == 7 34 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 35 | #endif 36 | 37 | size_t getSerializationSize() const NOEXCEPT override; 38 | 39 | void serialize(void *buffer) const NOEXCEPT override; 40 | 41 | bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override; 42 | 43 | const char *getPluginType() const NOEXCEPT override; 44 | 45 | const char *getPluginVersion() const NOEXCEPT override; 46 | 47 | void destroy() NOEXCEPT override; 48 | 49 | const char *getPluginNamespace() const NOEXCEPT override; 50 | 51 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override; 52 | 53 | IPluginV2 *clone() const NOEXCEPT override; 54 | 55 | int size; 56 | float slope; 57 | 58 | private: 59 | std::string mPluginNamespace; 60 | }; 61 | 62 | class ActivationLeakyRTPluginCreator : public IPluginCreator { 63 | public: 64 | ActivationLeakyRTPluginCreator(); 65 | 66 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override; 67 | 68 | IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override; 69 | 70 | const char *getPluginNamespace() const NOEXCEPT override ; 71 | 72 | IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ; 73 | 74 | const char *getPluginName() const NOEXCEPT override ; 75 | 76 | const char *getPluginVersion() const NOEXCEPT override ; 77 | 78 | const PluginFieldCollection *getFieldNames() NOEXCEPT override; 79 | 80 | private: 81 | static PluginFieldCollection mFC; 82 | static std::vector mPluginAttributes; 83 | std::string mPluginNamespace; 84 | }; 85 | 86 | 87 | REGISTER_TENSORRT_PLUGIN(ActivationLeakyRTPluginCreator); 88 | }; -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/ActivationLogisticRT.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../kernels.h" 3 | #include 4 | #include 5 | #include 6 | 7 | namespace nvinfer1 { 8 | 9 | class ActivationLogisticRT : public IPluginV2 { 10 | 11 | public: 12 | ActivationLogisticRT() ; 13 | 14 | ActivationLogisticRT(const void *data, size_t length) ; 15 | 16 | ~ActivationLogisticRT() ; 17 | 18 | int getNbOutputs() const NOEXCEPT override ; 19 | 20 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ; 21 | 22 | void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type, 23 | PluginFormat format, int maxBatchSize) NOEXCEPT override ; 24 | 25 | int initialize() NOEXCEPT override ; 26 | 27 | void terminate() NOEXCEPT override ; 28 | 29 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override; 30 | 31 | #if NV_TENSORRT_MAJOR > 7 32 | int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace, 33 | cudaStream_t stream) NOEXCEPT override ; 34 | #elif NV_TENSORRT_MAJOR == 7 35 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 36 | #endif 37 | 38 | 39 | size_t getSerializationSize() const NOEXCEPT override ; 40 | 41 | void serialize(void *buffer) const NOEXCEPT override ; 42 | 43 | const char *getPluginType() const NOEXCEPT override ; 44 | 45 | const char *getPluginVersion() const NOEXCEPT override ; 46 | 47 | void destroy() NOEXCEPT override ; 48 | 49 | const char *getPluginNamespace() const NOEXCEPT override ; 50 | 51 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 52 | 53 | bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ; 54 | 55 | IPluginV2 *clone() const NOEXCEPT override ; 56 | 57 | int size; 58 | 59 | private: 60 | std::string mPluginNamespace; 61 | }; 62 | 63 | class ActivationLogisticRTPluginCreator : public IPluginCreator { 64 | public: 65 | ActivationLogisticRTPluginCreator() ; 66 | 67 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 68 | 69 | IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ; 70 | 71 | const char *getPluginNamespace() const NOEXCEPT override ; 72 | 73 | IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ; 74 | 75 | const char *getPluginVersion() const NOEXCEPT override ; 76 | 77 | const PluginFieldCollection *getFieldNames() NOEXCEPT override ; 78 | 79 | const char *getPluginName() const NOEXCEPT override ; 80 | 81 | private: 82 | static PluginFieldCollection mFC; 83 | static std::vector mPluginAttributes; 84 | std::string mPluginNamespace; 85 | }; 86 | 87 | REGISTER_TENSORRT_PLUGIN(ActivationLogisticRTPluginCreator); 88 | }; -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/ActivationMishRT.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../kernels.h" 3 | #include 4 | #include 5 | 6 | namespace nvinfer1 { 7 | class ActivationMishRT : public IPluginV2 { 8 | 9 | public: 10 | ActivationMishRT() ; 11 | 12 | ~ActivationMishRT() ; 13 | 14 | ActivationMishRT(const void *data, size_t length) ; 15 | 16 | 17 | int getNbOutputs() const NOEXCEPT override ; 18 | 19 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ; 20 | 21 | void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type, 22 | PluginFormat format, int maxBatchSize) NOEXCEPT override ; 23 | 24 | int initialize() NOEXCEPT override ; 25 | 26 | void terminate() NOEXCEPT override ; 27 | 28 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ; 29 | #if NV_TENSORRT_MAJOR > 7 30 | int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,cudaStream_t stream) NOEXCEPT override ; 31 | #elif NV_TENSORRT_MAJOR == 7 32 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 33 | #endif 34 | 35 | size_t getSerializationSize() const NOEXCEPT override ; 36 | 37 | void serialize(void *buffer) const NOEXCEPT override ; 38 | 39 | const char *getPluginType() const NOEXCEPT override ; 40 | 41 | const char *getPluginVersion() const NOEXCEPT override ; 42 | 43 | void destroy() NOEXCEPT override { delete this; } 44 | 45 | bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ; 46 | 47 | const char *getPluginNamespace() const NOEXCEPT override ; 48 | 49 | void setPluginNamespace(const char *plguinNamespace) NOEXCEPT override ; 50 | 51 | IPluginV2 *clone() const NOEXCEPT override ; 52 | 53 | int size; 54 | private: 55 | std::string mPluginNamespace; 56 | }; 57 | 58 | class ActivationMishRTPluginCreator : public IPluginCreator { 59 | public: 60 | ActivationMishRTPluginCreator() ; 61 | 62 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 63 | const char *getPluginNamespace() const NOEXCEPT override ; 64 | 65 | IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ; 66 | 67 | IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ; 68 | 69 | const char *getPluginName() const NOEXCEPT override ; 70 | 71 | const char *getPluginVersion() const NOEXCEPT override ; 72 | 73 | const PluginFieldCollection *getFieldNames() NOEXCEPT override ; 74 | 75 | private: 76 | static PluginFieldCollection mFC; 77 | static std::vector mPluginAttributes; 78 | std::string mPluginNamespace; 79 | }; 80 | 81 | REGISTER_TENSORRT_PLUGIN(ActivationMishRTPluginCreator); 82 | }; -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/ActivationReLUCeilingRT.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../kernels.h" 3 | #include 4 | #include 5 | #include 6 | 7 | namespace nvinfer1 { 8 | class ActivationReLUCeiling : public IPluginV2 { 9 | 10 | public: 11 | explicit ActivationReLUCeiling(const float ceiling) ; 12 | 13 | ~ActivationReLUCeiling() ; 14 | 15 | ActivationReLUCeiling(const void *data, size_t length) ; 16 | 17 | int getNbOutputs() const NOEXCEPT override ; 18 | 19 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ; 20 | 21 | void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type,PluginFormat format, int maxBatchSize) NOEXCEPT override ; 22 | 23 | int initialize() NOEXCEPT override ; 24 | 25 | void terminate() NOEXCEPT override ; 26 | 27 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ; 28 | #if NV_TENSORRT_MAJOR > 7 29 | int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,cudaStream_t stream) NOEXCEPT override ; 30 | #elif NV_TENSORRT_MAJOR == 7 31 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 32 | #endif 33 | 34 | size_t getSerializationSize() const NOEXCEPT override ; 35 | 36 | void serialize(void *buffer) const NOEXCEPT override ; 37 | 38 | IPluginV2 *clone() const NOEXCEPT override ; 39 | 40 | bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ; 41 | 42 | void destroy() NOEXCEPT override ; 43 | 44 | const char *getPluginType() const NOEXCEPT override ; 45 | 46 | const char *getPluginVersion() const NOEXCEPT override ; 47 | 48 | const char *getPluginNamespace() const NOEXCEPT override ; 49 | 50 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 51 | int size; 52 | float ceiling; 53 | private: 54 | std::string mPluginNamespace; 55 | }; 56 | 57 | class ActivationReLUCeilingPluginCreator : public IPluginCreator { 58 | public: 59 | ActivationReLUCeilingPluginCreator() ; 60 | 61 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 62 | 63 | const char *getPluginNamespace() const NOEXCEPT override ; 64 | 65 | IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ; 66 | 67 | IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ; 68 | 69 | const char *getPluginName() const NOEXCEPT override ; 70 | const char *getPluginVersion() const NOEXCEPT override ; 71 | 72 | const PluginFieldCollection *getFieldNames() NOEXCEPT override ; 73 | 74 | public: 75 | static PluginFieldCollection mFC; 76 | static std::vector mPluginAttributes; 77 | std::string mPluginNamespace; 78 | }; 79 | 80 | REGISTER_TENSORRT_PLUGIN(ActivationReLUCeilingPluginCreator); 81 | }; -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/ActivationSigmoidRT.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../kernels.h" 3 | 4 | class ActivationSigmoidRT : public IPlugin { 5 | 6 | public: 7 | ActivationSigmoidRT() { 8 | 9 | 10 | } 11 | 12 | ~ActivationSigmoidRT(){ 13 | 14 | } 15 | 16 | int getNbOutputs() const override { 17 | return 1; 18 | } 19 | 20 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override { 21 | return inputs[0]; 22 | } 23 | 24 | void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) override { 25 | size = 1; 26 | for(int i=0; i(inputs[0]), 45 | reinterpret_cast(outputs[0]), batchSize*size, stream); 46 | return 0; 47 | } 48 | 49 | 50 | virtual size_t getSerializationSize() override { 51 | return 1*sizeof(int); 52 | } 53 | 54 | virtual void serialize(void* buffer) override { 55 | char *buf = reinterpret_cast(buffer),*a=buf; 56 | tk::dnn::writeBUF(buf, size); 57 | assert(buf == a + getSerializationSize()); 58 | } 59 | 60 | int size; 61 | }; 62 | -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/FlattenConcatRT.h: -------------------------------------------------------------------------------- 1 | #ifndef _FLATTENCONCATRT_PLUGIN_H 2 | #define _FLATTENCONCATRT_PLUGIN_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | namespace nvinfer1 { 9 | class FlattenConcatRT : public IPluginV2Ext { 10 | 11 | public: 12 | FlattenConcatRT(int c,int h,int w,int rows,int cols) ; 13 | 14 | FlattenConcatRT(const void *data, size_t length) ; 15 | 16 | ~FlattenConcatRT() ; 17 | 18 | int getNbOutputs() const NOEXCEPT override ; 19 | 20 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ; 21 | 22 | int initialize() NOEXCEPT override ; 23 | 24 | void terminate() NOEXCEPT override ; 25 | 26 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ; 27 | 28 | #if NV_TENSORRT_MAJOR > 7 29 | int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) NOEXCEPT override ; 30 | #elif NV_TENSORRT_MAJOR <= 7 31 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 32 | #endif 33 | 34 | size_t getSerializationSize() const NOEXCEPT override ; 35 | 36 | void serialize(void *buffer) const NOEXCEPT override ; 37 | 38 | void destroy() NOEXCEPT override ; 39 | 40 | const char *getPluginType() const NOEXCEPT override ; 41 | 42 | const char *getPluginVersion() const NOEXCEPT override; 43 | 44 | const char *getPluginNamespace() const NOEXCEPT override ; 45 | 46 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 47 | 48 | IPluginV2Ext *clone() const NOEXCEPT override ; 49 | 50 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const NOEXCEPT override; 51 | 52 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) NOEXCEPT override; 53 | 54 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const NOEXCEPT override; 55 | 56 | bool canBroadcastInputAcrossBatch(int inputIndex) const NOEXCEPT override; 57 | 58 | void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims, 59 | int32_t nbOutputs, DataType const *inputTypes, DataType const *outputTypes, 60 | bool const *inputIsBroadcast, bool const *outputIsBroadcast, PluginFormat floatFormat, 61 | int32_t maxBatchSize) NOEXCEPT override; 62 | 63 | void detachFromContext() NOEXCEPT override; 64 | 65 | bool supportsFormat (DataType type, PluginFormat format) const NOEXCEPT override; 66 | 67 | int c, h, w; 68 | int rows, cols; 69 | cublasHandle_t handle{nullptr}; 70 | private: 71 | std::string mPluginNamespace; 72 | }; 73 | 74 | class FlattenConcatRTPluginCreator : public IPluginCreator { 75 | public: 76 | FlattenConcatRTPluginCreator() ; 77 | 78 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 79 | 80 | const char *getPluginNamespace() const NOEXCEPT override ; 81 | 82 | IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ; 83 | 84 | IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ; 85 | 86 | const char *getPluginName() const NOEXCEPT override ; 87 | 88 | const char *getPluginVersion() const NOEXCEPT override; 89 | 90 | const PluginFieldCollection *getFieldNames() NOEXCEPT override ; 91 | 92 | private: 93 | static PluginFieldCollection mFC; 94 | static std::vector mPluginAttributes; 95 | std::string mPluginNamespace; 96 | }; 97 | 98 | REGISTER_TENSORRT_PLUGIN(FlattenConcatRTPluginCreator); 99 | }; 100 | #endif -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/ReorgRT.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../kernels.h" 3 | #include 4 | #include 5 | 6 | namespace nvinfer1 { 7 | class ReorgRT : public IPluginV2Ext { 8 | 9 | public: 10 | ReorgRT(int stride,int c,int h,int w); 11 | 12 | ~ReorgRT(); 13 | 14 | ReorgRT(const void *data, size_t length); 15 | 16 | int getNbOutputs() const NOEXCEPT override; 17 | 18 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override; 19 | 20 | int initialize() NOEXCEPT override; 21 | 22 | void terminate() NOEXCEPT override; 23 | 24 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override; 25 | 26 | #if NV_TENSORRT_MAJOR > 7 27 | int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace, 28 | cudaStream_t stream) NOEXCEPT override; 29 | #elif NV_TENSORRT_MAJOR == 7 30 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 31 | #endif 32 | 33 | 34 | size_t getSerializationSize() const NOEXCEPT override; 35 | 36 | void serialize(void *buffer) const NOEXCEPT override; 37 | 38 | bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override; 39 | 40 | const char *getPluginType() const NOEXCEPT override; 41 | 42 | const char *getPluginVersion() const NOEXCEPT override; 43 | 44 | void destroy() NOEXCEPT override; 45 | 46 | const char *getPluginNamespace() const NOEXCEPT override; 47 | 48 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override; 49 | 50 | IPluginV2Ext *clone() const NOEXCEPT override; 51 | 52 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const NOEXCEPT override; 53 | 54 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) NOEXCEPT override; 55 | 56 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const NOEXCEPT override; 57 | 58 | bool canBroadcastInputAcrossBatch(int inputIndex) const NOEXCEPT override; 59 | 60 | void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims, 61 | int32_t nbOutputs, DataType const *inputTypes, DataType const *outputTypes, 62 | bool const *inputIsBroadcast, bool const *outputIsBroadcast, PluginFormat floatFormat, 63 | int32_t maxBatchSize) NOEXCEPT override; 64 | 65 | void detachFromContext() NOEXCEPT override; 66 | 67 | int c, h, w, stride; 68 | private: 69 | std::string mPluginNamespace; 70 | }; 71 | 72 | class ReorgRTPluginCreator : public IPluginCreator { 73 | public: 74 | ReorgRTPluginCreator(); 75 | 76 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override; 77 | 78 | const char *getPluginNamespace() const NOEXCEPT override; 79 | 80 | IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override; 81 | 82 | IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override; 83 | 84 | const char *getPluginName() const NOEXCEPT override; 85 | 86 | const char *getPluginVersion() const NOEXCEPT override; 87 | 88 | const PluginFieldCollection *getFieldNames() NOEXCEPT override; 89 | 90 | private: 91 | static PluginFieldCollection mFC; 92 | static std::vector mPluginAttributes; 93 | std::string mPluginNamespace; 94 | }; 95 | 96 | REGISTER_TENSORRT_PLUGIN(ReorgRTPluginCreator); 97 | }; 98 | 99 | -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/ReshapeRT.h: -------------------------------------------------------------------------------- 1 | #ifndef _RESHAPERT_PLUGIN_H 2 | #define _RESHAPERT_PLUGIN_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | namespace nvinfer1 { 11 | class ReshapeRT : public IPluginV2Ext { 12 | 13 | public: 14 | ReshapeRT(int n,int c,int h,int w) ; 15 | 16 | ReshapeRT(const void *data, size_t length) ; 17 | 18 | ~ReshapeRT() ; 19 | 20 | int getNbOutputs() const NOEXCEPT override ; 21 | 22 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ; 23 | 24 | 25 | int initialize() NOEXCEPT override ; 26 | 27 | void terminate() NOEXCEPT override ; 28 | 29 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ; 30 | 31 | #if NV_TENSORRT_MAJOR > 7 32 | int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) NOEXCEPT override ; 33 | #elif NV_TENSORRT_MAJOR == 7 34 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 35 | #endif 36 | 37 | size_t getSerializationSize() const NOEXCEPT override ; 38 | 39 | void serialize(void *buffer) const NOEXCEPT override ; 40 | 41 | bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ; 42 | 43 | const char *getPluginType() const NOEXCEPT override ; 44 | 45 | const char *getPluginVersion() const NOEXCEPT override ; 46 | 47 | void destroy() NOEXCEPT override ; 48 | 49 | const char *getPluginNamespace() const NOEXCEPT override ; 50 | 51 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 52 | 53 | IPluginV2Ext *clone() const NOEXCEPT override ; 54 | 55 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const NOEXCEPT override; 56 | 57 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) NOEXCEPT override; 58 | 59 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const NOEXCEPT override; 60 | 61 | bool canBroadcastInputAcrossBatch(int inputIndex) const NOEXCEPT override; 62 | 63 | void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims, 64 | int32_t nbOutputs, DataType const *inputTypes, DataType const *outputTypes, 65 | bool const *inputIsBroadcast, bool const *outputIsBroadcast, PluginFormat floatFormat, 66 | int32_t maxBatchSize) NOEXCEPT override; 67 | 68 | void detachFromContext() NOEXCEPT override; 69 | 70 | int n, c, h, w; 71 | private: 72 | std::string mPluginNamespace; 73 | }; 74 | 75 | class ReshapeRTPluginCreator : public IPluginCreator { 76 | public: 77 | ReshapeRTPluginCreator() ; 78 | 79 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 80 | 81 | const char *getPluginNamespace() const NOEXCEPT override ; 82 | 83 | IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ; 84 | 85 | IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ; 86 | 87 | const char *getPluginName() const NOEXCEPT override ; 88 | 89 | const char *getPluginVersion() const NOEXCEPT override ; 90 | 91 | const PluginFieldCollection *getFieldNames() NOEXCEPT override ; 92 | 93 | private: 94 | static PluginFieldCollection mFC; 95 | static std::vector mPluginAttributes; 96 | std::string mPluginNamespace; 97 | }; 98 | 99 | REGISTER_TENSORRT_PLUGIN(ReshapeRTPluginCreator); 100 | }; 101 | #endif -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/ResizeLayerRT.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../kernels.h" 3 | #include 4 | #include 5 | #include 6 | 7 | namespace nvinfer1 { 8 | 9 | class ResizeLayerRT : public IPluginV2Ext { 10 | 11 | public: 12 | ResizeLayerRT(int oc, int oh, int ow,int ic,int ih,int iw) ; 13 | 14 | ResizeLayerRT(const void *data, size_t length) ; 15 | 16 | ~ResizeLayerRT() ; 17 | 18 | int getNbOutputs() const NOEXCEPT override ; 19 | 20 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ; 21 | 22 | int initialize() NOEXCEPT override ; 23 | 24 | void terminate() NOEXCEPT override ; 25 | 26 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ; 27 | 28 | #if NV_TENSORRT_MAJOR > 7 29 | int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace, 30 | cudaStream_t stream) NOEXCEPT override ; 31 | #elif NV_TENSORRT_MAJOR <= 7 32 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 33 | #endif 34 | 35 | size_t getSerializationSize() const NOEXCEPT override ; 36 | 37 | void serialize(void *buffer) const NOEXCEPT override ; 38 | 39 | bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ; 40 | 41 | const char *getPluginType() const NOEXCEPT override ; 42 | 43 | const char *getPluginVersion() const NOEXCEPT override ; 44 | 45 | void destroy() NOEXCEPT override ; 46 | 47 | const char *getPluginNamespace() const NOEXCEPT override ; 48 | 49 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 50 | 51 | IPluginV2Ext *clone() const NOEXCEPT override ; 52 | 53 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const NOEXCEPT override; 54 | 55 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) NOEXCEPT override; 56 | 57 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const NOEXCEPT override; 58 | 59 | bool canBroadcastInputAcrossBatch(int inputIndex) const NOEXCEPT override; 60 | 61 | void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims, 62 | int32_t nbOutputs, DataType const *inputTypes, DataType const *outputTypes, 63 | bool const *inputIsBroadcast, bool const *outputIsBroadcast, PluginFormat floatFormat, 64 | int32_t maxBatchSize) NOEXCEPT override; 65 | 66 | void detachFromContext() NOEXCEPT override; 67 | 68 | int i_c, i_h, i_w, o_c, o_h, o_w; 69 | 70 | private: 71 | std::string mPluginNamespace; 72 | }; 73 | 74 | class ResizeLayerRTPluginCreator : public IPluginCreator { 75 | public: 76 | ResizeLayerRTPluginCreator() ; 77 | 78 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 79 | 80 | const char *getPluginNamespace() const NOEXCEPT override ; 81 | 82 | IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ; 83 | 84 | IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ; 85 | 86 | const char *getPluginName() const NOEXCEPT override ; 87 | 88 | const char *getPluginVersion() const NOEXCEPT override ; 89 | 90 | const PluginFieldCollection *getFieldNames() NOEXCEPT override ; 91 | 92 | 93 | 94 | 95 | private: 96 | static PluginFieldCollection mFC; 97 | static std::vector mPluginAttributes; 98 | std::string mPluginNamespace; 99 | 100 | }; 101 | 102 | REGISTER_TENSORRT_PLUGIN(ResizeLayerRTPluginCreator); 103 | }; 104 | 105 | -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/RouteRT.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../kernels.h" 3 | #include 4 | #include 5 | 6 | namespace nvinfer1 { 7 | class RouteRT : public IPluginV2 { 8 | 9 | /** 10 | THIS IS NOT USED ANYMORE 11 | */ 12 | 13 | public: 14 | RouteRT(int groups, int group_id) ; 15 | 16 | ~RouteRT() ; 17 | 18 | RouteRT(const void *data, size_t length) ; 19 | 20 | int getNbOutputs() const NOEXCEPT override ; 21 | 22 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ; 23 | 24 | void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type,PluginFormat format, int maxBatchSize) NOEXCEPT override ; 25 | 26 | int initialize() NOEXCEPT override ; 27 | 28 | void terminate() NOEXCEPT override ; 29 | 30 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ; 31 | 32 | #if NV_TENSORRT_MAJOR > 7 33 | int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,cudaStream_t stream) NOEXCEPT override ; 34 | #elif NV_TENSORRT_MAJOR == 7 35 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 36 | #endif 37 | 38 | size_t getSerializationSize() const NOEXCEPT override ; 39 | 40 | void serialize(void *buffer) const NOEXCEPT override ; 41 | 42 | const char *getPluginType() const NOEXCEPT override ; 43 | 44 | const char *getPluginVersion() const NOEXCEPT override ; 45 | 46 | void destroy() NOEXCEPT override ; 47 | 48 | const char *getPluginNamespace() const NOEXCEPT override ; 49 | 50 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 51 | 52 | bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ; 53 | 54 | IPluginV2 *clone() const NOEXCEPT override ; 55 | 56 | static const int MAX_INPUTS = 4; 57 | int in; 58 | int c_in[MAX_INPUTS]; 59 | int c, h, w; 60 | int groups, group_id; 61 | private: 62 | std::string mPluginNamespace; 63 | }; 64 | 65 | class RouteRTPluginCreator : public IPluginCreator { 66 | public: 67 | RouteRTPluginCreator() ; 68 | 69 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ; 70 | 71 | const char *getPluginNamespace() const NOEXCEPT override ; 72 | 73 | IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ; 74 | 75 | IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ; 76 | 77 | const char *getPluginName() const NOEXCEPT override ; 78 | 79 | const char *getPluginVersion() const NOEXCEPT override ; 80 | 81 | const PluginFieldCollection *getFieldNames() NOEXCEPT override ; 82 | 83 | private: 84 | static PluginFieldCollection mFC; 85 | static std::vector mPluginAttributes; 86 | std::string mPluginNamespace; 87 | }; 88 | 89 | REGISTER_TENSORRT_PLUGIN(RouteRTPluginCreator); 90 | }; 91 | -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/ShortcutRT.h: -------------------------------------------------------------------------------- 1 | #ifndef _SHORTCUTRT_PLUGIN_H 2 | #define _SHORTCUTRT_PLUGIN_H 3 | 4 | #include 5 | #include "../kernels.h" 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | namespace nvinfer1 { 12 | 13 | class ShortcutRT : public IPluginV2Ext { 14 | 15 | public: 16 | ShortcutRT(int bc,int bh,int bw,int c,int h,int w ,bool mul); 17 | 18 | ~ShortcutRT(); 19 | 20 | ShortcutRT(const void *data, size_t length); 21 | 22 | int getNbOutputs() const NOEXCEPT override; 23 | 24 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override; 25 | 26 | void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims, int32_t nbOutputs, 27 | DataType const *inputTypes, DataType const *outputTypes, bool const *inputIsBroadcast, 28 | bool const *outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) NOEXCEPT override; 29 | 30 | bool isOutputBroadcastAcrossBatch (int32_t outputIndex, bool const *inputIsBroadcasted, int32_t nbInputs) const NOEXCEPT override; 31 | 32 | bool canBroadcastInputAcrossBatch (int32_t inputIndex) const NOEXCEPT override; 33 | 34 | void attachToContext (cudnnContext *, cublasContext *, IGpuAllocator *) NOEXCEPT override; 35 | 36 | void detachFromContext () NOEXCEPT override; 37 | 38 | DataType getOutputDataType(int32_t index, nvinfer1::DataType const *inputTypes, int32_t nbInputs) const NOEXCEPT override; 39 | 40 | int initialize() NOEXCEPT override; 41 | 42 | void terminate() NOEXCEPT override; 43 | 44 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override; 45 | 46 | #if NV_TENSORRT_MAJOR > 7 47 | int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace, 48 | cudaStream_t stream) NOEXCEPT override; 49 | #elif NV_TENSORRT_MAJOR == 7 50 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 51 | #endif 52 | 53 | 54 | size_t getSerializationSize() const NOEXCEPT override; 55 | 56 | void serialize(void *buffer) const NOEXCEPT override; 57 | 58 | bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override; 59 | 60 | const char *getPluginType() const NOEXCEPT override; 61 | 62 | const char *getPluginVersion() const NOEXCEPT override; 63 | 64 | void destroy() NOEXCEPT override; 65 | 66 | const char *getPluginNamespace() const NOEXCEPT override; 67 | 68 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override; 69 | 70 | IPluginV2Ext *clone() const NOEXCEPT override; 71 | 72 | int c, h, w; 73 | int bc, bh, bw,bl; 74 | bool mul; 75 | tk::dnn::dataDim_t bDim; 76 | private: 77 | std::string mPluginNamespace; 78 | }; 79 | 80 | 81 | class ShortcutRTPluginCreator : public IPluginCreator { 82 | public: 83 | ShortcutRTPluginCreator(); 84 | 85 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override; 86 | 87 | const char *getPluginNamespace() const NOEXCEPT override; 88 | 89 | IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override; 90 | 91 | IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override; 92 | 93 | const char *getPluginName() const NOEXCEPT override; 94 | 95 | const char *getPluginVersion() const NOEXCEPT override; 96 | 97 | const PluginFieldCollection *getFieldNames() NOEXCEPT override; 98 | 99 | public: 100 | static PluginFieldCollection mFC; 101 | static std::vector mPluginAttributes; 102 | std::string mPluginNamespace; 103 | }; 104 | 105 | REGISTER_TENSORRT_PLUGIN(ShortcutRTPluginCreator); 106 | 107 | }; 108 | 109 | #endif -------------------------------------------------------------------------------- /include/tkDNN/pluginsRT/UpsampleRT.h: -------------------------------------------------------------------------------- 1 | #ifndef _UPSAMPLERT_PLUGIN_H 2 | #define _UPSAMPLERT_PLUGIN_H 3 | 4 | #include 5 | #include "../kernels.h" 6 | #include 7 | #include 8 | 9 | namespace nvinfer1 { 10 | 11 | class UpsampleRT : public IPluginV2Ext { 12 | 13 | public: 14 | UpsampleRT(int stride,int c,int h,int w); 15 | 16 | UpsampleRT(const void *data, size_t length); 17 | 18 | ~UpsampleRT(); 19 | 20 | int getNbOutputs() const NOEXCEPT override; 21 | 22 | Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override; 23 | 24 | int initialize() NOEXCEPT override; 25 | 26 | void terminate() NOEXCEPT override; 27 | 28 | size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override; 29 | 30 | #if NV_TENSORRT_MAJOR > 7 31 | int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace, 32 | cudaStream_t stream) NOEXCEPT override; 33 | #elif NV_TENSORRT_MAJOR == 7 34 | int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override; 35 | #endif 36 | 37 | 38 | size_t getSerializationSize() const NOEXCEPT override; 39 | 40 | void serialize(void *buffer) const NOEXCEPT override; 41 | 42 | bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override; 43 | 44 | const char *getPluginType() const NOEXCEPT override; 45 | 46 | const char *getPluginVersion() const NOEXCEPT override; 47 | 48 | void destroy() NOEXCEPT override; 49 | 50 | const char *getPluginNamespace() const NOEXCEPT override; 51 | 52 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override; 53 | 54 | IPluginV2Ext *clone() const NOEXCEPT override ; 55 | 56 | bool isOutputBroadcastAcrossBatch (int32_t outputIndex, bool const *inputIsBroadcasted, int32_t nbInputs) const NOEXCEPT override; 57 | 58 | bool canBroadcastInputAcrossBatch (int32_t inputIndex) const NOEXCEPT override; 59 | 60 | void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims, int32_t nbOutputs, 61 | DataType const *inputTypes, DataType const *outputTypes, bool const *inputIsBroadcast, 62 | bool const *outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) NOEXCEPT override; 63 | 64 | void attachToContext (cudnnContext *, cublasContext *, IGpuAllocator *) NOEXCEPT override; 65 | 66 | void detachFromContext () NOEXCEPT override; 67 | 68 | DataType getOutputDataType (int32_t index, nvinfer1::DataType const *inputTypes, int32_t nbInputs) const NOEXCEPT override; 69 | 70 | 71 | int c, h, w, stride; 72 | private: 73 | std::string mPluginNamespace; 74 | }; 75 | 76 | class UpsampleRTPluginCreator : public IPluginCreator { 77 | public: 78 | UpsampleRTPluginCreator(); 79 | 80 | void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override; 81 | 82 | const char *getPluginNamespace() const NOEXCEPT override; 83 | 84 | IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override; 85 | 86 | IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override; 87 | 88 | const char *getPluginName() const NOEXCEPT override; 89 | 90 | const char *getPluginVersion() const NOEXCEPT override; 91 | 92 | const PluginFieldCollection *getFieldNames() NOEXCEPT override; 93 | 94 | private: 95 | static PluginFieldCollection mFC; 96 | static std::vector mPluginAttributes; 97 | std::string mPluginNamespace; 98 | }; 99 | 100 | REGISTER_TENSORRT_PLUGIN(UpsampleRTPluginCreator); 101 | }; 102 | 103 | #endif -------------------------------------------------------------------------------- /include/tkDNN/test.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | int testInference(std::vector input_bins, std::vector output_bins, 4 | tk::dnn::Network *net, tk::dnn::NetworkRT *netRT = nullptr) { 5 | 6 | std::vector outputs; 7 | for(int i=0; inum_layers; i++) { 8 | if(net->layers[i]->final) 9 | outputs.push_back(net->layers[i]); 10 | } 11 | // no final layers, set last as output 12 | if(outputs.size() == 0) { 13 | outputs.push_back(net->layers[net->num_layers-1]); 14 | } 15 | 16 | 17 | // check input 18 | if(input_bins.size() != 1) { 19 | FatalError("currently support only 1 input"); 20 | } 21 | if(output_bins.size() != outputs.size()) { 22 | std::cout<input_dim.tot(), &input_h, &data); 30 | 31 | // outputs 32 | //dnnType *cudnn_out[outputs.size()], *rt_out[outputs.size()]; 33 | std::vector cudnn_out,rt_out; 34 | 35 | tk::dnn::dataDim_t dim1 = net->input_dim; //input dim 36 | printCenteredTitle(" CUDNN inference ", '=', 30); { 37 | dim1.print(); 38 | TKDNN_TSTART 39 | net->infer(dim1, data); 40 | TKDNN_TSTOP 41 | dim1.print(); 42 | } 43 | for(int i=0; idstData); 44 | 45 | if(netRT != nullptr) { 46 | tk::dnn::dataDim_t dim2 = net->input_dim; 47 | printCenteredTitle(" TENSORRT inference ", '=', 30); { 48 | dim2.print(); 49 | TKDNN_TSTART 50 | netRT->infer(dim2, data); 51 | TKDNN_TSTOP 52 | dim2.print(); 53 | } 54 | for(int i=0; ibuffersRT[i+1]); 55 | } 56 | 57 | int ret_cudnn = 0, ret_tensorrt = 0, ret_cudnn_tensorrt = 0; 58 | for(int i=0; i df_old.loc[index][1]): 32 | if row[1] > df_old.loc[index][1] + df_old.loc[index][1] * 0.5 : 33 | print('\x1b[3;30;41m' + 'WAY SLOWER' + '\x1b[0m') 34 | else: 35 | print('\x1b[3;30;41m' + 'slower' + '\x1b[0m') 36 | 37 | 38 | -------------------------------------------------------------------------------- /scripts/download_validation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib.request as dowReq 3 | import zipfile 4 | 5 | val = input("Enter BDD or COCO :") 6 | if(val == "COCO"): 7 | url = "https://cloud.hipert.unimore.it/s/LNxBDk4wzqXPL8c/download" 8 | lib = "..\demo\COCO_val2017" 9 | lib_zip = "COCO_val2017.zip" 10 | elif(val == "BDD"): 11 | url = "https://cloud.hipert.unimore.it/s/bikqk3FzCq2tg4D/download" 12 | lib = "..\demo\BDD100k_val" 13 | lib_zip = "BDD100k_val.zip" 14 | 15 | dowReq.urlretrieve(url,lib_zip) 16 | 17 | with zipfile.ZipFile(lib_zip,'r') as zip_ref: 18 | zip_ref.extractall(lib) 19 | 20 | labelFolder = lib + "\labels" 21 | imageFolder = lib + "\images" 22 | 23 | file1 = open(".\\..\\demo\\all_labels.txt","a") 24 | path1 = os.path.realpath(labelFolder) 25 | for file in os.listdir(labelFolder): 26 | valTemp = path1 + "\\" + file 27 | valTemp = valTemp + '\n' 28 | file1.write(valTemp) 29 | file1.close() 30 | 31 | file2 = open(".\\..\\demo\\all_images.txt","a") 32 | path2 = os.path.realpath(imageFolder) 33 | for file in os.listdir(imageFolder): 34 | pathtemp = path2 + "\\" + file 35 | pathtemp = pathtemp + '\n' 36 | file2.write(pathtemp) 37 | file2.close() 38 | 39 | print("Completed") 40 | -------------------------------------------------------------------------------- /scripts/download_validation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | function elaborate_testset { 5 | wget $1 -O $2.zip 6 | unzip -d $2 $2.zip 7 | rm $2.zip 8 | cd $2/ 9 | realpath labels/* > all_labels.txt 10 | realpath images/* > all_images.txt 11 | cd .. 12 | } 13 | 14 | cd demo 15 | for valset in $@ 16 | do 17 | 18 | if [ $valset = "COCO" ]; then 19 | echo "Downloading $valset validation set in demo" 20 | elaborate_testset "https://cloud.hipert.unimore.it/s/LNxBDk4wzqXPL8c/download" "COCO_val2017" 21 | elif [ $valset = "BDD" ]; then 22 | echo "Downloading $valset validation set in demo" 23 | elaborate_testset "https://cloud.hipert.unimore.it/s/bikqk3FzCq2tg4D/download" "BDD100K_val" 24 | fi 25 | 26 | done 27 | -------------------------------------------------------------------------------- /scripts/install_OpenCV4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #based on https://devtalk.nvidia.com/default/topic/1042035/installing-opencv4-on-xavier/ & https://github.com/markste-in/OpenCV4XAVIER/blob/master/buildOpenCV4.sh 4 | 5 | # Compute Capabilities can be found here https://developer.nvidia.com/cuda-gpus#compute 6 | ARCH_BIN=7.2 # AGX Xavier 7 | #ARCH_BIN=6.2 # Tx2 8 | 9 | cd ~/Downloads 10 | sudo apt-get install -y build-essential \ 11 | unzip \ 12 | pkg-config \ 13 | libjpeg-dev \ 14 | libpng-dev \ 15 | libtiff-dev \ 16 | libavcodec-dev \ 17 | libavformat-dev \ 18 | libswscale-dev \ 19 | libv4l-dev \ 20 | libxvidcore-dev \ 21 | libx264-dev \ 22 | libgtk-3-dev \ 23 | libatlas-base-dev \ 24 | gfortran \ 25 | python3-dev \ 26 | python3-venv \ 27 | libgstreamer1.0-dev \ 28 | libgstreamer-plugins-base1.0-dev \ 29 | libdc1394-22-dev \ 30 | libavresample-dev \ 31 | libtbb-dev \ 32 | 33 | git clone https://github.com/opencv/opencv.git 34 | cd opencv && git checkout 4.5.4 && cd .. 35 | git clone https://github.com/opencv/opencv_contrib.git 36 | cd opencv_contrib && git checkout 4.5.4 && cd .. 37 | 38 | 39 | python3 -m venv opencv4 40 | source opencv4/bin/activate 41 | pip install wheel 42 | pip install numpy 43 | 44 | cd opencv && mkdir build && cd build 45 | 46 | cmake -D CMAKE_BUILD_TYPE=RELEASE \ 47 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 48 | -D INSTALL_PYTHON_EXAMPLES=ON \ 49 | -D INSTALL_C_EXAMPLES=OFF \ 50 | -D OPENCV_EXTRA_MODULES_PATH='~/Downloads/opencv_contrib/modules' \ 51 | -D PYTHON_EXECUTABLE='~/Downloads/opencv4/bin/python' \ 52 | -D BUILD_EXAMPLES=ON \ 53 | -D WITH_CUDA=ON \ 54 | -D CUDA_ARCH_BIN=${ARCH_BIN} \ 55 | -D CUDA_ARCH_PTX="" \ 56 | -D ENABLE_FAST_MATH=ON \ 57 | -D CUDA_FAST_MATH=ON \ 58 | -D WITH_CUBLAS=ON \ 59 | -D WITH_LIBV4L=ON \ 60 | -D WITH_GSTREAMER=ON \ 61 | -D WITH_GSTREAMER_0_10=OFF \ 62 | -D WITH_TBB=ON \ 63 | -D WITH_OPENGL=ON \ 64 | -D WITH_VULKAN=ON \ 65 | ../ 66 | 67 | make -j4 68 | sudo make install 69 | sudo ldconfig 70 | 71 | cd ~/Downloads/opencv4/lib/python3.6/site-packages 72 | ln -s /usr/local/lib/python3.6/site-packages/cv2.cpython-36m-aarch64-linux-gnu.so cv2.so 73 | -------------------------------------------------------------------------------- /scripts/test_all_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #cd build 4 | 5 | RED='\033[1;31m' 6 | GREEN='\033[1;32m' 7 | ORANGE='\033[1;33m' 8 | PINK='\033[1;95m' 9 | NC='\033[0m' # No Color 10 | 11 | function print_output { 12 | if [ $1 -eq 0 ]; then 13 | echo -e "$2 ${GREEN}OK${NC}" 14 | elif [ $1 -eq 1 ]; then 15 | echo -e "$2 ${RED}FATAL ERROR${NC}" 16 | elif [ $1 -eq 2 ] || [ $1 -eq 10 ]; then 17 | echo -e "$2 ${PINK}CUDNN ERROR${NC}" 18 | elif [ $1 -eq 4 ] || [ $1 -eq 12 ]; then 19 | echo -e "$2 ${PINK}TENSORRT ERROR${NC}" 20 | elif [ $1 -eq 8 ]; then 21 | echo -e "$2 ${PINK}CUDNN vs TENSORRT ERROR${NC}" 22 | elif [ $1 -eq 6 ]; then 23 | echo -e "$2 ${PINK}CUDNN & TENSORTRT ERROR${NC}" 24 | elif [ $1 -eq 14 ]; then 25 | echo -e "$2 ${PINK}ERROR FOR EVERY CHECK${NC}" 26 | else 27 | echo -e "$2 ${RED}NOT OKAY (OPENCV maybe)${NC}" 28 | fi 29 | 30 | } 31 | 32 | out_dir=results 33 | out_file=results.log 34 | rm -rf $out_dir/ 35 | mkdir -p $out_dir 36 | 37 | function test_net { 38 | ./test_$1 &> $out_dir/$1_${TKDNN_MODE}_build_$out_file 39 | print_output $? $1 40 | ./test_rtinference $1*.rt 1 &> $out_dir/$1_${TKDNN_MODE}_inference_batch1_$out_file 41 | print_output $? "infer $1" 42 | ./test_rtinference $1*.rt $TKDNN_BATCHSIZE &> $out_dir/$1_${TKDNN_MODE}_inference_batch${TKDNN_BATCHSIZE}_$out_file 43 | print_output $? "batched $1" 44 | } 45 | 46 | 47 | # modes=( 1 ) # only FP32 48 | modes=( 1 2 ) # FP32 and FP16 49 | # modes=( 1 2 3 ) # FP32, FP16 and INT8 50 | 51 | for i in "${modes[@]}" 52 | do 53 | rm -f *rt 54 | if [ $i -eq 1 ] 55 | then 56 | export TKDNN_MODE=FP32 57 | echo -e "${ORANGE}Test FP32${NC}" 58 | fi 59 | if [ $i -eq 2 ] 60 | then 61 | export TKDNN_MODE=FP16 62 | echo -e "${ORANGE}Test FP16${NC}" 63 | fi 64 | if [ $i -eq 3 ] 65 | then 66 | export TKDNN_MODE=INT8 67 | export TKDNN_CALIB_LABEL_PATH=../demo/COCO_val2017/all_labels.txt 68 | export TKDNN_CALIB_IMG_PATH=../demo/COCO_val2017/all_images.txt 69 | echo -e "${ORANGE}Test INT8${NC}" 70 | fi 71 | 72 | export TKDNN_BATCHSIZE=2 73 | echo -e "${ORANGE}Batch $TKDNN_BATCHSIZE ${NC}" 74 | 75 | test_net mnist 76 | # ./test_imuodom &>> $out_file 77 | # print_output $? imuodom 78 | 79 | test_net yolo4 80 | # test_net yolo4_320 81 | # test_net yolo4_320_coco2 82 | # test_net yolo4_512 83 | # test_net yolo4_608 84 | # test_net yolo4-csp 85 | # test_net yolo4x 86 | # test_net yolo4_berkeley 87 | # test_net yolo4_berkeley_f1 88 | # test_net yolo4tiny 89 | # test_net yolo4tiny_512 90 | # test_net yolo3 91 | # test_net yolo3_berkeley 92 | # test_net yolo3_coco4 93 | # test_net yolo3_flir 94 | # test_net yolo3_512 95 | # test_net yolo3tiny 96 | # test_net yolo3tiny_512 97 | # test_net yolo2 98 | # test_net yolo2_voc 99 | # test_net yolo2tiny 100 | # test_net csresnext50-panet-spp 101 | # test_net csresnext50-panet-spp_berkeley 102 | # test_net resnet101_cnet 103 | # test_net dla34_cnet 104 | # test_net dla34_cnet3d 105 | # test_net mobilenetv2ssd 106 | # test_net mobilenetv2ssd512 107 | # test_net bdd-mobilenetv2ssd 108 | # test_net dla34_ctrack 109 | # test_net shelfnet 110 | # test_net shelfnet_berkeley 111 | done 112 | 113 | echo "If errors occured, check logfiles in directory: $out_dir" 114 | -------------------------------------------------------------------------------- /scripts/test_inference.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function test_inference { 4 | ./test_$1 5 | ./test_rtinference $1_$2.rt 1 6 | ./test_rtinference $1_$2.rt 4 7 | } 8 | 9 | sudo jeston_clock 10 | 11 | # modes=( 1 ) # only FP32 12 | # modes=( 1 2 ) # FP32 and FP16 13 | modes=( 1 2 3 ) # FP32, FP16 and INT8 14 | 15 | rm times_rtinference.csv 16 | for i in "${modes[@]}" 17 | do 18 | rm *rt 19 | if [ $i -eq 1 ] 20 | then 21 | export TKDNN_MODE=FP32 22 | mode=fp32 23 | echo -e "${ORANGE}Test FP32${NC}" 24 | fi 25 | if [ $i -eq 2 ] 26 | then 27 | export TKDNN_MODE=FP16 28 | mode=fp16 29 | echo -e "${ORANGE}Test FP16${NC}" 30 | fi 31 | if [ $i -eq 3 ] 32 | then 33 | export TKDNN_MODE=INT8 34 | export TKDNN_CALIB_LABEL_PATH=../demo/COCO_val2017/all_labels.txt 35 | export TKDNN_CALIB_IMG_PATH=../demo/COCO_val2017/all_images.txt 36 | mode=int8 37 | echo -e "${ORANGE}Test INT8${NC}" 38 | 39 | fi 40 | 41 | export TKDNN_BATCHSIZE=4 42 | echo -e "${ORANGE}Batch $TKDNN_BATCHSIZE ${NC}" 43 | 44 | test_inference yolo4_320 $mode 45 | test_inference yolo4 $mode 46 | test_inference yolo4_512 $mode 47 | test_inference yolo4_608 $mode 48 | test_inference yolo4tiny $mode 49 | done 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /src/Activation.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | #include "kernels.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | Activation::Activation(Network *net, int act_mode, const float ceiling, const float slope) : 9 | Layer(net) { 10 | 11 | this->act_mode = act_mode; 12 | this->ceiling = ceiling; 13 | this->slope = slope; 14 | checkCuda( cudaMalloc(&dstData, input_dim.tot()*sizeof(dnnType)) ); 15 | 16 | if(int(act_mode) < 100) { 17 | 18 | checkCUDNN( cudnnSetTensor4dDescriptor(srcTensorDesc, 19 | net->tensorFormat, 20 | net->dataType, 21 | input_dim.n*input_dim.l, 22 | input_dim.c, 23 | input_dim.h, input_dim.w) ); 24 | checkCUDNN( cudnnSetTensor4dDescriptor(dstTensorDesc, 25 | net->tensorFormat, 26 | net->dataType, 27 | input_dim.n*input_dim.l, 28 | input_dim.c, 29 | input_dim.h, input_dim.w) ); 30 | 31 | 32 | checkCUDNN( cudnnCreateActivationDescriptor(&activDesc) ); 33 | checkCUDNN( cudnnSetActivationDescriptor(activDesc, 34 | (cudnnActivationMode_t) act_mode, 35 | CUDNN_PROPAGATE_NAN, 36 | ceiling) ); 37 | } 38 | } 39 | 40 | Activation::~Activation() { 41 | 42 | checkCuda( cudaFree(dstData) ); 43 | 44 | if(int(act_mode) < 100) 45 | checkCUDNN( cudnnDestroyActivationDescriptor(activDesc) ); 46 | } 47 | 48 | dnnType* Activation::infer(dataDim_t &dim, dnnType* srcData) { 49 | if(act_mode == ACTIVATION_LEAKY) { 50 | activationLEAKYForward(srcData, dstData, dim.tot(), this->slope); 51 | } 52 | else if(act_mode == ACTIVATION_MISH) { 53 | activationMishForward(srcData, dstData, dim.tot()); 54 | 55 | } 56 | else if(act_mode == ACTIVATION_LOGISTIC) { 57 | activationLOGISTICForward(srcData, dstData, dim.tot()); 58 | 59 | } else if(act_mode == ACTIVATION_ELU) { 60 | activationELUForward(srcData, dstData, dim.tot()); 61 | 62 | } else { 63 | dnnType alpha = dnnType(1); 64 | dnnType beta = dnnType(0); 65 | checkCUDNN( cudnnActivationForward(net->cudnnHandle, 66 | activDesc, 67 | &alpha, 68 | srcTensorDesc, 69 | srcData, 70 | &beta, 71 | dstTensorDesc, 72 | dstData) ); 73 | } 74 | return dstData; 75 | } 76 | 77 | }} 78 | -------------------------------------------------------------------------------- /src/BoundingBox.cpp: -------------------------------------------------------------------------------- 1 | #include "BoundingBox.h" 2 | 3 | namespace tk { namespace dnn { 4 | 5 | float BoundingBox::overlap(const float p1, const float d1, const float p2, const float d2){ 6 | float l1 = p1 - d1/2; 7 | float l2 = p2 - d2/2; 8 | float left = l1 > l2 ? l1 : l2; 9 | float r1 = p1 + d1/2; 10 | float r2 = p2 + d2/2; 11 | float right = r1 < r2 ? r1 : r2; 12 | return right - left; 13 | } 14 | 15 | float BoundingBox::boxesIntersection(const BoundingBox &b){ 16 | float width = this->overlap(x, w, b.x, b.w); 17 | float height = this->overlap(y, h, b.y, b.h); 18 | if(width < 0 || height < 0) 19 | return 0; 20 | float area = width*height; 21 | return area; 22 | } 23 | 24 | float BoundingBox::boxesUnion(const BoundingBox &b){ 25 | float i = this->boxesIntersection(b); 26 | float u = w*h + b.w*b.h - i; 27 | return u; 28 | } 29 | 30 | float BoundingBox::IoU(const BoundingBox &b){ 31 | float I = this->boxesIntersection(b); 32 | float U = this->boxesUnion(b); 33 | if (I == 0 || U == 0) 34 | return 0; 35 | return I / U; 36 | } 37 | 38 | void BoundingBox::clear(){ 39 | uniqueTruthIndex = -1; 40 | truthFlag = 0; 41 | maxIoU = 0; 42 | } 43 | 44 | std::ostream& operator<<(std::ostream& os, const BoundingBox& bb){ 45 | os <<"w: "<< bb.w << ", h: "<< bb.h << ", x: "<< bb.x << ", y: "<< bb.y << 46 | ", cat: "<< bb.cl << ", conf: "<< bb.prob<< ", truth: "<< 47 | bb.truthFlag<< ", assignedGT: "<< bb.uniqueTruthIndex<< 48 | ", maxIoU: "<< bb.maxIoU<<"\n"; 49 | return os; 50 | } 51 | 52 | bool boxComparison (const BoundingBox& a,const BoundingBox& b) { 53 | return (a.prob>b.prob); 54 | } 55 | 56 | }} -------------------------------------------------------------------------------- /src/Dense.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | 5 | namespace tk { namespace dnn { 6 | 7 | Dense::Dense(Network *net, int out_ch, std::string fname_weights) : 8 | LayerWgs(net, net->getOutputDim().tot(), out_ch, 1, 1, 1, fname_weights) { 9 | 10 | output_dim.n = 1; 11 | output_dim.c = out_ch; 12 | output_dim.h = 1; 13 | output_dim.w = 1; 14 | output_dim.l = 1; 15 | 16 | //allocate data for infer result 17 | checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) ); 18 | } 19 | 20 | Dense::~Dense() { 21 | 22 | checkCuda( cudaFree(dstData) ); 23 | } 24 | 25 | dnnType* Dense::infer(dataDim_t &dim, dnnType* srcData) { 26 | 27 | if (dim.n != 1) 28 | FatalError("Not Implemented"); 29 | 30 | int dim_x = dim.tot(); 31 | int dim_y = output_dim.tot(); 32 | 33 | if (dim_x != input_dim.tot()) 34 | FatalError("Input mismatch"); 35 | 36 | dnnType alpha = dnnType(1), beta = dnnType(1); 37 | // place bias into dstData 38 | checkCuda( cudaMemcpy(dstData, bias_d, dim_y*sizeof(dnnType), cudaMemcpyDeviceToDevice) ); 39 | 40 | //do matrix multiplication 41 | checkERROR( cublasSgemv(net->cublasHandle, CUBLAS_OP_T, 42 | dim_x, dim_y, 43 | &alpha, 44 | data_d, dim_x, 45 | srcData, 1, 46 | &beta, 47 | dstData, 1) ); 48 | 49 | //update data dimensions 50 | dim.h = 1; 51 | dim.w = 1; 52 | dim.l = 1; 53 | dim.c = dim_y; 54 | 55 | return dstData; 56 | } 57 | 58 | }} 59 | -------------------------------------------------------------------------------- /src/Flatten.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | #include "kernels.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | Flatten::Flatten(Network *net) : Layer(net) { 9 | 10 | checkCuda( cudaMalloc(&dstData, input_dim.tot()*sizeof(dnnType)) ); 11 | 12 | output_dim.n = 1; 13 | output_dim.c = input_dim.tot(); 14 | output_dim.h = 1; 15 | output_dim.w = 1; 16 | output_dim.l = 1; 17 | 18 | this->h = 1; 19 | this->w = 1; 20 | this->rows = input_dim.c; 21 | this->cols = input_dim.h * input_dim.w; 22 | this->c = input_dim.w * input_dim.h * input_dim.c; 23 | } 24 | 25 | Flatten::~Flatten() { 26 | 27 | checkCuda( cudaFree(dstData) ); 28 | } 29 | 30 | dnnType* Flatten::infer(dataDim_t &dim, dnnType* srcData) { 31 | 32 | //transpose per channel 33 | matrixTranspose(net->cublasHandle, srcData, dstData, dim.c, dim.h*dim.w*dim.l); 34 | 35 | //update data dimensions 36 | dim = output_dim; 37 | 38 | return dstData; 39 | } 40 | 41 | }} -------------------------------------------------------------------------------- /src/Int8Calibrator.cpp: -------------------------------------------------------------------------------- 1 | #include "Int8Calibrator.h" 2 | 3 | Int8EntropyCalibrator::Int8EntropyCalibrator(BatchStream& stream, int firstBatch, 4 | const std::string& calibTableFilePath, 5 | const std::string& inputBlobName, 6 | bool readCache): 7 | mStream(stream), 8 | mCalibTableFilePath(calibTableFilePath), 9 | mInputBlobName(inputBlobName.c_str()), 10 | mReadCache(readCache) { 11 | nvinfer1::Dims4 dims = mStream.getDims(); 12 | mInputCount = mStream.getBatchSize() + dims.d[1]*dims.d[2]*dims.d[3]; 13 | checkCuda(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); 14 | mStream.reset(firstBatch); 15 | } 16 | 17 | bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings) NOEXCEPT { 18 | if (!mStream.next()) 19 | return false; 20 | 21 | checkCuda(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); 22 | assert(!strcmp(names[0], mInputBlobName.c_str())); 23 | bindings[0] = mDeviceInput; 24 | return true; 25 | } 26 | 27 | const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) NOEXCEPT { 28 | mCalibrationCache.clear(); 29 | assert(!mCalibTableFilePath.empty()); 30 | std::ifstream input(mCalibTableFilePath, std::ios::binary); 31 | input >> std::noskipws; 32 | input >> std::noskipws; 33 | if (mReadCache && input.good()) 34 | std::copy(std::istream_iterator(input), std::istream_iterator(), 35 | std::back_inserter(mCalibrationCache)); 36 | 37 | length = mCalibrationCache.size(); 38 | return length ? &mCalibrationCache[0] : nullptr; 39 | } 40 | 41 | void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length) NOEXCEPT { 42 | assert(!mCalibTableFilePath.empty()); 43 | std::ofstream output(mCalibTableFilePath, std::ios::binary); 44 | output.write(reinterpret_cast(cache), length); 45 | output.close(); 46 | } -------------------------------------------------------------------------------- /src/Layer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | 5 | namespace tk { namespace dnn { 6 | 7 | Layer::Layer(Network *net) { 8 | 9 | this->net = net; 10 | this->final = false; 11 | if(net != nullptr) { 12 | this->input_dim = net->getOutputDim(); 13 | this->output_dim = input_dim; 14 | 15 | checkCUDNN( cudnnCreateTensorDescriptor(&srcTensorDesc) ); 16 | checkCUDNN( cudnnCreateTensorDescriptor(&dstTensorDesc) ); 17 | 18 | if(!net->addLayer(this)) 19 | FatalError("Net reached max number of layers"); 20 | } 21 | 22 | feature_map_size = input_dim.tot() + output_dim.tot(); 23 | } 24 | 25 | Layer::~Layer() { 26 | 27 | checkCUDNN( cudnnDestroyTensorDescriptor(srcTensorDesc) ); 28 | checkCUDNN( cudnnDestroyTensorDescriptor(dstTensorDesc) ); 29 | 30 | if(dstData != nullptr) { 31 | cudaFree(dstData); 32 | dstData = nullptr; 33 | } 34 | } 35 | 36 | }} -------------------------------------------------------------------------------- /src/MulAdd.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | #include "kernels.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | MulAdd::MulAdd(Network *net, dnnType mul, dnnType add) : Layer(net) { 9 | 10 | this->mul = mul; 11 | this->add = add; 12 | 13 | int size = input_dim.tot(); 14 | 15 | // create a vector with all value set to add 16 | dnnType *add_vector_h = new dnnType[size]; 17 | for(int i=0; icublasHandle, srcData, dstData, add_vector, input_dim.tot(), mul); 37 | 38 | //update data dimensions 39 | dim = output_dim; 40 | 41 | return dstData; 42 | } 43 | 44 | }} -------------------------------------------------------------------------------- /src/Padding.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by perseusdg on 03/01/22. 3 | // 4 | 5 | #include 6 | #include "Layer.h" 7 | #include "kernels.h" 8 | 9 | namespace tk{ namespace dnn { 10 | Padding::Padding(Network *net, int32_t pad_h, int32_t pad_w, tkdnnPaddingMode_t padding_mode,float constant) : Layer(net) { 11 | this->paddingH = pad_h; 12 | this->paddingW = pad_w; 13 | this->padding_mode = padding_mode; 14 | output_dim.c = input_dim.c; 15 | output_dim.n = input_dim.n; 16 | output_dim.h = input_dim.h + 2 * (this->paddingH); 17 | output_dim.w = input_dim.w + 2 * (this->paddingW); 18 | if(padding_mode == tkdnnPaddingMode_t::PADDING_MODE_CONSTANT){ 19 | this->constant = constant; 20 | }else{ 21 | this->constant = 0; 22 | } 23 | checkCuda(cudaMalloc(&dstData,output_dim.tot()*sizeof(dnnType))); 24 | } 25 | 26 | Padding::~Padding() { 27 | checkCuda(cudaFree(dstData)); 28 | } 29 | dnnType* Padding::infer(dataDim_t &dim, float *srcData) { 30 | fill(dstData,output_dim.tot(),0.0); 31 | if(padding_mode == tkdnnPaddingMode_t::PADDING_MODE_REFLECTION) 32 | { 33 | reflection_pad2d_out_forward(paddingH, paddingW, srcData, dstData, input_dim.h, input_dim.w, input_dim.c, 34 | input_dim.n); 35 | } 36 | else if(padding_mode == tkdnnPaddingMode_t::PADDING_MODE_CONSTANT){ 37 | constant_pad2d_forward(srcData,dstData,input_dim.h,input_dim.w,output_dim.h,output_dim.w,input_dim.c, 38 | input_dim.n,paddingH,paddingW,constant); 39 | } 40 | 41 | dim = output_dim; 42 | return dstData; 43 | } 44 | 45 | }} 46 | -------------------------------------------------------------------------------- /src/Reorg.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | #include "kernels.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | Reorg::Reorg(Network *net, int stride) : Layer(net) { 9 | 10 | this->stride = stride; 11 | 12 | output_dim.n = input_dim.n; 13 | output_dim.c = input_dim.c*stride*stride; 14 | output_dim.h = input_dim.h/stride; 15 | output_dim.w = input_dim.w/stride; 16 | output_dim.l = input_dim.l; 17 | 18 | checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) ); 19 | } 20 | 21 | Reorg::~Reorg() { 22 | 23 | checkCuda( cudaFree(dstData) ); 24 | } 25 | 26 | dnnType* Reorg::infer(dataDim_t &dim, dnnType* srcData) { 27 | 28 | reorgForward(srcData, dstData, dim.n, dim.c, dim.h, dim.w, stride); 29 | 30 | dim = output_dim; 31 | return dstData; 32 | } 33 | 34 | }} 35 | -------------------------------------------------------------------------------- /src/Reshape.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | #include "kernels.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | Reshape::Reshape(Network *net, dataDim_t new_dim) : Layer(net) { 9 | 10 | checkCuda( cudaMalloc(&dstData, input_dim.tot()*sizeof(dnnType)) ); 11 | this->n = new_dim.n; 12 | this->c = new_dim.c; 13 | this->h = new_dim.h; 14 | this->w = new_dim.w; 15 | output_dim.n = new_dim.n; 16 | output_dim.c = new_dim.c; 17 | output_dim.h = new_dim.h; 18 | output_dim.w = new_dim.w; 19 | output_dim.l = new_dim.l; 20 | 21 | output_dim = new_dim; 22 | 23 | if(input_dim.tot() != output_dim.tot()) 24 | FatalError("Reshape dimension mismatch"); 25 | 26 | } 27 | 28 | Reshape::~Reshape() { 29 | 30 | checkCuda( cudaFree(dstData) ); 31 | } 32 | 33 | dnnType* Reshape::infer(dataDim_t &dim, dnnType* srcData) { 34 | 35 | //just copies the data and changes the output dim 36 | checkCuda( cudaMemcpy(dstData, srcData, dim.n*dim.c*dim.h*dim.w*sizeof(dnnType), cudaMemcpyDeviceToDevice)); 37 | dim = output_dim; 38 | 39 | return dstData; 40 | } 41 | 42 | }} -------------------------------------------------------------------------------- /src/Resize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | #include "kernels.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | Resize::Resize(Network *net, int scale_c, int scale_h, int scale_w, bool fixed, ResizeMode_t mode) : Layer(net) { 9 | 10 | this->mode = mode; 11 | if(fixed){ 12 | output_dim.c = scale_c; 13 | output_dim.h = scale_h; 14 | output_dim.w = scale_w; 15 | } 16 | else{ 17 | output_dim.c *= scale_c; 18 | output_dim.h *= scale_h; 19 | output_dim.w *= scale_w; 20 | } 21 | 22 | checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) ); 23 | } 24 | 25 | Resize::~Resize() { 26 | 27 | checkCuda( cudaFree(dstData) ); 28 | } 29 | 30 | dnnType* Resize::infer(dataDim_t &dim, dnnType* srcData) { 31 | 32 | resizeForward(srcData, dstData, dim.n, dim.c, dim.h, dim.w, 33 | output_dim.c, output_dim.h, output_dim.w); 34 | dim = output_dim; 35 | 36 | return dstData; 37 | } 38 | 39 | }} -------------------------------------------------------------------------------- /src/Route.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | #include "kernels.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | Route::Route(Network *net, Layer **layers, int layers_n, int groups, int group_id) : Layer(net) { 9 | 10 | // copy input layers 11 | if(layers_n > MAX_LAYERS) { 12 | FatalError("ROUTE: reached max number of input layers"); 13 | } 14 | for(int i=0; ilayers[i] = layers[i]; 16 | } 17 | this->layers_n = layers_n; 18 | this->groups = groups; 19 | this->group_id = group_id; 20 | 21 | //get dims 22 | output_dim.l = 1; 23 | output_dim.c = 0; 24 | for(int i=0; ioutput_dim.w; 28 | output_dim.h = layers[i]->output_dim.h; 29 | } else { 30 | if( layers[i]->output_dim.w != output_dim.w || 31 | layers[i]->output_dim.h != output_dim.h ) 32 | FatalError("Route Output dim missmatch"); 33 | } 34 | output_dim.c += layers[i]->output_dim.c; 35 | } 36 | 37 | output_dim.c /= this->groups; 38 | input_dim = output_dim; 39 | 40 | checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) ); 41 | } 42 | 43 | Route::~Route() { 44 | 45 | checkCuda( cudaFree(dstData) ); 46 | } 47 | 48 | dnnType* Route::infer(dataDim_t &dim, dnnType* srcData) { 49 | 50 | 51 | int offset = 0; 52 | for(int i=0; idstData; 54 | int in_dim = layers[i]->output_dim.tot(); 55 | int part_in_dim = in_dim / this->groups; 56 | checkCuda( cudaMemcpy(dstData + offset, input + this->group_id*part_in_dim, part_in_dim*sizeof(dnnType), cudaMemcpyDeviceToDevice)); 57 | offset += part_in_dim; 58 | } 59 | 60 | //update data dimensions 61 | dim = output_dim; 62 | 63 | return dstData; 64 | } 65 | 66 | }} -------------------------------------------------------------------------------- /src/Shortcut.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | #include "kernels.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | Shortcut::Shortcut(Network *net, Layer *backLayer, bool mul) : Layer(net) { 9 | 10 | this->backLayer = backLayer; 11 | this->mul = mul; 12 | this->c = input_dim.c; 13 | this->h = input_dim.h; 14 | this->w = input_dim.w; 15 | checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) ); 16 | 17 | if( ( backLayer->output_dim.c != input_dim.c && mul ) || 18 | (( backLayer->output_dim.w != input_dim.w || backLayer->output_dim.h != input_dim.h ) && !mul ) ) 19 | FatalError("Shortcut dim missmatch"); 20 | 21 | } 22 | 23 | Shortcut::~Shortcut() { 24 | 25 | checkCuda( cudaFree(dstData) ); 26 | } 27 | 28 | dnnType* Shortcut::infer(dataDim_t &dim, dnnType* srcData) { 29 | 30 | dataDim_t bdim = this->backLayer->output_dim; 31 | 32 | checkCuda(cudaMemcpy(dstData, srcData, dim.tot()*sizeof(dnnType), cudaMemcpyDeviceToDevice)); 33 | shortcutForward(this->backLayer->dstData, dstData, dim.n, dim.c, dim.h, dim.w, 1, bdim.n, bdim.c, bdim.h, bdim.w, 1, mul); 34 | 35 | //update data dimensions 36 | dim = output_dim; 37 | 38 | return dstData; 39 | } 40 | 41 | }} -------------------------------------------------------------------------------- /src/Softmax.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | #include "kernels.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | Softmax::Softmax(Network *net, const tk::dnn::dataDim_t* dim, const cudnnSoftmaxMode_t mode) : Layer(net) { 9 | 10 | checkCuda( cudaMalloc(&dstData, input_dim.tot()*sizeof(dnnType)) ); 11 | 12 | this->mode = mode; 13 | if(dim == nullptr) 14 | { 15 | this->dim.n= input_dim.n; 16 | this->dim.c= input_dim.c; 17 | this->dim.h= input_dim.h; 18 | this->dim.w= input_dim.w; 19 | this->dim.l= input_dim.l; 20 | } 21 | else 22 | { 23 | this->dim.n= dim->n; 24 | this->dim.c= dim->c; 25 | this->dim.h= dim->h; 26 | this->dim.w= dim->w; 27 | this->dim.l= dim->l; 28 | } 29 | 30 | checkCUDNN( cudnnSetTensor4dDescriptor(srcTensorDesc, 31 | net->tensorFormat, 32 | net->dataType, 33 | this->dim.n*this->dim.l, 34 | this->dim.c, 35 | this->dim.h, this->dim.w) ); 36 | checkCUDNN( cudnnSetTensor4dDescriptor(dstTensorDesc, 37 | net->tensorFormat, 38 | net->dataType, 39 | this->dim.n*this->dim.l, 40 | this->dim.c, 41 | this->dim.h, this->dim.w) ); 42 | } 43 | 44 | Softmax::~Softmax() { 45 | 46 | checkCuda( cudaFree(dstData) ); 47 | } 48 | 49 | dnnType* Softmax::infer(dataDim_t &dim, dnnType* srcData) { 50 | 51 | dnnType alpha = dnnType(1); 52 | dnnType beta = dnnType(0); 53 | checkCUDNN( cudnnSoftmaxForward(net->cudnnHandle, 54 | CUDNN_SOFTMAX_ACCURATE , 55 | this->mode, 56 | &alpha, 57 | srcTensorDesc, 58 | srcData, 59 | &beta, 60 | dstTensorDesc, 61 | dstData) ); 62 | return dstData; 63 | } 64 | 65 | }} 66 | -------------------------------------------------------------------------------- /src/Upsample.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Layer.h" 4 | #include "kernels.h" 5 | 6 | namespace tk { namespace dnn { 7 | 8 | Upsample::Upsample(Network *net, int stride) : Layer(net) { 9 | 10 | this->stride = stride; 11 | 12 | output_dim.n = input_dim.n; 13 | output_dim.c = input_dim.c; 14 | output_dim.h = input_dim.h*stride; 15 | output_dim.w = input_dim.w*stride; 16 | output_dim.l = input_dim.l; 17 | this->c = input_dim.c; 18 | this->h = input_dim.h; 19 | this->w = input_dim.w; 20 | 21 | checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) ); 22 | } 23 | 24 | Upsample::~Upsample() { 25 | 26 | checkCuda( cudaFree(dstData) ); 27 | } 28 | 29 | dnnType* Upsample::infer(dataDim_t &dim, dnnType* srcData) { 30 | 31 | fill(dstData, output_dim.tot(), 0.0); 32 | upsampleForward(srcData, dstData, input_dim.n, input_dim.c, input_dim.h, input_dim.w, stride, 1, 1); 33 | 34 | dim = output_dim; 35 | return dstData; 36 | } 37 | 38 | }} 39 | -------------------------------------------------------------------------------- /src/demo_utils.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "demo_utils.h" 3 | 4 | void readCalibrationMatrix(const std::string& path, cv::Mat& calib_mat){ 5 | YAML::Node config = YAML::LoadFile(path); 6 | 7 | //read camera matrix 8 | int rows = config["camera_matrix"]["rows"].as(); 9 | int cols = config["camera_matrix"]["cols"].as(); 10 | 11 | cv::Mat calib = cv::Mat(cv::Size(rows, cols), CV_32F); 12 | float *vals = (float *)calib.data; 13 | 14 | for(int i=0; i < config["camera_matrix"]["data"].size(); ++i ) 15 | vals[i] = config["camera_matrix"]["data"][i].as(); 16 | 17 | calib_mat = calib; 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/kernels/activation_elu.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | 3 | /** 4 | Exponential Linear Unit compute kernel 5 | it does the following operation for each x input element: 6 | x < 0 : y = e^(x) -1 7 | x > 0 : y = x 8 | */ 9 | __global__ 10 | void activation_elu(dnnType *input, dnnType *output, int size) { 11 | 12 | int i = blockDim.x*blockIdx.x + threadIdx.x; 13 | 14 | if(i0) 18 | k0 = 1.0f; 19 | else 20 | k0 = 0.0f; 21 | k1 = 1.0f-k0; 22 | 23 | output[i] = k0*input[i] + k1*(expf(input[i]) -1.0f); 24 | } 25 | } 26 | 27 | 28 | /** 29 | ELU activation function 30 | */ 31 | void activationELUForward(dnnType* srcData, dnnType* dstData, int size, const cudaStream_t stream) 32 | { 33 | int blocks = (size+255)/256; 34 | int threads = 256; 35 | 36 | activation_elu<<>>(srcData, dstData, size); 37 | } 38 | -------------------------------------------------------------------------------- /src/kernels/activation_leaky.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | 3 | __global__ 4 | void activation_leaky(dnnType *input, dnnType *output, int size, float slope) { 5 | 6 | int i = blockDim.x*blockIdx.x + threadIdx.x; 7 | 8 | if(i0) 10 | output[i] = input[i]; 11 | else 12 | output[i] = slope*input[i]; 13 | } 14 | } 15 | 16 | 17 | /** 18 | ELU activation function 19 | */ 20 | void activationLEAKYForward(dnnType* srcData, dnnType* dstData, int size, float slope, cudaStream_t stream) 21 | { 22 | int blocks = (size+255)/256; 23 | int threads = 256; 24 | 25 | activation_leaky<<>>(srcData, dstData, size, slope); 26 | } 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/kernels/activation_logistic.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | 3 | __global__ 4 | void activation_logistic(dnnType *input, dnnType *output, int size) { 5 | 6 | int i = blockDim.x*blockIdx.x + threadIdx.x; 7 | 8 | if(i>>(srcData, dstData, size); 23 | } 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/kernels/activation_mish.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | #include 3 | 4 | #define MISH_THRESHOLD 20 5 | 6 | __device__ 7 | float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);} 8 | 9 | __device__ 10 | float softplus_kernel(float x, float threshold = 20) { 11 | if (x > threshold) return x; // too large 12 | else if (x < -threshold) return expf(x); // too small 13 | return logf(expf(x) + 1); 14 | } 15 | 16 | 17 | 18 | __device__ 19 | float mish_yashas(float x) { 20 | float e = __expf(x); 21 | if (x <= -18.0f) 22 | return x * e; 23 | 24 | float n = e * e + 2 * e; 25 | if (x <= -5.0f) 26 | return x * __fdividef(n, n + 2); 27 | 28 | return x - 2 * __fdividef(x, n + 2); 29 | } 30 | 31 | // https://github.com/digantamisra98/Mish 32 | // https://github.com/AlexeyAB/darknet/blob/master/src/activation_kernels.cu 33 | __global__ 34 | void activation_mish(dnnType *input, dnnType *output, int size) { 35 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 36 | if (i < size) 37 | // output[i] = input[i] * tanh_activate_kernel( softplus_kernel(input[i], MISH_THRESHOLD)); 38 | output[i] = mish_yashas(input[i]); 39 | } 40 | 41 | /** 42 | Mish activation function 43 | */ 44 | void activationMishForward(dnnType* srcData, dnnType* dstData, int size, cudaStream_t stream) 45 | { 46 | int blocks = (size+255)/256; 47 | int threads = 256; 48 | 49 | activation_mish<<>>(srcData, dstData, size); 50 | } -------------------------------------------------------------------------------- /src/kernels/activation_relu_ceiling.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | 3 | __global__ 4 | void activation_relu_ceiling(dnnType *input, dnnType *output, int size, const float ceiling) { 5 | 6 | int i = blockDim.x*blockIdx.x + threadIdx.x; 7 | 8 | if(i0) 10 | { 11 | if (input[i]>ceiling) 12 | output[i] = ceiling; 13 | else 14 | output[i] = input[i]; 15 | } 16 | else 17 | output[i] = 0.0f; 18 | } 19 | } 20 | 21 | 22 | /** 23 | Relu ceiling activation function 24 | */ 25 | void activationReLUCeilingForward(dnnType* srcData, dnnType* dstData, int size, const float ceiling, cudaStream_t stream) 26 | { 27 | int blocks = (size+255)/256; 28 | int threads = 256; 29 | 30 | activation_relu_ceiling<<>>(srcData, dstData, size, ceiling); 31 | } 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/kernels/activation_sigmoid.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | #include 3 | 4 | 5 | __global__ 6 | void activation_sigmoid(dnnType *input, dnnType *output, int size) { 7 | 8 | int i = blockDim.x * blockIdx.x + threadIdx.x; 9 | if(i < size) 10 | output[i] = 1.0f / (1.0f + exp (-input[i])); 11 | } 12 | 13 | 14 | /** 15 | ELU activation function 16 | */ 17 | void activationSIGMOIDForward(dnnType* srcData, dnnType* dstData, int size, cudaStream_t stream) 18 | { 19 | int blocks = (size+255)/256; 20 | int threads = 256; 21 | 22 | activation_sigmoid<<>>(srcData, dstData, size); 23 | } -------------------------------------------------------------------------------- /src/kernels/convert.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | 3 | __global__ 4 | void float2half_device(float *input, __half *output, int size) { 5 | 6 | int i = blockDim.x*blockIdx.x + threadIdx.x; 7 | 8 | if(i>>(srcData, dstData, size); 20 | cudaDeviceSynchronize(); 21 | } 22 | -------------------------------------------------------------------------------- /src/kernels/fill.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | 3 | __global__ 4 | void fill_kernel(dnnType *data, int size, dnnType val) { 5 | 6 | int i = blockDim.x*blockIdx.x + threadIdx.x; 7 | 8 | if(i>>(data, size, val); 19 | } 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/kernels/normalize.cu: -------------------------------------------------------------------------------- 1 | #include "kernelsThrust.h" 2 | 3 | __global__ 4 | void normalize_kernel(float *bgr, const int dim, const float *mean, const float *stddev){ 5 | int i = blockDim.x*blockIdx.x + threadIdx.x; 6 | int j = blockIdx.y; 7 | bgr[j*(dim)+i] = bgr[j*(dim)+i] - mean[j]; 8 | bgr[j*(dim)+i] = bgr[j*(dim)+i] / stddev[j]; 9 | 10 | } 11 | 12 | void normalize(float *bgr, const int ch, const int h, const int w, const float *mean, const float *stddev){ 13 | int num_thread = 256; 14 | dim3 dimBlock(h*w/num_thread, ch); 15 | normalize_kernel<<>>(bgr, h*w, mean, stddev); 16 | } -------------------------------------------------------------------------------- /src/kernels/pooling.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | 3 | __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *input, float *output) 4 | { 5 | int h = (in_h + pad - size) / stride_y + 1; 6 | int w = (in_w + pad - size) / stride_x + 1; 7 | int c = in_c; 8 | 9 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 10 | if(id >= n) return; 11 | 12 | int j = id % w; 13 | id /= w; 14 | int i = id % h; 15 | id /= h; 16 | int k = id % c; 17 | id /= c; 18 | int b = id; 19 | 20 | int w_offset = -pad / 2; 21 | int h_offset = -pad / 2; 22 | 23 | int out_index = j + w*(i + h*(k + c*b)); 24 | float max = -9999999; 25 | int max_i = -1; 26 | int l, m; 27 | for(l = 0; l < size; ++l){ 28 | for(m = 0; m < size; ++m){ 29 | int cur_h = h_offset + i*stride_y + l; 30 | int cur_w = w_offset + j*stride_x + m; 31 | int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); 32 | int valid = (cur_h >= 0 && cur_h < in_h && 33 | cur_w >= 0 && cur_w < in_w); 34 | float val = (valid != 0) ? input[index] : -9999999; 35 | max_i = (val > max) ? index : max_i; 36 | max = (val > max) ? val : max; 37 | } 38 | } 39 | output[out_index] = max; 40 | } 41 | 42 | void MaxPoolingForward(dnnType* srcData, dnnType* dstData, int n, int c, int h, int w, int stride_x, int stride_y, int size, int padding, cudaStream_t stream) 43 | { 44 | 45 | int tot_size = n*c*h*w; 46 | 47 | int blocks = (tot_size+255)/256; 48 | int threads = 256; 49 | 50 | forward_maxpool_layer_kernel<<>>(tot_size, h, w, c, stride_x, stride_y, size, padding, srcData, dstData); 51 | } 52 | 53 | -------------------------------------------------------------------------------- /src/kernels/reorg.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | 3 | __global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) 4 | { 5 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 6 | if(i >= N) return; 7 | int in_index = i; 8 | int in_w = i%w; 9 | i = i/w; 10 | int in_h = i%h; 11 | i = i/h; 12 | int in_c = i%c; 13 | i = i/c; 14 | int b = i%batch; 15 | 16 | int out_c = c/(stride*stride); 17 | 18 | int c2 = in_c % out_c; 19 | int offset = in_c / out_c; 20 | int w2 = in_w*stride + offset % stride; 21 | int h2 = in_h*stride + offset / stride; 22 | //printf("%d\n", offset); 23 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); 24 | 25 | // printf("%d %d %d\n", w2, h2, c2); 26 | //printf("%d %d\n", in_index, out_index); 27 | //if(out_index >= N || out_index < 0) printf("bad bad bad \n"); 28 | 29 | if(forward) out[out_index] = x[in_index]; 30 | else out[in_index] = x[out_index]; 31 | //if(forward) out[1] = x[1]; 32 | //else out[0] = x[0]; 33 | } 34 | 35 | /** 36 | reorg function function 37 | */ 38 | void reorgForward(dnnType* srcData, dnnType* dstData, 39 | int n, int c, int h, int w, int stride, cudaStream_t stream) { 40 | 41 | int size = n*c*h*w; 42 | 43 | int blocks = (size+255)/256; 44 | int threads = 256; 45 | 46 | reorg_kernel<<>>(size, srcData, w, h, c, n, stride, false, dstData); 47 | } 48 | 49 | 50 | -------------------------------------------------------------------------------- /src/kernels/resize.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | #include 3 | 4 | __global__ void resize_kernel( int size,float *x, int i_w, int i_h, int i_c, 5 | int o_w, int o_h, int o_c, int batch, float *out) 6 | { 7 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 8 | if(id >= size) return; 9 | 10 | int i = id % o_w; 11 | id /= o_w; 12 | int j = id % o_h; 13 | id /= o_h; 14 | int k = id % o_c; 15 | id /= o_c; 16 | int b = id % batch; 17 | 18 | int out_index = i + o_w*(j + o_h*(k + o_c*b)); 19 | int add_index = i/(o_w/i_w) + i_w*(j/(o_h/i_h) + i_h*(k + i_c*b)); 20 | out[out_index] = x[add_index]; 21 | } 22 | 23 | 24 | void resizeForward( dnnType* srcData, dnnType* dstData, int n, int i_c, int i_h, int i_w, 25 | int o_c, int o_h, int o_w, cudaStream_t stream ) 26 | { 27 | int o_size = n*o_c*o_h*o_w; 28 | 29 | int blocks = (o_size+255)/256; 30 | int threads = 256; 31 | 32 | resize_kernel<<>>(o_size, srcData, i_w, i_h, i_c, o_w, o_h, o_c, n, dstData); 33 | } 34 | -------------------------------------------------------------------------------- /src/kernels/scaladd.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | #include 3 | 4 | __global__ void scal_add_kernel(dnnType* dstData, int size, float alpha, float beta, int inc) 5 | { 6 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 7 | if (i < size) dstData[i*inc] = dstData[i*inc] * alpha + beta; 8 | } 9 | 10 | void scalAdd(dnnType* dstData, int size, float alpha, float beta, int inc, cudaStream_t stream) 11 | { 12 | int blocks = (size+255)/256; 13 | int threads = 256; 14 | 15 | scal_add_kernel<<>>(dstData, size, alpha, beta, inc); 16 | } -------------------------------------------------------------------------------- /src/kernels/shortcut.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | #include "assert.h" 3 | 4 | __global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, 5 | int w1, int h1, int c1, dnnType *add, 6 | int w2, int h2, int c2, float s1, float s2, dnnType *out) 7 | { 8 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 9 | if (id >= size) return; 10 | int i = id % minw; 11 | id /= minw; 12 | int j = id % minh; 13 | id /= minh; 14 | int k = id % minc; 15 | id /= minc; 16 | int b = id % batch; 17 | 18 | int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); 19 | int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); 20 | out[out_index] = s1*out[out_index] + s2*add[add_index]; 21 | //out[out_index] += add[add_index]; 22 | } 23 | 24 | __global__ void shortcut_mul_kernel(int size, int minw, int minh, int minc, int sample, int batch, 25 | int w1, int h1, int c1, dnnType *mul, 26 | int w2, int h2, int c2, float s1, float s2, dnnType *out) 27 | { 28 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 29 | if (id >= size) return; 30 | int i = id % minw; 31 | id /= minw; 32 | int j = id % minh; 33 | id /= minh; 34 | int k = id % minc; 35 | id /= minc; 36 | int b = id % batch; 37 | 38 | int out_index = i*sample + w1*(j*sample + h1*(k + c1*b)); 39 | out[out_index] = out[out_index] * mul[k + c2*b]; 40 | } 41 | 42 | void shortcutForward(dnnType* srcData, dnnType* dstData, int n1, int c1, int h1, int w1, int s1, 43 | int n2, int c2, int h2, int w2, int s2, 44 | bool mul, cudaStream_t stream) 45 | { 46 | assert(n1 == n2); 47 | int batch = n1; 48 | 49 | if(!mul){ 50 | int minw = (w1 < w2) ? w1 : w2; 51 | int minh = (h1 < h2) ? h1 : h2; 52 | int minc = (c1 < c2) ? c1 : c2; 53 | int stride = w1/w2; 54 | int sample = w2/w1; 55 | assert(stride == h1/h2); 56 | assert(sample == h2/h1); 57 | if(stride < 1) stride = 1; 58 | if(sample < 1) sample = 1; 59 | 60 | int size = batch * minw * minh * minc; 61 | int blocks = (size+255)/256; 62 | int threads = 256; 63 | 64 | shortcut_kernel<<>>(size, minw, minh, minc, stride, sample, batch, 65 | w1, h1, c1, srcData, w2, h2, c2, s1, s2, dstData); 66 | } 67 | else{ 68 | int minw = w1; 69 | int minh = h1; 70 | int minc = c1; 71 | int sample = 1; 72 | 73 | int size = batch * minw * minh * minc; 74 | int blocks = (size+255)/256; 75 | int threads = 256; 76 | 77 | shortcut_mul_kernel<<>>(size, minw, minh, minc, sample, batch, 78 | w1, h1, c1, srcData, w2, h2, c2, s1, s2, dstData); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/kernels/softmax.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | 3 | __device__ void softmax_device(float *input, int n, float temp, int stride, float *output) 4 | { 5 | int i; 6 | float sum = 0; 7 | float largest = -INFINITY; 8 | for(i = 0; i < n; ++i){ 9 | int val = input[i*stride]; 10 | largest = (val>largest) ? val : largest; 11 | } 12 | for(i = 0; i < n; ++i){ 13 | float e = exp(input[i*stride]/temp - largest/temp); 14 | sum += e; 15 | output[i*stride] = e; 16 | } 17 | for(i = 0; i < n; ++i){ 18 | output[i*stride] /= sum; 19 | } 20 | } 21 | 22 | __global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) 23 | { 24 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 25 | if (id >= batch*groups) return; 26 | int b = id / groups; 27 | int g = id % groups; 28 | softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); 29 | } 30 | 31 | /** 32 | softmax function 33 | */ 34 | void softmaxForward(float *input, int n, int batch, int batch_offset, 35 | int groups, int group_offset, int stride, float temp, float *output, cudaStream_t stream) 36 | { 37 | int size = groups*batch; 38 | int blocks = (size+255)/256; 39 | int threads = 256; 40 | 41 | softmax_kernel<<>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); 42 | } 43 | -------------------------------------------------------------------------------- /src/kernels/upsample.cu: -------------------------------------------------------------------------------- 1 | #include "kernels.h" 2 | 3 | __global__ void upsample_kernel(size_t N, dnnType *x, int w, int h, int c, int batch, int stride, int forward, float scale, dnnType *out) 4 | { 5 | size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 6 | if(i >= N) return; 7 | int out_index = i; 8 | int out_w = i%(w*stride); 9 | i = i/(w*stride); 10 | int out_h = i%(h*stride); 11 | i = i/(h*stride); 12 | int out_c = i%c; 13 | i = i/c; 14 | int b = i%batch; 15 | 16 | int in_w = out_w / stride; 17 | int in_h = out_h / stride; 18 | int in_c = out_c; 19 | 20 | int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; 21 | 22 | 23 | if(forward) out[out_index] += scale * x[in_index]; 24 | else atomicAdd(x+in_index, scale * out[out_index]); 25 | } 26 | 27 | void upsampleForward(dnnType* srcData, dnnType* dstData, 28 | int n, int c, int h, int w, int s, int forward, float scale, 29 | cudaStream_t stream) { 30 | 31 | int size = w*h*c*n*s*s; 32 | int blocks = (size+255)/256; 33 | int threads = 256; 34 | upsample_kernel<<>>(size, srcData, w, h, c, n, s, forward, scale, dstData); 35 | } 36 | -------------------------------------------------------------------------------- /tests/backbones/dla34/env_dla34.yml: -------------------------------------------------------------------------------- 1 | name: dla34 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - _pytorch_select=0.2=gpu_0 7 | - blas=1.0=mkl 8 | - ca-certificates=2019.10.16=0 9 | - certifi=2019.9.11=py36_0 10 | - cffi=1.13.1=py36h2e261b9_0 11 | - cudatoolkit=10.0.130=0 12 | - cudnn=7.6.0=cuda10.0_0 13 | - freetype=2.9.1=h8a8886c_1 14 | - intel-openmp=2019.4=243 15 | - jpeg=9b=h024ee3a_2 16 | - libedit=3.1.20181209=hc058e9b_0 17 | - libffi=3.2.1=hd88cf55_4 18 | - libgcc-ng=9.1.0=hdf63c60_0 19 | - libgfortran-ng=7.3.0=hdf63c60_0 20 | - libpng=1.6.37=hbc83047_0 21 | - libstdcxx-ng=9.1.0=hdf63c60_0 22 | - libtiff=4.0.10=h2733197_2 23 | - mkl=2019.4=243 24 | - mkl-service=2.3.0=py36he904b0f_0 25 | - mkl_fft=1.0.14=py36ha843d7b_0 26 | - mkl_random=1.1.0=py36hd6b4f25_0 27 | - ncurses=6.1=he6710b0_1 28 | - ninja=1.9.0=py36hfd86e86_0 29 | - numpy=1.17.2=py36haad9e8e_0 30 | - numpy-base=1.17.2=py36hde5b4d6_0 31 | - olefile=0.46=py36_0 32 | - openssl=1.1.1d=h7b6447c_3 33 | - pillow=6.2.0=py36h34e0f95_0 34 | - pip=19.3.1=py36_0 35 | - pycparser=2.19=py36_0 36 | - python=3.6.9=h265db76_0 37 | - readline=7.0=h7b6447c_5 38 | - setuptools=41.6.0=py36_0 39 | - six=1.12.0=py36_0 40 | - sqlite=3.30.1=h7b6447c_0 41 | - tk=8.6.8=hbc83047_0 42 | - wheel=0.33.6=py36_0 43 | - xz=5.2.4=h14c3975_4 44 | - zlib=1.2.11=h7b6447c_3 45 | - zstd=1.3.7=h0b5b093_0 46 | - pip: 47 | - chardet==3.0.4 48 | - decorator==4.4.1 49 | - idna==2.8 50 | - lxml==4.4.2 51 | - networkx==2.4 52 | - nltk==3.4.5 53 | - pytorchcv==0.0.55 54 | - requests==2.22.0 55 | - summary==0.2.0 56 | - torch==1.3.0 57 | - torchsummary==1.5.1 58 | - torchvision==0.4.1 59 | - urllib3==1.25.8 60 | 61 | -------------------------------------------------------------------------------- /tests/backbones/resnet101/env_resnet101.yml: -------------------------------------------------------------------------------- 1 | name: resnet101 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - _pytorch_select=0.2=gpu_0 7 | - blas=1.0=mkl 8 | - ca-certificates=2019.10.16=0 9 | - certifi=2019.9.11=py36_0 10 | - cffi=1.13.1=py36h2e261b9_0 11 | - cudatoolkit=10.0.130=0 12 | - cudnn=7.6.0=cuda10.0_0 13 | - freetype=2.9.1=h8a8886c_1 14 | - intel-openmp=2019.4=243 15 | - jpeg=9b=h024ee3a_2 16 | - libedit=3.1.20181209=hc058e9b_0 17 | - libffi=3.2.1=hd88cf55_4 18 | - libgcc-ng=9.1.0=hdf63c60_0 19 | - libgfortran-ng=7.3.0=hdf63c60_0 20 | - libpng=1.6.37=hbc83047_0 21 | - libstdcxx-ng=9.1.0=hdf63c60_0 22 | - libtiff=4.0.10=h2733197_2 23 | - mkl=2019.4=243 24 | - mkl-service=2.3.0=py36he904b0f_0 25 | - mkl_fft=1.0.14=py36ha843d7b_0 26 | - mkl_random=1.1.0=py36hd6b4f25_0 27 | - ncurses=6.1=he6710b0_1 28 | - ninja=1.9.0=py36hfd86e86_0 29 | - numpy=1.17.2=py36haad9e8e_0 30 | - numpy-base=1.17.2=py36hde5b4d6_0 31 | - olefile=0.46=py36_0 32 | - openssl=1.1.1d=h7b6447c_3 33 | - pillow=6.2.0=py36h34e0f95_0 34 | - pip=19.3.1=py36_0 35 | - pycparser=2.19=py36_0 36 | - python=3.6.9=h265db76_0 37 | - pytorch=1.2.0=cuda100py36h938c94c_0 38 | - readline=7.0=h7b6447c_5 39 | - setuptools=41.6.0=py36_0 40 | - six=1.12.0=py36_0 41 | - sqlite=3.30.1=h7b6447c_0 42 | - tk=8.6.8=hbc83047_0 43 | - wheel=0.33.6=py36_0 44 | - xz=5.2.4=h14c3975_4 45 | - zlib=1.2.11=h7b6447c_3 46 | - zstd=1.3.7=h0b5b093_0 47 | - pip: 48 | - chardet==3.0.4 49 | - idna==2.8 50 | - pytorchcv==0.0.55 51 | - requests==2.22.0 52 | - torch==1.3.0 53 | - torchsummary==1.5.1 54 | - torchvision==0.4.1 55 | - urllib3==1.25.8 56 | 57 | -------------------------------------------------------------------------------- /tests/darknet/cfg/yolo2.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=32 7 | subdivisions=8 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /tests/darknet/cfg/yolo2_voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=40000,60000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /tests/darknet/cfg/yolo2tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Training 3 | batch=64 4 | subdivisions=8 5 | # Testing 6 | # batch=1 7 | # subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | size=3 110 | stride=1 111 | pad=1 112 | filters=512 113 | activation=leaky 114 | 115 | [convolutional] 116 | size=1 117 | stride=1 118 | pad=1 119 | filters=425 120 | activation=linear 121 | 122 | [region] 123 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 124 | bias_match=1 125 | classes=80 126 | coords=4 127 | num=5 128 | softmax=1 129 | jitter=.2 130 | rescore=0 131 | 132 | object_scale=5 133 | noobject_scale=1 134 | class_scale=1 135 | coord_scale=1 136 | 137 | absolute=1 138 | thresh = .6 139 | random=1 140 | -------------------------------------------------------------------------------- /tests/darknet/cfg/yolo3tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /tests/darknet/cfg/yolo3tiny_512.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=512 9 | height=512 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /tests/darknet/csresnext50-panet-spp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "csresnext50-panet-spp"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer115_out.bin", 14 | bin_path + "/debug/layer126_out.bin", 15 | bin_path + "/debug/layer137_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/csresnext50-panet-spp.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/Kcs4xBozwY4wFx8/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/csresnext50-panet-spp_berkeley.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "csresnext50-panet-spp_berkeley"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer115_out.bin", 14 | bin_path + "/debug/layer126_out.bin", 15 | bin_path + "/debug/layer137_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/csresnext50-panet-spp_berkeley.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/berkeley.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/q82qHAtqpoaFYo5/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/names/berkeley.names: -------------------------------------------------------------------------------- 1 | person 2 | car 3 | truck 4 | bus 5 | motor 6 | bike 7 | rider 8 | traffic light 9 | traffic sign 10 | train -------------------------------------------------------------------------------- /tests/darknet/names/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /tests/darknet/names/coco2.names: -------------------------------------------------------------------------------- 1 | person 2 | stop sign -------------------------------------------------------------------------------- /tests/darknet/names/coco4.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | -------------------------------------------------------------------------------- /tests/darknet/names/crowdhuman.names: -------------------------------------------------------------------------------- 1 | person 2 | head -------------------------------------------------------------------------------- /tests/darknet/names/flir.names: -------------------------------------------------------------------------------- 1 | person 2 | bike 3 | car 4 | -------------------------------------------------------------------------------- /tests/darknet/names/mmr.names: -------------------------------------------------------------------------------- 1 | blue-cone 2 | yellow-cone 3 | orange-cone 4 | big-orange-cone -------------------------------------------------------------------------------- /tests/darknet/names/voc.names: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | -------------------------------------------------------------------------------- /tests/darknet/viz_yolo3.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "tkdnn.h" 7 | #include "test.h" 8 | #include "DarknetParser.h" 9 | #include "NetworkViz.h" 10 | 11 | int main(int argc, char *argv[]) { 12 | if(argc <2) 13 | FatalError("you must provide an input image"); 14 | std::string input_image = argv[1]; 15 | std::string bin_path = "yolo3"; 16 | std::string wgs_path = bin_path + "/layers"; 17 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3.cfg"; 18 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 19 | downloadWeightsifDoNotExist(wgs_path, bin_path, "https://cloud.hipert.unimore.it/s/jPXmHyptpLoNdNR/download"); 20 | 21 | // parse darknet network 22 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 23 | net->print(); 24 | 25 | // input data 26 | dnnType *input_d; 27 | checkCuda( cudaMalloc(&input_d, sizeof(dnnType)*net->input_dim.tot())); 28 | 29 | // load image 30 | cv::Mat frame, frameFloat; 31 | frame = cv::imread(input_image); 32 | cv::resize(frame, frame, cv::Size(net->input_dim.w, net->input_dim.h)); 33 | frame.convertTo(frameFloat, CV_32FC3, 1/255.0); 34 | 35 | //split channels 36 | cv::Mat bgr[3]; 37 | cv::split(frameFloat,bgr);//split source 38 | 39 | //write channels 40 | for(int i=0; iinput_dim.c; i++) { 41 | int idx = i*frameFloat.rows*frameFloat.cols; 42 | int ch = net->input_dim.c-1 -i; 43 | checkCuda( cudaMemcpy(input_d + idx, (void*)bgr[ch].data, frameFloat.rows*frameFloat.cols*sizeof(dnnType), cudaMemcpyHostToDevice)); 44 | } 45 | 46 | tk::dnn::dataDim_t dim = net->input_dim; 47 | dim.print(); 48 | std::cout<<"infer\n"; 49 | net->infer(dim, input_d); 50 | 51 | // output directory 52 | std::string output_viz = "viz/"; 53 | system( (std::string("mkdir -p ") + output_viz).c_str() ); 54 | 55 | for(int i=0; inum_layers; i++) { 56 | std::string output_png = output_viz + "/layer" + std::to_string(i) + ".png"; 57 | std::cout<<"saving "<releaseLayers(); 66 | delete net; 67 | return 0; 68 | } 69 | 70 | -------------------------------------------------------------------------------- /tests/darknet/yolo2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo2"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/layers/output.bin" 14 | }; 15 | std::string wgs_path = bin_path + "/layers"; 16 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo2.cfg"; 17 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 18 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/nf4PJ3k8bxBETwL/download"); 19 | 20 | // parse darknet network 21 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 22 | net->print(); 23 | 24 | //convert network to tensorRT 25 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 26 | 27 | int ret = testInference(input_bins, output_bins, net, netRT); 28 | net->releaseLayers(); 29 | delete net; 30 | delete netRT; 31 | return ret; 32 | } 33 | -------------------------------------------------------------------------------- /tests/darknet/yolo2_voc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo2_voc"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/layers/output.bin" 14 | }; 15 | std::string wgs_path = bin_path + "/layers"; 16 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo2_voc.cfg"; 17 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/voc.names"; 18 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/DJC5Fi2pEjfNDP9/download"); 19 | 20 | // parse darknet network 21 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 22 | net->print(); 23 | 24 | //convert network to tensorRT 25 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 26 | 27 | int ret = testInference(input_bins, output_bins, net, netRT); 28 | net->releaseLayers(); 29 | delete net; 30 | netRT->destroy(); 31 | delete netRT; 32 | return ret; 33 | } 34 | 35 | -------------------------------------------------------------------------------- /tests/darknet/yolo2tiny.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo2tiny"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/layers/output.bin" 14 | }; 15 | std::string wgs_path = bin_path + "/layers"; 16 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo2tiny.cfg"; 17 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 18 | // FIXME: wrong weights 19 | //downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s//download"); 20 | 21 | // parse darknet network 22 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 23 | net->print(); 24 | 25 | //convert network to tensorRT 26 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 27 | 28 | int ret = testInference(input_bins, output_bins, net, netRT); 29 | net->releaseLayers(); 30 | delete net; 31 | netRT->destroy(); 32 | delete netRT; 33 | return ret; 34 | } 35 | -------------------------------------------------------------------------------- /tests/darknet/yolo3.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo3"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer82_out.bin", 14 | bin_path + "/debug/layer94_out.bin", 15 | bin_path + "/debug/layer106_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/jPXmHyptpLoNdNR/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } -------------------------------------------------------------------------------- /tests/darknet/yolo3_512.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo3_512"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer82_out.bin", 14 | bin_path + "/debug/layer94_out.bin", 15 | bin_path + "/debug/layer106_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3_512.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/RGecMeGLD4cXEWL/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | // for(int i=0; inum_layers; i++) { 27 | // if(net->layers[i]->getLayerType() == tk::dnn::LAYER_CONV2D) { 28 | // tk::dnn::Conv2d *c = (tk::dnn::Conv2d*) net->layers[i]; 29 | // c->releaseDevice(); 30 | // c->releaseHost(true, false); 31 | // } 32 | // if(net->layers[i]->dstData != nullptr) { 33 | // cudaFree(net->layers[i]->dstData); 34 | // net->layers[i]->dstData = nullptr; 35 | // } 36 | // } 37 | 38 | //convert network to tensorRT 39 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 40 | 41 | int ret = testInference(input_bins, output_bins, net, netRT); 42 | net->releaseLayers(); 43 | delete net; 44 | netRT->destroy(); 45 | delete netRT; 46 | return ret; 47 | } 48 | -------------------------------------------------------------------------------- /tests/darknet/yolo3_berkeley.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo3_berkeley"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer82_out.bin", 14 | bin_path + "/debug/layer94_out.bin", 15 | bin_path + "/debug/layer106_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3_berkeley.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/berkeley.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/o5cHa4AjTKS64oD/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo3_coco4.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo3_coco4"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer82_out.bin", 14 | bin_path + "/debug/layer94_out.bin", 15 | bin_path + "/debug/layer106_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3_coco4.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco4.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/o27NDzSAartbyc4/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo3_flir.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo3_flir"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer82_out.bin", 14 | bin_path + "/debug/layer94_out.bin", 15 | bin_path + "/debug/layer106_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3_flir.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/flir.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/62DECncmF6bMMiH/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo3tiny.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo3tiny"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer16_out.bin", 14 | bin_path + "/debug/layer23_out.bin", 15 | }; 16 | std::string wgs_path = bin_path + "/layers"; 17 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3tiny.cfg"; 18 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 19 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/LMcSHtWaLeps8yN/download"); 20 | 21 | // parse darknet network 22 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 23 | net->print(); 24 | 25 | //convert network to tensorRT 26 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 27 | 28 | int ret = testInference(input_bins, output_bins, net, netRT); 29 | net->releaseLayers(); 30 | delete net; 31 | netRT->destroy(); 32 | delete netRT; 33 | return ret; 34 | } 35 | -------------------------------------------------------------------------------- /tests/darknet/yolo3tiny_512.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo3tiny_512"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer16_out.bin", 14 | bin_path + "/debug/layer23_out.bin", 15 | }; 16 | std::string wgs_path = bin_path + "/layers"; 17 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3tiny_512.cfg"; 18 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 19 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/8Zt6bHwHADqP4JC/download"); 20 | 21 | // parse darknet network 22 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 23 | net->print(); 24 | 25 | // for(int i=0; inum_layers; i++) { 26 | // if(net->layers[i]->getLayerType() == tk::dnn::LAYER_CONV2D) { 27 | // tk::dnn::Conv2d *c = (tk::dnn::Conv2d*) net->layers[i]; 28 | // c->releaseDevice(); 29 | // c->releaseHost(true, false); 30 | // } 31 | // if(net->layers[i]->dstData != nullptr) { 32 | // cudaFree(net->layers[i]->dstData); 33 | // net->layers[i]->dstData = nullptr; 34 | // } 35 | // } 36 | 37 | 38 | //convert network to tensorRT 39 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 40 | 41 | int ret = testInference(input_bins, output_bins, net, netRT); 42 | net->releaseLayers(); 43 | delete net; 44 | netRT->destroy(); 45 | delete netRT; 46 | return ret; 47 | } 48 | -------------------------------------------------------------------------------- /tests/darknet/yolo4-csp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4-csp"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer144_out.bin", 14 | bin_path + "/debug/layer159_out.bin", 15 | bin_path + "/debug/layer174_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4-csp.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/AfzHE4BfTeEm2gH/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } -------------------------------------------------------------------------------- /tests/darknet/yolo4-csp_crowd.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4-csp_crowd"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer144_out.bin", 14 | bin_path + "/debug/layer159_out.bin", 15 | bin_path + "/debug/layer174_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4-csp_crowd.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/crowdhuman.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/RKWfWNmWXfJigsK/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | delete netRT; 33 | return ret; 34 | } -------------------------------------------------------------------------------- /tests/darknet/yolo4.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer139_out.bin", 14 | bin_path + "/debug/layer150_out.bin", 15 | bin_path + "/debug/layer161_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = "../tests/darknet/cfg/yolo4.cfg"; 19 | std::string name_path = "../tests/darknet/names/coco.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/982LxTQcNQfFQc4/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo4_320.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4_320"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer139_out.bin", 14 | bin_path + "/debug/layer150_out.bin", 15 | bin_path + "/debug/layer161_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = "../tests/darknet/cfg/yolo4_320.cfg"; 19 | std::string name_path = "../tests/darknet/names/coco.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/64PHAwrM6RCZbiR/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo4_320_coco2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4_320_coco2"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer139_out.bin", 14 | bin_path + "/debug/layer150_out.bin", 15 | bin_path + "/debug/layer161_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = "../tests/darknet/cfg/yolo4_320_coco2.cfg"; 19 | std::string name_path = "../tests/darknet/names/coco2.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/f3wk99iG5y7tEr8/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo4_512.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4_512"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer139_out.bin", 14 | bin_path + "/debug/layer150_out.bin", 15 | bin_path + "/debug/layer161_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = "../tests/darknet/cfg/yolo4_512.cfg"; 19 | std::string name_path = "../tests/darknet/names/coco.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/fjFDqFmiSARKxFe/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | // for(int i=0; inum_layers; i++) { 27 | // if(net->layers[i]->getLayerType() == tk::dnn::LAYER_CONV2D) { 28 | // tk::dnn::Conv2d *c = (tk::dnn::Conv2d*) net->layers[i]; 29 | // c->releaseDevice(); 30 | // c->releaseHost(true, false); 31 | // } 32 | // if(net->layers[i]->dstData != nullptr) { 33 | // cudaFree(net->layers[i]->dstData); 34 | // net->layers[i]->dstData = nullptr; 35 | // } 36 | // } 37 | 38 | 39 | //convert network to tensorRT 40 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 41 | 42 | int ret = testInference(input_bins, output_bins, net, netRT); 43 | net->releaseLayers(); 44 | delete net; 45 | netRT->destroy(); 46 | delete netRT; 47 | return ret; 48 | } 49 | -------------------------------------------------------------------------------- /tests/darknet/yolo4_608.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4_608"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer139_out.bin", 14 | bin_path + "/debug/layer150_out.bin", 15 | bin_path + "/debug/layer161_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = "../tests/darknet/cfg/yolo4_608.cfg"; 19 | std::string name_path = "../tests/darknet/names/coco.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/Bg9r7kqDFJiFB4c/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo4_berkeley.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4_berkeley"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer139_out.bin", 14 | bin_path + "/debug/layer150_out.bin", 15 | bin_path + "/debug/layer161_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4_berkeley.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/berkeley.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/nkWFa5fgb4NTdnB/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo4_berkeley_f1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4_berkeley_f1"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer139_out.bin", 14 | bin_path + "/debug/layer150_out.bin", 15 | bin_path + "/debug/layer161_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4_berkeley.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/berkeley.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/q9dwoqQ5YQqEi7s/download"); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo4_mmr.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4_mmr"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer139_out.bin", 14 | bin_path + "/debug/layer150_out.bin", 15 | bin_path + "/debug/layer161_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4_mmr.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/mmr.names"; 20 | // downloadWeightsifDoNotExist(input_bins[0], bin_path, ""); 21 | 22 | // parse darknet network 23 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 24 | net->print(); 25 | 26 | //convert network to tensorRT 27 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 28 | 29 | int ret = testInference(input_bins, output_bins, net, netRT); 30 | net->releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo4tiny.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4tiny"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer30_out.bin", 14 | bin_path + "/debug/layer37_out.bin" 15 | }; 16 | std::string wgs_path = bin_path + "/layers"; 17 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4tiny.cfg"; 18 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 19 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/iRnc4pSqmx78gJs/download"); 20 | 21 | // parse darknet network 22 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 23 | net->print(); 24 | 25 | //convert network to tensorRT 26 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 27 | 28 | int ret = testInference(input_bins, output_bins, net, netRT); 29 | std::cout<releaseLayers(); 31 | delete net; 32 | netRT->destroy(); 33 | delete netRT; 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /tests/darknet/yolo4tiny_512.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4tiny_512"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer30_out.bin", 14 | bin_path + "/debug/layer37_out.bin" 15 | }; 16 | std::string wgs_path = bin_path + "/layers"; 17 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4tiny_512.cfg"; 18 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 19 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/qa2ws4GXg7mS5nN/download"); 20 | 21 | // parse darknet network 22 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 23 | net->print(); 24 | 25 | // for(int i=0; inum_layers; i++) { 26 | // if(net->layers[i]->getLayerType() == tk::dnn::LAYER_CONV2D) { 27 | // tk::dnn::Conv2d *c = (tk::dnn::Conv2d*) net->layers[i]; 28 | // c->releaseDevice(); 29 | // c->releaseHost(true, false); 30 | // } 31 | // if(net->layers[i]->dstData != nullptr) { 32 | // cudaFree(net->layers[i]->dstData); 33 | // net->layers[i]->dstData = nullptr; 34 | // } 35 | // } 36 | 37 | //convert network to tensorRT 38 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 39 | 40 | int ret = testInference(input_bins, output_bins, net, netRT); 41 | net->releaseLayers(); 42 | delete net; 43 | netRT->destroy(); 44 | delete netRT; 45 | return ret; 46 | } 47 | -------------------------------------------------------------------------------- /tests/darknet/yolo4x.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkdnn.h" 4 | #include "test.h" 5 | #include "DarknetParser.h" 6 | 7 | int main() { 8 | std::string bin_path = "yolo4x"; 9 | std::vector input_bins = { 10 | bin_path + "/layers/input.bin" 11 | }; 12 | std::vector output_bins = { 13 | bin_path + "/debug/layer168_out.bin", 14 | bin_path + "/debug/layer185_out.bin", 15 | bin_path + "/debug/layer202_out.bin" 16 | }; 17 | std::string wgs_path = bin_path + "/layers"; 18 | std::string cfg_path = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4x.cfg"; 19 | std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names"; 20 | downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/5MFjtNtgbDGdJEo/download"); 21 | 22 | 23 | 24 | // parse darknet network 25 | tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path); 26 | net->print(); 27 | 28 | //convert network to tensorRT 29 | tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str())); 30 | 31 | int ret = testInference(input_bins, output_bins, net, netRT); 32 | net->releaseLayers(); 33 | delete net; 34 | netRT->destroy(); 35 | delete netRT; 36 | return ret; 37 | } 38 | -------------------------------------------------------------------------------- /tests/exporters/caffe_weights_exporter.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import msgpack 4 | import lmdb 5 | import random 6 | import caffe 7 | import numpy as np 8 | 9 | if __name__ == '__main__': 10 | parser = argparse.ArgumentParser(description='CAFFE WEIGHTS EXPORTER TO CUDNN') 11 | parser.add_argument('model',type=str, 12 | help='Path to prototxt network model') 13 | parser.add_argument('weights',type=str, 14 | help='Path to caffemodel file') 15 | 16 | parser.add_argument('--output', type=str, help="output directory", default="layers") 17 | 18 | args = parser.parse_args() 19 | 20 | if not os.path.exists(args.output): 21 | os.makedirs(args.output) 22 | 23 | print "\n\n ====== NET LOADED ====== " 24 | net = caffe.Net(args.model, args.weights, caffe.TEST) 25 | n_lay = len(net.params) 26 | print "Number of layers: ", n_lay 27 | for i in xrange(n_lay): 28 | key = net.params.keys()[i] 29 | print "Layer", key 30 | t = net.layer_dict[key].type 31 | print " type: ", t 32 | w = net.params[key][0].data 33 | b = net.params[key][1].data 34 | print " weights shape:", np.shape(w) 35 | print " bias shape:", np.shape(b) 36 | 37 | w.tofile(args.output + "/" + t + str(i) + ".bin", format="f") 38 | b.tofile(args.output + "/" + t + str(i) + ".bias.bin", format="f") 39 | -------------------------------------------------------------------------------- /tests/imuodom/imuodom.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "tkDNN/ImuOdom.h" 3 | 4 | const char *i0_bin = "imuodom/layers/input0.bin"; 5 | const char *i1_bin = "imuodom/layers/input1.bin"; 6 | const char *i2_bin = "imuodom/layers/input2.bin"; 7 | const char *o0_bin = "imuodom/layers/output0.bin"; 8 | const char *o1_bin = "imuodom/layers/output1.bin"; 9 | 10 | int main() { 11 | 12 | // V1 13 | downloadWeightsifDoNotExist(i0_bin, "imuodom", "https://cloud.hipert.unimore.it/s/ZAy34K5w2ixED6x/download"); 14 | 15 | // V2 16 | //downloadWeightsifDoNotExist(i0_bin, "imuodom", "https://cloud.hipert.unimore.it/s/BBSEbEbQbPKxp4s/download"); 17 | 18 | tk::dnn::ImuOdom ImuNet; 19 | ImuNet.init("imuodom/layers/"); 20 | 21 | const int N = 19513; 22 | 23 | // Network layout 24 | tk::dnn::dataDim_t dim0(1, 4, 1, 100); 25 | tk::dnn::dataDim_t dim1(1, 3, 1, 100); 26 | tk::dnn::dataDim_t dim2(1, 3, 1, 100); 27 | 28 | // Load input 29 | dnnType *i0_d, *i1_d, *i2_d; 30 | dnnType *i0_h, *i1_h, *i2_h; 31 | readBinaryFile(i0_bin, dim0.tot()*N, &i0_h, &i0_d); 32 | readBinaryFile(i1_bin, dim1.tot()*N, &i1_h, &i1_d); 33 | readBinaryFile(i2_bin, dim2.tot()*N, &i2_h, &i2_d); 34 | 35 | dnnType *data; 36 | tk::dnn::dataDim_t dim; 37 | 38 | dnnType *out0, *out1; 39 | dnnType *out0_h, *out1_h; 40 | readBinaryFile(o0_bin, ImuNet.odim0.tot()*N, &out0_h, &out0); 41 | readBinaryFile(o1_bin, ImuNet.odim1.tot()*N, &out1_h, &out1); 42 | 43 | 44 | std::ofstream path("path.txt"); 45 | 46 | int ret_cudnn = 0; 47 | for(int i=0; i 2 | #include "tkdnn.h" 3 | 4 | const char *input_bin = "mnist/input.bin"; 5 | const char *c0_bin = "mnist/layers/c0.bin"; 6 | const char *c1_bin = "mnist/layers/c1.bin"; 7 | const char *d2_bin = "mnist/layers/d2.bin"; 8 | const char *d3_bin = "mnist/layers/d3.bin"; 9 | const char *output_bin = "mnist/output.bin"; 10 | 11 | int main() { 12 | 13 | downloadWeightsifDoNotExist(input_bin, "mnist", "https://cloud.hipert.unimore.it/s/2TyQkMJL3LArLAS/download"); 14 | 15 | // Network layout 16 | tk::dnn::dataDim_t dim(1, 1, 28, 28, 1); 17 | tk::dnn::Network net(dim); 18 | tk::dnn::Conv2d l0(&net, 20, 5, 5, 1, 1, 0, 0, c0_bin); 19 | tk::dnn::Pooling l1(&net, 2, 2, 2, 2, 0, 0, tk::dnn::POOLING_MAX); 20 | tk::dnn::Conv2d l2(&net, 50, 5, 5, 1, 1, 0, 0, c1_bin); 21 | tk::dnn::Pooling l3(&net, 2, 2, 2, 2, 0, 0, tk::dnn::POOLING_MAX); 22 | tk::dnn::Dense l4(&net, 500, d2_bin); 23 | tk::dnn::Activation l5(&net, tk::dnn::ACTIVATION_LEAKY); 24 | tk::dnn::Dense l6(&net, 10, d3_bin); 25 | tk::dnn::Softmax l7(&net); 26 | 27 | tk::dnn::NetworkRT netRT(&net, net.getNetworkRTName("mnist")); 28 | 29 | // Load input 30 | dnnType *data; 31 | dnnType *input_h; 32 | readBinaryFile(input_bin, dim.tot(), &input_h, &data); 33 | 34 | dnnType *out_data, *out_data2; 35 | 36 | std::cout<<"CUDNN inference:\n"; { 37 | dim.print(); //print initial dimension 38 | TKDNN_TSTART 39 | out_data = net.infer(dim, data); 40 | TKDNN_TSTOP 41 | dim.print(); 42 | } 43 | 44 | // Print result 45 | //std::cout<<"\n======= CUDNN RESULT =======\n"; 46 | //printDeviceVector(10, out_data); 47 | 48 | tk::dnn::dataDim_t dim2(1, 1, 28, 28, 1); 49 | 50 | std::cout<<"TENSORRT inference:\n"; { 51 | dim2.print(); 52 | TKDNN_TSTART 53 | out_data2 = netRT.infer(dim2, data); 54 | TKDNN_TSTOP 55 | dim2.print(); 56 | } 57 | 58 | // Print result 59 | //std::cout<<"\n======= TENRT RESULT =======\n"; 60 | //printDeviceVector(10, out_data); 61 | 62 | std::cout<<"\n======= CHECK RESULT =======\n"; 63 | int ret_tensorrt = checkResult(dim.tot(), out_data, out_data2) == 0 ? 0 : ERROR_TENSORRT; 64 | 65 | /* 66 | // Print real test 67 | std::cout<<"\n==== CHECK RESULT ====\n"; 68 | dnnType *out; 69 | dnnType *out_h; 70 | readBinaryFile(output_bin, dim.tot(), &out_h, &out); 71 | printDeviceVector(dim.tot(), out); 72 | */ 73 | return ret_tensorrt; 74 | } 75 | -------------------------------------------------------------------------------- /tests/simple/test_model.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import numpy as np 3 | from keras.models import Sequential 4 | from keras.layers import Input, Dense, Activation, Flatten, Dropout, ELU, Reshape, Lambda, Conv1D 5 | from keras.layers import Bidirectional, CuDNNLSTM 6 | from keras.layers.convolutional import Convolution2D, Convolution3D 7 | from keras.layers.pooling import MaxPooling2D, MaxPooling3D, AveragePooling3D 8 | from keras.models import Sequential, Model 9 | from keras.layers import Cropping2D 10 | import keras.backend.tensorflow_backend as KTF 11 | import struct 12 | from keras.models import Sequential, Model 13 | 14 | def bin_write(f, data): 15 | data = data.flatten() 16 | fmt = 'f'*len(data) 17 | bin = struct.pack(fmt, *data) 18 | f.write(bin) 19 | 20 | def create_model(): 21 | x1 = Input((3, 8), name='x1') 22 | conv = Conv1D(4, 2)(x1) 23 | lstm = Bidirectional(CuDNNLSTM(5, return_sequences=True))(conv) 24 | lstm2 = Bidirectional(CuDNNLSTM(5, return_sequences=False))(lstm) 25 | model = Model([x1], [lstm2]) 26 | model.summary() 27 | 28 | return model 29 | 30 | if __name__ == '__main__': 31 | print ("DATA FORMAT: ", keras.backend.image_data_format()) 32 | 33 | model = create_model() 34 | model.save("net.hdf5") 35 | 36 | np.random.seed(2) 37 | x = np.random.rand(1,1,3,8) 38 | r = model.predict( x[0], batch_size=1) 39 | 40 | r = np.array([r]) 41 | x = x.transpose(0, 3, 1, 2) 42 | #r = r.transpose(0, 3, 1, 2) 43 | print("in: ", np.shape(x)) 44 | print("out: ", np.shape(r)) 45 | print("output: ", r.tolist()) 46 | 47 | x = np.array(x.flatten(), dtype=np.float32) 48 | f = open("input.bin", mode='wb') 49 | bin_write(f, x) 50 | 51 | r = np.array(r.flatten(), dtype=np.float32) 52 | f = open("output.bin", mode='wb') 53 | bin_write(f, r) 54 | 55 | -------------------------------------------------------------------------------- /tests/simple/test_simple.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "tkdnn.h" 3 | 4 | const char *input_bin = "simple/input.bin"; 5 | const char *c0_bin = "simple/layers/conv1d_1.bin"; 6 | const char *l1_bin = "simple/layers/bidirectional_1.bin"; 7 | const char *l2_bin = "simple/layers/bidirectional_2.bin"; 8 | const char *output_bin = "simple/output.bin"; 9 | 10 | int main() { 11 | 12 | // Network layout 13 | tk::dnn::dataDim_t dim(1, 8, 1, 3); 14 | tk::dnn::Network net(dim); 15 | tk::dnn::Conv2d l0(&net, 4, 1, 2, 1, 1, 0, 0, c0_bin); 16 | tk::dnn::LSTM l1(&net, 5, true, l1_bin); 17 | tk::dnn::LSTM l2(&net, 5, false, l2_bin); 18 | 19 | net.print(); 20 | 21 | net.print(); 22 | 23 | // Load input 24 | dnnType *data; 25 | dnnType *input_h; 26 | readBinaryFile(input_bin, dim.tot(), &input_h, &data); 27 | 28 | // Print input 29 | std::cout<<"\n======= INPUT =======\n"; 30 | printDeviceVector(dim.tot(), data); 31 | std::cout<<"\n"; 32 | 33 | //convert network to tensorRT 34 | tk::dnn::NetworkRT netRT(&net, net.getNetworkRTName("simple")); 35 | 36 | dnnType *out_data, *out_data2; // cudnn output, tensorRT output 37 | 38 | tk::dnn::dataDim_t dim1 = dim; //input dim 39 | printCenteredTitle(" CUDNN inference ", '=', 30); { 40 | dim1.print(); 41 | TKDNN_TSTART 42 | out_data = net.infer(dim1, data); 43 | TKDNN_TSTOP 44 | dim1.print(); 45 | } 46 | 47 | tk::dnn::dataDim_t dim2 = dim; 48 | printCenteredTitle(" TENSORRT inference ", '=', 30); { 49 | dim2.print(); 50 | TKDNN_TSTART 51 | out_data2 = netRT.infer(dim2, data); 52 | TKDNN_TSTOP 53 | dim2.print(); 54 | } 55 | 56 | std::cout<<"\n======= CUDNN =======\n"; 57 | printDeviceVector(dim.tot(), out_data); 58 | std::cout<<"\n======= TENSORRT =======\n"; 59 | printDeviceVector(dim.tot(), out_data2); 60 | 61 | printCenteredTitle(" CHECK RESULTS ", '=', 30); 62 | dnnType *out, *out_h; 63 | int out_dim = net.getOutputDim().tot(); 64 | //readBinaryFile(output_bin, out_dim, &out_h, &out); 65 | // std::cout<<"CUDNN vs correct"; 66 | // int ret_cudnn = checkResult(out_dim, out_data, out) == 0 ? 0: ERROR_CUDNN; 67 | // std::cout<<"TRT vs correct"; 68 | // int ret_tensorrt = checkResult(out_dim, out_data2, out) == 0 ? 0 : ERROR_TENSORRT; 69 | std::cout<<"CUDNN vs TRT "; 70 | int ret_cudnn_tensorrt = checkResult(out_dim, out_data, out_data2) == 0 ? 0 : ERROR_CUDNNvsTENSORRT; 71 | 72 | return ret_cudnn_tensorrt; 73 | } 74 | -------------------------------------------------------------------------------- /tests/test_rtinference/rtinference.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tkDNN/tkdnn.h" 4 | #include /* srand, rand */ 5 | 6 | 7 | int main(int argc, char *argv[]) { 8 | 9 | if(argc < 2 || !fileExist(argv[1])) 10 | FatalError("unable to read serialRT file"); 11 | 12 | int BATCH_SIZE = 1; 13 | if(argc >2) 14 | BATCH_SIZE = atoi(argv[2]); 15 | 16 | int NTEST = 100; 17 | if(argc >3) 18 | NTEST = atoi(argv[3]); 19 | 20 | //always same test 21 | srand (0); 22 | 23 | //convert network to tensorRT 24 | tk::dnn::NetworkRT netRT(NULL, argv[1]); 25 | 26 | 27 | 28 | tk::dnn::dataDim_t idim = netRT.input_dim; 29 | tk::dnn::dataDim_t odim = netRT.output_dim; 30 | idim.n = BATCH_SIZE; 31 | odim.n = BATCH_SIZE; 32 | dnnType *input = new float[idim.tot()]; 33 | dnnType *output = new float[odim.tot()]; 34 | dnnType *input_d; 35 | checkCuda( cudaMalloc(&input_d, idim.tot()*sizeof(dnnType))); 36 | 37 | int ret_tensorrt = 0; 38 | std::cout<<"Testing with batchsize: "< stats; 40 | printCenteredTitle(" TENSORRT inference ", '=', 30); 41 | float total_time = 0; 42 | for(int i=0; (NTEST > 0 ? i 1) 58 | stats.push_back(t_ns); 59 | 60 | // control output 61 | // std::cout<<"Output Buffers: "<