├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cmake
    ├── FindCUDNN.cmake
    ├── getCudaArch.cu
    └── tkDNNConfig.cmake
├── demo
    ├── config.yaml
    ├── config_tetra.yaml
    ├── demo
    │   ├── demo.cpp
    │   ├── demo3D.cpp
    │   ├── demoDepth.cpp
    │   ├── demoTracker.cpp
    │   ├── map.cpp
    │   └── seg_demo.cpp
    ├── demoConfig.yaml
    └── yolo_test.mp4
├── docker
    ├── Dockerfile
    ├── Dockerfile.base
    ├── README.md
    ├── assets
    │   ├── entrypoint_setup.sh
    │   └── terminator_config
    └── docker_launch.sh
├── docs
    ├── README_2d3dtracking.md
    ├── README_depth.md
    ├── README_seg.md
    ├── demo.md
    ├── exporting_weights.md
    ├── mAP_demo.md
    └── windows.md
├── include
    └── tkDNN
    │   ├── BoundingBox.h
    │   ├── CenterTrack.h
    │   ├── CenternetDetection.h
    │   ├── CenternetDetection3D.h
    │   ├── DarknetParser.h
    │   ├── DepthNN.h
    │   ├── DetectionNN.h
    │   ├── DetectionNN3D.h
    │   ├── ImuOdom.h
    │   ├── Int8BatchStream.h
    │   ├── Int8Calibrator.h
    │   ├── Layer.h
    │   ├── MobilenetDetection.h
    │   ├── Network.h
    │   ├── NetworkRT.h
    │   ├── NetworkViz.h
    │   ├── SegmentationNN.h
    │   ├── TrackingNN.h
    │   ├── Yolo3Detection.h
    │   ├── demo_utils.h
    │   ├── evaluation.h
    │   ├── kernels.h
    │   ├── kernelsThrust.h
    │   ├── pluginsRT
    │       ├── ActivationLeakyRT.h
    │       ├── ActivationLogisticRT.h
    │       ├── ActivationMishRT.h
    │       ├── ActivationReLUCeilingRT.h
    │       ├── ActivationSigmoidRT.h
    │       ├── ConstantPaddingRT.h
    │       ├── DeformableConvRT.h
    │       ├── FlattenConcatRT.h
    │       ├── Int8Calibrator.h
    │       ├── MaxPoolingFixedSizeRT.h
    │       ├── ReflectionPadding.h
    │       ├── RegionRT.h
    │       ├── ReorgRT.h
    │       ├── ReshapeRT.h
    │       ├── ResizeLayerRT.h
    │       ├── RouteRT.h
    │       ├── ShortcutRT.h
    │       ├── UpsampleRT.h
    │       └── YoloRT.h
    │   ├── test.h
    │   ├── tkdnn.h
    │   └── utils.h
├── scripts
    ├── checkExecTimes.py
    ├── download_validation.py
    ├── download_validation.sh
    ├── install_OpenCV4.sh
    ├── test_all_tests.sh
    └── test_inference.sh
├── src
    ├── Activation.cpp
    ├── BoundingBox.cpp
    ├── CenterTrack.cpp
    ├── CenternetDetection.cpp
    ├── CenternetDetection3D.cpp
    ├── Conv2d.cpp
    ├── DarknetParser.cpp
    ├── DeformConv2d.cpp
    ├── Dense.cpp
    ├── Flatten.cpp
    ├── Int8BatchStream.cpp
    ├── Int8Calibrator.cpp
    ├── LSTM.cpp
    ├── Layer.cpp
    ├── LayerWgs.cpp
    ├── MobilenetDetection.cpp
    ├── MulAdd.cpp
    ├── Network.cpp
    ├── NetworkRT.cpp
    ├── NetworkViz.cpp
    ├── Padding.cpp
    ├── Pooling.cpp
    ├── Region.cpp
    ├── Reorg.cpp
    ├── Reshape.cpp
    ├── Resize.cpp
    ├── Route.cpp
    ├── Shortcut.cpp
    ├── Softmax.cpp
    ├── Upsample.cpp
    ├── Yolo.cpp
    ├── Yolo3Detection.cpp
    ├── demo_utils.cpp
    ├── evaluation.cpp
    ├── kernels
    │   ├── activation_elu.cu
    │   ├── activation_leaky.cu
    │   ├── activation_logistic.cu
    │   ├── activation_mish.cu
    │   ├── activation_relu_ceiling.cu
    │   ├── activation_sigmoid.cu
    │   ├── convert.cu
    │   ├── deformable_conv.cu
    │   ├── fill.cu
    │   ├── normalize.cu
    │   ├── padding.cu
    │   ├── pooling.cu
    │   ├── postprocessing.cu
    │   ├── reorg.cu
    │   ├── resize.cu
    │   ├── scaladd.cu
    │   ├── shortcut.cu
    │   ├── softmax.cu
    │   └── upsample.cu
    ├── pluginsRT
    │   ├── ActivationLeakyRT.cpp
    │   ├── ActivationLogisticRT.cpp
    │   ├── ActivationMishRT.cpp
    │   ├── ActivationReLUCeilingRT.cpp
    │   ├── ConstantPaddingRT.cpp
    │   ├── DeformableConvRT.cpp
    │   ├── FlattenConcatRT.cpp
    │   ├── MaxPoolingSizeRT.cpp
    │   ├── ReflectionPadding.cpp
    │   ├── RegionRT.cpp
    │   ├── ReorgRT.cpp
    │   ├── ReshapeRT.cpp
    │   ├── ResizeLayerRT.cpp
    │   ├── RouteRT.cpp
    │   ├── ShortcutRT.cpp
    │   ├── UpsampleRT.cpp
    │   └── YoloRT.cpp
    └── utils.cpp
└── tests
    ├── backbones
        ├── dla34
        │   ├── dla34.cpp
        │   ├── dla34_weightsexporter.py
        │   └── env_dla34.yml
        └── resnet101
        │   ├── env_resnet101.yml
        │   ├── resnet101.cpp
        │   └── resnet101_weightsexporter.py
    ├── centernet
        ├── dla34_cnet
        │   └── dla34_cnet.cpp
        ├── dla34_cnet3d
        │   └── dla34_cnet3d.cpp
        └── resnet101_cnet
        │   └── resnet101_cnet.cpp
    ├── centertrack
        └── dla34_ctrack
        │   └── dla34_ctrack.cpp
    ├── darknet
        ├── cfg
        │   ├── csresnext50-panet-spp.cfg
        │   ├── csresnext50-panet-spp_berkeley.cfg
        │   ├── yolo2.cfg
        │   ├── yolo2_voc.cfg
        │   ├── yolo2tiny.cfg
        │   ├── yolo3.cfg
        │   ├── yolo3_512.cfg
        │   ├── yolo3_berkeley.cfg
        │   ├── yolo3_coco4.cfg
        │   ├── yolo3_flir.cfg
        │   ├── yolo3tiny.cfg
        │   ├── yolo3tiny_512.cfg
        │   ├── yolo4-csp.cfg
        │   ├── yolo4-csp_crowd.cfg
        │   ├── yolo4.cfg
        │   ├── yolo4_320.cfg
        │   ├── yolo4_320_coco2.cfg
        │   ├── yolo4_512.cfg
        │   ├── yolo4_608.cfg
        │   ├── yolo4_berkeley.cfg
        │   ├── yolo4_mmr.cfg
        │   ├── yolo4tiny.cfg
        │   ├── yolo4tiny_512.cfg
        │   └── yolo4x.cfg
        ├── csresnext50-panet-spp.cpp
        ├── csresnext50-panet-spp_berkeley.cpp
        ├── names
        │   ├── berkeley.names
        │   ├── coco.names
        │   ├── coco2.names
        │   ├── coco4.names
        │   ├── crowdhuman.names
        │   ├── flir.names
        │   ├── mmr.names
        │   └── voc.names
        ├── viz_yolo3.cpp
        ├── yolo2.cpp
        ├── yolo2_voc.cpp
        ├── yolo2tiny.cpp
        ├── yolo3.cpp
        ├── yolo3_512.cpp
        ├── yolo3_berkeley.cpp
        ├── yolo3_coco4.cpp
        ├── yolo3_flir.cpp
        ├── yolo3tiny.cpp
        ├── yolo3tiny_512.cpp
        ├── yolo4-csp.cpp
        ├── yolo4-csp_crowd.cpp
        ├── yolo4.cpp
        ├── yolo4_320.cpp
        ├── yolo4_320_coco2.cpp
        ├── yolo4_512.cpp
        ├── yolo4_608.cpp
        ├── yolo4_berkeley.cpp
        ├── yolo4_berkeley_f1.cpp
        ├── yolo4_mmr.cpp
        ├── yolo4tiny.cpp
        ├── yolo4tiny_512.cpp
        └── yolo4x.cpp
    ├── exporters
        ├── caffe_weights_exporter.py
        └── keras_weights_exporter.py
    ├── imuodom
        ├── imuodom.cpp
        └── infer.py
    ├── mnist
        ├── mnist_model.py
        ├── test_mnist.cpp
        └── test_mnistRT.cpp
    ├── mobilenet
        ├── bdd-mobilenetv2ssd
        │   └── bdd-mobilenetv2ssd.cpp
        ├── mobilenetv2ssd
        │   └── mobilenetv2ssd.cpp
        └── mobilenetv2ssd512
        │   └── mobilenetv2ssd512.cpp
    ├── monodepth2
        ├── monodepth2_1024.cpp
        └── monodepth2_640.cpp
    ├── shelfnet
        ├── shelfnet.cpp
        ├── shelfnet_berkeley.cpp
        ├── shelfnet_coco.cpp
        └── shelfnet_mapillary.cpp
    ├── simple
        ├── test_model.py
        └── test_simple.cpp
    └── test_rtinference
        └── rtinference.cpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | build/
 3 | .vscode/
 4 | *.bin
 5 | *.pyc
 6 | *.prototxt
 7 | *.caffemodel
 8 | *.h5
 9 | *.tar.gz
10 | *.weights
11 | .idea/
12 | *.hdf5
13 | *.pk
14 | *.table
15 | cmake-build-release/
16 | demo/COCO_val2017
17 | demo/BDD100K_val
18 | /.vs
19 | cmake-build-minsizerel/*
20 | scripts/COCO_val2017/*
21 | scripts/COCO_val2017.zip
22 | scripts/all_labels.txt
23 | /cmake/cuda_script
24 | /cmake-build-debug/
25 | 


--------------------------------------------------------------------------------
/cmake/FindCUDNN.cmake:
--------------------------------------------------------------------------------
 1 | # find the library
 2 | if(CUDA_FOUND)
 3 |   find_cuda_helper_libs(cudnn)
 4 |   set(CUDNN_LIBRARY ${CUDA_cudnn_LIBRARY} CACHE FILEPATH "location of the cuDNN library")
 5 |   unset(CUDA_cudnn_LIBRARY CACHE)
 6 | 
 7 |   find_cuda_helper_libs(nvinfer)
 8 |   set(NVINFER_LIBRARY ${CUDA_nvinfer_LIBRARY} CACHE FILEPATH "location of the nvinfer library")
 9 |   unset(CUDA_nvinfer_LIBRARY CACHE)
10 | endif()
11 | 
12 | # find the include
13 | if(CUDNN_LIBRARY)
14 |   find_path(CUDNN_INCLUDE_DIR
15 |     cudnn.h
16 |     PATHS ${CUDA_TOOLKIT_INCLUDE}
17 |     DOC "location of cudnn.h"
18 |     NO_DEFAULT_PATH
19 |   )
20 | 
21 |   if(NOT CUDNN_INCLUDE_DIR)
22 |     find_path(CUDNN_INCLUDE_DIR
23 |       cudnn.h
24 |       DOC "location of cudnn.h"
25 |     )
26 |   endif()
27 | 
28 |   message("-- Found CUDNN: " ${CUDNN_LIBRARY})
29 |   message("-- Found CUDNN include: " ${CUDNN_INCLUDE_DIR})
30 | endif()
31 | 
32 | if(NVINFER_LIBRARY)
33 |   find_path(NVINFER_INCLUDE_DIR
34 |     NvInfer.h
35 |     PATHS ${CUDA_TOOLKIT_INCLUDE}
36 |     DOC "location of NvInfer.h"
37 |     NO_DEFAULT_PATH
38 |   )
39 | 
40 |   if(NOT NVINFER_INCLUDE_DIR)
41 |     find_path(NVINFER_INCLUDE_DIR
42 |         NvInfer.h
43 |         DOC "location of NvInfer.h"
44 |     )
45 |   endif()
46 | 
47 |   message("-- Found NVINFER: " ${NVINFER_LIBRARY})
48 |   message("-- Found NVINFER include: " ${NVINFER_INCLUDE_DIR})
49 | endif()
50 | 
51 | 
52 | include(FindPackageHandleStandardArgs)
53 | find_package_handle_standard_args(CUDNN
54 |   FOUND_VAR CUDNN_FOUND
55 |   REQUIRED_VARS
56 |   CUDNN_LIBRARY
57 |   CUDNN_INCLUDE_DIR
58 |   VERSION_VAR CUDNN_VERSION
59 | )
60 | 
61 | if(CUDNN_FOUND)
62 |   set(CUDNN_LIBRARIES ${CUDNN_LIBRARY} ${NVINFER_LIBRARY})
63 |   set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR} ${NVINFER_INCLUDE_DIR})
64 | endif()
65 | 
66 | set(CUDNN_FOUND true)


--------------------------------------------------------------------------------
/cmake/getCudaArch.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main(int argc, char **argv){
 4 |     cudaDeviceProp dP;
 5 |     float min_cc = 5.0;
 6 | 
 7 |     int rc = cudaGetDeviceProperties(&dP, 0);
 8 |     if(rc != cudaSuccess) {
 9 |         cudaError_t error = cudaGetLastError();
10 |         printf("CUDA error: %s", cudaGetErrorString(error));
11 |         return rc; /* Failure */
12 |     }
13 |     if((dP.major+(dP.minor/10)) < min_cc) {
14 |         printf("Min Compute Capability of %2.1f required:  %d.%d found\n Not Building CUDA Code", min_cc, dP.major, dP.minor);
15 |         return 1; /* Failure */
16 |     } else {
17 |         printf("-arch=sm_%d%d", dP.major, dP.minor);
18 |         return 0; /* Success */
19 |     }
20 | }


--------------------------------------------------------------------------------
/cmake/tkDNNConfig.cmake:
--------------------------------------------------------------------------------
 1 | message("-- Found tkDNN")
 2 | set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR})
 3 | set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} --std=c++11 -fPIC")
 4 | 
 5 | find_package(CUDA REQUIRED)
 6 | find_package(OpenCV REQUIRED)
 7 | find_package(CUDNN REQUIRED)
 8 | 
 9 | set(tkDNN_INCLUDE_DIRS 
10 | 	${CUDA_INCLUDE_DIRS} 
11 | 	${OPENCV_INCLUDE_DIRS} 
12 |     ${CUDNN_INCLUDE_DIRS}
13 | )
14 | 
15 | set(tkDNN_LIBRARIES 
16 |     tkDNN 
17 |     kernels 
18 |     ${CUDA_LIBRARIES} 
19 |     ${CUDA_CUBLAS_LIBRARIES}
20 | 	${CUDNN_LIBRARIES}
21 | 	${OpenCV_LIBS}
22 | )
23 | 
24 | set(tkDNN_FOUND true)
25 | 


--------------------------------------------------------------------------------
/demo/config.yaml:
--------------------------------------------------------------------------------
1 | classes     : 80    #number of classes
2 | map_points  : 101   #number of recall points (0 for all, 101 for COCO, 11 PascalVOC)
3 | map_levels  : 10    #number of IoU step for the AP
4 | map_step    : 0.05  #step of IoU
5 | IoU_thresh  : 0.5   #starting IoU threshold
6 | conf_thresh : 0.001   #threshold on the condifence of the bbox
7 | verbose     : false #print on screen information
8 | 


--------------------------------------------------------------------------------
/demo/config_tetra.yaml:
--------------------------------------------------------------------------------
1 | classes     : 3    #number of classes
2 | map_points  : 101   #number of recall points (0 for all, 101 for COCO, 11 PascalVOC)
3 | map_levels  : 10    #number of IoU step for the AP
4 | map_step    : 0.05  #step of IoU
5 | IoU_thresh  : 0.5   #starting IoU threshold
6 | conf_thresh : 0.0   #threshold on the condifence of the bbox
7 | verbose     : false #print on screen information
8 | 


--------------------------------------------------------------------------------
/demo/demo/demoDepth.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <signal.h>
  3 | #include <stdlib.h>     /* srand, rand */
  4 | //#include <unistd.h>
  5 | #include <mutex>
  6 | 
  7 | #include "tkDNN/DepthNN.h"
  8 | 
  9 | bool gRun;
 10 | 
 11 | void sig_handler(int signo) {
 12 |     std::cout<<"request gateway stop\n";
 13 |     gRun = false;
 14 | }
 15 | 
 16 | int main(int argc, char *argv[]) {
 17 | 
 18 |     signal(SIGINT, sig_handler);
 19 | 
 20 |     std::string net = "monodepth2_fp32.rt";
 21 |     if(argc > 1)
 22 |         net = argv[1]; 
 23 |     #ifdef __linux__ 
 24 |         std::string input = "../demo/yolo_test.mp4";
 25 |     #elif _WIN32
 26 |         std::string input = "..\\..\\..\\demo\\yolo_test.mp4";
 27 |     #endif
 28 |     if(argc > 2)
 29 |         input = argv[2]; 
 30 |     bool show = true;
 31 |     if(argc > 3)
 32 |         show = atoi(argv[3]); 
 33 |     bool save = true;
 34 |     if(argc > 4)
 35 |         save = atoi(argv[4]); 
 36 | 
 37 |     std::cout   <<"Net settings - net: "<< net
 38 |                 <<"\n"; 
 39 |     std::cout   <<"Demo settings - input: "<< input
 40 |                 <<", show: "<< show
 41 |                 <<", save: "<< save<<"\n\n"; 
 42 |     
 43 |     tk::dnn::DepthNN depthNN;  
 44 | 
 45 |     // create depth network 
 46 |     int n_batch = 1;
 47 |     depthNN.init(net, n_batch);
 48 | 
 49 |     // open video stream
 50 |     cv::VideoCapture cap(input);
 51 |     if(!cap.isOpened())
 52 |         gRun = false; 
 53 |     else
 54 |         std::cout<<"camera started\n";
 55 | 
 56 |     cv::VideoWriter resultVideo;
 57 |     if(save) {
 58 |         int w = depthNN.output_w;
 59 |         int h = depthNN.output_h;
 60 |         resultVideo.open("result.mp4", cv::VideoWriter::fourcc('M','P','4','V'), 30, cv::Size(w, h));
 61 |     }
 62 | 
 63 |     if(show)
 64 |         cv::namedWindow("depth", cv::WINDOW_NORMAL);
 65 | 
 66 |     cv::Mat frame;
 67 |     std::vector<cv::Mat> batch_frame;
 68 |     std::vector<cv::Mat> batch_dnn_input;
 69 | 
 70 |     // start detection loop
 71 |     gRun = true;
 72 |     while(gRun) {
 73 |         batch_dnn_input.clear();
 74 |         batch_frame.clear();
 75 | 
 76 |         //read frame
 77 |         cap >> frame; 
 78 |         if(!frame.data) 
 79 |             break;
 80 |         batch_frame.push_back(frame);
 81 |         batch_dnn_input.push_back(frame.clone());
 82 |     
 83 |         //inference
 84 |         depthNN.update(batch_dnn_input, 1);
 85 |         if(show){
 86 |             cv::imshow("depth", depthNN.depthMats[0]);
 87 |             cv::waitKey(1);
 88 | 
 89 |         }
 90 | 
 91 |         if(save)
 92 |             resultVideo << depthNN.depthMats[0];
 93 |     }
 94 | 
 95 |     std::cout<<"detection end\n";   
 96 |     
 97 |     double mean = 0; 
 98 |     std::cout<<COL_GREENB<<"\n\nTime stats depth:\n";
 99 |     std::cout<<"Min: "<<*std::min_element(depthNN.stats.begin(), depthNN.stats.end())<<" ms\n";    
100 |     std::cout<<"Max: "<<*std::max_element(depthNN.stats.begin(), depthNN.stats.end())<<" ms\n";    
101 |     for(int i=0; i<depthNN.stats.size(); i++) mean += depthNN.stats[i]; mean /= depthNN.stats.size();
102 |     std::cout<<"Avg: "<<mean<<" ms\t"<<1000/(mean)<<" FPS\n";   
103 | 
104 |     return 0;
105 | }
106 | 
107 | 


--------------------------------------------------------------------------------
/demo/demoConfig.yaml:
--------------------------------------------------------------------------------
 1 | # video input
 2 | input     : "../demo/yolo_test.mp4"
 3 | win_input : "..\\..\\..\\demo\\yolo_test.mp4"
 4 | 
 5 | # network config
 6 | net         : "yolo4_berkeley_fp32.rt"
 7 | ntype       : 'y'
 8 | n_classes   : 80
 9 | n_batch     : 1
10 | conf_thresh : 0.3
11 | 
12 | # demo config
13 | show  : true
14 | save  : false
15 | 


--------------------------------------------------------------------------------
/demo/yolo_test.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceccocats/tkDNN/c95c2dbfbc12357ad26b041637a295a94d861c2e/demo/yolo_test.mp4


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ceccocats/tkdnn:latest
2 | LABEL maintainer "Francesco Gatti"
3 | 
4 | RUN cd && git clone https://github.com/ceccocats/tkDNN.git && cd tkDNN && mkdir build && cd build \ 
5 |     && cmake .. && make -j12
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
 1 | # Use the prebuilt image
 2 | ```
 3 | # build image
 4 | docker build -t tkdnn:build -f Dockerfile . 
 5 | ```
 6 | 
 7 | # Build Base Docker image
 8 | ```
 9 | # make nvidia docker working
10 | # follow this guide: https://github.com/NVIDIA/nvidia-docker
11 | 
12 | # build image
13 | docker build -t ceccocats/tkdnn:latest -f Dockerfile.base .
14 | 
15 | # run image
16 | ./docker_launch.sh
17 | ```
18 | 
19 | 


--------------------------------------------------------------------------------
/docker/assets/terminator_config:
--------------------------------------------------------------------------------
 1 | [global_config]
 2 |   title_transmit_bg_color = "#2e3436"
 3 | [keybindings]
 4 | [layouts]
 5 |   [[default]]
 6 |     [[[child1]]]
 7 |       parent = window0
 8 |       type = Terminal
 9 |     [[[window0]]]
10 |       parent = ""
11 |       type = Window
12 | [plugins]
13 | [profiles]
14 |   [[default]]
15 |     background_color = "#282828"
16 |     cursor_color = "#aaaaaa"
17 |     foreground_color = "#f3f3f3"
18 |     palette = "#000000:#aa0000:#00aa00:#c4a000:#3465a4:#75507b:#06989a:#d3d7cf:#88807c:#f15d22:#73c48f:#ffce51:#48b9c7:#ad7fa8:#34e2e2:#eeeeec"
19 | 


--------------------------------------------------------------------------------
/docker/docker_launch.sh:
--------------------------------------------------------------------------------
 1 | xhost local:root
 2 | docker run --rm -it --runtime=nvidia --privileged --net=host --cap-add sys_ptrace -d --ipc=host \
 3 | -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY \
 4 | -v $HOME/.Xauthority:/home/$(id -un)/.Xauthority -e XAUTHORITY=/home/$(id -un)/.Xauthority \
 5 | -e DOCKER_USER_NAME=$(id -un) \
 6 | -e DOCKER_USER_ID=$(id -u) \
 7 | -e DOCKER_USER_GROUP_NAME=$(id -gn) \
 8 | -e DOCKER_USER_GROUP_ID=$(id -g) \
 9 | -v $HOME/.ssh:/home/$(id -un)/.ssh  ceccocats/tkdnn
10 | 


--------------------------------------------------------------------------------
/docs/README_depth.md:
--------------------------------------------------------------------------------
 1 | # Monocular depth estimation with tkDNN
 2 | 
 3 | Currently tkDNN supports only Monodepth2 as monocular depth esitmation network.
 4 | 
 5 | 
 6 | ## Run the demo
 7 | 
 8 | To run the depth estimation demo follow these steps (example with monodepth2):
 9 | ```
10 | rm monodepth2_fp32.rt        # be sure to delete(or move) old tensorRT files
11 | ./test_monodepth2            # run the yolo test (is slow)
12 | ./demoDepth monodepth2_fp32.rt ../demo/yolo_test.mp4 
13 | ```
14 | In general the demo program takes the following parameters:
15 | ```
16 | ./demoDepth <network-rt-file> <path-to-video> <show-flag> <save-flag>
17 | ```
18 | where
19 | *  ```<network-rt-file>``` is the rt file generated by a test
20 | *  ```<<path-to-video>``` is the path to a video file or a camera input  
21 | *  ```<show-flag>``` if set to 0 the demo will not show the visualization, it will otherwise (default=1)
22 | *  ```<save-flag>``` if set to 1 the demo will save the video into result.mp4, it won't otherwise (default=1)
23 | 
24 | NB) By default it is used FP32 inference
25 | 
26 | 
27 | ![demo](https://user-images.githubusercontent.com/11939259/160845358-0d6ab15d-c5f4-46ae-b9da-bfaf3903389d.gif "Results on yolo_test.mp4")  
28 | 
29 | 
30 | <!-- ## FPS Results
31 | 
32 | Inference FPS of shelfnet with tkDNN, average of 1200 images on:
33 |   * RTX 2080Ti (CUDA 10.2, TensorRT 7.0.0, Cudnn 7.6.5);
34 |   * Xavier AGX, Jetpack 4.3 (CUDA 10.0, CUDNN 7.6.3, tensorrt 6.0.1 );
35 | 
36 | | Platform   | Test                     | Phase   | FP32, ms  | FP32, FPS | FP16, ms  |	FP16, FPS  | INT8, ms |	INT8, FPS | 
37 | | :------:   | :-----:                  | :-----: | :-----:   | :-----:   | :-----:   |	:-----:    | :-----:  |	:-----:   | 
38 | | RTX 2080Ti | shelfnet 1024x1024 (B=1) | pre     | 6.11863   |  163.435  |   5.81465 |  171.979   |  5.88699 |   169.866 |
39 | | RTX 2080Ti | shelfnet 1024x1024 (B=1) | inf     | 11.5464   |  86.6074  |   7.35396 |  135.981   |  6.37623 |   156.832 |
40 | | RTX 2080Ti | shelfnet 1024x1024 (B=1) | post    | 4.09058   |  244.464  |   3.91961 |  255.128   |  4.07343 |   245.493 |
41 | | RTX 2080Ti | shelfnet 1024x1024 (B=1) | tot     | 21.7556   |  45.9652  |   17.0882 |  58.5199   |  16.3366 |   61.2121 |
42 | | RTX 2080Ti | shelfnet 2048x2048 (B=4) | pre     | 25.435    |  39.3158  |   25.2953 |  39.5331   |  25.9303 |   38.565  | 
43 | | RTX 2080Ti | shelfnet 2048x2048 (B=4) | inf     | 36.5015   |  27.3961  |   17.0534 |  58.6395   |  15.6061 |   64.0773 |  
44 | | RTX 2080Ti | shelfnet 2048x2048 (B=4) | post    | 17.3917   |  57.4985  |   17.1649 |  58.2583   |  17.5539 |   56.9675 |  
45 | | RTX 2080Ti | shelfnet 2048x2048 (B=4) | tot     | 79.3283   |  12.6058  |   59.5136 |  16.8029   |  59.0903 |   16.9233 |  
46 | | AGX Xavier | shelfnet 1024x1024 (B=1) | pre     | 8.0174    |  124.729  |   7.5117  |  133.126   |  7.47333 |   133.809 |
47 | | AGX Xavier | shelfnet 1024x1024 (B=1) | inf     | 72.4173   |  13.8089  |   37.505  |  26.6631   |  31.3286 |   31.9197 |
48 | | AGX Xavier | shelfnet 1024x1024 (B=1) | post    | 8.89958   |  112.365  |   8.83576 |  113.176   |  9.42655 |   106.083 |
49 | | AGX Xavier | shelfnet 1024x1024 (B=1) | tot     | 89.3342   |  11.1939  |   53.8525 |  18.5692   |  48.2285 |   20.7346 |
50 | | AGX Xavier | shelfnet 2048x2048 (B=4) | pre     | 47.1454   |  21.211   |   21.6475 |  46.1947   |  21.4201 |   46.6851 | 
51 | | AGX Xavier | shelfnet 2048x2048 (B=4) | inf     | 266.537   |  3.75183  |   128.321 |  7.79293   |  107.621 |   9.29185 |  
52 | | AGX Xavier | shelfnet 2048x2048 (B=4) | post    | 44.0711   |  22.6906  |   40.1732 |  24.8922   |  39.873  |   25.0796 |  
53 | | AGX Xavier | shelfnet 2048x2048 (B=4) | tot     | 357.753   |  2.79522  |   190.142 |  5.25922   |  168.914 |   5.92016 |   -->
54 | 
55 | 


--------------------------------------------------------------------------------
/docs/mAP_demo.md:
--------------------------------------------------------------------------------
 1 | # Run the mAP demo
 2 | 
 3 | To compute mAP, precision, recall and f1score to evaluate 2D object detectors, run the map_demo.
 4 | 
 5 | A validation set is needed. 
 6 | To download COCO_val2017 (80 classes) run (form the root folder): 
 7 | ```
 8 | bash scripts/download_validation.sh COCO
 9 | ```
10 | To download Berkeley_val (10 classes) run (form the root folder): 
11 | ```
12 | bash scripts/download_validation.sh BDD
13 | ```
14 | 
15 | To compute the map, the following parameters are needed:
16 | ```
17 | ./map_demo <network rt> <network type [y|c|m]> <labels file path> <config file path>
18 | ```
19 | where 
20 | * ```<network rt>```: rt file of a chosen network on which compute the mAP.
21 | * ```<network type [y|c|m]>```: type of network. Right now only y(yolo), c(centernet) and m(mobilenet) are allowed
22 | * ```<labels file path>```: path to a text file containing all the paths of the ground-truth labels. It is important that all the labels of the ground-truth are in a folder called 'labels'. In the folder containing the folder 'labels' there should be also a folder 'images', containing all the ground-truth images having the same same as the labels. To better understand, if there is a label path/to/labels/000001.txt there should be a corresponding image path/to/images/000001.jpg. 
23 | * ```<config file path>```: path to a yaml file with the parameters needed for the mAP computation, similar to demo/config.yaml
24 | 
25 | Example:
26 | 
27 | ```
28 | cd build
29 | ./map_demo dla34_cnet_FP32.rt c ../demo/COCO_val2017/all_labels.txt ../demo/config.yaml
30 | ```
31 | 
32 | This demo also creates a json file named ```net_name_COCO_res.json``` containing all the detections computed. The detections are in COCO format, the correct format to submit the results to [CodaLab COCO detection challenge](https://competitions.codalab.org/competitions/20794#participate).


--------------------------------------------------------------------------------
/include/tkDNN/BoundingBox.h:
--------------------------------------------------------------------------------
 1 | #ifndef BOUNDINGBOX_H
 2 | #define BOUNDINGBOX_H
 3 | 
 4 | #include <iostream>
 5 | #include "tkdnn.h"
 6 | 
 7 | namespace tk { namespace dnn {
 8 | class BoundingBox : public tk::dnn::box
 9 | {
10 |     float overlap(const float p1, const float l1, const float p2, const float l22);
11 |     float boxesIntersection(const BoundingBox &b);
12 |     float boxesUnion(const BoundingBox &b);
13 | 
14 |     public:
15 |     
16 |     int uniqueTruthIndex = -1;
17 |     int truthFlag = 0;
18 |     float maxIoU = 0;
19 | 
20 |     float IoU(const BoundingBox &b);
21 |     void clear();
22 | 
23 |     friend std::ostream& operator<<(std::ostream& os, const BoundingBox& bb);
24 | };
25 | 
26 | std::ostream& operator<<(std::ostream& os, const BoundingBox& bb);
27 | bool boxComparison (const BoundingBox& a,const BoundingBox& b) ;
28 | 
29 | }}
30 | #endif /*BOUNDINGBOX_H*/
31 | 
32 | 


--------------------------------------------------------------------------------
/include/tkDNN/CenternetDetection.h:
--------------------------------------------------------------------------------
 1 | #ifndef CENTERNETDETECTION_H
 2 | #define CENTERNETDETECTION_H
 3 | 
 4 | #include "kernels.h"
 5 | #include <opencv2/videoio.hpp>
 6 | #include "opencv2/opencv.hpp"
 7 | #include <time.h>
 8 | #include <vector>
 9 | #include <numeric>      // std::iota
10 | #include <algorithm>    // std::sort
11 | 
12 | #include "DetectionNN.h"
13 | 
14 | #include "kernelsThrust.h"
15 | 
16 | 
17 | namespace tk { namespace dnn { 
18 | 
19 | class CenternetDetection : public DetectionNN
20 | {
21 | private:
22 |     tk::dnn::dataDim_t dim;
23 |     tk::dnn::dataDim_t dim2;
24 |     tk::dnn::dataDim_t dim_hm; 
25 |     tk::dnn::dataDim_t dim_wh; 
26 |     tk::dnn::dataDim_t dim_reg;
27 |     float *topk_scores;
28 |     int *topk_inds_;
29 |     float *topk_ys_;
30 |     float *topk_xs_;
31 |     int *ids_d, *ids_, *ids_2, *ids_2d;
32 | 
33 |     float *scores, *scores_d;
34 |     int *clses, *clses_d; 
35 |     int *topk_inds_d;
36 |     float *topk_ys_d; 
37 |     float *topk_xs_d;
38 |     int *inttopk_xs_d, *inttopk_ys_d;
39 | 
40 | 
41 |     float *bbx0, *bby0, *bbx1, *bby1;
42 |     float *bbx0_d, *bby0_d, *bbx1_d, *bby1_d;
43 |     
44 |     float *target_coords;
45 | 
46 |     #ifdef OPENCV_CUDACONTRIB
47 |         float *mean_d;
48 |         float *stddev_d;
49 |     #else
50 |         cv::Vec<float, 3> mean;
51 |         cv::Vec<float, 3> stddev;
52 |         dnnType *input;
53 |     #endif
54 | 
55 |     float *d_ptrs;
56 | 
57 |     cv::Mat src;
58 |     cv::Mat dst;
59 |     cv::Mat dst2;  
60 |     cv::Mat trans, trans2;
61 |     //processing
62 |     float toll = 0.000001;
63 |     int K = 100;
64 |     int width = 128;//56;        // TODO
65 | 
66 |     // pointer used in the kernels
67 |     float *src_out;
68 |     int *ids_out;
69 | 
70 |     struct threshold op;
71 |     
72 | public:
73 |     CenternetDetection() {};
74 |     ~CenternetDetection() {}; 
75 | 
76 |     bool init(const std::string& tensor_path, const int n_classes=80, const int n_batches=1, const float conf_thresh=0.3);
77 |     void preprocess(cv::Mat &frame, const int bi=0);
78 |     void postprocess(const int bi=0,const bool mAP=false);
79 | };
80 | 
81 | 
82 | } // namespace dnn
83 | } // namespace tk
84 | 
85 | 
86 | #endif /*CENTERNETDETECTION_H*/


--------------------------------------------------------------------------------
/include/tkDNN/CenternetDetection3D.h:
--------------------------------------------------------------------------------
  1 | #ifndef CENTERNETDETECTION3D_H
  2 | #define CENTERNETDETECTION3D_H
  3 | 
  4 | #include "kernels.h"
  5 | #include <opencv2/videoio.hpp>
  6 | #include "opencv2/opencv.hpp"
  7 | #include <time.h>
  8 | #include <vector>
  9 | #include <numeric>      // std::iota
 10 | #include <algorithm>    // std::sort
 11 | 
 12 | #ifdef _WIN32
 13 | #define _USE_MATH_DEFINES
 14 | #include <math.h>
 15 | #endif 
 16 | 
 17 | #include "DetectionNN3D.h"
 18 | 
 19 | #include "kernelsThrust.h"
 20 | 
 21 | 
 22 | namespace tk { namespace dnn { 
 23 | 
 24 | class CenternetDetection3D : public DetectionNN3D
 25 | {
 26 | private:
 27 |     tk::dnn::dataDim_t dim;
 28 |     tk::dnn::dataDim_t dim2;
 29 |     tk::dnn::dataDim_t dim_hm; 
 30 |     tk::dnn::dataDim_t dim_wh; 
 31 |     tk::dnn::dataDim_t dim_reg;
 32 |     tk::dnn::dataDim_t dim_dep;
 33 |     tk::dnn::dataDim_t dim_rot;
 34 |     tk::dnn::dataDim_t dim_dim;
 35 | 
 36 |     std::vector<cv::Mat> inputCalibs;
 37 |     float *topk_scores;
 38 |     int *topk_inds_;
 39 |     float *topk_ys_;
 40 |     float *topk_xs_;
 41 |     int *ids_d, *ids_;
 42 | 
 43 |     float *ones;
 44 | 
 45 |     float *scores, *scores_d;
 46 |     int *clses, *clses_d; 
 47 |     int *topk_inds_d;
 48 |     float *topk_ys_d; 
 49 |     float *topk_xs_d;
 50 |     int *inttopk_xs_d, *inttopk_ys_d;
 51 | 
 52 |     float *xs, *ys;
 53 | 
 54 |     float *dep, *rot, *dim_, *wh;
 55 |     float *dep_d, *rot_d, *dim_d, *wh_d;
 56 |     
 57 |     float *target_coords;
 58 | 
 59 |     #ifdef OPENCV_CUDACONTRIB
 60 |         float *mean_d;
 61 |         float *stddev_d;
 62 |     #else
 63 |         cv::Vec<float, 3> mean;
 64 |         cv::Vec<float, 3> stddev;
 65 |         dnnType *input;
 66 |     #endif
 67 |     cv::Mat r;
 68 |     float *d_ptrs;
 69 |     
 70 |     cv::Size sz_old;
 71 | 
 72 |     cv::Mat src;
 73 |     cv::Mat dst;
 74 |     cv::Mat dst2;  
 75 |     cv::Mat trans, trans2;
 76 |     std::vector<cv::Mat> calibs;
 77 | 
 78 |     //processing
 79 |     int K = 100;
 80 |     int width = 128;//56;        // TODO
 81 | 
 82 |     // pointer used in the kernels
 83 |     float *srcOut;
 84 |     int *idsOut;
 85 | 
 86 |     struct threshold op;
 87 |     cv::Mat corners, pts3DHomo;  
 88 | 
 89 |     std::vector<std::vector<int>> faceId;
 90 | 
 91 | public:
 92 |     CenternetDetection3D() {};
 93 |     ~CenternetDetection3D() {}; 
 94 | 
 95 |     bool init(const std::string& tensor_path, const int n_classes=3, const int n_batches=1, const float conf_thresh=0.3, const std::vector<cv::Mat>& k_calibs=std::vector<cv::Mat>());
 96 |     void preprocess(cv::Mat &frame, const int bi=0);
 97 |     void postprocess(const int bi=0,const bool mAP=false);
 98 |     void draw(std::vector<cv::Mat>& frames);
 99 | };
100 | 
101 | 
102 | } // namespace dnn
103 | } // namespace tk
104 | 
105 | 
106 | #endif /*CENTERNETDETECTION_H*/


--------------------------------------------------------------------------------
/include/tkDNN/DarknetParser.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <iostream>
 3 | #include "tkDNN/tkdnn.h"
 4 | 
 5 | namespace tk { namespace dnn {
 6 | 
 7 |     struct darknetFields_t{
 8 |         std::string type = "";
 9 |         int width = 0;
10 |         int height = 0;
11 |         int channels = 3;
12 |         int batch_normalize=0;
13 |         int groups = 1;
14 |         int group_id = 0;
15 |         int filters=1;
16 |         int size_x=1;
17 |         int size_y=1;
18 |         int stride_x=1;
19 |         int stride_y=1;
20 |         int padding_x = 0;
21 |         int padding_y = 0;
22 |         int n_mask = 0;
23 |         int classes = 20;
24 |         int num = 1;
25 |         int pad = 0;
26 |         int coords = 4;
27 |         int nms_kind = 0;
28 |         int new_coords= 0;
29 |         float scale_xy = 1;
30 |         float nms_thresh = 0.45;
31 |         std::vector<int> layers;
32 |         std::string activation = "linear";
33 | 
34 |         friend std::ostream& operator<<(std::ostream& os, const darknetFields_t& f){
35 |             os << f.width << " " << f.height << " " << f.channels << " " << f.batch_normalize<< " " << f.filters << " "  << f.activation<< " " << f.scale_xy;
36 |             return os;
37 |         }
38 |     };
39 | 
40 |     std::string darknetParseType(const std::string& line);
41 |     bool divideNameAndValue(const std::string& line, std::string&name, std::string& value);
42 |     std::vector<int> fromStringToIntVec(const std::string& line, const char delimiter);
43 |     
44 |     bool darknetParseFields(const std::string& line, darknetFields_t& fields);
45 |     tk::dnn::Network *darknetAddNet(darknetFields_t &fields);
46 |     void darknetAddLayer(tk::dnn::Network *net, darknetFields_t &f, std::string wgs_path, 
47 |                          std::vector<tk::dnn::Layer*> &netLayers, const std::vector<std::string>& names);
48 |     std::vector<std::string> darknetReadNames(const std::string& names_file);
49 |     tk::dnn::Network* darknetParser(const std::string& cfg_file, const std::string& wgs_path, const std::string& names_file);
50 |     void loadYoloInfo(const std::string &cfg_file,int lineNo,std::vector<float> &mask,std::vector<float> &anchors,int &num,int &classes,float &nms_thresh,int &nms_kind,int &coords);
51 |     void loadYoloInitInfo(int &channels,int &width,int &height,const std::string &cfg_file);
52 |     std::vector<int> noYolosLine(const std::string &cfg_file);
53 | 
54 | }}
55 | 


--------------------------------------------------------------------------------
/include/tkDNN/Int8BatchStream.h:
--------------------------------------------------------------------------------
 1 | #ifndef INT8BATCHSTREAM_H
 2 | #define INT8BATCHSTREAM_H
 3 | 
 4 | #include <vector>
 5 | #include <assert.h>
 6 | #include <algorithm>
 7 | #include <iterator>
 8 | #include <stdint.h>
 9 | #include <iostream>
10 | #include <string>
11 | #include <fstream>
12 | #include <iomanip>
13 | #include <signal.h>
14 | #include <stdlib.h>
15 | #ifdef __linux__    
16 | #include <unistd.h>
17 | #endif
18 | 
19 | #include <mutex>
20 | 
21 | #include "NvInfer.h"
22 | #include "utils.h"
23 | #include "tkdnn.h"
24 | 
25 | /*
26 |  * BatchStream implements the stream for the INT8 calibrator. 
27 |  * It reads the two files .txt with the list of image file names 
28 |  * and the list of label file names. 
29 |  * It then iterates on images and labels.
30 |  */
31 | class BatchStream {
32 | public:
33 | 	BatchStream(tk::dnn::dataDim_t dim, int batchSize, int maxBatches, const std::string& fileimglist, const std::string& filelabellist);
34 | 	virtual ~BatchStream() { }
35 | 	void reset(int firstBatch);
36 | 	bool next();
37 | 	void skip(int skipCount);
38 | 	float *getBatch() { return mBatch.data(); }
39 | 	float *getLabels() { return mLabels.data(); }
40 | 	int getBatchesRead() const { return mBatchCount; }
41 | 	int getBatchSize() const { return mBatchSize; }
42 | 	nvinfer1::Dims4 getDims() const { return mDims; }
43 | 	float* getFileBatch() { return &mFileBatch[0]; }
44 | 	float* getFileLabels() { return &mFileLabels[0]; }
45 | 	void readInListFile(const std::string& dataFilePath, std::vector<std::string>& mListIn);
46 | 	void readCVimage(std::string inputFileName, std::vector<float>& res, bool fixshape = true);
47 | 	void readLabels(std::string inputFileName ,std::vector<float>& ris);
48 | 	bool update();
49 | 
50 | private:
51 | 	int mBatchSize{ 0 };
52 | 	int mMaxBatches{ 0 };
53 | 	int mBatchCount{ 0 };
54 | 	int mFileCount{ 0 };
55 | 	int mFileBatchPos{ 0 };
56 | 	int mImageSize{ 0 };
57 | 
58 | 	nvinfer1::Dims4 mDims;
59 | 	std::vector<float> mBatch;
60 | 	std::vector<float> mLabels;
61 | 	std::vector<float> mFileBatch;
62 | 	std::vector<float> mFileLabels;
63 | 
64 | 	int mHeight;
65 | 	int mWidth;
66 | 	std::string mFileImgList;
67 | 	std::vector<std::string> mListImg;
68 | 	std::string mFileLabelList;
69 | 	std::vector<std::string> mListLabel;
70 | };
71 | 
72 | #endif //INT8BATCHSTREAM


--------------------------------------------------------------------------------
/include/tkDNN/Int8Calibrator.h:
--------------------------------------------------------------------------------
 1 | #ifndef INT8CALIBRATOR_H
 2 | #define INT8CALIBRATOR_H
 3 | 
 4 | #include <vector>
 5 | #include <assert.h>
 6 | #include <algorithm>
 7 | #include <iterator>
 8 | #include <stdint.h>
 9 | #include <iostream>
10 | #include <string>
11 | #include "NvInfer.h"
12 | 
13 | #include <fstream>
14 | #include <iomanip>
15 | 
16 | #include "Int8BatchStream.h"
17 | 
18 | #include "tkdnn.h"
19 | #include "utils.h"
20 | 
21 | /*
22 |  * Int8EntropyCalibrator implements the INT8 calibrator to achieve the
23 |  * INT8 quantization. It uses a BatchStream stream to scroll through 
24 |  * images data. It also implements the calibration cache, a way to 
25 |  * save the calibration process results to reduce the running time: 
26 |  * the calibration process takes a long time.
27 |  */
28 | class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator {
29 | public:
30 | 	Int8EntropyCalibrator(BatchStream& stream, int firstBatch, const std::string& calibTableFilePath, 
31 | 							const std::string& inputBlobName, bool readCache = true);
32 | 	virtual ~Int8EntropyCalibrator() { checkCuda(cudaFree(mDeviceInput)); }
33 | 	int getBatchSize() const NOEXCEPT override { return mStream.getBatchSize(); }
34 | 	bool getBatch(void* bindings[], const char* names[], int nbBindings) NOEXCEPT override;
35 | 	const void* readCalibrationCache(size_t& length) NOEXCEPT override;
36 | 	void writeCalibrationCache(const void* cache, size_t length) NOEXCEPT override;
37 | 
38 | private:
39 | 	BatchStream mStream;
40 | 	const std::string mCalibTableFilePath{ nullptr };
41 | 	const std::string mInputBlobName;
42 | 	bool mReadCache{ true };
43 | 
44 | 	size_t mInputCount;
45 | 	void* mDeviceInput{ nullptr };
46 | 	std::vector<char> mCalibrationCache;
47 | };
48 | 
49 | #endif //INT8CALIBRATOR_H


--------------------------------------------------------------------------------
/include/tkDNN/MobilenetDetection.h:
--------------------------------------------------------------------------------
 1 | #ifndef MOBILENETDETECTION_H
 2 | #define MOBILENETDETECTION_H
 3 | 
 4 | #include <opencv2/videoio.hpp>
 5 | #include "opencv2/opencv.hpp"
 6 | 
 7 | #include "DetectionNN.h"
 8 | 
 9 | #define N_COORDS 4
10 | #define N_SSDSPEC 6
11 | 
12 | namespace tk { namespace dnn { 
13 | 
14 | struct SSDSpec
15 | {
16 |     int featureSize = 0;
17 |     int shrinkage = 0;
18 |     int boxWidth = 0;
19 |     int boxHeight = 0;
20 |     int ratio1 = 0;
21 |     int ratio2 = 0;
22 | 
23 |     SSDSpec() {}
24 |     SSDSpec(int feature_size, int shrinkage, int box_width, int box_height, int ratio1, int ratio2) : 
25 |                     featureSize(feature_size), shrinkage(shrinkage), boxWidth(box_width), 
26 |                     boxHeight(box_height), ratio1(ratio1), ratio2(ratio2) {}
27 |     void setAll(int feature_size, int shrinkage, int box_width, int box_height, int ratio1, int ratio2)
28 |     {
29 |         this->featureSize = feature_size;
30 |         this->shrinkage = shrinkage;
31 |         this->boxWidth = box_width;
32 |         this->boxHeight = box_height;
33 |         this->ratio1 = ratio1;
34 |         this->ratio2 = ratio2;
35 |     }
36 |     void print()
37 |     {
38 |         std::cout << "fsize: " << featureSize << "\tshrinkage: " << shrinkage << 
39 |                     "\t box W:" << boxWidth << "\tbox H: " << boxHeight << 
40 |                     "\t x ratio:" << ratio1 << "\t y ratio:" << ratio2 << std::endl;
41 |     }
42 | };
43 | 
44 | class MobilenetDetection : public DetectionNN
45 | {
46 | private:
47 |     float IoUThreshold = 0.45;
48 |     float centerVariance = 0.1;
49 |     float sizeVariance = 0.2;
50 |     int imageSize;
51 | 
52 |     float *priors = nullptr;
53 |     int nPriors = 0;
54 |     float *locations_h, *confidences_h;
55 | 
56 |     
57 | 
58 |     void generate_ssd_priors(const SSDSpec *specs, const int n_specs, bool clamp = true);
59 |     void convert_locatios_to_boxes_and_center();
60 |     float iou(const tk::dnn::box &a, const tk::dnn::box &b);
61 | 
62 |     
63 | 
64 | public:
65 |     MobilenetDetection() {};
66 |     ~MobilenetDetection() {}; 
67 | 
68 |     bool init(const std::string& tensor_path,const int n_classes, const int n_batches=1, const float conf_thresh=0.3);
69 |     void preprocess(cv::Mat &frame, const int bi=0);
70 |     void postprocess(const int bi=0,const bool mAP=false);
71 | };
72 | 
73 | 
74 | } // namespace dnn
75 | } // namespace tk
76 | 
77 | #endif /*MOBILENETDETECTION_H*/


--------------------------------------------------------------------------------
/include/tkDNN/Network.h:
--------------------------------------------------------------------------------
 1 | #ifndef NETWORK_H
 2 | #define NETWORK_H
 3 | 
 4 | #include <string>
 5 | #include "utils.h"
 6 | 
 7 | namespace tk { namespace dnn {
 8 | 
 9 | /**
10 |     Data representation between layers
11 |     n = batch size
12 |     c = channels
13 |     h = height (lines)
14 |     w = width  (rows)
15 |     l = length (3rd dimension)
16 | */
17 | struct dataDim_t {
18 | 
19 |     int n, c, h, w, l;
20 | 
21 |     dataDim_t() : n(1), c(1), h(1), w(1), l(1) {};
22 | 
23 |     dataDim_t(int _n, int _c, int _h, int _w, int _l = 1) :
24 |         n(_n), c(_c), h(_h), w(_w), l(_l) {};
25 | 
26 |     void print() {
27 |         std::cout<<"Data dim: "<<n<<" "<<c<<" "<<h<<" "<<w<<" "<<l<<"\n";
28 |     }
29 | 
30 |     int tot() {
31 |         return n*c*h*w*l;
32 |     }
33 | };
34 | 
35 | class Layer;
36 | const int MAX_LAYERS = 512;
37 | 
38 | class Network {
39 | 
40 | public:
41 |     Network(dataDim_t input_dim);
42 |     virtual ~Network();
43 |     void releaseLayers();
44 | 
45 |     /**
46 |         Do inference for every added layer
47 |     */
48 |     dnnType* infer(dataDim_t &dim, dnnType* data);
49 | 
50 |     bool addLayer(Layer *l);
51 |     void print();
52 |     const char *getNetworkRTName(const char *network_name);
53 |     void adjustFeatureMapSizeWithShortcuts();
54 | 
55 |     cudnnDataType_t dataType;
56 |     cudnnTensorFormat_t tensorFormat;
57 |     cudnnHandle_t cudnnHandle;
58 |     cublasHandle_t cublasHandle;
59 | 
60 |     Layer* layers[MAX_LAYERS];  //contains layers of the net
61 |     int num_layers; //current number of layers
62 | 
63 |     dataDim_t input_dim;
64 |     dataDim_t getOutputDim();
65 | 
66 |     bool fp16, dla, int8;
67 |     int maxBatchSize;
68 |     bool dontLoadWeights;
69 |     std::string fileImgList;
70 |     std::string fileLabelList;
71 |     std::string networkName;
72 |     std::string networkNameRT;
73 |     
74 | };
75 | 
76 | }}
77 | #endif //NETWORK_H
78 | 


--------------------------------------------------------------------------------
/include/tkDNN/NetworkViz.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <iostream>
 3 | #include <opencv2/core/types.hpp>
 4 | #include "tkdnn.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | cv::Mat vizFloat2colorMap(cv::Mat map, double min=0, double max=0, int classes=19);
 9 | cv::Mat vizData2Mat(dnnType *dataInput, tk::dnn::dataDim_t dim, int img_h, int img_w, double min=0, double max=0, int classes=0);
10 | cv::Mat vizLayer2Mat(tk::dnn::Network *net, int layer, int imgdim = 1000);
11 |     
12 | }}
13 | 


--------------------------------------------------------------------------------
/include/tkDNN/Yolo3Detection.h:
--------------------------------------------------------------------------------
 1 | #ifndef Yolo3Detection_H
 2 | #define Yolo3Detection_H
 3 | #include <opencv2/videoio.hpp>
 4 | #include "opencv2/opencv.hpp"
 5 | 
 6 | #include "DetectionNN.h"
 7 | #include "DarknetParser.h"
 8 | 
 9 | namespace tk { namespace dnn {
10 | class Yolo3Detection : public DetectionNN
11 | {
12 | private:
13 |     int num = 0;
14 |     int nMasks = 0;
15 |     int nDets = 0;
16 |     tk::dnn::Yolo::detection *dets = nullptr;
17 |     tk::dnn::Yolo* yolo[3];
18 | 
19 |     tk::dnn::Yolo* getYoloLayer(int n=0);
20 | 
21 |     cv::Mat bgr_h;
22 |     
23 | public:
24 |     Yolo3Detection() {};
25 |     ~Yolo3Detection() {}; 
26 | 
27 |     bool init(const std::string& tensor_path, const int n_classes=80, const int n_batches=1, const float conf_thresh=0.3);
28 |     void preprocess(cv::Mat &frame, const int bi=0);
29 |     void postprocess(const int bi=0,const bool mAP=false);
30 | };
31 | 
32 | 
33 | } // namespace dnn
34 | } // namespace tk
35 | 
36 | #endif /* Yolo3Detection_H*/
37 | 


--------------------------------------------------------------------------------
/include/tkDNN/demo_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef DEMO_UTILS_H
 2 | #define DEMO_UTILS_H
 3 | 
 4 | #include <iostream>
 5 | #include <sstream>
 6 | #include <fstream>
 7 | #include <iomanip>
 8 | #include <stdlib.h>
 9 | 
10 | #ifdef __linux__
11 | #include <unistd.h>
12 | #endif
13 | 
14 | #include <opencv2/core/core.hpp>
15 | #include <opencv2/highgui/highgui.hpp>
16 | #include <opencv2/imgproc/imgproc.hpp>
17 | 
18 | #include <yaml-cpp/yaml.h>
19 | 
20 | 
21 | void readCalibrationMatrix(const std::string& path, cv::Mat& calib_mat);
22 | 
23 | #endif //DEMO_UTILS_H


--------------------------------------------------------------------------------
/include/tkDNN/kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef KERNELS_H
 2 | #define KERNELS_H
 3 | 
 4 | #include "utils.h"
 5 | 
 6 | void activationELUForward(dnnType *srcData, dnnType *dstData, int size, cudaStream_t stream = cudaStream_t(0));
 7 | void activationLEAKYForward(dnnType *srcData, dnnType *dstData, int size, float slope, cudaStream_t stream = cudaStream_t(0));
 8 | void activationReLUCeilingForward(dnnType *srcData, dnnType *dstData, int size, const float ceiling, cudaStream_t stream = cudaStream_t(0));
 9 | void activationLOGISTICForward(dnnType *srcData, dnnType *dstData, int size, cudaStream_t stream = cudaStream_t(0));
10 | void activationSIGMOIDForward(dnnType *srcData, dnnType *dstData, int size, cudaStream_t stream = cudaStream_t(0));
11 | void activationMishForward(dnnType* srcData, dnnType* dstData, int size, cudaStream_t stream= cudaStream_t(0));
12 | 
13 | void fill(dnnType *data, int size, dnnType val, cudaStream_t stream = cudaStream_t(0));
14 | 
15 | void resizeForward(dnnType *srcData, dnnType *dstData, int n, int i_c, int i_h, int i_w,
16 |                    int o_c, int o_h, int o_w, cudaStream_t stream = cudaStream_t(0));
17 | 
18 | void reorgForward(dnnType *srcData, dnnType *dstData,
19 |                   int n, int c, int h, int w, int stride, cudaStream_t stream = cudaStream_t(0));
20 | 
21 | void MaxPoolingForward(dnnType *srcData, dnnType *dstData, int n, int c, int h, int w, int stride_x, int stride_y, int size, int padding, cudaStream_t stream = cudaStream_t(0));
22 | 
23 | void softmaxForward(float *input, int n, int batch, int batch_offset,
24 |                     int groups, int group_offset, int stride, float temp, float *output, cudaStream_t stream = cudaStream_t(0));
25 | 
26 | void shortcutForward(dnnType *srcData, dnnType *dstData, int n1, int c1, int h1, int w1, int s1,
27 |                      int n2, int c2, int h2, int w2, int s2, bool mul,
28 |                      cudaStream_t stream = cudaStream_t(0));
29 | 
30 | void upsampleForward(dnnType *srcData, dnnType *dstData,
31 |                      int n, int c, int h, int w, int s, int forward, float scale,
32 |                      cudaStream_t stream = cudaStream_t(0));
33 | 
34 | void float2half(float *srcData, __half *dstData, int size, const cudaStream_t stream = cudaStream_t(0));
35 | 
36 | void dcnV2CudaForward(cublasStatus_t stat, cublasHandle_t handle,
37 |                          float *input, float *weight,
38 |                          float *bias, float *ones,
39 |                          float *offset, float *mask,
40 |                          float *output, float *columns,
41 |                          int kernel_h, int kernel_w,
42 |                          const int stride_h, const int stride_w,
43 |                          const int pad_h, const int pad_w,
44 |                          const int dilation_h, const int dilation_w,
45 |                          const int deformable_group, const int batch_id,
46 |                          const int in_n, const int in_c, const int in_h, const int in_w,
47 |                          const int out_n, const int out_c, const int out_h, const int out_w,
48 |                          const int dst_dim, cudaStream_t stream = cudaStream_t(0));
49 | 
50 | void scalAdd(dnnType* dstData, int size, float alpha, float beta, int inc, cudaStream_t stream = cudaStream_t(0));
51 | 
52 | void reflection_pad2d_out_forward(int32_t pad_h,int32_t pad_w,float *srcData,float *dstData,int32_t input_h,int32_t input_w,int32_t plane_dim,int32_t n_batch,cudaStream_t cudaStream = cudaStream_t(0));
53 | 
54 | void constant_pad2d_forward(dnnType *srcData,dnnType *dstData,int32_t input_h,int32_t input_w,int32_t output_h,
55 |                             int32_t output_w,int32_t c,int32_t n,int32_t padT,int32_t padL,dnnType constant,cudaStream_t cudaStream = cudaStream_t(0));
56 | 
57 | 
58 | #endif //KERNELS_H
59 | 


--------------------------------------------------------------------------------
/include/tkDNN/kernelsThrust.h:
--------------------------------------------------------------------------------
 1 | #ifndef KERNELSTHRUST_H
 2 | #define KERNELSTHRUST_H
 3 | 
 4 | 
 5 | #include <thrust/extrema.h>
 6 | #include <thrust/sort.h>
 7 | #include <thrust/execution_policy.h>
 8 | #include <thrust/functional.h>
 9 | #include <thrust/transform.h>
10 | #include <thrust/iterator/constant_iterator.h>
11 | #include <thrust/gather.h>
12 | #include <thrust/copy.h>
13 | #include <thrust/device_ptr.h>
14 | 
15 | 
16 | #include "tkdnn.h"
17 | 
18 | struct threshold : public thrust::binary_function<float,float,float>
19 | {
20 |   __host__ __device__
21 |   float operator()(float x, float y) { 
22 |     double toll = 1e-6; 
23 |     if(fabsf(x-y)>toll) 
24 |         return 0.0f; 
25 |     else
26 |         return x; 
27 |     }
28 | };
29 | 
30 | void sort(dnnType *src_begin, dnnType *src_end, int *idsrc);
31 | void topk(dnnType *src_begin, int *idsrc, int K, float *topk_scores,
32 |             int *topk_inds, float *topk_ys, float *topk_xs);
33 | // void sortAndTopKonDevice(dnnType *src_begin, int *idsrc, float *topk_scores, int *topk_inds, float *topk_ys, float *topk_xs, const int size, const int K, const int n_classes);            
34 | void normalize(float *bgr, const int ch, const int h, const int w, const float *mean, const float *stddev);
35 | void transformDep(float *src_begin, float *src_end, float *dst_begin, float *dst_end);
36 | void subtractWithThreshold(dnnType *src_begin, dnnType *src_end, dnnType *src2_begin, dnnType *src_out, struct threshold op);
37 | void topKxyclasses(int *ids_begin, int *ids_end, const int K, const int size, const int wh, int *clses, int *xs, int *ys);
38 | void topKxyAddOffset(int * ids_begin, const int K, const int size, int *intxs_begin, int *intys_begin, 
39 |                      float *xs_begin, float *ys_begin, dnnType *src_begin, float *src_out, int *ids_out);
40 | void bboxes(int * ids_begin, const int K, const int size, float *xs_begin, float *ys_begin, 
41 |             dnnType *src_begin, float *bbx0, float *bbx1, float *bby0, float *bby1, float *src_out, int *ids_out);
42 | void getRecordsFromTopKId(int * ids_begin, const int K, const int ch, const int size, dnnType *src_begin, float *src_out, int *ids_out);
43 | 
44 | void maxElem(dnnType *src_begin, dnnType *dst_begin, const int c, const int h, const int w);
45 | 
46 | #endif //KERNELSTHRUST_H


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/ActivationLeakyRT.h:
--------------------------------------------------------------------------------
 1 | #include "NvInfer.h"
 2 | #include "../kernels.h"
 3 | #include <cassert>
 4 | #include <vector>
 5 | 
 6 | namespace nvinfer1 {
 7 |     class ActivationLeakyRT : public IPluginV2 {
 8 | 
 9 |     public:
10 |         explicit ActivationLeakyRT(float s);
11 | 
12 |         ActivationLeakyRT(const void *data, size_t length);
13 | 
14 |         ~ActivationLeakyRT();
15 | 
16 |         int getNbOutputs() const NOEXCEPT override;
17 | 
18 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override;
19 | 
20 |         void
21 |         configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type,
22 |                             PluginFormat format, int maxBatchSize) NOEXCEPT override;
23 | 
24 |         int initialize() NOEXCEPT override;
25 | 
26 |         void terminate() NOEXCEPT override {}
27 | 
28 |         size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override;
29 | 
30 | #if NV_TENSORRT_MAJOR > 7
31 |         int enqueue(int batchSize, void const *const *inputs, void *const *outputs, void *workspace,
32 |                     cudaStream_t stream) NOEXCEPT override;
33 | #elif NV_TENSORRT_MAJOR == 7
34 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
35 | #endif
36 | 
37 |         size_t getSerializationSize() const NOEXCEPT override;
38 | 
39 |         void serialize(void *buffer) const NOEXCEPT override;
40 | 
41 |         bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override;
42 | 
43 |         const char *getPluginType() const NOEXCEPT override;
44 | 
45 |         const char *getPluginVersion() const NOEXCEPT override;
46 | 
47 |         void destroy() NOEXCEPT override;
48 | 
49 |         const char *getPluginNamespace() const NOEXCEPT override;
50 | 
51 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override;
52 | 
53 |         IPluginV2 *clone() const NOEXCEPT override;
54 | 
55 |         int size;
56 |         float slope;
57 | 
58 |     private:
59 |         std::string mPluginNamespace;
60 |     };
61 | 
62 |     class ActivationLeakyRTPluginCreator : public IPluginCreator {
63 |     public:
64 |         ActivationLeakyRTPluginCreator();
65 | 
66 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override;
67 | 
68 |         IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override;
69 | 
70 |         const char *getPluginNamespace() const NOEXCEPT override ;
71 | 
72 |         IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ;
73 | 
74 |         const char *getPluginName() const NOEXCEPT override ;
75 | 
76 |         const char *getPluginVersion() const NOEXCEPT override ;
77 | 
78 |         const PluginFieldCollection *getFieldNames() NOEXCEPT override;
79 | 
80 |     private:
81 |         static PluginFieldCollection mFC;
82 |         static std::vector<PluginField> mPluginAttributes;
83 |         std::string mPluginNamespace;
84 |     };
85 | 
86 | 
87 |     REGISTER_TENSORRT_PLUGIN(ActivationLeakyRTPluginCreator);
88 | };


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/ActivationLogisticRT.h:
--------------------------------------------------------------------------------
 1 | #include<cassert>
 2 | #include "../kernels.h"
 3 | #include <NvInfer.h>
 4 | #include <vector>
 5 | #include <utils.h>
 6 | 
 7 | namespace nvinfer1 {
 8 | 
 9 |     class ActivationLogisticRT : public IPluginV2 {
10 | 
11 |     public:
12 |         ActivationLogisticRT() ;
13 | 
14 |         ActivationLogisticRT(const void *data, size_t length) ;
15 | 
16 |         ~ActivationLogisticRT() ;
17 | 
18 |         int getNbOutputs() const NOEXCEPT override ;
19 | 
20 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ;
21 | 
22 |         void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type,
23 |                             PluginFormat format, int maxBatchSize) NOEXCEPT override ;
24 | 
25 |         int initialize() NOEXCEPT override ;
26 | 
27 |         void terminate() NOEXCEPT override ;
28 | 
29 |         size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override;
30 | 
31 | #if NV_TENSORRT_MAJOR > 7
32 |         int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,
33 |                             cudaStream_t stream) NOEXCEPT override ;
34 | #elif NV_TENSORRT_MAJOR == 7
35 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
36 | #endif
37 | 
38 | 
39 |         size_t getSerializationSize() const NOEXCEPT override ;
40 | 
41 |         void serialize(void *buffer) const NOEXCEPT override ;
42 | 
43 |         const char *getPluginType() const NOEXCEPT override ;
44 | 
45 |         const char *getPluginVersion() const NOEXCEPT override ;
46 | 
47 |         void destroy() NOEXCEPT override ;
48 | 
49 |         const char *getPluginNamespace() const NOEXCEPT override ;
50 | 
51 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
52 | 
53 |         bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ;
54 | 
55 |         IPluginV2 *clone() const NOEXCEPT override ;
56 | 
57 |         int size;
58 | 
59 |     private:
60 |         std::string mPluginNamespace;
61 |     };
62 | 
63 |     class ActivationLogisticRTPluginCreator : public IPluginCreator {
64 |     public:
65 |         ActivationLogisticRTPluginCreator() ;
66 | 
67 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
68 | 
69 |         IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ;
70 | 
71 |         const char *getPluginNamespace() const NOEXCEPT override ;
72 | 
73 |         IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ;
74 | 
75 |         const char *getPluginVersion() const NOEXCEPT override ;
76 | 
77 |         const PluginFieldCollection *getFieldNames() NOEXCEPT override ;
78 | 
79 |         const char *getPluginName() const NOEXCEPT override ;
80 | 
81 |     private:
82 |         static PluginFieldCollection mFC;
83 |         static std::vector<PluginField> mPluginAttributes;
84 |         std::string mPluginNamespace;
85 |     };
86 | 
87 |     REGISTER_TENSORRT_PLUGIN(ActivationLogisticRTPluginCreator);
88 | };


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/ActivationMishRT.h:
--------------------------------------------------------------------------------
 1 | #include<cassert>
 2 | #include "../kernels.h"
 3 | #include <NvInfer.h>
 4 | #include <vector>
 5 | 
 6 | namespace nvinfer1 {
 7 |     class ActivationMishRT : public IPluginV2 {
 8 | 
 9 |     public:
10 |         ActivationMishRT() ;
11 | 
12 |         ~ActivationMishRT() ;
13 | 
14 |         ActivationMishRT(const void *data, size_t length) ;
15 | 
16 | 
17 |         int getNbOutputs() const NOEXCEPT override ;
18 | 
19 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ;
20 | 
21 |         void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type,
22 |                             PluginFormat format, int maxBatchSize) NOEXCEPT override ;
23 | 
24 |         int initialize() NOEXCEPT override ;
25 | 
26 |         void terminate() NOEXCEPT override ;
27 | 
28 |          size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ;
29 | #if NV_TENSORRT_MAJOR > 7
30 |         int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,cudaStream_t stream) NOEXCEPT override ;
31 | #elif NV_TENSORRT_MAJOR == 7
32 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
33 | #endif
34 | 
35 |         size_t getSerializationSize() const NOEXCEPT override ;
36 | 
37 |         void serialize(void *buffer) const NOEXCEPT override ;
38 | 
39 |         const char *getPluginType() const NOEXCEPT override ;
40 | 
41 |         const char *getPluginVersion() const NOEXCEPT override ;
42 | 
43 |         void destroy() NOEXCEPT override { delete this; }
44 | 
45 |         bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ;
46 | 
47 |         const char *getPluginNamespace() const NOEXCEPT override ;
48 | 
49 |         void setPluginNamespace(const char *plguinNamespace) NOEXCEPT override ;
50 | 
51 |         IPluginV2 *clone() const NOEXCEPT override ;
52 | 
53 |         int size;
54 |     private:
55 |         std::string mPluginNamespace;
56 |     };
57 | 
58 |     class ActivationMishRTPluginCreator : public IPluginCreator {
59 |     public:
60 |         ActivationMishRTPluginCreator() ;
61 | 
62 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
63 |         const char *getPluginNamespace() const NOEXCEPT override ;
64 | 
65 |         IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ;
66 | 
67 |         IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ;
68 | 
69 |         const char *getPluginName() const NOEXCEPT override ;
70 | 
71 |         const char *getPluginVersion() const NOEXCEPT override ;
72 | 
73 |         const PluginFieldCollection *getFieldNames() NOEXCEPT override ;
74 | 
75 |     private:
76 |         static PluginFieldCollection mFC;
77 |         static std::vector<PluginField> mPluginAttributes;
78 |         std::string mPluginNamespace;
79 |     };
80 | 
81 |     REGISTER_TENSORRT_PLUGIN(ActivationMishRTPluginCreator);
82 | };


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/ActivationReLUCeilingRT.h:
--------------------------------------------------------------------------------
 1 | #include<cassert>
 2 | #include "../kernels.h"
 3 | #include <NvInfer.h>
 4 | #include <vector>
 5 | #include <utils.h>
 6 | 
 7 | namespace nvinfer1 {
 8 |     class ActivationReLUCeiling : public IPluginV2 {
 9 | 
10 |     public:
11 |         explicit ActivationReLUCeiling(const float ceiling) ;
12 | 
13 |         ~ActivationReLUCeiling() ;
14 | 
15 |         ActivationReLUCeiling(const void *data, size_t length) ;
16 | 
17 |         int getNbOutputs() const NOEXCEPT override ;
18 | 
19 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ;
20 | 
21 |         void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type,PluginFormat format, int maxBatchSize) NOEXCEPT override ;
22 | 
23 |         int initialize() NOEXCEPT override ;
24 | 
25 |         void terminate() NOEXCEPT override ;
26 | 
27 |         size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ;
28 | #if NV_TENSORRT_MAJOR > 7
29 |         int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,cudaStream_t stream) NOEXCEPT override ;
30 | #elif NV_TENSORRT_MAJOR == 7
31 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
32 | #endif
33 | 
34 |         size_t getSerializationSize() const NOEXCEPT override ;
35 | 
36 |         void serialize(void *buffer) const NOEXCEPT override ;
37 | 
38 |         IPluginV2 *clone() const NOEXCEPT override ;
39 | 
40 |         bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ;
41 | 
42 |         void destroy() NOEXCEPT override ;
43 | 
44 |         const char *getPluginType() const NOEXCEPT override ;
45 | 
46 |         const char *getPluginVersion() const NOEXCEPT override ;
47 | 
48 |         const char *getPluginNamespace() const NOEXCEPT override ;
49 | 
50 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
51 |         int size;
52 |         float ceiling;
53 |     private:
54 |         std::string mPluginNamespace;
55 |     };
56 | 
57 |     class ActivationReLUCeilingPluginCreator : public IPluginCreator {
58 |     public:
59 |         ActivationReLUCeilingPluginCreator() ;
60 | 
61 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
62 | 
63 |         const char *getPluginNamespace() const NOEXCEPT override ;
64 | 
65 |         IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ;
66 | 
67 |         IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ;
68 | 
69 |         const char *getPluginName() const NOEXCEPT override ;
70 |         const char *getPluginVersion() const NOEXCEPT override ;
71 | 
72 |         const PluginFieldCollection *getFieldNames() NOEXCEPT override ;
73 | 
74 |     public:
75 |         static PluginFieldCollection mFC;
76 |         static std::vector<PluginField> mPluginAttributes;
77 |         std::string mPluginNamespace;
78 |     };
79 | 
80 |     REGISTER_TENSORRT_PLUGIN(ActivationReLUCeilingPluginCreator);
81 | };


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/ActivationSigmoidRT.h:
--------------------------------------------------------------------------------
 1 | #include<cassert>
 2 | #include "../kernels.h"
 3 | 
 4 | class ActivationSigmoidRT : public IPlugin {
 5 | 
 6 | public:
 7 | 	ActivationSigmoidRT() {
 8 | 
 9 | 
10 | 	}
11 | 
12 | 	~ActivationSigmoidRT(){
13 | 
14 | 	}
15 | 
16 | 	int getNbOutputs() const override {
17 | 		return 1;
18 | 	}
19 | 
20 | 	Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override {
21 | 		return inputs[0];
22 | 	}
23 | 
24 | 	void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) override {
25 | 		size = 1;
26 | 		for(int i=0; i<outputDims[0].nbDims; i++)
27 | 			size *= outputDims[0].d[i];
28 | 	}
29 | 
30 | 	int initialize() override {
31 | 
32 | 		return 0;
33 | 	}
34 | 
35 | 	virtual void terminate() override {
36 | 	}
37 | 
38 | 	virtual size_t getWorkspaceSize(int maxBatchSize) const override {
39 | 		return 0;
40 | 	}
41 | 
42 | 	virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override {
43 | 
44 | 		activationSIGMOIDForward((dnnType*)reinterpret_cast<const dnnType*>(inputs[0]), 
45 | 											reinterpret_cast<dnnType*>(outputs[0]), batchSize*size, stream);
46 | 		return 0;
47 | 	}
48 | 
49 | 
50 | 	virtual size_t getSerializationSize() override {
51 | 		return 1*sizeof(int);
52 | 	}
53 | 
54 | 	virtual void serialize(void* buffer) override {
55 | 		char *buf = reinterpret_cast<char*>(buffer),*a=buf;
56 | 		tk::dnn::writeBUF(buf, size);
57 | 		assert(buf == a + getSerializationSize());
58 | 	}
59 | 
60 | 	int size;
61 | };
62 | 


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/FlattenConcatRT.h:
--------------------------------------------------------------------------------
  1 | #ifndef _FLATTENCONCATRT_PLUGIN_H
  2 | #define _FLATTENCONCATRT_PLUGIN_H
  3 | 
  4 | #include<cassert>
  5 | #include <NvInfer.h>
  6 | #include <vector>
  7 | #include <utils.h>
  8 | namespace nvinfer1 {
  9 |     class FlattenConcatRT : public IPluginV2Ext {
 10 | 
 11 |     public:
 12 |         FlattenConcatRT(int c,int h,int w,int rows,int cols) ;
 13 | 
 14 |         FlattenConcatRT(const void *data, size_t length) ;
 15 | 
 16 |         ~FlattenConcatRT() ;
 17 | 
 18 |         int getNbOutputs() const NOEXCEPT override ;
 19 | 
 20 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ;
 21 | 
 22 |         int initialize() NOEXCEPT override ;
 23 | 
 24 |         void terminate() NOEXCEPT override ;
 25 | 
 26 |         size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ;
 27 | 
 28 | #if NV_TENSORRT_MAJOR > 7
 29 |         int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) NOEXCEPT override ;
 30 | #elif NV_TENSORRT_MAJOR <= 7
 31 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
 32 | #endif
 33 | 
 34 |         size_t getSerializationSize() const NOEXCEPT override ;
 35 | 
 36 |         void serialize(void *buffer) const NOEXCEPT override ;
 37 | 
 38 |         void destroy() NOEXCEPT override ;
 39 | 
 40 |         const char *getPluginType() const NOEXCEPT override ;
 41 | 
 42 |         const char *getPluginVersion() const NOEXCEPT override;
 43 | 
 44 |         const char *getPluginNamespace() const NOEXCEPT override ;
 45 | 
 46 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
 47 | 
 48 |         IPluginV2Ext *clone() const NOEXCEPT override ;
 49 | 
 50 |         DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const NOEXCEPT override;
 51 | 
 52 |         void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) NOEXCEPT override;
 53 | 
 54 |         bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const NOEXCEPT override;
 55 | 
 56 |         bool canBroadcastInputAcrossBatch(int inputIndex) const NOEXCEPT override;
 57 | 
 58 |         void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims,
 59 |                             int32_t nbOutputs, DataType const *inputTypes, DataType const *outputTypes,
 60 |                             bool const *inputIsBroadcast, bool const *outputIsBroadcast, PluginFormat floatFormat,
 61 |                             int32_t maxBatchSize) NOEXCEPT override;
 62 | 
 63 |         void detachFromContext() NOEXCEPT override;
 64 | 
 65 |         bool supportsFormat (DataType type, PluginFormat format) const NOEXCEPT override;
 66 | 
 67 |         int c, h, w;
 68 |         int rows, cols;
 69 |         cublasHandle_t handle{nullptr};
 70 |     private:
 71 |         std::string mPluginNamespace;
 72 |     };
 73 | 
 74 |     class FlattenConcatRTPluginCreator : public IPluginCreator {
 75 |     public:
 76 |         FlattenConcatRTPluginCreator() ;
 77 | 
 78 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
 79 | 
 80 |         const char *getPluginNamespace() const NOEXCEPT override ;
 81 | 
 82 |         IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ;
 83 | 
 84 |         IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ;
 85 | 
 86 |         const char *getPluginName() const NOEXCEPT override ;
 87 | 
 88 |         const char *getPluginVersion() const NOEXCEPT override;
 89 | 
 90 |         const PluginFieldCollection *getFieldNames() NOEXCEPT override ;
 91 | 
 92 |     private:
 93 |         static PluginFieldCollection mFC;
 94 |         static std::vector<PluginField> mPluginAttributes;
 95 |         std::string mPluginNamespace;
 96 |     };
 97 | 
 98 |     REGISTER_TENSORRT_PLUGIN(FlattenConcatRTPluginCreator);
 99 | };
100 | #endif


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/ReorgRT.h:
--------------------------------------------------------------------------------
 1 | #include<cassert>
 2 | #include "../kernels.h"
 3 | #include <NvInfer.h>
 4 | #include <vector>
 5 | 
 6 | namespace nvinfer1 {
 7 |     class ReorgRT : public IPluginV2Ext {
 8 | 
 9 |     public:
10 |         ReorgRT(int stride,int c,int h,int w);
11 | 
12 |         ~ReorgRT();
13 | 
14 |         ReorgRT(const void *data, size_t length);
15 | 
16 |         int getNbOutputs() const NOEXCEPT override;
17 | 
18 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override;
19 | 
20 |         int initialize() NOEXCEPT override;
21 | 
22 |         void terminate() NOEXCEPT override;
23 | 
24 |         size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override;
25 | 
26 | #if NV_TENSORRT_MAJOR > 7
27 |         int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,
28 |                     cudaStream_t stream) NOEXCEPT override;
29 | #elif NV_TENSORRT_MAJOR == 7
30 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
31 | #endif
32 | 
33 | 
34 |         size_t getSerializationSize() const NOEXCEPT override;
35 | 
36 |         void serialize(void *buffer) const NOEXCEPT override;
37 | 
38 |         bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override;
39 | 
40 |         const char *getPluginType() const NOEXCEPT override;
41 | 
42 |         const char *getPluginVersion() const NOEXCEPT override;
43 | 
44 |         void destroy() NOEXCEPT override;
45 | 
46 |         const char *getPluginNamespace() const NOEXCEPT override;
47 | 
48 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override;
49 | 
50 |         IPluginV2Ext *clone() const NOEXCEPT override;
51 | 
52 |         DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const NOEXCEPT override;
53 | 
54 |         void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) NOEXCEPT override;
55 | 
56 |         bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const NOEXCEPT override;
57 | 
58 |         bool canBroadcastInputAcrossBatch(int inputIndex) const NOEXCEPT override;
59 | 
60 |         void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims,
61 |                               int32_t nbOutputs, DataType const *inputTypes, DataType const *outputTypes,
62 |                               bool const *inputIsBroadcast, bool const *outputIsBroadcast, PluginFormat floatFormat,
63 |                               int32_t maxBatchSize) NOEXCEPT override;
64 | 
65 |         void detachFromContext() NOEXCEPT override;
66 | 
67 |         int c, h, w, stride;
68 |     private:
69 |         std::string mPluginNamespace;
70 |     };
71 | 
72 |     class ReorgRTPluginCreator : public IPluginCreator {
73 |     public:
74 |         ReorgRTPluginCreator();
75 | 
76 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override;
77 | 
78 |         const char *getPluginNamespace() const NOEXCEPT override;
79 | 
80 |         IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override;
81 | 
82 |         IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override;
83 | 
84 |         const char *getPluginName() const NOEXCEPT override;
85 | 
86 |         const char *getPluginVersion() const NOEXCEPT override;
87 | 
88 |         const PluginFieldCollection *getFieldNames() NOEXCEPT override;
89 | 
90 |     private:
91 |         static PluginFieldCollection mFC;
92 |         static std::vector<PluginField> mPluginAttributes;
93 |         std::string mPluginNamespace;
94 |     };
95 | 
96 |     REGISTER_TENSORRT_PLUGIN(ReorgRTPluginCreator);
97 | };
98 | 
99 | 


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/ReshapeRT.h:
--------------------------------------------------------------------------------
  1 | #ifndef _RESHAPERT_PLUGIN_H
  2 | #define _RESHAPERT_PLUGIN_H
  3 | 
  4 | #include<cassert>
  5 | #include <NvInfer.h>
  6 | #include <vector>
  7 | #include <tkdnn.h>
  8 | 
  9 | 
 10 | namespace nvinfer1 {
 11 |     class ReshapeRT : public IPluginV2Ext {
 12 | 
 13 |     public:
 14 |         ReshapeRT(int n,int c,int h,int w) ;
 15 | 
 16 |         ReshapeRT(const void *data, size_t length) ;
 17 | 
 18 |         ~ReshapeRT() ;
 19 | 
 20 |         int getNbOutputs() const NOEXCEPT override ;
 21 | 
 22 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ;
 23 | 
 24 | 
 25 |         int initialize() NOEXCEPT override ;
 26 | 
 27 |         void terminate() NOEXCEPT override ;
 28 | 
 29 |         size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ;
 30 | 
 31 | #if NV_TENSORRT_MAJOR > 7
 32 |         int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) NOEXCEPT override ;
 33 | #elif NV_TENSORRT_MAJOR == 7
 34 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
 35 | #endif
 36 | 
 37 |         size_t getSerializationSize() const NOEXCEPT override ;
 38 | 
 39 |         void serialize(void *buffer) const NOEXCEPT override ;
 40 | 
 41 |         bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ;
 42 | 
 43 |         const char *getPluginType() const NOEXCEPT override ;
 44 | 
 45 |         const char *getPluginVersion() const NOEXCEPT override ;
 46 | 
 47 |         void destroy() NOEXCEPT override ;
 48 | 
 49 |         const char *getPluginNamespace() const NOEXCEPT override ;
 50 | 
 51 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
 52 | 
 53 |         IPluginV2Ext *clone() const NOEXCEPT override ;
 54 | 
 55 |         DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const NOEXCEPT override;
 56 | 
 57 |         void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) NOEXCEPT override;
 58 | 
 59 |         bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const NOEXCEPT override;
 60 | 
 61 |         bool canBroadcastInputAcrossBatch(int inputIndex) const NOEXCEPT override;
 62 | 
 63 |         void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims,
 64 |                               int32_t nbOutputs, DataType const *inputTypes, DataType const *outputTypes,
 65 |                               bool const *inputIsBroadcast, bool const *outputIsBroadcast, PluginFormat floatFormat,
 66 |                               int32_t maxBatchSize) NOEXCEPT override;
 67 | 
 68 |         void detachFromContext() NOEXCEPT override;
 69 | 
 70 |         int n, c, h, w;
 71 |     private:
 72 |         std::string mPluginNamespace;
 73 |     };
 74 | 
 75 |     class ReshapeRTPluginCreator : public IPluginCreator {
 76 |     public:
 77 |         ReshapeRTPluginCreator() ;
 78 | 
 79 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
 80 | 
 81 |         const char *getPluginNamespace() const NOEXCEPT override ;
 82 | 
 83 |         IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ;
 84 | 
 85 |         IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ;
 86 | 
 87 |         const char *getPluginName() const NOEXCEPT override ;
 88 | 
 89 |         const char *getPluginVersion() const NOEXCEPT override ;
 90 | 
 91 |         const PluginFieldCollection *getFieldNames() NOEXCEPT override ;
 92 | 
 93 |     private:
 94 |         static PluginFieldCollection mFC;
 95 |         static std::vector<PluginField> mPluginAttributes;
 96 |         std::string mPluginNamespace;
 97 |     };
 98 | 
 99 |     REGISTER_TENSORRT_PLUGIN(ReshapeRTPluginCreator);
100 | };
101 | #endif


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/ResizeLayerRT.h:
--------------------------------------------------------------------------------
  1 | #include<cassert>
  2 | #include "../kernels.h"
  3 | #include <NvInfer.h>
  4 | #include <vector>
  5 | #include <utils.h>
  6 | 
  7 | namespace nvinfer1 {
  8 | 
  9 |     class ResizeLayerRT : public IPluginV2Ext {
 10 | 
 11 |     public:
 12 |         ResizeLayerRT(int oc, int oh, int ow,int ic,int ih,int iw) ;
 13 | 
 14 |         ResizeLayerRT(const void *data, size_t length) ;
 15 | 
 16 |         ~ResizeLayerRT() ;
 17 | 
 18 |         int getNbOutputs() const NOEXCEPT override ;
 19 | 
 20 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ;
 21 | 
 22 |         int initialize() NOEXCEPT override ;
 23 | 
 24 |         void terminate() NOEXCEPT override ;
 25 | 
 26 |         size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ;
 27 | 
 28 | #if NV_TENSORRT_MAJOR > 7
 29 |         int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,
 30 |                             cudaStream_t stream) NOEXCEPT override ;
 31 | #elif NV_TENSORRT_MAJOR <= 7
 32 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
 33 | #endif
 34 | 
 35 |         size_t getSerializationSize() const NOEXCEPT override ;
 36 | 
 37 |         void serialize(void *buffer) const NOEXCEPT override ;
 38 | 
 39 |         bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ;
 40 | 
 41 |         const char *getPluginType() const NOEXCEPT override ;
 42 | 
 43 |         const char *getPluginVersion() const NOEXCEPT override ;
 44 | 
 45 |         void destroy() NOEXCEPT override ;
 46 | 
 47 |         const char *getPluginNamespace() const NOEXCEPT override ;
 48 | 
 49 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
 50 | 
 51 |         IPluginV2Ext *clone() const NOEXCEPT override ;
 52 | 
 53 |         DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const NOEXCEPT override;
 54 | 
 55 |         void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) NOEXCEPT override;
 56 | 
 57 |         bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const NOEXCEPT override;
 58 | 
 59 |         bool canBroadcastInputAcrossBatch(int inputIndex) const NOEXCEPT override;
 60 | 
 61 |         void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims,
 62 |                               int32_t nbOutputs, DataType const *inputTypes, DataType const *outputTypes,
 63 |                               bool const *inputIsBroadcast, bool const *outputIsBroadcast, PluginFormat floatFormat,
 64 |                               int32_t maxBatchSize) NOEXCEPT override;
 65 | 
 66 |         void detachFromContext() NOEXCEPT override;
 67 | 
 68 |         int i_c, i_h, i_w, o_c, o_h, o_w;
 69 | 
 70 |     private:
 71 |         std::string mPluginNamespace;
 72 |     };
 73 | 
 74 |     class ResizeLayerRTPluginCreator : public IPluginCreator {
 75 |     public:
 76 |         ResizeLayerRTPluginCreator() ;
 77 | 
 78 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
 79 | 
 80 |         const char *getPluginNamespace() const NOEXCEPT override ;
 81 | 
 82 |         IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ;
 83 | 
 84 |         IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ;
 85 | 
 86 |         const char *getPluginName() const NOEXCEPT override ;
 87 | 
 88 |         const char *getPluginVersion() const NOEXCEPT override ;
 89 | 
 90 |         const PluginFieldCollection *getFieldNames() NOEXCEPT override ;
 91 | 
 92 | 
 93 | 
 94 | 
 95 |     private:
 96 |         static PluginFieldCollection mFC;
 97 |         static std::vector<PluginField> mPluginAttributes;
 98 |         std::string mPluginNamespace;
 99 | 
100 |     };
101 | 
102 |     REGISTER_TENSORRT_PLUGIN(ResizeLayerRTPluginCreator);
103 | };
104 | 
105 | 


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/RouteRT.h:
--------------------------------------------------------------------------------
 1 | #include<cassert>
 2 | #include "../kernels.h"
 3 | #include <vector>
 4 | #include <NvInfer.h>
 5 | 
 6 | namespace nvinfer1 {
 7 |     class RouteRT : public IPluginV2 {
 8 | 
 9 |         /**
10 |             THIS IS NOT USED ANYMORE
11 |         */
12 | 
13 |     public:
14 |         RouteRT(int groups, int group_id) ;
15 | 
16 |         ~RouteRT() ;
17 | 
18 |         RouteRT(const void *data, size_t length) ;
19 | 
20 |         int getNbOutputs() const NOEXCEPT override ;
21 | 
22 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override ;
23 | 
24 |         void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type,PluginFormat format, int maxBatchSize) NOEXCEPT override ;
25 | 
26 |         int initialize() NOEXCEPT override ;
27 | 
28 |         void terminate() NOEXCEPT override ;
29 | 
30 |         size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override ;
31 | 
32 | #if NV_TENSORRT_MAJOR > 7
33 |         int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,cudaStream_t stream) NOEXCEPT override ;
34 | #elif NV_TENSORRT_MAJOR == 7
35 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
36 | #endif
37 | 
38 |         size_t getSerializationSize() const NOEXCEPT override ;
39 | 
40 |         void serialize(void *buffer) const NOEXCEPT override ;
41 | 
42 |         const char *getPluginType() const NOEXCEPT override ;
43 | 
44 |         const char *getPluginVersion() const NOEXCEPT override ;
45 | 
46 |         void destroy() NOEXCEPT override ;
47 | 
48 |         const char *getPluginNamespace() const NOEXCEPT override ;
49 | 
50 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
51 | 
52 |         bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override ;
53 | 
54 |         IPluginV2 *clone() const NOEXCEPT override ;
55 | 
56 |         static const int MAX_INPUTS = 4;
57 |         int in;
58 |         int c_in[MAX_INPUTS];
59 |         int c, h, w;
60 |         int groups, group_id;
61 |     private:
62 |         std::string mPluginNamespace;
63 |     };
64 | 
65 |     class RouteRTPluginCreator : public IPluginCreator {
66 |     public:
67 |         RouteRTPluginCreator() ;
68 | 
69 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override ;
70 | 
71 |         const char *getPluginNamespace() const NOEXCEPT override ;
72 | 
73 |         IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override ;
74 | 
75 |         IPluginV2 *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override ;
76 | 
77 |         const char *getPluginName() const NOEXCEPT override ;
78 | 
79 |         const char *getPluginVersion() const NOEXCEPT override ;
80 | 
81 |         const PluginFieldCollection *getFieldNames() NOEXCEPT override ;
82 | 
83 |     private:
84 |         static PluginFieldCollection mFC;
85 |         static std::vector<PluginField> mPluginAttributes;
86 |         std::string mPluginNamespace;
87 |     };
88 | 
89 |     REGISTER_TENSORRT_PLUGIN(RouteRTPluginCreator);
90 | };
91 | 


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/ShortcutRT.h:
--------------------------------------------------------------------------------
  1 | #ifndef _SHORTCUTRT_PLUGIN_H
  2 | #define _SHORTCUTRT_PLUGIN_H
  3 | 
  4 | #include<cassert>
  5 | #include "../kernels.h"
  6 | #include <NvInfer.h>
  7 | #include <vector>
  8 | #include <tkdnn.h>
  9 | 
 10 | 
 11 | namespace nvinfer1 {
 12 | 
 13 |     class ShortcutRT : public IPluginV2Ext {
 14 | 
 15 |     public:
 16 |         ShortcutRT(int bc,int bh,int bw,int c,int h,int w ,bool mul);
 17 | 
 18 |         ~ShortcutRT();
 19 | 
 20 |         ShortcutRT(const void *data, size_t length);
 21 | 
 22 |         int getNbOutputs() const NOEXCEPT override;
 23 | 
 24 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override;
 25 | 
 26 |         void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims, int32_t nbOutputs,
 27 |                               DataType const *inputTypes, DataType const *outputTypes, bool const *inputIsBroadcast,
 28 |                               bool const *outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) NOEXCEPT override;
 29 | 
 30 |         bool isOutputBroadcastAcrossBatch (int32_t outputIndex, bool const *inputIsBroadcasted, int32_t nbInputs) const NOEXCEPT override;
 31 | 
 32 |         bool canBroadcastInputAcrossBatch (int32_t inputIndex) const NOEXCEPT override;
 33 | 
 34 |         void attachToContext (cudnnContext *, cublasContext *, IGpuAllocator *) NOEXCEPT override;
 35 | 
 36 |         void detachFromContext () NOEXCEPT override;
 37 | 
 38 |         DataType getOutputDataType(int32_t index, nvinfer1::DataType const *inputTypes, int32_t nbInputs) const NOEXCEPT override;
 39 | 
 40 |         int initialize() NOEXCEPT override;
 41 | 
 42 |         void terminate() NOEXCEPT override;
 43 | 
 44 |         size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override;
 45 | 
 46 | #if NV_TENSORRT_MAJOR > 7
 47 |         int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,
 48 |                     cudaStream_t stream) NOEXCEPT override;
 49 | #elif NV_TENSORRT_MAJOR == 7
 50 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
 51 | #endif
 52 | 
 53 | 
 54 |         size_t getSerializationSize() const NOEXCEPT override;
 55 | 
 56 |         void serialize(void *buffer) const NOEXCEPT override;
 57 | 
 58 |         bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override;
 59 | 
 60 |         const char *getPluginType() const NOEXCEPT override;
 61 | 
 62 |         const char *getPluginVersion() const NOEXCEPT override;
 63 | 
 64 |         void destroy() NOEXCEPT override;
 65 | 
 66 |         const char *getPluginNamespace() const NOEXCEPT override;
 67 | 
 68 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override;
 69 | 
 70 |         IPluginV2Ext *clone() const NOEXCEPT override;
 71 | 
 72 |         int c, h, w;
 73 |         int bc, bh, bw,bl;
 74 |         bool mul;
 75 |         tk::dnn::dataDim_t bDim;
 76 |     private:
 77 |         std::string mPluginNamespace;
 78 |     };
 79 | 
 80 | 
 81 |     class ShortcutRTPluginCreator : public IPluginCreator {
 82 |     public:
 83 |         ShortcutRTPluginCreator();
 84 | 
 85 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override;
 86 | 
 87 |         const char *getPluginNamespace() const NOEXCEPT override;
 88 | 
 89 |         IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override;
 90 | 
 91 |         IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override;
 92 | 
 93 |         const char *getPluginName() const NOEXCEPT override;
 94 | 
 95 |         const char *getPluginVersion() const NOEXCEPT override;
 96 | 
 97 |         const PluginFieldCollection *getFieldNames() NOEXCEPT override;
 98 | 
 99 |     public:
100 |         static PluginFieldCollection mFC;
101 |         static std::vector<PluginField> mPluginAttributes;
102 |         std::string mPluginNamespace;
103 |     };
104 | 
105 |     REGISTER_TENSORRT_PLUGIN(ShortcutRTPluginCreator);
106 | 
107 | };
108 | 
109 | #endif


--------------------------------------------------------------------------------
/include/tkDNN/pluginsRT/UpsampleRT.h:
--------------------------------------------------------------------------------
  1 | #ifndef _UPSAMPLERT_PLUGIN_H
  2 | #define _UPSAMPLERT_PLUGIN_H
  3 | 
  4 | #include<cassert>
  5 | #include "../kernels.h"
  6 | #include <NvInfer.h>
  7 | #include <vector>
  8 | 
  9 | namespace nvinfer1 {
 10 | 
 11 |     class UpsampleRT : public IPluginV2Ext {
 12 | 
 13 |     public:
 14 |         UpsampleRT(int stride,int c,int h,int w);
 15 | 
 16 |         UpsampleRT(const void *data, size_t length);
 17 | 
 18 |         ~UpsampleRT();
 19 | 
 20 |         int getNbOutputs() const NOEXCEPT override;
 21 | 
 22 |         Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) NOEXCEPT override;
 23 | 
 24 |         int initialize() NOEXCEPT override;
 25 | 
 26 |         void terminate() NOEXCEPT override;
 27 | 
 28 |         size_t getWorkspaceSize(int maxBatchSize) const NOEXCEPT override;
 29 | 
 30 | #if NV_TENSORRT_MAJOR > 7
 31 |         int enqueue(int batchSize, const void *const *inputs, void *const *outputs, void *workspace,
 32 |                             cudaStream_t stream) NOEXCEPT override;
 33 | #elif NV_TENSORRT_MAJOR == 7
 34 |         int32_t enqueue (int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) override;
 35 | #endif
 36 | 
 37 | 
 38 |         size_t getSerializationSize() const NOEXCEPT override;
 39 | 
 40 |         void serialize(void *buffer) const NOEXCEPT override;
 41 | 
 42 |         bool supportsFormat(DataType type, PluginFormat format) const NOEXCEPT override;
 43 | 
 44 |         const char *getPluginType() const NOEXCEPT override;
 45 | 
 46 |         const char *getPluginVersion() const NOEXCEPT override;
 47 | 
 48 |         void destroy() NOEXCEPT override;
 49 | 
 50 |         const char *getPluginNamespace() const NOEXCEPT override;
 51 | 
 52 |         void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override;
 53 | 
 54 |         IPluginV2Ext *clone() const NOEXCEPT override ;
 55 | 
 56 |         bool isOutputBroadcastAcrossBatch (int32_t outputIndex, bool const *inputIsBroadcasted, int32_t nbInputs) const NOEXCEPT override;
 57 | 
 58 |         bool canBroadcastInputAcrossBatch (int32_t inputIndex) const NOEXCEPT override;
 59 | 
 60 |         void configurePlugin (Dims const *inputDims, int32_t nbInputs, Dims const *outputDims, int32_t nbOutputs,
 61 |                               DataType const *inputTypes, DataType const *outputTypes, bool const *inputIsBroadcast,
 62 |                               bool const *outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) NOEXCEPT override;
 63 | 
 64 |         void attachToContext (cudnnContext *, cublasContext *, IGpuAllocator *) NOEXCEPT override;
 65 | 
 66 |         void detachFromContext () NOEXCEPT override;
 67 | 
 68 |         DataType getOutputDataType (int32_t index, nvinfer1::DataType const *inputTypes, int32_t nbInputs) const NOEXCEPT override;
 69 | 
 70 | 
 71 |         int c, h, w, stride;
 72 |     private:
 73 |         std::string mPluginNamespace;
 74 |     };
 75 | 
 76 |         class UpsampleRTPluginCreator : public IPluginCreator {
 77 |         public:
 78 |             UpsampleRTPluginCreator();
 79 | 
 80 |             void setPluginNamespace(const char *pluginNamespace) NOEXCEPT override;
 81 | 
 82 |             const char *getPluginNamespace() const NOEXCEPT override;
 83 | 
 84 |             IPluginV2Ext *deserializePlugin(const char *name, const void *serialData, size_t serialLength) NOEXCEPT override;
 85 | 
 86 |             IPluginV2Ext *createPlugin(const char *name, const PluginFieldCollection *fc) NOEXCEPT override;
 87 | 
 88 |             const char *getPluginName() const NOEXCEPT override;
 89 | 
 90 |             const char *getPluginVersion() const NOEXCEPT override;
 91 | 
 92 |             const PluginFieldCollection *getFieldNames() NOEXCEPT override;
 93 | 
 94 |         private:
 95 |             static PluginFieldCollection mFC;
 96 |             static std::vector<PluginField> mPluginAttributes;
 97 |             std::string mPluginNamespace;
 98 |         };
 99 | 
100 |         REGISTER_TENSORRT_PLUGIN(UpsampleRTPluginCreator);
101 |     };
102 | 
103 | #endif


--------------------------------------------------------------------------------
/include/tkDNN/test.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <tkdnn.h>
 3 | int testInference(std::vector<std::string> input_bins, std::vector<std::string> output_bins, 
 4 |     tk::dnn::Network *net, tk::dnn::NetworkRT *netRT = nullptr) {
 5 | 
 6 |     std::vector<tk::dnn::Layer*> outputs;
 7 |     for(int i=0; i<net->num_layers; i++) {
 8 |         if(net->layers[i]->final)
 9 |             outputs.push_back(net->layers[i]);
10 |     }
11 |     // no final layers, set last as output
12 |     if(outputs.size() == 0) {
13 |         outputs.push_back(net->layers[net->num_layers-1]);
14 |     }
15 | 
16 | 
17 |     // check input
18 |     if(input_bins.size() != 1) {
19 |         FatalError("currently support only 1 input");
20 |     }
21 |     if(output_bins.size() != outputs.size()) {
22 |         std::cout<<output_bins.size()<<" "<<outputs.size()<<"\n";
23 |         FatalError("outputs size mismatch");
24 |     }
25 | 
26 |     // Load input
27 |     dnnType *data;
28 |     dnnType *input_h;
29 |     readBinaryFile(input_bins[0], net->input_dim.tot(), &input_h, &data);
30 | 
31 |     // outputs
32 |     //dnnType *cudnn_out[outputs.size()], *rt_out[outputs.size()];
33 |     std::vector<dnnType *> cudnn_out,rt_out;
34 | 
35 |     tk::dnn::dataDim_t dim1 =  net->input_dim; //input dim
36 |     printCenteredTitle(" CUDNN inference ", '=', 30); {
37 |         dim1.print();
38 |         TKDNN_TSTART
39 |         net->infer(dim1, data);    
40 |         TKDNN_TSTOP
41 |         dim1.print();   
42 |     }
43 |     for(int i=0; i<outputs.size(); i++) cudnn_out.push_back(outputs[i]->dstData);
44 | 
45 |     if(netRT != nullptr) {
46 |         tk::dnn::dataDim_t dim2 = net->input_dim;
47 |         printCenteredTitle(" TENSORRT inference ", '=', 30); {
48 |             dim2.print();
49 |             TKDNN_TSTART
50 |             netRT->infer(dim2, data);
51 |             TKDNN_TSTOP
52 |             dim2.print();
53 |         }
54 |         for(int i=0; i<outputs.size(); i++) rt_out.push_back((dnnType*)netRT->buffersRT[i+1]);
55 |     }
56 | 
57 |     int ret_cudnn = 0, ret_tensorrt = 0, ret_cudnn_tensorrt = 0; 
58 |     for(int i=0; i<outputs.size(); i++) {
59 |         printCenteredTitle((std::string(" OUTPUT ") + std::to_string(i) + " CHECK RESULTS ").c_str(), '=', 30);
60 |         dnnType *out, *out_h;
61 |         int odim = outputs[i]->output_dim.tot();
62 |         readBinaryFile(output_bins[i], odim, &out_h, &out);
63 |         std::cout<<"CUDNN vs correct"; 
64 |         ret_cudnn |= checkResult(odim, cudnn_out[i], out) == 0 ? 0: ERROR_CUDNN;
65 |         if(netRT != nullptr) {
66 |             std::cout<<"TRT   vs correct"; 
67 |             ret_tensorrt |= checkResult(odim, rt_out[i], out) == 0 ? 0 : ERROR_TENSORRT;
68 |             std::cout<<"CUDNN vs TRT    "; 
69 |             ret_cudnn_tensorrt |= checkResult(odim, cudnn_out[i], rt_out[i]) == 0 ? 0 : ERROR_CUDNNvsTENSORRT;
70 |         }
71 | 
72 |         delete [] out_h;
73 |         checkCuda( cudaFree(out) );
74 |     }
75 |     delete [] input_h;
76 |     checkCuda( cudaFree(data) );
77 |     return ret_cudnn | ret_tensorrt | ret_cudnn_tensorrt;
78 | }


--------------------------------------------------------------------------------
/include/tkDNN/tkdnn.h:
--------------------------------------------------------------------------------
1 | /**
2 |     This is the core header of the library, it should be used only this
3 | */
4 | #include "Network.h"
5 | #include "Layer.h"
6 | #include "NetworkRT.h"
7 | 
8 | #define TKDNN_VERSION 700
9 | 


--------------------------------------------------------------------------------
/scripts/checkExecTimes.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import pandas as pd 
 3 | 
 4 | if len(sys.argv) < 3:
 5 |     print("Error: two csv files are needed, old first new second")
 6 |     exit(1)
 7 | 
 8 | old_perf_file = str(sys.argv[1])
 9 | new_perf_file = str(sys.argv[2])
10 | 
11 | verbose = False
12 | if len(sys.argv) == 4:
13 |     verbose = bool(sys.argv[3])
14 | 
15 | print("Comparing {} vs {}".format(old_perf_file, new_perf_file))
16 | 
17 | df_old = pd.read_csv (old_perf_file, sep=';', header=None, index_col=0)
18 | df_new = pd.read_csv (new_perf_file, sep=';', header=None, index_col=0)
19 | 
20 | for index, row in df_new.iterrows():
21 |     if index in df_old.index:
22 |         if verbose:
23 |             print("New: ",row[1], row[2], row[3])
24 |             print("Old: ",df_old.loc[index][1], df_old.loc[index][2], df_old.loc[index][3])
25 | 
26 |         print(index, end=': ')
27 |         if abs(row[1] - df_old.loc[index][1]) < df_old.loc[index][1]*0.1: 
28 |             print("similar performance")
29 |         elif (row[1] < df_old.loc[index][1]): 
30 |             print('\x1b[3;30;42m' + 'faster' + '\x1b[0m')
31 |         elif (row[1] > df_old.loc[index][1]): 
32 |             if row[1] > df_old.loc[index][1] + df_old.loc[index][1] * 0.5 :
33 |                 print('\x1b[3;30;41m' + 'WAY SLOWER' + '\x1b[0m')
34 |             else:
35 |                 print('\x1b[3;30;41m' + 'slower' + '\x1b[0m')
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/scripts/download_validation.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import urllib.request as dowReq
 3 | import zipfile
 4 | 
 5 | val = input("Enter BDD or COCO :")
 6 | if(val == "COCO"):
 7 |     url = "https://cloud.hipert.unimore.it/s/LNxBDk4wzqXPL8c/download"
 8 |     lib = "..\demo\COCO_val2017"
 9 |     lib_zip = "COCO_val2017.zip"
10 | elif(val == "BDD"):
11 |     url = "https://cloud.hipert.unimore.it/s/bikqk3FzCq2tg4D/download"
12 |     lib = "..\demo\BDD100k_val"
13 |     lib_zip = "BDD100k_val.zip"
14 | 
15 | dowReq.urlretrieve(url,lib_zip)
16 | 
17 | with zipfile.ZipFile(lib_zip,'r') as zip_ref:
18 |     zip_ref.extractall(lib)
19 | 
20 | labelFolder = lib + "\labels"
21 | imageFolder = lib + "\images"
22 | 
23 | file1 = open(".\\..\\demo\\all_labels.txt","a")
24 | path1 = os.path.realpath(labelFolder)
25 | for file in os.listdir(labelFolder):
26 |     valTemp = path1 + "\\" + file
27 |     valTemp = valTemp + '\n'
28 |     file1.write(valTemp)
29 | file1.close()
30 | 
31 | file2 = open(".\\..\\demo\\all_images.txt","a")
32 | path2 = os.path.realpath(imageFolder)
33 | for file in os.listdir(imageFolder):
34 |     pathtemp = path2 + "\\" + file
35 |     pathtemp = pathtemp + '\n'
36 |     file2.write(pathtemp)
37 | file2.close()
38 | 
39 | print("Completed")
40 | 


--------------------------------------------------------------------------------
/scripts/download_validation.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | 
 4 | function elaborate_testset {
 5 |     wget $1 -O $2.zip
 6 |     unzip -d $2 $2.zip
 7 |     rm $2.zip
 8 |     cd $2/
 9 |     realpath labels/* > all_labels.txt
10 |     realpath images/* > all_images.txt
11 |     cd ..
12 | } 
13 | 
14 | cd demo 
15 | for valset in $@
16 | do
17 |     
18 |     if [ $valset = "COCO" ]; then
19 |         echo "Downloading $valset validation set in demo"
20 |         elaborate_testset "https://cloud.hipert.unimore.it/s/LNxBDk4wzqXPL8c/download" "COCO_val2017"
21 |     elif [ $valset = "BDD" ]; then
22 |         echo "Downloading $valset validation set in demo"
23 |         elaborate_testset "https://cloud.hipert.unimore.it/s/bikqk3FzCq2tg4D/download" "BDD100K_val"
24 |     fi
25 | 
26 | done
27 | 


--------------------------------------------------------------------------------
/scripts/install_OpenCV4.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #based on https://devtalk.nvidia.com/default/topic/1042035/installing-opencv4-on-xavier/ & https://github.com/markste-in/OpenCV4XAVIER/blob/master/buildOpenCV4.sh
 4 | 
 5 | # Compute Capabilities can be found here https://developer.nvidia.com/cuda-gpus#compute
 6 | ARCH_BIN=7.2 # AGX Xavier
 7 | #ARCH_BIN=6.2 # Tx2
 8 | 
 9 | cd ~/Downloads
10 | sudo apt-get install -y build-essential \
11 |     unzip \
12 |     pkg-config \
13 |     libjpeg-dev \
14 |     libpng-dev \
15 |     libtiff-dev \
16 |     libavcodec-dev \
17 |     libavformat-dev \
18 |     libswscale-dev \
19 |     libv4l-dev \
20 |     libxvidcore-dev \
21 |     libx264-dev \
22 |     libgtk-3-dev \
23 |     libatlas-base-dev \
24 |     gfortran \
25 |     python3-dev \
26 |     python3-venv \
27 |     libgstreamer1.0-dev \
28 |     libgstreamer-plugins-base1.0-dev \
29 |     libdc1394-22-dev \
30 |     libavresample-dev \
31 |     libtbb-dev \
32 | 
33 | git clone https://github.com/opencv/opencv.git
34 | cd opencv && git checkout 4.5.4 && cd ..
35 | git clone https://github.com/opencv/opencv_contrib.git
36 | cd opencv_contrib && git checkout 4.5.4 && cd ..
37 | 
38 | 
39 | python3 -m venv opencv4
40 | source opencv4/bin/activate
41 | pip install wheel
42 | pip install numpy
43 | 
44 | cd opencv &&  mkdir build && cd build
45 | 
46 | cmake -D CMAKE_BUILD_TYPE=RELEASE \
47 |     -D CMAKE_INSTALL_PREFIX=/usr/local \
48 |     -D INSTALL_PYTHON_EXAMPLES=ON \
49 |     -D INSTALL_C_EXAMPLES=OFF \
50 |     -D OPENCV_EXTRA_MODULES_PATH='~/Downloads/opencv_contrib/modules' \
51 |     -D PYTHON_EXECUTABLE='~/Downloads/opencv4/bin/python' \
52 |     -D BUILD_EXAMPLES=ON \
53 |     -D WITH_CUDA=ON \
54 |     -D CUDA_ARCH_BIN=${ARCH_BIN} \
55 |     -D CUDA_ARCH_PTX="" \
56 |     -D ENABLE_FAST_MATH=ON \
57 |     -D CUDA_FAST_MATH=ON \
58 |     -D WITH_CUBLAS=ON \
59 |     -D WITH_LIBV4L=ON \
60 |     -D WITH_GSTREAMER=ON \
61 |     -D WITH_GSTREAMER_0_10=OFF \
62 |     -D WITH_TBB=ON \
63 |     -D WITH_OPENGL=ON \
64 |     -D WITH_VULKAN=ON \
65 |     ../
66 | 
67 | make -j4
68 | sudo make install
69 | sudo ldconfig
70 | 
71 | cd ~/Downloads/opencv4/lib/python3.6/site-packages
72 | ln -s /usr/local/lib/python3.6/site-packages/cv2.cpython-36m-aarch64-linux-gnu.so cv2.so
73 | 


--------------------------------------------------------------------------------
/scripts/test_all_tests.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | #cd build
  4 | 
  5 | RED='\033[1;31m'
  6 | GREEN='\033[1;32m'
  7 | ORANGE='\033[1;33m'
  8 | PINK='\033[1;95m'
  9 | NC='\033[0m' # No Color
 10 | 
 11 | function print_output {
 12 |     if [ $1 -eq 0 ]; then
 13 |         echo -e "$2 ${GREEN}OK${NC}"
 14 |     elif [ $1 -eq 1 ]; then
 15 |         echo -e "$2 ${RED}FATAL ERROR${NC}"
 16 |     elif [ $1 -eq 2 ] || [ $1 -eq 10 ]; then
 17 |         echo -e "$2 ${PINK}CUDNN ERROR${NC}"
 18 |     elif [ $1 -eq 4 ] || [ $1 -eq 12 ]; then
 19 |         echo -e "$2 ${PINK}TENSORRT ERROR${NC}"
 20 |     elif [ $1 -eq 8 ]; then
 21 |         echo -e "$2 ${PINK}CUDNN vs TENSORRT ERROR${NC}"
 22 |     elif [ $1 -eq 6 ]; then
 23 |         echo -e "$2 ${PINK}CUDNN & TENSORTRT ERROR${NC}"
 24 |     elif [ $1 -eq 14 ]; then
 25 |         echo -e "$2 ${PINK}ERROR FOR EVERY CHECK${NC}"
 26 |     else
 27 |         echo -e "$2 ${RED}NOT OKAY (OPENCV maybe)${NC}"
 28 |     fi 
 29 | 
 30 | } 
 31 | 
 32 | out_dir=results
 33 | out_file=results.log
 34 | rm -rf $out_dir/
 35 | mkdir -p $out_dir
 36 | 
 37 | function test_net {
 38 |     ./test_$1 &> $out_dir/$1_${TKDNN_MODE}_build_$out_file
 39 |     print_output $? $1
 40 |     ./test_rtinference $1*.rt 1 &> $out_dir/$1_${TKDNN_MODE}_inference_batch1_$out_file
 41 |     print_output $? "infer $1"
 42 |     ./test_rtinference $1*.rt $TKDNN_BATCHSIZE &> $out_dir/$1_${TKDNN_MODE}_inference_batch${TKDNN_BATCHSIZE}_$out_file
 43 |     print_output $? "batched $1"
 44 | }
 45 | 
 46 | 
 47 | # modes=( 1 ) # only FP32
 48 | modes=( 1 2 ) # FP32 and FP16
 49 | # modes=( 1 2 3 ) # FP32, FP16 and INT8
 50 | 
 51 | for i in "${modes[@]}"
 52 | do
 53 |     rm -f *rt
 54 |     if [ $i -eq 1 ]
 55 |     then
 56 |         export TKDNN_MODE=FP32
 57 |         echo -e "${ORANGE}Test FP32${NC}"
 58 |     fi
 59 |     if [ $i -eq 2 ]
 60 |     then
 61 |         export TKDNN_MODE=FP16
 62 |         echo -e "${ORANGE}Test FP16${NC}"
 63 |     fi
 64 |     if [ $i -eq 3 ]
 65 |     then
 66 |         export TKDNN_MODE=INT8
 67 |         export TKDNN_CALIB_LABEL_PATH=../demo/COCO_val2017/all_labels.txt
 68 |         export TKDNN_CALIB_IMG_PATH=../demo/COCO_val2017/all_images.txt
 69 |         echo -e "${ORANGE}Test INT8${NC}"
 70 | 	fi
 71 | 
 72 |     export TKDNN_BATCHSIZE=2
 73 |     echo -e "${ORANGE}Batch $TKDNN_BATCHSIZE ${NC}"
 74 |     
 75 |     test_net mnist    
 76 |     # ./test_imuodom &>> $out_file
 77 |     # print_output $? imuodom
 78 | 
 79 |     test_net yolo4
 80 |     # test_net yolo4_320
 81 |     # test_net yolo4_320_coco2
 82 |     # test_net yolo4_512
 83 |     # test_net yolo4_608
 84 |     # test_net yolo4-csp
 85 |     # test_net yolo4x
 86 |     # test_net yolo4_berkeley
 87 |     # test_net yolo4_berkeley_f1
 88 |     # test_net yolo4tiny
 89 |     # test_net yolo4tiny_512
 90 |     # test_net yolo3
 91 |     # test_net yolo3_berkeley
 92 |     # test_net yolo3_coco4
 93 |     # test_net yolo3_flir
 94 |     # test_net yolo3_512
 95 |     # test_net yolo3tiny
 96 |     # test_net yolo3tiny_512
 97 |     # test_net yolo2
 98 |     # test_net yolo2_voc
 99 |     # test_net yolo2tiny
100 |     # test_net csresnext50-panet-spp
101 |     # test_net csresnext50-panet-spp_berkeley
102 |     # test_net resnet101_cnet
103 |     # test_net dla34_cnet
104 |     # test_net dla34_cnet3d
105 |     # test_net mobilenetv2ssd
106 |     # test_net mobilenetv2ssd512
107 |     # test_net bdd-mobilenetv2ssd
108 |     # test_net dla34_ctrack
109 |     # test_net shelfnet
110 |     # test_net shelfnet_berkeley
111 | done
112 | 
113 | echo "If errors occured, check logfiles in directory: $out_dir" 
114 | 


--------------------------------------------------------------------------------
/scripts/test_inference.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | function test_inference {
 4 |     ./test_$1
 5 |     ./test_rtinference $1_$2.rt 1 
 6 |     ./test_rtinference $1_$2.rt 4
 7 | }
 8 | 
 9 | sudo jeston_clock
10 | 
11 | # modes=( 1 ) # only FP32
12 | # modes=( 1 2 ) # FP32 and FP16
13 | modes=( 1 2 3 ) # FP32, FP16 and INT8
14 | 
15 | rm times_rtinference.csv
16 | for i in "${modes[@]}"
17 | do
18 |     rm *rt
19 |     if [ $i -eq 1 ]
20 |     then
21 |         export TKDNN_MODE=FP32
22 |         mode=fp32
23 |         echo -e "${ORANGE}Test FP32${NC}"
24 |     fi
25 |     if [ $i -eq 2 ]
26 |     then
27 |         export TKDNN_MODE=FP16
28 |         mode=fp16
29 |         echo -e "${ORANGE}Test FP16${NC}"
30 |     fi
31 |     if [ $i -eq 3 ]
32 |     then
33 |         export TKDNN_MODE=INT8
34 |         export TKDNN_CALIB_LABEL_PATH=../demo/COCO_val2017/all_labels.txt
35 |         export TKDNN_CALIB_IMG_PATH=../demo/COCO_val2017/all_images.txt
36 |         mode=int8
37 |         echo -e "${ORANGE}Test INT8${NC}"
38 |         
39 | 	fi
40 | 
41 |     export TKDNN_BATCHSIZE=4
42 |     echo -e "${ORANGE}Batch $TKDNN_BATCHSIZE ${NC}"
43 |     
44 |     test_inference yolo4_320 $mode
45 |     test_inference yolo4 $mode
46 |     test_inference yolo4_512 $mode
47 |     test_inference yolo4_608 $mode
48 |     test_inference yolo4tiny $mode
49 | done
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/src/Activation.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | #include "kernels.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | Activation::Activation(Network *net, int act_mode, const float ceiling, const float slope) : 
 9 |     Layer(net) {
10 | 
11 |     this->act_mode  = act_mode;
12 |     this->ceiling   = ceiling;
13 |     this->slope     = slope;
14 |     checkCuda( cudaMalloc(&dstData, input_dim.tot()*sizeof(dnnType)) );
15 | 
16 |     if(int(act_mode) < 100) {
17 | 
18 |         checkCUDNN( cudnnSetTensor4dDescriptor(srcTensorDesc,
19 |                                             net->tensorFormat,
20 |                                             net->dataType,
21 |                                             input_dim.n*input_dim.l, 
22 |                                             input_dim.c,
23 |                                             input_dim.h, input_dim.w) );
24 |         checkCUDNN( cudnnSetTensor4dDescriptor(dstTensorDesc,
25 |                                                 net->tensorFormat,
26 |                                                 net->dataType,
27 |                                                 input_dim.n*input_dim.l, 
28 |                                                 input_dim.c,
29 |                                                 input_dim.h, input_dim.w) );
30 | 
31 | 
32 |         checkCUDNN( cudnnCreateActivationDescriptor(&activDesc) );
33 |         checkCUDNN( cudnnSetActivationDescriptor(activDesc,
34 |                                                 (cudnnActivationMode_t) act_mode,
35 |                                                 CUDNN_PROPAGATE_NAN,
36 |                                                 ceiling) );
37 |     }
38 | }
39 | 
40 | Activation::~Activation() {
41 | 
42 |     checkCuda( cudaFree(dstData) );
43 | 
44 |     if(int(act_mode) < 100)
45 |         checkCUDNN( cudnnDestroyActivationDescriptor(activDesc) );
46 | }
47 | 
48 | dnnType* Activation::infer(dataDim_t &dim, dnnType* srcData) {
49 |     if(act_mode == ACTIVATION_LEAKY) {
50 |         activationLEAKYForward(srcData, dstData, dim.tot(), this->slope);
51 |     }
52 |     else if(act_mode == ACTIVATION_MISH) {
53 |         activationMishForward(srcData, dstData, dim.tot());
54 | 
55 |     }
56 |     else if(act_mode == ACTIVATION_LOGISTIC) {
57 |         activationLOGISTICForward(srcData, dstData, dim.tot());
58 | 
59 |     } else if(act_mode == ACTIVATION_ELU) {
60 |         activationELUForward(srcData, dstData, dim.tot());
61 | 
62 |     } else {
63 |         dnnType alpha = dnnType(1);
64 |         dnnType beta  = dnnType(0);
65 |         checkCUDNN( cudnnActivationForward(net->cudnnHandle,
66 |                                             activDesc,
67 |                                             &alpha,
68 |                                             srcTensorDesc,
69 |                                             srcData,
70 |                                             &beta,
71 |                                             dstTensorDesc,
72 |                                             dstData) );   
73 |     } 
74 |     return dstData;
75 | }
76 | 
77 | }}
78 | 


--------------------------------------------------------------------------------
/src/BoundingBox.cpp:
--------------------------------------------------------------------------------
 1 | #include "BoundingBox.h"
 2 | 
 3 | namespace tk { namespace dnn {
 4 | 
 5 | float BoundingBox::overlap(const float p1, const float d1, const float p2, const float d2){
 6 |     float l1 = p1 - d1/2;
 7 |     float l2 = p2 - d2/2;
 8 |     float left = l1 > l2 ? l1 : l2;
 9 |     float r1 = p1 + d1/2;
10 |     float r2 = p2 + d2/2;
11 |     float right = r1 < r2 ? r1 : r2;
12 |     return right - left;
13 | }
14 | 
15 | float BoundingBox::boxesIntersection(const BoundingBox &b){
16 |     float width = this->overlap(x, w, b.x, b.w);
17 |     float height = this->overlap(y, h, b.y, b.h);
18 |     if(width < 0 || height < 0) 
19 |         return 0;
20 |     float area = width*height;
21 |     return area;
22 | }
23 | 
24 | float BoundingBox::boxesUnion(const BoundingBox &b){
25 |     float i = this->boxesIntersection(b);
26 |     float u = w*h + b.w*b.h - i;
27 |     return u;
28 | }
29 | 
30 | float BoundingBox::IoU(const BoundingBox &b){
31 |     float I = this->boxesIntersection(b);
32 |     float U = this->boxesUnion(b);
33 |     if (I == 0 || U == 0) 
34 |         return 0;
35 |     return I / U;
36 | }
37 | 
38 | void BoundingBox::clear(){
39 |     uniqueTruthIndex = -1;
40 |     truthFlag = 0;
41 |     maxIoU = 0;
42 | }
43 | 
44 | std::ostream& operator<<(std::ostream& os, const BoundingBox& bb){
45 |     os <<"w: "<< bb.w << ", h: "<< bb.h << ", x: "<< bb.x << ", y: "<< bb.y <<
46 |          ", cat: "<< bb.cl << ", conf: "<< bb.prob<< ", truth: "<< 
47 |          bb.truthFlag<< ", assignedGT: "<< bb.uniqueTruthIndex<< 
48 |          ", maxIoU: "<< bb.maxIoU<<"\n";
49 |     return os;
50 | }
51 | 
52 | bool boxComparison (const BoundingBox& a,const BoundingBox& b) { 
53 |     return (a.prob>b.prob); 
54 | }
55 | 
56 | }}


--------------------------------------------------------------------------------
/src/Dense.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | 
 5 | namespace tk { namespace dnn {
 6 | 
 7 | Dense::Dense(Network *net, int out_ch, std::string fname_weights) : 
 8 |     LayerWgs(net, net->getOutputDim().tot(), out_ch, 1, 1, 1, fname_weights) {
 9 | 
10 |     output_dim.n = 1;
11 |     output_dim.c = out_ch;
12 |     output_dim.h = 1;
13 |     output_dim.w = 1;
14 |     output_dim.l = 1;
15 | 
16 |     //allocate data for infer result
17 |     checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) );
18 | }
19 | 
20 | Dense::~Dense() {
21 | 
22 |     checkCuda( cudaFree(dstData) );
23 | }
24 | 
25 | dnnType* Dense::infer(dataDim_t &dim, dnnType* srcData) {
26 | 
27 |     if (dim.n != 1)
28 |         FatalError("Not Implemented"); 
29 |     
30 |     int dim_x = dim.tot();
31 |     int dim_y = output_dim.tot();
32 | 
33 |     if (dim_x != input_dim.tot())
34 |         FatalError("Input mismatch");
35 | 
36 |     dnnType alpha = dnnType(1), beta = dnnType(1);
37 |     // place bias into dstData
38 |     checkCuda( cudaMemcpy(dstData, bias_d, dim_y*sizeof(dnnType), cudaMemcpyDeviceToDevice) );
39 |     
40 |     //do matrix multiplication
41 |     checkERROR( cublasSgemv(net->cublasHandle, CUBLAS_OP_T,
42 |                             dim_x, dim_y,
43 |                             &alpha,
44 |                             data_d, dim_x,
45 |                             srcData, 1,
46 |                             &beta,
47 |                             dstData, 1) );
48 | 
49 |     //update data dimensions    
50 |     dim.h = 1;
51 |     dim.w = 1;
52 |     dim.l = 1;
53 |     dim.c = dim_y;
54 | 
55 |     return dstData;
56 | }
57 | 
58 | }}
59 | 


--------------------------------------------------------------------------------
/src/Flatten.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | #include "kernels.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | Flatten::Flatten(Network *net) : Layer(net) {
 9 | 
10 |     checkCuda( cudaMalloc(&dstData, input_dim.tot()*sizeof(dnnType)) );
11 | 
12 |     output_dim.n = 1;
13 |     output_dim.c = input_dim.tot();
14 |     output_dim.h = 1;
15 |     output_dim.w = 1;
16 |     output_dim.l = 1;
17 | 
18 |     this->h = 1;
19 |     this->w = 1;
20 |     this->rows = input_dim.c;
21 |     this->cols = input_dim.h * input_dim.w;
22 |     this->c = input_dim.w * input_dim.h * input_dim.c;
23 | }
24 | 
25 | Flatten::~Flatten() {
26 | 
27 |     checkCuda( cudaFree(dstData) );
28 | }
29 | 
30 | dnnType* Flatten::infer(dataDim_t &dim, dnnType* srcData) {
31 | 
32 |     //transpose per channel
33 |     matrixTranspose(net->cublasHandle, srcData, dstData, dim.c, dim.h*dim.w*dim.l);
34 | 
35 |     //update data dimensions    
36 |     dim = output_dim;
37 | 
38 |     return dstData;
39 | }
40 | 
41 | }}


--------------------------------------------------------------------------------
/src/Int8Calibrator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Int8Calibrator.h"
 2 | 
 3 | Int8EntropyCalibrator::Int8EntropyCalibrator(BatchStream& stream, int firstBatch, 
 4 |                                              const std::string& calibTableFilePath,
 5 |                                              const std::string& inputBlobName,
 6 |                                              bool readCache): 
 7 |     mStream(stream), 
 8 |     mCalibTableFilePath(calibTableFilePath),
 9 |     mInputBlobName(inputBlobName.c_str()),
10 |     mReadCache(readCache) {
11 |     nvinfer1::Dims4 dims = mStream.getDims();
12 |     mInputCount = mStream.getBatchSize() + dims.d[1]*dims.d[2]*dims.d[3];
13 |     checkCuda(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
14 |     mStream.reset(firstBatch);
15 | }
16 | 
17 | bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings) NOEXCEPT {
18 |     if (!mStream.next())
19 |         return false;
20 | 
21 |     checkCuda(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice));
22 |     assert(!strcmp(names[0], mInputBlobName.c_str()));
23 |     bindings[0] = mDeviceInput;
24 |     return true;
25 | }
26 | 
27 | const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) NOEXCEPT {
28 |     mCalibrationCache.clear();
29 |     assert(!mCalibTableFilePath.empty());
30 |     std::ifstream input(mCalibTableFilePath, std::ios::binary);
31 |     input >> std::noskipws;
32 |     input >> std::noskipws;
33 |     if (mReadCache && input.good())
34 |         std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
35 |                   std::back_inserter(mCalibrationCache));
36 | 
37 |     length = mCalibrationCache.size();
38 |     return length ? &mCalibrationCache[0] : nullptr;
39 | }
40 | 
41 | void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length) NOEXCEPT {
42 |     assert(!mCalibTableFilePath.empty());
43 |     std::ofstream output(mCalibTableFilePath, std::ios::binary);
44 |     output.write(reinterpret_cast<const char*>(cache), length);
45 |     output.close();
46 | }


--------------------------------------------------------------------------------
/src/Layer.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | 
 5 | namespace tk { namespace dnn {
 6 | 
 7 | Layer::Layer(Network *net) {
 8 | 
 9 |     this->net = net;
10 |     this->final = false;
11 |     if(net != nullptr) {
12 |         this->input_dim = net->getOutputDim();
13 |         this->output_dim = input_dim;
14 |         
15 |         checkCUDNN( cudnnCreateTensorDescriptor(&srcTensorDesc) );
16 |         checkCUDNN( cudnnCreateTensorDescriptor(&dstTensorDesc) );
17 | 
18 |         if(!net->addLayer(this))
19 |             FatalError("Net reached max number of layers");    
20 |     }
21 | 
22 |     feature_map_size = input_dim.tot() + output_dim.tot();
23 | }
24 | 
25 | Layer::~Layer() {
26 | 
27 |     checkCUDNN( cudnnDestroyTensorDescriptor(srcTensorDesc) );
28 |     checkCUDNN( cudnnDestroyTensorDescriptor(dstTensorDesc) );
29 | 
30 |     if(dstData != nullptr) {
31 |         cudaFree(dstData);
32 |         dstData = nullptr;
33 |     }
34 | }
35 | 
36 | }}


--------------------------------------------------------------------------------
/src/MulAdd.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | #include "kernels.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | MulAdd::MulAdd(Network *net, dnnType mul, dnnType add) : Layer(net) {
 9 | 
10 |     this->mul = mul;
11 |     this->add = add;
12 | 
13 |     int size = input_dim.tot();
14 | 
15 |     // create a vector with all value set to add 
16 |     dnnType *add_vector_h = new dnnType[size];
17 |     for(int i=0; i<size; i++)
18 |         add_vector_h[i] = add;
19 | 
20 |     checkCuda( cudaMalloc(&add_vector, size*sizeof(dnnType)));
21 |     checkCuda( cudaMemcpy(add_vector, add_vector_h, size*sizeof(dnnType), cudaMemcpyHostToDevice));
22 |     delete [] add_vector_h;
23 | 
24 | 
25 |     checkCuda( cudaMalloc(&dstData, input_dim.tot()*sizeof(dnnType)) );
26 | }
27 | 
28 | MulAdd::~MulAdd() {
29 | 
30 |     checkCuda( cudaFree(add_vector) );
31 |     checkCuda( cudaFree(dstData) );
32 | }
33 | 
34 | dnnType* MulAdd::infer(dataDim_t &dim, dnnType* srcData) {
35 | 
36 |     matrixMulAdd(net->cublasHandle, srcData, dstData, add_vector, input_dim.tot(), mul);
37 |     
38 |     //update data dimensions    
39 |     dim = output_dim;
40 | 
41 |     return dstData;
42 | }
43 | 
44 | }}


--------------------------------------------------------------------------------
/src/Padding.cpp:
--------------------------------------------------------------------------------
 1 | //
 2 | // Created by perseusdg on 03/01/22.
 3 | //
 4 | 
 5 | #include <iostream>
 6 | #include "Layer.h"
 7 | #include "kernels.h"
 8 | 
 9 | namespace tk{ namespace dnn {
10 |     Padding::Padding(Network *net, int32_t pad_h, int32_t pad_w, tkdnnPaddingMode_t padding_mode,float constant) : Layer(net) {
11 |         this->paddingH = pad_h;
12 |         this->paddingW = pad_w;
13 |         this->padding_mode = padding_mode;
14 |         output_dim.c = input_dim.c;
15 |         output_dim.n = input_dim.n;
16 |         output_dim.h = input_dim.h + 2 * (this->paddingH);
17 |         output_dim.w = input_dim.w + 2 * (this->paddingW);
18 |         if(padding_mode == tkdnnPaddingMode_t::PADDING_MODE_CONSTANT){
19 |             this->constant = constant;
20 |         }else{
21 |             this->constant = 0;
22 |         }
23 |         checkCuda(cudaMalloc(&dstData,output_dim.tot()*sizeof(dnnType)));
24 |     }
25 | 
26 |     Padding::~Padding() {
27 |         checkCuda(cudaFree(dstData));
28 |     }
29 |     dnnType* Padding::infer(dataDim_t &dim, float *srcData) {
30 |         fill(dstData,output_dim.tot(),0.0);
31 |         if(padding_mode == tkdnnPaddingMode_t::PADDING_MODE_REFLECTION)
32 |         {
33 |             reflection_pad2d_out_forward(paddingH, paddingW, srcData, dstData, input_dim.h, input_dim.w, input_dim.c,
34 |                                          input_dim.n);
35 |         }
36 |         else if(padding_mode == tkdnnPaddingMode_t::PADDING_MODE_CONSTANT){
37 |             constant_pad2d_forward(srcData,dstData,input_dim.h,input_dim.w,output_dim.h,output_dim.w,input_dim.c,
38 |                                    input_dim.n,paddingH,paddingW,constant);
39 |         }
40 | 
41 |         dim = output_dim;
42 |         return dstData;
43 |     }
44 | 
45 | }}
46 | 


--------------------------------------------------------------------------------
/src/Reorg.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | #include "kernels.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | Reorg::Reorg(Network *net, int stride) : Layer(net) {
 9 | 
10 |     this->stride = stride;
11 |     
12 |     output_dim.n = input_dim.n;
13 |     output_dim.c = input_dim.c*stride*stride;
14 |     output_dim.h = input_dim.h/stride;
15 |     output_dim.w = input_dim.w/stride;
16 |     output_dim.l = input_dim.l;
17 |     
18 |     checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) );
19 | }
20 | 
21 | Reorg::~Reorg() {
22 | 
23 |     checkCuda( cudaFree(dstData) );
24 | }
25 | 
26 | dnnType* Reorg::infer(dataDim_t &dim, dnnType* srcData) {
27 | 
28 |     reorgForward(srcData, dstData, dim.n, dim.c, dim.h, dim.w, stride);
29 | 
30 |     dim = output_dim;
31 |     return dstData;
32 | }
33 | 
34 | }}
35 | 


--------------------------------------------------------------------------------
/src/Reshape.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | #include "kernels.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | Reshape::Reshape(Network *net, dataDim_t new_dim) : Layer(net) {
 9 | 
10 |     checkCuda( cudaMalloc(&dstData, input_dim.tot()*sizeof(dnnType)) );
11 |     this->n = new_dim.n;
12 |     this->c = new_dim.c;
13 |     this->h = new_dim.h;
14 |     this->w = new_dim.w;
15 |     output_dim.n = new_dim.n;
16 |     output_dim.c = new_dim.c;
17 |     output_dim.h = new_dim.h;
18 |     output_dim.w = new_dim.w;
19 |     output_dim.l = new_dim.l;
20 | 
21 |     output_dim = new_dim;
22 | 
23 |     if(input_dim.tot() != output_dim.tot())
24 |         FatalError("Reshape dimension mismatch");
25 | 
26 | }
27 | 
28 | Reshape::~Reshape() {
29 | 
30 |     checkCuda( cudaFree(dstData) );
31 | }
32 | 
33 | dnnType* Reshape::infer(dataDim_t &dim, dnnType* srcData) {
34 | 
35 |     //just copies the data and changes the output dim
36 |     checkCuda( cudaMemcpy(dstData, srcData, dim.n*dim.c*dim.h*dim.w*sizeof(dnnType), cudaMemcpyDeviceToDevice));
37 |     dim = output_dim;
38 | 
39 |     return dstData;
40 | }
41 | 
42 | }}


--------------------------------------------------------------------------------
/src/Resize.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | #include "kernels.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | Resize::Resize(Network *net, int scale_c, int scale_h, int scale_w, bool fixed, ResizeMode_t mode) : Layer(net) {
 9 | 
10 |     this->mode = mode;
11 |     if(fixed){
12 |         output_dim.c = scale_c;
13 |         output_dim.h = scale_h;
14 |         output_dim.w = scale_w;
15 |     }
16 |     else{
17 |         output_dim.c *= scale_c;
18 |         output_dim.h *= scale_h;
19 |         output_dim.w *= scale_w;
20 |     }
21 | 
22 |     checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) );
23 | }
24 | 
25 | Resize::~Resize() {
26 | 
27 |     checkCuda( cudaFree(dstData) );
28 | }
29 | 
30 | dnnType* Resize::infer(dataDim_t &dim, dnnType* srcData) {
31 | 
32 |     resizeForward(srcData, dstData, dim.n, dim.c, dim.h, dim.w,
33 |                    output_dim.c, output_dim.h, output_dim.w);
34 |     dim = output_dim;
35 | 
36 |     return dstData;
37 | }
38 | 
39 | }}


--------------------------------------------------------------------------------
/src/Route.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | #include "kernels.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | Route::Route(Network *net, Layer **layers, int layers_n, int groups, int group_id) : Layer(net) {
 9 | 
10 |     // copy input layers
11 |     if(layers_n > MAX_LAYERS) {
12 |         FatalError("ROUTE: reached max number of input layers");
13 |     }
14 |     for(int i=0; i<layers_n; i++) {
15 |         this->layers[i] = layers[i];
16 |     }
17 |     this->layers_n = layers_n;
18 |     this->groups = groups;
19 |     this->group_id = group_id;
20 |     
21 |     //get dims
22 |     output_dim.l = 1;
23 |     output_dim.c = 0;
24 |     for(int i=0; i<layers_n; i++) {
25 | 
26 |         if(i==0) {
27 |             output_dim.w = layers[i]->output_dim.w;
28 |             output_dim.h = layers[i]->output_dim.h;
29 |         } else {
30 |             if( layers[i]->output_dim.w != output_dim.w ||
31 |                 layers[i]->output_dim.h != output_dim.h   )
32 |                 FatalError("Route Output dim missmatch");
33 |         }
34 |         output_dim.c += layers[i]->output_dim.c;
35 |     }
36 | 
37 |     output_dim.c /= this->groups;
38 |     input_dim = output_dim;
39 | 
40 |     checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) );
41 | }
42 | 
43 | Route::~Route() {
44 | 
45 |     checkCuda( cudaFree(dstData) );
46 | }
47 | 
48 | dnnType* Route::infer(dataDim_t &dim, dnnType* srcData) {
49 | 
50 | 
51 |     int offset = 0;
52 |     for(int i=0; i<layers_n; i++) {
53 |         dnnType *input = layers[i]->dstData;
54 |         int in_dim = layers[i]->output_dim.tot();
55 |         int part_in_dim = in_dim / this->groups;
56 |         checkCuda( cudaMemcpy(dstData + offset, input + this->group_id*part_in_dim, part_in_dim*sizeof(dnnType), cudaMemcpyDeviceToDevice));
57 |         offset += part_in_dim;
58 |     }
59 | 
60 |     //update data dimensions    
61 |     dim = output_dim;
62 | 
63 |     return dstData;
64 | }
65 | 
66 | }}


--------------------------------------------------------------------------------
/src/Shortcut.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | #include "kernels.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | Shortcut::Shortcut(Network *net, Layer *backLayer, bool mul) : Layer(net) {
 9 | 
10 |     this->backLayer = backLayer;
11 |     this->mul = mul;
12 |     this->c = input_dim.c;
13 |     this->h = input_dim.h;
14 |     this->w = input_dim.w;
15 |     checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) );
16 | 
17 |     if( ( backLayer->output_dim.c != input_dim.c && mul ) ||
18 |         (( backLayer->output_dim.w != input_dim.w ||  backLayer->output_dim.h != input_dim.h ) && !mul ) )  
19 |         FatalError("Shortcut dim missmatch");
20 |     
21 | }
22 | 
23 | Shortcut::~Shortcut() {
24 | 
25 |     checkCuda( cudaFree(dstData) );
26 | }
27 | 
28 | dnnType* Shortcut::infer(dataDim_t &dim, dnnType* srcData) {
29 | 
30 |     dataDim_t bdim = this->backLayer->output_dim;
31 | 
32 |     checkCuda(cudaMemcpy(dstData, srcData, dim.tot()*sizeof(dnnType), cudaMemcpyDeviceToDevice));
33 |     shortcutForward(this->backLayer->dstData, dstData, dim.n, dim.c, dim.h, dim.w, 1, bdim.n, bdim.c, bdim.h, bdim.w, 1, mul);
34 | 
35 |     //update data dimensions    
36 |     dim = output_dim;
37 | 
38 |     return dstData;
39 | }
40 | 
41 | }}


--------------------------------------------------------------------------------
/src/Softmax.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | #include "kernels.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | Softmax::Softmax(Network *net, const tk::dnn::dataDim_t* dim, const cudnnSoftmaxMode_t mode) : Layer(net) {
 9 | 
10 |     checkCuda( cudaMalloc(&dstData, input_dim.tot()*sizeof(dnnType)) );
11 | 
12 |     this->mode = mode;
13 |     if(dim == nullptr)
14 |     {
15 |         this->dim.n= input_dim.n;
16 |         this->dim.c= input_dim.c;
17 |         this->dim.h= input_dim.h;
18 |         this->dim.w= input_dim.w;
19 |         this->dim.l= input_dim.l;
20 |     }
21 |     else
22 |     {
23 |         this->dim.n= dim->n;
24 |         this->dim.c= dim->c;
25 |         this->dim.h= dim->h;
26 |         this->dim.w= dim->w;
27 |         this->dim.l= dim->l;
28 |     }
29 | 
30 |     checkCUDNN( cudnnSetTensor4dDescriptor(srcTensorDesc,
31 |                                         net->tensorFormat,
32 |                                         net->dataType,
33 |                                         this->dim.n*this->dim.l, 
34 |                                         this->dim.c,
35 |                                         this->dim.h, this->dim.w) );
36 |     checkCUDNN( cudnnSetTensor4dDescriptor(dstTensorDesc,
37 |                                             net->tensorFormat,
38 |                                             net->dataType,
39 |                                             this->dim.n*this->dim.l, 
40 |                                             this->dim.c,
41 |                                             this->dim.h, this->dim.w) );
42 | }
43 | 
44 | Softmax::~Softmax() {
45 | 
46 |     checkCuda( cudaFree(dstData) );
47 | }
48 | 
49 | dnnType* Softmax::infer(dataDim_t &dim, dnnType* srcData) {
50 | 
51 |     dnnType alpha = dnnType(1);
52 |     dnnType beta  = dnnType(0);
53 |     checkCUDNN( cudnnSoftmaxForward(net->cudnnHandle,
54 |                                     CUDNN_SOFTMAX_ACCURATE ,
55 |                                     this->mode,
56 |                                     &alpha,
57 |                                     srcTensorDesc,
58 |                                     srcData,
59 |                                     &beta,
60 |                                     dstTensorDesc,
61 |                                     dstData) );
62 |     return dstData;
63 | }
64 | 
65 | }}
66 | 


--------------------------------------------------------------------------------
/src/Upsample.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "Layer.h"
 4 | #include "kernels.h"
 5 | 
 6 | namespace tk { namespace dnn {
 7 | 
 8 | Upsample::Upsample(Network *net, int stride) : Layer(net) {
 9 | 
10 |     this->stride = stride;
11 | 
12 |     output_dim.n = input_dim.n;
13 |     output_dim.c = input_dim.c;
14 |     output_dim.h = input_dim.h*stride;
15 |     output_dim.w = input_dim.w*stride;
16 |     output_dim.l = input_dim.l;
17 |     this->c = input_dim.c;
18 |     this->h = input_dim.h;
19 |     this->w = input_dim.w;
20 |     
21 |     checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) );
22 | }
23 | 
24 | Upsample::~Upsample() {
25 | 
26 |     checkCuda( cudaFree(dstData) );
27 | }
28 | 
29 | dnnType* Upsample::infer(dataDim_t &dim, dnnType* srcData) {
30 | 
31 |     fill(dstData, output_dim.tot(), 0.0);
32 |     upsampleForward(srcData, dstData, input_dim.n, input_dim.c, input_dim.h, input_dim.w, stride, 1, 1);
33 | 
34 |     dim = output_dim;
35 |     return dstData;
36 | }
37 | 
38 | }}
39 | 


--------------------------------------------------------------------------------
/src/demo_utils.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "demo_utils.h"
 3 | 
 4 | void readCalibrationMatrix(const std::string& path, cv::Mat& calib_mat){
 5 |     YAML::Node config   = YAML::LoadFile(path);
 6 | 
 7 |     //read camera matrix
 8 |     int rows = config["camera_matrix"]["rows"].as<int>();
 9 |     int cols = config["camera_matrix"]["cols"].as<int>();
10 | 
11 |     cv::Mat calib = cv::Mat(cv::Size(rows, cols), CV_32F);
12 |     float *vals = (float *)calib.data;
13 |     
14 |     for(int i=0; i < config["camera_matrix"]["data"].size(); ++i )
15 |         vals[i] = config["camera_matrix"]["data"][i].as<float>();
16 | 
17 |     calib_mat = calib;
18 | 
19 | }
20 | 


--------------------------------------------------------------------------------
/src/kernels/activation_elu.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | 
 3 | /**
 4 |     Exponential Linear Unit compute kernel
 5 |     it does the following operation for each x input element:
 6 |         x < 0 :    y = e^(x) -1
 7 |         x > 0 :    y = x
 8 | */
 9 | __global__
10 | void activation_elu(dnnType *input, dnnType *output, int size) {
11 | 
12 |     int i = blockDim.x*blockIdx.x + threadIdx.x;
13 | 
14 |     if(i<size) {    
15 |         dnnType k0, k1;
16 |         
17 |         if (input[i]>0)
18 |             k0 = 1.0f;
19 |         else
20 |             k0 = 0.0f;
21 |         k1 = 1.0f-k0;
22 | 
23 |         output[i] = k0*input[i] + k1*(expf(input[i]) -1.0f);
24 |     }
25 |  }
26 | 
27 | 
28 | /**
29 |     ELU activation function
30 | */
31 | void activationELUForward(dnnType* srcData, dnnType* dstData, int size, const cudaStream_t stream)
32 | {
33 |     int blocks = (size+255)/256;
34 |     int threads = 256;
35 |     
36 |     activation_elu<<<blocks, threads, 0, stream>>>(srcData, dstData, size);
37 | }
38 | 


--------------------------------------------------------------------------------
/src/kernels/activation_leaky.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | 
 3 | __global__
 4 | void activation_leaky(dnnType *input, dnnType *output, int size, float slope) {
 5 | 
 6 |     int i = blockDim.x*blockIdx.x + threadIdx.x;
 7 | 
 8 |     if(i<size) {    
 9 |         if (input[i]>0)
10 |             output[i] = input[i];
11 |         else
12 |             output[i] = slope*input[i];
13 |     }
14 |  }
15 | 
16 | 
17 | /**
18 |     ELU activation function
19 | */
20 | void activationLEAKYForward(dnnType* srcData, dnnType* dstData, int size, float slope, cudaStream_t stream)
21 | {
22 |     int blocks = (size+255)/256;
23 |     int threads = 256;
24 |     
25 |     activation_leaky<<<blocks, threads, 0, stream>>>(srcData, dstData, size, slope);
26 | }
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/src/kernels/activation_logistic.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | 
 3 | __global__
 4 | void activation_logistic(dnnType *input, dnnType *output, int size) {
 5 | 
 6 |     int i = blockDim.x*blockIdx.x + threadIdx.x;
 7 | 
 8 |     if(i<size) {    
 9 |         output[i] =  1.0f/(1.0f + exp(-input[i]));;
10 |     }
11 |  }
12 | 
13 | 
14 | /**
15 |     LOGISTIC activation function
16 | */
17 | void activationLOGISTICForward(dnnType* srcData, dnnType* dstData, int size, cudaStream_t stream)
18 | {
19 |     int blocks = (size+255)/256;
20 |     int threads = 256;
21 |     
22 |     activation_logistic<<<blocks, threads, 0, stream>>>(srcData, dstData, size);
23 | }
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/src/kernels/activation_mish.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | #include <math.h>
 3 | 
 4 | #define MISH_THRESHOLD 20
 5 | 
 6 | __device__ 
 7 | float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);}
 8 | 
 9 | __device__ 
10 | float softplus_kernel(float x, float threshold = 20) {
11 |     if (x > threshold) return x;                // too large
12 |     else if (x < -threshold) return expf(x);    // too small
13 |     return logf(expf(x) + 1);
14 | }
15 | 
16 | 
17 | 
18 | __device__ 
19 | float mish_yashas(float x) { 
20 |     float e = __expf(x); 
21 |     if (x <= -18.0f) 
22 |         return x * e; 
23 |  
24 |     float n = e * e + 2 * e; 
25 |     if (x <= -5.0f) 
26 |         return x * __fdividef(n, n + 2); 
27 |  
28 |     return x - 2 * __fdividef(x, n + 2); 
29 | } 
30 | 
31 | // https://github.com/digantamisra98/Mish
32 | // https://github.com/AlexeyAB/darknet/blob/master/src/activation_kernels.cu
33 | __global__
34 | void activation_mish(dnnType *input, dnnType *output, int size) {
35 |     int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
36 |     if (i < size) 
37 |         // output[i] = input[i] * tanh_activate_kernel( softplus_kernel(input[i], MISH_THRESHOLD));    
38 |         output[i] = mish_yashas(input[i]);
39 | }
40 | 
41 | /**
42 |     Mish activation function
43 | */
44 | void activationMishForward(dnnType* srcData, dnnType* dstData, int size, cudaStream_t stream)
45 | {
46 |     int blocks = (size+255)/256;
47 |     int threads = 256;
48 |     
49 |     activation_mish<<<blocks, threads, 0, stream>>>(srcData, dstData, size);
50 | }


--------------------------------------------------------------------------------
/src/kernels/activation_relu_ceiling.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | 
 3 | __global__
 4 | void activation_relu_ceiling(dnnType *input, dnnType *output, int size, const float ceiling) {
 5 | 
 6 |     int i = blockDim.x*blockIdx.x + threadIdx.x;
 7 | 
 8 |     if(i<size) {    
 9 |         if (input[i]>0)
10 |         {
11 |             if (input[i]>ceiling)
12 |                 output[i] = ceiling;
13 |             else
14 |                 output[i] = input[i];
15 |         }
16 |         else
17 |             output[i] = 0.0f;
18 |     }
19 |  }
20 | 
21 | 
22 | /**
23 |     Relu ceiling activation function
24 | */
25 | void activationReLUCeilingForward(dnnType* srcData, dnnType* dstData, int size, const float ceiling, cudaStream_t stream)
26 | {
27 |     int blocks = (size+255)/256;
28 |     int threads = 256;
29 |     
30 |     activation_relu_ceiling<<<blocks, threads, 0, stream>>>(srcData, dstData, size, ceiling);
31 | }
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/src/kernels/activation_sigmoid.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | #include <math.h>
 3 | 
 4 | 
 5 | __global__
 6 | void activation_sigmoid(dnnType *input, dnnType *output, int size) {
 7 | 
 8 |     int i = blockDim.x * blockIdx.x + threadIdx.x;
 9 |     if(i < size)
10 |         output[i] = 1.0f / (1.0f + exp (-input[i]));
11 |  }
12 | 
13 | 
14 | /**
15 |     ELU activation function
16 | */
17 | void activationSIGMOIDForward(dnnType* srcData, dnnType* dstData, int size, cudaStream_t stream)
18 | {
19 |     int blocks = (size+255)/256;
20 |     int threads = 256;
21 |     
22 |     activation_sigmoid<<<blocks, threads, 0, stream>>>(srcData, dstData, size);
23 | }


--------------------------------------------------------------------------------
/src/kernels/convert.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | 
 3 | __global__
 4 | void float2half_device(float *input, __half *output, int size) {
 5 | 
 6 |     int i = blockDim.x*blockIdx.x + threadIdx.x;
 7 | 
 8 |     if(i<size) {    
 9 |         output[i] = __float2half(input[i]);
10 |     }
11 | }
12 | 
13 | 
14 | void float2half(float* srcData, __half *dstData, int size, const cudaStream_t stream)
15 | {
16 |     int blocks = (size+255)/256;
17 |     int threads = 256;
18 |     
19 |     float2half_device<<<blocks, threads, 0, stream>>>(srcData, dstData, size);
20 |     cudaDeviceSynchronize();
21 | }
22 | 


--------------------------------------------------------------------------------
/src/kernels/fill.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | 
 3 | __global__
 4 | void fill_kernel(dnnType *data, int size, dnnType val) {
 5 | 
 6 |     int i = blockDim.x*blockIdx.x + threadIdx.x;
 7 | 
 8 |     if(i<size) {    
 9 |         data[i] = val;
10 |     }
11 |  }
12 | 
13 | 
14 | void fill(dnnType* data, int size, dnnType val, cudaStream_t stream)
15 | {
16 |     int blocks = (size+255)/256;
17 |     int threads = 256;
18 |     fill_kernel<<<blocks, threads, 0, stream>>>(data, size, val);
19 | }
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/src/kernels/normalize.cu:
--------------------------------------------------------------------------------
 1 | #include "kernelsThrust.h"
 2 | 
 3 | __global__
 4 | void normalize_kernel(float *bgr, const int dim, const float *mean, const float *stddev){
 5 |     int i = blockDim.x*blockIdx.x + threadIdx.x;
 6 |     int j = blockIdx.y;
 7 |     bgr[j*(dim)+i] = bgr[j*(dim)+i] - mean[j];
 8 |     bgr[j*(dim)+i] = bgr[j*(dim)+i] / stddev[j];
 9 |     
10 | }
11 | 
12 | void normalize(float *bgr, const int ch, const int h, const int w, const float *mean, const float *stddev){
13 |     int num_thread = 256;
14 |     dim3 dimBlock(h*w/num_thread, ch);
15 |     normalize_kernel<<<dimBlock, num_thread, 0>>>(bgr, h*w, mean, stddev);
16 | }


--------------------------------------------------------------------------------
/src/kernels/pooling.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | 
 3 | __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *input, float *output)
 4 | {
 5 |     int h = (in_h + pad - size) / stride_y + 1;
 6 |     int w = (in_w + pad - size) / stride_x + 1;
 7 |     int c = in_c;
 8 | 
 9 |     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
10 |     if(id >= n) return;
11 | 
12 |     int j = id % w;
13 |     id /= w;
14 |     int i = id % h;
15 |     id /= h;
16 |     int k = id % c;
17 |     id /= c;
18 |     int b = id;
19 | 
20 |     int w_offset = -pad / 2;
21 |     int h_offset = -pad / 2;
22 | 
23 |     int out_index = j + w*(i + h*(k + c*b));
24 |     float max = -9999999;
25 |     int max_i = -1;
26 |     int l, m;
27 |     for(l = 0; l < size; ++l){
28 |         for(m = 0; m < size; ++m){
29 |             int cur_h = h_offset + i*stride_y + l;
30 |             int cur_w = w_offset + j*stride_x + m;
31 |             int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c));
32 |             int valid = (cur_h >= 0 && cur_h < in_h &&
33 |                     cur_w >= 0 && cur_w < in_w);
34 |             float val = (valid != 0) ? input[index] : -9999999;
35 |             max_i = (val > max) ? index : max_i;
36 |             max   = (val > max) ? val   : max;
37 |         }
38 |     }
39 |     output[out_index] = max;
40 | }
41 | 
42 | void MaxPoolingForward(dnnType* srcData, dnnType* dstData, int n, int c, int h, int w, int stride_x, int stride_y, int size, int padding, cudaStream_t stream) 
43 | {
44 | 
45 |     int tot_size = n*c*h*w;
46 | 
47 |     int blocks = (tot_size+255)/256;
48 |     int threads = 256;
49 |     
50 |     forward_maxpool_layer_kernel<<<blocks, threads, 0, stream>>>(tot_size, h, w, c, stride_x, stride_y, size, padding, srcData, dstData);
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/src/kernels/reorg.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | 
 3 | __global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
 4 | {
 5 |     int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
 6 |     if(i >= N) return;
 7 |     int in_index = i;
 8 |     int in_w = i%w;
 9 |     i = i/w;
10 |     int in_h = i%h;
11 |     i = i/h;
12 |     int in_c = i%c;
13 |     i = i/c;
14 |     int b = i%batch;
15 | 
16 |     int out_c = c/(stride*stride);
17 | 
18 |     int c2 = in_c % out_c;
19 |     int offset = in_c / out_c;
20 |     int w2 = in_w*stride + offset % stride;
21 |     int h2 = in_h*stride + offset / stride;
22 |     //printf("%d\n", offset);
23 |     int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));
24 | 
25 |    // printf("%d %d %d\n", w2, h2, c2);
26 |     //printf("%d %d\n", in_index, out_index);
27 |     //if(out_index >= N || out_index < 0) printf("bad bad bad \n");
28 | 
29 |     if(forward) out[out_index] = x[in_index];
30 |     else out[in_index] = x[out_index];
31 |     //if(forward) out[1] = x[1];
32 |     //else out[0] = x[0];
33 | }
34 | 
35 | /**
36 |     reorg function function
37 | */
38 | void reorgForward(dnnType* srcData, dnnType* dstData, 
39 |                   int n, int c, int h, int w, int stride, cudaStream_t stream) {
40 | 
41 |     int size = n*c*h*w;
42 | 
43 |     int blocks = (size+255)/256;
44 |     int threads = 256;
45 |     
46 |     reorg_kernel<<<blocks, threads, 0, stream>>>(size, srcData, w, h, c, n, stride, false, dstData);
47 | }
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/src/kernels/resize.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | #include <stdio.h>
 3 | 
 4 | __global__ void resize_kernel(  int size,float *x, int i_w, int i_h, int i_c,  
 5 |                                 int o_w, int o_h, int o_c, int batch, float *out)
 6 | {
 7 |     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
 8 |     if(id >= size) return;
 9 | 
10 |     int i = id % o_w;
11 |     id /= o_w;
12 |     int j = id % o_h;
13 |     id /= o_h;
14 |     int k = id % o_c;
15 |     id /= o_c;
16 |     int b = id % batch;
17 | 
18 |     int out_index = i + o_w*(j + o_h*(k + o_c*b));
19 |     int add_index = i/(o_w/i_w) + i_w*(j/(o_h/i_h) + i_h*(k + i_c*b));
20 |     out[out_index] = x[add_index];
21 | }
22 | 
23 | 
24 | void resizeForward(  dnnType* srcData, dnnType* dstData, int n, int i_c, int i_h, int i_w,
25 |      int o_c, int o_h, int o_w, cudaStream_t stream )
26 | {
27 |     int o_size = n*o_c*o_h*o_w;
28 | 
29 |     int blocks = (o_size+255)/256;
30 |     int threads = 256;
31 | 
32 |     resize_kernel<<<blocks, threads, 0, stream>>>(o_size, srcData, i_w, i_h, i_c, o_w, o_h, o_c, n, dstData);
33 | }
34 | 


--------------------------------------------------------------------------------
/src/kernels/scaladd.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | #include <math.h>
 3 | 
 4 | __global__ void scal_add_kernel(dnnType* dstData, int size, float alpha, float beta, int inc)
 5 | {
 6 |     int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
 7 |     if (i < size) dstData[i*inc] = dstData[i*inc] * alpha + beta;
 8 | }
 9 | 
10 | void scalAdd(dnnType* dstData, int size, float alpha, float beta, int inc, cudaStream_t stream)
11 | {
12 |     int blocks = (size+255)/256;
13 |     int threads = 256;
14 |     
15 |     scal_add_kernel<<<blocks, threads, 0, stream>>>(dstData, size, alpha, beta, inc);
16 | }


--------------------------------------------------------------------------------
/src/kernels/shortcut.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | #include "assert.h"
 3 | 
 4 | __global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, 
 5 |                                 int w1, int h1, int c1, dnnType *add, 
 6 |                                 int w2, int h2, int c2, float s1, float s2, dnnType *out)
 7 | {
 8 |     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
 9 |     if (id >= size) return;
10 |     int i = id % minw;
11 |     id /= minw;
12 |     int j = id % minh;
13 |     id /= minh;
14 |     int k = id % minc;
15 |     id /= minc;
16 |     int b = id % batch;
17 | 
18 |     int out_index = i*sample + w2*(j*sample + h2*(k + c2*b));
19 |     int add_index = i*stride + w1*(j*stride + h1*(k + c1*b));
20 |     out[out_index] = s1*out[out_index] + s2*add[add_index];
21 |     //out[out_index] += add[add_index];
22 | }
23 | 
24 | __global__ void shortcut_mul_kernel(int size, int minw, int minh, int minc, int sample, int batch, 
25 |                                 int w1, int h1, int c1, dnnType *mul, 
26 |                                 int w2, int h2, int c2, float s1, float s2, dnnType *out)
27 | {
28 |     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
29 |     if (id >= size) return;
30 |     int i = id % minw;
31 |     id /= minw;
32 |     int j = id % minh;
33 |     id /= minh;
34 |     int k = id % minc;
35 |     id /= minc;
36 |     int b = id % batch;
37 | 
38 |     int out_index = i*sample + w1*(j*sample + h1*(k + c1*b));
39 |     out[out_index] = out[out_index] * mul[k + c2*b];
40 | }
41 | 
42 | void shortcutForward(dnnType* srcData, dnnType* dstData, int n1, int c1, int h1, int w1, int s1,
43 |                                                          int n2, int c2, int h2, int w2, int s2, 
44 |                                                          bool mul, cudaStream_t stream)
45 | {
46 |     assert(n1 == n2);
47 |     int batch = n1;
48 | 
49 |     if(!mul){
50 |         int minw = (w1 < w2) ? w1 : w2;
51 |         int minh = (h1 < h2) ? h1 : h2;
52 |         int minc = (c1 < c2) ? c1 : c2;
53 |         int stride = w1/w2;
54 |         int sample = w2/w1;
55 |         assert(stride == h1/h2);
56 |         assert(sample == h2/h1);
57 |         if(stride < 1) stride = 1;
58 |         if(sample < 1) sample = 1;
59 | 
60 |         int size = batch * minw * minh * minc;
61 |         int blocks = (size+255)/256;
62 |         int threads = 256;
63 |         
64 |         shortcut_kernel<<<blocks, threads, 0, stream>>>(size, minw, minh, minc, stride, sample, batch, 
65 |             w1, h1, c1, srcData, w2, h2, c2, s1, s2, dstData);  
66 |     } 
67 |     else{
68 |         int minw = w1;
69 |         int minh = h1;
70 |         int minc = c1;
71 |         int sample = 1;
72 | 
73 |         int size = batch * minw * minh * minc;
74 |         int blocks = (size+255)/256;
75 |         int threads = 256;
76 | 
77 |         shortcut_mul_kernel<<<blocks, threads, 0, stream>>>(size, minw, minh, minc, sample, batch, 
78 |             w1, h1, c1, srcData, w2, h2, c2, s1, s2, dstData);  
79 |     }
80 | }
81 | 


--------------------------------------------------------------------------------
/src/kernels/softmax.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | 
 3 | __device__ void softmax_device(float *input, int n, float temp, int stride, float *output)
 4 | {
 5 |     int i;
 6 |     float sum = 0;
 7 |     float largest = -INFINITY;
 8 |     for(i = 0; i < n; ++i){
 9 |         int val = input[i*stride];
10 |         largest = (val>largest) ? val : largest;
11 |     }
12 |     for(i = 0; i < n; ++i){
13 |         float e = exp(input[i*stride]/temp - largest/temp);
14 |         sum += e;
15 |         output[i*stride] = e;
16 |     }
17 |     for(i = 0; i < n; ++i){
18 |         output[i*stride] /= sum;
19 |     }
20 | }
21 | 
22 | __global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
23 | {
24 |     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
25 |     if (id >= batch*groups) return;
26 |     int b = id / groups;
27 |     int g = id % groups;
28 |     softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
29 | }
30 | 
31 | /**
32 |     softmax function
33 | */
34 | void softmaxForward(float *input, int n, int batch, int batch_offset, 
35 |                     int groups, int group_offset, int stride, float temp, float *output, cudaStream_t stream)
36 | {
37 |     int size = groups*batch;
38 |     int blocks = (size+255)/256;
39 |     int threads = 256;
40 | 
41 |     softmax_kernel<<<blocks, threads, 0, stream>>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output);
42 | }
43 | 


--------------------------------------------------------------------------------
/src/kernels/upsample.cu:
--------------------------------------------------------------------------------
 1 | #include "kernels.h"
 2 | 
 3 | __global__ void upsample_kernel(size_t N, dnnType *x, int w, int h, int c, int batch, int stride, int forward, float scale, dnnType *out)
 4 | {
 5 |     size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
 6 |     if(i >= N) return;
 7 |     int out_index = i;
 8 |     int out_w = i%(w*stride);
 9 |     i = i/(w*stride);
10 |     int out_h = i%(h*stride);
11 |     i = i/(h*stride);
12 |     int out_c = i%c;
13 |     i = i/c;
14 |     int b = i%batch;
15 | 
16 |     int in_w = out_w / stride;
17 |     int in_h = out_h / stride;
18 |     int in_c = out_c;
19 | 
20 |     int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w;
21 | 
22 | 
23 |     if(forward) out[out_index] += scale * x[in_index];
24 |     else atomicAdd(x+in_index, scale * out[out_index]);
25 | }
26 | 
27 | void upsampleForward(dnnType* srcData, dnnType* dstData, 
28 |                      int n, int c, int h, int w, int s, int forward, float scale,                                   
29 |                      cudaStream_t stream) {
30 | 
31 |     int size = w*h*c*n*s*s;
32 |     int blocks = (size+255)/256;
33 |     int threads = 256;
34 |     upsample_kernel<<<blocks, threads, 0, stream>>>(size, srcData, w, h, c, n, s, forward, scale, dstData);
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/backbones/dla34/env_dla34.yml:
--------------------------------------------------------------------------------
 1 | name: dla34
 2 | channels:
 3 |   - defaults
 4 | dependencies:
 5 |   - _libgcc_mutex=0.1=main
 6 |   - _pytorch_select=0.2=gpu_0
 7 |   - blas=1.0=mkl
 8 |   - ca-certificates=2019.10.16=0
 9 |   - certifi=2019.9.11=py36_0
10 |   - cffi=1.13.1=py36h2e261b9_0
11 |   - cudatoolkit=10.0.130=0
12 |   - cudnn=7.6.0=cuda10.0_0
13 |   - freetype=2.9.1=h8a8886c_1
14 |   - intel-openmp=2019.4=243
15 |   - jpeg=9b=h024ee3a_2
16 |   - libedit=3.1.20181209=hc058e9b_0
17 |   - libffi=3.2.1=hd88cf55_4
18 |   - libgcc-ng=9.1.0=hdf63c60_0
19 |   - libgfortran-ng=7.3.0=hdf63c60_0
20 |   - libpng=1.6.37=hbc83047_0
21 |   - libstdcxx-ng=9.1.0=hdf63c60_0
22 |   - libtiff=4.0.10=h2733197_2
23 |   - mkl=2019.4=243
24 |   - mkl-service=2.3.0=py36he904b0f_0
25 |   - mkl_fft=1.0.14=py36ha843d7b_0
26 |   - mkl_random=1.1.0=py36hd6b4f25_0
27 |   - ncurses=6.1=he6710b0_1
28 |   - ninja=1.9.0=py36hfd86e86_0
29 |   - numpy=1.17.2=py36haad9e8e_0
30 |   - numpy-base=1.17.2=py36hde5b4d6_0
31 |   - olefile=0.46=py36_0
32 |   - openssl=1.1.1d=h7b6447c_3
33 |   - pillow=6.2.0=py36h34e0f95_0
34 |   - pip=19.3.1=py36_0
35 |   - pycparser=2.19=py36_0
36 |   - python=3.6.9=h265db76_0
37 |   - readline=7.0=h7b6447c_5
38 |   - setuptools=41.6.0=py36_0
39 |   - six=1.12.0=py36_0
40 |   - sqlite=3.30.1=h7b6447c_0
41 |   - tk=8.6.8=hbc83047_0
42 |   - wheel=0.33.6=py36_0
43 |   - xz=5.2.4=h14c3975_4
44 |   - zlib=1.2.11=h7b6447c_3
45 |   - zstd=1.3.7=h0b5b093_0
46 |   - pip:
47 |     - chardet==3.0.4
48 |     - decorator==4.4.1
49 |     - idna==2.8
50 |     - lxml==4.4.2
51 |     - networkx==2.4
52 |     - nltk==3.4.5
53 |     - pytorchcv==0.0.55
54 |     - requests==2.22.0
55 |     - summary==0.2.0
56 |     - torch==1.3.0
57 |     - torchsummary==1.5.1
58 |     - torchvision==0.4.1
59 |     - urllib3==1.25.8
60 | 
61 | 


--------------------------------------------------------------------------------
/tests/backbones/resnet101/env_resnet101.yml:
--------------------------------------------------------------------------------
 1 | name: resnet101
 2 | channels:
 3 |   - defaults
 4 | dependencies:
 5 |   - _libgcc_mutex=0.1=main
 6 |   - _pytorch_select=0.2=gpu_0
 7 |   - blas=1.0=mkl
 8 |   - ca-certificates=2019.10.16=0
 9 |   - certifi=2019.9.11=py36_0
10 |   - cffi=1.13.1=py36h2e261b9_0
11 |   - cudatoolkit=10.0.130=0
12 |   - cudnn=7.6.0=cuda10.0_0
13 |   - freetype=2.9.1=h8a8886c_1
14 |   - intel-openmp=2019.4=243
15 |   - jpeg=9b=h024ee3a_2
16 |   - libedit=3.1.20181209=hc058e9b_0
17 |   - libffi=3.2.1=hd88cf55_4
18 |   - libgcc-ng=9.1.0=hdf63c60_0
19 |   - libgfortran-ng=7.3.0=hdf63c60_0
20 |   - libpng=1.6.37=hbc83047_0
21 |   - libstdcxx-ng=9.1.0=hdf63c60_0
22 |   - libtiff=4.0.10=h2733197_2
23 |   - mkl=2019.4=243
24 |   - mkl-service=2.3.0=py36he904b0f_0
25 |   - mkl_fft=1.0.14=py36ha843d7b_0
26 |   - mkl_random=1.1.0=py36hd6b4f25_0
27 |   - ncurses=6.1=he6710b0_1
28 |   - ninja=1.9.0=py36hfd86e86_0
29 |   - numpy=1.17.2=py36haad9e8e_0
30 |   - numpy-base=1.17.2=py36hde5b4d6_0
31 |   - olefile=0.46=py36_0
32 |   - openssl=1.1.1d=h7b6447c_3
33 |   - pillow=6.2.0=py36h34e0f95_0
34 |   - pip=19.3.1=py36_0
35 |   - pycparser=2.19=py36_0
36 |   - python=3.6.9=h265db76_0
37 |   - pytorch=1.2.0=cuda100py36h938c94c_0
38 |   - readline=7.0=h7b6447c_5
39 |   - setuptools=41.6.0=py36_0
40 |   - six=1.12.0=py36_0
41 |   - sqlite=3.30.1=h7b6447c_0
42 |   - tk=8.6.8=hbc83047_0
43 |   - wheel=0.33.6=py36_0
44 |   - xz=5.2.4=h14c3975_4
45 |   - zlib=1.2.11=h7b6447c_3
46 |   - zstd=1.3.7=h0b5b093_0
47 |   - pip:
48 |     - chardet==3.0.4
49 |     - idna==2.8
50 |     - pytorchcv==0.0.55
51 |     - requests==2.22.0
52 |     - torch==1.3.0
53 |     - torchsummary==1.5.1
54 |     - torchvision==0.4.1
55 |     - urllib3==1.25.8
56 | 
57 | 


--------------------------------------------------------------------------------
/tests/darknet/cfg/yolo2.cfg:
--------------------------------------------------------------------------------
  1 | [net]
  2 | # Testing
  3 | #batch=1
  4 | #subdivisions=1
  5 | # Training
  6 |  batch=32
  7 |  subdivisions=8
  8 | width=608
  9 | height=608
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 500200
 21 | policy=steps
 22 | steps=400000,450000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=32
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | [maxpool]
 34 | size=2
 35 | stride=2
 36 | 
 37 | [convolutional]
 38 | batch_normalize=1
 39 | filters=64
 40 | size=3
 41 | stride=1
 42 | pad=1
 43 | activation=leaky
 44 | 
 45 | [maxpool]
 46 | size=2
 47 | stride=2
 48 | 
 49 | [convolutional]
 50 | batch_normalize=1
 51 | filters=128
 52 | size=3
 53 | stride=1
 54 | pad=1
 55 | activation=leaky
 56 | 
 57 | [convolutional]
 58 | batch_normalize=1
 59 | filters=64
 60 | size=1
 61 | stride=1
 62 | pad=1
 63 | activation=leaky
 64 | 
 65 | [convolutional]
 66 | batch_normalize=1
 67 | filters=128
 68 | size=3
 69 | stride=1
 70 | pad=1
 71 | activation=leaky
 72 | 
 73 | [maxpool]
 74 | size=2
 75 | stride=2
 76 | 
 77 | [convolutional]
 78 | batch_normalize=1
 79 | filters=256
 80 | size=3
 81 | stride=1
 82 | pad=1
 83 | activation=leaky
 84 | 
 85 | [convolutional]
 86 | batch_normalize=1
 87 | filters=128
 88 | size=1
 89 | stride=1
 90 | pad=1
 91 | activation=leaky
 92 | 
 93 | [convolutional]
 94 | batch_normalize=1
 95 | filters=256
 96 | size=3
 97 | stride=1
 98 | pad=1
 99 | activation=leaky
100 | 
101 | [maxpool]
102 | size=2
103 | stride=2
104 | 
105 | [convolutional]
106 | batch_normalize=1
107 | filters=512
108 | size=3
109 | stride=1
110 | pad=1
111 | activation=leaky
112 | 
113 | [convolutional]
114 | batch_normalize=1
115 | filters=256
116 | size=1
117 | stride=1
118 | pad=1
119 | activation=leaky
120 | 
121 | [convolutional]
122 | batch_normalize=1
123 | filters=512
124 | size=3
125 | stride=1
126 | pad=1
127 | activation=leaky
128 | 
129 | [convolutional]
130 | batch_normalize=1
131 | filters=256
132 | size=1
133 | stride=1
134 | pad=1
135 | activation=leaky
136 | 
137 | [convolutional]
138 | batch_normalize=1
139 | filters=512
140 | size=3
141 | stride=1
142 | pad=1
143 | activation=leaky
144 | 
145 | [maxpool]
146 | size=2
147 | stride=2
148 | 
149 | [convolutional]
150 | batch_normalize=1
151 | filters=1024
152 | size=3
153 | stride=1
154 | pad=1
155 | activation=leaky
156 | 
157 | [convolutional]
158 | batch_normalize=1
159 | filters=512
160 | size=1
161 | stride=1
162 | pad=1
163 | activation=leaky
164 | 
165 | [convolutional]
166 | batch_normalize=1
167 | filters=1024
168 | size=3
169 | stride=1
170 | pad=1
171 | activation=leaky
172 | 
173 | [convolutional]
174 | batch_normalize=1
175 | filters=512
176 | size=1
177 | stride=1
178 | pad=1
179 | activation=leaky
180 | 
181 | [convolutional]
182 | batch_normalize=1
183 | filters=1024
184 | size=3
185 | stride=1
186 | pad=1
187 | activation=leaky
188 | 
189 | 
190 | #######
191 | 
192 | [convolutional]
193 | batch_normalize=1
194 | size=3
195 | stride=1
196 | pad=1
197 | filters=1024
198 | activation=leaky
199 | 
200 | [convolutional]
201 | batch_normalize=1
202 | size=3
203 | stride=1
204 | pad=1
205 | filters=1024
206 | activation=leaky
207 | 
208 | [route]
209 | layers=-9
210 | 
211 | [convolutional]
212 | batch_normalize=1
213 | size=1
214 | stride=1
215 | pad=1
216 | filters=64
217 | activation=leaky
218 | 
219 | [reorg]
220 | stride=2
221 | 
222 | [route]
223 | layers=-1,-4
224 | 
225 | [convolutional]
226 | batch_normalize=1
227 | size=3
228 | stride=1
229 | pad=1
230 | filters=1024
231 | activation=leaky
232 | 
233 | [convolutional]
234 | size=1
235 | stride=1
236 | pad=1
237 | filters=425
238 | activation=linear
239 | 
240 | 
241 | [region]
242 | anchors =  0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
243 | bias_match=1
244 | classes=80
245 | coords=4
246 | num=5
247 | softmax=1
248 | jitter=.3
249 | rescore=1
250 | 
251 | object_scale=5
252 | noobject_scale=1
253 | class_scale=1
254 | coord_scale=1
255 | 
256 | absolute=1
257 | thresh = .6
258 | random=1
259 | 


--------------------------------------------------------------------------------
/tests/darknet/cfg/yolo2_voc.cfg:
--------------------------------------------------------------------------------
  1 | [net]
  2 | # Testing
  3 | batch=1
  4 | subdivisions=1
  5 | # Training
  6 | # batch=64
  7 | # subdivisions=8
  8 | height=416
  9 | width=416
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 80200
 21 | policy=steps
 22 | steps=40000,60000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=32
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | [maxpool]
 34 | size=2
 35 | stride=2
 36 | 
 37 | [convolutional]
 38 | batch_normalize=1
 39 | filters=64
 40 | size=3
 41 | stride=1
 42 | pad=1
 43 | activation=leaky
 44 | 
 45 | [maxpool]
 46 | size=2
 47 | stride=2
 48 | 
 49 | [convolutional]
 50 | batch_normalize=1
 51 | filters=128
 52 | size=3
 53 | stride=1
 54 | pad=1
 55 | activation=leaky
 56 | 
 57 | [convolutional]
 58 | batch_normalize=1
 59 | filters=64
 60 | size=1
 61 | stride=1
 62 | pad=1
 63 | activation=leaky
 64 | 
 65 | [convolutional]
 66 | batch_normalize=1
 67 | filters=128
 68 | size=3
 69 | stride=1
 70 | pad=1
 71 | activation=leaky
 72 | 
 73 | [maxpool]
 74 | size=2
 75 | stride=2
 76 | 
 77 | [convolutional]
 78 | batch_normalize=1
 79 | filters=256
 80 | size=3
 81 | stride=1
 82 | pad=1
 83 | activation=leaky
 84 | 
 85 | [convolutional]
 86 | batch_normalize=1
 87 | filters=128
 88 | size=1
 89 | stride=1
 90 | pad=1
 91 | activation=leaky
 92 | 
 93 | [convolutional]
 94 | batch_normalize=1
 95 | filters=256
 96 | size=3
 97 | stride=1
 98 | pad=1
 99 | activation=leaky
100 | 
101 | [maxpool]
102 | size=2
103 | stride=2
104 | 
105 | [convolutional]
106 | batch_normalize=1
107 | filters=512
108 | size=3
109 | stride=1
110 | pad=1
111 | activation=leaky
112 | 
113 | [convolutional]
114 | batch_normalize=1
115 | filters=256
116 | size=1
117 | stride=1
118 | pad=1
119 | activation=leaky
120 | 
121 | [convolutional]
122 | batch_normalize=1
123 | filters=512
124 | size=3
125 | stride=1
126 | pad=1
127 | activation=leaky
128 | 
129 | [convolutional]
130 | batch_normalize=1
131 | filters=256
132 | size=1
133 | stride=1
134 | pad=1
135 | activation=leaky
136 | 
137 | [convolutional]
138 | batch_normalize=1
139 | filters=512
140 | size=3
141 | stride=1
142 | pad=1
143 | activation=leaky
144 | 
145 | [maxpool]
146 | size=2
147 | stride=2
148 | 
149 | [convolutional]
150 | batch_normalize=1
151 | filters=1024
152 | size=3
153 | stride=1
154 | pad=1
155 | activation=leaky
156 | 
157 | [convolutional]
158 | batch_normalize=1
159 | filters=512
160 | size=1
161 | stride=1
162 | pad=1
163 | activation=leaky
164 | 
165 | [convolutional]
166 | batch_normalize=1
167 | filters=1024
168 | size=3
169 | stride=1
170 | pad=1
171 | activation=leaky
172 | 
173 | [convolutional]
174 | batch_normalize=1
175 | filters=512
176 | size=1
177 | stride=1
178 | pad=1
179 | activation=leaky
180 | 
181 | [convolutional]
182 | batch_normalize=1
183 | filters=1024
184 | size=3
185 | stride=1
186 | pad=1
187 | activation=leaky
188 | 
189 | 
190 | #######
191 | 
192 | [convolutional]
193 | batch_normalize=1
194 | size=3
195 | stride=1
196 | pad=1
197 | filters=1024
198 | activation=leaky
199 | 
200 | [convolutional]
201 | batch_normalize=1
202 | size=3
203 | stride=1
204 | pad=1
205 | filters=1024
206 | activation=leaky
207 | 
208 | [route]
209 | layers=-9
210 | 
211 | [convolutional]
212 | batch_normalize=1
213 | size=1
214 | stride=1
215 | pad=1
216 | filters=64
217 | activation=leaky
218 | 
219 | [reorg]
220 | stride=2
221 | 
222 | [route]
223 | layers=-1,-4
224 | 
225 | [convolutional]
226 | batch_normalize=1
227 | size=3
228 | stride=1
229 | pad=1
230 | filters=1024
231 | activation=leaky
232 | 
233 | [convolutional]
234 | size=1
235 | stride=1
236 | pad=1
237 | filters=125
238 | activation=linear
239 | 
240 | 
241 | [region]
242 | anchors =  1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
243 | bias_match=1
244 | classes=20
245 | coords=4
246 | num=5
247 | softmax=1
248 | jitter=.3
249 | rescore=1
250 | 
251 | object_scale=5
252 | noobject_scale=1
253 | class_scale=1
254 | coord_scale=1
255 | 
256 | absolute=1
257 | thresh = .6
258 | random=1
259 | 


--------------------------------------------------------------------------------
/tests/darknet/cfg/yolo2tiny.cfg:
--------------------------------------------------------------------------------
  1 | [net]
  2 | # Training
  3 |  batch=64
  4 |  subdivisions=8
  5 | # Testing
  6 | # batch=1
  7 | # subdivisions=1
  8 | width=416
  9 | height=416
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 500200
 21 | policy=steps
 22 | steps=400000,450000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=16
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | [maxpool]
 34 | size=2
 35 | stride=2
 36 | 
 37 | [convolutional]
 38 | batch_normalize=1
 39 | filters=32
 40 | size=3
 41 | stride=1
 42 | pad=1
 43 | activation=leaky
 44 | 
 45 | [maxpool]
 46 | size=2
 47 | stride=2
 48 | 
 49 | [convolutional]
 50 | batch_normalize=1
 51 | filters=64
 52 | size=3
 53 | stride=1
 54 | pad=1
 55 | activation=leaky
 56 | 
 57 | [maxpool]
 58 | size=2
 59 | stride=2
 60 | 
 61 | [convolutional]
 62 | batch_normalize=1
 63 | filters=128
 64 | size=3
 65 | stride=1
 66 | pad=1
 67 | activation=leaky
 68 | 
 69 | [maxpool]
 70 | size=2
 71 | stride=2
 72 | 
 73 | [convolutional]
 74 | batch_normalize=1
 75 | filters=256
 76 | size=3
 77 | stride=1
 78 | pad=1
 79 | activation=leaky
 80 | 
 81 | [maxpool]
 82 | size=2
 83 | stride=2
 84 | 
 85 | [convolutional]
 86 | batch_normalize=1
 87 | filters=512
 88 | size=3
 89 | stride=1
 90 | pad=1
 91 | activation=leaky
 92 | 
 93 | [maxpool]
 94 | size=2
 95 | stride=1
 96 | 
 97 | [convolutional]
 98 | batch_normalize=1
 99 | filters=1024
100 | size=3
101 | stride=1
102 | pad=1
103 | activation=leaky
104 | 
105 | ###########
106 | 
107 | [convolutional]
108 | batch_normalize=1
109 | size=3
110 | stride=1
111 | pad=1
112 | filters=512
113 | activation=leaky
114 | 
115 | [convolutional]
116 | size=1
117 | stride=1
118 | pad=1
119 | filters=425
120 | activation=linear
121 | 
122 | [region]
123 | anchors =  0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
124 | bias_match=1
125 | classes=80
126 | coords=4
127 | num=5
128 | softmax=1
129 | jitter=.2
130 | rescore=0
131 | 
132 | object_scale=5
133 | noobject_scale=1
134 | class_scale=1
135 | coord_scale=1
136 | 
137 | absolute=1
138 | thresh = .6
139 | random=1
140 | 


--------------------------------------------------------------------------------
/tests/darknet/cfg/yolo3tiny.cfg:
--------------------------------------------------------------------------------
  1 | [net]
  2 | # Testing
  3 | batch=1
  4 | subdivisions=1
  5 | # Training
  6 | # batch=64
  7 | # subdivisions=2
  8 | width=416
  9 | height=416
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 500200
 21 | policy=steps
 22 | steps=400000,450000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=16
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | [maxpool]
 34 | size=2
 35 | stride=2
 36 | 
 37 | [convolutional]
 38 | batch_normalize=1
 39 | filters=32
 40 | size=3
 41 | stride=1
 42 | pad=1
 43 | activation=leaky
 44 | 
 45 | [maxpool]
 46 | size=2
 47 | stride=2
 48 | 
 49 | [convolutional]
 50 | batch_normalize=1
 51 | filters=64
 52 | size=3
 53 | stride=1
 54 | pad=1
 55 | activation=leaky
 56 | 
 57 | [maxpool]
 58 | size=2
 59 | stride=2
 60 | 
 61 | [convolutional]
 62 | batch_normalize=1
 63 | filters=128
 64 | size=3
 65 | stride=1
 66 | pad=1
 67 | activation=leaky
 68 | 
 69 | [maxpool]
 70 | size=2
 71 | stride=2
 72 | 
 73 | [convolutional]
 74 | batch_normalize=1
 75 | filters=256
 76 | size=3
 77 | stride=1
 78 | pad=1
 79 | activation=leaky
 80 | 
 81 | [maxpool]
 82 | size=2
 83 | stride=2
 84 | 
 85 | [convolutional]
 86 | batch_normalize=1
 87 | filters=512
 88 | size=3
 89 | stride=1
 90 | pad=1
 91 | activation=leaky
 92 | 
 93 | [maxpool]
 94 | size=2
 95 | stride=1
 96 | 
 97 | [convolutional]
 98 | batch_normalize=1
 99 | filters=1024
100 | size=3
101 | stride=1
102 | pad=1
103 | activation=leaky
104 | 
105 | ###########
106 | 
107 | [convolutional]
108 | batch_normalize=1
109 | filters=256
110 | size=1
111 | stride=1
112 | pad=1
113 | activation=leaky
114 | 
115 | [convolutional]
116 | batch_normalize=1
117 | filters=512
118 | size=3
119 | stride=1
120 | pad=1
121 | activation=leaky
122 | 
123 | [convolutional]
124 | size=1
125 | stride=1
126 | pad=1
127 | filters=255
128 | activation=linear
129 | 
130 | 
131 | 
132 | [yolo]
133 | mask = 3,4,5
134 | anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
135 | classes=80
136 | num=6
137 | jitter=.3
138 | ignore_thresh = .7
139 | truth_thresh = 1
140 | random=1
141 | 
142 | [route]
143 | layers = -4
144 | 
145 | [convolutional]
146 | batch_normalize=1
147 | filters=128
148 | size=1
149 | stride=1
150 | pad=1
151 | activation=leaky
152 | 
153 | [upsample]
154 | stride=2
155 | 
156 | [route]
157 | layers = -1, 8
158 | 
159 | [convolutional]
160 | batch_normalize=1
161 | filters=256
162 | size=3
163 | stride=1
164 | pad=1
165 | activation=leaky
166 | 
167 | [convolutional]
168 | size=1
169 | stride=1
170 | pad=1
171 | filters=255
172 | activation=linear
173 | 
174 | [yolo]
175 | mask = 0,1,2
176 | anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
177 | classes=80
178 | num=6
179 | jitter=.3
180 | ignore_thresh = .7
181 | truth_thresh = 1
182 | random=1
183 | 


--------------------------------------------------------------------------------
/tests/darknet/cfg/yolo3tiny_512.cfg:
--------------------------------------------------------------------------------
  1 | [net]
  2 | # Testing
  3 | batch=1
  4 | subdivisions=1
  5 | # Training
  6 | # batch=64
  7 | # subdivisions=2
  8 | width=512
  9 | height=512
 10 | channels=3
 11 | momentum=0.9
 12 | decay=0.0005
 13 | angle=0
 14 | saturation = 1.5
 15 | exposure = 1.5
 16 | hue=.1
 17 | 
 18 | learning_rate=0.001
 19 | burn_in=1000
 20 | max_batches = 500200
 21 | policy=steps
 22 | steps=400000,450000
 23 | scales=.1,.1
 24 | 
 25 | [convolutional]
 26 | batch_normalize=1
 27 | filters=16
 28 | size=3
 29 | stride=1
 30 | pad=1
 31 | activation=leaky
 32 | 
 33 | [maxpool]
 34 | size=2
 35 | stride=2
 36 | 
 37 | [convolutional]
 38 | batch_normalize=1
 39 | filters=32
 40 | size=3
 41 | stride=1
 42 | pad=1
 43 | activation=leaky
 44 | 
 45 | [maxpool]
 46 | size=2
 47 | stride=2
 48 | 
 49 | [convolutional]
 50 | batch_normalize=1
 51 | filters=64
 52 | size=3
 53 | stride=1
 54 | pad=1
 55 | activation=leaky
 56 | 
 57 | [maxpool]
 58 | size=2
 59 | stride=2
 60 | 
 61 | [convolutional]
 62 | batch_normalize=1
 63 | filters=128
 64 | size=3
 65 | stride=1
 66 | pad=1
 67 | activation=leaky
 68 | 
 69 | [maxpool]
 70 | size=2
 71 | stride=2
 72 | 
 73 | [convolutional]
 74 | batch_normalize=1
 75 | filters=256
 76 | size=3
 77 | stride=1
 78 | pad=1
 79 | activation=leaky
 80 | 
 81 | [maxpool]
 82 | size=2
 83 | stride=2
 84 | 
 85 | [convolutional]
 86 | batch_normalize=1
 87 | filters=512
 88 | size=3
 89 | stride=1
 90 | pad=1
 91 | activation=leaky
 92 | 
 93 | [maxpool]
 94 | size=2
 95 | stride=1
 96 | 
 97 | [convolutional]
 98 | batch_normalize=1
 99 | filters=1024
100 | size=3
101 | stride=1
102 | pad=1
103 | activation=leaky
104 | 
105 | ###########
106 | 
107 | [convolutional]
108 | batch_normalize=1
109 | filters=256
110 | size=1
111 | stride=1
112 | pad=1
113 | activation=leaky
114 | 
115 | [convolutional]
116 | batch_normalize=1
117 | filters=512
118 | size=3
119 | stride=1
120 | pad=1
121 | activation=leaky
122 | 
123 | [convolutional]
124 | size=1
125 | stride=1
126 | pad=1
127 | filters=255
128 | activation=linear
129 | 
130 | 
131 | 
132 | [yolo]
133 | mask = 3,4,5
134 | anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
135 | classes=80
136 | num=6
137 | jitter=.3
138 | ignore_thresh = .7
139 | truth_thresh = 1
140 | random=1
141 | 
142 | [route]
143 | layers = -4
144 | 
145 | [convolutional]
146 | batch_normalize=1
147 | filters=128
148 | size=1
149 | stride=1
150 | pad=1
151 | activation=leaky
152 | 
153 | [upsample]
154 | stride=2
155 | 
156 | [route]
157 | layers = -1, 8
158 | 
159 | [convolutional]
160 | batch_normalize=1
161 | filters=256
162 | size=3
163 | stride=1
164 | pad=1
165 | activation=leaky
166 | 
167 | [convolutional]
168 | size=1
169 | stride=1
170 | pad=1
171 | filters=255
172 | activation=linear
173 | 
174 | [yolo]
175 | mask = 0,1,2
176 | anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
177 | classes=80
178 | num=6
179 | jitter=.3
180 | ignore_thresh = .7
181 | truth_thresh = 1
182 | random=1
183 | 


--------------------------------------------------------------------------------
/tests/darknet/csresnext50-panet-spp.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "csresnext50-panet-spp";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer115_out.bin",
14 |         bin_path + "/debug/layer126_out.bin",
15 |         bin_path + "/debug/layer137_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/csresnext50-panet-spp.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/Kcs4xBozwY4wFx8/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/csresnext50-panet-spp_berkeley.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path = "csresnext50-panet-spp_berkeley";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer115_out.bin",
14 |         bin_path + "/debug/layer126_out.bin",
15 |         bin_path + "/debug/layer137_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/csresnext50-panet-spp_berkeley.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/berkeley.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/q82qHAtqpoaFYo5/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/names/berkeley.names:
--------------------------------------------------------------------------------
 1 | person
 2 | car
 3 | truck
 4 | bus
 5 | motor
 6 | bike
 7 | rider
 8 | traffic light
 9 | traffic sign
10 | train


--------------------------------------------------------------------------------
/tests/darknet/names/coco.names:
--------------------------------------------------------------------------------
 1 | person
 2 | bicycle
 3 | car
 4 | motorbike
 5 | aeroplane
 6 | bus
 7 | train
 8 | truck
 9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 


--------------------------------------------------------------------------------
/tests/darknet/names/coco2.names:
--------------------------------------------------------------------------------
1 | person
2 | stop sign


--------------------------------------------------------------------------------
/tests/darknet/names/coco4.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | 


--------------------------------------------------------------------------------
/tests/darknet/names/crowdhuman.names:
--------------------------------------------------------------------------------
1 | person
2 | head


--------------------------------------------------------------------------------
/tests/darknet/names/flir.names:
--------------------------------------------------------------------------------
1 | person
2 | bike
3 | car
4 | 


--------------------------------------------------------------------------------
/tests/darknet/names/mmr.names:
--------------------------------------------------------------------------------
1 | blue-cone
2 | yellow-cone
3 | orange-cone
4 | big-orange-cone


--------------------------------------------------------------------------------
/tests/darknet/names/voc.names:
--------------------------------------------------------------------------------
 1 | aeroplane
 2 | bicycle
 3 | bird
 4 | boat
 5 | bottle
 6 | bus
 7 | car
 8 | cat
 9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor
21 | 


--------------------------------------------------------------------------------
/tests/darknet/viz_yolo3.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include <opencv2/highgui/highgui.hpp>
 4 | #include <opencv2/imgproc/imgproc.hpp>
 5 | 
 6 | #include "tkdnn.h"
 7 | #include "test.h"
 8 | #include "DarknetParser.h"
 9 | #include "NetworkViz.h"
10 | 
11 | int main(int argc, char *argv[]) {
12 |     if(argc <2)
13 |         FatalError("you must provide an input image");
14 |     std::string input_image = argv[1];
15 |     std::string bin_path  = "yolo3";
16 |     std::string wgs_path  = bin_path + "/layers";
17 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3.cfg";
18 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
19 |     downloadWeightsifDoNotExist(wgs_path, bin_path, "https://cloud.hipert.unimore.it/s/jPXmHyptpLoNdNR/download");
20 | 
21 |     // parse darknet network
22 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
23 |     net->print();
24 | 
25 |     // input data
26 |     dnnType *input_d;
27 |     checkCuda( cudaMalloc(&input_d, sizeof(dnnType)*net->input_dim.tot()));
28 | 
29 |     // load image
30 |     cv::Mat frame, frameFloat;
31 |     frame = cv::imread(input_image);
32 |     cv::resize(frame, frame, cv::Size(net->input_dim.w, net->input_dim.h));
33 |     frame.convertTo(frameFloat, CV_32FC3, 1/255.0); 
34 | 
35 |     //split channels
36 |     cv::Mat bgr[3];
37 |     cv::split(frameFloat,bgr);//split source
38 | 
39 |     //write channels
40 |     for(int i=0; i<net->input_dim.c; i++) {
41 |         int idx = i*frameFloat.rows*frameFloat.cols;
42 |         int ch = net->input_dim.c-1 -i;
43 |         checkCuda( cudaMemcpy(input_d + idx, (void*)bgr[ch].data, frameFloat.rows*frameFloat.cols*sizeof(dnnType), cudaMemcpyHostToDevice));     
44 |     }
45 |     
46 |     tk::dnn::dataDim_t dim =  net->input_dim;     
47 |     dim.print();
48 |     std::cout<<"infer\n";
49 |     net->infer(dim, input_d);    
50 | 
51 |     // output directory
52 |     std::string output_viz = "viz/";
53 |     system( (std::string("mkdir -p ") + output_viz).c_str() );
54 | 
55 |     for(int i=0; i<net->num_layers; i++) {
56 |         std::string output_png = output_viz + "/layer" + std::to_string(i) + ".png";
57 |         std::cout<<"saving "<<output_png<<"\n";
58 |         cv::Mat viz = vizLayer2Mat(net, i);
59 |         cv::imwrite(output_png, viz);
60 |         //cv::imshow("layer", viz);
61 |         //cv::waitKey(0);
62 |     }
63 | 
64 |     checkCuda(cudaFree(input_d));
65 |     net->releaseLayers();
66 |     delete net;
67 |     return 0;
68 | }
69 | 
70 |  


--------------------------------------------------------------------------------
/tests/darknet/yolo2.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo2";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/layers/output.bin"
14 |     };
15 |     std::string wgs_path  = bin_path + "/layers";
16 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo2.cfg";
17 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
18 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/nf4PJ3k8bxBETwL/download");
19 | 
20 |     // parse darknet network
21 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
22 |     net->print();
23 | 
24 |     //convert network to tensorRT
25 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
26 |     
27 |     int ret = testInference(input_bins, output_bins, net, netRT);
28 |     net->releaseLayers();
29 |     delete net;
30 |     delete netRT;
31 |     return ret;
32 | }
33 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo2_voc.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo2_voc";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/layers/output.bin"
14 |     };
15 |     std::string wgs_path  = bin_path + "/layers";
16 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo2_voc.cfg";
17 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/voc.names";
18 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/DJC5Fi2pEjfNDP9/download");
19 | 
20 |     // parse darknet network
21 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
22 |     net->print();
23 | 
24 |     //convert network to tensorRT
25 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
26 |     
27 |     int ret = testInference(input_bins, output_bins, net, netRT);
28 |     net->releaseLayers();
29 |     delete net;
30 |     netRT->destroy();
31 |     delete netRT;
32 |     return ret;
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo2tiny.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo2tiny";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/layers/output.bin"
14 |     };
15 |     std::string wgs_path  = bin_path + "/layers";
16 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo2tiny.cfg";
17 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
18 |     // FIXME: wrong weights
19 |     //downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s//download");
20 | 
21 |     // parse darknet network
22 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
23 |     net->print();
24 | 
25 |     //convert network to tensorRT
26 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
27 |     
28 |     int ret = testInference(input_bins, output_bins, net, netRT);
29 |     net->releaseLayers();
30 |     delete net;
31 |     netRT->destroy();
32 |     delete netRT;
33 |     return ret;
34 | }
35 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo3.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo3";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer82_out.bin",
14 |         bin_path + "/debug/layer94_out.bin",
15 |         bin_path + "/debug/layer106_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/jPXmHyptpLoNdNR/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);    
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }


--------------------------------------------------------------------------------
/tests/darknet/yolo3_512.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo3_512";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer82_out.bin",
14 |         bin_path + "/debug/layer94_out.bin",
15 |         bin_path + "/debug/layer106_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3_512.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/RGecMeGLD4cXEWL/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 | //     for(int i=0; i<net->num_layers; i++) {
27 | //         if(net->layers[i]->getLayerType() == tk::dnn::LAYER_CONV2D) {
28 | //             tk::dnn::Conv2d *c = (tk::dnn::Conv2d*) net->layers[i];
29 | //             c->releaseDevice();
30 | //             c->releaseHost(true, false);
31 | //         }
32 | //         if(net->layers[i]->dstData != nullptr) {
33 | //             cudaFree(net->layers[i]->dstData);
34 | //             net->layers[i]->dstData = nullptr;
35 | //         }
36 | //    }
37 | 
38 |     //convert network to tensorRT
39 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
40 |     
41 |     int ret = testInference(input_bins, output_bins, net, netRT);
42 |     net->releaseLayers();
43 |     delete net;
44 |     netRT->destroy();
45 |     delete netRT;
46 |     return ret;
47 | }
48 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo3_berkeley.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo3_berkeley";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer82_out.bin",
14 |         bin_path + "/debug/layer94_out.bin",
15 |         bin_path + "/debug/layer106_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3_berkeley.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/berkeley.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/o5cHa4AjTKS64oD/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo3_coco4.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo3_coco4";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer82_out.bin",
14 |         bin_path + "/debug/layer94_out.bin",
15 |         bin_path + "/debug/layer106_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3_coco4.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco4.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/o27NDzSAartbyc4/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo3_flir.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo3_flir";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer82_out.bin",
14 |         bin_path + "/debug/layer94_out.bin",
15 |         bin_path + "/debug/layer106_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3_flir.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/flir.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/62DECncmF6bMMiH/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo3tiny.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo3tiny";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer16_out.bin",
14 |         bin_path + "/debug/layer23_out.bin",
15 |     };
16 |     std::string wgs_path  = bin_path + "/layers";
17 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3tiny.cfg";
18 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
19 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/LMcSHtWaLeps8yN/download");
20 | 
21 |     // parse darknet network
22 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
23 |     net->print();
24 | 
25 |     //convert network to tensorRT
26 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
27 |     
28 |     int ret = testInference(input_bins, output_bins, net, netRT);
29 |     net->releaseLayers();
30 |     delete net;
31 |     netRT->destroy();
32 |     delete netRT;
33 |     return ret;
34 | }
35 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo3tiny_512.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo3tiny_512";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer16_out.bin",
14 |         bin_path + "/debug/layer23_out.bin",
15 |     };
16 |     std::string wgs_path  = bin_path + "/layers";
17 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo3tiny_512.cfg";
18 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
19 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/8Zt6bHwHADqP4JC/download");
20 | 
21 |     // parse darknet network
22 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
23 |     net->print();
24 | 
25 | //     for(int i=0; i<net->num_layers; i++) {
26 | //         if(net->layers[i]->getLayerType() == tk::dnn::LAYER_CONV2D) {
27 | //             tk::dnn::Conv2d *c = (tk::dnn::Conv2d*) net->layers[i];
28 | //             c->releaseDevice();
29 | //             c->releaseHost(true, false);
30 | //         }
31 | //         if(net->layers[i]->dstData != nullptr) {
32 | //             cudaFree(net->layers[i]->dstData);
33 | //             net->layers[i]->dstData = nullptr;
34 | //         }
35 | //    }
36 | 
37 | 
38 |     //convert network to tensorRT
39 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
40 |     
41 |     int ret = testInference(input_bins, output_bins, net, netRT);
42 |     net->releaseLayers();
43 |     delete net;
44 |     netRT->destroy();
45 |     delete netRT;
46 |     return ret;
47 | }
48 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4-csp.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4-csp";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer144_out.bin",
14 |         bin_path + "/debug/layer159_out.bin",
15 |         bin_path + "/debug/layer174_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4-csp.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/AfzHE4BfTeEm2gH/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }


--------------------------------------------------------------------------------
/tests/darknet/yolo4-csp_crowd.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4-csp_crowd";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer144_out.bin",
14 |         bin_path + "/debug/layer159_out.bin",
15 |         bin_path + "/debug/layer174_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4-csp_crowd.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/crowdhuman.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/RKWfWNmWXfJigsK/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     delete netRT;
33 |     return ret;
34 | }


--------------------------------------------------------------------------------
/tests/darknet/yolo4.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer139_out.bin",
14 |         bin_path + "/debug/layer150_out.bin",
15 |         bin_path + "/debug/layer161_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = "../tests/darknet/cfg/yolo4.cfg";
19 |     std::string name_path = "../tests/darknet/names/coco.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/982LxTQcNQfFQc4/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4_320.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4_320";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer139_out.bin",
14 |         bin_path + "/debug/layer150_out.bin",
15 |         bin_path + "/debug/layer161_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = "../tests/darknet/cfg/yolo4_320.cfg";
19 |     std::string name_path = "../tests/darknet/names/coco.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/64PHAwrM6RCZbiR/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4_320_coco2.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4_320_coco2";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer139_out.bin",
14 |         bin_path + "/debug/layer150_out.bin",
15 |         bin_path + "/debug/layer161_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = "../tests/darknet/cfg/yolo4_320_coco2.cfg";
19 |     std::string name_path = "../tests/darknet/names/coco2.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/f3wk99iG5y7tEr8/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4_512.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4_512";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer139_out.bin",
14 |         bin_path + "/debug/layer150_out.bin",
15 |         bin_path + "/debug/layer161_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = "../tests/darknet/cfg/yolo4_512.cfg";
19 |     std::string name_path = "../tests/darknet/names/coco.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/fjFDqFmiSARKxFe/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 | //     for(int i=0; i<net->num_layers; i++) {
27 | //         if(net->layers[i]->getLayerType() == tk::dnn::LAYER_CONV2D) {
28 | //             tk::dnn::Conv2d *c = (tk::dnn::Conv2d*) net->layers[i];
29 | //             c->releaseDevice();
30 | //             c->releaseHost(true, false);
31 | //         }
32 | //         if(net->layers[i]->dstData != nullptr) {
33 | //             cudaFree(net->layers[i]->dstData);
34 | //             net->layers[i]->dstData = nullptr;
35 | //         }
36 | //    }
37 | 
38 | 
39 |     //convert network to tensorRT
40 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
41 |     
42 |     int ret = testInference(input_bins, output_bins, net, netRT);
43 |     net->releaseLayers();
44 |     delete net;
45 |     netRT->destroy();
46 |     delete netRT;
47 |     return ret;
48 | }
49 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4_608.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4_608";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer139_out.bin",
14 |         bin_path + "/debug/layer150_out.bin",
15 |         bin_path + "/debug/layer161_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = "../tests/darknet/cfg/yolo4_608.cfg";
19 |     std::string name_path = "../tests/darknet/names/coco.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/Bg9r7kqDFJiFB4c/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4_berkeley.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4_berkeley";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer139_out.bin",
14 |         bin_path + "/debug/layer150_out.bin",
15 |         bin_path + "/debug/layer161_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4_berkeley.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/berkeley.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/nkWFa5fgb4NTdnB/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4_berkeley_f1.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4_berkeley_f1";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer139_out.bin",
14 |         bin_path + "/debug/layer150_out.bin",
15 |         bin_path + "/debug/layer161_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4_berkeley.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/berkeley.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/q9dwoqQ5YQqEi7s/download");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4_mmr.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4_mmr";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer139_out.bin",
14 |         bin_path + "/debug/layer150_out.bin",
15 |         bin_path + "/debug/layer161_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4_mmr.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/mmr.names";
20 |     // downloadWeightsifDoNotExist(input_bins[0], bin_path, "");
21 | 
22 |     // parse darknet network
23 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
24 |     net->print();
25 | 
26 |     //convert network to tensorRT
27 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
28 |     
29 |     int ret = testInference(input_bins, output_bins, net, netRT);
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4tiny.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4tiny";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer30_out.bin",
14 |         bin_path + "/debug/layer37_out.bin"
15 |     };
16 |     std::string wgs_path  = bin_path + "/layers";
17 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4tiny.cfg";
18 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
19 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/iRnc4pSqmx78gJs/download");
20 | 
21 |     // parse darknet network
22 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
23 |     net->print();
24 | 
25 |     //convert network to tensorRT
26 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
27 |     
28 |     int ret = testInference(input_bins, output_bins, net, netRT);
29 |     std::cout<<ret<<std::endl;
30 |     net->releaseLayers();
31 |     delete net;
32 |     netRT->destroy();
33 |     delete netRT;
34 |     return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4tiny_512.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4tiny_512";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer30_out.bin",
14 |         bin_path + "/debug/layer37_out.bin"
15 |     };
16 |     std::string wgs_path  = bin_path + "/layers";
17 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4tiny_512.cfg";
18 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
19 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/qa2ws4GXg7mS5nN/download");
20 | 
21 |     // parse darknet network
22 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
23 |     net->print();
24 | 
25 | //         for(int i=0; i<net->num_layers; i++) {
26 | //         if(net->layers[i]->getLayerType() == tk::dnn::LAYER_CONV2D) {
27 | //             tk::dnn::Conv2d *c = (tk::dnn::Conv2d*) net->layers[i];
28 | //             c->releaseDevice();
29 | //             c->releaseHost(true, false);
30 | //         }
31 | //         if(net->layers[i]->dstData != nullptr) {
32 | //             cudaFree(net->layers[i]->dstData);
33 | //             net->layers[i]->dstData = nullptr;
34 | //         }
35 | //    }
36 | 
37 |     //convert network to tensorRT
38 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
39 |     
40 |     int ret = testInference(input_bins, output_bins, net, netRT);
41 |     net->releaseLayers();
42 |     delete net;
43 |     netRT->destroy();
44 |     delete netRT;
45 |     return ret;
46 | }
47 | 


--------------------------------------------------------------------------------
/tests/darknet/yolo4x.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<vector>
 3 | #include "tkdnn.h"
 4 | #include "test.h"
 5 | #include "DarknetParser.h"
 6 | 
 7 | int main() {
 8 |     std::string bin_path  = "yolo4x";
 9 |     std::vector<std::string> input_bins = { 
10 |         bin_path + "/layers/input.bin"
11 |     };
12 |     std::vector<std::string> output_bins = {
13 |         bin_path + "/debug/layer168_out.bin",
14 |         bin_path + "/debug/layer185_out.bin",
15 |         bin_path + "/debug/layer202_out.bin"
16 |     };
17 |     std::string wgs_path  = bin_path + "/layers";
18 |     std::string cfg_path  = std::string(TKDNN_PATH) + "/tests/darknet/cfg/yolo4x.cfg";
19 |     std::string name_path = std::string(TKDNN_PATH) + "/tests/darknet/names/coco.names";
20 |     downloadWeightsifDoNotExist(input_bins[0], bin_path, "https://cloud.hipert.unimore.it/s/5MFjtNtgbDGdJEo/download");
21 | 
22 | 
23 | 
24 |     // parse darknet network
25 |     tk::dnn::Network *net = tk::dnn::darknetParser(cfg_path, wgs_path, name_path);
26 |     net->print();
27 | 
28 |     //convert network to tensorRT
29 |     tk::dnn::NetworkRT *netRT = new tk::dnn::NetworkRT(net, net->getNetworkRTName(bin_path.c_str()));
30 |     
31 |     int ret = testInference(input_bins, output_bins, net, netRT);
32 |     net->releaseLayers();
33 |     delete net;
34 |     netRT->destroy();
35 |     delete netRT;
36 |     return ret;
37 | }
38 | 


--------------------------------------------------------------------------------
/tests/exporters/caffe_weights_exporter.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | import msgpack
 4 | import lmdb
 5 | import random
 6 | import caffe
 7 | import numpy as np
 8 | 
 9 | if __name__ == '__main__':
10 |     parser = argparse.ArgumentParser(description='CAFFE WEIGHTS EXPORTER TO CUDNN')
11 |     parser.add_argument('model',type=str,
12 |         help='Path to prototxt network model')
13 |     parser.add_argument('weights',type=str,
14 |         help='Path to caffemodel file')
15 |         
16 |     parser.add_argument('--output', type=str, help="output directory", default="layers")
17 | 
18 |     args = parser.parse_args()
19 | 
20 |     if not os.path.exists(args.output):
21 |         os.makedirs(args.output)
22 | 
23 |     print "\n\n ====== NET LOADED ====== "
24 |     net = caffe.Net(args.model, args.weights, caffe.TEST)
25 |     n_lay = len(net.params)
26 |     print "Number of layers: ", n_lay
27 |     for i in xrange(n_lay):
28 |         key = net.params.keys()[i]
29 |         print "Layer", key
30 |         t = net.layer_dict[key].type 
31 |         print "    type: ", t
32 |         w = net.params[key][0].data
33 |         b = net.params[key][1].data 
34 |         print "    weights shape:", np.shape(w)
35 |         print "    bias shape:", np.shape(b)
36 |         
37 |         w.tofile(args.output + "/" + t + str(i) + ".bin", format="f")
38 |         b.tofile(args.output + "/" + t + str(i) + ".bias.bin", format="f")
39 | 


--------------------------------------------------------------------------------
/tests/imuodom/imuodom.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include "tkDNN/ImuOdom.h"
 3 | 
 4 | const char *i0_bin   = "imuodom/layers/input0.bin";
 5 | const char *i1_bin   = "imuodom/layers/input1.bin";
 6 | const char *i2_bin   = "imuodom/layers/input2.bin";
 7 | const char *o0_bin   = "imuodom/layers/output0.bin";
 8 | const char *o1_bin   = "imuodom/layers/output1.bin";
 9 | 
10 | int main() {
11 | 
12 |     // V1 
13 |     downloadWeightsifDoNotExist(i0_bin, "imuodom", "https://cloud.hipert.unimore.it/s/ZAy34K5w2ixED6x/download");
14 |     
15 |     // V2
16 |     //downloadWeightsifDoNotExist(i0_bin, "imuodom", "https://cloud.hipert.unimore.it/s/BBSEbEbQbPKxp4s/download");
17 |     
18 |     tk::dnn::ImuOdom ImuNet;
19 |     ImuNet.init("imuodom/layers/");
20 | 
21 |     const int N = 19513;
22 | 
23 |     // Network layout
24 |     tk::dnn::dataDim_t dim0(1, 4, 1, 100);
25 |     tk::dnn::dataDim_t dim1(1, 3, 1, 100);
26 |     tk::dnn::dataDim_t dim2(1, 3, 1, 100);
27 | 
28 |     // Load input
29 |     dnnType *i0_d, *i1_d, *i2_d;
30 |     dnnType *i0_h, *i1_h, *i2_h;
31 |     readBinaryFile(i0_bin, dim0.tot()*N, &i0_h, &i0_d);
32 |     readBinaryFile(i1_bin, dim1.tot()*N, &i1_h, &i1_d);
33 |     readBinaryFile(i2_bin, dim2.tot()*N, &i2_h, &i2_d);
34 | 
35 |     dnnType *data;
36 |     tk::dnn::dataDim_t dim;
37 | 
38 |     dnnType *out0, *out1;
39 |     dnnType *out0_h, *out1_h;
40 |     readBinaryFile(o0_bin, ImuNet.odim0.tot()*N, &out0_h, &out0);
41 |     readBinaryFile(o1_bin, ImuNet.odim1.tot()*N, &out1_h, &out1);
42 | 
43 |     
44 |     std::ofstream path("path.txt");
45 | 
46 |     int ret_cudnn = 0; 
47 |     for(int i=0; i<N; i++) {
48 |         std::cout<<"i: "<<i<<"\n";
49 |         //TKDNN_TSTART
50 |         // Inference
51 |         ImuNet.update(i0_h, i1_h, i2_h);
52 |         //TKDNN_TSTOP
53 | 
54 |         // log path
55 |         path<<ImuNet.odomPOS(0)<<" "<<ImuNet.odomPOS(1)<<" "<< ImuNet.odomPOS(2)<<" ";
56 |         path<<ImuNet.odomEULER(0)<<" "<<ImuNet.odomEULER(1)<<" "<< ImuNet.odomEULER(2)<<"\n";
57 | 
58 |         path.flush();
59 | 
60 |         // Print real test
61 |         printCenteredTitle( (std::string(" CHECK RESULT ") + std::to_string(i) + " ").c_str() , '=');
62 |         ImuNet.odim0.print();
63 |         ret_cudnn |= checkResult(ImuNet.odim0.tot(), out0, ImuNet.o0_d) == 0 ? 0 : ERROR_CUDNN;
64 |         ImuNet.odim1.print();
65 |         ret_cudnn |= checkResult(ImuNet.odim0.tot(), out1, ImuNet.o1_d) == 0 ? 0 : ERROR_CUDNN;
66 | 
67 |         i0_h += ImuNet.dim0.tot();
68 |         i1_h += ImuNet.dim1.tot();
69 |         i2_h += ImuNet.dim2.tot();
70 |         out0 += ImuNet.odim0.tot();
71 |         out1 += ImuNet.odim1.tot();
72 |     }
73 | 
74 |     int err = 0;
75 |     err = system("cat path.txt  | cut -d\" \" -f1,2 | gnuplot -p -e \"set datafile separator ' '; plot '-'\"");
76 |     err = system("cat path.txt  | cut -d\" \" -f6   | gnuplot -p -e \"set datafile separator ' '; plot '-'\"");
77 |     return ret_cudnn;
78 | }
79 | 


--------------------------------------------------------------------------------
/tests/imuodom/infer.py:
--------------------------------------------------------------------------------
 1 | import keras
 2 | from keras.models import load_model
 3 | import keras.backend.tensorflow_backend as KTF
 4 | import numpy as np
 5 | import argparse
 6 | import tensorflow as tf
 7 | import os
 8 | import random
 9 | import struct
10 | from keras.models import Sequential, Model
11 | import pickle
12 | 
13 | def bin_write(f, data):
14 |     data = data.flatten()
15 |     fmt = 'f'*len(data)
16 |     bin = struct.pack(fmt, *data)
17 |     f.write(bin)
18 | 
19 | # USE weight_exporter to generare wgs bins
20 | if __name__ == '__main__':
21 |     
22 | 
23 |     print("DATA FORMAT: ", keras.backend.image_data_format())
24 | 
25 |     print("Load model: ", "ferrariSEP.hdf5")
26 |     model = load_model("ferrariSEP.hdf5")
27 |     model.summary()
28 |     weights = model.get_weights()
29 | 
30 |     indata = pickle.load(open("input.pk", 'rb'))
31 |     outdata = pickle.load(open("output.pk", 'rb'))
32 | 
33 |     x_angle = indata[0]
34 |     x_gyro = indata[1]
35 |     x_acc = indata[2]
36 |     
37 |     [yhat_delta_p, yhat_delta_q] = model.predict(indata, batch_size=1, verbose=1)
38 |     predictdata = [yhat_delta_p, yhat_delta_q]
39 | 
40 |     error = outdata[0] - predictdata[0]
41 |     print("error delta_p: ", error.sum())
42 |     error = outdata[1] - predictdata[1]
43 |     print("error delta_q: ", error.sum())
44 | 
45 | 
46 |     #layer_name = 'dense_4'
47 |     #intermediate_layer_model = Model(inputs=model.input,
48 |     #                                 outputs=model.get_layer(layer_name).output)
49 |     #intermediate_output = intermediate_layer_model.predict([x_angle, x_gyro, x_acc])
50 | 
51 | 
52 |     x_angle = np.array([x_angle])
53 |     x_gyro = np.array([x_gyro])
54 |     x_acc = np.array([x_acc])
55 |     #intermediate_output = np.array([intermediate_output])
56 | 
57 |     x_angle =  x_angle.transpose(1, 3, 0, 2)
58 |     x_gyro =  x_gyro.transpose(1, 3, 0, 2)
59 |     x_acc =  x_acc.transpose(1, 3, 0, 2)
60 |     #intermediate_output = intermediate_output.transpose(0, 3, 1, 2)
61 |     #print("Aggregate:")
62 |     #print(intermediate_output.tolist())
63 |  
64 |     print("x0: ", np.shape(x_angle))
65 |     #print("out: ",np.shape(intermediate_output))
66 | 
67 |     x_angle = np.array(x_angle.flatten(), dtype=np.float32)
68 |     x_gyro = np.array(x_gyro.flatten(), dtype=np.float32)
69 |     x_acc = np.array(x_acc.flatten(), dtype=np.float32)
70 |     yhat_delta_p = np.array(yhat_delta_p.flatten(), dtype=np.float32)
71 |     yhat_delta_q = np.array(yhat_delta_q.flatten(), dtype=np.float32)
72 |     #intermediate_output = np.array(intermediate_output.flatten(), dtype=np.float32)
73 | 
74 | 
75 |     f = open("layers/input0.bin", mode='wb')
76 |     bin_write(f, x_angle)
77 |     f = open("layers/input1.bin", mode='wb')
78 |     bin_write(f, x_gyro)
79 |     f = open("layers/input2.bin", mode='wb')
80 |     bin_write(f, x_acc)
81 |     f = open("layers/output0.bin", mode='wb')
82 |     bin_write(f, yhat_delta_p)
83 |     f = open("layers/output1.bin", mode='wb')
84 |     bin_write(f, yhat_delta_q)
85 |     #f = open("layers/output.bin", mode='wb')
86 |     #bin_write(f, intermediate_output)
87 | 
88 | 


--------------------------------------------------------------------------------
/tests/mnist/mnist_model.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # mail:    admin@9crk.com
 3 | # author:  9crk.from China.ShenZhen
 4 | # time:    2017-03-22
 5 | 
 6 | import caffe
 7 | import numpy as np
 8 | import cv2
 9 | import sys
10 | import Image
11 | import matplotlib.pyplot as plt
12 | 
13 | model = 'lenet.prototxt';
14 | weights = 'lenet.caffemodel';
15 | net = caffe.Net(model,weights,caffe.TEST);
16 | caffe.set_mode_gpu()
17 | img = np.array(np.random.rand(28,28), dtype=np.float32)
18 | #revert the image,and normalize it to 0-1 range
19 | 
20 | print "INPUT: ", img
21 | img.tofile("input.bin", format="f")
22 | print "SHAPE: ", np.shape(img)
23 | out = net.forward_all(data=np.asarray([img]))
24 | 
25 | out = out[out.keys()[0]]
26 | print out
27 | print np.shape(out)
28 | out.tofile("output.bin", format="f")
29 | #print out['prob'][0]
30 | #print out['prob'][0].argmax()
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/tests/mnist/test_mnist.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include "tkdnn.h"
 3 | 
 4 | const char *input_bin   = "mnist/input.bin";
 5 | const char *c0_bin      = "mnist/layers/c0.bin";
 6 | const char *c1_bin      = "mnist/layers/c1.bin";
 7 | const char *d2_bin      = "mnist/layers/d2.bin";
 8 | const char *d3_bin      = "mnist/layers/d3.bin";
 9 | const char *output_bin   = "mnist/output.bin";
10 | 
11 | int main() {
12 | 
13 |     downloadWeightsifDoNotExist(input_bin, "mnist", "https://cloud.hipert.unimore.it/s/2TyQkMJL3LArLAS/download");
14 | 
15 |     // Network layout
16 |     tk::dnn::dataDim_t dim(1, 1, 28, 28, 1);
17 |     tk::dnn::Network net(dim);
18 |     tk::dnn::Conv2d     l0(&net, 20, 5, 5, 1, 1, 0, 0, c0_bin);
19 |     tk::dnn::Pooling    l1(&net, 2, 2, 2, 2, 0, 0, tk::dnn::POOLING_MAX);
20 |     tk::dnn::Conv2d     l2(&net, 50, 5, 5, 1, 1, 0, 0, c1_bin);
21 |     tk::dnn::Pooling    l3(&net, 2, 2, 2, 2, 0, 0, tk::dnn::POOLING_MAX);
22 |     tk::dnn::Dense      l4(&net, 500, d2_bin);
23 |     tk::dnn::Activation l5(&net, tk::dnn::ACTIVATION_LEAKY);
24 |     tk::dnn::Dense      l6(&net, 10, d3_bin);
25 |     tk::dnn::Softmax    l7(&net);
26 | 
27 |     tk::dnn::NetworkRT netRT(&net, net.getNetworkRTName("mnist"));
28 | 
29 |     // Load input
30 |     dnnType *data;
31 |     dnnType *input_h;
32 |     readBinaryFile(input_bin, dim.tot(), &input_h, &data);
33 | 
34 |     dnnType *out_data, *out_data2;
35 | 
36 |     std::cout<<"CUDNN inference:\n"; {
37 |         dim.print(); //print initial dimension  
38 |         TKDNN_TSTART
39 |         out_data = net.infer(dim, data);    
40 |         TKDNN_TSTOP
41 |         dim.print();   
42 |     }
43 | 
44 |     // Print result
45 |     //std::cout<<"\n======= CUDNN RESULT =======\n";
46 |     //printDeviceVector(10, out_data);
47 |  
48 |     tk::dnn::dataDim_t dim2(1, 1, 28, 28, 1);
49 | 
50 |     std::cout<<"TENSORRT inference:\n"; {
51 |         dim2.print();
52 |         TKDNN_TSTART
53 |         out_data2 = netRT.infer(dim2, data);
54 |         TKDNN_TSTOP
55 |         dim2.print();
56 |     }
57 | 
58 |     // Print result
59 |     //std::cout<<"\n======= TENRT RESULT =======\n";
60 |     //printDeviceVector(10, out_data);
61 | 
62 |     std::cout<<"\n======= CHECK RESULT =======\n";
63 |     int ret_tensorrt = checkResult(dim.tot(), out_data, out_data2) == 0 ? 0 : ERROR_TENSORRT;
64 | 
65 |  /*
66 |     // Print real test
67 |     std::cout<<"\n==== CHECK RESULT ====\n";
68 |     dnnType *out;
69 |     dnnType *out_h;
70 |     readBinaryFile(output_bin, dim.tot(), &out_h, &out);
71 |     printDeviceVector(dim.tot(), out);
72 | */ 
73 |     return ret_tensorrt;
74 | }
75 | 


--------------------------------------------------------------------------------
/tests/simple/test_model.py:
--------------------------------------------------------------------------------
 1 | import keras
 2 | import numpy as np
 3 | from keras.models import Sequential
 4 | from keras.layers import Input, Dense, Activation, Flatten, Dropout, ELU, Reshape, Lambda, Conv1D
 5 | from keras.layers import Bidirectional, CuDNNLSTM
 6 | from keras.layers.convolutional import Convolution2D, Convolution3D
 7 | from keras.layers.pooling import MaxPooling2D, MaxPooling3D, AveragePooling3D
 8 | from keras.models import Sequential, Model
 9 | from keras.layers import Cropping2D
10 | import keras.backend.tensorflow_backend as KTF
11 | import struct
12 | from keras.models import Sequential, Model
13 | 
14 | def bin_write(f, data):
15 |     data = data.flatten()
16 |     fmt = 'f'*len(data)
17 |     bin = struct.pack(fmt, *data)
18 |     f.write(bin)
19 | 
20 | def create_model():
21 |     x1 = Input((3, 8), name='x1')
22 |     conv = Conv1D(4, 2)(x1)
23 |     lstm = Bidirectional(CuDNNLSTM(5, return_sequences=True))(conv)    
24 |     lstm2 = Bidirectional(CuDNNLSTM(5, return_sequences=False))(lstm)    
25 |     model = Model([x1], [lstm2])
26 |     model.summary()
27 | 
28 |     return model
29 | 
30 | if __name__ == '__main__':
31 |     print ("DATA FORMAT: ", keras.backend.image_data_format())
32 | 
33 |     model = create_model()
34 |     model.save("net.hdf5")
35 | 
36 |     np.random.seed(2)
37 |     x = np.random.rand(1,1,3,8)
38 |     r = model.predict( x[0], batch_size=1)
39 | 
40 |     r = np.array([r])
41 |     x =  x.transpose(0, 3, 1, 2)
42 |     #r =  r.transpose(0, 3, 1, 2)
43 |     print("in: ", np.shape(x))
44 |     print("out: ", np.shape(r))
45 |     print("output: ", r.tolist())
46 | 
47 |     x = np.array(x.flatten(), dtype=np.float32)
48 |     f = open("input.bin", mode='wb')
49 |     bin_write(f, x)
50 | 
51 |     r = np.array(r.flatten(), dtype=np.float32)
52 |     f = open("output.bin", mode='wb')
53 |     bin_write(f, r)
54 | 
55 | 


--------------------------------------------------------------------------------
/tests/simple/test_simple.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include "tkdnn.h"
 3 | 
 4 | const char *input_bin   = "simple/input.bin";
 5 | const char *c0_bin      = "simple/layers/conv1d_1.bin";
 6 | const char *l1_bin      = "simple/layers/bidirectional_1.bin";
 7 | const char *l2_bin      = "simple/layers/bidirectional_2.bin";
 8 | const char *output_bin  = "simple/output.bin";
 9 | 
10 | int main() {
11 | 
12 |     // Network layout
13 |     tk::dnn::dataDim_t dim(1, 8, 1, 3);
14 |     tk::dnn::Network net(dim);
15 |     tk::dnn::Conv2d     l0(&net, 4, 1, 2, 1, 1, 0, 0, c0_bin);
16 |     tk::dnn::LSTM       l1(&net, 5, true, l1_bin);
17 |     tk::dnn::LSTM       l2(&net, 5, false, l2_bin);
18 | 
19 |     net.print();
20 | 
21 |     net.print();
22 | 
23 |     // Load input
24 |     dnnType *data;
25 |     dnnType *input_h;
26 |     readBinaryFile(input_bin, dim.tot(), &input_h, &data);
27 | 
28 |     // Print input
29 |     std::cout<<"\n======= INPUT =======\n";
30 |     printDeviceVector(dim.tot(), data);
31 |     std::cout<<"\n";
32 | 
33 |     //convert network to tensorRT
34 |     tk::dnn::NetworkRT netRT(&net, net.getNetworkRTName("simple"));
35 | 
36 |     dnnType *out_data, *out_data2; // cudnn output, tensorRT output
37 | 
38 |     tk::dnn::dataDim_t dim1 = dim; //input dim
39 |     printCenteredTitle(" CUDNN inference ", '=', 30); {
40 |         dim1.print();
41 |         TKDNN_TSTART
42 |         out_data = net.infer(dim1, data);
43 |         TKDNN_TSTOP
44 |         dim1.print();
45 |     }
46 | 
47 |     tk::dnn::dataDim_t dim2 = dim;
48 |     printCenteredTitle(" TENSORRT inference ", '=', 30); {
49 |         dim2.print();
50 |         TKDNN_TSTART
51 |         out_data2 = netRT.infer(dim2, data);
52 |         TKDNN_TSTOP
53 |         dim2.print();
54 |     }
55 | 
56 |     std::cout<<"\n======= CUDNN =======\n";
57 |     printDeviceVector(dim.tot(), out_data);
58 |     std::cout<<"\n======= TENSORRT =======\n";
59 |     printDeviceVector(dim.tot(), out_data2);
60 | 
61 |     printCenteredTitle(" CHECK RESULTS ", '=', 30);
62 |     dnnType *out, *out_h;
63 |     int out_dim = net.getOutputDim().tot();
64 |     //readBinaryFile(output_bin, out_dim, &out_h, &out);
65 |     // std::cout<<"CUDNN vs correct"; 
66 |     // int ret_cudnn = checkResult(out_dim, out_data, out) == 0 ? 0: ERROR_CUDNN;
67 |     // std::cout<<"TRT   vs correct"; 
68 |     // int ret_tensorrt = checkResult(out_dim, out_data2, out) == 0 ? 0 : ERROR_TENSORRT;
69 |     std::cout<<"CUDNN vs TRT    "; 
70 |     int ret_cudnn_tensorrt = checkResult(out_dim, out_data, out_data2) == 0 ? 0 : ERROR_CUDNNvsTENSORRT;
71 | 
72 |     return ret_cudnn_tensorrt;
73 | }
74 | 


--------------------------------------------------------------------------------
/tests/test_rtinference/rtinference.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<algorithm>
 3 | #include "tkDNN/tkdnn.h"
 4 | #include <stdlib.h>     /* srand, rand */
 5 | 
 6 | 
 7 | int main(int argc, char *argv[]) {
 8 | 
 9 |     if(argc < 2 || !fileExist(argv[1]))
10 |         FatalError("unable to read serialRT file");
11 | 
12 |     int BATCH_SIZE = 1;
13 |     if(argc >2)
14 |         BATCH_SIZE = atoi(argv[2]);
15 | 
16 |     int NTEST = 100;
17 |     if(argc >3)
18 |         NTEST = atoi(argv[3]);
19 | 
20 |     //always same test 
21 |     srand (0); 
22 | 
23 |     //convert network to tensorRT
24 |     tk::dnn::NetworkRT netRT(NULL, argv[1]);
25 | 
26 |     
27 |     
28 |     tk::dnn::dataDim_t idim = netRT.input_dim;
29 |     tk::dnn::dataDim_t odim = netRT.output_dim;
30 |     idim.n = BATCH_SIZE;
31 |     odim.n = BATCH_SIZE;
32 |     dnnType *input = new float[idim.tot()];
33 |     dnnType *output = new float[odim.tot()];
34 |     dnnType *input_d;
35 |     checkCuda( cudaMalloc(&input_d, idim.tot()*sizeof(dnnType)));
36 | 
37 |     int ret_tensorrt = 0; 
38 |     std::cout<<"Testing with batchsize: "<<BATCH_SIZE<<"\n";
39 |     std::vector<double> stats;
40 |     printCenteredTitle(" TENSORRT inference ", '=', 30); 
41 |     float total_time = 0;
42 |     for(int i=0; (NTEST > 0 ? i<NTEST : true) ; i++) {
43 | 
44 |         // generate input
45 |         for(int j=0; j<netRT.input_dim.tot(); j++) {
46 |             dnnType val = ((float) rand() / (RAND_MAX));
47 |             for(int b=0; b<BATCH_SIZE; b++)
48 |                 input[netRT.input_dim.tot()*b + j] = val;
49 |         }
50 |         checkCuda(cudaMemcpy(input_d, input, idim.tot()*sizeof(dnnType), cudaMemcpyHostToDevice));
51 | 
52 |         tk::dnn::dataDim_t dim = idim;
53 |         TKDNN_TSTART
54 |         netRT.infer(dim, input_d);
55 |         TKDNN_TSTOP
56 |         total_time+= t_ns;
57 |         if(i> 1)
58 |         stats.push_back(t_ns);
59 | 
60 |         // control output
61 |         // std::cout<<"Output Buffers: "<<netRT.getBuffersN()-1<<"\n";
62 |         std::cout<<"Img: "<<i<<"\n";
63 | 	for(int o=1; o<netRT.getBuffersN(); o++) {
64 |             for(int b=1; b<BATCH_SIZE; b++) {
65 |                 dnnType *out_d = (dnnType*) netRT.buffersRT[o];
66 |                 dnnType *out0_d = out_d;
67 |                 dnnType *outI_d = out_d + netRT.buffersDIM[o].tot()*b;
68 |                 ret_tensorrt |= checkResult(netRT.buffersDIM[o].tot(), outI_d, out0_d,true, 10, false) == 0 ? 0 : ERROR_TENSORRT;
69 |             }
70 |         }
71 |     }
72 | 
73 |     double min = *std::min_element(stats.begin(), stats.end()); ///BATCH_SIZE;
74 |     double max = *std::max_element(stats.begin(), stats.end()); ///BATCH_SIZE;
75 |     double mean =0;
76 |     for(int i=0; i<stats.size(); i++) mean += stats[i]; mean /= stats.size();
77 |     //mean /=BATCH_SIZE;
78 | 
79 |     std::cout<<"Min: "<<min<<" ms\n";    
80 |     std::cout<<"Max: "<<max<<" ms\n";    
81 |     std::cout<<"Avg: "<<mean<<" ms\t"<<1000/(mean)<<" FPS\n"<<COL_END;   
82 | 
83 | 
84 |     std::ofstream times;
85 |     times.open("times_rtinference.csv", std::ios_base::app);
86 | 
87 |     std::string net_name;
88 |     removePathAndExtension(argv[1], net_name);
89 | 
90 |     times << net_name<< "_" << BATCH_SIZE << ";" << mean << ";" << min << ";" << max << ";" << 1000./mean << "\n";
91 | 
92 |     times.close();
93 | 
94 |     return ret_tensorrt;
95 | }
96 | 
97 | 


--------------------------------------------------------------------------------