├── .gitignore ├── LICENSE ├── code ├── CMakeLists.txt ├── include │ ├── EntroyCalibrator.h │ ├── PluginFactory.h │ ├── TrtNet.h │ ├── UpsampleLayer.h │ ├── Utils.h │ ├── YoloConfigs.h │ └── YoloLayer.h └── src │ ├── EntroyCalibrator.cpp │ ├── TrtNet.cpp │ ├── UpsampleLayer.cpp │ ├── UpsampleLayer.cu │ └── YoloLayer.cu ├── readme.md └── sample ├── CMakeLists.txt ├── argsParser.h ├── configs.h ├── dataReader.cpp ├── dataReader.h ├── eval.cpp ├── eval.h ├── evalClassifNet.cpp ├── runNet.cpp └── runTwoNets.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | ## General 2 | # Visual Studio Code files 3 | .vscode 4 | 5 | # build 6 | build 7 | 8 | # install 9 | install -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 lewes6369 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /code/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(trtNet) 3 | 4 | set(CMAKE_BUILD_TYPE Release) 5 | 6 | #include 7 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 8 | 9 | #src 10 | set(PLUGIN_SOURCES 11 | src/EntroyCalibrator.cpp 12 | src/UpsampleLayer.cpp 13 | src/UpsampleLayer.cu 14 | src/YoloLayer.cu 15 | src/TrtNet.cpp 16 | ) 17 | 18 | # 19 | # CUDA Configuration 20 | # 21 | find_package(CUDA REQUIRED) 22 | 23 | set(CUDA_VERBOSE_BUILD ON) 24 | 25 | # Specify the cuda host compiler to use the same compiler as cmake. 
26 | set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) 27 | 28 | # TensorRT 29 | find_path(TENSORRT_INCLUDE_DIR NvInfer.h 30 | HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR} 31 | PATH_SUFFIXES include) 32 | MESSAGE(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}") 33 | find_library(TENSORRT_LIBRARY_INFER nvinfer 34 | HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR} 35 | PATH_SUFFIXES lib lib64 lib/x64) 36 | find_library(TENSORRT_LIBRARY_INFER_PLUGIN nvinfer_plugin 37 | HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR} 38 | PATH_SUFFIXES lib lib64 lib/x64) 39 | find_library(TENSORRT_LIBRARY_PARSER nvparsers 40 | HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR} 41 | PATH_SUFFIXES lib lib64 lib/x64) 42 | set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_INFER_PLUGIN} ${TENSORRT_LIBRARY_PARSER}) 43 | MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}") 44 | find_package_handle_standard_args( 45 | TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIBRARY) 46 | if(NOT TENSORRT_FOUND) 47 | message(ERROR 48 | "Cannot find TensorRT library.") 49 | endif() 50 | 51 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED") # -std=gnu++11 52 | 53 | list(APPEND CUDA_NVCC_FLAGS "-D_FORCE_INLINES -Xcompiler -fPIC") 54 | CUDA_INCLUDE_DIRECTORIES(${CUDNN_INCLUDE_DIR} ${TENSORRT_INCLUDE_DIR}) 55 | CUDA_ADD_LIBRARY(TrtNet STATIC ${PLUGIN_SOURCES}) 56 | 57 | target_include_directories(TrtNet PUBLIC ${CUDA_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDNN_INCLUDE_DIR}) 58 | target_link_libraries(TrtNet ${TENSORRT_LIBRARY}) -------------------------------------------------------------------------------- /code/include/EntroyCalibrator.h: -------------------------------------------------------------------------------- 1 | #ifndef _ENTROY_CALIBRATOR_H 2 | #define _ENTROY_CALIBRATOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "NvInfer.h" 8 | #include 
"Utils.h" 9 | 10 | namespace nvinfer1 11 | { 12 | 13 | class Int8EntropyCalibrator : public IInt8EntropyCalibrator 14 | { 15 | public: 16 | Int8EntropyCalibrator(int BatchSize,const std::vector>& data,const std::string& CalibDataName = "",bool readCache = true); 17 | 18 | virtual ~Int8EntropyCalibrator(); 19 | 20 | int getBatchSize() const override { return mBatchSize; } 21 | 22 | bool getBatch(void* bindings[], const char* names[], int nbBindings) override; 23 | 24 | const void* readCalibrationCache(size_t& length) override; 25 | 26 | void writeCalibrationCache(const void* cache, size_t length) override; 27 | 28 | private: 29 | std::string mCalibDataName; 30 | std::vector> mDatas; 31 | int mBatchSize; 32 | 33 | int mCurBatchIdx; 34 | float* mCurBatchData{ nullptr }; 35 | 36 | size_t mInputCount; 37 | bool mReadCache; 38 | void* mDeviceInput{ nullptr }; 39 | 40 | std::vector mCalibrationCache; 41 | }; 42 | 43 | } //namespace 44 | 45 | #endif //_ENTROY_CALIBRATOR_H 46 | -------------------------------------------------------------------------------- /code/include/PluginFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef __PLUGIN_FACTORY_H_ 2 | #define __PLUGIN_FACTORY_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "UpsampleLayer.h" 8 | #include "YoloLayer.h" 9 | #include "NvInferPlugin.h" 10 | #include "NvCaffeParser.h" 11 | 12 | namespace Tn 13 | { 14 | static constexpr float NEG_SLOPE = 0.1; 15 | static constexpr float UPSAMPLE_SCALE = 2.0; 16 | static constexpr int CUDA_THREAD_NUM = 512; 17 | 18 | // Integration for serialization. 
19 | using nvinfer1::plugin::INvPlugin; 20 | using nvinfer1::plugin::createPReLUPlugin; 21 | using nvinfer1::UpsampleLayerPlugin; 22 | using nvinfer1::YoloLayerPlugin; 23 | class PluginFactory : public nvinfer1::IPluginFactory, public nvcaffeparser1::IPluginFactoryExt 24 | { 25 | public: 26 | inline bool isLeakyRelu(const char* layerName) 27 | { 28 | return std::regex_match(layerName , std::regex(R"(layer(\d*)-act)")); 29 | } 30 | 31 | inline bool isUpsample(const char* layerName) 32 | { 33 | return std::regex_match(layerName , std::regex(R"(layer(\d*)-upsample)")); 34 | } 35 | 36 | inline bool isYolo(const char* layerName) 37 | { 38 | return strcmp(layerName,"yolo-det") == 0; 39 | } 40 | 41 | virtual nvinfer1::IPlugin* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights) override 42 | { 43 | assert(isPlugin(layerName)); 44 | 45 | if(isLeakyRelu(layerName)) 46 | { 47 | assert(nbWeights == 0 && weights == nullptr); 48 | mPluginLeakyRelu.emplace_back(std::unique_ptr(createPReLUPlugin(NEG_SLOPE), nvPluginDeleter)); 49 | return mPluginLeakyRelu.back().get(); 50 | } 51 | else if (isUpsample(layerName)) 52 | { 53 | assert(nbWeights == 0 && weights == nullptr); 54 | mPluginUpsample.emplace_back(std::unique_ptr(new UpsampleLayerPlugin(UPSAMPLE_SCALE,CUDA_THREAD_NUM))); 55 | return mPluginUpsample.back().get(); 56 | } 57 | else if (isYolo(layerName)) 58 | { 59 | assert(nbWeights == 0 && weights == nullptr && mPluginYolo.get() == nullptr); 60 | mPluginYolo.reset(new YoloLayerPlugin(CUDA_THREAD_NUM)); 61 | return mPluginYolo.get(); 62 | } 63 | else 64 | { 65 | assert(0); 66 | return nullptr; 67 | } 68 | } 69 | 70 | nvinfer1::IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override 71 | { 72 | assert(isPlugin(layerName)); 73 | 74 | if (isLeakyRelu(layerName)) 75 | { 76 | mPluginLeakyRelu.emplace_back(std::unique_ptr(createPReLUPlugin(serialData, serialLength), nvPluginDeleter)); 77 | return 
mPluginLeakyRelu.back().get(); 78 | } 79 | else if (isUpsample(layerName)) 80 | { 81 | mPluginUpsample.emplace_back(std::unique_ptr(new UpsampleLayerPlugin(serialData, serialLength))); 82 | return mPluginUpsample.back().get(); 83 | } 84 | else if (isYolo(layerName)) 85 | { 86 | assert(mPluginYolo.get() == nullptr); 87 | mPluginYolo.reset(new YoloLayerPlugin(serialData, serialLength)); 88 | return mPluginYolo.get(); 89 | } 90 | else 91 | { 92 | assert(0); 93 | return nullptr; 94 | } 95 | } 96 | 97 | 98 | bool isPlugin(const char* name) override 99 | { 100 | return isPluginExt(name); 101 | } 102 | 103 | bool isPluginExt(const char* name) override 104 | { 105 | //std::cout << "check plugin " << name << isYolo(name)<< std::endl; 106 | return isLeakyRelu(name) || isUpsample(name) || isYolo(name); 107 | } 108 | 109 | // The application has to destroy the plugin when it knows it's safe to do so. 110 | void destroyPlugin() 111 | { 112 | for (auto& item : mPluginLeakyRelu) 113 | item.reset(); 114 | 115 | for (auto& item : mPluginUpsample) 116 | item.reset(); 117 | 118 | mPluginYolo.reset(); 119 | } 120 | 121 | void (*nvPluginDeleter)(INvPlugin*){[](INvPlugin* ptr) { if(ptr) ptr->destroy(); }}; 122 | 123 | std::vector> mPluginLeakyRelu{}; 124 | std::vector> mPluginUpsample{}; 125 | std::unique_ptr mPluginYolo {nullptr}; 126 | }; 127 | } 128 | 129 | #endif -------------------------------------------------------------------------------- /code/include/TrtNet.h: -------------------------------------------------------------------------------- 1 | #ifndef __TRT_NET_H_ 2 | #define __TRT_NET_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "NvInferPlugin.h" 10 | #include "NvCaffeParser.h" 11 | #include "PluginFactory.h" 12 | #include "Utils.h" 13 | 14 | namespace Tn 15 | { 16 | enum class RUN_MODE 17 | { 18 | FLOAT32 = 0, 19 | FLOAT16 = 1, 20 | INT8 = 2 21 | }; 22 | 23 | class trtNet 24 | { 25 | public: 26 | //Load from caffe model 27 | 
trtNet(const std::string& prototxt,const std::string& caffeModel,const std::vector& outputNodesName, 28 | const std::vector>& calibratorData, RUN_MODE mode = RUN_MODE::FLOAT32 , int maxBatchSize = 1); 29 | 30 | //Load from engine file 31 | explicit trtNet(const std::string& engineFile); 32 | 33 | ~trtNet() 34 | { 35 | // Release the stream and the buffers 36 | cudaStreamSynchronize(mTrtCudaStream); 37 | cudaStreamDestroy(mTrtCudaStream); 38 | for(auto& item : mTrtCudaBuffer) 39 | cudaFree(item); 40 | 41 | mTrtPluginFactory.destroyPlugin(); 42 | 43 | if(!mTrtRunTime) 44 | mTrtRunTime->destroy(); 45 | if(!mTrtContext) 46 | mTrtContext->destroy(); 47 | if(!mTrtEngine) 48 | mTrtEngine->destroy(); 49 | }; 50 | 51 | void saveEngine(std::string fileName) 52 | { 53 | if(mTrtEngine) 54 | { 55 | nvinfer1::IHostMemory* data = mTrtEngine->serialize(); 56 | std::ofstream file; 57 | file.open(fileName,std::ios::binary | std::ios::out); 58 | if(!file.is_open()) 59 | { 60 | std::cout << "read create engine file" << fileName <<" failed" << std::endl; 61 | return; 62 | } 63 | 64 | file.write((const char*)data->data(), data->size()); 65 | file.close(); 66 | } 67 | }; 68 | 69 | void doInference(const void* inputData, void* outputData,int batchSize = 1); 70 | 71 | inline size_t getInputSize() { 72 | return std::accumulate(mTrtBindBufferSize.begin(), mTrtBindBufferSize.begin() + mTrtInputCount,0); 73 | }; 74 | 75 | inline size_t getOutputSize() { 76 | return std::accumulate(mTrtBindBufferSize.begin() + mTrtInputCount, mTrtBindBufferSize.end(),0); 77 | }; 78 | 79 | void printTime() 80 | { 81 | mTrtProfiler.printLayerTimes(mTrtIterationTime); 82 | } 83 | 84 | inline int getBatchSize() {return mTrtBatchSize;}; 85 | 86 | private: 87 | nvinfer1::ICudaEngine* loadModelAndCreateEngine(const char* deployFile, const char* modelFile,int maxBatchSize, 88 | nvcaffeparser1::ICaffeParser* parser, nvcaffeparser1::IPluginFactory* pluginFactory, 89 | nvinfer1::IInt8Calibrator* calibrator, 
nvinfer1::IHostMemory*& trtModelStream,const std::vector& outputNodesName); 90 | 91 | void InitEngine(); 92 | 93 | nvinfer1::IExecutionContext* mTrtContext; 94 | nvinfer1::ICudaEngine* mTrtEngine; 95 | nvinfer1::IRuntime* mTrtRunTime; 96 | PluginFactory mTrtPluginFactory; 97 | cudaStream_t mTrtCudaStream; 98 | Profiler mTrtProfiler; 99 | RUN_MODE mTrtRunMode; 100 | 101 | std::vector mTrtCudaBuffer; 102 | std::vector mTrtBindBufferSize; 103 | int mTrtInputCount; 104 | int mTrtIterationTime; 105 | int mTrtBatchSize; 106 | }; 107 | } 108 | 109 | #endif //__TRT_NET_H_ 110 | -------------------------------------------------------------------------------- /code/include/UpsampleLayer.h: -------------------------------------------------------------------------------- 1 | #ifndef _UPSAMPLE_LAYER_H 2 | #define _UPSAMPLE_LAYER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "NvInfer.h" 10 | #include "Utils.h" 11 | #include 12 | 13 | namespace nvinfer1 14 | { 15 | class UpsampleLayerPlugin: public IPluginExt 16 | { 17 | public: 18 | explicit UpsampleLayerPlugin(const float scale, const int cudaThread = 512); 19 | // create the plugin at runtime from a byte stream 20 | UpsampleLayerPlugin(const void* data, size_t length); 21 | 22 | ~UpsampleLayerPlugin(); 23 | 24 | int getNbOutputs() const override 25 | { 26 | return 1; 27 | } 28 | 29 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; 30 | 31 | bool supportsFormat(DataType type, PluginFormat format) const override { 32 | //std::cout << "supportsFormat=== type:" << int(type) << "format" << int(format) << std::endl; 33 | return (type == DataType::kFLOAT || type == DataType::kHALF || type == DataType::kINT8 ) 34 | && format == PluginFormat::kNCHW; 35 | } 36 | 37 | void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override; 38 | 39 | int initialize() override; 
40 | 41 | virtual void terminate() override { 42 | }; 43 | 44 | virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;} 45 | 46 | virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override; 47 | 48 | virtual size_t getSerializationSize() override 49 | { 50 | return sizeof(nvinfer1::Dims) + sizeof(mDataType) + sizeof(mScale) 51 | + sizeof(mOutputWidth) + sizeof(mOutputHeight) + sizeof(mThreadCount); 52 | } 53 | 54 | virtual void serialize(void* buffer) override; 55 | 56 | template 57 | void forwardGpu(const Dtype* input,Dtype * outputint ,int N,int C,int H ,int W); 58 | 59 | private: 60 | nvinfer1::Dims mCHW; 61 | DataType mDataType{DataType::kFLOAT}; 62 | float mScale; 63 | int mOutputWidth; 64 | int mOutputHeight; 65 | int mThreadCount; 66 | 67 | void* mInputBuffer {nullptr}; 68 | void* mOutputBuffer {nullptr}; 69 | }; 70 | }; 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /code/include/Utils.h: -------------------------------------------------------------------------------- 1 | #ifndef __TRT_UTILS_H_ 2 | #define __TRT_UTILS_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifndef CUDA_CHECK 10 | 11 | #define CUDA_CHECK(callstr) \ 12 | { \ 13 | cudaError_t error_code = callstr; \ 14 | if (error_code != cudaSuccess) { \ 15 | std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \ 16 | assert(0); \ 17 | } \ 18 | } 19 | 20 | #endif 21 | 22 | namespace Tn 23 | { 24 | class Profiler : public nvinfer1::IProfiler 25 | { 26 | public: 27 | void printLayerTimes(int itrationsTimes) 28 | { 29 | float totalTime = 0; 30 | for (size_t i = 0; i < mProfile.size(); i++) 31 | { 32 | printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / itrationsTimes); 33 | totalTime += mProfile[i].second; 34 | } 35 | printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes); 36 
| } 37 | private: 38 | typedef std::pair Record; 39 | std::vector mProfile; 40 | 41 | virtual void reportLayerTime(const char* layerName, float ms) 42 | { 43 | auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; }); 44 | if (record == mProfile.end()) 45 | mProfile.push_back(std::make_pair(layerName, ms)); 46 | else 47 | record->second += ms; 48 | } 49 | }; 50 | 51 | //Logger for TensorRT info/warning/errors 52 | class Logger : public nvinfer1::ILogger 53 | { 54 | public: 55 | 56 | Logger(): Logger(Severity::kWARNING) {} 57 | 58 | Logger(Severity severity): reportableSeverity(severity) {} 59 | 60 | void log(Severity severity, const char* msg) override 61 | { 62 | // suppress messages with severity enum value greater than the reportable 63 | if (severity > reportableSeverity) return; 64 | 65 | switch (severity) 66 | { 67 | case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break; 68 | case Severity::kERROR: std::cerr << "ERROR: "; break; 69 | case Severity::kWARNING: std::cerr << "WARNING: "; break; 70 | case Severity::kINFO: std::cerr << "INFO: "; break; 71 | default: std::cerr << "UNKNOWN: "; break; 72 | } 73 | std::cerr << msg << std::endl; 74 | } 75 | 76 | Severity reportableSeverity{Severity::kWARNING}; 77 | }; 78 | 79 | template 80 | void write(char*& buffer, const T& val) 81 | { 82 | *reinterpret_cast(buffer) = val; 83 | buffer += sizeof(T); 84 | } 85 | 86 | template 87 | void read(const char*& buffer, T& val) 88 | { 89 | val = *reinterpret_cast(buffer); 90 | buffer += sizeof(T); 91 | } 92 | } 93 | 94 | #endif -------------------------------------------------------------------------------- /code/include/YoloConfigs.h: -------------------------------------------------------------------------------- 1 | #ifndef _YOLO_CONFIGS_H_ 2 | #define _YOLO_CONFIGS_H_ 3 | 4 | 5 | namespace Yolo 6 | { 7 | static constexpr int CHECK_COUNT = 3; 8 | static constexpr float IGNORE_THRESH = 0.5f; 9 | static 
constexpr int CLASS_NUM = 80; 10 | 11 | struct YoloKernel 12 | { 13 | int width; 14 | int height; 15 | float anchors[CHECK_COUNT*2]; 16 | }; 17 | 18 | //YOLO 608 19 | YoloKernel yolo1 = { 20 | 19, 21 | 19, 22 | {116,90, 156,198, 373,326} 23 | }; 24 | YoloKernel yolo2 = { 25 | 38, 26 | 38, 27 | {30,61, 62,45, 59,119} 28 | }; 29 | YoloKernel yolo3 = { 30 | 76, 31 | 76, 32 | {10,13, 16,30, 33,23} 33 | }; 34 | 35 | //YOLO 416 36 | // YoloKernel yolo1 = { 37 | // 13, 38 | // 13, 39 | // {116,90, 156,198, 373,326} 40 | // }; 41 | // YoloKernel yolo2 = { 42 | // 26, 43 | // 26, 44 | // {30,61, 62,45, 59,119} 45 | // }; 46 | // YoloKernel yolo3 = { 47 | // 52, 48 | // 52, 49 | // {10,13, 16,30, 33,23} 50 | // }; 51 | } 52 | 53 | #endif -------------------------------------------------------------------------------- /code/include/YoloLayer.h: -------------------------------------------------------------------------------- 1 | #ifndef _YOLO_LAYER_H 2 | #define _YOLO_LAYER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "NvInfer.h" 10 | #include "Utils.h" 11 | #include 12 | 13 | namespace Yolo 14 | { 15 | struct YoloKernel; 16 | 17 | static constexpr int LOCATIONS = 4; 18 | struct alignas(float) Detection{ 19 | //x y w h 20 | float bbox[LOCATIONS]; 21 | //float objectness; 22 | int classId; 23 | float prob; 24 | }; 25 | } 26 | 27 | 28 | namespace nvinfer1 29 | { 30 | class YoloLayerPlugin: public IPluginExt 31 | { 32 | public: 33 | explicit YoloLayerPlugin(const int cudaThread = 512); 34 | YoloLayerPlugin(const void* data, size_t length); 35 | 36 | ~YoloLayerPlugin(); 37 | 38 | int getNbOutputs() const override 39 | { 40 | return 1; 41 | } 42 | 43 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; 44 | 45 | bool supportsFormat(DataType type, PluginFormat format) const override { 46 | return type == DataType::kFLOAT && format == PluginFormat::kNCHW; 47 | } 48 | 49 | void configureWithFormat(const Dims* 
inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) override {}; 50 | 51 | int initialize() override; 52 | 53 | virtual void terminate() override {}; 54 | 55 | virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;} 56 | 57 | virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override; 58 | 59 | virtual size_t getSerializationSize() override; 60 | 61 | virtual void serialize(void* buffer) override; 62 | 63 | void forwardGpu(const float *const * inputs,float * output, cudaStream_t stream,int batchSize = 1); 64 | 65 | void forwardCpu(const float *const * inputs,float * output, cudaStream_t stream,int batchSize = 1); 66 | 67 | private: 68 | int mClassCount; 69 | int mKernelCount; 70 | std::vector mYoloKernel; 71 | int mThreadCount; 72 | 73 | //cpu 74 | void* mInputBuffer {nullptr}; 75 | void* mOutputBuffer {nullptr}; 76 | }; 77 | }; 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /code/src/EntroyCalibrator.cpp: -------------------------------------------------------------------------------- 1 | #include "EntroyCalibrator.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nvinfer1 9 | { 10 | Int8EntropyCalibrator::Int8EntropyCalibrator(int BatchSize,const std::vector>& data, 11 | const std::string& CalibDataName /*= ""*/,bool readCache /*= true*/) 12 | : mCalibDataName(CalibDataName),mBatchSize(BatchSize),mReadCache(readCache) 13 | { 14 | mDatas.reserve(data.size()); 15 | mDatas = data; 16 | 17 | mInputCount = BatchSize * data[0].size(); 18 | mCurBatchData = new float[mInputCount]; 19 | mCurBatchIdx = 0; 20 | CUDA_CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); 21 | } 22 | 23 | 24 | Int8EntropyCalibrator::~Int8EntropyCalibrator() 25 | { 26 | CUDA_CHECK(cudaFree(mDeviceInput)); 27 | if(mCurBatchData) 28 | delete[] 
mCurBatchData; 29 | } 30 | 31 | 32 | bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings) 33 | { 34 | if (mCurBatchIdx + mBatchSize > int(mDatas.size())) 35 | return false; 36 | 37 | float* ptr = mCurBatchData; 38 | size_t imgSize = mInputCount / mBatchSize; 39 | auto iter = mDatas.begin() + mCurBatchIdx; 40 | 41 | std::for_each(iter, iter + mBatchSize, [=,&ptr](std::vector& val){ 42 | assert(imgSize == val.size()); 43 | memcpy(ptr,val.data(),imgSize*sizeof(float)); 44 | 45 | ptr += imgSize; 46 | }); 47 | 48 | CUDA_CHECK(cudaMemcpy(mDeviceInput, mCurBatchData, mInputCount * sizeof(float), cudaMemcpyHostToDevice)); 49 | //std::cout << "input name " << names[0] << std::endl; 50 | bindings[0] = mDeviceInput; 51 | 52 | std::cout << "load batch " << mCurBatchIdx << " to " << mCurBatchIdx + mBatchSize - 1 << std::endl; 53 | mCurBatchIdx += mBatchSize; 54 | return true; 55 | } 56 | 57 | const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) 58 | { 59 | mCalibrationCache.clear(); 60 | std::ifstream input(mCalibDataName+".calib", std::ios::binary); 61 | input >> std::noskipws; 62 | if (mReadCache && input.good()) 63 | std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(mCalibrationCache)); 64 | 65 | length = mCalibrationCache.size(); 66 | return length ? 
&mCalibrationCache[0] : nullptr; 67 | } 68 | 69 | void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length) 70 | { 71 | std::ofstream output(mCalibDataName+".calib", std::ios::binary); 72 | output.write(reinterpret_cast(cache), length); 73 | } 74 | 75 | } -------------------------------------------------------------------------------- /code/src/TrtNet.cpp: -------------------------------------------------------------------------------- 1 | #include "TrtNet.h" 2 | #include "EntroyCalibrator.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace nvinfer1; 14 | using namespace nvcaffeparser1; 15 | using namespace plugin; 16 | 17 | static Tn::Logger gLogger; 18 | 19 | #define RETURN_AND_LOG(ret, severity, message) \ 20 | do \ 21 | { \ 22 | std::string error_message = "ssd_error_log: " + std::string(message); \ 23 | gLogger.log(ILogger::Severity::k##severity, error_message.c_str()); \ 24 | return (ret); \ 25 | } while (0) 26 | 27 | inline void* safeCudaMalloc(size_t memSize) 28 | { 29 | void* deviceMem; 30 | CUDA_CHECK(cudaMalloc(&deviceMem, memSize)); 31 | if (deviceMem == nullptr) 32 | { 33 | std::cerr << "Out of memory" << std::endl; 34 | exit(1); 35 | } 36 | return deviceMem; 37 | } 38 | 39 | inline int64_t volume(const nvinfer1::Dims& d) 40 | { 41 | return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); 42 | } 43 | 44 | inline unsigned int getElementSize(nvinfer1::DataType t) 45 | { 46 | switch (t) 47 | { 48 | case nvinfer1::DataType::kINT32: return 4; 49 | case nvinfer1::DataType::kFLOAT: return 4; 50 | case nvinfer1::DataType::kHALF: return 2; 51 | case nvinfer1::DataType::kINT8: return 1; 52 | } 53 | throw std::runtime_error("Invalid DataType."); 54 | return 0; 55 | } 56 | 57 | namespace Tn 58 | { 59 | trtNet::trtNet(const std::string& prototxt,const std::string& caffemodel,const std::vector& outputNodesName, 60 | const std::vector>& 
calibratorData,RUN_MODE mode /*= RUN_MODE::FLOAT32*/,int maxBatchSize /*= 1*/) 61 | :mTrtContext(nullptr),mTrtEngine(nullptr),mTrtRunTime(nullptr),mTrtRunMode(mode),mTrtInputCount(0),mTrtIterationTime(0),mTrtBatchSize(maxBatchSize) 62 | { 63 | std::cout << "init plugin proto: " << prototxt << " caffemodel: " << caffemodel << std::endl; 64 | auto parser = createCaffeParser(); 65 | 66 | IHostMemory* trtModelStream{nullptr}; 67 | 68 | Int8EntropyCalibrator * calibrator = nullptr; 69 | if (calibratorData.size() > 0 ){ 70 | auto endPos= prototxt.find_last_of("."); 71 | auto beginPos= prototxt.find_last_of('/') + 1; 72 | std::string calibratorName = prototxt.substr(beginPos,endPos - beginPos); 73 | std::cout << "create calibrator,Named:" << calibratorName << std::endl; 74 | calibrator = new Int8EntropyCalibrator(maxBatchSize,calibratorData,calibratorName); 75 | } 76 | 77 | PluginFactory pluginFactorySerialize; 78 | ICudaEngine* tmpEngine = loadModelAndCreateEngine(prototxt.c_str(),caffemodel.c_str(), maxBatchSize, parser, &pluginFactorySerialize, calibrator, trtModelStream,outputNodesName); 79 | assert(tmpEngine != nullptr); 80 | assert(trtModelStream != nullptr); 81 | if(calibrator){ 82 | delete calibrator; 83 | calibrator = nullptr; 84 | } 85 | tmpEngine->destroy(); 86 | pluginFactorySerialize.destroyPlugin(); 87 | 88 | mTrtRunTime = createInferRuntime(gLogger); 89 | assert(mTrtRunTime != nullptr); 90 | mTrtEngine= mTrtRunTime->deserializeCudaEngine(trtModelStream->data(), trtModelStream->size(), &mTrtPluginFactory); 91 | assert(mTrtEngine != nullptr); 92 | // Deserialize the engine. 
93 | trtModelStream->destroy(); 94 | 95 | InitEngine(); 96 | } 97 | 98 | trtNet::trtNet(const std::string& engineFile) 99 | :mTrtContext(nullptr),mTrtEngine(nullptr),mTrtRunTime(nullptr),mTrtRunMode(RUN_MODE::FLOAT32),mTrtInputCount(0),mTrtIterationTime(0) 100 | { 101 | using namespace std; 102 | fstream file; 103 | 104 | file.open(engineFile,ios::binary | ios::in); 105 | if(!file.is_open()) 106 | { 107 | cout << "read engine file" << engineFile <<" failed" << endl; 108 | return; 109 | } 110 | file.seekg(0, ios::end); 111 | int length = file.tellg(); 112 | file.seekg(0, ios::beg); 113 | std::unique_ptr data(new char[length]); 114 | file.read(data.get(), length); 115 | 116 | file.close(); 117 | 118 | std::cout << "deserializing" << std::endl; 119 | mTrtRunTime = createInferRuntime(gLogger); 120 | assert(mTrtRunTime != nullptr); 121 | mTrtEngine= mTrtRunTime->deserializeCudaEngine(data.get(), length, &mTrtPluginFactory); 122 | assert(mTrtEngine != nullptr); 123 | 124 | InitEngine(); 125 | } 126 | 127 | void trtNet::InitEngine() 128 | { 129 | mTrtBatchSize = mTrtEngine->getMaxBatchSize(); 130 | mTrtContext = mTrtEngine->createExecutionContext(); 131 | assert(mTrtContext != nullptr); 132 | mTrtContext->setProfiler(&mTrtProfiler); 133 | 134 | // Input and output buffer pointers that we pass to the engine - the engine requires exactly IEngine::getNbBindings() 135 | int nbBindings = mTrtEngine->getNbBindings(); 136 | 137 | mTrtCudaBuffer.resize(nbBindings); 138 | mTrtBindBufferSize.resize(nbBindings); 139 | for (int i = 0; i < nbBindings; ++i) 140 | { 141 | Dims dims = mTrtEngine->getBindingDimensions(i); 142 | DataType dtype = mTrtEngine->getBindingDataType(i); 143 | int64_t totalSize = volume(dims) * mTrtBatchSize * getElementSize(dtype); 144 | mTrtBindBufferSize[i] = totalSize; 145 | mTrtCudaBuffer[i] = safeCudaMalloc(totalSize); 146 | if(mTrtEngine->bindingIsInput(i)) 147 | mTrtInputCount++; 148 | } 149 | 150 | CUDA_CHECK(cudaStreamCreate(&mTrtCudaStream)); 151 | } 
152 | 153 | 154 | nvinfer1::ICudaEngine* trtNet::loadModelAndCreateEngine(const char* deployFile, const char* modelFile,int maxBatchSize, 155 | ICaffeParser* parser, nvcaffeparser1::IPluginFactory* pluginFactory, 156 | IInt8Calibrator* calibrator, IHostMemory*& trtModelStream,const std::vector& outputNodesName) 157 | { 158 | // Create the builder 159 | IBuilder* builder = createInferBuilder(gLogger); 160 | 161 | // Parse the model to populate the network, then set the outputs. 162 | INetworkDefinition* network = builder->createNetwork(); 163 | parser->setPluginFactory(pluginFactory); 164 | 165 | std::cout << "Begin parsing model..." << std::endl; 166 | const IBlobNameToTensor* blobNameToTensor = parser->parse(deployFile,modelFile, *network, nvinfer1::DataType::kFLOAT); 167 | if (!blobNameToTensor) 168 | RETURN_AND_LOG(nullptr, ERROR, "Fail to parse"); 169 | std::cout << "End parsing model..." << std::endl; 170 | 171 | // specify which tensors are outputs 172 | for (auto& name : outputNodesName) 173 | { 174 | auto output = blobNameToTensor->find(name.c_str()); 175 | assert(output!=nullptr); 176 | if (output == nullptr) 177 | std::cout << "can not find output named " << name << std::endl; 178 | 179 | network->markOutput(*output); 180 | } 181 | 182 | // Build the engine. 183 | builder->setMaxBatchSize(maxBatchSize); 184 | builder->setMaxWorkspaceSize(1 << 30);// 1G 185 | if (mTrtRunMode == RUN_MODE::INT8) 186 | { 187 | std::cout <<"setInt8Mode"<platformHasFastInt8()) 189 | std::cout << "Notice: the platform do not has fast for int8" << std::endl; 190 | builder->setInt8Mode(true); 191 | builder->setInt8Calibrator(calibrator); 192 | } 193 | else if (mTrtRunMode == RUN_MODE::FLOAT16) 194 | { 195 | std::cout <<"setFp16Mode"<platformHasFastFp16()) 197 | std::cout << "Notice: the platform do not has fast for fp16" << std::endl; 198 | builder->setFp16Mode(true); 199 | } 200 | 201 | std::cout << "Begin building engine..." 
<< std::endl; 202 | ICudaEngine* engine = builder->buildCudaEngine(*network); 203 | if (!engine) 204 | RETURN_AND_LOG(nullptr, ERROR, "Unable to create engine"); 205 | std::cout << "End building engine..." << std::endl; 206 | 207 | // We don't need the network any more, and we can destroy the parser. 208 | network->destroy(); 209 | parser->destroy(); 210 | 211 | // Serialize the engine, then close everything down. 212 | trtModelStream = engine->serialize(); 213 | 214 | builder->destroy(); 215 | shutdownProtobufLibrary(); 216 | return engine; 217 | } 218 | 219 | void trtNet::doInference(const void* inputData, void* outputData ,int batchSize /*= 1*/) 220 | { 221 | //static const int batchSize = 1; 222 | assert(mTrtInputCount == 1); 223 | assert(batchSize <= mTrtBatchSize); 224 | 225 | // DMA the input to the GPU, execute the batch asynchronously, and DMA it back: 226 | int inputIndex = 0; 227 | CUDA_CHECK(cudaMemcpyAsync(mTrtCudaBuffer[inputIndex], inputData, mTrtBindBufferSize[inputIndex], cudaMemcpyHostToDevice, mTrtCudaStream)); 228 | auto t_start = std::chrono::high_resolution_clock::now(); 229 | mTrtContext->execute(batchSize, &mTrtCudaBuffer[inputIndex]); 230 | auto t_end = std::chrono::high_resolution_clock::now(); 231 | float total = std::chrono::duration(t_end - t_start).count(); 232 | std::cout << "Time taken for inference is " << total << " ms." 
<< std::endl; 233 | 234 | for (size_t bindingIdx = mTrtInputCount; bindingIdx < mTrtBindBufferSize.size(); ++bindingIdx) 235 | { 236 | auto size = mTrtBindBufferSize[bindingIdx]; 237 | CUDA_CHECK(cudaMemcpyAsync(outputData, mTrtCudaBuffer[bindingIdx], size, cudaMemcpyDeviceToHost, mTrtCudaStream)); 238 | outputData = (char *)outputData + size; 239 | } 240 | 241 | //cudaStreamSynchronize(mTrtCudaStream); 242 | 243 | mTrtIterationTime ++ ; 244 | } 245 | } -------------------------------------------------------------------------------- /code/src/UpsampleLayer.cpp: -------------------------------------------------------------------------------- 1 | #include "UpsampleLayer.h" 2 | 3 | namespace nvinfer1 4 | { 5 | UpsampleLayerPlugin::UpsampleLayerPlugin(const float scale, const int cudaThread /*= 512*/) 6 | : mScale(scale),mThreadCount(cudaThread) 7 | { 8 | } 9 | 10 | UpsampleLayerPlugin::~UpsampleLayerPlugin() 11 | { 12 | 13 | } 14 | 15 | // create the plugin at runtime from a byte stream 16 | UpsampleLayerPlugin::UpsampleLayerPlugin(const void* data, size_t length) 17 | { 18 | using namespace Tn; 19 | const char *d = reinterpret_cast(data), *a = d; 20 | read(d, mCHW); 21 | read(d, mDataType); 22 | read(d, mScale); 23 | read(d, mOutputWidth); 24 | read(d, mOutputHeight); 25 | read(d, mThreadCount); 26 | 27 | //std::cout << "read:" << a << " " << mOutputWidth<< " " <(buffer), *a = d; 35 | write(d, mCHW); 36 | write(d, mDataType); 37 | write(d, mScale); 38 | write(d, mOutputWidth); 39 | write(d, mOutputHeight); 40 | write(d, mThreadCount); 41 | 42 | //std::cout << "write:" << a << " " << mOutputHeight<< " " < 22 | __global__ void upscale(const Dtype *input, Dtype *output, 23 | int no_elements, int scale_factor, int d1, int d2, int d3) { 24 | int ii = threadIdx.x + blockDim.x * blockIdx.x; 25 | if (ii >= no_elements) return; 26 | int ipidx = translate_idx(ii, d1, d2, d3, scale_factor); 27 | output[ii]=input[ipidx]; 28 | } 29 | 30 | template 31 | void 
UpsampleLayerPlugin::forwardGpu(const Dtype* input,Dtype * output, 32 | int N,int C,int H ,int W) { 33 | 34 | int numElem = N*C*H*W; 35 | upscale<<<(numElem + mThreadCount - 1) / mThreadCount, mThreadCount>>>(input,output, numElem, mScale, C, H, W); 36 | } 37 | 38 | size_t type2size(DataType dataType) { 39 | size_t _size = 0; 40 | switch (dataType) 41 | { 42 | case DataType::kFLOAT: _size = sizeof(float);break; 43 | case DataType::kHALF: _size = sizeof(__half);break; 44 | case DataType::kINT8: _size = sizeof(u_int8_t);break; 45 | default:std::cerr << "error data type" << std::endl; 46 | } 47 | return _size; 48 | } 49 | 50 | int UpsampleLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) 51 | { 52 | const int channels = mCHW.d[0]; 53 | const int64_t in_height = mCHW.d[1]; 54 | const int64_t in_width = mCHW.d[2]; 55 | const int64_t out_height = mOutputHeight; 56 | const int64_t out_width = mOutputWidth; 57 | int totalElems = batchSize * in_height * in_width * channels; 58 | 59 | // Handle no-op resizes efficiently. 
60 | if (out_height == in_height && out_width == in_width) { 61 | CUDA_CHECK(cudaMemcpyAsync(outputs[0], inputs[0], totalElems * type2size(mDataType), cudaMemcpyDeviceToDevice, stream)); 62 | CUDA_CHECK(cudaStreamSynchronize(stream)); 63 | return 0; 64 | } 65 | //CUDA_CHECK(cudaStreamSynchronize(stream)); 66 | 67 | switch (mDataType) 68 | { 69 | case DataType::kFLOAT : 70 | forwardGpu((const float *)inputs[0],(float *)outputs[0],batchSize,mCHW.d[0],mOutputHeight,mOutputWidth); 71 | break; 72 | case DataType::kHALF: 73 | forwardGpu<__half>((const __half *)inputs[0],(__half *)outputs[0],batchSize,mCHW.d[0],mOutputHeight,mOutputWidth); 74 | break; 75 | case DataType::kINT8: 76 | forwardGpu((const u_int8_t *)inputs[0],(u_int8_t *)outputs[0],batchSize,mCHW.d[0],mOutputHeight,mOutputWidth); 77 | break; 78 | default: 79 | std::cerr << "error data type" << std::endl; 80 | } 81 | return 0; 82 | }; 83 | } -------------------------------------------------------------------------------- /code/src/YoloLayer.cu: -------------------------------------------------------------------------------- 1 | #include "YoloConfigs.h" 2 | #include "YoloLayer.h" 3 | 4 | using namespace Yolo; 5 | 6 | namespace nvinfer1 7 | { 8 | YoloLayerPlugin::YoloLayerPlugin(const int cudaThread /*= 512*/):mThreadCount(cudaThread) 9 | { 10 | mClassCount = CLASS_NUM; 11 | mYoloKernel.clear(); 12 | mYoloKernel.push_back(yolo1); 13 | mYoloKernel.push_back(yolo2); 14 | mYoloKernel.push_back(yolo3); 15 | 16 | mKernelCount = mYoloKernel.size(); 17 | } 18 | 19 | YoloLayerPlugin::~YoloLayerPlugin() 20 | { 21 | if(mInputBuffer) 22 | CUDA_CHECK(cudaFreeHost(mInputBuffer)); 23 | 24 | if(mOutputBuffer) 25 | CUDA_CHECK(cudaFreeHost(mOutputBuffer)); 26 | } 27 | 28 | // create the plugin at runtime from a byte stream 29 | YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length) 30 | { 31 | using namespace Tn; 32 | const char *d = reinterpret_cast(data), *a = d; 33 | read(d, mClassCount); 34 | read(d, mThreadCount); 
35 | read(d, mKernelCount); 36 | mYoloKernel.resize(mKernelCount); 37 | auto kernelSize = mKernelCount*sizeof(YoloKernel); 38 | memcpy(mYoloKernel.data(),d,kernelSize); 39 | d += kernelSize; 40 | 41 | assert(d == a + length); 42 | } 43 | 44 | void YoloLayerPlugin::serialize(void* buffer) 45 | { 46 | using namespace Tn; 47 | char* d = static_cast(buffer), *a = d; 48 | write(d, mClassCount); 49 | write(d, mThreadCount); 50 | write(d, mKernelCount); 51 | auto kernelSize = mKernelCount*sizeof(YoloKernel); 52 | memcpy(d,mYoloKernel.data(),kernelSize); 53 | d += kernelSize; 54 | 55 | assert(d == a + getSerializationSize()); 56 | } 57 | 58 | size_t YoloLayerPlugin::getSerializationSize() 59 | { 60 | return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mKernelCount) + sizeof(Yolo::YoloKernel) * mYoloKernel.size(); 61 | } 62 | 63 | int YoloLayerPlugin::initialize() 64 | { 65 | int totalCount = 0; 66 | for(const auto& yolo : mYoloKernel) 67 | totalCount += (LOCATIONS + 1 + mClassCount) * yolo.width*yolo.height * CHECK_COUNT; 68 | CUDA_CHECK(cudaHostAlloc(&mInputBuffer, totalCount * sizeof(float), cudaHostAllocDefault)); 69 | 70 | totalCount = 0;//detection count 71 | for(const auto& yolo : mYoloKernel) 72 | totalCount += yolo.width*yolo.height * CHECK_COUNT; 73 | CUDA_CHECK(cudaHostAlloc(&mOutputBuffer, sizeof(float) + totalCount * sizeof(Detection), cudaHostAllocDefault)); 74 | return 0; 75 | } 76 | 77 | Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) 78 | { 79 | //output the result to channel 80 | int totalCount = 0; 81 | for(const auto& yolo : mYoloKernel) 82 | totalCount += yolo.width*yolo.height * CHECK_COUNT * sizeof(Detection) / sizeof(float); 83 | 84 | return Dims3(totalCount + 1, 1, 1); 85 | } 86 | 87 | void YoloLayerPlugin::forwardCpu(const float*const * inputs, float* outputs, cudaStream_t stream,int batchSize) 88 | { 89 | auto Logist = [=](float data){ 90 | return 1./(1. 
+ exp(-data)); 91 | }; 92 | 93 | int totalOutputCount = 0; 94 | int i = 0; 95 | int totalCount = 0; 96 | for(const auto& yolo : mYoloKernel) 97 | { 98 | totalOutputCount += yolo.width*yolo.height * CHECK_COUNT * sizeof(Detection) / sizeof(float); 99 | totalCount += (LOCATIONS + 1 + mClassCount) * yolo.width*yolo.height * CHECK_COUNT; 100 | ++ i; 101 | } 102 | 103 | for (int idx = 0; idx < batchSize;idx++) 104 | { 105 | i = 0; 106 | float* inputData = (float *)mInputBuffer;// + idx *totalCount; //if create more batch size 107 | for(const auto& yolo : mYoloKernel) 108 | { 109 | int size = (LOCATIONS + 1 + mClassCount) * yolo.width*yolo.height * CHECK_COUNT; 110 | CUDA_CHECK(cudaMemcpyAsync(inputData, (float *)inputs[i] + idx * size, size * sizeof(float), cudaMemcpyDeviceToHost, stream)); 111 | inputData += size; 112 | ++ i; 113 | } 114 | 115 | CUDA_CHECK(cudaStreamSynchronize(stream)); 116 | 117 | inputData = (float *)mInputBuffer ;//+ idx *totalCount; //if create more batch size 118 | std::vector result; 119 | for (const auto& yolo : mYoloKernel) 120 | { 121 | int stride = yolo.width*yolo.height; 122 | for (int j = 0;j < stride ;++j) 123 | { 124 | for (int k = 0;k < CHECK_COUNT; ++k ) 125 | { 126 | int beginIdx = (LOCATIONS + 1 + mClassCount)* stride *k + j; 127 | int objIndex = beginIdx + LOCATIONS*stride; 128 | 129 | //check obj 130 | float objProb = Logist(inputData[objIndex]); 131 | if(objProb <= IGNORE_THRESH) 132 | continue; 133 | 134 | //classes 135 | int classId = -1; 136 | float maxProb = IGNORE_THRESH; 137 | for (int c = 0;c< mClassCount;++c){ 138 | float cProb = Logist(inputData[beginIdx + (5 + c) * stride]) * objProb; 139 | if(cProb > maxProb){ 140 | maxProb = cProb; 141 | classId = c; 142 | } 143 | } 144 | 145 | if(classId >= 0) { 146 | Detection det; 147 | int row = j / yolo.width; 148 | int cols = j % yolo.width; 149 | 150 | //Location 151 | det.bbox[0] = (cols + Logist(inputData[beginIdx]))/ yolo.width; 152 | det.bbox[1] = (row + 
Logist(inputData[beginIdx+stride]))/ yolo.height; 153 | det.bbox[2] = exp(inputData[beginIdx+2*stride]) * yolo.anchors[2*k]; 154 | det.bbox[3] = exp(inputData[beginIdx+3*stride]) * yolo.anchors[2*k + 1]; 155 | det.classId = classId; 156 | det.prob = maxProb; 157 | 158 | result.emplace_back(det); 159 | } 160 | } 161 | } 162 | 163 | inputData += (LOCATIONS + 1 + mClassCount) * stride * CHECK_COUNT; 164 | } 165 | 166 | 167 | int detCount =result.size(); 168 | auto data = (float *)mOutputBuffer;// + idx*(totalOutputCount + 1); //if create more batch size 169 | float * begin = data; 170 | //copy count; 171 | data[0] = (float)detCount; 172 | data++; 173 | //copy result 174 | memcpy(data,result.data(),result.size()*sizeof(Detection)); 175 | 176 | //(count + det result) 177 | CUDA_CHECK(cudaMemcpyAsync(outputs, begin,sizeof(float) + result.size()*sizeof(Detection), cudaMemcpyHostToDevice, stream)); 178 | 179 | outputs += totalOutputCount + 1; 180 | } 181 | }; 182 | 183 | __device__ float Logist(float data){ return 1./(1. 
+ exp(-data)); }; 184 | 185 | __global__ void CalDetection(const float *input, float *output,int noElements, 186 | int yoloWidth,int yoloHeight,const float anchors[CHECK_COUNT*2],int classes,int outputElem) { 187 | 188 | int idx = threadIdx.x + blockDim.x * blockIdx.x; 189 | if (idx >= noElements) return; 190 | 191 | int stride = yoloWidth*yoloHeight; 192 | int bnIdx = idx / stride; 193 | 194 | int curIdx = idx - stride*bnIdx; 195 | 196 | const float* curInput = input + bnIdx* ((LOCATIONS + 1 + classes) * stride * CHECK_COUNT); 197 | 198 | for (int k = 0;k < CHECK_COUNT; ++k ) 199 | { 200 | int beginIdx = (LOCATIONS + 1 + classes)* stride *k + curIdx; 201 | int objIndex = beginIdx + LOCATIONS*stride; 202 | 203 | //check objectness 204 | float objProb = Logist(curInput[objIndex]); 205 | if(objProb <= IGNORE_THRESH) 206 | continue; 207 | 208 | int row = curIdx / yoloWidth; 209 | int cols = curIdx % yoloWidth; 210 | 211 | //classes 212 | int classId = -1; 213 | float maxProb = IGNORE_THRESH; 214 | for (int c = 0;c maxProb){ 217 | maxProb = cProb; 218 | classId = c; 219 | } 220 | } 221 | 222 | if(classId >= 0) { 223 | float *curOutput = output + bnIdx*outputElem; 224 | int resCount = (int)atomicAdd(curOutput,1); 225 | char* data = (char * )curOutput + sizeof(float) + resCount*sizeof(Detection); 226 | Detection* det = (Detection*)(data); 227 | 228 | //Location 229 | det->bbox[0] = (cols + Logist(curInput[beginIdx]))/ yoloWidth; 230 | det->bbox[1] = (row + Logist(curInput[beginIdx+stride]))/ yoloHeight; 231 | det->bbox[2] = exp(curInput[beginIdx+2*stride]) * anchors[2*k]; 232 | det->bbox[3] = exp(curInput[beginIdx+3*stride]) * anchors[2*k + 1]; 233 | det->classId = classId; 234 | det->prob = maxProb; 235 | } 236 | } 237 | } 238 | 239 | void YoloLayerPlugin::forwardGpu(const float *const * inputs,float * output,cudaStream_t stream,int batchSize) { 240 | void* devAnchor; 241 | size_t AnchorLen = sizeof(float)* CHECK_COUNT*2; 242 | 
CUDA_CHECK(cudaMalloc(&devAnchor,AnchorLen));
243 |     // Output layout per batch image: [detectionCount(float), Detection0, Detection1, ...] flattened to floats.
244 |     int outputElem = 1;
245 |     for (unsigned int i = 0;i< mYoloKernel.size();++i)
246 |     {
247 |         const auto& yolo = mYoloKernel[i];
248 |         outputElem += yolo.width*yolo.height * CHECK_COUNT * sizeof(Detection) / sizeof(float);
249 |     }
250 | 
251 |     for(int idx = 0 ;idx < batchSize;++idx)
252 |         CUDA_CHECK(cudaMemsetAsync(output + idx*outputElem, 0, sizeof(float), stream)); // zero the per-image detection counter on the caller's stream
253 | 
254 |     int numElem = 0;
255 |     for (unsigned int i = 0;i< mYoloKernel.size();++i)
256 |     {
257 |         const auto& yolo = mYoloKernel[i];
258 |         numElem = yolo.width*yolo.height*batchSize;
259 |         CUDA_CHECK(cudaMemcpyAsync(devAnchor,yolo.anchors,AnchorLen,cudaMemcpyHostToDevice,stream)); // keep copies ordered with the kernels below
260 |         CalDetection<<< (yolo.width*yolo.height*batchSize + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>
261 |             (inputs[i],output, numElem, yolo.width, yolo.height, (float *)devAnchor, mClassCount ,outputElem);
262 |     }
263 |     CUDA_CHECK(cudaStreamSynchronize(stream)); // kernels must finish reading devAnchor before it is freed
264 |     CUDA_CHECK(cudaFree(devAnchor));
265 | }
266 | 
267 | 
268 | int YoloLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
269 | {
270 |     //assert(batchSize == 1);
271 |     //GPU
272 |     //CUDA_CHECK(cudaStreamSynchronize(stream));
273 |     forwardGpu((const float *const *)inputs,(float *)outputs[0],stream,batchSize);
274 | 
275 |     //CPU
276 |     //forwardCpu((const float *const *)inputs,(float *)outputs[0],stream,batchSize);
277 |     return 0;
278 | };
279 | 
280 | }
281 | 
-------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # TRTWrapper 2 | 3 | ### Desc 4 | a wrapper for tensorRT net (parser caffe) 5 | ### Test Environments 6 | Ubuntu 16.04 7 | TensorRT 5.0.2.6/4.0.1.6 8 | CUDA 9.2 9 | ### About Wrapper 10 | you can use the wrapper like this: 11 | ```cpp 12 | //normal 13 | std::vector> calibratorData; 14 | trtNet net("vgg16.prototxt","vgg16.caffemodel",{"prob"},calibratorData); 15 | //fp16 16 | trtNet
net_fp16("vgg16.prototxt","vgg16.caffemodel",{"prob"},calibratorData,RUN_MODE::FLOAT16);
17 | //int8
18 | trtNet net_int8("vgg16.prototxt","vgg16.caffemodel",{"prob"},calibratorData,RUN_MODE::INT8);
19 | 
20 | //run inference:
21 | net.doInference(input_data.get(), outputData.get());
22 | 
23 | //can print time cost
24 | net.printTime();
25 | 
26 | //can write to engine and load From engine
27 | net.saveEngine("save_1.engine");
28 | trtNet net2("save_1.engine");
29 | ```
30 | When you need to add a new plugin, just add the plugin code to PluginFactory.
31 | ### Run Sample
32 | ```bash
33 | #for classification
34 | cd sample
35 | mkdir build
36 | cd build && cmake .. && make && make install
37 | cd ..
38 | ./install/runNet --caffemodel=${CAFFE_MODEL_NAME} --prototxt=${CAFFE_PROTOTXT} --input=./test.jpg
39 | ```
40 | -------------------------------------------------------------------------------- /sample/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8)
2 | project(demo)
3 | 
4 | set(CMAKE_BUILD_TYPE Release)
5 | 
6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors")
7 | 
8 | #add lib
9 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../code sample.out)
10 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../code)
11 | 
12 | #add opencv
13 | find_package(OpenCV REQUIRED)
14 | include_directories(${OpenCV_INCLUDE_DIRS})
15 | link_directories(${OpenCV_LIBRARIES_DIRS}) # NOTE(review): OpenCV_LIBRARIES_DIRS is not set by OpenCVConfig; harmless because ${OpenCV_LIBS} resolves the libraries — confirm and consider removing
16 | 
17 | #build classifer
18 | add_executable(classifer evalClassifNet.cpp eval.cpp dataReader.cpp)
19 | target_link_libraries(classifer TrtNet ${OpenCV_LIBS})
20 | 
21 | #build runNet
22 | add_executable(runNet runNet.cpp)
23 | target_link_libraries(runNet TrtNet ${OpenCV_LIBS})
24 | 
25 | #build runTwoNets
26 | add_executable(runTwoNets runTwoNets.cpp)
27 | target_link_libraries(runTwoNets TrtNet ${OpenCV_LIBS})
28 | 
29 | install(TARGETS classifer DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/install/)
30 | 
install(TARGETS runNet DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/install/) 31 | install(TARGETS runTwoNets DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/install/) -------------------------------------------------------------------------------- /sample/argsParser.h: -------------------------------------------------------------------------------- 1 | #ifndef __ARGS_PARSER_H_ 2 | #define __ARGS_PARSER_H_ 3 | 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace argsParser 12 | { 13 | using std::string; 14 | 15 | enum class P_DATA_TYPE 16 | { 17 | PARSER_BOOL, 18 | PARSER_INT, 19 | PARSER_FLOAT, 20 | PARSER_DOUBLE, 21 | PARSER_STRING 22 | }; 23 | 24 | struct parserInfo 25 | { 26 | string desc; 27 | string defaultValue; 28 | string valueDesc; 29 | 30 | P_DATA_TYPE dataType; 31 | string value; 32 | }; 33 | 34 | typedef string Desc; 35 | typedef string ValueDesc; 36 | typedef string DefaultValue; 37 | 38 | class parser 39 | { 40 | 41 | #define ADD_ARG_FUNCS(DATA_TYPE) \ 42 | static void ADD_ARG_##DATA_TYPE(string name,Desc desc,DefaultValue defaultValue,ValueDesc valueDesc =""){ \ 43 | InnerInitArgs(name,desc,defaultValue,valueDesc,P_DATA_TYPE::PARSER_##DATA_TYPE); \ 44 | } 45 | 46 | public: 47 | static void InnerInitArgs(string name,Desc desc,DefaultValue defaultValue,ValueDesc valueDesc,P_DATA_TYPE dataType) 48 | { 49 | mArgs.emplace(std::make_pair(name, parserInfo{desc,defaultValue,valueDesc, dataType ,defaultValue})); 50 | } 51 | 52 | ADD_ARG_FUNCS(INT); 53 | ADD_ARG_FUNCS(FLOAT); 54 | ADD_ARG_FUNCS(DOUBLE); 55 | ADD_ARG_FUNCS(STRING); 56 | ADD_ARG_FUNCS(BOOL); 57 | 58 | static void printDesc() 59 | { 60 | for (const auto& data :mArgs ) 61 | { 62 | string name = data.first; 63 | auto& info = data.second; 64 | 65 | if(info.valueDesc.length() > 0) 66 | name += "=<" + info.valueDesc + ">"; 67 | 68 | std::cout << std::left << std::setw(20) << name; 69 | std::cout << std::setw(2) << "=" << std::setw(2); 70 | std::cout << std::left << 
std::setw(80) << info.desc + "(default:" + info.defaultValue + ")"; 71 | std::cout << std::endl; 72 | } 73 | } 74 | 75 | static void parseArgs(int argc,char** argv) 76 | { 77 | string* str_argvs = new string[argc]; 78 | for(int i = 0;i mArgs; 137 | }; 138 | 139 | std::map parser::mArgs ; 140 | }; 141 | 142 | 143 | #endif 144 | -------------------------------------------------------------------------------- /sample/configs.h: -------------------------------------------------------------------------------- 1 | #ifndef _CONFIGS_H_ 2 | #define _CONFIGS_H_ 3 | 4 | #include 5 | namespace Tn 6 | { 7 | //src input image size 8 | static const int INPUT_CHANNEL = 3; 9 | static const int INPUT_WIDTH = 608; 10 | static const int INPUT_HEIGHT = 608; 11 | static const int RESIZE_H = 256; 12 | static const int RESIZE_W = 256; 13 | static const float SCALE = 0.017f; 14 | static const char* MEAN_VALUE = "103.94,116.78,123.68"; 15 | 16 | //input data 17 | static const char* INPUT_PROTOTXT = "alexnet.prototxt"; 18 | static const char* INPUT_CAFFEMODEL = "alexnet.caffemodel"; 19 | static const std::string INPUT_IMAGE = "test.jpg"; 20 | static const char* EVAL_LIST = ""; 21 | static const char* CALIBRATION_LIST = ""; 22 | static const char* MODE = "fp32"; 23 | static const char* OUTPUTS = "prob"; 24 | 25 | static const int ITER_TIMES = 1000; 26 | } 27 | #endif -------------------------------------------------------------------------------- /sample/dataReader.cpp: -------------------------------------------------------------------------------- 1 | #include "dataReader.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | namespace Tn 10 | { 11 | list readFileList(const string& fileName) 12 | { 13 | ifstream file(fileName); 14 | if(!file.is_open()) 15 | { 16 | cout << "read file list error,please check file :" << fileName << endl; 17 | exit(-1); 18 | } 19 | 20 | string strLine; 21 | list files; 22 | while( getline(file,strLine) ) 23 | 
files.push_back(strLine); 24 | 25 | file.close(); 26 | 27 | return files; 28 | } 29 | 30 | list readLabelFileList(const string& fileName) 31 | { 32 | ifstream file(fileName); 33 | if(!file.is_open()) 34 | { 35 | cout << "read file list error,please check file :" << fileName << endl; 36 | exit(-1); 37 | } 38 | 39 | string strLine; 40 | list result; 41 | while(!file.eof()) 42 | { 43 | Source data; 44 | file >> data.fileName >> data.label; 45 | result.emplace_back(data); 46 | } 47 | 48 | return result; 49 | } 50 | 51 | vector split(const string& str, char delim) 52 | { 53 | stringstream ss(str); 54 | string token; 55 | vector container; 56 | while (getline(ss, token, delim)) { 57 | container.push_back(token); 58 | } 59 | 60 | return container; 61 | } 62 | 63 | // vector split(string str, string pat) 64 | // { 65 | // vector bufStr; 66 | // while (true) 67 | // { 68 | // int index = str.find(pat); 69 | // string subStr = str.substr(0, index); 70 | // if (!subStr.empty()) 71 | // bufStr.push_back(subStr); 72 | // str.erase(0, index + pat.size()); 73 | // if (index == -1) 74 | // break; 75 | // } 76 | // return bufStr; 77 | // } 78 | 79 | std::tuple, std::list>> readObjectLabelFileList(const string& fileName) 80 | { 81 | list fileList; 82 | list> bBoxes; 83 | 84 | ifstream file(fileName); 85 | if(!file.is_open()) 86 | { 87 | cout << "read file list error,please check file :" << fileName << endl; 88 | exit(-1); 89 | } 90 | 91 | string strLine; 92 | while( getline(file,strLine) ) 93 | { 94 | vector line=split(strLine, '\n'); 95 | if(line.size() < 1) 96 | continue; 97 | vector strs=split(line[0], ' '); 98 | 99 | int idx = 0; 100 | string dataName=strs[idx++]; 101 | 102 | int trueBoxCount = (strs.size() - 1)/2; 103 | vector truthboxes; 104 | truthboxes.reserve(trueBoxCount); 105 | for (int i = 0 ;i < trueBoxCount ;++i) 106 | { 107 | //class 108 | string classId = strs[idx++]; 109 | 110 | //bbox Length 111 | int length = strs[idx].length(); 112 | //remove bracket [ ] 113 | 
string bbox = strs[idx++].substr(1,length-2); 114 | 115 | vector strs_txt = split(bbox, ','); 116 | Bbox truthbox; 117 | truthbox.classId = stoi(classId); 118 | truthbox.left = stof(strs_txt[0]); 119 | truthbox.top = stof(strs_txt[1]); 120 | truthbox.right = truthbox.left + stof(strs_txt[2]); 121 | truthbox.bot = truthbox.top + stof(strs_txt[3]); 122 | 123 | truthboxes.push_back(truthbox); 124 | } 125 | 126 | fileList.emplace_back(dataName); 127 | bBoxes.emplace_back(truthboxes); 128 | } 129 | 130 | file.close(); 131 | 132 | return make_tuple(move(fileList),move(bBoxes)); 133 | } 134 | } -------------------------------------------------------------------------------- /sample/dataReader.h: -------------------------------------------------------------------------------- 1 | #ifndef _DATA_READER_H_ 2 | #define _DATA_READER_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace Tn 10 | { 11 | std::list readFileList(const std::string& fileName); 12 | 13 | struct Source 14 | { 15 | std::string fileName; 16 | int label; 17 | }; 18 | std::list readLabelFileList(const std::string& fileName); 19 | 20 | struct Bbox 21 | { 22 | int classId; 23 | int left; 24 | int right; 25 | int top; 26 | int bot; 27 | float score; 28 | }; 29 | //[lst,lst] 30 | std::tuple, std::list>> readObjectLabelFileList(const std::string& fileName); 31 | } 32 | 33 | #endif -------------------------------------------------------------------------------- /sample/eval.cpp: -------------------------------------------------------------------------------- 1 | #include "eval.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | namespace Tn 11 | { 12 | float evalTopResult(list>& result,list& groundTruth,int* TP /*= nullptr*/,int* FP /*= nullptr*/,int topK /*= 1*/) 13 | { 14 | int _TP = TP ? *TP: 0; 15 | int _FP = FP ? 
*FP: 0; 16 | 17 | assert(result.size() == groundTruth.size()); 18 | 19 | auto pRe = result.begin(); 20 | auto pGT = groundTruth.begin(); 21 | for (; pRe != result.end() && pGT != groundTruth.end(); 22 | ++pRe, ++pGT) 23 | { 24 | auto& labels = *pRe; 25 | int truthClass = *pGT; 26 | float gtProb = labels[truthClass]; 27 | 28 | int biggerCount = 0; 29 | for (auto& prob : labels) 30 | { 31 | if (prob >= gtProb) 32 | ++biggerCount; 33 | } 34 | 35 | biggerCount > topK ? ++_FP : ++_TP; 36 | } 37 | 38 | float accuracy=float(_TP)/(_TP+_FP); 39 | if(TP) *TP =_TP; 40 | if(FP) *FP =_FP; 41 | 42 | cout<<"top " << topK <<" accuracy :"<< setprecision(4) << accuracy << endl; 43 | 44 | return accuracy; 45 | } 46 | 47 | float iou_compute(const Bbox& a,const Bbox& b) 48 | { 49 | int and_right=min(a.right,b.right); 50 | int and_left =max(a.left,b.left); 51 | int and_top =max(a.top,b.top); 52 | int and_bot =min(a.bot,b.bot); 53 | 54 | if ((and_top>and_bot) || (and_left>and_right)) 55 | { 56 | return 0.0f; 57 | } 58 | float sand=(and_right-and_left)*(and_bot-and_top)*1.0f; 59 | float sa=(a.right-a.left)*(a.bot-a.top)*1.0f; 60 | float sb=(b.right-b.left)*(b.bot-b.top)*1.0f; 61 | 62 | float iou=sand/(sa+sb-sand); 63 | return iou; 64 | } 65 | 66 | float evalMAPResult(const list>& bboxesList,const list>& truthboxesList,int classNum,float iouThresh) 67 | { 68 | assert(bboxesList.size() == truthboxesList.size()); 69 | cout << "evalMAPResult:" << endl; 70 | 71 | float* precision = new float[classNum]; 72 | float* recall = new float[classNum]; 73 | float* AP = new float[classNum]; 74 | 75 | vector **detBox = nullptr; 76 | vector **truthBox = nullptr; 77 | 78 | int sampleCount = bboxesList.size(); 79 | detBox = new vector* [sampleCount]; 80 | truthBox = new vector* [sampleCount]; 81 | for (int i = 0 ;i < sampleCount ; ++ i) 82 | { 83 | detBox[i] = new vector[classNum]{}; 84 | truthBox[i] = new vector[classNum]{}; 85 | } 86 | 87 | auto pBoxIter = bboxesList.begin(); 88 | auto pTrueIter = 
truthboxesList.begin(); 89 | for (int i = 0;i< sampleCount;++i , ++pBoxIter , ++pTrueIter) 90 | { 91 | for (const auto& item : *pBoxIter) 92 | detBox[i][item.classId].push_back(item); 93 | 94 | for (const auto& item : *pTrueIter) 95 | truthBox[i][item.classId].push_back(item); 96 | } 97 | 98 | for (int i = 0;i < classNum; ++ i) 99 | { 100 | using CheckPair = pair; 101 | vector< CheckPair > checkPRBoxs; 102 | int FN = 0; 103 | for (int j = 0;j< sampleCount;++j) 104 | { 105 | auto& dboxes = detBox[j][i]; 106 | auto& tboxes = truthBox[j][i]; 107 | 108 | auto checkTBoxes = tboxes; 109 | for (const auto& item: dboxes) 110 | { 111 | int maxIdx = -1; 112 | float maxIou = 0; 113 | 114 | for (const auto& tItem: checkTBoxes) 115 | { 116 | float iou=iou_compute(item,tItem); 117 | //std::cout << "iou" << iou << std::endl; 118 | if(iou > maxIou) 119 | { 120 | maxIdx = &tItem - &checkTBoxes[0]; 121 | maxIou = iou; 122 | } 123 | } 124 | 125 | if(maxIou > iouThresh) 126 | { 127 | checkPRBoxs.push_back({item,true}); 128 | checkTBoxes.erase(checkTBoxes.begin() + maxIdx); 129 | } 130 | else 131 | { 132 | //FP 133 | checkPRBoxs.push_back({item,false}); 134 | } 135 | } 136 | //FN 137 | FN += checkTBoxes.size(); 138 | } 139 | 140 | float TP = count_if(checkPRBoxs.begin(), checkPRBoxs.end(), [](CheckPair& item){return item.second == true;} ); 141 | 142 | int total = checkPRBoxs.size(); 143 | if(total == 0) 144 | { 145 | AP[i] = 1; 146 | continue; 147 | } 148 | 149 | //recall: 150 | recall[i] = (std::fabs(TP + FN) < 1e-5) ? 1 : TP / (TP + FN); 151 | //precision 152 | precision[i] = TP / total;//total is TP+FP 153 | 154 | //compute AP: 155 | sort(checkPRBoxs.begin(),checkPRBoxs.end(),[](const CheckPair& left,const CheckPair& right){ 156 | return left.first.score > right.first.score; 157 | } 158 | ); 159 | 160 | int PR_TP = 0; 161 | int PR_FP = 0; 162 | vector< pair > PRValues; // 163 | for (const auto& item : checkPRBoxs) 164 | { 165 | item.second ? 
++PR_TP : ++PR_FP; 166 | PRValues.emplace_back( make_pair(PR_TP/ float(PR_TP+PR_FP) , PR_TP / float(TP + FN)) ); 167 | } 168 | 169 | float sum = PRValues[0].first * PRValues[0].second; 170 | 171 | for (unsigned int m = 0; m < PRValues.size()-1;++m) 172 | { 173 | float w = PRValues[m + 1].second - PRValues[m].second ; 174 | float h = PRValues[m + 1].first; 175 | sum += w*h; 176 | } 177 | 178 | AP[i] = sum; 179 | 180 | cout<< setprecision(4) << "class:" << std::setw(3) << i 181 | << " iou thresh-" << iouThresh 182 | << " AP:" << std::setw(7) << AP[i] 183 | << " recall:" << std::setw(7) << recall[i] 184 | << " precision:" << std::setw(7) << precision[i] << endl; 185 | } 186 | 187 | float sumAp = 0; 188 | for (int i = 0;i < classNum;++i) 189 | sumAp += AP[i]; 190 | 191 | float MAP = sumAp / classNum; 192 | cout<< "MAP:" << MAP << endl; 193 | 194 | if (precision) 195 | delete[] precision; 196 | if (recall) 197 | delete[] recall; 198 | if (AP) 199 | delete[] AP; 200 | 201 | for (int i = 0;i < sampleCount; ++i) 202 | { 203 | delete[] detBox[i]; 204 | delete[] truthBox[i]; 205 | } 206 | 207 | delete[] detBox; 208 | delete[] truthBox; 209 | 210 | return MAP; 211 | } 212 | } -------------------------------------------------------------------------------- /sample/eval.h: -------------------------------------------------------------------------------- 1 | #ifndef _EVAL_H_ 2 | #define _EVAL_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "dataReader.h" 8 | 9 | namespace Tn 10 | { 11 | float evalTopResult(std::list>& result,std::list& groundTruth,int* Tp = nullptr,int* FP = nullptr,int topK = 1); 12 | float evalMAPResult(const std::list>& bboxesList,const std::list> & truthboxesList,int classNum,float iouThresh); 13 | } 14 | 15 | #endif -------------------------------------------------------------------------------- /sample/evalClassifNet.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 
#include 5 | #include "../code/include/TrtNet.h" 6 | #include "argsParser.h" 7 | #include "dataReader.h" 8 | #include "eval.h" 9 | #include "configs.h" 10 | 11 | using namespace std; 12 | using namespace argsParser; 13 | using namespace Tn; 14 | 15 | vector split(const string& str, char delim) 16 | { 17 | stringstream ss(str); 18 | string token; 19 | vector container; 20 | while (getline(ss, token, delim)) { 21 | container.push_back(token); 22 | } 23 | 24 | return container; 25 | } 26 | 27 | cv::Mat GetMeanMat() 28 | { 29 | using namespace cv; 30 | static std::unique_ptr MeanMat = nullptr; 31 | if (MeanMat.get() != nullptr) 32 | return *MeanMat; 33 | 34 | int h = parser::getIntValue("H"); 35 | int w = parser::getIntValue("W"); 36 | int c = parser::getIntValue("C"); 37 | 38 | string meanStr = parser::getStringValue("meanValue"); 39 | auto meansValues = split(meanStr,','); 40 | float scale = parser::getFloatValue("scale"); 41 | 42 | assert(meansValues.size() == c); 43 | vector means(c); 44 | for (int i = 0 ;i preprocess(const string& fileName) 55 | { 56 | using namespace cv; 57 | 58 | Mat img = imread(fileName); 59 | 60 | if(img.data== nullptr) 61 | { 62 | std::cout << "can not open image :" << fileName << std::endl; 63 | return {}; 64 | } 65 | 66 | int channel = parser::getIntValue("C"); 67 | 68 | //channel 69 | Mat sample; 70 | if (img.channels() == 3 && channel == 1) 71 | cvtColor(img, sample, cv::COLOR_BGR2GRAY); 72 | else if (img.channels() == 4 && channel == 1) 73 | cvtColor(img, sample, cv::COLOR_BGRA2GRAY); 74 | else if (img.channels() == 4 && channel == 3) 75 | cvtColor(img, sample, cv::COLOR_BGRA2BGR); 76 | else if (img.channels() == 1 && channel == 3) 77 | cvtColor(img, sample, cv::COLOR_GRAY2BGR); 78 | else 79 | sample = img; 80 | 81 | //resize 82 | int r_h = parser::getIntValue("RH"); 83 | int r_w = parser::getIntValue("RW"); 84 | cv::Mat resized; 85 | cv::resize(sample, resized, cv::Size(r_h,r_w)); 86 | 87 | //crop 88 | int h = parser::getIntValue("H"); 
89 | int w = parser::getIntValue("W"); 90 | int h_off = 0; 91 | int w_off = 0; 92 | h_off = (r_h - h) / 2; 93 | w_off = (r_w - w) / 2; 94 | Rect roi(w_off, h_off, w, h); 95 | Mat croppedImg = resized(roi); 96 | 97 | //to float and scale 98 | cv::Mat img_float; 99 | float scale = parser::getFloatValue("scale"); 100 | if (channel == 3) 101 | croppedImg.convertTo(img_float, CV_32FC3, scale); 102 | else 103 | croppedImg.convertTo(img_float, CV_32FC1, scale); 104 | 105 | 106 | // //mean mat 107 | auto meanFile = GetMeanMat(); 108 | Mat subMeanImg; 109 | cv::subtract(img_float, meanFile, subMeanImg); 110 | 111 | //HWC TO CHW 112 | vector input_channels(channel); 113 | cv::split(subMeanImg, input_channels.data()); 114 | 115 | vector result(h*w*channel); 116 | auto data = result.data(); 117 | int channelLength = h * w; 118 | for (int i = 0; i < channel; ++i) { 119 | memcpy(data,input_channels[i].data,channelLength*sizeof(float)); 120 | data += channelLength; 121 | } 122 | 123 | return result; 124 | } 125 | 126 | int main( int argc, char* argv[] ) 127 | { 128 | parser::ADD_ARG_INT("C",Desc("channel"),DefaultValue(to_string(INPUT_CHANNEL))); 129 | parser::ADD_ARG_INT("H",Desc("height"),DefaultValue(to_string(INPUT_HEIGHT))); 130 | parser::ADD_ARG_INT("W",Desc("width"),DefaultValue(to_string(INPUT_WIDTH))); 131 | parser::ADD_ARG_INT("RH",Desc("image process resized Height"),DefaultValue(to_string(RESIZE_H))); 132 | parser::ADD_ARG_INT("RW",Desc("image process resized Width"),DefaultValue(to_string(RESIZE_W))); 133 | parser::ADD_ARG_FLOAT("scale",Desc("image process scale"),DefaultValue(to_string(SCALE))); 134 | parser::ADD_ARG_STRING("meanValue",Desc("image mean value before scale"),DefaultValue(MEAN_VALUE)); 135 | 136 | parser::ADD_ARG_STRING("caffemodel",Desc("input caffemodel"),DefaultValue(INPUT_CAFFEMODEL),ValueDesc("file")); 137 | parser::ADD_ARG_STRING("prototxt",Desc("input deploy"),DefaultValue(INPUT_PROTOTXT),ValueDesc("file")); 138 | 
parser::ADD_ARG_STRING("evallist",Desc("load test files from list"),DefaultValue(EVAL_LIST),ValueDesc("file")); 139 | parser::ADD_ARG_STRING("calib",Desc("load calibration files from list"),DefaultValue(CALIBRATION_LIST),ValueDesc("file")); 140 | parser::ADD_ARG_STRING("outputs",Desc("output nodes name"),DefaultValue(OUTPUTS)); 141 | parser::ADD_ARG_STRING("mode",Desc("runtime mode"),DefaultValue(MODE), ValueDesc("fp32/fp16/int8")); 142 | 143 | if(argc < 2){ 144 | parser::printDesc(); 145 | exit(-1); 146 | } 147 | 148 | parser::parseArgs(argc,argv); 149 | 150 | vector> calibData; 151 | string calibFileList = parser::getStringValue("calib"); 152 | string mode = parser::getStringValue("mode"); 153 | if(calibFileList.length() > 0 && mode == "int8") 154 | { 155 | cout << "find calibration file,loading ..." << endl; 156 | 157 | ifstream file(calibFileList); 158 | if(!file.is_open()) 159 | { 160 | cout << "read file list error,please check file :" << calibFileList << endl; 161 | exit(-1); 162 | } 163 | 164 | string strLine; 165 | while( getline(file,strLine) ) 166 | { 167 | //std::cout << strLine << std::endl; 168 | auto data = preprocess(strLine); 169 | calibData.emplace_back(data); 170 | } 171 | file.close(); 172 | } 173 | 174 | string deployFile = parser::getStringValue("prototxt"); 175 | string caffemodelFile = parser::getStringValue("caffemodel"); 176 | string outputNodes = parser::getStringValue("outputs"); 177 | auto outputNames = split(outputNodes,','); 178 | 179 | trtNet net(deployFile,caffemodelFile,outputNames,calibData); 180 | 181 | int outputCount = net.getOutputSize()/sizeof(float); 182 | std::unique_ptr outputData(new float[outputCount]); 183 | 184 | list> outputs; 185 | list groundTruth; 186 | string listFile = parser::getStringValue("evallist"); 187 | 188 | cout << "loading process list from " << listFile << endl; 189 | list inputs = readLabelFileList(listFile); 190 | 191 | int tp1 = 0,fp1 =0; 192 | int tp5 = 0,fp5 =0; 193 | const int printInterval = 
500; 194 | int i = 0; 195 | 196 | for (const auto& source :inputs) 197 | { 198 | 199 | std::cout << "process: " << source.fileName << std::endl; 200 | vector inputData = preprocess(source.fileName); 201 | if (!inputData.data()) 202 | continue; 203 | 204 | net.doInference(inputData.data(), outputData.get()); 205 | 206 | //Get Output 207 | auto output = outputData.get(); 208 | 209 | vector res(output,&output[outputCount]); 210 | outputs.emplace_back(res); 211 | groundTruth.push_back(source.label); 212 | 213 | if(++i % printInterval == 0) 214 | { 215 | evalTopResult(outputs,groundTruth,&tp1,&fp1,1); 216 | evalTopResult(outputs,groundTruth,&tp5,&fp5,5); 217 | 218 | outputs.clear(); 219 | groundTruth.clear(); 220 | } 221 | } 222 | 223 | evalTopResult(outputs,groundTruth,&tp1,&fp1,1); 224 | evalTopResult(outputs,groundTruth,&tp5,&fp5,5); 225 | 226 | net.printTime(); 227 | 228 | return 0; 229 | } 230 | 231 | -------------------------------------------------------------------------------- /sample/runNet.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../code/include/TrtNet.h" 6 | #include "argsParser.h" 7 | #include "configs.h" 8 | 9 | using namespace std; 10 | using namespace argsParser; 11 | using namespace Tn; 12 | 13 | unique_ptr prepareImage(const string& fileName) 14 | { 15 | using namespace cv; 16 | 17 | Mat img = imread(fileName); 18 | if(img.data== nullptr) 19 | { 20 | std::cout << "can not open image :" << fileName << std::endl; 21 | return std::unique_ptr(nullptr); 22 | } 23 | 24 | int c = parser::getIntValue("C"); 25 | int h = parser::getIntValue("H"); 26 | int w = parser::getIntValue("W"); 27 | 28 | cv::Mat resized; 29 | cv::resize(img, resized, cv::Size(h,w)); 30 | 31 | cv::Mat img_float; 32 | if (c == 3) 33 | resized.convertTo(img_float, CV_32FC3); 34 | else 35 | resized.convertTo(img_float, CV_32FC1); 36 | 37 | //HWC TO CHW 38 | cv::Mat input_channels[c]; 
39 | cv::split(img_float, input_channels); 40 | 41 | float * data = new float[h*w*c]; 42 | auto result = data; 43 | int channelLength = h * w; 44 | for (int i = 0; i < c; ++i) { 45 | memcpy(data,input_channels[i].data,channelLength*sizeof(float)); 46 | data += channelLength; 47 | } 48 | 49 | return std::unique_ptr(result); 50 | } 51 | 52 | int main( int argc, char* argv[] ) 53 | { 54 | parser::ADD_ARG_FLOAT("prototxt",Desc("input deploy"),DefaultValue(INPUT_PROTOTXT),ValueDesc("file")); 55 | parser::ADD_ARG_FLOAT("caffemodel",Desc("input caffemodel"),DefaultValue(INPUT_CAFFEMODEL),ValueDesc("file")); 56 | parser::ADD_ARG_STRING("input",Desc("input image file"),DefaultValue(INPUT_IMAGE),ValueDesc("file")); 57 | parser::ADD_ARG_INT("C",Desc("channel"),DefaultValue(to_string(INPUT_CHANNEL))); 58 | parser::ADD_ARG_INT("H",Desc("height"),DefaultValue(to_string(INPUT_HEIGHT))); 59 | parser::ADD_ARG_INT("W",Desc("width"),DefaultValue(to_string(INPUT_WIDTH))); 60 | parser::ADD_ARG_INT("iterTimes",Desc("iterations"),DefaultValue(to_string(ITER_TIMES))); 61 | 62 | if(argc < 2){ 63 | parser::printDesc(); 64 | exit(-1); 65 | } 66 | 67 | parser::parseArgs(argc,argv); 68 | 69 | string deployFile = parser::getStringValue("prototxt"); 70 | string caffemodelFile = parser::getStringValue("caffemodel"); 71 | std::vector> calibratorData; 72 | trtNet net(deployFile,caffemodelFile,{"prob"},calibratorData); 73 | 74 | string inputImage = parser::getStringValue("input"); 75 | auto inputData = prepareImage(inputImage); 76 | int outputCount = net.getOutputSize()/sizeof(float); 77 | std::unique_ptr outputData(new float[outputCount]); 78 | 79 | for (int i = 0 ;i 2 | #include 3 | #include 4 | #include 5 | #include "../code/include/TrtNet.h" 6 | #include "argsParser.h" 7 | #include "configs.h" 8 | 9 | using namespace std; 10 | using namespace argsParser; 11 | using namespace Tn; 12 | 13 | unique_ptr prepareImage(const string& fileName) 14 | { 15 | using namespace cv; 16 | 17 | Mat img = 
imread(fileName); 18 | if(img.data== nullptr) 19 | { 20 | std::cout << "can not open image :" << fileName << std::endl; 21 | return std::unique_ptr(nullptr); 22 | } 23 | 24 | int c = parser::getIntValue("C"); 25 | int h = parser::getIntValue("H"); 26 | int w = parser::getIntValue("W"); 27 | 28 | cv::Mat resized; 29 | cv::resize(img, resized, cv::Size(h,w)); 30 | 31 | cv::Mat img_float; 32 | if (c == 3) 33 | resized.convertTo(img_float, CV_32FC3); 34 | else 35 | resized.convertTo(img_float, CV_32FC1); 36 | 37 | //HWC TO CHW 38 | cv::Mat input_channels[c]; 39 | cv::split(img_float, input_channels); 40 | 41 | float * data = new float[h*w*c]; 42 | auto result = data; 43 | int channelLength = h * w; 44 | for (int i = 0; i < c; ++i) { 45 | memcpy(data,input_channels[i].data,channelLength*sizeof(float)); 46 | data += channelLength; 47 | } 48 | 49 | return std::unique_ptr(result); 50 | } 51 | 52 | int main( int argc, char* argv[] ) 53 | { 54 | parser::ADD_ARG_FLOAT("prototxt",Desc("input deploy"),DefaultValue(INPUT_PROTOTXT),ValueDesc("file")); 55 | parser::ADD_ARG_FLOAT("caffemodel",Desc("input caffemodel"),DefaultValue(INPUT_CAFFEMODEL),ValueDesc("file")); 56 | parser::ADD_ARG_STRING("input",Desc("input image file"),DefaultValue(INPUT_IMAGE),ValueDesc("file")); 57 | parser::ADD_ARG_INT("C",Desc("channel"),DefaultValue(to_string(INPUT_CHANNEL))); 58 | parser::ADD_ARG_INT("H",Desc("height"),DefaultValue(to_string(INPUT_HEIGHT))); 59 | parser::ADD_ARG_INT("W",Desc("width"),DefaultValue(to_string(INPUT_WIDTH))); 60 | parser::ADD_ARG_INT("iterTimes",Desc("iterations"),DefaultValue(to_string(ITER_TIMES))); 61 | 62 | if(argc < 2){ 63 | parser::printDesc(); 64 | exit(-1); 65 | } 66 | 67 | parser::parseArgs(argc,argv); 68 | 69 | string deployFile = parser::getStringValue("prototxt"); 70 | string caffemodelFile = parser::getStringValue("caffemodel"); 71 | std::vector> calibratorData; 72 | trtNet net(deployFile,caffemodelFile,{"prob"},calibratorData); 73 | 74 | //NOTE: test for two 
nets, but it may crash by some unknown reason. 75 | trtNet net2(deployFile,caffemodelFile,{"prob"},calibratorData); 76 | //Change to another net also will crash 77 | trtNet net3("yolov3.prototxt","yolov3.caffemodel",{"prob"},calibratorData); 78 | 79 | string inputImage = parser::getStringValue("input"); 80 | auto inputData = prepareImage(inputImage); 81 | int outputCount = net.getOutputSize()/sizeof(float); 82 | std::unique_ptr outputData(new float[outputCount]); 83 | 84 | for (int i = 0 ;i