├── .gitignore
├── .idea
│   ├── Solov2-TensorRT-CPP.iml
│   ├── misc.xml
│   ├── modules.xml
│   ├── vcs.xml
│   └── workspace.xml
├── CMakeLists.txt
├── InstanceSegment
│   ├── TensorRtSample
│   │   ├── ErrorRecorder.h
│   │   ├── common.h
│   │   ├── half.h
│   │   ├── logger.cpp
│   │   ├── logger.h
│   │   └── logging.h
│   ├── buffer.cpp
│   ├── buffer.h
│   ├── infer.cpp
│   ├── infer.h
│   ├── parameters.cpp
│   ├── parameters.h
│   ├── pipeline.cpp
│   ├── pipeline.h
│   ├── solo.cpp
│   ├── solo.h
│   ├── utils.cpp
│   └── utils.h
├── LICENSE
├── README.md
├── build_model.cpp
├── common.py
├── config
│   ├── config.yaml
│   ├── kitti.png
│   └── solov2_cpp.png
├── demo.cpp
├── main.cpp
└── onnx_exporter.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /cmake-build-debug/
--------------------------------------------------------------------------------
/.idea/ (Solov2-TensorRT-CPP.iml, misc.xml, modules.xml, vcs.xml, workspace.xml):
--------------------------------------------------------------------------------
(JetBrains IDE project files; their XML bodies did not survive extraction)
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.20)
2 | project(Solov2-TensorRT-CPP)
3 | 
4 | set(CMAKE_CXX_STANDARD 17)
5 | 
6 | 
7 | find_package(OpenCV 3.4 REQUIRED)
8 | include_directories(${OpenCV_INCLUDE_DIRS})
9 | 
10 | #Libtorch
11 | set(CMAKE_PREFIX_PATH "/usr/local/lib;/home/chen/app/libtorch")
12 | 
13 | find_package(Torch REQUIRED)
14 | include_directories(${TORCH_INCLUDE_DIRS})
15 | 
16 | #CUDA
17 | find_package(CUDA 10.2 REQUIRED)
18 | include_directories(${CUDA_INCLUDE_DIRS})
19 | 
20 | set(TensorRT_LIBS nvinfer nvonnxparser nvinfer_plugin)
21 | include_directories(${TensorRT_INCLUDE_DIRS})
22 | 
23 | 
24 | aux_source_directory(InstanceSegment Segment_SOURCES)
25 | aux_source_directory(InstanceSegment/TensorRtSample TensorRtSample_SOURCES)
26 | 
27 | add_executable(segment main.cpp ${Segment_SOURCES} ${TensorRtSample_SOURCES})
28 | target_link_libraries(segment ${CUDA_LIBRARIES} ${OpenCV_LIBRARIES} ${TORCH_LIBRARIES} ${TensorRT_LIBS})
29 | 
30 | add_executable(demo demo.cpp ${Segment_SOURCES} ${TensorRtSample_SOURCES})
31 | target_link_libraries(demo ${CUDA_LIBRARIES} ${OpenCV_LIBRARIES} ${TORCH_LIBRARIES} ${TensorRT_LIBS})
32 | 
33 | add_executable(build_model 
build_model.cpp InstanceSegment/parameters.cpp InstanceSegment/TensorRtSample/logger.cpp) 34 | target_link_libraries(build_model ${TensorRT_LIBS} pthread ${OpenCV_LIBRARIES}) 35 | 36 | -------------------------------------------------------------------------------- /InstanceSegment/TensorRtSample/ErrorRecorder.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef ERROR_RECORDER_H 18 | #define ERROR_RECORDER_H 19 | #include "NvInferRuntimeCommon.h" 20 | #include "logger.h" 21 | #include "logging.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | using namespace nvinfer1; 28 | //! 29 | //! A simple implementation of the IErrorRecorder interface for 30 | //! use by samples. This interface also can be used as a reference 31 | //! implementation. 32 | //! The sample Error recorder is based on a vector that pairs the error 33 | //! code and the error string into a single element. It also uses 34 | //! standard mutex's and atomics in order to make sure that the code 35 | //! works in a multi-threaded environment. 36 | //! 37 | class SampleErrorRecorder : public IErrorRecorder 38 | { 39 | using errorPair = std::pair; 40 | using errorStack = std::vector; 41 | 42 | public: 43 | SampleErrorRecorder() = default; 44 | 45 | virtual ~SampleErrorRecorder() noexcept {} 46 | int32_t getNbErrors() const noexcept final 47 | { 48 | return mErrorStack.size(); 49 | } 50 | ErrorCode getErrorCode(int32_t errorIdx) const noexcept final 51 | { 52 | return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first; 53 | }; 54 | IErrorRecorder::ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final 55 | { 56 | return invalidIndexCheck(errorIdx) ? "errorIdx out of range." : (*this)[errorIdx].second.c_str(); 57 | } 58 | // This class can never overflow since we have dynamic resize via std::vector usage. 59 | bool hasOverflowed() const noexcept final 60 | { 61 | return false; 62 | } 63 | 64 | // Empty the errorStack. 65 | void clear() noexcept final 66 | { 67 | try 68 | { 69 | // grab a lock so that there is no addition while clearing. 70 | std::lock_guard guard(mStackLock); 71 | mErrorStack.clear(); 72 | } 73 | catch (const std::exception& e) 74 | { 75 | sample::gLogFatal << "Internal Error: " << e.what() << std::endl; 76 | } 77 | }; 78 | 79 | //! 
Simple helper function that 80 | bool empty() const noexcept 81 | { 82 | return mErrorStack.empty(); 83 | } 84 | 85 | bool reportError(ErrorCode val, IErrorRecorder::ErrorDesc desc) noexcept final 86 | { 87 | try 88 | { 89 | std::lock_guard guard(mStackLock); 90 | sample::gLogError << "Error[" << static_cast(val) << "]: " << desc << std::endl; 91 | mErrorStack.push_back(errorPair(val, desc)); 92 | } 93 | catch (const std::exception& e) 94 | { 95 | sample::gLogFatal << "Internal Error: " << e.what() << std::endl; 96 | } 97 | // All errors are considered fatal. 98 | return true; 99 | } 100 | 101 | // Atomically increment or decrement the ref counter. 102 | IErrorRecorder::RefCount incRefCount() noexcept final 103 | { 104 | return ++mRefCount; 105 | } 106 | IErrorRecorder::RefCount decRefCount() noexcept final 107 | { 108 | return --mRefCount; 109 | } 110 | 111 | private: 112 | // Simple helper functions. 113 | const errorPair& operator[](size_t index) const noexcept 114 | { 115 | return mErrorStack[index]; 116 | } 117 | 118 | bool invalidIndexCheck(int32_t index) const noexcept 119 | { 120 | // By converting signed to unsigned, we only need a single check since 121 | // negative numbers turn into large positive greater than the size. 122 | size_t sIndex = index; 123 | return sIndex >= mErrorStack.size(); 124 | } 125 | 126 | // Mutex to hold when locking mErrorStack. 127 | std::mutex mStackLock; 128 | 129 | // Reference count of the class. Destruction of the class when mRefCount 130 | // is not zero causes undefined behavior. 131 | std::atomic mRefCount{0}; 132 | 133 | // The error stack that holds the errors recorded by TensorRT. 134 | errorStack mErrorStack; 135 | 136 | }; // class SampleErrorRecorder 137 | #endif // ERROR_RECORDER_H 138 | -------------------------------------------------------------------------------- /InstanceSegment/TensorRtSample/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef TENSORRT_COMMON_H 18 | #define TENSORRT_COMMON_H 19 | 20 | // For loadLibrary 21 | #ifdef _MSC_VER 22 | // Needed so that the max/min definitions in windows.h do not conflict with std::max/min. 
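// (NOMINMAX is defined only around the windows.h include and undefined again
// immediately afterwards, so it does not leak into user code.)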
23 | #define NOMINMAX 24 | #include 25 | #undef NOMINMAX 26 | #else 27 | #include 28 | #endif 29 | 30 | #include "NvInfer.h" 31 | #include "NvInferPlugin.h" 32 | #include "logger.h" 33 | #include "logging.h" 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | #include 53 | 54 | using namespace nvinfer1; 55 | using namespace plugin; 56 | 57 | #ifdef _MSC_VER 58 | #define FN_NAME __FUNCTION__ 59 | #else 60 | #define FN_NAME __func__ 61 | #endif 62 | 63 | #if defined(__aarch64__) || defined(__QNX__) 64 | #define ENABLE_DLA_API 1 65 | #endif 66 | 67 | #define mCHECK(status) \ 68 | do \ 69 | { \ 70 | auto ret = (status); \ 71 | if (ret != 0) \ 72 | { \ 73 | sample::gLogError << "Cuda failure: " << ret << std::endl; \ 74 | abort(); \ 75 | } \ 76 | } while (0) 77 | 78 | #define CHECK_RETURN_W_MSG(status, val, errMsg) \ 79 | do \ 80 | { \ 81 | if (!(status)) \ 82 | { \ 83 | sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " << FN_NAME << "(), line " << __LINE__ \ 84 | << std::endl; \ 85 | return val; \ 86 | } \ 87 | } while (0) 88 | 89 | #undef ASSERT 90 | #define ASSERT(condition) \ 91 | do \ 92 | { \ 93 | if (!(condition)) \ 94 | { \ 95 | sample::gLogError << "Assertion failure: " << #condition << std::endl; \ 96 | abort(); \ 97 | } \ 98 | } while (0) 99 | 100 | 101 | #define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "") 102 | 103 | #define OBJ_GUARD(A) std::unique_ptr 104 | 105 | template 106 | OBJ_GUARD(T) 107 | makeObjGuard(T_* t) 108 | { 109 | mCHECK(!(std::is_base_of::value || std::is_same::value)); 110 | auto deleter = [](T* t) { t->destroy(); }; 111 | return std::unique_ptr{static_cast(t), deleter}; 112 | } 113 | 114 | constexpr long double operator"" _GiB(long double val) 115 | { 116 | return val * (1 << 30); 117 | } 118 | constexpr long double operator"" _MiB(long double val) 119 | { 120 | return val * (1 << 20); 121 | } 122 | constexpr long double operator"" _KiB(long double val) 123 | { 124 | return val * (1 << 10); 125 | } 126 | 127 | // These is necessary if we want to be able to write 1_GiB instead of 1.0_GiB. 128 | // Since the return type is signed, -1_GiB will work as expected. 
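// e.g. 1_GiB == 1073741824 and 2_MiB == 2097152 (long long), while
// 1.5_GiB == 1610612736.0 via the long double overloads above.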
129 | constexpr long long int operator"" _GiB(unsigned long long val) 130 | { 131 | return val * (1 << 30); 132 | } 133 | constexpr long long int operator"" _MiB(unsigned long long val) 134 | { 135 | return val * (1 << 20); 136 | } 137 | constexpr long long int operator"" _KiB(unsigned long long val) 138 | { 139 | return val * (1 << 10); 140 | } 141 | 142 | struct SimpleProfiler : public nvinfer1::IProfiler 143 | { 144 | struct Record 145 | { 146 | float time{0}; 147 | int count{0}; 148 | }; 149 | 150 | virtual void reportLayerTime(const char* layerName, float ms) noexcept 151 | { 152 | mProfile[layerName].count++; 153 | mProfile[layerName].time += ms; 154 | if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) == mLayerNames.end()) 155 | { 156 | mLayerNames.push_back(layerName); 157 | } 158 | } 159 | 160 | SimpleProfiler(const char* name, const std::vector& srcProfilers = std::vector()) 161 | : mName(name) 162 | { 163 | for (const auto& srcProfiler : srcProfilers) 164 | { 165 | for (const auto& rec : srcProfiler.mProfile) 166 | { 167 | auto it = mProfile.find(rec.first); 168 | if (it == mProfile.end()) 169 | { 170 | mProfile.insert(rec); 171 | } 172 | else 173 | { 174 | it->second.time += rec.second.time; 175 | it->second.count += rec.second.count; 176 | } 177 | } 178 | } 179 | } 180 | 181 | friend std::ostream& operator<<(std::ostream& out, const SimpleProfiler& value) 182 | { 183 | out << "========== " << value.mName << " profile ==========" << std::endl; 184 | float totalTime = 0; 185 | std::string layerNameStr = "TensorRT layer name"; 186 | int maxLayerNameLength = std::max(static_cast(layerNameStr.size()), 70); 187 | for (const auto& elem : value.mProfile) 188 | { 189 | totalTime += elem.second.time; 190 | maxLayerNameLength = std::max(maxLayerNameLength, static_cast(elem.first.size())); 191 | } 192 | 193 | auto old_settings = out.flags(); 194 | auto old_precision = out.precision(); 195 | // Output header 196 | { 197 | out << std::setw(maxLayerNameLength) << layerNameStr << " "; 198 | out << std::setw(12) << "Runtime, " 199 | << "%" 200 | << " "; 201 | out << std::setw(12) << "Invocations" 202 | << " "; 203 | out << std::setw(12) << "Runtime, ms" << std::endl; 204 | } 205 | for (size_t i = 0; i < value.mLayerNames.size(); i++) 206 | { 207 | const std::string layerName = value.mLayerNames[i]; 208 | auto elem = value.mProfile.at(layerName); 209 | out << std::setw(maxLayerNameLength) << layerName << " "; 210 | out << std::setw(12) << std::fixed << std::setprecision(1) << (elem.time * 100.0F / totalTime) << "%" 211 | << " "; 212 | out << std::setw(12) << elem.count << " "; 213 | out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time << std::endl; 214 | } 215 | out.flags(old_settings); 216 | out.precision(old_precision); 217 | out << "========== " << value.mName << " total runtime = " << totalTime << " ms ==========" << std::endl; 218 | 219 | return out; 220 | } 221 | 222 | private: 223 | std::string mName; 224 | std::vector mLayerNames; 225 | std::map mProfile; 226 | }; 227 | 228 | //! Locate path to file, given its filename or filepath suffix and possible dirs it might lie in. 229 | //! Function will also walk back MAX_DEPTH dirs from CWD to check for such a file path. 
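//! Example with a hypothetical file name: locateFile("engine.trt", {"data", "models"})
//! tries data/engine.trt, ../data/engine.trt, ... (up to 10 parent levels), then the
//! same for models/, and returns the first path that opens (exiting with an error
//! if nothing is found and reportError is true).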
230 | inline std::string locateFile( 231 | const std::string& filepathSuffix, const std::vector& directories, bool reportError = true) 232 | { 233 | const int MAX_DEPTH{10}; 234 | bool found{false}; 235 | std::string filepath; 236 | 237 | for (auto& dir : directories) 238 | { 239 | if (!dir.empty() && dir.back() != '/') 240 | { 241 | #ifdef _MSC_VER 242 | filepath = dir + "\\" + filepathSuffix; 243 | #else 244 | filepath = dir + "/" + filepathSuffix; 245 | #endif 246 | } 247 | else 248 | { 249 | filepath = dir + filepathSuffix; 250 | } 251 | 252 | for (int i = 0; i < MAX_DEPTH && !found; i++) 253 | { 254 | const std::ifstream checkFile(filepath); 255 | found = checkFile.is_open(); 256 | if (found) 257 | { 258 | break; 259 | } 260 | 261 | filepath = "../" + filepath; // Try again in parent dir 262 | } 263 | 264 | if (found) 265 | { 266 | break; 267 | } 268 | 269 | filepath.clear(); 270 | } 271 | 272 | // Could not find the file 273 | if (filepath.empty()) 274 | { 275 | const std::string dirList = std::accumulate(directories.begin() + 1, directories.end(), directories.front(), 276 | [](const std::string& a, const std::string& b) { return a + "\n\t" + b; }); 277 | std::cout << "Could not find " << filepathSuffix << " in data directories:\n\t" << dirList << std::endl; 278 | 279 | if (reportError) 280 | { 281 | std::cout << "&&&& FAILED" << std::endl; 282 | exit(EXIT_FAILURE); 283 | } 284 | } 285 | 286 | return filepath; 287 | } 288 | 289 | inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH, int inW) 290 | { 291 | std::ifstream infile(fileName, std::ifstream::binary); 292 | assert(infile.is_open() && "Attempting to read from a file that is not open."); 293 | std::string magic, h, w, max; 294 | infile >> magic >> h >> w >> max; 295 | infile.seekg(1, infile.cur); 296 | infile.read(reinterpret_cast(buffer), inH * inW); 297 | } 298 | 299 | namespace samplesCommon 300 | { 301 | 302 | // Swaps endianness of an integral type. 
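// e.g. swapEndianness<uint32_t>(0x11223344u) returns 0x44332211u.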
303 | template ::value, int>::type = 0> 304 | inline T swapEndianness(const T& value) 305 | { 306 | uint8_t bytes[sizeof(T)]; 307 | for (int i = 0; i < static_cast(sizeof(T)); ++i) 308 | { 309 | bytes[sizeof(T) - 1 - i] = *(reinterpret_cast(&value) + i); 310 | } 311 | return *reinterpret_cast(bytes); 312 | } 313 | 314 | class HostMemory 315 | { 316 | public: 317 | HostMemory() = delete; 318 | virtual void* data() const noexcept 319 | { 320 | return mData; 321 | } 322 | virtual std::size_t size() const noexcept 323 | { 324 | return mSize; 325 | } 326 | virtual DataType type() const noexcept 327 | { 328 | return mType; 329 | } 330 | virtual ~HostMemory() {} 331 | 332 | protected: 333 | HostMemory(std::size_t size, DataType type) 334 | : mSize(size) 335 | , mType(type) 336 | { 337 | } 338 | void* mData; 339 | std::size_t mSize; 340 | DataType mType; 341 | }; 342 | 343 | template 344 | class TypedHostMemory : public HostMemory 345 | { 346 | public: 347 | TypedHostMemory(std::size_t size) 348 | : HostMemory(size, dataType) 349 | { 350 | mData = new ElemType[size]; 351 | }; 352 | ~TypedHostMemory() noexcept 353 | { 354 | delete[](ElemType*) mData; 355 | } 356 | ElemType* raw() noexcept 357 | { 358 | return static_cast(data()); 359 | } 360 | }; 361 | 362 | using FloatMemory = TypedHostMemory; 363 | using HalfMemory = TypedHostMemory; 364 | using ByteMemory = TypedHostMemory; 365 | 366 | inline void* safeCudaMalloc(size_t memSize) 367 | { 368 | void* deviceMem; 369 | mCHECK(cudaMalloc(&deviceMem, memSize)); 370 | if (deviceMem == nullptr) 371 | { 372 | std::cerr << "Out of memory" << std::endl; 373 | exit(1); 374 | } 375 | return deviceMem; 376 | } 377 | 378 | inline bool isDebug() 379 | { 380 | return (std::getenv("TENSORRT_DEBUG") ? true : false); 381 | } 382 | 383 | struct InferDeleter 384 | { 385 | template 386 | void operator()(T* obj) const 387 | { 388 | delete obj; 389 | } 390 | }; 391 | 392 | template 393 | using SampleUniquePtr = std::unique_ptr; 394 | 395 | static auto StreamDeleter = [](cudaStream_t* pStream) 396 | { 397 | if (pStream) 398 | { 399 | cudaStreamDestroy(*pStream); 400 | delete pStream; 401 | } 402 | }; 403 | 404 | inline std::unique_ptr makeCudaStream() 405 | { 406 | std::unique_ptr pStream(new cudaStream_t, StreamDeleter); 407 | if (cudaStreamCreate(pStream.get()) != cudaSuccess) 408 | { 409 | pStream.reset(nullptr); 410 | } 411 | 412 | return pStream; 413 | } 414 | 415 | template 416 | std::shared_ptr infer_object(T* obj) 417 | { 418 | if (!obj) 419 | { 420 | throw std::runtime_error(std::string("Failed to create object")); 421 | } 422 | return std::shared_ptr(obj); 423 | } 424 | 425 | //! Return vector of indices that puts magnitudes of sequence in descending order. 426 | template 427 | std::vector argMagnitudeSort(Iter begin, Iter end) 428 | { 429 | std::vector indices(end - begin); 430 | std::iota(indices.begin(), indices.end(), 0); 431 | std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) { return std::abs(begin[j]) < std::abs(begin[i]); }); 432 | return indices; 433 | } 434 | 435 | inline bool readReferenceFile(const std::string& fileName, std::vector& refVector) 436 | { 437 | std::ifstream infile(fileName); 438 | if (!infile.is_open()) 439 | { 440 | std::cout << "ERROR: readReferenceFile: Attempting to read from a file that is not open." 
<< std::endl; 441 | return false; 442 | } 443 | std::string line; 444 | while (std::getline(infile, line)) 445 | { 446 | if (line.empty()) 447 | continue; 448 | refVector.push_back(line); 449 | } 450 | infile.close(); 451 | return true; 452 | } 453 | 454 | template 455 | std::vector classify( 456 | const std::vector& refVector, const std::vector& output, const size_t topK) 457 | { 458 | const auto inds = samplesCommon::argMagnitudeSort(output.cbegin(), output.cend()); 459 | std::vector result; 460 | result.reserve(topK); 461 | for (size_t k = 0; k < topK; ++k) 462 | { 463 | result.push_back(refVector[inds[k]]); 464 | } 465 | return result; 466 | } 467 | 468 | // Returns indices of highest K magnitudes in v. 469 | template 470 | std::vector topKMagnitudes(const std::vector& v, const size_t k) 471 | { 472 | std::vector indices = samplesCommon::argMagnitudeSort(v.cbegin(), v.cend()); 473 | indices.resize(k); 474 | return indices; 475 | } 476 | 477 | template 478 | bool readASCIIFile(const std::string& fileName, const size_t size, std::vector& out) 479 | { 480 | std::ifstream infile(fileName); 481 | if (!infile.is_open()) 482 | { 483 | std::cout << "ERROR readASCIIFile: Attempting to read from a file that is not open." << std::endl; 484 | return false; 485 | } 486 | out.clear(); 487 | out.reserve(size); 488 | out.assign(std::istream_iterator(infile), std::istream_iterator()); 489 | infile.close(); 490 | return true; 491 | } 492 | 493 | template 494 | bool writeASCIIFile(const std::string& fileName, const std::vector& in) 495 | { 496 | std::ofstream outfile(fileName); 497 | if (!outfile.is_open()) 498 | { 499 | std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is not open." << std::endl; 500 | return false; 501 | } 502 | for (auto fn : in) 503 | { 504 | outfile << fn << "\n"; 505 | } 506 | outfile.close(); 507 | return true; 508 | } 509 | 510 | inline void print_version() 511 | { 512 | std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "." << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH 513 | << "." << NV_TENSORRT_BUILD << std::endl; 514 | } 515 | 516 | inline std::string getFileType(const std::string& filepath) 517 | { 518 | return filepath.substr(filepath.find_last_of(".") + 1); 519 | } 520 | 521 | inline std::string toLower(const std::string& inp) 522 | { 523 | std::string out = inp; 524 | std::transform(out.begin(), out.end(), out.begin(), ::tolower); 525 | return out; 526 | } 527 | 528 | inline float getMaxValue(const float* buffer, int64_t size) 529 | { 530 | assert(buffer != nullptr); 531 | assert(size > 0); 532 | return *std::max_element(buffer, buffer + size); 533 | } 534 | 535 | // Ensures that every tensor used by a network has a scale. 536 | // 537 | // All tensors in a network must have a range specified if a calibrator is not used. 538 | // This function is just a utility to globally fill in missing scales for the entire network. 539 | // 540 | // If a tensor does not have a scale, it is assigned inScales or outScales as follows: 541 | // 542 | // * If the tensor is the input to a layer or output of a pooling node, its scale is assigned inScales. 543 | // * Otherwise its scale is assigned outScales. 544 | // 545 | // The default parameter values are intended to demonstrate, for final layers in the network, 546 | // cases where scaling factors are asymmetric. 547 | inline void setAllTensorScales(INetworkDefinition* network, float inScales = 2.0f, float outScales = 4.0f) 548 | { 549 | // Ensure that all layer inputs have a scale. 
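// With the default arguments, every layer input (and every pooling output) gets
// the dynamic range [-2, 2], and every other layer output gets [-4, 4].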
550 | for (int i = 0; i < network->getNbLayers(); i++) 551 | { 552 | auto layer = network->getLayer(i); 553 | for (int j = 0; j < layer->getNbInputs(); j++) 554 | { 555 | ITensor* input{layer->getInput(j)}; 556 | // Optional inputs are nullptr here and are from RNN layers. 557 | if (input != nullptr && !input->dynamicRangeIsSet()) 558 | { 559 | input->setDynamicRange(-inScales, inScales); 560 | } 561 | } 562 | } 563 | 564 | // Ensure that all layer outputs have a scale. 565 | // Tensors that are also inputs to layers are ingored here 566 | // since the previous loop nest assigned scales to them. 567 | for (int i = 0; i < network->getNbLayers(); i++) 568 | { 569 | auto layer = network->getLayer(i); 570 | for (int j = 0; j < layer->getNbOutputs(); j++) 571 | { 572 | ITensor* output{layer->getOutput(j)}; 573 | // Optional outputs are nullptr here and are from RNN layers. 574 | if (output != nullptr && !output->dynamicRangeIsSet()) 575 | { 576 | // Pooling must have the same input and output scales. 577 | if (layer->getType() == LayerType::kPOOLING) 578 | { 579 | output->setDynamicRange(-inScales, inScales); 580 | } 581 | else 582 | { 583 | output->setDynamicRange(-outScales, outScales); 584 | } 585 | } 586 | } 587 | } 588 | } 589 | 590 | inline void setAllDynamicRanges(INetworkDefinition* network, float inRange = 2.0f, float outRange = 4.0f) 591 | { 592 | return setAllTensorScales(network, inRange, outRange); 593 | } 594 | 595 | inline void setDummyInt8DynamicRanges(const IBuilderConfig* c, INetworkDefinition* n) 596 | { 597 | // Set dummy per-tensor dynamic range if Int8 mode is requested. 598 | if (c->getFlag(BuilderFlag::kINT8)) 599 | { 600 | sample::gLogWarning 601 | << "Int8 calibrator not provided. Generating dummy per-tensor dynamic range. Int8 accuracy is not guaranteed." 602 | << std::endl; 603 | setAllDynamicRanges(n); 604 | } 605 | } 606 | 607 | inline void enableDLA(IBuilder* builder, IBuilderConfig* config, int useDLACore, bool allowGPUFallback = true) 608 | { 609 | if (useDLACore >= 0) 610 | { 611 | if (builder->getNbDLACores() == 0) 612 | { 613 | std::cerr << "Trying to use DLA core " << useDLACore << " on a platform that doesn't have any DLA cores" 614 | << std::endl; 615 | assert("Error: use DLA core on a platfrom that doesn't have any DLA cores" && false); 616 | } 617 | if (allowGPUFallback) 618 | { 619 | config->setFlag(BuilderFlag::kGPU_FALLBACK); 620 | } 621 | if (!config->getFlag(BuilderFlag::kINT8)) 622 | { 623 | // User has not requested INT8 Mode. 624 | // By default run in FP16 mode. FP32 mode is not permitted. 
625 | config->setFlag(BuilderFlag::kFP16); 626 | } 627 | config->setDefaultDeviceType(DeviceType::kDLA); 628 | config->setDLACore(useDLACore); 629 | config->setFlag(BuilderFlag::kSTRICT_TYPES); 630 | } 631 | } 632 | 633 | inline int parseDLA(int argc, char** argv) 634 | { 635 | for (int i = 1; i < argc; i++) 636 | { 637 | std::string arg(argv[i]); 638 | if (strncmp(argv[i], "--useDLACore=", 13) == 0) 639 | return std::stoi(argv[i] + 13); 640 | } 641 | return -1; 642 | } 643 | 644 | inline uint32_t getElementSize(nvinfer1::DataType t) noexcept 645 | { 646 | switch (t) 647 | { 648 | case nvinfer1::DataType::kINT32: return 4; 649 | case nvinfer1::DataType::kFLOAT: return 4; 650 | case nvinfer1::DataType::kHALF: return 2; 651 | case nvinfer1::DataType::kBOOL: 652 | case nvinfer1::DataType::kINT8: return 1; 653 | } 654 | return 0; 655 | } 656 | 657 | inline int64_t volume(const nvinfer1::Dims& d) 658 | { 659 | return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); 660 | } 661 | 662 | inline uint32_t elementSize(DataType t) noexcept 663 | { 664 | switch (t) 665 | { 666 | case DataType::kINT32: 667 | case DataType::kFLOAT: return 4; 668 | case DataType::kHALF: return 2; 669 | case DataType::kBOOL: 670 | case DataType::kINT8: return 1; 671 | } 672 | return 0; 673 | } 674 | 675 | template 676 | inline A divUp(A x, B n) 677 | { 678 | return (x + n - 1) / n; 679 | } 680 | 681 | template 682 | struct PPM 683 | { 684 | std::string magic, fileName; 685 | int h, w, max; 686 | uint8_t buffer[C * H * W]; 687 | }; 688 | 689 | // New vPPM(variable sized PPM) class with variable dimensions. 690 | struct vPPM 691 | { 692 | std::string magic, fileName; 693 | int h, w, max; 694 | std::vector buffer; 695 | }; 696 | 697 | struct BBox 698 | { 699 | float x1, y1, x2, y2; 700 | }; 701 | 702 | template 703 | void readPPMFile(const std::string& filename, samplesCommon::PPM& ppm) 704 | { 705 | ppm.fileName = filename; 706 | std::ifstream infile(filename, std::ifstream::binary); 707 | assert(infile.is_open() && "Attempting to read from a file that is not open."); 708 | infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; 709 | infile.seekg(1, infile.cur); 710 | infile.read(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); 711 | } 712 | 713 | inline void readPPMFile(const std::string& filename, vPPM& ppm, std::vector& input_dir) 714 | { 715 | ppm.fileName = filename; 716 | std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary); 717 | infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; 718 | infile.seekg(1, infile.cur); 719 | 720 | for (int i = 0; i < ppm.w * ppm.h * 3; ++i) 721 | { 722 | ppm.buffer.push_back(0); 723 | } 724 | 725 | infile.read(reinterpret_cast(&ppm.buffer[0]), ppm.w * ppm.h * 3); 726 | } 727 | 728 | template 729 | void writePPMFileWithBBox(const std::string& filename, PPM& ppm, const BBox& bbox) 730 | { 731 | std::ofstream outfile("./" + filename, std::ofstream::binary); 732 | assert(!outfile.fail()); 733 | outfile << "P6" 734 | << "\n" 735 | << ppm.w << " " << ppm.h << "\n" 736 | << ppm.max << "\n"; 737 | 738 | auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; 739 | const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1); 740 | const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1); 741 | const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1); 742 | const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1); 743 | 744 | for (int x = x1; x <= x2; ++x) 745 | { 746 | // bbox top border 747 | ppm.buffer[(y1 * ppm.w + x) * 3] 
= 255; 748 | ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0; 749 | ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0; 750 | // bbox bottom border 751 | ppm.buffer[(y2 * ppm.w + x) * 3] = 255; 752 | ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0; 753 | ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0; 754 | } 755 | 756 | for (int y = y1; y <= y2; ++y) 757 | { 758 | // bbox left border 759 | ppm.buffer[(y * ppm.w + x1) * 3] = 255; 760 | ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0; 761 | ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0; 762 | // bbox right border 763 | ppm.buffer[(y * ppm.w + x2) * 3] = 255; 764 | ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0; 765 | ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0; 766 | } 767 | 768 | outfile.write(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); 769 | } 770 | 771 | inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, std::vector& dets) 772 | { 773 | std::ofstream outfile("./" + filename, std::ofstream::binary); 774 | assert(!outfile.fail()); 775 | outfile << "P6" 776 | << "\n" 777 | << ppm.w << " " << ppm.h << "\n" 778 | << ppm.max << "\n"; 779 | auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; 780 | 781 | for (auto bbox : dets) 782 | { 783 | for (int x = int(bbox.x1); x < int(bbox.x2); ++x) 784 | { 785 | // bbox top border 786 | ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255; 787 | ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0; 788 | ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0; 789 | // bbox bottom border 790 | ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255; 791 | ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0; 792 | ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0; 793 | } 794 | 795 | for (int y = int(bbox.y1); y < int(bbox.y2); ++y) 796 | { 797 | // bbox left border 798 | ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255; 799 | ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0; 800 | ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0; 801 | // bbox right border 802 | ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255; 803 | ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0; 804 | ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0; 805 | } 806 | } 807 | 808 | outfile.write(reinterpret_cast(&ppm.buffer[0]), ppm.w * ppm.h * 3); 809 | } 810 | 811 | class TimerBase 812 | { 813 | public: 814 | virtual void start() {} 815 | virtual void stop() {} 816 | float microseconds() const noexcept 817 | { 818 | return mMs * 1000.f; 819 | } 820 | float milliseconds() const noexcept 821 | { 822 | return mMs; 823 | } 824 | float seconds() const noexcept 825 | { 826 | return mMs / 1000.f; 827 | } 828 | void reset() noexcept 829 | { 830 | mMs = 0.f; 831 | } 832 | 833 | protected: 834 | float mMs{0.0f}; 835 | }; 836 | 837 | class GpuTimer : public TimerBase 838 | { 839 | public: 840 | GpuTimer(cudaStream_t stream) 841 | : mStream(stream) 842 | { 843 | mCHECK(cudaEventCreate(&mStart)); 844 | mCHECK(cudaEventCreate(&mStop)); 845 | } 846 | ~GpuTimer() 847 | { 848 | mCHECK(cudaEventDestroy(mStart)); 849 | mCHECK(cudaEventDestroy(mStop)); 850 | } 851 | void start() 852 | { 853 | mCHECK(cudaEventRecord(mStart, mStream)); 854 | } 855 | void stop() 856 | { 857 | mCHECK(cudaEventRecord(mStop, mStream)); 858 | float ms{0.0f}; 859 | mCHECK(cudaEventSynchronize(mStop)); 860 | mCHECK(cudaEventElapsedTime(&ms, mStart, mStop)); 861 | mMs += ms; 862 | } 863 | 864 | private: 865 | cudaEvent_t mStart, mStop; 866 | cudaStream_t mStream; 867 | }; // class GpuTimer 868 | 869 | template 870 | class CpuTimer : public TimerBase 871 | { 872 | public: 
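// Host-side timer: each start()/stop() pair accumulates the elapsed wall-clock
// time into mMs (milliseconds), measured with the std::chrono clock type Clock.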
873 | using clock_type = Clock; 874 | 875 | void start() 876 | { 877 | mStart = Clock::now(); 878 | } 879 | void stop() 880 | { 881 | mStop = Clock::now(); 882 | mMs += std::chrono::duration{mStop - mStart}.count(); 883 | } 884 | 885 | private: 886 | std::chrono::time_point mStart, mStop; 887 | }; // class CpuTimer 888 | 889 | using PreciseCpuTimer = CpuTimer; 890 | 891 | inline std::vector splitString(std::string str, char delimiter = ',') 892 | { 893 | std::vector splitVect; 894 | std::stringstream ss(str); 895 | std::string substr; 896 | 897 | while (ss.good()) 898 | { 899 | getline(ss, substr, delimiter); 900 | splitVect.emplace_back(std::move(substr)); 901 | } 902 | return splitVect; 903 | } 904 | 905 | // Return m rounded up to nearest multiple of n 906 | inline int roundUp(int m, int n) 907 | { 908 | return ((m + n - 1) / n) * n; 909 | } 910 | 911 | inline int getC(const Dims& d) 912 | { 913 | return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; 914 | } 915 | 916 | inline int getH(const Dims& d) 917 | { 918 | return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; 919 | } 920 | 921 | inline int getW(const Dims& d) 922 | { 923 | return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; 924 | } 925 | 926 | inline void loadLibrary(const std::string& path) 927 | { 928 | #ifdef _MSC_VER 929 | void* handle = LoadLibrary(path.c_str()); 930 | #else 931 | void* handle = dlopen(path.c_str(), RTLD_LAZY); 932 | #endif 933 | if (handle == nullptr) 934 | { 935 | #ifdef _MSC_VER 936 | sample::gLogError << "Could not load plugin library: " << path << std::endl; 937 | #else 938 | sample::gLogError << "Could not load plugin library: " << path << ", due to: " << dlerror() << std::endl; 939 | #endif 940 | } 941 | } 942 | 943 | inline int32_t getSMVersion() 944 | { 945 | int32_t deviceIndex = 0; 946 | mCHECK(cudaGetDevice(&deviceIndex)); 947 | 948 | int32_t major, minor; 949 | mCHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex)); 950 | mCHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex)); 951 | 952 | return ((major << 8) | minor); 953 | } 954 | 955 | inline bool isSMSafe() 956 | { 957 | const int32_t smVersion = getSMVersion(); 958 | return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705; 959 | } 960 | 961 | inline bool isDataTypeSupported(DataType dataType) 962 | { 963 | auto builder = SampleUniquePtr(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); 964 | if (!builder) 965 | { 966 | return false; 967 | } 968 | 969 | if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()) 970 | || (dataType == DataType::kHALF && !builder->platformHasFastFp16())) 971 | { 972 | return false; 973 | } 974 | 975 | return true; 976 | } 977 | 978 | } // namespace samplesCommon 979 | 980 | inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) 981 | { 982 | os << "("; 983 | for (int i = 0; i < dims.nbDims; ++i) 984 | { 985 | os << (i ? ", " : "") << dims.d[i]; 986 | } 987 | return os << ")"; 988 | } 989 | 990 | #endif // TENSORRT_COMMON_H 991 | -------------------------------------------------------------------------------- /InstanceSegment/TensorRtSample/logger.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "logger.h" 18 | #include "ErrorRecorder.h" 19 | #include "logging.h" 20 | 21 | SampleErrorRecorder gRecorder; 22 | namespace sample 23 | { 24 | Logger gLogger{Logger::Severity::kINFO}; 25 | LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; 26 | LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; 27 | LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; 28 | LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; 29 | LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; 30 | 31 | void setReportableSeverity(Logger::Severity severity) 32 | { 33 | gLogger.setReportableSeverity(severity); 34 | gLogVerbose.setReportableSeverity(severity); 35 | gLogInfo.setReportableSeverity(severity); 36 | gLogWarning.setReportableSeverity(severity); 37 | gLogError.setReportableSeverity(severity); 38 | gLogFatal.setReportableSeverity(severity); 39 | } 40 | } // namespace sample -------------------------------------------------------------------------------- /InstanceSegment/TensorRtSample/logger.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef TENSORRT_LOGGING_H 18 | #define TENSORRT_LOGGING_H 19 | 20 | #include "NvInferRuntimeCommon.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace sample 30 | { 31 | 32 | using Severity = nvinfer1::ILogger::Severity; 33 | 34 | class LogStreamConsumerBuffer : public std::stringbuf{ 35 | public: 36 | LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog) 37 | : mOutput(stream) 38 | , mPrefix(prefix) 39 | , mShouldLog(shouldLog) 40 | { 41 | } 42 | 43 | LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) 44 | : mOutput(other.mOutput) 45 | , mPrefix(other.mPrefix) 46 | , mShouldLog(other.mShouldLog) 47 | { 48 | } 49 | 50 | ~LogStreamConsumerBuffer() 51 | { 52 | // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence 53 | // std::streambuf::pptr() gives a pointer to the current position of the output sequence 54 | // if the pointer to the beginning is not equal to the pointer to the current position, 55 | // call putOutput() to log the output to the stream 56 | if (pbase() != pptr()) 57 | { 58 | putOutput(); 59 | } 60 | } 61 | 62 | // synchronizes the stream buffer and returns 0 on success 63 | // synchronizing the stream buffer consists of inserting the buffer contents into the stream, 64 | // resetting the buffer and flushing the stream 65 | virtual int sync() 66 | { 67 | putOutput(); 68 | return 0; 69 | } 70 | 71 | void putOutput() 72 | { 73 | if (mShouldLog) 74 | { 75 | // prepend timestamp 76 | std::time_t timestamp = std::time(nullptr); 77 | tm* tm_local = std::localtime(×tamp); 78 | std::cout << "["; 79 | std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/"; 80 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/"; 81 | std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-"; 82 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":"; 83 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":"; 84 | std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] "; 85 | // std::stringbuf::str() gets the string contents of the buffer 86 | // insert the buffer contents pre-appended by the appropriate prefix into the stream 87 | mOutput << mPrefix << str(); 88 | } 89 | // set the buffer to empty 90 | str(""); 91 | // flush the stream 92 | mOutput.flush(); 93 | } 94 | 95 | void setShouldLog(bool shouldLog) 96 | { 97 | mShouldLog = shouldLog; 98 | } 99 | 100 | private: 101 | std::ostream& mOutput; 102 | std::string mPrefix; 103 | bool mShouldLog; 104 | }; 105 | 106 | //! 107 | //! \class LogStreamConsumerBase 108 | //! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer 109 | //! 110 | class LogStreamConsumerBase{ 111 | public: 112 | LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog) 113 | : mBuffer(stream, prefix, shouldLog) 114 | { 115 | } 116 | 117 | protected: 118 | LogStreamConsumerBuffer mBuffer; 119 | }; 120 | 121 | //! 122 | //! \class LogStreamConsumer 123 | //! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages. 124 | //! Order of base classes is LogStreamConsumerBase and then std::ostream. 125 | //! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field 126 | //! 
in LogStreamConsumer and then the address of the buffer is passed to std::ostream. 127 | //! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream. 128 | //! Please do not change the order of the parent classes. 129 | //! 130 | class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream{ 131 | public: 132 | //! \brief Creates a LogStreamConsumer which logs messages with level severity. 133 | //! Reportable severity determines if the messages are severe enough to be logged. 134 | LogStreamConsumer(Severity reportableSeverity, Severity severity) 135 | : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity) 136 | , std::ostream(&mBuffer) // links the stream buffer with the stream 137 | , mShouldLog(severity <= reportableSeverity) 138 | , mSeverity(severity) 139 | { 140 | } 141 | 142 | LogStreamConsumer(LogStreamConsumer&& other) 143 | : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog) 144 | , std::ostream(&mBuffer) // links the stream buffer with the stream 145 | , mShouldLog(other.mShouldLog) 146 | , mSeverity(other.mSeverity) 147 | { 148 | } 149 | 150 | void setReportableSeverity(Severity reportableSeverity) 151 | { 152 | mShouldLog = mSeverity <= reportableSeverity; 153 | mBuffer.setShouldLog(mShouldLog); 154 | } 155 | 156 | private: 157 | static std::ostream& severityOstream(Severity severity) 158 | { 159 | return severity >= Severity::kINFO ? std::cout : std::cerr; 160 | } 161 | 162 | static std::string severityPrefix(Severity severity) 163 | { 164 | switch (severity) 165 | { 166 | case Severity::kINTERNAL_ERROR: return "[F] "; 167 | case Severity::kERROR: return "[E] "; 168 | case Severity::kWARNING: return "[W] "; 169 | case Severity::kINFO: return "[I] "; 170 | case Severity::kVERBOSE: return "[V] "; 171 | default: assert(0); return ""; 172 | } 173 | } 174 | 175 | bool mShouldLog; 176 | Severity mSeverity; 177 | }; 178 | 179 | //! \class Logger 180 | //! 181 | //! \brief Class which manages logging of TensorRT tools and samples 182 | //! 183 | //! \details This class provides a common interface for TensorRT tools and samples to log information to the console, 184 | //! and supports logging two types of messages: 185 | //! 186 | //! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal) 187 | //! - Test pass/fail messages 188 | //! 189 | //! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is 190 | //! that the logic for controlling the verbosity and formatting of sample output is centralized in one location. 191 | //! 192 | //! In the future, this class could be extended to support dumping test results to a file in some standard format 193 | //! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run). 194 | //! 195 | //! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger 196 | //! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT 197 | //! library and messages coming from the sample. 198 | //! 199 | //! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the 200 | //! 
class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger 201 | //! object. 202 | 203 | class Logger : public nvinfer1::ILogger{ 204 | public: 205 | Logger(Severity severity = Severity::kWARNING) 206 | : mReportableSeverity(severity) 207 | { 208 | } 209 | 210 | //! 211 | //! \enum TestResult 212 | //! \brief Represents the state of a given test 213 | //! 214 | enum class TestResult 215 | { 216 | kRUNNING, //!< The test is running 217 | kPASSED, //!< The test passed 218 | kFAILED, //!< The test failed 219 | kWAIVED //!< The test was waived 220 | }; 221 | 222 | //! 223 | //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger 224 | //! \return The nvinfer1::ILogger associated with this Logger 225 | //! 226 | //! TODO Once all samples are updated to use this method to register the logger with TensorRT, 227 | //! we can eliminate the inheritance of Logger from ILogger 228 | //! 229 | nvinfer1::ILogger& getTRTLogger() noexcept 230 | { 231 | return *this; 232 | } 233 | 234 | //! 235 | //! \brief Implementation of the nvinfer1::ILogger::log() virtual method 236 | //! 237 | //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the 238 | //! inheritance from nvinfer1::ILogger 239 | //! 240 | void log(Severity severity, const char* msg) noexcept override 241 | { 242 | LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl; 243 | } 244 | 245 | //! 246 | //! \brief Method for controlling the verbosity of logging output 247 | //! 248 | //! \param severity The logger will only emit messages that have severity of this level or higher. 249 | //! 250 | void setReportableSeverity(Severity severity) 251 | { 252 | mReportableSeverity = severity; 253 | } 254 | 255 | //! 256 | //! \brief Opaque handle that holds logging information for a particular test 257 | //! 258 | //! This object is an opaque handle to information used by the Logger to print test results. 259 | //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used 260 | //! with Logger::reportTest{Start,End}(). 261 | //! 262 | class TestAtom{ 263 | public: 264 | TestAtom(TestAtom&&) = default; 265 | 266 | private: 267 | friend class Logger; 268 | 269 | TestAtom(bool started, const std::string& name, const std::string& cmdline) 270 | : mStarted(started) 271 | , mName(name) 272 | , mCmdline(cmdline) 273 | { 274 | } 275 | 276 | bool mStarted; 277 | std::string mName; 278 | std::string mCmdline; 279 | }; 280 | 281 | //! 282 | //! \brief Define a test for logging 283 | //! 284 | //! \param[in] name The name of the test. This should be a string starting with 285 | //! "TensorRT" and containing dot-separated strings containing 286 | //! the characters [A-Za-z0-9_]. 287 | //! For example, "TensorRT.sample_googlenet" 288 | //! \param[in] cmdline The command line used to reproduce the test 289 | // 290 | //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). 291 | //! 292 | static TestAtom defineTest(const std::string& name, const std::string& cmdline) 293 | { 294 | return TestAtom(false, name, cmdline); 295 | } 296 | 297 | //! 298 | //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments 299 | //! as input 300 | //! 301 | //! \param[in] name The name of the test 302 | //! \param[in] argc The number of command-line arguments 303 | //! 
\param[in] argv The array of command-line arguments (given as C strings) 304 | //! 305 | //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). 306 | static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) 307 | { 308 | // Append TensorRT version as info 309 | const std::string vname = name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]"; 310 | auto cmdline = genCmdlineString(argc, argv); 311 | return defineTest(vname, cmdline); 312 | } 313 | 314 | //! 315 | //! \brief Report that a test has started. 316 | //! 317 | //! \pre reportTestStart() has not been called yet for the given testAtom 318 | //! 319 | //! \param[in] testAtom The handle to the test that has started 320 | //! 321 | static void reportTestStart(TestAtom& testAtom) 322 | { 323 | reportTestResult(testAtom, TestResult::kRUNNING); 324 | assert(!testAtom.mStarted); 325 | testAtom.mStarted = true; 326 | } 327 | 328 | //! 329 | //! \brief Report that a test has ended. 330 | //! 331 | //! \pre reportTestStart() has been called for the given testAtom 332 | //! 333 | //! \param[in] testAtom The handle to the test that has ended 334 | //! \param[in] result The result of the test. Should be one of TestResult::kPASSED, 335 | //! TestResult::kFAILED, TestResult::kWAIVED 336 | //! 337 | static void reportTestEnd(const TestAtom& testAtom, TestResult result) 338 | { 339 | assert(result != TestResult::kRUNNING); 340 | assert(testAtom.mStarted); 341 | reportTestResult(testAtom, result); 342 | } 343 | 344 | static int reportPass(const TestAtom& testAtom) 345 | { 346 | reportTestEnd(testAtom, TestResult::kPASSED); 347 | return EXIT_SUCCESS; 348 | } 349 | 350 | static int reportFail(const TestAtom& testAtom) 351 | { 352 | reportTestEnd(testAtom, TestResult::kFAILED); 353 | return EXIT_FAILURE; 354 | } 355 | 356 | static int reportWaive(const TestAtom& testAtom) 357 | { 358 | reportTestEnd(testAtom, TestResult::kWAIVED); 359 | return EXIT_SUCCESS; 360 | } 361 | 362 | static int reportTest(const TestAtom& testAtom, bool pass) 363 | { 364 | return pass ? reportPass(testAtom) : reportFail(testAtom); 365 | } 366 | 367 | Severity getReportableSeverity() const 368 | { 369 | return mReportableSeverity; 370 | } 371 | 372 | private: 373 | //! 374 | //! \brief returns an appropriate string for prefixing a log message with the given severity 375 | //! 376 | static const char* severityPrefix(Severity severity) 377 | { 378 | switch (severity) 379 | { 380 | case Severity::kINTERNAL_ERROR: return "[F] "; 381 | case Severity::kERROR: return "[E] "; 382 | case Severity::kWARNING: return "[W] "; 383 | case Severity::kINFO: return "[I] "; 384 | case Severity::kVERBOSE: return "[V] "; 385 | default: assert(0); return ""; 386 | } 387 | } 388 | 389 | //! 390 | //! \brief returns an appropriate string for prefixing a test result message with the given result 391 | //! 392 | static const char* testResultString(TestResult result) 393 | { 394 | switch (result) 395 | { 396 | case TestResult::kRUNNING: return "RUNNING"; 397 | case TestResult::kPASSED: return "PASSED"; 398 | case TestResult::kFAILED: return "FAILED"; 399 | case TestResult::kWAIVED: return "WAIVED"; 400 | default: assert(0); return ""; 401 | } 402 | } 403 | 404 | //! 405 | //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity 406 | //! 407 | static std::ostream& severityOstream(Severity severity) 408 | { 409 | return severity >= Severity::kINFO ? std::cout : std::cerr; 410 | } 411 | 412 | //! 413 | //! 
\brief method that implements logging test results 414 | //! 415 | static void reportTestResult(const TestAtom& testAtom, TestResult result) 416 | { 417 | severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " 418 | << testAtom.mCmdline << std::endl; 419 | } 420 | 421 | //! 422 | //! \brief generate a command line string from the given (argc, argv) values 423 | //! 424 | static std::string genCmdlineString(int argc, char const* const* argv) 425 | { 426 | std::stringstream ss; 427 | for (int i = 0; i < argc; i++) 428 | { 429 | if (i > 0) 430 | { 431 | ss << " "; 432 | } 433 | ss << argv[i]; 434 | } 435 | return ss.str(); 436 | } 437 | 438 | Severity mReportableSeverity; 439 | }; 440 | 441 | namespace 442 | { 443 | 444 | //! 445 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE 446 | //! 447 | //! Example usage: 448 | //! 449 | //! LOG_VERBOSE(logger) << "hello world" << std::endl; 450 | //! 451 | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) 452 | { 453 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE); 454 | } 455 | 456 | //! 457 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO 458 | //! 459 | //! Example usage: 460 | //! 461 | //! LOG_INFO(logger) << "hello world" << std::endl; 462 | //! 463 | inline LogStreamConsumer LOG_INFO(const Logger& logger) 464 | { 465 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); 466 | } 467 | 468 | //! 469 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING 470 | //! 471 | //! Example usage: 472 | //! 473 | //! LOG_WARN(logger) << "hello world" << std::endl; 474 | //! 475 | inline LogStreamConsumer LOG_WARN(const Logger& logger) 476 | { 477 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING); 478 | } 479 | 480 | //! 481 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR 482 | //! 483 | //! Example usage: 484 | //! 485 | //! LOG_ERROR(logger) << "hello world" << std::endl; 486 | //! 487 | inline LogStreamConsumer LOG_ERROR(const Logger& logger) 488 | { 489 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); 490 | } 491 | 492 | //! 493 | //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR 494 | //! ("fatal" severity) 495 | //! 496 | //! Example usage: 497 | //! 498 | //! LOG_FATAL(logger) << "hello world" << std::endl; 499 | //! 500 | inline LogStreamConsumer LOG_FATAL(const Logger& logger) 501 | { 502 | return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR); 503 | } 504 | 505 | } // anonymous namespace 506 | 507 | } // namespace sample */ 508 | 509 | #endif // TENSORRT_LOGGING_H -------------------------------------------------------------------------------- /InstanceSegment/TensorRtSample/logging.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | 
17 | #ifndef LOGGER_H
18 | #define LOGGER_H
19 | 
20 | #include "logging.h"
21 | 
22 | class SampleErrorRecorder;
23 | extern SampleErrorRecorder gRecorder;
24 | namespace sample
25 | {
26 | extern Logger gLogger;
27 | extern LogStreamConsumer gLogVerbose;
28 | extern LogStreamConsumer gLogInfo;
29 | extern LogStreamConsumer gLogWarning;
30 | extern LogStreamConsumer gLogError;
31 | extern LogStreamConsumer gLogFatal;
32 | 
33 | void setReportableSeverity(Logger::Severity severity);
34 | } // namespace sample
35 | 
36 | #endif // LOGGER_H
--------------------------------------------------------------------------------
/InstanceSegment/buffer.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 | 
10 | #include "buffer.h"
// (The three <...> headers below and the angle-bracketed template arguments in
// this file were stripped during extraction; they are reconstructions based on
// the visible usage.)
11 | #include <optional>
12 | #include <cuda_runtime_api.h>
13 | #include <fmt/format.h>
14 | #include "parameters.h"
15 | #include "utils.h"
16 | 
17 | 
18 | MyBuffer::MyBuffer(nvinfer1::ICudaEngine& engine){
19 | ///Allocate the output buffers
20 | binding_num=engine.getNbBindings();
21 | for(int i=0;i<binding_num;++i){
// [lines 22-69 are missing from the source dump]
70 | std::optional<int> GetQueueShapeIndex(int c, int h, int w)
71 | {
72 | int index=-1;
73 | for(int i=0;i< (int)kTensorQueueShape.size();++i){
74 | if(c==kTensorQueueShape[i][1] && h==kTensorQueueShape[i][2] && w==kTensorQueueShape[i][3]){
75 | index=i;
76 | break;
77 | }
78 | }
79 | if(index==-1)
80 | return std::nullopt;
81 | else
82 | return index;
83 | }
84 | 
85 | void MyBuffer::CudaToTensor(std::vector<torch::Tensor> &inst)
86 | {
87 | inst.resize(kTensorQueueShape.size());
88 | auto opt=torch::TensorOptions().device(torch::kCUDA).dtype(torch::kFloat);
89 | for(int i=1; i < binding_num; ++i){
90 | torch::Tensor tensor=torch::from_blob(
91 | gpu_buffer[i],
92 | {dims[i].d[0], dims[i].d[1], dims[i].d[2], dims[i].d[3]},
93 | opt);
94 | if(std::optional<int> index = GetQueueShapeIndex(
95 | dims[i].d[1], dims[i].d[2], dims[i].d[3]);index){
96 | inst[*index] = tensor.to(torch::kCUDA);
97 | }
98 | else{
99 | throw std::runtime_error(fmt::format("GetQueueShapeIndex failed:{}",
100 | Dims2Str(dims[i])));
101 | }
102 | }
103 | }
--------------------------------------------------------------------------------
/InstanceSegment/buffer.h:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
-------------------------------------------------------------------------------- /InstanceSegment/buffer.h: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #ifndef INSTANCE_SEGMENT_BUFFER_H
11 | #define INSTANCE_SEGMENT_BUFFER_H
12 |
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 |
19 | struct MyBuffer{
20 | public:
21 | using Ptr = std::shared_ptr<MyBuffer>;
22 | explicit MyBuffer(nvinfer1::ICudaEngine& engine);
23 | ~MyBuffer();
24 |
25 | void CpyInputToGPU();
26 | void CpyOutputToCPU();
27 |
28 | void CudaToTensor(std::vector<torch::Tensor> &inst);
29 |
30 | cudaStream_t stream{};
31 | int binding_num;
32 | std::string names[12];
33 | nvinfer1::Dims dims[12]{};
34 | int size[12]{};
35 | float **cpu_buffer = new float* [12];//array of pointers that holds the input and output buffers on the CPU
36 | void *gpu_buffer[12]{}; //array of pointers to the input/output buffers on the GPU
37 | };
38 |
39 |
40 | #endif
41 |
-------------------------------------------------------------------------------- /InstanceSegment/infer.cpp: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 |
11 |
12 | #include
13 | #include
14 | #include
15 |
16 | #include "infer.h"
17 | #include "parameters.h"
18 | #include "utils.h"
19 |
20 |
21 | Infer::Infer()
22 | {
23 | ///Register the built-in and custom plugins
24 | initLibNvInferPlugins(&sample::gLogger.getTRTLogger(),"");
25 | InfoLog("Read model param");
26 | std::string model_str;
27 | if(std::ifstream ifs(Config::kDetectorSerializePath);ifs.is_open()){
28 | while(ifs.peek() != EOF){
29 | std::stringstream ss;
30 | ss<<ifs.rdbuf();
31 | model_str.append(ss.str());
32 | }
33 | }
42 | runtime_=std::unique_ptr<nvinfer1::IRuntime, InferDeleter>(
43 | nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
44 | InfoLog("deserializeCudaEngine");
45 |
46 | ///Deserialize the model
47 | engine_=std::shared_ptr<nvinfer1::ICudaEngine>(
48 | runtime_->deserializeCudaEngine(model_str.data(), model_str.size()) , InferDeleter());
49 | InfoLog("createExecutionContext");
50 |
51 | ///Create the execution context
52 | context_=std::unique_ptr<nvinfer1::IExecutionContext, InferDeleter>(
53 | engine_->createExecutionContext());
54 | if(!context_){
55 | throw std::runtime_error("can not create context");
56 | }
57 |
58 | ///Allocate the input and output buffers
59 | buffer_ = std::make_shared<MyBuffer>(*engine_);
60 |
61 | Config::input_h=buffer_->dims[0].d[2];
62 | Config::input_w=buffer_->dims[0].d[3];
63 | Config::input_c=buffer_->dims[0].d[1];
64 |
65 | pipeline_=std::make_shared<Pipeline>();
66 | solo_ = std::make_shared<Solov2>();
67 |
68 | //cv::Mat warn_up_input(cv::Size(1226,370),CV_8UC3,cv::Scalar(128));
69 | cv::Mat warn_up_input = cv::imread(Config::kWarnUpImagePath);
70 |
71 | if(warn_up_input.empty()){
72 | ErrorLog("Cannot open the warm-up image:{}", Config::kWarnUpImagePath);
73 | return;
74 | }
75 |
76 | cv::resize(warn_up_input,warn_up_input,cv::Size(Config::kImageWidth, Config::kImageHeight));
77 |
78 | WarnLog("warm up model, path:{}",Config::kWarnUpImagePath);
79 |
80 | //[[maybe_unused]] auto result = forward(warn_up_input);
81 |
82 | [[maybe_unused]] torch::Tensor mask_tensor;
83 | [[maybe_unused]] std::vector<InstInfo> insts_info;
84 | Forward(warn_up_input, mask_tensor, insts_info);
85 |
86 | //if(insts_info.empty())throw std::runtime_error("model not init");
87 |
88 | InfoLog("infer init finished");
89 | }
90 |
91 |
92 |
93 | void Infer::Forward(cv::Mat &img, torch::Tensor &mask_tensor, std::vector<InstInfo> &insts)
94 | {
95 | TicToc t_all,tt;
96 | ///Copy the image data into the input buffer; this also normalizes the image
97 | buffer_->gpu_buffer[0] = pipeline_->SetInputTensorCuda(img);
98 | InfoLog("Forward prepare:{} ms", tt.TocThenTic());
99 | ///Inference
100 | context_->enqueue(kBatchSize, buffer_->gpu_buffer, buffer_->stream, nullptr);
101 | InfoLog("Forward enqueue:{} ms", tt.TocThenTic());
102 | ///Wrap the output data as tensors
103 | std::vector<torch::Tensor> outputs;
104 | buffer_->CudaToTensor(outputs);
105 | InfoLog("Forward CudaToTensor:{} ms", tt.TocThenTic());
106 | ///Post-processing
107 | solo_->GetSegTensor(outputs, pipeline_->img_info, mask_tensor, insts);
108 | InfoLog("Forward GetSegTensor:{} ms", tt.TocThenTic());
109 | InfoLog("Forward inst number:{}",insts.size());
110 |
111 | infer_time_ = t_all.Toc();
112 | }
113 |
114 | void Infer::VisualizeResult(cv::Mat &input,cv::Mat &mask,std::vector<InstInfo> &insts)
115 | {
116 | if(mask.empty()){
117 | cv::imshow("test",input);
118 | cv::waitKey(1);
119 | }
120 | else{
121 | mask = pipeline_->ProcessMask(mask, insts);
122 |
123 | cv::Mat image_test;
124 | cv::add(input,mask,image_test);
125 | for(auto &inst : insts){
126 | if(inst.prob < 0.2)
127 | continue;
128 | inst.name = cfg::CocoLabelVector[inst.label_id];
129 | cv::Point2i center = (inst.min_pt + inst.max_pt)/2;
130 | std::string show_text = fmt::format("{} {:.2f}",inst.name,inst.prob);
131 | cv::putText(image_test,show_text,center,CV_FONT_HERSHEY_SIMPLEX,0.8,
132 | cv::Scalar(255,0,0),2);
133 | cv::rectangle(image_test, inst.min_pt, inst.max_pt, cv::Scalar(255, 0, 0), 2);
134 | }
135 | cv::putText(image_test, fmt::format("{:.2f} ms", infer_time_),
136 | cv::Point2i(20, 20), CV_FONT_HERSHEY_SIMPLEX, 2,
137 | cv::Scalar(0, 255, 255));
138 | cv::imshow("test",image_test);
139 | cv::waitKey(1);
140 | }
141 | }
142 |
-------------------------------------------------------------------------------- /InstanceSegment/infer.h: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #ifndef INSTANCE_SEGMENT_INFER_H
11 | #define INSTANCE_SEGMENT_INFER_H
12 |
13 | #include
14 | #include
15 | #include
16 | #include "TensorRtSample/common.h"
17 | #include "pipeline.h"
18 | #include "solo.h"
19 | #include "buffer.h"
20 |
21 |
22 | struct InferDeleter{
23 | template <typename T>
24 | void operator()(T* obj) const{
25 | if (obj)
26 | obj->destroy();
27 | }
28 | };
29 |
30 | class Infer {
31 | public:
32 | using Ptr = std::shared_ptr<Infer>;
33 | Infer();
34 | void Forward(cv::Mat &img, torch::Tensor &mask_tensor, std::vector<InstInfo> &insts);
35 | void VisualizeResult(cv::Mat &input,cv::Mat &mask,std::vector<InstInfo> &insts);
36 | private:
37 | MyBuffer::Ptr buffer_;
38 | Pipeline::Ptr pipeline_;
39 | Solov2::Ptr solo_;
40 | std::unique_ptr<nvinfer1::IRuntime, InferDeleter> runtime_;
41 | std::shared_ptr<nvinfer1::ICudaEngine> engine_;
42 | std::unique_ptr<nvinfer1::IExecutionContext, InferDeleter> context_;
43 | double infer_time_{0};
44 | };
45 |
46 |
47 | #endif
48 |
-------------------------------------------------------------------------------- /InstanceSegment/parameters.cpp: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #include "parameters.h"
11 | #include
12 |
13 | void InitLogger()
14 | {
15 | auto reset_log_file=[](const std::string &path){
16 | if(!fs::exists(path)){
17 | std::ifstream file(path);//create the file
18 | file.close();
19 | }
20 | else{
21 | std::ofstream file(path,std::ios::trunc);//truncate the file
22 | file.close();
23 | }
24 | };
25 |
26 | auto get_log_level=[](const std::string &level_str){
27 | if(level_str=="debug")
28 | return spdlog::level::debug;
29 | else if(level_str=="info")
30 | return spdlog::level::info;
31 | else if(level_str=="warn")
32 | return spdlog::level::warn;
33 | else if(level_str=="error" || level_str=="err")
34 | return spdlog::level::err;
35 | else if(level_str=="critical")
36 | return spdlog::level::critical;
37 | else{
38 | cerr<<"log level not right, set default warn"<<endl;
39 | return spdlog::level::warn;
40 | }
41 | };
42 |
43 |
44 |
45 | sgLogger->set_level(get_log_level(Config::kLogLevel));
46 | sgLogger->flush_on(get_log_level(Config::kLogFlush));
47 | }
48 |
49 |
50 |
51 | Config::Config(const std::string &file_name)
52 | {
53 | cv::FileStorage fs(file_name, cv::FileStorage::READ);
54 | if(!fs.isOpened()){
55 | throw std::runtime_error(fmt::format("ERROR: Wrong path to settings:{}\n",file_name));
56 | }
57 |
58 | fs["IMAGE_HEIGHT"] >> kImageHeight;
59 | fs["IMAGE_WIDTH"] >> kImageWidth;
60 |
61 | fs["LOG_PATH"] >> kLogPath;
62 | fs["LOG_LEVEL"] >> kLogLevel;
63 | fs["LOG_FLUSH"] >> kLogFlush;
64 |
65 | fs["ONNX_PATH"] >> kDetectorOnnxPath;
66 | fs["SERIALIZE_PATH"] >> kDetectorSerializePath;
67 |
68 | fs["SOLO_NMS_PRE"] >> kSoloNmsPre;
69 | fs["SOLO_MAX_PER_IMG"] >> kSoloMaxPerImg;
70 | fs["SOLO_NMS_KERNEL"] >> kSoloNmsKernel;
71 | fs["SOLO_NMS_SIGMA"] >> kSoloNmsSigma;
72 | fs["SOLO_SCORE_THR"] >> kSoloScoreThr;
73 | fs["SOLO_MASK_THR"] >> kSoloMaskThr;
74 | fs["SOLO_UPDATE_THR"] >> kSoloUpdateThr;
75 |
76 | fs["DATASET_DIR"] >> kDatasetPath;
77 | fs["WARN_UP_IMAGE_PATH"] >> kWarnUpImagePath;
78 | fs.release();
79 |
80 | std::map<int,std::string> CocoLabelMap={
81 | {1, "person"}, {2, "bicycle"}, {3, "car"}, {4, "motorcycle"}, {5, "airplane"},
82 | {6, "bus"}, {7, "train"}, {8, "truck"}, {9, "boat"}, {10, "traffic light"},
83 | {11, "fire hydrant"}, {13, "stop sign"}, {14, "parking meter"}, {15, "bench"},
84 | {16, "bird"}, {17, "cat"}, {18, "dog"}, {19, "horse"}, {20, "sheep"}, {21, "cow"},
85 | {22, "elephant"}, {23, "bear"}, {24, "zebra"}, {25, "giraffe"}, {27, "backpack"},
86 | {28, "umbrella"}, {31, "handbag"}, {32, "tie"}, {33, "suitcase"}, {34, "frisbee"},
87 | {35, "skis"}, {36, "snowboard"}, {37, "sports ball"}, {38, "kite"}, {39, "baseball bat"},
88 | {40, "baseball glove"}, {41, "skateboard"}, {42, "surfboard"}, {43, "tennis racket"},
89 | {44, "bottle"}, {46, "wine glass"}, {47, "cup"}, {48, "fork"}, {49, "knife"}, {50, "spoon"},
90 | {51, "bowl"}, {52, "banana"}, {53, "apple"}, {54, "sandwich"}, {55, "orange"},
91 | {56, "broccoli"}, {57, "carrot"}, {58, "hot dog"}, {59, "pizza"}, {60, "donut"},
92 | {61, "cake"}, {62, "chair"}, {63, "couch"}, {64, "potted plant"}, {65, "bed"}, {67, "dining table"},
93 | {70, "toilet"}, {72, "tv"}, {73, "laptop"}, {74, "mouse"}, {75, "remote"}, {76, "keyboard"},
94 | {77, "cell phone"}, {78, "microwave"}, {79, "oven"}, {80, "toaster"},{ 81, "sink"},
95 | {82, "refrigerator"}, {84, "book"}, {85, "clock"},{ 86, "vase"}, {87, "scissors"},
96 | {88, "teddy bear"}, {89, "hair drier"}, {90, "toothbrush"}
97 | };
98 | CocoLabelVector.reserve(CocoLabelMap.size());
99 | for(auto &pair : CocoLabelMap){
100 | CocoLabelVector.push_back(pair.second);
101 | }
102 |
103 | InitLogger();
104 | }
105 |
106 |
107 |
108 |
109 |
-------------------------------------------------------------------------------- /InstanceSegment/parameters.h: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #ifndef INSTANCE_SEGMENT_PARAMETER_H
11 | #define INSTANCE_SEGMENT_PARAMETER_H
12 |
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 |
19 | #include
20 | #include
21 | #include
22 |
23 | using std::cout;
24 | using std::endl;
25 | using std::cerr;
26 | using std::string;
27 | using std::pair;
28 | using std::vector;
29 |
30 | using namespace std::chrono_literals;
31 | namespace fs=std::filesystem;
32 |
33 | //Image normalization parameters; note that they are ordered as RGB
34 | inline float kSoloImageMean[3]={123.675, 116.28, 103.53};
35 | inline float kSoloImageStd[3]={58.395, 57.12, 57.375};
36 | constexpr int kBatchSize=1;
37 | constexpr int kSoloTensorChannel=128;//the tensor output channel count should be 128
38 |
39 | inline std::vector<float> kSoloNumGrids={40, 36, 24, 16, 12};//number of grids at each FPN level
40 | inline std::vector<float> kSoloStrides={8, 8, 16, 32, 32};//stride of the predictions at each level
41 |
42 | inline std::vector<std::vector<int>> kTensorQueueShape{
43 | {1, 128, 12, 12},
44 | {1, 128, 16, 16},
45 | {1, 128, 24, 24},
46 | {1, 128, 36, 36},
47 | {1, 128, 40, 40},
48 | {1, 80, 12, 12},
49 | {1, 80, 16, 16},
50 | {1, 80, 24, 24},
51 | {1, 80, 36, 36},
52 | {1, 80, 40, 40},
53 | {1, 128, 96, 288}
54 | };
55 |
56 |
57 | inline std::shared_ptr<spdlog::logger> sgLogger;
58 |
59 | template <typename Arg1, typename... Args>
60 | inline void DebugLog(const char* fmt, const Arg1 &arg1, const Args&... args){ sgLogger->log(spdlog::level::debug, fmt, arg1, args...);}
61 | template <typename T>
62 | inline void DebugLog(const T& msg){sgLogger->log(spdlog::level::debug, msg); }
63 | template <typename Arg1, typename... Args>
64 | inline void InfoLog(const char* fmt, const Arg1 &arg1, const Args&... args){sgLogger->log(spdlog::level::info, fmt, arg1, args...);}
65 | template <typename T>
66 | inline void InfoLog(const T& msg){sgLogger->log(spdlog::level::info, msg);}
67 | template <typename Arg1, typename... Args>
68 | inline void WarnLog(const char* fmt, const Arg1 &arg1, const Args&... args){sgLogger->log(spdlog::level::warn, fmt, arg1, args...);}
69 | template <typename T>
70 | inline void WarnLog(const T& msg){sgLogger->log(spdlog::level::warn, msg);}
71 | template <typename Arg1, typename... Args>
72 | inline void ErrorLog(const char* fmt, const Arg1 &arg1, const Args&... args){sgLogger->log(spdlog::level::err, fmt, arg1, args...);}
73 | template <typename T>
74 | inline void ErrorLog(const T& msg){sgLogger->log(spdlog::level::err, msg);}
75 | template <typename Arg1, typename... Args>
76 | inline void CriticalLog(const char* fmt, const Arg1 &arg1, const Args&... args){sgLogger->log(spdlog::level::critical, fmt, arg1, args...);}
77 | template <typename T>
78 | inline void CriticalLog(const T& msg){sgLogger->log(spdlog::level::critical, msg);}
79 |
80 |
81 |
82 | class Config {
83 | public:
84 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW
85 | using Ptr=std::shared_ptr<Config>;
86 |
87 | explicit Config(const std::string &file_name);
88 |
89 | inline static std::string kDetectorOnnxPath;
90 | inline static std::string kDetectorSerializePath;
91 |
92 | inline static int kImageHeight,kImageWidth;
93 | inline static std::vector<std::string> CocoLabelVector;
94 |
95 | inline static std::string kLogPath;
96 | inline static std::string kLogLevel;
97 | inline static std::string kLogFlush;
98 |
99 | inline static int kSoloNmsPre;
100 | inline static int kSoloMaxPerImg;
101 | inline static std::string kSoloNmsKernel;
102 | inline static float kSoloNmsSigma;
103 | inline static float kSoloScoreThr;
104 | inline static float kSoloMaskThr;
105 | inline static float kSoloUpdateThr;
106 |
107 | inline static string kDatasetPath;
108 | inline static string kWarnUpImagePath;
109 |
110 | inline static std::atomic_bool ok{true};
111 |
112 | inline static int input_h,input_w,input_c;
113 | };
114 |
115 | using cfg=Config;
116 |
117 | #endif
118 |
119 |
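Note: `InitLogger()` builds the global spdlog logger from the LOG_* entries of `config.yaml`. A standalone sketch of the same setup (the logger name and file path below are placeholders, not the project's values):

```cpp
#include <spdlog/spdlog.h>
#include <spdlog/sinks/basic_file_sink.h>

int main()
{
    auto logger = spdlog::basic_logger_mt("segmentor", "./segmentor_log.txt");
    logger->set_level(spdlog::level::debug); // matches LOG_LEVEL: "debug"
    logger->flush_on(spdlog::level::debug);  // matches LOG_FLUSH: "debug"
    logger->info("input size: {}x{}", 1226, 370);
    return 0;
}
```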
-------------------------------------------------------------------------------- /InstanceSegment/pipeline.cpp: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #include "pipeline.h"
11 |
12 | #include
13 | #include
14 |
15 |
16 | using namespace torch::indexing;
17 | using InterpolateFuncOptions=torch::nn::functional::InterpolateFuncOptions;
18 |
19 |
20 | template <typename ImageType>
21 | std::tuple<float,float> Pipeline::GetXYWHS(const ImageType &img)
22 | {
23 | img_info.origin_h = img.rows;
24 | img_info.origin_w = img.cols;
25 |
26 | int w, h, x, y;
27 | float r_w = Config::input_w / (img.cols * 1.0f);
28 | float r_h = Config::input_h / (img.rows * 1.0f);
29 | if (r_h > r_w) {
30 | w = Config::input_w;
31 | h = r_w * img.rows;
32 | if(h%2==1)h++;//ensure h is even, which simplifies the later padding
33 | x = 0;
34 | y = (Config::input_h - h) / 2;
35 | } else {
36 | w = r_h* img.cols;
37 | if(w%2==1)w++;
38 | h = Config::input_h;
39 | x = (Config::input_w - w) / 2;
40 | y = 0;
41 | }
42 |
43 | img_info.rect_x = x;
44 | img_info.rect_y = y;
45 | img_info.rect_w = w;
46 | img_info.rect_h = h;
47 |
48 | return {r_h,r_w};
49 | }
50 |
51 |
52 | void* Pipeline::SetInputTensorCuda(cv::Mat &img)
53 | {
54 | TicToc tt;
55 |
56 | auto [r_h,r_w] = GetXYWHS(img);
57 |
58 | cv::Mat img_float;
59 | img.convertTo(img_float,CV_32FC3);
60 | sgLogger->debug("SetInputTensorCuda convertTo: {} ms", tt.TocThenTic());
61 | input_tensor = torch::from_blob(img_float.data, {img_info.origin_h, img_info.origin_w , 3 }, torch::kFloat32).to(torch::kCUDA);
62 |
63 |
64 | sgLogger->debug("SetInputTensorCuda from_blob:{} {} ms", Dims2Str(input_tensor.sizes()), tt.TocThenTic());
65 |
66 | ///bgr->rgb
67 | input_tensor = torch::cat({
68 | input_tensor.index({"...",2}).unsqueeze(2),
69 | input_tensor.index({"...",1}).unsqueeze(2),
70 | input_tensor.index({"...",0}).unsqueeze(2)
71 | },2);
72 | sgLogger->debug("SetInputTensorCuda bgr->rgb:{} {} ms", Dims2Str(input_tensor.sizes()), tt.TocThenTic());
73 |
74 | ///hwc->chw
75 | input_tensor = input_tensor.permute({2,0,1});
76 | sgLogger->debug("SetInputTensorCuda hwc->chw:{} {} ms", Dims2Str(input_tensor.sizes()), tt.TocThenTic());
77 |
78 | ///norm
79 | static torch::Tensor mean_t=torch::from_blob(kSoloImageMean, {3, 1, 1}, torch::kFloat32).to(torch::kCUDA).
80 | expand({3, img_info.origin_h, img_info.origin_w});
81 | static torch::Tensor std_t=torch::from_blob(kSoloImageStd, {3, 1, 1}, torch::kFloat32).to(torch::kCUDA).
82 | expand({3, img_info.origin_h, img_info.origin_w});
83 | input_tensor = ((input_tensor-mean_t)/std_t);
84 | sgLogger->debug("SetInputTensorCuda norm:{} {} ms", Dims2Str(input_tensor.sizes()), tt.TocThenTic());
85 |
86 | ///resize
87 | static auto options=InterpolateFuncOptions().mode(torch::kBilinear).align_corners(true);
88 | options=options.size(std::vector<int64_t>({img_info.rect_h, img_info.rect_w}));
89 | input_tensor = torch::nn::functional::interpolate(input_tensor.unsqueeze(0),options).squeeze(0);
90 | sgLogger->debug("SetInputTensorCuda resize:{} {} ms", Dims2Str(input_tensor.sizes()), tt.TocThenTic());
91 |
92 | ///Pad the image borders
93 | static auto op = torch::TensorOptions(torch::kCUDA).dtype(torch::kFloat32);
94 | static cv::Scalar mag_color(kSoloImageMean[2], kSoloImageMean[1], kSoloImageMean[0]);
95 | if (r_h > r_w) { //pad blank regions at the top and bottom of the image
96 | int cat_w = Config::input_w;
97 | int cat_h = (Config::input_h - img_info.rect_h) / 2;
98 | torch::Tensor cat_t = torch::zeros({3,cat_h,cat_w},op);
99 | input_tensor = torch::cat({cat_t,input_tensor,cat_t},1);
100 | } else {
101 | int cat_w= (Config::input_w - img_info.rect_w) / 2;
102 | int cat_h=Config::input_h;
103 | torch::Tensor cat_t = torch::zeros({3,cat_h,cat_w},op);
104 | input_tensor = torch::cat({cat_t,input_tensor,cat_t},2);
105 | }
106 | sgLogger->debug("SetInputTensorCuda cat:{} {} ms", Dims2Str(input_tensor.sizes()), tt.TocThenTic());
107 |
108 | input_tensor = input_tensor.contiguous();
109 | sgLogger->debug("SetInputTensorCuda contiguous:{} {} ms", Dims2Str(input_tensor.sizes()), tt.TocThenTic());
110 |
111 | return input_tensor.data_ptr();
112 | }
113 |
114 |
115 | cv::Mat Pipeline::ProcessPad(cv::Mat &img)
116 | {
117 | TicToc tt;
118 |
119 | GetXYWHS(img);
120 |
121 | //resize img to (INPUT_W, INPUT_H)
122 | cv::Mat re;
123 | cv::resize(img, re, cv::Size(img_info.rect_w, img_info.rect_h) , 0, 0, cv::INTER_LINEAR);
124 |
125 | sgLogger->debug("ProcessPad resize:{} ms", tt.TocThenTic());
126 |
127 | //copy the image into out
128 | static cv::Scalar mag_color(kSoloImageMean[2], kSoloImageMean[1], kSoloImageMean[0]);
129 | cv::Mat out(Config::input_h, Config::input_w, CV_8UC3, mag_color);
130 | re.copyTo(out(cv::Rect(img_info.rect_x, img_info.rect_y, re.cols, re.rows)));
131 |
132 | sgLogger->debug("ProcessPad copyTo out:{} ms", tt.TocThenTic());
133 |
134 | return out;
135 | }
136 |
137 | void Pipeline::SetBufferWithNorm(const cv::Mat &img, float *buffer)
138 | {
139 | //assert(Config::inputH==img.rows);
140 | //assert(Config::inputW==img.cols);
141 | int i = 0,b_cnt=0;
142 | auto rows = std::min(img.rows,Config::input_h);
143 | auto cols = std::min(img.cols,Config::input_w);
144 | for (int row = 0; row < rows; ++row) {
145 | uchar* uc_pixel = img.data + row * img.step;
146 | for (int col = 0; col < cols; ++col) {
147 | buffer[b_cnt * 3 * Config::input_h * Config::input_w + i] = (uc_pixel[2] - kSoloImageMean[0]) / kSoloImageStd[0];
148 | buffer[b_cnt * 3 * Config::input_h * Config::input_w + i + Config::input_h * Config::input_w] =
149 | (uc_pixel[1] - kSoloImageMean[1]) / kSoloImageStd[1];
150 | buffer[b_cnt * 3 * Config::input_h * Config::input_w + i + 2 * Config::input_h * Config::input_w] =
151 | (uc_pixel[0] - kSoloImageMean[2]) / kSoloImageStd[2];
152 | uc_pixel += 3;
153 | ++i;
154 | }
155 | }
156 |
157 | }
158 |
159 |
160 | cv::Mat Pipeline::ProcessMask(cv::Mat &mask, std::vector<InstInfo> &insts)
161 | {
162 | cv::Mat rect_img = mask(cv::Rect(img_info.rect_x, img_info.rect_y, img_info.rect_w, img_info.rect_h));
163 | cv::Mat out;
164 | cv::resize(rect_img, out, cv::Size(img_info.origin_w, img_info.origin_h), 0, 0, cv::INTER_LINEAR);
165 |
166 | ///Adjust the bounding boxes
167 | float factor_x = out.cols *1.f / rect_img.cols;
168 | float factor_y = out.rows *1.f / rect_img.rows;
169 | for(auto &inst : insts){
170 | inst.min_pt.x -= img_info.rect_x;
171 | inst.min_pt.y -= img_info.rect_y;
172 | inst.max_pt.x -= img_info.rect_x;
173 | inst.max_pt.y -= img_info.rect_y;
174 |
175 | inst.min_pt.x *= factor_x;
176 | inst.min_pt.y *= factor_y;
177 | inst.max_pt.x *= factor_x;
178 | inst.max_pt.y *= factor_y;
179 | }
180 |
181 |
182 | return out;
183 | }
184 |
185 |
186 |
-------------------------------------------------------------------------------- /InstanceSegment/pipeline.h: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #ifndef INSTANCE_SEGMENT_PIPELINE_H
11 | #define INSTANCE_SEGMENT_PIPELINE_H
12 |
13 | #include
14 | #include
15 | #include
16 | #include
17 |
18 | #include "parameters.h"
19 | #include "utils.h"
20 |
21 | class Pipeline {
22 | public:
23 | using Ptr=std::shared_ptr<Pipeline>;
24 | Pipeline(){
25 |
26 | }
27 |
28 | template <typename ImageType>
29 | std::tuple<float,float> GetXYWHS(const ImageType &img);
30 | void* SetInputTensorCuda(cv::Mat &img);
31 | void SetBufferWithNorm(const cv::Mat &img, float *buffer);
32 | cv::Mat ProcessPad(cv::Mat &img);
33 | cv::Mat ProcessMask(cv::Mat &mask, std::vector<InstInfo> &insts);
34 |
35 | ImageInfo img_info;
36 | torch::Tensor input_tensor;
37 | private:
38 | };
39 |
40 |
41 | #endif //
42 |
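Note: with the KITTI configuration (1226x370 image, 1152x384 network input), `GetXYWHS()` picks the smaller ratio r_w = 1152/1226 ≈ 0.940, scales the image to 1152x348 (height rounded up to stay even), and pads (384-348)/2 = 18 rows at the top and bottom. A standalone sketch of that letterbox geometry:

```cpp
#include <cstdio>

int main()
{
    const int in_w = 1226, in_h = 370;   // IMAGE_WIDTH / IMAGE_HEIGHT
    const int net_w = 1152, net_h = 384; // network input from the engine bindings
    float r_w = net_w / (in_w * 1.f);
    float r_h = net_h / (in_h * 1.f);
    int w, h, x, y;
    if (r_h > r_w) { // limited by width: scale by r_w, pad top and bottom
        w = net_w;
        h = static_cast<int>(r_w * in_h);
        if (h % 2 == 1) h++; // keep the padding symmetric
        x = 0;
        y = (net_h - h) / 2;
    } else {         // limited by height: scale by r_h, pad left and right
        w = static_cast<int>(r_h * in_w);
        if (w % 2 == 1) w++;
        h = net_h;
        x = (net_w - w) / 2;
        y = 0;
    }
    std::printf("rect: x=%d y=%d w=%d h=%d\n", x, y, w, h); // rect: x=0 y=18 w=1152 h=348
    return 0;
}
```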
-------------------------------------------------------------------------------- /InstanceSegment/solo.cpp: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #include "solo.h"
11 |
12 | using namespace std;
13 | using Slice=torch::indexing::Slice;
14 | using InterpolateFuncOptions=torch::nn::functional::InterpolateFuncOptions;
15 |
16 |
17 | torch::Tensor Solov2::MatrixNMS(torch::Tensor &seg_masks,torch::Tensor &cate_labels,torch::Tensor &cate_scores,torch::Tensor &sum_mask)
18 | {
19 | int n_samples=cate_labels.sizes()[0];
20 |
21 | //seg_masks.shape [n,h,w] -> [n,h*w]
22 | seg_masks = seg_masks.reshape({n_samples,-1}).to(torch::kFloat);
23 |
24 | ///Inner product between every pair of instances, i.e. the number of intersecting pixels
25 | auto inter_matrix=torch::mm(seg_masks,seg_masks.transpose(1,0));
26 | auto sum_mask_x=sum_mask.expand({n_samples,n_samples});
27 |
28 | ///Pairwise IoU between instances
29 | auto iou_matrix = (inter_matrix / (sum_mask_x + sum_mask_x.transpose(1,0) - inter_matrix ) ).triu(1);
30 | auto cate_label_x = cate_labels.expand({n_samples,n_samples});
31 |
32 | auto label_matrix= (cate_label_x==cate_label_x.transpose(1,0)).to(torch::kFloat).triu(1);
33 |
34 | ///Compute the IoU compensation
35 | auto compensate_iou = std::get<0>( (iou_matrix * label_matrix).max(0) );//max() returns a tuple of two tensors (max values, max indices)
36 | compensate_iou = compensate_iou.expand({n_samples,n_samples}).transpose(1,0);
37 | auto decay_iou = iou_matrix * label_matrix;
38 |
39 | ///Compute the decay coefficient for each instance score
40 | torch::Tensor decay_coefficient;
41 | if(Config::kSoloNmsKernel == "gaussian"){
42 | auto decay_matrix = torch::exp(-1 * Config::kSoloNmsSigma * (decay_iou.pow(2)));
43 | auto compensate_matrix= torch::exp(-1 * Config::kSoloNmsSigma * (compensate_iou.pow(2)));
44 | decay_coefficient = std::get<0>( (decay_matrix / compensate_matrix).min(0) );
45 | }
46 | else if(Config::kSoloNmsKernel == "linear"){
47 | auto decay_matrix = (1-decay_iou) / (1-compensate_iou) ;
48 | decay_coefficient = std::get<0>( (decay_matrix).min(0) );
49 | }
50 | else{
51 | throw std::runtime_error("unsupported kSoloNmsKernel:"+Config::kSoloNmsKernel);
52 | }
53 | ///Update the scores
54 | auto cate_scores_update = cate_scores * decay_coefficient;
55 | return cate_scores_update;
56 | }
57 |
58 |
59 |
60 | void Solov2::GetSegTensor(std::vector<torch::Tensor> &outputs, ImageInfo& img_info, torch::Tensor &mask_tensor,
61 | std::vector<InstInfo> &insts)
62 | {
63 | torch::Device device = outputs[0].device();
64 |
65 | constexpr int batch_index=0;
66 | const int kNumStage=kSoloNumGrids.size();//the FPN outputs 5 levels
67 |
68 | auto kernel_tensor=outputs[0][batch_index].view({kSoloTensorChannel, -1}).permute({1, 0});
69 | for(int i=1; i < kNumStage; ++i){
70 | auto kt=outputs[i][batch_index].view({kSoloTensorChannel, -1}); //kt has shape (128, h*w)
71 | kernel_tensor = torch::cat({kernel_tensor,kt.permute({1,0})},0);
72 | }
73 | constexpr int kChannel=80;
74 | auto cate_tensor=outputs[kNumStage][batch_index].view({kChannel, -1}).permute({1, 0});
75 | for(int i= kNumStage + 1; i < 2 * kNumStage; ++i){
76 | auto ct=outputs[i][batch_index].view({kChannel, -1}); //ct has shape (h*w, 80)
77 | cate_tensor = torch::cat({cate_tensor,ct.permute({1,0})},0);
78 | }
79 | auto feat_tensor=outputs[2 * kNumStage][batch_index];
80 |
81 | const int kFeatH=(int)feat_tensor.sizes()[1];
82 | const int kFeatW=(int)feat_tensor.sizes()[2];
83 | const int kPredNum=(int)cate_tensor.sizes()[0];//total number of predictions (3872)
84 |
85 | ///Filter out instances whose score is below the threshold (0.1)
86 | auto inds= cate_tensor > Config::kSoloScoreThr;
87 | if(inds.sum(torch::IntArrayRef({0,1})).item().toInt() == 0){
88 | WarnLog("inds.sum(dims) == 0");
89 | return;
90 | }
91 | cate_tensor=cate_tensor.masked_select(inds);
92 |
93 | ///Gather all predictions above the threshold; each element inds[i,j] says prediction i belongs to class j
94 | inds=inds.nonzero();
95 | ///Class label of each instance
96 | auto cate_labels=inds.index({"...",1});
97 | ///Kernel predictions that pass the threshold
98 | auto pred_index=inds.index({"...",0});
99 | auto kernel_preds=kernel_tensor.index({pred_index});
100 |
101 | DebugLog("After kSoloScoreThr: {}",cate_labels.sizes()[0]);
102 | /*for(int i=0;i ... */
129 | auto seg_masks=seg_preds > Config::kSoloMaskThr;
130 | auto sum_masks=seg_masks.sum({1,2}).to(torch::kFloat);
131 |
132 | ///Filter out instances with too few pixels according to the strides
133 | auto keep=sum_masks > strides;
134 | if(keep.sum(0).item().toInt()==0){
135 | cerr<<"keep.sum(0) == 0"<<endl;
156 | if(sort_inds.sizes()[0] > Config::kSoloNmsPre){
157 | sort_inds=sort_inds.index({torch::indexing::Slice(torch::indexing::None,Config::kSoloNmsPre)});
158 | }
159 | seg_masks=seg_masks.index({sort_inds,"..."});
160 | seg_preds=seg_preds.index({sort_inds,"..."});
161 | sum_masks=sum_masks.index({sort_inds});
162 | cate_tensor=cate_tensor.index({sort_inds});
163 | cate_labels=cate_labels.index({sort_inds});
164 |
165 | ///Run Matrix NMS
166 | auto cate_scores = MatrixNMS(seg_masks,cate_labels,cate_tensor,sum_masks);
167 |
168 | ///Filter the results with the updated scores
169 | keep = cate_scores >= Config::kSoloUpdateThr;
170 | if(keep.sum(0).item().toInt() == 0){
171 | cout<<"keep.sum(0) == 0"<<endl;
187 | if(sort_inds.sizes()[0] > Config::kSoloMaxPerImg){
188 | sort_inds=sort_inds.index({torch::indexing::Slice(torch::indexing::None,Config::kSoloMaxPerImg)});
189 | }
190 | seg_preds=seg_preds.index({sort_inds,"..."});
191 | cate_scores=cate_scores.index({sort_inds});
192 | cate_labels=cate_labels.index({sort_inds});
193 | sum_masks = sum_masks.index({sort_inds});
194 |
195 | DebugLog("seg_preds.dims:{}", Dims2Str(seg_preds.sizes()));
196 |
197 | ///Bilinearly upsample the masks
198 | static auto options=InterpolateFuncOptions().mode(torch::kBilinear).align_corners(true);
199 | auto op1=options.size(std::vector<int64_t>({kFeatH * 4, kFeatW * 4}));
200 | seg_preds = torch::nn::functional::interpolate(seg_preds.unsqueeze(0),op1);
201 |
202 | ///Crop and rescale the masks to the original image size
203 | seg_preds =seg_preds.index({"...",Slice(img_info.rect_y,img_info.rect_y+img_info.rect_h),
204 | Slice(img_info.rect_x,img_info.rect_x+img_info.rect_w)});
205 |
206 | auto op2=options.size(std::vector<int64_t>({img_info.origin_h, img_info.origin_w}));
207 | seg_preds = torch::nn::functional::interpolate(seg_preds,op2);
208 | seg_preds=seg_preds.squeeze(0);
209 | ///Thresholding
210 | mask_tensor = seg_preds > Config::kSoloMaskThr;
211 |
212 | ///Compute the bounding box of each instance from its mask
213 | for(int i=0;i<mask_tensor.sizes()[0];++i){
214 | auto nz=mask_tensor[i].nonzero();
215 | auto max_xy =std::get<0>( torch::max(nz,0) );
216 | auto min_xy =std::get<0>( torch::min(nz,0) );
217 |
218 | InstInfo inst;
219 | inst.id = i;
220 | inst.label_id =cate_labels[i].item().toInt();
221 | inst.name = Config::CocoLabelVector[inst.label_id];
222 | inst.max_pt.x = max_xy[1].item().toInt();
223 | inst.max_pt.y = max_xy[0].item().toInt();
224 | inst.min_pt.x = min_xy[1].item().toInt();
225 | inst.min_pt.y = min_xy[0].item().toInt();
226 | inst.rect = cv::Rect2f(inst.min_pt,inst.max_pt);
227 | inst.prob = cate_scores[i].item().toFloat();
228 | insts.push_back(inst);
229 | }
230 | }
231 |
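Note: with the gaussian kernel, `MatrixNMS()` decays an instance's score by the minimum, over all higher-scoring same-class instances, of exp(-sigma*iou^2)/exp(-sigma*compensate^2). A scalar sketch of one such factor with sigma = 2.0 (the SOLO_NMS_SIGMA value in config.yaml); the IoU values are made up for illustration:

```cpp
#include <cstdio>
#include <cmath>

int main()
{
    const float sigma = 2.f;  // SOLO_NMS_SIGMA
    float decay_iou = 0.8f;   // IoU with a higher-scoring instance of the same class
    float compensate = 0.1f;  // largest IoU that higher-scoring instance itself suffers
    float coeff = std::exp(-sigma * decay_iou * decay_iou)
                / std::exp(-sigma * compensate * compensate);
    std::printf("decay coefficient = %.3f\n", coeff); // ~0.284: near-duplicates are strongly suppressed
    return 0;
}
```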
-------------------------------------------------------------------------------- /InstanceSegment/solo.h: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #ifndef INSTANCE_SEGMENT_SOLO_H
11 | #define INSTANCE_SEGMENT_SOLO_H
12 |
13 | #include
14 | #include
15 | #include
16 | #include
17 |
18 | #include "parameters.h"
19 | #include "utils.h"
20 |
21 | class Solov2 {
22 | public:
23 | using Ptr=std::shared_ptr<Solov2>;
24 | Solov2(){
25 | size_trans_=torch::from_blob(kSoloNumGrids.data(), {int(kSoloNumGrids.size())}, torch::kFloat).clone();
26 | size_trans_=size_trans_.pow(2).cumsum(0);
27 | }
28 | static torch::Tensor MatrixNMS(torch::Tensor &seg_masks,torch::Tensor &cate_labels,torch::Tensor &cate_scores,torch::Tensor &sum_mask);
29 | void GetSegTensor(std::vector<torch::Tensor> &outputs, ImageInfo& img_info, torch::Tensor &mask_tensor, std::vector<InstInfo> &insts);
30 | private:
31 | torch::Tensor size_trans_;
32 | };
33 |
34 |
35 | #endif
36 |
-------------------------------------------------------------------------------- /InstanceSegment/utils.cpp: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #include "utils.h"
11 |
12 | void DrawText(cv::Mat &img, const std::string &str, const cv::Scalar &color, const cv::Point& pos, float scale, int thickness, bool reverse) {
13 | auto t_size = cv::getTextSize(str, cv::FONT_HERSHEY_SIMPLEX, scale, thickness, nullptr);
14 | cv::Point bottom_left, upper_right;
15 | if (reverse) {
16 | upper_right = pos;
17 | bottom_left = cv::Point(upper_right.x - t_size.width, upper_right.y + t_size.height);
18 | } else {
19 | bottom_left = pos;
20 | upper_right = cv::Point(bottom_left.x + t_size.width, bottom_left.y - t_size.height);
21 | }
22 |
23 | cv::rectangle(img, bottom_left, upper_right, color, -1);
24 | cv::putText(img, str, bottom_left, cv::FONT_HERSHEY_SIMPLEX, scale, cv::Scalar(255, 255, 255),thickness);
25 | }
26 |
27 | void DrawBbox(cv::Mat &img, const cv::Rect2f& bbox, const std::string &label, const cv::Scalar &color) {
28 | cv::rectangle(img, bbox, color);
29 | if (!label.empty()) {
30 | DrawText(img, label, color, bbox.tl());
31 | }
32 | }
33 |
34 |
35 | float GetBoxIoU(const cv::Point2f &box1_minPt, const cv::Point2f &box1_maxPt,
36 | const cv::Point2f &box2_minPt, const cv::Point2f &box2_maxPt){
37 |
38 | cv::Point2f center1 = (box1_minPt+box1_maxPt)/2.f;
39 | cv::Point2f center2 = (box2_minPt+box2_maxPt)/2.f;
40 | float w1 = box1_maxPt.x - (float)box1_minPt.x;
41 | float h1 = box1_maxPt.y - (float)box1_minPt.y;
42 | float w2 = box2_maxPt.x - (float)box2_minPt.x;
43 | float h2 = box2_maxPt.y - (float)box2_minPt.y;
44 |
45 | if(std::abs(center1.x - center2.x) >= (w1/2+w2/2) || std::abs(center1.y - center2.y) >= (h1/2+h2/2)){
46 | return 0;
47 | }
48 |
49 | float inter_w = w1 + w2 - (std::max(box1_maxPt.x, box2_maxPt.x) - std::min(box1_minPt.x, box2_minPt.x));
50 | float inter_h = h1 + h2 - (std::max(box1_maxPt.y, box2_maxPt.y) - std::min(box1_minPt.y, box2_minPt.y));
51 |
52 | return (inter_h*inter_w) / (w1*h1 + w2*h2 - inter_h*inter_w);
53 | }
54 |
55 |
56 | /**
57 | * Compute the IoU between two boxes
58 | * @param bb_test
59 | * @param bb_gt
60 | * @return
61 | */
62 | float GetBoxIoU(const cv::Rect2f &bb_test, const cv::Rect2f &bb_gt) {
63 | auto in = (bb_test & bb_gt).area();
64 | auto un = bb_test.area() + bb_gt.area() - in;
65 | if (un < DBL_EPSILON)
66 | return 0;
67 | return in / un;
68 | }
69 |
70 |
71 | cv::Scalar ColorMap(int64_t n) {
72 | auto bit_get = [](int64_t x, int64_t i) {
73 | return x & (1 << i);
74 | };
75 |
76 | int64_t r = 0, g = 0, b = 0;
77 | int64_t i = n;
78 | for (int64_t j = 7; j >= 0; --j) {
79 | r |= bit_get(i, 0) << j;
80 | g |= bit_get(i, 1) << j;
81 | b |= bit_get(i, 2) << j;
82 | i >>= 3;
83 | }
84 | return cv::Scalar(b, g, r);
85 | }
-------------------------------------------------------------------------------- /InstanceSegment/utils.h: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #ifndef INSTANCE_SEGMENT_UTILS_H
11 | #define INSTANCE_SEGMENT_UTILS_H
12 |
13 | #include
14 | #include
15 | #include
16 | #include
17 |
18 | #include
19 | #include
20 | #include
21 | #include
22 |
23 | #include "parameters.h"
24 |
25 |
26 | class TicToc{
27 | public:
28 | TicToc(){
29 | Tic();
30 | }
31 |
32 | void Tic(){
33 | start_ = std::chrono::system_clock::now();
34 | }
35 |
36 | double Toc(){
37 | end_ = std::chrono::system_clock::now();
38 | std::chrono::duration<double> elapsed_seconds = end_ - start_;
39 | return elapsed_seconds.count() * 1000;
40 | }
41 |
42 | double TocThenTic(){
43 | auto t= Toc();
44 | Tic();
45 | return t;
46 | }
47 |
48 | void TocPrintTic(const char* str){
49 | cout << str << ":" << Toc() << " ms" << endl;
50 | Tic();
51 | }
52 |
53 | private:
54 | std::chrono::time_point<std::chrono::system_clock> start_, end_;
55 | };
56 |
57 |
58 | struct ImageInfo{
59 | int origin_h,origin_w;
60 | ///Crop information of the image
61 | int rect_x, rect_y, rect_w, rect_h;
62 | };
63 |
64 |
65 | struct InstInfo{
66 | std::string name;
67 | int label_id;
68 | int id;
69 | int track_id;
70 | cv::Point2f min_pt,max_pt;
71 | cv::Rect2f rect;
72 | float prob;
73 | cv::Point2f mask_center;
74 | cv::Mat mask_cv;
75 | torch::Tensor mask_tensor;
76 | };
77 |
78 |
79 |
80 | template<typename T>
81 | static std::string Dims2Str(torch::ArrayRef<T> list){
82 | int i = 0;
83 | std::string text= "[";
84 | for(auto e : list) {
85 | if (i++ > 0) text+= ", ";
86 | text += std::to_string(e);
87 | }
88 | text += "]";
89 | return text;
90 | }
91 |
92 | static std::string Dims2Str(nvinfer1::Dims list){
93 | std::string text= "[";
94 | for(int i=0;i<list.nbDims;++i){
95 | if(i > 0) text+= ", ";
96 | text += std::to_string(list.d[i]);
97 | }
98 | text += "]";
99 | return text;
100 | }
101 |
102 |
103 | inline cv::Point2f operator*(const cv::Point2f &lp,const cv::Point2f &rp)
104 | {
105 | return {lp.x * rp.x,lp.y * rp.y};
106 | }
107 |
108 | template<typename MatrixType>
109 | inline std::string Eigen2Str(const MatrixType &m){
110 | std::string text;
111 | for(int i=0;i<m.rows();++i){
115 | if(m.rows()>1)
116 | text+="\n";
117 | }
118 | return text;
119 | }
120 |
121 |
122 | template
123 | inline std::string Vec2Str(const Eigen::Matrix &vec){
124 | return Eigen2Str(vec.transpose());
125 | }
126 |
127 |
128 | inline cv::Scalar_<unsigned int> GetRandomColor(){
129 | static std::default_random_engine rde;
130 | static std::uniform_int_distribution<unsigned int> color_rd(0,255);
131 | return {color_rd(rde),color_rd(rde),color_rd(rde)};
132 | }
133 |
134 |
135 | void DrawText(cv::Mat &img, const std::string &str, const cv::Scalar &color, const cv::Point& pos, float scale= 1.f, int thickness= 1, bool reverse = false);
136 |
137 | void DrawBbox(cv::Mat &img, const cv::Rect2f& bbox, const std::string &label = "", const cv::Scalar &color = {0, 0, 0});
138 |
139 |
140 | float GetBoxIoU(const cv::Point2f &box1_minPt, const cv::Point2f &box1_maxPt,
141 | const cv::Point2f &box2_minPt, const cv::Point2f &box2_maxPt);
142 |
143 | float GetBoxIoU(const cv::Rect2f &bb_test, const cv::Rect2f &bb_gt);
144 |
145 | cv::Scalar ColorMap(int64_t n);
146 |
147 |
148 | #endif
149 |
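Note: the rectangle overload of `GetBoxIoU()` relies on OpenCV's `operator&`, which returns the intersection of two rectangles. A standalone sketch:

```cpp
#include <opencv2/core.hpp>
#include <cstdio>

int main()
{
    cv::Rect2f a(0, 0, 10, 10), b(5, 5, 10, 10);
    float inter = (a & b).area();                     // 5x5 overlap = 25
    float iou = inter / (a.area() + b.area() - inter);
    std::printf("IoU = %.3f\n", iou);                 // 25 / 175 = 0.143
    return 0;
}
```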
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 ChenJianqu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Solov2-TensorRT-CPP
2 | In this repo, we deploy SOLOv2 with TensorRT in C++. See the [video](https://www.bilibili.com/video/BV1rQ4y1m7mx).
3 | ![solov2_cpp](https://github.com/chenjianqu/Solov2-TensorRT-CPP/blob/main/config/solov2_cpp.png)
4 |
5 |
6 |
7 | ## Requirements
8 |
9 | * Ubuntu 16.04/18.04/20.04
10 | * Cuda10.2
11 | * Cudnn8
12 | * TensorRT8.0.1
13 | * OpenCV 3.4
14 | * Libtorch 1.8.2
15 | * CMake 3.20
16 |
17 |
18 |
19 | ## Acknowledge
20 |
21 | [SOLO](https://github.com/wxinlong/solo_/)
22 | [SOLOv2.tensorRT](https://github.com/zhangjinsong3/SOLOv2.tensorRT)
23 |
24 |
25 |
26 | ## Getting Started
27 |
28 | **1. Install Solov2 from [SOLO](https://github.com/wxinlong/solo/)**
29 |
30 | Download it, and make sure it runs successfully.
31 |
32 |
33 |
34 | **2. Export the ONNX model from the original model**
35 |
36 | * **You can follow** [SOLOv2.tensorRT](https://github.com/zhangjinsong3/SOLOv2.tensorRT).
37 |
38 |
39 | * Use a pre-exported model
40 |
41 | [baidudisk](https://pan.baidu.com/s/1GNQROWqASUoyjUsFPGnRHg), Fetch Code: qdsm
42 |
43 |
44 | * Export the model yourself
45 |
46 | Before exporting, you have to modify some parts of the original SOLOv2 first:
47 |
48 | * 2.1. 
Modify `SOLO-master/mmdet/models/anchor_heads/solov2_head.py:154:0`: 49 | 50 | Original code of `solov2_head.py` is: 51 | 52 | ```python 53 | # Origin from SOLO 54 | x_range = torch.linspace(-1, 1, ins_feat.shape[-1], device=ins_feat.device) 55 | y_range = torch.linspace(-1, 1, ins_feat.shape[-2], device=ins_feat.device) 56 | y, x = torch.meshgrid(y_range, x_range) 57 | y = y.expand([ins_feat.shape[0], 1, -1, -1]) 58 | x = x.expand([ins_feat.shape[0], 1, -1, -1]) 59 | ``` 60 | 61 | change to: 62 | 63 | ```python 64 | #Modify for onnx export, frozen the input size = 800x800, batch size = 1 65 | size = {0: 100, 1: 100, 2: 50, 3: 25, 4: 25} 66 | feat_h, feat_w = ins_kernel_feat.shape[-2], ins_kernel_feat.shape[-1] 67 | feat_h, feat_w = int(feat_h.cpu().numpy() if isinstance(feat_h, torch.Tensor) else feat_h), int(feat_w.cpu().numpy() if isinstance(feat_w, torch.Tensor) else feat_w) 68 | x_range = torch.linspace(-1, 1, feat_w, device=ins_kernel_feat.device) 69 | y_range = torch.linspace(-1, 1, feat_h, device=ins_kernel_feat.device) 70 | y, x = torch.meshgrid(y_range, x_range) 71 | y = y.expand([1, 1, -1, -1]) 72 | x = x.expand([1, 1, -1, -1]) 73 | coord_feat = torch.cat([x, y], 1) 74 | ins_kernel_feat = torch.cat([ins_kernel_feat, coord_feat], 1) 75 | ``` 76 | 77 | 78 | 79 | * 2.2 Modify `SOLO-master/mmdet/models/detectors/single_stage_ins.py` 80 | 81 | In the function named `forward_dummy()`, add the forward_dummy of mask, such as : 82 | 83 | ```python 84 | def forward_dummy(self, img): 85 | x = self.extract_feat(img) 86 | outs = self.bbox_head(x) 87 | if self.with_mask_feat_head: 88 | mask_feat_pred = self.mask_feat_head( 89 | x[self.mask_feat_head.start_level:self.mask_feat_head.end_level + 1]) 90 | outs = (outs[0], outs[1], mask_feat_pred) 91 | return outs 92 | ``` 93 | 94 | 95 | 96 | * 2.3 Modify `SOLO-master/mmdet/models/mask_heads/mask_feat_head.py` 97 | 98 | In line 108 of `mask_feat_head.py`, original code is: 99 | 100 | ```python 101 | x_range = torch.linspace(-1, 1, input_feat.shape[-1], device=input_feat.device) 102 | y_range = torch.linspace(-1, 1, input_feat.shape[-2], device=input_feat.device) 103 | ``` 104 | 105 | change to: 106 | 107 | ```python 108 | feat_h, feat_w = input_feat.shape[-2], input_feat.shape[-1] # shape get tensor during onnx.export() 109 | feat_h, feat_w = int(feat_h.cpu().numpy() if isinstance(feat_h, torch.Tensor) else feat_h), \ 110 | int(feat_w.cpu().numpy() if isinstance(feat_w, torch.Tensor) else feat_w) 111 | x_range = torch.linspace(-1, 1, feat_w, device=input_feat.device) 112 | y_range = torch.linspace(-1, 1, feat_h, device=input_feat.device) 113 | ``` 114 | 115 | 116 | 117 | * 2.4 Export onnx model 118 | 119 | Move the `onnx_exporter.py` and `common.py` to the `SOLO/demo/`, then run 120 | 121 | ``` 122 | #kitti size 123 | python onnx_exporter.py ../configs/solov2/solov2_light_448_r34_fpn_8gpu_3x.py ../weights/SOLOv2_light_R34.onnx --checkpoint ../checkpoints/SOLOv2_LIGHT_448_R34_3x.pth --shape 384 1152 124 | ``` 125 | 126 | 127 | 128 | **3. 
Build the TensorRT model**
129 |
130 | First, edit the config file: `config.yaml`
131 | ```
132 | %YAML:1.0
133 |
134 | IMAGE_WIDTH: 1226
135 | IMAGE_HEIGHT: 370
136 |
137 | #SOLO
138 | ONNX_PATH: "/home/chen/ws/dynamic_ws/src/dynamic_vins/weights/solo/SOLOv2_light_R34_1152x384_cuda102.onnx"
139 | SERIALIZE_PATH: "/home/chen/ws/dynamic_ws/src/dynamic_vins/weights/solo/tensorrt_model_1152x384.bin"
140 |
141 | SOLO_NMS_PRE: 500
142 | SOLO_MAX_PER_IMG: 100
143 | SOLO_NMS_KERNEL: "gaussian"
144 | #SOLO_NMS_SIGMA=2.0
145 | SOLO_NMS_SIGMA: 2.0
146 | SOLO_SCORE_THR: 0.1
147 | SOLO_MASK_THR: 0.5
148 | SOLO_UPDATE_THR: 0.2
149 |
150 | LOG_PATH: "./segmentor_log.txt"
151 | LOG_LEVEL: "debug"
152 | LOG_FLUSH: "debug"
153 |
154 | DATASET_DIR: "/media/chen/EC4A17F64A17BBF0/datasets/kitti/odometry/colors/07/image_2/"
155 | WARN_UP_IMAGE_PATH: "/home/chen/CLionProjects/InstanceSegment/config/kitti.png"
156 | ```
157 | Then compile the CMake project:
158 | ```
159 | mkdir build && cd build
160 |
161 | cmake ..
162 |
163 | make -j10
164 | ```
165 |
166 | Finally, build the TensorRT model:
167 | ```
168 | cd ..
169 | ./build/build_model ./config/config.yaml
170 | ```
171 |
172 |
173 |
174 | **4. Run the demo**
175 |
176 | If you have the KITTI dataset, set the correct `DATASET_DIR` path in `config.yaml`, then run:
177 |
178 | ```
179 | ./build/segment ./config/config.yaml
180 | ```
181 |
182 | If you do not, and just want to run on a single image, set the correct image path `WARN_UP_IMAGE_PATH` in `config.yaml`, then run:
183 | ```
184 | ./build/demo ./config/config.yaml
185 | ```
186 |
187 |
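A quick way to verify the build is to deserialize the serialized engine again. The following hedged sketch (not part of this repo; the path is a placeholder) does only that:

```cpp
#include <NvInfer.h>
#include <NvInferPlugin.h>
#include <fstream>
#include <sstream>
#include <iostream>

class SimpleLogger : public nvinfer1::ILogger {
    void log(Severity severity, const char *msg) noexcept override {
        if (severity <= Severity::kWARNING) std::cout << msg << std::endl;
    }
};

int main()
{
    SimpleLogger logger;
    initLibNvInferPlugins(&logger, ""); // plugins must be registered before deserializing
    std::ifstream ifs("./tensorrt_model_1152x384.bin", std::ios::binary); // placeholder path
    std::stringstream ss;
    ss << ifs.rdbuf();
    std::string model = ss.str();
    auto *runtime = nvinfer1::createInferRuntime(logger);
    auto *engine = runtime->deserializeCudaEngine(model.data(), model.size());
    std::cout << (engine ? "engine deserialized OK" : "engine failed") << std::endl;
    return engine ? 0 : -1;
}
```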
-------------------------------------------------------------------------------- /build_model.cpp: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 |
11 | #include
12 | #include
13 |
14 | #include "NvInfer.h"
15 | #include "NvOnnxParser.h"
16 |
17 | #include "InstanceSegment/TensorRtSample/common.h"
18 | #include "InstanceSegment/parameters.h"
19 |
20 |
21 | using namespace std;
22 |
23 | struct InferDeleter{
24 | template <typename T>
25 | void operator()(T* obj) const{
26 | if (obj)
27 | obj->destroy();
28 | }
29 | };
30 |
31 |
32 |
33 | int Build()
34 | {
35 | cout<<"createInferBuilder"<<endl;
38 | auto builder=std::unique_ptr<nvinfer1::IBuilder, InferDeleter>(
39 | nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
40 | if(!builder)
41 | return -1;
42 |
43 | ///Create the network definition
44 | cout<<"createNetwork"<<endl;
45 | const auto flag=1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
46 | auto network=std::unique_ptr<nvinfer1::INetworkDefinition, InferDeleter>(
47 | builder->createNetworkV2(flag));
48 | if(!network)
49 | return -1;
50 |
51 | cout<<"createBuilderConfig"<<endl;
52 | auto config=std::unique_ptr<nvinfer1::IBuilderConfig, InferDeleter>(
53 | builder->createBuilderConfig());
54 | if(!config)
55 | return -1;
56 |
57 | ///Create the parser
58 | cout<<"createParser"<<endl;
59 | auto parser=std::unique_ptr<nvonnxparser::IParser, InferDeleter>(
60 | nvonnxparser::createParser(*network,sample::gLogger.getTRTLogger()));
61 | if(!parser)
62 | return -1;
63 |
64 | ///Read the model file
65 |
66 | cout << "parseFromFile:" << Config::kDetectorOnnxPath << endl;
67 | auto verbosity=sample::gLogger.getReportableSeverity();
68 | auto parsed=parser->parseFromFile(Config::kDetectorOnnxPath.c_str(), static_cast<int>(verbosity));
69 | if(!parsed)
70 | return -1;
71 |
72 | //set the per-layer workspace size
73 | config->setMaxWorkspaceSize(1_GiB);
74 | //use FP16 precision
75 | config->setFlag(nvinfer1::BuilderFlag::kFP16);
76 |
77 | cout<<"input shape:"<<network->getInput(0)->getName()<<" "<<network->getInput(0)->getDimensions()<<endl;
78 | cout<<"output shape:"<<network->getOutput(0)->getName()<<" "<<network->getOutput(0)->getDimensions()<<endl;
88 | auto engine=std::shared_ptr<nvinfer1::ICudaEngine>(
89 | builder->buildEngineWithConfig(*network,*config),InferDeleter());
90 |
91 | if(!engine)
92 | return -1;
93 |
94 | cout<<"serializeModel"<<endl;
95 | auto serializeModel=engine->serialize();
96 |
97 | //copy the serialized model into a string
98 | std::string serialize_str;
99 | serialize_str.resize(serializeModel->size());
100 | memcpy((void*)serialize_str.data(),serializeModel->data(),serializeModel->size());
101 | //write the string to the output file
102 | std::ofstream serialize_stream(Config::kDetectorSerializePath);
103 | serialize_stream<<serialize_str;
126 | sgLogger->critical(e.what());
127 | cerr<<e.what()<<endl;
-------------------------------------------------------------------------------- /demo.cpp: --------------------------------------------------------------------------------
1 | /*******************************************************
2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University
3 | *
4 | * This file is part of Solov2-TensorRT-CPP.
5 | *
6 | * Licensed under the MIT License;
7 | * you may not use this file except in compliance with the License.
8 | *******************************************************/
9 |
10 | #include
11 | #include
12 | #include "InstanceSegment/infer.h"
13 | #include "InstanceSegment/parameters.h"
14 | #include "InstanceSegment/utils.h"
15 |
16 | cv::Mat DrawSegment(cv::Mat &img, torch::Tensor &mask_tensor, std::vector<InstInfo> &insts)
17 | {
18 | cv::Mat img_show = img.clone();
19 | if(!insts.empty()){
20 | auto mask_size=cv::Size(img_show.cols, img_show.rows);
21 | mask_tensor = mask_tensor.to(torch::kInt8).abs().clamp(0,1);
22 | ///Merge all the instance masks into one
23 | auto merge_tensor = (mask_tensor.sum(0).clamp(0,1)*255).to(torch::kUInt8).to(torch::kCPU);
24 | auto mask = cv::Mat(mask_size,CV_8UC1,merge_tensor.data_ptr()).clone();
25 | cv::cvtColor(mask,mask,CV_GRAY2BGR);
26 | cv::scaleAdd(mask, 0.5, img_show, img_show);
27 |
28 | for(auto &inst: insts){
29 | auto color = GetRandomColor();
30 | DrawText(img_show, fmt::format("{}:{:.2f}", Config::CocoLabelVector[inst.label_id], inst.prob),
31 | color, inst.rect.tl());
32 | cv::rectangle(img_show, inst.min_pt, inst.max_pt, color, 1);
33 | }
34 | }
35 | return img_show;
36 | }
37 |
38 | int main(int argc, char **argv)
39 | {
40 | if(argc != 2){
41 | cerr<<"please input: [config file]"<<endl;
68 | torch::Tensor mask_tensor;
69 | std::vector<InstInfo> insts_info;
70 | infer->Forward(img0, mask_tensor, insts_info);
71 |
72 | fmt::print("insts_info.size():{}\n",insts_info.size());
73 | fmt::print("infer time:{} ms\n", ticToc.Toc());
74 |
75 | cv::Mat img_show=DrawSegment(img0,mask_tensor,insts_info);
76 |
77 | 
cv::imshow("raw", img_show); 78 | cv::waitKey(0); 79 | 80 | return 0; 81 | } 82 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (C) 2022, Chen Jianqu, Shanghai University 3 | * 4 | * This file is part of Solov2-TensorRT-CPP. 5 | * 6 | * Licensed under the MIT License; 7 | * you may not use this file except in compliance with the License. 8 | *******************************************************/ 9 | 10 | #include 11 | 12 | #include 13 | 14 | #include "InstanceSegment/infer.h" 15 | #include "InstanceSegment/parameters.h" 16 | #include "InstanceSegment/utils.h" 17 | 18 | 19 | cv::Mat DrawSegment(cv::Mat &img, torch::Tensor &mask_tensor, std::vector &insts) 20 | { 21 | cv::Mat img_show = img.clone(); 22 | if(!insts.empty()){ 23 | auto mask_size=cv::Size(img_show.cols, img_show.rows); 24 | mask_tensor = mask_tensor.to(torch::kInt8).abs().clamp(0,1); 25 | ///计算合并的mask 26 | auto merge_tensor = (mask_tensor.sum(0).clamp(0,1)*255).to(torch::kUInt8).to(torch::kCPU); 27 | auto mask = cv::Mat(mask_size,CV_8UC1,merge_tensor.data_ptr()).clone(); 28 | cv::cvtColor(mask,mask,CV_GRAY2BGR); 29 | cv::scaleAdd(mask, 0.5, img_show, img_show); 30 | 31 | for(auto &inst: insts){ 32 | auto color = GetRandomColor(); 33 | DrawText(img_show, fmt::format("{}:{:.2f}", Config::CocoLabelVector[inst.label_id], inst.prob), 34 | color, inst.rect.tl()); 35 | cv::rectangle(img_show, inst.min_pt, inst.max_pt, color, 1); 36 | } 37 | } 38 | return img_show; 39 | } 40 | 41 | 42 | int main(int argc, char **argv) 43 | { 44 | if(argc != 2){ 45 | cerr<<"please input: [config file]"< insts_info; 77 | infer->Forward(img0, mask_tensor, insts_info); 78 | 79 | fmt::print("insts_info.size():{}\n",insts_info.size()); 80 | fmt::print("infer time:{} ms\n", tt.Toc()); 81 | 82 | cv::Mat img_show = DrawSegment(img0,mask_tensor,insts_info); 83 | cv::imshow("img", img_show); 84 | if(auto order=(cv::waitKey(1) & 0xFF); order == 'q') 85 | break; 86 | else if(order==' ') 87 | cv::waitKey(0); 88 | } 89 | 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /onnx_exporter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on 20-9-25 4 | 5 | @author: zjs (01376022) 6 | 7 | export onnx model from torch checkpoint 8 | 9 | @Notice: use onnx-simplifier may help run successfully! SEE: https://github.com/aim-uofa/AdelaiDet/issues/83#issuecomment-635718543 10 | 11 | @Notice: 在不同的位置import tensorrt 可能会出现不同的结果,尽量在开始的时候先import tenosrrt 12 | 13 | @Notice: GN normalize method seems not well implemented on onnx and tensorrt! SEE: https://github.com/aim-uofa/AdelaiDet/issues/31#issuecomment-625217956 ; https://github.com/aim-uofa/AdelaiDet/pull/25#issue-401785580 14 | (but I run well on onnxruntime with version onnx==1.8.0, onnxruntime==1.6.0, pytorch==1.4.0, opset==11) 15 | 16 | @Notice: 加载反序列化engine前,确保运行 trt.init_libnvinfer_plugins(TRT_LOGGER, '') 17 | 18 | @TODO: 1. tensorrt 出来的output顺序与pytorch定义的不一致 19 | @TODO: 2. 支持动态输入, backbone+neck 动态输入ok, head出现以下error(应该是forward_single中的某个操作导致,怀疑是linspace) 20 | ``` 21 | [TensorRT] INTERNAL ERROR: Assertion failed: validateInputsCutensor(src, dst) 22 | ../rtSafe/cuda/cutensorReformat.cpp:227 23 | Aborting... 
24 | [TensorRT] VERBOSE: Builder timing cache: created 115 entries, 415 hit(s) 25 | [TensorRT] ERROR: ../rtSafe/cuda/cutensorReformat.cpp (227) - Assertion Error in executeCutensor: 0 (validateInputsCutensor(src, dst)) 26 | 27 | ``` 28 | 29 | 30 | @Usage: 31 | python onnx_exporter.py ../configs/solov2/solov2_light_448_r34_fpn_8gpu_3x.py weights/SOLOv2_light_R34.onnx --checkpoint ../weights/SOLOv2_LIGHT_448_R34_3x.pth --shape 448 672 32 | 33 | """ 34 | import torch 35 | import argparse 36 | import numpy as np 37 | 38 | from demo import common 39 | 40 | from mmdet.apis import init_detector 41 | 42 | input_names = ['input'] 43 | # output_names = ['output'] 44 | # output_names = ['C0', 'C1', 'C2', 'C3'] # for backbone 45 | # output_names = ['C0', 'C1', 'C2', 'C3', 'C4'] # for backbone + neck 46 | # output_names = ['cate_pred_0', 'cate_pred_1', 'cate_pred_2', 'cate_pred_3', 'cate_pred_4', 47 | # 'kernel_pred_0', 'kernel_pred_1', 'kernel_pred_2', 'kernel_pred_3', 'kernel_pred_4', 48 | # 'seg_pred'] # Origin 49 | output_names = ['cate_pred', 'kernel_pred', 'seg_pred'] # add permute & concate 50 | 51 | 52 | def parse_args(): 53 | parser = argparse.ArgumentParser(description='Export a torch model to onnx model') 54 | parser.add_argument('config', help='train config file path') 55 | parser.add_argument('out', help='output ONNX file') 56 | parser.add_argument('--checkpoint', help='checkpoint file of the model') 57 | parser.add_argument('--shape', type=int, nargs='+', default=[224], help='input image size') 58 | args = parser.parse_args() 59 | return args 60 | 61 | 62 | def to_list(inputs): 63 | outputs = [] 64 | for item in inputs: 65 | if isinstance(item, tuple) or isinstance(item, list): 66 | for tp in item: 67 | if isinstance(tp, tuple) or isinstance(tp, list): 68 | for lt in tp: 69 | if isinstance(lt, tuple) or isinstance(lt, list): 70 | print("result is still packed strucure") 71 | elif isinstance(lt, torch.Tensor): 72 | print("collect tensor:", lt.shape) 73 | outputs.append(lt) 74 | elif isinstance(tp, torch.Tensor): 75 | print("collect tensor:", tp.shape) 76 | outputs.append(tp) 77 | elif isinstance(item, torch.Tensor): 78 | print("collect tensor:", item.shape) 79 | outputs.append(item) 80 | print("output item count: %d" % len(outputs)) 81 | return outputs 82 | 83 | 84 | def convert2onnx(args, dummy_input): 85 | ''' Convert torch model to onnx model ''' 86 | # build the model from a config file and a checkpoint file 87 | model = init_detector(args.config, args.checkpoint, device='cuda:0') 88 | 89 | if hasattr(model, 'forward_dummy'): 90 | # model.forward = model.extract_feat 91 | model.forward = model.forward_dummy 92 | print("forward_dummy") 93 | else: 94 | raise NotImplementedError( 95 | 'ONNX exporting is not currently supported with {}'. 
96 | format(model.__class__.__name__))
97 | # torch.onnx.export(model, dummy_input, args.out, input_names=['input'], output_names=['outputs'], verbose=True, opset_version=11)
98 | torch.onnx.export(model, dummy_input, args.out, input_names=input_names, output_names=output_names, verbose=True, opset_version=11)
99 | # traced_script_module = torch.jit.trace(model, dummy_input)
100 | # traced_script_module.save(args.out.replace('onnx', 'pt'))
101 |
102 |
103 | def check(args, dummy_input, check_onnx=True, check_trt=True):
104 | ''' Check the converted onnx model on onnxruntime and tensorrt '''
105 | # ======================= Run pytorch model =========================================
106 | # build the model from a config file and a checkpoint file
107 | model = init_detector(args.config, args.checkpoint, device='cuda:0')
108 |
109 | if hasattr(model, 'forward_dummy'):
110 | # model.forward = model.extract_feat
111 | model.forward = model.forward_dummy
112 | else:
113 | raise NotImplementedError(
114 | 'ONNX exporting is not currently supported with {}'.
115 | format(model.__class__.__name__))
116 |
117 | with torch.no_grad():
118 | torch_output = model(dummy_input)
119 | torch_output = to_list(torch_output)
120 |
121 | # ======================= Run onnx on onnxruntime =========================================
122 | if check_onnx:
123 | import onnxruntime as rt
124 | print("Load onnx model from {}.".format(args.out))
125 | sess = rt.InferenceSession(args.out)
126 |
127 | # check input and output
128 | for in_blob in sess.get_inputs():
129 | if in_blob.name not in input_names:
130 | print("Input blob name does not match that in the model")
131 | else:
132 | print("Input {}, shape {} and type {}".format(in_blob.name, in_blob.shape, in_blob.type))
133 | for out_blob in sess.get_outputs():
134 | if out_blob.name not in output_names:
135 | print("Output blob name does not match that in the model")
136 | else:
137 | print("Output {}, shape {} and type {}".format(out_blob.name, out_blob.shape, out_blob.type))
138 |
139 | onnx_output = sess.run(output_names, {input_names[0]: dummy_input.cpu().numpy()})
140 |
141 | print("onnxruntime")
142 | for i, out in enumerate(onnx_output):
143 | try:
144 | np.testing.assert_allclose(torch_output[i].cpu().detach().numpy(), out, rtol=1e-03, atol=2e-04)
145 | except AssertionError as e:
146 | print("output {} mismatch {}".format(output_names[i], e))
147 | continue
148 | print("output {} match\n".format(output_names[i]))
149 |
150 | # ======================= Run onnx on tensorrt =========================================
151 | if check_trt:
152 | input_shapes = ((1, 3, args.shape[0], args.shape[1]),) # explicit shape
153 | # input_shapes = ((1, 3, 448, 448), (1, 3, 608, 608), (1, 3, 768, 768)) # dynamic shape
154 | # shape_matrix = [
155 | # [1, 3, args.shape[0], args.shape[1]],
156 | # [1, 40, 40, 80],
157 | # [1, 36, 36, 80],
158 | # [1, 24, 24, 80],
159 | # [1, 16, 16, 80],
160 | # [1, 12, 12, 80],
161 | # [1, 128, 40, 40],
162 | # [1, 128, 36, 36],
163 | # [1, 128, 24, 24],
164 | # [1, 128, 16, 16],
165 | # [1, 128, 12, 12],
166 | # [1, 128, args.shape[0] // 4, args.shape[1] // 4]
167 | # ]
168 | # shape_matrix = [
169 | # [1, 3, args.shape[0], args.shape[1]],
170 | # [3872, 80],
171 | # [3872, 128],
172 | # [1, 128, args.shape[0] // 4, args.shape[1] // 4]
173 | # ]
174 | with common.get_engine(args.out, args.out.replace(".onnx", ".engine"), input_shapes=input_shapes, force_rebuild=False) \
175 | as engine, engine.create_execution_context() as context:
176 | # Notice: Here we only allocate device memory for speed up
177 |
178 | # DYNAMIC shape
179 | # context.active_optimization_profile = 0
180 | # [context.set_binding_shape(x, tuple(y)) for x, y in enumerate(shape_matrix)]
181 | # inputs, outputs, bindings, stream = common.allocate_buffersV2(engine, context)
182 |
183 | # EXPLICIT shape
184 | inputs, outputs, bindings, stream = common.allocate_buffers(engine)
185 |
186 | # The common.do_inference function will copy the input to the GPU before executing.
187 | inputs[0].host = dummy_input.cpu().numpy() # for torch.Tensor
188 | # ==> Or set device input to the data.
189 | # in this mode, common.do_inference function should not copy inputs.host to inputs.device anymore.
190 | # c_type_pointer = ctypes.c_void_p(int(inputs[0].device))
191 | # x.cpu().numpy().copy_to_external(c_type_pointer)
192 | trt_outputs = common.do_inferenceV2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream,
193 | batch_size=1, h_=args.shape[0], w_=args.shape[1])
194 | print("tensorrt")
195 | # TODO: tensorrt output order is different from pytorch? Origin
196 | # Origin
197 | # ids = [8, 9, 7, 6, 5, 3, 4, 2, 1, 0, 10]
198 | # Add permute & concate
199 | ids = [1, 0, 2]
200 | for i, (trt_output, id) in enumerate(zip(trt_outputs, ids)):
201 | try:
202 | np.testing.assert_allclose(torch_output[id].cpu().detach().numpy().reshape(-1), trt_output, rtol=1e-03, atol=2e-04)
203 | except AssertionError as e:
204 | print("output {} mismatch {}".format(output_names[id], e))
205 | continue
206 | print("output {} match\n".format(output_names[id]))
207 |
208 | print("script done")
209 |
210 |
211 | if __name__ == '__main__':
212 | args = parse_args()
213 |
214 | batch_size=1
215 |
216 | if len(args.shape) == 1:
217 | img_shape = (batch_size, 3, args.shape[0], args.shape[0])
218 | elif len(args.shape) == 2:
219 | img_shape = (batch_size, 3) + tuple(args.shape)
220 | elif len(args.shape) == 4:
221 | img_shape = tuple(args.shape)
222 | else:
223 | raise ValueError('invalid input shape')
224 | dummy_input = torch.randn(*img_shape, device='cuda:0')
225 |
226 | convert2onnx(args, dummy_input)
227 | #check(args, dummy_input, check_onnx=False, check_trt=True)
--------------------------------------------------------------------------------
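Note: as the @TODO in the docstring says, the TensorRT output order can differ from the PyTorch definition, which is why `check()` remaps the outputs with `ids = [1, 0, 2]`. A hedged C++ sketch that prints an engine's binding order so the mapping can be verified (TensorRT 8 binding API):

```cpp
#include <NvInfer.h>
#include <iostream>

// Print the index, name and direction of every binding of a deserialized engine.
void PrintBindings(const nvinfer1::ICudaEngine &engine)
{
    for (int i = 0; i < engine.getNbBindings(); ++i) {
        std::cout << i << ": " << engine.getBindingName(i)
                  << (engine.bindingIsInput(i) ? " (input)" : " (output)") << std::endl;
    }
}
```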