├── .gitignore ├── LICENSE ├── README.md ├── TensorRT_test.sln ├── common ├── BatchStream.h ├── argsParser.h ├── buffers.h ├── common.h ├── dumpTFWts.py ├── half.h ├── sampleConfig.h └── windows │ ├── getopt.c │ └── getopt.h └── src ├── sampleCharRNN ├── sampleCharRNN.cpp ├── sampleCharRNN.vcxproj ├── sampleCharRNN.vcxproj.filters └── sampleCharRNN.vcxproj.user ├── sampleDetection ├── sampleDetection.cpp ├── sampleDetection.vcxproj ├── sampleDetection.vcxproj.filters └── sampleDetection.vcxproj.user ├── sampleFasterRCNN ├── README.md ├── car-0.990063.ppm ├── dog-0.997705.ppm ├── dog-0.999603.ppm ├── factoryFasterRCNN.h ├── horse-0.994670.ppm ├── person-0.974725.ppm ├── sampleFasterRCNN.cpp ├── sampleFasterRCNN.vcxproj ├── sampleFasterRCNN.vcxproj.filters └── sampleFasterRCNN.vcxproj.user ├── sampleGoogleNet ├── README.md ├── sampleGoogleNet.cpp ├── sampleGoogleNet.vcxproj ├── sampleGoogleNet.vcxproj.filters └── sampleGoogleNet.vcxproj.user ├── sampleINT8 ├── BatchStream.h ├── CalibrationTablemnist ├── LegacyCalibrator.h ├── int8_caffe.patch ├── sampleINT8.cpp ├── sampleINT8.vcxproj ├── sampleINT8.vcxproj.filters └── sampleINT8.vcxproj.user ├── sampleLoadEngineStream ├── sampleLoadEngineStream.cpp ├── sampleLoadEngineStream.vcxproj ├── sampleLoadEngineStream.vcxproj.filters └── sampleLoadEngineStream.vcxproj.user ├── sampleMLP ├── README.txt ├── convert_weights.py ├── sampleMLP.cpp ├── sampleMLP.vcxproj ├── sampleMLP.vcxproj.filters ├── sampleMLP.vcxproj.user └── update_mlp.patch ├── sampleMNIST ├── README.md ├── sampleMNIST.cpp ├── sampleMNIST.vcxproj ├── sampleMNIST.vcxproj.filters └── sampleMNIST.vcxproj.user ├── sampleMNISTAPI ├── sampleMNISTAPI.cpp ├── sampleMNISTAPI.vcxproj ├── sampleMNISTAPI.vcxproj.filters └── sampleMNISTAPI.vcxproj.user ├── sampleMovieLens ├── README.txt ├── preprocess.py ├── sampleMovieLens.cpp ├── sampleMovieLens.vcxproj ├── sampleMovieLens.vcxproj.filters ├── sampleMovieLens.vcxproj.user └── sampleMovieLensTraining.patch ├── sampleNMT ├── README.txt ├── chptToBin.py ├── component.h ├── cudaError.h ├── data │ ├── benchmarkWriter.cpp │ ├── benchmarkWriter.h │ ├── bleuScoreWriter.cpp │ ├── bleuScoreWriter.h │ ├── dataReader.h │ ├── dataWriter.cpp │ ├── dataWriter.h │ ├── limitedSamplesDataReader.cpp │ ├── limitedSamplesDataReader.h │ ├── sequenceProperties.h │ ├── textReader.cpp │ ├── textReader.h │ ├── textWriter.cpp │ ├── textWriter.h │ ├── vocabulary.cpp │ └── vocabulary.h ├── deviceBuffer.h ├── model │ ├── alignment.h │ ├── attention.h │ ├── beamSearchPolicy.cpp │ ├── beamSearchPolicy.h │ ├── componentWeights.cpp │ ├── componentWeights.h │ ├── context.cpp │ ├── context.h │ ├── debugUtil.cpp │ ├── debugUtil.h │ ├── decoder.h │ ├── embedder.h │ ├── encoder.h │ ├── likelihood.h │ ├── likelihoodCombinationOperator.h │ ├── lstmDecoder.cpp │ ├── lstmDecoder.h │ ├── lstmEncoder.cpp │ ├── lstmEncoder.h │ ├── multiplicativeAlignment.cpp │ ├── multiplicativeAlignment.h │ ├── projection.h │ ├── slpAttention.cpp │ ├── slpAttention.h │ ├── slpEmbedder.cpp │ ├── slpEmbedder.h │ ├── slpProjection.cpp │ ├── slpProjection.h │ ├── softmaxLikelihood.cpp │ └── softmaxLikelihood.h ├── pinnedHostBuffer.h ├── sampleNMT.cpp ├── sampleNMT.vcxproj ├── sampleNMT.vcxproj.filters ├── sampleNMT.vcxproj.user ├── trtUtil.cpp └── trtUtil.h ├── samplePlugin ├── fp16.h ├── samplePlugin.cpp ├── samplePlugin.vcxproj ├── samplePlugin.vcxproj.filters └── samplePlugin.vcxproj.user ├── sampleResNetv2 ├── sampleResNetv2.vcxproj ├── sampleResNetv2.vcxproj.filters ├── sampleResNetv2.vcxproj.user └── 
sampleResNetv2cpp.cpp ├── sampleUffMNIST ├── sampleUffMNIST.cpp ├── sampleUffMNIST.vcxproj ├── sampleUffMNIST.vcxproj.filters └── sampleUffMNIST.vcxproj.user └── sampleUffSSD ├── BatchStreamPPM.h ├── README.txt ├── car-0.671518.ppm ├── config.py ├── dog-0.880681.ppm ├── dog-0.890010.ppm ├── person-0.549108.ppm ├── sampleUffSSD.cpp ├── sampleUffSSD.vcxproj ├── sampleUffSSD.vcxproj.filters ├── sampleUffSSD.vcxproj.user └── truck-0.782028.ppm
/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | output/* 3 | env*.txt 4 | .vs/* 5 | 3rdparty/* 6 | src/sampleSSD/* 7 | src/sampleUffCifar10/* 8 | --------------------------------------------------------------------------------
/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Milittle 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # TensorRT5.0 Test Integration 2 | 3 | Project **TensorRT_test** integrates the TensorRT library samples into a Windows Visual Studio 2017 solution, so that machine learning models can run fast at the inference stage. 4 | 5 | >You can find more information about **TensorRT** in the [TensorRT Dev Guide](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html) 6 | 7 | ## Not NVIDIA TensorRT Official Samples (by myself) 8 | 9 | * **sampleLoadEngineStream:** deserializes an engine stream from the `engineStream.bin` file located in the `{SolutionDir}/data/mnist/` folder. 10 | * **sampleResNetv2**: converts a ResNetv2 TensorFlow pb file to a uff file and runs inference on it. 11 | * **sampleDetection**: (defect detection demo) works around the TensorFlow BatchNormalization operator. TensorRT does not support BN's Switch and Merge nodes, so I take the pb graph, remove the Switch/Merge nodes, merge the related nodes back into the graph, and convert the result to a uff file that the TensorRT uff parser can load (see the sketch after this list). 12 | I run inference on ten defect images, so the timing below is the total time for 10 images.
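Once the pruned graph has been converted to a `.uff` file, loading it in TensorRT follows the usual UFF-parser pattern. Below is a minimal C++ sketch of that step; the tensor names, input shape, file name, and data directory are placeholder assumptions for illustration, not the actual values used by sampleDetection:

```cpp
#include "NvInfer.h"
#include "NvUffParser.h"
#include "common.h"

// Build an engine from a UFF file exported from the pruned TensorFlow graph.
// "input", "detection_out", the 1x512x512 shape and detection.uff are placeholders;
// substitute the real names and dimensions from your exported graph.
nvinfer1::ICudaEngine* buildDetectionEngine(nvinfer1::ILogger& logger)
{
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
    nvinfer1::INetworkDefinition* network = builder->createNetwork();
    nvuffparser::IUffParser* parser = nvuffparser::createUffParser();

    parser->registerInput("input", nvinfer1::Dims3(1, 512, 512), nvuffparser::UffInputOrder::kNCHW);
    parser->registerOutput("detection_out");
    parser->parse(locateFile("detection.uff", {"data/detection/"}).c_str(), *network, nvinfer1::DataType::kFLOAT);

    builder->setMaxBatchSize(10);           // ten defect images per batch
    builder->setMaxWorkspaceSize(1 << 28);  // 256 MB of scratch space
    nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);

    network->destroy();
    parser->destroy();
    builder->destroy();
    return engine;
}
```

Everything else mirrors the UFF-based samples already in the solution (sampleUffMNIST, sampleUffSSD); only the registered input/output names differ.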
13 | 14 | **sampleDetection time comparison:** 15 | 16 | | TensorFlow (Python), Titan 12 GB | TensorRT (C++), Quadro 4 GB | Speedup | 17 | | :----------------------------------------: | :--------------------------------: | :--------: | 18 | | pure run time (1344.3049 ms) | pure execution time (44.5 ms) | ~30x | 19 | | load data and related tensor nodes (3473 ms) | load data and execute (171.373 ms) | ~20x | 20 | | GPU memory: 2 GB | --- | | 21 | 22 | ## Table of Contents 23 | 24 | - [TensorRT5.0 Test Integration](#tensorrt50-test-integration) 25 | - [Not NVIDIA TensorRT Official Samples (by myself)](#not-nvidia-tensorrt-official-samples-by-myself) 26 | - [Table of Contents](#table-of-contents) 27 | - [Prerequisites](#prerequisites) 28 | - [Getting the code](#getting-the-code) 29 | - [Project Structure](#project-structure) 30 | - [Run the Example using VS](#run-the-example-using-vs) 31 | - [sampleUffMNIST](#sampleuffmnist) 32 | - [sampleUffSSD](#sampleuffssd) 33 | - [sampleMNIST](#samplemnist) 34 | - [sampleMNISTAPI](#samplemnistapi) 35 | - [sampleSSD](#samplessd) 36 | - [samplePlugin](#sampleplugin) 37 | - [sampleCharRNN](#samplecharrnn) 38 | - [sampleFasterRCNN](#samplefasterrcnn) 39 | - [sampleGoogleNet](#samplegooglenet) 40 | - [sampleINT8](#sampleint8) 41 | - [sampleMLP](#samplemlp) 42 | - [sampleMovieLens](#samplemovielens) 43 | - [sampleNMT](#samplenmt) 44 | - [Contact and Getting Help](#contact-and-getting-help) 45 | 46 | ## Prerequisites 47 | 48 | * CUDA 10.0 [DOWNLOAD LINK](https://developer.nvidia.com/cuda-downloads) 49 | * cuDNN 7.3 [DOWNLOAD LINK](https://developer.nvidia.com/cudnn) 50 | * Visual Studio 2017 51 | 52 | ## Getting the code 53 | 54 | You can clone the project with git: 55 | 56 | ```shell 57 | git clone git@github.com:Milittle/TensorRT_test.git 58 | ``` 59 | 60 | ## Project Structure 61 | 62 | The following is the structure of the integrated project. You can download **data** and **3rdparty** from: 63 | 64 | **Google Drive**: [data and 3rdparty download link](https://drive.google.com/open?id=1mDKSmK5n2n7KnZhW5mQbUSJTSzZteN8c) 65 | 66 | Once you have downloaded data and 3rdparty, open the TensorRT_test.sln file and run the samples from Visual Studio 2017. 67 | 68 | Good luck. 69 | 70 | ```shell 71 | TensorRT_test: 72 | | 3rdparty 73 | └---| TensorRT-5.0.1.3 74 | | └------------------- 75 | | common 76 | └---| windows 77 | | | argsParser.h 78 | | | BatchStream.h 79 | | | buffers.h 80 | | | common.h 81 | | | dumpTFWts.py 82 | | | half.h 83 | | | sampleConfig.h 84 | | └------------------- 85 | | data 86 | └---| char-rnn 87 | | | example_gif 88 | | | faster-rcnn 89 | | | googlenet 90 | | | mlp 91 | | | mnist 92 | | | movielens 93 | | | nmt 94 | | | ssd 95 | | └------------------- 96 | | src 97 | └---| sampleCharRNN 98 | | | sampleFasterRCNN 99 | | | sampleGoogleNet 100 | | | sampleINT8 101 | | | sampleMLP 102 | | | sampleMNIST 103 | | | sampleMNISTAPI 104 | | | sampleMovieLens 105 | | | sampleNMT 106 | | | samplePlugin 107 | | | sampleUffMNIST 108 | | | sampleUffSSD 109 | | └-------------------- 110 | | .gitignore 111 | └------------------------ 112 | | README.md 113 | └------------------------ 114 | | TensorRT_test.sln 115 | └------------------------ 116 | ``` 117 | 118 | ## Run the Example using VS 119 | 120 | ### sampleUffMNIST 121 | 122 | ![Demo](https://s1.ax1x.com/2018/10/28/ig9UTe.gif) 123 | 124 | ### sampleUffSSD 125 | 126 | Loading the model and building the engine takes a long time in this example, so be patient. To avoid rebuilding the engine on every run, you can serialize it once it has been built and reload it later, as sketched below.
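One way to skip the long build on later runs is to serialize the engine after the first build and deserialize it afterwards (this is the pattern sampleLoadEngineStream uses with its `engineStream.bin`). A minimal sketch, with a caller-supplied file path; note that if the network contains plugin layers, as sampleUffSSD does, the matching plugin factory must be passed to `deserializeCudaEngine` instead of `nullptr`:

```cpp
#include "NvInfer.h"
#include <fstream>
#include <vector>

// After buildCudaEngine() has succeeded once, dump the engine to disk.
void saveEngine(nvinfer1::ICudaEngine& engine, const char* path)
{
    nvinfer1::IHostMemory* blob = engine.serialize();
    std::ofstream out(path, std::ios::binary);
    out.write(static_cast<const char*>(blob->data()), blob->size());
    blob->destroy();
}

// On later runs, skip parsing and building entirely and reload the blob.
nvinfer1::ICudaEngine* loadEngine(nvinfer1::IRuntime& runtime, const char* path)
{
    std::ifstream in(path, std::ios::binary | std::ios::ate);
    if (!in) return nullptr;
    size_t size = in.tellg();
    in.seekg(0, std::ios::beg);
    std::vector<char> blob(size);
    in.read(blob.data(), size);
    // Pass the plugin factory here instead of nullptr if the engine uses plugins.
    return runtime.deserializeCudaEngine(blob.data(), size, nullptr);
}
```

sampleLoadEngineStream in this repository implements the second half of this: it reads `engineStream.bin` and calls `deserializeCudaEngine` before running inference.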
127 | 128 | Step 1: Begin parsing model... 129 | 130 | End parsing model... 131 | 132 | Step 2: Begin building engine... 133 | 134 | End building engine... 135 | 136 | Step 3: Begin inference. 137 | 138 | ![](https://s1.ax1x.com/2018/10/29/igNDaT.gif) 139 | 140 | ### sampleMNIST 141 | 142 | ![](https://s1.ax1x.com/2018/10/29/igNcRJ.gif) 143 | 144 | ### sampleMNISTAPI 145 | 146 | ![](https://s1.ax1x.com/2018/10/29/igNgz9.gif) 147 | 148 | ### sampleSSD 149 | 150 | This example currently fails: I cannot get the Caffe parser to parse the model prototxt. 151 | 152 | 153 | 154 | ### samplePlugin 155 | 156 | ![](https://s1.ax1x.com/2018/10/29/igNWs1.gif) 157 | 158 | ### sampleCharRNN 159 | 160 | ![](https://s1.ax1x.com/2018/10/29/igN5dK.gif) 161 | 162 | ### sampleFasterRCNN 163 | 164 | ![](https://s1.ax1x.com/2018/10/29/igN7Je.gif) 165 | 166 | ### sampleGoogleNet 167 | 168 | ![](https://s1.ax1x.com/2018/10/29/igNHRH.gif) 169 | 170 | ### sampleINT8 171 | 172 | **Note**: my GPU does not support FP16 or INT8, so: 173 | 174 | ![](https://s1.ax1x.com/2018/10/29/igNxdf.gif) 175 | 176 | ### sampleMLP 177 | 178 | ![](https://s1.ax1x.com/2018/10/29/igNzo8.gif) 179 | 180 | ### sampleMovieLens 181 | 182 | ![](https://s1.ax1x.com/2018/10/29/igUpFS.gif) 183 | 184 | ### sampleNMT 185 | 186 | ![](https://s1.ax1x.com/2018/10/29/igUNFO.gif) 187 | 188 | ## Contact and Getting Help 189 | 190 | **Email:** mizeshuang@gmail.com 191 | 192 | **QQ**: 329804334 193 | 194 | **Author:** Milittle 195 | --------------------------------------------------------------------------------
/common/BatchStream.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCH_STREAM_H 2 | #define BATCH_STREAM_H 3 | 4 | #include "NvInfer.h" 5 | #include "common.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | class BatchStream 12 | { 13 | public: 14 | BatchStream(int batchSize, int maxBatches, std::string prefix, std::vector directories) 15 | : mBatchSize(batchSize) 16 | , mMaxBatches(maxBatches) 17 | , mPrefix(prefix) 18 | , mDataDir(directories) 19 | { 20 | FILE* file = fopen(locateFile(mPrefix + std::string("0.batch"), mDataDir).c_str(), "rb"); 21 | int d[4]; 22 | size_t readSize = fread(d, sizeof(int), 4, file); 23 | assert(readSize == 4); 24 | mDims.nbDims = 4; //The number of dimensions. 25 | mDims.d[0] = d[0]; //Batch Size 26 | mDims.d[1] = d[1]; //Channels 27 | mDims.d[2] = d[2]; //Height 28 | mDims.d[3] = d[3]; //Width 29 | 30 | fclose(file); 31 | mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; 32 | mBatch.resize(mBatchSize * mImageSize, 0); 33 | mFileBatch.resize(mDims.d[0] * mImageSize, 0); 34 | reset(0); 35 | } 36 | 37 | // Resets data members 38 | void reset(int firstBatch) 39 | { 40 | mBatchCount = 0; 41 | mFileCount = 0; 42 | mFileBatchPos = mDims.d[0]; 43 | skip(firstBatch); 44 | } 45 | 46 | // Advance to next batch and return true, or return false if there is no batch left. 47 | bool next() 48 | { 49 | if (mBatchCount == mMaxBatches) 50 | return false; 51 | 52 | for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) 53 | { 54 | assert(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]); 55 | if (mFileBatchPos == mDims.d[0] && !update()) 56 | return false; 57 | 58 | // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer.
59 | csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos); 60 | std::copy_n(getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize); 61 | } 62 | mBatchCount++; 63 | return true; 64 | } 65 | 66 | // Skips the batches 67 | void skip(int skipCount) 68 | { 69 | if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == mDims.d[0]) 70 | { 71 | mFileCount += skipCount * mBatchSize / mDims.d[0]; 72 | return; 73 | } 74 | 75 | int x = mBatchCount; 76 | for (int i = 0; i < skipCount; i++) 77 | next(); 78 | mBatchCount = x; 79 | } 80 | 81 | float* getBatch() { return &mBatch[0]; } 82 | int getBatchesRead() const { return mBatchCount; } 83 | int getBatchSize() const { return mBatchSize; } 84 | int getImageSize() const { return mImageSize; } 85 | nvinfer1::Dims getDims() const { return mDims; } 86 | 87 | private: 88 | 89 | float* getFileBatch() { return &mFileBatch[0]; } 90 | 91 | bool update() 92 | { 93 | std::string inputFileName = locateFile(mPrefix + std::to_string(mFileCount++) + std::string(".batch"), mDataDir); 94 | FILE* file = fopen(inputFileName.c_str(), "rb"); 95 | if (!file) 96 | return false; 97 | 98 | int d[4]; 99 | size_t readSize = fread(d, sizeof(int), 4, file); 100 | assert(readSize == 4); 101 | assert(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] && mDims.d[3] == d[3]); 102 | size_t readInputCount = fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file); 103 | assert(readInputCount == size_t(mDims.d[0] * mImageSize)); 104 | 105 | fclose(file); 106 | mFileBatchPos = 0; 107 | return true; 108 | } 109 | 110 | int mBatchSize{0}; 111 | int mMaxBatches{0}; 112 | int mBatchCount{0}; 113 | int mFileCount{0}; 114 | int mFileBatchPos{0}; 115 | int mImageSize{0}; 116 | nvinfer1::Dims mDims; 117 | std::vector mBatch; 118 | std::vector mFileBatch; 119 | std::string mPrefix; 120 | std::vector mDataDir; 121 | }; 122 | #endif 123 | -------------------------------------------------------------------------------- /common/argsParser.h: -------------------------------------------------------------------------------- 1 | #ifndef TENSORRT_ARGS_PARSER_H 2 | #define TENSORRT_ARGS_PARSER_H 3 | 4 | #include 5 | #include 6 | #ifdef _MSC_VER 7 | #include "getopt.h" 8 | #else 9 | #include 10 | #endif 11 | #include 12 | 13 | namespace samplesCommon 14 | { 15 | 16 | //! 17 | //! \brief The SampleParams structure groups the basic parameters required by 18 | //! all sample networks. 19 | //! 20 | struct SampleParams 21 | { 22 | int batchSize; //!< Number of inputs in a batch 23 | int dlaCore{-1}; 24 | std::vector dataDirs; //!< Directory paths where sample data files are stored 25 | std::vector inputTensorNames; 26 | std::vector outputTensorNames; 27 | }; 28 | 29 | //! 30 | //! \brief The CaffeSampleParams structure groups the additional parameters required by 31 | //! networks that use caffe 32 | //! 33 | struct CaffeSampleParams : public SampleParams 34 | { 35 | std::string prototxtFileName; //!< Filename of prototxt design file of a network 36 | std::string weightsFileName; //!< Filename of trained weights file of a network 37 | }; 38 | 39 | //! 40 | //! /brief Struct to maintain command-line arguments. 41 | //! 42 | struct Args 43 | { 44 | bool runInInt8{false}; 45 | bool help{false}; 46 | int useDLACore{-1}; 47 | std::vector dataDirs; 48 | }; 49 | 50 | //! 51 | //! \brief Populates the Args struct with the provided command-line parameters. 52 | //! 53 | //! 
\throw invalid_argument if any of the arguments are not valid 54 | //! 55 | //! \return boolean If return value is true, execution can continue, otherwise program should exit 56 | //! 57 | inline bool parseArgs(Args& args, int argc, char* argv[]) 58 | { 59 | while (1) 60 | { 61 | int arg; 62 | static struct option long_options[] = { 63 | {"help", no_argument, 0, 'h'}, 64 | {"datadir", required_argument, 0, 'd'}, 65 | {"int8", no_argument, 0, 'i'}, 66 | {"useDLACore", required_argument, 0, 'u'}, 67 | {nullptr, 0, nullptr, 0}}; 68 | int option_index = 0; 69 | arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index); 70 | if (arg == -1) 71 | break; 72 | 73 | switch (arg) 74 | { 75 | case 'h': 76 | args.help = true; 77 | return false; 78 | case 'd': 79 | if (optarg) 80 | args.dataDirs.push_back(optarg); 81 | else 82 | { 83 | std::cerr << "ERROR: --datadir requires option argument" << std::endl; 84 | return false; 85 | } 86 | break; 87 | case 'i': 88 | args.runInInt8 = true; 89 | break; 90 | case 'u': 91 | if (optarg) 92 | args.useDLACore = std::stoi(optarg); 93 | break; 94 | default: 95 | return false; 96 | } 97 | } 98 | return true; 99 | } 100 | 101 | } // namespace samplesCommon 102 | 103 | #endif // TENSORRT_ARGS_PARSER_H 104 | -------------------------------------------------------------------------------- /common/dumpTFWts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Script to dump TensorFlow weights in TRT v1 and v2 dump format. 4 | # The V1 format is for TensorRT 4.0. The V2 format is for TensorRT 4.0 and later. 5 | 6 | import sys 7 | import struct 8 | import argparse 9 | try: 10 | import tensorflow as tf 11 | from tensorflow.python import pywrap_tensorflow 12 | except ImportError as err: 13 | sys.stderr.write("""Error: Failed to import module ({})""".format(err)) 14 | sys.exit() 15 | 16 | parser = argparse.ArgumentParser(description='TensorFlow Weight Dumper') 17 | 18 | parser.add_argument('-m', '--model', required=True, help='The checkpoint file basename, example basename(model.ckpt-766908.data-00000-of-00001) -> model.ckpt-766908') 19 | parser.add_argument('-o', '--output', required=True, help='The weight file to dump all the weights to.') 20 | parser.add_argument('-1', '--wtsv1', required=False, default=False, type=bool, help='Dump the weights in the wts v1.') 21 | 22 | opt = parser.parse_args() 23 | 24 | if opt.wtsv1: 25 | print "Outputting the trained weights in TensorRT's wts v1 format. This format is documented as:" 26 | print "Line 0: " 27 | print "Line 1-Num: [buffer name] [buffer type] [buffer size] " 28 | else: 29 | print "Outputting the trained weights in TensorRT's wts v2 format. This format is documented as:" 30 | print "Line 0: " 31 | print "Line 1-Num: [buffer name] [buffer type] [(buffer shape{e.g. 
(1, 2, 3)}] " 32 | 33 | inputbase = opt.model 34 | outputbase = opt.output 35 | 36 | def float_to_hex(f): 37 | return hex(struct.unpack(' 17 | 18 | #if defined( WINGETOPT_SHARED_LIB ) 19 | # if defined( BUILDING_WINGETOPT_DLL ) 20 | # define WINGETOPT_API __declspec(dllexport) 21 | # else 22 | # define WINGETOPT_API __declspec(dllimport) 23 | # endif 24 | #else 25 | # define WINGETOPT_API 26 | #endif 27 | 28 | #ifdef __cplusplus 29 | extern "C" { 30 | #endif 31 | 32 | WINGETOPT_API extern int optind; /* index of first non-option in argv */ 33 | WINGETOPT_API extern int optopt; /* single option character, as parsed */ 34 | WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */ 35 | /* (user may set to zero, to suppress) */ 36 | 37 | WINGETOPT_API extern char *optarg; /* pointer to argument of current option */ 38 | 39 | extern int getopt(int nargc, char * const *nargv, const char *options); 40 | 41 | #ifdef _BSD_SOURCE 42 | /* 43 | * BSD adds the non-standard `optreset' feature, for reinitialisation 44 | * of `getopt' parsing. We support this feature, for applications which 45 | * proclaim their BSD heritage, before including this header; however, 46 | * to maintain portability, developers are advised to avoid it. 47 | */ 48 | # define optreset __mingw_optreset 49 | extern int optreset; 50 | #endif 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | /* 55 | * POSIX requires the `getopt' API to be specified in `unistd.h'; 56 | * thus, `unistd.h' includes this header. However, we do not want 57 | * to expose the `getopt_long' or `getopt_long_only' APIs, when 58 | * included in this manner. Thus, close the standard __GETOPT_H__ 59 | * declarations block, and open an additional __GETOPT_LONG_H__ 60 | * specific block, only when *not* __UNISTD_H_SOURCED__, in which 61 | * to declare the extended API. 62 | */ 63 | #endif /* !defined(__GETOPT_H__) */ 64 | 65 | #if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) 66 | #define __GETOPT_LONG_H__ 67 | 68 | #ifdef __cplusplus 69 | extern "C" { 70 | #endif 71 | 72 | struct option /* specification for a long form option... */ 73 | { 74 | const char *name; /* option name, without leading hyphens */ 75 | int has_arg; /* does it take an argument? */ 76 | int *flag; /* where to save its status, or NULL */ 77 | int val; /* its associated status value */ 78 | }; 79 | 80 | enum /* permitted values for its `has_arg' field... */ 81 | { 82 | no_argument = 0, /* option never takes an argument */ 83 | required_argument, /* option always requires an argument */ 84 | optional_argument /* option may take an argument */ 85 | }; 86 | 87 | extern int getopt_long(int nargc, char * const *nargv, const char *options, 88 | const struct option *long_options, int *idx); 89 | extern int getopt_long_only(int nargc, char * const *nargv, const char *options, 90 | const struct option *long_options, int *idx); 91 | /* 92 | * Previous MinGW implementation had... 93 | */ 94 | #ifndef HAVE_DECL_GETOPT 95 | /* 96 | * ...for the long form API only; keep this for compatibility. 
97 | */ 98 | # define HAVE_DECL_GETOPT 1 99 | #endif 100 | 101 | #ifdef __cplusplus 102 | } 103 | #endif 104 | 105 | #endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */ 106 | -------------------------------------------------------------------------------- /src/sampleCharRNN/sampleCharRNN.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleCharRNN/sampleCharRNN.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleDetection/sampleDetection.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleDetection/sampleDetection.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleFasterRCNN/README.md: -------------------------------------------------------------------------------- 1 | The FasterRCNN sample uses the dataset from here: 2 | https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz 3 | 4 | The dataset needs to be placed into the data/faster-rcnn directory. 
5 | 6 | The commands to do this on linux are as follows: 7 | 8 | cd 9 | wget --no-check-certificate https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0 -O data/faster-rcnn/faster-rcnn.tgz 10 | tar zxvf data/faster-rcnn/faster-rcnn.tgz -C data/faster-rcnn --strip-components=1 --exclude=ZF_* 11 | 12 | 翻译: 13 | 14 | FasterRCNN 示例使用的数据模型在如下链接可以下载到: 15 | 16 | https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz 17 | 18 | 下载好的数据模型,直接解压以后放在data/faster-rcnn文件夹下面。 -------------------------------------------------------------------------------- /src/sampleFasterRCNN/car-0.990063.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleFasterRCNN/car-0.990063.ppm -------------------------------------------------------------------------------- /src/sampleFasterRCNN/dog-0.997705.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleFasterRCNN/dog-0.997705.ppm -------------------------------------------------------------------------------- /src/sampleFasterRCNN/dog-0.999603.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleFasterRCNN/dog-0.999603.ppm -------------------------------------------------------------------------------- /src/sampleFasterRCNN/factoryFasterRCNN.h: -------------------------------------------------------------------------------- 1 | #include "NvCaffeParser.h" 2 | #include "NvInferPlugin.h" 3 | 4 | using namespace nvinfer1; 5 | using namespace nvcaffeparser1; 6 | using namespace plugin; 7 | 8 | const int poolingH = 7; 9 | const int poolingW = 7; 10 | const int featureStride = 16; 11 | const int preNmsTop = 6000; 12 | const int nmsMaxOut = 300; 13 | const int anchorsRatioCount = 3; 14 | const int anchorsScaleCount = 3; 15 | const float iouThreshold = 0.7f; 16 | const float minBoxSize = 16; 17 | const float spatialScale = 0.0625f; 18 | const float anchorsRatios[anchorsRatioCount] = { 0.5f, 1.0f, 2.0f }; 19 | const float anchorsScales[anchorsScaleCount] = { 8.0f, 16.0f, 32.0f }; 20 | 21 | class FRCNNPluginFactory : public nvcaffeparser1::IPluginFactoryV2 22 | { 23 | public: 24 | virtual nvinfer1::IPluginV2* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights, const char* libNamespace) override 25 | { 26 | assert(isPluginV2(layerName)); 27 | if (!strcmp(layerName, "RPROIFused")) 28 | { 29 | assert(mPluginRPROI == nullptr); 30 | assert(nbWeights == 0 && weights == nullptr); 31 | mPluginRPROI = std::unique_ptr 32 | (createRPNROIPlugin(featureStride, preNmsTop, nmsMaxOut, iouThreshold, minBoxSize, spatialScale, 33 | DimsHW(poolingH, poolingW), Weights{ nvinfer1::DataType::kFLOAT, anchorsRatios, anchorsRatioCount }, 34 | Weights{ nvinfer1::DataType::kFLOAT, anchorsScales, anchorsScaleCount }), pluginDeleter); 35 | mPluginRPROI.get()->setPluginNamespace(libNamespace); 36 | return mPluginRPROI.get(); 37 | } 38 | else 39 | { 40 | assert(0); 41 | return nullptr; 42 | } 43 | } 44 | 45 | // caffe parser plugin implementation 46 | bool isPluginV2(const char* name) override { return !strcmp(name, "RPROIFused"); } 47 | 48 | void destroyPlugin() 49 | { 50 | mPluginRPROI.reset(); 51 | } 52 | 53 | void 
(*pluginDeleter)(IPluginV2*) {[](IPluginV2* ptr) {ptr->destroy();}}; 54 | std::unique_ptr mPluginRPROI{nullptr, pluginDeleter}; 55 | 56 | }; 57 | -------------------------------------------------------------------------------- /src/sampleFasterRCNN/horse-0.994670.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleFasterRCNN/horse-0.994670.ppm -------------------------------------------------------------------------------- /src/sampleFasterRCNN/person-0.974725.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleFasterRCNN/person-0.974725.ppm -------------------------------------------------------------------------------- /src/sampleFasterRCNN/sampleFasterRCNN.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 头文件 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 源文件 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/sampleFasterRCNN/sampleFasterRCNN.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleGoogleNet/README.md: -------------------------------------------------------------------------------- 1 | # NVIDIA TensorRT Sample "sampleGoogleNet" 2 | 3 | The sampleGoogleNet sample demonstrates how to: 4 | - Build a TensorRT engine from the saved Caffe model 5 | - Set input values to engine, run engine and obtain output 6 | 7 | ## Usage 8 | 9 | This sample can be run as: 10 | 11 | ./sample_googlenet [-h] [--datadir=/path/to/data/dir/] [--useDLACore=N] 12 | 13 | SampleGoogleNet reads two caffe files to build the network: 14 | 15 | * `googlenet.prototxt` - The prototxt file that contains the network design 16 | * `googlenet.caffemodel` - The model file which contains the trained weights 17 | for the network 18 | 19 | By default, the sample expects these files to be in `data/samples/googlenet/` or 20 | `data/googlenet/`. The default directory can be changed by supplying the path as 21 | `--datadir=/new/path/` as a command line argument. 
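For reference, the core of the sample reduces to parsing those two Caffe files and building an engine. A minimal sketch of that flow follows; the batch size, workspace size, and the `prob` output blob name are illustrative assumptions rather than values taken from the sample source:

```cpp
#include "NvInfer.h"
#include "NvCaffeParser.h"
#include "common.h"
#include <string>
#include <vector>

// Parse googlenet.prototxt / googlenet.caffemodel and build a TensorRT engine.
nvinfer1::ICudaEngine* buildGoogleNetEngine(nvinfer1::ILogger& logger)
{
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
    nvinfer1::INetworkDefinition* network = builder->createNetwork();
    nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();

    const std::vector<std::string> dirs{"data/samples/googlenet/", "data/googlenet/"};
    const nvcaffeparser1::IBlobNameToTensor* blobs = parser->parse(
        locateFile("googlenet.prototxt", dirs).c_str(),
        locateFile("googlenet.caffemodel", dirs).c_str(),
        *network, nvinfer1::DataType::kFLOAT);

    // Mark the network output; "prob" is assumed to be the softmax blob in this prototxt.
    network->markOutput(*blobs->find("prob"));

    builder->setMaxBatchSize(4);            // illustrative batch size
    builder->setMaxWorkspaceSize(16 << 20); // 16 MB of scratch space
    nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);

    network->destroy();
    parser->destroy();
    builder->destroy();
    return engine;
}
```

The actual sample layers command-line handling and execution on top of this; the sketch covers only the network construction step.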
22 | -------------------------------------------------------------------------------- /src/sampleGoogleNet/sampleGoogleNet.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | 源文件 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/sampleGoogleNet/sampleGoogleNet.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleINT8/BatchStream.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCH_STREAM_H 2 | #define BATCH_STREAM_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "NvInfer.h" 8 | 9 | std::string locateFile(const std::string& input); 10 | 11 | class BatchStream 12 | { 13 | public: 14 | BatchStream(int batchSize, int maxBatches) : mBatchSize(batchSize), mMaxBatches(maxBatches) 15 | { 16 | FILE* file = fopen(locateFile(std::string("batches/batch0")).c_str(), "rb"); 17 | int d[4]; 18 | fread(d, sizeof(int), 4, file); 19 | mDims = nvinfer1::DimsNCHW{ d[0], d[1], d[2], d[3] }; 20 | fclose(file); 21 | mImageSize = mDims.c()*mDims.h()*mDims.w(); 22 | mBatch.resize(mBatchSize*mImageSize, 0); 23 | mLabels.resize(mBatchSize, 0); 24 | mFileBatch.resize(mDims.n()*mImageSize, 0); 25 | mFileLabels.resize(mDims.n(), 0); 26 | reset(0); 27 | } 28 | 29 | void reset(int firstBatch) 30 | { 31 | mBatchCount = 0; 32 | mFileCount = 0; 33 | mFileBatchPos = mDims.n(); 34 | skip(firstBatch); 35 | } 36 | 37 | bool next() 38 | { 39 | if (mBatchCount == mMaxBatches) 40 | return false; 41 | 42 | for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) 43 | { 44 | assert(mFileBatchPos > 0 && mFileBatchPos <= mDims.n()); 45 | if (mFileBatchPos == mDims.n() && !update()) 46 | return false; 47 | 48 | // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer. 
49 | csize = std::min(mBatchSize - batchPos, mDims.n() - mFileBatchPos); 50 | std::copy_n(getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize); 51 | std::copy_n(getFileLabels() + mFileBatchPos, csize, getLabels() + batchPos); 52 | } 53 | mBatchCount++; 54 | return true; 55 | } 56 | 57 | void skip(int skipCount) 58 | { 59 | if (mBatchSize >= mDims.n() && mBatchSize%mDims.n() == 0 && mFileBatchPos == mDims.n()) 60 | { 61 | mFileCount += skipCount * mBatchSize / mDims.n(); 62 | return; 63 | } 64 | 65 | int x = mBatchCount; 66 | for (int i = 0; i < skipCount; i++) 67 | next(); 68 | mBatchCount = x; 69 | } 70 | 71 | float *getBatch() { return &mBatch[0]; } 72 | float *getLabels() { return &mLabels[0]; } 73 | int getBatchesRead() const { return mBatchCount; } 74 | int getBatchSize() const { return mBatchSize; } 75 | nvinfer1::DimsNCHW getDims() const { return mDims; } 76 | private: 77 | float* getFileBatch() { return &mFileBatch[0]; } 78 | float* getFileLabels() { return &mFileLabels[0]; } 79 | 80 | bool update() 81 | { 82 | std::string inputFileName = locateFile(std::string("batches/batch") + std::to_string(mFileCount++)); 83 | FILE * file = fopen(inputFileName.c_str(), "rb"); 84 | if (!file) 85 | return false; 86 | 87 | int d[4]; 88 | fread(d, sizeof(int), 4, file); 89 | assert(mDims.n() == d[0] && mDims.c() == d[1] && mDims.h() == d[2] && mDims.w() == d[3]); 90 | 91 | size_t readInputCount = fread(getFileBatch(), sizeof(float), mDims.n()*mImageSize, file); 92 | size_t readLabelCount = fread(getFileLabels(), sizeof(float), mDims.n(), file);; 93 | assert(readInputCount == size_t(mDims.n()*mImageSize) && readLabelCount == size_t(mDims.n())); 94 | 95 | fclose(file); 96 | mFileBatchPos = 0; 97 | return true; 98 | } 99 | 100 | int mBatchSize{ 0 }; 101 | int mMaxBatches{ 0 }; 102 | int mBatchCount{ 0 }; 103 | 104 | int mFileCount{ 0 }, mFileBatchPos{ 0 }; 105 | int mImageSize{ 0 }; 106 | 107 | nvinfer1::DimsNCHW mDims; 108 | std::vector mBatch; 109 | std::vector mLabels; 110 | std::vector mFileBatch; 111 | std::vector mFileLabels; 112 | }; 113 | 114 | 115 | #endif 116 | -------------------------------------------------------------------------------- /src/sampleINT8/CalibrationTablemnist: -------------------------------------------------------------------------------- 1 | 1 2 | data: 3c000889 3 | pool2: 3d9ccc94 4 | ip1: 3daeff07 5 | prob: 3c010a14 6 | conv2: 3dd33169 7 | pool1: 3c88e7e3 8 | ip2: 3e7d50ec 9 | conv1: 3c8954be 10 | -------------------------------------------------------------------------------- /src/sampleINT8/LegacyCalibrator.h: -------------------------------------------------------------------------------- 1 | #ifndef LEGACY_CALIBRATOR_H 2 | #define LEGACY_CALIBRATOR_H 3 | 4 | #include 5 | #include "NvInfer.h" 6 | #include "BatchStream.h" 7 | #include "cuda_runtime_api.h" 8 | #include 9 | #include 10 | 11 | #include "common.h" 12 | 13 | class Int8LegacyCalibrator : public nvinfer1::IInt8LegacyCalibrator 14 | { 15 | public: 16 | Int8LegacyCalibrator(BatchStream& stream, int firstBatch, double cutoff, double quantile, bool readCache = true) 17 | : mStream(stream), mFirstBatch(firstBatch), mReadCache(readCache) 18 | { 19 | using namespace nvinfer1; 20 | DimsNCHW dims = mStream.getDims(); 21 | mInputCount = mStream.getBatchSize() * dims.c() * dims.h() * dims.w(); 22 | CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); 23 | reset(cutoff, quantile); 24 | } 25 | 26 | virtual ~Int8LegacyCalibrator() 27 | { 28 | 
CHECK(cudaFree(mDeviceInput)); 29 | } 30 | 31 | int getBatchSize() const override { return mStream.getBatchSize(); } 32 | double getQuantile() const override { return mQuantile; } 33 | double getRegressionCutoff() const override { return mCutoff; } 34 | 35 | bool getBatch(void* bindings[], const char* names[], int nbBindings) override 36 | { 37 | if (!mStream.next()) 38 | return false; 39 | 40 | CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); 41 | bindings[0] = mDeviceInput; 42 | return true; 43 | } 44 | 45 | const void* readCalibrationCache(size_t& length) override 46 | { 47 | mCalibrationCache.clear(); 48 | std::ifstream input(locateFile("CalibrationTable"), std::ios::binary); 49 | input >> std::noskipws; 50 | if (mReadCache && input.good()) 51 | std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(mCalibrationCache)); 52 | 53 | length = mCalibrationCache.size(); 54 | return length ? &mCalibrationCache[0] : nullptr; 55 | } 56 | 57 | void writeCalibrationCache(const void* cache, size_t length) override 58 | { 59 | std::ofstream output(locateFile("CalibrationTable"), std::ios::binary); 60 | output.write(reinterpret_cast(cache), length); 61 | } 62 | 63 | const void* readHistogramCache(size_t& length) override 64 | { 65 | length = mHistogramCache.size(); 66 | return length ? &mHistogramCache[0] : nullptr; 67 | } 68 | 69 | void writeHistogramCache(const void* cache, size_t length) override 70 | { 71 | mHistogramCache.clear(); 72 | std::copy_n(reinterpret_cast(cache), length, std::back_inserter(mHistogramCache)); 73 | } 74 | 75 | void reset(double cutoff, double quantile) 76 | { 77 | mCutoff = cutoff; 78 | mQuantile = quantile; 79 | mStream.reset(mFirstBatch); 80 | } 81 | 82 | private: 83 | BatchStream mStream; 84 | int mFirstBatch; 85 | double mCutoff, mQuantile; 86 | bool mReadCache{ true }; 87 | 88 | size_t mInputCount; 89 | void* mDeviceInput{ nullptr }; 90 | std::vector mCalibrationCache, mHistogramCache; 91 | }; 92 | 93 | struct CalibrationParameters 94 | { 95 | const char* networkName; 96 | double cutoff; 97 | double quantileIndex; 98 | }; 99 | 100 | CalibrationParameters gCalibrationTable[] = 101 | { 102 | { "alexnet", 0.6, 7.0 }, 103 | { "vgg19", 0.5, 5 }, 104 | { "googlenet", 1, 8.0 }, 105 | { "resnet-50", 0.61, 2.0 }, 106 | { "resnet-101", 0.51, 2.5 }, 107 | { "resnet-152", 0.4, 5.0 } 108 | }; 109 | 110 | static const int gCalibrationTableSize = sizeof(gCalibrationTable) / sizeof(CalibrationParameters); 111 | 112 | double quantileFromIndex(double quantileIndex) 113 | { 114 | return 1 - pow(10, -quantileIndex); 115 | } 116 | 117 | static const int CAL_BATCH_SIZE = 50; 118 | static const int FIRST_CAL_BATCH = 0, NB_CAL_BATCHES = 10; // calibrate over images 0-500 119 | static const int FIRST_CAL_SCORE_BATCH = 100, NB_CAL_SCORE_BATCHES = 100; // score over images 5000-10000 120 | 121 | 122 | void searchCalibrations(double firstCutoff, double cutoffIncrement, int nbCutoffs, 123 | double firstQuantileIndex, double quantileIndexIncrement, int nbQuantiles, 124 | float& bestScore, double& bestCutoff, double& bestQuantileIndex, Int8LegacyCalibrator& calibrator) 125 | { 126 | std::pair scoreModel(int batchSize, int firstBatch, int nbScoreBatches, nvinfer1::DataType type, nvinfer1::IInt8Calibrator* calibrator, bool quiet); 127 | 128 | for (int i = 0; i < nbCutoffs; i++) 129 | { 130 | for (int j = 0; j < nbQuantiles; j++) 131 | { 132 | double cutoff = firstCutoff + double(i) * cutoffIncrement, quantileIndex 
= firstQuantileIndex + double(j) * quantileIndexIncrement; 133 | calibrator.reset(cutoff, quantileFromIndex(quantileIndex)); 134 | float score = scoreModel(CAL_BATCH_SIZE, FIRST_CAL_SCORE_BATCH, NB_CAL_SCORE_BATCHES, nvinfer1::DataType::kINT8, &calibrator, true).first; // score the model in quiet mode 135 | 136 | std::cout << "Score: " << score << " (cutoff = " << cutoff << ", quantileIndex = " << quantileIndex << ")" << std::endl; 137 | if (score > bestScore) 138 | bestScore = score, bestCutoff = cutoff, bestQuantileIndex = quantileIndex; 139 | } 140 | } 141 | } 142 | 143 | void searchCalibrations(double& bestCutoff, double&bestQuantileIndex) 144 | { 145 | float bestScore = std::numeric_limits::lowest(); 146 | bestCutoff = 0; 147 | bestQuantileIndex = 0; 148 | 149 | std::cout << "searching calibrations" << std::endl; 150 | BatchStream calibrationStream(CAL_BATCH_SIZE, NB_CAL_BATCHES); 151 | Int8LegacyCalibrator calibrator(calibrationStream, 0, quantileFromIndex(0), false); // force calibration by ignoring region cache 152 | 153 | searchCalibrations(1, 0, 1, 2, 1, 7, bestScore, bestCutoff, bestQuantileIndex, calibrator); // search the space with cutoff = 1 (i.e. max'ing over the histogram) 154 | searchCalibrations(0.4, 0.05, 7, 2, 1, 7, bestScore, bestCutoff, bestQuantileIndex, calibrator); // search the space with cutoff = 0.4 to 0.7 (inclusive) 155 | 156 | // narrow in: if our best score is at cutoff 1 then search over quantiles, else over both dimensions 157 | if (bestScore == 1) 158 | searchCalibrations(1, 0, 1, bestQuantileIndex - 0.5, 0.1, 11, bestScore, bestCutoff, bestQuantileIndex, calibrator); 159 | else 160 | searchCalibrations(bestCutoff - 0.04, 0.01, 9, bestQuantileIndex - 0.5, 0.1, 11, bestScore, bestCutoff, bestQuantileIndex, calibrator); 161 | std::cout << "\n\nBest score: " << bestScore << " (cutoff = " << bestCutoff << ", quantileIndex = " << bestQuantileIndex << ")" << std::endl; 162 | } 163 | 164 | std::pair getQuantileAndCutoff(const char* networkName, bool search) 165 | { 166 | double cutoff = 1, quantileIndex = 6; 167 | if (search) 168 | searchCalibrations(cutoff, quantileIndex); 169 | else 170 | { 171 | for (int i = 0; i < gCalibrationTableSize; i++) 172 | { 173 | if (!strcmp(gCalibrationTable[i].networkName, networkName)) 174 | cutoff = gCalibrationTable[i].cutoff, quantileIndex = gCalibrationTable[i].quantileIndex; 175 | } 176 | std::cout << " using preset cutoff " << cutoff << " and quantile index " << quantileIndex << std::endl; 177 | } 178 | return std::make_pair(cutoff, quantileFromIndex(quantileIndex)); 179 | } 180 | 181 | 182 | 183 | #endif 184 | -------------------------------------------------------------------------------- /src/sampleINT8/int8_caffe.patch: -------------------------------------------------------------------------------- 1 | diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp 2 | index 66e6301..da615e5 100644 3 | --- a/src/caffe/layers/data_layer.cpp 4 | +++ b/src/caffe/layers/data_layer.cpp 5 | @@ -99,6 +99,26 @@ void DataLayer::load_batch(Batch* batch) { 6 | } 7 | timer.Stop(); 8 | batch_timer.Stop(); 9 | +#define LOG_BATCHES_FOR_INT8_TESTING 1 10 | +#if LOG_BATCHES_FOR_INT8_TESTING 11 | + static int sBatchId = 0; 12 | + char* batch_dump_dir = getenv("TENSORRT_INT8_BATCH_DIRECTORY"); 13 | + if(batch_dump_dir != 0) 14 | + { 15 | + char buffer[1000]; 16 | + sprintf(buffer, "batches/batch%d", sBatchId++); 17 | + FILE* file = fopen(buffer, "w"); 18 | + if(file==0) 19 | + abort(); 20 | + 21 | + int s[4] = { 
top_shape[0], top_shape[1], top_shape[2], top_shape[3] }; 22 | + fwrite(s, sizeof(int), 4, file); 23 | + fwrite(top_data, sizeof(float), top_shape[0]*top_shape[1]*top_shape[2]*top_shape[3], file); 24 | + fwrite(&top_label[0], sizeof(int), top_shape[0], file); 25 | + fclose(file); 26 | + } 27 | +#endif 28 | + 29 | DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; 30 | DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; 31 | DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; | fromdos 32 | -------------------------------------------------------------------------------- /src/sampleINT8/sampleINT8.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 头文件 20 | 21 | 22 | 头文件 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 源文件 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/sampleINT8/sampleINT8.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | mnist 10 | 11 | 12 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 13 | WindowsLocalDebugger 14 | mnist 15 | 16 | -------------------------------------------------------------------------------- /src/sampleLoadEngineStream/sampleLoadEngineStream.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NvInfer.h" 3 | #include "common.h" 4 | #include 5 | 6 | static Logger gLogger{nvinfer1::ILogger::Severity::kINFO}; 7 | static const int INPUT_H = 28; 8 | static const int INPUT_W = 28; 9 | static const int OUTPUT_SIZE = 10; 10 | 11 | 12 | #define RETURN_AND_LOG(ret, severity, message) \ 13 | do { \ 14 | std::string error_message = "sample_uff_mnist: " + std::string(message); \ 15 | gLogger.log(ILogger::Severity::k ## severity, error_message.c_str()); \ 16 | return (ret); \ 17 | } while(0) 18 | 19 | inline int64_t volume(const Dims& d) 20 | { 21 | int64_t v = 1; 22 | for (int64_t i = 0; i < d.nbDims; i++) 23 | v *= d.d[i]; 24 | return v; 25 | } 26 | 27 | 28 | inline unsigned int elementSize(DataType t) 29 | { 30 | switch (t) 31 | { 32 | case DataType::kINT32: 33 | // Fallthrough, same as kFLOAT 34 | case DataType::kFLOAT: return 4; 35 | case DataType::kHALF: return 2; 36 | case DataType::kINT8: return 1; 37 | } 38 | assert(0); 39 | return 0; 40 | } 41 | 42 | 43 | std::string locateFile(const std::string& input) 44 | { 45 | std::vector dirs{ "data/mnist/", "data/samples/mnist/" }; 46 | return locateFile(input, dirs); 47 | } 48 | 49 | 50 | // simple PGM (portable greyscale map) reader 51 | void readPGMFile(const std::string& filename, uint8_t buffer[INPUT_H*INPUT_W]) 52 | { 53 | readPGMFile(locateFile(filename), buffer, INPUT_H, INPUT_W); 54 | } 55 | 56 | 57 | void* safeCudaMalloc(size_t memSize) 58 | { 59 | void* deviceMem; 60 | CHECK(cudaMalloc(&deviceMem, memSize)); 61 | if (deviceMem == nullptr) 62 | { 63 | std::cerr << "Out of memory" << std::endl; 64 | exit(1); 65 | } 66 | return deviceMem; 67 | } 68 | 69 | 70 | void* 
createMnistCudaBuffer(int64_t eltCount, DataType dtype, int run) 71 | { 72 | /* in that specific case, eltCount == INPUT_H * INPUT_W */ 73 | assert(eltCount == INPUT_H * INPUT_W); 74 | assert(elementSize(dtype) == sizeof(float)); 75 | 76 | size_t memSize = eltCount * elementSize(dtype); 77 | float* inputs = new float[eltCount]; 78 | 79 | /* read PGM file */ 80 | uint8_t fileData[INPUT_H * INPUT_W]; 81 | readPGMFile(std::to_string(run) + ".pgm", fileData); 82 | 83 | /* display the number in an ascii representation */ 84 | std::cout << "\n\n\n---------------------------" << "\n\n\n" << std::endl; 85 | for (int i = 0; i < eltCount; i++) 86 | std::cout << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % INPUT_W) ? "" : "\n"); 87 | 88 | /* initialize the inputs buffer */ 89 | for (int i = 0; i < eltCount; i++) 90 | inputs[i] = 1.0f - float(fileData[i]) / 255.0f; 91 | 92 | void* deviceMem = safeCudaMalloc(memSize); 93 | CHECK(cudaMemcpy(deviceMem, inputs, memSize, cudaMemcpyHostToDevice)); 94 | 95 | delete[] inputs; 96 | return deviceMem; 97 | } 98 | 99 | 100 | 101 | 102 | std::vector> 103 | calculateBindingBufferSizes(const ICudaEngine& engine, int nbBindings, int batchSize) 104 | { 105 | std::vector> sizes; 106 | for (int i = 0; i < nbBindings; ++i) 107 | { 108 | Dims dims = engine.getBindingDimensions(i); 109 | DataType dtype = engine.getBindingDataType(i); 110 | 111 | int64_t eltCount = volume(dims) * batchSize; 112 | sizes.push_back(std::make_pair(eltCount, dtype)); 113 | } 114 | 115 | return sizes; 116 | } 117 | 118 | 119 | void printOutput(int64_t eltCount, DataType dtype, void* buffer) 120 | { 121 | std::cout << eltCount << " eltCount" << std::endl; 122 | assert(elementSize(dtype) == sizeof(float)); 123 | std::cout << "--- OUTPUT ---" << std::endl; 124 | 125 | size_t memSize = eltCount * elementSize(dtype); 126 | float* outputs = new float[eltCount]; 127 | CHECK(cudaMemcpy(outputs, buffer, memSize, cudaMemcpyDeviceToHost)); 128 | 129 | int maxIdx = 0; 130 | for (int i = 0; i < eltCount; ++i) 131 | if (outputs[i] > outputs[maxIdx]) 132 | maxIdx = i; 133 | 134 | std::ios::fmtflags prevSettings = std::cout.flags(); 135 | std::cout.setf(std::ios::fixed, std::ios::floatfield); 136 | std::cout.precision(6); 137 | for (int64_t eltIdx = 0; eltIdx < eltCount; ++eltIdx) 138 | { 139 | std::cout << eltIdx << " => " << setw(10) << outputs[eltIdx] << "\t : "; 140 | if (eltIdx == maxIdx) 141 | std::cout << "***"; 142 | std::cout << "\n"; 143 | } 144 | std::cout.flags(prevSettings); 145 | 146 | std::cout << std::endl; 147 | delete[] outputs; 148 | } 149 | 150 | 151 | void execute(ICudaEngine& engine) 152 | { 153 | IExecutionContext* context = engine.createExecutionContext(); 154 | 155 | int batchSize = 1; 156 | 157 | int nbBindings = engine.getNbBindings(); 158 | assert(nbBindings == 2); 159 | 160 | std::vector buffers(nbBindings); 161 | auto buffersSizes = calculateBindingBufferSizes(engine, nbBindings, batchSize); 162 | 163 | int bindingIdxInput = 0; 164 | for (int i = 0; i < nbBindings; ++i) 165 | { 166 | if (engine.bindingIsInput(i)) 167 | bindingIdxInput = i; 168 | else 169 | { 170 | auto bufferSizesOutput = buffersSizes[i]; 171 | buffers[i] = safeCudaMalloc(bufferSizesOutput.first * 172 | elementSize(bufferSizesOutput.second)); 173 | } 174 | } 175 | 176 | auto bufferSizesInput = buffersSizes[bindingIdxInput]; 177 | 178 | int iterations = 1; 179 | int numberRun = 10; 180 | for (int i = 0; i < iterations; i++) 181 | { 182 | float total = 0, ms; 183 | for (int run = 0; run < numberRun; run++) 184 | { 
185 | buffers[bindingIdxInput] = createMnistCudaBuffer(bufferSizesInput.first, 186 | bufferSizesInput.second, run); 187 | 188 | auto t_start = std::chrono::high_resolution_clock::now(); 189 | context->execute(batchSize, &buffers[0]); 190 | auto t_end = std::chrono::high_resolution_clock::now(); 191 | ms = std::chrono::duration(t_end - t_start).count(); 192 | total += ms; 193 | 194 | for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx) 195 | { 196 | if (engine.bindingIsInput(bindingIdx)) 197 | continue; 198 | 199 | auto bufferSizesOutput = buffersSizes[bindingIdx]; 200 | printOutput(bufferSizesOutput.first, bufferSizesOutput.second, 201 | buffers[bindingIdx]); 202 | } 203 | CHECK(cudaFree(buffers[bindingIdxInput])); 204 | } 205 | 206 | total /= numberRun; 207 | std::cout << "Average over " << numberRun << " runs is " << total << " ms." << std::endl; 208 | } 209 | 210 | for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx) 211 | if (!engine.bindingIsInput(bindingIdx)) 212 | CHECK(cudaFree(buffers[bindingIdx])); 213 | context->destroy(); 214 | } 215 | 216 | 217 | 218 | int main(int argc, char** argv) 219 | { 220 | nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger); 221 | nvinfer1::IPluginFactory* factory{ nullptr }; 222 | std::ifstream is("../../data/mnist/engineStream.bin", std::ios::binary); 223 | is.seekg(0, is.end); 224 | int length = is.tellg(); 225 | is.seekg(0, is.beg); 226 | 227 | try { 228 | char *buffer = new char[length]; 229 | is.read(buffer, length); 230 | is.close(); 231 | 232 | nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(buffer, length, factory); 233 | if (!engine) 234 | RETURN_AND_LOG(-1, ERROR, "load the engine failed"); 235 | execute(*engine); 236 | 237 | delete[] buffer; 238 | runtime->destroy(); 239 | engine->destroy(); 240 | 241 | system("pause"); 242 | return 0; 243 | } 244 | catch (...) { 245 | is.close(); 246 | } 247 | return 0; 248 | 249 | } -------------------------------------------------------------------------------- /src/sampleLoadEngineStream/sampleLoadEngineStream.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleLoadEngineStream/sampleLoadEngineStream.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleMLP/README.txt: -------------------------------------------------------------------------------- 1 | This is a simple Multi-Layer Perceptron (MLP) example showing how to generate a MLP that TensorRT can accelerate. 2 | This sample requires Tensorflow > 1.4 to be installed. 
3 | This MLP was trained via the following method: 4 | git clone https://github.com/aymericdamien/TensorFlow-Examples.git 5 | cd TensorFlow-Examples 6 | 7 | Apply the patch file, `update_mlp.patch` to save the final result with the command `patch -p1 < /samples/sampleMLP/update_mlp.patch` 8 | Train the MINST MLP with the command `python examples/3_NeuralNetworks/multilayer_perceptron.py` 9 | Convert the trained model weights to a format sampleMLP understands via the command `python /samples/sampleMLP/convert_weights.py -m /tmp/sampleMLP.ckpt -o sampleMLP` 10 | 11 | mkdir -p /data/mlp 12 | cp sampleMLP.wts2 /data/mlp/ 13 | 14 | To build the sample: 15 | cd /samples 16 | make 17 | 18 | To run the sample: 19 | cd /bin 20 | ./sample_mlp 21 | -------------------------------------------------------------------------------- /src/sampleMLP/convert_weights.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Script to convert from TensorFlow weights to TensorRT weights for multilayer perceptron sample. 4 | # Change the remap to properly remap the weights to the name from your trained model 5 | # to the sample expected format. 6 | 7 | import sys 8 | import struct 9 | import argparse 10 | 11 | try: 12 | from tensorflow.python import pywrap_tensorflow as pyTF 13 | except ImportError as err: 14 | sys.stderr.write("""Error: Failed to import module ({})""".format(err)) 15 | sys.exit() 16 | 17 | parser = argparse.ArgumentParser(description='TensorFlow to TensorRT Weight Dumper') 18 | 19 | parser.add_argument('-m', '--model', required=True, help='The checkpoint file basename, example basename(model.ckpt-766908.data-00000-of-00001) -> model.ckpt-766908') 20 | parser.add_argument('-o', '--output', required=True, help='The weight file to dump all the weights to.') 21 | 22 | opt = parser.parse_args() 23 | 24 | print "Outputting the trained weights in TensorRT's wts v2 format. This format is documented as:" 25 | print "Line 0: " 26 | print "Line 1-Num: [buffer name] [buffer type] [(buffer shape{e.g. (1, 2, 3)}] " 27 | 28 | inputbase = opt.model 29 | outputbase = opt.output 30 | 31 | # This dictionary translates from the TF weight names to the weight names expected 32 | # by the sampleMLP sample. This is the location that needs to be changed if training 33 | # something other than what is specified in README.txt. 
34 | remap = { 35 | 'Variable': 'hiddenWeights0', 36 | 'Variable_1': 'hiddenWeights1', 37 | 'Variable_2': 'outputWeights', 38 | 'Variable_3': 'hiddenBias0', 39 | 'Variable_4': 'hiddenBias1', 40 | 'Variable_5': 'outputBias' 41 | } 42 | 43 | def float_to_hex(f): 44 | return hex(struct.unpack(' 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 源文件 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/sampleMLP/sampleMLP.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleMLP/update_mlp.patch: -------------------------------------------------------------------------------- 1 | diff --git a/examples/3_NeuralNetworks/multilayer_perceptron.py b/examples/3_NeuralNetworks/multilayer_perceptron.py 2 | index cf04b01..44e3986 100644 3 | --- a/examples/3_NeuralNetworks/multilayer_perceptron.py 4 | +++ b/examples/3_NeuralNetworks/multilayer_perceptron.py 5 | @@ -58,11 +58,11 @@ biases = { 6 | # Create model 7 | def multilayer_perceptron(x): 8 | # Hidden fully connected layer with 256 neurons 9 | - layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1']) 10 | + layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['h1']), biases['b1'])) 11 | # Hidden fully connected layer with 256 neurons 12 | - layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']) 13 | + layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])) 14 | # Output fully connected layer with a neuron for each class 15 | - out_layer = tf.matmul(layer_2, weights['out']) + biases['out'] 16 | + out_layer = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['out']), biases['out'])) 17 | return out_layer 18 | 19 | # Construct model 20 | @@ -76,6 +76,9 @@ train_op = optimizer.minimize(loss_op) 21 | # Initializing the variables 22 | init = tf.global_variables_initializer() 23 | 24 | +# 'Saver' op to save and restore all the variables 25 | +saver = tf.train.Saver() 26 | + 27 | with tf.Session() as sess: 28 | sess.run(init) 29 | 30 | @@ -102,3 +105,5 @@ with tf.Session() as sess: 31 | # Calculate accuracy 32 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) 33 | print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels})) 34 | + # Save model weights to disk 35 | + save_path = saver.save(sess, "/tmp/sampleMLP.ckpt") 36 | -------------------------------------------------------------------------------- /src/sampleMNIST/README.md: -------------------------------------------------------------------------------- 1 | # NVIDIA TensorRT Sample "sampleMNIST" 2 | 3 | The sampleMNIST sample demonstrates how to: 4 | - Perform the basic setup and initialization of TensorRT 5 | - Import a trained Caffe MNIST digit classifier 6 | - Perform simple input preprocessing before running the engine, like mean 7 | normalization in this case 8 | - Verify the output from the 
inference 9 | 10 | ## Usage 11 | 12 | This sample can be run as: 13 | 14 | ./sample_mnist [-h] [--datadir=/path/to/data/dir/] [--useDLA=N] 15 | 16 | SampleMNIST reads two Caffe files to build the network: 17 | 18 | * `mnist.prototxt` - The prototxt file that contains the network design 19 | * `mnist.caffemodel` - The model file which contains the trained weights 20 | for the network 21 | * `mnist_mean.binaryproto` - The binaryproto file which contains the means 22 | 23 | By default, the sample expects these files to be in `data/samples/mnist/` or 24 | `data/mnist/`. The list of default directories can be changed by adding one or 25 | more paths with `--datadir=/new/path/` as a command line argument. 26 | -------------------------------------------------------------------------------- /src/sampleMNIST/sampleMNIST.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | 源文件 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/sampleMNIST/sampleMNIST.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleMNISTAPI/sampleMNISTAPI.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 15.0 23 | {015397B8-A9BC-42ED-B3A1-2151714DFD7C} 24 | sampleMNISTAPI 25 | 10.0.17134.0 26 | 27 | 28 | 29 | Application 30 | true 31 | v141 32 | MultiByte 33 | 34 | 35 | Application 36 | false 37 | v141 38 | true 39 | MultiByte 40 | 41 | 42 | Application 43 | true 44 | v141 45 | MultiByte 46 | 47 | 48 | Application 49 | false 50 | v141 51 | true 52 | MultiByte 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | $(SolutionDir)output\$(Platform)\$(Configuration)\$(ProjectName)\bin\ 74 | $(SolutionDir)output\$(Platform)\$(Configuration)\$(ProjectName)\tmp\ 75 | $(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib;$(LibraryPath) 76 | 77 | 78 | $(SolutionDir)output\$(Platform)\$(Configuration)\$(ProjectName)\bin\ 79 | $(SolutionDir)output\$(Platform)\$(Configuration)\$(ProjectName)\tmp\ 80 | $(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib;$(LibraryPath) 81 | 82 | 83 | 84 | Level3 85 | Disabled 86 | true 87 | true 88 | $(CUDA_PATH_V10_0)\include;$(SolutionDir)3rdparty\TensorRT-5.0.1.3\include;$(SolutionDir)common;$(SolutionDir)common\windows 89 | _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 90 | 91 | 92 | $(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib\*.lib;$(CUDA_PATH_V10_0)\lib\x64\*.lib;%(AdditionalDependencies) 93 | 94 | 95 | 96 | 97 | Level3 98 | Disabled 99 | true 100 | true 101 | 
$(CUDA_PATH_V10_0)\include;$(SolutionDir)3rdparty\TensorRT-5.0.1.3\include;$(SolutionDir)common;$(SolutionDir)common\windows 102 | _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 103 | 104 | 105 | $(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib\*.lib;$(CUDA_PATH_V10_0)\lib\x64\*.lib;%(AdditionalDependencies) 106 | 107 | 108 | 109 | 110 | Level3 111 | MaxSpeed 112 | true 113 | true 114 | true 115 | true 116 | 117 | 118 | true 119 | true 120 | 121 | 122 | 123 | 124 | Level3 125 | MaxSpeed 126 | true 127 | true 128 | true 129 | true 130 | 131 | 132 | true 133 | true 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /src/sampleMNISTAPI/sampleMNISTAPI.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleMNISTAPI/sampleMNISTAPI.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleMovieLens/README.txt: -------------------------------------------------------------------------------- 1 | This is a multilayer perceptron(MLP) based Neural Collaborative Filter Recommender example showing how to generate weights for MovieLens dataset for TensorRT that TensorRT can accelerate. 2 | This sample requires Tensorflow <= 1.7.0 to be installed. 3 | This MLP base NCF was trained via the following method: 4 | 5 | Building the sample: 6 | To build the sample: 7 | cd /samples 8 | make -j12 9 | 10 | To run the sample: 11 | 12 | 1. Running Inference: 13 | 14 | cd /bin 15 | ./sample_movielens (default batch=32 i.e. num of users) 16 | ./sample_movielens -b (batch=N i.e. num of users) 17 | ./sample_movielens --verbose (prints inputs, groundtruth values, expected vs predicted probabilities) 18 | 19 | 2. Help/Usage 20 | ./sample_movielens -h 21 | Usage: 22 | ./sample_movielens[-h] 23 | -h Display help information. All single dash optoins enable perf mode. 24 | -b Number of Users i.e. BatchSize (default BatchSize=32). 25 | --useDLA Specify a DLA engine for layers that support DLA. Value can range from 1 to N, where N is the number of DLA engines on the platform. 26 | --verbose Enable verbose perf mode. 
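    Example: `./sample_movielens -b 64 --verbose` runs inference for a batch of 64 users and
    prints the inputs, ground truth and expected vs. predicted probability for each of them.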
27 | 28 | Training model from scratch: 29 | Step 1: 30 | git clone https://github.com/hexiangnan/neural_collaborative_filtering.git 31 | cd neural_collaborative_filtering 32 | git checkout 0cd2681598507f1cc26d110083327069963f4433 33 | 34 | Step 2: 35 | Apply the patch file, `sampleMovieLensTraining.patch` to save dump the frozen protobuf file with command `patch -p1 < /samples/sampleMovieLens/sampleMovieLensTraining.patch` 36 | Train the MLP based NCF with the command `python MLP.py --dataset ml-1m --epochs 20 --batch_size 256 --layers [64,32,16,8] --reg_layers [0.01,0.01,0.01,0.01] --num_neg 4 --lr 0.001 --learner adam --verbose 1 --out 1' 37 | WARNING: Using 0s for reg_layers will cause undefined behavior when training the network. 38 | This step will dump two files: 39 | 1. movielens_ratings.txt 40 | 2. sampleMovieLens.pb 41 | 42 | Step 3: Convert the Frozen .pb file to .uff format using 43 | Command: `python3 convert_to_uff.py sampleMovieLens.pb -p preprocess.py` 44 | preprocess.py is a preprocessing step that needs to be applied to the TensorFlow graph before it can be used by TensorRT. 45 | The reason for this is that TensorFlow's concatenation operation accounts for the batch dimension while TensorRT's concatenation operation does not. 46 | 47 | Note: convert_to_uff.py utility will get installed here: /usr/local/bin/convert-to-uff. 48 | This utility gets installed with UFF .whl file installation shipped with TensorRT. 49 | For installation instructions, see: 50 | https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/#python and click on the 'TensoRT Python API' link 51 | 52 | Step 4: 53 | Copy sampleMovieLens.uff file to /data/movielens 54 | Copy movielens_ratings.txt file to /data/movielens 55 | 56 | Step 5: 57 | Follow instruction above to build and run the sample 58 | -------------------------------------------------------------------------------- /src/sampleMovieLens/preprocess.py: -------------------------------------------------------------------------------- 1 | import graphsurgeon as gs 2 | import tensorflow as tf 3 | 4 | def preprocess(dynamic_graph): 5 | axis = dynamic_graph.find_nodes_by_path("concatenate/concat/axis")[0] 6 | # Set axis to 2, because of discrepancies between TensorFlow and TensorRT. 
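    # (TensorFlow's concatenation axis counts the batch dimension while TensorRT's does not,
    # so the axis value recorded in the frozen graph must be overridden before UFF conversion.)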
7 | axis.attr["value"].tensor.int_val[0] = 2 8 | -------------------------------------------------------------------------------- /src/sampleMovieLens/sampleMovieLens.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 源文件 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/sampleMovieLens/sampleMovieLens.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleNMT/README.txt: -------------------------------------------------------------------------------- 1 | The NMT sample is using data fetched and trained using the NMT tutorial ( https://github.com/tensorflow/nmt ). 2 | 3 | 4 | ** Preparing the data ** 5 | 6 | The trained weights, directly usable by the sample, can be fetched from here 7 | https://developer.download.nvidia.com/compute/machine-learning/tensorrt/models/sampleNMT_weights.tar.gz 8 | 'deen/weights' directory should contain all the weight data needed. 9 | 10 | We do not distribute the text and vocabulary data. For the De-En model 11 | ( https://github.com/tensorflow/nmt#wmt-german-english ), 12 | the data needs to be fetched and generated using the following script 13 | https://github.com/tensorflow/nmt/blob/master/nmt/scripts/wmt16_en_de.sh . 14 | It might take some time, since it prepares 4.5M samples dataset for training as well. 15 | * Execute wmt16_en_de.sh and it will create 'wmt16_de_en' directory in the current directory 16 | * 'cd wmt16_de_en' 17 | * 'cp newstest2015.tok.bpe.32000.de newstest2015.tok.bpe.32000.en vocab.bpe.32000.de vocab.bpe.32000.en /deen/.' 18 | 19 | 20 | ** Running the sample ** 21 | 22 | * List all options supported: sample_nmt --help 23 | * Run the sample to generate 'translation_output.txt' : sample_nmt --data_dir=/deen --data_writer=text 24 | * Get the BLEU score for the first 100 sentences : sample_nmt --data_dir=/deen --max_inference_samples=100 25 | 26 | 27 | ** Training De-En model using Tensorflow NMT framework and importing the weight data into the sample. ** 28 | 29 | This section is only relevant if one decides to train the model. 30 | 31 | * The training data set needs to be fetched and preprocessed as was discussed earlier. 32 | * Fetch NMT framework : 'git clone https://github.com/tensorflow/nmt.git' 33 | * Take a look at 'nmt/nmt/standard_hparams/wmt16.json' 34 | The sample currently only implements unidirectional LSTMs and Luong's attention. So, training should account for this. 
35 | edit relevant JSON config to have {"attention": "luong", "encoder_type": "uni", ...} 36 | Below is the config we used for training: 37 | { 38 | "attention": "luong", 39 | "attention_architecture": "standard", 40 | "batch_size": 128, 41 | "colocate_gradients_with_ops": true, 42 | "dropout": 0.2, 43 | "encoder_type": "uni", 44 | "eos": "", 45 | "forget_bias": 1.0, 46 | "infer_batch_size": 32, 47 | "init_weight": 0.1, 48 | "learning_rate": 1.0, 49 | "max_gradient_norm": 5.0, 50 | "metrics": ["bleu"], 51 | "num_buckets": 5, 52 | "num_layers": 2, 53 | "num_train_steps": 340000, 54 | "decay_scheme": "luong10", 55 | "num_units": 1024, 56 | "optimizer": "sgd", 57 | "residual": false, 58 | "share_vocab": false, 59 | "subword_option": "bpe", 60 | "sos": "", 61 | "src_max_len": 50, 62 | "src_max_len_infer": null, 63 | "steps_per_external_eval": null, 64 | "steps_per_stats": 100, 65 | "tgt_max_len": 50, 66 | "tgt_max_len_infer": null, 67 | "time_major": true, 68 | "unit_type": "lstm", 69 | "beam_width": 10 70 | } 71 | 72 | The following line can be used for training, provided the training dataset is /tmp/wmt16_de_en: 73 | 74 | python -m nmt.nmt \ 75 | --src=de --tgt=en \ 76 | --hparams_path=/wmt16.json \ 77 | --out_dir=/tmp/deen_nmt \ 78 | --vocab_prefix=/tmp/wmt16_de_en/vocab.bpe.32000 \ 79 | --train_prefix=/tmp/wmt16_de_en/train.tok.clean.bpe.32000 \ 80 | --dev_prefix=/tmp/wmt16_de_en/newstest2013.tok.bpe.32000 \ 81 | --test_prefix=/tmp/wmt16_de_en/newstest2015.tok.bpe.32000 82 | 83 | The following line can be used for the inference in Tensorflow: 84 | python -m nmt.nmt \ 85 | --src=de --tgt=en \ 86 | --ckpt=/tmp/deen_nmt/translate.ckpt-340000 \ 87 | --hparams_path=/wmt16.json \ 88 | --out_dir=/tmp/deen \ 89 | --vocab_prefix=/tmp/wmt16_de_en/vocab.bpe.32000 \ 90 | --inference_input_file=/tmp/wmt16_de_en/newstest2015.tok.bpe.32000.de \ 91 | --inference_output_file=/tmp/deen/output_infer \ 92 | --inference_ref_file=/tmp/wmt16_de_en/newstest2015.tok.bpe.32000.en 93 | 94 | * Importing Tensorflow checkpoint into the sample * 95 | 96 | We provide a tool to convert Tensorflow checkpoint from the NMT framework into binary weight data, readable by the sample. It was tested using Tensorflow 1.6. The tool by default imports the NMT framework. 
97 | 98 | * git clone https://github.com/tensorflow/nmt.git 99 | * python ./chptToBin.py \ 100 | --src=de --tgt=en \ 101 | --ckpt=/tmp/deen_nmt/translate.ckpt-340000 \ 102 | --hparams_path=/wmt16.json \ 103 | --out_dir=/tmp/deen \ 104 | --vocab_prefix=/tmp/wmt16_de_en/vocab.bpe.32000 \ 105 | --inference_input_file=/tmp/wmt16_de_en/newstest2015.tok.bpe.32000.de \ 106 | --inference_output_file=/tmp/deen/output_infer \ 107 | --inference_ref_file=/tmp/wmt16_de_en/newstest2015.tok.bpe.32000.en -------------------------------------------------------------------------------- /src/sampleNMT/component.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_COMPONENT_ 2 | #define SAMPLE_NMT_COMPONENT_ 3 | 4 | #include 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | /** \class Component 10 | * 11 | * \brief a functional part of the sample 12 | * 13 | */ 14 | class Component 15 | { 16 | public: 17 | typedef std::shared_ptr ptr; 18 | 19 | /** 20 | * \brief get the textual description of the component 21 | */ 22 | virtual std::string getInfo() = 0; 23 | 24 | protected: 25 | Component() = default; 26 | 27 | virtual ~Component() = default; 28 | }; 29 | } 30 | 31 | #endif // SAMPLE_NMT_COMPONENT_ 32 | -------------------------------------------------------------------------------- /src/sampleNMT/cudaError.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_CUDA_ERROR_ 2 | #define SAMPLE_NMT_CUDA_ERROR_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define CUDA_CHECK(callstr) \ 9 | { \ 10 | cudaError_t error_code = callstr; \ 11 | if (error_code != cudaSuccess) { \ 12 | std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \ 13 | assert(0); \ 14 | } \ 15 | } 16 | 17 | #endif // SAMPLE_NMT_CUDA_ERROR_ 18 | -------------------------------------------------------------------------------- /src/sampleNMT/data/benchmarkWriter.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmarkWriter.h" 2 | 3 | #include 4 | 5 | namespace nmtSample 6 | { 7 | BenchmarkWriter::BenchmarkWriter() 8 | : mSampleCount(0) 9 | , mInputTokenCount(0) 10 | , mOutputTokenCount(0) 11 | , mStartTS(std::chrono::high_resolution_clock::now()) 12 | { 13 | } 14 | 15 | void BenchmarkWriter::write( 16 | const int* hOutputData, 17 | int actualOutputSequenceLength, 18 | int actualInputSequenceLength) 19 | { 20 | ++mSampleCount; 21 | mInputTokenCount += actualInputSequenceLength; 22 | mOutputTokenCount += actualOutputSequenceLength; 23 | } 24 | 25 | void BenchmarkWriter::initialize() 26 | { 27 | mStartTS = std::chrono::high_resolution_clock::now(); 28 | } 29 | 30 | void BenchmarkWriter::finalize() 31 | { 32 | std::chrono::duration sec = std::chrono::high_resolution_clock::now() - mStartTS; 33 | int totalTokenCount = mInputTokenCount + mOutputTokenCount; 34 | std::cout << mSampleCount << " sequences generated in " << sec.count() << " seconds, " << (mSampleCount / sec.count()) << " samples/sec" << std::endl; 35 | std::cout << totalTokenCount << " tokens processed (source and destination), " << (totalTokenCount / sec.count()) << " tokens/sec" << std::endl; 36 | } 37 | 38 | std::string BenchmarkWriter::getInfo() 39 | { 40 | return "Benchmark Writer"; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/sampleNMT/data/benchmarkWriter.h: 
-------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_BENCHMARK_WRITER_ 2 | #define SAMPLE_NMT_BENCHMARK_WRITER_ 3 | 4 | #include 5 | #include 6 | 7 | #include "dataWriter.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class BenchmarkWriter 12 | * 13 | * \brief all it does is to measure the performance of sequence generation 14 | * 15 | */ 16 | class BenchmarkWriter : public DataWriter 17 | { 18 | public: 19 | BenchmarkWriter(); 20 | 21 | void write( 22 | const int* hOutputData, 23 | int actualOutputSequenceLength, 24 | int actualInputSequenceLength) override; 25 | 26 | void initialize() override; 27 | 28 | void finalize() override; 29 | 30 | std::string getInfo() override; 31 | 32 | ~BenchmarkWriter() override = default; 33 | 34 | private: 35 | int mSampleCount; 36 | int mInputTokenCount; 37 | int mOutputTokenCount; 38 | std::chrono::high_resolution_clock::time_point mStartTS; 39 | }; 40 | } 41 | 42 | #endif // SAMPLE_NMT_BENCHMARK_WRITER_ 43 | -------------------------------------------------------------------------------- /src/sampleNMT/data/bleuScoreWriter.cpp: -------------------------------------------------------------------------------- 1 | #include "bleuScoreWriter.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace nmtSample 13 | { 14 | 15 | typedef std::vector Segment_t; 16 | typedef std::map Count_t; 17 | int read(std::vector& samples, std::shared_ptr input, int samplesToRead = 1) 18 | { 19 | std::string line; 20 | int lineCounter = 0; 21 | Segment_t tokens; 22 | samples.resize(0); 23 | std::string pattern("@@ "); 24 | while (lineCounter < samplesToRead && std::getline(*input, line)) 25 | { 26 | // if clean and handle BPE or SPM outputs is required 27 | std::size_t p0 = 0; 28 | while((p0 = line.find(pattern, p0)) != std::string::npos) 29 | { 30 | line.replace(p0, pattern.length(), ""); 31 | } 32 | 33 | // generate error if those special characters exist. Windows needs explicit encoding. 
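        // "\u2581" is the word-boundary marker emitted by SentencePiece; the u8 prefix used on MSVC
        // below forces the literal to be encoded as UTF-8, since the compiler's execution character
        // set is not guaranteed to be UTF-8 on Windows.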
34 | #ifdef _MSC_VER 35 | p0 = line.find(u8"\u2581"); 36 | #else 37 | p0 = line.find("\u2581"); 38 | #endif 39 | assert((p0 == std::string::npos)); 40 | std::istringstream ss(line); 41 | std::string token; 42 | tokens.resize(0); 43 | while (ss >> token) 44 | { 45 | tokens.emplace_back(token); 46 | } 47 | samples.emplace_back(tokens); 48 | lineCounter++; 49 | } 50 | return lineCounter; 51 | } 52 | 53 | Count_t ngramCounts(const Segment_t& segment, int maxOrder = 4) 54 | { 55 | Count_t ngramCounts; 56 | 57 | for (int order = 1; order < maxOrder + 1; order++) 58 | { 59 | for (int i = 0; i < static_cast(segment.size()) - order + 1; i++) 60 | { 61 | Segment_t ngram; 62 | for (int j = i; j < i + order; j++) 63 | ngram.emplace_back(segment[j]); 64 | 65 | auto it = ngramCounts.find(ngram); 66 | if (it != ngramCounts.end()) 67 | { 68 | it->second++; 69 | } 70 | else 71 | ngramCounts[ngram] = 1; 72 | } 73 | } 74 | 75 | return ngramCounts; 76 | } 77 | 78 | Count_t ngramCountIntersection(const Count_t& cnt0, const Count_t& cnt1) 79 | { 80 | Count_t overlap; 81 | // merge the maps 82 | auto it0 = cnt0.begin(), it1 = cnt1.begin(), end0 = cnt0.end(), end1 = cnt1.end(); 83 | while (it0 != end0 && it1 != end1) 84 | { 85 | if (it0->first == it1->first) 86 | { 87 | overlap.emplace(it0->first, std::min(it0->second, it1->second)); 88 | it0++; 89 | it1++; 90 | } 91 | else 92 | { 93 | if (it0->first < it1->first) 94 | it0++; 95 | else 96 | it1++; 97 | } 98 | } 99 | return overlap; 100 | } 101 | 102 | void accumulateBLEU(const std::vector& referenceSamples, 103 | const std::vector& outputSamples, 104 | int maxOrder, 105 | size_t& referenceLength, 106 | size_t& translationLength, 107 | std::vector& matchesByOrder, 108 | std::vector& possibleMatchesByOrder) 109 | { 110 | assert(referenceSamples.size() == outputSamples.size()); 111 | auto reference = referenceSamples.begin(); 112 | auto translation = outputSamples.begin(); 113 | 114 | while (translation != outputSamples.end()) 115 | { 116 | referenceLength += reference->size(); 117 | translationLength += translation->size(); 118 | 119 | Count_t refNgramCounts = ngramCounts(*reference); 120 | Count_t outputNgramCounts = ngramCounts(*translation); 121 | Count_t overlap = ngramCountIntersection(outputNgramCounts, refNgramCounts); 122 | for (auto& ngram : overlap) 123 | { 124 | matchesByOrder[ngram.first.size() - 1] += ngram.second; 125 | } 126 | for (int order = 1; order < maxOrder + 1; order++) 127 | { 128 | int possibleMatches = static_cast(translation->size()) - order + 1; 129 | if (possibleMatches > 0) 130 | possibleMatchesByOrder[order - 1] += possibleMatches; 131 | } 132 | ++translation; 133 | ++reference; 134 | } 135 | } 136 | 137 | BLEUScoreWriter::BLEUScoreWriter(std::shared_ptr referenceTextInput, Vocabulary::ptr vocabulary, int maxOrder) 138 | : mReferenceInput(referenceTextInput) 139 | , mVocabulary(vocabulary) 140 | , mReferenceLength(0) 141 | , mTranslationLength(0) 142 | , mMaxOrder(maxOrder) 143 | , mSmooth(false) 144 | , mMatchesByOrder(maxOrder, 0) 145 | , mPossibleMatchesByOrder(maxOrder, 0) 146 | { 147 | } 148 | 149 | void BLEUScoreWriter::write( 150 | const int* hOutputData, 151 | int actualOutputSequenceLength, 152 | int actualInputSequenceLength) 153 | { 154 | std::vector outputSamples; 155 | std::vector referenceSamples; 156 | int numReferenceSamples = read(referenceSamples, mReferenceInput, 1); 157 | assert(numReferenceSamples == 1); 158 | 159 | Segment_t segment; 160 | std::stringstream 
filteredSentence(DataWriter::generateText(actualOutputSequenceLength, hOutputData, mVocabulary)); 161 | std::string token; 162 | while (filteredSentence >> token) 163 | { 164 | segment.emplace_back(token); 165 | } 166 | outputSamples.emplace_back(segment); 167 | 168 | accumulateBLEU(referenceSamples, outputSamples, mMaxOrder, mReferenceLength, mTranslationLength, mMatchesByOrder, mPossibleMatchesByOrder); 169 | } 170 | 171 | void BLEUScoreWriter::initialize() 172 | { 173 | } 174 | 175 | void BLEUScoreWriter::finalize() 176 | { 177 | std::cout << "BLEU score = " << getScore() << std::endl; 178 | } 179 | 180 | float BLEUScoreWriter::getScore() const 181 | { 182 | std::vector precisions(mMaxOrder, 0.0); 183 | for (int i = 0; i < mMaxOrder; i++) 184 | { 185 | if (mSmooth) 186 | { 187 | precisions[i] = ((mMatchesByOrder[i] + 1.) / (mPossibleMatchesByOrder[i] + 1.)); 188 | } 189 | else 190 | { 191 | if (mPossibleMatchesByOrder[i] > 0) 192 | precisions[i] = (static_cast(mMatchesByOrder[i]) / mPossibleMatchesByOrder[i]); 193 | else 194 | precisions[i] = 0.0; 195 | } 196 | } 197 | double pLogSum, geoMean; 198 | if (*std::min_element(precisions.begin(), precisions.end()) > 0.0) 199 | { 200 | pLogSum = 0.0; 201 | for (auto p : precisions) 202 | pLogSum += (1. / mMaxOrder) * log(p); 203 | geoMean = exp(pLogSum); 204 | } 205 | else 206 | geoMean = 0.0; 207 | 208 | double ratio = static_cast(mTranslationLength) / mReferenceLength; 209 | double bp; 210 | bp = (ratio > 1.0) ? 1.0 : exp(1.0 - 1.0 / ratio); 211 | return static_cast(geoMean * bp * 100.0); 212 | } 213 | 214 | std::string BLEUScoreWriter::getInfo() 215 | { 216 | std::stringstream ss; 217 | ss << "BLEU Score Writer, max order = " << mMaxOrder; 218 | return ss.str(); 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /src/sampleNMT/data/bleuScoreWriter.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_BLEU_SCORE_WRITER_ 2 | #define SAMPLE_NMT_BLEU_SCORE_WRITER_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "dataWriter.h" 9 | #include "vocabulary.h" 10 | 11 | namespace nmtSample 12 | { 13 | /** \class BLEUScoreWriter 14 | * 15 | * \brief all it does is to evaluate BLEU score 16 | * 17 | */ 18 | class BLEUScoreWriter : public DataWriter 19 | { 20 | public: 21 | BLEUScoreWriter(std::shared_ptr referenceTextInput, 22 | Vocabulary::ptr vocabulary, 23 | int maxOrder = 4); 24 | 25 | void write( 26 | const int* hOutputData, 27 | int actualOutputSequenceLength, 28 | int actualInputSequenceLength) override; 29 | 30 | void initialize() override; 31 | 32 | void finalize() override; 33 | 34 | std::string getInfo() override; 35 | 36 | float getScore() const; 37 | 38 | ~BLEUScoreWriter() override = default; 39 | 40 | private: 41 | std::shared_ptr mReferenceInput; 42 | Vocabulary::ptr mVocabulary; 43 | size_t mReferenceLength; 44 | size_t mTranslationLength; 45 | int mMaxOrder; 46 | bool mSmooth; 47 | std::vector mMatchesByOrder; 48 | std::vector mPossibleMatchesByOrder; 49 | }; 50 | } 51 | 52 | #endif // SAMPLE_NMT_BLEU_SCORE_WRITER_ 53 | -------------------------------------------------------------------------------- /src/sampleNMT/data/dataReader.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_DATA_READER_ 2 | #define SAMPLE_NMT_DATA_READER_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class DataReader 11 | * 12 | * \brief reader 
of sequences of data 13 | * 14 | */ 15 | class DataReader : public Component 16 | { 17 | public: 18 | typedef std::shared_ptr ptr; 19 | 20 | DataReader() = default; 21 | 22 | /** 23 | * \brief reads the batch of smaples/sequences 24 | * 25 | * \return the actual number of samples read 26 | */ 27 | virtual int read( 28 | int samplesToRead, 29 | int maxInputSequenceLength, 30 | int* hInputData, 31 | int* hActualInputSequenceLengths) 32 | = 0; 33 | 34 | /** 35 | * \brief Reset the reader position, the data reader is ready to read the data from th ebeginning again after the function returns 36 | */ 37 | virtual void reset() = 0; 38 | 39 | ~DataReader() override = default; 40 | }; 41 | } 42 | 43 | #endif // SAMPLE_NMT_DATA_READER_ 44 | -------------------------------------------------------------------------------- /src/sampleNMT/data/dataWriter.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "dataWriter.h" 4 | 5 | namespace nmtSample 6 | { 7 | std::string DataWriter::generateText(int sequenceLength, const int* currentOutputData, Vocabulary::ptr vocabulary) 8 | { 9 | // if clean and handle BPE outputs is required 10 | std::string delimiter = "@@"; 11 | size_t delimiterSize = delimiter.size(); 12 | std::stringstream sentence; 13 | std::string word(""); 14 | const char* wordDelimiter = ""; 15 | for (int i = 0; i < sequenceLength; ++i) 16 | { 17 | int id = currentOutputData[i]; 18 | if (id != vocabulary->getEndSequenceId()) 19 | { 20 | std::string token = vocabulary->getToken(id); 21 | if ((token.size() >= delimiterSize) && (token.compare(token.size() - delimiterSize, delimiterSize, delimiter) == 0)) 22 | { 23 | word = word + token.erase(token.size() - delimiterSize, delimiterSize); 24 | } 25 | else 26 | { 27 | word = word + token; 28 | sentence << wordDelimiter; 29 | sentence << word; 30 | word = ""; 31 | wordDelimiter = " "; 32 | } 33 | } 34 | } 35 | return sentence.str(); 36 | } 37 | } -------------------------------------------------------------------------------- /src/sampleNMT/data/dataWriter.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_DATA_WRITER_ 2 | #define SAMPLE_NMT_DATA_WRITER_ 3 | 4 | #include 5 | #include 6 | 7 | #include "../component.h" 8 | #include "vocabulary.h" 9 | 10 | namespace nmtSample 11 | { 12 | /** \class DataWriter 13 | * 14 | * \brief writer of sequences of data 15 | * 16 | */ 17 | class DataWriter : public Component 18 | { 19 | public: 20 | typedef std::shared_ptr ptr; 21 | 22 | DataWriter() = default; 23 | 24 | /** 25 | * \brief write the generated sequence 26 | */ 27 | virtual void write( 28 | const int* hOutputData, 29 | int actualOutputSequenceLength, 30 | int actualInputSequenceLength) 31 | = 0; 32 | 33 | /** 34 | * \brief it is called right before inference starts 35 | */ 36 | virtual void initialize() = 0; 37 | 38 | /** 39 | * \brief it is called right after inference ends 40 | */ 41 | virtual void finalize() = 0; 42 | 43 | ~DataWriter() override = default; 44 | 45 | protected: 46 | static std::string generateText(int sequenceLength, const int* currentOutputData, Vocabulary::ptr vocabulary); 47 | }; 48 | } 49 | 50 | #endif // SAMPLE_NMT_DATA_WRITER_ 51 | -------------------------------------------------------------------------------- /src/sampleNMT/data/limitedSamplesDataReader.cpp: -------------------------------------------------------------------------------- 1 | #include "limitedSamplesDataReader.h" 2 | 3 | #include 4 | #include 
5 | 6 | namespace nmtSample 7 | { 8 | LimitedSamplesDataReader::LimitedSamplesDataReader(int maxSamplesToRead, DataReader::ptr originalDataReader) 9 | : gMaxSamplesToRead(maxSamplesToRead) 10 | , gOriginalDataReader(originalDataReader) 11 | , gCurrentPosition(0) 12 | { 13 | } 14 | 15 | int LimitedSamplesDataReader::read( 16 | int samplesToRead, 17 | int maxInputSequenceLength, 18 | int* hInputData, 19 | int* hActualInputSequenceLengths) 20 | { 21 | int limitedSmplesToRead = std::min(samplesToRead, std::max(gMaxSamplesToRead - gCurrentPosition, 0)); 22 | int samplesRead = gOriginalDataReader->read(limitedSmplesToRead, maxInputSequenceLength, hInputData, hActualInputSequenceLengths); 23 | gCurrentPosition += samplesRead; 24 | return samplesRead; 25 | } 26 | 27 | void LimitedSamplesDataReader::reset() 28 | { 29 | gOriginalDataReader->reset(); 30 | gCurrentPosition = 0; 31 | } 32 | 33 | std::string LimitedSamplesDataReader::getInfo() 34 | { 35 | std::stringstream ss; 36 | ss << "Limited Samples Reader, max samples = " << gMaxSamplesToRead << ", original reader info: " << gOriginalDataReader->getInfo(); 37 | return ss.str(); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/sampleNMT/data/limitedSamplesDataReader.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_LIMITED_SAMPLES_DATA_READER_ 2 | #define SAMPLE_NMT_LIMITED_SAMPLES_DATA_READER_ 3 | 4 | #include "dataReader.h" 5 | 6 | namespace nmtSample 7 | { 8 | /** \class LimitedSamplesDataReader 9 | * 10 | * \brief wraps another data reader and limits the number of samples to read 11 | * 12 | */ 13 | class LimitedSamplesDataReader : public DataReader 14 | { 15 | public: 16 | LimitedSamplesDataReader(int maxSamplesToRead, DataReader::ptr originalDataReader); 17 | 18 | int read( 19 | int samplesToRead, 20 | int maxInputSequenceLength, 21 | int* hInputData, 22 | int* hActualInputSequenceLengths) override; 23 | 24 | void reset() override; 25 | 26 | std::string getInfo() override; 27 | 28 | private: 29 | int gMaxSamplesToRead; 30 | DataReader::ptr gOriginalDataReader; 31 | int gCurrentPosition; 32 | }; 33 | } 34 | 35 | #endif // SAMPLE_NMT_LIMITED_SAMPLES_DATA_READER_ 36 | -------------------------------------------------------------------------------- /src/sampleNMT/data/sequenceProperties.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_SEQUENCE_PROPERTIES_ 2 | #define SAMPLE_NMT_SEQUENCE_PROPERTIES_ 3 | 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | /** \class SequenceProperties 9 | * 10 | * \brief provides encoder/decoder relevant properties of sequences 11 | * 12 | */ 13 | class SequenceProperties 14 | { 15 | public: 16 | typedef std::shared_ptr ptr; 17 | 18 | SequenceProperties() = default; 19 | 20 | virtual int getStartSequenceId() = 0; 21 | 22 | virtual int getEndSequenceId() = 0; 23 | 24 | virtual ~SequenceProperties() = default; 25 | }; 26 | } 27 | 28 | #endif // SAMPLE_NMT_SEQUENCE_PROPERTIES_ 29 | -------------------------------------------------------------------------------- /src/sampleNMT/data/textReader.cpp: -------------------------------------------------------------------------------- 1 | #include "textReader.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nmtSample 9 | { 10 | TextReader::TextReader(std::shared_ptr textInput, Vocabulary::ptr vocabulary) 11 | : mInput(textInput) 12 | , mVocabulary(vocabulary) 13 | { 14 | } 15 | 16 | 
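// Reads up to samplesToRead lines from the input stream, maps each whitespace-separated token to its
// vocabulary id, pads the unused tail of every row with the end-of-sequence id, records the actual
// sequence lengths, and returns the number of lines actually read.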
int TextReader::read( 17 | int samplesToRead, 18 | int maxInputSequenceLength, 19 | int* hInputData, 20 | int* hActualInputSequenceLengths) 21 | { 22 | std::setlocale(LC_ALL, "en_US.UTF-8"); 23 | std::string line; 24 | 25 | int lineCounter = 0; 26 | while (lineCounter < samplesToRead && std::getline(*mInput, line)) 27 | { 28 | std::istringstream ss(line); 29 | std::string token; 30 | int tokenCounter = 0; 31 | while ((ss >> token) && (tokenCounter < maxInputSequenceLength)) 32 | { 33 | hInputData[maxInputSequenceLength * lineCounter + tokenCounter] = mVocabulary->getId(token); 34 | tokenCounter++; 35 | } 36 | 37 | hActualInputSequenceLengths[lineCounter] = tokenCounter; 38 | 39 | // Fill unused values with valid vocabulary ID, it doesn't necessary have to be eos 40 | std::fill(hInputData + maxInputSequenceLength * lineCounter + tokenCounter, hInputData + maxInputSequenceLength * (lineCounter + 1), mVocabulary->getEndSequenceId()); 41 | 42 | lineCounter++; 43 | } 44 | return lineCounter; 45 | } 46 | 47 | void TextReader::reset() 48 | { 49 | mInput->seekg(0, mInput->beg); 50 | } 51 | 52 | std::string TextReader::getInfo() 53 | { 54 | std::stringstream ss; 55 | ss << "Text Reader, vocabulary size = " << mVocabulary->getSize(); 56 | return ss.str(); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/sampleNMT/data/textReader.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_TEXT_READER_ 2 | #define SAMPLE_NMT_TEXT_READER_ 3 | 4 | #include "dataReader.h" 5 | #include "vocabulary.h" 6 | #include 7 | #include 8 | #include 9 | 10 | namespace nmtSample 11 | { 12 | /** \class TextReader 13 | * 14 | * \brief reads sequences of data from input stream 15 | * 16 | */ 17 | class TextReader : public DataReader 18 | { 19 | public: 20 | TextReader(std::shared_ptr textInput, Vocabulary::ptr vocabulary); 21 | 22 | int read( 23 | int samplesToRead, 24 | int maxInputSequenceLength, 25 | int* hInputData, 26 | int* hActualInputSequenceLengths) override; 27 | 28 | void reset() override; 29 | 30 | std::string getInfo() override; 31 | 32 | private: 33 | std::shared_ptr mInput; 34 | Vocabulary::ptr mVocabulary; 35 | }; 36 | } 37 | 38 | #endif // SAMPLE_NMT_TEXT_READER_ 39 | -------------------------------------------------------------------------------- /src/sampleNMT/data/textWriter.cpp: -------------------------------------------------------------------------------- 1 | #include "textWriter.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | TextWriter::TextWriter(std::shared_ptr textOnput, Vocabulary::ptr vocabulary) 10 | : mOutput(textOnput) 11 | , mVocabulary(vocabulary) 12 | { 13 | } 14 | 15 | void TextWriter::write( 16 | const int* hOutputData, 17 | int actualOutputSequenceLength, 18 | int actualInputSequenceLength) 19 | { 20 | // if clean and handle BPE outputs is required 21 | *mOutput << DataWriter::generateText(actualOutputSequenceLength, hOutputData, mVocabulary) << "\n"; 22 | } 23 | 24 | void TextWriter::initialize() 25 | { 26 | } 27 | 28 | void TextWriter::finalize() 29 | { 30 | } 31 | 32 | std::string TextWriter::getInfo() 33 | { 34 | std::stringstream ss; 35 | ss << "Text Writer, vocabulary size = " << mVocabulary->getSize(); 36 | return ss.str(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/sampleNMT/data/textWriter.h: -------------------------------------------------------------------------------- 1 
| #ifndef SAMPLE_NMT_TEXT_WRITER_ 2 | #define SAMPLE_NMT_TEXT_WRITER_ 3 | 4 | #include 5 | #include 6 | 7 | #include "dataWriter.h" 8 | #include "vocabulary.h" 9 | 10 | namespace nmtSample 11 | { 12 | /** \class TextReader 13 | * 14 | * \brief writes sequences of data into output stream 15 | * 16 | */ 17 | class TextWriter : public DataWriter 18 | { 19 | public: 20 | TextWriter(std::shared_ptr textOnput, Vocabulary::ptr vocabulary); 21 | 22 | void write( 23 | const int* hOutputData, 24 | int actualOutputSequenceLength, 25 | int actualInputSequenceLength) override; 26 | 27 | void initialize() override; 28 | 29 | void finalize() override; 30 | 31 | std::string getInfo() override; 32 | 33 | ~TextWriter() override = default; 34 | 35 | private: 36 | std::shared_ptr mOutput; 37 | Vocabulary::ptr mVocabulary; 38 | }; 39 | } 40 | 41 | #endif // SAMPLE_NMT_TEXT_WRITER_ 42 | -------------------------------------------------------------------------------- /src/sampleNMT/data/vocabulary.cpp: -------------------------------------------------------------------------------- 1 | #include "vocabulary.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | const std::string Vocabulary::mSosStr = ""; 10 | const std::string Vocabulary::mEosStr = ""; 11 | const std::string Vocabulary::mUnkStr = ""; 12 | 13 | Vocabulary::Vocabulary() 14 | : mNumTokens(0) 15 | { 16 | } 17 | 18 | void Vocabulary::add(const std::string& token) 19 | { 20 | assert(mTokenToId.find(token) == mTokenToId.end()); 21 | mTokenToId[token] = mNumTokens; 22 | mIdToToken.push_back(token); 23 | mNumTokens++; 24 | } 25 | 26 | int Vocabulary::getId(const std::string& token) const 27 | { 28 | auto it = mTokenToId.find(token); 29 | if (it != mTokenToId.end()) 30 | return it->second; 31 | return mUnkId; 32 | } 33 | 34 | std::string Vocabulary::getToken(int id) const 35 | { 36 | assert(id < mNumTokens); 37 | return mIdToToken[id]; 38 | } 39 | 40 | int Vocabulary::getSize() const 41 | { 42 | return mNumTokens; 43 | } 44 | 45 | std::istream& operator>>(std::istream& input, Vocabulary& value) 46 | { 47 | // stream should contain "", "" and "" tokens 48 | std::setlocale(LC_ALL, "en_US.UTF-8"); 49 | std::string line; 50 | std::string word; 51 | while (input >> word) 52 | { 53 | value.add(word); 54 | } 55 | 56 | { 57 | auto it = value.mTokenToId.find(Vocabulary::mSosStr); 58 | assert(it != value.mTokenToId.end()); 59 | value.mSosId = it->second; 60 | } 61 | 62 | { 63 | auto it = value.mTokenToId.find(Vocabulary::mEosStr); 64 | assert(it != value.mTokenToId.end()); 65 | value.mEosId = it->second; 66 | } 67 | 68 | { 69 | auto it = value.mTokenToId.find(Vocabulary::mUnkStr); 70 | assert(it != value.mTokenToId.end()); 71 | value.mUnkId = it->second; 72 | } 73 | 74 | return input; 75 | } 76 | 77 | int Vocabulary::getStartSequenceId() 78 | { 79 | return mSosId; 80 | } 81 | 82 | int Vocabulary::getEndSequenceId() 83 | { 84 | return mEosId; 85 | } 86 | } -------------------------------------------------------------------------------- /src/sampleNMT/data/vocabulary.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_VOCABULARY_ 2 | #define SAMPLE_NMT_VOCABULARY_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "sequenceProperties.h" 10 | 11 | namespace nmtSample 12 | { 13 | /** \class Vocabulary 14 | * 15 | * \brief String<->Id bijection storage 16 | * 17 | */ 18 | class Vocabulary : public SequenceProperties 19 | { 20 | public: 21 | typedef 
std::shared_ptr ptr; 22 | 23 | Vocabulary(); 24 | 25 | friend std::istream& operator>>(std::istream& input, Vocabulary& value); 26 | 27 | /** 28 | * \brief add new token to vocabulary, ID is auto-generated 29 | */ 30 | void add(const std::string& token); 31 | 32 | /** 33 | * \brief get the ID of the token 34 | */ 35 | int getId(const std::string& token) const; 36 | 37 | /** 38 | * \brief get token by ID 39 | */ 40 | std::string getToken(int id) const; 41 | 42 | /** 43 | * \brief get the number of elements in the vocabulary 44 | */ 45 | int getSize() const; 46 | 47 | int getStartSequenceId() override; 48 | 49 | int getEndSequenceId() override; 50 | 51 | private: 52 | static const std::string mSosStr; 53 | static const std::string mUnkStr; 54 | static const std::string mEosStr; 55 | 56 | std::map mTokenToId; 57 | std::vector mIdToToken; 58 | int mNumTokens; 59 | 60 | int mSosId; 61 | int mEosId; 62 | int mUnkId; 63 | }; 64 | } 65 | 66 | #endif // SAMPLE_NMT_VOCABULARY_ 67 | -------------------------------------------------------------------------------- /src/sampleNMT/deviceBuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_DEVICE_BUFFER_ 2 | #define SAMPLE_NMT_DEVICE_BUFFER_ 3 | 4 | #include "cudaError.h" 5 | #include 6 | #include 7 | 8 | namespace nmtSample 9 | { 10 | template 11 | class DeviceBuffer 12 | { 13 | public: 14 | typedef std::shared_ptr> ptr; 15 | 16 | DeviceBuffer(size_t elementCount) 17 | : mBuffer(nullptr) 18 | { 19 | CUDA_CHECK(cudaMalloc(&mBuffer, elementCount * sizeof(T))); 20 | } 21 | 22 | virtual ~DeviceBuffer() 23 | { 24 | if (mBuffer) 25 | { 26 | cudaFree(mBuffer); 27 | } 28 | } 29 | 30 | operator T*() 31 | { 32 | return mBuffer; 33 | } 34 | 35 | operator const T*() const 36 | { 37 | return mBuffer; 38 | } 39 | 40 | protected: 41 | T* mBuffer; 42 | }; 43 | } 44 | 45 | #endif // SAMPLE_NMT_DEVICE_BUFFER_ 46 | -------------------------------------------------------------------------------- /src/sampleNMT/model/alignment.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_ALIGNMENT_ 2 | #define SAMPLE_NMT_ALIGNMENT_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class Alignment 12 | * 13 | * \brief represents the core of attention mechanism 14 | * 15 | */ 16 | class Alignment : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | Alignment() = default; 22 | 23 | /** 24 | * \brief add the alignment scores calculation to the network 25 | */ 26 | virtual void addToModel( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* attentionKeys, 29 | nvinfer1::ITensor* queryStates, 30 | nvinfer1::ITensor** alignmentScores) 31 | = 0; 32 | 33 | /** 34 | * \brief add attention keys calculation (from source memory states) to the network 35 | * 36 | * The funtion is called if getAttentionKeySize returns positive value 37 | */ 38 | virtual void addAttentionKeys( 39 | nvinfer1::INetworkDefinition* network, 40 | nvinfer1::ITensor* memoryStates, 41 | nvinfer1::ITensor** attentionKeys) 42 | = 0; 43 | 44 | /** 45 | * \brief get the size of the source states 46 | */ 47 | virtual int getSourceStatesSize() = 0; 48 | 49 | /** 50 | * \brief get the size of the attention keys 51 | */ 52 | virtual int getAttentionKeySize() = 0; 53 | 54 | ~Alignment() override = default; 55 | }; 56 | } 57 | 58 | #endif // SAMPLE_NMT_ALIGNMENT_ 59 | 
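// Illustrative sketch (not part of the original header): one plausible way a multiplicative
// (Luong-style) scorer could implement Alignment::addToModel, scoring every source position
// against every decoder query with a single batched GEMM. Class and tensor names here are
// hypothetical; shapes are illustrative only.
//
//   void MyMultiplicativeAlignment::addToModel(
//       nvinfer1::INetworkDefinition* network,
//       nvinfer1::ITensor* attentionKeys,   // projected source memory states
//       nvinfer1::ITensor* queryStates,     // current decoder states, one row per beam ray
//       nvinfer1::ITensor** alignmentScores)
//   {
//       // scores = queryStates * attentionKeys^T
//       auto mmLayer = network->addMatrixMultiply(*queryStates, false, *attentionKeys, true);
//       assert(mmLayer != nullptr);
//       mmLayer->setName("Alignment Matrix Multiply");
//       *alignmentScores = mmLayer->getOutput(0);
//   }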
-------------------------------------------------------------------------------- /src/sampleNMT/model/attention.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_ATTENTION_ 2 | #define SAMPLE_NMT_ATTENTION_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class Attention 12 | * 13 | * \brief calculates attention vector from context and decoder output vectors 14 | * 15 | */ 16 | class Attention : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | Attention() = default; 22 | 23 | /** 24 | * \brief add the attention vector calculation to the network 25 | */ 26 | virtual void addToModel( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* inputFromDecoder, 29 | nvinfer1::ITensor* context, 30 | nvinfer1::ITensor** attentionOutput) 31 | = 0; 32 | 33 | /** 34 | * \brief get the size of the attention vector 35 | */ 36 | virtual int getAttentionSize() = 0; 37 | 38 | ~Attention() override = default; 39 | }; 40 | } 41 | 42 | #endif // SAMPLE_NMT_ATTENTION_ 43 | -------------------------------------------------------------------------------- /src/sampleNMT/model/beamSearchPolicy.cpp: -------------------------------------------------------------------------------- 1 | #include "beamSearchPolicy.h" 2 | #ifdef _MSC_VER 3 | //Macro definition needed to avoid name collision with std::min/max and Windows.h min/max 4 | #define NOMINMAX 5 | #endif 6 | #include "nvToolsExt.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace nmtSample 13 | { 14 | BeamSearchPolicy::BeamSearchPolicy( 15 | int endSequenceId, 16 | LikelihoodCombinationOperator::ptr likelihoodCombinationOperator, 17 | int beamWidth) 18 | : mEndSequenceId(endSequenceId) 19 | , mLikelihoodCombinationOperator(likelihoodCombinationOperator) 20 | , mBeamWidth(beamWidth) 21 | { 22 | } 23 | 24 | void BeamSearchPolicy::initialize( 25 | int sampleCount, 26 | int* maxOutputSequenceLengths) 27 | { 28 | mSampleCount = sampleCount; 29 | mMaxOutputSequenceLengths.resize(mSampleCount); 30 | std::copy(maxOutputSequenceLengths, maxOutputSequenceLengths + mSampleCount, &mMaxOutputSequenceLengths[0]); 31 | 32 | mValidSamples.resize(mSampleCount); 33 | std::fill(mValidSamples.begin(), mValidSamples.end(), true); 34 | 35 | mCurrentLikelihoods.resize(mSampleCount * mBeamWidth); 36 | std::fill(mCurrentLikelihoods.begin(), mCurrentLikelihoods.end(), mLikelihoodCombinationOperator->init()); 37 | 38 | mBeamSearchTable.clear(); 39 | 40 | mTimestepId = 0; 41 | 42 | mCandidates.resize(mSampleCount); 43 | mCandidateLikelihoods.resize(mSampleCount); 44 | std::fill(mCandidateLikelihoods.begin(), mCandidateLikelihoods.end(), mLikelihoodCombinationOperator->smallerThanMinimalLikelihood()); 45 | } 46 | 47 | void BeamSearchPolicy::processTimestep( 48 | int validSampleCount, 49 | const float* hCombinedLikelihoods, 50 | const int* hVocabularyIndices, 51 | const int* hRayOptionIndices, 52 | int* hSourceRayIndices, 53 | float* hSourceLikelihoods) 54 | { 55 | ++mTimestepId; 56 | mBeamSearchTable.resize(mTimestepId * mSampleCount * mBeamWidth); 57 | auto baseBeamSearchTable = mBeamSearchTable.begin() + (mTimestepId - 1) * mSampleCount * mBeamWidth; 58 | 59 | for (int sampleId = 0; sampleId < validSampleCount; ++sampleId) 60 | { 61 | auto currentSourceRayIndices = hSourceRayIndices + sampleId * mBeamWidth; 62 | auto currentLikelihoods = hSourceLikelihoods + sampleId * mBeamWidth; 63 | auto 
currentBeamSearchTable = baseBeamSearchTable + sampleId * mBeamWidth; 64 | 65 | int rayId = 0; 66 | if (mValidSamples[sampleId]) 67 | { 68 | for (; rayId < mBeamWidth; ++rayId) 69 | { 70 | float optionCombinedLikelihood = hCombinedLikelihoods[sampleId * mBeamWidth + rayId]; 71 | 72 | // Check if the current candidate is already better than this option 73 | if (optionCombinedLikelihood <= mCandidateLikelihoods[sampleId]) 74 | break; // The remaining options are even worse 75 | 76 | int optionOriginalRayId = hRayOptionIndices[sampleId * mBeamWidth + rayId] / mBeamWidth; 77 | int optionVocabularyId = hVocabularyIndices[sampleId * mBeamWidth + rayId]; 78 | 79 | if ((optionVocabularyId == mEndSequenceId) || (mTimestepId >= mMaxOutputSequenceLengths[sampleId])) 80 | { 81 | // We have a new candidate output sequence for the sample 82 | mCandidateLikelihoods[sampleId] = optionCombinedLikelihood; 83 | auto& candidate = mCandidates[sampleId]; 84 | candidate.resize(mTimestepId); 85 | backtrack(mTimestepId - 2, sampleId, optionOriginalRayId, &candidate[0], mTimestepId - 2); 86 | candidate[mTimestepId - 1] = optionVocabularyId; 87 | break; 88 | } 89 | 90 | *(currentSourceRayIndices + rayId) = optionOriginalRayId; 91 | *(currentLikelihoods + rayId) = optionCombinedLikelihood; 92 | (currentBeamSearchTable + rayId)->vocabularyId = optionVocabularyId; 93 | (currentBeamSearchTable + rayId)->backtrackId = optionOriginalRayId; 94 | } 95 | 96 | // No valid rays left for the sample 97 | if (rayId == 0) 98 | mValidSamples[sampleId] = false; 99 | } 100 | 101 | // Mark the remaining rays as invalid ones 102 | for (; rayId < mBeamWidth; ++rayId) 103 | { 104 | *(currentSourceRayIndices + rayId) = 0; 105 | *(currentLikelihoods + rayId) = mLikelihoodCombinationOperator->smallerThanMinimalLikelihood(); 106 | (currentBeamSearchTable + rayId)->vocabularyId = mEndSequenceId; 107 | (currentBeamSearchTable + rayId)->backtrackId = 0; 108 | } 109 | } 110 | } 111 | 112 | int BeamSearchPolicy::getTailWithNoWorkRemaining() 113 | { 114 | for (int sampleId = mSampleCount - 1; sampleId >= 0; --sampleId) 115 | { 116 | if (mValidSamples[sampleId]) 117 | return sampleId + 1; 118 | } 119 | return 0; 120 | } 121 | 122 | void BeamSearchPolicy::readGeneratedResult( 123 | int sampleCount, 124 | int maxOutputSequenceLength, 125 | int* hOutputData, 126 | int* hActualOutputSequenceLengths) 127 | { 128 | for (int sampleId = 0; sampleId < sampleCount; ++sampleId) 129 | { 130 | if (mCandidateLikelihoods[sampleId] > mLikelihoodCombinationOperator->smallerThanMinimalLikelihood()) 131 | { 132 | // We have a candidate (finished sequence) 133 | std::copy_n( 134 | mCandidates[sampleId].begin(), 135 | std::min(static_cast(mCandidates[sampleId].size()), maxOutputSequenceLength), 136 | hOutputData + sampleId * maxOutputSequenceLength); 137 | hActualOutputSequenceLengths[sampleId] = mCandidates[sampleId].size(); 138 | } 139 | else 140 | { 141 | // We don't have a finished sequence generated, will output the unfinished one with the highest likelihood 142 | assert(mValidSamples[sampleId]); 143 | backtrack(mTimestepId - 1, sampleId, 0, hOutputData + sampleId * maxOutputSequenceLength, maxOutputSequenceLength - 1); 144 | hActualOutputSequenceLengths[sampleId] = mTimestepId; 145 | } 146 | } 147 | } 148 | 149 | void BeamSearchPolicy::backtrack( 150 | int lastTimestepId, 151 | int sampleId, 152 | int lastTimestepRayId, 153 | int* hOutputData, 154 | int lastTimestepWriteId) const 155 | { 156 | int rayId = lastTimestepRayId; 157 | for (int timestepId = 
lastTimestepId; timestepId >= 0; --timestepId) 158 | { 159 | const auto& entry = mBeamSearchTable[(timestepId * mSampleCount + sampleId) * mBeamWidth + rayId]; 160 | rayId = entry.backtrackId; 161 | if (timestepId <= lastTimestepWriteId) 162 | hOutputData[timestepId] = entry.vocabularyId; 163 | } 164 | } 165 | 166 | std::string BeamSearchPolicy::getInfo() 167 | { 168 | std::stringstream ss; 169 | ss << "Beam Search Policy, beam = " << mBeamWidth; 170 | return ss.str(); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/sampleNMT/model/beamSearchPolicy.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_BEAM_SEARCH_POLICY_ 2 | #define SAMPLE_NMT_BEAM_SEARCH_POLICY_ 3 | 4 | #include "../component.h" 5 | #include "likelihoodCombinationOperator.h" 6 | 7 | #include 8 | 9 | namespace nmtSample 10 | { 11 | /** \class BeamSearchPolicy 12 | * 13 | * \brief processes the results of one iteration of the generator with beam search and produces input for the next iteration 14 | * 15 | */ 16 | class BeamSearchPolicy : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | BeamSearchPolicy( 22 | int endSequenceId, 23 | LikelihoodCombinationOperator::ptr likelihoodCombinationOperator, 24 | int beamWidth); 25 | 26 | void initialize( 27 | int sampleCount, 28 | int* maxOutputSequenceLengths); 29 | 30 | void processTimestep( 31 | int validSampleCount, 32 | const float* hCombinedLikelihoods, 33 | const int* hVocabularyIndices, 34 | const int* hRayOptionIndices, 35 | int* hSourceRayIndices, 36 | float* hSourceLikelihoods); 37 | 38 | int getTailWithNoWorkRemaining(); 39 | 40 | void readGeneratedResult( 41 | int sampleCount, 42 | int maxOutputSequenceLength, 43 | int* hOutputData, 44 | int* hActualOutputSequenceLengths); 45 | 46 | std::string getInfo() override; 47 | 48 | ~BeamSearchPolicy() override = default; 49 | 50 | protected: 51 | struct Ray 52 | { 53 | int vocabularyId; 54 | int backtrackId; 55 | }; 56 | 57 | void backtrack( 58 | int lastTimestepId, 59 | int sampleId, 60 | int lastTimestepRayId, 61 | int* hOutputData, 62 | int lastTimestepWriteId) const; 63 | 64 | protected: 65 | int mEndSequenceId; 66 | LikelihoodCombinationOperator::ptr mLikelihoodCombinationOperator; 67 | int mBeamWidth; 68 | std::vector mValidSamples; 69 | std::vector mCurrentLikelihoods; 70 | std::vector mBeamSearchTable; 71 | int mSampleCount; 72 | std::vector mMaxOutputSequenceLengths; 73 | int mTimestepId; 74 | 75 | std::vector> mCandidates; 76 | std::vector mCandidateLikelihoods; 77 | }; 78 | } 79 | 80 | #endif // SAMPLE_NMT_BEAM_SEARCH_POLICY_ 81 | -------------------------------------------------------------------------------- /src/sampleNMT/model/componentWeights.cpp: -------------------------------------------------------------------------------- 1 | #include "componentWeights.h" 2 | #include 3 | #include 4 | 5 | namespace nmtSample 6 | { 7 | std::istream& operator>>(std::istream& input, ComponentWeights& value) 8 | { 9 | std::string footerString("trtsamplenmt"); 10 | size_t footerSize = sizeof(int32_t) + footerString.size(); 11 | char* footer = (char*) malloc(footerSize); 12 | 13 | input.seekg(0, std::ios::end); 14 | size_t fileSize = input.tellg(); 15 | 16 | input.seekg(-footerSize, std::ios::end); 17 | input.read(footer, footerSize); 18 | 19 | size_t metaDataCount = ((int32_t*) footer)[0]; 20 | std::string str(footer + sizeof(int32_t), footer + footerSize); 21 | 
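    // The weight file ends with a footer: a 32-bit metadata-entry count followed by the magic
    // string "trtsamplenmt". Verify the magic string before trusting the metadata block that
    // immediately precedes the footer.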
assert(footerString.compare(str) == 0); 22 | free(footer); 23 | 24 | input.seekg(-(footerSize + metaDataCount * sizeof(int32_t)), std::ios::end); 25 | value.mMetaData.resize(metaDataCount); 26 | size_t metaSize = metaDataCount * sizeof(int32_t); 27 | input.read((char*) (&value.mMetaData[0]), metaSize); 28 | 29 | size_t dataSize = fileSize - footerSize - metaSize; 30 | input.seekg(0, input.beg); 31 | value.mWeights.resize(dataSize); 32 | input.read(&value.mWeights[0], dataSize); 33 | return input; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/sampleNMT/model/componentWeights.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_COMPONENT_WEIGHTS_ 2 | #define SAMPLE_NMT_COMPONENT_WEIGHTS_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nmtSample 9 | { 10 | /** \class ComponentWeights 11 | * 12 | * \brief weights storage 13 | * 14 | */ 15 | class ComponentWeights 16 | { 17 | public: 18 | typedef std::shared_ptr ptr; 19 | 20 | ComponentWeights() = default; 21 | 22 | friend std::istream& operator>>(std::istream& input, ComponentWeights& value); 23 | 24 | public: 25 | std::vector mMetaData; 26 | std::vector mWeights; 27 | }; 28 | } 29 | 30 | #endif // SAMPLE_NMT_COMPONENT_WEIGHTS_ 31 | -------------------------------------------------------------------------------- /src/sampleNMT/model/context.cpp: -------------------------------------------------------------------------------- 1 | #include "context.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | void Context::addToModel( 9 | nvinfer1::INetworkDefinition* network, 10 | nvinfer1::ITensor* actualInputSequenceLengths, 11 | nvinfer1::ITensor* memoryStates, 12 | nvinfer1::ITensor* alignmentScores, 13 | nvinfer1::ITensor** contextOutput) 14 | { 15 | auto raggedSoftmaxLayer = network->addRaggedSoftMax(*alignmentScores, *actualInputSequenceLengths); 16 | assert(raggedSoftmaxLayer != nullptr); 17 | raggedSoftmaxLayer->setName("Context Ragged Softmax"); 18 | auto softmaxTensor = raggedSoftmaxLayer->getOutput(0); 19 | assert(softmaxTensor != nullptr); 20 | 21 | auto mmLayer = network->addMatrixMultiply( 22 | *softmaxTensor, 23 | false, 24 | *memoryStates, 25 | false); 26 | assert(mmLayer != nullptr); 27 | mmLayer->setName("Context Matrix Multiply"); 28 | *contextOutput = mmLayer->getOutput(0); 29 | assert(*contextOutput != nullptr); 30 | } 31 | 32 | std::string Context::getInfo() 33 | { 34 | return "Ragged softmax + Batch GEMM"; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/sampleNMT/model/context.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_CONTEXT_ 2 | #define SAMPLE_NMT_CONTEXT_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class Context 12 | * 13 | * \brief calculates context vector from raw alignment scores and memory states 14 | * 15 | */ 16 | class Context : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | Context() = default; 22 | 23 | /** 24 | * \brief add the context vector calculation to the network 25 | */ 26 | void addToModel( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* actualInputSequenceLengths, 29 | nvinfer1::ITensor* memoryStates, 30 | nvinfer1::ITensor* alignmentScores, 31 | nvinfer1::ITensor** contextOutput); 32 | 33 | std::string getInfo() 
override; 34 | 35 | ~Context() override = default; 36 | }; 37 | } 38 | 39 | #endif // SAMPLE_NMT_CONTEXT_ 40 | -------------------------------------------------------------------------------- /src/sampleNMT/model/debugUtil.cpp: -------------------------------------------------------------------------------- 1 | #include "debugUtil.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "../cudaError.h" 7 | 8 | namespace nmtSample 9 | { 10 | std::list DebugUtil::mPlugins; 11 | 12 | DebugUtil::DumpTensorPlugin::DumpTensorPlugin(std::shared_ptr out) 13 | : mOut(out) 14 | { 15 | } 16 | 17 | int DebugUtil::DumpTensorPlugin::getNbOutputs() const 18 | { 19 | return 1; 20 | } 21 | 22 | nvinfer1::Dims DebugUtil::DumpTensorPlugin::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) 23 | { 24 | return inputs[0]; 25 | } 26 | 27 | void DebugUtil::DumpTensorPlugin::configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize) 28 | { 29 | mDims = inputDims[0]; 30 | 31 | *mOut << "Max batch size = " << maxBatchSize << std::endl; 32 | *mOut << "Tensor dimensions = "; 33 | mTensorVolume = 1; 34 | for (int i = 0; i < mDims.nbDims; ++i) 35 | { 36 | if (i > 0) 37 | *mOut << "x"; 38 | *mOut << mDims.d[i]; 39 | mTensorVolume *= mDims.d[i]; 40 | } 41 | mElemsPerRow = 1; 42 | for (int i = mDims.nbDims - 1; i >= 0; --i) 43 | { 44 | if (mElemsPerRow == 1) 45 | mElemsPerRow *= mDims.d[i]; 46 | } 47 | *mOut << std::endl; 48 | 49 | mData = std::make_shared>(mTensorVolume * maxBatchSize); 50 | } 51 | 52 | int DebugUtil::DumpTensorPlugin::initialize() 53 | { 54 | return 0; 55 | } 56 | 57 | void DebugUtil::DumpTensorPlugin::terminate() 58 | { 59 | mOut.reset(); 60 | mData.reset(); 61 | } 62 | 63 | size_t DebugUtil::DumpTensorPlugin::getWorkspaceSize(int maxBatchSize) const 64 | { 65 | return 0; 66 | } 67 | 68 | int DebugUtil::DumpTensorPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) 69 | { 70 | int totalElems = batchSize * mTensorVolume; 71 | 72 | CUDA_CHECK(cudaMemcpyAsync(*mData, inputs[0], totalElems * sizeof(float), cudaMemcpyDeviceToHost, stream)); 73 | CUDA_CHECK(cudaStreamSynchronize(stream)); 74 | CUDA_CHECK(cudaMemcpyAsync(outputs[0], inputs[0], totalElems * sizeof(float), cudaMemcpyDeviceToDevice, stream)); 75 | 76 | *mOut << "Batch size = " << batchSize << "\n"; 77 | int rowCount = totalElems / mElemsPerRow; 78 | for (int rowId = 0; rowId < rowCount; ++rowId) 79 | { 80 | for (int i = 0; i < mElemsPerRow; ++i) 81 | { 82 | if (i > 0) 83 | *mOut << " "; 84 | *mOut << (*mData)[rowId * mElemsPerRow + i]; 85 | } 86 | *mOut << "\n"; 87 | } 88 | *mOut << std::endl; 89 | 90 | return 0; 91 | } 92 | 93 | size_t DebugUtil::DumpTensorPlugin::getSerializationSize() 94 | { 95 | assert(0); 96 | return 0; 97 | } 98 | 99 | void DebugUtil::DumpTensorPlugin::serialize(void* buffer) 100 | { 101 | assert(0); 102 | } 103 | 104 | void DebugUtil::addDumpTensorToStream( 105 | nvinfer1::INetworkDefinition* network, 106 | nvinfer1::ITensor* input, 107 | nvinfer1::ITensor** output, 108 | std::shared_ptr out) 109 | { 110 | assert(!input->getBroadcastAcrossBatch()); 111 | auto plugin = std::make_shared(out); 112 | nvinfer1::ITensor* inputTensors[] = {input}; 113 | auto pluginLayer = network->addPlugin(inputTensors, 1, *plugin); 114 | assert(pluginLayer != nullptr); 115 | *output = pluginLayer->getOutput(0); 116 | assert(*output != nullptr); 117 | mPlugins.push_back(plugin); 118 | } 119 | } 120 | 
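A minimal usage sketch (reader-added, not part of the sample): the static helper above splices the dump plugin between a producer tensor and its consumers, so the caller has to rewire downstream layers onto the plugin's output. The stream argument is assumed here to be a std::shared_ptr to a std::ostream, matching how mOut is dereferenced in the plugin; the function and file names in the sketch are placeholders.

#include <fstream>
#include <memory>
#include "NvInfer.h"
#include "debugUtil.h"

// Dump an intermediate tensor (e.g. raw alignment scores) to a text file while
// leaving the data flowing through the network unchanged.
nvinfer1::ITensor* attachDump(nvinfer1::INetworkDefinition* network, nvinfer1::ITensor* tensorToInspect)
{
    auto log = std::make_shared<std::ofstream>("dumped_tensor.txt");
    nvinfer1::ITensor* forwarded = nullptr;
    nmtSample::DebugUtil::addDumpTensorToStream(network, tensorToInspect, &forwarded, log);
    // Downstream layers must consume 'forwarded' instead of 'tensorToInspect';
    // otherwise the plugin layer contributes to no network output and will not run.
    return forwarded;
}
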
-------------------------------------------------------------------------------- /src/sampleNMT/model/debugUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_DEBUG_UTIL_ 2 | #define SAMPLE_NMT_DEBUG_UTIL_ 3 | 4 | #include "NvInfer.h" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "../pinnedHostBuffer.h" 11 | 12 | namespace nmtSample 13 | { 14 | /** \class DebugUtil 15 | * 16 | * \brief container for static debug utility functions 17 | * 18 | */ 19 | class DebugUtil 20 | { 21 | private: 22 | class DumpTensorPlugin : public nvinfer1::IPlugin 23 | { 24 | public: 25 | typedef std::shared_ptr ptr; 26 | 27 | DumpTensorPlugin(std::shared_ptr out); 28 | 29 | ~DumpTensorPlugin() override = default; 30 | 31 | int getNbOutputs() const override; 32 | 33 | nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) override; 34 | 35 | void configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize) override; 36 | 37 | int initialize() override; 38 | 39 | void terminate() override; 40 | 41 | size_t getWorkspaceSize(int maxBatchSize) const override; 42 | 43 | int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override; 44 | 45 | size_t getSerializationSize() override; 46 | 47 | void serialize(void* buffer) override; 48 | 49 | private: 50 | std::shared_ptr mOut; 51 | nvinfer1::Dims mDims; 52 | int mMaxBatchSize; 53 | int mTensorVolume; 54 | int mElemsPerRow; 55 | PinnedHostBuffer::ptr mData; 56 | }; 57 | 58 | public: 59 | static void addDumpTensorToStream( 60 | nvinfer1::INetworkDefinition* network, 61 | nvinfer1::ITensor* input, 62 | nvinfer1::ITensor** output, 63 | std::shared_ptr out); 64 | 65 | private: 66 | static std::list mPlugins; 67 | }; 68 | } 69 | 70 | #endif // SAMPLE_NMT_DEBUG_UTIL_ 71 | -------------------------------------------------------------------------------- /src/sampleNMT/model/decoder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_DECODER_ 2 | #define SAMPLE_NMT_DECODER_ 3 | 4 | #include 5 | #include 6 | 7 | #include "../component.h" 8 | #include "NvInfer.h" 9 | 10 | namespace nmtSample 11 | { 12 | /** \class Decoder 13 | * 14 | * \brief encodes single input into output states 15 | * 16 | */ 17 | class Decoder : public Component 18 | { 19 | public: 20 | typedef std::shared_ptr ptr; 21 | 22 | Decoder() = default; 23 | 24 | /** 25 | * \brief add the memory, cell, and hidden states to the network 26 | */ 27 | virtual void addToModel( 28 | nvinfer1::INetworkDefinition* network, 29 | nvinfer1::ITensor* inputData, 30 | nvinfer1::ITensor** inputStates, 31 | nvinfer1::ITensor** outputData, 32 | nvinfer1::ITensor** outputStates) 33 | = 0; 34 | 35 | /** 36 | * \brief get the sizes (vector of them) of the hidden state vectors 37 | */ 38 | virtual std::vector getStateSizes() = 0; 39 | 40 | ~Decoder() override = default; 41 | }; 42 | } 43 | 44 | #endif // SAMPLE_NMT_DECODER_ 45 | -------------------------------------------------------------------------------- /src/sampleNMT/model/embedder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_EMBEDDER_ 2 | #define SAMPLE_NMT_EMBEDDER_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class Embedder 12 | * 13 | * \brief projects 1-hot 
vectors (represented as a vector with indices) into dense embedding space 14 | * 15 | */ 16 | class Embedder : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | Embedder() = default; 22 | 23 | /** 24 | * \brief add the embedding vector calculation to the network 25 | */ 26 | virtual void addToModel( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* input, 29 | nvinfer1::ITensor** output) 30 | = 0; 31 | 32 | /** 33 | * \brief get the upper bound for the possible values of indices 34 | */ 35 | virtual int getInputDimensionSize() = 0; 36 | 37 | ~Embedder() override = default; 38 | }; 39 | } 40 | 41 | #endif // SAMPLE_NMT_EMBEDDER_ 42 | -------------------------------------------------------------------------------- /src/sampleNMT/model/encoder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_ENCODER_ 2 | #define SAMPLE_NMT_ENCODER_ 3 | 4 | #include 5 | #include 6 | 7 | #include "../component.h" 8 | #include "NvInfer.h" 9 | 10 | namespace nmtSample 11 | { 12 | /** \class Encoder 13 | * 14 | * \brief encodes input sentences into output states 15 | * 16 | */ 17 | class Encoder : public Component 18 | { 19 | public: 20 | typedef std::shared_ptr ptr; 21 | 22 | Encoder() = default; 23 | 24 | /** 25 | * \brief add the memory and last timestep states to the network 26 | * lastTimestepHiddenStates is the pointer to the tensor where the encoder stores all layer hidden states for the last timestep (which is dependent on the sample), 27 | * the function should define the tensor, it could be nullptr indicating these data are not needed 28 | */ 29 | virtual void addToModel( 30 | nvinfer1::INetworkDefinition* network, 31 | int maxInputSequenceLength, 32 | nvinfer1::ITensor* inputEmbeddedData, 33 | nvinfer1::ITensor* actualInputSequenceLengths, 34 | nvinfer1::ITensor** inputStates, 35 | nvinfer1::ITensor** memoryStates, 36 | nvinfer1::ITensor** lastTimestepStates) 37 | = 0; 38 | 39 | /** 40 | * \brief get the size of the memory state vector 41 | */ 42 | virtual int getMemoryStatesSize() = 0; 43 | 44 | /** 45 | * \brief get the sizes (vector of them) of the hidden state vectors 46 | */ 47 | virtual std::vector getStateSizes() = 0; 48 | 49 | ~Encoder() override = default; 50 | }; 51 | } 52 | 53 | #endif // SAMPLE_NMT_ENCODER_ 54 | -------------------------------------------------------------------------------- /src/sampleNMT/model/likelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_LIKELIHOOD_ 2 | #define SAMPLE_NMT_LIKELIHOOD_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | #include "likelihoodCombinationOperator.h" 9 | 10 | namespace nmtSample 11 | { 12 | /** \class Likelihood 13 | * 14 | * \brief calculates likelihood and TopK indices for the raw input logits 15 | * 16 | */ 17 | class Likelihood : public Component 18 | { 19 | public: 20 | typedef std::shared_ptr ptr; 21 | 22 | Likelihood() = default; 23 | 24 | virtual LikelihoodCombinationOperator::ptr getLikelihoodCombinationOperator() const = 0; 25 | 26 | /** 27 | * \brief add calculation of likelihood and TopK indices to the network 28 | */ 29 | virtual void addToModel( 30 | nvinfer1::INetworkDefinition* network, 31 | int beamWidth, 32 | nvinfer1::ITensor* inputLogits, 33 | nvinfer1::ITensor* inputLikelihoods, 34 | nvinfer1::ITensor** newCombinedLikelihoods, 35 | nvinfer1::ITensor** newRayOptionIndices, 36 | nvinfer1::ITensor** newVocabularyIndices) 37 
| = 0; 38 | 39 | ~Likelihood() override = default; 40 | }; 41 | } 42 | 43 | #endif // SAMPLE_NMT_LIKELIHOOD_ 44 | -------------------------------------------------------------------------------- /src/sampleNMT/model/likelihoodCombinationOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_LIKELIHOOD_COMBINATION_ 2 | #define SAMPLE_NMT_LIKELIHOOD_COMBINATION_ 3 | 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | class LikelihoodCombinationOperator 9 | { 10 | public: 11 | typedef std::shared_ptr ptr; 12 | 13 | // The return value should be less or equal to rayLikelihood 14 | virtual float combine(float rayLikelihood, float optionLikelihood) const = 0; 15 | 16 | virtual float init() const = 0; 17 | 18 | virtual float smallerThanMinimalLikelihood() const = 0; 19 | 20 | virtual ~LikelihoodCombinationOperator() = default; 21 | 22 | protected: 23 | LikelihoodCombinationOperator() = default; 24 | }; 25 | } 26 | 27 | #endif // SAMPLE_NMT_LIKELIHOOD_COMBINATION_ 28 | -------------------------------------------------------------------------------- /src/sampleNMT/model/lstmDecoder.cpp: -------------------------------------------------------------------------------- 1 | #include "lstmDecoder.h" 2 | 3 | #include "..\trtUtil.h" 4 | 5 | #include "debugUtil.h" 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | namespace nmtSample 12 | { 13 | LSTMDecoder::LSTMDecoder(ComponentWeights::ptr weights) 14 | : mWeights(weights) 15 | { 16 | // please refer to chpt_to_bin.py for the details on the format 17 | assert(mWeights->mMetaData.size() >= 4); 18 | nvinfer1::DataType dataType = static_cast(mWeights->mMetaData[0]); 19 | assert(dataType == nvinfer1::DataType::kFLOAT); 20 | mRNNKind = mWeights->mMetaData[1]; 21 | mNumLayers = mWeights->mMetaData[2]; 22 | mNumUnits = mWeights->mMetaData[3]; 23 | size_t elementSize = inferTypeToBytes(dataType); 24 | // compute weights offsets 25 | size_t dataSize = 2 * mNumUnits; 26 | size_t kernelOffset = 0; 27 | size_t biasStartOffset = ((4 * dataSize + 4 * mNumUnits) * mNumUnits) * elementSize 28 | + 8 * mNumUnits * mNumUnits * (mNumLayers - 1) * elementSize; 29 | size_t biasOffset = biasStartOffset; 30 | int numGates = 8; 31 | for (int layerIndex = 0; layerIndex < mNumLayers; layerIndex++) 32 | { 33 | for (int gateIndex = 0; gateIndex < numGates; gateIndex++) 34 | { 35 | // encoder input size == mNumUnits 36 | int64_t inputSize = ((layerIndex == 0) && (gateIndex < 4)) ? 
dataSize : mNumUnits; 37 | nvinfer1::Weights gateKernelWeights{dataType, &mWeights->mWeights[0] + kernelOffset, inputSize * mNumUnits}; 38 | nvinfer1::Weights gateBiasWeights{dataType, &mWeights->mWeights[0] + biasOffset, mNumUnits}; 39 | mGateKernelWeights.push_back(std::move(gateKernelWeights)); 40 | mGateBiasWeights.push_back(std::move(gateBiasWeights)); 41 | kernelOffset = kernelOffset + inputSize * mNumUnits * elementSize; 42 | biasOffset = biasOffset + mNumUnits * elementSize; 43 | } 44 | } 45 | assert(kernelOffset + biasOffset - biasStartOffset == mWeights->mWeights.size()); 46 | } 47 | 48 | void LSTMDecoder::addToModel( 49 | nvinfer1::INetworkDefinition* network, 50 | nvinfer1::ITensor* inputEmbeddedData, 51 | nvinfer1::ITensor** inputStates, 52 | nvinfer1::ITensor** outputData, 53 | nvinfer1::ITensor** outputStates) 54 | { 55 | int beamWidth; 56 | int inputWidth; 57 | { 58 | auto dims = inputEmbeddedData->getDimensions(); 59 | assert(dims.nbDims == 2); 60 | assert(dims.type[0] == nvinfer1::DimensionType::kINDEX); 61 | beamWidth = dims.d[0]; 62 | assert(dims.type[1] == nvinfer1::DimensionType::kCHANNEL); 63 | inputWidth = dims.d[1]; 64 | } 65 | 66 | nvinfer1::ITensor* shuffledInput; 67 | { 68 | auto shuffleLayer = network->addShuffle(*inputEmbeddedData); 69 | assert(shuffleLayer != nullptr); 70 | shuffleLayer->setName("Reshape input for LSTM decoder"); 71 | nvinfer1::Dims shuffleDims{3, {beamWidth, 1, inputWidth}, {nvinfer1::DimensionType::kINDEX, nvinfer1::DimensionType::kSEQUENCE, nvinfer1::DimensionType::kCHANNEL}}; 72 | shuffleLayer->setReshapeDimensions(shuffleDims); 73 | shuffledInput = shuffleLayer->getOutput(0); 74 | assert(shuffledInput != nullptr); 75 | } 76 | 77 | auto decoderLayer = network->addRNNv2( 78 | *shuffledInput, 79 | mNumLayers, 80 | mNumUnits, 81 | 1, 82 | nvinfer1::RNNOperation::kLSTM); 83 | assert(decoderLayer != nullptr); 84 | decoderLayer->setName("LSTM decoder"); 85 | 86 | decoderLayer->setInputMode(nvinfer1::RNNInputMode::kLINEAR); 87 | decoderLayer->setDirection(nvinfer1::RNNDirection::kUNIDIRECTION); 88 | 89 | std::vector gateOrder({nvinfer1::RNNGateType::kFORGET, 90 | nvinfer1::RNNGateType::kINPUT, 91 | nvinfer1::RNNGateType::kCELL, 92 | nvinfer1::RNNGateType::kOUTPUT}); 93 | for (size_t i = 0; i < mGateKernelWeights.size(); i++) 94 | { 95 | // we have 4 + 4 gates 96 | bool isW = ((i % 8) < 4); 97 | decoderLayer->setWeightsForGate(i / 8, gateOrder[i % 4], isW, mGateKernelWeights[i]); 98 | decoderLayer->setBiasForGate(i / 8, gateOrder[i % 4], isW, mGateBiasWeights[i]); 99 | } 100 | 101 | decoderLayer->setHiddenState(*inputStates[0]); 102 | decoderLayer->setCellState(*inputStates[1]); 103 | *outputData = decoderLayer->getOutput(0); 104 | assert(*outputData != nullptr); 105 | 106 | { 107 | auto shuffleLayer = network->addShuffle(**outputData); 108 | assert(shuffleLayer != nullptr); 109 | shuffleLayer->setName("Reshape output from LSTM decoder"); 110 | nvinfer1::Dims shuffleDims{2, {beamWidth, mNumUnits}, {nvinfer1::DimensionType::kINDEX, nvinfer1::DimensionType::kCHANNEL}}; 111 | shuffleLayer->setReshapeDimensions(shuffleDims); 112 | auto shuffledOutput = shuffleLayer->getOutput(0); 113 | assert(shuffledOutput != nullptr); 114 | *outputData = shuffledOutput; 115 | } 116 | 117 | // Per layer hidden output 118 | outputStates[0] = decoderLayer->getOutput(1); 119 | assert(outputStates[0] != nullptr); 120 | 121 | // Per layer cell output 122 | outputStates[1] = decoderLayer->getOutput(2); 123 | assert(outputStates[1] != nullptr); 124 | } 125 | 126 | 
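// Reader-added note (not part of the original sample code): addToModel above
// treats the beam width as the batch dimension of the RNNv2 layer. The 2-D
// {beamWidth, inputWidth} embedding is reshaped to {beamWidth, 1, inputWidth}
// because the layer is created with a maximum sequence length of 1, and the
// decoder output is squeezed back to {beamWidth, mNumUnits} afterwards.
// Each layer carries 8 gate weight/bias pairs; indices with (i % 8) < 4 hold
// the input ("W") weights and the remaining four hold the recurrent weights,
// hence setWeightsForGate(i / 8, gateOrder[i % 4], isW, ...) in the loop above.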
std::vector LSTMDecoder::getStateSizes() 127 | { 128 | nvinfer1::Dims hiddenStateDims{2, {mNumLayers, mNumUnits}, {nvinfer1::DimensionType::kSPATIAL, nvinfer1::DimensionType::kCHANNEL}}; 129 | nvinfer1::Dims cellStateDims{2, {mNumLayers, mNumUnits}, {nvinfer1::DimensionType::kSPATIAL, nvinfer1::DimensionType::kCHANNEL}}; 130 | return std::vector({hiddenStateDims, cellStateDims}); 131 | } 132 | 133 | std::string LSTMDecoder::getInfo() 134 | { 135 | std::stringstream ss; 136 | ss << "LSTM Decoder, num layers = " << mNumLayers << ", num units = " << mNumUnits; 137 | return ss.str(); 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/sampleNMT/model/lstmDecoder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_LSTM_DECODER_ 2 | #define SAMPLE_NMT_LSTM_DECODER_ 3 | 4 | #include "decoder.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class LSTMDecoder 11 | * 12 | * \brief encodes single input into output states with LSTM 13 | * 14 | */ 15 | class LSTMDecoder : public Decoder 16 | { 17 | public: 18 | LSTMDecoder(ComponentWeights::ptr weights); 19 | 20 | void addToModel( 21 | nvinfer1::INetworkDefinition* network, 22 | nvinfer1::ITensor* inputEmbeddedData, 23 | nvinfer1::ITensor** inputStates, 24 | nvinfer1::ITensor** outputData, 25 | nvinfer1::ITensor** outputStates) override; 26 | 27 | std::vector getStateSizes() override; 28 | 29 | std::string getInfo() override; 30 | 31 | ~LSTMDecoder() override = default; 32 | 33 | protected: 34 | ComponentWeights::ptr mWeights; 35 | std::vector mGateKernelWeights; 36 | std::vector mGateBiasWeights; 37 | bool mRNNKind; 38 | int mNumLayers; 39 | int mNumUnits; 40 | }; 41 | } 42 | 43 | #endif // SAMPLE_NMT_LSTM_DECODER_ 44 | -------------------------------------------------------------------------------- /src/sampleNMT/model/lstmEncoder.cpp: -------------------------------------------------------------------------------- 1 | #include "lstmEncoder.h" 2 | #include "..\trtUtil.h" 3 | 4 | #include 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | 10 | LSTMEncoder::LSTMEncoder(ComponentWeights::ptr weights) 11 | : mWeights(weights) 12 | { 13 | // please refer to chpt_to_bin.py for the details on the format 14 | assert(mWeights->mMetaData.size() >= 4); 15 | const nvinfer1::DataType dataType = static_cast(mWeights->mMetaData[0]); 16 | assert(dataType == nvinfer1::DataType::kFLOAT); 17 | mRNNKind = mWeights->mMetaData[1]; 18 | mNumLayers = mWeights->mMetaData[2]; 19 | mNumUnits = mWeights->mMetaData[3]; 20 | 21 | size_t elementSize = inferTypeToBytes(dataType); 22 | // compute weights offsets 23 | size_t kernelOffset = 0; 24 | size_t biasStartOffset = ((4 * mNumUnits + 4 * mNumUnits) * mNumUnits * mNumLayers) * elementSize; 25 | size_t biasOffset = biasStartOffset; 26 | int numGates = 8; 27 | for (int layerIndex = 0; layerIndex < mNumLayers; layerIndex++) 28 | { 29 | for (int gateIndex = 0; gateIndex < numGates; gateIndex++) 30 | { 31 | // encoder input size == mNumUnits 32 | int64_t inputSize = ((layerIndex == 0) && (gateIndex < 4)) ? 
mNumUnits : mNumUnits; 33 | nvinfer1::Weights gateKernelWeights{dataType, &mWeights->mWeights[0] + kernelOffset, inputSize * mNumUnits}; 34 | nvinfer1::Weights gateBiasWeights{dataType, &mWeights->mWeights[0] + biasOffset, mNumUnits}; 35 | mGateKernelWeights.push_back(std::move(gateKernelWeights)); 36 | mGateBiasWeights.push_back(std::move(gateBiasWeights)); 37 | kernelOffset = kernelOffset + inputSize * mNumUnits * elementSize; 38 | biasOffset = biasOffset + mNumUnits * elementSize; 39 | } 40 | } 41 | assert(kernelOffset + biasOffset - biasStartOffset == mWeights->mWeights.size()); 42 | } 43 | 44 | void LSTMEncoder::addToModel( 45 | nvinfer1::INetworkDefinition* network, 46 | int maxInputSequenceLength, 47 | nvinfer1::ITensor* inputEmbeddedData, 48 | nvinfer1::ITensor* actualInputSequenceLengths, 49 | nvinfer1::ITensor** inputStates, 50 | nvinfer1::ITensor** memoryStates, 51 | nvinfer1::ITensor** lastTimestepStates) 52 | { 53 | auto encoderLayer = network->addRNNv2( 54 | *inputEmbeddedData, 55 | mNumLayers, 56 | mNumUnits, 57 | maxInputSequenceLength, 58 | nvinfer1::RNNOperation::kLSTM); 59 | assert(encoderLayer != nullptr); 60 | encoderLayer->setName("LSTM encoder"); 61 | 62 | encoderLayer->setSequenceLengths(*actualInputSequenceLengths); 63 | encoderLayer->setInputMode(nvinfer1::RNNInputMode::kLINEAR); 64 | encoderLayer->setDirection(nvinfer1::RNNDirection::kUNIDIRECTION); 65 | 66 | std::vector gateOrder({nvinfer1::RNNGateType::kFORGET, 67 | nvinfer1::RNNGateType::kINPUT, 68 | nvinfer1::RNNGateType::kCELL, 69 | nvinfer1::RNNGateType::kOUTPUT}); 70 | for (size_t i = 0; i < mGateKernelWeights.size(); i++) 71 | { 72 | // we have 4 + 4 gates 73 | bool isW = ((i % 8) < 4); 74 | encoderLayer->setWeightsForGate(i / 8, gateOrder[i % 4], isW, mGateKernelWeights[i]); 75 | encoderLayer->setBiasForGate(i / 8, gateOrder[i % 4], isW, mGateBiasWeights[i]); 76 | } 77 | 78 | encoderLayer->setHiddenState(*inputStates[0]); 79 | encoderLayer->setCellState(*inputStates[1]); 80 | *memoryStates = encoderLayer->getOutput(0); 81 | assert(*memoryStates != nullptr); 82 | 83 | if (lastTimestepStates) 84 | { 85 | // Per layer hidden output 86 | lastTimestepStates[0] = encoderLayer->getOutput(1); 87 | assert(lastTimestepStates[0] != nullptr); 88 | 89 | // Per layer cell output 90 | lastTimestepStates[1] = encoderLayer->getOutput(2); 91 | assert(lastTimestepStates[1] != nullptr); 92 | } 93 | } 94 | 95 | int LSTMEncoder::getMemoryStatesSize() 96 | { 97 | return mNumUnits; 98 | } 99 | 100 | std::vector LSTMEncoder::getStateSizes() 101 | { 102 | nvinfer1::Dims hiddenStateDims{2, {mNumLayers, mNumUnits}, {nvinfer1::DimensionType::kSPATIAL, nvinfer1::DimensionType::kCHANNEL}}; 103 | nvinfer1::Dims cellStateDims{2, {mNumLayers, mNumUnits}, {nvinfer1::DimensionType::kSPATIAL, nvinfer1::DimensionType::kCHANNEL}}; 104 | return std::vector({hiddenStateDims, cellStateDims}); 105 | } 106 | 107 | std::string LSTMEncoder::getInfo() 108 | { 109 | std::stringstream ss; 110 | ss << "LSTM Encoder, num layers = " << mNumLayers << ", num units = " << mNumUnits; 111 | return ss.str(); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/sampleNMT/model/lstmEncoder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_LSTM_ENCODER_ 2 | #define SAMPLE_NMT_LSTM_ENCODER_ 3 | 4 | #include "encoder.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class LSTMEncoder 11 | * 12 | * \brief encodes input 
sentences into output states using LSTM 13 | * 14 | */ 15 | class LSTMEncoder : public Encoder 16 | { 17 | public: 18 | LSTMEncoder(ComponentWeights::ptr weights); 19 | 20 | void addToModel( 21 | nvinfer1::INetworkDefinition* network, 22 | int maxInputSequenceLength, 23 | nvinfer1::ITensor* inputEmbeddedData, 24 | nvinfer1::ITensor* actualInputSequenceLengths, 25 | nvinfer1::ITensor** inputStates, 26 | nvinfer1::ITensor** memoryStates, 27 | nvinfer1::ITensor** lastTimestepStates) override; 28 | 29 | int getMemoryStatesSize() override; 30 | 31 | std::vector getStateSizes() override; 32 | 33 | std::string getInfo() override; 34 | 35 | ~LSTMEncoder() override = default; 36 | 37 | protected: 38 | ComponentWeights::ptr mWeights; 39 | std::vector mGateKernelWeights; 40 | std::vector mGateBiasWeights; 41 | bool mRNNKind; 42 | int mNumLayers; 43 | int mNumUnits; 44 | }; 45 | } 46 | 47 | #endif // SAMPLE_NMT_LSTM_ENCODER_ 48 | -------------------------------------------------------------------------------- /src/sampleNMT/model/multiplicativeAlignment.cpp: -------------------------------------------------------------------------------- 1 | #include "multiplicativeAlignment.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | MultiplicativeAlignment::MultiplicativeAlignment(ComponentWeights::ptr weights) 9 | : mWeights(weights) 10 | { 11 | // please refer to chpt_to_bin.py for the details on the format 12 | assert(mWeights->mMetaData.size() >= 3); 13 | mKernelWeights.type = static_cast(mWeights->mMetaData[0]); 14 | assert(mKernelWeights.type == nvinfer1::DataType::kFLOAT); 15 | mInputChannelCount = mWeights->mMetaData[1]; 16 | mOutputChannelCount = mWeights->mMetaData[2]; 17 | 18 | mKernelWeights.values = (void*) (&mWeights->mWeights[0]); 19 | mKernelWeights.count = mInputChannelCount * mOutputChannelCount; 20 | } 21 | 22 | void MultiplicativeAlignment::addToModel( 23 | nvinfer1::INetworkDefinition* network, 24 | nvinfer1::ITensor* attentionKeys, 25 | nvinfer1::ITensor* queryStates, 26 | nvinfer1::ITensor** alignmentScores) 27 | { 28 | auto mmLayer = network->addMatrixMultiply( 29 | *queryStates, 30 | false, 31 | *attentionKeys, 32 | true); 33 | assert(mmLayer != nullptr); 34 | mmLayer->setName("Raw Alignment Scores MM (Queries x Keys) in multiplicative attention"); 35 | *alignmentScores = mmLayer->getOutput(0); 36 | assert(*alignmentScores != nullptr); 37 | } 38 | 39 | void MultiplicativeAlignment::addAttentionKeys( 40 | nvinfer1::INetworkDefinition* network, 41 | nvinfer1::ITensor* memoryStates, 42 | nvinfer1::ITensor** attentionKeys) 43 | { 44 | nvinfer1::Dims weightDims{2, {mInputChannelCount, mOutputChannelCount}, {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kCHANNEL}}; 45 | auto constLayer = network->addConstant(weightDims, mKernelWeights); 46 | assert(constLayer != nullptr); 47 | constLayer->setName("Matrix in multiplicative attention"); 48 | auto weights = constLayer->getOutput(0); 49 | assert(weights != nullptr); 50 | 51 | auto mmLayer = network->addMatrixMultiply( 52 | *memoryStates, 53 | false, 54 | *weights, 55 | false); 56 | assert(mmLayer != nullptr); 57 | mmLayer->setName("Attention Keys MM in multiplicative attention"); 58 | *attentionKeys = mmLayer->getOutput(0); 59 | assert(*attentionKeys != nullptr); 60 | } 61 | 62 | int MultiplicativeAlignment::getSourceStatesSize() 63 | { 64 | return mInputChannelCount; 65 | } 66 | 67 | int MultiplicativeAlignment::getAttentionKeySize() 68 | { 69 | return mOutputChannelCount; 70 | } 71 | 72 | std::string 
MultiplicativeAlignment::getInfo() 73 | { 74 | std::stringstream ss; 75 | ss << "Multiplicative Alignment, source states size = " << mInputChannelCount << ", attention keys size = " << mOutputChannelCount; 76 | return ss.str(); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/sampleNMT/model/multiplicativeAlignment.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_MULTIPLICATIVE_ALIGNMENT_ 2 | #define SAMPLE_NMT_MULTIPLICATIVE_ALIGNMENT_ 3 | 4 | #include "alignment.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class MultiplicativeAlignment 11 | * 12 | * \brief alignment scores from Luong attention mechanism 13 | * 14 | */ 15 | class MultiplicativeAlignment : public Alignment 16 | { 17 | public: 18 | MultiplicativeAlignment(ComponentWeights::ptr weights); 19 | 20 | void addToModel( 21 | nvinfer1::INetworkDefinition* network, 22 | nvinfer1::ITensor* attentionKeys, 23 | nvinfer1::ITensor* queryStates, 24 | nvinfer1::ITensor** alignmentScores) override; 25 | 26 | void addAttentionKeys( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* memoryStates, 29 | nvinfer1::ITensor** attentionKeys) override; 30 | 31 | int getSourceStatesSize() override; 32 | 33 | int getAttentionKeySize() override; 34 | 35 | std::string getInfo() override; 36 | 37 | ~MultiplicativeAlignment() override = default; 38 | 39 | protected: 40 | ComponentWeights::ptr mWeights; 41 | nvinfer1::Weights mKernelWeights; 42 | int mInputChannelCount; 43 | int mOutputChannelCount; 44 | }; 45 | } 46 | 47 | #endif // SAMPLE_NMT_MULTIPLICATIVE_ALIGNMENT_ 48 | -------------------------------------------------------------------------------- /src/sampleNMT/model/projection.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_PROJECTION_ 2 | #define SAMPLE_NMT_PROJECTION_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class Projection 12 | * 13 | * \brief calculates raw logits 14 | * 15 | */ 16 | class Projection : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | Projection() = default; 22 | 23 | /** 24 | * \brief add raw logits to the network 25 | */ 26 | virtual void addToModel( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* input, 29 | nvinfer1::ITensor** outputLogits) 30 | = 0; 31 | 32 | /** 33 | * \brief get the size of raw logits vector 34 | */ 35 | virtual int getOutputSize() = 0; 36 | 37 | ~Projection() override = default; 38 | }; 39 | } 40 | 41 | #endif // SAMPLE_NMT_PROJECTION_ 42 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpAttention.cpp: -------------------------------------------------------------------------------- 1 | #include "slpAttention.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | SLPAttention::SLPAttention(ComponentWeights::ptr weights) 9 | : mWeights(weights) 10 | { 11 | // please refer to chpt_to_bin.py for the details on the format 12 | assert(mWeights->mMetaData.size() >= 3); 13 | mKernelWeights.type = static_cast(mWeights->mMetaData[0]); 14 | assert(mKernelWeights.type == nvinfer1::DataType::kFLOAT); 15 | mInputChannelCount = mWeights->mMetaData[1]; 16 | mOutputChannelCount = mWeights->mMetaData[2]; 17 | 18 | mKernelWeights.values = (void*) (&mWeights->mWeights[0]); 19 | 
mKernelWeights.count = mInputChannelCount * mOutputChannelCount; 20 | } 21 | 22 | void SLPAttention::addToModel( 23 | nvinfer1::INetworkDefinition* network, 24 | nvinfer1::ITensor* inputFromDecoder, 25 | nvinfer1::ITensor* context, 26 | nvinfer1::ITensor** attentionOutput) 27 | { 28 | nvinfer1::ITensor* inputTensors[] = {inputFromDecoder, context}; 29 | auto concatLayer = network->addConcatenation(inputTensors, 2); 30 | assert(concatLayer != nullptr); 31 | concatLayer->setName("Concatinate decoder output and context"); 32 | concatLayer->setAxis(1); 33 | auto concatinatedTensor = concatLayer->getOutput(0); 34 | assert(concatinatedTensor != nullptr); 35 | 36 | nvinfer1::Dims weightDims{2, {mInputChannelCount, mOutputChannelCount}, {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kCHANNEL}}; 37 | auto constLayer = network->addConstant(weightDims, mKernelWeights); 38 | assert(constLayer != nullptr); 39 | constLayer->setName("Attention Matrix"); 40 | auto weights = constLayer->getOutput(0); 41 | assert(weights != nullptr); 42 | 43 | auto mmLayer = network->addMatrixMultiply( 44 | *concatinatedTensor, 45 | false, 46 | *weights, 47 | false); 48 | assert(mmLayer != nullptr); 49 | mmLayer->setName("Attention Matrix Multiply"); 50 | *attentionOutput = mmLayer->getOutput(0); 51 | assert(*attentionOutput != nullptr); 52 | } 53 | 54 | int SLPAttention::getAttentionSize() 55 | { 56 | return mOutputChannelCount; 57 | } 58 | 59 | std::string SLPAttention::getInfo() 60 | { 61 | std::stringstream ss; 62 | ss << "SLP Attention, num inputs = " << mInputChannelCount << ", num outputs = " << mOutputChannelCount; 63 | return ss.str(); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpAttention.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_SLP_ATTENTION_ 2 | #define SAMPLE_NMT_SLP_ATTENTION_ 3 | 4 | #include "attention.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class SLPAttention 11 | * 12 | * \brief Linear attention calculation 13 | * 14 | * Calculates attention vector by concatinating input from the decoder with context vector 15 | * and projecting the result into attention space by multiplying with weight matrix 16 | * 17 | */ 18 | class SLPAttention : public Attention 19 | { 20 | public: 21 | SLPAttention(ComponentWeights::ptr weights); 22 | 23 | void addToModel( 24 | nvinfer1::INetworkDefinition* network, 25 | nvinfer1::ITensor* inputFromDecoder, 26 | nvinfer1::ITensor* context, 27 | nvinfer1::ITensor** attentionOutput) override; 28 | 29 | int getAttentionSize() override; 30 | 31 | std::string getInfo() override; 32 | 33 | protected: 34 | ComponentWeights::ptr mWeights; 35 | nvinfer1::Weights mKernelWeights; 36 | int mInputChannelCount; 37 | int mOutputChannelCount; 38 | }; 39 | } 40 | 41 | #endif // SAMPLE_NMT_SLP_ATTENTION_ 42 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpEmbedder.cpp: -------------------------------------------------------------------------------- 1 | #include "slpEmbedder.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | SLPEmbedder::SLPEmbedder(ComponentWeights::ptr weights) 9 | : mWeights(weights) 10 | { 11 | // please refer to chpt_to_bin.py for the details on the format 12 | assert(mWeights->mMetaData.size() >= 3); 13 | mKernelWeights.type = static_cast(mWeights->mMetaData[0]); 14 | assert(mKernelWeights.type == 
nvinfer1::DataType::kFLOAT); 15 | mNumInputs = mWeights->mMetaData[1]; 16 | mNumOutputs = mWeights->mMetaData[2]; 17 | 18 | mKernelWeights.values = (void*) (&mWeights->mWeights[0]); 19 | mKernelWeights.count = mNumInputs * mNumOutputs; 20 | } 21 | 22 | void SLPEmbedder::addToModel( 23 | nvinfer1::INetworkDefinition* network, 24 | nvinfer1::ITensor* input, 25 | nvinfer1::ITensor** output) 26 | { 27 | nvinfer1::Dims weightDims{2, {mNumInputs, mNumOutputs}, {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kCHANNEL}}; 28 | auto constLayer = network->addConstant(weightDims, mKernelWeights); 29 | assert(constLayer != nullptr); 30 | constLayer->setName("Embedding matrix"); 31 | auto weights = constLayer->getOutput(0); 32 | assert(weights != nullptr); 33 | 34 | auto gatherLayer = network->addGather(*weights, *input, 0); 35 | assert(gatherLayer != nullptr); 36 | gatherLayer->setName("Gather in embedding"); 37 | *output = gatherLayer->getOutput(0); 38 | assert(*output != nullptr); 39 | } 40 | 41 | int SLPEmbedder::getInputDimensionSize() 42 | { 43 | return mNumInputs; 44 | } 45 | 46 | std::string SLPEmbedder::getInfo() 47 | { 48 | std::stringstream ss; 49 | ss << "SLP Embedder, num inputs = " << mNumInputs << ", num outputs = " << mNumOutputs; 50 | return ss.str(); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpEmbedder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_SLP_EMBEDDER_ 2 | #define SAMPLE_NMT_SLP_EMBEDDER_ 3 | 4 | #include "embedder.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | #include "NvInfer.h" 9 | 10 | #include 11 | 12 | namespace nmtSample 13 | { 14 | /** \class SLPEmbedder 15 | * 16 | * \brief selects the embedding vector from the weight matrix using index provided in the input 17 | * 18 | */ 19 | class SLPEmbedder : public Embedder 20 | { 21 | public: 22 | SLPEmbedder(ComponentWeights::ptr weights); 23 | 24 | void addToModel( 25 | nvinfer1::INetworkDefinition* network, 26 | nvinfer1::ITensor* input, 27 | nvinfer1::ITensor** output) override; 28 | 29 | int getInputDimensionSize() override; 30 | 31 | std::string getInfo() override; 32 | 33 | ~SLPEmbedder() override = default; 34 | 35 | protected: 36 | ComponentWeights::ptr mWeights; 37 | nvinfer1::Weights mKernelWeights; 38 | int mNumInputs; 39 | int mNumOutputs; 40 | }; 41 | } 42 | 43 | #endif // SAMPLE_NMT_SLP_EMBEDDER_ 44 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpProjection.cpp: -------------------------------------------------------------------------------- 1 | #include "slpProjection.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | SLPProjection::SLPProjection(ComponentWeights::ptr weights) 9 | : mWeights(weights) 10 | { 11 | // please refer to chpt_to_bin.py for the details on the format 12 | assert(mWeights->mMetaData.size() >= 3); 13 | mKernelWeights.type = static_cast(mWeights->mMetaData[0]); 14 | assert(mKernelWeights.type == nvinfer1::DataType::kFLOAT); 15 | mInputChannelCount = mWeights->mMetaData[1]; 16 | mOutputChannelCount = mWeights->mMetaData[2]; 17 | 18 | mKernelWeights.values = (void*) (&mWeights->mWeights[0]); 19 | mKernelWeights.count = mInputChannelCount * mOutputChannelCount; 20 | } 21 | 22 | void SLPProjection::addToModel( 23 | nvinfer1::INetworkDefinition* network, 24 | nvinfer1::ITensor* input, 25 | nvinfer1::ITensor** outputLogits) 26 | { 27 | nvinfer1::Dims weightDims{2, 
{mInputChannelCount, mOutputChannelCount}, {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kCHANNEL}}; 28 | auto constLayer = network->addConstant(weightDims, mKernelWeights); 29 | assert(constLayer != nullptr); 30 | constLayer->setName("Projection matrix"); 31 | auto weights = constLayer->getOutput(0); 32 | assert(weights != nullptr); 33 | 34 | auto mmLayer = network->addMatrixMultiply( 35 | *input, 36 | false, 37 | *weights, 38 | false); 39 | assert(mmLayer != nullptr); 40 | mmLayer->setName("Projection Matrix Multiply"); 41 | *outputLogits = mmLayer->getOutput(0); 42 | assert(*outputLogits != nullptr); 43 | } 44 | 45 | int SLPProjection::getOutputSize() 46 | { 47 | return mOutputChannelCount; 48 | } 49 | 50 | std::string SLPProjection::getInfo() 51 | { 52 | std::stringstream ss; 53 | ss << "SLP Projection, num inputs = " << mInputChannelCount << ", num outputs = " << mOutputChannelCount; 54 | return ss.str(); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpProjection.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_SLP_PROJECTION_ 2 | #define SAMPLE_NMT_SLP_PROJECTION_ 3 | 4 | #include "projection.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class SLPProjection 11 | * 12 | * \brief Linear logits calculation 13 | * 14 | * Calculates logits vector by multiplying input vector with weight matrix 15 | * 16 | */ 17 | class SLPProjection : public Projection 18 | { 19 | public: 20 | SLPProjection(ComponentWeights::ptr weights); 21 | 22 | void addToModel( 23 | nvinfer1::INetworkDefinition* network, 24 | nvinfer1::ITensor* input, 25 | nvinfer1::ITensor** outputLogits) override; 26 | 27 | int getOutputSize() override; 28 | 29 | std::string getInfo() override; 30 | 31 | ~SLPProjection() override = default; 32 | 33 | protected: 34 | ComponentWeights::ptr mWeights; 35 | nvinfer1::Weights mKernelWeights; 36 | int mInputChannelCount; 37 | int mOutputChannelCount; 38 | }; 39 | } 40 | 41 | #endif // SAMPLE_NMT_SLP_PROJECTION_ 42 | -------------------------------------------------------------------------------- /src/sampleNMT/model/softmaxLikelihood.cpp: -------------------------------------------------------------------------------- 1 | #include "softmaxLikelihood.h" 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | void SoftmaxLikelihood::addToModel( 10 | nvinfer1::INetworkDefinition* network, 11 | int beamWidth, 12 | nvinfer1::ITensor* inputLogits, 13 | nvinfer1::ITensor* inputLikelihoods, 14 | nvinfer1::ITensor** newCombinedLikelihoods, 15 | nvinfer1::ITensor** newRayOptionIndices, 16 | nvinfer1::ITensor** newVocabularyIndices) 17 | { 18 | auto softmaxLayer = network->addSoftMax(*inputLogits); 19 | assert(softmaxLayer != nullptr); 20 | softmaxLayer->setName("Softmax in likelihood calculation"); 21 | softmaxLayer->setAxes(2); 22 | auto softmaxTensor = softmaxLayer->getOutput(0); 23 | assert(softmaxTensor != nullptr); 24 | 25 | auto topKLayer = network->addTopK(*softmaxTensor, nvinfer1::TopKOperation::kMAX, beamWidth, 2); 26 | assert(topKLayer != nullptr); 27 | topKLayer->setName("TopK 1st in likelihood calculation"); 28 | auto newLikelihoods = topKLayer->getOutput(0); 29 | assert(newLikelihoods != nullptr); 30 | auto vocabularyIndices = topKLayer->getOutput(1); 31 | assert(vocabularyIndices != nullptr); 32 | 33 | auto eltWiseLayer = network->addElementWise(*newLikelihoods, *inputLikelihoods, 
nvinfer1::ElementWiseOperation::kPROD); 34 | assert(eltWiseLayer != nullptr); 35 | eltWiseLayer->setName("EltWise multiplication in likelihood calculation"); 36 | auto combinedLikelihoods = eltWiseLayer->getOutput(0); 37 | assert(combinedLikelihoods != nullptr); 38 | 39 | auto shuffleLayer = network->addShuffle(*combinedLikelihoods); 40 | assert(shuffleLayer != nullptr); 41 | shuffleLayer->setName("Reshape combined likelihoods"); 42 | nvinfer1::Dims shuffleDims{1, {beamWidth * beamWidth}, {nvinfer1::DimensionType::kCHANNEL}}; 43 | shuffleLayer->setReshapeDimensions(shuffleDims); 44 | auto reshapedCombinedLikelihoods = shuffleLayer->getOutput(0); 45 | assert(reshapedCombinedLikelihoods != nullptr); 46 | 47 | auto topKLayer2 = network->addTopK(*reshapedCombinedLikelihoods, nvinfer1::TopKOperation::kMAX, beamWidth, 1); 48 | assert(topKLayer2 != nullptr); 49 | topKLayer2->setName("TopK 2nd in likelihood calculation"); 50 | *newCombinedLikelihoods = topKLayer2->getOutput(0); 51 | assert(*newCombinedLikelihoods != nullptr); 52 | *newRayOptionIndices = topKLayer2->getOutput(1); 53 | assert(*newRayOptionIndices != nullptr); 54 | 55 | auto shuffleLayer2 = network->addShuffle(*vocabularyIndices); 56 | assert(shuffleLayer2 != nullptr); 57 | shuffleLayer2->setName("Reshape vocabulary indices"); 58 | nvinfer1::Dims shuffleDims2{1, {beamWidth * beamWidth}, {nvinfer1::DimensionType::kCHANNEL}}; 59 | shuffleLayer2->setReshapeDimensions(shuffleDims2); 60 | auto reshapedVocabularyIndices = shuffleLayer2->getOutput(0); 61 | assert(reshapedVocabularyIndices != nullptr); 62 | 63 | auto gatherLayer = network->addGather(*reshapedVocabularyIndices, **newRayOptionIndices, 0); 64 | assert(gatherLayer != nullptr); 65 | gatherLayer->setName("Shuffle vocabulary indices"); 66 | *newVocabularyIndices = gatherLayer->getOutput(0); 67 | assert(*newVocabularyIndices != nullptr); 68 | } 69 | 70 | float SoftmaxLikelihood::SoftmaxLikelihoodCombinationOperator::combine(float rayLikelihood, float optionLikelihood) const 71 | { 72 | return rayLikelihood * optionLikelihood; 73 | } 74 | 75 | float SoftmaxLikelihood::SoftmaxLikelihoodCombinationOperator::init() const 76 | { 77 | return 1.0F; 78 | } 79 | 80 | float SoftmaxLikelihood::SoftmaxLikelihoodCombinationOperator::smallerThanMinimalLikelihood() const 81 | { 82 | return -1.0F; 83 | } 84 | 85 | LikelihoodCombinationOperator::ptr SoftmaxLikelihood::getLikelihoodCombinationOperator() const 86 | { 87 | return std::make_shared(); 88 | } 89 | 90 | std::string SoftmaxLikelihood::getInfo() 91 | { 92 | return "Softmax Likelihood"; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/sampleNMT/model/softmaxLikelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_SOFTMAX_LIKELIHOOD_ 2 | #define SAMPLE_NMT_SOFTMAX_LIKELIHOOD_ 3 | 4 | #include "NvInfer.h" 5 | #include "likelihood.h" 6 | 7 | namespace nmtSample 8 | { 9 | /** \class SoftmaxLikelihood 10 | * 11 | * \brief calculates softmax likelihood and TopK indices for the raw input logits 12 | * 13 | */ 14 | class SoftmaxLikelihood : public Likelihood 15 | { 16 | private: 17 | class SoftmaxLikelihoodCombinationOperator : public LikelihoodCombinationOperator 18 | { 19 | public: 20 | SoftmaxLikelihoodCombinationOperator() = default; 21 | 22 | float combine(float rayLikelihood, float optionLikelihood) const override; 23 | 24 | float init() const override; 25 | 26 | float smallerThanMinimalLikelihood() const override; 27 | 28 | 
~SoftmaxLikelihoodCombinationOperator() override = default; 29 | }; 30 | 31 | public: 32 | SoftmaxLikelihood() = default; 33 | 34 | LikelihoodCombinationOperator::ptr getLikelihoodCombinationOperator() const override; 35 | 36 | void addToModel( 37 | nvinfer1::INetworkDefinition* network, 38 | int beamWidth, 39 | nvinfer1::ITensor* inputLogits, 40 | nvinfer1::ITensor* inputLikelihoods, 41 | nvinfer1::ITensor** newCombinedLikelihoods, 42 | nvinfer1::ITensor** newRayOptionIndices, 43 | nvinfer1::ITensor** newVocabularyIndices) override; 44 | 45 | std::string getInfo() override; 46 | 47 | ~SoftmaxLikelihood() override = default; 48 | }; 49 | } 50 | 51 | #endif // SAMPLE_NMT_SOFTMAX_LIKELIHOOD_ 52 | -------------------------------------------------------------------------------- /src/sampleNMT/pinnedHostBuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_PINNED_HOST_BUFFER_ 2 | #define SAMPLE_NMT_PINNED_HOST_BUFFER_ 3 | 4 | #include "cudaError.h" 5 | #include 6 | #include 7 | 8 | namespace nmtSample 9 | { 10 | /** \class PinnedHostBuffer 11 | * 12 | * \brief wrapper for the pinned host memory region 13 | * 14 | */ 15 | template 16 | class PinnedHostBuffer 17 | { 18 | public: 19 | typedef std::shared_ptr> ptr; 20 | 21 | PinnedHostBuffer(size_t elementCount) 22 | : mBuffer(nullptr) 23 | { 24 | CUDA_CHECK(cudaHostAlloc(&mBuffer, elementCount * sizeof(T), cudaHostAllocDefault)); 25 | } 26 | 27 | virtual ~PinnedHostBuffer() 28 | { 29 | if (mBuffer) 30 | { 31 | cudaFreeHost(mBuffer); 32 | } 33 | } 34 | 35 | operator T*() 36 | { 37 | return mBuffer; 38 | } 39 | 40 | operator const T*() const 41 | { 42 | return mBuffer; 43 | } 44 | 45 | protected: 46 | T* mBuffer; 47 | }; 48 | } 49 | 50 | #endif // SAMPLE_NMT_PINNED_HOST_BUFFER_ 51 | -------------------------------------------------------------------------------- /src/sampleNMT/sampleNMT.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 头文件 20 | 21 | 22 | 头文件 23 | 24 | 25 | 头文件 26 | 27 | 28 | 头文件 29 | 30 | 31 | 头文件 32 | 33 | 34 | 头文件 35 | 36 | 37 | 头文件 38 | 39 | 40 | 头文件 41 | 42 | 43 | 头文件 44 | 45 | 46 | 头文件 47 | 48 | 49 | 头文件 50 | 51 | 52 | 头文件 53 | 54 | 55 | 头文件 56 | 57 | 58 | 头文件 59 | 60 | 61 | 头文件 62 | 63 | 64 | 头文件 65 | 66 | 67 | 头文件 68 | 69 | 70 | 头文件 71 | 72 | 73 | 头文件 74 | 75 | 76 | 头文件 77 | 78 | 79 | 头文件 80 | 81 | 82 | 头文件 83 | 84 | 85 | 头文件 86 | 87 | 88 | 头文件 89 | 90 | 91 | 头文件 92 | 93 | 94 | 头文件 95 | 96 | 97 | 头文件 98 | 99 | 100 | 头文件 101 | 102 | 103 | 头文件 104 | 105 | 106 | 头文件 107 | 108 | 109 | 头文件 110 | 111 | 112 | 头文件 113 | 114 | 115 | 头文件 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 源文件 124 | 125 | 126 | 源文件 127 | 128 | 129 | 源文件 130 | 131 | 132 | 源文件 133 | 134 | 135 | 源文件 136 | 137 | 138 | 源文件 139 | 140 | 141 | 源文件 142 | 143 | 144 | 源文件 145 | 146 | 147 | 源文件 148 | 149 | 150 | 源文件 151 | 152 | 153 | 源文件 154 | 155 | 156 | 源文件 157 | 158 | 159 | 源文件 160 | 161 | 162 | 源文件 163 | 164 | 165 | 源文件 166 | 167 | 168 | 源文件 169 | 170 | 171 | 源文件 172 | 173 | 174 | 源文件 175 | 176 | 177 | 源文件 178 | 179 | 180 | 源文件 181 | 182 | 183 | 
-------------------------------------------------------------------------------- /src/sampleNMT/sampleNMT.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleNMT/trtUtil.cpp: -------------------------------------------------------------------------------- 1 | #include "trtUtil.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | int inferTypeToBytes(nvinfer1::DataType t) 10 | { 11 | switch (t) 12 | { 13 | case nvinfer1::DataType::kFLOAT: return sizeof(float); break; 14 | case nvinfer1::DataType::kHALF: return sizeof(int16_t); break; 15 | default: assert(0); break; 16 | } 17 | }; 18 | 19 | int getVolume(nvinfer1::Dims dims) 20 | { 21 | return std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies()); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/sampleNMT/trtUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_TRT_UTIL_ 2 | #define SAMPLE_NMT_TRT_UTIL_ 3 | 4 | #include "NvInfer.h" 5 | 6 | namespace nmtSample 7 | { 8 | int inferTypeToBytes(nvinfer1::DataType t); 9 | 10 | int getVolume(nvinfer1::Dims dims); 11 | } 12 | 13 | #endif // SAMPLE_NMT_TRT_UTIL_ 14 | -------------------------------------------------------------------------------- /src/samplePlugin/fp16.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRT_FP16_H_ 2 | #define _TRT_FP16_H_ 3 | 4 | #include 5 | 6 | namespace fp16 7 | { 8 | // Code added before equivalent code was available via cuda. 9 | // This code needs to be removed when we ship for cuda-9.2. 10 | template T bitwise_cast(U u) 11 | { 12 | return *reinterpret_cast(&u); 13 | } 14 | 15 | __half __float2half(float f) 16 | { 17 | uint32_t x = bitwise_cast(f); 18 | uint32_t u = (x & 0x7fffffff); 19 | 20 | // Get rid of +NaN/-NaN case first. 21 | if (u > 0x7f800000) 22 | return bitwise_cast<__half, uint16_t>(uint16_t(0x7fff)); 23 | 24 | uint16_t sign = ((x >> 16) & 0x8000); 25 | 26 | // Get rid of +Inf/-Inf, +0/-0. 27 | if (u > 0x477fefff) 28 | return bitwise_cast<__half, uint16_t>(sign | uint16_t(0x7c00)); 29 | 30 | if (u < 0x33000001) 31 | return bitwise_cast<__half, uint16_t>(sign | uint16_t(0x0000)); 32 | 33 | uint32_t exponent = ((u >> 23) & 0xff); 34 | uint32_t mantissa = (u & 0x7fffff); 35 | 36 | uint32_t shift; 37 | if (exponent > 0x70) 38 | { 39 | shift = 13; 40 | exponent -= 0x70; 41 | } 42 | else 43 | { 44 | shift = 0x7e - exponent; 45 | exponent = 0; 46 | mantissa |= 0x800000; 47 | } 48 | 49 | uint32_t lsb = (1 << shift); 50 | uint32_t lsb_s1 = (lsb >> 1); 51 | uint32_t lsb_m1 = (lsb - 1); 52 | 53 | // Round to nearest even. 
54 | uint32_t remainder = (mantissa & lsb_m1); 55 | mantissa >>= shift; 56 | if ( (remainder > lsb_s1) || ((remainder == lsb_s1) && (mantissa & 0x1)) ) 57 | { 58 | ++mantissa; 59 | if (!(mantissa & 0x3ff)) 60 | { 61 | ++exponent; 62 | mantissa = 0; 63 | } 64 | } 65 | 66 | return bitwise_cast<__half, uint16_t>(sign | uint16_t(exponent<<10) | uint16_t(mantissa)); 67 | } 68 | 69 | float __half2float(__half h) 70 | { 71 | uint16_t x = bitwise_cast(h); 72 | uint32_t sign = ((x >> 15) & 1); 73 | uint32_t exponent = ((x >> 10) & 0x1f); 74 | uint32_t mantissa = (static_cast(x & 0x3ff) << 13); 75 | 76 | if (exponent == 0x1f) 77 | { /* NaN or Inf */ 78 | if (mantissa != 0) 79 | { // NaN 80 | sign = 0; 81 | mantissa = 0x7fffff; 82 | } 83 | else // Inf 84 | mantissa = 0; 85 | exponent = 0xff; 86 | } 87 | else if (!exponent) 88 | { /* Denorm or Zero */ 89 | if (mantissa) { 90 | unsigned int msb; 91 | exponent = 0x71; 92 | do 93 | { 94 | msb = (mantissa & 0x400000); 95 | mantissa <<= 1; /* normalize */ 96 | --exponent; 97 | } 98 | while (!msb); 99 | mantissa &= 0x7fffff; /* 1.mantissa is implicit */ 100 | } 101 | } 102 | else 103 | exponent += 0x70; 104 | return bitwise_cast( (sign<<31) | (exponent<<23) | mantissa ); 105 | } 106 | 107 | }; 108 | 109 | #endif // _TRT_FP16_H_ 110 | -------------------------------------------------------------------------------- /src/samplePlugin/samplePlugin.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 头文件 20 | 21 | 22 | 23 | 24 | 源文件 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/samplePlugin/samplePlugin.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleResNetv2/sampleResNetv2.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleResNetv2/sampleResNetv2.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleUffMNIST/sampleUffMNIST.vcxproj.filters: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleUffMNIST/sampleUffMNIST.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleUffSSD/BatchStreamPPM.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCH_STREAM_PPM_H 2 | #define BATCH_STREAM_PPM_H 3 | #include <algorithm> 4 | #include <cassert> 5 | #include <fstream> 6 | #include <iterator> 7 | #include <vector> 8 | #include "NvInfer.h" 9 | #include "common.h" 10 | 11 | std::string locateFile(const std::string& input); 12 | 13 | static constexpr int INPUT_C = 3; 14 | static constexpr int INPUT_H = 300; 15 | static constexpr int INPUT_W = 300; 16 | 17 | const char* INPUT_BLOB_NAME = "Input"; 18 | 19 | class BatchStream 20 | { 21 | public: 22 | BatchStream(int batchSize, int maxBatches) : mBatchSize(batchSize), mMaxBatches(maxBatches) 23 | { 24 | mDims = nvinfer1::DimsNCHW{batchSize, 3, 300, 300 }; 25 | mImageSize = mDims.c() * mDims.h() * mDims.w(); 26 | mBatch.resize(mBatchSize * mImageSize, 0); 27 | mLabels.resize(mBatchSize, 0); 28 | mFileBatch.resize(mDims.n() * mImageSize, 0); 29 | mFileLabels.resize(mDims.n(), 0); 30 | reset(0); 31 | } 32 | 33 | void reset(int firstBatch) 34 | { 35 | mBatchCount = 0; 36 | mFileCount = 0; 37 | mFileBatchPos = mDims.n(); 38 | skip(firstBatch); 39 | } 40 | 41 | bool next() 42 | { 43 | if (mBatchCount == mMaxBatches) 44 | return false; 45 | 46 | for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) 47 | { 48 | assert(mFileBatchPos > 0 && mFileBatchPos <= mDims.n()); 49 | if (mFileBatchPos == mDims.n() && !update()) 50 | return false; 51 | 52 | // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer.
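// (In this sample mDims.n() equals mBatchSize, set in the constructor, so csize normally covers the whole requested batch in one pass and update() reloads the file buffer exactly once per next(); the loop form also handles the general case where the file-buffer size and the requested batch size differ.)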
53 | csize = std::min(mBatchSize - batchPos, mDims.n() - mFileBatchPos); 54 | std::copy_n(getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize); 55 | } 56 | mBatchCount++; 57 | return true; 58 | } 59 | 60 | void skip(int skipCount) 61 | { 62 | if (mBatchSize >= mDims.n() && mBatchSize % mDims.n() == 0 && mFileBatchPos == mDims.n()) 63 | { 64 | mFileCount += skipCount * mBatchSize / mDims.n(); 65 | return; 66 | } 67 | 68 | int x = mBatchCount; 69 | for (int i = 0; i < skipCount; i++) 70 | next(); 71 | mBatchCount = x; 72 | } 73 | 74 | float *getBatch() { return mBatch.data(); } 75 | float *getLabels() { return mLabels.data(); } 76 | int getBatchesRead() const { return mBatchCount; } 77 | int getBatchSize() const { return mBatchSize; } 78 | nvinfer1::DimsNCHW getDims() const { return mDims; } 79 | private: 80 | float* getFileBatch() { return mFileBatch.data(); } 81 | float* getFileLabels() { return mFileLabels.data(); } 82 | 83 | bool update() 84 | { 85 | std::vector<std::string> fNames; 86 | 87 | std::ifstream file(locateFile("list.txt")); 88 | if(file) 89 | { 90 | std::cout << "Batch #" << mFileCount << "\n"; 91 | file.seekg(((mBatchCount * mBatchSize))*7); 92 | } 93 | for(int i = 1; i <= mBatchSize; i++) 94 | { 95 | std::string sName; 96 | std::getline(file, sName); 97 | sName = sName + ".ppm"; 98 | 99 | std::cout << "Calibrating with file " << sName << std::endl; 100 | fNames.emplace_back(sName); 101 | } 102 | mFileCount++; 103 | 104 | std::vector<samplesCommon::PPM<INPUT_C, INPUT_H, INPUT_W>> ppms(fNames.size()); 105 | for (uint32_t i = 0; i < fNames.size(); ++i) 106 | { 107 | readPPMFile(locateFile(fNames[i]), ppms[i]); 108 | } 109 | std::vector<float> data(samplesCommon::volume(mDims)); 110 | 111 | long int volChl = mDims.h() * mDims.w(); 112 | 113 | for (int i = 0, volImg = mDims.c() * mDims.h() * mDims.w(); i < mBatchSize; ++i) 114 | { 115 | for (int c = 0; c < mDims.c(); ++c) 116 | { 117 | for (int j = 0; j < volChl; ++j) 118 | { 119 | data[i * volImg + c * volChl + j] = (2.0 / 255.0) * float(ppms[i].buffer[j * mDims.c() + c]) - 1.0; 120 | } 121 | } 122 | } 123 | 124 | std::copy_n(data.data(), mDims.n() * mImageSize, getFileBatch()); 125 | 126 | mFileBatchPos = 0; 127 | return true; 128 | } 129 | 130 | int mBatchSize{0}; 131 | int mMaxBatches{0}; 132 | int mBatchCount{0}; 133 | 134 | int mFileCount{0}, mFileBatchPos{0}; 135 | int mImageSize{0}; 136 | 137 | nvinfer1::DimsNCHW mDims; 138 | std::vector<float> mBatch; 139 | std::vector<float> mLabels; 140 | std::vector<float> mFileBatch; 141 | std::vector<float> mFileLabels; 142 | }; 143 | 144 | class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator 145 | { 146 | public: 147 | Int8EntropyCalibrator(BatchStream& stream, int firstBatch, std::string calibrationTableName, bool readCache = true) 148 | : mStream(stream), 149 | mCalibrationTableName(std::move(calibrationTableName)), 150 | mReadCache(readCache) 151 | { 152 | nvinfer1::DimsNCHW dims = mStream.getDims(); 153 | mInputCount = samplesCommon::volume(dims); 154 | CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); 155 | mStream.reset(firstBatch); 156 | } 157 | 158 | virtual ~Int8EntropyCalibrator() 159 | { 160 | CHECK(cudaFree(mDeviceInput)); 161 | } 162 | 163 | int getBatchSize() const override { return mStream.getBatchSize(); } 164 | 165 | bool getBatch(void* bindings[], const char* names[], int nbBindings) override 166 | { 167 | if (!mStream.next()) 168 | return false; 169 | 170 | CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); 171 |
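// TensorRT asks for one device pointer per input binding; this calibrator exposes a single input, so it checks that binding 0 really is the expected input blob and hands back the device buffer that the cudaMemcpy above just filled.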
assert(!strcmp(names[0], INPUT_BLOB_NAME)); 172 | bindings[0] = mDeviceInput; 173 | return true; 174 | } 175 | 176 | const void* readCalibrationCache(size_t& length) override 177 | { 178 | mCalibrationCache.clear(); 179 | std::ifstream input(mCalibrationTableName, std::ios::binary); 180 | input >> std::noskipws; 181 | if (mReadCache && input.good()) 182 | std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(mCalibrationCache)); 183 | length = mCalibrationCache.size(); 184 | return length ? mCalibrationCache.data() : nullptr; 185 | } 186 | 187 | void writeCalibrationCache(const void* cache, size_t length) override 188 | { 189 | std::ofstream output(mCalibrationTableName, std::ios::binary); 190 | output.write(reinterpret_cast<const char*>(cache), length); 191 | } 192 | 193 | private: 194 | BatchStream mStream; 195 | std::string mCalibrationTableName; 196 | bool mReadCache{true}; 197 | 198 | size_t mInputCount; 199 | void* mDeviceInput{nullptr}; 200 | std::vector<char> mCalibrationCache; 201 | }; 202 | #endif 203 | -------------------------------------------------------------------------------- /src/sampleUffSSD/README.txt: -------------------------------------------------------------------------------- 1 | The sampleUffSSD example is based on the following paper, SSD: Single Shot MultiBox 2 | Detector (https://arxiv.org/abs/1512.02325). The SSD network performs the 3 | task of object detection and localization in a single forward pass of the network. 4 | The TensorFlow SSD network was trained on the InceptionV2 architecture using 5 | the MSCOCO dataset. 6 | 7 | The sample makes use of TensorRT plugins to run the SSD network. To use these 8 | plugins, the TensorFlow graph needs to be preprocessed. 9 | 10 | Steps to generate the UFF file: 11 | 0. Make sure you have the UFF converter installed. For installation instructions, see: 12 | https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/#python and click on the 'TensorRT Python API' link. 13 | 14 | 1. Get the pre-trained TensorFlow model (ssd_inception_v2_coco) from: 15 | http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz 16 | 17 | 2. Call the UFF converter with the preprocessing flag set (-p [config_file]). 18 | The config.py script specifies the preprocessing operations necessary for the SSD TF graph. 19 | It must be copied to the working directory for the file to be imported properly. 20 | The plugin nodes and plugin parameters used in config.py should match the registered plugins 21 | in TensorRT. Please read the plugins documentation for more details. 22 | 23 | 'convert-to-uff --input-file frozen_inference_graph.pb -O NMS -p config.py' 24 | 25 | This script saves the converted .uff file in the same directory as the input with 26 | the name frozen_inference_graph.pb.uff. Copy this converted .uff file to the 27 | data directory as /data/ssd/sample_ssd_relu6.uff 28 | 29 | The sample also requires a labels .txt file with a list of all labels used to 30 | train the model. The current example for this network is /data/ssd/ssd_coco_labels.txt 31 | 32 | Steps to run the network: 33 | 1. To run the network in FP32 mode, ./sample_uff_ssd 34 | 2. To run the network in INT8 mode, ./sample_uff_ssd --int8 35 | 36 | To run the network in INT8 mode, refer to BatchStreamPPM.h for details on how 37 | calibration can be performed. Currently we require a file (list.txt) with 38 | a list of all PPM images for calibration in the /data/ssd/ folder.
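As an illustration only (the exact names are up to the user; this layout is inferred from how BatchStreamPPM.h consumes the file): each line of list.txt holds the base name of one calibration image without the .ppm extension, e.g. image1, image2, image3. BatchStreamPPM.h appends ".ppm" to every line and seeks 7 bytes per entry, so it expects fixed-width names of six characters plus a newline.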
39 | The PPM images to be used for calibration can also reside in the same folder. 40 | 41 | NOTE - There might be some precision loss when running the network in INT8 42 | mode causing some objects to go undetected. Our general observation is that 43 | >500 images is a good number for calibration purposes. 44 | -------------------------------------------------------------------------------- /src/sampleUffSSD/car-0.671518.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleUffSSD/car-0.671518.ppm -------------------------------------------------------------------------------- /src/sampleUffSSD/config.py: -------------------------------------------------------------------------------- 1 | import graphsurgeon as gs 2 | import tensorflow as tf 3 | 4 | Input = gs.create_node("Input", 5 | op="Placeholder", 6 | dtype=tf.float32, 7 | shape=[1, 3, 300, 300]) 8 | PriorBox = gs.create_plugin_node(name="GridAnchor", op="GridAnchor_TRT", 9 | numLayers=6, 10 | minSize=0.2, 11 | maxSize=0.95, 12 | aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33], 13 | variance=[0.1,0.1,0.2,0.2], 14 | featureMapShapes=[19, 10, 5, 3, 2, 1]) 15 | NMS = gs.create_plugin_node(name="NMS", op="NMS_TRT", 16 | shareLocation=1, 17 | varianceEncodedInTarget=0, 18 | backgroundLabelId=0, 19 | confidenceThreshold=1e-8, 20 | nmsThreshold=0.6, 21 | topK=100, 22 | keepTopK=100, 23 | numClasses=91, 24 | inputOrder=[0, 2, 1], 25 | confSigmoid=1, 26 | isNormalized=1, 27 | scoreConverter="SIGMOID") 28 | concat_priorbox = gs.create_node(name="concat_priorbox", op="ConcatV2", dtype=tf.float32, axis=2) 29 | concat_box_loc = gs.create_plugin_node("concat_box_loc", op="FlattenConcat_TRT", dtype=tf.float32, axis=1, ignoreBatch=0) 30 | concat_box_conf = gs.create_plugin_node("concat_box_conf", op="FlattenConcat_TRT", dtype=tf.float32, axis=1, ignoreBatch=0) 31 | 32 | namespace_plugin_map = { 33 | "MultipleGridAnchorGenerator": PriorBox, 34 | "Postprocessor": NMS, 35 | "Preprocessor": Input, 36 | "ToFloat": Input, 37 | "image_tensor": Input, 38 | "MultipleGridAnchorGenerator/Concatenate": concat_priorbox, 39 | "concat": concat_box_loc, 40 | "concat_1": concat_box_conf 41 | } 42 | 43 | def preprocess(dynamic_graph): 44 | # Now create a new graph by collapsing namespaces 45 | dynamic_graph.collapse_namespaces(namespace_plugin_map) 46 | # Remove the outputs, so we just have a single output node (NMS). 
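# graph_outputs still refers to the original TensorFlow output nodes; removing them while keeping their upstream nodes (remove_exclusive_dependencies=False) leaves the collapsed NMS plugin node as the graph's only output, which is the node the README passes to convert-to-uff via '-O NMS'.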
47 | dynamic_graph.remove(dynamic_graph.graph_outputs, remove_exclusive_dependencies=False) 48 | -------------------------------------------------------------------------------- /src/sampleUffSSD/dog-0.880681.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleUffSSD/dog-0.880681.ppm -------------------------------------------------------------------------------- /src/sampleUffSSD/dog-0.890010.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleUffSSD/dog-0.890010.ppm -------------------------------------------------------------------------------- /src/sampleUffSSD/person-0.549108.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleUffSSD/person-0.549108.ppm -------------------------------------------------------------------------------- /src/sampleUffSSD/sampleUffSSD.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 头文件 20 | 21 | 22 | 23 | 24 | 源文件 25 | 26 | 27 | 源文件 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/sampleUffSSD/sampleUffSSD.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleUffSSD/truck-0.782028.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleUffSSD/truck-0.782028.ppm --------------------------------------------------------------------------------