├── .gitignore ├── LICENSE ├── README.md ├── TensorRT_test.sln ├── common ├── BatchStream.h ├── argsParser.h ├── buffers.h ├── common.h ├── dumpTFWts.py ├── half.h ├── sampleConfig.h └── windows │ ├── getopt.c │ └── getopt.h └── src ├── sampleCharRNN ├── sampleCharRNN.cpp ├── sampleCharRNN.vcxproj ├── sampleCharRNN.vcxproj.filters └── sampleCharRNN.vcxproj.user ├── sampleDetection ├── sampleDetection.cpp ├── sampleDetection.vcxproj ├── sampleDetection.vcxproj.filters └── sampleDetection.vcxproj.user ├── sampleFasterRCNN ├── README.md ├── car-0.990063.ppm ├── dog-0.997705.ppm ├── dog-0.999603.ppm ├── factoryFasterRCNN.h ├── horse-0.994670.ppm ├── person-0.974725.ppm ├── sampleFasterRCNN.cpp ├── sampleFasterRCNN.vcxproj ├── sampleFasterRCNN.vcxproj.filters └── sampleFasterRCNN.vcxproj.user ├── sampleGoogleNet ├── README.md ├── sampleGoogleNet.cpp ├── sampleGoogleNet.vcxproj ├── sampleGoogleNet.vcxproj.filters └── sampleGoogleNet.vcxproj.user ├── sampleINT8 ├── BatchStream.h ├── CalibrationTablemnist ├── LegacyCalibrator.h ├── int8_caffe.patch ├── sampleINT8.cpp ├── sampleINT8.vcxproj ├── sampleINT8.vcxproj.filters └── sampleINT8.vcxproj.user ├── sampleLoadEngineStream ├── sampleLoadEngineStream.cpp ├── sampleLoadEngineStream.vcxproj ├── sampleLoadEngineStream.vcxproj.filters └── sampleLoadEngineStream.vcxproj.user ├── sampleMLP ├── README.txt ├── convert_weights.py ├── sampleMLP.cpp ├── sampleMLP.vcxproj ├── sampleMLP.vcxproj.filters ├── sampleMLP.vcxproj.user └── update_mlp.patch ├── sampleMNIST ├── README.md ├── sampleMNIST.cpp ├── sampleMNIST.vcxproj ├── sampleMNIST.vcxproj.filters └── sampleMNIST.vcxproj.user ├── sampleMNISTAPI ├── sampleMNISTAPI.cpp ├── sampleMNISTAPI.vcxproj ├── sampleMNISTAPI.vcxproj.filters └── sampleMNISTAPI.vcxproj.user ├── sampleMovieLens ├── README.txt ├── preprocess.py ├── sampleMovieLens.cpp ├── sampleMovieLens.vcxproj ├── sampleMovieLens.vcxproj.filters ├── sampleMovieLens.vcxproj.user └── sampleMovieLensTraining.patch ├── sampleNMT ├── README.txt ├── chptToBin.py ├── component.h ├── cudaError.h ├── data │ ├── benchmarkWriter.cpp │ ├── benchmarkWriter.h │ ├── bleuScoreWriter.cpp │ ├── bleuScoreWriter.h │ ├── dataReader.h │ ├── dataWriter.cpp │ ├── dataWriter.h │ ├── limitedSamplesDataReader.cpp │ ├── limitedSamplesDataReader.h │ ├── sequenceProperties.h │ ├── textReader.cpp │ ├── textReader.h │ ├── textWriter.cpp │ ├── textWriter.h │ ├── vocabulary.cpp │ └── vocabulary.h ├── deviceBuffer.h ├── model │ ├── alignment.h │ ├── attention.h │ ├── beamSearchPolicy.cpp │ ├── beamSearchPolicy.h │ ├── componentWeights.cpp │ ├── componentWeights.h │ ├── context.cpp │ ├── context.h │ ├── debugUtil.cpp │ ├── debugUtil.h │ ├── decoder.h │ ├── embedder.h │ ├── encoder.h │ ├── likelihood.h │ ├── likelihoodCombinationOperator.h │ ├── lstmDecoder.cpp │ ├── lstmDecoder.h │ ├── lstmEncoder.cpp │ ├── lstmEncoder.h │ ├── multiplicativeAlignment.cpp │ ├── multiplicativeAlignment.h │ ├── projection.h │ ├── slpAttention.cpp │ ├── slpAttention.h │ ├── slpEmbedder.cpp │ ├── slpEmbedder.h │ ├── slpProjection.cpp │ ├── slpProjection.h │ ├── softmaxLikelihood.cpp │ └── softmaxLikelihood.h ├── pinnedHostBuffer.h ├── sampleNMT.cpp ├── sampleNMT.vcxproj ├── sampleNMT.vcxproj.filters ├── sampleNMT.vcxproj.user ├── trtUtil.cpp └── trtUtil.h ├── samplePlugin ├── fp16.h ├── samplePlugin.cpp ├── samplePlugin.vcxproj ├── samplePlugin.vcxproj.filters └── samplePlugin.vcxproj.user ├── sampleResNetv2 ├── sampleResNetv2.vcxproj ├── sampleResNetv2.vcxproj.filters ├── sampleResNetv2.vcxproj.user └── 
sampleResNetv2cpp.cpp ├── sampleUffMNIST ├── sampleUffMNIST.cpp ├── sampleUffMNIST.vcxproj ├── sampleUffMNIST.vcxproj.filters └── sampleUffMNIST.vcxproj.user └── sampleUffSSD ├── BatchStreamPPM.h ├── README.txt ├── car-0.671518.ppm ├── config.py ├── dog-0.880681.ppm ├── dog-0.890010.ppm ├── person-0.549108.ppm ├── sampleUffSSD.cpp ├── sampleUffSSD.vcxproj ├── sampleUffSSD.vcxproj.filters ├── sampleUffSSD.vcxproj.user └── truck-0.782028.ppm
/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | output/* 3 | env*.txt 4 | .vs/* 5 | 3rdparty/* 6 | src/sampleSSD/* 7 | src/sampleUffCifar10/* 8 | --------------------------------------------------------------------------------
/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Milittle 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # TensorRT5.0 Test Integration 2 | 3 | Project **TensorRT_test** integrates the TensorRT library samples into a Windows Visual Studio 2017 solution, so that machine learning models can run fast at the inference stage. 4 | 5 | >You can find more information about **TensorRT** in the [TensorRT Dev Guide](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html) 6 | 7 | ## Not NVIDIA TensorRT Official Samples (by myself) 8 | 9 | * **sampleLoadEngineStream:** deserializes an engine stream from the `engineStream.bin` file located in the `{SolutionDir}/data/mnist/` folder. 10 | * **sampleResNetv2**: converts a ResNetv2 TensorFlow pb file to a uff file and runs inference on it. 11 | * **sampleDetection**: (defect detection demo) works around the TensorFlow BatchNormalization operator. TensorRT does not support BN's Switch and Merge nodes, so I take the pb graph, remove the Switch/Merge nodes, merge the related nodes back into the graph, and convert the result to a uff file that the TensorRT uff parser can load (see the sketch after this list). 12 | I run inference on ten defect images, so the timing below is the total time for 10 images.
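Once the pruned graph has been converted to a `.uff` file, loading it in TensorRT follows the usual UFF-parser pattern. Below is a minimal C++ sketch of that step; the tensor names, input shape, file name, and data directory are placeholder assumptions for illustration, not the actual values used by sampleDetection:

```cpp
#include "NvInfer.h"
#include "NvUffParser.h"
#include "common.h"

// Build an engine from a UFF file exported from the pruned TensorFlow graph.
// "input", "detection_out", the 1x512x512 shape and detection.uff are placeholders;
// substitute the real names and dimensions from your exported graph.
nvinfer1::ICudaEngine* buildDetectionEngine(nvinfer1::ILogger& logger)
{
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
    nvinfer1::INetworkDefinition* network = builder->createNetwork();
    nvuffparser::IUffParser* parser = nvuffparser::createUffParser();

    parser->registerInput("input", nvinfer1::Dims3(1, 512, 512), nvuffparser::UffInputOrder::kNCHW);
    parser->registerOutput("detection_out");
    parser->parse(locateFile("detection.uff", {"data/detection/"}).c_str(), *network, nvinfer1::DataType::kFLOAT);

    builder->setMaxBatchSize(10);           // ten defect images per batch
    builder->setMaxWorkspaceSize(1 << 28);  // 256 MB of scratch space
    nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);

    network->destroy();
    parser->destroy();
    builder->destroy();
    return engine;
}
```

Everything else mirrors the UFF-based samples already in the solution (sampleUffMNIST, sampleUffSSD); only the registered input/output names differ.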
13 | 14 | **sampleDetection time comparison:** 15 | 16 | | TensorFlow (Python), Titan 12 GB | TensorRT (C++), Quadro 4 GB | Speedup | 17 | | :----------------------------------------: | :--------------------------------: | :--------: | 18 | | pure run time (1344.3049 ms) | pure execution time (44.5 ms) | ~30x | 19 | | load data and related tensor nodes (3473 ms) | load data and execute (171.373 ms) | ~20x | 20 | | GPU memory: 2 GB | --- | | 21 | 22 | ## Table of Contents 23 | 24 | - [TensorRT5.0 Test Integration](#tensorrt50-test-integration) 25 | - [Not NVIDIA TensorRT Official Samples (by myself)](#not-nvidia-tensorrt-official-samples-by-myself) 26 | - [Table of Contents](#table-of-contents) 27 | - [Prerequisites](#prerequisites) 28 | - [Getting the code](#getting-the-code) 29 | - [Project Structure](#project-structure) 30 | - [Run the Example using VS](#run-the-example-using-vs) 31 | - [sampleUffMNIST](#sampleuffmnist) 32 | - [sampleUffSSD](#sampleuffssd) 33 | - [sampleMNIST](#samplemnist) 34 | - [sampleMNISTAPI](#samplemnistapi) 35 | - [sampleSSD](#samplessd) 36 | - [samplePlugin](#sampleplugin) 37 | - [sampleCharRNN](#samplecharrnn) 38 | - [sampleFasterRCNN](#samplefasterrcnn) 39 | - [sampleGoogleNet](#samplegooglenet) 40 | - [sampleINT8](#sampleint8) 41 | - [sampleMLP](#samplemlp) 42 | - [sampleMovieLens](#samplemovielens) 43 | - [sampleNMT](#samplenmt) 44 | - [Contact and Getting Help](#contact-and-getting-help) 45 | 46 | ## Prerequisites 47 | 48 | * CUDA 10.0 [DOWNLOAD LINK](https://developer.nvidia.com/cuda-downloads) 49 | * cuDNN 7.3 [DOWNLOAD LINK](https://developer.nvidia.com/cudnn) 50 | * Visual Studio 2017 51 | 52 | ## Getting the code 53 | 54 | You can clone the project with git: 55 | 56 | ```shell 57 | git clone git@github.com:Milittle/TensorRT_test.git 58 | ``` 59 | 60 | ## Project Structure 61 | 62 | The following is the structure of the integrated project. You can download **data** and **3rdparty** from: 63 | 64 | **Google Drive**: [data and 3rdparty download link](https://drive.google.com/open?id=1mDKSmK5n2n7KnZhW5mQbUSJTSzZteN8c) 65 | 66 | Once you have downloaded data and 3rdparty, open the TensorRT_test.sln file and run the samples from Visual Studio 2017. 67 | 68 | Good luck. 69 | 70 | ```shell 71 | TensorRT_test: 72 | | 3rdparty 73 | └---| TensorRT-5.0.1.3 74 | | └------------------- 75 | | common 76 | └---| windows 77 | | | argsParser.h 78 | | | BatchStream.h 79 | | | buffers.h 80 | | | common.h 81 | | | dumpTFWts.py 82 | | | half.h 83 | | | sampleConfig.h 84 | | └------------------- 85 | | data 86 | └---| char-rnn 87 | | | example_gif 88 | | | faster-rcnn 89 | | | googlenet 90 | | | mlp 91 | | | mnist 92 | | | movielens 93 | | | nmt 94 | | | ssd 95 | | └------------------- 96 | | src 97 | └---| sampleCharRNN 98 | | | sampleFasterRCNN 99 | | | sampleGoogleNet 100 | | | sampleINT8 101 | | | sampleMLP 102 | | | sampleMNIST 103 | | | sampleMNISTAPI 104 | | | sampleMovieLens 105 | | | sampleNMT 106 | | | samplePlugin 107 | | | sampleUffMNIST 108 | | | sampleUffSSD 109 | | └-------------------- 110 | | .gitignore 111 | └------------------------ 112 | | README.md 113 | └------------------------ 114 | | TensorRT_test.sln 115 | └------------------------ 116 | ``` 117 | 118 | ## Run the Example using VS 119 | 120 | ### sampleUffMNIST 121 | 122 | ![Demo](https://s1.ax1x.com/2018/10/28/ig9UTe.gif) 123 | 124 | ### sampleUffSSD 125 | 126 | Loading the model and building the engine takes a long time in this example, so be patient. To avoid rebuilding the engine on every run, you can serialize it once it has been built and reload it later, as sketched below.
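One way to skip the long build on later runs is to serialize the engine after the first build and deserialize it afterwards (this is the pattern sampleLoadEngineStream uses with its `engineStream.bin`). A minimal sketch, with a caller-supplied file path; note that if the network contains plugin layers, as sampleUffSSD does, the matching plugin factory must be passed to `deserializeCudaEngine` instead of `nullptr`:

```cpp
#include "NvInfer.h"
#include <fstream>
#include <vector>

// After buildCudaEngine() has succeeded once, dump the engine to disk.
void saveEngine(nvinfer1::ICudaEngine& engine, const char* path)
{
    nvinfer1::IHostMemory* blob = engine.serialize();
    std::ofstream out(path, std::ios::binary);
    out.write(static_cast<const char*>(blob->data()), blob->size());
    blob->destroy();
}

// On later runs, skip parsing and building entirely and reload the blob.
nvinfer1::ICudaEngine* loadEngine(nvinfer1::IRuntime& runtime, const char* path)
{
    std::ifstream in(path, std::ios::binary | std::ios::ate);
    if (!in) return nullptr;
    size_t size = in.tellg();
    in.seekg(0, std::ios::beg);
    std::vector<char> blob(size);
    in.read(blob.data(), size);
    // Pass the plugin factory here instead of nullptr if the engine uses plugins.
    return runtime.deserializeCudaEngine(blob.data(), size, nullptr);
}
```

sampleLoadEngineStream in this repository implements the second half of this: it reads `engineStream.bin` and calls `deserializeCudaEngine` before running inference.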
127 | 128 | Step 1: Begin parsing model... 129 | 130 | End parsing model... 131 | 132 | Step 2: Begin building engine... 133 | 134 | End building engine... 135 | 136 | Step 3: Begin inference. 137 | 138 | ![](https://s1.ax1x.com/2018/10/29/igNDaT.gif) 139 | 140 | ### sampleMNIST 141 | 142 | ![](https://s1.ax1x.com/2018/10/29/igNcRJ.gif) 143 | 144 | ### sampleMNISTAPI 145 | 146 | ![](https://s1.ax1x.com/2018/10/29/igNgz9.gif) 147 | 148 | ### sampleSSD 149 | 150 | This example currently fails: I cannot get the Caffe parser to parse the model prototxt. 151 | 152 | 153 | 154 | ### samplePlugin 155 | 156 | ![](https://s1.ax1x.com/2018/10/29/igNWs1.gif) 157 | 158 | ### sampleCharRNN 159 | 160 | ![](https://s1.ax1x.com/2018/10/29/igN5dK.gif) 161 | 162 | ### sampleFasterRCNN 163 | 164 | ![](https://s1.ax1x.com/2018/10/29/igN7Je.gif) 165 | 166 | ### sampleGoogleNet 167 | 168 | ![](https://s1.ax1x.com/2018/10/29/igNHRH.gif) 169 | 170 | ### sampleINT8 171 | 172 | **Note**: my GPU does not support FP16 or INT8, so: 173 | 174 | ![](https://s1.ax1x.com/2018/10/29/igNxdf.gif) 175 | 176 | ### sampleMLP 177 | 178 | ![](https://s1.ax1x.com/2018/10/29/igNzo8.gif) 179 | 180 | ### sampleMovieLens 181 | 182 | ![](https://s1.ax1x.com/2018/10/29/igUpFS.gif) 183 | 184 | ### sampleNMT 185 | 186 | ![](https://s1.ax1x.com/2018/10/29/igUNFO.gif) 187 | 188 | ## Contact and Getting Help 189 | 190 | **Email:** mizeshuang@gmail.com 191 | 192 | **QQ**: 329804334 193 | 194 | **Author:** Milittle 195 | --------------------------------------------------------------------------------
/common/BatchStream.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCH_STREAM_H 2 | #define BATCH_STREAM_H 3 | 4 | #include "NvInfer.h" 5 | #include "common.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | class BatchStream 12 | { 13 | public: 14 | BatchStream(int batchSize, int maxBatches, std::string prefix, std::vector directories) 15 | : mBatchSize(batchSize) 16 | , mMaxBatches(maxBatches) 17 | , mPrefix(prefix) 18 | , mDataDir(directories) 19 | { 20 | FILE* file = fopen(locateFile(mPrefix + std::string("0.batch"), mDataDir).c_str(), "rb"); 21 | int d[4]; 22 | size_t readSize = fread(d, sizeof(int), 4, file); 23 | assert(readSize == 4); 24 | mDims.nbDims = 4; //The number of dimensions. 25 | mDims.d[0] = d[0]; //Batch Size 26 | mDims.d[1] = d[1]; //Channels 27 | mDims.d[2] = d[2]; //Height 28 | mDims.d[3] = d[3]; //Width 29 | 30 | fclose(file); 31 | mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; 32 | mBatch.resize(mBatchSize * mImageSize, 0); 33 | mFileBatch.resize(mDims.d[0] * mImageSize, 0); 34 | reset(0); 35 | } 36 | 37 | // Resets data members 38 | void reset(int firstBatch) 39 | { 40 | mBatchCount = 0; 41 | mFileCount = 0; 42 | mFileBatchPos = mDims.d[0]; 43 | skip(firstBatch); 44 | } 45 | 46 | // Advance to next batch and return true, or return false if there is no batch left. 47 | bool next() 48 | { 49 | if (mBatchCount == mMaxBatches) 50 | return false; 51 | 52 | for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) 53 | { 54 | assert(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]); 55 | if (mFileBatchPos == mDims.d[0] && !update()) 56 | return false; 57 | 58 | // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer.
59 | csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos); 60 | std::copy_n(getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize); 61 | } 62 | mBatchCount++; 63 | return true; 64 | } 65 | 66 | // Skips the batches 67 | void skip(int skipCount) 68 | { 69 | if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == mDims.d[0]) 70 | { 71 | mFileCount += skipCount * mBatchSize / mDims.d[0]; 72 | return; 73 | } 74 | 75 | int x = mBatchCount; 76 | for (int i = 0; i < skipCount; i++) 77 | next(); 78 | mBatchCount = x; 79 | } 80 | 81 | float* getBatch() { return &mBatch[0]; } 82 | int getBatchesRead() const { return mBatchCount; } 83 | int getBatchSize() const { return mBatchSize; } 84 | int getImageSize() const { return mImageSize; } 85 | nvinfer1::Dims getDims() const { return mDims; } 86 | 87 | private: 88 | 89 | float* getFileBatch() { return &mFileBatch[0]; } 90 | 91 | bool update() 92 | { 93 | std::string inputFileName = locateFile(mPrefix + std::to_string(mFileCount++) + std::string(".batch"), mDataDir); 94 | FILE* file = fopen(inputFileName.c_str(), "rb"); 95 | if (!file) 96 | return false; 97 | 98 | int d[4]; 99 | size_t readSize = fread(d, sizeof(int), 4, file); 100 | assert(readSize == 4); 101 | assert(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] && mDims.d[3] == d[3]); 102 | size_t readInputCount = fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file); 103 | assert(readInputCount == size_t(mDims.d[0] * mImageSize)); 104 | 105 | fclose(file); 106 | mFileBatchPos = 0; 107 | return true; 108 | } 109 | 110 | int mBatchSize{0}; 111 | int mMaxBatches{0}; 112 | int mBatchCount{0}; 113 | int mFileCount{0}; 114 | int mFileBatchPos{0}; 115 | int mImageSize{0}; 116 | nvinfer1::Dims mDims; 117 | std::vector mBatch; 118 | std::vector mFileBatch; 119 | std::string mPrefix; 120 | std::vector mDataDir; 121 | }; 122 | #endif 123 | -------------------------------------------------------------------------------- /common/argsParser.h: -------------------------------------------------------------------------------- 1 | #ifndef TENSORRT_ARGS_PARSER_H 2 | #define TENSORRT_ARGS_PARSER_H 3 | 4 | #include 5 | #include 6 | #ifdef _MSC_VER 7 | #include "getopt.h" 8 | #else 9 | #include 10 | #endif 11 | #include 12 | 13 | namespace samplesCommon 14 | { 15 | 16 | //! 17 | //! \brief The SampleParams structure groups the basic parameters required by 18 | //! all sample networks. 19 | //! 20 | struct SampleParams 21 | { 22 | int batchSize; //!< Number of inputs in a batch 23 | int dlaCore{-1}; 24 | std::vector dataDirs; //!< Directory paths where sample data files are stored 25 | std::vector inputTensorNames; 26 | std::vector outputTensorNames; 27 | }; 28 | 29 | //! 30 | //! \brief The CaffeSampleParams structure groups the additional parameters required by 31 | //! networks that use caffe 32 | //! 33 | struct CaffeSampleParams : public SampleParams 34 | { 35 | std::string prototxtFileName; //!< Filename of prototxt design file of a network 36 | std::string weightsFileName; //!< Filename of trained weights file of a network 37 | }; 38 | 39 | //! 40 | //! /brief Struct to maintain command-line arguments. 41 | //! 42 | struct Args 43 | { 44 | bool runInInt8{false}; 45 | bool help{false}; 46 | int useDLACore{-1}; 47 | std::vector dataDirs; 48 | }; 49 | 50 | //! 51 | //! \brief Populates the Args struct with the provided command-line parameters. 52 | //! 53 | //! 
\throw invalid_argument if any of the arguments are not valid 54 | //! 55 | //! \return boolean If return value is true, execution can continue, otherwise program should exit 56 | //! 57 | inline bool parseArgs(Args& args, int argc, char* argv[]) 58 | { 59 | while (1) 60 | { 61 | int arg; 62 | static struct option long_options[] = { 63 | {"help", no_argument, 0, 'h'}, 64 | {"datadir", required_argument, 0, 'd'}, 65 | {"int8", no_argument, 0, 'i'}, 66 | {"useDLACore", required_argument, 0, 'u'}, 67 | {nullptr, 0, nullptr, 0}}; 68 | int option_index = 0; 69 | arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index); 70 | if (arg == -1) 71 | break; 72 | 73 | switch (arg) 74 | { 75 | case 'h': 76 | args.help = true; 77 | return false; 78 | case 'd': 79 | if (optarg) 80 | args.dataDirs.push_back(optarg); 81 | else 82 | { 83 | std::cerr << "ERROR: --datadir requires option argument" << std::endl; 84 | return false; 85 | } 86 | break; 87 | case 'i': 88 | args.runInInt8 = true; 89 | break; 90 | case 'u': 91 | if (optarg) 92 | args.useDLACore = std::stoi(optarg); 93 | break; 94 | default: 95 | return false; 96 | } 97 | } 98 | return true; 99 | } 100 | 101 | } // namespace samplesCommon 102 | 103 | #endif // TENSORRT_ARGS_PARSER_H 104 | -------------------------------------------------------------------------------- /common/dumpTFWts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Script to dump TensorFlow weights in TRT v1 and v2 dump format. 4 | # The V1 format is for TensorRT 4.0. The V2 format is for TensorRT 4.0 and later. 5 | 6 | import sys 7 | import struct 8 | import argparse 9 | try: 10 | import tensorflow as tf 11 | from tensorflow.python import pywrap_tensorflow 12 | except ImportError as err: 13 | sys.stderr.write("""Error: Failed to import module ({})""".format(err)) 14 | sys.exit() 15 | 16 | parser = argparse.ArgumentParser(description='TensorFlow Weight Dumper') 17 | 18 | parser.add_argument('-m', '--model', required=True, help='The checkpoint file basename, example basename(model.ckpt-766908.data-00000-of-00001) -> model.ckpt-766908') 19 | parser.add_argument('-o', '--output', required=True, help='The weight file to dump all the weights to.') 20 | parser.add_argument('-1', '--wtsv1', required=False, default=False, type=bool, help='Dump the weights in the wts v1.') 21 | 22 | opt = parser.parse_args() 23 | 24 | if opt.wtsv1: 25 | print "Outputting the trained weights in TensorRT's wts v1 format. This format is documented as:" 26 | print "Line 0: " 27 | print "Line 1-Num: [buffer name] [buffer type] [buffer size] " 28 | else: 29 | print "Outputting the trained weights in TensorRT's wts v2 format. This format is documented as:" 30 | print "Line 0: " 31 | print "Line 1-Num: [buffer name] [buffer type] [(buffer shape{e.g. 
(1, 2, 3)}] " 32 | 33 | inputbase = opt.model 34 | outputbase = opt.output 35 | 36 | def float_to_hex(f): 37 | return hex(struct.unpack(' 17 | 18 | #if defined( WINGETOPT_SHARED_LIB ) 19 | # if defined( BUILDING_WINGETOPT_DLL ) 20 | # define WINGETOPT_API __declspec(dllexport) 21 | # else 22 | # define WINGETOPT_API __declspec(dllimport) 23 | # endif 24 | #else 25 | # define WINGETOPT_API 26 | #endif 27 | 28 | #ifdef __cplusplus 29 | extern "C" { 30 | #endif 31 | 32 | WINGETOPT_API extern int optind; /* index of first non-option in argv */ 33 | WINGETOPT_API extern int optopt; /* single option character, as parsed */ 34 | WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */ 35 | /* (user may set to zero, to suppress) */ 36 | 37 | WINGETOPT_API extern char *optarg; /* pointer to argument of current option */ 38 | 39 | extern int getopt(int nargc, char * const *nargv, const char *options); 40 | 41 | #ifdef _BSD_SOURCE 42 | /* 43 | * BSD adds the non-standard `optreset' feature, for reinitialisation 44 | * of `getopt' parsing. We support this feature, for applications which 45 | * proclaim their BSD heritage, before including this header; however, 46 | * to maintain portability, developers are advised to avoid it. 47 | */ 48 | # define optreset __mingw_optreset 49 | extern int optreset; 50 | #endif 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | /* 55 | * POSIX requires the `getopt' API to be specified in `unistd.h'; 56 | * thus, `unistd.h' includes this header. However, we do not want 57 | * to expose the `getopt_long' or `getopt_long_only' APIs, when 58 | * included in this manner. Thus, close the standard __GETOPT_H__ 59 | * declarations block, and open an additional __GETOPT_LONG_H__ 60 | * specific block, only when *not* __UNISTD_H_SOURCED__, in which 61 | * to declare the extended API. 62 | */ 63 | #endif /* !defined(__GETOPT_H__) */ 64 | 65 | #if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) 66 | #define __GETOPT_LONG_H__ 67 | 68 | #ifdef __cplusplus 69 | extern "C" { 70 | #endif 71 | 72 | struct option /* specification for a long form option... */ 73 | { 74 | const char *name; /* option name, without leading hyphens */ 75 | int has_arg; /* does it take an argument? */ 76 | int *flag; /* where to save its status, or NULL */ 77 | int val; /* its associated status value */ 78 | }; 79 | 80 | enum /* permitted values for its `has_arg' field... */ 81 | { 82 | no_argument = 0, /* option never takes an argument */ 83 | required_argument, /* option always requires an argument */ 84 | optional_argument /* option may take an argument */ 85 | }; 86 | 87 | extern int getopt_long(int nargc, char * const *nargv, const char *options, 88 | const struct option *long_options, int *idx); 89 | extern int getopt_long_only(int nargc, char * const *nargv, const char *options, 90 | const struct option *long_options, int *idx); 91 | /* 92 | * Previous MinGW implementation had... 93 | */ 94 | #ifndef HAVE_DECL_GETOPT 95 | /* 96 | * ...for the long form API only; keep this for compatibility. 
97 | */ 98 | # define HAVE_DECL_GETOPT 1 99 | #endif 100 | 101 | #ifdef __cplusplus 102 | } 103 | #endif 104 | 105 | #endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */ 106 | -------------------------------------------------------------------------------- /src/sampleCharRNN/sampleCharRNN.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleCharRNN/sampleCharRNN.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleDetection/sampleDetection.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleDetection/sampleDetection.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleFasterRCNN/README.md: -------------------------------------------------------------------------------- 1 | The FasterRCNN sample uses the dataset from here: 2 | https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz 3 | 4 | The dataset needs to be placed into the data/faster-rcnn directory. 
5 | 6 | The commands to do this on linux are as follows: 7 | 8 | cd 9 | wget --no-check-certificate https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0 -O data/faster-rcnn/faster-rcnn.tgz 10 | tar zxvf data/faster-rcnn/faster-rcnn.tgz -C data/faster-rcnn --strip-components=1 --exclude=ZF_* 11 | 12 | 翻译: 13 | 14 | FasterRCNN 示例使用的数据模型在如下链接可以下载到: 15 | 16 | https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz 17 | 18 | 下载好的数据模型,直接解压以后放在data/faster-rcnn文件夹下面。 -------------------------------------------------------------------------------- /src/sampleFasterRCNN/car-0.990063.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleFasterRCNN/car-0.990063.ppm -------------------------------------------------------------------------------- /src/sampleFasterRCNN/dog-0.997705.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleFasterRCNN/dog-0.997705.ppm -------------------------------------------------------------------------------- /src/sampleFasterRCNN/dog-0.999603.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleFasterRCNN/dog-0.999603.ppm -------------------------------------------------------------------------------- /src/sampleFasterRCNN/factoryFasterRCNN.h: -------------------------------------------------------------------------------- 1 | #include "NvCaffeParser.h" 2 | #include "NvInferPlugin.h" 3 | 4 | using namespace nvinfer1; 5 | using namespace nvcaffeparser1; 6 | using namespace plugin; 7 | 8 | const int poolingH = 7; 9 | const int poolingW = 7; 10 | const int featureStride = 16; 11 | const int preNmsTop = 6000; 12 | const int nmsMaxOut = 300; 13 | const int anchorsRatioCount = 3; 14 | const int anchorsScaleCount = 3; 15 | const float iouThreshold = 0.7f; 16 | const float minBoxSize = 16; 17 | const float spatialScale = 0.0625f; 18 | const float anchorsRatios[anchorsRatioCount] = { 0.5f, 1.0f, 2.0f }; 19 | const float anchorsScales[anchorsScaleCount] = { 8.0f, 16.0f, 32.0f }; 20 | 21 | class FRCNNPluginFactory : public nvcaffeparser1::IPluginFactoryV2 22 | { 23 | public: 24 | virtual nvinfer1::IPluginV2* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights, const char* libNamespace) override 25 | { 26 | assert(isPluginV2(layerName)); 27 | if (!strcmp(layerName, "RPROIFused")) 28 | { 29 | assert(mPluginRPROI == nullptr); 30 | assert(nbWeights == 0 && weights == nullptr); 31 | mPluginRPROI = std::unique_ptr 32 | (createRPNROIPlugin(featureStride, preNmsTop, nmsMaxOut, iouThreshold, minBoxSize, spatialScale, 33 | DimsHW(poolingH, poolingW), Weights{ nvinfer1::DataType::kFLOAT, anchorsRatios, anchorsRatioCount }, 34 | Weights{ nvinfer1::DataType::kFLOAT, anchorsScales, anchorsScaleCount }), pluginDeleter); 35 | mPluginRPROI.get()->setPluginNamespace(libNamespace); 36 | return mPluginRPROI.get(); 37 | } 38 | else 39 | { 40 | assert(0); 41 | return nullptr; 42 | } 43 | } 44 | 45 | // caffe parser plugin implementation 46 | bool isPluginV2(const char* name) override { return !strcmp(name, "RPROIFused"); } 47 | 48 | void destroyPlugin() 49 | { 50 | mPluginRPROI.reset(); 51 | } 52 | 53 | void 
(*pluginDeleter)(IPluginV2*) {[](IPluginV2* ptr) {ptr->destroy();}}; 54 | std::unique_ptr mPluginRPROI{nullptr, pluginDeleter}; 55 | 56 | }; 57 | -------------------------------------------------------------------------------- /src/sampleFasterRCNN/horse-0.994670.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleFasterRCNN/horse-0.994670.ppm -------------------------------------------------------------------------------- /src/sampleFasterRCNN/person-0.974725.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleFasterRCNN/person-0.974725.ppm -------------------------------------------------------------------------------- /src/sampleFasterRCNN/sampleFasterRCNN.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 头文件 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 源文件 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/sampleFasterRCNN/sampleFasterRCNN.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleGoogleNet/README.md: -------------------------------------------------------------------------------- 1 | # NVIDIA TensorRT Sample "sampleGoogleNet" 2 | 3 | The sampleGoogleNet sample demonstrates how to: 4 | - Build a TensorRT engine from the saved Caffe model 5 | - Set input values to engine, run engine and obtain output 6 | 7 | ## Usage 8 | 9 | This sample can be run as: 10 | 11 | ./sample_googlenet [-h] [--datadir=/path/to/data/dir/] [--useDLACore=N] 12 | 13 | SampleGoogleNet reads two caffe files to build the network: 14 | 15 | * `googlenet.prototxt` - The prototxt file that contains the network design 16 | * `googlenet.caffemodel` - The model file which contains the trained weights 17 | for the network 18 | 19 | By default, the sample expects these files to be in `data/samples/googlenet/` or 20 | `data/googlenet/`. The default directory can be changed by supplying the path as 21 | `--datadir=/new/path/` as a command line argument. 
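For reference, the core of the sample reduces to parsing those two Caffe files and building an engine. A minimal sketch of that flow follows; the batch size, workspace size, and the `prob` output blob name are illustrative assumptions rather than values taken from the sample source:

```cpp
#include "NvInfer.h"
#include "NvCaffeParser.h"
#include "common.h"
#include <string>
#include <vector>

// Parse googlenet.prototxt / googlenet.caffemodel and build a TensorRT engine.
nvinfer1::ICudaEngine* buildGoogleNetEngine(nvinfer1::ILogger& logger)
{
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
    nvinfer1::INetworkDefinition* network = builder->createNetwork();
    nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();

    const std::vector<std::string> dirs{"data/samples/googlenet/", "data/googlenet/"};
    const nvcaffeparser1::IBlobNameToTensor* blobs = parser->parse(
        locateFile("googlenet.prototxt", dirs).c_str(),
        locateFile("googlenet.caffemodel", dirs).c_str(),
        *network, nvinfer1::DataType::kFLOAT);

    // Mark the network output; "prob" is assumed to be the softmax blob in this prototxt.
    network->markOutput(*blobs->find("prob"));

    builder->setMaxBatchSize(4);            // illustrative batch size
    builder->setMaxWorkspaceSize(16 << 20); // 16 MB of scratch space
    nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);

    network->destroy();
    parser->destroy();
    builder->destroy();
    return engine;
}
```

The actual sample layers command-line handling and execution on top of this; the sketch covers only the network construction step.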
22 | -------------------------------------------------------------------------------- /src/sampleGoogleNet/sampleGoogleNet.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | 源文件 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/sampleGoogleNet/sampleGoogleNet.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleINT8/BatchStream.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCH_STREAM_H 2 | #define BATCH_STREAM_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "NvInfer.h" 8 | 9 | std::string locateFile(const std::string& input); 10 | 11 | class BatchStream 12 | { 13 | public: 14 | BatchStream(int batchSize, int maxBatches) : mBatchSize(batchSize), mMaxBatches(maxBatches) 15 | { 16 | FILE* file = fopen(locateFile(std::string("batches/batch0")).c_str(), "rb"); 17 | int d[4]; 18 | fread(d, sizeof(int), 4, file); 19 | mDims = nvinfer1::DimsNCHW{ d[0], d[1], d[2], d[3] }; 20 | fclose(file); 21 | mImageSize = mDims.c()*mDims.h()*mDims.w(); 22 | mBatch.resize(mBatchSize*mImageSize, 0); 23 | mLabels.resize(mBatchSize, 0); 24 | mFileBatch.resize(mDims.n()*mImageSize, 0); 25 | mFileLabels.resize(mDims.n(), 0); 26 | reset(0); 27 | } 28 | 29 | void reset(int firstBatch) 30 | { 31 | mBatchCount = 0; 32 | mFileCount = 0; 33 | mFileBatchPos = mDims.n(); 34 | skip(firstBatch); 35 | } 36 | 37 | bool next() 38 | { 39 | if (mBatchCount == mMaxBatches) 40 | return false; 41 | 42 | for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) 43 | { 44 | assert(mFileBatchPos > 0 && mFileBatchPos <= mDims.n()); 45 | if (mFileBatchPos == mDims.n() && !update()) 46 | return false; 47 | 48 | // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer. 
49 | csize = std::min(mBatchSize - batchPos, mDims.n() - mFileBatchPos); 50 | std::copy_n(getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize); 51 | std::copy_n(getFileLabels() + mFileBatchPos, csize, getLabels() + batchPos); 52 | } 53 | mBatchCount++; 54 | return true; 55 | } 56 | 57 | void skip(int skipCount) 58 | { 59 | if (mBatchSize >= mDims.n() && mBatchSize%mDims.n() == 0 && mFileBatchPos == mDims.n()) 60 | { 61 | mFileCount += skipCount * mBatchSize / mDims.n(); 62 | return; 63 | } 64 | 65 | int x = mBatchCount; 66 | for (int i = 0; i < skipCount; i++) 67 | next(); 68 | mBatchCount = x; 69 | } 70 | 71 | float *getBatch() { return &mBatch[0]; } 72 | float *getLabels() { return &mLabels[0]; } 73 | int getBatchesRead() const { return mBatchCount; } 74 | int getBatchSize() const { return mBatchSize; } 75 | nvinfer1::DimsNCHW getDims() const { return mDims; } 76 | private: 77 | float* getFileBatch() { return &mFileBatch[0]; } 78 | float* getFileLabels() { return &mFileLabels[0]; } 79 | 80 | bool update() 81 | { 82 | std::string inputFileName = locateFile(std::string("batches/batch") + std::to_string(mFileCount++)); 83 | FILE * file = fopen(inputFileName.c_str(), "rb"); 84 | if (!file) 85 | return false; 86 | 87 | int d[4]; 88 | fread(d, sizeof(int), 4, file); 89 | assert(mDims.n() == d[0] && mDims.c() == d[1] && mDims.h() == d[2] && mDims.w() == d[3]); 90 | 91 | size_t readInputCount = fread(getFileBatch(), sizeof(float), mDims.n()*mImageSize, file); 92 | size_t readLabelCount = fread(getFileLabels(), sizeof(float), mDims.n(), file);; 93 | assert(readInputCount == size_t(mDims.n()*mImageSize) && readLabelCount == size_t(mDims.n())); 94 | 95 | fclose(file); 96 | mFileBatchPos = 0; 97 | return true; 98 | } 99 | 100 | int mBatchSize{ 0 }; 101 | int mMaxBatches{ 0 }; 102 | int mBatchCount{ 0 }; 103 | 104 | int mFileCount{ 0 }, mFileBatchPos{ 0 }; 105 | int mImageSize{ 0 }; 106 | 107 | nvinfer1::DimsNCHW mDims; 108 | std::vector mBatch; 109 | std::vector mLabels; 110 | std::vector mFileBatch; 111 | std::vector mFileLabels; 112 | }; 113 | 114 | 115 | #endif 116 | -------------------------------------------------------------------------------- /src/sampleINT8/CalibrationTablemnist: -------------------------------------------------------------------------------- 1 | 1 2 | data: 3c000889 3 | pool2: 3d9ccc94 4 | ip1: 3daeff07 5 | prob: 3c010a14 6 | conv2: 3dd33169 7 | pool1: 3c88e7e3 8 | ip2: 3e7d50ec 9 | conv1: 3c8954be 10 | -------------------------------------------------------------------------------- /src/sampleINT8/LegacyCalibrator.h: -------------------------------------------------------------------------------- 1 | #ifndef LEGACY_CALIBRATOR_H 2 | #define LEGACY_CALIBRATOR_H 3 | 4 | #include 5 | #include "NvInfer.h" 6 | #include "BatchStream.h" 7 | #include "cuda_runtime_api.h" 8 | #include 9 | #include 10 | 11 | #include "common.h" 12 | 13 | class Int8LegacyCalibrator : public nvinfer1::IInt8LegacyCalibrator 14 | { 15 | public: 16 | Int8LegacyCalibrator(BatchStream& stream, int firstBatch, double cutoff, double quantile, bool readCache = true) 17 | : mStream(stream), mFirstBatch(firstBatch), mReadCache(readCache) 18 | { 19 | using namespace nvinfer1; 20 | DimsNCHW dims = mStream.getDims(); 21 | mInputCount = mStream.getBatchSize() * dims.c() * dims.h() * dims.w(); 22 | CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); 23 | reset(cutoff, quantile); 24 | } 25 | 26 | virtual ~Int8LegacyCalibrator() 27 | { 28 | 
CHECK(cudaFree(mDeviceInput)); 29 | } 30 | 31 | int getBatchSize() const override { return mStream.getBatchSize(); } 32 | double getQuantile() const override { return mQuantile; } 33 | double getRegressionCutoff() const override { return mCutoff; } 34 | 35 | bool getBatch(void* bindings[], const char* names[], int nbBindings) override 36 | { 37 | if (!mStream.next()) 38 | return false; 39 | 40 | CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); 41 | bindings[0] = mDeviceInput; 42 | return true; 43 | } 44 | 45 | const void* readCalibrationCache(size_t& length) override 46 | { 47 | mCalibrationCache.clear(); 48 | std::ifstream input(locateFile("CalibrationTable"), std::ios::binary); 49 | input >> std::noskipws; 50 | if (mReadCache && input.good()) 51 | std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(mCalibrationCache)); 52 | 53 | length = mCalibrationCache.size(); 54 | return length ? &mCalibrationCache[0] : nullptr; 55 | } 56 | 57 | void writeCalibrationCache(const void* cache, size_t length) override 58 | { 59 | std::ofstream output(locateFile("CalibrationTable"), std::ios::binary); 60 | output.write(reinterpret_cast(cache), length); 61 | } 62 | 63 | const void* readHistogramCache(size_t& length) override 64 | { 65 | length = mHistogramCache.size(); 66 | return length ? &mHistogramCache[0] : nullptr; 67 | } 68 | 69 | void writeHistogramCache(const void* cache, size_t length) override 70 | { 71 | mHistogramCache.clear(); 72 | std::copy_n(reinterpret_cast(cache), length, std::back_inserter(mHistogramCache)); 73 | } 74 | 75 | void reset(double cutoff, double quantile) 76 | { 77 | mCutoff = cutoff; 78 | mQuantile = quantile; 79 | mStream.reset(mFirstBatch); 80 | } 81 | 82 | private: 83 | BatchStream mStream; 84 | int mFirstBatch; 85 | double mCutoff, mQuantile; 86 | bool mReadCache{ true }; 87 | 88 | size_t mInputCount; 89 | void* mDeviceInput{ nullptr }; 90 | std::vector mCalibrationCache, mHistogramCache; 91 | }; 92 | 93 | struct CalibrationParameters 94 | { 95 | const char* networkName; 96 | double cutoff; 97 | double quantileIndex; 98 | }; 99 | 100 | CalibrationParameters gCalibrationTable[] = 101 | { 102 | { "alexnet", 0.6, 7.0 }, 103 | { "vgg19", 0.5, 5 }, 104 | { "googlenet", 1, 8.0 }, 105 | { "resnet-50", 0.61, 2.0 }, 106 | { "resnet-101", 0.51, 2.5 }, 107 | { "resnet-152", 0.4, 5.0 } 108 | }; 109 | 110 | static const int gCalibrationTableSize = sizeof(gCalibrationTable) / sizeof(CalibrationParameters); 111 | 112 | double quantileFromIndex(double quantileIndex) 113 | { 114 | return 1 - pow(10, -quantileIndex); 115 | } 116 | 117 | static const int CAL_BATCH_SIZE = 50; 118 | static const int FIRST_CAL_BATCH = 0, NB_CAL_BATCHES = 10; // calibrate over images 0-500 119 | static const int FIRST_CAL_SCORE_BATCH = 100, NB_CAL_SCORE_BATCHES = 100; // score over images 5000-10000 120 | 121 | 122 | void searchCalibrations(double firstCutoff, double cutoffIncrement, int nbCutoffs, 123 | double firstQuantileIndex, double quantileIndexIncrement, int nbQuantiles, 124 | float& bestScore, double& bestCutoff, double& bestQuantileIndex, Int8LegacyCalibrator& calibrator) 125 | { 126 | std::pair scoreModel(int batchSize, int firstBatch, int nbScoreBatches, nvinfer1::DataType type, nvinfer1::IInt8Calibrator* calibrator, bool quiet); 127 | 128 | for (int i = 0; i < nbCutoffs; i++) 129 | { 130 | for (int j = 0; j < nbQuantiles; j++) 131 | { 132 | double cutoff = firstCutoff + double(i) * cutoffIncrement, quantileIndex 
= firstQuantileIndex + double(j) * quantileIndexIncrement; 133 | calibrator.reset(cutoff, quantileFromIndex(quantileIndex)); 134 | float score = scoreModel(CAL_BATCH_SIZE, FIRST_CAL_SCORE_BATCH, NB_CAL_SCORE_BATCHES, nvinfer1::DataType::kINT8, &calibrator, true).first; // score the model in quiet mode 135 | 136 | std::cout << "Score: " << score << " (cutoff = " << cutoff << ", quantileIndex = " << quantileIndex << ")" << std::endl; 137 | if (score > bestScore) 138 | bestScore = score, bestCutoff = cutoff, bestQuantileIndex = quantileIndex; 139 | } 140 | } 141 | } 142 | 143 | void searchCalibrations(double& bestCutoff, double&bestQuantileIndex) 144 | { 145 | float bestScore = std::numeric_limits::lowest(); 146 | bestCutoff = 0; 147 | bestQuantileIndex = 0; 148 | 149 | std::cout << "searching calibrations" << std::endl; 150 | BatchStream calibrationStream(CAL_BATCH_SIZE, NB_CAL_BATCHES); 151 | Int8LegacyCalibrator calibrator(calibrationStream, 0, quantileFromIndex(0), false); // force calibration by ignoring region cache 152 | 153 | searchCalibrations(1, 0, 1, 2, 1, 7, bestScore, bestCutoff, bestQuantileIndex, calibrator); // search the space with cutoff = 1 (i.e. max'ing over the histogram) 154 | searchCalibrations(0.4, 0.05, 7, 2, 1, 7, bestScore, bestCutoff, bestQuantileIndex, calibrator); // search the space with cutoff = 0.4 to 0.7 (inclusive) 155 | 156 | // narrow in: if our best score is at cutoff 1 then search over quantiles, else over both dimensions 157 | if (bestScore == 1) 158 | searchCalibrations(1, 0, 1, bestQuantileIndex - 0.5, 0.1, 11, bestScore, bestCutoff, bestQuantileIndex, calibrator); 159 | else 160 | searchCalibrations(bestCutoff - 0.04, 0.01, 9, bestQuantileIndex - 0.5, 0.1, 11, bestScore, bestCutoff, bestQuantileIndex, calibrator); 161 | std::cout << "\n\nBest score: " << bestScore << " (cutoff = " << bestCutoff << ", quantileIndex = " << bestQuantileIndex << ")" << std::endl; 162 | } 163 | 164 | std::pair getQuantileAndCutoff(const char* networkName, bool search) 165 | { 166 | double cutoff = 1, quantileIndex = 6; 167 | if (search) 168 | searchCalibrations(cutoff, quantileIndex); 169 | else 170 | { 171 | for (int i = 0; i < gCalibrationTableSize; i++) 172 | { 173 | if (!strcmp(gCalibrationTable[i].networkName, networkName)) 174 | cutoff = gCalibrationTable[i].cutoff, quantileIndex = gCalibrationTable[i].quantileIndex; 175 | } 176 | std::cout << " using preset cutoff " << cutoff << " and quantile index " << quantileIndex << std::endl; 177 | } 178 | return std::make_pair(cutoff, quantileFromIndex(quantileIndex)); 179 | } 180 | 181 | 182 | 183 | #endif 184 | -------------------------------------------------------------------------------- /src/sampleINT8/int8_caffe.patch: -------------------------------------------------------------------------------- 1 | diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp 2 | index 66e6301..da615e5 100644 3 | --- a/src/caffe/layers/data_layer.cpp 4 | +++ b/src/caffe/layers/data_layer.cpp 5 | @@ -99,6 +99,26 @@ void DataLayer::load_batch(Batch* batch) { 6 | } 7 | timer.Stop(); 8 | batch_timer.Stop(); 9 | +#define LOG_BATCHES_FOR_INT8_TESTING 1 10 | +#if LOG_BATCHES_FOR_INT8_TESTING 11 | + static int sBatchId = 0; 12 | + char* batch_dump_dir = getenv("TENSORRT_INT8_BATCH_DIRECTORY"); 13 | + if(batch_dump_dir != 0) 14 | + { 15 | + char buffer[1000]; 16 | + sprintf(buffer, "batches/batch%d", sBatchId++); 17 | + FILE* file = fopen(buffer, "w"); 18 | + if(file==0) 19 | + abort(); 20 | + 21 | + int s[4] = { 
top_shape[0], top_shape[1], top_shape[2], top_shape[3] }; 22 | + fwrite(s, sizeof(int), 4, file); 23 | + fwrite(top_data, sizeof(float), top_shape[0]*top_shape[1]*top_shape[2]*top_shape[3], file); 24 | + fwrite(&top_label[0], sizeof(int), top_shape[0], file); 25 | + fclose(file); 26 | + } 27 | +#endif 28 | + 29 | DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; 30 | DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; 31 | DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; | fromdos 32 | -------------------------------------------------------------------------------- /src/sampleINT8/sampleINT8.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 头文件 20 | 21 | 22 | 头文件 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 源文件 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/sampleINT8/sampleINT8.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | mnist 10 | 11 | 12 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 13 | WindowsLocalDebugger 14 | mnist 15 | 16 | -------------------------------------------------------------------------------- /src/sampleLoadEngineStream/sampleLoadEngineStream.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NvInfer.h" 3 | #include "common.h" 4 | #include 5 | 6 | static Logger gLogger{nvinfer1::ILogger::Severity::kINFO}; 7 | static const int INPUT_H = 28; 8 | static const int INPUT_W = 28; 9 | static const int OUTPUT_SIZE = 10; 10 | 11 | 12 | #define RETURN_AND_LOG(ret, severity, message) \ 13 | do { \ 14 | std::string error_message = "sample_uff_mnist: " + std::string(message); \ 15 | gLogger.log(ILogger::Severity::k ## severity, error_message.c_str()); \ 16 | return (ret); \ 17 | } while(0) 18 | 19 | inline int64_t volume(const Dims& d) 20 | { 21 | int64_t v = 1; 22 | for (int64_t i = 0; i < d.nbDims; i++) 23 | v *= d.d[i]; 24 | return v; 25 | } 26 | 27 | 28 | inline unsigned int elementSize(DataType t) 29 | { 30 | switch (t) 31 | { 32 | case DataType::kINT32: 33 | // Fallthrough, same as kFLOAT 34 | case DataType::kFLOAT: return 4; 35 | case DataType::kHALF: return 2; 36 | case DataType::kINT8: return 1; 37 | } 38 | assert(0); 39 | return 0; 40 | } 41 | 42 | 43 | std::string locateFile(const std::string& input) 44 | { 45 | std::vector dirs{ "data/mnist/", "data/samples/mnist/" }; 46 | return locateFile(input, dirs); 47 | } 48 | 49 | 50 | // simple PGM (portable greyscale map) reader 51 | void readPGMFile(const std::string& filename, uint8_t buffer[INPUT_H*INPUT_W]) 52 | { 53 | readPGMFile(locateFile(filename), buffer, INPUT_H, INPUT_W); 54 | } 55 | 56 | 57 | void* safeCudaMalloc(size_t memSize) 58 | { 59 | void* deviceMem; 60 | CHECK(cudaMalloc(&deviceMem, memSize)); 61 | if (deviceMem == nullptr) 62 | { 63 | std::cerr << "Out of memory" << std::endl; 64 | exit(1); 65 | } 66 | return deviceMem; 67 | } 68 | 69 | 70 | void* 
createMnistCudaBuffer(int64_t eltCount, DataType dtype, int run) 71 | { 72 | /* in that specific case, eltCount == INPUT_H * INPUT_W */ 73 | assert(eltCount == INPUT_H * INPUT_W); 74 | assert(elementSize(dtype) == sizeof(float)); 75 | 76 | size_t memSize = eltCount * elementSize(dtype); 77 | float* inputs = new float[eltCount]; 78 | 79 | /* read PGM file */ 80 | uint8_t fileData[INPUT_H * INPUT_W]; 81 | readPGMFile(std::to_string(run) + ".pgm", fileData); 82 | 83 | /* display the number in an ascii representation */ 84 | std::cout << "\n\n\n---------------------------" << "\n\n\n" << std::endl; 85 | for (int i = 0; i < eltCount; i++) 86 | std::cout << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % INPUT_W) ? "" : "\n"); 87 | 88 | /* initialize the inputs buffer */ 89 | for (int i = 0; i < eltCount; i++) 90 | inputs[i] = 1.0f - float(fileData[i]) / 255.0f; 91 | 92 | void* deviceMem = safeCudaMalloc(memSize); 93 | CHECK(cudaMemcpy(deviceMem, inputs, memSize, cudaMemcpyHostToDevice)); 94 | 95 | delete[] inputs; 96 | return deviceMem; 97 | } 98 | 99 | 100 | 101 | 102 | std::vector> 103 | calculateBindingBufferSizes(const ICudaEngine& engine, int nbBindings, int batchSize) 104 | { 105 | std::vector> sizes; 106 | for (int i = 0; i < nbBindings; ++i) 107 | { 108 | Dims dims = engine.getBindingDimensions(i); 109 | DataType dtype = engine.getBindingDataType(i); 110 | 111 | int64_t eltCount = volume(dims) * batchSize; 112 | sizes.push_back(std::make_pair(eltCount, dtype)); 113 | } 114 | 115 | return sizes; 116 | } 117 | 118 | 119 | void printOutput(int64_t eltCount, DataType dtype, void* buffer) 120 | { 121 | std::cout << eltCount << " eltCount" << std::endl; 122 | assert(elementSize(dtype) == sizeof(float)); 123 | std::cout << "--- OUTPUT ---" << std::endl; 124 | 125 | size_t memSize = eltCount * elementSize(dtype); 126 | float* outputs = new float[eltCount]; 127 | CHECK(cudaMemcpy(outputs, buffer, memSize, cudaMemcpyDeviceToHost)); 128 | 129 | int maxIdx = 0; 130 | for (int i = 0; i < eltCount; ++i) 131 | if (outputs[i] > outputs[maxIdx]) 132 | maxIdx = i; 133 | 134 | std::ios::fmtflags prevSettings = std::cout.flags(); 135 | std::cout.setf(std::ios::fixed, std::ios::floatfield); 136 | std::cout.precision(6); 137 | for (int64_t eltIdx = 0; eltIdx < eltCount; ++eltIdx) 138 | { 139 | std::cout << eltIdx << " => " << setw(10) << outputs[eltIdx] << "\t : "; 140 | if (eltIdx == maxIdx) 141 | std::cout << "***"; 142 | std::cout << "\n"; 143 | } 144 | std::cout.flags(prevSettings); 145 | 146 | std::cout << std::endl; 147 | delete[] outputs; 148 | } 149 | 150 | 151 | void execute(ICudaEngine& engine) 152 | { 153 | IExecutionContext* context = engine.createExecutionContext(); 154 | 155 | int batchSize = 1; 156 | 157 | int nbBindings = engine.getNbBindings(); 158 | assert(nbBindings == 2); 159 | 160 | std::vector buffers(nbBindings); 161 | auto buffersSizes = calculateBindingBufferSizes(engine, nbBindings, batchSize); 162 | 163 | int bindingIdxInput = 0; 164 | for (int i = 0; i < nbBindings; ++i) 165 | { 166 | if (engine.bindingIsInput(i)) 167 | bindingIdxInput = i; 168 | else 169 | { 170 | auto bufferSizesOutput = buffersSizes[i]; 171 | buffers[i] = safeCudaMalloc(bufferSizesOutput.first * 172 | elementSize(bufferSizesOutput.second)); 173 | } 174 | } 175 | 176 | auto bufferSizesInput = buffersSizes[bindingIdxInput]; 177 | 178 | int iterations = 1; 179 | int numberRun = 10; 180 | for (int i = 0; i < iterations; i++) 181 | { 182 | float total = 0, ms; 183 | for (int run = 0; run < numberRun; run++) 184 | { 
185 | buffers[bindingIdxInput] = createMnistCudaBuffer(bufferSizesInput.first, 186 | bufferSizesInput.second, run); 187 | 188 | auto t_start = std::chrono::high_resolution_clock::now(); 189 | context->execute(batchSize, &buffers[0]); 190 | auto t_end = std::chrono::high_resolution_clock::now(); 191 | ms = std::chrono::duration(t_end - t_start).count(); 192 | total += ms; 193 | 194 | for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx) 195 | { 196 | if (engine.bindingIsInput(bindingIdx)) 197 | continue; 198 | 199 | auto bufferSizesOutput = buffersSizes[bindingIdx]; 200 | printOutput(bufferSizesOutput.first, bufferSizesOutput.second, 201 | buffers[bindingIdx]); 202 | } 203 | CHECK(cudaFree(buffers[bindingIdxInput])); 204 | } 205 | 206 | total /= numberRun; 207 | std::cout << "Average over " << numberRun << " runs is " << total << " ms." << std::endl; 208 | } 209 | 210 | for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx) 211 | if (!engine.bindingIsInput(bindingIdx)) 212 | CHECK(cudaFree(buffers[bindingIdx])); 213 | context->destroy(); 214 | } 215 | 216 | 217 | 218 | int main(int argc, char** argv) 219 | { 220 | nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger); 221 | nvinfer1::IPluginFactory* factory{ nullptr }; 222 | std::ifstream is("../../data/mnist/engineStream.bin", std::ios::binary); 223 | is.seekg(0, is.end); 224 | int length = is.tellg(); 225 | is.seekg(0, is.beg); 226 | 227 | try { 228 | char *buffer = new char[length]; 229 | is.read(buffer, length); 230 | is.close(); 231 | 232 | nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(buffer, length, factory); 233 | if (!engine) 234 | RETURN_AND_LOG(-1, ERROR, "load the engine failed"); 235 | execute(*engine); 236 | 237 | delete[] buffer; 238 | runtime->destroy(); 239 | engine->destroy(); 240 | 241 | system("pause"); 242 | return 0; 243 | } 244 | catch (...) { 245 | is.close(); 246 | } 247 | return 0; 248 | 249 | } -------------------------------------------------------------------------------- /src/sampleLoadEngineStream/sampleLoadEngineStream.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleLoadEngineStream/sampleLoadEngineStream.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleMLP/README.txt: -------------------------------------------------------------------------------- 1 | This is a simple Multi-Layer Perceptron (MLP) example showing how to generate a MLP that TensorRT can accelerate. 2 | This sample requires Tensorflow > 1.4 to be installed. 
3 | This MLP was trained via the following method: 4 | git clone https://github.com/aymericdamien/TensorFlow-Examples.git 5 | cd TensorFlow-Examples 6 | 7 | Apply the patch file, `update_mlp.patch` to save the final result with the command `patch -p1 < /samples/sampleMLP/update_mlp.patch` 8 | Train the MINST MLP with the command `python examples/3_NeuralNetworks/multilayer_perceptron.py` 9 | Convert the trained model weights to a format sampleMLP understands via the command `python /samples/sampleMLP/convert_weights.py -m /tmp/sampleMLP.ckpt -o sampleMLP` 10 | 11 | mkdir -p /data/mlp 12 | cp sampleMLP.wts2 /data/mlp/ 13 | 14 | To build the sample: 15 | cd /samples 16 | make 17 | 18 | To run the sample: 19 | cd /bin 20 | ./sample_mlp 21 | -------------------------------------------------------------------------------- /src/sampleMLP/convert_weights.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Script to convert from TensorFlow weights to TensorRT weights for multilayer perceptron sample. 4 | # Change the remap to properly remap the weights to the name from your trained model 5 | # to the sample expected format. 6 | 7 | import sys 8 | import struct 9 | import argparse 10 | 11 | try: 12 | from tensorflow.python import pywrap_tensorflow as pyTF 13 | except ImportError as err: 14 | sys.stderr.write("""Error: Failed to import module ({})""".format(err)) 15 | sys.exit() 16 | 17 | parser = argparse.ArgumentParser(description='TensorFlow to TensorRT Weight Dumper') 18 | 19 | parser.add_argument('-m', '--model', required=True, help='The checkpoint file basename, example basename(model.ckpt-766908.data-00000-of-00001) -> model.ckpt-766908') 20 | parser.add_argument('-o', '--output', required=True, help='The weight file to dump all the weights to.') 21 | 22 | opt = parser.parse_args() 23 | 24 | print "Outputting the trained weights in TensorRT's wts v2 format. This format is documented as:" 25 | print "Line 0: " 26 | print "Line 1-Num: [buffer name] [buffer type] [(buffer shape{e.g. (1, 2, 3)}] " 27 | 28 | inputbase = opt.model 29 | outputbase = opt.output 30 | 31 | # This dictionary translates from the TF weight names to the weight names expected 32 | # by the sampleMLP sample. This is the location that needs to be changed if training 33 | # something other than what is specified in README.txt. 
34 | remap = { 35 | 'Variable': 'hiddenWeights0', 36 | 'Variable_1': 'hiddenWeights1', 37 | 'Variable_2': 'outputWeights', 38 | 'Variable_3': 'hiddenBias0', 39 | 'Variable_4': 'hiddenBias1', 40 | 'Variable_5': 'outputBias' 41 | } 42 | 43 | def float_to_hex(f): 44 | return hex(struct.unpack(' 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 源文件 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/sampleMLP/sampleMLP.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleMLP/update_mlp.patch: -------------------------------------------------------------------------------- 1 | diff --git a/examples/3_NeuralNetworks/multilayer_perceptron.py b/examples/3_NeuralNetworks/multilayer_perceptron.py 2 | index cf04b01..44e3986 100644 3 | --- a/examples/3_NeuralNetworks/multilayer_perceptron.py 4 | +++ b/examples/3_NeuralNetworks/multilayer_perceptron.py 5 | @@ -58,11 +58,11 @@ biases = { 6 | # Create model 7 | def multilayer_perceptron(x): 8 | # Hidden fully connected layer with 256 neurons 9 | - layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1']) 10 | + layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['h1']), biases['b1'])) 11 | # Hidden fully connected layer with 256 neurons 12 | - layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']) 13 | + layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])) 14 | # Output fully connected layer with a neuron for each class 15 | - out_layer = tf.matmul(layer_2, weights['out']) + biases['out'] 16 | + out_layer = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['out']), biases['out'])) 17 | return out_layer 18 | 19 | # Construct model 20 | @@ -76,6 +76,9 @@ train_op = optimizer.minimize(loss_op) 21 | # Initializing the variables 22 | init = tf.global_variables_initializer() 23 | 24 | +# 'Saver' op to save and restore all the variables 25 | +saver = tf.train.Saver() 26 | + 27 | with tf.Session() as sess: 28 | sess.run(init) 29 | 30 | @@ -102,3 +105,5 @@ with tf.Session() as sess: 31 | # Calculate accuracy 32 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) 33 | print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels})) 34 | + # Save model weights to disk 35 | + save_path = saver.save(sess, "/tmp/sampleMLP.ckpt") 36 | -------------------------------------------------------------------------------- /src/sampleMNIST/README.md: -------------------------------------------------------------------------------- 1 | # NVIDIA TensorRT Sample "sampleMNIST" 2 | 3 | The sampleMNIST sample demonstrates how to: 4 | - Perform the basic setup and initialization of TensorRT 5 | - Import a trained Caffe MNIST digit classifier 6 | - Perform simple input preprocessing before running the engine, like mean 7 | normalization in this case 8 | - Verify the output from the 
inference 9 | 10 | ## Usage 11 | 12 | This sample can be run as: 13 | 14 | ./sample_mnist [-h] [--datadir=/path/to/data/dir/] [--useDLA=N] 15 | 16 | SampleMNIST reads two Caffe files to build the network: 17 | 18 | * `mnist.prototxt` - The prototxt file that contains the network design 19 | * `mnist.caffemodel` - The model file which contains the trained weights 20 | for the network 21 | * `mnist_mean.binaryproto` - The binaryproto file which contains the means 22 | 23 | By default, the sample expects these files to be in `data/samples/mnist/` or 24 | `data/mnist/`. The list of default directories can be changed by adding one or 25 | more paths with `--datadir=/new/path/` as a command line argument. 26 | -------------------------------------------------------------------------------- /src/sampleMNIST/sampleMNIST.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | 源文件 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/sampleMNIST/sampleMNIST.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleMNISTAPI/sampleMNISTAPI.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 15.0 23 | {015397B8-A9BC-42ED-B3A1-2151714DFD7C} 24 | sampleMNISTAPI 25 | 10.0.17134.0 26 | 27 | 28 | 29 | Application 30 | true 31 | v141 32 | MultiByte 33 | 34 | 35 | Application 36 | false 37 | v141 38 | true 39 | MultiByte 40 | 41 | 42 | Application 43 | true 44 | v141 45 | MultiByte 46 | 47 | 48 | Application 49 | false 50 | v141 51 | true 52 | MultiByte 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | $(SolutionDir)output\$(Platform)\$(Configuration)\$(ProjectName)\bin\ 74 | $(SolutionDir)output\$(Platform)\$(Configuration)\$(ProjectName)\tmp\ 75 | $(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib;$(LibraryPath) 76 | 77 | 78 | $(SolutionDir)output\$(Platform)\$(Configuration)\$(ProjectName)\bin\ 79 | $(SolutionDir)output\$(Platform)\$(Configuration)\$(ProjectName)\tmp\ 80 | $(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib;$(LibraryPath) 81 | 82 | 83 | 84 | Level3 85 | Disabled 86 | true 87 | true 88 | $(CUDA_PATH_V10_0)\include;$(SolutionDir)3rdparty\TensorRT-5.0.1.3\include;$(SolutionDir)common;$(SolutionDir)common\windows 89 | _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 90 | 91 | 92 | $(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib\*.lib;$(CUDA_PATH_V10_0)\lib\x64\*.lib;%(AdditionalDependencies) 93 | 94 | 95 | 96 | 97 | Level3 98 | Disabled 99 | true 100 | true 101 | 
$(CUDA_PATH_V10_0)\include;$(SolutionDir)3rdparty\TensorRT-5.0.1.3\include;$(SolutionDir)common;$(SolutionDir)common\windows 102 | _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 103 | 104 | 105 | $(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib\*.lib;$(CUDA_PATH_V10_0)\lib\x64\*.lib;%(AdditionalDependencies) 106 | 107 | 108 | 109 | 110 | Level3 111 | MaxSpeed 112 | true 113 | true 114 | true 115 | true 116 | 117 | 118 | true 119 | true 120 | 121 | 122 | 123 | 124 | Level3 125 | MaxSpeed 126 | true 127 | true 128 | true 129 | true 130 | 131 | 132 | true 133 | true 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /src/sampleMNISTAPI/sampleMNISTAPI.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleMNISTAPI/sampleMNISTAPI.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleMovieLens/README.txt: -------------------------------------------------------------------------------- 1 | This is a multilayer perceptron(MLP) based Neural Collaborative Filter Recommender example showing how to generate weights for MovieLens dataset for TensorRT that TensorRT can accelerate. 2 | This sample requires Tensorflow <= 1.7.0 to be installed. 3 | This MLP base NCF was trained via the following method: 4 | 5 | Building the sample: 6 | To build the sample: 7 | cd /samples 8 | make -j12 9 | 10 | To run the sample: 11 | 12 | 1. Running Inference: 13 | 14 | cd /bin 15 | ./sample_movielens (default batch=32 i.e. num of users) 16 | ./sample_movielens -b (batch=N i.e. num of users) 17 | ./sample_movielens --verbose (prints inputs, groundtruth values, expected vs predicted probabilities) 18 | 19 | 2. Help/Usage 20 | ./sample_movielens -h 21 | Usage: 22 | ./sample_movielens[-h] 23 | -h Display help information. All single dash optoins enable perf mode. 24 | -b Number of Users i.e. BatchSize (default BatchSize=32). 25 | --useDLA Specify a DLA engine for layers that support DLA. Value can range from 1 to N, where N is the number of DLA engines on the platform. 26 | --verbose Enable verbose perf mode. 
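    Example: `./sample_movielens -b 64 --verbose` runs inference for a batch of 64 users and
    prints the inputs, ground truth and expected vs. predicted probability for each of them.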
27 | 28 | Training model from scratch: 29 | Step 1: 30 | git clone https://github.com/hexiangnan/neural_collaborative_filtering.git 31 | cd neural_collaborative_filtering 32 | git checkout 0cd2681598507f1cc26d110083327069963f4433 33 | 34 | Step 2: 35 | Apply the patch file, `sampleMovieLensTraining.patch` to save dump the frozen protobuf file with command `patch -p1 < /samples/sampleMovieLens/sampleMovieLensTraining.patch` 36 | Train the MLP based NCF with the command `python MLP.py --dataset ml-1m --epochs 20 --batch_size 256 --layers [64,32,16,8] --reg_layers [0.01,0.01,0.01,0.01] --num_neg 4 --lr 0.001 --learner adam --verbose 1 --out 1' 37 | WARNING: Using 0s for reg_layers will cause undefined behavior when training the network. 38 | This step will dump two files: 39 | 1. movielens_ratings.txt 40 | 2. sampleMovieLens.pb 41 | 42 | Step 3: Convert the Frozen .pb file to .uff format using 43 | Command: `python3 convert_to_uff.py sampleMovieLens.pb -p preprocess.py` 44 | preprocess.py is a preprocessing step that needs to be applied to the TensorFlow graph before it can be used by TensorRT. 45 | The reason for this is that TensorFlow's concatenation operation accounts for the batch dimension while TensorRT's concatenation operation does not. 46 | 47 | Note: convert_to_uff.py utility will get installed here: /usr/local/bin/convert-to-uff. 48 | This utility gets installed with UFF .whl file installation shipped with TensorRT. 49 | For installation instructions, see: 50 | https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/#python and click on the 'TensoRT Python API' link 51 | 52 | Step 4: 53 | Copy sampleMovieLens.uff file to /data/movielens 54 | Copy movielens_ratings.txt file to /data/movielens 55 | 56 | Step 5: 57 | Follow instruction above to build and run the sample 58 | -------------------------------------------------------------------------------- /src/sampleMovieLens/preprocess.py: -------------------------------------------------------------------------------- 1 | import graphsurgeon as gs 2 | import tensorflow as tf 3 | 4 | def preprocess(dynamic_graph): 5 | axis = dynamic_graph.find_nodes_by_path("concatenate/concat/axis")[0] 6 | # Set axis to 2, because of discrepancies between TensorFlow and TensorRT. 
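    # (TensorFlow's concatenation axis counts the batch dimension while TensorRT's does not,
    # so the axis value recorded in the frozen graph must be overridden before UFF conversion.)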
7 | axis.attr["value"].tensor.int_val[0] = 2 8 | -------------------------------------------------------------------------------- /src/sampleMovieLens/sampleMovieLens.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 源文件 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/sampleMovieLens/sampleMovieLens.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleNMT/README.txt: -------------------------------------------------------------------------------- 1 | The NMT sample is using data fetched and trained using the NMT tutorial ( https://github.com/tensorflow/nmt ). 2 | 3 | 4 | ** Preparing the data ** 5 | 6 | The trained weights, directly usable by the sample, can be fetched from here 7 | https://developer.download.nvidia.com/compute/machine-learning/tensorrt/models/sampleNMT_weights.tar.gz 8 | 'deen/weights' directory should contain all the weight data needed. 9 | 10 | We do not distribute the text and vocabulary data. For the De-En model 11 | ( https://github.com/tensorflow/nmt#wmt-german-english ), 12 | the data needs to be fetched and generated using the following script 13 | https://github.com/tensorflow/nmt/blob/master/nmt/scripts/wmt16_en_de.sh . 14 | It might take some time, since it prepares 4.5M samples dataset for training as well. 15 | * Execute wmt16_en_de.sh and it will create 'wmt16_de_en' directory in the current directory 16 | * 'cd wmt16_de_en' 17 | * 'cp newstest2015.tok.bpe.32000.de newstest2015.tok.bpe.32000.en vocab.bpe.32000.de vocab.bpe.32000.en /deen/.' 18 | 19 | 20 | ** Running the sample ** 21 | 22 | * List all options supported: sample_nmt --help 23 | * Run the sample to generate 'translation_output.txt' : sample_nmt --data_dir=/deen --data_writer=text 24 | * Get the BLEU score for the first 100 sentences : sample_nmt --data_dir=/deen --max_inference_samples=100 25 | 26 | 27 | ** Training De-En model using Tensorflow NMT framework and importing the weight data into the sample. ** 28 | 29 | This section is only relevant if one decides to train the model. 30 | 31 | * The training data set needs to be fetched and preprocessed as was discussed earlier. 32 | * Fetch NMT framework : 'git clone https://github.com/tensorflow/nmt.git' 33 | * Take a look at 'nmt/nmt/standard_hparams/wmt16.json' 34 | The sample currently only implements unidirectional LSTMs and Luong's attention. So, training should account for this. 
35 | edit relevant JSON config to have {"attention": "luong", "encoder_type": "uni", ...} 36 | Below is the config we used for training: 37 | { 38 | "attention": "luong", 39 | "attention_architecture": "standard", 40 | "batch_size": 128, 41 | "colocate_gradients_with_ops": true, 42 | "dropout": 0.2, 43 | "encoder_type": "uni", 44 | "eos": "", 45 | "forget_bias": 1.0, 46 | "infer_batch_size": 32, 47 | "init_weight": 0.1, 48 | "learning_rate": 1.0, 49 | "max_gradient_norm": 5.0, 50 | "metrics": ["bleu"], 51 | "num_buckets": 5, 52 | "num_layers": 2, 53 | "num_train_steps": 340000, 54 | "decay_scheme": "luong10", 55 | "num_units": 1024, 56 | "optimizer": "sgd", 57 | "residual": false, 58 | "share_vocab": false, 59 | "subword_option": "bpe", 60 | "sos": "", 61 | "src_max_len": 50, 62 | "src_max_len_infer": null, 63 | "steps_per_external_eval": null, 64 | "steps_per_stats": 100, 65 | "tgt_max_len": 50, 66 | "tgt_max_len_infer": null, 67 | "time_major": true, 68 | "unit_type": "lstm", 69 | "beam_width": 10 70 | } 71 | 72 | The following line can be used for training, provided the training dataset is /tmp/wmt16_de_en: 73 | 74 | python -m nmt.nmt \ 75 | --src=de --tgt=en \ 76 | --hparams_path=/wmt16.json \ 77 | --out_dir=/tmp/deen_nmt \ 78 | --vocab_prefix=/tmp/wmt16_de_en/vocab.bpe.32000 \ 79 | --train_prefix=/tmp/wmt16_de_en/train.tok.clean.bpe.32000 \ 80 | --dev_prefix=/tmp/wmt16_de_en/newstest2013.tok.bpe.32000 \ 81 | --test_prefix=/tmp/wmt16_de_en/newstest2015.tok.bpe.32000 82 | 83 | The following line can be used for the inference in Tensorflow: 84 | python -m nmt.nmt \ 85 | --src=de --tgt=en \ 86 | --ckpt=/tmp/deen_nmt/translate.ckpt-340000 \ 87 | --hparams_path=/wmt16.json \ 88 | --out_dir=/tmp/deen \ 89 | --vocab_prefix=/tmp/wmt16_de_en/vocab.bpe.32000 \ 90 | --inference_input_file=/tmp/wmt16_de_en/newstest2015.tok.bpe.32000.de \ 91 | --inference_output_file=/tmp/deen/output_infer \ 92 | --inference_ref_file=/tmp/wmt16_de_en/newstest2015.tok.bpe.32000.en 93 | 94 | * Importing Tensorflow checkpoint into the sample * 95 | 96 | We provide a tool to convert Tensorflow checkpoint from the NMT framework into binary weight data, readable by the sample. It was tested using Tensorflow 1.6. The tool by default imports the NMT framework. 
97 | 98 | * git clone https://github.com/tensorflow/nmt.git 99 | * python ./chptToBin.py \ 100 | --src=de --tgt=en \ 101 | --ckpt=/tmp/deen_nmt/translate.ckpt-340000 \ 102 | --hparams_path=/wmt16.json \ 103 | --out_dir=/tmp/deen \ 104 | --vocab_prefix=/tmp/wmt16_de_en/vocab.bpe.32000 \ 105 | --inference_input_file=/tmp/wmt16_de_en/newstest2015.tok.bpe.32000.de \ 106 | --inference_output_file=/tmp/deen/output_infer \ 107 | --inference_ref_file=/tmp/wmt16_de_en/newstest2015.tok.bpe.32000.en -------------------------------------------------------------------------------- /src/sampleNMT/component.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_COMPONENT_ 2 | #define SAMPLE_NMT_COMPONENT_ 3 | 4 | #include 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | /** \class Component 10 | * 11 | * \brief a functional part of the sample 12 | * 13 | */ 14 | class Component 15 | { 16 | public: 17 | typedef std::shared_ptr ptr; 18 | 19 | /** 20 | * \brief get the textual description of the component 21 | */ 22 | virtual std::string getInfo() = 0; 23 | 24 | protected: 25 | Component() = default; 26 | 27 | virtual ~Component() = default; 28 | }; 29 | } 30 | 31 | #endif // SAMPLE_NMT_COMPONENT_ 32 | -------------------------------------------------------------------------------- /src/sampleNMT/cudaError.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_CUDA_ERROR_ 2 | #define SAMPLE_NMT_CUDA_ERROR_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define CUDA_CHECK(callstr) \ 9 | { \ 10 | cudaError_t error_code = callstr; \ 11 | if (error_code != cudaSuccess) { \ 12 | std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \ 13 | assert(0); \ 14 | } \ 15 | } 16 | 17 | #endif // SAMPLE_NMT_CUDA_ERROR_ 18 | -------------------------------------------------------------------------------- /src/sampleNMT/data/benchmarkWriter.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmarkWriter.h" 2 | 3 | #include 4 | 5 | namespace nmtSample 6 | { 7 | BenchmarkWriter::BenchmarkWriter() 8 | : mSampleCount(0) 9 | , mInputTokenCount(0) 10 | , mOutputTokenCount(0) 11 | , mStartTS(std::chrono::high_resolution_clock::now()) 12 | { 13 | } 14 | 15 | void BenchmarkWriter::write( 16 | const int* hOutputData, 17 | int actualOutputSequenceLength, 18 | int actualInputSequenceLength) 19 | { 20 | ++mSampleCount; 21 | mInputTokenCount += actualInputSequenceLength; 22 | mOutputTokenCount += actualOutputSequenceLength; 23 | } 24 | 25 | void BenchmarkWriter::initialize() 26 | { 27 | mStartTS = std::chrono::high_resolution_clock::now(); 28 | } 29 | 30 | void BenchmarkWriter::finalize() 31 | { 32 | std::chrono::duration sec = std::chrono::high_resolution_clock::now() - mStartTS; 33 | int totalTokenCount = mInputTokenCount + mOutputTokenCount; 34 | std::cout << mSampleCount << " sequences generated in " << sec.count() << " seconds, " << (mSampleCount / sec.count()) << " samples/sec" << std::endl; 35 | std::cout << totalTokenCount << " tokens processed (source and destination), " << (totalTokenCount / sec.count()) << " tokens/sec" << std::endl; 36 | } 37 | 38 | std::string BenchmarkWriter::getInfo() 39 | { 40 | return "Benchmark Writer"; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/sampleNMT/data/benchmarkWriter.h: 
-------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_BENCHMARK_WRITER_ 2 | #define SAMPLE_NMT_BENCHMARK_WRITER_ 3 | 4 | #include 5 | #include 6 | 7 | #include "dataWriter.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class BenchmarkWriter 12 | * 13 | * \brief all it does is to measure the performance of sequence generation 14 | * 15 | */ 16 | class BenchmarkWriter : public DataWriter 17 | { 18 | public: 19 | BenchmarkWriter(); 20 | 21 | void write( 22 | const int* hOutputData, 23 | int actualOutputSequenceLength, 24 | int actualInputSequenceLength) override; 25 | 26 | void initialize() override; 27 | 28 | void finalize() override; 29 | 30 | std::string getInfo() override; 31 | 32 | ~BenchmarkWriter() override = default; 33 | 34 | private: 35 | int mSampleCount; 36 | int mInputTokenCount; 37 | int mOutputTokenCount; 38 | std::chrono::high_resolution_clock::time_point mStartTS; 39 | }; 40 | } 41 | 42 | #endif // SAMPLE_NMT_BENCHMARK_WRITER_ 43 | -------------------------------------------------------------------------------- /src/sampleNMT/data/bleuScoreWriter.cpp: -------------------------------------------------------------------------------- 1 | #include "bleuScoreWriter.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace nmtSample 13 | { 14 | 15 | typedef std::vector Segment_t; 16 | typedef std::map Count_t; 17 | int read(std::vector& samples, std::shared_ptr input, int samplesToRead = 1) 18 | { 19 | std::string line; 20 | int lineCounter = 0; 21 | Segment_t tokens; 22 | samples.resize(0); 23 | std::string pattern("@@ "); 24 | while (lineCounter < samplesToRead && std::getline(*input, line)) 25 | { 26 | // if clean and handle BPE or SPM outputs is required 27 | std::size_t p0 = 0; 28 | while((p0 = line.find(pattern, p0)) != std::string::npos) 29 | { 30 | line.replace(p0, pattern.length(), ""); 31 | } 32 | 33 | // generate error if those special characters exist. Windows needs explicit encoding. 
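        // "\u2581" is the word-boundary marker emitted by SentencePiece; the u8 prefix used on MSVC
        // below forces the literal to be encoded as UTF-8, since the compiler's execution character
        // set is not guaranteed to be UTF-8 on Windows.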
34 | #ifdef _MSC_VER 35 | p0 = line.find(u8"\u2581"); 36 | #else 37 | p0 = line.find("\u2581"); 38 | #endif 39 | assert((p0 == std::string::npos)); 40 | std::istringstream ss(line); 41 | std::string token; 42 | tokens.resize(0); 43 | while (ss >> token) 44 | { 45 | tokens.emplace_back(token); 46 | } 47 | samples.emplace_back(tokens); 48 | lineCounter++; 49 | } 50 | return lineCounter; 51 | } 52 | 53 | Count_t ngramCounts(const Segment_t& segment, int maxOrder = 4) 54 | { 55 | Count_t ngramCounts; 56 | 57 | for (int order = 1; order < maxOrder + 1; order++) 58 | { 59 | for (int i = 0; i < static_cast(segment.size()) - order + 1; i++) 60 | { 61 | Segment_t ngram; 62 | for (int j = i; j < i + order; j++) 63 | ngram.emplace_back(segment[j]); 64 | 65 | auto it = ngramCounts.find(ngram); 66 | if (it != ngramCounts.end()) 67 | { 68 | it->second++; 69 | } 70 | else 71 | ngramCounts[ngram] = 1; 72 | } 73 | } 74 | 75 | return ngramCounts; 76 | } 77 | 78 | Count_t ngramCountIntersection(const Count_t& cnt0, const Count_t& cnt1) 79 | { 80 | Count_t overlap; 81 | // merge the maps 82 | auto it0 = cnt0.begin(), it1 = cnt1.begin(), end0 = cnt0.end(), end1 = cnt1.end(); 83 | while (it0 != end0 && it1 != end1) 84 | { 85 | if (it0->first == it1->first) 86 | { 87 | overlap.emplace(it0->first, std::min(it0->second, it1->second)); 88 | it0++; 89 | it1++; 90 | } 91 | else 92 | { 93 | if (it0->first < it1->first) 94 | it0++; 95 | else 96 | it1++; 97 | } 98 | } 99 | return overlap; 100 | } 101 | 102 | void accumulateBLEU(const std::vector& referenceSamples, 103 | const std::vector& outputSamples, 104 | int maxOrder, 105 | size_t& referenceLength, 106 | size_t& translationLength, 107 | std::vector& matchesByOrder, 108 | std::vector& possibleMatchesByOrder) 109 | { 110 | assert(referenceSamples.size() == outputSamples.size()); 111 | auto reference = referenceSamples.begin(); 112 | auto translation = outputSamples.begin(); 113 | 114 | while (translation != outputSamples.end()) 115 | { 116 | referenceLength += reference->size(); 117 | translationLength += translation->size(); 118 | 119 | Count_t refNgramCounts = ngramCounts(*reference); 120 | Count_t outputNgramCounts = ngramCounts(*translation); 121 | Count_t overlap = ngramCountIntersection(outputNgramCounts, refNgramCounts); 122 | for (auto& ngram : overlap) 123 | { 124 | matchesByOrder[ngram.first.size() - 1] += ngram.second; 125 | } 126 | for (int order = 1; order < maxOrder + 1; order++) 127 | { 128 | int possibleMatches = static_cast(translation->size()) - order + 1; 129 | if (possibleMatches > 0) 130 | possibleMatchesByOrder[order - 1] += possibleMatches; 131 | } 132 | ++translation; 133 | ++reference; 134 | } 135 | } 136 | 137 | BLEUScoreWriter::BLEUScoreWriter(std::shared_ptr referenceTextInput, Vocabulary::ptr vocabulary, int maxOrder) 138 | : mReferenceInput(referenceTextInput) 139 | , mVocabulary(vocabulary) 140 | , mReferenceLength(0) 141 | , mTranslationLength(0) 142 | , mMaxOrder(maxOrder) 143 | , mSmooth(false) 144 | , mMatchesByOrder(maxOrder, 0) 145 | , mPossibleMatchesByOrder(maxOrder, 0) 146 | { 147 | } 148 | 149 | void BLEUScoreWriter::write( 150 | const int* hOutputData, 151 | int actualOutputSequenceLength, 152 | int actualInputSequenceLength) 153 | { 154 | std::vector outputSamples; 155 | std::vector referenceSamples; 156 | int numReferenceSamples = read(referenceSamples, mReferenceInput, 1); 157 | assert(numReferenceSamples == 1); 158 | 159 | Segment_t segment; 160 | std::stringstream 
filteredSentence(DataWriter::generateText(actualOutputSequenceLength, hOutputData, mVocabulary)); 161 | std::string token; 162 | while (filteredSentence >> token) 163 | { 164 | segment.emplace_back(token); 165 | } 166 | outputSamples.emplace_back(segment); 167 | 168 | accumulateBLEU(referenceSamples, outputSamples, mMaxOrder, mReferenceLength, mTranslationLength, mMatchesByOrder, mPossibleMatchesByOrder); 169 | } 170 | 171 | void BLEUScoreWriter::initialize() 172 | { 173 | } 174 | 175 | void BLEUScoreWriter::finalize() 176 | { 177 | std::cout << "BLEU score = " << getScore() << std::endl; 178 | } 179 | 180 | float BLEUScoreWriter::getScore() const 181 | { 182 | std::vector precisions(mMaxOrder, 0.0); 183 | for (int i = 0; i < mMaxOrder; i++) 184 | { 185 | if (mSmooth) 186 | { 187 | precisions[i] = ((mMatchesByOrder[i] + 1.) / (mPossibleMatchesByOrder[i] + 1.)); 188 | } 189 | else 190 | { 191 | if (mPossibleMatchesByOrder[i] > 0) 192 | precisions[i] = (static_cast(mMatchesByOrder[i]) / mPossibleMatchesByOrder[i]); 193 | else 194 | precisions[i] = 0.0; 195 | } 196 | } 197 | double pLogSum, geoMean; 198 | if (*std::min_element(precisions.begin(), precisions.end()) > 0.0) 199 | { 200 | pLogSum = 0.0; 201 | for (auto p : precisions) 202 | pLogSum += (1. / mMaxOrder) * log(p); 203 | geoMean = exp(pLogSum); 204 | } 205 | else 206 | geoMean = 0.0; 207 | 208 | double ratio = static_cast(mTranslationLength) / mReferenceLength; 209 | double bp; 210 | bp = (ratio > 1.0) ? 1.0 : exp(1.0 - 1.0 / ratio); 211 | return static_cast(geoMean * bp * 100.0); 212 | } 213 | 214 | std::string BLEUScoreWriter::getInfo() 215 | { 216 | std::stringstream ss; 217 | ss << "BLEU Score Writer, max order = " << mMaxOrder; 218 | return ss.str(); 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /src/sampleNMT/data/bleuScoreWriter.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_BLEU_SCORE_WRITER_ 2 | #define SAMPLE_NMT_BLEU_SCORE_WRITER_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "dataWriter.h" 9 | #include "vocabulary.h" 10 | 11 | namespace nmtSample 12 | { 13 | /** \class BLEUScoreWriter 14 | * 15 | * \brief all it does is to evaluate BLEU score 16 | * 17 | */ 18 | class BLEUScoreWriter : public DataWriter 19 | { 20 | public: 21 | BLEUScoreWriter(std::shared_ptr referenceTextInput, 22 | Vocabulary::ptr vocabulary, 23 | int maxOrder = 4); 24 | 25 | void write( 26 | const int* hOutputData, 27 | int actualOutputSequenceLength, 28 | int actualInputSequenceLength) override; 29 | 30 | void initialize() override; 31 | 32 | void finalize() override; 33 | 34 | std::string getInfo() override; 35 | 36 | float getScore() const; 37 | 38 | ~BLEUScoreWriter() override = default; 39 | 40 | private: 41 | std::shared_ptr mReferenceInput; 42 | Vocabulary::ptr mVocabulary; 43 | size_t mReferenceLength; 44 | size_t mTranslationLength; 45 | int mMaxOrder; 46 | bool mSmooth; 47 | std::vector mMatchesByOrder; 48 | std::vector mPossibleMatchesByOrder; 49 | }; 50 | } 51 | 52 | #endif // SAMPLE_NMT_BLEU_SCORE_WRITER_ 53 | -------------------------------------------------------------------------------- /src/sampleNMT/data/dataReader.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_DATA_READER_ 2 | #define SAMPLE_NMT_DATA_READER_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class DataReader 11 | * 12 | * \brief reader 
of sequences of data 13 | * 14 | */ 15 | class DataReader : public Component 16 | { 17 | public: 18 | typedef std::shared_ptr ptr; 19 | 20 | DataReader() = default; 21 | 22 | /** 23 | * \brief reads the batch of smaples/sequences 24 | * 25 | * \return the actual number of samples read 26 | */ 27 | virtual int read( 28 | int samplesToRead, 29 | int maxInputSequenceLength, 30 | int* hInputData, 31 | int* hActualInputSequenceLengths) 32 | = 0; 33 | 34 | /** 35 | * \brief Reset the reader position, the data reader is ready to read the data from th ebeginning again after the function returns 36 | */ 37 | virtual void reset() = 0; 38 | 39 | ~DataReader() override = default; 40 | }; 41 | } 42 | 43 | #endif // SAMPLE_NMT_DATA_READER_ 44 | -------------------------------------------------------------------------------- /src/sampleNMT/data/dataWriter.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "dataWriter.h" 4 | 5 | namespace nmtSample 6 | { 7 | std::string DataWriter::generateText(int sequenceLength, const int* currentOutputData, Vocabulary::ptr vocabulary) 8 | { 9 | // if clean and handle BPE outputs is required 10 | std::string delimiter = "@@"; 11 | size_t delimiterSize = delimiter.size(); 12 | std::stringstream sentence; 13 | std::string word(""); 14 | const char* wordDelimiter = ""; 15 | for (int i = 0; i < sequenceLength; ++i) 16 | { 17 | int id = currentOutputData[i]; 18 | if (id != vocabulary->getEndSequenceId()) 19 | { 20 | std::string token = vocabulary->getToken(id); 21 | if ((token.size() >= delimiterSize) && (token.compare(token.size() - delimiterSize, delimiterSize, delimiter) == 0)) 22 | { 23 | word = word + token.erase(token.size() - delimiterSize, delimiterSize); 24 | } 25 | else 26 | { 27 | word = word + token; 28 | sentence << wordDelimiter; 29 | sentence << word; 30 | word = ""; 31 | wordDelimiter = " "; 32 | } 33 | } 34 | } 35 | return sentence.str(); 36 | } 37 | } -------------------------------------------------------------------------------- /src/sampleNMT/data/dataWriter.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_DATA_WRITER_ 2 | #define SAMPLE_NMT_DATA_WRITER_ 3 | 4 | #include 5 | #include 6 | 7 | #include "../component.h" 8 | #include "vocabulary.h" 9 | 10 | namespace nmtSample 11 | { 12 | /** \class DataWriter 13 | * 14 | * \brief writer of sequences of data 15 | * 16 | */ 17 | class DataWriter : public Component 18 | { 19 | public: 20 | typedef std::shared_ptr ptr; 21 | 22 | DataWriter() = default; 23 | 24 | /** 25 | * \brief write the generated sequence 26 | */ 27 | virtual void write( 28 | const int* hOutputData, 29 | int actualOutputSequenceLength, 30 | int actualInputSequenceLength) 31 | = 0; 32 | 33 | /** 34 | * \brief it is called right before inference starts 35 | */ 36 | virtual void initialize() = 0; 37 | 38 | /** 39 | * \brief it is called right after inference ends 40 | */ 41 | virtual void finalize() = 0; 42 | 43 | ~DataWriter() override = default; 44 | 45 | protected: 46 | static std::string generateText(int sequenceLength, const int* currentOutputData, Vocabulary::ptr vocabulary); 47 | }; 48 | } 49 | 50 | #endif // SAMPLE_NMT_DATA_WRITER_ 51 | -------------------------------------------------------------------------------- /src/sampleNMT/data/limitedSamplesDataReader.cpp: -------------------------------------------------------------------------------- 1 | #include "limitedSamplesDataReader.h" 2 | 3 | #include 4 | #include 
5 | 6 | namespace nmtSample 7 | { 8 | LimitedSamplesDataReader::LimitedSamplesDataReader(int maxSamplesToRead, DataReader::ptr originalDataReader) 9 | : gMaxSamplesToRead(maxSamplesToRead) 10 | , gOriginalDataReader(originalDataReader) 11 | , gCurrentPosition(0) 12 | { 13 | } 14 | 15 | int LimitedSamplesDataReader::read( 16 | int samplesToRead, 17 | int maxInputSequenceLength, 18 | int* hInputData, 19 | int* hActualInputSequenceLengths) 20 | { 21 | int limitedSmplesToRead = std::min(samplesToRead, std::max(gMaxSamplesToRead - gCurrentPosition, 0)); 22 | int samplesRead = gOriginalDataReader->read(limitedSmplesToRead, maxInputSequenceLength, hInputData, hActualInputSequenceLengths); 23 | gCurrentPosition += samplesRead; 24 | return samplesRead; 25 | } 26 | 27 | void LimitedSamplesDataReader::reset() 28 | { 29 | gOriginalDataReader->reset(); 30 | gCurrentPosition = 0; 31 | } 32 | 33 | std::string LimitedSamplesDataReader::getInfo() 34 | { 35 | std::stringstream ss; 36 | ss << "Limited Samples Reader, max samples = " << gMaxSamplesToRead << ", original reader info: " << gOriginalDataReader->getInfo(); 37 | return ss.str(); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/sampleNMT/data/limitedSamplesDataReader.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_LIMITED_SAMPLES_DATA_READER_ 2 | #define SAMPLE_NMT_LIMITED_SAMPLES_DATA_READER_ 3 | 4 | #include "dataReader.h" 5 | 6 | namespace nmtSample 7 | { 8 | /** \class LimitedSamplesDataReader 9 | * 10 | * \brief wraps another data reader and limits the number of samples to read 11 | * 12 | */ 13 | class LimitedSamplesDataReader : public DataReader 14 | { 15 | public: 16 | LimitedSamplesDataReader(int maxSamplesToRead, DataReader::ptr originalDataReader); 17 | 18 | int read( 19 | int samplesToRead, 20 | int maxInputSequenceLength, 21 | int* hInputData, 22 | int* hActualInputSequenceLengths) override; 23 | 24 | void reset() override; 25 | 26 | std::string getInfo() override; 27 | 28 | private: 29 | int gMaxSamplesToRead; 30 | DataReader::ptr gOriginalDataReader; 31 | int gCurrentPosition; 32 | }; 33 | } 34 | 35 | #endif // SAMPLE_NMT_LIMITED_SAMPLES_DATA_READER_ 36 | -------------------------------------------------------------------------------- /src/sampleNMT/data/sequenceProperties.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_SEQUENCE_PROPERTIES_ 2 | #define SAMPLE_NMT_SEQUENCE_PROPERTIES_ 3 | 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | /** \class SequenceProperties 9 | * 10 | * \brief provides encoder/decoder relevant properties of sequences 11 | * 12 | */ 13 | class SequenceProperties 14 | { 15 | public: 16 | typedef std::shared_ptr ptr; 17 | 18 | SequenceProperties() = default; 19 | 20 | virtual int getStartSequenceId() = 0; 21 | 22 | virtual int getEndSequenceId() = 0; 23 | 24 | virtual ~SequenceProperties() = default; 25 | }; 26 | } 27 | 28 | #endif // SAMPLE_NMT_SEQUENCE_PROPERTIES_ 29 | -------------------------------------------------------------------------------- /src/sampleNMT/data/textReader.cpp: -------------------------------------------------------------------------------- 1 | #include "textReader.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nmtSample 9 | { 10 | TextReader::TextReader(std::shared_ptr textInput, Vocabulary::ptr vocabulary) 11 | : mInput(textInput) 12 | , mVocabulary(vocabulary) 13 | { 14 | } 15 | 16 | 
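// Reads up to samplesToRead lines from the input stream, maps each whitespace-separated token to its
// vocabulary id, pads the unused tail of every row with the end-of-sequence id, records the actual
// sequence lengths, and returns the number of lines actually read.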
int TextReader::read( 17 | int samplesToRead, 18 | int maxInputSequenceLength, 19 | int* hInputData, 20 | int* hActualInputSequenceLengths) 21 | { 22 | std::setlocale(LC_ALL, "en_US.UTF-8"); 23 | std::string line; 24 | 25 | int lineCounter = 0; 26 | while (lineCounter < samplesToRead && std::getline(*mInput, line)) 27 | { 28 | std::istringstream ss(line); 29 | std::string token; 30 | int tokenCounter = 0; 31 | while ((ss >> token) && (tokenCounter < maxInputSequenceLength)) 32 | { 33 | hInputData[maxInputSequenceLength * lineCounter + tokenCounter] = mVocabulary->getId(token); 34 | tokenCounter++; 35 | } 36 | 37 | hActualInputSequenceLengths[lineCounter] = tokenCounter; 38 | 39 | // Fill unused values with valid vocabulary ID, it doesn't necessary have to be eos 40 | std::fill(hInputData + maxInputSequenceLength * lineCounter + tokenCounter, hInputData + maxInputSequenceLength * (lineCounter + 1), mVocabulary->getEndSequenceId()); 41 | 42 | lineCounter++; 43 | } 44 | return lineCounter; 45 | } 46 | 47 | void TextReader::reset() 48 | { 49 | mInput->seekg(0, mInput->beg); 50 | } 51 | 52 | std::string TextReader::getInfo() 53 | { 54 | std::stringstream ss; 55 | ss << "Text Reader, vocabulary size = " << mVocabulary->getSize(); 56 | return ss.str(); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/sampleNMT/data/textReader.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_TEXT_READER_ 2 | #define SAMPLE_NMT_TEXT_READER_ 3 | 4 | #include "dataReader.h" 5 | #include "vocabulary.h" 6 | #include 7 | #include 8 | #include 9 | 10 | namespace nmtSample 11 | { 12 | /** \class TextReader 13 | * 14 | * \brief reads sequences of data from input stream 15 | * 16 | */ 17 | class TextReader : public DataReader 18 | { 19 | public: 20 | TextReader(std::shared_ptr textInput, Vocabulary::ptr vocabulary); 21 | 22 | int read( 23 | int samplesToRead, 24 | int maxInputSequenceLength, 25 | int* hInputData, 26 | int* hActualInputSequenceLengths) override; 27 | 28 | void reset() override; 29 | 30 | std::string getInfo() override; 31 | 32 | private: 33 | std::shared_ptr mInput; 34 | Vocabulary::ptr mVocabulary; 35 | }; 36 | } 37 | 38 | #endif // SAMPLE_NMT_TEXT_READER_ 39 | -------------------------------------------------------------------------------- /src/sampleNMT/data/textWriter.cpp: -------------------------------------------------------------------------------- 1 | #include "textWriter.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | TextWriter::TextWriter(std::shared_ptr textOnput, Vocabulary::ptr vocabulary) 10 | : mOutput(textOnput) 11 | , mVocabulary(vocabulary) 12 | { 13 | } 14 | 15 | void TextWriter::write( 16 | const int* hOutputData, 17 | int actualOutputSequenceLength, 18 | int actualInputSequenceLength) 19 | { 20 | // if clean and handle BPE outputs is required 21 | *mOutput << DataWriter::generateText(actualOutputSequenceLength, hOutputData, mVocabulary) << "\n"; 22 | } 23 | 24 | void TextWriter::initialize() 25 | { 26 | } 27 | 28 | void TextWriter::finalize() 29 | { 30 | } 31 | 32 | std::string TextWriter::getInfo() 33 | { 34 | std::stringstream ss; 35 | ss << "Text Writer, vocabulary size = " << mVocabulary->getSize(); 36 | return ss.str(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/sampleNMT/data/textWriter.h: -------------------------------------------------------------------------------- 1 
| #ifndef SAMPLE_NMT_TEXT_WRITER_ 2 | #define SAMPLE_NMT_TEXT_WRITER_ 3 | 4 | #include 5 | #include 6 | 7 | #include "dataWriter.h" 8 | #include "vocabulary.h" 9 | 10 | namespace nmtSample 11 | { 12 | /** \class TextReader 13 | * 14 | * \brief writes sequences of data into output stream 15 | * 16 | */ 17 | class TextWriter : public DataWriter 18 | { 19 | public: 20 | TextWriter(std::shared_ptr textOnput, Vocabulary::ptr vocabulary); 21 | 22 | void write( 23 | const int* hOutputData, 24 | int actualOutputSequenceLength, 25 | int actualInputSequenceLength) override; 26 | 27 | void initialize() override; 28 | 29 | void finalize() override; 30 | 31 | std::string getInfo() override; 32 | 33 | ~TextWriter() override = default; 34 | 35 | private: 36 | std::shared_ptr mOutput; 37 | Vocabulary::ptr mVocabulary; 38 | }; 39 | } 40 | 41 | #endif // SAMPLE_NMT_TEXT_WRITER_ 42 | -------------------------------------------------------------------------------- /src/sampleNMT/data/vocabulary.cpp: -------------------------------------------------------------------------------- 1 | #include "vocabulary.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | const std::string Vocabulary::mSosStr = ""; 10 | const std::string Vocabulary::mEosStr = ""; 11 | const std::string Vocabulary::mUnkStr = ""; 12 | 13 | Vocabulary::Vocabulary() 14 | : mNumTokens(0) 15 | { 16 | } 17 | 18 | void Vocabulary::add(const std::string& token) 19 | { 20 | assert(mTokenToId.find(token) == mTokenToId.end()); 21 | mTokenToId[token] = mNumTokens; 22 | mIdToToken.push_back(token); 23 | mNumTokens++; 24 | } 25 | 26 | int Vocabulary::getId(const std::string& token) const 27 | { 28 | auto it = mTokenToId.find(token); 29 | if (it != mTokenToId.end()) 30 | return it->second; 31 | return mUnkId; 32 | } 33 | 34 | std::string Vocabulary::getToken(int id) const 35 | { 36 | assert(id < mNumTokens); 37 | return mIdToToken[id]; 38 | } 39 | 40 | int Vocabulary::getSize() const 41 | { 42 | return mNumTokens; 43 | } 44 | 45 | std::istream& operator>>(std::istream& input, Vocabulary& value) 46 | { 47 | // stream should contain "", "" and "" tokens 48 | std::setlocale(LC_ALL, "en_US.UTF-8"); 49 | std::string line; 50 | std::string word; 51 | while (input >> word) 52 | { 53 | value.add(word); 54 | } 55 | 56 | { 57 | auto it = value.mTokenToId.find(Vocabulary::mSosStr); 58 | assert(it != value.mTokenToId.end()); 59 | value.mSosId = it->second; 60 | } 61 | 62 | { 63 | auto it = value.mTokenToId.find(Vocabulary::mEosStr); 64 | assert(it != value.mTokenToId.end()); 65 | value.mEosId = it->second; 66 | } 67 | 68 | { 69 | auto it = value.mTokenToId.find(Vocabulary::mUnkStr); 70 | assert(it != value.mTokenToId.end()); 71 | value.mUnkId = it->second; 72 | } 73 | 74 | return input; 75 | } 76 | 77 | int Vocabulary::getStartSequenceId() 78 | { 79 | return mSosId; 80 | } 81 | 82 | int Vocabulary::getEndSequenceId() 83 | { 84 | return mEosId; 85 | } 86 | } -------------------------------------------------------------------------------- /src/sampleNMT/data/vocabulary.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_VOCABULARY_ 2 | #define SAMPLE_NMT_VOCABULARY_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "sequenceProperties.h" 10 | 11 | namespace nmtSample 12 | { 13 | /** \class Vocabulary 14 | * 15 | * \brief String<->Id bijection storage 16 | * 17 | */ 18 | class Vocabulary : public SequenceProperties 19 | { 20 | public: 21 | typedef 
std::shared_ptr ptr; 22 | 23 | Vocabulary(); 24 | 25 | friend std::istream& operator>>(std::istream& input, Vocabulary& value); 26 | 27 | /** 28 | * \brief add new token to vocabulary, ID is auto-generated 29 | */ 30 | void add(const std::string& token); 31 | 32 | /** 33 | * \brief get the ID of the token 34 | */ 35 | int getId(const std::string& token) const; 36 | 37 | /** 38 | * \brief get token by ID 39 | */ 40 | std::string getToken(int id) const; 41 | 42 | /** 43 | * \brief get the number of elements in the vocabulary 44 | */ 45 | int getSize() const; 46 | 47 | int getStartSequenceId() override; 48 | 49 | int getEndSequenceId() override; 50 | 51 | private: 52 | static const std::string mSosStr; 53 | static const std::string mUnkStr; 54 | static const std::string mEosStr; 55 | 56 | std::map mTokenToId; 57 | std::vector mIdToToken; 58 | int mNumTokens; 59 | 60 | int mSosId; 61 | int mEosId; 62 | int mUnkId; 63 | }; 64 | } 65 | 66 | #endif // SAMPLE_NMT_VOCABULARY_ 67 | -------------------------------------------------------------------------------- /src/sampleNMT/deviceBuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_DEVICE_BUFFER_ 2 | #define SAMPLE_NMT_DEVICE_BUFFER_ 3 | 4 | #include "cudaError.h" 5 | #include 6 | #include 7 | 8 | namespace nmtSample 9 | { 10 | template 11 | class DeviceBuffer 12 | { 13 | public: 14 | typedef std::shared_ptr> ptr; 15 | 16 | DeviceBuffer(size_t elementCount) 17 | : mBuffer(nullptr) 18 | { 19 | CUDA_CHECK(cudaMalloc(&mBuffer, elementCount * sizeof(T))); 20 | } 21 | 22 | virtual ~DeviceBuffer() 23 | { 24 | if (mBuffer) 25 | { 26 | cudaFree(mBuffer); 27 | } 28 | } 29 | 30 | operator T*() 31 | { 32 | return mBuffer; 33 | } 34 | 35 | operator const T*() const 36 | { 37 | return mBuffer; 38 | } 39 | 40 | protected: 41 | T* mBuffer; 42 | }; 43 | } 44 | 45 | #endif // SAMPLE_NMT_DEVICE_BUFFER_ 46 | -------------------------------------------------------------------------------- /src/sampleNMT/model/alignment.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_ALIGNMENT_ 2 | #define SAMPLE_NMT_ALIGNMENT_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class Alignment 12 | * 13 | * \brief represents the core of attention mechanism 14 | * 15 | */ 16 | class Alignment : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | Alignment() = default; 22 | 23 | /** 24 | * \brief add the alignment scores calculation to the network 25 | */ 26 | virtual void addToModel( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* attentionKeys, 29 | nvinfer1::ITensor* queryStates, 30 | nvinfer1::ITensor** alignmentScores) 31 | = 0; 32 | 33 | /** 34 | * \brief add attention keys calculation (from source memory states) to the network 35 | * 36 | * The funtion is called if getAttentionKeySize returns positive value 37 | */ 38 | virtual void addAttentionKeys( 39 | nvinfer1::INetworkDefinition* network, 40 | nvinfer1::ITensor* memoryStates, 41 | nvinfer1::ITensor** attentionKeys) 42 | = 0; 43 | 44 | /** 45 | * \brief get the size of the source states 46 | */ 47 | virtual int getSourceStatesSize() = 0; 48 | 49 | /** 50 | * \brief get the size of the attention keys 51 | */ 52 | virtual int getAttentionKeySize() = 0; 53 | 54 | ~Alignment() override = default; 55 | }; 56 | } 57 | 58 | #endif // SAMPLE_NMT_ALIGNMENT_ 59 | 
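// Illustrative sketch (not part of the original header): one plausible way a multiplicative
// (Luong-style) scorer could implement Alignment::addToModel, scoring every source position
// against every decoder query with a single batched GEMM. Class and tensor names here are
// hypothetical; shapes are illustrative only.
//
//   void MyMultiplicativeAlignment::addToModel(
//       nvinfer1::INetworkDefinition* network,
//       nvinfer1::ITensor* attentionKeys,   // projected source memory states
//       nvinfer1::ITensor* queryStates,     // current decoder states, one row per beam ray
//       nvinfer1::ITensor** alignmentScores)
//   {
//       // scores = queryStates * attentionKeys^T
//       auto mmLayer = network->addMatrixMultiply(*queryStates, false, *attentionKeys, true);
//       assert(mmLayer != nullptr);
//       mmLayer->setName("Alignment Matrix Multiply");
//       *alignmentScores = mmLayer->getOutput(0);
//   }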
-------------------------------------------------------------------------------- /src/sampleNMT/model/attention.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_ATTENTION_ 2 | #define SAMPLE_NMT_ATTENTION_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class Attention 12 | * 13 | * \brief calculates attention vector from context and decoder output vectors 14 | * 15 | */ 16 | class Attention : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | Attention() = default; 22 | 23 | /** 24 | * \brief add the attention vector calculation to the network 25 | */ 26 | virtual void addToModel( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* inputFromDecoder, 29 | nvinfer1::ITensor* context, 30 | nvinfer1::ITensor** attentionOutput) 31 | = 0; 32 | 33 | /** 34 | * \brief get the size of the attention vector 35 | */ 36 | virtual int getAttentionSize() = 0; 37 | 38 | ~Attention() override = default; 39 | }; 40 | } 41 | 42 | #endif // SAMPLE_NMT_ATTENTION_ 43 | -------------------------------------------------------------------------------- /src/sampleNMT/model/beamSearchPolicy.cpp: -------------------------------------------------------------------------------- 1 | #include "beamSearchPolicy.h" 2 | #ifdef _MSC_VER 3 | //Macro definition needed to avoid name collision with std::min/max and Windows.h min/max 4 | #define NOMINMAX 5 | #endif 6 | #include "nvToolsExt.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace nmtSample 13 | { 14 | BeamSearchPolicy::BeamSearchPolicy( 15 | int endSequenceId, 16 | LikelihoodCombinationOperator::ptr likelihoodCombinationOperator, 17 | int beamWidth) 18 | : mEndSequenceId(endSequenceId) 19 | , mLikelihoodCombinationOperator(likelihoodCombinationOperator) 20 | , mBeamWidth(beamWidth) 21 | { 22 | } 23 | 24 | void BeamSearchPolicy::initialize( 25 | int sampleCount, 26 | int* maxOutputSequenceLengths) 27 | { 28 | mSampleCount = sampleCount; 29 | mMaxOutputSequenceLengths.resize(mSampleCount); 30 | std::copy(maxOutputSequenceLengths, maxOutputSequenceLengths + mSampleCount, &mMaxOutputSequenceLengths[0]); 31 | 32 | mValidSamples.resize(mSampleCount); 33 | std::fill(mValidSamples.begin(), mValidSamples.end(), true); 34 | 35 | mCurrentLikelihoods.resize(mSampleCount * mBeamWidth); 36 | std::fill(mCurrentLikelihoods.begin(), mCurrentLikelihoods.end(), mLikelihoodCombinationOperator->init()); 37 | 38 | mBeamSearchTable.clear(); 39 | 40 | mTimestepId = 0; 41 | 42 | mCandidates.resize(mSampleCount); 43 | mCandidateLikelihoods.resize(mSampleCount); 44 | std::fill(mCandidateLikelihoods.begin(), mCandidateLikelihoods.end(), mLikelihoodCombinationOperator->smallerThanMinimalLikelihood()); 45 | } 46 | 47 | void BeamSearchPolicy::processTimestep( 48 | int validSampleCount, 49 | const float* hCombinedLikelihoods, 50 | const int* hVocabularyIndices, 51 | const int* hRayOptionIndices, 52 | int* hSourceRayIndices, 53 | float* hSourceLikelihoods) 54 | { 55 | ++mTimestepId; 56 | mBeamSearchTable.resize(mTimestepId * mSampleCount * mBeamWidth); 57 | auto baseBeamSearchTable = mBeamSearchTable.begin() + (mTimestepId - 1) * mSampleCount * mBeamWidth; 58 | 59 | for (int sampleId = 0; sampleId < validSampleCount; ++sampleId) 60 | { 61 | auto currentSourceRayIndices = hSourceRayIndices + sampleId * mBeamWidth; 62 | auto currentLikelihoods = hSourceLikelihoods + sampleId * mBeamWidth; 63 | auto 
currentBeamSearchTable = baseBeamSearchTable + sampleId * mBeamWidth; 64 | 65 | int rayId = 0; 66 | if (mValidSamples[sampleId]) 67 | { 68 | for (; rayId < mBeamWidth; ++rayId) 69 | { 70 | float optionCombinedLikelihood = hCombinedLikelihoods[sampleId * mBeamWidth + rayId]; 71 | 72 | // Check if the current candidate is already better than this option 73 | if (optionCombinedLikelihood <= mCandidateLikelihoods[sampleId]) 74 | break; // The remaining options are even worse 75 | 76 | int optionOriginalRayId = hRayOptionIndices[sampleId * mBeamWidth + rayId] / mBeamWidth; 77 | int optionVocabularyId = hVocabularyIndices[sampleId * mBeamWidth + rayId]; 78 | 79 | if ((optionVocabularyId == mEndSequenceId) || (mTimestepId >= mMaxOutputSequenceLengths[sampleId])) 80 | { 81 | // We have a new candidate output sequence for the sample 82 | mCandidateLikelihoods[sampleId] = optionCombinedLikelihood; 83 | auto& candidate = mCandidates[sampleId]; 84 | candidate.resize(mTimestepId); 85 | backtrack(mTimestepId - 2, sampleId, optionOriginalRayId, &candidate[0], mTimestepId - 2); 86 | candidate[mTimestepId - 1] = optionVocabularyId; 87 | break; 88 | } 89 | 90 | *(currentSourceRayIndices + rayId) = optionOriginalRayId; 91 | *(currentLikelihoods + rayId) = optionCombinedLikelihood; 92 | (currentBeamSearchTable + rayId)->vocabularyId = optionVocabularyId; 93 | (currentBeamSearchTable + rayId)->backtrackId = optionOriginalRayId; 94 | } 95 | 96 | // No valid rays left for the sample 97 | if (rayId == 0) 98 | mValidSamples[sampleId] = false; 99 | } 100 | 101 | // Mark the remaining rays as invalid ones 102 | for (; rayId < mBeamWidth; ++rayId) 103 | { 104 | *(currentSourceRayIndices + rayId) = 0; 105 | *(currentLikelihoods + rayId) = mLikelihoodCombinationOperator->smallerThanMinimalLikelihood(); 106 | (currentBeamSearchTable + rayId)->vocabularyId = mEndSequenceId; 107 | (currentBeamSearchTable + rayId)->backtrackId = 0; 108 | } 109 | } 110 | } 111 | 112 | int BeamSearchPolicy::getTailWithNoWorkRemaining() 113 | { 114 | for (int sampleId = mSampleCount - 1; sampleId >= 0; --sampleId) 115 | { 116 | if (mValidSamples[sampleId]) 117 | return sampleId + 1; 118 | } 119 | return 0; 120 | } 121 | 122 | void BeamSearchPolicy::readGeneratedResult( 123 | int sampleCount, 124 | int maxOutputSequenceLength, 125 | int* hOutputData, 126 | int* hActualOutputSequenceLengths) 127 | { 128 | for (int sampleId = 0; sampleId < sampleCount; ++sampleId) 129 | { 130 | if (mCandidateLikelihoods[sampleId] > mLikelihoodCombinationOperator->smallerThanMinimalLikelihood()) 131 | { 132 | // We have a candidate (finished sequence) 133 | std::copy_n( 134 | mCandidates[sampleId].begin(), 135 | std::min(static_cast(mCandidates[sampleId].size()), maxOutputSequenceLength), 136 | hOutputData + sampleId * maxOutputSequenceLength); 137 | hActualOutputSequenceLengths[sampleId] = mCandidates[sampleId].size(); 138 | } 139 | else 140 | { 141 | // We don't have a finished sequence generated, will output the unfinished one with the highest likelihood 142 | assert(mValidSamples[sampleId]); 143 | backtrack(mTimestepId - 1, sampleId, 0, hOutputData + sampleId * maxOutputSequenceLength, maxOutputSequenceLength - 1); 144 | hActualOutputSequenceLengths[sampleId] = mTimestepId; 145 | } 146 | } 147 | } 148 | 149 | void BeamSearchPolicy::backtrack( 150 | int lastTimestepId, 151 | int sampleId, 152 | int lastTimestepRayId, 153 | int* hOutputData, 154 | int lastTimestepWriteId) const 155 | { 156 | int rayId = lastTimestepRayId; 157 | for (int timestepId = 
lastTimestepId; timestepId >= 0; --timestepId) 158 | { 159 | const auto& entry = mBeamSearchTable[(timestepId * mSampleCount + sampleId) * mBeamWidth + rayId]; 160 | rayId = entry.backtrackId; 161 | if (timestepId <= lastTimestepWriteId) 162 | hOutputData[timestepId] = entry.vocabularyId; 163 | } 164 | } 165 | 166 | std::string BeamSearchPolicy::getInfo() 167 | { 168 | std::stringstream ss; 169 | ss << "Beam Search Policy, beam = " << mBeamWidth; 170 | return ss.str(); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/sampleNMT/model/beamSearchPolicy.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_BEAM_SEARCH_POLICY_ 2 | #define SAMPLE_NMT_BEAM_SEARCH_POLICY_ 3 | 4 | #include "../component.h" 5 | #include "likelihoodCombinationOperator.h" 6 | 7 | #include 8 | 9 | namespace nmtSample 10 | { 11 | /** \class BeamSearchPolicy 12 | * 13 | * \brief processes the results of one iteration of the generator with beam search and produces input for the next iteration 14 | * 15 | */ 16 | class BeamSearchPolicy : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | BeamSearchPolicy( 22 | int endSequenceId, 23 | LikelihoodCombinationOperator::ptr likelihoodCombinationOperator, 24 | int beamWidth); 25 | 26 | void initialize( 27 | int sampleCount, 28 | int* maxOutputSequenceLengths); 29 | 30 | void processTimestep( 31 | int validSampleCount, 32 | const float* hCombinedLikelihoods, 33 | const int* hVocabularyIndices, 34 | const int* hRayOptionIndices, 35 | int* hSourceRayIndices, 36 | float* hSourceLikelihoods); 37 | 38 | int getTailWithNoWorkRemaining(); 39 | 40 | void readGeneratedResult( 41 | int sampleCount, 42 | int maxOutputSequenceLength, 43 | int* hOutputData, 44 | int* hActualOutputSequenceLengths); 45 | 46 | std::string getInfo() override; 47 | 48 | ~BeamSearchPolicy() override = default; 49 | 50 | protected: 51 | struct Ray 52 | { 53 | int vocabularyId; 54 | int backtrackId; 55 | }; 56 | 57 | void backtrack( 58 | int lastTimestepId, 59 | int sampleId, 60 | int lastTimestepRayId, 61 | int* hOutputData, 62 | int lastTimestepWriteId) const; 63 | 64 | protected: 65 | int mEndSequenceId; 66 | LikelihoodCombinationOperator::ptr mLikelihoodCombinationOperator; 67 | int mBeamWidth; 68 | std::vector mValidSamples; 69 | std::vector mCurrentLikelihoods; 70 | std::vector mBeamSearchTable; 71 | int mSampleCount; 72 | std::vector mMaxOutputSequenceLengths; 73 | int mTimestepId; 74 | 75 | std::vector> mCandidates; 76 | std::vector mCandidateLikelihoods; 77 | }; 78 | } 79 | 80 | #endif // SAMPLE_NMT_BEAM_SEARCH_POLICY_ 81 | -------------------------------------------------------------------------------- /src/sampleNMT/model/componentWeights.cpp: -------------------------------------------------------------------------------- 1 | #include "componentWeights.h" 2 | #include 3 | #include 4 | 5 | namespace nmtSample 6 | { 7 | std::istream& operator>>(std::istream& input, ComponentWeights& value) 8 | { 9 | std::string footerString("trtsamplenmt"); 10 | size_t footerSize = sizeof(int32_t) + footerString.size(); 11 | char* footer = (char*) malloc(footerSize); 12 | 13 | input.seekg(0, std::ios::end); 14 | size_t fileSize = input.tellg(); 15 | 16 | input.seekg(-footerSize, std::ios::end); 17 | input.read(footer, footerSize); 18 | 19 | size_t metaDataCount = ((int32_t*) footer)[0]; 20 | std::string str(footer + sizeof(int32_t), footer + footerSize); 21 | 
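    // The weight file ends with a footer: a 32-bit metadata-entry count followed by the magic
    // string "trtsamplenmt". Verify the magic string before trusting the metadata block that
    // immediately precedes the footer.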
assert(footerString.compare(str) == 0); 22 | free(footer); 23 | 24 | input.seekg(-(footerSize + metaDataCount * sizeof(int32_t)), std::ios::end); 25 | value.mMetaData.resize(metaDataCount); 26 | size_t metaSize = metaDataCount * sizeof(int32_t); 27 | input.read((char*) (&value.mMetaData[0]), metaSize); 28 | 29 | size_t dataSize = fileSize - footerSize - metaSize; 30 | input.seekg(0, input.beg); 31 | value.mWeights.resize(dataSize); 32 | input.read(&value.mWeights[0], dataSize); 33 | return input; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/sampleNMT/model/componentWeights.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_COMPONENT_WEIGHTS_ 2 | #define SAMPLE_NMT_COMPONENT_WEIGHTS_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nmtSample 9 | { 10 | /** \class ComponentWeights 11 | * 12 | * \brief weights storage 13 | * 14 | */ 15 | class ComponentWeights 16 | { 17 | public: 18 | typedef std::shared_ptr ptr; 19 | 20 | ComponentWeights() = default; 21 | 22 | friend std::istream& operator>>(std::istream& input, ComponentWeights& value); 23 | 24 | public: 25 | std::vector mMetaData; 26 | std::vector mWeights; 27 | }; 28 | } 29 | 30 | #endif // SAMPLE_NMT_COMPONENT_WEIGHTS_ 31 | -------------------------------------------------------------------------------- /src/sampleNMT/model/context.cpp: -------------------------------------------------------------------------------- 1 | #include "context.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | void Context::addToModel( 9 | nvinfer1::INetworkDefinition* network, 10 | nvinfer1::ITensor* actualInputSequenceLengths, 11 | nvinfer1::ITensor* memoryStates, 12 | nvinfer1::ITensor* alignmentScores, 13 | nvinfer1::ITensor** contextOutput) 14 | { 15 | auto raggedSoftmaxLayer = network->addRaggedSoftMax(*alignmentScores, *actualInputSequenceLengths); 16 | assert(raggedSoftmaxLayer != nullptr); 17 | raggedSoftmaxLayer->setName("Context Ragged Softmax"); 18 | auto softmaxTensor = raggedSoftmaxLayer->getOutput(0); 19 | assert(softmaxTensor != nullptr); 20 | 21 | auto mmLayer = network->addMatrixMultiply( 22 | *softmaxTensor, 23 | false, 24 | *memoryStates, 25 | false); 26 | assert(mmLayer != nullptr); 27 | mmLayer->setName("Context Matrix Multiply"); 28 | *contextOutput = mmLayer->getOutput(0); 29 | assert(*contextOutput != nullptr); 30 | } 31 | 32 | std::string Context::getInfo() 33 | { 34 | return "Ragged softmax + Batch GEMM"; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/sampleNMT/model/context.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_CONTEXT_ 2 | #define SAMPLE_NMT_CONTEXT_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class Context 12 | * 13 | * \brief calculates context vector from raw alignment scores and memory states 14 | * 15 | */ 16 | class Context : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | Context() = default; 22 | 23 | /** 24 | * \brief add the context vector calculation to the network 25 | */ 26 | void addToModel( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* actualInputSequenceLengths, 29 | nvinfer1::ITensor* memoryStates, 30 | nvinfer1::ITensor* alignmentScores, 31 | nvinfer1::ITensor** contextOutput); 32 | 33 | std::string getInfo() 
override; 34 | 35 | ~Context() override = default; 36 | }; 37 | } 38 | 39 | #endif // SAMPLE_NMT_CONTEXT_ 40 | -------------------------------------------------------------------------------- /src/sampleNMT/model/debugUtil.cpp: -------------------------------------------------------------------------------- 1 | #include "debugUtil.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "../cudaError.h" 7 | 8 | namespace nmtSample 9 | { 10 | std::list DebugUtil::mPlugins; 11 | 12 | DebugUtil::DumpTensorPlugin::DumpTensorPlugin(std::shared_ptr out) 13 | : mOut(out) 14 | { 15 | } 16 | 17 | int DebugUtil::DumpTensorPlugin::getNbOutputs() const 18 | { 19 | return 1; 20 | } 21 | 22 | nvinfer1::Dims DebugUtil::DumpTensorPlugin::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) 23 | { 24 | return inputs[0]; 25 | } 26 | 27 | void DebugUtil::DumpTensorPlugin::configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize) 28 | { 29 | mDims = inputDims[0]; 30 | 31 | *mOut << "Max batch size = " << maxBatchSize << std::endl; 32 | *mOut << "Tensor dimensions = "; 33 | mTensorVolume = 1; 34 | for (int i = 0; i < mDims.nbDims; ++i) 35 | { 36 | if (i > 0) 37 | *mOut << "x"; 38 | *mOut << mDims.d[i]; 39 | mTensorVolume *= mDims.d[i]; 40 | } 41 | mElemsPerRow = 1; 42 | for (int i = mDims.nbDims - 1; i >= 0; --i) 43 | { 44 | if (mElemsPerRow == 1) 45 | mElemsPerRow *= mDims.d[i]; 46 | } 47 | *mOut << std::endl; 48 | 49 | mData = std::make_shared>(mTensorVolume * maxBatchSize); 50 | } 51 | 52 | int DebugUtil::DumpTensorPlugin::initialize() 53 | { 54 | return 0; 55 | } 56 | 57 | void DebugUtil::DumpTensorPlugin::terminate() 58 | { 59 | mOut.reset(); 60 | mData.reset(); 61 | } 62 | 63 | size_t DebugUtil::DumpTensorPlugin::getWorkspaceSize(int maxBatchSize) const 64 | { 65 | return 0; 66 | } 67 | 68 | int DebugUtil::DumpTensorPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) 69 | { 70 | int totalElems = batchSize * mTensorVolume; 71 | 72 | CUDA_CHECK(cudaMemcpyAsync(*mData, inputs[0], totalElems * sizeof(float), cudaMemcpyDeviceToHost, stream)); 73 | CUDA_CHECK(cudaStreamSynchronize(stream)); 74 | CUDA_CHECK(cudaMemcpyAsync(outputs[0], inputs[0], totalElems * sizeof(float), cudaMemcpyDeviceToDevice, stream)); 75 | 76 | *mOut << "Batch size = " << batchSize << "\n"; 77 | int rowCount = totalElems / mElemsPerRow; 78 | for (int rowId = 0; rowId < rowCount; ++rowId) 79 | { 80 | for (int i = 0; i < mElemsPerRow; ++i) 81 | { 82 | if (i > 0) 83 | *mOut << " "; 84 | *mOut << (*mData)[rowId * mElemsPerRow + i]; 85 | } 86 | *mOut << "\n"; 87 | } 88 | *mOut << std::endl; 89 | 90 | return 0; 91 | } 92 | 93 | size_t DebugUtil::DumpTensorPlugin::getSerializationSize() 94 | { 95 | assert(0); 96 | return 0; 97 | } 98 | 99 | void DebugUtil::DumpTensorPlugin::serialize(void* buffer) 100 | { 101 | assert(0); 102 | } 103 | 104 | void DebugUtil::addDumpTensorToStream( 105 | nvinfer1::INetworkDefinition* network, 106 | nvinfer1::ITensor* input, 107 | nvinfer1::ITensor** output, 108 | std::shared_ptr out) 109 | { 110 | assert(!input->getBroadcastAcrossBatch()); 111 | auto plugin = std::make_shared(out); 112 | nvinfer1::ITensor* inputTensors[] = {input}; 113 | auto pluginLayer = network->addPlugin(inputTensors, 1, *plugin); 114 | assert(pluginLayer != nullptr); 115 | *output = pluginLayer->getOutput(0); 116 | assert(*output != nullptr); 117 | mPlugins.push_back(plugin); 118 | } 119 | } 120 | 
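A minimal usage sketch (reader-added, not part of the sample): the static helper above splices the dump plugin between a producer tensor and its consumers, so the caller has to rewire downstream layers onto the plugin's output. The stream argument is assumed here to be a std::shared_ptr to a std::ostream, matching how mOut is dereferenced in the plugin; the function and file names in the sketch are placeholders.

#include <fstream>
#include <memory>
#include "NvInfer.h"
#include "debugUtil.h"

// Dump an intermediate tensor (e.g. raw alignment scores) to a text file while
// leaving the data flowing through the network unchanged.
nvinfer1::ITensor* attachDump(nvinfer1::INetworkDefinition* network, nvinfer1::ITensor* tensorToInspect)
{
    auto log = std::make_shared<std::ofstream>("dumped_tensor.txt");
    nvinfer1::ITensor* forwarded = nullptr;
    nmtSample::DebugUtil::addDumpTensorToStream(network, tensorToInspect, &forwarded, log);
    // Downstream layers must consume 'forwarded' instead of 'tensorToInspect';
    // otherwise the plugin layer contributes to no network output and will not run.
    return forwarded;
}
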
-------------------------------------------------------------------------------- /src/sampleNMT/model/debugUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_DEBUG_UTIL_ 2 | #define SAMPLE_NMT_DEBUG_UTIL_ 3 | 4 | #include "NvInfer.h" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "../pinnedHostBuffer.h" 11 | 12 | namespace nmtSample 13 | { 14 | /** \class DebugUtil 15 | * 16 | * \brief container for static debug utility functions 17 | * 18 | */ 19 | class DebugUtil 20 | { 21 | private: 22 | class DumpTensorPlugin : public nvinfer1::IPlugin 23 | { 24 | public: 25 | typedef std::shared_ptr ptr; 26 | 27 | DumpTensorPlugin(std::shared_ptr out); 28 | 29 | ~DumpTensorPlugin() override = default; 30 | 31 | int getNbOutputs() const override; 32 | 33 | nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) override; 34 | 35 | void configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize) override; 36 | 37 | int initialize() override; 38 | 39 | void terminate() override; 40 | 41 | size_t getWorkspaceSize(int maxBatchSize) const override; 42 | 43 | int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override; 44 | 45 | size_t getSerializationSize() override; 46 | 47 | void serialize(void* buffer) override; 48 | 49 | private: 50 | std::shared_ptr mOut; 51 | nvinfer1::Dims mDims; 52 | int mMaxBatchSize; 53 | int mTensorVolume; 54 | int mElemsPerRow; 55 | PinnedHostBuffer::ptr mData; 56 | }; 57 | 58 | public: 59 | static void addDumpTensorToStream( 60 | nvinfer1::INetworkDefinition* network, 61 | nvinfer1::ITensor* input, 62 | nvinfer1::ITensor** output, 63 | std::shared_ptr out); 64 | 65 | private: 66 | static std::list mPlugins; 67 | }; 68 | } 69 | 70 | #endif // SAMPLE_NMT_DEBUG_UTIL_ 71 | -------------------------------------------------------------------------------- /src/sampleNMT/model/decoder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_DECODER_ 2 | #define SAMPLE_NMT_DECODER_ 3 | 4 | #include 5 | #include 6 | 7 | #include "../component.h" 8 | #include "NvInfer.h" 9 | 10 | namespace nmtSample 11 | { 12 | /** \class Decoder 13 | * 14 | * \brief encodes single input into output states 15 | * 16 | */ 17 | class Decoder : public Component 18 | { 19 | public: 20 | typedef std::shared_ptr ptr; 21 | 22 | Decoder() = default; 23 | 24 | /** 25 | * \brief add the memory, cell, and hidden states to the network 26 | */ 27 | virtual void addToModel( 28 | nvinfer1::INetworkDefinition* network, 29 | nvinfer1::ITensor* inputData, 30 | nvinfer1::ITensor** inputStates, 31 | nvinfer1::ITensor** outputData, 32 | nvinfer1::ITensor** outputStates) 33 | = 0; 34 | 35 | /** 36 | * \brief get the sizes (vector of them) of the hidden state vectors 37 | */ 38 | virtual std::vector getStateSizes() = 0; 39 | 40 | ~Decoder() override = default; 41 | }; 42 | } 43 | 44 | #endif // SAMPLE_NMT_DECODER_ 45 | -------------------------------------------------------------------------------- /src/sampleNMT/model/embedder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_EMBEDDER_ 2 | #define SAMPLE_NMT_EMBEDDER_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class Embedder 12 | * 13 | * \brief projects 1-hot 
vectors (represented as a vector with indices) into dense embedding space 14 | * 15 | */ 16 | class Embedder : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | Embedder() = default; 22 | 23 | /** 24 | * \brief add the embedding vector calculation to the network 25 | */ 26 | virtual void addToModel( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* input, 29 | nvinfer1::ITensor** output) 30 | = 0; 31 | 32 | /** 33 | * \brief get the upper bound for the possible values of indices 34 | */ 35 | virtual int getInputDimensionSize() = 0; 36 | 37 | ~Embedder() override = default; 38 | }; 39 | } 40 | 41 | #endif // SAMPLE_NMT_EMBEDDER_ 42 | -------------------------------------------------------------------------------- /src/sampleNMT/model/encoder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_ENCODER_ 2 | #define SAMPLE_NMT_ENCODER_ 3 | 4 | #include 5 | #include 6 | 7 | #include "../component.h" 8 | #include "NvInfer.h" 9 | 10 | namespace nmtSample 11 | { 12 | /** \class Encoder 13 | * 14 | * \brief encodes input sentences into output states 15 | * 16 | */ 17 | class Encoder : public Component 18 | { 19 | public: 20 | typedef std::shared_ptr ptr; 21 | 22 | Encoder() = default; 23 | 24 | /** 25 | * \brief add the memory and last timestep states to the network 26 | * lastTimestepHiddenStates is the pointer to the tensor where the encoder stores all layer hidden states for the last timestep (which is dependent on the sample), 27 | * the function should define the tensor, it could be nullptr indicating these data are not needed 28 | */ 29 | virtual void addToModel( 30 | nvinfer1::INetworkDefinition* network, 31 | int maxInputSequenceLength, 32 | nvinfer1::ITensor* inputEmbeddedData, 33 | nvinfer1::ITensor* actualInputSequenceLengths, 34 | nvinfer1::ITensor** inputStates, 35 | nvinfer1::ITensor** memoryStates, 36 | nvinfer1::ITensor** lastTimestepStates) 37 | = 0; 38 | 39 | /** 40 | * \brief get the size of the memory state vector 41 | */ 42 | virtual int getMemoryStatesSize() = 0; 43 | 44 | /** 45 | * \brief get the sizes (vector of them) of the hidden state vectors 46 | */ 47 | virtual std::vector getStateSizes() = 0; 48 | 49 | ~Encoder() override = default; 50 | }; 51 | } 52 | 53 | #endif // SAMPLE_NMT_ENCODER_ 54 | -------------------------------------------------------------------------------- /src/sampleNMT/model/likelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_LIKELIHOOD_ 2 | #define SAMPLE_NMT_LIKELIHOOD_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | #include "likelihoodCombinationOperator.h" 9 | 10 | namespace nmtSample 11 | { 12 | /** \class Likelihood 13 | * 14 | * \brief calculates likelihood and TopK indices for the raw input logits 15 | * 16 | */ 17 | class Likelihood : public Component 18 | { 19 | public: 20 | typedef std::shared_ptr ptr; 21 | 22 | Likelihood() = default; 23 | 24 | virtual LikelihoodCombinationOperator::ptr getLikelihoodCombinationOperator() const = 0; 25 | 26 | /** 27 | * \brief add calculation of likelihood and TopK indices to the network 28 | */ 29 | virtual void addToModel( 30 | nvinfer1::INetworkDefinition* network, 31 | int beamWidth, 32 | nvinfer1::ITensor* inputLogits, 33 | nvinfer1::ITensor* inputLikelihoods, 34 | nvinfer1::ITensor** newCombinedLikelihoods, 35 | nvinfer1::ITensor** newRayOptionIndices, 36 | nvinfer1::ITensor** newVocabularyIndices) 37 
| = 0; 38 | 39 | ~Likelihood() override = default; 40 | }; 41 | } 42 | 43 | #endif // SAMPLE_NMT_LIKELIHOOD_ 44 | -------------------------------------------------------------------------------- /src/sampleNMT/model/likelihoodCombinationOperator.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_LIKELIHOOD_COMBINATION_ 2 | #define SAMPLE_NMT_LIKELIHOOD_COMBINATION_ 3 | 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | class LikelihoodCombinationOperator 9 | { 10 | public: 11 | typedef std::shared_ptr ptr; 12 | 13 | // The return value should be less or equal to rayLikelihood 14 | virtual float combine(float rayLikelihood, float optionLikelihood) const = 0; 15 | 16 | virtual float init() const = 0; 17 | 18 | virtual float smallerThanMinimalLikelihood() const = 0; 19 | 20 | virtual ~LikelihoodCombinationOperator() = default; 21 | 22 | protected: 23 | LikelihoodCombinationOperator() = default; 24 | }; 25 | } 26 | 27 | #endif // SAMPLE_NMT_LIKELIHOOD_COMBINATION_ 28 | -------------------------------------------------------------------------------- /src/sampleNMT/model/lstmDecoder.cpp: -------------------------------------------------------------------------------- 1 | #include "lstmDecoder.h" 2 | 3 | #include "..\trtUtil.h" 4 | 5 | #include "debugUtil.h" 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | namespace nmtSample 12 | { 13 | LSTMDecoder::LSTMDecoder(ComponentWeights::ptr weights) 14 | : mWeights(weights) 15 | { 16 | // please refer to chpt_to_bin.py for the details on the format 17 | assert(mWeights->mMetaData.size() >= 4); 18 | nvinfer1::DataType dataType = static_cast(mWeights->mMetaData[0]); 19 | assert(dataType == nvinfer1::DataType::kFLOAT); 20 | mRNNKind = mWeights->mMetaData[1]; 21 | mNumLayers = mWeights->mMetaData[2]; 22 | mNumUnits = mWeights->mMetaData[3]; 23 | size_t elementSize = inferTypeToBytes(dataType); 24 | // compute weights offsets 25 | size_t dataSize = 2 * mNumUnits; 26 | size_t kernelOffset = 0; 27 | size_t biasStartOffset = ((4 * dataSize + 4 * mNumUnits) * mNumUnits) * elementSize 28 | + 8 * mNumUnits * mNumUnits * (mNumLayers - 1) * elementSize; 29 | size_t biasOffset = biasStartOffset; 30 | int numGates = 8; 31 | for (int layerIndex = 0; layerIndex < mNumLayers; layerIndex++) 32 | { 33 | for (int gateIndex = 0; gateIndex < numGates; gateIndex++) 34 | { 35 | // encoder input size == mNumUnits 36 | int64_t inputSize = ((layerIndex == 0) && (gateIndex < 4)) ? 
dataSize : mNumUnits; 37 | nvinfer1::Weights gateKernelWeights{dataType, &mWeights->mWeights[0] + kernelOffset, inputSize * mNumUnits}; 38 | nvinfer1::Weights gateBiasWeights{dataType, &mWeights->mWeights[0] + biasOffset, mNumUnits}; 39 | mGateKernelWeights.push_back(std::move(gateKernelWeights)); 40 | mGateBiasWeights.push_back(std::move(gateBiasWeights)); 41 | kernelOffset = kernelOffset + inputSize * mNumUnits * elementSize; 42 | biasOffset = biasOffset + mNumUnits * elementSize; 43 | } 44 | } 45 | assert(kernelOffset + biasOffset - biasStartOffset == mWeights->mWeights.size()); 46 | } 47 | 48 | void LSTMDecoder::addToModel( 49 | nvinfer1::INetworkDefinition* network, 50 | nvinfer1::ITensor* inputEmbeddedData, 51 | nvinfer1::ITensor** inputStates, 52 | nvinfer1::ITensor** outputData, 53 | nvinfer1::ITensor** outputStates) 54 | { 55 | int beamWidth; 56 | int inputWidth; 57 | { 58 | auto dims = inputEmbeddedData->getDimensions(); 59 | assert(dims.nbDims == 2); 60 | assert(dims.type[0] == nvinfer1::DimensionType::kINDEX); 61 | beamWidth = dims.d[0]; 62 | assert(dims.type[1] == nvinfer1::DimensionType::kCHANNEL); 63 | inputWidth = dims.d[1]; 64 | } 65 | 66 | nvinfer1::ITensor* shuffledInput; 67 | { 68 | auto shuffleLayer = network->addShuffle(*inputEmbeddedData); 69 | assert(shuffleLayer != nullptr); 70 | shuffleLayer->setName("Reshape input for LSTM decoder"); 71 | nvinfer1::Dims shuffleDims{3, {beamWidth, 1, inputWidth}, {nvinfer1::DimensionType::kINDEX, nvinfer1::DimensionType::kSEQUENCE, nvinfer1::DimensionType::kCHANNEL}}; 72 | shuffleLayer->setReshapeDimensions(shuffleDims); 73 | shuffledInput = shuffleLayer->getOutput(0); 74 | assert(shuffledInput != nullptr); 75 | } 76 | 77 | auto decoderLayer = network->addRNNv2( 78 | *shuffledInput, 79 | mNumLayers, 80 | mNumUnits, 81 | 1, 82 | nvinfer1::RNNOperation::kLSTM); 83 | assert(decoderLayer != nullptr); 84 | decoderLayer->setName("LSTM decoder"); 85 | 86 | decoderLayer->setInputMode(nvinfer1::RNNInputMode::kLINEAR); 87 | decoderLayer->setDirection(nvinfer1::RNNDirection::kUNIDIRECTION); 88 | 89 | std::vector gateOrder({nvinfer1::RNNGateType::kFORGET, 90 | nvinfer1::RNNGateType::kINPUT, 91 | nvinfer1::RNNGateType::kCELL, 92 | nvinfer1::RNNGateType::kOUTPUT}); 93 | for (size_t i = 0; i < mGateKernelWeights.size(); i++) 94 | { 95 | // we have 4 + 4 gates 96 | bool isW = ((i % 8) < 4); 97 | decoderLayer->setWeightsForGate(i / 8, gateOrder[i % 4], isW, mGateKernelWeights[i]); 98 | decoderLayer->setBiasForGate(i / 8, gateOrder[i % 4], isW, mGateBiasWeights[i]); 99 | } 100 | 101 | decoderLayer->setHiddenState(*inputStates[0]); 102 | decoderLayer->setCellState(*inputStates[1]); 103 | *outputData = decoderLayer->getOutput(0); 104 | assert(*outputData != nullptr); 105 | 106 | { 107 | auto shuffleLayer = network->addShuffle(**outputData); 108 | assert(shuffleLayer != nullptr); 109 | shuffleLayer->setName("Reshape output from LSTM decoder"); 110 | nvinfer1::Dims shuffleDims{2, {beamWidth, mNumUnits}, {nvinfer1::DimensionType::kINDEX, nvinfer1::DimensionType::kCHANNEL}}; 111 | shuffleLayer->setReshapeDimensions(shuffleDims); 112 | auto shuffledOutput = shuffleLayer->getOutput(0); 113 | assert(shuffledOutput != nullptr); 114 | *outputData = shuffledOutput; 115 | } 116 | 117 | // Per layer hidden output 118 | outputStates[0] = decoderLayer->getOutput(1); 119 | assert(outputStates[0] != nullptr); 120 | 121 | // Per layer cell output 122 | outputStates[1] = decoderLayer->getOutput(2); 123 | assert(outputStates[1] != nullptr); 124 | } 125 | 126 | 
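// Reader-added note (not part of the original sample code): addToModel above
// treats the beam width as the batch dimension of the RNNv2 layer. The 2-D
// {beamWidth, inputWidth} embedding is reshaped to {beamWidth, 1, inputWidth}
// because the layer is created with a maximum sequence length of 1, and the
// decoder output is squeezed back to {beamWidth, mNumUnits} afterwards.
// Each layer carries 8 gate weight/bias pairs; indices with (i % 8) < 4 hold
// the input ("W") weights and the remaining four hold the recurrent weights,
// hence setWeightsForGate(i / 8, gateOrder[i % 4], isW, ...) in the loop above.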
std::vector LSTMDecoder::getStateSizes() 127 | { 128 | nvinfer1::Dims hiddenStateDims{2, {mNumLayers, mNumUnits}, {nvinfer1::DimensionType::kSPATIAL, nvinfer1::DimensionType::kCHANNEL}}; 129 | nvinfer1::Dims cellStateDims{2, {mNumLayers, mNumUnits}, {nvinfer1::DimensionType::kSPATIAL, nvinfer1::DimensionType::kCHANNEL}}; 130 | return std::vector({hiddenStateDims, cellStateDims}); 131 | } 132 | 133 | std::string LSTMDecoder::getInfo() 134 | { 135 | std::stringstream ss; 136 | ss << "LSTM Decoder, num layers = " << mNumLayers << ", num units = " << mNumUnits; 137 | return ss.str(); 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/sampleNMT/model/lstmDecoder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_LSTM_DECODER_ 2 | #define SAMPLE_NMT_LSTM_DECODER_ 3 | 4 | #include "decoder.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class LSTMDecoder 11 | * 12 | * \brief encodes single input into output states with LSTM 13 | * 14 | */ 15 | class LSTMDecoder : public Decoder 16 | { 17 | public: 18 | LSTMDecoder(ComponentWeights::ptr weights); 19 | 20 | void addToModel( 21 | nvinfer1::INetworkDefinition* network, 22 | nvinfer1::ITensor* inputEmbeddedData, 23 | nvinfer1::ITensor** inputStates, 24 | nvinfer1::ITensor** outputData, 25 | nvinfer1::ITensor** outputStates) override; 26 | 27 | std::vector getStateSizes() override; 28 | 29 | std::string getInfo() override; 30 | 31 | ~LSTMDecoder() override = default; 32 | 33 | protected: 34 | ComponentWeights::ptr mWeights; 35 | std::vector mGateKernelWeights; 36 | std::vector mGateBiasWeights; 37 | bool mRNNKind; 38 | int mNumLayers; 39 | int mNumUnits; 40 | }; 41 | } 42 | 43 | #endif // SAMPLE_NMT_LSTM_DECODER_ 44 | -------------------------------------------------------------------------------- /src/sampleNMT/model/lstmEncoder.cpp: -------------------------------------------------------------------------------- 1 | #include "lstmEncoder.h" 2 | #include "..\trtUtil.h" 3 | 4 | #include 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | 10 | LSTMEncoder::LSTMEncoder(ComponentWeights::ptr weights) 11 | : mWeights(weights) 12 | { 13 | // please refer to chpt_to_bin.py for the details on the format 14 | assert(mWeights->mMetaData.size() >= 4); 15 | const nvinfer1::DataType dataType = static_cast(mWeights->mMetaData[0]); 16 | assert(dataType == nvinfer1::DataType::kFLOAT); 17 | mRNNKind = mWeights->mMetaData[1]; 18 | mNumLayers = mWeights->mMetaData[2]; 19 | mNumUnits = mWeights->mMetaData[3]; 20 | 21 | size_t elementSize = inferTypeToBytes(dataType); 22 | // compute weights offsets 23 | size_t kernelOffset = 0; 24 | size_t biasStartOffset = ((4 * mNumUnits + 4 * mNumUnits) * mNumUnits * mNumLayers) * elementSize; 25 | size_t biasOffset = biasStartOffset; 26 | int numGates = 8; 27 | for (int layerIndex = 0; layerIndex < mNumLayers; layerIndex++) 28 | { 29 | for (int gateIndex = 0; gateIndex < numGates; gateIndex++) 30 | { 31 | // encoder input size == mNumUnits 32 | int64_t inputSize = ((layerIndex == 0) && (gateIndex < 4)) ? 
mNumUnits : mNumUnits; 33 | nvinfer1::Weights gateKernelWeights{dataType, &mWeights->mWeights[0] + kernelOffset, inputSize * mNumUnits}; 34 | nvinfer1::Weights gateBiasWeights{dataType, &mWeights->mWeights[0] + biasOffset, mNumUnits}; 35 | mGateKernelWeights.push_back(std::move(gateKernelWeights)); 36 | mGateBiasWeights.push_back(std::move(gateBiasWeights)); 37 | kernelOffset = kernelOffset + inputSize * mNumUnits * elementSize; 38 | biasOffset = biasOffset + mNumUnits * elementSize; 39 | } 40 | } 41 | assert(kernelOffset + biasOffset - biasStartOffset == mWeights->mWeights.size()); 42 | } 43 | 44 | void LSTMEncoder::addToModel( 45 | nvinfer1::INetworkDefinition* network, 46 | int maxInputSequenceLength, 47 | nvinfer1::ITensor* inputEmbeddedData, 48 | nvinfer1::ITensor* actualInputSequenceLengths, 49 | nvinfer1::ITensor** inputStates, 50 | nvinfer1::ITensor** memoryStates, 51 | nvinfer1::ITensor** lastTimestepStates) 52 | { 53 | auto encoderLayer = network->addRNNv2( 54 | *inputEmbeddedData, 55 | mNumLayers, 56 | mNumUnits, 57 | maxInputSequenceLength, 58 | nvinfer1::RNNOperation::kLSTM); 59 | assert(encoderLayer != nullptr); 60 | encoderLayer->setName("LSTM encoder"); 61 | 62 | encoderLayer->setSequenceLengths(*actualInputSequenceLengths); 63 | encoderLayer->setInputMode(nvinfer1::RNNInputMode::kLINEAR); 64 | encoderLayer->setDirection(nvinfer1::RNNDirection::kUNIDIRECTION); 65 | 66 | std::vector gateOrder({nvinfer1::RNNGateType::kFORGET, 67 | nvinfer1::RNNGateType::kINPUT, 68 | nvinfer1::RNNGateType::kCELL, 69 | nvinfer1::RNNGateType::kOUTPUT}); 70 | for (size_t i = 0; i < mGateKernelWeights.size(); i++) 71 | { 72 | // we have 4 + 4 gates 73 | bool isW = ((i % 8) < 4); 74 | encoderLayer->setWeightsForGate(i / 8, gateOrder[i % 4], isW, mGateKernelWeights[i]); 75 | encoderLayer->setBiasForGate(i / 8, gateOrder[i % 4], isW, mGateBiasWeights[i]); 76 | } 77 | 78 | encoderLayer->setHiddenState(*inputStates[0]); 79 | encoderLayer->setCellState(*inputStates[1]); 80 | *memoryStates = encoderLayer->getOutput(0); 81 | assert(*memoryStates != nullptr); 82 | 83 | if (lastTimestepStates) 84 | { 85 | // Per layer hidden output 86 | lastTimestepStates[0] = encoderLayer->getOutput(1); 87 | assert(lastTimestepStates[0] != nullptr); 88 | 89 | // Per layer cell output 90 | lastTimestepStates[1] = encoderLayer->getOutput(2); 91 | assert(lastTimestepStates[1] != nullptr); 92 | } 93 | } 94 | 95 | int LSTMEncoder::getMemoryStatesSize() 96 | { 97 | return mNumUnits; 98 | } 99 | 100 | std::vector LSTMEncoder::getStateSizes() 101 | { 102 | nvinfer1::Dims hiddenStateDims{2, {mNumLayers, mNumUnits}, {nvinfer1::DimensionType::kSPATIAL, nvinfer1::DimensionType::kCHANNEL}}; 103 | nvinfer1::Dims cellStateDims{2, {mNumLayers, mNumUnits}, {nvinfer1::DimensionType::kSPATIAL, nvinfer1::DimensionType::kCHANNEL}}; 104 | return std::vector({hiddenStateDims, cellStateDims}); 105 | } 106 | 107 | std::string LSTMEncoder::getInfo() 108 | { 109 | std::stringstream ss; 110 | ss << "LSTM Encoder, num layers = " << mNumLayers << ", num units = " << mNumUnits; 111 | return ss.str(); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/sampleNMT/model/lstmEncoder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_LSTM_ENCODER_ 2 | #define SAMPLE_NMT_LSTM_ENCODER_ 3 | 4 | #include "encoder.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class LSTMEncoder 11 | * 12 | * \brief encodes input 
sentences into output states using LSTM 13 | * 14 | */ 15 | class LSTMEncoder : public Encoder 16 | { 17 | public: 18 | LSTMEncoder(ComponentWeights::ptr weights); 19 | 20 | void addToModel( 21 | nvinfer1::INetworkDefinition* network, 22 | int maxInputSequenceLength, 23 | nvinfer1::ITensor* inputEmbeddedData, 24 | nvinfer1::ITensor* actualInputSequenceLengths, 25 | nvinfer1::ITensor** inputStates, 26 | nvinfer1::ITensor** memoryStates, 27 | nvinfer1::ITensor** lastTimestepStates) override; 28 | 29 | int getMemoryStatesSize() override; 30 | 31 | std::vector getStateSizes() override; 32 | 33 | std::string getInfo() override; 34 | 35 | ~LSTMEncoder() override = default; 36 | 37 | protected: 38 | ComponentWeights::ptr mWeights; 39 | std::vector mGateKernelWeights; 40 | std::vector mGateBiasWeights; 41 | bool mRNNKind; 42 | int mNumLayers; 43 | int mNumUnits; 44 | }; 45 | } 46 | 47 | #endif // SAMPLE_NMT_LSTM_ENCODER_ 48 | -------------------------------------------------------------------------------- /src/sampleNMT/model/multiplicativeAlignment.cpp: -------------------------------------------------------------------------------- 1 | #include "multiplicativeAlignment.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | MultiplicativeAlignment::MultiplicativeAlignment(ComponentWeights::ptr weights) 9 | : mWeights(weights) 10 | { 11 | // please refer to chpt_to_bin.py for the details on the format 12 | assert(mWeights->mMetaData.size() >= 3); 13 | mKernelWeights.type = static_cast(mWeights->mMetaData[0]); 14 | assert(mKernelWeights.type == nvinfer1::DataType::kFLOAT); 15 | mInputChannelCount = mWeights->mMetaData[1]; 16 | mOutputChannelCount = mWeights->mMetaData[2]; 17 | 18 | mKernelWeights.values = (void*) (&mWeights->mWeights[0]); 19 | mKernelWeights.count = mInputChannelCount * mOutputChannelCount; 20 | } 21 | 22 | void MultiplicativeAlignment::addToModel( 23 | nvinfer1::INetworkDefinition* network, 24 | nvinfer1::ITensor* attentionKeys, 25 | nvinfer1::ITensor* queryStates, 26 | nvinfer1::ITensor** alignmentScores) 27 | { 28 | auto mmLayer = network->addMatrixMultiply( 29 | *queryStates, 30 | false, 31 | *attentionKeys, 32 | true); 33 | assert(mmLayer != nullptr); 34 | mmLayer->setName("Raw Alignment Scores MM (Queries x Keys) in multiplicative attention"); 35 | *alignmentScores = mmLayer->getOutput(0); 36 | assert(*alignmentScores != nullptr); 37 | } 38 | 39 | void MultiplicativeAlignment::addAttentionKeys( 40 | nvinfer1::INetworkDefinition* network, 41 | nvinfer1::ITensor* memoryStates, 42 | nvinfer1::ITensor** attentionKeys) 43 | { 44 | nvinfer1::Dims weightDims{2, {mInputChannelCount, mOutputChannelCount}, {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kCHANNEL}}; 45 | auto constLayer = network->addConstant(weightDims, mKernelWeights); 46 | assert(constLayer != nullptr); 47 | constLayer->setName("Matrix in multiplicative attention"); 48 | auto weights = constLayer->getOutput(0); 49 | assert(weights != nullptr); 50 | 51 | auto mmLayer = network->addMatrixMultiply( 52 | *memoryStates, 53 | false, 54 | *weights, 55 | false); 56 | assert(mmLayer != nullptr); 57 | mmLayer->setName("Attention Keys MM in multiplicative attention"); 58 | *attentionKeys = mmLayer->getOutput(0); 59 | assert(*attentionKeys != nullptr); 60 | } 61 | 62 | int MultiplicativeAlignment::getSourceStatesSize() 63 | { 64 | return mInputChannelCount; 65 | } 66 | 67 | int MultiplicativeAlignment::getAttentionKeySize() 68 | { 69 | return mOutputChannelCount; 70 | } 71 | 72 | std::string 
MultiplicativeAlignment::getInfo() 73 | { 74 | std::stringstream ss; 75 | ss << "Multiplicative Alignment, source states size = " << mInputChannelCount << ", attention keys size = " << mOutputChannelCount; 76 | return ss.str(); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/sampleNMT/model/multiplicativeAlignment.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_MULTIPLICATIVE_ALIGNMENT_ 2 | #define SAMPLE_NMT_MULTIPLICATIVE_ALIGNMENT_ 3 | 4 | #include "alignment.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class MultiplicativeAlignment 11 | * 12 | * \brief alignment scores from Luong attention mechanism 13 | * 14 | */ 15 | class MultiplicativeAlignment : public Alignment 16 | { 17 | public: 18 | MultiplicativeAlignment(ComponentWeights::ptr weights); 19 | 20 | void addToModel( 21 | nvinfer1::INetworkDefinition* network, 22 | nvinfer1::ITensor* attentionKeys, 23 | nvinfer1::ITensor* queryStates, 24 | nvinfer1::ITensor** alignmentScores) override; 25 | 26 | void addAttentionKeys( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* memoryStates, 29 | nvinfer1::ITensor** attentionKeys) override; 30 | 31 | int getSourceStatesSize() override; 32 | 33 | int getAttentionKeySize() override; 34 | 35 | std::string getInfo() override; 36 | 37 | ~MultiplicativeAlignment() override = default; 38 | 39 | protected: 40 | ComponentWeights::ptr mWeights; 41 | nvinfer1::Weights mKernelWeights; 42 | int mInputChannelCount; 43 | int mOutputChannelCount; 44 | }; 45 | } 46 | 47 | #endif // SAMPLE_NMT_MULTIPLICATIVE_ALIGNMENT_ 48 | -------------------------------------------------------------------------------- /src/sampleNMT/model/projection.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_PROJECTION_ 2 | #define SAMPLE_NMT_PROJECTION_ 3 | 4 | #include 5 | 6 | #include "../component.h" 7 | #include "NvInfer.h" 8 | 9 | namespace nmtSample 10 | { 11 | /** \class Projection 12 | * 13 | * \brief calculates raw logits 14 | * 15 | */ 16 | class Projection : public Component 17 | { 18 | public: 19 | typedef std::shared_ptr ptr; 20 | 21 | Projection() = default; 22 | 23 | /** 24 | * \brief add raw logits to the network 25 | */ 26 | virtual void addToModel( 27 | nvinfer1::INetworkDefinition* network, 28 | nvinfer1::ITensor* input, 29 | nvinfer1::ITensor** outputLogits) 30 | = 0; 31 | 32 | /** 33 | * \brief get the size of raw logits vector 34 | */ 35 | virtual int getOutputSize() = 0; 36 | 37 | ~Projection() override = default; 38 | }; 39 | } 40 | 41 | #endif // SAMPLE_NMT_PROJECTION_ 42 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpAttention.cpp: -------------------------------------------------------------------------------- 1 | #include "slpAttention.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | SLPAttention::SLPAttention(ComponentWeights::ptr weights) 9 | : mWeights(weights) 10 | { 11 | // please refer to chpt_to_bin.py for the details on the format 12 | assert(mWeights->mMetaData.size() >= 3); 13 | mKernelWeights.type = static_cast(mWeights->mMetaData[0]); 14 | assert(mKernelWeights.type == nvinfer1::DataType::kFLOAT); 15 | mInputChannelCount = mWeights->mMetaData[1]; 16 | mOutputChannelCount = mWeights->mMetaData[2]; 17 | 18 | mKernelWeights.values = (void*) (&mWeights->mWeights[0]); 19 | 
mKernelWeights.count = mInputChannelCount * mOutputChannelCount; 20 | } 21 | 22 | void SLPAttention::addToModel( 23 | nvinfer1::INetworkDefinition* network, 24 | nvinfer1::ITensor* inputFromDecoder, 25 | nvinfer1::ITensor* context, 26 | nvinfer1::ITensor** attentionOutput) 27 | { 28 | nvinfer1::ITensor* inputTensors[] = {inputFromDecoder, context}; 29 | auto concatLayer = network->addConcatenation(inputTensors, 2); 30 | assert(concatLayer != nullptr); 31 | concatLayer->setName("Concatinate decoder output and context"); 32 | concatLayer->setAxis(1); 33 | auto concatinatedTensor = concatLayer->getOutput(0); 34 | assert(concatinatedTensor != nullptr); 35 | 36 | nvinfer1::Dims weightDims{2, {mInputChannelCount, mOutputChannelCount}, {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kCHANNEL}}; 37 | auto constLayer = network->addConstant(weightDims, mKernelWeights); 38 | assert(constLayer != nullptr); 39 | constLayer->setName("Attention Matrix"); 40 | auto weights = constLayer->getOutput(0); 41 | assert(weights != nullptr); 42 | 43 | auto mmLayer = network->addMatrixMultiply( 44 | *concatinatedTensor, 45 | false, 46 | *weights, 47 | false); 48 | assert(mmLayer != nullptr); 49 | mmLayer->setName("Attention Matrix Multiply"); 50 | *attentionOutput = mmLayer->getOutput(0); 51 | assert(*attentionOutput != nullptr); 52 | } 53 | 54 | int SLPAttention::getAttentionSize() 55 | { 56 | return mOutputChannelCount; 57 | } 58 | 59 | std::string SLPAttention::getInfo() 60 | { 61 | std::stringstream ss; 62 | ss << "SLP Attention, num inputs = " << mInputChannelCount << ", num outputs = " << mOutputChannelCount; 63 | return ss.str(); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpAttention.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_SLP_ATTENTION_ 2 | #define SAMPLE_NMT_SLP_ATTENTION_ 3 | 4 | #include "attention.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class SLPAttention 11 | * 12 | * \brief Linear attention calculation 13 | * 14 | * Calculates attention vector by concatinating input from the decoder with context vector 15 | * and projecting the result into attention space by multiplying with weight matrix 16 | * 17 | */ 18 | class SLPAttention : public Attention 19 | { 20 | public: 21 | SLPAttention(ComponentWeights::ptr weights); 22 | 23 | void addToModel( 24 | nvinfer1::INetworkDefinition* network, 25 | nvinfer1::ITensor* inputFromDecoder, 26 | nvinfer1::ITensor* context, 27 | nvinfer1::ITensor** attentionOutput) override; 28 | 29 | int getAttentionSize() override; 30 | 31 | std::string getInfo() override; 32 | 33 | protected: 34 | ComponentWeights::ptr mWeights; 35 | nvinfer1::Weights mKernelWeights; 36 | int mInputChannelCount; 37 | int mOutputChannelCount; 38 | }; 39 | } 40 | 41 | #endif // SAMPLE_NMT_SLP_ATTENTION_ 42 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpEmbedder.cpp: -------------------------------------------------------------------------------- 1 | #include "slpEmbedder.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | SLPEmbedder::SLPEmbedder(ComponentWeights::ptr weights) 9 | : mWeights(weights) 10 | { 11 | // please refer to chpt_to_bin.py for the details on the format 12 | assert(mWeights->mMetaData.size() >= 3); 13 | mKernelWeights.type = static_cast(mWeights->mMetaData[0]); 14 | assert(mKernelWeights.type == 
nvinfer1::DataType::kFLOAT); 15 | mNumInputs = mWeights->mMetaData[1]; 16 | mNumOutputs = mWeights->mMetaData[2]; 17 | 18 | mKernelWeights.values = (void*) (&mWeights->mWeights[0]); 19 | mKernelWeights.count = mNumInputs * mNumOutputs; 20 | } 21 | 22 | void SLPEmbedder::addToModel( 23 | nvinfer1::INetworkDefinition* network, 24 | nvinfer1::ITensor* input, 25 | nvinfer1::ITensor** output) 26 | { 27 | nvinfer1::Dims weightDims{2, {mNumInputs, mNumOutputs}, {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kCHANNEL}}; 28 | auto constLayer = network->addConstant(weightDims, mKernelWeights); 29 | assert(constLayer != nullptr); 30 | constLayer->setName("Embedding matrix"); 31 | auto weights = constLayer->getOutput(0); 32 | assert(weights != nullptr); 33 | 34 | auto gatherLayer = network->addGather(*weights, *input, 0); 35 | assert(gatherLayer != nullptr); 36 | gatherLayer->setName("Gather in embedding"); 37 | *output = gatherLayer->getOutput(0); 38 | assert(*output != nullptr); 39 | } 40 | 41 | int SLPEmbedder::getInputDimensionSize() 42 | { 43 | return mNumInputs; 44 | } 45 | 46 | std::string SLPEmbedder::getInfo() 47 | { 48 | std::stringstream ss; 49 | ss << "SLP Embedder, num inputs = " << mNumInputs << ", num outputs = " << mNumOutputs; 50 | return ss.str(); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpEmbedder.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_SLP_EMBEDDER_ 2 | #define SAMPLE_NMT_SLP_EMBEDDER_ 3 | 4 | #include "embedder.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | #include "NvInfer.h" 9 | 10 | #include 11 | 12 | namespace nmtSample 13 | { 14 | /** \class SLPEmbedder 15 | * 16 | * \brief selects the embedding vector from the weight matrix using index provided in the input 17 | * 18 | */ 19 | class SLPEmbedder : public Embedder 20 | { 21 | public: 22 | SLPEmbedder(ComponentWeights::ptr weights); 23 | 24 | void addToModel( 25 | nvinfer1::INetworkDefinition* network, 26 | nvinfer1::ITensor* input, 27 | nvinfer1::ITensor** output) override; 28 | 29 | int getInputDimensionSize() override; 30 | 31 | std::string getInfo() override; 32 | 33 | ~SLPEmbedder() override = default; 34 | 35 | protected: 36 | ComponentWeights::ptr mWeights; 37 | nvinfer1::Weights mKernelWeights; 38 | int mNumInputs; 39 | int mNumOutputs; 40 | }; 41 | } 42 | 43 | #endif // SAMPLE_NMT_SLP_EMBEDDER_ 44 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpProjection.cpp: -------------------------------------------------------------------------------- 1 | #include "slpProjection.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace nmtSample 7 | { 8 | SLPProjection::SLPProjection(ComponentWeights::ptr weights) 9 | : mWeights(weights) 10 | { 11 | // please refer to chpt_to_bin.py for the details on the format 12 | assert(mWeights->mMetaData.size() >= 3); 13 | mKernelWeights.type = static_cast(mWeights->mMetaData[0]); 14 | assert(mKernelWeights.type == nvinfer1::DataType::kFLOAT); 15 | mInputChannelCount = mWeights->mMetaData[1]; 16 | mOutputChannelCount = mWeights->mMetaData[2]; 17 | 18 | mKernelWeights.values = (void*) (&mWeights->mWeights[0]); 19 | mKernelWeights.count = mInputChannelCount * mOutputChannelCount; 20 | } 21 | 22 | void SLPProjection::addToModel( 23 | nvinfer1::INetworkDefinition* network, 24 | nvinfer1::ITensor* input, 25 | nvinfer1::ITensor** outputLogits) 26 | { 27 | nvinfer1::Dims weightDims{2, 
{mInputChannelCount, mOutputChannelCount}, {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kCHANNEL}}; 28 | auto constLayer = network->addConstant(weightDims, mKernelWeights); 29 | assert(constLayer != nullptr); 30 | constLayer->setName("Projection matrix"); 31 | auto weights = constLayer->getOutput(0); 32 | assert(weights != nullptr); 33 | 34 | auto mmLayer = network->addMatrixMultiply( 35 | *input, 36 | false, 37 | *weights, 38 | false); 39 | assert(mmLayer != nullptr); 40 | mmLayer->setName("Projection Matrix Multiply"); 41 | *outputLogits = mmLayer->getOutput(0); 42 | assert(*outputLogits != nullptr); 43 | } 44 | 45 | int SLPProjection::getOutputSize() 46 | { 47 | return mOutputChannelCount; 48 | } 49 | 50 | std::string SLPProjection::getInfo() 51 | { 52 | std::stringstream ss; 53 | ss << "SLP Projection, num inputs = " << mInputChannelCount << ", num outputs = " << mOutputChannelCount; 54 | return ss.str(); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/sampleNMT/model/slpProjection.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_SLP_PROJECTION_ 2 | #define SAMPLE_NMT_SLP_PROJECTION_ 3 | 4 | #include "projection.h" 5 | 6 | #include "componentWeights.h" 7 | 8 | namespace nmtSample 9 | { 10 | /** \class SLPProjection 11 | * 12 | * \brief Linear logits calculation 13 | * 14 | * Calculates logits vector by multiplying input vector with weight matrix 15 | * 16 | */ 17 | class SLPProjection : public Projection 18 | { 19 | public: 20 | SLPProjection(ComponentWeights::ptr weights); 21 | 22 | void addToModel( 23 | nvinfer1::INetworkDefinition* network, 24 | nvinfer1::ITensor* input, 25 | nvinfer1::ITensor** outputLogits) override; 26 | 27 | int getOutputSize() override; 28 | 29 | std::string getInfo() override; 30 | 31 | ~SLPProjection() override = default; 32 | 33 | protected: 34 | ComponentWeights::ptr mWeights; 35 | nvinfer1::Weights mKernelWeights; 36 | int mInputChannelCount; 37 | int mOutputChannelCount; 38 | }; 39 | } 40 | 41 | #endif // SAMPLE_NMT_SLP_PROJECTION_ 42 | -------------------------------------------------------------------------------- /src/sampleNMT/model/softmaxLikelihood.cpp: -------------------------------------------------------------------------------- 1 | #include "softmaxLikelihood.h" 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | void SoftmaxLikelihood::addToModel( 10 | nvinfer1::INetworkDefinition* network, 11 | int beamWidth, 12 | nvinfer1::ITensor* inputLogits, 13 | nvinfer1::ITensor* inputLikelihoods, 14 | nvinfer1::ITensor** newCombinedLikelihoods, 15 | nvinfer1::ITensor** newRayOptionIndices, 16 | nvinfer1::ITensor** newVocabularyIndices) 17 | { 18 | auto softmaxLayer = network->addSoftMax(*inputLogits); 19 | assert(softmaxLayer != nullptr); 20 | softmaxLayer->setName("Softmax in likelihood calculation"); 21 | softmaxLayer->setAxes(2); 22 | auto softmaxTensor = softmaxLayer->getOutput(0); 23 | assert(softmaxTensor != nullptr); 24 | 25 | auto topKLayer = network->addTopK(*softmaxTensor, nvinfer1::TopKOperation::kMAX, beamWidth, 2); 26 | assert(topKLayer != nullptr); 27 | topKLayer->setName("TopK 1st in likelihood calculation"); 28 | auto newLikelihoods = topKLayer->getOutput(0); 29 | assert(newLikelihoods != nullptr); 30 | auto vocabularyIndices = topKLayer->getOutput(1); 31 | assert(vocabularyIndices != nullptr); 32 | 33 | auto eltWiseLayer = network->addElementWise(*newLikelihoods, *inputLikelihoods, 
nvinfer1::ElementWiseOperation::kPROD); 34 | assert(eltWiseLayer != nullptr); 35 | eltWiseLayer->setName("EltWise multiplication in likelihood calculation"); 36 | auto combinedLikelihoods = eltWiseLayer->getOutput(0); 37 | assert(combinedLikelihoods != nullptr); 38 | 39 | auto shuffleLayer = network->addShuffle(*combinedLikelihoods); 40 | assert(shuffleLayer != nullptr); 41 | shuffleLayer->setName("Reshape combined likelihoods"); 42 | nvinfer1::Dims shuffleDims{1, {beamWidth * beamWidth}, {nvinfer1::DimensionType::kCHANNEL}}; 43 | shuffleLayer->setReshapeDimensions(shuffleDims); 44 | auto reshapedCombinedLikelihoods = shuffleLayer->getOutput(0); 45 | assert(reshapedCombinedLikelihoods != nullptr); 46 | 47 | auto topKLayer2 = network->addTopK(*reshapedCombinedLikelihoods, nvinfer1::TopKOperation::kMAX, beamWidth, 1); 48 | assert(topKLayer2 != nullptr); 49 | topKLayer2->setName("TopK 2nd in likelihood calculation"); 50 | *newCombinedLikelihoods = topKLayer2->getOutput(0); 51 | assert(*newCombinedLikelihoods != nullptr); 52 | *newRayOptionIndices = topKLayer2->getOutput(1); 53 | assert(*newRayOptionIndices != nullptr); 54 | 55 | auto shuffleLayer2 = network->addShuffle(*vocabularyIndices); 56 | assert(shuffleLayer2 != nullptr); 57 | shuffleLayer2->setName("Reshape vocabulary indices"); 58 | nvinfer1::Dims shuffleDims2{1, {beamWidth * beamWidth}, {nvinfer1::DimensionType::kCHANNEL}}; 59 | shuffleLayer2->setReshapeDimensions(shuffleDims2); 60 | auto reshapedVocabularyIndices = shuffleLayer2->getOutput(0); 61 | assert(reshapedVocabularyIndices != nullptr); 62 | 63 | auto gatherLayer = network->addGather(*reshapedVocabularyIndices, **newRayOptionIndices, 0); 64 | assert(gatherLayer != nullptr); 65 | gatherLayer->setName("Shuffle vocabulary indices"); 66 | *newVocabularyIndices = gatherLayer->getOutput(0); 67 | assert(*newVocabularyIndices != nullptr); 68 | } 69 | 70 | float SoftmaxLikelihood::SoftmaxLikelihoodCombinationOperator::combine(float rayLikelihood, float optionLikelihood) const 71 | { 72 | return rayLikelihood * optionLikelihood; 73 | } 74 | 75 | float SoftmaxLikelihood::SoftmaxLikelihoodCombinationOperator::init() const 76 | { 77 | return 1.0F; 78 | } 79 | 80 | float SoftmaxLikelihood::SoftmaxLikelihoodCombinationOperator::smallerThanMinimalLikelihood() const 81 | { 82 | return -1.0F; 83 | } 84 | 85 | LikelihoodCombinationOperator::ptr SoftmaxLikelihood::getLikelihoodCombinationOperator() const 86 | { 87 | return std::make_shared(); 88 | } 89 | 90 | std::string SoftmaxLikelihood::getInfo() 91 | { 92 | return "Softmax Likelihood"; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/sampleNMT/model/softmaxLikelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_SOFTMAX_LIKELIHOOD_ 2 | #define SAMPLE_NMT_SOFTMAX_LIKELIHOOD_ 3 | 4 | #include "NvInfer.h" 5 | #include "likelihood.h" 6 | 7 | namespace nmtSample 8 | { 9 | /** \class SoftmaxLikelihood 10 | * 11 | * \brief calculates softmax likelihood and TopK indices for the raw input logits 12 | * 13 | */ 14 | class SoftmaxLikelihood : public Likelihood 15 | { 16 | private: 17 | class SoftmaxLikelihoodCombinationOperator : public LikelihoodCombinationOperator 18 | { 19 | public: 20 | SoftmaxLikelihoodCombinationOperator() = default; 21 | 22 | float combine(float rayLikelihood, float optionLikelihood) const override; 23 | 24 | float init() const override; 25 | 26 | float smallerThanMinimalLikelihood() const override; 27 | 28 | 
~SoftmaxLikelihoodCombinationOperator() override = default; 29 | }; 30 | 31 | public: 32 | SoftmaxLikelihood() = default; 33 | 34 | LikelihoodCombinationOperator::ptr getLikelihoodCombinationOperator() const override; 35 | 36 | void addToModel( 37 | nvinfer1::INetworkDefinition* network, 38 | int beamWidth, 39 | nvinfer1::ITensor* inputLogits, 40 | nvinfer1::ITensor* inputLikelihoods, 41 | nvinfer1::ITensor** newCombinedLikelihoods, 42 | nvinfer1::ITensor** newRayOptionIndices, 43 | nvinfer1::ITensor** newVocabularyIndices) override; 44 | 45 | std::string getInfo() override; 46 | 47 | ~SoftmaxLikelihood() override = default; 48 | }; 49 | } 50 | 51 | #endif // SAMPLE_NMT_SOFTMAX_LIKELIHOOD_ 52 | -------------------------------------------------------------------------------- /src/sampleNMT/pinnedHostBuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_PINNED_HOST_BUFFER_ 2 | #define SAMPLE_NMT_PINNED_HOST_BUFFER_ 3 | 4 | #include "cudaError.h" 5 | #include 6 | #include 7 | 8 | namespace nmtSample 9 | { 10 | /** \class PinnedHostBuffer 11 | * 12 | * \brief wrapper for the pinned host memory region 13 | * 14 | */ 15 | template 16 | class PinnedHostBuffer 17 | { 18 | public: 19 | typedef std::shared_ptr> ptr; 20 | 21 | PinnedHostBuffer(size_t elementCount) 22 | : mBuffer(nullptr) 23 | { 24 | CUDA_CHECK(cudaHostAlloc(&mBuffer, elementCount * sizeof(T), cudaHostAllocDefault)); 25 | } 26 | 27 | virtual ~PinnedHostBuffer() 28 | { 29 | if (mBuffer) 30 | { 31 | cudaFreeHost(mBuffer); 32 | } 33 | } 34 | 35 | operator T*() 36 | { 37 | return mBuffer; 38 | } 39 | 40 | operator const T*() const 41 | { 42 | return mBuffer; 43 | } 44 | 45 | protected: 46 | T* mBuffer; 47 | }; 48 | } 49 | 50 | #endif // SAMPLE_NMT_PINNED_HOST_BUFFER_ 51 | -------------------------------------------------------------------------------- /src/sampleNMT/sampleNMT.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 头文件 20 | 21 | 22 | 头文件 23 | 24 | 25 | 头文件 26 | 27 | 28 | 头文件 29 | 30 | 31 | 头文件 32 | 33 | 34 | 头文件 35 | 36 | 37 | 头文件 38 | 39 | 40 | 头文件 41 | 42 | 43 | 头文件 44 | 45 | 46 | 头文件 47 | 48 | 49 | 头文件 50 | 51 | 52 | 头文件 53 | 54 | 55 | 头文件 56 | 57 | 58 | 头文件 59 | 60 | 61 | 头文件 62 | 63 | 64 | 头文件 65 | 66 | 67 | 头文件 68 | 69 | 70 | 头文件 71 | 72 | 73 | 头文件 74 | 75 | 76 | 头文件 77 | 78 | 79 | 头文件 80 | 81 | 82 | 头文件 83 | 84 | 85 | 头文件 86 | 87 | 88 | 头文件 89 | 90 | 91 | 头文件 92 | 93 | 94 | 头文件 95 | 96 | 97 | 头文件 98 | 99 | 100 | 头文件 101 | 102 | 103 | 头文件 104 | 105 | 106 | 头文件 107 | 108 | 109 | 头文件 110 | 111 | 112 | 头文件 113 | 114 | 115 | 头文件 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 源文件 124 | 125 | 126 | 源文件 127 | 128 | 129 | 源文件 130 | 131 | 132 | 源文件 133 | 134 | 135 | 源文件 136 | 137 | 138 | 源文件 139 | 140 | 141 | 源文件 142 | 143 | 144 | 源文件 145 | 146 | 147 | 源文件 148 | 149 | 150 | 源文件 151 | 152 | 153 | 源文件 154 | 155 | 156 | 源文件 157 | 158 | 159 | 源文件 160 | 161 | 162 | 源文件 163 | 164 | 165 | 源文件 166 | 167 | 168 | 源文件 169 | 170 | 171 | 源文件 172 | 173 | 174 | 源文件 175 | 176 | 177 | 源文件 178 | 179 | 180 | 源文件 181 | 182 | 183 | 
-------------------------------------------------------------------------------- /src/sampleNMT/sampleNMT.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleNMT/trtUtil.cpp: -------------------------------------------------------------------------------- 1 | #include "trtUtil.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace nmtSample 8 | { 9 | int inferTypeToBytes(nvinfer1::DataType t) 10 | { 11 | switch (t) 12 | { 13 | case nvinfer1::DataType::kFLOAT: return sizeof(float); break; 14 | case nvinfer1::DataType::kHALF: return sizeof(int16_t); break; 15 | default: assert(0); break; 16 | } 17 | }; 18 | 19 | int getVolume(nvinfer1::Dims dims) 20 | { 21 | return std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies()); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/sampleNMT/trtUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLE_NMT_TRT_UTIL_ 2 | #define SAMPLE_NMT_TRT_UTIL_ 3 | 4 | #include "NvInfer.h" 5 | 6 | namespace nmtSample 7 | { 8 | int inferTypeToBytes(nvinfer1::DataType t); 9 | 10 | int getVolume(nvinfer1::Dims dims); 11 | } 12 | 13 | #endif // SAMPLE_NMT_TRT_UTIL_ 14 | -------------------------------------------------------------------------------- /src/samplePlugin/fp16.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRT_FP16_H_ 2 | #define _TRT_FP16_H_ 3 | 4 | #include 5 | 6 | namespace fp16 7 | { 8 | // Code added before equivalent code was available via cuda. 9 | // This code needs to be removed when we ship for cuda-9.2. 10 | template T bitwise_cast(U u) 11 | { 12 | return *reinterpret_cast(&u); 13 | } 14 | 15 | __half __float2half(float f) 16 | { 17 | uint32_t x = bitwise_cast(f); 18 | uint32_t u = (x & 0x7fffffff); 19 | 20 | // Get rid of +NaN/-NaN case first. 21 | if (u > 0x7f800000) 22 | return bitwise_cast<__half, uint16_t>(uint16_t(0x7fff)); 23 | 24 | uint16_t sign = ((x >> 16) & 0x8000); 25 | 26 | // Get rid of +Inf/-Inf, +0/-0. 27 | if (u > 0x477fefff) 28 | return bitwise_cast<__half, uint16_t>(sign | uint16_t(0x7c00)); 29 | 30 | if (u < 0x33000001) 31 | return bitwise_cast<__half, uint16_t>(sign | uint16_t(0x0000)); 32 | 33 | uint32_t exponent = ((u >> 23) & 0xff); 34 | uint32_t mantissa = (u & 0x7fffff); 35 | 36 | uint32_t shift; 37 | if (exponent > 0x70) 38 | { 39 | shift = 13; 40 | exponent -= 0x70; 41 | } 42 | else 43 | { 44 | shift = 0x7e - exponent; 45 | exponent = 0; 46 | mantissa |= 0x800000; 47 | } 48 | 49 | uint32_t lsb = (1 << shift); 50 | uint32_t lsb_s1 = (lsb >> 1); 51 | uint32_t lsb_m1 = (lsb - 1); 52 | 53 | // Round to nearest even. 
54 | uint32_t remainder = (mantissa & lsb_m1); 55 | mantissa >>= shift; 56 | if ( (remainder > lsb_s1) || ((remainder == lsb_s1) && (mantissa & 0x1)) ) 57 | { 58 | ++mantissa; 59 | if (!(mantissa & 0x3ff)) 60 | { 61 | ++exponent; 62 | mantissa = 0; 63 | } 64 | } 65 | 66 | return bitwise_cast<__half, uint16_t>(sign | uint16_t(exponent<<10) | uint16_t(mantissa)); 67 | } 68 | 69 | float __half2float(__half h) 70 | { 71 | uint16_t x = bitwise_cast(h); 72 | uint32_t sign = ((x >> 15) & 1); 73 | uint32_t exponent = ((x >> 10) & 0x1f); 74 | uint32_t mantissa = (static_cast(x & 0x3ff) << 13); 75 | 76 | if (exponent == 0x1f) 77 | { /* NaN or Inf */ 78 | if (mantissa != 0) 79 | { // NaN 80 | sign = 0; 81 | mantissa = 0x7fffff; 82 | } 83 | else // Inf 84 | mantissa = 0; 85 | exponent = 0xff; 86 | } 87 | else if (!exponent) 88 | { /* Denorm or Zero */ 89 | if (mantissa) { 90 | unsigned int msb; 91 | exponent = 0x71; 92 | do 93 | { 94 | msb = (mantissa & 0x400000); 95 | mantissa <<= 1; /* normalize */ 96 | --exponent; 97 | } 98 | while (!msb); 99 | mantissa &= 0x7fffff; /* 1.mantissa is implicit */ 100 | } 101 | } 102 | else 103 | exponent += 0x70; 104 | return bitwise_cast( (sign<<31) | (exponent<<23) | mantissa ); 105 | } 106 | 107 | }; 108 | 109 | #endif // _TRT_FP16_H_ 110 | -------------------------------------------------------------------------------- /src/samplePlugin/samplePlugin.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 头文件 20 | 21 | 22 | 23 | 24 | 源文件 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/samplePlugin/samplePlugin.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleResNetv2/sampleResNetv2.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleResNetv2/sampleResNetv2.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleUffMNIST/sampleUffMNIST.vcxproj.filters: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/sampleUffMNIST/sampleUffMNIST.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleUffSSD/BatchStreamPPM.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCH_STREAM_PPM_H 2 | #define BATCH_STREAM_PPM_H 3 | #include <algorithm> 4 | #include <cassert> 5 | #include <fstream> 6 | #include <iterator> 7 | #include <vector> 8 | #include "NvInfer.h" 9 | #include "common.h" 10 | 11 | std::string locateFile(const std::string& input); 12 | 13 | static constexpr int INPUT_C = 3; 14 | static constexpr int INPUT_H = 300; 15 | static constexpr int INPUT_W = 300; 16 | 17 | const char* INPUT_BLOB_NAME = "Input"; 18 | 19 | class BatchStream 20 | { 21 | public: 22 | BatchStream(int batchSize, int maxBatches) : mBatchSize(batchSize), mMaxBatches(maxBatches) 23 | { 24 | mDims = nvinfer1::DimsNCHW{batchSize, 3, 300, 300 }; 25 | mImageSize = mDims.c() * mDims.h() * mDims.w(); 26 | mBatch.resize(mBatchSize * mImageSize, 0); 27 | mLabels.resize(mBatchSize, 0); 28 | mFileBatch.resize(mDims.n() * mImageSize, 0); 29 | mFileLabels.resize(mDims.n(), 0); 30 | reset(0); 31 | } 32 | 33 | void reset(int firstBatch) 34 | { 35 | mBatchCount = 0; 36 | mFileCount = 0; 37 | mFileBatchPos = mDims.n(); 38 | skip(firstBatch); 39 | } 40 | 41 | bool next() 42 | { 43 | if (mBatchCount == mMaxBatches) 44 | return false; 45 | 46 | for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) 47 | { 48 | assert(mFileBatchPos > 0 && mFileBatchPos <= mDims.n()); 49 | if (mFileBatchPos == mDims.n() && !update()) 50 | return false; 51 | 52 | // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer.
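// (In this sample mDims.n() equals mBatchSize, set in the constructor, so csize normally covers the whole requested batch in one pass and update() reloads the file buffer exactly once per next(); the loop form also handles the general case where the file-buffer size and the requested batch size differ.)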
53 | csize = std::min(mBatchSize - batchPos, mDims.n() - mFileBatchPos); 54 | std::copy_n(getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize); 55 | } 56 | mBatchCount++; 57 | return true; 58 | } 59 | 60 | void skip(int skipCount) 61 | { 62 | if (mBatchSize >= mDims.n() && mBatchSize % mDims.n() == 0 && mFileBatchPos == mDims.n()) 63 | { 64 | mFileCount += skipCount * mBatchSize / mDims.n(); 65 | return; 66 | } 67 | 68 | int x = mBatchCount; 69 | for (int i = 0; i < skipCount; i++) 70 | next(); 71 | mBatchCount = x; 72 | } 73 | 74 | float *getBatch() { return mBatch.data(); } 75 | float *getLabels() { return mLabels.data(); } 76 | int getBatchesRead() const { return mBatchCount; } 77 | int getBatchSize() const { return mBatchSize; } 78 | nvinfer1::DimsNCHW getDims() const { return mDims; } 79 | private: 80 | float* getFileBatch() { return mFileBatch.data(); } 81 | float* getFileLabels() { return mFileLabels.data(); } 82 | 83 | bool update() 84 | { 85 | std::vector<std::string> fNames; 86 | 87 | std::ifstream file(locateFile("list.txt")); 88 | if(file) 89 | { 90 | std::cout << "Batch #" << mFileCount << "\n"; 91 | file.seekg(((mBatchCount * mBatchSize))*7); 92 | } 93 | for(int i = 1; i <= mBatchSize; i++) 94 | { 95 | std::string sName; 96 | std::getline(file, sName); 97 | sName = sName + ".ppm"; 98 | 99 | std::cout << "Calibrating with file " << sName << std::endl; 100 | fNames.emplace_back(sName); 101 | } 102 | mFileCount++; 103 | 104 | std::vector<samplesCommon::PPM<INPUT_C, INPUT_H, INPUT_W>> ppms(fNames.size()); 105 | for (uint32_t i = 0; i < fNames.size(); ++i) 106 | { 107 | readPPMFile(locateFile(fNames[i]), ppms[i]); 108 | } 109 | std::vector<float> data(samplesCommon::volume(mDims)); 110 | 111 | long int volChl = mDims.h() * mDims.w(); 112 | 113 | for (int i = 0, volImg = mDims.c() * mDims.h() * mDims.w(); i < mBatchSize; ++i) 114 | { 115 | for (int c = 0; c < mDims.c(); ++c) 116 | { 117 | for (int j = 0; j < volChl; ++j) 118 | { 119 | data[i * volImg + c * volChl + j] = (2.0 / 255.0) * float(ppms[i].buffer[j * mDims.c() + c]) - 1.0; 120 | } 121 | } 122 | } 123 | 124 | std::copy_n(data.data(), mDims.n() * mImageSize, getFileBatch()); 125 | 126 | mFileBatchPos = 0; 127 | return true; 128 | } 129 | 130 | int mBatchSize{0}; 131 | int mMaxBatches{0}; 132 | int mBatchCount{0}; 133 | 134 | int mFileCount{0}, mFileBatchPos{0}; 135 | int mImageSize{0}; 136 | 137 | nvinfer1::DimsNCHW mDims; 138 | std::vector<float> mBatch; 139 | std::vector<float> mLabels; 140 | std::vector<float> mFileBatch; 141 | std::vector<float> mFileLabels; 142 | }; 143 | 144 | class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator 145 | { 146 | public: 147 | Int8EntropyCalibrator(BatchStream& stream, int firstBatch, std::string calibrationTableName, bool readCache = true) 148 | : mStream(stream), 149 | mCalibrationTableName(std::move(calibrationTableName)), 150 | mReadCache(readCache) 151 | { 152 | nvinfer1::DimsNCHW dims = mStream.getDims(); 153 | mInputCount = samplesCommon::volume(dims); 154 | CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); 155 | mStream.reset(firstBatch); 156 | } 157 | 158 | virtual ~Int8EntropyCalibrator() 159 | { 160 | CHECK(cudaFree(mDeviceInput)); 161 | } 162 | 163 | int getBatchSize() const override { return mStream.getBatchSize(); } 164 | 165 | bool getBatch(void* bindings[], const char* names[], int nbBindings) override 166 | { 167 | if (!mStream.next()) 168 | return false; 169 | 170 | CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); 171 |
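// TensorRT asks for one device pointer per input binding; this calibrator exposes a single input, so it checks that binding 0 really is the expected input blob and hands back the device buffer that the cudaMemcpy above just filled.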
assert(!strcmp(names[0], INPUT_BLOB_NAME)); 172 | bindings[0] = mDeviceInput; 173 | return true; 174 | } 175 | 176 | const void* readCalibrationCache(size_t& length) override 177 | { 178 | mCalibrationCache.clear(); 179 | std::ifstream input(mCalibrationTableName, std::ios::binary); 180 | input >> std::noskipws; 181 | if (mReadCache && input.good()) 182 | std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(mCalibrationCache)); 183 | length = mCalibrationCache.size(); 184 | return length ? mCalibrationCache.data() : nullptr; 185 | } 186 | 187 | void writeCalibrationCache(const void* cache, size_t length) override 188 | { 189 | std::ofstream output(mCalibrationTableName, std::ios::binary); 190 | output.write(reinterpret_cast<const char*>(cache), length); 191 | } 192 | 193 | private: 194 | BatchStream mStream; 195 | std::string mCalibrationTableName; 196 | bool mReadCache{true}; 197 | 198 | size_t mInputCount; 199 | void* mDeviceInput{nullptr}; 200 | std::vector<char> mCalibrationCache; 201 | }; 202 | #endif 203 | -------------------------------------------------------------------------------- /src/sampleUffSSD/README.txt: -------------------------------------------------------------------------------- 1 | The sampleUffSSD example is based on the following paper, SSD: Single Shot MultiBox 2 | Detector (https://arxiv.org/abs/1512.02325). The SSD network performs the 3 | task of object detection and localization in a single forward pass of the network. 4 | The TensorFlow SSD network was trained on the InceptionV2 architecture using 5 | the MSCOCO dataset. 6 | 7 | The sample makes use of TensorRT plugins to run the SSD network. To use these 8 | plugins, the TensorFlow graph needs to be preprocessed. 9 | 10 | Steps to generate the UFF file: 11 | 0. Make sure you have the UFF converter installed. For installation instructions, see: 12 | https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/#python and click on the 'TensorRT Python API' link. 13 | 14 | 1. Get the pre-trained TensorFlow model (ssd_inception_v2_coco) from: 15 | http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz 16 | 17 | 2. Call the UFF converter with the preprocessing flag set (-p [config_file]). 18 | The config.py script specifies the preprocessing operations necessary for the SSD TF graph. 19 | It must be copied to the working directory for the file to be imported properly. 20 | The plugin nodes and plugin parameters used in config.py should match the registered plugins 21 | in TensorRT. Please read the plugins documentation for more details. 22 | 23 | 'convert-to-uff --input-file frozen_inference_graph.pb -O NMS -p config.py' 24 | 25 | This script saves the converted .uff file in the same directory as the input with 26 | the name frozen_inference_graph.pb.uff. Copy this converted .uff file to the 27 | data directory as /data/ssd/sample_ssd_relu6.uff 28 | 29 | The sample also requires a labels .txt file with a list of all labels used to 30 | train the model. The current example for this network is /data/ssd/ssd_coco_labels.txt 31 | 32 | Steps to run the network: 33 | 1. To run the network in FP32 mode, ./sample_uff_ssd 34 | 2. To run the network in INT8 mode, ./sample_uff_ssd --int8 35 | 36 | To run the network in INT8 mode, refer to BatchStreamPPM.h for details on how 37 | calibration can be performed. Currently we require a file (list.txt) with 38 | a list of all PPM images for calibration in the /data/ssd/ folder.
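As an illustration only (the exact names are up to the user; this layout is inferred from how BatchStreamPPM.h consumes the file): each line of list.txt holds the base name of one calibration image without the .ppm extension, e.g. image1, image2, image3. BatchStreamPPM.h appends ".ppm" to every line and seeks 7 bytes per entry, so it expects fixed-width names of six characters plus a newline.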
39 | The PPM images to be used for calibration can also reside in the same folder. 40 | 41 | NOTE - There might be some precision loss when running the network in INT8 42 | mode causing some objects to go undetected. Our general observation is that 43 | >500 images is a good number for calibration purposes. 44 | -------------------------------------------------------------------------------- /src/sampleUffSSD/car-0.671518.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleUffSSD/car-0.671518.ppm -------------------------------------------------------------------------------- /src/sampleUffSSD/config.py: -------------------------------------------------------------------------------- 1 | import graphsurgeon as gs 2 | import tensorflow as tf 3 | 4 | Input = gs.create_node("Input", 5 | op="Placeholder", 6 | dtype=tf.float32, 7 | shape=[1, 3, 300, 300]) 8 | PriorBox = gs.create_plugin_node(name="GridAnchor", op="GridAnchor_TRT", 9 | numLayers=6, 10 | minSize=0.2, 11 | maxSize=0.95, 12 | aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33], 13 | variance=[0.1,0.1,0.2,0.2], 14 | featureMapShapes=[19, 10, 5, 3, 2, 1]) 15 | NMS = gs.create_plugin_node(name="NMS", op="NMS_TRT", 16 | shareLocation=1, 17 | varianceEncodedInTarget=0, 18 | backgroundLabelId=0, 19 | confidenceThreshold=1e-8, 20 | nmsThreshold=0.6, 21 | topK=100, 22 | keepTopK=100, 23 | numClasses=91, 24 | inputOrder=[0, 2, 1], 25 | confSigmoid=1, 26 | isNormalized=1, 27 | scoreConverter="SIGMOID") 28 | concat_priorbox = gs.create_node(name="concat_priorbox", op="ConcatV2", dtype=tf.float32, axis=2) 29 | concat_box_loc = gs.create_plugin_node("concat_box_loc", op="FlattenConcat_TRT", dtype=tf.float32, axis=1, ignoreBatch=0) 30 | concat_box_conf = gs.create_plugin_node("concat_box_conf", op="FlattenConcat_TRT", dtype=tf.float32, axis=1, ignoreBatch=0) 31 | 32 | namespace_plugin_map = { 33 | "MultipleGridAnchorGenerator": PriorBox, 34 | "Postprocessor": NMS, 35 | "Preprocessor": Input, 36 | "ToFloat": Input, 37 | "image_tensor": Input, 38 | "MultipleGridAnchorGenerator/Concatenate": concat_priorbox, 39 | "concat": concat_box_loc, 40 | "concat_1": concat_box_conf 41 | } 42 | 43 | def preprocess(dynamic_graph): 44 | # Now create a new graph by collapsing namespaces 45 | dynamic_graph.collapse_namespaces(namespace_plugin_map) 46 | # Remove the outputs, so we just have a single output node (NMS). 
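# graph_outputs still refers to the original TensorFlow output nodes; removing them while keeping their upstream nodes (remove_exclusive_dependencies=False) leaves the collapsed NMS plugin node as the graph's only output, which is the node the README passes to convert-to-uff via '-O NMS'.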
47 | dynamic_graph.remove(dynamic_graph.graph_outputs, remove_exclusive_dependencies=False) 48 | -------------------------------------------------------------------------------- /src/sampleUffSSD/dog-0.880681.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleUffSSD/dog-0.880681.ppm -------------------------------------------------------------------------------- /src/sampleUffSSD/dog-0.890010.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleUffSSD/dog-0.890010.ppm -------------------------------------------------------------------------------- /src/sampleUffSSD/person-0.549108.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleUffSSD/person-0.549108.ppm -------------------------------------------------------------------------------- /src/sampleUffSSD/sampleUffSSD.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 头文件 20 | 21 | 22 | 23 | 24 | 源文件 25 | 26 | 27 | 源文件 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/sampleUffSSD/sampleUffSSD.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 8 | WindowsLocalDebugger 9 | 10 | 11 | PATH=$(PATH);$(SolutionDir)3rdparty\TensorRT-5.0.1.3\lib 12 | WindowsLocalDebugger 13 | 14 | -------------------------------------------------------------------------------- /src/sampleUffSSD/truck-0.782028.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milittle/TensorRT_test/2894b7efc3661714bdc2e919d3de32fe4f0b96b5/src/sampleUffSSD/truck-0.782028.ppm --------------------------------------------------------------------------------