├── README.md
├── api_server.py
├── infer_lenet.py
└── train_lenet.py


/README.md:
--------------------------------------------------------------------------------
 1 | # TensorRT-5_Inference_Engine_Python
 2 | TensorRT-5  based inference engine in Python
 3 | 
 4 | ## Purpose
 5 | This is a POC project.
 6 | The aim of this project is to get acquainted with the use of the TensorRT API in Python 3.
 7 | 
 8 | The project includes 
 9 |   
10 |     1. TensorFlow script to train a LeNet classifier.
11 |     
12 |     2. Python Webserver script to host a REST api to perform inference.
13 |     
14 |     3. Client script to do concurrent requests to REST api to check inference performance.
15 |     
16 | 
17 | ## Steps:
18 | 
19 |      $ python3 train_lenet.py
20 |     
21 | 
22 | This will train a LeNet classifier, convert it to UFF format and save to disk
23 | 
24 |      $ python3 infer_lenet.py
25 |    
26 |    
27 | This will help you to understand how to run the model using TensorRT.
28 | 
29 |       $ python3 api_server.py
30 |    
31 | This will host a Flask server that accepts HTTP POST requests to perform inference.
32 | 
33 |       $ python3 client.py
34 |       
35 | This script will do concurrent requests to REST api hosted on **localhost:5000/predict**.
36 | 
37 | 
38 | Num of concurrent requests can be changed by changing **max_workers** in the script.
39 | 
40 | 
41 | While working with PyCUDA, I found that it takes around 2 sec to send a POST request -> perform inference on 1 image -> return the results.
42 | 
43 | 
44 | Hence, to get higher throughput I changed the batch size from 1 to 1024, 2048, 4096 etc. 
45 | 
46 | 
47 | The **BATCH_SIZE** variable in api_server.py sets the batch size used for inference.
48 | 
49 | Hence, for every POST request sent to the API, TensorRT is given BATCH_SIZE images to infer, which gives higher throughput.
50 | 
51 | On my laptop with **Nvidia 940MX** I was able to infer **4096 images (each of size 28*28)** in **7.762 sec**
52 | 
53 |      
54 | ## Installation:
55 | 
56 | TensorRT 5 can be downloaded from [here](https://developer.nvidia.com/tensorrt)
57 | 
58 | Installation instructions are present  : [here](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html)
59 | 
60 | I highly recommend reading Developer Guide for TensorRT 5 before going through this project's code.
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/api_server.py:
--------------------------------------------------------------------------------
 1 | import numpy as np 
 2 | import tensorrt as trt 
 3 | import pycuda.driver as cuda
 4 | 
 5 | from flask import Flask, url_for
 6 | from flask import request
 7 | from flask import json
 8 | import cv2
 9 | import time
10 | import tensorflow as tf
11 | 
12 | 
app = Flask(__name__)

MNIST_DATASETS = tf.contrib.learn.datasets.load_dataset("mnist")

# Number of images handed to TensorRT for every POST request.
BATCH_SIZE = 4096
print("batch size : ", BATCH_SIZE)


# One fixed test batch is loaded at start-up and reused by every request.
# It is flattened into a single float32 row so it can be copied to the
# device with one memcpy.
img, label = MNIST_DATASETS.test.next_batch(BATCH_SIZE)
img = img.reshape((1, BATCH_SIZE * 784))
img = img.astype(np.float32)

print("labels : ", label)
print("img shape : " , img.shape)
print('size : ', img.nbytes / 1024 /1024)

model_file = "model_data/mnist.uff"

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

parser = trt.UffParser()
parser.register_input("Placeholder", (1, 28, 28))
parser.register_output("fc2/Relu")
# parse() reports failure through its return value; fail fast here instead
# of letting an empty network reach the engine builder inside a request.
if not parser.parse(model_file, network):
    raise RuntimeError("Failed to parse UFF model: " + model_file)
builder.max_batch_size = BATCH_SIZE
builder.max_workspace_size = 1 << 20
44 | 
45 | 
@app.route("/predict", methods = ["POST"])
def predict():
    """Run one TensorRT inference pass over the preloaded MNIST batch.

    The request body is not read: every call infers the module-level
    ``img`` batch, compares the predictions with ``label`` and prints the
    resulting accuracy. A CUDA context is created for the call and the
    TensorRT engine is rebuilt from the module-level ``builder``/``network``
    on every request.

    Returns:
        The literal string ``"Done\\n"``.
    """
    time1 = time.time()
    cuda.init()
    device = cuda.Device(0)
    ctx = device.make_context()

    # make_context() pushes the new context onto this thread's stack, so it
    # must be popped even when engine building or inference raises.
    try:
        time2 = time.time()
        print("time to get context : ", time2 - time1)

        with builder.build_cuda_engine(network) as engine:
            # 10 class scores per image, stored back to back.
            output = np.empty(10 * BATCH_SIZE, dtype = np.float32)

            d_input = cuda.mem_alloc(1 * img.nbytes)
            d_output = cuda.mem_alloc(1 * output.nbytes)
            bindings = [int(d_input), int(d_output)]

            stream = cuda.Stream()

            with engine.create_execution_context() as context:
                cuda.memcpy_htod_async(d_input, img, stream)

                context.execute_async(bindings = bindings, stream_handle=stream.handle, batch_size = BATCH_SIZE)

                cuda.memcpy_dtoh_async(output, d_output, stream)

                # Wait until the copy back to the host has finished before
                # reading `output`.
                stream.synchronize()

                # One row of 10 scores per image; argmax picks the class.
                predictions = np.argmax(output.reshape(BATCH_SIZE, 10), axis=1)
                correct = np.sum(predictions == label)
                print("accuracy : ", correct / BATCH_SIZE)
    finally:
        ctx.pop()

    return "Done\n"
88 | 
89 | 
90 | 
# Start Flask's built-in server only when this file is executed as a script.
if __name__ == "__main__":
    app.run()


--------------------------------------------------------------------------------
/infer_lenet.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np 
 3 | import tensorrt as trt 
 4 | import pycuda.driver as cuda
 5 | import pycuda.autoinit
 6 | 
 7 | 
 8 | 
 9 | MNIST_DATASETS = tf.contrib.learn.datasets.load_dataset("mnist")
10 | 
11 | img, label = MNIST_DATASETS.test.next_batch(1)
12 | img = img[0]
13 | img = img.astype(np.float32)
14 | label = label[0]
15 | 
16 | model_file = "model_data/mnist.uff"
17 | 
18 | TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
19 | 
20 | 
21 | builder = trt.Builder(TRT_LOGGER)  
22 | network = builder.create_network() 
23 | 
24 | with trt.UffParser() as parser:
25 | 
26 | 
27 |     parser.register_input("Placeholder", (1, 28, 28))
28 |     parser.register_output("fc2/Softmax")
29 |     parser.parse(model_file, network)
30 | 
31 | 
32 |     builder.max_batch_size = 1
33 |     builder.max_workspace_size = 1 << 10
34 | 
35 | 
36 |     # h_input = cuda.pagelocked_empty(engine.get_binding_shape(0).volume(), dtype=np.float32)
37 | 
38 |     # h_output = cuda.pagelocked_empty(engine.get_binding_shape(1).volume(), dtype=np.float32)
39 | 
40 |     # d_input = cuda.mem_alloc(h_input.nbytes)
41 | 
42 |     # d_output = cuda.mem_alloc(h_output.nbytes)
43 | 
44 | 
45 |     with builder.build_cuda_engine(network) as engine:
46 |         output = np.empty(10, dtype = np.float32)
47 | 
48 | 
49 |         # Alocate device memory
50 |         d_input = cuda.mem_alloc(1 * img.nbytes)
51 |         d_output = cuda.mem_alloc(1 * output.nbytes)
52 |         bindings=[int(d_input), int(d_output)]
53 | 
54 |         stream = cuda.Stream()
55 | 
56 |         with engine.create_execution_context() as context:
57 |             cuda.memcpy_htod_async(d_input, img, stream)
58 | 
59 |             context.execute_async(bindings = bindings, stream_handle=stream.handle)
60 | 
61 |             cuda.memcpy_dtoh_async(output, d_output, stream)
62 | 
63 |             stream.synchronize()
64 | 
65 |             print("true label : ", label)
66 |             print(np.argmax(output))
67 |             print(output)
68 | 
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/train_lenet.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np 
  3 | import time 
  4 | import os
  5 | import uff
  6 | 
  7 | 
  8 | STARTER_LEARNING_RATE = 1e-3
  9 | BATCH_SIZE = 4
 10 | NUM_CLASSES = 10
 11 | MAX_STEPS = 10000
 12 | IMAGE_SIZE = 28
 13 | IMAGE_PIXELS = IMAGE_SIZE ** 2
 14 | OUTPUT_NAMES = ["fc2/Softmax"]
 15 | 
 16 | def WeightsVariable(shape):
 17 |     return tf.Variable(tf.truncated_normal(shape, stddev=0.1, name='weights'))
 18 | 
 19 | def BiasVariable(shape):
 20 |     return tf.Variable(tf.constant(0.1, shape=shape, name='biases'))
 21 | 
 22 | def Conv2d(x, W, b, strides=1):
 23 |     # Conv2D wrapper, with bias and relu activation
 24 |     filter_size = W.get_shape().as_list()
 25 |     pad_size = filter_size[0]//2
 26 |     pad_mat = np.array([[0,0],[pad_size,pad_size],[pad_size,pad_size],[0,0]])
 27 |     x = tf.pad(x, pad_mat)
 28 |     x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='VALID')
 29 |     x = tf.nn.bias_add(x, b)
 30 |     return tf.nn.relu(x)
 31 | 
 32 | def MaxPool2x2(x, k=2):
 33 |     # MaxPool2D wrapper
 34 |     pad_size = k//2
 35 |     pad_mat = np.array([[0,0],[pad_size,pad_size],[pad_size,pad_size],[0,0]])
 36 |     return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='VALID')
 37 | 
 38 | 
 39 | 
 40 | def network(images):
 41 |     # Convolution 1
 42 |     with tf.name_scope('conv1'):
 43 |         weights = WeightsVariable([5,5,1,32])
 44 |         biases = BiasVariable([32])
 45 |         conv1 = tf.nn.relu(Conv2d(images, weights, biases))
 46 |         pool1 = MaxPool2x2(conv1)
 47 | 
 48 |     # Convolution 2
 49 |     with tf.name_scope('conv2'):
 50 |         weights = WeightsVariable([5,5,32,64])
 51 |         biases = BiasVariable([64])
 52 |         conv2 = tf.nn.relu(Conv2d(pool1, weights, biases))
 53 |         pool2 = MaxPool2x2(conv2)
 54 |         pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
 55 | 
 56 |     # Fully Connected 1
 57 |     with tf.name_scope('fc1'):
 58 |         weights = WeightsVariable([7 * 7 * 64, 1024])
 59 |         biases = BiasVariable([1024])
 60 |         fc1 = tf.nn.relu(tf.matmul(pool2_flat, weights) + biases)
 61 | 
 62 |     # Fully Connected 2
 63 |     with tf.name_scope('fc2'):
 64 |         weights = WeightsVariable([1024, 10])
 65 |         biases = BiasVariable([10])
 66 |         fc2 = tf.nn.relu(tf.matmul(fc1, weights) + biases)
 67 | 
 68 |     return fc2
 69 | 
 70 | 
 71 | def loss_metrics(logits, labels):
 72 |     cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
 73 |                                                                    logits=logits,
 74 |                                                                    name='softmax')
 75 |     return tf.reduce_mean(cross_entropy, name='softmax_mean')
 76 | 
 77 | 
 78 | def training(loss):
 79 |     tf.summary.scalar('loss', loss)
 80 |     global_step = tf.Variable(0, name='global_step', trainable=False)
 81 |     learning_rate = tf.train.exponential_decay(STARTER_LEARNING_RATE,
 82 |                                                global_step,
 83 |                                                100000,
 84 |                                                0.75,
 85 |                                                staircase=True)
 86 |     tf.summary.scalar('learning_rate', learning_rate)
 87 |     optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
 88 |     train_op = optimizer.minimize(loss, global_step=global_step)
 89 |     return train_op
 90 | 
 91 | 
 92 | def evaluation(logits, labels):
 93 |     correct = tf.nn.in_top_k(logits, labels, 1)
 94 |     return tf.reduce_sum(tf.cast(correct, tf.int32))
 95 | 
 96 | 
 97 | def do_eval(sess,
 98 |             eval_correct,
 99 |             images_placeholder,
100 |             labels_placeholder,
101 |             data_set,
102 |             summary):
103 | 
104 |     true_count = 0
105 |     steps_per_epoch = data_set.num_examples // BATCH_SIZE
106 |     num_examples = steps_per_epoch * BATCH_SIZE
107 |     for step in range(steps_per_epoch):
108 |         feed_dict = fill_feed_dict(data_set,
109 |                                    images_placeholder,
110 |                                    labels_placeholder)
111 |         log, correctness = sess.run([summary, eval_correct], feed_dict=feed_dict)
112 |         true_count += correctness
113 |     precision = float(true_count) / num_examples
114 |     tf.summary.scalar('precision', tf.constant(precision))
115 |     print('Num examples %d, Num Correct: %d Precision @ 1: %0.04f' %
116 |           (num_examples, true_count, precision))
117 |     return log
118 | 
119 | 
def placeholder_inputs(batch_size):
    """Create the image and label placeholders.

    `batch_size` is accepted but not used: the image placeholder has an
    unspecified first dimension and the label placeholder has no fixed shape.

    Returns:
        Tuple ``(images_placeholder, labels_placeholder)``.
    """
    images = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    labels = tf.placeholder(tf.int32, shape=None)
    return images, labels
124 | 
125 | 
126 | 
def fill_feed_dict(data_set, images_pl, labels_pl):
    """Fetch the next BATCH_SIZE examples from `data_set` as a feed dict.

    Images are reshaped to (-1, 28, 28, 1) and keyed by `images_pl`; the
    labels are passed through unchanged and keyed by `labels_pl`.
    """
    batch_images, batch_labels = data_set.next_batch(BATCH_SIZE)
    reshaped_images = np.reshape(batch_images, (-1, 28, 28, 1))
    return {images_pl: reshaped_images, labels_pl: batch_labels}
134 | 
135 | 
136 | 
def run_training(data_sets):
    """Train the network for MAX_STEPS steps and return the frozen graph.

    Every 100 steps the loss is printed and a training summary is written.
    Every 1000 steps, and at the last step, a checkpoint is saved and the
    validation split is evaluated.

    Args:
        data_sets: object exposing `train` and `validation` splits, each
            usable with fill_feed_dict / do_eval.

    Returns:
        The graph definition with variables converted to constants (keeping
        the nodes listed in OUTPUT_NAMES) and training-only nodes removed.
    """
    log_dir = "/tmp/tensorflow/mnist/log"
    validation_log_dir = "/tmp/tensorflow/mnist/log/validation"

    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(BATCH_SIZE)
        logits = network(images_placeholder)
        loss = loss_metrics(logits, labels_placeholder)
        train_op = training(loss)
        eval_correct = evaluation(logits, labels_placeholder)
        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(log_dir,
                                               graph=tf.get_default_graph())
        test_writer = tf.summary.FileWriter(validation_log_dir,
                                            graph=tf.get_default_graph())
        # The session and both writers are released in `finally`, whether
        # training completes or raises.
        try:
            sess.run(init)
            for step in range(MAX_STEPS):
                start_time = time.time()
                feed_dict = fill_feed_dict(data_sets.train,
                                           images_placeholder,
                                           labels_placeholder)
                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
                duration = time.time() - start_time
                if step % 100 == 0:
                    print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                    summary_str = sess.run(summary, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()
                if (step + 1) % 1000 == 0 or (step + 1) == MAX_STEPS:
                    checkpoint_file = os.path.join(log_dir, "model.ckpt")
                    saver.save(sess, checkpoint_file, global_step=step)
                    print('Validation Data Eval:')
                    log = do_eval(sess,
                                  eval_correct,
                                  images_placeholder,
                                  labels_placeholder,
                                  data_sets.validation,
                                  summary)
                    test_writer.add_summary(log, step)

            # Freezing needs the live session to read the variable values,
            # so it happens before the session is closed.
            graphdef = tf.get_default_graph().as_graph_def()
            frozen_graph = tf.graph_util.convert_variables_to_constants(sess,
                                                                        graphdef,
                                                                        OUTPUT_NAMES)
            return tf.graph_util.remove_training_nodes(frozen_graph)
        finally:
            summary_writer.close()
            test_writer.close()
            sess.close()
184 | 
185 | 
# Train on MNIST, then export the frozen graph to UFF for TensorRT.
MNIST_DATASETS = tf.contrib.learn.datasets.load_dataset("mnist")
tf_model = run_training(MNIST_DATASETS)

print("done training")

uff_model = uff.from_tensorflow(tf_model, ["fc2/Relu"])

# Create the output directory first so a missing folder cannot discard the
# result of the whole training run.
os.makedirs("model_data", exist_ok=True)
with open("model_data/mnist.uff", "wb") as f:
    f.write(uff_model)
print("saved uff model")
197 | 


--------------------------------------------------------------------------------