├── .gitignore ├── .dockerignore ├── tf_classify_client.sh ├── docker-build.sh ├── p2p_client.sh ├── tf_classify_server.js ├── test ├── test_tf_classify_server.sh ├── test_basic_proxy.sh ├── test_tensorflow.sh ├── test_label_image.sh ├── test_p2p_proxy.sh └── test_seaport_proxy.sh ├── tf_classify_server.sh ├── p2p_proxy.js ├── echo_server.py ├── seaport_proxy.js ├── package.json ├── basic_proxy.js ├── tf_classify_server.py ├── LICENSE ├── Dockerfile ├── label_image.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.swp 2 | **/*.pyc 3 | **/node_modules 4 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | **/*.swp 2 | **/*.pyc 3 | **/node_modules 4 | -------------------------------------------------------------------------------- /tf_classify_client.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl -v -XPOST localhost:12480 -F"data=@$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg" 4 | -------------------------------------------------------------------------------- /docker-build.sh: -------------------------------------------------------------------------------- 1 | sudo docker build -t liubowei/simple-ml-serving:latest . && sudo docker run -it --net=host liubowei/simple-ml-serving:latest /bin/bash 2 | -------------------------------------------------------------------------------- /p2p_client.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #!/bin/bash 4 | 5 | curl -v -XPOST localhost:`curl localhost:12480` -F"data=@$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg" 6 | -------------------------------------------------------------------------------- /tf_classify_server.js: -------------------------------------------------------------------------------- 1 | // Usage : node tf_classify_server.js 2 | const port = require('seaport').connect(12481).register('tf_classify_server') 3 | console.log(`Launching tf classify worker on ${port}`) 4 | require('child_process').exec(`/bin/bash ./tf_classify_server.sh ${port}`) 5 | -------------------------------------------------------------------------------- /test/test_tf_classify_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tests that launching a single classifier worker works and we are able to POST to it 3 | 4 | cd "$HOME" 5 | 6 | bash tf_classify_server.sh & 7 | PID="$!" 8 | echo 'started server ; sleeping 5' 9 | sleep 5 10 | bash tf_classify_client.sh 11 | kill "$PID" 12 | -------------------------------------------------------------------------------- /test/test_basic_proxy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tests that the basic load balancer proxy works 3 | 4 | cd "$HOME" 5 | 6 | node basic_proxy.js 12481,12482,12483 & 7 | PID="$!" 
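# note: capture the process group id (ps pads it with spaces; the inner echo strips them)
# so that `kill -- -"$PGID"` below can stop the node proxy together with the
# tf_classify_server.sh workers it spawned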
8 | PGID=$(echo `ps -o pgid= "$PID"`) 9 | echo 'started servers ; sleeping 5' 10 | sleep 5 11 | bash tf_classify_client.sh 12 | kill -- -"$PGID" 13 | -------------------------------------------------------------------------------- /tf_classify_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # usage: bash tf_classify_server.sh [PORT_NUMBER] 3 | python tf_classify_server.py \ 4 | --graph=/tmp/output_graph.pb \ 5 | --labels=/tmp/output_labels.txt \ 6 | --output_layer=final_result:0 \ 7 | --image=$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg \ 8 | "$@" 9 | 10 | -------------------------------------------------------------------------------- /test/test_tensorflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # verifies that bazel and tensorflow installed correctly 3 | 4 | cd /tensorflow && \ 5 | bazel-bin/tensorflow/examples/image_retraining/label_image \ 6 | --graph=/tmp/output_graph.pb \ 7 | --labels=/tmp/output_labels.txt \ 8 | --output_layer=final_result:0 \ 9 | --image=$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg 10 | 11 | -------------------------------------------------------------------------------- /test/test_label_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # verifies that label_image.py works on multiple images quickly and successfully 3 | 4 | cd "$HOME" 5 | 6 | { for i in `seq 1 5` ; do echo $HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg ; done ; } | python label_image.py \ 7 | --graph=/tmp/output_graph.pb \ 8 | --labels=/tmp/output_labels.txt \ 9 | --output_layer=final_result:0 \ 10 | --image=$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg 11 | -------------------------------------------------------------------------------- /test/test_p2p_proxy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tests that the p2p proxy works 3 | 4 | cd "$HOME" 5 | 6 | ( 7 | node seaport_proxy.js & 8 | node tf_classify_server.js & 9 | sleep 5 10 | bash p2p_client.sh 11 | node tf_classify_server.js & 12 | node tf_classify_server.js & 13 | sleep 5 14 | bash p2p_client.sh 15 | bash p2p_client.sh 16 | bash p2p_client.sh 17 | sleep 1000 18 | ) & 19 | PID=$! 
20 | PGID=$(echo `ps -o pgid= "$PID"`) 21 | sleep 15 22 | kill -- -"$PGID" 23 | -------------------------------------------------------------------------------- /p2p_proxy.js: -------------------------------------------------------------------------------- 1 | // Usage : node p2p_proxy.js 2 | const seaportServer = require('seaport').createServer() 3 | seaportServer.listen(12481) 4 | 5 | let i = 0 6 | require('http').createServer((req, res) => { 7 | seaportServer.get('tf_classify_server', worker_ports => { 8 | const this_port = worker_ports[ (i++) % worker_ports.length ].port 9 | res.end(`${this_port}\n`) 10 | }) 11 | }).listen(12480) 12 | console.log(`P2P seaport proxy listening on ${12480} to '${'tf_classify_server'}' servers registered to ${12481}`) 13 | -------------------------------------------------------------------------------- /test/test_seaport_proxy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tests that the seaport proxy works 3 | 4 | cd "$HOME" 5 | 6 | ( 7 | node seaport_proxy.js & 8 | node tf_classify_server.js & 9 | sleep 5 10 | bash tf_classify_client.sh 11 | 12 | node tf_classify_server.js & 13 | node tf_classify_server.js & 14 | sleep 5 15 | bash tf_classify_client.sh 16 | bash tf_classify_client.sh 17 | bash tf_classify_client.sh 18 | sleep 1000 19 | ) & 20 | PID=$! 21 | PGID=$(echo `ps -o pgid= "$PID"`) 22 | sleep 15 23 | kill -- -"$PGID" 24 | 25 | -------------------------------------------------------------------------------- /echo_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # usage: python echo_server.py to launch the server ; and then in another session, do 3 | # curl -v -XPOST 127.0.0.1:12480 -F "data=@./image.jpg" 4 | from flask import Flask, request 5 | app = Flask(__name__) 6 | @app.route('/', methods=['POST']) 7 | def classify(): 8 | try: 9 | data = request.files.get('data').read() 10 | print repr(data)[:1000] 11 | return data, 200 12 | except Exception as e: 13 | return repr(e), 500 14 | app.run(host='127.0.0.1',port=12480) 15 | -------------------------------------------------------------------------------- /seaport_proxy.js: -------------------------------------------------------------------------------- 1 | // Usage : node seaport_proxy.js 2 | const seaportServer = require('seaport').createServer() 3 | seaportServer.listen(12481) 4 | const proxy = require('http-proxy').createProxyServer({}) 5 | proxy.on('error', () => console.log('proxy error')) 6 | 7 | let i = 0 8 | require('http').createServer((req, res) => { 9 | seaportServer.get('tf_classify_server', worker_ports => { 10 | const this_port = worker_ports[ (i++) % worker_ports.length ].port 11 | proxy.web(req,res, {target: 'http://localhost:' + this_port }) 12 | }) 13 | }).listen(12480) 14 | console.log(`Seaport proxy listening on ${12480} to '${'tf_classify_server'}' servers registered to ${12481}`) 15 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "simple-ml-serving", 3 | "version": "1.0.0", 4 | "description": "https://github.com/hiveml/simple-ml-serving", 5 | "main": "basic_proxy.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/hiveml/simple-ml-serving.git" 12 | }, 13 | "author": "", 14 | "license": "ISC", 
15 | "bugs": { 16 | "url": "https://github.com/hiveml/simple-ml-serving/issues" 17 | }, 18 | "homepage": "https://github.com/hiveml/simple-ml-serving#readme", 19 | "dependencies": { 20 | "http-proxy": "^1.16.2", 21 | "minimist": "^1.2.0", 22 | "seaport": "^2.0.9" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /basic_proxy.js: -------------------------------------------------------------------------------- 1 | // Usage : node basic_proxy.js WORKER_PORT_0,WORKER_PORT_1,... [PROXY_PORT] 2 | const worker_ports = process.argv[2].split(',') 3 | const proxy_port = process.argv[3] || 12480 4 | if (worker_ports.length === 0) { console.err('missing worker ports') ; process.exit(1) } 5 | 6 | const proxy = require('http-proxy').createProxyServer({}) 7 | proxy.on('error', () => console.log('proxy error')) 8 | 9 | let i = 0 10 | require('http').createServer((req, res) => { 11 | proxy.web(req,res, {target: 'http://localhost:' + worker_ports[ (i++) % worker_ports.length ]}) 12 | }).listen(proxy_port) 13 | console.log(`Proxying localhost:${proxy_port} to [${worker_ports.toString()}]`) 14 | 15 | // spin up the ML workers 16 | const { exec } = require('child_process') 17 | worker_ports.map(port => exec(`/bin/bash ./tf_classify_server.sh ${port}`)) 18 | 19 | -------------------------------------------------------------------------------- /tf_classify_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # usage: bash tf_classify_server.sh [PORT_NUMBER] 3 | from flask import Flask, request 4 | import tensorflow as tf 5 | import label_image as tf_classify 6 | import json 7 | app = Flask(__name__) 8 | FLAGS, unparsed = tf_classify.parser.parse_known_args() 9 | labels = tf_classify.load_labels(FLAGS.labels) 10 | tf_classify.load_graph(FLAGS.graph) 11 | sess = tf.Session() 12 | @app.route('/', methods=['POST']) 13 | def classify(): 14 | try: 15 | data = request.files.get('data').read() 16 | result = tf_classify.run_graph(data, labels, FLAGS.input_layer, FLAGS.output_layer, FLAGS.num_top_predictions, sess) 17 | return json.dumps(result), 200 18 | except Exception as e: 19 | return repr(e), 500 20 | app.run(host='127.0.0.1',port=12480 if len(unparsed) == 0 else int(unparsed[0])) 21 | 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Hive.ai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:latest-devel 2 | 3 | MAINTAINER Bowei Liu 4 | 5 | WORKDIR /root 6 | 7 | RUN apt-get update && apt-get install -y --no-install-recommends \ 8 | screen \ 9 | tmux \ 10 | vim 11 | 12 | RUN curl -O http://download.tensorflow.org/example_images/flower_photos.tgz && \ 13 | tar xzf flower_photos.tgz 14 | 15 | WORKDIR /tensorflow 16 | 17 | RUN bazel build tensorflow/examples/image_retraining:retrain \ 18 | tensorflow/examples/image_retraining:label_image 19 | 20 | RUN bazel-bin/tensorflow/examples/image_retraining/retrain \ 21 | --image_dir "$HOME"/flower_photos \ 22 | --how_many_training_steps=200 23 | 24 | WORKDIR /root 25 | 26 | 27 | RUN pip install -U flask twisted 28 | 29 | RUN curl -sSL https://nodejs.org/dist/v8.9.0/node-v8.9.0-linux-x64.tar.gz | \ 30 | tar xzf - --strip-components=1 \ 31 | --exclude="README.md" \ 32 | --exclude="LICENSE" \ 33 | --exclude="ChangeLog" \ 34 | -C "/usr/local" 35 | 36 | RUN npm install http-proxy && \ 37 | npm install -g seaport http-server 38 | 39 | # recommend using --net=host, but if not, this exposes at least one port to the host 40 | EXPOSE 12480 41 | 42 | COPY . /root/ 43 | RUN npm i 44 | RUN chmod u+x /root/*.sh 45 | 46 | CMD /bin/bash 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /label_image.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Simple image classification with Inception. 16 | 17 | Run image classification with your model. 18 | 19 | This script is usually used with retrain.py found in this same 20 | directory. 21 | 22 | This program creates a graph from a saved GraphDef protocol buffer, 23 | and runs inference on an input JPEG image. You are required 24 | to pass in the graph file and the txt file. 25 | 26 | It outputs human readable strings of the top 5 predictions along with 27 | their probabilities. 28 | 29 | Change the --image_file argument to any jpg image to compute a 30 | classification of that image. 
31 | 32 | Example usage: 33 | python label_image.py --graph=retrained_graph.pb 34 | --labels=retrained_labels.txt 35 | --image=flower_photos/daisy/54377391_15648e8d18.jpg 36 | 37 | NOTE: To learn to use this file and retrain.py, please see: 38 | 39 | https://codelabs.developers.google.com/codelabs/tensorflow-for-poets 40 | """ 41 | from __future__ import absolute_import 42 | from __future__ import division 43 | from __future__ import print_function 44 | 45 | import argparse 46 | import sys 47 | 48 | import tensorflow as tf 49 | 50 | parser = argparse.ArgumentParser() 51 | parser.add_argument( 52 | '--image', required=True, type=str, help='Absolute path to image file.') 53 | parser.add_argument( 54 | '--num_top_predictions', 55 | type=int, 56 | default=5, 57 | help='Display this many predictions.') 58 | parser.add_argument( 59 | '--graph', 60 | required=True, 61 | type=str, 62 | help='Absolute path to graph file (.pb)') 63 | parser.add_argument( 64 | '--labels', 65 | required=True, 66 | type=str, 67 | help='Absolute path to labels file (.txt)') 68 | parser.add_argument( 69 | '--output_layer', 70 | type=str, 71 | default='final_result:0', 72 | help='Name of the result operation') 73 | parser.add_argument( 74 | '--input_layer', 75 | type=str, 76 | default='DecodeJpeg/contents:0', 77 | help='Name of the input operation') 78 | 79 | 80 | def load_image(filename): 81 | """Read in the image_data to be classified.""" 82 | return tf.gfile.FastGFile(filename, 'rb').read() 83 | 84 | 85 | def load_labels(filename): 86 | """Read in labels, one label per line.""" 87 | return [line.rstrip() for line in tf.gfile.GFile(filename)] 88 | 89 | 90 | def load_graph(filename): 91 | """Unpersists graph from file as default graph.""" 92 | with tf.gfile.FastGFile(filename, 'rb') as f: 93 | graph_def = tf.GraphDef() 94 | graph_def.ParseFromString(f.read()) 95 | tf.import_graph_def(graph_def, name='') 96 | 97 | 98 | def run_graph(image_data, labels, input_layer_name, output_layer_name, 99 | num_top_predictions, sess): 100 | # Feed the image_data as input to the graph. 
101 | # predictions will contain a two-dimensional array, where one 102 | # dimension represents the input image count, and the other has 103 | # predictions per class 104 | softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name) 105 | predictions, = sess.run(softmax_tensor, {input_layer_name: image_data}) 106 | 107 | # Sort to show labels in order of confidence 108 | top_k = predictions.argsort()[-num_top_predictions:][::-1] 109 | for node_id in top_k: 110 | human_string = labels[node_id] 111 | score = predictions[node_id] 112 | print('%s (score = %.5f)' % (human_string, score)) 113 | return [ (labels[node_id], predictions[node_id].item()) for node_id in top_k ] 114 | 115 | def main(argv): 116 | """Runs inference on an image.""" 117 | if argv[1:]: 118 | raise ValueError('Unused Command Line Args: %s' % argv[1:]) 119 | 120 | if not tf.gfile.Exists(FLAGS.image): 121 | tf.logging.fatal('image file does not exist %s', FLAGS.image) 122 | 123 | if not tf.gfile.Exists(FLAGS.labels): 124 | tf.logging.fatal('labels file does not exist %s', FLAGS.labels) 125 | 126 | if not tf.gfile.Exists(FLAGS.graph): 127 | tf.logging.fatal('graph file does not exist %s', FLAGS.graph) 128 | 129 | # load image 130 | image_data = load_image(FLAGS.image) 131 | 132 | # load labels 133 | labels = load_labels(FLAGS.labels) 134 | 135 | # load graph, which is stored in the default session 136 | load_graph(FLAGS.graph) 137 | 138 | with tf.Session() as sess: 139 | for line in sys.stdin: 140 | run_graph(image_data, labels, FLAGS.input_layer, FLAGS.output_layer, 141 | FLAGS.num_top_predictions, sess) 142 | 143 | 144 | if __name__ == '__main__': 145 | FLAGS, unparsed = parser.parse_known_args() 146 | tf.app.run(main=main, argv=sys.argv[:1]+unparsed) 147 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | https://github.com/hiveml/simple-ml-serving 2 | 3 | https://blog.thehive.ai 4 | 5 | # simple-ml-serving 6 | #### Or, I just trained a machine learning model - now what ? 7 | 8 | This post goes over a quick and dirty way to deploy a trained machine learning model to production. 9 | 10 | Read this if: You've successfully trained an ML model using a framework such as Tensorflow or Caffe that you would like to put up as a demo, preferably sooner rather than later, and you prefer lighter solutions rather than spinning up an entire tech stack. 11 | 12 | Reading time: 10-15 mins 13 | 14 | TL;DR Read and understand the files in `test`. 15 | * [Check your tensorflow installation](test/test_tensorflow.sh) 16 | * [Run online classification from stdin](test/test_label_image.sh) 17 | * [Run online classification on localhost](test/test_tf_classify_server.sh) 18 | * [Put classifiers behind a hardcoded proxy](test/test_basic_proxy.sh) 19 | * [Put classifiers behind a proxy with service discovery](test/test_seaport_proxy.sh) 20 | * [Call classifiers using a pseudo-DNS](test/test_p2p_proxy.sh) 21 | 22 | ### ML in production ### 23 | 24 | When we first entered the machine learning space here at Hive, we had been doing manual image moderation for half a year, giving us millions of ground truth labeled images. This allowed us to train a state-of-the-art deep convolutional image classification model from scratch (i.e. randomized weights) in under a week, specialized for our use case. 
The more typical ML use case, though, involves on the order of hundreds of images, for which I would recommend fine-tuning an existing model. For instance, https://www.tensorflow.org/tutorials/image_retraining has a great tutorial on how to fine-tune an Imagenet model (trained on 1.2M images, 1000 classes) to classify a sample dataset of flowers (3647 images, 5 classes). 25 | 26 | For a quick tl;dr of the linked Tensorflow tutorial, after installing bazel and tensorflow, you would need to run the following code, which takes around 30 minutes to build and 5 minutes to train: 27 | 28 | ``` 29 | ( 30 | cd "$HOME" && \ 31 | curl -O http://download.tensorflow.org/example_images/flower_photos.tgz && \ 32 | tar xzf flower_photos.tgz ; 33 | ) && \ 34 | bazel build tensorflow/examples/image_retraining:retrain \ 35 | tensorflow/examples/image_retraining:label_image \ 36 | && \ 37 | bazel-bin/tensorflow/examples/image_retraining/retrain \ 38 | --image_dir "$HOME"/flower_photos \ 39 | --how_many_training_steps=200 \ 40 | && \ 41 | bazel-bin/tensorflow/examples/image_retraining/label_image \ 42 | --graph=/tmp/output_graph.pb \ 43 | --labels=/tmp/output_labels.txt \ 44 | --output_layer=final_result:0 \ 45 | --image=$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg 46 | ``` 47 | 48 | Alternatively, if you have [Docker](https://www.docker.com/get-docker) installed, you can use this [prebuilt Docker image](https://hub.docker.com/r/liubowei/simple-ml-serving/) like so: 49 | 50 | ``` 51 | sudo docker run -it --net=host liubowei/simple-ml-serving:latest /bin/bash 52 | 53 | >>> cat test.sh && bash test.sh 54 | ``` 55 | 56 | which puts you into an interactive shell inside the container and runs the above command; you can also follow along with the rest of this post inside the container if you wish. 57 | 58 | Now, Tensorflow has saved the model information into `/tmp/output_graph.pb` and `/tmp/output_labels.txt`, which are passed above as command-line parameters to the [label_image.py](https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/examples/image_retraining/label_image.py) script. Google's image_recognition tutorial also links to [another inference script](https://github.com/tensorflow/models/blob/master/tutorials/image/imagenet/classify_image.py#L130), but we will be sticking with label_image.py for now. 59 | 60 | ## Converting one-shot inference to online inference (Tensorflow) ## 61 | 62 | If we just want to accept file names from standard input, one per line, we can do "online" inference quite easily: 63 | 64 | ``` 65 | while read line ; do 66 | bazel-bin/tensorflow/examples/image_retraining/label_image \ 67 | --graph=/tmp/output_graph.pb --labels=/tmp/output_labels.txt \ 68 | --output_layer=final_result:0 \ 69 | --image="$line" ; 70 | done 71 | ``` 72 | 73 | From a performance standpoint, though, this is terrible - we are reloading the neural net, the weights, the entire Tensorflow framework, and Python itself, for every input example! 74 | 75 | We can do better. Let's start by editing the label_image.py script -- for me, this is located in `bazel-bin/tensorflow/examples/image_retraining/label_image.runfiles/org_tensorflow/tensorflow/examples/image_retraining/label_image.py`.
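If you want to see just how much of that time is pure startup overhead, here is a rough timing sketch -- not one of this repo's files -- that times the one-shot command from inside the Docker container above; the paths assume the retrained graph and flower photos from the tutorial, with HOME=/root:

```
#!/usr/bin/env python
# time_one_shot.py -- rough timing sketch, not part of this repo.
# Times the one-shot CLI above to show how much of each run is startup cost.
# Paths assume the Docker image from earlier (HOME=/root, graph in /tmp).
import subprocess
import time

CMD = [
    'bazel-bin/tensorflow/examples/image_retraining/label_image',
    '--graph=/tmp/output_graph.pb',
    '--labels=/tmp/output_labels.txt',
    '--output_layer=final_result:0',
    '--image=/root/flower_photos/daisy/21652746_cc379e0eea_m.jpg',
]

for trial in range(3):
    start = time.time()
    subprocess.check_call(CMD, cwd='/tensorflow')  # relaunches Python + Tensorflow every time
    print('one-shot run %d took %.2f seconds' % (trial, time.time() - start))
```

Each run will likely take on the order of seconds, nearly all of it spent loading Python, Tensorflow, and the graph rather than classifying the image.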
76 | 77 | Let's change the lines 78 | 79 | ``` 80 | 141: run_graph(image_data, labels, FLAGS.input_layer, FLAGS.output_layer, 81 | 142: FLAGS.num_top_predictions) 82 | ``` 83 | 84 | to 85 | 86 | ``` 87 | 141: for line in sys.stdin: 88 | 142: run_graph(load_image(line), labels, FLAGS.input_layer, FLAGS.output_layer, 89 | 143: FLAGS.num_top_predictions) 90 | ``` 91 | 92 | This is indeed a lot faster, but it's still not the best we can do! 93 | 94 | The reason is the `with tf.Session() as sess` construction on line 100. Tensorflow is essentially loading all the computation into memory every time `run_graph` is called. This becomes apparent once you start trying to do inference on the GPU -- you can see the GPU memory go up and down as Tensorflow loads and unloads the model parameters to and from the GPU. As far as I know, this construction is not present in other ML frameworks like Caffe or PyTorch. 95 | 96 | The solution is then to pull the `with` statement out and pass a `sess` variable into `run_graph`: 97 | 98 | ``` 99 | def run_graph(image_data, labels, input_layer_name, output_layer_name, 100 | num_top_predictions, sess): 101 | # Feed the image_data as input to the graph. 102 | # predictions will contain a two-dimensional array, where one 103 | # dimension represents the input image count, and the other has 104 | # predictions per class 105 | softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name) 106 | predictions, = sess.run(softmax_tensor, {input_layer_name: image_data}) 107 | # Sort to show labels in order of confidence 108 | top_k = predictions.argsort()[-num_top_predictions:][::-1] 109 | for node_id in top_k: 110 | human_string = labels[node_id] 111 | score = predictions[node_id] 112 | print('%s (score = %.5f)' % (human_string, score)) 113 | return [ (labels[node_id], predictions[node_id].item()) for node_id in top_k ] # numpy floats are not json serializable, have to call .item() 114 | 115 | ... 116 | 117 | with tf.Session() as sess: 118 | for line in sys.stdin: 119 | run_graph(load_image(line), labels, FLAGS.input_layer, FLAGS.output_layer, 120 | FLAGS.num_top_predictions, sess) 121 | ``` 122 | 123 | (see code at https://github.com/hiveml/simple-ml-serving/blob/master/label_image.py) 124 | 125 | If you run this, you should find that it takes around 0.1 sec per image, quite fast enough for online use. 126 | 127 | ## Converting one-shot inference to online inference (Other ML Frameworks) ## 128 | 129 | Caffe exposes a `net.forward` call, which is very easy to wrap in a server like the one above: see http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb 130 | 131 | MXNet goes even further: it already has ready-to-go inference server code publicly available: https://github.com/awslabs/mxnet-model-server. 132 | 133 | Further details coming soon! 134 | 135 | ## Deployment ## 136 | The plan is to wrap this code in a Flask app and turn it into an HTTP microservice. If you haven't heard of it, Flask is a very lightweight Python web framework which allows you to spin up an HTTP API server with minimal work.
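To give a sense of how minimal, a complete hello-world server fits in a few lines. (This is an illustrative sketch only, not one of the repo's files; `hello.py` is a hypothetical name.)

```
#!/usr/bin/env python
# hello.py -- minimal Flask hello-world (illustrative only, not part of this repo)
# usage: python hello.py to launch the server ; and then in another session, do
# curl 127.0.0.1:12480
from flask import Flask
app = Flask(__name__)
@app.route('/')
def hello():
    return 'hello\n', 200
app.run(host='127.0.0.1', port=12480)
```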
137 | 138 | As a quick reference, here's a flask app that receives POST requests with multipart form data: 139 | 140 | ``` 141 | #!/usr/bin/env python 142 | # usage: python echo.py to launch the server ; and then in another session, do 143 | # curl -v -XPOST 127.0.0.1:12480 -F "data=@./image.jpg" 144 | from flask import Flask, request 145 | app = Flask(__name__) 146 | @app.route('/', methods=['POST']) 147 | def classify(): 148 | try: 149 | data = request.files.get('data').read() 150 | print repr(data)[:1000] 151 | return data, 200 152 | except Exception as e: 153 | return repr(e), 500 154 | app.run(host='127.0.0.1',port=12480) 155 | ``` 156 | 157 | And here is the corresponding flask app hooked up to `run_graph` above: 158 | 159 | ``` 160 | #!/usr/bin/env python 161 | # usage: bash tf_classify_server.sh 162 | from flask import Flask, request 163 | import tensorflow as tf 164 | import label_image as tf_classify 165 | import json 166 | app = Flask(__name__) 167 | FLAGS, unparsed = tf_classify.parser.parse_known_args() 168 | labels = tf_classify.load_labels(FLAGS.labels) 169 | tf_classify.load_graph(FLAGS.graph) 170 | sess = tf.Session() 171 | @app.route('/', methods=['POST']) 172 | def classify(): 173 | try: 174 | data = request.files.get('data').read() 175 | result = tf_classify.run_graph(data, labels, FLAGS.input_layer, FLAGS.output_layer, FLAGS.num_top_predictions, sess) 176 | return json.dumps(result), 200 177 | except Exception as e: 178 | return repr(e), 500 179 | app.run(host='127.0.0.1',port=12480) 180 | 181 | ``` 182 | 183 | This looks quite good, except for the fact that flask and tensorflow are both fully synchronous - flask processes one request at a time in the order they are received, and Tensorflow fully occupies the thread when doing the image classification. 184 | 185 | As it's written, the speed bottleneck is probably still in the actual computation work, so there's not much point upgrading the Flask wrapper code. And maybe this code is sufficient to handle your load, for now. 186 | 187 | There are 2 obvious ways to scale up request throughput: scale up horizontally by increasing the number of workers, which is covered in the next section, or scale up vertically by utilizing a GPU and batching logic. Implementing the latter requires a webserver that is able to handle multiple pending requests at once, and decide whether to keep waiting for a larger batch or send it off to the Tensorflow graph thread to be classified, for which this Flask app is horrendously unsuited. Two possibilities are using Twisted + Klein for keeping code in Python, or Node.js + ZeroMQ if you prefer first class event loop support and the ability to hook into non-Python ML frameworks such as Torch. 188 | 189 | ## Scaling up: load balancing and service discovery ## 190 | 191 | OK, so now we have a single server serving our model, but maybe it's too slow or our load is getting too high. We'd like to spin up more of these servers - how can we distribute requests across each of them? 192 | 193 | The ordinary method is to add a proxy layer, perhaps haproxy or nginx, which balances the load between the backend servers while presenting a single uniform interface to the client. For use later in this section, here is some sample code that runs a rudimentary Node.js load balancer http proxy: 194 | 195 | ``` 196 | // Usage : node basic_proxy.js WORKER_PORT_0,WORKER_PORT_1,... 
197 | const worker_ports = process.argv[2].split(',') 198 | if (worker_ports.length === 0) { console.err('missing worker ports') ; process.exit(1) } 199 | 200 | const proxy = require('http-proxy').createProxyServer({}) 201 | proxy.on('error', () => console.log('proxy error')) 202 | 203 | let i = 0 204 | require('http').createServer((req, res) => { 205 | proxy.web(req,res, {target: 'http://localhost:' + worker_ports[ (i++) % worker_ports.length ]}) 206 | }).listen(12480) 207 | console.log(`Proxying localhost:${12480} to [${worker_ports.toString()}]`) 208 | 209 | // spin up the ML workers 210 | const { exec } = require('child_process') 211 | worker_ports.map(port => exec(`/bin/bash ./tf_classify_server.sh ${port}`)) 212 | ``` 213 | 214 | To automatically detect how many backend servers are up and where they are located, people generally use a "service discovery" tool, which may be bundled with the load balancer or be separate. Some well-known ones are Consul and Zookeeper. Setting up and learning how to use one is beyond the scope of this article, so I've included a very rudimentary proxy using the node.js service discovery package `seaport`. 215 | 216 | Proxy code: 217 | 218 | ``` 219 | // Usage : node seaport_proxy.js 220 | const seaportServer = require('seaport').createServer() 221 | seaportServer.listen(12481) 222 | const proxy = require('http-proxy').createProxyServer({}) 223 | proxy.on('error', () => console.log('proxy error')) 224 | 225 | let i = 0 226 | require('http').createServer((req, res) => { 227 | seaportServer.get('tf_classify_server', worker_ports => { 228 | const this_port = worker_ports[ (i++) % worker_ports.length ].port 229 | proxy.web(req,res, {target: 'http://localhost:' + this_port }) 230 | }) 231 | }).listen(12480) 232 | console.log(`Seaport proxy listening on ${12480} to '${'tf_classify_server'}' servers registered to ${12481}`) 233 | ``` 234 | 235 | Worker code: 236 | ``` 237 | // Usage : node tf_classify_server.js 238 | const port = require('seaport').connect(12481).register('tf_classify_server') 239 | console.log(`Launching tf classify worker on ${port}`) 240 | require('child_process').exec(`/bin/bash ./tf_classify_server.sh ${port}`) 241 | ``` 242 | 243 | However, as applied to ML, this setup runs into a bandwidth problem. 244 | 245 | At anywhere from tens to hundreds of images a second, the system becomes bottlenecked on network bandwidth. In the current setup, all the data has to go through our single `seaport` master, which is the single endpoint presented to the client. 246 | 247 | To solve this, we need our clients to not hit the single endpoint at `http://127.0.0.1:12480`, but instead to automatically rotate between backend servers to hit. If you know some networking, this sounds precisely like a job for DNS! 248 | 249 | However, setting up a custom DNS server is again beyond the scope of this article. 
Instead, by changing the clients to follow a 2-step "manual DNS" protocol, we can reuse our rudimentary seaport proxy to implement a "peer-to-peer" protocol in which clients connect directly to their servers: 250 | 251 | 252 | Proxy code: 253 | ``` 254 | // Usage : node p2p_proxy.js 255 | const seaportServer = require('seaport').createServer() 256 | seaportServer.listen(12481) 257 | 258 | let i = 0 259 | require('http').createServer((req, res) => { 260 | seaportServer.get('tf_classify_server', worker_ports => { 261 | const this_port = worker_ports[ (i++) % worker_ports.length ].port 262 | res.end(`${this_port}\n`) 263 | }) 264 | }).listen(12480) 265 | console.log(`P2P seaport proxy listening on ${12480} to '${'tf_classify_server'}' servers registered to ${12481}`) 266 | ``` 267 | (The worker code is the same as above.) 268 | 269 | Client code: 270 | ``` 271 | curl -v -XPOST localhost:`curl localhost:12480` -F"data=@$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg" 272 | ``` 273 | 274 | ## RPC deployment ## 275 | 276 | It's possible to replace the Flask interface above with a ZeroMQ interface, turning this code into an RPC microservice. Further details and code snippets coming soon! 277 | 278 | ## Conclusion and further reading ## 279 | 280 | At this point, you should have something working in production, but it's certainly not futureproof. There are several important topics that were not covered in this guide: 281 | 282 | * Automatically deploying and setting up on new hardware 283 | - Notable tools include Openstack/VMware if you're on your own hardware, Chef/Puppet for installing Docker and handling networking routes, and Docker for installing Tensorflow, Python, and everything else 284 | - Kubernetes or Marathon/Mesos are also great if you're on the cloud 285 | * Model version management 286 | - Not too hard to handle this manually at first 287 | - Tensorflow Serving is a great tool that handles this, as well as batching and overall deployment, very thoroughly. The downsides are that it's a bit hard to setup and to write client code for, and in addition doesn't support Caffe/PyTorch 288 | * How to migrate your ML code off Matlab 289 | - Don't try to use Matlab in production. Just don't 290 | * GPU drivers, Cuda, CUDNN 291 | - Use nvidia-docker and try to find some Dockerfiles online 292 | - There's also some work that goes into managing GPU resources, if you have more than one per box. Marathon/Mesos does this well, but at Hive we use a homebrewed tool that supports fractional GPUs 293 | * Postprocessing 294 | - Generally you'll want a frontend to present the ML results, but it's also a good idea to have an intermediate postprocessing layer so that you can make slight tweaks to the model results or confidences without having to redeploy a second classifier. 295 | - Once you get a few different ML models in production, it's also common to mix and match them for different use cases -- run model A only if models B and C are both inconclusive, run model D in Caffe and pass the results to model E in Tensorflow, etc. 296 | 297 | --------------------------------------------------------------------------------