├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── app.json
├── application
│   ├── __init__.py
│   ├── server.py
│   ├── static
│   │   ├── css
│   │   │   ├── base.css
│   │   │   └── index.css
│   │   └── js
│   │       ├── index.js
│   │       └── sketch.js
│   └── templates
│       ├── base.html
│       └── index.html
├── docs
│   ├── architecture.PNG
│   └── top.PNG
├── ml
│   ├── __init__.py
│   ├── data_processor.py
│   ├── model.py
│   ├── model_api.py
│   ├── resource.py
│   ├── store
│   │   └── .gitkeep
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_data_processor.py
│   │   ├── test_model.py
│   │   ├── test_model_api.py
│   │   ├── test_resource.py
│   │   └── test_trainer.py
│   └── trainer.py
├── requirements.txt
├── run_application.py
└── train.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm

## Directory-based project format
.idea/

## File-based project format
*.ipr
*.iml
*.iws

# iPython notebook
.ipynb_checkpoints

.vscode
data
!data/.gitkeep
ml/store/*
!ml/store/.gitkeep

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM continuumio/miniconda3

# Clear any entrypoint inherited from the base image.
ENTRYPOINT []

# Install nomkl first so the scientific packages come without the large MKL builds.
RUN conda install -y nomkl
RUN conda install -y numpy scipy scikit-learn cython

ADD ./requirements.txt /tmp/requirements.txt
RUN pip install -qr /tmp/requirements.txt

ADD . /opt/ml_in_app
WORKDIR /opt/ml_in_app

CMD python run_application.py

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Takahiro Kubo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Machine Learning In Application

A practical implementation of machine learning in an application.

## Architecture

![architecture.PNG](./docs/architecture.PNG)

* Model: the machine learning model itself.
* Trainer: trains the Model, so the training process (loss, optimizer) is kept separate from the Model.
* Model API: the interface between the Model and the application.
* DataProcessor: loads the data and preprocesses it; used by both the Trainer and the Model API.
* Resource: manages the parameters and stored files for the Trainer, Model, and DataProcessor.

## Demo Application

![top.PNG](./docs/top.PNG)

A handwritten number recognizer built with Chainer.

You can deploy this application with Docker.

Please refer to [this](https://devcenter.heroku.com/articles/container-registry-and-runtime) tutorial to deploy the application.

* `heroku plugins:install heroku-container-registry`
* `heroku container:login`
* `git clone https://github.com/icoxfog417/machine_learning_in_application.git`
* `heroku create`
* `heroku container:push web`
* `heroku open`

--------------------------------------------------------------------------------
/app.json:
--------------------------------------------------------------------------------
{
  "name": "ML in Application",
  "description": "Handwritten number recognizer built with Chainer",
  "repository": "https://github.com/icoxfog417/machine_learning_in_application",
  "keywords": ["Python", "tornado", "Chainer"],
  "env": {
    "SECRET_TOKEN": {
      "description": "A secret key for verifying the integrity of signed cookies.",
      "generator": "secret"
    }
  },
  "image": "registry.heroku.com/ml-in-app/web"
}

--------------------------------------------------------------------------------
/application/__init__.py:
--------------------------------------------------------------------------------
__author__ = 'tie301837'

--------------------------------------------------------------------------------
/application/server.py:
--------------------------------------------------------------------------------
import os
import tornado.web
from ml.model_api import ModelAPI
from ml.data_processor import DataProcessor
from ml.resource import Resource


DATA_PATH = os.path.join(os.path.dirname(__file__), "../data/feedbacks.txt")


class IndexHandler(tornado.web.RequestHandler):
    def get(self):
        self.render("index.html", title="title")


class PredictionHandler(tornado.web.RequestHandler):

    def post(self):
        resp = {"result": str(-1)}
        data = self.get_arguments("data[]")

        r = Resource()
        if not os.path.isdir(r.model_path):
            # no trained model has been stored yet, so train one on the fly
            from ml.model import NumberRecognizeNN
            from ml.trainer import Trainer
            model = NumberRecognizeNN(r.INPUT_SIZE, r.OUTPUT_SIZE)
            trainer = Trainer(model, r)
            x, y = r.load_training_data()
            trainer.train(x, y)
        api = ModelAPI(r)

        if len(data) > 0:
            _data = [float(d) for d in data]
            predicted = api.predict(_data)
            resp["result"] = str(predicted[0])

        self.write(resp)


class FeedbackHandler(tornado.web.RequestHandler):

    def post(self):
        # report the error message in "result" when the posted data is empty
        resp = {"result": ""}
        data = self.get_arguments("data[]")
        if len(data) > 0:
            r = Resource()
            r.save_data(DATA_PATH, data)
        else:
            resp["result"] = "feedback format is wrong."

        self.write(resp)


class Application(tornado.web.Application):

    def __init__(self):
        handlers = [
            (r"/", IndexHandler),
            (r"/predict", PredictionHandler),
            (r"/feedback", FeedbackHandler),
        ]

        settings = dict(
            template_path=os.path.join(os.path.dirname(__file__), "templates"),
            static_path=os.path.join(os.path.dirname(__file__), "static"),
            cookie_secret=os.environ.get("SECRET_TOKEN", "__TODO:_GENERATE_YOUR_OWN_RANDOM_VALUE_HERE__"),
            xsrf_cookies=True,
            debug=True,
        )

        super(Application, self).__init__(handlers, **settings)

--------------------------------------------------------------------------------
/application/static/css/base.css:
--------------------------------------------------------------------------------
body {
    background: #ededed;
}

header h1 small:before {
    content: "|";
    margin: 0 0.5em;
    font-size: 1.6em;
}

footer {
    background: #ccc;
}

--------------------------------------------------------------------------------
/application/static/css/index.css:
--------------------------------------------------------------------------------
.canvas-area{
    background-color:lavender;
    border-radius:10px;
    padding: 10px 0px;
}
#canvas{
    border: 1px solid silver;
    background-color:white;
}
.btn-submit{
    width: 82px;
}
.btn-clear{
    width: 82px;
}
.predicteds{
    background-color:whitesmoke;
}
.result {
    height: 120px;
    line-height: 120px;
    text-align: center;
    float:left;
    margin: 5px;
    border-radius: 10px;
    border: 1px solid lightsteelblue;
}
.result .answer {
    font-size: 100px;
    width: 120px;
    height: 120px;
    text-align: center;
    float:left;
}

--------------------------------------------------------------------------------
/application/static/js/index.js:
--------------------------------------------------------------------------------
var MAIN_ELEMENT = "#main";
var CANVAS_ID = "canvas";

var Prediction = (function () {
    function Prediction(image, sample) {
        this.image = image;
        this.sampleImage = sample[0];
        this.sampleData = sample[1];
        this.result = -1;
    }

    Prediction.prototype.envelop = function (data) {
        var getCookie = function(name){
            var r = document.cookie.match("\\b" + name + "=([^;]*)\\b");
            return r ? r[1] : undefined;
        }
        var envelope = {
            _xsrf: getCookie("_xsrf"),
            "data[]": data
        };
        return envelope;
    };

    Prediction.prototype.imageSrc = function () {
        return this.image.toDataURL();
    };

    Prediction.prototype.execute = function () {
        var self = this;
        var d = new $.Deferred();
        $.post("/predict", self.envelop(self.sampleData), function(prediction){
            self.result = prediction["result"];
            d.resolve(self);
        });
        return d.promise();
    };

    Prediction.prototype.feedback = function (value) {
        var self = this;
        var d = new $.Deferred();
        var feedback = [parseInt(value)];
        feedback = feedback.concat(self.sampleData);
        $.post("/feedback", self.envelop(feedback), function(feedbacked){
            if(feedbacked["result"] == ""){
                self.result = feedback[0];
                d.resolve();
            }else{
                d.reject(feedbacked["result"]);
            }
        });
        return d.promise();
    };

    return Prediction;
})();

Vue.config.delimiters = ["[[", "]]"];
Vue.config.prefix = "data-v-";
Vue.component("predict-item", {
    template: "#predict-item",
    methods: {
        beginEdit: function(){
            this.state.editing = true;
        },
        endEdit: function(){
            var state = this.state;
            if(state.value >= 0 && state.value < 10 && (state.value != this.result)){
                var original = this.result;
                this.$data.feedback(state.value).fail(function(msg){
                    state.value = original;
                });
            }else{
                state.value = this.result;
            }
            state.editing = false;
        }
    }
});
var app = new Vue({
    el: MAIN_ELEMENT,
    data: {
        canvas: null,
        SNAP_SIZE: 120,
        SAMPLE_SIZE: 80,
        predicts: []
    },
    created: function(){
        this.canvas = new Canvas(CANVAS_ID, {
            strokeStyle: "black"
        });
    },
    methods:{
        clear: function(){
            this.canvas.clear();
        },
        injectState: function(p){
            p.state = {
                editing: false,
                value: p.result
            };
        },
        submit: function(){
            var self = this;
            var image = self.canvas.snapShot(self.SNAP_SIZE);
            var sample = self.canvas.toSample(self.SAMPLE_SIZE, self.SAMPLE_SIZE);
            var total = sample[1].reduce(function(a, b){ return a + b; });
            if(total == 0){
                return false;
            }
            var p = new Prediction(image, sample);
            p.execute().done(function(p){
                self.injectState(p);
                self.predicts.unshift(p);
                self.clear();
            });
        }
    }
});

--------------------------------------------------------------------------------
/application/static/js/sketch.js:
--------------------------------------------------------------------------------
var Dot = (function () {
    function Dot(x, y) {
        this.x = x;
        this.y = y;
    }
    return Dot;
})();

var Stroke = (function () {
    function Stroke() {
        this.dots = [];
    }
    Stroke.prototype.draw = function (dot) {
        this.dots.push(dot);
    };
    return Stroke;
})();

var Canvas = (function () {
    function Canvas(canvasId, pencil) {
        var self = this;
        self.$canvas = $("#" + canvasId);
        self.drawing = false;
        self.strokes = [];

        self.pencil = {
            strokeStyle: "#df4b26",
            lineJoin: "round",
            lineWidth: 10
        };

        if(arguments.length > 1){
            for(var k in self.pencil){
                if(k in pencil){
                    self.pencil[k] = pencil[k];
                }
            }
        }

        self.$canvas
.on("mousedown", function(e){ 42 | var d = self.getPosition(e); 43 | self.draw(d); 44 | }) 45 | .on("mousemove", function(e){ 46 | if(self.drawing){ 47 | var d = self.getPosition(e); 48 | self.draw(d); 49 | } 50 | }) 51 | .on("mouseup mouseleave", function(e){ 52 | self.drawing = false; 53 | }) 54 | 55 | } 56 | 57 | Canvas.prototype.getCanvas = function () { 58 | return this.$canvas.get(0); 59 | } 60 | 61 | Canvas.prototype.getContext = function () { 62 | return this.getCanvas().getContext("2d"); 63 | } 64 | 65 | Canvas.prototype.getPosition = function (event) { 66 | var canvasOffset = this.$canvas.offset(); 67 | var relX = event.pageX - canvasOffset.left; 68 | var relY = event.pageY - canvasOffset.top; 69 | return new Dot(relX, relY); 70 | } 71 | 72 | Canvas.prototype.draw = function (dot) { 73 | var stroking = null; 74 | if(!this.drawing){ 75 | stroking = new Stroke(); 76 | this.strokes.push(stroking); 77 | this.drawing = true; 78 | }else{ 79 | stroking = this.strokes[this.strokes.length - 1]; 80 | } 81 | 82 | if(stroking != null){ 83 | stroking.draw(dot); 84 | this.flush(); 85 | } 86 | }; 87 | 88 | Canvas.prototype.flush = function(){ 89 | var context = this.getContext(); 90 | context.clearRect(0, 0, context.canvas.width, context.canvas.height); 91 | 92 | context.strokeStyle = this.pencil.strokeStyle; 93 | context.lineJoin = this.pencil.lineJoin; 94 | context.lineWidth = this.pencil.lineWidth; 95 | 96 | for(var i = 0; i < this.strokes.length; i++) { 97 | var s = this.strokes[i]; 98 | var preDot = null; 99 | for(var j = 0; j < s.dots.length; j++){ 100 | context.beginPath(); 101 | 102 | var d = s.dots[j]; 103 | if(preDot == null){ 104 | context.moveTo(d.x, d.y); 105 | }else{ 106 | context.moveTo(preDot.x, preDot.y); 107 | } 108 | context.lineTo(d.x, d.y); 109 | preDot = d; 110 | 111 | context.closePath(); 112 | context.stroke(); 113 | } 114 | } 115 | } 116 | 117 | Canvas.prototype.clear = function(){ 118 | this.strokes = []; 119 | this.drawing = false; 120 | 121 | var context = this.getContext(); 122 | context.clearRect(0, 0, context.canvas.width, context.canvas.height); 123 | } 124 | 125 | Canvas.prototype.snapShot = function(x, y){ 126 | var snap = document.createElement("canvas"); 127 | 128 | if(arguments.length < 1){ 129 | snap.width = this.getCanvas().width; 130 | snap.height = this.getCanvas().height; 131 | var context = snap.getContext("2d"); 132 | context.drawImage(this.getContext().canvas, 0, 0); 133 | }else if(arguments.length < 2){ 134 | snap.width = snap.height = x; 135 | var context = snap.getContext("2d"); 136 | context.drawImage(this.getContext().canvas, 0, 0, x, x); 137 | }else{ 138 | snap.width = x; 139 | snap.height = y; 140 | var context = snap.getContext("2d"); 141 | context.drawImage(this.getContext().canvas, 0, 0, x, y); 142 | } 143 | 144 | return snap; 145 | } 146 | 147 | Canvas.prototype.toSample = function(x, y){ 148 | var sample = this.snapShot(x, y); 149 | var ctx = sample.getContext("2d"); 150 | 151 | var src = ctx.getImageData(0, 0, x, y); 152 | var dst = ctx.createImageData(x, y); 153 | var data = []; 154 | for (var i = 0; i < src.data.length; i += 4) { 155 | var rgb = src.data[i] + src.data[i+1] + src.data[i+2]; 156 | var sum = rgb + src.data[i+3]; 157 | data.push(Math.sqrt(Math.min(sum,255))) 158 | dst.data[i] = dst.data[i+1] = dst.data[i+2] = rgb / 3; 159 | dst.data[i+3] = src.data[i+3]; 160 | } 161 | 162 | ctx.putImageData(dst, 0, 0); 163 | return [sample, data] 164 | 165 | } 166 | 167 | return Canvas; 168 | })(); 
--------------------------------------------------------------------------------
/application/templates/base.html:
--------------------------------------------------------------------------------
[The HTML markup of this template was lost in extraction; only the text content
and the Tornado template directives below survive.]

Number Recognition

{% block head %}{% end %}

Number Recognizer powered by Chainer

{% block body %}{% end %}

{% block bottom %}{% end %}

--------------------------------------------------------------------------------
/application/templates/index.html:
--------------------------------------------------------------------------------
[The HTML markup of this template was lost in extraction; only the Tornado
template directives below survive.]

{% extends "base.html" %}

{% block head %}
{% end %}

{% block bottom %}
{% end %}

{% block body %}
[Canvas area, submit/clear buttons, the prediction list, and the #predict-item
template markup did not survive extraction.]
{% module xsrf_form_html() %}
{% end %}

--------------------------------------------------------------------------------
/docs/architecture.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icoxfog417/machine_learning_in_application/c6004a43e646a85bd2ccba2249254c1bab9a7709/docs/architecture.PNG

--------------------------------------------------------------------------------
/docs/top.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icoxfog417/machine_learning_in_application/c6004a43e646a85bd2ccba2249254c1bab9a7709/docs/top.PNG

--------------------------------------------------------------------------------
/ml/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icoxfog417/machine_learning_in_application/c6004a43e646a85bd2ccba2249254c1bab9a7709/ml/__init__.py

--------------------------------------------------------------------------------
/ml/data_processor.py:
--------------------------------------------------------------------------------
import numpy as np


class DataProcessor():

    def __init__(self, means=(), stds=()):
        self.means = means
        self.stds = stds

    def format_x(self, x, size=-1):
        _x = x
        if isinstance(x, (tuple, list)):
            _x = np.array([x])

        if size > 0 and _x.shape[1] != size:
            _x = self.adjust(_x, size)  # adjust the converted array, not the raw input

        _x = _x.astype(np.float32, copy=False)

        if len(self.means) > 0 and len(self.stds) > 0:
            return (_x - self.means) / self.stds
        else:
            return _x

    def adjust(self, x, size):
        def max_pooling(v):
            sqrt = lambda _x: int(np.ceil(np.sqrt(_x)))
            _target_square_size = sqrt(size)
            square_size = sqrt(len(v))
            conv_size = int(square_size // _target_square_size)
            image = np.reshape(v, (square_size, square_size))
            _pooled = []
            for i in range(size):
                row, col = int(i // _target_square_size * conv_size), int(i % _target_square_size * conv_size)
                mp = np.max(image[row:row + conv_size, col: col + conv_size])
                _pooled.append(mp)
            return np.array(_pooled)

        x = np.array([max_pooling(_v) for _v in x])
        return x

    def format_y(self, y):
        _y = y
        if isinstance(y, int):
            _y = np.array([y])
        _y = _y.astype(np.int32, copy=False)
        return _y

    def set_normalization_params(self, x):
        self.means = np.mean(x, axis=0, dtype=np.float32)
        self.stds = np.std(x, axis=0, dtype=np.float32)
        # simple trick to avoid zero division; take the mask before stds is overwritten
        near_zero = self.stds < 1.0e-6
        self.stds[near_zero] = np.max(x) - np.min(x)
        self.means[near_zero] = np.min(x)

    def batch_iter(self, X, y, batch_size, epoch=1):
        indices = np.array(range(len(y)))
        # pad with reused samples only when the data does not divide evenly
        appendix = (batch_size - len(y) % batch_size) % batch_size
        for e in range(epoch):
            np.random.shuffle(indices)
            batch_indices = np.concatenate([indices, indices[:appendix]])
            batch_count = len(batch_indices) // batch_size
            for b in range(batch_count):
                elements = batch_indices[b * batch_size:(b + 1) * batch_size]
                x_batch = X[elements]
                y_batch = y[elements]
                epoch_end = True if b == batch_count - 1 else False
                yield x_batch, y_batch, epoch_end

--------------------------------------------------------------------------------
/ml/model.py:
--------------------------------------------------------------------------------
import chainer
import chainer.functions as F
import chainer.links as L


class NumberRecognizeNN(chainer.Chain):

    def __init__(self, input_size, output_size, hidden_size=200, layer_size=3):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.layer_size = layer_size
        super(NumberRecognizeNN, self).__init__(
            l1=L.Linear(self.input_size, hidden_size),
            l2=L.Linear(hidden_size, hidden_size),
            l3=L.Linear(hidden_size, self.output_size),
        )

    def __call__(self, x):
        h1 = F.relu(self.l1(x))
        h2 = F.relu(self.l2(h1))
        # return raw logits: the Trainer's softmax_cross_entropy applies the
        # softmax itself, so squashing the output here would hurt learning
        o = self.l3(h2)
        return o

--------------------------------------------------------------------------------
/ml/model_api.py:
--------------------------------------------------------------------------------
import numpy as np
from ml.model import NumberRecognizeNN
from ml.data_processor import DataProcessor


class ModelAPI():

    def __init__(self, resource):
        self.resource = resource
        self.model = NumberRecognizeNN(resource.INPUT_SIZE, resource.OUTPUT_SIZE)
        resource.load_model(self.model)

        means, stds = resource.load_normalization_params()
        self.dp = DataProcessor(means, stds)

    def predict(self, data):
        _data = data
        if isinstance(data, (tuple, list)):
            _data = np.array([data], dtype=np.float32)

        f_data = self.dp.format_x(_data, size=self.resource.INPUT_SIZE)
        predicted = self.model(f_data)
        number = np.argmax(predicted.data, axis=1)
        return number

--------------------------------------------------------------------------------
/ml/resource.py:
--------------------------------------------------------------------------------
import os
import json
from datetime import datetime
import numpy as np
from chainer import serializers
from ml.data_processor import DataProcessor


class Resource():
    INPUT_SIZE = 64  # 8 x 8 image size
    OUTPUT_SIZE = 10  # 10 classes (digits 0-9)

    def __init__(self, root=""):
        self.root = root if root else os.path.join(os.path.dirname(__file__), "./store")
        self.model_path = os.path.join(self.root, "./model")
        self.param_file = os.path.join(self.root, "./params.json")

    def save_normalization_params(self, means, stds):
        to_list = lambda ls: ls if isinstance(ls, (tuple, list)) else ls.tolist()
        params = {
            "means": to_list(means),
            "stds": to_list(stds)
        }
        serialized = json.dumps(params)
        with open(self.param_file, "wb") as f:
            f.write(serialized.encode("utf-8"))

    def load_normalization_params(self):
        loaded = {}
        if not os.path.isfile(self.param_file):
            raise Exception("Normalization parameter file does not exist.")

        with open(self.param_file, "rb") as f:
            loaded = json.loads(f.read().decode("utf-8"))

        to_array = lambda x: np.array([float(_x) for _x in x], dtype=np.float32)

        return to_array(loaded["means"]), to_array(loaded["stds"])

    def load_training_data(self):
        from sklearn.datasets import load_digits
        # the predefined dataset is scikit-learn's digits dataset
        # http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html

        digits = load_digits()
        x = digits.data
        y = digits.target

        return x, y

    def save_data(self, path, data):
        with open(path, "ab") as f:
            label = int(data[0])
            features = [float(d) for d in data[1:]]
            if len(features) > self.INPUT_SIZE:
                dp = DataProcessor()
                features = dp.adjust(np.array([features]), self.INPUT_SIZE).tolist()[0]
            elif len(features) < self.INPUT_SIZE:
                raise Exception("Size mismatch when saving the data.")
            line = "\t".join([str(e) for e in [label] + features]) + "\n"
            f.write(line.encode("utf-8"))

    def load_data(self, path):
        x = []
        y = []
        with open(path, mode="r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                label, features = self.read_data(line)
                x.append(features)
                y.append(label)
        x = np.array(x, dtype=np.float32)
        y = np.array(y, dtype=np.float32)
        return x, y

    def read_data(self, line):
        elements = line.split("\t")
        label = int(elements[0])
        features = [float(e) for e in elements[1:]]
        return label, features

    def save_model(self, model):
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        timestamp = datetime.strftime(datetime.now(), "%Y%m%d%H%M%S")
        model_file = os.path.join(self.model_path, "./" + model.__class__.__name__.lower() + "_" + timestamp + ".model")
        serializers.save_npz(model_file, model)

    def load_model(self, model):
        if not os.path.exists(self.model_path):
            raise Exception("model file directory does not exist.")

        suffix = ".model"
        keyword = model.__class__.__name__.lower()
        candidates = []
        for f in os.listdir(self.model_path):
            if keyword in f and f.endswith(suffix):
                candidates.append(f)
        if not candidates:
            raise Exception("trained model file does not exist.")
        candidates.sort()
        latest = candidates[-1]  # file names carry a timestamp, so the last one is newest
        model_file = os.path.join(self.model_path, latest)
        serializers.load_npz(model_file, model)

--------------------------------------------------------------------------------
/ml/store/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icoxfog417/machine_learning_in_application/c6004a43e646a85bd2ccba2249254c1bab9a7709/ml/store/.gitkeep

--------------------------------------------------------------------------------
/ml/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icoxfog417/machine_learning_in_application/c6004a43e646a85bd2ccba2249254c1bab9a7709/ml/tests/__init__.py

--------------------------------------------------------------------------------
/ml/tests/test_data_processor.py:
--------------------------------------------------------------------------------
import os, sys
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
import math
import unittest
import numpy as np
from ml.resource import Resource
from ml.data_processor import DataProcessor


class TestDataProcessor(unittest.TestCase):

    def test_format_x(self):
        means = np.array([0, 0.1, 0.2])
        stds = np.array([1, 1.5, 0.5])
        dp = DataProcessor(means=means, stds=stds)
        data = np.array([[1, 2, 3], [4, 5, 6]])
        x = dp.format_x(data)
        _x = (data - means) / stds
        for i in range(x.shape[0]):
            for j in range(x.shape[1]):
                self.assertEqual(x[i][j], _x[i][j])

    def test_format_x_resize(self):
        dp = DataProcessor()
        data = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]])
        x = dp.format_x(data, size=4)
        v = x[0].tolist()
        self.assertEqual(v[0], 6)
        self.assertEqual(v[1], 8)
        self.assertEqual(v[2], 14)
        self.assertEqual(v[3], 16)

    def test_batch_iter(self):
        batch_size = 10
        dp = DataProcessor()
        r = Resource()
        train_x, train_y = r.load_training_data()
        batch_count = math.ceil(len(train_y) / batch_size)

        i = 0
        for x_batch, y_batch, epoch_end in dp.batch_iter(train_x, train_y, batch_size):
            self.assertEqual(batch_size, len(x_batch))
            self.assertEqual(batch_size, len(y_batch))
            if i < batch_count - 1:
                self.assertFalse(epoch_end)
            else:
                self.assertTrue(epoch_end)
            i += 1
        self.assertEqual(i, batch_count)


if __name__ == "__main__":
    unittest.main()

--------------------------------------------------------------------------------
/ml/tests/test_model.py:
--------------------------------------------------------------------------------
import os, sys
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
import unittest
import numpy as np
from ml.model import NumberRecognizeNN


class TestModel(unittest.TestCase):

    def test_forward(self):
        input_size = 100
        output_size = 10
        data_length = 50
        test_data = self.create_test_data(input_size, data_length)

        model = NumberRecognizeNN(input_size, output_size)
        output = model(test_data)
        self.assertEqual((data_length, output_size), output.data.shape)

    def create_test_data(self, input_size, length):
        input = np.random.rand(length, input_size).astype(np.float32)
        return input


if __name__ == "__main__":
    unittest.main()

--------------------------------------------------------------------------------
/ml/tests/test_model_api.py:
--------------------------------------------------------------------------------
import os, sys
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
import unittest
import shutil
from sklearn.metrics import accuracy_score
from ml.model import NumberRecognizeNN
from ml.model_api import ModelAPI
from ml.trainer import Trainer
from ml.data_processor import DataProcessor
from ml.resource import Resource


class TestModelAPI(unittest.TestCase):
    TEST_DIR = ""

    @classmethod
    def setUpClass(cls):
        path = os.path.join(os.path.dirname(__file__), "./test_model_api")
        if not os.path.isdir(path):
            os.mkdir(path)
        cls.TEST_DIR = path

    @classmethod
    def tearDownClass(cls):
        if os.path.isdir(cls.TEST_DIR):
            shutil.rmtree(cls.TEST_DIR)

    def test_model_api(self):
        model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
        r = Resource(self.TEST_DIR)
        trainer = Trainer(model, r)
        dp = DataProcessor()
        data, target = r.load_training_data()
        api_test_size = 200

        print("Train the model for API Test.")
        trainer.train(data[:-api_test_size], target[:-api_test_size], epoch=5)

        model_api = ModelAPI(r)
        predicted = model_api.predict(data[-api_test_size:])
        teacher = target[-api_test_size:]
        score = accuracy_score(teacher, predicted)
        print("Model API score is {}".format(score))


if __name__ == "__main__":
    unittest.main()

--------------------------------------------------------------------------------
/ml/tests/test_resource.py:
--------------------------------------------------------------------------------
import os, sys
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
import unittest
import shutil
import time
from ml.resource import Resource
from ml.model import NumberRecognizeNN


class TestResource(unittest.TestCase):
    TEST_DIR = ""

    @classmethod
    def setUpClass(cls):
        path = os.path.join(os.path.dirname(__file__), "./test_resource")
        if not os.path.isdir(path):
            os.mkdir(path)
        cls.TEST_DIR = path

    @classmethod
    def tearDownClass(cls):
        if os.path.isdir(cls.TEST_DIR):
            shutil.rmtree(cls.TEST_DIR)

    def test_normalization_parameter(self):
        means = (0.0, 1.0, 0.2)
        stds = (0.5, 0.2, 3.0)
        r = Resource(self.TEST_DIR)
        r.save_normalization_params(means, stds)
        self.assertTrue(os.path.isfile(r.param_file))
        loaded_means, loaded_stds = r.load_normalization_params()
        # compare absolute differences; the values round-trip through float32
        for i in range(len(means)):
            self.assertTrue(abs(means[i] - loaded_means[i]) < 1e-6)
            self.assertTrue(abs(stds[i] - loaded_stds[i]) < 1e-6)

    def test_save_data(self):
        r = Resource(self.TEST_DIR)
        data_file = self.TEST_DIR + "/data_file.txt"
        data1 = ["0"] + ["0"] * 6400  # label + features
        data2 = ["9"] + ["1"] * 6400  # label + features
        r.save_data(data_file, data1)
        r.save_data(data_file, data2)

        x, y = r.load_data(data_file)
        self.assertEqual(2, len(x))
        self.assertEqual(2, len(y))
        self.assertEqual(0, y[0])
        self.assertEqual(9, y[1])
        self.assertEqual(0, x[0][0])
        self.assertEqual(1, x[1][0])

    def test_model(self):
        model = NumberRecognizeNN(10, 10)
        r = Resource(self.TEST_DIR)
        r.save_model(model)
        time.sleep(1)
        r.save_model(model)
        r.load_model(model)


if __name__ == "__main__":
    unittest.main()

--------------------------------------------------------------------------------
/ml/tests/test_trainer.py:
--------------------------------------------------------------------------------
import os, sys
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
import unittest
import shutil
import numpy as np
from ml.model import NumberRecognizeNN
from ml.data_processor import DataProcessor
from ml.trainer import Trainer
from ml.resource import Resource


class TestTrainer(unittest.TestCase):
    TEST_DIR = ""

    @classmethod
    def setUpClass(cls):
        path = os.path.join(os.path.dirname(__file__), "./test_trainer")
        if not os.path.isdir(path):
            os.mkdir(path)
        cls.TEST_DIR = path

    @classmethod
    def tearDownClass(cls):
        if os.path.isdir(cls.TEST_DIR):
            shutil.rmtree(cls.TEST_DIR)

    def test_train(self):
        model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
        r = Resource(self.TEST_DIR)
        trainer = Trainer(model, r)
        dp = DataProcessor()
        data, target = r.load_training_data()
        print("Test Train the model")
        trainer.train(data, target, epoch=5)

    def test_baseline(self):
        from sklearn.svm import SVC
        from sklearn.metrics import accuracy_score
        r = Resource(self.TEST_DIR)
        dp = DataProcessor()
        data, target = r.load_training_data()
        dp.set_normalization_params(data)
        f_data, f_target = dp.format_x(data), dp.format_y(target)

        test_size = 200
        model = SVC()
        model.fit(f_data[:-test_size], f_target[:-test_size])

        predicted = model.predict(f_data[-test_size:])
        teacher = f_target[-test_size:]
        score = accuracy_score(teacher, predicted)
        print("Baseline score is {}".format(score))


if __name__ == "__main__":
    unittest.main()

--------------------------------------------------------------------------------
/ml/trainer.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.model_selection import train_test_split
import chainer
from chainer.functions.loss import softmax_cross_entropy
from chainer.functions.evaluation import accuracy
from ml.data_processor import DataProcessor


class Trainer():

    def __init__(self, model, resource):
        self.model = model
        self.resource = resource

    def train(self, data, target, batch_size=100, epoch=5, test_size=0.3, report_interval_epoch=1):
        dp = DataProcessor()
        dp.set_normalization_params(data)
        self.resource.save_normalization_params(dp.means, dp.stds)
        _data = dp.format_x(data)
        _target = dp.format_y(target)
        train_x, test_x, train_y, test_y = train_test_split(_data, _target, test_size=test_size)

        optimizer = chainer.optimizers.Adam()
        optimizer.use_cleargrads()
        optimizer.setup(self.model)
        loss = lambda pred, teacher: softmax_cross_entropy.softmax_cross_entropy(pred, teacher)
        for x_batch, y_batch, epoch_end in dp.batch_iter(train_x, train_y, batch_size, epoch):
            predicted = self.model(x_batch)
            # optimizer.update computes the loss, backpropagates, and updates
            optimizer.update(loss, predicted, y_batch)
            if epoch_end:
                train_acc = accuracy.accuracy(predicted, y_batch)
                predicted_to_test = self.model(test_x)
                test_acc = accuracy.accuracy(predicted_to_test, test_y)
                print("train accuracy={}, test accuracy={}".format(train_acc.data, test_acc.data))
        self.resource.save_model(self.model)

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
chainer>=2.0.0
tornado>=4.5.1

--------------------------------------------------------------------------------
/run_application.py:
--------------------------------------------------------------------------------
import os
import tornado.ioloop
import tornado.httpserver
import tornado.escape
from tornado.options import define, options
from application.server import Application

# Define command line arguments
define("port", default=3000, help="run on the given port", type=int)


def main():
    tornado.options.parse_command_line()
    http_server = tornado.httpserver.HTTPServer(Application())
    # the PORT environment variable (set by Heroku) takes precedence over --port
    port = int(os.environ.get("PORT", options.port))
    print("server is running on port {0}".format(port))
    http_server.listen(port)
    tornado.ioloop.IOLoop.current().start()

if __name__ == "__main__":
    try:
        main()
    except Exception as ex:
        print(ex)

--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import os
import argparse
from ml.model import NumberRecognizeNN
from ml.data_processor import DataProcessor
from ml.trainer import Trainer
from ml.resource import Resource


def train(data_file, batch_size, epoch, test_size):
    r = Resource()
    dp = DataProcessor()
    model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
    try:
        dp.means, dp.stds = r.load_normalization_params()
        r.load_model(model)
        print("load the model")
    except Exception as ex:
        print("trained model does not exist.")

    x = None
    y = None
    if data_file:
        x, y = r.load_data(data_file)
    else:
        x, y = r.load_training_data()

    trainer = Trainer(model, r)
    print("begin training")
    trainer.train(x, y, batch_size=batch_size, epoch=epoch, test_size=test_size)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Train the Model")
    parser.add_argument("--data", help="training file", default="")
    parser.add_argument("--batch_size", help="batch size", default=100, type=int)
    parser.add_argument("--epoch", help="epoch size", default=5, type=int)
    parser.add_argument("--test_size", help="test_size", default=0.3, type=float)
    args = parser.parse_args()

    train(args.data, args.batch_size, args.epoch, args.test_size)

--------------------------------------------------------------------------------
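Taken together, train.py and the Model API form the loop the web application relies on: train a model into ml/store, then serve predictions from the latest saved file. A minimal sketch of that end-to-end flow, mirroring what ml/tests/test_model_api.py exercises (run from the repository root so the ml package is importable):

from ml.model import NumberRecognizeNN
from ml.model_api import ModelAPI
from ml.resource import Resource
from ml.trainer import Trainer

r = Resource()                      # persists artifacts under ml/store
model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
trainer = Trainer(model, r)

x, y = r.load_training_data()       # scikit-learn digits: 8 x 8 images, labels 0-9
trainer.train(x, y, epoch=5)        # saves the model and normalization parameters

api = ModelAPI(r)                   # reloads the latest saved model
print(api.predict(x[:10]))          # predicted digits for the first ten samples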