├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── app.json
├── application
│   ├── __init__.py
│   ├── server.py
│   ├── static
│   │   ├── css
│   │   │   ├── base.css
│   │   │   └── index.css
│   │   └── js
│   │       ├── index.js
│   │       └── sketch.js
│   └── templates
│       ├── base.html
│       └── index.html
├── docs
│   ├── architecture.PNG
│   └── top.PNG
├── ml
│   ├── __init__.py
│   ├── data_processor.py
│   ├── model.py
│   ├── model_api.py
│   ├── resource.py
│   ├── store
│   │   └── .gitkeep
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_data_processor.py
│   │   ├── test_model.py
│   │   ├── test_model_api.py
│   │   ├── test_resource.py
│   │   └── test_trainer.py
│   └── trainer.py
├── requirements.txt
├── run_application.py
└── train.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *.cover
46 |
47 | # Translations
48 | *.mo
49 | *.pot
50 |
51 | # Django stuff:
52 | *.log
53 |
54 | # Sphinx documentation
55 | docs/_build/
56 |
57 | # PyBuilder
58 | target/
59 |
60 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm
61 |
62 | ## Directory-based project format
63 | .idea/
64 |
65 | ## File-based project format
66 | *.ipr
67 | *.iml
68 | *.iws
69 |
70 | # iPython notebook
71 | .ipynb_checkpoints
72 |
73 | .vscode
74 | data
75 | !data/.gitkeep
76 | ml/store/*
77 | !ml/store/.gitkeep
78 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3
2 |
3 | # Reset the entrypoint and default command inherited from the base image.
4 |
5 | ENTRYPOINT []
6 | CMD [ "/bin/bash" ]
7 |
8 | # Use nomkl to avoid pulling in the large MKL-linked builds of numpy/scipy.
9 | RUN conda install -y nomkl
10 | RUN conda install -y numpy scipy scikit-learn cython
11 |
12 | ADD ./requirements.txt /tmp/requirements.txt
13 | RUN pip install -qr /tmp/requirements.txt
14 |
15 | ADD . /opt/ml_in_app
16 | WORKDIR /opt/ml_in_app
17 |
18 | CMD python run_application.py
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Takahiro Kubo
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Machine Learning In Application
2 |
3 | A practical implementation of machine learning in an application.
4 |
5 | ## Architecture
6 |
7 | ![architecture](./docs/architecture.PNG)
8 |
9 | * Model: the machine learning model itself.
10 | * Trainer: trains the Model, so the training process (loss, optimizer) is kept separate from the Model.
11 | * Model API: the interface between the Model and the application.
12 | * DataProcessor: loads and preprocesses the data; it is used by both the Trainer and the Model API.
13 | * Resource: manages the parameters and stored files for the Trainer, Model, and DataProcessor (see the usage sketch below).
14 |
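How the pieces fit together, as a minimal sketch using only the classes in this repository (this is the same flow that `train.py` and `application/server.py` follow):

```python
from ml.model import NumberRecognizeNN
from ml.model_api import ModelAPI
from ml.resource import Resource
from ml.trainer import Trainer

r = Resource()                                           # manages parameters & stored files
model = NumberRecognizeNN(r.INPUT_SIZE, r.OUTPUT_SIZE)   # Model
x, y = r.load_training_data()                            # scikit-learn digits dataset
Trainer(model, r).train(x, y, epoch=5)                   # Trainer saves the model via Resource
api = ModelAPI(r)                                        # Model API loads the latest saved model
print(api.predict(x[0].tolist()))                        # -> predicted digit for one sample
```
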
15 | ## Demo Application
16 |
17 | ![top](./docs/top.PNG)
18 |
19 | A handwritten number recognizer built with Chainer.
20 |
21 | You can deploy this application with Docker.
22 |
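To try it locally first (a sketch, assuming Docker is installed; the image name `ml-in-app` below is arbitrary):

* `docker build -t ml-in-app .`
* `docker run -p 3000:3000 ml-in-app` (the server listens on `$PORT`, defaulting to 3000, see `run_application.py`)
* open `http://localhost:3000`
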
23 | Please refer to [this](https://devcenter.heroku.com/articles/container-registry-and-runtime) tutorial to deploy the application to Heroku.
24 |
25 | * `heroku plugins:install heroku-container-registry`
26 | * `heroku container:login`
27 | * `git clone https://github.com/icoxfog417/machine_learning_in_application.git` and move into the cloned directory
28 | * `heroku create`
29 | * `heroku container:push web`
30 | * `heroku open`
31 |
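To retrain the model or run the tests outside Docker (a sketch; note that `numpy`, `scipy`, and `scikit-learn` are installed via conda in the Dockerfile and are not listed in `requirements.txt`):

* `pip install -r requirements.txt numpy scipy scikit-learn`
* `python train.py --epoch 5` (or `--data <file>` to train on collected feedback data)
* `python -m unittest discover ml/tests`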
--------------------------------------------------------------------------------
/app.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ML in Application",
3 | "description": "handwritten number recognizer by chainer",
4 | "repository": "https://github.com/icoxfog417/machine_learning_in_application",
5 | "keywords": ["Python", "tornado", "Chainer"],
6 | "env": {
7 | "SECRET_TOKEN": {
8 | "description": "A secret key for verifying the integrity of signed cookies.",
9 | "generator": "secret"
10 | }
11 | },
12 | "image": "registry.heroku.com/ml-in-app/web"
13 | }
14 |
--------------------------------------------------------------------------------
/application/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tie301837'
2 |
--------------------------------------------------------------------------------
/application/server.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tornado.web
3 | from ml.model_api import ModelAPI
4 | from ml.data_processor import DataProcessor
5 | from ml.resource import Resource
6 |
7 |
8 | DATA_PATH = os.path.join(os.path.dirname(__file__), "../data/feedbacks.txt")
9 |
10 |
11 | class IndexHandler(tornado.web.RequestHandler):
12 | def get(self):
13 | self.render("index.html", title="title")
14 |
15 |
16 | class PredictionHandler(tornado.web.RequestHandler):
17 |
18 | def post(self):
19 | resp = {"result": str(-1)}
20 | data = self.get_arguments("data[]")
21 |
22 | r = Resource()
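# If no trained model has been stored yet, train one on the bundled scikit-learn digits data before predicting.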
23 | if not os.path.isdir(r.model_path):
24 | from ml.model import NumberRecognizeNN
25 | from ml.trainer import Trainer
26 | model = NumberRecognizeNN(r.INPUT_SIZE, r.OUTPUT_SIZE)
27 | trainer = Trainer(model, r)
28 | x, y = r.load_training_data()
29 | trainer.train(x, y)
30 | api = ModelAPI(r)
31 |
32 | if len(data) > 0:
33 | _data = [float(d) for d in data]
34 | predicted = api.predict(_data)
35 | resp["result"] = str(predicted[0])
36 |
37 | self.write(resp)
38 |
39 |
40 | class FeedbackHandler(tornado.web.RequestHandler):
41 |
42 | def post(self):
43 | result = ""
44 | data = self.get_arguments("data[]")
45 | if len(data) > 0:
46 | r = Resource()
47 | r.save_data(DATA_PATH, data)
48 | else:
49 | result = "feedback format is wrong."
50 | resp = {"result": result}
51 | self.write(resp)
52 |
53 |
54 | class Application(tornado.web.Application):
55 |
56 | def __init__(self):
57 | handlers = [
58 | (r"/", IndexHandler),
59 | (r"/predict", PredictionHandler),
60 | (r"/feedback", FeedbackHandler),
61 | ]
62 |
63 | settings = dict(
64 | template_path=os.path.join(os.path.dirname(__file__), "templates"),
65 | static_path=os.path.join(os.path.dirname(__file__), "static"),
66 | cookie_secret=os.environ.get("SECRET_TOKEN", "__TODO:_GENERATE_YOUR_OWN_RANDOM_VALUE_HERE__"),
67 | xsrf_cookies=True,
68 | debug=True,
69 | )
70 |
71 | super(Application, self).__init__(handlers, **settings)
72 |
--------------------------------------------------------------------------------
/application/static/css/base.css:
--------------------------------------------------------------------------------
1 | body {
2 | background: #ededed;
3 | }
4 |
5 | header h1 small:before {
6 | content: "|";
7 | margin: 0 0.5em;
8 | font-size: 1.6em;
9 | }
10 |
11 | footer {
12 | background: #ccc;
13 | }
14 |
15 |
--------------------------------------------------------------------------------
/application/static/css/index.css:
--------------------------------------------------------------------------------
1 | .canvas-area{
2 | background-color:lavender;
3 | border-radius:10px;
4 | padding: 10px 0px;
5 | }
6 | #canvas{
7 | border: 1px solid silver;
8 | background-color:white
9 | }
10 | .btn-submit{
11 | width: 82px;
12 | }
13 | .btn-clear{
14 | width: 82px;
15 | }
16 | .predicteds{
17 | background-color:whitesmoke;
18 | }
19 | .result {
20 | height: 120px;
21 | line-height: 120px;
22 | text-align: center;
23 | float:left;
24 | margin: 5px;
25 | border-radius: 10px;
26 | border: 1px solid lightsteelblue;
27 | }
28 | .result .answer {
29 | font-size: 100px;
30 | width: 120px;
31 | height: 120px;
32 | text-align: center;
33 | float:left;
34 | }
--------------------------------------------------------------------------------
/application/static/js/index.js:
--------------------------------------------------------------------------------
1 | var MAIN_ELEMENT = "#main"
2 | var CANVAS_ID = "canvas"
3 |
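// Prediction wraps one submitted drawing: it POSTs the sampled pixel data to /predict and can later POST a corrected label to /feedback.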
4 | var Prediction = (function () {
5 | function Prediction(image, sample) {
6 | this.image = image;
7 | this.sampleImage = sample[0];
8 | this.sampleData = sample[1];
9 | this.result = -1;
10 | }
11 |
12 | Prediction.prototype.envelop = function (data) {
13 | var getCookie = function(name){
14 | var r = document.cookie.match("\\b" + name + "=([^;]*)\\b");
15 | return r ? r[1] : undefined;
16 | }
17 | var envelope = {
18 | _xsrf: getCookie("_xsrf"),
19 | "data[]": data
20 | }
21 | return envelope;
22 | }
23 |
24 | Prediction.prototype.imageSrc = function () {
25 | return this.image.toDataURL();
26 | }
27 |
28 | Prediction.prototype.execute = function () {
29 | var self = this;
30 | var d = new $.Deferred;
31 | $.post("/predict", self.envelop(self.sampleData), function(prediction){
32 | self.result = prediction["result"];
33 | d.resolve(self)
34 | })
35 | return d.promise();
36 | };
37 |
38 | Prediction.prototype.feedback = function (value) {
39 | var self = this;
40 | var d = new $.Deferred;
41 | var feedback = [parseInt(value)];
42 | feedback = feedback.concat(self.sampleData);
43 | $.post("/feedback", self.envelop(feedback), function(feedbacked){
44 | if(feedbacked["result"] == ""){
45 | self.result = feedback[0];
46 | d.resolve();
47 | }else{
48 | d.reject(feedbacked["result"]);
49 | }
50 | })
51 | return d.promise();
52 | };
53 |
54 | return Prediction;
55 | })();
56 |
57 | Vue.config.delimiters = ["[[", "]]"];
58 | Vue.config.prefix = "data-v-";
59 | Vue.component("predict-item", {
60 | template: "#predict-item",
61 | methods: {
62 | beginEdit: function(){
63 | this.state.editing = true;
64 | },
65 | endEdit: function(){
66 | var state = this.state;
67 | if(state.value >= 0 && state.value < 10 && (state.value != this.result)){
68 | var original = this.result;
69 | this.$data.feedback(state.value).fail(function(msg){
70 | state.value = original;
71 | })
72 | }else{
73 | state.value = this.result;
74 | }
75 | state.editing = false;
76 | }
77 | }
78 | });
79 | var app = new Vue({
80 | el: MAIN_ELEMENT,
81 | data: {
82 | canvas: null,
83 | SNAP_SIZE: 120,
84 | SAMPLE_SIZE: 80,
85 | predicts: []
86 | },
87 | created: function(){
88 | this.canvas = new Canvas(CANVAS_ID, {
89 | strokeStyle: "black"
90 | });
91 | },
92 | methods:{
93 | clear: function(){
94 | this.canvas.clear();
95 | },
96 | injectState: function(p){
97 | p.state = {
98 | editing: false,
99 | value: p.result
100 | }
101 | },
102 | submit: function(){
103 | var self = this;
104 | var image = self.canvas.snapShot(self.SNAP_SIZE);
105 | var sample = self.canvas.toSample(self.SAMPLE_SIZE, self.SAMPLE_SIZE);
106 | var total = sample[1].reduce(function(a, b){ return a + b; });
107 | if(total == 0){
108 | return false;
109 | }
110 | var p = new Prediction(image, sample);
111 | p.execute().done(function(p){
112 | self.injectState(p);
113 | self.predicts.unshift(p);
114 | self.clear();
115 | })
116 | }
117 | }
118 | });
119 |
--------------------------------------------------------------------------------
/application/static/js/sketch.js:
--------------------------------------------------------------------------------
1 | var Dot = (function () {
2 | function Dot(x, y) {
3 | this.x = x;
4 | this.y = y;
5 | }
6 | return Dot;
7 | })();
8 |
9 | var Stroke = (function () {
10 | function Stroke() {
11 | this.dots = [];
12 | }
13 | Stroke.prototype.draw = function (dot) {
14 | this.dots.push(dot);
15 | };
16 | return Stroke;
17 | })();
18 |
19 | var Canvas = (function () {
20 | function Canvas(canvasId, pencil) {
21 | var self = this
22 | self.$canvas = $("#" + canvasId);
23 | self.drawing = false;
24 | self.strokes = [];
25 |
26 | self.pencil = {
27 | strokeStyle: "#df4b26",
28 | lineJoin: "round",
29 | lineWidth: 10
30 | };
31 |
32 | if(arguments.length > 1){
33 | for(var k in self.pencil){
34 | if(k in pencil){
35 | self.pencil[k] = pencil[k];
36 | }
37 | }
38 | }
39 |
40 | self.$canvas
41 | .on("mousedown", function(e){
42 | var d = self.getPosition(e);
43 | self.draw(d);
44 | })
45 | .on("mousemove", function(e){
46 | if(self.drawing){
47 | var d = self.getPosition(e);
48 | self.draw(d);
49 | }
50 | })
51 | .on("mouseup mouseleave", function(e){
52 | self.drawing = false;
53 | })
54 |
55 | }
56 |
57 | Canvas.prototype.getCanvas = function () {
58 | return this.$canvas.get(0);
59 | }
60 |
61 | Canvas.prototype.getContext = function () {
62 | return this.getCanvas().getContext("2d");
63 | }
64 |
65 | Canvas.prototype.getPosition = function (event) {
66 | var canvasOffset = this.$canvas.offset();
67 | var relX = event.pageX - canvasOffset.left;
68 | var relY = event.pageY - canvasOffset.top;
69 | return new Dot(relX, relY);
70 | }
71 |
72 | Canvas.prototype.draw = function (dot) {
73 | var stroking = null;
74 | if(!this.drawing){
75 | stroking = new Stroke();
76 | this.strokes.push(stroking);
77 | this.drawing = true;
78 | }else{
79 | stroking = this.strokes[this.strokes.length - 1];
80 | }
81 |
82 | if(stroking != null){
83 | stroking.draw(dot);
84 | this.flush();
85 | }
86 | };
87 |
88 | Canvas.prototype.flush = function(){
89 | var context = this.getContext();
90 | context.clearRect(0, 0, context.canvas.width, context.canvas.height);
91 |
92 | context.strokeStyle = this.pencil.strokeStyle;
93 | context.lineJoin = this.pencil.lineJoin;
94 | context.lineWidth = this.pencil.lineWidth;
95 |
96 | for(var i = 0; i < this.strokes.length; i++) {
97 | var s = this.strokes[i];
98 | var preDot = null;
99 | for(var j = 0; j < s.dots.length; j++){
100 | context.beginPath();
101 |
102 | var d = s.dots[j];
103 | if(preDot == null){
104 | context.moveTo(d.x, d.y);
105 | }else{
106 | context.moveTo(preDot.x, preDot.y);
107 | }
108 | context.lineTo(d.x, d.y);
109 | preDot = d;
110 |
111 | context.closePath();
112 | context.stroke();
113 | }
114 | }
115 | }
116 |
117 | Canvas.prototype.clear = function(){
118 | this.strokes = [];
119 | this.drawing = false;
120 |
121 | var context = this.getContext();
122 | context.clearRect(0, 0, context.canvas.width, context.canvas.height);
123 | }
124 |
125 | Canvas.prototype.snapShot = function(x, y){
126 | var snap = document.createElement("canvas");
127 |
128 | if(arguments.length < 1){
129 | snap.width = this.getCanvas().width;
130 | snap.height = this.getCanvas().height;
131 | var context = snap.getContext("2d");
132 | context.drawImage(this.getContext().canvas, 0, 0);
133 | }else if(arguments.length < 2){
134 | snap.width = snap.height = x;
135 | var context = snap.getContext("2d");
136 | context.drawImage(this.getContext().canvas, 0, 0, x, x);
137 | }else{
138 | snap.width = x;
139 | snap.height = y;
140 | var context = snap.getContext("2d");
141 | context.drawImage(this.getContext().canvas, 0, 0, x, y);
142 | }
143 |
144 | return snap;
145 | }
146 |
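// Build the model input from the drawing: one value per pixel (square root of the clamped RGBA sum), returned together with a grayscale preview canvas.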
147 | Canvas.prototype.toSample = function(x, y){
148 | var sample = this.snapShot(x, y);
149 | var ctx = sample.getContext("2d");
150 |
151 | var src = ctx.getImageData(0, 0, x, y);
152 | var dst = ctx.createImageData(x, y);
153 | var data = [];
154 | for (var i = 0; i < src.data.length; i += 4) {
155 | var rgb = src.data[i] + src.data[i+1] + src.data[i+2];
156 | var sum = rgb + src.data[i+3];
157 | data.push(Math.sqrt(Math.min(sum,255)))
158 | dst.data[i] = dst.data[i+1] = dst.data[i+2] = rgb / 3;
159 | dst.data[i+3] = src.data[i+3];
160 | }
161 |
162 | ctx.putImageData(dst, 0, 0);
163 | return [sample, data]
164 |
165 | }
166 |
167 | return Canvas;
168 | })();
--------------------------------------------------------------------------------
/application/templates/base.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Number Recognition
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | {% block head %}{% end %}
17 |
18 |
19 |
20 |
21 | Number Recognizer powered by Chainer
22 |
23 |
24 | {% block body %}{% end %}
25 |
26 |
27 | {% block bottom %}{% end %}
28 |
29 |
--------------------------------------------------------------------------------
/application/templates/index.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block head %}
4 |
5 |
6 |
7 |
8 | {% end %}
9 |
10 | {% block bottom %}
11 |
12 | {% end %}
13 |
14 | {% block body %}
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
33 |
34 |
35 |
49 |
50 | {% module xsrf_form_html() %}
51 | {% end %}
--------------------------------------------------------------------------------
/docs/architecture.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icoxfog417/machine_learning_in_application/c6004a43e646a85bd2ccba2249254c1bab9a7709/docs/architecture.PNG
--------------------------------------------------------------------------------
/docs/top.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icoxfog417/machine_learning_in_application/c6004a43e646a85bd2ccba2249254c1bab9a7709/docs/top.PNG
--------------------------------------------------------------------------------
/ml/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icoxfog417/machine_learning_in_application/c6004a43e646a85bd2ccba2249254c1bab9a7709/ml/__init__.py
--------------------------------------------------------------------------------
/ml/data_processor.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class DataProcessor():
5 |
6 | def __init__(self, means=(), stds=()):
7 | self.means = means
8 | self.stds = stds
9 |
10 | def format_x(self, x, size=-1):
11 | _x = x
12 | if isinstance(x, (tuple, list)):
13 | _x = np.array([x])
14 |
15 | if size > 0 and _x.shape[1] != size:
16 | _x = self.adjust(_x, size)
17 |
18 | _x = _x.astype(np.float32, copy=False)
19 |
20 | if len(self.means) > 0 and len(self.stds) > 0:
21 | return (_x - self.means) / self.stds
22 | else:
23 | return _x
24 |
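# Shrink each flattened square image down to "size" values by max pooling over square blocks (e.g. the 80x80 canvas sample becomes the 8x8 = 64 model input).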
25 | def adjust(self, x, size):
26 | def max_pooling(v):
27 | sqrt = lambda _x: int(np.ceil(np.sqrt(_x)))
28 | _target_square_size = sqrt(size)
29 | square_size = sqrt(len(v))
30 | conv_size = int(square_size // _target_square_size)
31 | image = np.reshape(v, (square_size, square_size))
32 | _pooled = []
33 | for i in range(size):
34 | row, col = int(i // _target_square_size * conv_size), int(i % _target_square_size * conv_size)
35 | mp = np.max(image[row:row + conv_size, col: col + conv_size])
36 | _pooled.append(mp)
37 | return np.array(_pooled)
38 |
39 | x = np.array([max_pooling(_v) for _v in x])
40 | return x
41 |
42 | def format_y(self, y):
43 | _y = y
44 | if isinstance(y, int):
45 | _y = np.array([y])
46 | _y = _y.astype(np.int32, copy=False)
47 | return _y
48 |
49 | def set_normalization_params(self, x):
50 | self.means = np.mean(x, axis=0, dtype=np.float32)
51 | self.stds = np.std(x, axis=0, dtype=np.float32)
52 | # simple trick to avoid dividing by zero (adjust means first, while the mask still reflects the original stds)
53 | self.means[self.stds < 1.0e-6] = np.min(x)
54 | self.stds[self.stds < 1.0e-6] = np.max(x) - np.min(x)
55 |
56 | def batch_iter(self, X, y, batch_size, epoch=1):
57 | indices = np.array(range(len(y)))
58 | appendix = (batch_size - len(y) % batch_size) % batch_size  # pad so every batch is full
59 | for e in range(epoch):
60 | np.random.shuffle(indices)
61 | batch_indices = np.concatenate([indices, indices[:appendix]])
62 | batch_count = len(batch_indices) // batch_size
63 | for b in range(batch_count):
64 | elements = batch_indices[b * batch_size:(b + 1) * batch_size]
65 | x_batch = X[elements]
66 | y_batch = y[elements]
67 | epoch_end = True if b == batch_count - 1 else False
68 | yield x_batch, y_batch, epoch_end
69 |
--------------------------------------------------------------------------------
/ml/model.py:
--------------------------------------------------------------------------------
1 | import chainer
2 | import chainer.functions as F
3 | import chainer.links as L
4 |
5 |
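# Simple fully connected network: input -> 200 -> 200 -> output, with ReLU hidden layers and a sigmoid output.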
6 | class NumberRecognizeNN(chainer.Chain):
7 |
8 | def __init__(self, input_size, output_size, hidden_size=200, layer_size=3):
9 | self.input_size = input_size
10 | self.output_size = output_size
11 | self.hidden_size = hidden_size
12 | self.layer_size = layer_size
13 | super(NumberRecognizeNN, self).__init__(
14 | l1=L.Linear(self.input_size, hidden_size),
15 | l2=L.Linear(hidden_size, hidden_size),
16 | l3=L.Linear(hidden_size, self.output_size),
17 | )
18 |
19 | def __call__(self, x):
20 | h1 = F.relu(self.l1(x))
21 | h2 = F.relu(self.l2(h1))
22 | o = F.sigmoid(self.l3(h2))
23 | return o
24 |
--------------------------------------------------------------------------------
/ml/model_api.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ml.model import NumberRecognizeNN
3 | from ml.data_processor import DataProcessor
4 |
5 |
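# Thin prediction wrapper: loads the latest saved model and normalization params, then maps raw pixel data to a digit.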
6 | class ModelAPI():
7 |
8 | def __init__(self, resource):
9 | self.resource = resource
10 | self.model = NumberRecognizeNN(resource.INPUT_SIZE, resource.OUTPUT_SIZE)
11 | resource.load_model(self.model)
12 |
13 | means, stds = resource.load_normalization_params()
14 | self.dp = DataProcessor(means, stds)
15 |
16 | def predict(self, data):
17 | _data = data
18 | if isinstance(data, (tuple, list)):
19 | _data = np.array([data], dtype=np.float32)
20 |
21 | f_data = self.dp.format_x(_data, size=self.resource.INPUT_SIZE)
22 | predicted = self.model(f_data)
23 | number = np.argmax(predicted.data, axis=1)
24 | return number
25 |
--------------------------------------------------------------------------------
/ml/resource.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | from datetime import datetime
4 | import numpy as np
5 | from chainer import serializers
6 | from ml.data_processor import DataProcessor
7 |
8 |
9 | class Resource():
10 | INPUT_SIZE = 64 # 8 x 8 image size
11 | OUTPUT_SIZE = 10 # 10 classification
12 |
13 | def __init__(self, root=""):
14 | self.root = root if root else os.path.join(os.path.dirname(__file__), "./store")
15 | self.model_path = os.path.join(self.root, "./model")
16 | self.param_file = os.path.join(self.root, "./params.json")
17 |
18 | def save_normalization_params(self, means, stds):
19 | to_list = lambda ls: ls if isinstance(ls, (tuple, list)) else ls.tolist()
20 | params = {
21 | "means": to_list(means),
22 | "stds": to_list(stds)
23 | }
24 | serialized = json.dumps(params)
25 | with open(self.param_file, "wb") as f:
26 | f.write(serialized.encode("utf-8"))
27 |
28 | def load_normalization_params(self):
29 | loaded = {}
30 | if not os.path.isfile(self.param_file):
31 | raise Exception("Normalization parameter file does not exist.")
32 |
33 | with open(self.param_file, "rb") as f:
34 | loaded = json.loads(f.read().decode("utf-8"))
35 |
36 | to_array = lambda x: np.array([float(_x) for _x in x], dtype=np.float32)
37 |
38 | return to_array(loaded["means"]), to_array(loaded["stds"])
39 |
40 | def load_training_data(self):
41 | from sklearn.datasets import load_digits
42 | # the predefined dataset comes from the scikit-learn digits dataset
43 | # http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html
44 |
45 | digits = load_digits()
46 | x = digits.data
47 | y = digits.target
48 |
49 | return x, y
50 |
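# Feedback is appended as tab-separated lines: the label followed by INPUT_SIZE feature values (larger vectors are pooled down first).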
51 | def save_data(self, path, data):
52 | with open(path, "ab") as f:
53 | label = int(data[0])
54 | features = [float(d) for d in data[1:]]
55 | if len(features) > self.INPUT_SIZE:
56 | dp = DataProcessor()
57 | features = dp.adjust(np.array([features]), self.INPUT_SIZE).tolist()[0]
58 | elif len(features) < self.INPUT_SIZE:
59 | raise Exception("Size mismatch when saving the data.")
60 | line = "\t".join([str(e) for e in [label] + features]) + "\n"
61 | f.write(line.encode("utf-8"))
62 |
63 | def load_data(self, path):
64 | x = []
65 | y = []
66 | with open(path, mode="r", encoding="utf-8") as f:
67 | for line in f:
68 | line = line.strip()
69 | label, features = self.read_data(line)
70 | x.append(features)
71 | y.append(label)
72 | x = np.array(x, dtype=np.float32)
73 | y = np.array(y, dtype=np.float32)
74 | return x, y
75 |
76 | def read_data(self, line):
77 | elements = line.split("\t")
78 | label = int(elements[0])
79 | features = [float(e) for e in elements[1:]]
80 | return label, features
81 |
82 | def save_model(self, model):
83 | if not os.path.exists(self.model_path):
84 | os.mkdir(self.model_path)
85 | timestamp = datetime.strftime(datetime.now(), "%Y%m%d%H%M%S")
86 | model_file = os.path.join(self.model_path, "./" + model.__class__.__name__.lower() + "_" + timestamp + ".model")
87 | serializers.save_npz(model_file, model)
88 |
89 | def load_model(self, model):
90 | if not os.path.exists(self.model_path):
91 | raise Exception("model file directory does not exist.")
92 |
93 | suffix = ".model"
94 | keyword = model.__class__.__name__.lower()
95 | candidates = []
96 | for f in os.listdir(self.model_path):
97 | if keyword in f and f.endswith(suffix):
98 | candidates.append(f)
99 | candidates.sort()
100 | latest = candidates[-1]
101 | #print("targets {}, pick up {}.".format(candidates, latest))
102 | model_file = os.path.join(self.model_path, latest)
103 | serializers.load_npz(model_file, model)
104 |
--------------------------------------------------------------------------------
/ml/store/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icoxfog417/machine_learning_in_application/c6004a43e646a85bd2ccba2249254c1bab9a7709/ml/store/.gitkeep
--------------------------------------------------------------------------------
/ml/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/icoxfog417/machine_learning_in_application/c6004a43e646a85bd2ccba2249254c1bab9a7709/ml/tests/__init__.py
--------------------------------------------------------------------------------
/ml/tests/test_data_processor.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
3 | import math
4 | import unittest
5 | import numpy as np
6 | from ml.resource import Resource
7 | from ml.data_processor import DataProcessor
8 |
9 |
10 | class TestDataProcessor(unittest.TestCase):
11 |
12 | def test_format_x(self):
13 | means = np.array([0, 0.1, 0.2])
14 | stds = np.array([1, 1.5, 0.5])
15 | dp = DataProcessor(means=means, stds=stds)
16 | data = np.array([[1, 2, 3], [4, 5, 6]])
17 | x = dp.format_x(data)
18 | _x = (data - means) / stds
19 | for i in range(x.shape[0]):
20 | for j in range(x.shape[1]):
21 | self.assertEqual(x[i][j], _x[i][j])
22 |
23 | def test_format_x_resize(self):
24 | dp = DataProcessor()
25 | data = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]])
26 | x = dp.format_x(data, size=4)
27 | v = x[0].tolist()
28 | self.assertEqual(v[0], 6)
29 | self.assertEqual(v[1], 8)
30 | self.assertEqual(v[2], 14)
31 | self.assertEqual(v[3], 16)
32 |
33 | def test_batch_iter(self):
34 | batch_size = 10
35 | dp = DataProcessor()
36 | r = Resource()
37 | train_x, train_y = r.load_training_data()
38 | batch_count = math.ceil(len(train_y) / batch_size)
39 |
40 | i = 0
41 | for x_batch, y_batch, epoch_end in dp.batch_iter(train_x, train_y, batch_size):
42 | self.assertEqual(batch_size, len(x_batch))
43 | self.assertEqual(batch_size, len(y_batch))
44 | if i < batch_count - 1:
45 | self.assertFalse(epoch_end)
46 | else:
47 | self.assertTrue(epoch_end)
48 | i += 1
49 | self.assertEqual(i, batch_count)
50 |
51 |
52 | if __name__ == "__main__":
53 | unittest.main()
54 |
55 |
--------------------------------------------------------------------------------
/ml/tests/test_model.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
3 | import unittest
4 | import numpy as np
5 | from ml.model import NumberRecognizeNN
6 |
7 |
8 |
9 | class TestModel(unittest.TestCase):
10 |
11 | def test_forward(self):
12 | input_size = 100
13 | output_size = 10
14 | data_length = 50
15 | test_data = self.create_test_data(input_size, data_length)
16 |
17 | model = NumberRecognizeNN(input_size, output_size)
18 | output = model(test_data)
19 | self.assertEqual((data_length, output_size), output.data.shape)
20 |
21 | def create_test_data(self, input_size, length):
22 | input = np.random.rand(length, input_size).astype(np.float32)
23 | return input
24 |
25 |
26 | if __name__ == "__main__":
27 | unittest.main()
28 |
--------------------------------------------------------------------------------
/ml/tests/test_model_api.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
3 | import unittest
4 | import shutil
5 | from sklearn.metrics import accuracy_score
6 | from ml.model import NumberRecognizeNN
7 | from ml.model_api import ModelAPI
8 | from ml.trainer import Trainer
9 | from ml.data_processor import DataProcessor
10 | from ml.resource import Resource
11 |
12 |
13 | class TestModelAPI(unittest.TestCase):
14 | TEST_DIR = ""
15 |
16 | @classmethod
17 | def setUpClass(cls):
18 | path = os.path.join(os.path.dirname(__file__), "./test_model_api")
19 | if not os.path.isdir(path):
20 | os.mkdir(path)
21 | cls.TEST_DIR = path
22 |
23 | @classmethod
24 | def tearDownClass(cls):
25 | if os.path.isdir(cls.TEST_DIR):
26 | shutil.rmtree(cls.TEST_DIR)
27 |
28 | def test_model_api(self):
29 | model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
30 | r = Resource(self.TEST_DIR)
31 | trainer = Trainer(model, r)
32 | dp = DataProcessor()
33 | data, target = r.load_training_data()
34 | api_test_size = 200
35 |
36 | print("Train the model for API Test.")
37 | trainer.train(data[:-api_test_size], target[:-api_test_size], epoch=5)
38 |
39 | model_api = ModelAPI(r)
40 | predicted = model_api.predict(data[-api_test_size:])
41 | teacher = target[-api_test_size:]
42 | score = accuracy_score(teacher, predicted)
43 | print("Model API score is {}".format(score))
44 |
45 |
46 | if __name__ == "__main__":
47 | unittest.main()
48 |
--------------------------------------------------------------------------------
/ml/tests/test_resource.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
3 | import unittest
4 | import shutil
5 | import time
6 | from ml.resource import Resource
7 | from ml.model import NumberRecognizeNN
8 |
9 |
10 | class TestResource(unittest.TestCase):
11 | TEST_DIR = ""
12 |
13 | @classmethod
14 | def setUpClass(cls):
15 | path = os.path.join(os.path.dirname(__file__), "./test_resource")
16 | if not os.path.isdir(path):
17 | os.mkdir(path)
18 | cls.TEST_DIR = path
19 |
20 | @classmethod
21 | def tearDownClass(cls):
22 | if os.path.isdir(cls.TEST_DIR):
23 | shutil.rmtree(cls.TEST_DIR)
24 |
25 | def test_normalization_parameter(self):
26 | means = (0.0, 1.0, 0.2)
27 | stds = (0.5, 0.2, 3.0)
28 | r = Resource(self.TEST_DIR)
29 | r.save_normalization_params(means, stds)
30 | self.assertTrue(os.path.isfile(r.param_file))
31 | loaded_means, loaded_stds = r.load_normalization_params()
32 | for i in range(len(means)):
33 | self.assertTrue(abs(means[i] - loaded_means[i]) < 1e-10)
34 | self.assertTrue(abs(stds[i] - loaded_stds[i]) < 1e-10)
35 |
36 | def test_save_data(self):
37 | r = Resource(self.TEST_DIR)
38 | data_file = self.TEST_DIR + "/data_file.txt"
39 | data1 = ["0"] + ["0"] * 6400 # label + feature
40 | data2 = ["9"] + ["1"] * 6400 # label + feature
41 | r.save_data(data_file, data1)
42 | r.save_data(data_file, data2)
43 |
44 | x, y = r.load_data(data_file)
45 | self.assertEqual(2, len(x))
46 | self.assertEqual(2, len(y))
47 | self.assertEqual(0, y[0])
48 | self.assertEqual(9, y[1])
49 | self.assertEqual(0, x[0][0])
50 | self.assertEqual(1, x[1][0])
51 |
52 | def test_model(self):
53 | model = NumberRecognizeNN(10, 10)
54 | r = Resource(self.TEST_DIR)
55 | r.save_model(model)
56 | time.sleep(1)
57 | r.save_model(model)
58 | r.load_model(model)
59 |
60 |
61 | if __name__ == "__main__":
62 | unittest.main()
63 |
--------------------------------------------------------------------------------
/ml/tests/test_trainer.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
3 | import unittest
4 | import shutil
5 | import numpy as np
6 | from ml.model import NumberRecognizeNN
7 | from ml.data_processor import DataProcessor
8 | from ml.trainer import Trainer
9 | from ml.resource import Resource
10 |
11 |
12 | class TestTrainer(unittest.TestCase):
13 | TEST_DIR = ""
14 |
15 | @classmethod
16 | def setUpClass(cls):
17 | path = os.path.join(os.path.dirname(__file__), "./test_trainer")
18 | if not os.path.isdir(path):
19 | os.mkdir(path)
20 | cls.TEST_DIR = path
21 |
22 | @classmethod
23 | def tearDownClass(cls):
24 | if os.path.isdir(cls.TEST_DIR):
25 | shutil.rmtree(cls.TEST_DIR)
26 |
27 | def test_train(self):
28 | model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
29 | r = Resource(self.TEST_DIR)
30 | trainer = Trainer(model, r)
31 | dp = DataProcessor()
32 | data, target = r.load_training_data()
33 | print("Test Train the model")
34 | trainer.train(data, target, epoch=5)
35 |
36 | def test_baseline(self):
37 | from sklearn.svm import SVC
38 | from sklearn.metrics import accuracy_score
39 | r = Resource(self.TEST_DIR)
40 | dp = DataProcessor()
41 | data, target = r.load_training_data()
42 | dp.set_normalization_params(data)
43 | f_data, f_target = dp.format_x(data), dp.format_y(target)
44 |
45 | test_size = 200
46 | model = SVC()
47 | model.fit(f_data[:-test_size], f_target[:-test_size])
48 |
49 | predicted = model.predict(f_data[-test_size:])
50 | teacher = f_target[-test_size:]
51 | score = accuracy_score(teacher, predicted)
52 | print("Baseline score is {}".format(score))
53 |
54 |
55 | if __name__ == "__main__":
56 | unittest.main()
57 |
58 |
--------------------------------------------------------------------------------
/ml/trainer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.model_selection import train_test_split
3 | import chainer
4 | from chainer.functions.loss import softmax_cross_entropy
5 | from chainer.functions.evaluation import accuracy
6 | from ml.data_processor import DataProcessor
7 |
8 |
9 | class Trainer():
10 |
11 | def __init__(self, model, resource):
12 | self.model = model
13 | self.resource = resource
14 |
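# Normalize the data, hold out a test split, then optimize with Adam + softmax cross entropy over mini-batches; normalization params and the trained model are saved via Resource.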
15 | def train(self, data, target, batch_size=100, epoch=5, test_size=0.3, report_interval_epoch=1):
16 | dp = DataProcessor()
17 | dp.set_normalization_params(data)
18 | self.resource.save_normalization_params(dp.means, dp.stds)
19 | _data = dp.format_x(data)
20 | _target = dp.format_y(target)
21 | train_x, test_x, train_y, test_y = train_test_split(_data, _target, test_size=test_size)
22 |
23 | optimizer = chainer.optimizers.Adam()
24 | optimizer.use_cleargrads()
25 | optimizer.setup(self.model)
26 | loss = lambda pred, teacher: softmax_cross_entropy.softmax_cross_entropy(pred, teacher)
27 | for x_batch, y_batch, epoch_end in dp.batch_iter(train_x, train_y, batch_size, epoch):
28 | predicted = self.model(x_batch)
29 | optimizer.update(loss, predicted, y_batch)
30 | if epoch_end:
31 | train_acc = accuracy.accuracy(predicted, y_batch)
32 | predicted_to_test = self.model(test_x)
33 | test_acc = accuracy.accuracy(predicted_to_test, test_y)
34 | print("train accuracy={}, test accuracy={}".format(train_acc.data, test_acc.data))
35 | self.resource.save_model(self.model)
36 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | chainer>=2.0.0
2 | tornado>=4.5.1
3 |
--------------------------------------------------------------------------------
/run_application.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tornado.ioloop
3 | import tornado.httpserver
4 | import tornado.escape
5 | from tornado.options import define, options
6 | from application.server import Application
7 |
8 | # Define command line arguments
9 | define("port", default=3000, help="run on the given port", type=int)
10 |
11 |
12 | def main():
13 | # tornado.options.parse_command_line()
14 | http_server = tornado.httpserver.HTTPServer(Application())
15 | port = int(os.environ.get("PORT", options.port))
16 | print("server is running on port {0}".format(port))
17 | http_server.listen(port)
18 | tornado.ioloop.IOLoop.current().start()
19 |
20 | if __name__ == "__main__":
21 | try:
22 | main()
23 | except Exception as ex:
24 | print(ex)
25 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | from ml.model import NumberRecognizeNN
4 | from ml.data_processor import DataProcessor
5 | from ml.trainer import Trainer
6 | from ml.resource import Resource
7 |
8 |
9 | def train(data_file, batch_size, epoch, test_size):
10 | r = Resource()
11 | dp = DataProcessor()
12 | model = NumberRecognizeNN(Resource.INPUT_SIZE, Resource.OUTPUT_SIZE)
13 | try:
14 | dp.means, dp.stds = r.load_normalization_params()
15 | r.load_model(model)
16 | print("load the model")
17 | except Exception as ex:
18 | print("trained model does not exist.")
19 |
20 | x = None
21 | y = None
22 | if data_file:
23 | x, y = r.load_data(data_file)
24 | else:
25 | x, y = r.load_training_data()
26 |
27 | trainer = Trainer(model, r)
28 | print("begin training")
29 | trainer.train(x, y, batch_size=batch_size, epoch=epoch, test_size=test_size)
30 |
31 |
32 | if __name__ == "__main__":
33 | parser = argparse.ArgumentParser(description="Train the Model")
34 | parser.add_argument("--data", help="training file", default="")
35 | parser.add_argument("--batch_size", help="batch size", default=100, type=int)
36 | parser.add_argument("--epoch", help="epoch size", default=5, type=int)
37 | parser.add_argument("--test_size", help="test_size", default=0.3, type=float)
38 | args = parser.parse_args()
39 |
40 | train(args.data, args.batch_size, args.epoch, args.test_size)
41 |
--------------------------------------------------------------------------------