├── examples
│   ├── exp_1.py
│   └── exp_2.py
├── LICENSE
├── README.md
├── .gitignore
├── mnist.py
├── minimum.py
└── chainer.py

/examples/exp_1.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-


class Variable(object):
    # Holds an array and remembers the Function that produced it.

    def __init__(self, data):
        self.data = data
        self.creator = None

    def set_creator(self, gen_func):
        self.creator = gen_func


class Function(object):
    # Records its input and output Variables so the graph can be traced back.

    def __call__(self, in_var):
        in_data = in_var.data
        output = self.forward(in_data)
        ret = Variable(output)
        ret.set_creator(self)
        self.input = in_var
        self.output = ret
        return ret

    def forward(self, in_data):
        return in_data  # identity; subclasses would override this


data = [0, 1, 2, 3]
x = Variable(data)

f_1 = Function()
y_1 = f_1(x)
f_2 = Function()
y_2 = f_2(y_1)

print(y_2.data)
print(y_2.creator)  # => f_2
print(y_2.creator.input)  # => y_1
print(y_2.creator.input.creator)  # => f_1
print(y_2.creator.input.creator.input)  # => x
print(y_2.creator.input.creator.input.data)  # => data
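
# Added sketch (not in the original file): the creator chain traced by the
# prints above can also be walked generically, assuming every Function has
# exactly one input.
node = y_2
while node.creator is not None:
    node = node.creator.input
print(node.data is data)  # => True: back at the original input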
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Shunta Saito

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# 1f-Chainer

One-file Chainer

## About One-file Chainer

- The core functionality of Chainer is implemented in a single file
- Only Linear, ReLU, MeanSquaredError and SGD are available
- You can run `mnist.py` to confirm that it can train a 3-layer perceptron to classify MNIST
- 1 hot feature:

  - You can switch all computations to GPU mode just by replacing `import numpy as xp` with `import cupy as xp` at the top of `chainer.py`, as shown below
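
The whole switch is that one import line at the top of `chainer.py`:

```
import cupy as xp  # instead of: import numpy as xp
```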

## MNIST Training example

### Run on CPU

- Just run `mnist.py`

```
python mnist.py
```

### Run on GPU

- Replace `import numpy as xp` with `import cupy as xp` in `chainer.py`
- Then, run `mnist.py` with the option `--gpu 0`

```
python mnist.py --gpu 0
```

## minimum.py?

`minimum.py` in this repository is another implementation of a 3-layer perceptron for MNIST. It contains

- Linear
- ReLU
- Softmax cross entropy (grad)
- Training code for MNIST
- Accuracy calculation

in **just 74 lines**. (`chainer.py` has 205 lines.)

Try:

```
python minimum.py
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
mldata
loss.png
.sync-config.cson

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
--------------------------------------------------------------------------------
/mnist.py:
--------------------------------------------------------------------------------
from chainer import *
from sklearn.datasets import fetch_mldata

import argparse
import numpy as np
import sys

try:
    import cupy as xp
except ImportError:
    import numpy as xp  # fall back to NumPy so CPU-only runs still work

if 'linux' in sys.platform:
    import matplotlib
    matplotlib.use('Agg')

import matplotlib.pyplot as plt

parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=int, default=-1)
args = parser.parse_args()

if __name__ == '__main__':
    mnist = fetch_mldata('MNIST original', data_home='.')
    n_train, bsize = 60000, 32
    td, tl = mnist.data[:n_train].astype(np.float32) / 255.0, mnist.target[:n_train]
    tl = np.array([tl == i for i in range(10)]).T.astype(np.float32)  # one-hot targets
    perm = np.random.permutation(len(td))
    td, tl = td[perm], tl[perm]
    if args.gpu >= 0:
        td, tl = xp.asarray(td), xp.asarray(tl)

    model = Chain(
        l1=Linear(784, 100),
        l2=Linear(100, 100),
        l3=Linear(100, 10)
    )

    def forward(x):
        h = model.l1(x)
        h = relu(h)
        h = model.l2(h)
        h = relu(h)
        h = model.l3(h)
        return h

    opt = SGD(lr=0.1)
    opt.setup(model)

    n_iter, losses = 0, []
    for epoch in range(1):
        for i in range(0, len(td), bsize):
            x = Variable(td[i:i + bsize])
            t = Variable(tl[i:i + bsize])
            # forward
            y = forward(x)
            loss = mean_squared_error(y, t)
            # backward
            model.zerograds()
            loss.backward()
            opt.update()
            n_iter += 1
            losses.append(loss.data)

    plt.plot(losses)
    plt.savefig('loss.png')
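
    # Added evaluation sketch (not in the original script): accuracy over the
    # 10,000 held-out MNIST test images, CPU case.
    vd = mnist.data[n_train:].astype(np.float32) / 255.0
    vl = mnist.target[n_train:]
    pred = forward(Variable(vd)).data.argmax(axis=1)
    print('test accuracy:', (pred == vl).mean())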
--------------------------------------------------------------------------------
/examples/exp_2.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as xp


class Variable(object):

    def __init__(self, data):
        self.data = data
        self.creator = None
        self.grad = 1  # seed gradient for the output Variable

    def set_creator(self, gen_func):
        self.creator = gen_func

    def backward(self):
        if self.creator is None:  # input data
            return
        func = self.creator
        while func:
            gy = func.output.grad
            func.input.grad = func.backward(gy)
            func = func.input.creator


class Function(object):

    def __call__(self, in_var):
        in_data = in_var.data
        output = self.forward(in_data)
        ret = Variable(output)
        ret.set_creator(self)
        self.input = in_var
        self.output = ret
        return ret

    def forward(self, in_data):
        raise NotImplementedError()

    def backward(self, grad_output):
        raise NotImplementedError()


class Mul(Function):

    def __init__(self, init_w):
        self.w = init_w  # Initialize the parameter

    def forward(self, in_data):
        return in_data * self.w

    def backward(self, grad_output):
        gx = self.w * grad_output  # dL/dx = w * dL/dy
        self.gw = self.input.data * grad_output  # dL/dw = x * dL/dy
        return gx


data = xp.array([0, 1, 2, 3])

f1 = Mul(2)
f2 = Mul(3)
f3 = Mul(4)

y0 = Variable(data)
y1 = f1(y0)  # y1 = y0 * 2
y2 = f2(y1)  # y2 = y1 * 3
y3 = f3(y2)  # y3 = y2 * 4

print(y0.data)
print(y1.data)
print(y2.data)
print(y3.data)

y3.backward()

print(y3.grad)  # df3 / dy3 = 1
print(y2.grad)  # df3 / dy2 = (df3 / dy3) * (dy3 / dy2) = 1 * 4
print(y1.grad)  # df3 / dy1 = (df3 / dy3) * (dy3 / dy2) * (dy2 / dy1) = 1 * 4 * 3
print(y0.grad)  # df3 / dy0 = (df3 / dy3) * (dy3 / dy2) * (dy2 / dy1) * (dy1 / dy0) = 1 * 4 * 3 * 2

print(f3.gw)  # df3 / dw3 = y2 * 1
print(f2.gw)  # df3 / dw2 = y1 * 4
print(f1.gw)  # df3 / dw1 = y0 * 12
--------------------------------------------------------------------------------
/minimum.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2016 Shunta Saito

from sklearn.datasets import fetch_mldata
import numpy
numpy.random.seed(1988)

class Linear(object):

    def __init__(self, in_sz, out_sz):
        self.W = numpy.random.randn(out_sz, in_sz) * numpy.sqrt(2. / in_sz)  # He init
        self.b = numpy.zeros((out_sz,))

    def __call__(self, x):
        self.x = x  # keep the input for the backward pass
        return x.dot(self.W.T) + self.b

    def update(self, gy, lr):
        self.W -= lr * gy.T.dot(self.x)  # dL/dW
        self.b -= lr * gy.sum(axis=0)  # dL/db
        return gy.dot(self.W)  # dL/dx, handed to the previous layer

class ReLU(object):

    def __call__(self, x):
        self.x = x
        return numpy.maximum(0, x)

    def update(self, gy, lr):
        return gy * (self.x > 0)  # gradient passes only where the input was positive

model = [
    Linear(784, 100),
    ReLU(),
    Linear(100, 100),
    ReLU(),
    Linear(100, 10)
]

def forward(model, x):
    for layer in model:
        x = layer(x)
    return x

def update(model, gy, lr=0.0001):
    for layer in reversed(model):
        gy = layer.update(gy, lr)

def softmax_cross_entropy_gy(y, t):
    return (numpy.exp(y.T) / numpy.exp(y.T).sum(axis=0)).T - t  # softmax(y) - t

def accuracy(y, t):
    n_correct = numpy.sum(t[numpy.arange(len(t)), y.argmax(axis=1)])
    return n_correct / float(len(t))

mnist = fetch_mldata('MNIST original', data_home='.')
td, tl = mnist.data[:60000] / 255.0, mnist.target[:60000]
tl = numpy.array([tl == i for i in range(10)]).T.astype(numpy.int)
perm = numpy.random.permutation(len(td))
td, tl = td[perm], tl[perm]

for epoch in range(30):
    for i in range(0, len(td), 128):
        x, t = td[i:i + 128], tl[i:i + 128]
        y = forward(model, x)
        gy = softmax_cross_entropy_gy(y, t)
        update(model, gy)

vd, vl = mnist.data[60000:] / 255.0, mnist.target[60000:]
vl = numpy.array([vl == i for i in range(10)]).T.astype(numpy.int)
y = forward(model, vd)
print(accuracy(y, vl))
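
# Added note (not in the original file) on why softmax_cross_entropy_gy is
# softmax(y) - t: for L = -sum_k t_k * log(p_k) with p = softmax(y), the
# gradient is dL/dy_j = p_j - t_j. A numerical spot check of that identity:
#
#     y0 = numpy.array([[0.2, -0.4, 0.1]])
#     t0 = numpy.array([[0., 1., 0.]])
#     p0 = numpy.exp(y0) / numpy.exp(y0).sum()
#     assert numpy.allclose(softmax_cross_entropy_gy(y0, t0), p0 - t0)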
--------------------------------------------------------------------------------
/chainer.py:
--------------------------------------------------------------------------------
import numpy as xp
import heapq

class Variable(object):

    def __init__(self, data, grad=None, name=None):
        self.data = data
        self.rank = 0
        self.grad = grad
        self.creator = None
        self.name = name

    def set_creator(self, gen_func):
        self.creator = gen_func
        self.rank = gen_func.rank + 1

    def backward(self):
        if self.creator is None:
            return
        if self.data.size == 1 and self.grad is None:  # Loss variable
            self.grad = xp.ones_like(self.data)

        cand_funcs = []
        seen_set = set()
        seen_vars = set()
        need_copy = set()

        def add_cand(cand):
            if cand not in seen_set:
                heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
                seen_set.add(cand)

        add_cand(self.creator)

        while cand_funcs:
            _, _, func = heapq.heappop(cand_funcs)
            in_data = [x.data for x in func.inputs]
            out_grad = [y.grad for y in func.outputs]
            gxs = func.backward(in_data, out_grad)
            for x, gx in zip(func.inputs, gxs):
                if gx is None:
                    continue
                id_x = id(x)
                if x.creator is None:  # leaf
                    if x.grad is None:
                        x.grad = gx
                        need_copy.add(id_x)
                    elif id_x in need_copy:
                        x.grad = x.grad + gx
                        need_copy.remove(id_x)
                    else:
                        x.grad += gx
                else:  # not a leaf
                    add_cand(x.creator)
                    if id_x not in seen_vars:
                        x.grad = gx
                        seen_vars.add(id_x)
                        need_copy.add(id_x)
                    elif id_x in need_copy:
                        x.grad = gx + x.grad
                        need_copy.remove(id_x)
                    else:
                        x.grad += gx

    def zerograd(self):
        self.grad.fill(0)
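
# Added commentary on Variable.backward above: candidates are popped from a
# heap keyed on -rank, so Functions farther from the inputs run first and a
# Function's output gradients are complete before its own backward() is
# called; len(seen_set) breaks rank ties in insertion order. The need_copy
# set makes the first accumulation into a gradient array allocate a new one
# (g = g + gx) rather than updating in place (g += gx), so an array that is
# still referenced elsewhere in the graph is never mutated.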

class Function(object):

    def __call__(self, *inputs):
        in_data = [x.data for x in inputs]
        outputs = self.forward(in_data)
        ret = [Variable(y) for y in outputs]
        self.rank = max([x.rank for x in inputs])
        for y in ret:
            y.set_creator(self)
        self.inputs = inputs
        self.outputs = ret
        return ret if len(ret) > 1 else ret[0]

    def forward(self, inputs):
        raise NotImplementedError()

    def backward(self, inputs, grad_outputs):
        raise NotImplementedError()

class Link(object):

    def __init__(self, **params):
        for name, value in params.items():
            grad = xp.full_like(value, 0)
            var = Variable(value, grad, name)
            self.__dict__[name] = var

    def params(self):
        for param in self.__dict__.values():
            yield param

    def namedparams(self):
        for name, param in self.__dict__.items():
            yield '/' + name, param

    def zerograds(self):
        for param in self.params():
            param.zerograd()

class Chain(Link):

    def __init__(self, **links):
        super(Chain, self).__init__()
        self.children = []
        for name, link in links.items():
            self.children.append(name)
            self.__dict__[name] = link

    def params(self):
        for name in self.children:
            for param in self.__dict__[name].params():
                yield param

    def namedparams(self):
        for name in self.children:
            prefix = '/' + name
            for path, param in self.__dict__[name].namedparams():
                yield prefix + path, param

class Linear(Link):

    def __init__(self, in_size, out_size):
        n = xp.random.normal
        scale = xp.sqrt(2. / in_size)
        W = n(loc=0.0, scale=scale, size=(out_size, in_size))
        b = n(loc=0.0, scale=scale, size=(out_size,))
        super(Linear, self).__init__(
            W=W.astype(xp.float32), b=b.astype(xp.float32))

    def __call__(self, x):
        return LinearFunction()(x, self.W, self.b)

class LinearFunction(Function):

    def forward(self, inputs):
        x, W, b = inputs
        return x.dot(W.T) + b,  # trailing comma: forward returns a tuple

    def backward(self, inputs, grad_outputs):
        x, W, b = inputs
        gy = grad_outputs[0]
        gx = gy.dot(W).reshape(x.shape)
        gW = gy.T.dot(x)
        gb = gy.sum(0)
        return gx, gW, gb

class ReLU(Function):

    def forward(self, inputs):
        return xp.maximum(inputs[0], 0),

    def backward(self, inputs, grad_outputs):
        return grad_outputs[0] * (inputs[0] > 0),

def relu(x):
    return ReLU()(x)

class MeanSquaredError(Function):

    def forward(self, inputs):
        x0, x1 = inputs
        self.diff = x0 - x1
        diff = self.diff.ravel()
        return diff.dot(diff) / diff.size,

    def backward(self, inputs, grad_outputs):
        gy = grad_outputs[0]
        coeff = gy * (2. / self.diff.size)
        gx0 = coeff * self.diff
        return gx0, -gx0

def mean_squared_error(x0, x1):
    return MeanSquaredError()(x0, x1)

class Optimizer(object):

    def setup(self, link):
        self.target = link
        self.states = {}
        self.prepare()

    def prepare(self):
        for name, param in self.target.namedparams():
            if name not in self.states:
                self.states[name] = {}

    def update(self):
        self.prepare()
        for name, param in self.target.namedparams():
            self.update_one(param, self.states[name])

class SGD(Optimizer):

    def __init__(self, lr=0.01):
        self.lr = lr

    def update_one(self, param, state):
        param.data -= self.lr * param.grad
--------------------------------------------------------------------------------
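
A quick sketch (not a file in this repository) of the gradient-accumulation path in `Variable.backward`: when one Variable feeds two Functions, both contributions are summed into its `grad`. It assumes the script is run from the repository root, so that `chainer` resolves to the `chainer.py` above.

```
import numpy as xp
from chainer import Variable, relu, mean_squared_error

x = Variable(xp.array([[1., -2., 3.]], dtype=xp.float32))
h = relu(x)                      # first consumer of x
loss = mean_squared_error(h, x)  # second consumer of x
loss.backward()

# Analytically dL/dx_j = (2/3) * (relu(x_j) - x_j) * (relu'(x_j) - 1), which
# is nonzero only at x = -2: (2/3) * 2 * (0 - 1) = -4/3.
print(x.grad)  # => [[ 0.        -1.3333334  0.       ]]
```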