├── examples
│   ├── exp_1.py
│   └── exp_2.py
├── LICENSE
├── README.md
├── .gitignore
├── mnist.py
├── minimum.py
└── chainer.py

/examples/exp_1.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-


class Variable(object):
    # Holds an array and remembers the Function that produced it.

    def __init__(self, data):
        self.data = data
        self.creator = None

    def set_creator(self, gen_func):
        self.creator = gen_func


class Function(object):
    # Records its input and output Variables so the graph can be traced back.

    def __call__(self, in_var):
        in_data = in_var.data
        output = self.forward(in_data)
        ret = Variable(output)
        ret.set_creator(self)
        self.input = in_var
        self.output = ret
        return ret

    def forward(self, in_data):
        return in_data  # identity; subclasses would override this


data = [0, 1, 2, 3]
x = Variable(data)

f_1 = Function()
y_1 = f_1(x)
f_2 = Function()
y_2 = f_2(y_1)

print(y_2.data)
print(y_2.creator)  # => f_2
print(y_2.creator.input)  # => y_1
print(y_2.creator.input.creator)  # => f_1
print(y_2.creator.input.creator.input)  # => x
print(y_2.creator.input.creator.input.data)  # => data
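
# Added sketch (not in the original file): the creator chain traced by the
# prints above can also be walked generically, assuming every Function has
# exactly one input.
node = y_2
while node.creator is not None:
    node = node.creator.input
print(node.data is data)  # => True: back at the original input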
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 Shunta Saito

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# 1f-Chainer

One-file Chainer

## About One-file Chainer

- The core functionality of Chainer is implemented in a single file
- Only Linear, ReLU, MeanSquaredError and SGD are available
- You can run `mnist.py` to confirm that it can train a 3-layer perceptron to classify MNIST
- 1 hot feature:

  - You can switch all computations to GPU mode just by replacing `import numpy as xp` with `import cupy as xp` at the top of `chainer.py`, as shown below
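
The whole switch is that one import line at the top of `chainer.py`:

```
import cupy as xp  # instead of: import numpy as xp
```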

## MNIST Training example

### Run on CPU

- Just run `mnist.py`

```
python mnist.py
```

### Run on GPU

- Replace `import numpy as xp` with `import cupy as xp` in `chainer.py`
- Then, run `mnist.py` with the option `--gpu 0`

```
python mnist.py --gpu 0
```

## minimum.py?

`minimum.py` in this repository is another implementation of a 3-layer perceptron for MNIST. It contains

- Linear
- ReLU
- Softmax cross entropy (grad)
- Training code for MNIST
- Accuracy calculation

in **just 74 lines**. (`chainer.py` has 205 lines.)

Try:

```
python minimum.py
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
mldata
loss.png
.sync-config.cson

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
--------------------------------------------------------------------------------
/mnist.py:
--------------------------------------------------------------------------------
from chainer import *
from sklearn.datasets import fetch_mldata

import argparse
import numpy as np
import sys

try:
    import cupy as xp
except ImportError:
    import numpy as xp  # fall back to NumPy so CPU-only runs still work

if 'linux' in sys.platform:
    import matplotlib
    matplotlib.use('Agg')

import matplotlib.pyplot as plt

parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=int, default=-1)
args = parser.parse_args()

if __name__ == '__main__':
    mnist = fetch_mldata('MNIST original', data_home='.')
    n_train, bsize = 60000, 32
    td, tl = mnist.data[:n_train].astype(np.float32) / 255.0, mnist.target[:n_train]
    tl = np.array([tl == i for i in range(10)]).T.astype(np.float32)  # one-hot targets
    perm = np.random.permutation(len(td))
    td, tl = td[perm], tl[perm]
    if args.gpu >= 0:
        td, tl = xp.asarray(td), xp.asarray(tl)

    model = Chain(
        l1=Linear(784, 100),
        l2=Linear(100, 100),
        l3=Linear(100, 10)
    )

    def forward(x):
        h = model.l1(x)
        h = relu(h)
        h = model.l2(h)
        h = relu(h)
        h = model.l3(h)
        return h

    opt = SGD(lr=0.1)
    opt.setup(model)

    n_iter, losses = 0, []
    for epoch in range(1):
        for i in range(0, len(td), bsize):
            x = Variable(td[i:i + bsize])
            t = Variable(tl[i:i + bsize])
            # forward
            y = forward(x)
            loss = mean_squared_error(y, t)
            # backward
            model.zerograds()
            loss.backward()
            opt.update()
            n_iter += 1
            losses.append(loss.data)

    plt.plot(losses)
    plt.savefig('loss.png')
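
    # Added evaluation sketch (not in the original script): accuracy over the
    # 10,000 held-out MNIST test images, CPU case.
    vd = mnist.data[n_train:].astype(np.float32) / 255.0
    vl = mnist.target[n_train:]
    pred = forward(Variable(vd)).data.argmax(axis=1)
    print('test accuracy:', (pred == vl).mean())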
--------------------------------------------------------------------------------
/examples/exp_2.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as xp


class Variable(object):

    def __init__(self, data):
        self.data = data
        self.creator = None
        self.grad = 1  # seed gradient for the output Variable

    def set_creator(self, gen_func):
        self.creator = gen_func

    def backward(self):
        if self.creator is None:  # input data
            return
        func = self.creator
        while func:
            gy = func.output.grad
            func.input.grad = func.backward(gy)
            func = func.input.creator


class Function(object):

    def __call__(self, in_var):
        in_data = in_var.data
        output = self.forward(in_data)
        ret = Variable(output)
        ret.set_creator(self)
        self.input = in_var
        self.output = ret
        return ret

    def forward(self, in_data):
        raise NotImplementedError()

    def backward(self, grad_output):
        raise NotImplementedError()


class Mul(Function):

    def __init__(self, init_w):
        self.w = init_w  # Initialize the parameter

    def forward(self, in_data):
        return in_data * self.w

    def backward(self, grad_output):
        gx = self.w * grad_output  # dL/dx = w * dL/dy
        self.gw = self.input.data * grad_output  # dL/dw = x * dL/dy
        return gx


data = xp.array([0, 1, 2, 3])

f1 = Mul(2)
f2 = Mul(3)
f3 = Mul(4)

y0 = Variable(data)
y1 = f1(y0)  # y1 = y0 * 2
y2 = f2(y1)  # y2 = y1 * 3
y3 = f3(y2)  # y3 = y2 * 4

print(y0.data)
print(y1.data)
print(y2.data)
print(y3.data)

y3.backward()

print(y3.grad)  # df3 / dy3 = 1
print(y2.grad)  # df3 / dy2 = (df3 / dy3) * (dy3 / dy2) = 1 * 4
print(y1.grad)  # df3 / dy1 = (df3 / dy3) * (dy3 / dy2) * (dy2 / dy1) = 1 * 4 * 3
print(y0.grad)  # df3 / dy0 = (df3 / dy3) * (dy3 / dy2) * (dy2 / dy1) * (dy1 / dy0) = 1 * 4 * 3 * 2

print(f3.gw)  # df3 / dw3 = y2 * 1
print(f2.gw)  # df3 / dw2 = y1 * 4
print(f1.gw)  # df3 / dw1 = y0 * 12
--------------------------------------------------------------------------------
/minimum.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2016 Shunta Saito

from sklearn.datasets import fetch_mldata
import numpy
numpy.random.seed(1988)

class Linear(object):

    def __init__(self, in_sz, out_sz):
        self.W = numpy.random.randn(out_sz, in_sz) * numpy.sqrt(2. / in_sz)  # He init
        self.b = numpy.zeros((out_sz,))

    def __call__(self, x):
        self.x = x  # keep the input for the backward pass
        return x.dot(self.W.T) + self.b

    def update(self, gy, lr):
        self.W -= lr * gy.T.dot(self.x)  # dL/dW
        self.b -= lr * gy.sum(axis=0)  # dL/db
        return gy.dot(self.W)  # dL/dx, handed to the previous layer

class ReLU(object):

    def __call__(self, x):
        self.x = x
        return numpy.maximum(0, x)

    def update(self, gy, lr):
        return gy * (self.x > 0)  # gradient passes only where the input was positive

model = [
    Linear(784, 100),
    ReLU(),
    Linear(100, 100),
    ReLU(),
    Linear(100, 10)
]

def forward(model, x):
    for layer in model:
        x = layer(x)
    return x

def update(model, gy, lr=0.0001):
    for layer in reversed(model):
        gy = layer.update(gy, lr)

def softmax_cross_entropy_gy(y, t):
    return (numpy.exp(y.T) / numpy.exp(y.T).sum(axis=0)).T - t  # softmax(y) - t

def accuracy(y, t):
    n_correct = numpy.sum(t[numpy.arange(len(t)), y.argmax(axis=1)])
    return n_correct / float(len(t))

mnist = fetch_mldata('MNIST original', data_home='.')
td, tl = mnist.data[:60000] / 255.0, mnist.target[:60000]
tl = numpy.array([tl == i for i in range(10)]).T.astype(numpy.int)
perm = numpy.random.permutation(len(td))
td, tl = td[perm], tl[perm]

for epoch in range(30):
    for i in range(0, len(td), 128):
        x, t = td[i:i + 128], tl[i:i + 128]
        y = forward(model, x)
        gy = softmax_cross_entropy_gy(y, t)
        update(model, gy)

vd, vl = mnist.data[60000:] / 255.0, mnist.target[60000:]
vl = numpy.array([vl == i for i in range(10)]).T.astype(numpy.int)
y = forward(model, vd)
print(accuracy(y, vl))
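
# Added note (not in the original file) on why softmax_cross_entropy_gy is
# softmax(y) - t: for L = -sum_k t_k * log(p_k) with p = softmax(y), the
# gradient is dL/dy_j = p_j - t_j. A numerical spot check of that identity:
#
#     y0 = numpy.array([[0.2, -0.4, 0.1]])
#     t0 = numpy.array([[0., 1., 0.]])
#     p0 = numpy.exp(y0) / numpy.exp(y0).sum()
#     assert numpy.allclose(softmax_cross_entropy_gy(y0, t0), p0 - t0)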
--------------------------------------------------------------------------------
/chainer.py:
--------------------------------------------------------------------------------
import numpy as xp
import heapq

class Variable(object):

    def __init__(self, data, grad=None, name=None):
        self.data = data
        self.rank = 0
        self.grad = grad
        self.creator = None
        self.name = name

    def set_creator(self, gen_func):
        self.creator = gen_func
        self.rank = gen_func.rank + 1

    def backward(self):
        if self.creator is None:
            return
        if self.data.size == 1 and self.grad is None:  # Loss variable
            self.grad = xp.ones_like(self.data)

        cand_funcs = []
        seen_set = set()
        seen_vars = set()
        need_copy = set()

        def add_cand(cand):
            if cand not in seen_set:
                heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
                seen_set.add(cand)

        add_cand(self.creator)

        while cand_funcs:
            _, _, func = heapq.heappop(cand_funcs)
            in_data = [x.data for x in func.inputs]
            out_grad = [y.grad for y in func.outputs]
            gxs = func.backward(in_data, out_grad)
            for x, gx in zip(func.inputs, gxs):
                if gx is None:
                    continue
                id_x = id(x)
                if x.creator is None:  # leaf
                    if x.grad is None:
                        x.grad = gx
                        need_copy.add(id_x)
                    elif id_x in need_copy:
                        x.grad = x.grad + gx
                        need_copy.remove(id_x)
                    else:
                        x.grad += gx
                else:  # not a leaf
                    add_cand(x.creator)
                    if id_x not in seen_vars:
                        x.grad = gx
                        seen_vars.add(id_x)
                        need_copy.add(id_x)
                    elif id_x in need_copy:
                        x.grad = gx + x.grad
                        need_copy.remove(id_x)
                    else:
                        x.grad += gx

    def zerograd(self):
        self.grad.fill(0)
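
# Added commentary on Variable.backward above: candidates are popped from a
# heap keyed on -rank, so Functions farther from the inputs run first and a
# Function's output gradients are complete before its own backward() is
# called; len(seen_set) breaks rank ties in insertion order. The need_copy
# set makes the first accumulation into a gradient array allocate a new one
# (g = g + gx) rather than updating in place (g += gx), so an array that is
# still referenced elsewhere in the graph is never mutated.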

class Function(object):

    def __call__(self, *inputs):
        in_data = [x.data for x in inputs]
        outputs = self.forward(in_data)
        ret = [Variable(y) for y in outputs]
        self.rank = max([x.rank for x in inputs])
        for y in ret:
            y.set_creator(self)
        self.inputs = inputs
        self.outputs = ret
        return ret if len(ret) > 1 else ret[0]

    def forward(self, inputs):
        raise NotImplementedError()

    def backward(self, inputs, grad_outputs):
        raise NotImplementedError()

class Link(object):

    def __init__(self, **params):
        for name, value in params.items():
            grad = xp.full_like(value, 0)
            var = Variable(value, grad, name)
            self.__dict__[name] = var

    def params(self):
        for param in self.__dict__.values():
            yield param

    def namedparams(self):
        for name, param in self.__dict__.items():
            yield '/' + name, param

    def zerograds(self):
        for param in self.params():
            param.zerograd()

class Chain(Link):

    def __init__(self, **links):
        super(Chain, self).__init__()
        self.children = []
        for name, link in links.items():
            self.children.append(name)
            self.__dict__[name] = link

    def params(self):
        for name in self.children:
            for param in self.__dict__[name].params():
                yield param

    def namedparams(self):
        for name in self.children:
            prefix = '/' + name
            for path, param in self.__dict__[name].namedparams():
                yield prefix + path, param

class Linear(Link):

    def __init__(self, in_size, out_size):
        n = xp.random.normal
        scale = xp.sqrt(2. / in_size)
        W = n(loc=0.0, scale=scale, size=(out_size, in_size))
        b = n(loc=0.0, scale=scale, size=(out_size,))
        super(Linear, self).__init__(
            W=W.astype(xp.float32), b=b.astype(xp.float32))

    def __call__(self, x):
        return LinearFunction()(x, self.W, self.b)

class LinearFunction(Function):

    def forward(self, inputs):
        x, W, b = inputs
        return x.dot(W.T) + b,  # trailing comma: forward returns a tuple

    def backward(self, inputs, grad_outputs):
        x, W, b = inputs
        gy = grad_outputs[0]
        gx = gy.dot(W).reshape(x.shape)
        gW = gy.T.dot(x)
        gb = gy.sum(0)
        return gx, gW, gb

class ReLU(Function):

    def forward(self, inputs):
        return xp.maximum(inputs[0], 0),

    def backward(self, inputs, grad_outputs):
        return grad_outputs[0] * (inputs[0] > 0),

def relu(x):
    return ReLU()(x)

class MeanSquaredError(Function):

    def forward(self, inputs):
        x0, x1 = inputs
        self.diff = x0 - x1
        diff = self.diff.ravel()
        return diff.dot(diff) / diff.size,

    def backward(self, inputs, grad_outputs):
        gy = grad_outputs[0]
        coeff = gy * (2. / self.diff.size)
        gx0 = coeff * self.diff
        return gx0, -gx0

def mean_squared_error(x0, x1):
    return MeanSquaredError()(x0, x1)

class Optimizer(object):

    def setup(self, link):
        self.target = link
        self.states = {}
        self.prepare()

    def prepare(self):
        for name, param in self.target.namedparams():
            if name not in self.states:
                self.states[name] = {}

    def update(self):
        self.prepare()
        for name, param in self.target.namedparams():
            self.update_one(param, self.states[name])

class SGD(Optimizer):

    def __init__(self, lr=0.01):
        self.lr = lr

    def update_one(self, param, state):
        param.data -= self.lr * param.grad
--------------------------------------------------------------------------------
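
A quick sketch (not a file in this repository) of the gradient-accumulation path in `Variable.backward`: when one Variable feeds two Functions, both contributions are summed into its `grad`. It assumes the script is run from the repository root, so that `chainer` resolves to the `chainer.py` above.

```
import numpy as xp
from chainer import Variable, relu, mean_squared_error

x = Variable(xp.array([[1., -2., 3.]], dtype=xp.float32))
h = relu(x)                      # first consumer of x
loss = mean_squared_error(h, x)  # second consumer of x
loss.backward()

# Analytically dL/dx_j = (2/3) * (relu(x_j) - x_j) * (relu'(x_j) - 1), which
# is nonzero only at x = -2: (2/3) * 2 * (0 - 1) = -4/3.
print(x.grad)  # => [[ 0.        -1.3333334  0.       ]]
```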