├── .gitignore
├── README.md
├── gpu
│   ├── ch01
│   │   ├── figure_gpu.png
│   │   ├── layers.py
│   │   ├── loss_gpu.png
│   │   ├── optimizer.py
│   │   ├── show_spiral_dataset.py
│   │   ├── spiral.py
│   │   ├── train.py
│   │   ├── train_custom_loop.py
│   │   ├── trainer.py
│   │   └── two_layer_net.py
│   ├── ch03
│   │   ├── cbow_predict.py
│   │   ├── simple_cbow.py
│   │   ├── train.py
│   │   └── util.py
│   ├── ch04
│   │   ├── cbow.py
│   │   ├── cbow.py~
│   │   ├── negative_sampling_layer.py
│   │   ├── netative_sampling_layer.py~
│   │   └── train.py
│   ├── ch05
│   │   ├── simple_rnnlm.py
│   │   ├── train.py
│   │   └── train_custom_loop.py
│   ├── ch06
│   │   ├── better_rnnlm.py
│   │   └── train_better_rnnlm.py
│   └── common
│       ├── base_model.py
│       ├── config.py
│       ├── functions.py
│       ├── layers.py
│       ├── np.py
│       ├── optimizer.py
│       ├── time_layers.py
│       ├── trainer.py
│       └── util.py
├── python
│   ├── 20190327
│   │   ├── sigmoid.py
│   │   └── sigmoid_class.py
│   ├── ch1
│   │   ├── affine.py
│   │   ├── forward_net.py
│   │   ├── show_spiral_dataset.py
│   │   ├── sigmoid.py
│   │   ├── sigmoid_class.py
│   │   ├── train_custom_loop.py
│   │   └── two_layer_net.py
│   ├── ch3
│   │   ├── cbow.py
│   │   ├── dot.py
│   │   ├── matmul.py
│   │   ├── simple_cbow.py
│   │   └── train.py
│   ├── ch4
│   │   ├── cbow.py
│   │   ├── cbow_params.pkl
│   │   ├── negative_sampling_layer.py
│   │   ├── ptb.py
│   │   ├── ptb.train.npy
│   │   ├── ptb.train.txt
│   │   ├── ptb.vocab.pkl
│   │   ├── train.py
│   │   └── train_loss.png
│   ├── ch5
│   │   ├── simple_rnnlm.py
│   │   ├── train.py
│   │   └── train_custom_loop.py
│   ├── ch6
│   │   ├── Figure_1.png
│   │   ├── Rnnlm.pkl
│   │   ├── clip_grads.py
│   │   ├── lstm_backward_graph.jpg
│   │   ├── rnn_gradient_graph.py
│   │   ├── rnn_gradient_graph_clip.py
│   │   ├── rnnlm.py
│   │   └── train_rnnlm.py
│   ├── common
│   │   ├── config.py
│   │   ├── functions.py
│   │   ├── layers.py
│   │   ├── np.py
│   │   ├── optimizer.py
│   │   ├── time_layers.py
│   │   ├── trainer.py
│   │   └── util.py
│   ├── dataset
│   │   ├── __init__.py
│   │   ├── addition.txt
│   │   ├── date.txt
│   │   ├── ptb.py
│   │   ├── sequence.py
│   │   └── spiral.py
│   ├── memo.txt
│   └── upstream
│       ├── ch01
│       │   ├── forward_net.py
│       │   ├── show_spiral_dataset.py
│       │   ├── train.py
│       │   ├── train_custom_loop.py
│       │   └── two_layer_net.py
│       ├── common
│       │   ├── __init__.py
│       │   ├── base_model.py
│       │   ├── config.py
│       │   ├── functions.py
│       │   ├── layers.py
│       │   ├── np.py
│       │   ├── optimizer.py
│       │   ├── time_layers.py
│       │   ├── trainer.py
│       │   └── util.py
│       └── dataset
│           ├── __init__.py
│           ├── addition.txt
│           ├── date.txt
│           ├── ptb.py
│           ├── sequence.py
│           └── spiral.py
├── python_team2
│   ├── 20190327
│   │   ├── sigmoid.py
│   │   └── sigmoid_class.py
│   ├── .gitignore
│   ├── Pipfile
│   ├── Pipfile.lock
│   ├── README.md
│   ├── ch01
│   │   ├── forward_net.py
│   │   ├── plots.py
│   │   ├── show_spiral_dataset.py
│   │   ├── train.py
│   │   ├── train_custom_loop.py
│   │   └── two_layer_net.py
│   ├── ch02
│   │   ├── co_matrix.py
│   │   ├── ranking.py
│   │   ├── similarity.py
│   │   └── words.py
│   ├── ch03
│   │   ├── cbow_predict.py
│   │   ├── simple_cbow.py
│   │   ├── train.py
│   │   ├── w_in.py
│   │   └── w_in_matmul.py
│   └── common
│       ├── layers.py
│       ├── optimizer.py
│       └── util.py
└── ruby
    ├── .bundle
    │   └── config
    ├── Gemfile
    ├── Gemfile.lock
    ├── Rakefile
    ├── examples
    │   ├── ch01
    │   │   ├── show_spiral_dataset.rb
    │   │   ├── spiral.rb
    │   │   ├── train.rb
    │   │   ├── train_custom_loop.rb
    │   │   └── two_layers_net.rb
    │   └── ch03
    │       ├── cbow_predict.rb
    │       └── train.rb
    ├── lib
    │   ├── adam.rb
    │   ├── affine.rb
    │   ├── embedding.rb
    │   ├── embedding_dot.rb
    │   ├── mat_mul.rb
    │   ├── negative_sampling_loss.rb
    │   ├── optimizer.rb
    │   ├── rnn.rb
    │   ├── sigmoid.rb
    │   ├── simple_cbow.rb
    │   ├── softmax_with_loss.rb
    │   ├── time_embedding.rb
    │   ├── time_rnn.rb
    │   ├── trainer.rb
    │   └── util.rb
    └── test
        ├── affine_test.rb
        ├── mat_mul_test.rb
        ├── optimizer_test.rb
        ├── rnn_test.rb
        ├── simple_cbow_test.rb
        ├── softmax_with_loss_test.rb
        ├── test_helper.rb
        ├── time_embedding_test.rb
        ├── time_rnn_test.rb
        └── two_layers_net_test.rb

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
ruby/.idea/*
ruby/vendor/*
*.swp
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# deep-learning-from-scratch-2

This is the repository used for the following event:
https://retrieva.connpass.com/event/123223/

Twitter hashtag: `#retrieva_nlp`

We plan to build the implementations through mob programming during the event.

- Textbook team
- GPU team
- Ruby team
--------------------------------------------------------------------------------
/gpu/ch01/figure_gpu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/gpu/ch01/figure_gpu.png
--------------------------------------------------------------------------------
/gpu/ch01/layers.py:
--------------------------------------------------------------------------------
import cupy as cp


class MatMul:

    def __init__(self, W):
        self.params = [W]
        self.grads = [cp.zeros_like(W)]
        self.x = None

    def forward(self, x):
        W, = self.params
        out = cp.dot(x, W)
        self.x = x
        return out

    def backward(self, dout):
        W, = self.params
        dx = cp.dot(dout, W.T)
        dW = cp.dot(self.x.T, dout)
        self.grads[0][...] = dW
        return dx


class Sigmoid:
    def __init__(self):
        self.params, self.grads = [], []
        self.out = None

    def forward(self, x):
        out = 1.0 / (1.0 + cp.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out
        return dx


class Affine:
    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [cp.zeros_like(W), cp.zeros_like(b)]
        self.x = None

    def forward(self, x):
        W, b = self.params
        out = cp.dot(x, W) + b
        self.x = x
        return out

    def backward(self, dout):
        W, b = self.params
        dx = cp.dot(dout, W.T)
        dW = cp.dot(self.x.T, dout)
        db = cp.sum(dout, axis=0)
        self.grads[0][...] = dW
        self.grads[1][...] = db
        return dx


class SoftmaxWithLoss:
    def __init__(self):
        self.params, self.grads = [], []
        self.y = None
        self.t = None

    def forward(self, x, t):
        if x.ndim == 2:  # mini-batch input
            x = x - x.max(axis=1, keepdims=True)
            x = cp.exp(x)
            y = x / x.sum(axis=1, keepdims=True)
        elif x.ndim == 1:
            x = x - cp.max(x)
            y = cp.exp(x) / cp.sum(cp.exp(x))

        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)

        # if the labels are one-hot vectors, convert them to class indices
        if t.size == y.size:
            t = t.argmax(axis=1)

        batch_size = y.shape[0]
        loss = -cp.sum(cp.log(y[cp.arange(batch_size), t] + 1e-7)) / batch_size
        self.y = y
        self.t = t
        return loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = self.y.copy()
        dx[cp.arange(batch_size), self.t] -= 1
        dx *= dout
        dx = dx / batch_size
        return dx
--------------------------------------------------------------------------------
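A quick way to sanity-check the backward passes above is a finite-difference comparison. The following is a minimal sketch (not part of the repository), assuming CuPy is installed and it is run from gpu/ch01:

import cupy as cp
from layers import Affine

cp.random.seed(0)
W = cp.random.randn(3, 2)
b = cp.random.randn(2)
x = cp.random.randn(4, 3)
layer = Affine(W, b)

out = layer.forward(x)
dx = layer.backward(cp.ones_like(out))  # analytic gradient of sum(out) w.r.t. x

eps = 1e-4
i, j = 1, 2
x1 = x.copy(); x1[i, j] += eps
x2 = x.copy(); x2[i, j] -= eps
num = (layer.forward(x1).sum() - layer.forward(x2).sum()) / (2 * eps)
print(float(dx[i, j]), float(num))  # the two values should agree closely
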
/gpu/ch01/loss_gpu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/gpu/ch01/loss_gpu.png
--------------------------------------------------------------------------------
/gpu/ch01/optimizer.py:
--------------------------------------------------------------------------------
# coding: utf-8

import cupy as cp


class SGD:
    '''
    Stochastic Gradient Descent
    '''
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for i in range(len(params)):
            params[i] -= self.lr * grads[i]


class Momentum:
    '''
    Momentum SGD
    '''
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = []
            for param in params:
                self.v.append(cp.zeros_like(param))

        for i in range(len(params)):
            self.v[i] = self.momentum * self.v[i] - self.lr * grads[i]
            params[i] += self.v[i]


class Nesterov:
    '''
    Nesterov's Accelerated Gradient (http://arxiv.org/abs/1212.0901)
    '''
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = []
            for param in params:
                self.v.append(cp.zeros_like(param))

        for i in range(len(params)):
            self.v[i] *= self.momentum
            self.v[i] -= self.lr * grads[i]
            params[i] += self.momentum * self.momentum * self.v[i]
            params[i] -= (1 + self.momentum) * self.lr * grads[i]


class AdaGrad:
    '''
    AdaGrad
    '''
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = []
            for param in params:
                self.h.append(cp.zeros_like(param))

        for i in range(len(params)):
            self.h[i] += grads[i] * grads[i]
            params[i] -= self.lr * grads[i] / (cp.sqrt(self.h[i]) + 1e-7)


class RMSprop:
    '''
    RMSprop
    '''
    def __init__(self, lr=0.01, decay_rate=0.99):
        self.lr = lr
        self.decay_rate = decay_rate
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = []
            for param in params:
                self.h.append(cp.zeros_like(param))

        for i in range(len(params)):
            self.h[i] *= self.decay_rate
            self.h[i] += (1 - self.decay_rate) * grads[i] * grads[i]
            params[i] -= self.lr * grads[i] / (cp.sqrt(self.h[i]) + 1e-7)


class Adam:
    '''
    Adam (http://arxiv.org/abs/1412.6980v8)
    '''
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = [], []
            for param in params:
                self.m.append(cp.zeros_like(param))
                self.v.append(cp.zeros_like(param))

        self.iter += 1
        lr_t = self.lr * cp.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for i in range(len(params)):
            self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i])
            self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i])

            params[i] -= lr_t * self.m[i] / (cp.sqrt(self.v[i]) + 1e-7)
--------------------------------------------------------------------------------
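Every optimizer above mutates `params` in place given a matching list of `grads`. A minimal usage sketch (not part of the repository): a few SGD steps on f(w) = w**2 should drive the parameter toward zero. Assumes CuPy is installed and this runs from gpu/ch01.

import cupy as cp
from optimizer import SGD

w = cp.array([2.0])
opt = SGD(lr=0.1)
for _ in range(20):
    grad = 2 * w              # gradient of f(w) = w**2
    opt.update([w], [grad])   # in-place update of w
print(w)                      # close to 0
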
/gpu/ch01/show_spiral_dataset.py:
--------------------------------------------------------------------------------
# coding: utf-8
import sys
sys.path.append('..')  # allow imports from the parent directory
import spiral
import matplotlib.pyplot as plt


x, t = spiral.load_data()
print('x', x.shape)  # (300, 2)
print('t', t.shape)  # (300, 3)

# plot the data points
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']
for i in range(CLS_NUM):
    plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i])
plt.show()
--------------------------------------------------------------------------------
/gpu/ch01/spiral.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import numpy as np


def load_data(seed=1984):
    np.random.seed(seed)
    N = 100  # number of samples per class
    DIM = 2  # dimensionality of the data
    CLS_NUM = 3  # number of classes

    x = np.zeros((N * CLS_NUM, DIM))
    t = np.zeros((N * CLS_NUM, CLS_NUM), dtype=np.int32)

    for j in range(CLS_NUM):
        for i in range(N):
            rate = i / N
            radius = 1.0 * rate
            theta = j * 4.0 + 4.0 * rate + np.random.randn() * 0.2

            ix = N * j + i
            x[ix] = np.array([radius * np.sin(theta),
                              radius * np.cos(theta)], dtype=np.float32).flatten()
            t[ix, j] = 1

    return x, t
--------------------------------------------------------------------------------
/gpu/ch01/train.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

from optimizer import SGD
from trainer import Trainer
import spiral
import cupy as cp
from two_layer_net import TwoLayerNet


if __name__ == '__main__':
    max_epoch = 300
    batch_size = 30
    hidden_size = 10
    learning_rate = 1.0

    x, t = spiral.load_data()
    x_cp = cp.array(x)
    t_cp = cp.array(t)
    model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
    optimizer = SGD(lr=learning_rate)

    trainer = Trainer(model, optimizer)
    trainer.fit(x_cp, t_cp, max_epoch, batch_size, eval_interval=5)
    trainer.plot()
--------------------------------------------------------------------------------
/gpu/ch01/train_custom_loop.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import numpy as np
import cupy as cp
from optimizer import SGD
import spiral
import matplotlib.pyplot as plt
plt.switch_backend('agg')
from two_layer_net import TwoLayerNet


if __name__ == '__main__':
    print("# 1. hyper parameter settings")
    max_epoch = 300
    batch_size = 30
    hidden_size = 10
    learning_rate = 1.0

    print("# 2. load data and generate model and optimizer")
    x, t = spiral.load_data()
    model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
    optimizer = SGD(lr=learning_rate)

    # variables for learning
    data_size = len(x)
    max_iters = data_size // batch_size
    total_loss = 0
    loss_count = 0
    loss_list = []

    print("# 3. start epoch")
    for epoch in range(max_epoch):
        # 3. shuffle data
        idx = np.random.permutation(data_size)
        x = x[idx]
        t = t[idx]

        for iters in range(max_iters):
            batch_x = cp.asarray(x[iters * batch_size: (iters + 1) * batch_size])
            batch_t = cp.asarray(t[iters * batch_size: (iters + 1) * batch_size])

            # 4. process grads and update parameters
            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)

            total_loss += loss
            loss_count += 1

            # 5. output learning result
            if (iters + 1) % 10 == 0:
                avg_loss = total_loss / loss_count
                print('| epoch %d | iter %d / %d | loss %.2f'
                      % (epoch + 1, iters + 1, max_iters, avg_loss))
                loss_list.append(avg_loss)
                total_loss, loss_count = 0, 0

    print("# plot learning result")
    plt.plot(np.arange(len(loss_list)), loss_list, label='train')
    plt.xlabel('iterations (x10)')
    plt.ylabel('loss')
    plt.savefig("loss_gpu.png")
    plt.cla()

    # plot boundary
    h = 0.001
    x_min, x_max = np.min(x[:, 0]) - .1, np.max(x[:, 0]) + .1
    y_min, y_max = np.min(x[:, 1]) - .1, np.max(x[:, 1]) + .1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    X = cp.asarray(np.c_[xx.ravel(), yy.ravel()].astype(np.float32))
    score = cp.asnumpy(model.predict(X))
    predict_cls = np.argmax(score, axis=1)
    Z = predict_cls.reshape(xx.shape)
    plt.contourf(xx, yy, Z)
    plt.axis('off')

    # plot data points
    x, t = spiral.load_data()
    N = 100
    CLS_NUM = 3
    markers = ['o', 'x', '^']
    for i in range(CLS_NUM):
        plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i])
    plt.savefig("figure_gpu.png")
--------------------------------------------------------------------------------
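The loop above keeps the full dataset on the host and moves only each mini-batch to the GPU with cp.asarray, then brings scores back with cp.asnumpy for plotting. A minimal round-trip sketch of that pattern (not in the repository; assumes CuPy is installed):

import numpy as np
import cupy as cp

batch = np.random.randn(30, 2).astype(np.float32)
batch_gpu = cp.asarray(batch)       # host -> device copy
result = cp.asnumpy(batch_gpu * 2)  # device -> host copy
assert isinstance(result, np.ndarray)
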
/gpu/ch01/trainer.py:
--------------------------------------------------------------------------------
# coding: utf-8
import sys
sys.path.append('..')
import numpy as np
import cupy as cp
import time
import matplotlib.pyplot as plt
plt.switch_backend('agg')


class Trainer:
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.loss_list = []
        self.eval_interval = None
        self.current_epoch = 0

    def fit(self, x, t, max_epoch=10, batch_size=32, max_grad=None, eval_interval=20):
        data_size = len(x)
        max_iters = data_size // batch_size
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0

        start_time = time.time()
        for epoch in range(max_epoch):
            # shuffle the data
            idx = cp.random.permutation(cp.arange(data_size))
            x = x[idx]
            t = t[idx]

            for iters in range(max_iters):
                batch_x = x[iters*batch_size:(iters+1)*batch_size]
                batch_t = t[iters*batch_size:(iters+1)*batch_size]

                # compute gradients and update parameters
                loss = model.forward(batch_x, batch_t)
                model.backward()
                params, grads = remove_duplicate(model.params, model.grads)  # consolidate shared weights into one
                if max_grad is not None:
                    clip_grads(grads, max_grad)
                optimizer.update(params, grads)
                total_loss += loss
                loss_count += 1

                # evaluation
                if (eval_interval is not None) and (iters % eval_interval) == 0:
                    avg_loss = total_loss / loss_count
                    elapsed_time = time.time() - start_time
                    print('| epoch %d | iter %d / %d | time %d[s] | loss %.2f'
                          % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, avg_loss))
                    self.loss_list.append(float(avg_loss))
                    total_loss, loss_count = 0, 0

            self.current_epoch += 1

    def plot(self, ylim=None):
        x = np.arange(len(self.loss_list))
        if ylim is not None:
            plt.ylim(*ylim)
        plt.plot(x, self.loss_list, label='train')
        plt.xlabel('iterations (x' + str(self.eval_interval) + ')')
        plt.ylabel('loss')
        plt.savefig('figure.png')
        #plt.show()


class RnnlmTrainer:
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.time_idx = None
        self.ppl_list = None
        self.eval_interval = None
        self.current_epoch = 0

    def get_batch(self, x, t, batch_size, time_size):
        batch_x = cp.empty((batch_size, time_size), dtype='i')
        batch_t = cp.empty((batch_size, time_size), dtype='i')

        data_size = len(x)
        jump = data_size // batch_size
        offsets = [i * jump for i in range(batch_size)]  # start offset of each sample in the batch

        for time in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, time] = x[(offset + self.time_idx) % data_size]
                batch_t[i, time] = t[(offset + self.time_idx) % data_size]
            self.time_idx += 1
        return batch_x, batch_t

    def fit(self, xs, ts, max_epoch=10, batch_size=20, time_size=35,
            max_grad=None, eval_interval=20):
        data_size = len(xs)
        max_iters = data_size // (batch_size * time_size)
        self.time_idx = 0
        self.ppl_list = []
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0

        start_time = time.time()
        for epoch in range(max_epoch):
            for iters in range(max_iters):
                batch_x, batch_t = self.get_batch(xs, ts, batch_size, time_size)

                # compute gradients and update parameters
                loss = model.forward(batch_x, batch_t)
                model.backward()
                params, grads = remove_duplicate(model.params, model.grads)  # consolidate shared weights into one
                if max_grad is not None:
                    clip_grads(grads, max_grad)
                optimizer.update(params, grads)
                total_loss += loss
                loss_count += 1

                # evaluate perplexity
                if (eval_interval is not None) and (iters % eval_interval) == 0:
                    ppl = cp.exp(total_loss / loss_count)
                    elapsed_time = time.time() - start_time
                    print('| epoch %d | iter %d / %d | time %d[s] | perplexity %.2f'
                          % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, ppl))
                    self.ppl_list.append(float(ppl))
                    total_loss, loss_count = 0, 0

            self.current_epoch += 1

    def plot(self, ylim=None):
        x = np.arange(len(self.ppl_list))
        if ylim is not None:
            plt.ylim(*ylim)
        plt.plot(x, self.ppl_list, label='train')
        plt.xlabel('iterations (x' + str(self.eval_interval) + ')')
        plt.ylabel('perplexity')
        plt.show()


def remove_duplicate(params, grads):
    '''
    Consolidate duplicated weights in the parameter list into one entry
    and accumulate the corresponding gradients.
    '''
    params, grads = params[:], grads[:]  # copy lists

    while True:
        find_flg = False
        L = len(params)

        for i in range(0, L - 1):
            for j in range(i + 1, L):
                # the weights are shared
                if params[i] is params[j]:
                    grads[i] += grads[j]  # accumulate gradients
                    find_flg = True
                    params.pop(j)
                    grads.pop(j)
                # the weights are shared as a transposed matrix (weight tying)
                elif params[i].ndim == 2 and params[j].ndim == 2 and \
                     params[i].T.shape == params[j].shape and cp.all(params[i].T == params[j]):
                    grads[i] += grads[j].T
                    find_flg = True
                    params.pop(j)
                    grads.pop(j)

                if find_flg: break
            if find_flg: break

        if not find_flg: break

    return params, grads


def clip_grads(grads, max_norm):
    total_norm = 0
    for grad in grads:
        total_norm += cp.sum(grad ** 2)
    total_norm = cp.sqrt(total_norm)

    rate = max_norm / (total_norm + 1e-6)
    if rate < 1:
        for grad in grads:
            grad *= rate
--------------------------------------------------------------------------------
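clip_grads rescales all gradients by max_norm / total_norm whenever the global L2 norm exceeds max_norm. A small sketch of the effect (not part of the repository; assumes CuPy is installed and this runs from gpu/ch01):

import cupy as cp
from trainer import clip_grads

grads = [cp.array([3.0, 4.0])]   # global L2 norm = 5
clip_grads(grads, max_norm=1.0)  # rescales in place by ~1/5
print(grads[0])                  # approximately [0.6, 0.8], norm ~= 1
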
/gpu/ch01/two_layer_net.py:
--------------------------------------------------------------------------------
import cupy as cp

from layers import Affine, Sigmoid, SoftmaxWithLoss


class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # initialize weights and biases
        W1 = 0.01 * cp.random.randn(I, H)
        b1 = cp.zeros(H)
        W2 = 0.01 * cp.random.randn(H, O)
        b2 = cp.zeros(O)

        # create layers
        self.layers = [
            Affine(W1, b1),
            Sigmoid(),
            Affine(W2, b2)
        ]
        self.loss_layer = SoftmaxWithLoss()

        # combine all weights and grads into lists
        self.params, self.grads = [], []

        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
--------------------------------------------------------------------------------
/gpu/ch03/cbow_predict.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('..')
import cupy as cp
from ch01.layers import MatMul

# sample context data
c0 = cp.array([[1, 0, 0, 0, 0, 0, 0]])
c1 = cp.array([[0, 0, 0, 1, 0, 0, 0]])

W_in = cp.random.randn(7, 3)
W_out = cp.random.randn(3, 7)

in_layer0 = MatMul(W_in)
in_layer1 = MatMul(W_in)
out_layer = MatMul(W_out)

h0 = in_layer0.forward(c0)
h1 = in_layer1.forward(c1)
h = 0.5 * (h0 + h1)
s = out_layer.forward(h)

print(s)
--------------------------------------------------------------------------------
/gpu/ch03/simple_cbow.py:
--------------------------------------------------------------------------------
import cupy as cp

from ch01.layers import MatMul, SoftmaxWithLoss


class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # initialize weights
        W_in = 0.01 * cp.random.randn(V, H).astype(cp.float32)
        W_out = 0.01 * cp.random.randn(H, V).astype(cp.float32)

        # create layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # keep the distributed word representations as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
--------------------------------------------------------------------------------
/gpu/ch03/train.py:
--------------------------------------------------------------------------------
import sys
import cupy as cp
sys.path.append('..')
from ch01.trainer import Trainer
from ch01.optimizer import Adam
from simple_cbow import SimpleCBOW
from util import preprocess, create_contexts_target, convert_one_hot

window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

vocab_size = len(word_to_id)
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()
--------------------------------------------------------------------------------
/gpu/ch03/util.py:
--------------------------------------------------------------------------------
import cupy as cp


def preprocess(text):
    text = text.lower()
    text = text.replace('.', ' .')
    words = text.split(' ')

    word_to_id = {}
    id_to_word = {}
    for word in words:
        if word not in word_to_id:
            new_id = len(word_to_id)
            word_to_id[word] = new_id
            id_to_word[new_id] = word
    corpus = cp.array([word_to_id[w] for w in words])
    return corpus, word_to_id, id_to_word


def create_contexts_target(corpus, window_size=1):
    target = corpus[window_size:-window_size]
    contexts = []

    for idx in range(window_size, len(corpus) - window_size):
        cs = []
        for t in range(-window_size, window_size + 1):
            if t == 0:
                continue
            cs.append(int(corpus[idx + t]))
        contexts.append(cs)

    return cp.array(contexts), cp.array(target)


def convert_one_hot(corpus, vocab_size):
    '''Convert to a one-hot representation.

    :param corpus: list of word IDs (1-D or 2-D array)
    :param vocab_size: vocabulary size
    :return: one-hot representation (2-D or 3-D array)
    '''
    N = corpus.shape[0]

    if corpus.ndim == 1:
        one_hot = cp.zeros((N, vocab_size), dtype=cp.int32)
        for idx, word_id in enumerate(corpus):
            one_hot[idx, word_id] = 1

    elif corpus.ndim == 2:
        C = corpus.shape[1]
        one_hot = cp.zeros((N, C, vocab_size), dtype=cp.int32)
        for idx_0, word_ids in enumerate(corpus):
            for idx_1, word_id in enumerate(word_ids):
                one_hot[idx_0, idx_1, word_id] = 1

    return one_hot
--------------------------------------------------------------------------------
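A short sketch (not in the repository) of what the helpers above produce for the sample sentence; assumes CuPy is installed and this runs from gpu/ch03:

from util import preprocess, create_contexts_target

corpus, word_to_id, id_to_word = preprocess('You say goodbye and I say hello.')
print(corpus)                   # word-ID sequence: [0 1 2 3 4 1 5 6]
contexts, target = create_contexts_target(corpus, window_size=1)
print(contexts[0], target[0])   # [0 2] and 1: "you"/"goodbye" predict "say"
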
/gpu/ch04/cbow.py:
--------------------------------------------------------------------------------
import numpy as np
import cupy as cp
from negative_sampling_layer import NegativeSamplingLoss
import sys
sys.path.append('..')
from common.layers import Embedding


class CBOW:
    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        V, H = vocab_size, hidden_size
        W_in = 0.01 * cp.random.randn(V, H).astype('f')
        W_out = 0.01 * cp.random.randn(V, H).astype('f')

        self.in_layers = []
        for i in range(2 * window_size):
            layer = Embedding(W_in)
            self.in_layers.append(layer)

        self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)

        layers = self.in_layers + [self.ns_loss]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = 0
        for i, layer in enumerate(self.in_layers):
            h += layer.forward(contexts[:, i])
        h *= 1 / len(self.in_layers)
        loss = self.ns_loss.forward(h, target)
        return loss

    def backward(self, dout=1):
        dout = self.ns_loss.backward(dout)
        dout *= 1 / len(self.in_layers)
        for layer in self.in_layers:
            layer.backward(dout)
        return None
--------------------------------------------------------------------------------
/gpu/ch04/cbow.py~:
--------------------------------------------------------------------------------
import numpy as np
import negative_sampling_layer import Nega
import sys
--------------------------------------------------------------------------------
/gpu/ch04/negative_sampling_layer.py:
--------------------------------------------------------------------------------
import collections
import numpy as np
import cupy as cp
import sys
sys.path.append('..')
from common.layers import Embedding, SigmoidWithLoss


class EmbeddingDot:
    def __init__(self, W):
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None

    def forward(self, h, idx):
        target_W = self.embed.forward(idx)
        out = cp.sum(target_W * h, axis=1)
        self.cache = (h, target_W)
        return out

    def backward(self, dout):
        h, target_W = self.cache
        dout = dout.reshape(dout.shape[0], 1)
        dtarget_W = dout * h
        self.embed.backward(dtarget_W)
        dh = dout * target_W
        return dh


class UnigramSampler:
    def __init__(self, corpus, power, sample_size):
        self.sample_size = sample_size
        self.vocab_size = None
        self.word_p = None

        counts = collections.Counter()
        counts.update(corpus)

        vocab_size = len(counts)
        self.vocab_size = vocab_size
        self.word_p = np.zeros(vocab_size)
        for i in range(vocab_size):
            self.word_p[i] = counts[i]

        self.word_p = np.power(self.word_p, power)
        self.word_p /= np.sum(self.word_p)

    def get_negative_sample(self, target):
        batch_size = target.shape[0]
        negative_sample = cp.random.choice(self.vocab_size, size=(batch_size, self.sample_size),
                                           replace=True, p=self.word_p)
        return negative_sample


class NegativeSamplingLoss:
    def __init__(self, W, corpus, power=0.75, sample_size=5):
        self.sample_size = sample_size
        self.sampler = UnigramSampler(corpus, power, sample_size)
        self.loss_layers = [SigmoidWithLoss() for _ in range(sample_size + 1)]
        self.embed_dot_layers = [EmbeddingDot(W) for _ in range(sample_size + 1)]

        self.params, self.grads = [], []
        for layer in self.embed_dot_layers:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, h, target):
        batch_size = target.shape[0]
        negative_sample = self.sampler.get_negative_sample(target)

        score = self.embed_dot_layers[0].forward(h, target)
        correct_label = cp.ones(batch_size, dtype=cp.int32)
        loss = self.loss_layers[0].forward(score, correct_label)

        negative_label = cp.zeros(batch_size, dtype=cp.int32)
        for i in range(self.sample_size):
            negative_target = negative_sample[:, i]
            score = self.embed_dot_layers[1 + i].forward(h, negative_target)
            loss += self.loss_layers[1 + i].forward(score, negative_label)

        return loss

    def backward(self, dout=1):
        dh = 0
        for l0, l1 in zip(self.loss_layers, self.embed_dot_layers):
            dscore = l0.backward(dout)
            dh += l1.backward(dscore)

        return dh
--------------------------------------------------------------------------------
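A quick sketch (not in the repository) of the unigram distribution the sampler builds: raising counts to the 0.75 power flattens the distribution so rare words are sampled a bit more often. Plain NumPy mirrors the same computation:

import numpy as np

counts = np.array([8.0, 1.0])   # word 0 is 8x more frequent than word 1
p = counts ** 0.75
p /= p.sum()
print(p)  # ~[0.826, 0.174] instead of [0.889, 0.111] without the 0.75 power
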
/gpu/ch04/netative_sampling_layer.py~:
--------------------------------------------------------------------------------
import collections
import numpy as np
import sys
sys.path.append('..')
from common.layers import Embedding


class EmbeddingDot:
    def __init__(self, W):
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed_grads
        self.cache = None

    def forward(self, h, idx):
        target_W = self.embed.forward(idx)
        out = np.sum(target_W * h, axis=1)
        self.cache = (h, target_W)
        return out

    def backward(self, dout):
        h, target_W = self.cache
        dout = dout.reshape(dout.shape[0], 1)
        dtarget_W = dout * h
        self.embed.backward(dtarget_W)
        dh = dout * target_W
        return dh


class UnigramSapmler:
    def __init__(self, corpus, power, sample_size):
        self.sampelsize = sample_size
        self.vocab_size = None
        self.word_p = None

        counts = collections.Counter()
        counts.update(corpus)

        vocab_size = len(counts)
        self.vocab_size = vocab_size
        self.word_p = np.zeros(vocab_size)
        for i in range(vocab_size):
            self.word_p[i] = counts[i]

        self.word_p = np.power(self.word_p, power)
        self.word_p /= np.sum(self.word_p)

    def get_negative_sample(self, target):
        batch_size = target.shape[0]

        if not GPU:
            negative_sample = np.zeros((batch_size, self.sampel_size), dtype=np.int32)

            for i in range(batch_size):
                p = self.word_p.copy()
                target_idx = target[i]
                p[target_idx] = 0
                p /= p.sum()
                negative_sample[i, :] = np.ramdom.choice(self.vocab_size,
                                                         size=self.sample_size, replace=False, p=p)


class NegativeSamplingLoss:
    def __init__(self, W, corpus, power=0.75, sample_size=5):
        self.sample_size = sample_size
        self.sampler = UnigramSapmler(corpus, power, sample_size)
--------------------------------------------------------------------------------
/gpu/ch04/train.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('..')
import numpy as np
import cupy as cp
from common import config
import pickle
from common.trainer import Trainer
from common.optimizer import Adam
from cbow import CBOW
from common.util import create_contexts_target, to_gpu, to_cpu
from dataset import ptb

window_size = 5
hidden_size = 100
batch_size = 100
max_epoch = 10

corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)

contexts, target = create_contexts_target(corpus, window_size)
print(contexts.shape, target.shape)
contexts, target = to_gpu(contexts), to_gpu(target)

model = CBOW(vocab_size, hidden_size, window_size, corpus)
optimizer = Adam()
trainer = Trainer(model, optimizer)

trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

word_vecs = model.word_vecs
word_vecs = to_cpu(word_vecs)

params = {}
params['word_vecs'] = word_vecs.astype(np.float16)
params['word_to_id'] = word_to_id
params['id_to_word'] = id_to_word
pkl_file = 'cbow_params.pkl'
with open(pkl_file, 'wb') as f:
    pickle.dump(params, f, -1)
--------------------------------------------------------------------------------
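Once training finishes, cbow_params.pkl holds the float16 embeddings plus both vocabulary mappings. A sketch (not in the repository) of reading it back and comparing two words by cosine similarity; it assumes training has already produced the pickle and that both words are in the PTB vocabulary:

import pickle
import numpy as np

with open('cbow_params.pkl', 'rb') as f:
    params = pickle.load(f)

word_vecs = params['word_vecs'].astype(np.float32)
word_to_id = params['word_to_id']

def cosine(a, b, eps=1e-8):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + eps)

print(cosine(word_vecs[word_to_id['you']], word_vecs[word_to_id['we']]))
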
/gpu/ch05/simple_rnnlm.py:
--------------------------------------------------------------------------------
import sys
import cupy as cp
sys.path.append('..')
from common.time_layers import TimeAffine, TimeEmbedding, TimeRNN, TimeSoftmaxWithLoss


class SimpleRnnlm:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V, D, H = vocab_size, wordvec_size, hidden_size
        rn = cp.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        rnn_Wx = (rn(D, H) / cp.sqrt(D)).astype('f')
        rnn_Wh = (rn(H, H) / cp.sqrt(H)).astype('f')
        rnn_b = cp.zeros(H).astype('f')
        affine_W = (rn(H, V) / cp.sqrt(H)).astype('f')
        affine_b = cp.zeros(V).astype('f')

        self.layers = [
            TimeEmbedding(embed_W),
            TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True),
            TimeAffine(affine_W, affine_b)
        ]
        self.loss_layer = TimeSoftmaxWithLoss()
        self.rnn_layer = self.layers[1]

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, xs, ts):
        for layer in self.layers:
            xs = layer.forward(xs)

        loss = self.loss_layer.forward(xs, ts)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

    def reset_state(self):
        self.rnn_layer.reset_state()
--------------------------------------------------------------------------------
/gpu/ch05/train.py:
--------------------------------------------------------------------------------
import sys
# import matplotlib.pyplot as plt
import cupy as cp
sys.path.append('..')
from common.optimizer import SGD
from dataset import ptb
from simple_rnnlm import SimpleRnnlm
from common.trainer import RnnlmTrainer


batch_size = 10
wordvec_size = 100
hidden_size = 100
time_size = 5
lr = 0.1
max_epoch = 100

corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = 1000
corpus = corpus[:corpus_size]
vocab_size = int(max(corpus) + 1)

xs = corpus[:-1]
ts = corpus[1:]
data_size = len(xs)
print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))


model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

trainer.fit(xs, ts, max_epoch, batch_size, time_size)
--------------------------------------------------------------------------------
/gpu/ch05/train_custom_loop.py:
--------------------------------------------------------------------------------
import sys
# import matplotlib.pyplot as plt
import cupy as cp
sys.path.append('..')
from common.optimizer import SGD
from dataset import ptb
from simple_rnnlm import SimpleRnnlm

batch_size = 10
wordvec_size = 100
hidden_size = 100
time_size = 5
lr = 0.1
max_epoch = 100

corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = 1000
corpus = corpus[:corpus_size]
vocab_size = int(max(corpus) + 1)

xs = corpus[:-1]
ts = corpus[1:]
data_size = len(xs)
print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))

max_iter = data_size // (batch_size * time_size)
time_idx = 0
total_loss = 0
loss_count = 0
ppl_list = []

model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)

jump = (corpus_size - 1) // batch_size
offsets = [i * jump for i in range(batch_size)]

for epoch in range(max_epoch):
    for iter in range(max_iter):
        batch_x = cp.empty((batch_size, time_size), dtype='i')
        batch_t = cp.empty((batch_size, time_size), dtype='i')
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, t] = xs[(offset + time_idx) % data_size]
                batch_t[i, t] = ts[(offset + time_idx) % data_size]
            time_idx += 1

        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)
        total_loss += loss
        loss_count += 1

    ppl = cp.exp(total_loss / loss_count)
    print('| epoch %d | perplexity %.2f | loss %.2f' % (epoch + 1, ppl, loss))
    ppl_list.append(float(ppl))
    total_loss, loss_count = 0, 0
--------------------------------------------------------------------------------
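Perplexity here is the exponential of the average cross-entropy loss, so it can be read as the effective branching factor of the model's next-word distribution. A tiny worked example (not from the repository), in plain NumPy:

import numpy as np

losses = [2.0, 1.5, 1.0]        # per-iteration cross-entropy losses
ppl = np.exp(np.mean(losses))   # exp(1.5) ~= 4.48
print(ppl)  # the model is about as uncertain as a uniform choice over ~4.5 words
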
/gpu/ch06/better_rnnlm.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('..')
import cupy as cp
from common.time_layers import TimeAffine, TimeEmbedding, TimeLSTM, TimeDropout, TimeSoftmaxWithLoss
from common.base_model import BaseModel


class BetterRnnlm(BaseModel):
    def __init__(self, vocab_size=10000, wordvec_size=650,
                 hidden_size=650, dropout_ratio=0.5):
        V, D, H = vocab_size, wordvec_size, hidden_size
        rn = cp.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        lstm_Wx1 = (rn(D, 4 * H) / cp.sqrt(D)).astype('f')
        lstm_Wh1 = (rn(H, 4 * H) / cp.sqrt(H)).astype('f')
        lstm_b1 = cp.zeros(4 * H).astype('f')
        lstm_Wx2 = (rn(H, 4 * H) / cp.sqrt(H)).astype('f')
        lstm_Wh2 = (rn(H, 4 * H) / cp.sqrt(H)).astype('f')
        lstm_b2 = cp.zeros(4 * H).astype('f')
        affine_b = cp.zeros(V).astype('f')

        # weight tying: the embedding matrix is reused (transposed) as the output weight
        self.layers = [
            TimeEmbedding(embed_W),
            TimeDropout(dropout_ratio),
            TimeLSTM(lstm_Wx1, lstm_Wh1, lstm_b1, stateful=True),
            TimeDropout(dropout_ratio),
            TimeLSTM(lstm_Wx2, lstm_Wh2, lstm_b2, stateful=True),
            TimeDropout(dropout_ratio),
            TimeAffine(embed_W.T, affine_b)
        ]
        self.loss_layer = TimeSoftmaxWithLoss()
        self.lstm_layers = [self.layers[2], self.layers[4]]
        self.drop_layers = [self.layers[1], self.layers[3], self.layers[5]]
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, xs, train_flg=False):
        for layer in self.drop_layers:
            layer.train_flg = train_flg
        for layer in self.layers:
            xs = layer.forward(xs)
        return xs

    def forward(self, xs, ts, train_flg=True):
        score = self.predict(xs, train_flg)
        loss = self.loss_layer.forward(score, ts)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

    def reset_state(self):
        for layer in self.lstm_layers:
            layer.reset_state()
--------------------------------------------------------------------------------
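BetterRnnlm ties the embedding and output weights by passing embed_W.T to TimeAffine; remove_duplicate in common/trainer.py then folds the two gradients together during training. A small sketch of that aggregation (not in the repository; assumes CuPy, a working gpu/common/util.py, and that it runs from the gpu directory):

import cupy as cp
from common.trainer import remove_duplicate

W = cp.random.randn(3, 2)
params = [W, W.T]                      # weight tying via a transposed view
grads = [cp.ones((3, 2)), cp.ones((2, 3))]
params, grads = remove_duplicate(params, grads)
print(len(params), grads[0])           # 1 parameter left; gradients summed to 2s
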
/gpu/ch06/train_better_rnnlm.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('..')
from common import config
from common.optimizer import SGD
from common.trainer import RnnlmTrainer
from common.util import eval_perplexity, to_gpu
from dataset import ptb
from better_rnnlm import BetterRnnlm

batch_size = 20
wordvec_size = 650
hidden_size = 650
time_size = 35
lr = 20.0
max_epoch = 40
max_grad = 0.25
dropout = 0.5

corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_val, _, _ = ptb.load_data('val')
corpus_test, _, _ = ptb.load_data('test')

corpus = to_gpu(corpus)
corpus_val = to_gpu(corpus_val)
corpus_test = to_gpu(corpus_test)

vocab_size = len(word_to_id)
xs = corpus[:-1]
ts = corpus[1:]

model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

best_ppl = float('inf')
for epoch in range(max_epoch):
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size, time_size=time_size, max_grad=max_grad)
    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print('valid perplexity: ', ppl)

    if best_ppl > ppl:
        best_ppl = ppl
        model.save_params()
    else:
        lr /= 4.0
        optimizer.lr = lr

    model.reset_state()
    print('-' * 50)
--------------------------------------------------------------------------------
/gpu/common/base_model.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('..')
import os
import pickle
import cupy as cp
from common.util import to_gpu, to_cpu


class BaseModel:
    def __init__(self):
        self.params, self.grads = None, None

    def forward(self, *args):
        raise NotImplementedError

    def backward(self, *args):
        raise NotImplementedError

    def save_params(self, file_name=None):
        if file_name is None:
            file_name = self.__class__.__name__ + '.pkl'

        params = [p.astype(cp.float16) for p in self.params]
        params = [to_cpu(p) for p in params]

        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name=None):
        if file_name is None:
            file_name = self.__class__.__name__ + '.pkl'

        if '/' in file_name:
            file_name = file_name.replace('/', os.sep)

        if not os.path.exists(file_name):
            raise IOError('No file: ' + file_name)

        with open(file_name, 'rb') as f:
            params = pickle.load(f)

        params = [p.astype('f') for p in params]
        params = [to_gpu(p) for p in params]

        for i, param in enumerate(self.params):
            param[...] = params[i]
--------------------------------------------------------------------------------
/gpu/common/config.py:
--------------------------------------------------------------------------------
# coding: utf-8

GPU = False
--------------------------------------------------------------------------------
/gpu/common/functions.py:
--------------------------------------------------------------------------------
# coding: utf-8
# from common.np import *
import numpy as np
import cupy as cp


def sigmoid(x):
    return 1 / (1 + cp.exp(-x))


def relu(x):
    return cp.maximum(0, x)


def softmax(x):
    if x.ndim == 2:
        x = x - x.max(axis=1, keepdims=True)
        x = cp.exp(x)
        x /= x.sum(axis=1, keepdims=True)
    elif x.ndim == 1:
        x = x - cp.max(x)
        x = cp.exp(x) / cp.sum(cp.exp(x))

    return x


def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # if the labels are one-hot vectors, convert them to class indices
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]

    return -cp.sum(cp.log(y[cp.arange(batch_size), t] + 1e-7)) / batch_size
--------------------------------------------------------------------------------
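The softmax above subtracts the row maximum before exponentiating; without that shift, exp overflows for large logits even though the result is mathematically identical. A short demonstration (not in the repository), in plain NumPy:

import numpy as np

x = np.array([1000.0, 1001.0])
naive = np.exp(x) / np.exp(x).sum()   # overflows: nan from inf/inf
shifted = np.exp(x - x.max())
shifted /= shifted.sum()
print(naive, shifted)                 # [nan nan] vs ~[0.269 0.731]
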
/gpu/common/layers.py:
--------------------------------------------------------------------------------
import numpy as np
import cupy as cp
import cupyx
from common.functions import softmax, cross_entropy_error


class MatMul:
    def __init__(self, W):
        self.params = [W]
        self.grads = [cp.zeros_like(W)]
        self.x = None

    def forward(self, x):
        W, = self.params
        out = cp.dot(x, W)
        self.x = x
        return out

    def backward(self, dout):
        W, = self.params
        dx = cp.dot(dout, W.T)
        dW = cp.dot(self.x.T, dout)
        self.grads[0][...] = dW
        return dx


# copied from ch1/forward_net.py (it is used in later chapters too), with backward implemented
class Sigmoid:
    def __init__(self):
        self.params = []
        self.grads = []

    def forward(self, x):
        self.out = 1 / (1 + cp.exp(-x))
        return self.out

    def backward(self, dout):
        dx = dout * (1 - self.out) * self.out
        return dx


class SigmoidWithLoss:
    def __init__(self):
        self.params, self.grads = [], []
        self.loss = None
        self.y = None  # output of the sigmoid
        self.t = None  # teacher labels

    def forward(self, x, t):
        self.t = t
        self.y = 1 / (1 + cp.exp(-x))

        self.loss = cross_entropy_error(cp.c_[1 - self.y, self.y], self.t)

        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]

        dx = (self.y - self.t) * dout / batch_size
        return dx


class Affine:
    def __init__(self, W, b):
        self.mm = MatMul(W)  # implemented in terms of MatMul
        self.params = [W, b]
        self.grads = [
            self.mm.grads[0],  # share MatMul's grads so the model keeps referencing the grads list created at initialization
            cp.zeros_like(b),
        ]

    def forward(self, x):
        _, b = self.params
        out = self.mm.forward(x) + b
        return out

    def backward(self, dout):
        dx = self.mm.backward(dout)
        db = cp.sum(dout, axis=0)
        # self.grads[0] is updated by mm.backward
        self.grads[1] = db.copy()
        return dx


class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # initialize weights and biases
        W1 = cp.random.randn(I, H)
        b1 = cp.random.randn(H)
        W2 = cp.random.randn(H, O)
        b2 = cp.random.randn(O)

        # create layers
        self.layers = [
            Affine(W1, b1),
            Sigmoid(),
            Affine(W2, b2)
        ]

        # collect all weights into a list
        self.params = []
        for layer in self.layers:
            self.params += layer.params

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x


# FROM https://github.com/oreilly-japan/deep-learning-from-scratch-2/blob/master/common/layers.py
class Softmax:
    def __init__(self):
        self.params, self.grads = [], []
        self.out = None

    def forward(self, x):
        self.out = softmax(x)
        return self.out

    def backward(self, dout):
        dx = self.out * dout
        sumdx = cp.sum(dx, axis=1, keepdims=True)
        dx -= self.out * sumdx
        return dx


class SoftmaxWithLoss:
    def __init__(self):
        self.params, self.grads = [], []
        self.y = None  # output of the softmax
        self.t = None  # teacher labels

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)

        # if the labels are one-hot vectors, convert them to class indices
        if self.t.size == self.y.size:
            self.t = self.t.argmax(axis=1)

        loss = cross_entropy_error(self.y, self.t)
        return loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]

        dx = self.y.copy()
        dx[cp.arange(batch_size), self.t] -= 1
        dx *= dout
        dx = dx / batch_size

        return dx


class Embedding:
    def __init__(self, W):
        self.params = [W]
        self.grads = [cp.zeros_like(W)]
        self.idx = None

    def forward(self, idx):
        W, = self.params
        self.idx = idx
        out = W[idx]
        return out

    def backward(self, dout):
        dW, = self.grads
        dW[...] = 0
        cupyx.scatter_add(dW, self.idx, dout)
        return None
--------------------------------------------------------------------------------
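Embedding.backward scatters the dout rows back into dW; scatter-adding (rather than plain assignment) matters because the same index can appear several times in one batch, and its gradients must accumulate. A NumPy equivalent of the same idea (not in the repository), using np.add.at:

import numpy as np

dW = np.zeros((4, 3))
idx = np.array([0, 2, 0])   # index 0 appears twice
dout = np.ones((3, 3))
np.add.at(dW, idx, dout)
print(dW[0])                # [2. 2. 2.]: the two rows for index 0 accumulated
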
/gpu/common/np.py:
--------------------------------------------------------------------------------
# coding: utf-8
from common.config import GPU


if GPU:
    import cupy as np
    np.cuda.set_allocator(np.cuda.MemoryPool().malloc)
    np.add.at = np.scatter_add

    print('\033[92m' + '-' * 60 + '\033[0m')
    print(' ' * 23 + '\033[92mGPU Mode (cupy)\033[0m')
    print('\033[92m' + '-' * 60 + '\033[0m\n')
else:
    import numpy as np
--------------------------------------------------------------------------------
/gpu/common/optimizer.py:
--------------------------------------------------------------------------------
import numpy as np
import cupy as cp


class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr  # learning rate

    def update(self, params, grads):
        for i in range(len(params)):
            params[i] -= self.lr * grads[i]


class AdaGrad:
    '''
    AdaGrad
    '''
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = []
            for param in params:
                self.h.append(cp.zeros_like(param))

        for i in range(len(params)):
            self.h[i] += grads[i] * grads[i]
            params[i] -= self.lr * grads[i] / (cp.sqrt(self.h[i]) + 1e-7)


class Adam:
    '''
    Adam (http://arxiv.org/abs/1412.6980v8)
    '''
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = [], []
            for param in params:
                self.m.append(cp.zeros_like(param))
                self.v.append(cp.zeros_like(param))

        self.iter += 1
        lr_t = self.lr * cp.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for i in range(len(params)):
            self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i])
            self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i])

            params[i] -= lr_t * self.m[i] / (cp.sqrt(self.v[i]) + 1e-7)
--------------------------------------------------------------------------------
/gpu/common/trainer.py:
--------------------------------------------------------------------------------
# coding: utf-8
import sys
import numpy
import time
import matplotlib.pyplot as plt
import cupy as cp
sys.path.append('..')
from common.util import clip_grads


class Trainer:
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.loss_list = []
        self.eval_interval = None
        self.current_epoch = 0

    def fit(self, x, t, max_epoch=10, batch_size=32, max_grad=None, eval_interval=20):
        data_size = len(x)
        max_iters = data_size // batch_size
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0

        start_time = time.time()
        for epoch in range(max_epoch):
            # shuffle the data
            idx = numpy.random.permutation(numpy.arange(data_size))
            x = x[idx]
            t = t[idx]

            for iters in range(max_iters):
                batch_x = x[iters*batch_size:(iters+1)*batch_size]
                batch_t = t[iters*batch_size:(iters+1)*batch_size]

                # compute gradients and update parameters
                loss = model.forward(batch_x, batch_t)
                model.backward()
                params, grads = remove_duplicate(model.params, model.grads)  # consolidate shared weights into one
                if max_grad is not None:
                    clip_grads(grads, max_grad)
                optimizer.update(params, grads)
                total_loss += loss
                loss_count += 1

                # evaluation
                if (eval_interval is not None) and (iters % eval_interval) == 0:
                    avg_loss = total_loss / loss_count
                    elapsed_time = time.time() - start_time
                    print('| epoch %d | iter %d / %d | time %d[s] | loss %.2f'
                          % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, avg_loss))
                    self.loss_list.append(float(avg_loss))
                    total_loss, loss_count = 0, 0

            self.current_epoch += 1

    def plot(self, ylim=None):
        x = numpy.arange(len(self.loss_list))
        if ylim is not None:
            plt.ylim(*ylim)
        plt.plot(x, self.loss_list, label='train')
        plt.xlabel('iterations (x' + str(self.eval_interval) + ')')
        plt.ylabel('loss')
        plt.show()


class RnnlmTrainer:
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.time_idx = None
        self.ppl_list = None
        self.eval_interval = None
        self.current_epoch = 0

    def get_batch(self, x, t, batch_size, time_size):
        batch_x = cp.empty((batch_size, time_size), dtype='i')
        batch_t = cp.empty((batch_size, time_size), dtype='i')

        data_size = len(x)
        jump = data_size // batch_size
        offsets = [i * jump for i in range(batch_size)]  # start offset of each sample in the batch

        for time in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, time] = x[(offset + self.time_idx) % data_size]
                batch_t[i, time] = t[(offset + self.time_idx) % data_size]
            self.time_idx += 1
        return batch_x, batch_t

    def fit(self, xs, ts, max_epoch=10, batch_size=20, time_size=35,
            max_grad=None, eval_interval=20):
        data_size = len(xs)
        max_iters = data_size // (batch_size * time_size)
        self.time_idx = 0
        self.ppl_list = []
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0

        start_time = time.time()
        for epoch in range(max_epoch):
            for iters in range(max_iters):
                batch_x, batch_t = self.get_batch(xs, ts, batch_size, time_size)

                # compute gradients and update parameters
                loss = model.forward(batch_x, batch_t)
                model.backward()
                params, grads = remove_duplicate(model.params, model.grads)  # consolidate shared weights into one
                if max_grad is not None:
                    clip_grads(grads, max_grad)
                optimizer.update(params, grads)
                total_loss += loss
                loss_count += 1

                # evaluate perplexity
                if (eval_interval is not None) and (iters % eval_interval) == 0:
                    ppl = cp.exp(total_loss / loss_count)
                    elapsed_time = time.time() - start_time
                    print('| epoch %d | iter %d / %d | time %d[s] | perplexity %.2f'
                          % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, ppl))
                    self.ppl_list.append(float(ppl))
                    total_loss, loss_count = 0, 0

            self.current_epoch += 1

    def plot(self, ylim=None):
        x = numpy.arange(len(self.ppl_list))
        if ylim is not None:
            plt.ylim(*ylim)
        plt.plot(x, self.ppl_list, label='train')
        plt.xlabel('iterations (x' + str(self.eval_interval) + ')')
        plt.ylabel('perplexity')
        plt.show()


def remove_duplicate(params, grads):
    '''
    Consolidate duplicated weights in the parameter list into one entry
    and accumulate the corresponding gradients.
    '''
    params, grads = params[:], grads[:]  # copy lists

    while True:
        find_flg = False
        L = len(params)

        for i in range(0, L - 1):
            for j in range(i + 1, L):
                # the weights are shared
                if params[i] is params[j]:
                    grads[i] += grads[j]  # accumulate gradients
                    find_flg = True
                    params.pop(j)
                    grads.pop(j)
                # the weights are shared as a transposed matrix (weight tying)
                elif params[i].ndim == 2 and params[j].ndim == 2 and \
                     params[i].T.shape == params[j].shape and cp.all(params[i].T == params[j]):
                    grads[i] += grads[j].T
                    find_flg = True
                    params.pop(j)
                    grads.pop(j)

                if find_flg: break
            if find_flg: break

        if not find_flg: break

    return params, grads
--------------------------------------------------------------------------------
self.ppl_list.append(float(ppl)) 126 | total_loss, loss_count = 0, 0 127 | 128 | self.current_epoch += 1 129 | 130 | def plot(self, ylim=None): 131 | x = numpy.arange(len(self.ppl_list)) 132 | if ylim is not None: 133 | plt.ylim(*ylim) 134 | plt.plot(x, self.ppl_list, label='train') 135 | plt.xlabel('iterations (x' + str(self.eval_interval) + ')') 136 | plt.ylabel('perplexity') 137 | plt.show() 138 | 139 | 140 | def remove_duplicate(params, grads): 141 | ''' 142 | パラメータ配列中の重複する重みをひとつに集約し、 143 | その重みに対応する勾配を加算する 144 | ''' 145 | params, grads = params[:], grads[:] # copy list 146 | 147 | while True: 148 | find_flg = False 149 | L = len(params) 150 | 151 | for i in range(0, L - 1): 152 | for j in range(i + 1, L): 153 | # 重みを共有する場合 154 | if params[i] is params[j]: 155 | grads[i] += grads[j] # 勾配の加算 156 | find_flg = True 157 | params.pop(j) 158 | grads.pop(j) 159 | # 転置行列として重みを共有する場合(weight tying) 160 | elif params[i].ndim == 2 and params[j].ndim == 2 and \ 161 | params[i].T.shape == params[j].shape and cp.all(params[i].T == params[j]): 162 | grads[i] += grads[j].T 163 | find_flg = True 164 | params.pop(j) 165 | grads.pop(j) 166 | 167 | if find_flg: break 168 | if find_flg: break 169 | 170 | if not find_flg: break 171 | 172 | return params, grads 173 | -------------------------------------------------------------------------------- /python/20190327/sigmoid.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def sigmoid(x): 5 | return 1 / (1 + np.exp(-x)) 6 | 7 | print(sigmoid(3)) 8 | print(sigmoid(0)) 9 | print(sigmoid(-3)) 10 | 11 | print(sigmoid(3) + sigmoid(-3)) 12 | 13 | # numpyっぽい書き方 14 | print(sigmoid(np.array([3,0,-3]))) 15 | 16 | # P.13の例 17 | x = np.random.randn(10, 2) 18 | W1 = np.random.randn(2, 4) 19 | b1 = np.random.randn(4) 20 | W2 = np.random.randn(4, 3) 21 | b2 = np.random.randn(3) 22 | 23 | h = np.dot(x, W1) + b1 # これで1層の計算 24 | a = sigmoid(h) 25 | s = np.dot(a, W2) + b2 26 | 27 | print("h=", h) 28 | print("a=", a) 29 | print("s=", s) 30 | 31 | print(h.shape) 32 | print(a.shape) 33 | print(s.shape) 34 | 35 | -------------------------------------------------------------------------------- /python/20190327/sigmoid_class.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class Sigmoid: 5 | def __init__(self): 6 | pass # 何もしない 7 | 8 | def forward(self, x): 9 | return 1 / (1 + np.exp(-x)) 10 | 11 | def backward(self): 12 | pass 13 | 14 | 15 | sig = Sigmoid() 16 | 17 | print(sig.forward(3)) 18 | print(sig.forward(0)) 19 | print(sig.forward(-3)) 20 | 21 | print(sig.forward(3) + sig.forward(-3)) 22 | 23 | # numpyっぽい書き方 24 | print(sig.forward(np.array([3,0,-3]))) 25 | 26 | -------------------------------------------------------------------------------- /python/ch1/affine.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class Affine: 5 | def __init__(self, W, b): 6 | self.params = [W, b] 7 | 8 | def forward(self, x): 9 | W, b = self.params 10 | out = np.dot(x, W) + b 11 | return out 12 | 13 | -------------------------------------------------------------------------------- /python/ch1/forward_net.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class Sigmoid: 5 | def __init__(self): 6 | self.params = [] 7 | 8 | def forward(self, x): 9 | return 1 / (1 + np.exp(-x)) 10 | 11 | def backward(self): 12 | pass 13 | 14 | 
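# (Editor's addition, hedged -- not in the original file.) A quick numerical
# sanity check of the identity dy/dx = y*(1 - y) that the Sigmoid backward
# passes elsewhere in this repo rely on; `_check_sigmoid_grad` is our own
# helper name, not part of the book's code.
def _check_sigmoid_grad(x=0.5, eps=1e-6):
    y = 1.0 / (1.0 + np.exp(-x))
    analytic = y * (1.0 - y)  # sigmoid derivative expressed via its own output
    numeric = (1.0 / (1.0 + np.exp(-(x + eps)))
               - 1.0 / (1.0 + np.exp(-(x - eps)))) / (2 * eps)  # central difference
    assert abs(analytic - numeric) < 1e-8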
15 | 16 | class Affine: 17 | def __init__(self, W, b): 18 | self.params = [W, b] 19 | 20 | def forward(self, x): 21 | W, b = self.params 22 | out = np.dot(x, W) + b 23 | return out 24 | 25 | def backward(self): 26 | pass 27 | 28 | 29 | 30 | class TwoLayerNet: 31 | def __init__(self, input_size, hidden_size, output_size): 32 | I, H, O = input_size, hidden_size, output_size 33 | 34 | # 重みとバイアスの初期化 35 | W1 = np.random.randn(I, H) 36 | b1 = np.random.randn(H) 37 | W2 = np.random.randn(H, O) 38 | b2 = np.random.randn(O) 39 | 40 | # レイヤの生成 41 | self.layers = [ 42 | Affine(W1, b1), 43 | Sigmoid(), 44 | Affine(W2, b2) 45 | ] 46 | 47 | # すべての重みをリストにまとめる 48 | self.params = [] 49 | for layer in self.layers: 50 | self.params += layer.params 51 | 52 | def predict(self, x): 53 | for layer in self.layers: 54 | x = layer.forward(x) 55 | return x 56 | 57 | 58 | if __name__ == '__main__': 59 | sig = Sigmoid() 60 | 61 | print(sig.forward(3)) 62 | print(sig.forward(0)) 63 | print(sig.forward(-3)) 64 | 65 | print(sig.forward(3) + sig.forward(-3)) 66 | 67 | # numpyっぽい書き方 68 | print(sig.forward(np.array([3,0,-3]))) 69 | 70 | # text p18 71 | x = np.random.randn(10, 2) 72 | model = TwoLayerNet(2, 4, 3) 73 | s = model.predict(x) 74 | print(s) 75 | 76 | 77 | -------------------------------------------------------------------------------- /python/ch1/show_spiral_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | from upstream.dataset import spiral # 教科書のリポジトリのを使う 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | x, t = spiral.load_data() 8 | 9 | print('x', x.shape) 10 | print('t', t.shape) 11 | 12 | N = 100 13 | CLS_NUM = 3 14 | markers = ['o', 'x', '^'] 15 | for i in range(CLS_NUM): # python3向け。python2ではrangeの挙動が違うので注意。 16 | plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=10, marker=markers[i]) 17 | plt.show() 18 | -------------------------------------------------------------------------------- /python/ch1/sigmoid.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def sigmoid(x): 5 | return 1 / (1 + np.exp(-x)) 6 | 7 | print(sigmoid(3)) 8 | print(sigmoid(0)) 9 | print(sigmoid(-3)) 10 | 11 | print(sigmoid(3) + sigmoid(-3)) 12 | 13 | # numpyっぽい書き方 14 | print(sigmoid(np.array([3,0,-3]))) 15 | 16 | # P.13の例 17 | x = np.random.randn(10, 2) 18 | W1 = np.random.randn(2, 4) 19 | b1 = np.random.randn(4) 20 | W2 = np.random.randn(4, 3) 21 | b2 = np.random.randn(3) 22 | 23 | h = np.dot(x, W1) + b1 # これで1層の計算 24 | a = sigmoid(h) 25 | s = np.dot(a, W2) + b2 26 | 27 | print("h=", h) 28 | print("a=", a) 29 | print("s=", s) 30 | 31 | print(h.shape) 32 | print(a.shape) 33 | print(s.shape) 34 | 35 | -------------------------------------------------------------------------------- /python/ch1/sigmoid_class.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class Sigmoid: 5 | def __init__(self): 6 | pass # 何もしない 7 | 8 | def forward(self, x): 9 | return 1 / (1 + np.exp(-x)) 10 | 11 | def backward(self): 12 | pass 13 | 14 | 15 | sig = Sigmoid() 16 | 17 | print(sig.forward(3)) 18 | print(sig.forward(0)) 19 | print(sig.forward(-3)) 20 | 21 | print(sig.forward(3) + sig.forward(-3)) 22 | 23 | # numpyっぽい書き方 24 | print(sig.forward(np.array([3,0,-3]))) 25 | 26 | -------------------------------------------------------------------------------- /python/ch1/train_custom_loop.py: 
-------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('../upstream')  # dataset
3 | import numpy as np
4 | from common.optimizer import SGD
5 | from dataset import spiral
6 | import matplotlib.pyplot as plt
7 | from two_layer_net import TwoLayerNet
8 | 
9 | max_epoch = 300
10 | batch_size = 30
11 | hidden_size = 10
12 | learning_rate = 1.0
13 | 
14 | x, t = spiral.load_data()
15 | model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
16 | optimizer = SGD(lr=learning_rate)
17 | 
18 | data_size = len(x)
19 | max_iters = data_size // batch_size
20 | total_loss = 0
21 | loss_count = 0
22 | loss_list = []
23 | 
24 | for epoch in range(max_epoch):
25 |     idx = np.random.permutation(data_size)
26 |     x = x[idx]
27 |     t = t[idx]
28 | 
29 |     for iters in range(max_iters):
30 |         batch_x = x[iters*batch_size:(iters+1)*batch_size]
31 |         batch_t = t[iters*batch_size:(iters+1)*batch_size]
32 | 
33 |         loss = model.forward(batch_x, batch_t)
34 |         model.backward()
35 |         optimizer.update(model.params, model.grads)
36 | 
37 |         total_loss += loss
38 |         loss_count += 1
39 | 
40 |         if (iters + 1) % 10 == 0:
41 |             avg_loss = total_loss / loss_count
42 |             print('| epoch %d | iter %d / %d | loss %.2f'
43 |                   % (epoch + 1, iters + 1, max_iters, avg_loss))
44 |             loss_list.append(avg_loss)
45 |             total_loss, loss_count = 0, 0
46 | 
-------------------------------------------------------------------------------- /python/ch1/two_layer_net.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common.layers import Affine, Sigmoid  # use our own Affine and Sigmoid
5 | from upstream.common.layers import SoftmaxWithLoss  # use the textbook's SoftmaxWithLoss
6 | 
7 | class TwoLayerNet:
8 |     def __init__(self, input_size, hidden_size, output_size):
9 |         I, H, O = input_size, hidden_size, output_size
10 |         W1 = 0.01 * np.random.randn(I, H)
11 |         b1 = np.zeros(H)
12 |         W2 = 0.01 * np.random.randn(H, O)
13 |         b2 = np.zeros(O)
14 | 
15 |         self.layers = [
16 |             Affine(W1, b1),
17 |             Sigmoid(),
18 |             Affine(W2, b2),
19 |         ]
20 |         self.loss_layer = SoftmaxWithLoss()
21 | 
22 |         self.params, self.grads = [], []
23 |         for layer in self.layers:
24 |             self.params += layer.params
25 |             self.grads += layer.grads
26 | 
27 |     def predict(self, x):
28 |         for layer in self.layers:
29 |             x = layer.forward(x)
30 |         return x
31 | 
32 |     def forward(self, x, t):
33 |         score = self.predict(x)
34 |         loss = self.loss_layer.forward(score, t)
35 |         return loss
36 | 
37 |     def backward(self, dout=1):
38 |         dout = self.loss_layer.backward(dout)
39 |         for layer in reversed(self.layers):
40 |             dout = layer.backward(dout)
41 |         return dout
42 | 
-------------------------------------------------------------------------------- /python/ch3/cbow.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common.layers import MatMul
5 | 
6 | # context data
7 | c0 = np.array([[1, 0, 0, 0, 0, 0, 0]])
8 | c1 = np.array([[0, 0, 1, 0, 0, 0, 0]])
9 | 
10 | # initialize weights
11 | W_in = np.random.randn(7, 3)
12 | W_out = np.random.randn(3, 7)
13 | 
14 | # generate layers
15 | in_layer0 = MatMul(W_in)
16 | in_layer1 = MatMul(W_in)
17 | out_layer = MatMul(W_out)
18 | 
19 | # propagation
20 | h0 = in_layer0.forward(c0)
21 | h1 = in_layer1.forward(c1)
22 | h = 0.5 * (h0 + h1)
23 | s = out_layer.forward(h)
24 | 
25 | print(s)
26 | 
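# (Editor's addition, hedged -- not in the original file.) `s` above holds raw
# scores over the 7-word vocabulary; one softmax turns them into a probability
# distribution, from which the predicted word id can be read off:
probs = np.exp(s - s.max()) / np.sum(np.exp(s - s.max()))  # numerically stable softmax
print(probs, int(probs.argmax()))  # most probable word id for this context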
-------------------------------------------------------------------------------- /python/ch3/dot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | c = np.array([[1, 0, 0, 0, 0, 0, 0]]) 4 | W = np.random.randn(7, 3) 5 | 6 | print(W) 7 | 8 | h = np.dot(c, W) 9 | 10 | print(h) 11 | -------------------------------------------------------------------------------- /python/ch3/matmul.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | import numpy as np 4 | 5 | from common.layers import MatMul 6 | 7 | c = np.array([[1, 0, 0, 0, 0, 0, 0]]) 8 | W = np.random.randn(7, 3) 9 | layer = MatMul(W) 10 | h = layer.forward(c) 11 | 12 | print(h) 13 | -------------------------------------------------------------------------------- /python/ch3/simple_cbow.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | import numpy as np 4 | from common.layers import MatMul, SoftmaxWithLoss 5 | 6 | class SimpleCBOW: 7 | def __init__(self, vocab_size, hidden_size): 8 | V, H = vocab_size, hidden_size 9 | 10 | W_in = 0.01 * np.random.randn(V, H).astype('f') 11 | W_out = 0.01 * np.random.randn(H, V).astype('f') 12 | 13 | self.in_layer0 = MatMul(W_in) 14 | self.in_layer1 = MatMul(W_in) 15 | self.out_layer = MatMul(W_out) 16 | self.loss_layer = SoftmaxWithLoss() 17 | 18 | layers = [self.in_layer0, self.in_layer1, self.out_layer] 19 | self.params, self.grads = [], [] 20 | for layer in layers: 21 | self.params += layer.params 22 | self.grads += layer.grads 23 | 24 | self.word_vecs = W_in 25 | 26 | def forward(self, contexts, target): 27 | h0 = self.in_layer0.forward(contexts[:, 0]) 28 | h1 = self.in_layer1.forward(contexts[:, 1]) 29 | h = (h0 + h1) * 0.5 30 | score = self.out_layer.forward(h) 31 | loss = self.loss_layer.forward(score, target) 32 | return loss 33 | 34 | def backward(self, dout=1): 35 | ds = self.loss_layer.backward(dout) 36 | da = self.out_layer.backward(ds) 37 | da *= 0.5 38 | self.in_layer0.backward(da) 39 | self.in_layer1.backward(da) 40 | return None 41 | -------------------------------------------------------------------------------- /python/ch3/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | from common.trainer import Trainer 4 | from common.optimizer import Adam 5 | from simple_cbow import SimpleCBOW 6 | from common.util import preprocess, create_contexts_target, convert_one_hot 7 | 8 | window_size = 1 9 | hidden_size = 5 10 | batch_size = 3 11 | max_epoch = 1000 12 | 13 | text = 'You say goodbye and I say hello.' 
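# (Editor's note, added:) with window_size=1, this one-sentence corpus gives
# 7 unique words and 8 tokens; create_contexts_target() below returns contexts
# of shape (6, 2) (left/right neighbor word IDs) plus 6 target IDs, which
# convert_one_hot() then expands into the one-hot arrays SimpleCBOW expects.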
14 | corpus, word_to_id, id_to_word = preprocess(text)
15 | 
16 | vocab_size = len(word_to_id)
17 | contexts, target = create_contexts_target(corpus, window_size)
18 | target = convert_one_hot(target, vocab_size)
19 | contexts = convert_one_hot(contexts, vocab_size)
20 | 
21 | model = SimpleCBOW(vocab_size, hidden_size)
22 | optimizer = Adam()
23 | trainer = Trainer(model, optimizer)
24 | 
25 | trainer.fit(contexts, target, max_epoch, batch_size)
26 | trainer.plot()
27 | 
-------------------------------------------------------------------------------- /python/ch4/cbow.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common.layers import MatMul, SoftmaxWithLoss, Embedding
5 | from ch4.negative_sampling_layer import NegativeSamplingLoss
6 | 
7 | class CBOW:
8 |     def __init__(self, vocab_size, hidden_size, window_size, corpus):
9 |         V, H = vocab_size, hidden_size
10 | 
11 |         W_in = 0.01 * np.random.randn(V, H).astype('f')
12 |         W_out = 0.01 * np.random.randn(V, H).astype('f')
13 | 
14 |         self.in_layers = []
15 |         for i in range(2 * window_size):
16 |             layer = Embedding(W_in)
17 |             self.in_layers.append(layer)
18 |         print('W_out CBOW')  # debug output
19 |         self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)
20 | 
21 | 
22 |         layers = self.in_layers + [self.ns_loss]
23 |         self.params, self.grads = [], []
24 |         for layer in layers:
25 |             self.params += layer.params
26 |             self.grads += layer.grads
27 | 
28 |         self.word_vecs = W_in
29 | 
30 |     def forward(self, contexts, target):
31 |         h = 0  # starts as a scalar; numpy broadcasting turns it into the hidden vector below
32 |         for i, layer in enumerate(self.in_layers):
33 |             h += layer.forward(contexts[:, i])
34 |         h *= 1 / len(self.in_layers)
35 |         loss = self.ns_loss.forward(h, target)
36 |         return loss
37 | 
38 |     def backward(self, dout=1):
39 |         dout = self.ns_loss.backward(dout)
40 |         dout *= 1 / len(self.in_layers)
41 |         for layer in self.in_layers:
42 |             layer.backward(dout)
43 |         return None
44 | 
45 | if __name__ == '__main__':
46 |     cbow = CBOW(5, 10, 1, np.arange(5))  # tiny construction smoke test; SimpleCBOW lives in ch3 and is not defined here
-------------------------------------------------------------------------------- /python/ch4/cbow_params.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch4/cbow_params.pkl -------------------------------------------------------------------------------- /python/ch4/negative_sampling_layer.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common.layers import MatMul, SoftmaxWithLoss, SigmoidWithLoss, Embedding
5 | import collections
6 | 
7 | class EmbeddingDot:
8 |     def __init__(self, W):
9 |         self.embed = Embedding(W)
10 |         self.params = self.embed.params
11 |         self.grads = self.embed.grads
12 |         self.cache = None
13 | 
14 |     def forward(self, h, idx):
15 |         target_W = self.embed.forward(idx)
16 |         out = np.sum(target_W * h, axis=1)
17 | 
18 |         self.cache = (h, target_W)
19 |         return out
20 | 
21 |     def backward(self, dout):
22 |         h, target_W = self.cache
23 |         dout = dout.reshape(dout.shape[0], 1)
24 | 
25 |         dtarget_W = dout * h
26 |         self.embed.backward(dtarget_W)
27 |         dh = dout * target_W
28 |         return dh
29 | 
30 | class UnigramSampler:
31 |     def __init__(self, corpus, power, sample_size):
32 |         self.sample_size = sample_size
33 |         self.vocab_size = None
34 |         self.word_p = None
35 | 
36 |         counts = 
collections.Counter() 37 | for word_id in corpus: 38 | counts[word_id] += 1 39 | 40 | vocab_size = len(counts) 41 | self.vocab_size = vocab_size 42 | 43 | self.word_p = np.zeros(vocab_size) 44 | for i in range(vocab_size): 45 | self.word_p[i] = counts[i] 46 | 47 | self.word_p = np.power(self.word_p, power) 48 | self.word_p /= np.sum(self.word_p) 49 | 50 | def get_negative_sample(self, target): 51 | batch_size = target.shape[0] 52 | 53 | # if not GPU: 54 | negative_sample = np.zeros((batch_size, self.sample_size), dtype=np.int32) 55 | 56 | for i in range(batch_size): 57 | p = self.word_p.copy() 58 | target_idx = target[i] 59 | p[target_idx] = 0 60 | p /= p.sum() 61 | negative_sample[i, :] = np.random.choice(self.vocab_size, size=self.sample_size, replace=False, p=p) 62 | # else: 63 | # # GPU(cupy)で計算するときは、速度を優先 64 | # # 負例にターゲットが含まれるケースがある 65 | # negative_sample = np.random.choice(self.vocab_size, size=(batch_size, self.sample_size), 66 | # replace=True, p=self.word_p) 67 | 68 | return negative_sample 69 | 70 | class NegativeSamplingLoss: 71 | def __init__(self, W, corpus, power=0.75, sample_size=5): 72 | self.sample_size = sample_size 73 | self.sampler = UnigramSampler(corpus, power, sample_size) 74 | self.loss_layers = [SigmoidWithLoss() for _ in range(sample_size + 1)] 75 | self.embed_dot_layers = [EmbeddingDot(W) for _ in range(sample_size + 1)] 76 | print('W in NSLoss', [W.shape for _ in range(sample_size + 1)]) 77 | 78 | self.params, self.grads = [], [] 79 | for layer in self.embed_dot_layers: 80 | self.params += layer.params 81 | self.grads += layer.grads 82 | 83 | def forward(self, h, target): 84 | batch_size = target.shape[0] 85 | negative_sample = self.sampler.get_negative_sample(target) 86 | 87 | # 正例 88 | score = self.embed_dot_layers[0].forward(h, target) 89 | correct_label = np.ones(batch_size, dtype=np.int32) 90 | loss = self.loss_layers[0].forward(score, correct_label) 91 | 92 | # 負例 93 | negative_label = np.zeros(batch_size, dtype=np.int32) 94 | for i in range(self.sample_size): 95 | negative_target = negative_sample[:, i] 96 | score = self.embed_dot_layers[1 + i].forward(h, negative_target) 97 | loss += self.loss_layers[1 + i].forward(score, negative_label) 98 | 99 | return loss 100 | 101 | def backward(self, dout=1): 102 | dh = 0 103 | for l0, l1 in zip(self.loss_layers, self.embed_dot_layers): 104 | dscore = l0.backward(dout) 105 | dh += l1.backward(dscore) 106 | return dh 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /python/ch4/ptb.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | sys.path.append('..') 5 | try: 6 | import urllib.request 7 | except ImportError: 8 | raise ImportError('Use Python3!') 9 | import pickle 10 | import numpy as np 11 | 12 | 13 | url_base = 'https://raw.githubusercontent.com/tomsercu/lstm/master/data/' 14 | key_file = { 15 | 'train':'ptb.train.txt', 16 | 'test':'ptb.test.txt', 17 | 'valid':'ptb.valid.txt' 18 | } 19 | save_file = { 20 | 'train':'ptb.train.npy', 21 | 'test':'ptb.test.npy', 22 | 'valid':'ptb.valid.npy' 23 | } 24 | vocab_file = 'ptb.vocab.pkl' 25 | 26 | dataset_dir = os.path.dirname(os.path.abspath(__file__)) 27 | 28 | 29 | def _download(file_name): 30 | file_path = dataset_dir + '/' + file_name 31 | if os.path.exists(file_path): 32 | return 33 | 34 | print('Downloading ' + file_name + ' ... 
')
35 | 
36 |     try:
37 |         urllib.request.urlretrieve(url_base + file_name, file_path)
38 |     except urllib.error.URLError:
39 |         import ssl
40 |         ssl._create_default_https_context = ssl._create_unverified_context
41 |         urllib.request.urlretrieve(url_base + file_name, file_path)
42 | 
43 |     print('Done')
44 | 
45 | 
46 | def load_vocab():
47 |     vocab_path = dataset_dir + '/' + vocab_file
48 | 
49 |     if os.path.exists(vocab_path):
50 |         with open(vocab_path, 'rb') as f:
51 |             word_to_id, id_to_word = pickle.load(f)
52 |         return word_to_id, id_to_word
53 | 
54 |     word_to_id = {}
55 |     id_to_word = {}
56 |     data_type = 'train'
57 |     file_name = key_file[data_type]
58 |     file_path = dataset_dir + '/' + file_name
59 | 
60 |     _download(file_name)
61 | 
62 |     words = open(file_path).read().replace('\n', '<eos>').strip().split()  # mark line ends with the <eos> token
63 | 
64 |     for i, word in enumerate(words):
65 |         if word not in word_to_id:
66 |             tmp_id = len(word_to_id)
67 |             word_to_id[word] = tmp_id
68 |             id_to_word[tmp_id] = word
69 | 
70 |     with open(vocab_path, 'wb') as f:
71 |         pickle.dump((word_to_id, id_to_word), f)
72 | 
73 |     return word_to_id, id_to_word
74 | 
75 | 
76 | def load_data(data_type='train'):
77 |     '''
78 |     :param data_type: which split to load: 'train', 'test' or 'valid' ('val' is also accepted)
79 |     :return:
80 |     '''
81 |     if data_type == 'val': data_type = 'valid'
82 |     save_path = dataset_dir + '/' + save_file[data_type]
83 | 
84 |     word_to_id, id_to_word = load_vocab()
85 | 
86 |     if os.path.exists(save_path):
87 |         corpus = np.load(save_path)
88 |         return corpus, word_to_id, id_to_word
89 | 
90 |     file_name = key_file[data_type]
91 |     file_path = dataset_dir + '/' + file_name
92 |     _download(file_name)
93 | 
94 |     words = open(file_path).read().replace('\n', '<eos>').strip().split()  # mark line ends with the <eos> token
95 |     corpus = np.array([word_to_id[w] for w in words])
96 | 
97 |     np.save(save_path, corpus)
98 |     return corpus, word_to_id, id_to_word
99 | 
100 | 
101 | if __name__ == '__main__':
102 |     for data_type in ('train', 'val', 'test'):
103 |         load_data(data_type)
-------------------------------------------------------------------------------- /python/ch4/ptb.train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch4/ptb.train.npy -------------------------------------------------------------------------------- /python/ch4/ptb.vocab.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch4/ptb.vocab.pkl -------------------------------------------------------------------------------- /python/ch4/train.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common import config
5 | 
6 | import pickle
7 | from common.trainer import Trainer
8 | from common.optimizer import Adam
9 | from cbow import CBOW
10 | from common.util import create_contexts_target
11 | import ptb
12 | 
13 | # hyperparameters
14 | window_size = 5
15 | hidden_size = 100
16 | batch_size = 100
17 | max_epoch = 10
18 | 
19 | corpus, word_to_id, id_to_word = ptb.load_data('train')
20 | vocab_size = len(word_to_id)
21 | print(len(word_to_id))
22 | 
23 | contexts, target = create_contexts_target(corpus, window_size)
24 | # if config.GPU:
25 | #     contexts, target = to_gpu(contexts), to_gpu(target)
26 | 
27 | model = CBOW(vocab_size, hidden_size, window_size, corpus)
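# (Editor's note, added:) with the PTB vocabulary (V is about 10,000) and
# H = 100, W_in and W_out each hold V*H floats; the float16 word vectors
# pickled at the end of this script therefore come to roughly 2 MB.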
28 | optimizer = Adam()
29 | trainer = Trainer(model, optimizer)
30 | 
31 | trainer.fit(contexts, target, max_epoch, batch_size)
32 | trainer.plot()
33 | 
34 | word_vecs = model.word_vecs
35 | 
36 | params = {}
37 | params['word_vecs'] = word_vecs.astype(np.float16)
38 | params['word_to_id'] = word_to_id
39 | params['id_to_word'] = id_to_word
40 | pkl_file = 'cbow_params.pkl'
41 | with open(pkl_file, 'wb') as f:
42 |     pickle.dump(params, f, -1)
-------------------------------------------------------------------------------- /python/ch4/train_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch4/train_loss.png -------------------------------------------------------------------------------- /python/ch5/simple_rnnlm.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common.time_layers import *
5 | from numpy.random import randn as rn
6 | 
7 | class SimpleRnnlm:
8 |     def __init__(self, vocab_size, wordvec_size, hidden_size):
9 |         V, D, H = vocab_size, wordvec_size, hidden_size
10 |         # rn is numpy.random.randn (imported above)
11 | 
12 |         # initialize weights
13 |         embed_W = (rn(V, D) / 100).astype('f')
14 |         rnn_Wx = (rn(D, H) / np.sqrt(D)).astype('f')
15 |         rnn_Wh = (rn(H, H) / np.sqrt(H)).astype('f')
16 |         rnn_b = np.zeros(H).astype('f')
17 |         affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
18 |         affine_b = np.zeros(V).astype('f')
19 | 
20 |         # create layers
21 |         self.layers = [
22 |             TimeEmbedding(embed_W),
23 |             TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True),
24 |             TimeAffine(affine_W, affine_b)
25 |         ]
26 |         self.loss_layer = TimeSoftmaxWithLoss()
27 |         self.rnn_layer = self.layers[1]
28 | 
29 |         self.params, self.grads = [], []
30 |         for layer in self.layers:
31 |             self.params += layer.params
32 |             self.grads += layer.grads
33 | 
34 |     def forward(self, xs, ts):
35 |         for layer in self.layers:
36 |             xs = layer.forward(xs)
37 |         loss = self.loss_layer.forward(xs, ts)
38 |         return loss
39 | 
40 |     def backward(self, dout=1):
41 |         dout = self.loss_layer.backward(dout)
42 |         for layer in reversed(self.layers):
43 |             dout = layer.backward(dout)
44 |         return dout
45 | 
46 |     def reset_state(self):
47 |         self.rnn_layer.reset_state()
48 | 
-------------------------------------------------------------------------------- /python/ch5/train.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | from common.optimizer import SGD
6 | from dataset import ptb
7 | from simple_rnnlm import SimpleRnnlm
8 | from common.trainer import RnnlmTrainer
9 | 
10 | batch_size = 10
11 | wordvec_size = 100
12 | hidden_size = 100
13 | time_size = 5
14 | lr = 0.1
15 | max_epoch = 100
16 | 
17 | corpus, word_to_id, id_to_word = ptb.load_data('train')
18 | corpus_size = 1000
19 | corpus = corpus[:corpus_size]
20 | vocab_size = int(max(corpus) + 1)
21 | 
22 | xs = corpus[:-1]
23 | ts = corpus[1:]
24 | data_size = len(xs)
25 | print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))
26 | 
27 | max_iters = data_size // (batch_size * time_size)
28 | time_idx = 0
29 | total_loss = 0
30 | loss_count = 0
31 | ppl_list = []
32 | 
33 | model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
34 | optimizer = SGD(lr)
35 | 
36 | trainer = RnnlmTrainer(model, optimizer)
37 | trainer.fit(xs, ts, max_epoch, batch_size, 
time_size) 38 | trainer.plot() 39 | -------------------------------------------------------------------------------- /python/ch5/train_custom_loop.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from common.optimizer import SGD 6 | from dataset import ptb 7 | from simple_rnnlm import SimpleRnnlm 8 | 9 | batch_size = 10 10 | wordvec_size = 100 11 | hidden_size = 100 12 | time_size = 5 13 | lr = 0.1 14 | max_epoch = 100 15 | 16 | corpus, word_to_id, id_to_word = ptb.load_data('train') 17 | corpus_size = 1000 18 | corpus = corpus[:corpus_size] 19 | vocab_size = int(max(corpus) + 1) 20 | 21 | xs = corpus[:-1] 22 | ts = corpus[1:] 23 | data_size = len(xs) 24 | print('corpus size: %d, vocabulary size: %d' %(corpus_size, vocab_size)) 25 | 26 | max_iters = data_size // (batch_size * time_size) 27 | time_idx = 0 28 | total_loss = 0 29 | loss_count = 0 30 | ppl_list = [] 31 | 32 | model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size) 33 | optimizer = SGD(lr) 34 | 35 | jump = (corpus_size - 1) // batch_size 36 | offsets = [i * jump for i in range(batch_size)] 37 | 38 | for epoch in range(max_epoch): 39 | for iter_ in range(max_iters): 40 | batch_x = np.empty((batch_size, time_size), dtype='i') 41 | batch_t = np.empty((batch_size, time_size), dtype='i') 42 | 43 | # ミニバッチの取得 44 | for t in range(time_size): 45 | for i, offset in enumerate(offsets): 46 | batch_x[i, t] = xs[(offset + time_idx) % data_size] 47 | batch_t[i, t] = ts[(offset + time_idx) % data_size] 48 | time_idx += 1 49 | # iter=0 50 | # [ ][ ][ ][ ][ ] 51 | # [ ][ ][ ][ ][ |] 52 | # [ ][ ][ ][ ][ | ] 53 | # [ ][ ][ ][ ][ | ] 54 | # [ ][ ][ ][ ][ | ] 55 | # iter=1 56 | # [ ][ ][ ][ ][ | ] 57 | # [ ][ ][ ][ ][ | ] 58 | # [ ][ ][ ][ ][ | ] 59 | # [ ][ ][ ][ ][| ] 60 | # [ ][ ][ ][ ]| ] 61 | 62 | # 勾配を求め、パラメータを更新 63 | loss = model.forward(batch_x, batch_t) 64 | model.backward() 65 | optimizer.update(model.params, model.grads) 66 | total_loss += loss 67 | loss_count += 1 68 | 69 | # エポックごとにパープレキシティの評価 70 | ppl = np.exp(total_loss / loss_count) 71 | print('%d,%.2f' % (epoch + 1, ppl)) 72 | ppl_list.append(float(ppl)) 73 | total_loss, loss_count = 0, 0 74 | 75 | 76 | plt.plot(ppl_list) 77 | plt.show() 78 | -------------------------------------------------------------------------------- /python/ch6/Figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch6/Figure_1.png -------------------------------------------------------------------------------- /python/ch6/Rnnlm.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch6/Rnnlm.pkl -------------------------------------------------------------------------------- /python/ch6/clip_grads.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | dW1 = np.random.rand(3, 3) * 10 4 | dW2 = np.random.rand(3, 3) * 10 5 | grads = [dW1, dW2] 6 | max_norm = 5.0 7 | 8 | print(grads) 9 | 10 | def clip_grads(grads, max_norm): 11 | total_norm = 0 12 | for grad in grads: 13 | total_norm += np.sum(grad ** 2) 14 | total_norm = np.sqrt(total_norm) 15 | 16 | rate = max_norm / (total_norm + 1e-6) 17 | if rate < 1: 
18 | for grad in grads: 19 | grad *= rate 20 | 21 | clip_grads(grads, max_norm) 22 | print(grads) 23 | -------------------------------------------------------------------------------- /python/ch6/lstm_backward_graph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch6/lstm_backward_graph.jpg -------------------------------------------------------------------------------- /python/ch6/rnn_gradient_graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | N = 2 # ミニバッチサイズ 5 | H = 3 # 隠れ状態ベクトルの次元数 6 | T = 20 # 時系列データの長さ 7 | 8 | dh = np.ones((N, H)) 9 | np.random.seed(3) # 再現性のため乱数のシードを固定 10 | Wh = np.random.randn(H, H) 11 | #Wh = np.random.randn(H, H) * 0.5 12 | 13 | 14 | max_norm = 5.0 15 | 16 | def clip_grads(grads, max_norm): 17 | total_norm = 0 18 | for grad in grads: 19 | total_norm += np.sum(grad ** 2) 20 | total_norm = np.sqrt(total_norm) 21 | 22 | rate = max_norm / (total_norm + 1e-6) 23 | if rate < 1: 24 | for grad in grads: 25 | grad *= rate 26 | 27 | 28 | norm_list = [] 29 | for t in range(T): 30 | dh = np.dot(dh, Wh.T) 31 | # clip_grads(dh, max_norm) 32 | norm = np.sqrt(np.sum(dh**2)) / N 33 | norm_list.append(norm) 34 | 35 | plt.plot(norm_list) 36 | plt.show() 37 | -------------------------------------------------------------------------------- /python/ch6/rnn_gradient_graph_clip.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | N = 2 # ミニバッチサイズ 5 | H = 3 # 隠れ状態ベクトルの次元数 6 | T = 20 # 時系列データの長さ 7 | 8 | dh = np.ones((N, H)) 9 | np.random.seed(3) # 再現性のため乱数のシードを固定 10 | Wh = np.random.randn(H, H) 11 | #Wh = np.random.randn(H, H) * 0.5 12 | 13 | 14 | max_norm = 5.0 15 | 16 | def clip_grads(grads, max_norm): 17 | total_norm = 0 18 | for grad in grads: 19 | total_norm += np.sum(grad ** 2) 20 | total_norm = np.sqrt(total_norm) 21 | 22 | rate = max_norm / (total_norm + 1e-6) 23 | if rate < 1: 24 | for grad in grads: 25 | grad *= rate 26 | 27 | 28 | norm_list = [] 29 | for t in range(T): 30 | dh = np.dot(dh, Wh.T) 31 | clip_grads(dh, max_norm) 32 | norm = np.sqrt(np.sum(dh**2)) / N 33 | norm_list.append(norm) 34 | 35 | plt.plot(norm_list) 36 | plt.show() 37 | -------------------------------------------------------------------------------- /python/ch6/rnnlm.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | import numpy as np 4 | from common.time_layers import * 5 | import pickle 6 | 7 | 8 | class Rnnlm: 9 | def __init__(self, vocab_size=10000, wordvec_size=100, hidden_size=100): 10 | V, D, H = vocab_size, wordvec_size, hidden_size 11 | rn = np.random.randn 12 | 13 | # 重みの初期化 14 | embed_W = (rn(V, D) / 100).astype('f') 15 | lstm_Wx = (rn(D, 4*H) / np.sqrt(D)).astype('f') # Xavierの初期値 16 | lstm_Wh = (rn(H, 4*H) / np.sqrt(H)).astype('f') 17 | lstm_b = np.zeros(4*H).astype('f') 18 | affine_W = (rn(H,V) / np.sqrt(H)).astype('f') 19 | affine_b = np.zeros(V).astype('f') 20 | 21 | # レイヤの生成 22 | self.layers = [ 23 | TimeEmbedding(embed_W), 24 | TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True), 25 | TimeAffine(affine_W, affine_b) 26 | ] 27 | self.loss_layer = TimeSoftmaxWithLoss() 28 | self.lstm_layer = self.layers[1] 29 | 30 | # すべての重みと勾配をリストにまとめる 31 | self.params, 
self.grads = [],[] 32 | for layer in self.layers: 33 | self.params += layer.params 34 | self.grads += layer.grads 35 | 36 | def predict(self, xs): 37 | for layer in self.layers: 38 | xs = layer.forward(xs) 39 | return xs 40 | 41 | def forward(self, xs, ts): 42 | score = self.predict(xs) 43 | loss = self.loss_layer.forward(score, ts) 44 | return loss 45 | 46 | def backward(self, dout=1): 47 | dout = self.loss_layer.backward(dout) 48 | for layer in reversed(self.layers): 49 | dout = layer.backward(dout) 50 | return dout 51 | 52 | def reset_state(self): 53 | self.lstm_layer.reset_state() 54 | 55 | def save_params(self, file_name='Rnnlm.pkl'): 56 | with open(file_name, 'wb') as f: 57 | pickle.dump(self.params, f) 58 | 59 | def load_params(self, file_name='Rnnlm.pkl'): 60 | with open(file_name, 'rb') as f: 61 | prms = pickle.load(f) 62 | 63 | # 教科書には無いが、これがないと読み込めていなかった 64 | for i, param in enumerate(self.params): 65 | param[...] = prms[i] 66 | -------------------------------------------------------------------------------- /python/ch6/train_rnnlm.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | from common.optimizer import SGD 4 | from common.trainer import RnnlmTrainer 5 | from common.util import eval_perplexity 6 | from dataset import ptb 7 | from rnnlm import Rnnlm 8 | 9 | # ハイパーパラメータの設定 10 | batch_size = 20 11 | wordvec_size = 100 12 | hidden_size = 100 # RNNの隠れ状態ベクトルの要素数 13 | time_size = 35 # RNNを展開するサイズ 14 | lr = 20.0 15 | max_epoch = 4 16 | max_grad = 0.25 17 | 18 | # 学習データの読み込み 19 | corpus, word_to_id, id_to_word = ptb.load_data('train') 20 | corpus_test, _, _ = ptb.load_data('test') 21 | vocab_size = len(word_to_id) 22 | xs = corpus[:-1] 23 | ts = corpus[1:] 24 | 25 | # モデルの生成 26 | model = Rnnlm(vocab_size, wordvec_size, hidden_size) 27 | optimizer = SGD(lr) 28 | trainer = RnnlmTrainer(model, optimizer) 29 | 30 | # パラメータの読み込み 31 | #model.load_params() 32 | 33 | # 勾配クリッピングを適用して学習 34 | trainer.fit(xs, ts, max_epoch, batch_size, time_size, max_grad, eval_interval=20) 35 | trainer.plot(ylim=(0, 500)) 36 | 37 | # テストデータで評価 38 | model.reset_state() 39 | ppl_test = eval_perplexity(model, corpus_test) 40 | print('test perplexity: ', ppl_test) 41 | 42 | # パラメータの保存 43 | model.save_params() 44 | 45 | 46 | # | epoch 4 | iter 1321 / 1327 | time 2590[s] | perplexity 110.01 47 | # evaluating perplexity ... 
48 | # 234 / 235 49 | # test perplexity: 136.49846872347803 50 | -------------------------------------------------------------------------------- /python/common/config.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | GPU = False 4 | -------------------------------------------------------------------------------- /python/common/functions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from common.np import * 3 | 4 | 5 | def sigmoid(x): 6 | return 1 / (1 + np.exp(-x)) 7 | 8 | 9 | def relu(x): 10 | return np.maximum(0, x) 11 | 12 | 13 | def softmax(x): 14 | if x.ndim == 2: 15 | x = x - x.max(axis=1, keepdims=True) 16 | x = np.exp(x) 17 | x /= x.sum(axis=1, keepdims=True) 18 | elif x.ndim == 1: 19 | x = x - np.max(x) 20 | x = np.exp(x) / np.sum(np.exp(x)) 21 | 22 | return x 23 | 24 | 25 | def cross_entropy_error(y, t): 26 | if y.ndim == 1: 27 | t = t.reshape(1, t.size) 28 | y = y.reshape(1, y.size) 29 | 30 | # 教師データがone-hot-vectorの場合、正解ラベルのインデックスに変換 31 | if t.size == y.size: 32 | t = t.argmax(axis=1) 33 | 34 | batch_size = y.shape[0] 35 | 36 | return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size 37 | -------------------------------------------------------------------------------- /python/common/layers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from common.functions import softmax, cross_entropy_error 3 | 4 | class MatMul: 5 | def __init__(self, W): 6 | self.params = [W] 7 | self.grads = [np.zeros_like(W)] 8 | self.x = None 9 | 10 | def forward(self, x): 11 | W, = self.params 12 | out = np.dot(x, W) 13 | self.x = x 14 | return out 15 | 16 | def backward(self, dout): 17 | W, = self.params 18 | dx = np.dot(dout, W.T) 19 | dW = np.dot(self.x.T, dout) 20 | self.grads[0][...] 
= dW
21 |         return dx
22 | 
23 | # Copied from ch1/forward_net.py (later chapters use it too) with backward implemented
24 | class Sigmoid:
25 |     def __init__(self):
26 |         self.params = []
27 |         self.grads = []
28 | 
29 |     def forward(self, x):
30 |         self.out = 1 / (1 + np.exp(-x))
31 |         return self.out
32 | 
33 |     def backward(self, dout):
34 |         dx = dout * (1 - self.out) * self.out
35 |         return dx
36 | 
37 | 
38 | class Affine:
39 |     def __init__(self, W, b):
40 |         self.mm = MatMul(W)  # implemented on top of MatMul as an experiment
41 |         self.params = [W, b]
42 |         self.grads = [
43 |             self.mm.grads[0],  # share MatMul's grad array, so the references the model collects right after init stay valid
44 |             np.zeros_like(b),
45 |         ]
46 | 
47 |     def forward(self, x):
48 |         _, b = self.params
49 |         out = self.mm.forward(x) + b
50 |         return out
51 | 
52 |     def backward(self, dout):
53 |         dx = self.mm.backward(dout)
54 |         db = np.sum(dout, axis=0)
55 |         # self.grads[0] is updated in place by mm.backward
56 |         self.grads[1][...] = db  # write in place: rebinding the slot (self.grads[1] = db.copy()) would leave the array the model collected at init untouched
57 |         return dx
58 | 
59 | class TwoLayerNet:
60 |     def __init__(self, input_size, hidden_size, output_size):
61 |         I, H, O = input_size, hidden_size, output_size
62 | 
63 |         # initialize weights and biases
64 |         W1 = np.random.randn(I, H)
65 |         b1 = np.random.randn(H)
66 |         W2 = np.random.randn(H, O)
67 |         b2 = np.random.randn(O)
68 | 
69 |         # create layers
70 |         self.layers = [
71 |             Affine(W1, b1),
72 |             Sigmoid(),
73 |             Affine(W2, b2)
74 |         ]
75 | 
76 |         # collect all weights into a list
77 |         self.params = []
78 |         for layer in self.layers:
79 |             self.params += layer.params
80 | 
81 |     def predict(self, x):
82 |         for layer in self.layers:
83 |             x = layer.forward(x)
84 |         return x
85 | 
86 | 
87 | # FROM https://github.com/oreilly-japan/deep-learning-from-scratch-2/blob/master/common/layers.py
88 | class Softmax:
89 |     def __init__(self):
90 |         self.params, self.grads = [], []
91 |         self.out = None
92 | 
93 |     def forward(self, x):
94 |         self.out = softmax(x)
95 |         return self.out
96 | 
97 |     def backward(self, dout):
98 |         dx = self.out * dout
99 |         sumdx = np.sum(dx, axis=1, keepdims=True)
100 |         dx -= self.out * sumdx
101 |         return dx
102 | 
103 | 
104 | class SoftmaxWithLoss:
105 |     def __init__(self):
106 |         self.params, self.grads = [], []
107 |         self.y = None  # output of softmax
108 |         self.t = None  # teacher labels
109 | 
110 |     def forward(self, x, t):
111 |         self.t = t
112 |         self.y = softmax(x)
113 | 
114 |         # if the teacher labels are one-hot vectors, convert them to class indices
115 |         if self.t.size == self.y.size:
116 |             self.t = self.t.argmax(axis=1)
117 | 
118 |         loss = cross_entropy_error(self.y, self.t)
119 |         return loss
120 | 
121 |     def backward(self, dout=1):
122 |         batch_size = self.t.shape[0]
123 | 
124 |         dx = self.y.copy()
125 |         dx[np.arange(batch_size), self.t] -= 1
126 |         dx *= dout
127 |         dx = dx / batch_size
128 | 
129 |         return dx
130 | 
131 | 
132 | class Embedding:
133 |     def __init__(self, W):
134 |         self.params = [W]
135 |         self.grads = [np.zeros_like(W)]
136 |         self.idx = None
137 | 
138 |     def forward(self, idx):
139 |         W, = self.params
140 |         self.idx = idx
141 |         out = W[idx]
142 |         return out
143 | 
144 |     def backward(self, dout):
145 |         dW, = self.grads
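        # (Editor's note, added:) np.add.at below accumulates the dout rows into
        # dW even when self.idx contains the same word id more than once, which
        # plain fancy-index assignment (dW[self.idx] = dout) would silently drop.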
146 |         dW[...] = 0
147 |         np.add.at(dW, self.idx, dout)
148 |         return None
149 | 
150 | 
151 | # Added: ch4/negative_sampling_layer.py imports SigmoidWithLoss from this module,
152 | # but no such class was defined here; the implementation below follows the
153 | # textbook repo (oreilly-japan/deep-learning-from-scratch-2, common/layers.py).
154 | class SigmoidWithLoss:
155 |     def __init__(self):
156 |         self.params, self.grads = [], []
157 |         self.loss = None
158 |         self.y = None  # output of sigmoid
159 |         self.t = None  # teacher labels
160 | 
161 |     def forward(self, x, t):
162 |         self.t = t
163 |         self.y = 1 / (1 + np.exp(-x))
164 |         self.loss = cross_entropy_error(np.c_[1 - self.y, self.y], self.t)
165 |         return self.loss
166 | 
167 |     def backward(self, dout=1):
168 |         batch_size = self.t.shape[0]
169 |         dx = (self.y - self.t) * dout / batch_size
170 |         return dx
171 | 
-------------------------------------------------------------------------------- /python/common/np.py: -------------------------------------------------------------------------------- 
1 | # coding: utf-8
2 | from common.config import GPU
3 | 
4 | 
5 | if GPU:
6 |     import cupy as np
7 |     np.cuda.set_allocator(np.cuda.MemoryPool().malloc)
8 |     np.add.at = np.scatter_add
9 | 
10 |     print('\033[92m' + '-' * 60 + '\033[0m')
11 |     print(' ' * 23 + '\033[92mGPU Mode (cupy)\033[0m')
12 |     print('\033[92m' + '-' * 60 + '\033[0m\n')
13 | else:
14 |     import numpy as np
15 | 
-------------------------------------------------------------------------------- /python/common/optimizer.py: -------------------------------------------------------------------------------- 
1 | import numpy as np
2 | 
3 | class SGD:
4 |     def __init__(self, lr=0.01):
5 |         self.lr = lr  # learning rate
6 | 
7 |     def update(self, params, grads):
8 |         for i in range(len(params)):
9 |             params[i] -= self.lr * grads[i]
10 | 
11 | class AdaGrad:
12 |     '''
13 |     AdaGrad
14 |     '''
15 |     def __init__(self, lr=0.01):
16 |         self.lr = lr
17 |         self.h = None
18 | 
19 |     def update(self, params, grads):
20 |         if self.h is None:
21 |             self.h = []
22 |             for param in params:
23 |                 self.h.append(np.zeros_like(param))
24 | 
25 |         for i in range(len(params)):
26 |             self.h[i] += grads[i] * grads[i]
27 |             params[i] -= self.lr * grads[i] / (np.sqrt(self.h[i]) + 1e-7)
28 | 
29 | class Adam:
30 |     '''
31 |     Adam (http://arxiv.org/abs/1412.6980v8)
32 |     '''
33 |     def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
34 |         self.lr = lr
35 |         self.beta1 = beta1
36 |         self.beta2 = beta2
37 |         self.iter = 0
38 |         self.m = None
39 |         self.v = None
40 | 
41 |     def update(self, params, grads):
42 |         if self.m is None:
43 |             self.m, self.v = [], []
44 |             for param in params:
45 |                 self.m.append(np.zeros_like(param))
46 |                 self.v.append(np.zeros_like(param))
47 | 
48 |         self.iter += 1
49 |         lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
50 | 
51 |         for i in range(len(params)):
52 |             self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i])
53 |             self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i])
54 | 
55 |             params[i] -= lr_t * self.m[i] / (np.sqrt(self.v[i]) + 1e-7)
-------------------------------------------------------------------------------- /python/common/trainer.py: -------------------------------------------------------------------------------- 
1 | # coding: utf-8
2 | import sys
3 | sys.path.append('..')
4 | import numpy
5 | import time
6 | import matplotlib.pyplot as plt
7 | from common.np import *  # import numpy as np
8 | from common.util import clip_grads
9 | 
10 | 
11 | class Trainer:
12 |     def __init__(self, model, optimizer):
13 |         self.model = model
14 |         self.optimizer = optimizer
15 |         self.loss_list = []
16 |         self.eval_interval = None
17 |         self.current_epoch = 0
18 | 
19 |     def fit(self, x, t, max_epoch=10, batch_size=32, max_grad=None, eval_interval=20):
20 |         data_size = len(x)
21 |         max_iters = data_size // batch_size
22 |         self.eval_interval = eval_interval
23 |         model, optimizer = self.model, self.optimizer
24 |         total_loss = 0
25 |         loss_count = 0
26 | 
27 |         start_time = time.time()
28 |         for epoch in range(max_epoch):
29 |             # shuffle the data
30 |             idx = numpy.random.permutation(numpy.arange(data_size))
31 |             x = x[idx]
32 |             t = t[idx]
33 | 
34 |             for iters in range(max_iters):
35 |                 batch_x = x[iters*batch_size:(iters+1)*batch_size]
36 |                 batch_t = t[iters*batch_size:(iters+1)*batch_size]
37 | 
38 |                 # compute the gradients and update the parameters
39 |                 loss = 
model.forward(batch_x, batch_t) 40 | model.backward() 41 | params, grads = remove_duplicate(model.params, model.grads) # 共有された重みを1つに集約 42 | if max_grad is not None: 43 | clip_grads(grads, max_grad) 44 | optimizer.update(params, grads) 45 | total_loss += loss 46 | loss_count += 1 47 | 48 | # 評価 49 | if (eval_interval is not None) and (iters % eval_interval) == 0: 50 | avg_loss = total_loss / loss_count 51 | elapsed_time = time.time() - start_time 52 | print('| epoch %d | iter %d / %d | time %d[s] | loss %.2f' 53 | % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, avg_loss)) 54 | self.loss_list.append(float(avg_loss)) 55 | total_loss, loss_count = 0, 0 56 | 57 | self.current_epoch += 1 58 | 59 | def plot(self, ylim=None): 60 | x = numpy.arange(len(self.loss_list)) 61 | if ylim is not None: 62 | plt.ylim(*ylim) 63 | plt.plot(x, self.loss_list, label='train') 64 | plt.xlabel('iterations (x' + str(self.eval_interval) + ')') 65 | plt.ylabel('loss') 66 | plt.show() 67 | 68 | 69 | class RnnlmTrainer: 70 | def __init__(self, model, optimizer): 71 | self.model = model 72 | self.optimizer = optimizer 73 | self.time_idx = None 74 | self.ppl_list = None 75 | self.eval_interval = None 76 | self.current_epoch = 0 77 | 78 | def get_batch(self, x, t, batch_size, time_size): 79 | batch_x = np.empty((batch_size, time_size), dtype='i') 80 | batch_t = np.empty((batch_size, time_size), dtype='i') 81 | 82 | data_size = len(x) 83 | jump = data_size // batch_size 84 | offsets = [i * jump for i in range(batch_size)] # バッチの各サンプルの読み込み開始位置 85 | 86 | for time in range(time_size): 87 | for i, offset in enumerate(offsets): 88 | batch_x[i, time] = x[(offset + self.time_idx) % data_size] 89 | batch_t[i, time] = t[(offset + self.time_idx) % data_size] 90 | self.time_idx += 1 91 | return batch_x, batch_t 92 | 93 | def fit(self, xs, ts, max_epoch=10, batch_size=20, time_size=35, 94 | max_grad=None, eval_interval=20): 95 | data_size = len(xs) 96 | max_iters = data_size // (batch_size * time_size) 97 | self.time_idx = 0 98 | self.ppl_list = [] 99 | self.eval_interval = eval_interval 100 | model, optimizer = self.model, self.optimizer 101 | total_loss = 0 102 | loss_count = 0 103 | 104 | start_time = time.time() 105 | for epoch in range(max_epoch): 106 | for iters in range(max_iters): 107 | batch_x, batch_t = self.get_batch(xs, ts, batch_size, time_size) 108 | 109 | # 勾配を求め、パラメータを更新 110 | loss = model.forward(batch_x, batch_t) 111 | model.backward() 112 | params, grads = remove_duplicate(model.params, model.grads) # 共有された重みを1つに集約 113 | if max_grad is not None: 114 | clip_grads(grads, max_grad) 115 | optimizer.update(params, grads) 116 | total_loss += loss 117 | loss_count += 1 118 | 119 | # パープレキシティの評価 120 | if (eval_interval is not None) and (iters % eval_interval) == 0: 121 | ppl = np.exp(total_loss / loss_count) 122 | elapsed_time = time.time() - start_time 123 | print('| epoch %d | iter %d / %d | time %d[s] | perplexity %.2f' 124 | % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, ppl)) 125 | self.ppl_list.append(float(ppl)) 126 | total_loss, loss_count = 0, 0 127 | 128 | self.current_epoch += 1 129 | 130 | def plot(self, ylim=None): 131 | x = numpy.arange(len(self.ppl_list)) 132 | if ylim is not None: 133 | plt.ylim(*ylim) 134 | plt.plot(x, self.ppl_list, label='train') 135 | plt.xlabel('iterations (x' + str(self.eval_interval) + ')') 136 | plt.ylabel('perplexity') 137 | plt.show() 138 | 139 | 140 | def remove_duplicate(params, grads): 141 | ''' 142 | パラメータ配列中の重複する重みをひとつに集約し、 143 | その重みに対応する勾配を加算する 
144 | ''' 145 | params, grads = params[:], grads[:] # copy list 146 | 147 | while True: 148 | find_flg = False 149 | L = len(params) 150 | 151 | for i in range(0, L - 1): 152 | for j in range(i + 1, L): 153 | # 重みを共有する場合 154 | if params[i] is params[j]: 155 | grads[i] += grads[j] # 勾配の加算 156 | find_flg = True 157 | params.pop(j) 158 | grads.pop(j) 159 | # 転置行列として重みを共有する場合(weight tying) 160 | elif params[i].ndim == 2 and params[j].ndim == 2 and \ 161 | params[i].T.shape == params[j].shape and np.all(params[i].T == params[j]): 162 | grads[i] += grads[j].T 163 | find_flg = True 164 | params.pop(j) 165 | grads.pop(j) 166 | 167 | if find_flg: break 168 | if find_flg: break 169 | 170 | if not find_flg: break 171 | 172 | return params, grads 173 | -------------------------------------------------------------------------------- /python/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/dataset/__init__.py -------------------------------------------------------------------------------- /python/dataset/ptb.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | sys.path.append('..') 5 | try: 6 | import urllib.request 7 | except ImportError: 8 | raise ImportError('Use Python3!') 9 | import pickle 10 | import numpy as np 11 | 12 | 13 | url_base = 'https://raw.githubusercontent.com/tomsercu/lstm/master/data/' 14 | key_file = { 15 | 'train':'ptb.train.txt', 16 | 'test':'ptb.test.txt', 17 | 'valid':'ptb.valid.txt' 18 | } 19 | save_file = { 20 | 'train':'ptb.train.npy', 21 | 'test':'ptb.test.npy', 22 | 'valid':'ptb.valid.npy' 23 | } 24 | vocab_file = 'ptb.vocab.pkl' 25 | 26 | dataset_dir = os.path.dirname(os.path.abspath(__file__)) 27 | 28 | 29 | def _download(file_name): 30 | file_path = dataset_dir + '/' + file_name 31 | if os.path.exists(file_path): 32 | return 33 | 34 | print('Downloading ' + file_name + ' ... 
')
35 | 
36 |     try:
37 |         urllib.request.urlretrieve(url_base + file_name, file_path)
38 |     except urllib.error.URLError:
39 |         import ssl
40 |         ssl._create_default_https_context = ssl._create_unverified_context
41 |         urllib.request.urlretrieve(url_base + file_name, file_path)
42 | 
43 |     print('Done')
44 | 
45 | 
46 | def load_vocab():
47 |     vocab_path = dataset_dir + '/' + vocab_file
48 | 
49 |     if os.path.exists(vocab_path):
50 |         with open(vocab_path, 'rb') as f:
51 |             word_to_id, id_to_word = pickle.load(f)
52 |         return word_to_id, id_to_word
53 | 
54 |     word_to_id = {}
55 |     id_to_word = {}
56 |     data_type = 'train'
57 |     file_name = key_file[data_type]
58 |     file_path = dataset_dir + '/' + file_name
59 | 
60 |     _download(file_name)
61 | 
62 |     words = open(file_path).read().replace('\n', '<eos>').strip().split()  # mark line ends with the <eos> token
63 | 
64 |     for i, word in enumerate(words):
65 |         if word not in word_to_id:
66 |             tmp_id = len(word_to_id)
67 |             word_to_id[word] = tmp_id
68 |             id_to_word[tmp_id] = word
69 | 
70 |     with open(vocab_path, 'wb') as f:
71 |         pickle.dump((word_to_id, id_to_word), f)
72 | 
73 |     return word_to_id, id_to_word
74 | 
75 | 
76 | def load_data(data_type='train'):
77 |     '''
78 |     :param data_type: which split to load: 'train', 'test' or 'valid' ('val' is also accepted)
79 |     :return:
80 |     '''
81 |     if data_type == 'val': data_type = 'valid'
82 |     save_path = dataset_dir + '/' + save_file[data_type]
83 | 
84 |     word_to_id, id_to_word = load_vocab()
85 | 
86 |     if os.path.exists(save_path):
87 |         corpus = np.load(save_path)
88 |         return corpus, word_to_id, id_to_word
89 | 
90 |     file_name = key_file[data_type]
91 |     file_path = dataset_dir + '/' + file_name
92 |     _download(file_name)
93 | 
94 |     words = open(file_path).read().replace('\n', '<eos>').strip().split()  # mark line ends with the <eos> token
95 |     corpus = np.array([word_to_id[w] for w in words])
96 | 
97 |     np.save(save_path, corpus)
98 |     return corpus, word_to_id, id_to_word
99 | 
100 | 
101 | if __name__ == '__main__':
102 |     for data_type in ('train', 'val', 'test'):
103 |         load_data(data_type)
104 | 
-------------------------------------------------------------------------------- /python/dataset/sequence.py: -------------------------------------------------------------------------------- 
1 | # coding: utf-8
2 | import sys
3 | sys.path.append('..')
4 | import os
5 | import numpy
6 | 
7 | 
8 | id_to_char = {}
9 | char_to_id = {}
10 | 
11 | 
12 | def _update_vocab(txt):
13 |     chars = list(txt)
14 | 
15 |     for i, char in enumerate(chars):
16 |         if char not in char_to_id:
17 |             tmp_id = len(char_to_id)
18 |             char_to_id[char] = tmp_id
19 |             id_to_char[tmp_id] = char
20 | 
21 | 
22 | def load_data(file_name='addition.txt', seed=1984):
23 |     file_path = os.path.dirname(os.path.abspath(__file__)) + '/' + file_name
24 | 
25 |     if not os.path.exists(file_path):
26 |         print('No file: %s' % file_name)
27 |         return None
28 | 
29 |     questions, answers = [], []
30 | 
31 |     for line in open(file_path, 'r'):
32 |         idx = line.find('_')
33 |         questions.append(line[:idx])
34 |         answers.append(line[idx:-1])
35 | 
36 |     # create vocab dict
37 |     for i in range(len(questions)):
38 |         q, a = questions[i], answers[i]
39 |         _update_vocab(q)
40 |         _update_vocab(a)
41 | 
42 |     # create numpy array
43 |     x = numpy.zeros((len(questions), len(questions[0])), dtype=int)  # numpy.int is just the builtin int and the alias is gone in NumPy >= 1.24
44 |     t = numpy.zeros((len(questions), len(answers[0])), dtype=int)
45 | 
46 |     for i, sentence in enumerate(questions):
47 |         x[i] = [char_to_id[c] for c in list(sentence)]
48 |     for i, sentence in enumerate(answers):
49 |         t[i] = [char_to_id[c] for c in list(sentence)]
50 | 
51 |     # shuffle
52 |     indices = numpy.arange(len(x))
53 |     if seed is not None:
54 |         numpy.random.seed(seed)
55 |     numpy.random.shuffle(indices)
56 |     x = x[indices]
57 |     t = t[indices]
58 | 
59 |     # 10% for validation set
60 |     split_at = len(x) - len(x) // 10
61 |     (x_train, x_test) = x[:split_at], x[split_at:]
62 |     (t_train, t_test) = t[:split_at], t[split_at:]
63 | 
64 |     return (x_train, t_train), (x_test, t_test)
65 | 
66 | 
67 | def get_vocab():
68 |     return char_to_id, id_to_char
69 | 
-------------------------------------------------------------------------------- /python/dataset/spiral.py: -------------------------------------------------------------------------------- 
1 | # coding: utf-8
2 | import numpy as np
3 | 
4 | 
5 | def load_data(seed=1984):
6 |     np.random.seed(seed)
7 |     N = 100  # samples per class
8 |     DIM = 2  # number of features
9 |     CLS_NUM = 3  # number of classes
10 | 
11 |     x = np.zeros((N*CLS_NUM, DIM))
12 |     t = np.zeros((N*CLS_NUM, CLS_NUM), dtype=int)  # np.int alias removed in NumPy >= 1.24
13 | 
14 |     for j in range(CLS_NUM):
15 |         for i in range(N):  # N*j, N*(j+1)):
16 |             rate = i / N
17 |             radius = 1.0*rate
18 |             theta = j*4.0 + 4.0*rate + np.random.randn()*0.2
19 | 
20 |             ix = N*j + i
21 |             x[ix] = np.array([radius*np.sin(theta),
22 |                               radius*np.cos(theta)]).flatten()
23 |             t[ix, j] = 1
24 | 
25 |     return x, t
26 | 
-------------------------------------------------------------------------------- /python/memo.txt: -------------------------------------------------------------------------------- 
1 | 
2 | ## About this directory
3 | 
4 | * Working directory of the team implementing the samples from the "Deep Learning from Scratch 2: Natural Language Processing" reading group in Python.
5 | 
6 | ## Notes
7 | 
8 | * Environment: Ubuntu 16.04.3 LTS
9 | * Python version: Python 3.5.2
10 | * Mostly implemented by ourselves; some parts are taken from the textbook's repository
11 | * Everything taken from the textbook lives under the upstream/ directory
12 | 
13 | ## ToDo
14 | - [X] Create the directory
15 | - [X] Install numpy
16 | - [ ] Decide what to build
17 | - [X] Sigmoid function (p.13)
18 | - [X] Turn the sigmoid function into a class (p.15)
19 | - [X] Implement the Affine layer (p.15)
20 | - [X] Build TwoLayerNet (p.17)
21 | - [x] Build MatMul (p.33)
22 | - [x] Add backward to Sigmoid (p.36)
23 | - [x] Add backward to Affine (p.37)
24 | - [x] Check the Softmax with Loss layer (p.38)
25 | - [x] Build SGD (p.40)
26 | - [x] Implement the neural network (p.43)
27 | - [ ]
28 | 
29 | ## Setting up the development environment
30 | 
31 | ```
32 | $ sudo apt-get install -y python3-numpy
33 | $ 
34 | $ mkdir python
35 | 
36 | $ # check whether numpy is installed
37 | $ # OK if no error is raised
38 | $ python3
39 | Python 3.5.2 (default, Nov 12 2018, 13:43:14) 
40 | [GCC 5.4.0 20160609] on linux
41 | Type "help", "copyright", "credits" or "license" for more information. 
42 | >>> import numpy 43 | >>> 44 | ``` 45 | 46 | -------------------------------------------------------------------------------- /python/upstream/ch01/forward_net.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import numpy as np 3 | 4 | 5 | class Sigmoid: 6 | def __init__(self): 7 | self.params = [] 8 | 9 | def forward(self, x): 10 | return 1 / (1 + np.exp(-x)) 11 | 12 | 13 | class Affine: 14 | def __init__(self, W, b): 15 | self.params = [W, b] 16 | 17 | def forward(self, x): 18 | W, b = self.params 19 | out = np.dot(x, W) + b 20 | return out 21 | 22 | 23 | class TwoLayerNet: 24 | def __init__(self, input_size, hidden_size, output_size): 25 | I, H, O = input_size, hidden_size, output_size 26 | 27 | # 重みとバイアスの初期化 28 | W1 = np.random.randn(I, H) 29 | b1 = np.random.randn(H) 30 | W2 = np.random.randn(H, O) 31 | b2 = np.random.randn(O) 32 | 33 | # レイヤの生成 34 | self.layers = [ 35 | Affine(W1, b1), 36 | Sigmoid(), 37 | Affine(W2, b2) 38 | ] 39 | 40 | # すべての重みをリストにまとめる 41 | self.params = [] 42 | for layer in self.layers: 43 | self.params += layer.params 44 | 45 | def predict(self, x): 46 | for layer in self.layers: 47 | x = layer.forward(x) 48 | return x 49 | 50 | 51 | x = np.random.randn(10, 2) 52 | model = TwoLayerNet(2, 4, 3) 53 | s = model.predict(x) 54 | print(s) 55 | -------------------------------------------------------------------------------- /python/upstream/ch01/show_spiral_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') # 親ディレクトリのファイルをインポートするための設定 4 | from dataset import spiral 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | x, t = spiral.load_data() 9 | print('x', x.shape) # (300, 2) 10 | print('t', t.shape) # (300, 3) 11 | 12 | # データ点のプロット 13 | N = 100 14 | CLS_NUM = 3 15 | markers = ['o', 'x', '^'] 16 | for i in range(CLS_NUM): 17 | plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i]) 18 | plt.show() 19 | -------------------------------------------------------------------------------- /python/upstream/ch01/train.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') # 親ディレクトリのファイルをインポートするための設定 4 | from common.optimizer import SGD 5 | from common.trainer import Trainer 6 | from dataset import spiral 7 | from two_layer_net import TwoLayerNet 8 | 9 | 10 | # ハイパーパラメータの設定 11 | max_epoch = 300 12 | batch_size = 30 13 | hidden_size = 10 14 | learning_rate = 1.0 15 | 16 | x, t = spiral.load_data() 17 | model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3) 18 | optimizer = SGD(lr=learning_rate) 19 | 20 | trainer = Trainer(model, optimizer) 21 | trainer.fit(x, t, max_epoch, batch_size, eval_interval=10) 22 | trainer.plot() 23 | -------------------------------------------------------------------------------- /python/upstream/ch01/train_custom_loop.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') # 親ディレクトリのファイルをインポートするための設定 4 | import numpy as np 5 | from common.optimizer import SGD 6 | from dataset import spiral 7 | import matplotlib.pyplot as plt 8 | from two_layer_net import TwoLayerNet 9 | 10 | 11 | # ハイパーパラメータの設定 12 | max_epoch = 300 13 | batch_size = 30 14 | hidden_size = 10 15 | learning_rate = 1.0 16 | 17 | x, t = spiral.load_data() 18 | model = TwoLayerNet(input_size=2, 
hidden_size=hidden_size, output_size=3) 19 | optimizer = SGD(lr=learning_rate) 20 | 21 | # 学習で使用する変数 22 | data_size = len(x) 23 | max_iters = data_size // batch_size 24 | total_loss = 0 25 | loss_count = 0 26 | loss_list = [] 27 | 28 | for epoch in range(max_epoch): 29 | # データのシャッフル 30 | idx = np.random.permutation(data_size) 31 | x = x[idx] 32 | t = t[idx] 33 | 34 | for iters in range(max_iters): 35 | batch_x = x[iters*batch_size:(iters+1)*batch_size] 36 | batch_t = t[iters*batch_size:(iters+1)*batch_size] 37 | 38 | # 勾配を求め、パラメータを更新 39 | loss = model.forward(batch_x, batch_t) 40 | model.backward() 41 | optimizer.update(model.params, model.grads) 42 | 43 | total_loss += loss 44 | loss_count += 1 45 | 46 | # 定期的に学習経過を出力 47 | if (iters+1) % 10 == 0: 48 | avg_loss = total_loss / loss_count 49 | print('| epoch %d | iter %d / %d | loss %.2f' 50 | % (epoch + 1, iters + 1, max_iters, avg_loss)) 51 | loss_list.append(avg_loss) 52 | total_loss, loss_count = 0, 0 53 | 54 | 55 | # 学習結果のプロット 56 | plt.plot(np.arange(len(loss_list)), loss_list, label='train') 57 | plt.xlabel('iterations (x10)') 58 | plt.ylabel('loss') 59 | plt.show() 60 | 61 | # 境界領域のプロット 62 | h = 0.001 63 | x_min, x_max = x[:, 0].min() - .1, x[:, 0].max() + .1 64 | y_min, y_max = x[:, 1].min() - .1, x[:, 1].max() + .1 65 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 66 | X = np.c_[xx.ravel(), yy.ravel()] 67 | score = model.predict(X) 68 | predict_cls = np.argmax(score, axis=1) 69 | Z = predict_cls.reshape(xx.shape) 70 | plt.contourf(xx, yy, Z) 71 | plt.axis('off') 72 | 73 | # データ点のプロット 74 | x, t = spiral.load_data() 75 | N = 100 76 | CLS_NUM = 3 77 | markers = ['o', 'x', '^'] 78 | for i in range(CLS_NUM): 79 | plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i]) 80 | plt.show() 81 | -------------------------------------------------------------------------------- /python/upstream/ch01/two_layer_net.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') # 親ディレクトリのファイルをインポートするための設定 4 | import numpy as np 5 | from common.layers import Affine, Sigmoid, SoftmaxWithLoss 6 | 7 | 8 | class TwoLayerNet: 9 | def __init__(self, input_size, hidden_size, output_size): 10 | I, H, O = input_size, hidden_size, output_size 11 | 12 | # 重みとバイアスの初期化 13 | W1 = 0.01 * np.random.randn(I, H) 14 | b1 = np.zeros(H) 15 | W2 = 0.01 * np.random.randn(H, O) 16 | b2 = np.zeros(O) 17 | 18 | # レイヤの生成 19 | self.layers = [ 20 | Affine(W1, b1), 21 | Sigmoid(), 22 | Affine(W2, b2) 23 | ] 24 | self.loss_layer = SoftmaxWithLoss() 25 | 26 | # すべての重みと勾配をリストにまとめる 27 | self.params, self.grads = [], [] 28 | for layer in self.layers: 29 | self.params += layer.params 30 | self.grads += layer.grads 31 | 32 | def predict(self, x): 33 | for layer in self.layers: 34 | x = layer.forward(x) 35 | return x 36 | 37 | def forward(self, x, t): 38 | score = self.predict(x) 39 | loss = self.loss_layer.forward(score, t) 40 | return loss 41 | 42 | def backward(self, dout=1): 43 | dout = self.loss_layer.backward(dout) 44 | for layer in reversed(self.layers): 45 | dout = layer.backward(dout) 46 | return dout 47 | -------------------------------------------------------------------------------- /python/upstream/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/upstream/common/__init__.py 
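Since two_layer_net.py above implements backward() by hand, a finite-difference gradient check is a cheap way to confirm the analytic gradients before training. The sketch below is not part of the repository: it is a hypothetical gradient_check.py assumed to sit next to two_layer_net.py in python/upstream/ch01/, using the same sys.path convention as train.py above.

```
# coding: utf-8
# Hypothetical helper, not in the repo: numerically checks TwoLayerNet.backward().
import sys
sys.path.append('..')  # same convention as train.py above
import numpy as np
from two_layer_net import TwoLayerNet

np.random.seed(0)
model = TwoLayerNet(input_size=2, hidden_size=4, output_size=3)
x = np.random.randn(5, 2)
t = np.eye(3)[np.random.randint(0, 3, size=5)]  # one-hot targets

model.forward(x, t)   # populate the layer caches
model.backward()      # fill model.grads analytically

h = 1e-4
for param, grad in zip(model.params, model.grads):
    it = np.nditer(param, flags=['multi_index'], op_flags=['readwrite'])
    max_diff = 0.0
    while not it.finished:
        i = it.multi_index
        orig = param[i]
        param[i] = orig + h
        loss_plus = model.forward(x, t)
        param[i] = orig - h
        loss_minus = model.forward(x, t)
        param[i] = orig  # restore the weight
        num_grad = (loss_plus - loss_minus) / (2 * h)
        max_diff = max(max_diff, abs(num_grad - grad[i]))
        it.iternext()
    print('max |numerical - analytic|: %e' % max_diff)
```

With float64 parameters the reported differences should be on the order of 1e-8; values around 1e-2 or larger would point at a bug in one of the backward() implementations.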
-------------------------------------------------------------------------------- /python/upstream/common/base_model.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') 4 | import os 5 | import pickle 6 | from common.np import * 7 | from common.util import to_gpu, to_cpu 8 | 9 | 10 | class BaseModel: 11 | def __init__(self): 12 | self.params, self.grads = None, None 13 | 14 | def forward(self, *args): 15 | raise NotImplementedError 16 | 17 | def backward(self, *args): 18 | raise NotImplementedError 19 | 20 | def save_params(self, file_name=None): 21 | if file_name is None: 22 | file_name = self.__class__.__name__ + '.pkl' 23 | 24 | params = [p.astype(np.float16) for p in self.params] 25 | if GPU: 26 | params = [to_cpu(p) for p in params] 27 | 28 | with open(file_name, 'wb') as f: 29 | pickle.dump(params, f) 30 | 31 | def load_params(self, file_name=None): 32 | if file_name is None: 33 | file_name = self.__class__.__name__ + '.pkl' 34 | 35 | if '/' in file_name: 36 | file_name = file_name.replace('/', os.sep) 37 | 38 | if not os.path.exists(file_name): 39 | raise IOError('No file: ' + file_name) 40 | 41 | with open(file_name, 'rb') as f: 42 | params = pickle.load(f) 43 | 44 | params = [p.astype('f') for p in params] 45 | if GPU: 46 | params = [to_gpu(p) for p in params] 47 | 48 | for i, param in enumerate(self.params): 49 | param[...] = params[i] 50 | -------------------------------------------------------------------------------- /python/upstream/common/config.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | GPU = False 4 | -------------------------------------------------------------------------------- /python/upstream/common/functions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from common.np import * 3 | 4 | 5 | def sigmoid(x): 6 | return 1 / (1 + np.exp(-x)) 7 | 8 | 9 | def relu(x): 10 | return np.maximum(0, x) 11 | 12 | 13 | def softmax(x): 14 | if x.ndim == 2: 15 | x = x - x.max(axis=1, keepdims=True) 16 | x = np.exp(x) 17 | x /= x.sum(axis=1, keepdims=True) 18 | elif x.ndim == 1: 19 | x = x - np.max(x) 20 | x = np.exp(x) / np.sum(np.exp(x)) 21 | 22 | return x 23 | 24 | 25 | def cross_entropy_error(y, t): 26 | if y.ndim == 1: 27 | t = t.reshape(1, t.size) 28 | y = y.reshape(1, y.size) 29 | 30 | # 教師データがone-hot-vectorの場合、正解ラベルのインデックスに変換 31 | if t.size == y.size: 32 | t = t.argmax(axis=1) 33 | 34 | batch_size = y.shape[0] 35 | 36 | return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size 37 | -------------------------------------------------------------------------------- /python/upstream/common/layers.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from common.np import * # import numpy as np 3 | from common.config import GPU 4 | from common.functions import softmax, cross_entropy_error 5 | 6 | 7 | class MatMul: 8 | def __init__(self, W): 9 | self.params = [W] 10 | self.grads = [np.zeros_like(W)] 11 | self.x = None 12 | 13 | def forward(self, x): 14 | W, = self.params 15 | out = np.dot(x, W) 16 | self.x = x 17 | return out 18 | 19 | def backward(self, dout): 20 | W, = self.params 21 | dx = np.dot(dout, W.T) 22 | dW = np.dot(self.x.T, dout) 23 | self.grads[0][...] 
= dW 24 | return dx 25 | 26 | 27 | class Affine: 28 | def __init__(self, W, b): 29 | self.params = [W, b] 30 | self.grads = [np.zeros_like(W), np.zeros_like(b)] 31 | self.x = None 32 | 33 | def forward(self, x): 34 | W, b = self.params 35 | out = np.dot(x, W) + b 36 | self.x = x 37 | return out 38 | 39 | def backward(self, dout): 40 | W, b = self.params 41 | dx = np.dot(dout, W.T) 42 | dW = np.dot(self.x.T, dout) 43 | db = np.sum(dout, axis=0) 44 | 45 | self.grads[0][...] = dW 46 | self.grads[1][...] = db 47 | return dx 48 | 49 | 50 | class Softmax: 51 | def __init__(self): 52 | self.params, self.grads = [], [] 53 | self.out = None 54 | 55 | def forward(self, x): 56 | self.out = softmax(x) 57 | return self.out 58 | 59 | def backward(self, dout): 60 | dx = self.out * dout 61 | sumdx = np.sum(dx, axis=1, keepdims=True) 62 | dx -= self.out * sumdx 63 | return dx 64 | 65 | 66 | class SoftmaxWithLoss: 67 | def __init__(self): 68 | self.params, self.grads = [], [] 69 | self.y = None # softmaxの出力 70 | self.t = None # 教師ラベル 71 | 72 | def forward(self, x, t): 73 | self.t = t 74 | self.y = softmax(x) 75 | 76 | # 教師ラベルがone-hotベクトルの場合、正解のインデックスに変換 77 | if self.t.size == self.y.size: 78 | self.t = self.t.argmax(axis=1) 79 | 80 | loss = cross_entropy_error(self.y, self.t) 81 | return loss 82 | 83 | def backward(self, dout=1): 84 | batch_size = self.t.shape[0] 85 | 86 | dx = self.y.copy() 87 | dx[np.arange(batch_size), self.t] -= 1 88 | dx *= dout 89 | dx = dx / batch_size 90 | 91 | return dx 92 | 93 | 94 | class Sigmoid: 95 | def __init__(self): 96 | self.params, self.grads = [], [] 97 | self.out = None 98 | 99 | def forward(self, x): 100 | out = 1 / (1 + np.exp(-x)) 101 | self.out = out 102 | return out 103 | 104 | def backward(self, dout): 105 | dx = dout * (1.0 - self.out) * self.out 106 | return dx 107 | 108 | 109 | class SigmoidWithLoss: 110 | def __init__(self): 111 | self.params, self.grads = [], [] 112 | self.loss = None 113 | self.y = None # sigmoidの出力 114 | self.t = None # 教師データ 115 | 116 | def forward(self, x, t): 117 | self.t = t 118 | self.y = 1 / (1 + np.exp(-x)) 119 | 120 | self.loss = cross_entropy_error(np.c_[1 - self.y, self.y], self.t) 121 | 122 | return self.loss 123 | 124 | def backward(self, dout=1): 125 | batch_size = self.t.shape[0] 126 | 127 | dx = (self.y - self.t) * dout / batch_size 128 | return dx 129 | 130 | 131 | class Dropout: 132 | ''' 133 | http://arxiv.org/abs/1207.0580 134 | ''' 135 | def __init__(self, dropout_ratio=0.5): 136 | self.params, self.grads = [], [] 137 | self.dropout_ratio = dropout_ratio 138 | self.mask = None 139 | 140 | def forward(self, x, train_flg=True): 141 | if train_flg: 142 | self.mask = np.random.rand(*x.shape) > self.dropout_ratio 143 | return x * self.mask 144 | else: 145 | return x * (1.0 - self.dropout_ratio) 146 | 147 | def backward(self, dout): 148 | return dout * self.mask 149 | 150 | 151 | class Embedding: 152 | def __init__(self, W): 153 | self.params = [W] 154 | self.grads = [np.zeros_like(W)] 155 | self.idx = None 156 | 157 | def forward(self, idx): 158 | W, = self.params 159 | self.idx = idx 160 | out = W[idx] 161 | return out 162 | 163 | def backward(self, dout): 164 | dW, = self.grads 165 | dW[...] 
= 0 166 | np.add.at(dW, self.idx, dout) 167 | return None 168 | -------------------------------------------------------------------------------- /python/upstream/common/np.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from common.config import GPU 3 | 4 | 5 | if GPU: 6 | import cupy as np 7 | np.cuda.set_allocator(np.cuda.MemoryPool().malloc) 8 | np.add.at = np.scatter_add 9 | 10 | print('\033[92m' + '-' * 60 + '\033[0m') 11 | print(' ' * 23 + '\033[92mGPU Mode (cupy)\033[0m') 12 | print('\033[92m' + '-' * 60 + '\033[0m\n') 13 | else: 14 | import numpy as np 15 | -------------------------------------------------------------------------------- /python/upstream/common/optimizer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') 4 | from common.np import * 5 | 6 | 7 | class SGD: 8 | ''' 9 | 確率的勾配降下法(Stochastic Gradient Descent) 10 | ''' 11 | def __init__(self, lr=0.01): 12 | self.lr = lr 13 | 14 | def update(self, params, grads): 15 | for i in range(len(params)): 16 | params[i] -= self.lr * grads[i] 17 | 18 | 19 | class Momentum: 20 | ''' 21 | Momentum SGD 22 | ''' 23 | def __init__(self, lr=0.01, momentum=0.9): 24 | self.lr = lr 25 | self.momentum = momentum 26 | self.v = None 27 | 28 | def update(self, params, grads): 29 | if self.v is None: 30 | self.v = [] 31 | for param in params: 32 | self.v.append(np.zeros_like(param)) 33 | 34 | for i in range(len(params)): 35 | self.v[i] = self.momentum * self.v[i] - self.lr * grads[i] 36 | params[i] += self.v[i] 37 | 38 | 39 | class Nesterov: 40 | ''' 41 | Nesterov's Accelerated Gradient (http://arxiv.org/abs/1212.0901) 42 | ''' 43 | def __init__(self, lr=0.01, momentum=0.9): 44 | self.lr = lr 45 | self.momentum = momentum 46 | self.v = None 47 | 48 | def update(self, params, grads): 49 | if self.v is None: 50 | self.v = [] 51 | for param in params: 52 | self.v.append(np.zeros_like(param)) 53 | 54 | for i in range(len(params)): 55 | self.v[i] *= self.momentum 56 | self.v[i] -= self.lr * grads[i] 57 | params[i] += self.momentum * self.momentum * self.v[i] 58 | params[i] -= (1 + self.momentum) * self.lr * grads[i] 59 | 60 | 61 | class AdaGrad: 62 | ''' 63 | AdaGrad 64 | ''' 65 | def __init__(self, lr=0.01): 66 | self.lr = lr 67 | self.h = None 68 | 69 | def update(self, params, grads): 70 | if self.h is None: 71 | self.h = [] 72 | for param in params: 73 | self.h.append(np.zeros_like(param)) 74 | 75 | for i in range(len(params)): 76 | self.h[i] += grads[i] * grads[i] 77 | params[i] -= self.lr * grads[i] / (np.sqrt(self.h[i]) + 1e-7) 78 | 79 | 80 | class RMSprop: 81 | ''' 82 | RMSprop 83 | ''' 84 | def __init__(self, lr=0.01, decay_rate = 0.99): 85 | self.lr = lr 86 | self.decay_rate = decay_rate 87 | self.h = None 88 | 89 | def update(self, params, grads): 90 | if self.h is None: 91 | self.h = [] 92 | for param in params: 93 | self.h.append(np.zeros_like(param)) 94 | 95 | for i in range(len(params)): 96 | self.h[i] *= self.decay_rate 97 | self.h[i] += (1 - self.decay_rate) * grads[i] * grads[i] 98 | params[i] -= self.lr * grads[i] / (np.sqrt(self.h[i]) + 1e-7) 99 | 100 | 101 | class Adam: 102 | ''' 103 | Adam (http://arxiv.org/abs/1412.6980v8) 104 | ''' 105 | def __init__(self, lr=0.001, beta1=0.9, beta2=0.999): 106 | self.lr = lr 107 | self.beta1 = beta1 108 | self.beta2 = beta2 109 | self.iter = 0 110 | self.m = None 111 | self.v = None 112 | 113 | def update(self, params, grads): 114 | if 
self.m is None: 115 | self.m, self.v = [], [] 116 | for param in params: 117 | self.m.append(np.zeros_like(param)) 118 | self.v.append(np.zeros_like(param)) 119 | 120 | self.iter += 1 121 | lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter) 122 | 123 | for i in range(len(params)): 124 | self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i]) 125 | self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i]) 126 | 127 | params[i] -= lr_t * self.m[i] / (np.sqrt(self.v[i]) + 1e-7) 128 | -------------------------------------------------------------------------------- /python/upstream/common/trainer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') 4 | import numpy 5 | import time 6 | import matplotlib.pyplot as plt 7 | from common.np import * # import numpy as np 8 | from common.util import clip_grads 9 | 10 | 11 | class Trainer: 12 | def __init__(self, model, optimizer): 13 | self.model = model 14 | self.optimizer = optimizer 15 | self.loss_list = [] 16 | self.eval_interval = None 17 | self.current_epoch = 0 18 | 19 | def fit(self, x, t, max_epoch=10, batch_size=32, max_grad=None, eval_interval=20): 20 | data_size = len(x) 21 | max_iters = data_size // batch_size 22 | self.eval_interval = eval_interval 23 | model, optimizer = self.model, self.optimizer 24 | total_loss = 0 25 | loss_count = 0 26 | 27 | start_time = time.time() 28 | for epoch in range(max_epoch): 29 | # シャッフル 30 | idx = numpy.random.permutation(numpy.arange(data_size)) 31 | x = x[idx] 32 | t = t[idx] 33 | 34 | for iters in range(max_iters): 35 | batch_x = x[iters*batch_size:(iters+1)*batch_size] 36 | batch_t = t[iters*batch_size:(iters+1)*batch_size] 37 | 38 | # 勾配を求め、パラメータを更新 39 | loss = model.forward(batch_x, batch_t) 40 | model.backward() 41 | params, grads = remove_duplicate(model.params, model.grads) # 共有された重みを1つに集約 42 | if max_grad is not None: 43 | clip_grads(grads, max_grad) 44 | optimizer.update(params, grads) 45 | total_loss += loss 46 | loss_count += 1 47 | 48 | # 評価 49 | if (eval_interval is not None) and (iters % eval_interval) == 0: 50 | avg_loss = total_loss / loss_count 51 | elapsed_time = time.time() - start_time 52 | print('| epoch %d | iter %d / %d | time %d[s] | loss %.2f' 53 | % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, avg_loss)) 54 | self.loss_list.append(float(avg_loss)) 55 | total_loss, loss_count = 0, 0 56 | 57 | self.current_epoch += 1 58 | 59 | def plot(self, ylim=None): 60 | x = numpy.arange(len(self.loss_list)) 61 | if ylim is not None: 62 | plt.ylim(*ylim) 63 | plt.plot(x, self.loss_list, label='train') 64 | plt.xlabel('iterations (x' + str(self.eval_interval) + ')') 65 | plt.ylabel('loss') 66 | plt.show() 67 | 68 | 69 | class RnnlmTrainer: 70 | def __init__(self, model, optimizer): 71 | self.model = model 72 | self.optimizer = optimizer 73 | self.time_idx = None 74 | self.ppl_list = None 75 | self.eval_interval = None 76 | self.current_epoch = 0 77 | 78 | def get_batch(self, x, t, batch_size, time_size): 79 | batch_x = np.empty((batch_size, time_size), dtype='i') 80 | batch_t = np.empty((batch_size, time_size), dtype='i') 81 | 82 | data_size = len(x) 83 | jump = data_size // batch_size 84 | offsets = [i * jump for i in range(batch_size)] # バッチの各サンプルの読み込み開始位置 85 | 86 | for time in range(time_size): 87 | for i, offset in enumerate(offsets): 88 | batch_x[i, time] = x[(offset + self.time_idx) % data_size] 89 | batch_t[i, time] = t[(offset + self.time_idx) % 
data_size] 90 | self.time_idx += 1 91 | return batch_x, batch_t 92 | 93 | def fit(self, xs, ts, max_epoch=10, batch_size=20, time_size=35, 94 | max_grad=None, eval_interval=20): 95 | data_size = len(xs) 96 | max_iters = data_size // (batch_size * time_size) 97 | self.time_idx = 0 98 | self.ppl_list = [] 99 | self.eval_interval = eval_interval 100 | model, optimizer = self.model, self.optimizer 101 | total_loss = 0 102 | loss_count = 0 103 | 104 | start_time = time.time() 105 | for epoch in range(max_epoch): 106 | for iters in range(max_iters): 107 | batch_x, batch_t = self.get_batch(xs, ts, batch_size, time_size) 108 | 109 | # 勾配を求め、パラメータを更新 110 | loss = model.forward(batch_x, batch_t) 111 | model.backward() 112 | params, grads = remove_duplicate(model.params, model.grads) # 共有された重みを1つに集約 113 | if max_grad is not None: 114 | clip_grads(grads, max_grad) 115 | optimizer.update(params, grads) 116 | total_loss += loss 117 | loss_count += 1 118 | 119 | # パープレキシティの評価 120 | if (eval_interval is not None) and (iters % eval_interval) == 0: 121 | ppl = np.exp(total_loss / loss_count) 122 | elapsed_time = time.time() - start_time 123 | print('| epoch %d | iter %d / %d | time %d[s] | perplexity %.2f' 124 | % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, ppl)) 125 | self.ppl_list.append(float(ppl)) 126 | total_loss, loss_count = 0, 0 127 | 128 | self.current_epoch += 1 129 | 130 | def plot(self, ylim=None): 131 | x = numpy.arange(len(self.ppl_list)) 132 | if ylim is not None: 133 | plt.ylim(*ylim) 134 | plt.plot(x, self.ppl_list, label='train') 135 | plt.xlabel('iterations (x' + str(self.eval_interval) + ')') 136 | plt.ylabel('perplexity') 137 | plt.show() 138 | 139 | 140 | def remove_duplicate(params, grads): 141 | ''' 142 | パラメータ配列中の重複する重みをひとつに集約し、 143 | その重みに対応する勾配を加算する 144 | ''' 145 | params, grads = params[:], grads[:] # copy list 146 | 147 | while True: 148 | find_flg = False 149 | L = len(params) 150 | 151 | for i in range(0, L - 1): 152 | for j in range(i + 1, L): 153 | # 重みを共有する場合 154 | if params[i] is params[j]: 155 | grads[i] += grads[j] # 勾配の加算 156 | find_flg = True 157 | params.pop(j) 158 | grads.pop(j) 159 | # 転置行列として重みを共有する場合(weight tying) 160 | elif params[i].ndim == 2 and params[j].ndim == 2 and \ 161 | params[i].T.shape == params[j].shape and np.all(params[i].T == params[j]): 162 | grads[i] += grads[j].T 163 | find_flg = True 164 | params.pop(j) 165 | grads.pop(j) 166 | 167 | if find_flg: break 168 | if find_flg: break 169 | 170 | if not find_flg: break 171 | 172 | return params, grads 173 | -------------------------------------------------------------------------------- /python/upstream/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/upstream/dataset/__init__.py -------------------------------------------------------------------------------- /python/upstream/dataset/ptb.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | sys.path.append('..') 5 | try: 6 | import urllib.request 7 | except ImportError: 8 | raise ImportError('Use Python3!') 9 | import pickle 10 | import numpy as np 11 | 12 | 13 | url_base = 'https://raw.githubusercontent.com/tomsercu/lstm/master/data/' 14 | key_file = { 15 | 'train':'ptb.train.txt', 16 | 'test':'ptb.test.txt', 17 | 'valid':'ptb.valid.txt' 18 | } 19 | save_file = { 20 | 
'train':'ptb.train.npy', 21 | 'test':'ptb.test.npy', 22 | 'valid':'ptb.valid.npy' 23 | } 24 | vocab_file = 'ptb.vocab.pkl' 25 | 26 | dataset_dir = os.path.dirname(os.path.abspath(__file__)) 27 | 28 | 29 | def _download(file_name): 30 | file_path = dataset_dir + '/' + file_name 31 | if os.path.exists(file_path): 32 | return 33 | 34 | print('Downloading ' + file_name + ' ... ') 35 | 36 | try: 37 | urllib.request.urlretrieve(url_base + file_name, file_path) 38 | except urllib.error.URLError: 39 | import ssl 40 | ssl._create_default_https_context = ssl._create_unverified_context 41 | urllib.request.urlretrieve(url_base + file_name, file_path) 42 | 43 | print('Done') 44 | 45 | 46 | def load_vocab(): 47 | vocab_path = dataset_dir + '/' + vocab_file 48 | 49 | if os.path.exists(vocab_path): 50 | with open(vocab_path, 'rb') as f: 51 | word_to_id, id_to_word = pickle.load(f) 52 | return word_to_id, id_to_word 53 | 54 | word_to_id = {} 55 | id_to_word = {} 56 | data_type = 'train' 57 | file_name = key_file[data_type] 58 | file_path = dataset_dir + '/' + file_name 59 | 60 | _download(file_name) 61 | 62 | words = open(file_path).read().replace('\n', '<eos>').strip().split() 63 | 64 | for i, word in enumerate(words): 65 | if word not in word_to_id: 66 | tmp_id = len(word_to_id) 67 | word_to_id[word] = tmp_id 68 | id_to_word[tmp_id] = word 69 | 70 | with open(vocab_path, 'wb') as f: 71 | pickle.dump((word_to_id, id_to_word), f) 72 | 73 | return word_to_id, id_to_word 74 | 75 | 76 | def load_data(data_type='train'): 77 | ''' 78 | :param data_type: データの種類:'train' or 'test' or 'valid (val)' 79 | :return: 80 | ''' 81 | if data_type == 'val': data_type = 'valid' 82 | save_path = dataset_dir + '/' + save_file[data_type] 83 | 84 | word_to_id, id_to_word = load_vocab() 85 | 86 | if os.path.exists(save_path): 87 | corpus = np.load(save_path) 88 | return corpus, word_to_id, id_to_word 89 | 90 | file_name = key_file[data_type] 91 | file_path = dataset_dir + '/' + file_name 92 | _download(file_name) 93 | 94 | words = open(file_path).read().replace('\n', '<eos>').strip().split() 95 | corpus = np.array([word_to_id[w] for w in words]) 96 | 97 | np.save(save_path, corpus) 98 | return corpus, word_to_id, id_to_word 99 | 100 | 101 | if __name__ == '__main__': 102 | for data_type in ('train', 'val', 'test'): 103 | load_data(data_type) 104 | -------------------------------------------------------------------------------- /python/upstream/dataset/sequence.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') 4 | import os 5 | import numpy 6 | 7 | 8 | id_to_char = {} 9 | char_to_id = {} 10 | 11 | 12 | def _update_vocab(txt): 13 | chars = list(txt) 14 | 15 | for i, char in enumerate(chars): 16 | if char not in char_to_id: 17 | tmp_id = len(char_to_id) 18 | char_to_id[char] = tmp_id 19 | id_to_char[tmp_id] = char 20 | 21 | 22 | def load_data(file_name='addition.txt', seed=1984): 23 | file_path = os.path.dirname(os.path.abspath(__file__)) + '/' + file_name 24 | 25 | if not os.path.exists(file_path): 26 | print('No file: %s' % file_name) 27 | return None 28 | 29 | questions, answers = [], [] 30 | 31 | for line in open(file_path, 'r'): 32 | idx = line.find('_') 33 | questions.append(line[:idx]) 34 | answers.append(line[idx:-1]) 35 | 36 | # create vocab dict 37 | for i in range(len(questions)): 38 | q, a = questions[i], answers[i] 39 | _update_vocab(q) 40 | _update_vocab(a) 41 | 42 | # create numpy array 43 | x = numpy.zeros((len(questions),
len(questions[0])), dtype=numpy.int) 44 | t = numpy.zeros((len(questions), len(answers[0])), dtype=numpy.int) 45 | 46 | for i, sentence in enumerate(questions): 47 | x[i] = [char_to_id[c] for c in list(sentence)] 48 | for i, sentence in enumerate(answers): 49 | t[i] = [char_to_id[c] for c in list(sentence)] 50 | 51 | # shuffle 52 | indices = numpy.arange(len(x)) 53 | if seed is not None: 54 | numpy.random.seed(seed) 55 | numpy.random.shuffle(indices) 56 | x = x[indices] 57 | t = t[indices] 58 | 59 | # 10% for validation set 60 | split_at = len(x) - len(x) // 10 61 | (x_train, x_test) = x[:split_at], x[split_at:] 62 | (t_train, t_test) = t[:split_at], t[split_at:] 63 | 64 | return (x_train, t_train), (x_test, t_test) 65 | 66 | 67 | def get_vocab(): 68 | return char_to_id, id_to_char 69 | -------------------------------------------------------------------------------- /python/upstream/dataset/spiral.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import numpy as np 3 | 4 | 5 | def load_data(seed=1984): 6 | np.random.seed(seed) 7 | N = 100 # クラスごとのサンプル数 8 | DIM = 2 # データの要素数 9 | CLS_NUM = 3 # クラス数 10 | 11 | x = np.zeros((N*CLS_NUM, DIM)) 12 | t = np.zeros((N*CLS_NUM, CLS_NUM), dtype=np.int) 13 | 14 | for j in range(CLS_NUM): 15 | for i in range(N):#N*j, N*(j+1)): 16 | rate = i / N 17 | radius = 1.0*rate 18 | theta = j*4.0 + 4.0*rate + np.random.randn()*0.2 19 | 20 | ix = N*j + i 21 | x[ix] = np.array([radius*np.sin(theta), 22 | radius*np.cos(theta)]).flatten() 23 | t[ix, j] = 1 24 | 25 | return x, t 26 | -------------------------------------------------------------------------------- /python_team2/.gitignore: -------------------------------------------------------------------------------- 1 | book/ 2 | -------------------------------------------------------------------------------- /python_team2/20190327/sigmoid.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def sigmoid(x): 5 | return 1 / (1 + np.exp(-x)) 6 | 7 | print(sigmoid(3)) 8 | print(sigmoid(0)) 9 | print(sigmoid(-3)) 10 | 11 | print(sigmoid(3) + sigmoid(-3)) 12 | 13 | # numpyっぽい書き方 14 | print(sigmoid(np.array([3,0,-3]))) 15 | 16 | # P.13の例 17 | x = np.random.randn(10, 2) 18 | W1 = np.random.randn(2, 4) 19 | b1 = np.random.randn(4) 20 | W2 = np.random.randn(4, 3) 21 | b2 = np.random.randn(3) 22 | 23 | h = np.dot(x, W1) + b1 # これで1層の計算 24 | a = sigmoid(h) 25 | s = np.dot(a, W2) + b2 26 | 27 | print("h=", h) 28 | print("a=", a) 29 | print("s=", s) 30 | 31 | print(h.shape) 32 | print(a.shape) 33 | print(s.shape) 34 | 35 | -------------------------------------------------------------------------------- /python_team2/20190327/sigmoid_class.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class Sigmoid: 5 | def __init__(self): 6 | pass # 何もしない 7 | 8 | def forward(self, x): 9 | return 1 / (1 + np.exp(-x)) 10 | 11 | def backward(self): 12 | pass 13 | 14 | 15 | sig = Sigmoid() 16 | 17 | print(sig.forward(3)) 18 | print(sig.forward(0)) 19 | print(sig.forward(-3)) 20 | 21 | print(sig.forward(3) + sig.forward(-3)) 22 | 23 | # numpyっぽい書き方 24 | print(sig.forward(np.array([3,0,-3]))) 25 | 26 | -------------------------------------------------------------------------------- /python_team2/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | 
verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | numpy = "*" 10 | matplotlib = "*" 11 | 12 | [requires] 13 | python_version = "3.7" 14 | -------------------------------------------------------------------------------- /python_team2/README.md: -------------------------------------------------------------------------------- 1 | # About this directory 2 | 3 | * Working directory for the team implementing the samples from the "Deep Learning from Scratch 2" (NLP volume) reading group in Python. 4 | 5 | ## Notes 6 | * Python: 3.7.3 7 | * Dependencies are installed with pipenv 8 | 9 | ## ToDo 10 | - [X] Create the directory 11 | - [X] Install numpy 12 | - [X] Decide what to build 13 | - [X] Sigmoid function (P.13) 14 | - [X] Sigmoid function as a class (P.15) 15 | - [X] Implement the Affine layer (P.15) 16 | - [X] Implement TwoLayerNet (P.17) 17 | - [ ] 1.3 Training neural networks 18 | - [X] 1.3.4 Computational graphs 19 | - [X] MatMul 20 | - [X] 1.3.5 Deriving the gradients and implementing backpropagation 21 | - [X] Sigmoid 22 | - [X] Affine 23 | - [X] SoftmaxWithLoss 24 | - [X] 1.3.6 Updating the weights 25 | - [X] 1.4 Solving a problem with a neural network 26 | - [X] 1.4.1 The spiral dataset 27 | - [X] 1.4.2 Implementing the neural network 28 | - [X] 1.4.3 The training code 29 | - [X] 1.4.4 The Trainer class 30 | - [ ] Chapter 3: word2vec 31 | - [X] 3.1 Inference-based methods and neural networks 32 | - [X] 3.1.3 Computation with a fully connected layer 33 | - [X] 3.1.3 Computation with MatMul 34 | - [X] 3.2 A simple word2vec 35 | - [X] 3.2.1 Inference 36 | - [X] 3.3 Preparing the training data 37 | - [X] 3.4 Implementing the CBOW model 38 | - [X] Inference 39 | - [X] Training 40 | 41 | 42 | ## Setting up the development environment 43 | 44 | ``` 45 | # Check out the official source code into book/ 46 | $ git clone https://github.com/oreilly-japan/deep-learning-from-scratch-2.git book 47 | ``` 48 | 49 | ``` 50 | # for mac 51 | $ brew install pipenv 52 | $ cd {CURRENT_DIRECTORY} 53 | $ pipenv sync 54 | $ pipenv shell 55 | ``` 56 | 57 | ### References 58 | 59 | - [Python環境構築ベストプラクティス2019 - ばいおいんふぉっぽいの!](https://www.natsukium.com/blog/2019-02-18/python/) 60 | - [Pipenv で起きる Matplotlib まわりのエラー - Qiita](https://qiita.com/utahkaA/items/ad9aa825832c5909575a) 61 | - On macOS, using pipenv + matplotlib requires the workaround described in the Qiita article above 62 | -------------------------------------------------------------------------------- /python_team2/ch01/forward_net.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | # Every layer has forward() and backward() methods 4 | # Every layer has params and grads instance variables 5 | 6 | 7 | import numpy as np 8 | 9 | class Sigmoid: 10 | def __init__(self): 11 | self.params = [] 12 | 13 | def forward(self, x): 14 | return 1 / (1 + np.exp(-x)) 15 | 16 | def backward(self): 17 | pass 18 | 19 | 20 | class Affine: 21 | def __init__(self, W, b): 22 | self.params = [W, b] 23 | 24 | def forward(self, x): 25 | W, b = self.params 26 | out = np.dot(x, W) + b 27 | return out 28 | 29 | def backward(self): 30 | pass 31 | 32 | 33 | class TwoLayerNet: 34 | def __init__(self, input_size, hidden_size, output_size): 35 | I, H, O = input_size, hidden_size, output_size 36 | 37 | # Initialize the weights and biases 38 | W1 = np.random.randn(I, H) 39 | b1 = np.random.randn(H) 40 | W2 = np.random.randn(H, O) 41 | b2 = np.random.randn(O) 42 | 43 | # Create the layers 44 | self.layers = [ 45 | Affine(W1, b1), 46 | Sigmoid(), 47 | Affine(W2, b2) 48 | ] 49 | 50 | # Collect all weights into a single list 51 | self.params = [] 52 | for layer in self.layers: 53 | self.params += layer.params 54 | 55 | def predict(self, x): 56 | for layer in self.layers: 57 | x = layer.forward(x) 58 | return x 59 | 60 | 61 | if __name__ == '__main__': 62 | x = np.random.randn(10, 2) 63 | model = TwoLayerNet(2, 4, 3) 64 | s = model.predict(x) 65 | print(s) 66 | print(s.shape) 67 | -------------------------------------------------------------------------------- /python_team2/ch01/plots.py:
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | # 参考 4 | # scikit-learn - matplotlib を使って分類問題の決定境界を描画する - Pynote 5 | # http://pynote.hatenablog.com/entry/sklearn-plot-decision-boundary 6 | # 機械学習の分類結果を可視化!決定境界 - 見習いデータサイエンティストの隠れ家 7 | # http://www.dskomei.com/entry/2018/03/04/125249 8 | 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | 12 | 13 | 14 | def plotResults(model, loss_list, x): 15 | # 学習経過をプロット 16 | plt.subplot(1,2,1) 17 | plt.plot(loss_list) 18 | 19 | # 決定境界をプロット 20 | plt.subplot(1,2,2) 21 | plotDecisionBoundary(model, x) 22 | 23 | 24 | # 決定境界のプロット 25 | def plotDecisionBoundary(model, x): 26 | # グリッドの座標を作る 27 | x_min, x_max = x[:, 0].min(), x[:, 0].max() 28 | y_min, y_max = x[:, 1].min(), x[:, 1].max() 29 | x_mesh, y_mesh = np.meshgrid(np.arange(x_min, x_max, 0.01), 30 | np.arange(y_min, y_max, 0.01)) 31 | grid = np.array([x_mesh.ravel(), y_mesh.ravel()]).T 32 | 33 | # グリッドの推論結果を集める 34 | pred = model.predict(grid) 35 | z = np.array(x_mesh.ravel()) 36 | for i in range(len(pred)): 37 | z[i] = pred[i].argmax() 38 | z = z.reshape(x_mesh.shape) 39 | 40 | # 等高線描画 41 | plt.contourf(x_mesh, y_mesh, z, alpha=0.3) 42 | plt.xlim(x_mesh.min(), x_mesh.max()) 43 | plt.ylim(y_mesh.min(), y_mesh.max()) 44 | 45 | # データ点のプロット 46 | N = 100 47 | CLS_NUM = 3 48 | markers = ['o', 'x', '^'] 49 | for i in range(CLS_NUM): 50 | plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i]) 51 | -------------------------------------------------------------------------------- /python_team2/ch01/show_spiral_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('../book') # 公式リポジトリのdatasetを読むため 4 | from dataset import spiral 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | x, t = spiral.load_data() 9 | print('x', x.shape) # (300, 2) 10 | print('t', t.shape) # (300, 3) 11 | 12 | # データ点のプロット 13 | N = 100 14 | CLS_NUM = 3 15 | markers = ['o', 'x', '^'] 16 | for i in range(CLS_NUM): 17 | plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i]) 18 | plt.show() 19 | -------------------------------------------------------------------------------- /python_team2/ch01/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('../book') 5 | from common.optimizer import SGD 6 | from common.trainer import Trainer 7 | from dataset import spiral 8 | from two_layer_net import TwoLayerNet 9 | 10 | max_epoch = 300 11 | batch_size = 30 12 | hidden_size = 10 13 | learning_rate = 1.0 14 | 15 | x, t = spiral.load_data() 16 | model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3) 17 | optimizer = SGD(lr = learning_rate) 18 | 19 | trainer = Trainer(model, optimizer) 20 | trainer.fit(x, t, max_epoch, batch_size, eval_interval = 10) 21 | trainer.plot() 22 | -------------------------------------------------------------------------------- /python_team2/ch01/train_custom_loop.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('..') 5 | import numpy as np 6 | from common.optimizer import SGD 7 | from book.dataset import spiral 8 | import matplotlib.pyplot as plt 9 | from two_layer_net import TwoLayerNet 10 | from plots import plotResults 11 | 12 | # ハイパーパラメータの設定 13 | max_epoch = 300 14 | batch_size = 30 15 | hidden_size = 10 16 | 
learning_rate = 1.0 17 | 18 | # データの読み込み 19 | x, t = spiral.load_data() 20 | 21 | def train(x, t): 22 | # 学習で使用する変数 23 | data_size = len(x) 24 | max_iters = data_size // batch_size 25 | total_loss = 0 26 | loss_count = 0 27 | loss_list = [] 28 | 29 | for epoch in range(max_epoch): 30 | # データのシャッフル 31 | idx = np.random.permutation(data_size) 32 | x = x[idx] 33 | t = t[idx] 34 | 35 | for iters in range(max_iters): 36 | batch_x = x[iters*batch_size:(iters+1)*batch_size] 37 | batch_t = t[iters*batch_size:(iters+1)*batch_size] 38 | 39 | # 勾配を求めパラメターを更新 40 | loss = model.forward(batch_x, batch_t) 41 | model.backward() 42 | optimizer.update(model.params, model.grads) 43 | 44 | total_loss += loss 45 | loss_count += 1 46 | 47 | # 定期的に学習経過を出力 48 | if (iters+1) % 10 == 0: 49 | avg_loss = total_loss / loss_count 50 | print('| epoch %d | iter %d / %d | loss %.2f' 51 | % (epoch + 1, iters + 1, max_iters, avg_loss)) 52 | loss_list.append(avg_loss) 53 | total_loss, loss_count = 0, 0 54 | 55 | return loss_list 56 | 57 | 58 | 59 | 60 | if __name__ == '__main__': 61 | # 学習試行1 62 | 63 | # モデルとオプティマイザの生成 64 | optimizer = SGD(lr=learning_rate) 65 | model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3) 66 | 67 | loss_list = train(x, t) 68 | plt.figure(figsize=(10,4)) 69 | plotResults(model, loss_list, x) 70 | 71 | 72 | # 学習試行2 73 | 74 | # モデルとオプティマイザの生成 75 | #hidden_size = 60 76 | #learning_rate = 0.5 77 | #max_epoch = 500 78 | #optimizer = SGD(lr=learning_rate) 79 | #model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3) 80 | # 81 | #loss_list = train(x, t) 82 | #plt.figure(figsize=(10,4)) 83 | #plotResults(model, loss_list, x) 84 | 85 | 86 | # グラフ表示 87 | plt.show() 88 | -------------------------------------------------------------------------------- /python_team2/ch01/two_layer_net.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('..') 5 | import numpy as np 6 | from common.layers import Affine, Sigmoid, SoftmaxWithLoss, Relu 7 | 8 | class TwoLayerNet: 9 | def __init__(self, input_size, hidden_size, output_size): 10 | I,H,O = input_size, hidden_size, output_size 11 | 12 | # 重みとバイアス 13 | W1 = 0.01 * np.random.randn(I,H) 14 | b1 = np.zeros(H) 15 | W2 = 0.01 * np.random.randn(H,O) 16 | b2 = np.zeros(O) 17 | 18 | # レイヤの生成 19 | self.layers = [ 20 | Affine(W1, b1), 21 | Sigmoid(), 22 | # Relu(), 23 | Affine(W2, b2) 24 | ] 25 | self.loss_layer = SoftmaxWithLoss() 26 | 27 | # すべての重みと勾配をリストにまとめる 28 | self.params, self.grads = [], [] 29 | for layer in self.layers: 30 | self.params += layer.params 31 | self.grads += layer.grads 32 | 33 | def predict(self, x): 34 | for layer in self.layers: 35 | x = layer.forward(x) 36 | return x 37 | 38 | def forward(self, x, t): 39 | score = self.predict(x) 40 | loss = self.loss_layer.forward(score, t) 41 | return loss 42 | 43 | def backward(self, dout=1): 44 | dout = self.loss_layer.backward(dout) 45 | for layer in reversed(self.layers): 46 | dout = layer.backward(dout) 47 | return dout 48 | -------------------------------------------------------------------------------- /python_team2/ch02/co_matrix.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('..') 5 | import numpy as np 6 | from common.util import preprocess, create_co_matrix 7 | 8 | text = 'You say goodbye and I say hello.' 
9 | print(text) 10 | 11 | corpus, word_to_id, id_to_word = preprocess(text) 12 | 13 | print(corpus) 14 | print(id_to_word) 15 | 16 | C = create_co_matrix(corpus, len(id_to_word)) 17 | print(C) 18 | 19 | print(id_to_word[0]) 20 | print(C[0]) 21 | 22 | print('goodbye') 23 | print(C[word_to_id['goodbye']]) 24 | print('say') 25 | print(C[word_to_id['say']]) 26 | -------------------------------------------------------------------------------- /python_team2/ch02/ranking.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('..') 5 | from common.util import preprocess, create_co_matrix, most_similar 6 | 7 | text = 'You say goodbye and I say hello.' 8 | corpus, word_to_id, id_to_word = preprocess(text) 9 | vocab_size = len(word_to_id) 10 | C = create_co_matrix(corpus, vocab_size) 11 | 12 | most_similar('you', word_to_id, id_to_word, C, top=5) 13 | 14 | # [query] you 15 | # goodbye: 0.7071067691154799 16 | # i: 0.7071067691154799 17 | # hello: 0.7071067691154799 18 | # say: 0.0 19 | # and: 0.0 20 | -------------------------------------------------------------------------------- /python_team2/ch02/similarity.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('..') 5 | from common.util import preprocess, create_co_matrix, cos_similarity 6 | 7 | 8 | text = 'You say goodbye and I say hello.' 9 | corpus, word_to_id, id_to_word = preprocess(text) 10 | vocab_size = len(word_to_id) 11 | C = create_co_matrix(corpus, vocab_size) 12 | 13 | c0 = C[word_to_id['you']] # [you] の単語ベクトル 14 | c1 = C[word_to_id['i']] # [i]の単語ベクトル 15 | 16 | print(cos_similarity(c0, c1)) 17 | # 0.7071067691154799 18 | -------------------------------------------------------------------------------- /python_team2/ch02/words.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | # 2.3 カウントベースの手法 4 | 5 | import numpy as np 6 | 7 | text = "You say goodbye and I say hello." 
8 | if __name__ == '__main__': 9 | print(text) 10 | 11 | # str.lower() returns a new string, so the result must be reassigned 12 | text = text.lower() 13 | # put a space before the period so it becomes its own token 14 | text = text.replace(".", " .") 15 | if __name__ == '__main__': 16 | print(text) 17 | 18 | words = text.split(' ') 19 | if __name__ == '__main__': 20 | print(words) 21 | 22 | 23 | word_to_id = {} 24 | id_to_word = {} 25 | for word in words: 26 | if word not in word_to_id: 27 | new_id = len(word_to_id) 28 | word_to_id[word] = new_id 29 | id_to_word[new_id] = word 30 | 31 | 32 | if __name__ == '__main__': 33 | print(word_to_id) 34 | print(id_to_word) 35 | 36 | 37 | corpus = [word_to_id[w] for w in words] 38 | corpus = np.array(corpus) 39 | if __name__ == '__main__': 40 | print(corpus) 41 | -------------------------------------------------------------------------------- /python_team2/ch03/cbow_predict.py: -------------------------------------------------------------------------------- 1 | # 3.2.1 Inference in the CBOW model 2 | 3 | import sys 4 | sys.path.append('../book') 5 | import numpy as np 6 | from common.layers import MatMul 7 | 8 | # Sample context data 9 | c0 = np.array([[1, 0, 0, 0, 0, 0, 0]]) # input "you" 10 | c1 = np.array([[0, 0, 1, 0, 0, 0, 0]]) # input "goodbye" 11 | 12 | # Initialize the weights 13 | W_in = np.random.randn(7, 3) 14 | W_out = np.random.randn(3, 7) 15 | 16 | # Create the layers 17 | in_layer0 = MatMul(W_in) 18 | in_layer1 = MatMul(W_in) 19 | out_layer = MatMul(W_out) 20 | 21 | # Forward pass 22 | h0 = in_layer0.forward(c0) 23 | h1 = in_layer1.forward(c1) 24 | h = 0.5*(h0 + h1) 25 | s = out_layer.forward(h) 26 | 27 | print(s) 28 | -------------------------------------------------------------------------------- /python_team2/ch03/simple_cbow.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../book') 3 | import numpy as np 4 | from common.layers import MatMul, SoftmaxWithLoss 5 | 6 | class SimpleCBOW: 7 | def __init__(self, vocab_size, hidden_size): 8 | V, H = vocab_size, hidden_size 9 | 10 | # Initialize the weights 11 | W_in = 0.01 * np.random.randn(V, H).astype('f') 12 | W_out = 0.01 * np.random.randn(H, V).astype('f') 13 | 14 | # Create the layers 15 | self.in_layer0 = MatMul(W_in) 16 | self.in_layer1 = MatMul(W_in) 17 | self.out_layer = MatMul(W_out) 18 | self.loss_layer = SoftmaxWithLoss() 19 | 20 | # Collect all weights and gradients into lists 21 | layers = [self.in_layer0, self.in_layer1, self.out_layer] 22 | self.params, self.grads = [], [] 23 | for layer in layers: 24 | self.params += layer.params 25 | self.grads += layer.grads 26 | # Keep the word vectors (distributed representations) as a member variable 27 | self.word_vecs = W_in 28 | 29 | def forward(self, contexts, target): 30 | h0 = self.in_layer0.forward(contexts[:, 0]) 31 | h1 = self.in_layer1.forward(contexts[:, 1]) 32 | h = (h0 + h1) * 0.5 33 | score = self.out_layer.forward(h) 34 | loss = self.loss_layer.forward(score, target) 35 | return loss 36 | 37 | def backward(self, dout=1): 38 | ds = self.loss_layer.backward(dout) 39 | da = self.out_layer.backward(ds) 40 | da *= 0.5 41 | self.in_layer0.backward(da) 42 | self.in_layer1.backward(da) 43 | return None 44 | 45 | 46 | if __name__ == '__main__': 47 | cbow = SimpleCBOW(5, 3) 48 | contexts = np.array([[1, 0, 0, 0, 0], [0, 1, 0, 0, 0]]) 49 | target = np.array([[0, 0, 0, 1, 0]]) 50 | print(cbow.forward(contexts.T, target)) 51 | -------------------------------------------------------------------------------- /python_team2/ch03/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../book') 3 | from common.trainer import Trainer 4 | from common.optimizer import Adam 5 |
from ch03.simple_skip_gram import SimpleSkipGram 6 | from simple_cbow import SimpleCBOW 7 | from common.util import preprocess, create_contexts_target, convert_one_hot, most_similar 8 | 9 | 10 | window_size=1 11 | hidden_size=5 12 | batch_size=3 13 | max_epoch=1000 14 | 15 | text = 'You say goodbye and I say hello.' 16 | #text='Deep learning (also known as deep structured learning or hierarchical learning) is part of a broader family of machine learning methods based on artificial neural networks. Learning can be supervised, semi-supervised or unsupervised. Deep learning architectures such as deep neural networks, deep belief networks, recurrent neural networks and convolutional neural networks have been applied to fields including computer vision, speech recognition, natural language processing, audio recognition, social network filtering, machine translation, bioinformatics, drug design, medical image analysis, material inspection and board game programs, where they have produced results comparable to and in some cases superior to human experts. Artificial Neural Networks (ANNs) were inspired by information processing and distributed communication nodes in biological systems. ANNs have various differences from biological brains. Specifically, neural networks tend to be static and symbolic, while the biological brain of most living organisms is dynamic (plastic) and analog.' 17 | 18 | corpus, word_to_id, id_to_word = preprocess(text) 19 | 20 | vocab_size = len(word_to_id) 21 | contexts, target = create_contexts_target(corpus, window_size) 22 | target = convert_one_hot(target, vocab_size) 23 | contexts = convert_one_hot(contexts, vocab_size) 24 | 25 | model = SimpleCBOW(vocab_size, hidden_size) 26 | #model = SimpleSkipGram(vocab_size, hidden_size) 27 | optimizer = Adam() 28 | trainer = Trainer(model, optimizer) 29 | 30 | trainer.fit(contexts, target, max_epoch, batch_size) 31 | trainer.plot() 32 | 33 | 34 | word_vecs = model.word_vecs 35 | for word_id, word in id_to_word.items(): 36 | print(word, word_vecs[word_id]) 37 | 38 | 39 | most_similar('you', word_to_id, id_to_word, word_vecs) 40 | #most_similar('learning', word_to_id, id_to_word, word_vecs) 41 | -------------------------------------------------------------------------------- /python_team2/ch03/w_in.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # 3.1.3 p.99 3 | 4 | c = np.array([[1, 0, 0, 0, 0, 0, 0]]) # 入力 "you" 5 | W = np.random.randn(7, 3) # 重み 6 | h = np.dot(c, W) # 中間ノード 7 | print(h) 8 | print(W) 9 | -------------------------------------------------------------------------------- /python_team2/ch03/w_in_matmul.py: -------------------------------------------------------------------------------- 1 | # p.100 3.1.3 2 | 3 | import sys 4 | sys.path.append('../book') 5 | import numpy as np 6 | from common.layers import MatMul 7 | 8 | c = np.array([[1, 0, 0, 0, 0, 0, 0]]) # 入力 "you" 9 | W = np.random.randn(7, 3) # 重み 10 | layer = MatMul(W) 11 | h = layer.forward(c) 12 | print(h) 13 | print(W) 14 | -------------------------------------------------------------------------------- /python_team2/common/layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import numpy as np 4 | 5 | class MatMul: 6 | def __init__(self, W): 7 | self.params = [W] 8 | self.grads = [np.zeros_like(W)] 9 | self.x = None 10 | 11 | def forward(self, x): 12 | W, = self.params 13 | out = np.dot(x, W) 14 | self.x = x 15 | return out 
16 | 17 | def backward(self, dout): 18 | W, = self.params 19 | dx = np.dot(dout, W.T) 20 | dW = np.dot(self.x.T, dout) 21 | self.grads[0][...] = dW 22 | return dx 23 | 24 | class Sigmoid: 25 | def __init__(self): 26 | self.params, self.grads = [], [] 27 | 28 | def forward(self, x): 29 | out = 1 / (1 + np.exp(-x)) 30 | self.out = out 31 | return out 32 | 33 | def backward(self, dout): 34 | dx = dout * (1.0 - self.out) * self.out 35 | return dx 36 | 37 | # https://github.com/oreilly-japan/deep-learning-from-scratch/blob/master/common/layers.py 38 | class Relu: 39 | def __init__(self): 40 | self.mask = None 41 | self.params, self.grads = [], [] 42 | 43 | def forward(self, x): 44 | self.mask = (x <= 0) 45 | out = x.copy() 46 | out[self.mask] = 0 47 | 48 | return out 49 | 50 | def backward(self, dout): 51 | dout[self.mask] = 0 52 | dx = dout 53 | 54 | return dx 55 | 56 | class Affine: 57 | def __init__(self, W, b): 58 | self.params = [W, b] 59 | self.grads = [np.zeros_like(W), np.zeros_like(b)] 60 | self.x = None 61 | 62 | def forward(self, x): 63 | W, b = self.params 64 | out = np.dot(x, W) + b 65 | self.x = x 66 | return out 67 | 68 | def backward(self, dout): 69 | W, b = self.params 70 | dx = np.dot(dout, W.T) 71 | dW = np.dot(self.x.T, dout) 72 | db = np.sum(dout, axis=0) 73 | 74 | self.grads[0][...] = dW 75 | self.grads[1][...] = db 76 | return dx 77 | 78 | class AffineMM: 79 | def __init__(self, W, b): 80 | self.params = [W, b] 81 | self.grads = [np.zeros_like(W), np.zeros_like(b)] 82 | self.MM = MatMul(W) 83 | 84 | def forward(self, x): 85 | b = self.params[1] 86 | out = self.MM.forward(x) + b 87 | return out 88 | 89 | def backward(self, dout): 90 | b = self.params[1] 91 | dx = self.MM.backward(dout) 92 | db = np.sum(dout, axis=0) 93 | 94 | self.grads[0][...] = self.MM.grads[0] 95 | self.grads[1][...] 
= db 96 | return dx 97 | 98 | 99 | # https://github.com/oreilly-japan/deep-learning-from-scratch-2/blob/master/common/functions.py 100 | # からパチった 101 | def softmax(x): 102 | if x.ndim == 2: 103 | x = x - x.max(axis=1, keepdims=True) 104 | x = np.exp(x) 105 | x /= x.sum(axis=1, keepdims=True) 106 | elif x.ndim == 1: 107 | x = x - np.max(x) 108 | x = np.exp(x) / np.sum(np.exp(x)) 109 | 110 | return x 111 | 112 | 113 | def cross_entropy_error(y, t): 114 | if y.ndim == 1: 115 | t = t.reshape(1, t.size) 116 | y = y.reshape(1, y.size) 117 | 118 | # 教師データがone-hot-vectorの場合、正解ラベルのインデックスに変換 119 | if t.size == y.size: 120 | t = t.argmax(axis=1) 121 | 122 | batch_size = y.shape[0] 123 | 124 | return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size 125 | 126 | 127 | # https://github.com/oreilly-japan/deep-learning-from-scratch-2/blob/master/common/layers.py 128 | class SoftmaxWithLoss: 129 | def __init__(self): 130 | self.params, self.grads = [], [] 131 | self.y = None 132 | self.t = None 133 | 134 | def forward(self, x, t): 135 | self.t = t 136 | self.y = softmax(x) 137 | 138 | # 教師ラベルがone-hotベクトルの場合、正解のインデックスに変換 139 | if self.t.size == self.y.size: 140 | self.t = self.t.argmax(axis=1) 141 | 142 | loss = cross_entropy_error(self.y, self.t) 143 | return loss 144 | 145 | def backward(self, dout=1): 146 | batch_size = self.t.shape[0] 147 | 148 | dx = self.y.copy() 149 | dx[np.arange(batch_size), self.t] -= 1 150 | dx *= dout 151 | dx = dx / batch_size 152 | 153 | return dx 154 | 155 | 156 | 157 | 158 | if __name__ == '__main__': 159 | print('MatMul 形状チェック') 160 | 161 | W = np.random.randn(3, 4) 162 | mm = MatMul(W) 163 | x = np.random.randn(1, 3) 164 | out = mm.forward(x) 165 | print('mm.forward().shape', out.shape) 166 | grad = mm.backward(out) 167 | print('mm.backward().shape', grad.shape) 168 | 169 | 170 | print("Affine, AffineMM 実装チェック") 171 | 172 | W = np.random.randn(3, 2) 173 | b = np.random.randn(2) 174 | aff = Affine(W, b) 175 | amm = AffineMM(W, b) 176 | 177 | x = np.random.randn(10, 3) 178 | out1 = aff.forward(x) 179 | out2 = amm.forward(x) 180 | print("out equal", (out1 == out2).all()) 181 | 182 | grad1 = aff.backward(out1) 183 | grad2 = amm.backward(out2) 184 | print("grad equal", (grad1 == grad2).all()) 185 | -------------------------------------------------------------------------------- /python_team2/common/optimizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | # 1.3.6 重みの更新 4 | 5 | class SGD: 6 | def __init__(self, lr=0.01): 7 | self.lr = lr 8 | 9 | def update(self, params, grads): 10 | for i in range(len(params)): 11 | params[i] -= self.lr * grads[i] 12 | -------------------------------------------------------------------------------- /python_team2/common/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import numpy as np 4 | 5 | def preprocess(text): 6 | text = text.lower() 7 | text = text.replace('.', ' .') 8 | words = text.split(' ') 9 | 10 | word_to_id = {} 11 | id_to_word = {} 12 | for word in words: 13 | if word not in word_to_id: 14 | new_id = len(word_to_id) 15 | word_to_id[word] = new_id 16 | id_to_word[new_id] = word 17 | 18 | corpus = np.array([word_to_id[w] for w in words]) 19 | return corpus, word_to_id, id_to_word 20 | 21 | 22 | def create_co_matrix(corpus, vocab_size, window_size=1): 23 | corpus_size = len(corpus) 24 | co_matrix = np.zeros((vocab_size, vocab_size), dtype=np.int32) 25 | 26 | for idx, word_id in 
enumerate(corpus): 27 | for i in range(1, window_size + 1): 28 | left_idx = idx - i 29 | right_idx = idx + i 30 | 31 | if left_idx >= 0: 32 | left_word_id = corpus[left_idx] 33 | co_matrix[word_id, left_word_id] += 1 34 | 35 | if right_idx < corpus_size: 36 | right_word_id = corpus[right_idx] 37 | co_matrix[word_id, right_word_id] += 1 38 | 39 | return co_matrix 40 | 41 | def cos_similarity(x, y, eps = 1e-8): 42 | nx = x / (np.sqrt(np.sum(x**2)) + eps) # normalize x 43 | ny = y / (np.sqrt(np.sum(y**2)) + eps) # normalize y 44 | return np.dot(nx, ny) 45 | 46 | def most_similar(query, word_to_id, id_to_word, word_matrix, top=5): 47 | # Look up the query 48 | if query not in word_to_id: 49 | print('%s is not found' % query) 50 | return 51 | 52 | print('\n[query] ' + query) 53 | query_id = word_to_id[query] 54 | query_vec = word_matrix[query_id] 55 | 56 | # Compute the cosine similarity against every word 57 | vocab_size = len(id_to_word) 58 | similarity = np.zeros(vocab_size) 59 | for i in range(vocab_size): 60 | similarity[i] = cos_similarity(word_matrix[i], query_vec) 61 | 62 | # Print the results in descending order of cosine similarity 63 | count = 0 64 | for i in (-1 * similarity).argsort(): 65 | if id_to_word[i] == query: 66 | continue 67 | print(' %s: %s' % (id_to_word[i], similarity[i])) 68 | 69 | count += 1 70 | if count >= top: 71 | return 72 | -------------------------------------------------------------------------------- /ruby/.bundle/config: -------------------------------------------------------------------------------- 1 | --- 2 | BUNDLE_PATH: "vendor/bundle" 3 | -------------------------------------------------------------------------------- /ruby/Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'https://rubygems.org' 4 | 5 | git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } 6 | 7 | gem 'numo-narray' 8 | gem 'matplotlib' 9 | gem 'red-datasets' 10 | gem 'test-unit' 11 | gem 'irb' 12 | gem 'pry-byebug' 13 | gem 'rake' 14 | -------------------------------------------------------------------------------- /ruby/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | byebug (11.0.1) 5 | coderay (1.1.2) 6 | csv (3.1.1) 7 | irb (1.0.0) 8 | matplotlib (1.0.0) 9 | pycall (>= 1.0.0) 10 | method_source (0.9.2) 11 | numo-narray (0.9.1.4) 12 | power_assert (1.1.4) 13 | pry (0.12.2) 14 | coderay (~> 1.1.0) 15 | method_source (~> 0.9.0) 16 | pry-byebug (3.7.0) 17 | byebug (~> 11.0) 18 | pry (~> 0.10) 19 | pycall (1.2.1) 20 | rake (13.0.1) 21 | red-datasets (0.0.8) 22 | csv (>= 3.0.5) 23 | rubyzip 24 | rubyzip (2.0.0) 25 | test-unit (3.3.2) 26 | power_assert 27 | 28 | PLATFORMS 29 | ruby 30 | 31 | DEPENDENCIES 32 | irb 33 | matplotlib 34 | numo-narray 35 | pry-byebug 36 | rake 37 | red-datasets 38 | test-unit 39 | 40 | BUNDLED WITH 41 | 2.0.2 42 | -------------------------------------------------------------------------------- /ruby/Rakefile: -------------------------------------------------------------------------------- 1 | #require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | 4 | Rake::TestTask.new(:test) do |t| 5 | t.libs << "test" 6 | t.libs << "lib" 7 | t.test_files = FileList['test/**/*_test.rb'] 8 | t.warning = false 9 | end 10 | 11 | task :default => :test 12 | -------------------------------------------------------------------------------- /ruby/examples/ch01/show_spiral_dataset.rb: -------------------------------------------------------------------------------- 1 |
require 'matplotlib/pyplot' 2 | require_relative "spiral" 3 | 4 | plt = Matplotlib::Pyplot 5 | 6 | spiral = Spiral.new.to_a 7 | 8 | colors = ['yellow', 'green', 'red'] 9 | 10 | spiral.group_by{|x, t| t.to_a }.each do |t, x| 11 | x = x.map(&:first).map(&:to_a) 12 | plt.scatter(*x.transpose, c: colors[t.find_index(1)]) 13 | end 14 | plt.show() 15 | -------------------------------------------------------------------------------- /ruby/examples/ch01/spiral.rb: -------------------------------------------------------------------------------- 1 | require "numo/narray" 2 | require "datasets" 3 | require "datasets/dataset" 4 | 5 | class Spiral < Datasets::Dataset 6 | N = 100 # number of samples per class 7 | DIM = 2 # number of features per sample 8 | CLS_NUM = 3 # number of classes 9 | attr_reader :x, :t 10 | 11 | def initialize(seed=1984) 12 | super() 13 | @metadata.id = "spiral" 14 | @metadata.name = "Spiral" 15 | @metadata.url = "https://github.com/retrieva/deep-learning-from-scratch-2" 16 | @metadata.description = "Spiral dataset" 17 | 18 | random = Random.new(seed) 19 | 20 | @x = Numo::DFloat.zeros(N * CLS_NUM, DIM) 21 | @t = Numo::Int64.zeros(N * CLS_NUM, CLS_NUM) 22 | 23 | CLS_NUM.times do |j| 24 | N.times do |i| # N*j, N*(j+1)) 25 | rate = i.to_f / N 26 | radius = 1.0 * rate 27 | theta = j * 4.0 + 4.0 * rate + random.rand(0.2) 28 | 29 | ix = N * j + i 30 | @x[ix, true] = [radius * Math.sin(theta), radius * Math.cos(theta)] 31 | @t[ix, j] = 1 32 | end 33 | end 34 | end 35 | 36 | def each 37 | return to_enum(__method__) unless block_given? 38 | 39 | (N * CLS_NUM).times do |ix| 40 | yield [@x[ix, true], @t[ix, true]] 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /ruby/examples/ch01/train.rb: -------------------------------------------------------------------------------- 1 | require_relative '../lib/optimizer' # SGD 2 | require_relative '../lib/trainer' 3 | require_relative 'two_layers_net' 4 | require_relative 'spiral' 5 | 6 | max_epoch = 300 7 | batch_size = 30 8 | hidden_size = 10 9 | learning_rate = 1.0 10 | 11 | spiral = Spiral.new 12 | x = spiral.x 13 | t = spiral.t 14 | model = TwoLayersNet.new(input_size: 2, hidden_size: hidden_size, output_size: 3) 15 | optimizer = SGD.new(learning_rate) 16 | 17 | trainer = Trainer.new(model, optimizer) 18 | trainer.fit(x, t, max_epoch: max_epoch, batch_size: batch_size, eval_interval: 10) 19 | trainer.plot() -------------------------------------------------------------------------------- /ruby/examples/ch01/train_custom_loop.rb: -------------------------------------------------------------------------------- 1 | require_relative 'spiral' 2 | require_relative '../lib/optimizer' 3 | require_relative 'two_layers_net' 4 | 5 | # 1: Set hyperparameters 6 | max_epoch = 300 7 | batch_size = 30 8 | hidden_size = 10 9 | learning_rate = 1.0 10 | 11 | # 2: Load the data, create the model and the optimizer 12 | samples = Spiral.new 13 | x = samples.x # .shape => [300, 2] 14 | t = samples.t # .shape => [300, 3] 15 | model = TwoLayersNet.new(input_size: 2, hidden_size: hidden_size, output_size: 3) 16 | optimizer = SGD.new(learning_rate) 17 | 18 | data_size = x.shape.first # => 300 19 | max_iters = (data_size / batch_size).floor # => 10 20 | total_loss = 0 21 | loss_count = 0 22 | loss_list = [] 23 | 24 | max_epoch.times do |epoch| 25 | # 3: Shuffle the data 26 | # NOTE: Numo has no counterpart to random.permutation, so the index sequence is built from an Array 27 | idx = Numo::Int64.new(data_size).store((0 ... 
data_size).to_a.shuffle) 28 | 29 | # NOTE: the Python sample assigns x = x[idx], but 30 | # in Ruby the left-hand side would become a new local variable inside the loop, so different names are used here 31 | ex = x[idx, true] 32 | et = t[idx, true] 33 | 34 | max_iters.times do |iters| 35 | iter_range = (iters * batch_size) ... ((iters + 1) * batch_size) 36 | batch_x = ex[iter_range, true] 37 | batch_t = et[iter_range, true] 38 | 39 | # 4: Compute the gradients and update the parameters 40 | loss = model.forward(batch_x, batch_t) 41 | model.backward 42 | optimizer.update(model.params, model.grads) 43 | 44 | total_loss += loss 45 | loss_count += 1 46 | 47 | # 5: Periodically (once every 10 iterations) report training progress 48 | if (iters + 1) % 10 == 0 49 | avg_loss = total_loss / loss_count 50 | puts "| epoch #{epoch+1} | iter #{iters+1} / #{max_iters} | loss #{avg_loss}" 51 | loss_list << avg_loss 52 | total_loss = 0 53 | loss_count = 0 54 | end 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /ruby/examples/ch01/two_layers_net.rb: -------------------------------------------------------------------------------- 1 | require 'affine' 2 | require 'sigmoid' 3 | require 'softmax_with_loss' 4 | require 'byebug' 5 | 6 | class TwoLayersNet 7 | attr_reader :layers, :loss_layer 8 | attr_accessor :params, :grads 9 | 10 | def initialize(input_size:, hidden_size:, output_size:) 11 | w1 = 0.01 * Numo::SFloat.new(input_size, hidden_size).rand 12 | b1 = Numo::SFloat.zeros(hidden_size) 13 | w2 = 0.01 * Numo::SFloat.new(hidden_size, output_size).rand 14 | b2 = Numo::SFloat.zeros(output_size) 15 | 16 | @layers = [ 17 | Affine.new(w1, b1), 18 | Sigmoid.new, 19 | Affine.new(w2, b2), 20 | ] 21 | @loss_layer = SoftmaxWithLoss.new 22 | 23 | @params, @grads = @layers.reduce([[], []]) do |acc, layer| 24 | acc[0] += layer.params 25 | acc[1] += layer.grads 26 | acc 27 | end 28 | end 29 | 30 | def predict(x) 31 | @layers.each do |layer| 32 | x = layer.forward(x) 33 | end 34 | x 35 | end 36 | 37 | def forward(x, t) 38 | score = predict(x) 39 | @loss_layer.forward(score, t) 40 | end 41 | 42 | def backward(dout = 1) 43 | dout = @loss_layer.backward(dout) 44 | @layers.reverse.each do |layer| 45 | dout = layer.backward(dout) 46 | end 47 | dout 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /ruby/examples/ch03/cbow_predict.rb: -------------------------------------------------------------------------------- 1 | require "numo/narray" 2 | require "mat_mul.rb" 3 | 4 | # Sample context data 5 | c0 = Numo::NArray[[1, 0, 0, 0, 0, 0, 0]] 6 | c1 = Numo::NArray[[0, 0, 1, 0, 0, 0, 0]] 7 | 8 | # Initialize the weights 9 | w_in = Numo::DFloat.new(7, 3).rand 10 | w_out = Numo::DFloat.new(3, 7).rand 11 | 12 | # Create the layers 13 | in_layer0 = MatMul.new(w_in) 14 | in_layer1 = MatMul.new(w_in) 15 | out_layer = MatMul.new(w_out) 16 | 17 | # Forward pass 18 | h0 = in_layer0.forward(c0) 19 | h1 = in_layer1.forward(c1) 20 | h = 0.5 * (h0 + h1) 21 | s = out_layer.forward(h) 22 | 23 | pp h0.to_a 24 | #pp s.to_a 25 | -------------------------------------------------------------------------------- /ruby/lib/adam.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Adam (http://arxiv.org/abs/1412.6980) 4 | class Adam 5 | def initialize(lr = 0.001, beta1 = 0.9, beta2 = 0.999) 6 | @lr = lr 7 | @beta1 = beta1 8 | @beta2 = beta2 9 | @iter = 0 10 | @m = nil 11 | @v = nil 12 | end 13 | 14 | def update(params, grads) 15 | unless @m 16 | @m = [] 17 | @v = [] 18 | params.each do |param| 19 | @m.append(Numo::SFloat.zeros(param.shape)) 20 | 
@v.append(Numo::SFloat.zeros(param.shape)) 21 | end 22 | end 23 | 24 | @iter += 1 25 | lr_t = @lr * Numo::SFloat::Math.sqrt(1.0 - @beta2**@iter) / 26 | (1.0 - @beta1**@iter) 27 | 28 | params.length.times do |i| 29 | @m[i] += (1 - @beta1) * (grads[i] - @m[i]) 30 | @v[i] += (1 - @beta2) * (grads[i]**2 - @v[i]) 31 | 32 | params[i].inplace - lr_t * @m[i] / (Numo::SFloat::Math.sqrt(@v[i]) + 1e-7) 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /ruby/lib/affine.rb: -------------------------------------------------------------------------------- 1 | require "numo/narray" 2 | 3 | class Affine 4 | attr_accessor :params, :grads, :x 5 | 6 | def initialize(weight, bias) 7 | @params = [weight, bias] 8 | @grads = [weight.new_zeros, bias.new_zeros] 9 | @x = nil 10 | end 11 | 12 | def forward(x) 13 | weight, bias = @params 14 | @x = x 15 | x.dot(weight) + bias 16 | end 17 | 18 | def backward(dout) 19 | weight, _ = @params 20 | dx = dout.dot(weight.transpose) 21 | dW = @x.transpose.dot(dout) 22 | db = dout.sum(axis: 0) 23 | 24 | @grads[0].store dW 25 | @grads[1].store db 26 | dx 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /ruby/lib/embedding.rb: -------------------------------------------------------------------------------- 1 | class Embedding 2 | attr_reader :params, :grads 3 | 4 | def initialize(w) 5 | @params = [w] 6 | @grads = [w.new_zeros] 7 | @idx = nil 8 | end 9 | 10 | def forward(idx) 11 | w = @params.first 12 | @idx = idx 13 | w[idx, true] 14 | end 15 | 16 | def backward(dout) 17 | dw = @grads.first 18 | dw.store(0) 19 | @idx.each_with_index do |word_id, i| 20 | dw[word_id, true].inplace + dout[i, true] 21 | end 22 | nil 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /ruby/lib/embedding_dot.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'numo/narray' 4 | require 'embedding' 5 | 6 | class EmbeddingDot 7 | def initialize(w) 8 | @embed = Embedding.new(w) 9 | @params = @embed.params 10 | @grads = @embed.grads 11 | @cache = nil 12 | end 13 | 14 | def forward(h, idx) 15 | target_w = @embed.forward(idx) 16 | out = (target_w * h).sum(axis: 1) 17 | 18 | @cache = [h, target_w] 19 | out 20 | end 21 | 22 | def backward(dout) 23 | h, target_w = @cache 24 | dout = dout.reshape(dout.shape[0], 1) # couldn't transform be used here instead?
25 | 26 | dtarget_w = dout * h 27 | @embed.backward(dtarget_w) 28 | dh = dout * target_w 29 | dh 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /ruby/lib/mat_mul.rb: -------------------------------------------------------------------------------- 1 | require 'numo/narray' 2 | 3 | class MatMul 4 | attr_accessor :params, :grads, :x 5 | def initialize(w) 6 | @params = [w] 7 | @grads = [w.new_zeros] 8 | @x = nil 9 | end 10 | 11 | def forward(x) 12 | w = @params.first 13 | @x = x 14 | x.dot(w) 15 | end 16 | 17 | def backward(dout) 18 | w = @params.first 19 | dx = dout.dot(w.transpose) 20 | dw = @x.transpose.dot(dout) 21 | @grads[0].store(dw) 22 | dx 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /ruby/lib/negative_sampling_loss.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'numo/narray' 4 | 5 | class NegativeSamplingLoss 6 | def initialize(w, corpus, power = 0.75, sample_size = 5) 7 | @sample_size = sample_size 8 | @sampler = UnigramSampler.new(corpus, power, sample_size) # forward/backward are still unimplemented 9 | end 10 | end 11 | 12 | class UnigramSampler 13 | def initialize(corpus, power, sample_size) 14 | @sample_size = sample_size 15 | @vocab_size = nil 16 | @word_p = nil 17 | 18 | counts = Hash.new(0) 19 | corpus.each do |word_id| 20 | counts[word_id] += 1 21 | end 22 | 23 | @vocab_size = counts.length 24 | 25 | @word_p = Numo::SFloat[*counts.sort.map(&:last)] # order the counts by word id 26 | 27 | @word_p = @word_p ** power 28 | @word_p /= @word_p.sum 29 | end 30 | 31 | def get_negative_sample(target) 32 | batch_size = target.shape[0] 33 | 34 | negative_sample = Numo::UInt32.zeros(batch_size, @sample_size) 35 | 36 | batch_size.times do |i| 37 | p = @word_p.dup 38 | target_idx = target[i] 39 | p[target_idx] = 0 40 | p /= p.sum 41 | negative_sample[i, true] = random_choice_without_replacement( 42 | @vocab_size, size: @sample_size, p: p 43 | ) 44 | end 45 | 46 | negative_sample 47 | end 48 | end 49 | 50 | # Implementation is based on the Weighted Random Sampling from this SO 51 | # https://stackoverflow.com/a/2149533. 52 | def random_choice_without_replacement(a, size: 1, p:) 53 | array = a.class == Integer ? (0...a).to_a : a 54 | items = array.zip(p) 55 | 56 | heap = rws_heap(items) 57 | 58 | size.times.map { rws_heap_pop(heap) } 59 | end 60 | 61 | Node = Struct.new(:w, :v, :tw) 62 | Rand = Random.new 63 | 64 | def rws_heap(items) 65 | h = [nil] 66 | items.each do |v, w| 67 | h.append(Node.new(w, v, w)) 68 | end 69 | 70 | (h.length - 1).downto(2).each do |i| 71 | h[i >> 1].tw += h[i].tw 72 | end 73 | 74 | h 75 | end 76 | 77 | def rws_heap_pop(h) 78 | gas = h[1].tw * Rand.rand 79 | 80 | i = 1 81 | 82 | while gas >= h[i].w 83 | gas -= h[i].w 84 | i <<= 1 85 | if gas >= h[i].tw 86 | gas -= h[i].tw 87 | i += 1 88 | end 89 | end 90 | 91 | w = h[i].w 92 | v = h[i].v 93 | 94 | h[i].w = 0 95 | while i.positive?
96 | h[i].tw -= w 97 | i >>= 1 98 | end 99 | 100 | v 101 | end 102 | -------------------------------------------------------------------------------- /ruby/lib/optimizer.rb: -------------------------------------------------------------------------------- 1 | # Stochastic Gradient Descent 2 | class SGD 3 | def initialize(lr = 0.01) 4 | @lr = lr 5 | end 6 | 7 | def update(params, grads) 8 | params.length.times do |i| 9 | params[i].inplace - @lr * grads[i] 10 | end 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /ruby/lib/rnn.rb: -------------------------------------------------------------------------------- 1 | require 'numo/narray' 2 | 3 | class Rnn 4 | 5 | attr_accessor :params, :grads, :cache 6 | 7 | def initialize(wx, wh, b) 8 | @params = [wx, wh, b] 9 | @grads = [wx.new_zeros, wh.new_zeros, b.new_zeros] 10 | end 11 | 12 | def forward(x, h_prev) 13 | wx, wh, b = @params 14 | t = h_prev.dot(wh) + x.dot(wx) + b 15 | h_next = Numo::NMath::tanh(t) 16 | 17 | @cache = [x, h_prev, h_next] 18 | 19 | h_next 20 | end 21 | 22 | def backward(dh_next) 23 | wx, wh, _b = @params 24 | x, h_prev, h_next = @cache 25 | 26 | dt = dh_next * (1 - h_next**2) 27 | db = dt.sum(axis: 0) 28 | dwh = h_prev.transpose.dot(dt) 29 | dh_prev = dt.dot(wh.transpose) 30 | dwx = x.transpose.dot(dt) 31 | dx = dt.dot(wx.transpose) 32 | 33 | @grads[0].store(dwx) 34 | @grads[1].store(dwh) 35 | @grads[2].store(db) 36 | 37 | [dx, dh_prev] 38 | end 39 | end -------------------------------------------------------------------------------- /ruby/lib/sigmoid.rb: -------------------------------------------------------------------------------- 1 | require 'numo/narray' 2 | 3 | class Sigmoid 4 | attr_accessor :params, :grads, :out 5 | def initialize 6 | @params = [] 7 | @grads = [] 8 | @out = nil 9 | end 10 | 11 | def forward(x) 12 | @out = 1.0 / (1.0 + Numo::NMath.exp(-x)) 13 | end 14 | 15 | def backward(dout) 16 | dout * (1.0 - @out) * @out 17 | end 18 | end -------------------------------------------------------------------------------- /ruby/lib/simple_cbow.rb: -------------------------------------------------------------------------------- 1 | require "numo/narray" 2 | require "mat_mul" 3 | require "softmax_with_loss" 4 | require "embedding" 5 | 6 | class SimpleCBow 7 | attr_reader :params, :grads, :word_vecs 8 | 9 | def initialize(vocab_size, hidden_size) 10 | v, h = vocab_size, hidden_size 11 | 12 | # Initialize the weights 13 | w_in = 0.01 * Numo::DFloat.new(v, h).rand 14 | w_out = 0.01 * Numo::DFloat.new(h, v).rand 15 | 16 | # Create the layers 17 | @in_layer0 = Embedding.new(w_in) 18 | @in_layer1 = Embedding.new(w_in) 19 | # @in_layer0 = MatMul.new(w_in) 20 | # @in_layer1 = MatMul.new(w_in) 21 | @out_layer = MatMul.new(w_out) 22 | @loss_layer = SoftmaxWithLoss.new 23 | 24 | # Collect all the weights and gradients into lists 25 | layers = [@in_layer0, @in_layer1, @out_layer] 26 | @params, @grads = layers.reduce([[], []]) do |acc, layer| 27 | [acc[0] + layer.params, acc[1] + layer.grads] 28 | end 29 | 30 | # Expose the word embeddings as an instance variable 31 | @word_vecs = w_in 32 | end 33 | 34 | def forward(contexts, target) 35 | h0 = @in_layer0.forward(contexts[true, 0]) 36 | h1 = @in_layer1.forward(contexts[true, 1]) 37 | h = (h0 + h1) * 0.5 38 | score = @out_layer.forward(h) 39 | loss = @loss_layer.forward(score, target) 40 | return loss 41 | end 42 | 43 | def backward(dout=1) 44 | ds = @loss_layer.backward(dout) 45 | da = @out_layer.backward(ds) 46 | da *= 0.5 47 | @in_layer1.backward(da) 48 | @in_layer0.backward(da) 49 | nil 50 | end 51 | end 52 | 
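Putting the CBOW pieces together: the following is a minimal smoke-test sketch (not a file in this repo) that mirrors ruby/test/simple_cbow_test.rb, assuming ruby/lib is on the load path. It runs one forward/backward pass through SimpleCBow and applies a single SGD step:

require 'numo/narray'
require 'simple_cbow'   # ruby/lib/simple_cbow.rb
require 'optimizer'     # ruby/lib/optimizer.rb (SGD)

model = SimpleCBow.new(7, 3)                # vocab_size = 7, hidden_size = 3
contexts = Numo::NArray[[1, 0, 0, 0, 0, 0, 0],
                        [0, 0, 1, 0, 0, 0, 0]]
target = Numo::NArray[0, 1, 0, 0, 0, 0, 0]  # one-hot teacher label

loss = model.forward(contexts, target)      # scalar cross-entropy loss
model.backward                              # fills model.grads
SGD.new(0.1).update(model.params, model.grads)  # one SGD step on all params
puts loss

Repeating this forward/update loop over real context/target pairs (see create_contexts_target in ruby/lib/util.rb) is what the Trainer class in ruby/lib/trainer.rb automates.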
-------------------------------------------------------------------------------- /ruby/lib/softmax_with_loss.rb: -------------------------------------------------------------------------------- 1 | require 'numo/narray' 2 | 3 | class SoftmaxWithLoss 4 | def initialize(y=nil, t=nil) 5 | @y = y # output of softmax 6 | @t = t # teacher labels 7 | end 8 | 9 | def forward(x, t) 10 | @t = t 11 | @y = softmax(x) 12 | 13 | return cross_entropy_error(@y, @t) 14 | end 15 | 16 | def backward(dout=1) 17 | t = @t 18 | y = @y 19 | 20 | if y.ndim == 1 21 | t = t.reshape(1, t.size) 22 | y = y.reshape(1, y.size) 23 | end 24 | 25 | # If the teacher data is one-hot, convert it to class-label indices 26 | if t.size == y.size 27 | t = t.max_index(axis: 1) 28 | end 29 | 30 | dx = y.copy() 31 | dx[t].inplace - 1 32 | dx *= dout 33 | dx /= t.size 34 | 35 | return dx 36 | end 37 | 38 | def softmax(x) 39 | if x.ndim == 2 40 | x = x - x.max(axis: 1, keepdims: true) 41 | x = Numo::NMath.exp(x) 42 | x /= x.sum(axis: 1, keepdims: true) 43 | elsif x.ndim == 1 44 | x = x - x.max 45 | x = Numo::NMath.exp(x) 46 | x /= x.sum 47 | end 48 | 49 | return x 50 | end 51 | 52 | def cross_entropy_error(y, t) 53 | if y.ndim == 1 54 | t = t.reshape(1, t.size) 55 | y = y.reshape(1, y.size) 56 | end 57 | 58 | # If the teacher data is one-hot, convert it to class-label indices 59 | if t.size == y.size 60 | t = t.max_index(axis: 1) 61 | end 62 | 63 | return -1 * Numo::NMath.log(y[t] + 1e-7).sum / t.size 64 | end 65 | 66 | def params 67 | [@t, @y] 68 | end 69 | end 70 | -------------------------------------------------------------------------------- /ruby/lib/time_embedding.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'numo/narray' 4 | require 'embedding' 5 | 6 | class TimeEmbedding 7 | attr_accessor :params, :grads 8 | 9 | def initialize(w) 10 | @params = [w] 11 | @grads = [w.new_zeros] 12 | end 13 | 14 | def forward(idx) 15 | w = @params.first 16 | n, t = idx.shape 17 | @idx = idx 18 | out = Numo::SFloat.zeros(n, t, w.shape.last) 19 | @layers = [] 20 | t.times do |ti| 21 | layer = Embedding.new(w) 22 | out[true, ti, true] = layer.forward(idx[true, ti]) 23 | @layers << layer 24 | end 25 | out 26 | end 27 | 28 | def backward(dout) 29 | _n, t, _d = dout.shape 30 | w = @params.first 31 | 32 | grad = w.new_zeros 33 | 34 | (t - 1).downto(0) do |ti| 35 | layer = @layers[ti] 36 | layer.backward(dout[true, ti, true]) 37 | grad.inplace + layer.grads[0] 38 | end 39 | 40 | @grads[0].store(grad) 41 | nil 42 | end 43 | end 44 | 45 | -------------------------------------------------------------------------------- /ruby/lib/time_rnn.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'numo/narray' 4 | require_relative '../lib/rnn' 5 | 6 | class TimeRnn 7 | attr_accessor :params, :grads 8 | 9 | def initialize(wx, wh, b, stateful: false) 10 | @params = [wx, wh, b] 11 | @grads = [wx.new_zeros, wh.new_zeros, b.new_zeros] 12 | @layers = nil 13 | 14 | @h = nil 15 | @dh = nil 16 | @stateful = stateful 17 | end 18 | 19 | def forward(xs) 20 | wx, wh, b = @params 21 | n, t, d = xs.shape 22 | d, h = wx.shape 23 | 24 | @layers = [] 25 | hs = Numo::SFloat.zeros(n, t, h) 26 | 27 | if !@stateful || @h.nil?
28 | @h = Numo::SFloat.zeros(n, h) 29 | end 30 | 31 | t.times do |ti| 32 | layer = Rnn.new(*@params) 33 | @h = layer.forward(xs[true, ti, true], @h) 34 | hs[true, ti, true] = @h 35 | @layers.append(layer) 36 | end 37 | 38 | hs 39 | end 40 | 41 | def backward(dhs) 42 | wx, wh, b = @params 43 | n, t, h = dhs.shape 44 | d, h = wx.shape 45 | 46 | dxs = Numo::SFloat.zeros(n, t, d) 47 | dh = 0 48 | grads = [0, 0, 0] 49 | 50 | (t - 1).downto(0) do |ti| 51 | layer = @layers[ti] 52 | dx, dh = layer.backward(dhs[true, ti, true] + dh) 53 | dxs[true, ti, true] = dx 54 | 55 | layer.grads.each_with_index do |grad, i| 56 | grads[i] += grad 57 | end 58 | end 59 | 60 | grads.each_with_index do |grad, i| 61 | @grads[i].store(grad) 62 | end 63 | 64 | @dh = dh 65 | 66 | dxs 67 | end 68 | 69 | def state=(h) 70 | @h = h 71 | end 72 | 73 | def reset_state 74 | @h = nil 75 | end 76 | end 77 | -------------------------------------------------------------------------------- /ruby/lib/trainer.rb: -------------------------------------------------------------------------------- 1 | require 'matplotlib/pyplot' 2 | require_relative 'util' 3 | 4 | class Trainer 5 | def initialize(model, optimizer) 6 | @model = model 7 | @optimizer = optimizer 8 | @loss_list = [] 9 | @eval_interval = nil 10 | @current_epoch = 0 11 | end 12 | 13 | def fit(x, t, max_epoch: 10, batch_size: 32, max_grad: nil, eval_interval: 20) 14 | data_size = x.shape.first 15 | max_iters = (data_size / batch_size).floor 16 | @eval_interval = eval_interval 17 | total_loss = 0 18 | loss_count = 0 19 | 20 | start_time = Time.now 21 | max_epoch.times do |epoch| 22 | @current_epoch += 1 23 | # Shuffle 24 | idx = Numo::Int64.new(data_size).store((0 ... data_size).to_a.shuffle) 25 | ex = get_at_dim_index(x, 0, idx) 26 | et = get_at_dim_index(t, 0, idx) 27 | 28 | max_iters.times do |iters| 29 | batch_range = (iters * batch_size) ... ((iters + 1) * batch_size) 30 | batch_x = get_at_dim_index(ex, 0, batch_range) 31 | batch_t = get_at_dim_index(et, 0, batch_range) 32 | 33 | # Compute the gradients and update the parameters with the optimizer 34 | loss = @model.forward(batch_x, batch_t) 35 | @model.backward 36 | params, grads = remove_duplicate(@model.params, @model.grads) # consolidate shared weights into one 37 | clip_grads(grads, max_grad) unless max_grad.nil? 38 | @optimizer.update(params, grads) 39 | total_loss += loss 40 | loss_count += 1 41 | 42 | # Evaluation (@current_epoch is already incremented at the top of the loop, so print it as-is) 43 | if !eval_interval.nil? && iters % eval_interval == 0 44 | avg_loss = total_loss / loss_count 45 | elapsed_time = Time.now - start_time 46 | puts "| epoch #{@current_epoch} | iter #{iters + 1} / #{max_iters} | time #{elapsed_time} | loss #{avg_loss}" 47 | @loss_list << avg_loss 48 | total_loss = 0 49 | loss_count = 0 50 | end 51 | end 52 | end 53 | end 54 | 55 | def plot(ylim = nil) 56 | plt = Matplotlib::Pyplot 57 | x = (0 ... @loss_list.length).to_a 58 | plt.ylim(ylim) unless ylim.nil? 59 | plt.plot(x, @loss_list, label: 'train') 60 | plt.xlabel("iterations (x#{@eval_interval})") 61 | plt.ylabel('loss') 62 | plt.show 63 | end 64 | end 65 | 66 | def remove_duplicate(_params, _grads) 67 | # Consolidate duplicated weights in the parameter list into one, accumulating the corresponding gradients 68 | params = _params.clone 69 | grads = _grads.clone 70 | 71 | while true do 72 | find_flg = false 73 | l = params.length 74 | (l - 1).times do |i| 75 | ((i + 1) ... 
l).each do |j| 76 | if params[i] && params[j] 77 | if params[i].equal?(params[j]) # identity check: shared weights are literally the same object 78 | # The weights are shared 79 | grads[i].inplace + grads[j] # accumulate the gradient 80 | find_flg = true 81 | params.delete_at(j) 82 | grads.delete_at(j) 83 | elsif params[i].ndim == 2 && params[j].ndim == 2 && params[i].transpose.shape == params[j].shape && params[i].transpose.eq(params[j]).all? 84 | # The weights are shared as transposed matrices (weight tying) 85 | grads[i].inplace + grads[j].transpose 86 | find_flg = true 87 | params.delete_at(j) 88 | grads.delete_at(j) 89 | end 90 | end 91 | break if find_flg 92 | end 93 | break if find_flg 94 | end 95 | break unless find_flg 96 | end 97 | return params, grads 98 | end 99 | -------------------------------------------------------------------------------- /ruby/lib/util.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | def clip_grads(grads, max_norm) 4 | total_norm = 0 5 | grads.each { |grad| total_norm += (grad ** 2).sum } 6 | total_norm = Math.sqrt(total_norm) # total_norm is a plain Float here 7 | 8 | rate = max_norm / (total_norm + 1e-6) 9 | if rate < 1 10 | grads.each { |grad| grad.inplace * rate } # scale in place so callers see the clipped gradients 11 | end 12 | end 13 | 14 | def preprocess(text) 15 | text = text.downcase 16 | .gsub('.', ' .') 17 | words = text.split(' ') 18 | 19 | word_to_id = {} 20 | id_to_word = {} 21 | 22 | words.each do |word| 23 | unless word_to_id.include?(word) 24 | new_id = word_to_id.length 25 | word_to_id[word] = new_id 26 | id_to_word[new_id] = word 27 | end 28 | end 29 | 30 | corpus = Numo::NArray[*words.map { |w| word_to_id[w] }] 31 | 32 | [corpus, word_to_id, id_to_word] 33 | end 34 | 35 | def create_contexts_target(corpus, window_size: 1) 36 | target = corpus[window_size...-window_size] 37 | contexts = [] 38 | 39 | (window_size...(corpus.length - window_size)).each do |idx| 40 | cs = [] 41 | (-window_size..window_size).each do |t| 42 | next if t.zero?
43 | cs.append(corpus[idx + t]) 44 | end 45 | contexts.append(cs) 46 | end 47 | n_contexts = Numo::UInt32.zeros(contexts.length, contexts[0].length) 48 | n_contexts[] = contexts 49 | 50 | n_target = Numo::UInt32.zeros(target.length) 51 | n_target[] = target 52 | 53 | [n_contexts, n_target] 54 | end 55 | 56 | def convert_one_hot(corpus, vocab_size) 57 | n = corpus.shape[0] 58 | 59 | if corpus.ndim == 1 60 | one_hot = Numo::UInt32.zeros(n, vocab_size) 61 | corpus.each_with_index do |word_id, idx| 62 | one_hot[idx, word_id] = 1 63 | end 64 | elsif corpus.ndim == 2 65 | c = corpus.shape[1] 66 | one_hot = Numo::UInt32.zeros(n, c, vocab_size) 67 | 68 | n.times do |idx0| 69 | word_ids = corpus[idx0, true] 70 | word_ids.each_with_index do |word_id, idx1| 71 | one_hot[idx0, idx1, word_id] = 1 72 | end 73 | end 74 | end 75 | 76 | one_hot 77 | end 78 | 79 | def get_at_dim_index(x, dim_no, idxs) 80 | ind = dim_full_indices(x, dim_no, idxs) 81 | x[*ind] 82 | end 83 | 84 | def dim_full_indices(x, dim_no, idxs) 85 | ind = Array.new(x.ndim, true) 86 | ind[dim_no] = idxs 87 | ind 88 | end 89 | -------------------------------------------------------------------------------- /ruby/test/affine_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "affine" 3 | require "numo/narray" 4 | 5 | class AffineTest < Test::Unit::TestCase 6 | def setup 7 | @weight = Numo::SFloat[[1,2,3],[5,8,13]] # input: 2, hidden: 3 8 | @bias = Numo::SFloat[0.2, 0.3, 0.4] 9 | @target = Affine.new(@weight, @bias) 10 | end 11 | 12 | def test_initialize 13 | assert_equal [@weight, @bias], @target.params 14 | assert_equal [ Numo::SFloat[[0,0,0],[0,0,0]], 15 | Numo::SFloat[0,0,0] 16 | ], @target.grads 17 | end 18 | 19 | def test_forward1 20 | x = Numo::SFloat[[3,1.5]] 21 | # x.dot(W) = [3*1 + 1.5*5, 3*2 + 1.5*8, 3*3 + 1.5*13] 22 | # = [3 + 7.5 , 6 + 12 , 9 + 19.5 ] 23 | # = [10.5, 18 , 28.5] 24 | # x.dot(W) + b = [10.7, 18.3, 28.9] 25 | assert_equal Numo::SFloat[[10.7,18.3,28.9]], @target.forward(x) 26 | end 27 | 28 | def test_forward2 29 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 30 | # x.dot(W) = [[2*1 + 7*5, 2*2 + 7*8, 2*3 + 7*13 ], 31 | # [3*1 + 9*5, 3*2 + 9*8, 3*3 + 9*13 ], 32 | # [11*1 + 13*5, 11*2 + 13*8, 11*3 + 13*13]] 33 | # = [[37, 60, 97], [48, 78, 126], [76, 126, 202]] 34 | assert_equal Numo::SFloat[[37.2, 60.3, 97.4], 35 | [48.2, 78.3, 126.4], 36 | [76.2, 126.3, 202.4]], @target.forward(x) 37 | end 38 | 39 | def test_backward1 40 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 41 | dout = Numo::SFloat[[1,0,0],[0,1,0],[0,0,1]] # hidden = 3, input = 2 42 | @target.forward(x) 43 | dLdx = @target.backward(dout) 44 | assert_equal @weight.transpose, dLdx 45 | assert_equal x.transpose, @target.grads[0] 46 | assert_equal Numo::SFloat[1,1,1], @target.grads[1] 47 | end 48 | 49 | def test_backward2 50 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 51 | dout = Numo::SFloat[[1,0.5,0.5],[0.5,1,0.5],[0.5,0.5,1]] # hidden = 3, input = 2 52 | @target.forward(x) 53 | dLdx = @target.backward(dout) 54 | assert_equal Numo::SFloat[[3.5, 15.5], 55 | [4, 17], 56 | [4.5, 19.5]], dLdx 57 | assert_equal Numo::SFloat[[9, 9.5, 13.5], 58 | [18, 19, 21]], @target.grads[0] 59 | assert_equal Numo::SFloat[2,2,2], @target.grads[1] 60 | end 61 | 62 | def test_backward3 63 | x = Numo::SFloat[[3, 1.5]] 64 | dout = Numo::SFloat[[1],[0.5],[1]] # hidden = 3, input = 2 65 | @target.forward(x) 66 | dLdx = @target.backward(dout) 67 | assert_equal Numo::SFloat[[6,26],[3,13],[6,26]], dLdx #dx 68 | assert_equal 
Numo::SFloat[[7.5, 7.5, 7.5],[3.75, 3.75, 3.75]], @target.grads[0] #dW 69 | assert_equal Numo::SFloat[2.5, 2.5, 2.5], @target.grads[1] #repeat 70 | end 71 | end 72 | -------------------------------------------------------------------------------- /ruby/test/mat_mul_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "mat_mul" 3 | require "numo/narray" 4 | 5 | class MatMulTest < Test::Unit::TestCase 6 | def setup 7 | @weight = Numo::SFloat[[1,2,3],[5,8,13]] # input: 2, hidden: 3 8 | @target = MatMul.new(@weight) 9 | end 10 | 11 | def test_initialize 12 | assert_equal [@weight], @target.params 13 | assert_equal [ Numo::SFloat[[0,0,0],[0,0,0]] ], @target.grads 14 | end 15 | 16 | def test_forward1 17 | x = Numo::SFloat[3,1.5] 18 | # x.dot(W) = [3*1 + 1.5*5, 3*2 + 1.5*8, 3*3 + 1.5*13] 19 | # = [3 + 7.5 , 6 + 12 , 9 + 19.5 ] 20 | # = [10.5, 18 , 28.5] 21 | assert_equal Numo::SFloat[10.5,18,28.5], @target.forward(x) 22 | end 23 | 24 | def test_forward2 25 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 26 | # x.dot(W) = [[2*1 + 7*5, 2*2 + 7*8, 2*3 + 7*13 ], 27 | # [3*1 + 9*5, 3*2 + 9*8, 3*3 + 9*13 ], 28 | # [11*1 + 13*5, 11*2 + 13*8, 11*3 + 13*13]] 29 | # = [[37, 60, 97], [48, 78, 126], [76, 126, 202]] 30 | assert_equal Numo::SFloat[[37, 60, 97], 31 | [48, 78, 126], 32 | [76, 126, 202]], @target.forward(x) 33 | end 34 | 35 | def test_backward1 36 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 37 | dout = Numo::SFloat[[1,0,0],[0,1,0],[0,0,1]] # hidden = 3, input = 2 38 | @target.forward(x) 39 | dLdx = @target.backward(dout) 40 | assert_equal @weight.transpose, dLdx 41 | assert_equal x.transpose, @target.grads[0] 42 | end 43 | 44 | def test_backward2 45 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 46 | dout = Numo::SFloat[[1,0.5,0.5],[0.5,1,0.5],[0.5,0.5,1]] # hidden = 3, input = 2 47 | @target.forward(x) 48 | dLdx = @target.backward(dout) 49 | assert_equal Numo::SFloat[[3.5, 15.5], 50 | [4, 17], 51 | [4.5, 19.5]], dLdx 52 | assert_equal Numo::SFloat[[9, 9.5, 13.5], 53 | [18, 19, 21]], @target.grads[0] 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /ruby/test/optimizer_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "numo/narray" 3 | require "optimizer" 4 | 5 | class SGDTest < Test::Unit::TestCase 6 | def setup 7 | @lr = 0.02 8 | @sgd = SGD.new(@lr) 9 | end 10 | 11 | def test_update 12 | params = [Numo::SFloat[0.1, 0.2, 0.3], Numo::SFloat[0.4, 0.5, 0.6]] 13 | grads = [Numo::SFloat[0.01, 0.02, 0.03], Numo::SFloat[0.04, 0.05, 0.06]] 14 | 15 | @sgd.update(params, grads) 16 | 17 | # [[0.1-0.02*0.01, 0.2-0.02*0.02, 0.3-0.02*0.03] 18 | # [0.4-0.02*0.04, 0.5-0.02*0.05, 0.6-0.02*0.06]] 19 | # = [[0.0998, 0.1996, 0.2994], 20 | # [0.3992, 0.499, 0.5988]] 21 | assert_equal [Numo::SFloat[0.0998, 0.1996, 0.2994], 22 | Numo::SFloat[0.3992, 0.499, 0.5988]], 23 | params 24 | end 25 | end 26 | 27 | -------------------------------------------------------------------------------- /ruby/test/rnn_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "rnn" 3 | require "numo/narray" 4 | 5 | class RnnTest < Test::Unit::TestCase 6 | def setup 7 | # N = 5, D = 2, H = 2 8 | @wx = Numo::SFloat[[0.1, 0.2], [0.5, 0.8]] # D x H 9 | @wh = Numo::SFloat[[0.4, 0.2], [0.1, 0.9]] # H x H 10 | @b = Numo::SFloat[0.1] 11 | 12 | @target = Rnn.new(@wx, @wh, @b) 13 | end 14 | 15 
| def test_initialize 16 | assert_equal @wx, @target.params[0] 17 | assert_equal [ 18 | Numo::SFloat[[0, 0], [0, 0]], 19 | Numo::SFloat[[0, 0], [0, 0]], 20 | Numo::SFloat[0] 21 | ], @target.grads 22 | end 23 | 24 | def test_forward 25 | x = Numo::SFloat[ 26 | [0.1, 0.4], 27 | [0.7, 0.5], 28 | [0.3, 0.5], 29 | [0.3, 0.8], 30 | [0.1, 0.9] 31 | ] # N x D 32 | h_prev = Numo::SFloat[ 33 | [0.3, 0.5], 34 | [0.1, 0.4], 35 | [0.7, 0.5], 36 | [0.3, 0.8], 37 | [0.2, 0.2] 38 | ] # N x H 39 | actual = @target.forward(x, h_prev) 40 | expected = Numo::SFloat[ 41 | [0.446244, 0.739783], 42 | [0.462117, 0.769867], 43 | [0.610677, 0.817754], 44 | [0.623065, 0.918602], 45 | [0.578363, 0.785664] 46 | ] 47 | #assert_equal expected, actual 48 | 49 | assert_delta_array(expected, actual) 50 | end 51 | 52 | def test_backward 53 | dh_next = Numo::SFloat[ 54 | [0.3, 0.5], 55 | [0.1, 0.4], 56 | [0.7, 0.5], 57 | [0.3, 0.8], 58 | [0.2, 0.2] 59 | ] # N x H 60 | x = Numo::SFloat[ 61 | [0.1, 0.4], 62 | [0.7, 0.5], 63 | [0.3, 0.5], 64 | [0.3, 0.8], 65 | [0.1, 0.9] 66 | ] # N x D 67 | h_prev = Numo::SFloat[ 68 | [0.3, 0.5], 69 | [0.1, 0.4], 70 | [0.7, 0.5], 71 | [0.3, 0.8], 72 | [0.2, 0.2] 73 | ] # N x H 74 | @target.forward(x, h_prev) 75 | actual_dx, actual_dh_prev = @target.backward(dh_next) 76 | expected_dx = Numo::SFloat[[0.0692981, 0.301218], 77 | [0.0404489, 0.16966], 78 | [0.077023, 0.351987], 79 | [0.043341, 0.191718], 80 | [0.0286192, 0.127787]] 81 | assert_delta_array(expected_dx, actual_dx) 82 | expected_dh_prev = Numo::SFloat[[0.141376, 0.22775], 83 | [0.0640424, 0.154494], 84 | [0.208708, 0.19297], 85 | [0.0984021, 0.130797], 86 | [0.068549, 0.0822017]] 87 | assert_delta_array(expected_dh_prev, actual_dh_prev) 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /ruby/test/simple_cbow_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "simple_cbow" 3 | 4 | class SimpleCBowTest < Test::Unit::TestCase 5 | def setup 6 | @simple_cbow = SimpleCBow.new(7, 3) 7 | end 8 | 9 | def test_initialize 10 | assert(true) 11 | end 12 | 13 | def test_forward 14 | contexts = Numo::NArray[[1, 0, 0, 0, 0, 0, 0], 15 | [0, 0, 1, 0, 0, 0, 0]] 16 | target = Numo::NArray[0, 1, 0, 0, 0, 0, 0] 17 | 18 | @simple_cbow.forward(contexts, target) 19 | assert(true) 20 | end 21 | 22 | def test_backward 23 | Numo::NArray.srand(1) 24 | 25 | contexts = Numo::NArray[[1, 0, 0, 0, 0, 0, 0], 26 | [0, 0, 1, 0, 0, 0, 0]] 27 | target = Numo::NArray[0, 1, 0, 0, 0, 0, 0] 28 | 29 | @simple_cbow.forward(contexts, target) 30 | @simple_cbow.backward 31 | 32 | expected = [[0.000617545, 0.00373067, 0.00794815], 33 | [0.00201042, 0.00116041, 0.00344032], 34 | [0.00539948, 0.00737815, 0.00165089], 35 | [0.000508827, 0.00108065, 0.000687079], 36 | [0.00904121, 0.00478644, 0.00342969], 37 | [0.00164541, 0.0074603, 0.00138994], 38 | [0.00411576, 0.00292532, 0.00869421]] 39 | 40 | @simple_cbow.word_vecs.to_a.zip(expected).each do |actual_row, expected_row| 41 | actual_row.zip(expected_row) do |actual, expected_value| 42 | assert_in_delta actual, expected_value, 0.00001 43 | end 44 | end 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /ruby/test/softmax_with_loss_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "softmax_with_loss" 3 | require "numo/narray" 4 | 5 | class SoftmaxWithLossTest < 
Test::Unit::TestCase 6 | def setup 7 | @target = SoftmaxWithLoss.new 8 | assert_equal [nil, nil], @target.params 9 | end 10 | 11 | def test_softmax1 12 | # The softmax results were cross-checked with this site: https://keisan.casio.jp/exec/system/1516841458 13 | assert_in_delta 0.86681333219734, @target.softmax(Numo::SFloat[3, 7, 5])[1], 0.00001 14 | assert_in_delta 0.86681333219734, @target.softmax(Numo::SFloat[[3, 7, 5], [1, 9, 2]])[1], 0.00001 15 | assert_in_delta 0.99875420933679, @target.softmax(Numo::SFloat[[3, 7, 5], [1, 9, 2]])[4], 0.00001 16 | end 17 | 18 | def test_softmax2 19 | assert_in_delta 1.0, @target.softmax(Numo::SFloat[3, 7, 5]).sum, 0.00001 20 | end 21 | 22 | def test_cross_entropy_error 23 | assert_in_delta 0.51082562376, @target.cross_entropy_error(Numo::SFloat[0.3, 0.6, 0.1], Numo::SFloat[0, 1, 0]), 0.00001 24 | assert_in_delta 0.10536051565, @target.cross_entropy_error(Numo::SFloat[0.0, 0.1, 0.9], Numo::SFloat[0, 0, 1]), 0.00001 25 | assert_in_delta 0.308093069705, @target.cross_entropy_error(Numo::SFloat[[0.3, 0.6, 0.1], [0.0, 0.1, 0.9]], Numo::SFloat[[0, 1, 0], [0, 0, 1]]), 0.00001 26 | end 27 | 28 | def test_backward 29 | bwtarget = SoftmaxWithLoss.new(Numo::SFloat[3, 1, 9], Numo::SFloat[0, 0, 1]) 30 | a = bwtarget.backward() 31 | assert_in_delta 3, a[0], 0.00001 32 | assert_in_delta 1, a[1], 0.00001 33 | assert_in_delta 8, a[2], 0.00001 34 | bwtarget = SoftmaxWithLoss.new(Numo::SFloat[[3, 1, 9], [2, 6, 5]], Numo::SFloat[[0, 0, 1], [0, 1, 0]]) 35 | a = bwtarget.backward() 36 | assert_in_delta 1.5, a[0], 0.00001 37 | assert_in_delta 0.5, a[1], 0.00001 38 | assert_in_delta 4, a[2], 0.00001 39 | assert_in_delta 1, a[3], 0.00001 40 | assert_in_delta 2.5, a[4], 0.00001 41 | assert_in_delta 2.5, a[5], 0.00001 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /ruby/test/test_helper.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) 2 | require 'test/unit' 3 | 4 | module Test 5 | module Unit 6 | module Assertions 7 | def assert_delta_array(expected, actual, delta = 0.00001, message = nil) 8 | assert_equal(expected.shape, actual.shape) 9 | expected.to_a.flatten.zip(actual.to_a.flatten).each do |expected_value, actual_value| 10 | assert_in_delta expected_value, actual_value, delta, message 11 | end 12 | end 13 | end 14 | end 15 | end 16 | 17 | -------------------------------------------------------------------------------- /ruby/test/time_embedding_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "time_embedding" 3 | require "numo/narray" 4 | 5 | class TimeEmbeddingTest < Test::Unit::TestCase 6 | def setup 7 | @w = Numo::SFloat[[0.20071, -0.210761, 0.21761, 0.20861, 0.203998], 8 | [-0.279034, 0.275155, -0.26858, -0.284655, -0.227953], 9 | [0.257709, -0.26606, 0.270528, 0.259538, 0.264047], 10 | [-0.255427, 0.260104, -0.256653, -0.248147, -0.250488], 11 | [0.245554, -0.246388, 0.249393, 0.23673, 0.24366], 12 | [0.207265, -0.207376, 0.209755, 0.207179, 0.206551], 13 | [-0.213308, 0.195443, -0.139508, -0.207847, 0.102623]] 14 | @target = TimeEmbedding.new(@w) 15 | end 16 | 17 | def test_initialize 18 | assert_equal [@w], @target.params 19 | assert_equal [@w.new_zeros], @target.grads 20 | end 21 | 22 | def test_forward 23 | output = @target.forward(Numo::Int32[[0,1], [2,3], [4,5]]) 24 | expected = [@w[[0,1], true].to_a, @w[[2,3], true].to_a, @w[[4,5], true].to_a] 25 | 
assert_equal(expected, output.to_a) 26 | end 27 | 28 | def test_backward 29 | output = @target.forward(Numo::Int32[[0,1], [2,3], [4,5]]) 30 | @target.backward(output) 31 | expected = Numo::SFloat.zeros(7, 5) 32 | expected = @w[0...6, true].concatenate(Numo::SFloat.zeros(1, 5)) 33 | assert_delta_array expected, @target.grads.first 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /ruby/test/time_rnn_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "time_rnn" 3 | require "numo/narray" 4 | 5 | class TimeRNNTest < Test::Unit::TestCase 6 | def setup 7 | # D = 2, H = 2 8 | @wx = Numo::SFloat[[0.1, 0.2], [0.5, 0.8]] # D x H 9 | @wh = Numo::SFloat[[0.4, 0.2], [0.1, 0.9]] # H x H 10 | @b = Numo::SFloat[0.1] 11 | 12 | @target = TimeRnn.new(@wx, @wh, @b) 13 | end 14 | 15 | def test_initialize 16 | assert_equal @wx, @target.params[0] 17 | assert_equal [ 18 | Numo::SFloat[[0, 0], [0, 0]], 19 | Numo::SFloat[[0, 0], [0, 0]], 20 | Numo::SFloat[0] 21 | ], @target.grads 22 | end 23 | 24 | def test_forward 25 | x = Numo::SFloat[ # N x T x D ( 3 x 2 x 2 ) 26 | [ 27 | [0.1, 0.4], 28 | [0.7, 0.5] 29 | ], 30 | [ 31 | [0.1, 0.4], 32 | [0.7, 0.5] 33 | ], 34 | [ 35 | [0.1, 0.4], 36 | [0.7, 0.5] 37 | ], 38 | ] 39 | 40 | actual = @target.forward(x) 41 | 42 | expected = Numo::SFloat[ 43 | [ 44 | [0.300437, 0.413644], 45 | [0.523783, 0.790352] 46 | ], 47 | [ 48 | [0.300437, 0.413644], 49 | [0.523783, 0.790352] 50 | ], 51 | [ 52 | [0.300437, 0.413644], 53 | [0.523783, 0.790352] 54 | ] 55 | ] 56 | 57 | assert_delta_array(expected, actual) 58 | end 59 | 60 | def test_backward 61 | x = Numo::SFloat[ # N x T x D ( 3 x 2 x 2 ) 62 | [ 63 | [0.1, 0.4], 64 | [0.7, 0.5] 65 | ], 66 | [ 67 | [0.1, 0.4], 68 | [0.7, 0.5] 69 | ], 70 | [ 71 | [0.1, 0.4], 72 | [0.7, 0.5] 73 | ], 74 | ] 75 | 76 | @target.forward(x) 77 | 78 | dh_next = Numo::SFloat[ # N x T x H ( 3 x 2 x 2 ) 79 | [ 80 | [0.3, 0.5], 81 | [0.7, 0.5] 82 | ], 83 | [ 84 | [0.2, 0.2], 85 | [0.7, 0.5] 86 | ], 87 | [ 88 | [0.3, 0.8], 89 | [0.2, 0.2] 90 | ] 91 | ] 92 | 93 | actual_dxs = @target.backward(dh_next) 94 | 95 | expected_dxs = Numo::SFloat[ 96 | [ 97 | [0.168503, 0.723202], 98 | [0.08833, 0.404116] 99 | ], 100 | [ 101 | [0.109671, 0.47878], 102 | [0.08833, 0.404116] 103 | ], 104 | [ 105 | [0.180169, 0.754616], 106 | [0.0295268, 0.13262] 107 | ] 108 | ] 109 | 110 | assert_delta_array(expected_dxs, actual_dxs) 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /ruby/test/two_layers_net_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require_relative "../examples/ch01/two_layers_net" 3 | require "numo/narray" 4 | 5 | class TwoLayersNetTest < Test::Unit::TestCase 6 | def setup 7 | @target = TwoLayersNet.new(input_size: 3, hidden_size: 5, output_size: 4) 8 | end 9 | 10 | def test_initialize 11 | affine1, sigmoid, affine2 = @target.layers 12 | assert_equal [3, 5], affine1.params[0].shape 13 | assert_equal Sigmoid, sigmoid.class 14 | assert_equal [5, 4], affine2.params[0].shape 15 | end 16 | 17 | def test_forward 18 | x = Numo::SFloat[[1,2,3],[4,5,6]] 19 | t = Numo::SFloat[[1,2,3,4],[5,6,7,8]] 20 | cross_entropy_error = @target.forward(x, t) 21 | assert_equal Float, cross_entropy_error.class 22 | end 23 | 24 | def test_backward1 25 | x = Numo::SFloat[[1,2,3],[4,5,6]] 26 | t = Numo::SFloat[[1,2,3,4],[5,6,7,8]] 27 | 
cross_entropy_error = @target.forward(x, t) 28 | dout = 0.8 29 | last_dout = @target.backward(dout) 30 | assert_equal [2,3], last_dout.shape 31 | end 32 | 33 | def test_backward2 34 | x = Numo::SFloat[1,2,3] 35 | t = Numo::SFloat[1,2,3,4] 36 | cross_entropy_error = @target.forward(x, t) 37 | dout = 0.8 38 | last_dout = @target.backward(dout) 39 | assert_equal [1,3], last_dout.shape 40 | end 41 | end 42 | --------------------------------------------------------------------------------
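The expected values in these tests were computed by hand, which is easy to get wrong; a numerical gradient check is a useful independent cross-check. Below is a minimal sketch (hypothetical, not a file in this repo) that compares Affine#backward against a central-difference estimate, assuming it is run from the ruby/ directory:

require 'numo/narray'
require_relative 'lib/affine'

# Central-difference estimate of dL/dx for L = sum(layer.forward(x)).
def numerical_grad(layer, x, eps = 1e-4)
  grad = x.new_zeros
  x.size.times do |i|
    orig = x[i]
    x[i] = orig + eps
    f_plus = layer.forward(x).sum
    x[i] = orig - eps
    f_minus = layer.forward(x).sum
    x[i] = orig
    grad[i] = (f_plus - f_minus) / (2 * eps)
  end
  grad
end

w = Numo::DFloat[[1, 2, 3], [5, 8, 13]]
b = Numo::DFloat[0.2, 0.3, 0.4]
layer = Affine.new(w, b)
x = Numo::DFloat[[3, 1.5]]

numeric = numerical_grad(layer, x)
layer.forward(x)                                   # cache x for backward
analytic = layer.backward(Numo::DFloat.ones(1, 3)) # dL/dy = 1 when L = sum(y)
puts (analytic - numeric).abs.max                  # should be ~1e-8 for DFloat

The same pattern extends to the weight gradients and to the other layers under ruby/lib.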