├── .gitignore
├── README.md
├── gpu
│   ├── ch01
│   │   ├── figure_gpu.png
│   │   ├── layers.py
│   │   ├── loss_gpu.png
│   │   ├── optimizer.py
│   │   ├── show_spiral_dataset.py
│   │   ├── spiral.py
│   │   ├── train.py
│   │   ├── train_custom_loop.py
│   │   ├── trainer.py
│   │   └── two_layer_net.py
│   ├── ch03
│   │   ├── cbow_predict.py
│   │   ├── simple_cbow.py
│   │   ├── train.py
│   │   └── util.py
│   ├── ch04
│   │   ├── cbow.py
│   │   ├── cbow.py~
│   │   ├── negative_sampling_layer.py
│   │   ├── netative_sampling_layer.py~
│   │   └── train.py
│   ├── ch05
│   │   ├── simple_rnnlm.py
│   │   ├── train.py
│   │   └── train_custom_loop.py
│   ├── ch06
│   │   ├── better_rnnlm.py
│   │   └── train_better_rnnlm.py
│   └── common
│       ├── base_model.py
│       ├── config.py
│       ├── functions.py
│       ├── layers.py
│       ├── np.py
│       ├── optimizer.py
│       ├── time_layers.py
│       ├── trainer.py
│       └── util.py
├── python
│   ├── 20190327
│   │   ├── sigmoid.py
│   │   └── sigmoid_class.py
│   ├── ch1
│   │   ├── affine.py
│   │   ├── forward_net.py
│   │   ├── show_spiral_dataset.py
│   │   ├── sigmoid.py
│   │   ├── sigmoid_class.py
│   │   ├── train_custom_loop.py
│   │   └── two_layer_net.py
│   ├── ch3
│   │   ├── cbow.py
│   │   ├── dot.py
│   │   ├── matmul.py
│   │   ├── simple_cbow.py
│   │   └── train.py
│   ├── ch4
│   │   ├── cbow.py
│   │   ├── cbow_params.pkl
│   │   ├── negative_sampling_layer.py
│   │   ├── ptb.py
│   │   ├── ptb.train.npy
│   │   ├── ptb.train.txt
│   │   ├── ptb.vocab.pkl
│   │   ├── train.py
│   │   └── train_loss.png
│   ├── ch5
│   │   ├── simple_rnnlm.py
│   │   ├── train.py
│   │   └── train_custom_loop.py
│   ├── ch6
│   │   ├── Figure_1.png
│   │   ├── Rnnlm.pkl
│   │   ├── clip_grads.py
│   │   ├── lstm_backward_graph.jpg
│   │   ├── rnn_gradient_graph.py
│   │   ├── rnn_gradient_graph_clip.py
│   │   ├── rnnlm.py
│   │   └── train_rnnlm.py
│   ├── common
│   │   ├── config.py
│   │   ├── functions.py
│   │   ├── layers.py
│   │   ├── np.py
│   │   ├── optimizer.py
│   │   ├── time_layers.py
│   │   ├── trainer.py
│   │   └── util.py
│   ├── dataset
│   │   ├── __init__.py
│   │   ├── addition.txt
│   │   ├── date.txt
│   │   ├── ptb.py
│   │   ├── sequence.py
│   │   └── spiral.py
│   ├── memo.txt
│   └── upstream
│       ├── ch01
│       │   ├── forward_net.py
│       │   ├── show_spiral_dataset.py
│       │   ├── train.py
│       │   ├── train_custom_loop.py
│       │   └── two_layer_net.py
│       ├── common
│       │   ├── __init__.py
│       │   ├── base_model.py
│       │   ├── config.py
│       │   ├── functions.py
│       │   ├── layers.py
│       │   ├── np.py
│       │   ├── optimizer.py
│       │   ├── time_layers.py
│       │   ├── trainer.py
│       │   └── util.py
│       └── dataset
│           ├── __init__.py
│           ├── addition.txt
│           ├── date.txt
│           ├── ptb.py
│           ├── sequence.py
│           └── spiral.py
├── python_team2
│   ├── 20190327
│   │   ├── sigmoid.py
│   │   └── sigmoid_class.py
│   ├── .gitignore
│   ├── Pipfile
│   ├── Pipfile.lock
│   ├── README.md
│   ├── ch01
│   │   ├── forward_net.py
│   │   ├── plots.py
│   │   ├── show_spiral_dataset.py
│   │   ├── train.py
│   │   ├── train_custom_loop.py
│   │   └── two_layer_net.py
│   ├── ch02
│   │   ├── co_matrix.py
│   │   ├── ranking.py
│   │   ├── similarity.py
│   │   └── words.py
│   ├── ch03
│   │   ├── cbow_predict.py
│   │   ├── simple_cbow.py
│   │   ├── train.py
│   │   ├── w_in.py
│   │   └── w_in_matmul.py
│   └── common
│       ├── layers.py
│       ├── optimizer.py
│       └── util.py
└── ruby
    ├── .bundle
    │   └── config
    ├── Gemfile
    ├── Gemfile.lock
    ├── Rakefile
    ├── examples
    │   ├── ch01
    │   │   ├── show_spiral_dataset.rb
    │   │   ├── spiral.rb
    │   │   ├── train.rb
    │   │   ├── train_custom_loop.rb
    │   │   └── two_layers_net.rb
    │   └── ch03
    │       ├── cbow_predict.rb
    │       └── train.rb
    ├── lib
    │   ├── adam.rb
    │   ├── affine.rb
    │   ├── embedding.rb
    │   ├── embedding_dot.rb
    │   ├── mat_mul.rb
    │   ├── negative_sampling_loss.rb
    │   ├── optimizer.rb
    │   ├── rnn.rb
    │   ├── sigmoid.rb
    │   ├── simple_cbow.rb
    │   ├── softmax_with_loss.rb
    │   ├── time_embedding.rb
    │   ├── time_rnn.rb
    │   ├── trainer.rb
    │   └── util.rb
    └── test
        ├── affine_test.rb
        ├── mat_mul_test.rb
        ├── optimizer_test.rb
        ├── rnn_test.rb
        ├── simple_cbow_test.rb
        ├── softmax_with_loss_test.rb
        ├── test_helper.rb
        ├── time_embedding_test.rb
        ├── time_rnn_test.rb
        └── two_layers_net_test.rb

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
ruby/.idea/*
ruby/vendor/*
*.swp
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# deep-learning-from-scratch-2

This is the repository used for the following event:
https://retrieva.connpass.com/event/123223/

Twitter hashtag: `#retrieva_nlp`

We plan to build the implementations through mob programming during the event.

- Textbook team
- GPU team
- Ruby team
--------------------------------------------------------------------------------
/gpu/ch01/figure_gpu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/gpu/ch01/figure_gpu.png
--------------------------------------------------------------------------------
/gpu/ch01/layers.py:
--------------------------------------------------------------------------------
import cupy as cp


class MatMul:

    def __init__(self, W):
        self.params = [W]
        self.grads = [cp.zeros_like(W)]
        self.x = None

    def forward(self, x):
        W, = self.params
        out = cp.dot(x, W)
        self.x = x
        return out

    def backward(self, dout):
        W, = self.params
        dx = cp.dot(dout, W.T)
        dW = cp.dot(self.x.T, dout)
        self.grads[0][...] = dW
        return dx


class Sigmoid:
    def __init__(self):
        self.params, self.grads = [], []
        self.out = None

    def forward(self, x):
        out = 1.0 / (1.0 + cp.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out
        return dx


class Affine:
    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [cp.zeros_like(W), cp.zeros_like(b)]
        self.x = None

    def forward(self, x):
        W, b = self.params
        out = cp.dot(x, W) + b
        self.x = x
        return out

    def backward(self, dout):
        W, b = self.params
        dx = cp.dot(dout, W.T)
        dW = cp.dot(self.x.T, dout)
        db = cp.sum(dout, axis=0)
        self.grads[0][...] = dW
        self.grads[1][...] = db
        return dx


class SoftmaxWithLoss:
    def __init__(self):
        self.params, self.grads = [], []
        self.y = None
        self.t = None

    def forward(self, x, t):
        if x.ndim == 2:  # mini-batch input
            x = x - x.max(axis=1, keepdims=True)
            x = cp.exp(x)
            y = x / x.sum(axis=1, keepdims=True)
        elif x.ndim == 1:
            x = x - cp.max(x)
            y = cp.exp(x) / cp.sum(cp.exp(x))

        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)

        # if the labels are one-hot vectors, convert them to class indices
        if t.size == y.size:
            t = t.argmax(axis=1)

        batch_size = y.shape[0]
        loss = -cp.sum(cp.log(y[cp.arange(batch_size), t] + 1e-7)) / batch_size
        self.y = y
        self.t = t
        return loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = self.y.copy()
        dx[cp.arange(batch_size), self.t] -= 1
        dx *= dout
        dx = dx / batch_size
        return dx
--------------------------------------------------------------------------------
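A quick way to sanity-check the backward passes above is a finite-difference comparison. The following is a minimal sketch (not part of the repository), assuming CuPy is installed and it is run from gpu/ch01:

import cupy as cp
from layers import Affine

cp.random.seed(0)
W = cp.random.randn(3, 2)
b = cp.random.randn(2)
x = cp.random.randn(4, 3)
layer = Affine(W, b)

out = layer.forward(x)
dx = layer.backward(cp.ones_like(out))  # analytic gradient of sum(out) w.r.t. x

eps = 1e-4
i, j = 1, 2
x1 = x.copy(); x1[i, j] += eps
x2 = x.copy(); x2[i, j] -= eps
num = (layer.forward(x1).sum() - layer.forward(x2).sum()) / (2 * eps)
print(float(dx[i, j]), float(num))  # the two values should agree closely
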
/gpu/ch01/loss_gpu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/gpu/ch01/loss_gpu.png
--------------------------------------------------------------------------------
/gpu/ch01/optimizer.py:
--------------------------------------------------------------------------------
# coding: utf-8

import cupy as cp


class SGD:
    '''
    Stochastic Gradient Descent
    '''
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for i in range(len(params)):
            params[i] -= self.lr * grads[i]


class Momentum:
    '''
    Momentum SGD
    '''
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = []
            for param in params:
                self.v.append(cp.zeros_like(param))

        for i in range(len(params)):
            self.v[i] = self.momentum * self.v[i] - self.lr * grads[i]
            params[i] += self.v[i]


class Nesterov:
    '''
    Nesterov's Accelerated Gradient (http://arxiv.org/abs/1212.0901)
    '''
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = []
            for param in params:
                self.v.append(cp.zeros_like(param))

        for i in range(len(params)):
            self.v[i] *= self.momentum
            self.v[i] -= self.lr * grads[i]
            params[i] += self.momentum * self.momentum * self.v[i]
            params[i] -= (1 + self.momentum) * self.lr * grads[i]


class AdaGrad:
    '''
    AdaGrad
    '''
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = []
            for param in params:
                self.h.append(cp.zeros_like(param))

        for i in range(len(params)):
            self.h[i] += grads[i] * grads[i]
            params[i] -= self.lr * grads[i] / (cp.sqrt(self.h[i]) + 1e-7)


class RMSprop:
    '''
    RMSprop
    '''
    def __init__(self, lr=0.01, decay_rate=0.99):
        self.lr = lr
        self.decay_rate = decay_rate
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = []
            for param in params:
                self.h.append(cp.zeros_like(param))

        for i in range(len(params)):
            self.h[i] *= self.decay_rate
            self.h[i] += (1 - self.decay_rate) * grads[i] * grads[i]
            params[i] -= self.lr * grads[i] / (cp.sqrt(self.h[i]) + 1e-7)


class Adam:
    '''
    Adam (http://arxiv.org/abs/1412.6980v8)
    '''
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = [], []
            for param in params:
                self.m.append(cp.zeros_like(param))
                self.v.append(cp.zeros_like(param))

        self.iter += 1
        lr_t = self.lr * cp.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for i in range(len(params)):
            self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i])
            self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i])

            params[i] -= lr_t * self.m[i] / (cp.sqrt(self.v[i]) + 1e-7)
--------------------------------------------------------------------------------
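Every optimizer above mutates `params` in place given a matching list of `grads`. A minimal usage sketch (not part of the repository): a few SGD steps on f(w) = w**2 should drive the parameter toward zero. Assumes CuPy is installed and this runs from gpu/ch01.

import cupy as cp
from optimizer import SGD

w = cp.array([2.0])
opt = SGD(lr=0.1)
for _ in range(20):
    grad = 2 * w              # gradient of f(w) = w**2
    opt.update([w], [grad])   # in-place update of w
print(w)                      # close to 0
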
/gpu/ch01/show_spiral_dataset.py:
--------------------------------------------------------------------------------
# coding: utf-8
import sys
sys.path.append('..')  # allow imports from the parent directory
import spiral
import matplotlib.pyplot as plt


x, t = spiral.load_data()
print('x', x.shape)  # (300, 2)
print('t', t.shape)  # (300, 3)

# plot the data points
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']
for i in range(CLS_NUM):
    plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i])
plt.show()
--------------------------------------------------------------------------------
/gpu/ch01/spiral.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import numpy as np


def load_data(seed=1984):
    np.random.seed(seed)
    N = 100  # number of samples per class
    DIM = 2  # dimensionality of the data
    CLS_NUM = 3  # number of classes

    x = np.zeros((N * CLS_NUM, DIM))
    t = np.zeros((N * CLS_NUM, CLS_NUM), dtype=np.int32)

    for j in range(CLS_NUM):
        for i in range(N):
            rate = i / N
            radius = 1.0 * rate
            theta = j * 4.0 + 4.0 * rate + np.random.randn() * 0.2

            ix = N * j + i
            x[ix] = np.array([radius * np.sin(theta),
                              radius * np.cos(theta)], dtype=np.float32).flatten()
            t[ix, j] = 1

    return x, t
--------------------------------------------------------------------------------
/gpu/ch01/train.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

from optimizer import SGD
from trainer import Trainer
import spiral
import cupy as cp
from two_layer_net import TwoLayerNet


if __name__ == '__main__':
    max_epoch = 300
    batch_size = 30
    hidden_size = 10
    learning_rate = 1.0

    x, t = spiral.load_data()
    x_cp = cp.array(x)
    t_cp = cp.array(t)
    model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
    optimizer = SGD(lr=learning_rate)

    trainer = Trainer(model, optimizer)
    trainer.fit(x_cp, t_cp, max_epoch, batch_size, eval_interval=5)
    trainer.plot()
--------------------------------------------------------------------------------
/gpu/ch01/train_custom_loop.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import numpy as np
import cupy as cp
from optimizer import SGD
import spiral
import matplotlib.pyplot as plt
plt.switch_backend('agg')
from two_layer_net import TwoLayerNet


if __name__ == '__main__':
    print("# 1. hyper parameter settings")
    max_epoch = 300
    batch_size = 30
    hidden_size = 10
    learning_rate = 1.0

    print("# 2. load data and generate model and optimizer")
    x, t = spiral.load_data()
    model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
    optimizer = SGD(lr=learning_rate)

    # variables for learning
    data_size = len(x)
    max_iters = data_size // batch_size
    total_loss = 0
    loss_count = 0
    loss_list = []

    print("# 3. start epoch")
    for epoch in range(max_epoch):
        # 3. shuffle data
        idx = np.random.permutation(data_size)
        x = x[idx]
        t = t[idx]

        for iters in range(max_iters):
            batch_x = cp.asarray(x[iters * batch_size: (iters + 1) * batch_size])
            batch_t = cp.asarray(t[iters * batch_size: (iters + 1) * batch_size])

            # 4. process grads and update parameters
            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)

            total_loss += loss
            loss_count += 1

            # 5. output learning result
            if (iters + 1) % 10 == 0:
                avg_loss = total_loss / loss_count
                print('| epoch %d | iter %d / %d | loss %.2f'
                      % (epoch + 1, iters + 1, max_iters, avg_loss))
                loss_list.append(avg_loss)
                total_loss, loss_count = 0, 0

    print("# plot learning result")
    plt.plot(np.arange(len(loss_list)), loss_list, label='train')
    plt.xlabel('iterations (x10)')
    plt.ylabel('loss')
    plt.savefig("loss_gpu.png")
    plt.cla()

    # plot boundary
    h = 0.001
    x_min, x_max = np.min(x[:, 0]) - .1, np.max(x[:, 0]) + .1
    y_min, y_max = np.min(x[:, 1]) - .1, np.max(x[:, 1]) + .1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    X = cp.asarray(np.c_[xx.ravel(), yy.ravel()].astype(np.float32))
    score = cp.asnumpy(model.predict(X))
    predict_cls = np.argmax(score, axis=1)
    Z = predict_cls.reshape(xx.shape)
    plt.contourf(xx, yy, Z)
    plt.axis('off')

    # plot data points
    x, t = spiral.load_data()
    N = 100
    CLS_NUM = 3
    markers = ['o', 'x', '^']
    for i in range(CLS_NUM):
        plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i])
    plt.savefig("figure_gpu.png")
--------------------------------------------------------------------------------
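The loop above keeps the full dataset on the host and moves only each mini-batch to the GPU with cp.asarray, then brings scores back with cp.asnumpy for plotting. A minimal round-trip sketch of that pattern (not in the repository; assumes CuPy is installed):

import numpy as np
import cupy as cp

batch = np.random.randn(30, 2).astype(np.float32)
batch_gpu = cp.asarray(batch)       # host -> device copy
result = cp.asnumpy(batch_gpu * 2)  # device -> host copy
assert isinstance(result, np.ndarray)
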
/gpu/ch01/trainer.py:
--------------------------------------------------------------------------------
# coding: utf-8
import sys
sys.path.append('..')
import numpy as np
import cupy as cp
import time
import matplotlib.pyplot as plt
plt.switch_backend('agg')


class Trainer:
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.loss_list = []
        self.eval_interval = None
        self.current_epoch = 0

    def fit(self, x, t, max_epoch=10, batch_size=32, max_grad=None, eval_interval=20):
        data_size = len(x)
        max_iters = data_size // batch_size
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0

        start_time = time.time()
        for epoch in range(max_epoch):
            # shuffle the data
            idx = cp.random.permutation(cp.arange(data_size))
            x = x[idx]
            t = t[idx]

            for iters in range(max_iters):
                batch_x = x[iters*batch_size:(iters+1)*batch_size]
                batch_t = t[iters*batch_size:(iters+1)*batch_size]

                # compute gradients and update parameters
                loss = model.forward(batch_x, batch_t)
                model.backward()
                params, grads = remove_duplicate(model.params, model.grads)  # consolidate shared weights into one
                if max_grad is not None:
                    clip_grads(grads, max_grad)
                optimizer.update(params, grads)
                total_loss += loss
                loss_count += 1

                # evaluation
                if (eval_interval is not None) and (iters % eval_interval) == 0:
                    avg_loss = total_loss / loss_count
                    elapsed_time = time.time() - start_time
                    print('| epoch %d | iter %d / %d | time %d[s] | loss %.2f'
                          % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, avg_loss))
                    self.loss_list.append(float(avg_loss))
                    total_loss, loss_count = 0, 0

            self.current_epoch += 1

    def plot(self, ylim=None):
        x = np.arange(len(self.loss_list))
        if ylim is not None:
            plt.ylim(*ylim)
        plt.plot(x, self.loss_list, label='train')
        plt.xlabel('iterations (x' + str(self.eval_interval) + ')')
        plt.ylabel('loss')
        plt.savefig('figure.png')
        #plt.show()


class RnnlmTrainer:
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.time_idx = None
        self.ppl_list = None
        self.eval_interval = None
        self.current_epoch = 0

    def get_batch(self, x, t, batch_size, time_size):
        batch_x = cp.empty((batch_size, time_size), dtype='i')
        batch_t = cp.empty((batch_size, time_size), dtype='i')

        data_size = len(x)
        jump = data_size // batch_size
        offsets = [i * jump for i in range(batch_size)]  # start offset of each sample in the batch

        for time in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, time] = x[(offset + self.time_idx) % data_size]
                batch_t[i, time] = t[(offset + self.time_idx) % data_size]
            self.time_idx += 1
        return batch_x, batch_t

    def fit(self, xs, ts, max_epoch=10, batch_size=20, time_size=35,
            max_grad=None, eval_interval=20):
        data_size = len(xs)
        max_iters = data_size // (batch_size * time_size)
        self.time_idx = 0
        self.ppl_list = []
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0

        start_time = time.time()
        for epoch in range(max_epoch):
            for iters in range(max_iters):
                batch_x, batch_t = self.get_batch(xs, ts, batch_size, time_size)

                # compute gradients and update parameters
                loss = model.forward(batch_x, batch_t)
                model.backward()
                params, grads = remove_duplicate(model.params, model.grads)  # consolidate shared weights into one
                if max_grad is not None:
                    clip_grads(grads, max_grad)
                optimizer.update(params, grads)
                total_loss += loss
                loss_count += 1

                # evaluate perplexity
                if (eval_interval is not None) and (iters % eval_interval) == 0:
                    ppl = cp.exp(total_loss / loss_count)
                    elapsed_time = time.time() - start_time
                    print('| epoch %d | iter %d / %d | time %d[s] | perplexity %.2f'
                          % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, ppl))
                    self.ppl_list.append(float(ppl))
                    total_loss, loss_count = 0, 0

            self.current_epoch += 1

    def plot(self, ylim=None):
        x = np.arange(len(self.ppl_list))
        if ylim is not None:
            plt.ylim(*ylim)
        plt.plot(x, self.ppl_list, label='train')
        plt.xlabel('iterations (x' + str(self.eval_interval) + ')')
        plt.ylabel('perplexity')
        plt.show()


def remove_duplicate(params, grads):
    '''
    Consolidate duplicated weights in the parameter list into one entry
    and accumulate the corresponding gradients.
    '''
    params, grads = params[:], grads[:]  # copy lists

    while True:
        find_flg = False
        L = len(params)

        for i in range(0, L - 1):
            for j in range(i + 1, L):
                # the weights are shared
                if params[i] is params[j]:
                    grads[i] += grads[j]  # accumulate gradients
                    find_flg = True
                    params.pop(j)
                    grads.pop(j)
                # the weights are shared as a transposed matrix (weight tying)
                elif params[i].ndim == 2 and params[j].ndim == 2 and \
                     params[i].T.shape == params[j].shape and cp.all(params[i].T == params[j]):
                    grads[i] += grads[j].T
                    find_flg = True
                    params.pop(j)
                    grads.pop(j)

                if find_flg: break
            if find_flg: break

        if not find_flg: break

    return params, grads


def clip_grads(grads, max_norm):
    total_norm = 0
    for grad in grads:
        total_norm += cp.sum(grad ** 2)
    total_norm = cp.sqrt(total_norm)

    rate = max_norm / (total_norm + 1e-6)
    if rate < 1:
        for grad in grads:
            grad *= rate
--------------------------------------------------------------------------------
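clip_grads rescales all gradients by max_norm / total_norm whenever the global L2 norm exceeds max_norm. A small sketch of the effect (not part of the repository; assumes CuPy is installed and this runs from gpu/ch01):

import cupy as cp
from trainer import clip_grads

grads = [cp.array([3.0, 4.0])]   # global L2 norm = 5
clip_grads(grads, max_norm=1.0)  # rescales in place by ~1/5
print(grads[0])                  # approximately [0.6, 0.8], norm ~= 1
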
/gpu/ch01/two_layer_net.py:
--------------------------------------------------------------------------------
import cupy as cp

from layers import Affine, Sigmoid, SoftmaxWithLoss


class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # initialize weights and biases
        W1 = 0.01 * cp.random.randn(I, H)
        b1 = cp.zeros(H)
        W2 = 0.01 * cp.random.randn(H, O)
        b2 = cp.zeros(O)

        # create layers
        self.layers = [
            Affine(W1, b1),
            Sigmoid(),
            Affine(W2, b2)
        ]
        self.loss_layer = SoftmaxWithLoss()

        # combine all weights and grads into lists
        self.params, self.grads = [], []

        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
--------------------------------------------------------------------------------
/gpu/ch03/cbow_predict.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('..')
import cupy as cp
from ch01.layers import MatMul

# sample context data
c0 = cp.array([[1, 0, 0, 0, 0, 0, 0]])
c1 = cp.array([[0, 0, 0, 1, 0, 0, 0]])

W_in = cp.random.randn(7, 3)
W_out = cp.random.randn(3, 7)

in_layer0 = MatMul(W_in)
in_layer1 = MatMul(W_in)
out_layer = MatMul(W_out)

h0 = in_layer0.forward(c0)
h1 = in_layer1.forward(c1)
h = 0.5 * (h0 + h1)
s = out_layer.forward(h)

print(s)
--------------------------------------------------------------------------------
/gpu/ch03/simple_cbow.py:
--------------------------------------------------------------------------------
import cupy as cp

from ch01.layers import MatMul, SoftmaxWithLoss


class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # initialize weights
        W_in = 0.01 * cp.random.randn(V, H).astype(cp.float32)
        W_out = 0.01 * cp.random.randn(H, V).astype(cp.float32)

        # create layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # keep the distributed word representations as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
--------------------------------------------------------------------------------
/gpu/ch03/train.py:
--------------------------------------------------------------------------------
import sys
import cupy as cp
sys.path.append('..')
from ch01.trainer import Trainer
from ch01.optimizer import Adam
from simple_cbow import SimpleCBOW
from util import preprocess, create_contexts_target, convert_one_hot

window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

vocab_size = len(word_to_id)
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()
--------------------------------------------------------------------------------
/gpu/ch03/util.py:
--------------------------------------------------------------------------------
import cupy as cp


def preprocess(text):
    text = text.lower()
    text = text.replace('.', ' .')
    words = text.split(' ')

    word_to_id = {}
    id_to_word = {}
    for word in words:
        if word not in word_to_id:
            new_id = len(word_to_id)
            word_to_id[word] = new_id
            id_to_word[new_id] = word
    corpus = cp.array([word_to_id[w] for w in words])
    return corpus, word_to_id, id_to_word


def create_contexts_target(corpus, window_size=1):
    target = corpus[window_size:-window_size]
    contexts = []

    for idx in range(window_size, len(corpus) - window_size):
        cs = []
        for t in range(-window_size, window_size + 1):
            if t == 0:
                continue
            cs.append(int(corpus[idx + t]))
        contexts.append(cs)

    return cp.array(contexts), cp.array(target)


def convert_one_hot(corpus, vocab_size):
    '''Convert to a one-hot representation.

    :param corpus: list of word IDs (1-D or 2-D array)
    :param vocab_size: vocabulary size
    :return: one-hot representation (2-D or 3-D array)
    '''
    N = corpus.shape[0]

    if corpus.ndim == 1:
        one_hot = cp.zeros((N, vocab_size), dtype=cp.int32)
        for idx, word_id in enumerate(corpus):
            one_hot[idx, word_id] = 1

    elif corpus.ndim == 2:
        C = corpus.shape[1]
        one_hot = cp.zeros((N, C, vocab_size), dtype=cp.int32)
        for idx_0, word_ids in enumerate(corpus):
            for idx_1, word_id in enumerate(word_ids):
                one_hot[idx_0, idx_1, word_id] = 1

    return one_hot
--------------------------------------------------------------------------------
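A short sketch (not in the repository) of what the helpers above produce for the sample sentence; assumes CuPy is installed and this runs from gpu/ch03:

from util import preprocess, create_contexts_target

corpus, word_to_id, id_to_word = preprocess('You say goodbye and I say hello.')
print(corpus)                   # word-ID sequence: [0 1 2 3 4 1 5 6]
contexts, target = create_contexts_target(corpus, window_size=1)
print(contexts[0], target[0])   # [0 2] and 1: "you"/"goodbye" predict "say"
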
/gpu/ch04/cbow.py:
--------------------------------------------------------------------------------
import numpy as np
import cupy as cp
from negative_sampling_layer import NegativeSamplingLoss
import sys
sys.path.append('..')
from common.layers import Embedding


class CBOW:
    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        V, H = vocab_size, hidden_size
        W_in = 0.01 * cp.random.randn(V, H).astype('f')
        W_out = 0.01 * cp.random.randn(V, H).astype('f')

        self.in_layers = []
        for i in range(2 * window_size):
            layer = Embedding(W_in)
            self.in_layers.append(layer)

        self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)

        layers = self.in_layers + [self.ns_loss]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = 0
        for i, layer in enumerate(self.in_layers):
            h += layer.forward(contexts[:, i])
        h *= 1 / len(self.in_layers)
        loss = self.ns_loss.forward(h, target)
        return loss

    def backward(self, dout=1):
        dout = self.ns_loss.backward(dout)
        dout *= 1 / len(self.in_layers)
        for layer in self.in_layers:
            layer.backward(dout)
        return None
--------------------------------------------------------------------------------
/gpu/ch04/cbow.py~:
--------------------------------------------------------------------------------
import numpy as np
import negative_sampling_layer import Nega
import sys
--------------------------------------------------------------------------------
/gpu/ch04/negative_sampling_layer.py:
--------------------------------------------------------------------------------
import collections
import numpy as np
import cupy as cp
import sys
sys.path.append('..')
from common.layers import Embedding, SigmoidWithLoss


class EmbeddingDot:
    def __init__(self, W):
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None

    def forward(self, h, idx):
        target_W = self.embed.forward(idx)
        out = cp.sum(target_W * h, axis=1)
        self.cache = (h, target_W)
        return out

    def backward(self, dout):
        h, target_W = self.cache
        dout = dout.reshape(dout.shape[0], 1)
        dtarget_W = dout * h
        self.embed.backward(dtarget_W)
        dh = dout * target_W
        return dh


class UnigramSampler:
    def __init__(self, corpus, power, sample_size):
        self.sample_size = sample_size
        self.vocab_size = None
        self.word_p = None

        counts = collections.Counter()
        counts.update(corpus)

        vocab_size = len(counts)
        self.vocab_size = vocab_size
        self.word_p = np.zeros(vocab_size)
        for i in range(vocab_size):
            self.word_p[i] = counts[i]

        self.word_p = np.power(self.word_p, power)
        self.word_p /= np.sum(self.word_p)

    def get_negative_sample(self, target):
        batch_size = target.shape[0]
        negative_sample = cp.random.choice(self.vocab_size, size=(batch_size, self.sample_size),
                                           replace=True, p=self.word_p)
        return negative_sample


class NegativeSamplingLoss:
    def __init__(self, W, corpus, power=0.75, sample_size=5):
        self.sample_size = sample_size
        self.sampler = UnigramSampler(corpus, power, sample_size)
        self.loss_layers = [SigmoidWithLoss() for _ in range(sample_size + 1)]
        self.embed_dot_layers = [EmbeddingDot(W) for _ in range(sample_size + 1)]

        self.params, self.grads = [], []
        for layer in self.embed_dot_layers:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, h, target):
        batch_size = target.shape[0]
        negative_sample = self.sampler.get_negative_sample(target)

        score = self.embed_dot_layers[0].forward(h, target)
        correct_label = cp.ones(batch_size, dtype=cp.int32)
        loss = self.loss_layers[0].forward(score, correct_label)

        negative_label = cp.zeros(batch_size, dtype=cp.int32)
        for i in range(self.sample_size):
            negative_target = negative_sample[:, i]
            score = self.embed_dot_layers[1 + i].forward(h, negative_target)
            loss += self.loss_layers[1 + i].forward(score, negative_label)

        return loss

    def backward(self, dout=1):
        dh = 0
        for l0, l1 in zip(self.loss_layers, self.embed_dot_layers):
            dscore = l0.backward(dout)
            dh += l1.backward(dscore)

        return dh
--------------------------------------------------------------------------------
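A quick sketch (not in the repository) of the unigram distribution the sampler builds: raising counts to the 0.75 power flattens the distribution so rare words are sampled a bit more often. Plain NumPy mirrors the same computation:

import numpy as np

counts = np.array([8.0, 1.0])   # word 0 is 8x more frequent than word 1
p = counts ** 0.75
p /= p.sum()
print(p)  # ~[0.826, 0.174] instead of [0.889, 0.111] without the 0.75 power
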
/gpu/ch04/netative_sampling_layer.py~:
--------------------------------------------------------------------------------
import collections
import numpy as np
import sys
sys.path.append('..')
from common.layers import Embedding


class EmbeddingDot:
    def __init__(self, W):
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed_grads
        self.cache = None

    def forward(self, h, idx):
        target_W = self.embed.forward(idx)
        out = np.sum(target_W * h, axis=1)
        self.cache = (h, target_W)
        return out

    def backward(self, dout):
        h, target_W = self.cache
        dout = dout.reshape(dout.shape[0], 1)
        dtarget_W = dout * h
        self.embed.backward(dtarget_W)
        dh = dout * target_W
        return dh


class UnigramSapmler:
    def __init__(self, corpus, power, sample_size):
        self.sampelsize = sample_size
        self.vocab_size = None
        self.word_p = None

        counts = collections.Counter()
        counts.update(corpus)

        vocab_size = len(counts)
        self.vocab_size = vocab_size
        self.word_p = np.zeros(vocab_size)
        for i in range(vocab_size):
            self.word_p[i] = counts[i]

        self.word_p = np.power(self.word_p, power)
        self.word_p /= np.sum(self.word_p)

    def get_negative_sample(self, target):
        batch_size = target.shape[0]

        if not GPU:
            negative_sample = np.zeros((batch_size, self.sampel_size), dtype=np.int32)

            for i in range(batch_size):
                p = self.word_p.copy()
                target_idx = target[i]
                p[target_idx] = 0
                p /= p.sum()
                negative_sample[i, :] = np.ramdom.choice(self.vocab_size,
                                                         size=self.sample_size, replace=False, p=p)


class NegativeSamplingLoss:
    def __init__(self, W, corpus, power=0.75, sample_size=5):
        self.sample_size = sample_size
        self.sampler = UnigramSapmler(corpus, power, sample_size)
--------------------------------------------------------------------------------
/gpu/ch04/train.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('..')
import numpy as np
import cupy as cp
from common import config
import pickle
from common.trainer import Trainer
from common.optimizer import Adam
from cbow import CBOW
from common.util import create_contexts_target, to_gpu, to_cpu
from dataset import ptb

window_size = 5
hidden_size = 100
batch_size = 100
max_epoch = 10

corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)

contexts, target = create_contexts_target(corpus, window_size)
print(contexts.shape, target.shape)
contexts, target = to_gpu(contexts), to_gpu(target)

model = CBOW(vocab_size, hidden_size, window_size, corpus)
optimizer = Adam()
trainer = Trainer(model, optimizer)

trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

word_vecs = model.word_vecs
word_vecs = to_cpu(word_vecs)

params = {}
params['word_vecs'] = word_vecs.astype(np.float16)
params['word_to_id'] = word_to_id
params['id_to_word'] = id_to_word
pkl_file = 'cbow_params.pkl'
with open(pkl_file, 'wb') as f:
    pickle.dump(params, f, -1)
--------------------------------------------------------------------------------
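Once training finishes, cbow_params.pkl holds the float16 embeddings plus both vocabulary mappings. A sketch (not in the repository) of reading it back and comparing two words by cosine similarity; it assumes training has already produced the pickle and that both words are in the PTB vocabulary:

import pickle
import numpy as np

with open('cbow_params.pkl', 'rb') as f:
    params = pickle.load(f)

word_vecs = params['word_vecs'].astype(np.float32)
word_to_id = params['word_to_id']

def cosine(a, b, eps=1e-8):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + eps)

print(cosine(word_vecs[word_to_id['you']], word_vecs[word_to_id['we']]))
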
/gpu/ch05/simple_rnnlm.py:
--------------------------------------------------------------------------------
import sys
import cupy as cp
sys.path.append('..')
from common.time_layers import TimeAffine, TimeEmbedding, TimeRNN, TimeSoftmaxWithLoss


class SimpleRnnlm:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V, D, H = vocab_size, wordvec_size, hidden_size
        rn = cp.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        rnn_Wx = (rn(D, H) / cp.sqrt(D)).astype('f')
        rnn_Wh = (rn(H, H) / cp.sqrt(H)).astype('f')
        rnn_b = cp.zeros(H).astype('f')
        affine_W = (rn(H, V) / cp.sqrt(H)).astype('f')
        affine_b = cp.zeros(V).astype('f')

        self.layers = [
            TimeEmbedding(embed_W),
            TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True),
            TimeAffine(affine_W, affine_b)
        ]
        self.loss_layer = TimeSoftmaxWithLoss()
        self.rnn_layer = self.layers[1]

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, xs, ts):
        for layer in self.layers:
            xs = layer.forward(xs)

        loss = self.loss_layer.forward(xs, ts)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

    def reset_state(self):
        self.rnn_layer.reset_state()
--------------------------------------------------------------------------------
/gpu/ch05/train.py:
--------------------------------------------------------------------------------
import sys
# import matplotlib.pyplot as plt
import cupy as cp
sys.path.append('..')
from common.optimizer import SGD
from dataset import ptb
from simple_rnnlm import SimpleRnnlm
from common.trainer import RnnlmTrainer


batch_size = 10
wordvec_size = 100
hidden_size = 100
time_size = 5
lr = 0.1
max_epoch = 100

corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = 1000
corpus = corpus[:corpus_size]
vocab_size = int(max(corpus) + 1)

xs = corpus[:-1]
ts = corpus[1:]
data_size = len(xs)
print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))


model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

trainer.fit(xs, ts, max_epoch, batch_size, time_size)
--------------------------------------------------------------------------------
/gpu/ch05/train_custom_loop.py:
--------------------------------------------------------------------------------
import sys
# import matplotlib.pyplot as plt
import cupy as cp
sys.path.append('..')
from common.optimizer import SGD
from dataset import ptb
from simple_rnnlm import SimpleRnnlm

batch_size = 10
wordvec_size = 100
hidden_size = 100
time_size = 5
lr = 0.1
max_epoch = 100

corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = 1000
corpus = corpus[:corpus_size]
vocab_size = int(max(corpus) + 1)

xs = corpus[:-1]
ts = corpus[1:]
data_size = len(xs)
print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))

max_iter = data_size // (batch_size * time_size)
time_idx = 0
total_loss = 0
loss_count = 0
ppl_list = []

model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)

jump = (corpus_size - 1) // batch_size
offsets = [i * jump for i in range(batch_size)]

for epoch in range(max_epoch):
    for iter in range(max_iter):
        batch_x = cp.empty((batch_size, time_size), dtype='i')
        batch_t = cp.empty((batch_size, time_size), dtype='i')
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, t] = xs[(offset + time_idx) % data_size]
                batch_t[i, t] = ts[(offset + time_idx) % data_size]
            time_idx += 1

        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)
        total_loss += loss
        loss_count += 1

    ppl = cp.exp(total_loss / loss_count)
    print('| epoch %d | perplexity %.2f | loss %.2f' % (epoch + 1, ppl, loss))
    ppl_list.append(float(ppl))
    total_loss, loss_count = 0, 0
--------------------------------------------------------------------------------
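Perplexity here is the exponential of the average cross-entropy loss, so it can be read as the effective branching factor of the model's next-word distribution. A tiny worked example (not from the repository), in plain NumPy:

import numpy as np

losses = [2.0, 1.5, 1.0]        # per-iteration cross-entropy losses
ppl = np.exp(np.mean(losses))   # exp(1.5) ~= 4.48
print(ppl)  # the model is about as uncertain as a uniform choice over ~4.5 words
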
/gpu/ch06/better_rnnlm.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('..')
import cupy as cp
from common.time_layers import TimeAffine, TimeEmbedding, TimeLSTM, TimeDropout, TimeSoftmaxWithLoss
from common.base_model import BaseModel


class BetterRnnlm(BaseModel):
    def __init__(self, vocab_size=10000, wordvec_size=650,
                 hidden_size=650, dropout_ratio=0.5):
        V, D, H = vocab_size, wordvec_size, hidden_size
        rn = cp.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        lstm_Wx1 = (rn(D, 4 * H) / cp.sqrt(D)).astype('f')
        lstm_Wh1 = (rn(H, 4 * H) / cp.sqrt(H)).astype('f')
        lstm_b1 = cp.zeros(4 * H).astype('f')
        lstm_Wx2 = (rn(H, 4 * H) / cp.sqrt(H)).astype('f')
        lstm_Wh2 = (rn(H, 4 * H) / cp.sqrt(H)).astype('f')
        lstm_b2 = cp.zeros(4 * H).astype('f')
        affine_b = cp.zeros(V).astype('f')

        # weight tying: the embedding matrix is reused (transposed) as the output weight
        self.layers = [
            TimeEmbedding(embed_W),
            TimeDropout(dropout_ratio),
            TimeLSTM(lstm_Wx1, lstm_Wh1, lstm_b1, stateful=True),
            TimeDropout(dropout_ratio),
            TimeLSTM(lstm_Wx2, lstm_Wh2, lstm_b2, stateful=True),
            TimeDropout(dropout_ratio),
            TimeAffine(embed_W.T, affine_b)
        ]
        self.loss_layer = TimeSoftmaxWithLoss()
        self.lstm_layers = [self.layers[2], self.layers[4]]
        self.drop_layers = [self.layers[1], self.layers[3], self.layers[5]]
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, xs, train_flg=False):
        for layer in self.drop_layers:
            layer.train_flg = train_flg
        for layer in self.layers:
            xs = layer.forward(xs)
        return xs

    def forward(self, xs, ts, train_flg=True):
        score = self.predict(xs, train_flg)
        loss = self.loss_layer.forward(score, ts)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

    def reset_state(self):
        for layer in self.lstm_layers:
            layer.reset_state()
--------------------------------------------------------------------------------
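BetterRnnlm ties the embedding and output weights by passing embed_W.T to TimeAffine; remove_duplicate in common/trainer.py then folds the two gradients together during training. A small sketch of that aggregation (not in the repository; assumes CuPy, a working gpu/common/util.py, and that it runs from the gpu directory):

import cupy as cp
from common.trainer import remove_duplicate

W = cp.random.randn(3, 2)
params = [W, W.T]                      # weight tying via a transposed view
grads = [cp.ones((3, 2)), cp.ones((2, 3))]
params, grads = remove_duplicate(params, grads)
print(len(params), grads[0])           # 1 parameter left; gradients summed to 2s
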
/gpu/ch06/train_better_rnnlm.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('..')
from common import config
from common.optimizer import SGD
from common.trainer import RnnlmTrainer
from common.util import eval_perplexity, to_gpu
from dataset import ptb
from better_rnnlm import BetterRnnlm

batch_size = 20
wordvec_size = 650
hidden_size = 650
time_size = 35
lr = 20.0
max_epoch = 40
max_grad = 0.25
dropout = 0.5

corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_val, _, _ = ptb.load_data('val')
corpus_test, _, _ = ptb.load_data('test')

corpus = to_gpu(corpus)
corpus_val = to_gpu(corpus_val)
corpus_test = to_gpu(corpus_test)

vocab_size = len(word_to_id)
xs = corpus[:-1]
ts = corpus[1:]

model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

best_ppl = float('inf')
for epoch in range(max_epoch):
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size, time_size=time_size, max_grad=max_grad)
    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print('valid perplexity: ', ppl)

    if best_ppl > ppl:
        best_ppl = ppl
        model.save_params()
    else:
        lr /= 4.0
        optimizer.lr = lr

    model.reset_state()
    print('-' * 50)
--------------------------------------------------------------------------------
/gpu/common/base_model.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('..')
import os
import pickle
import cupy as cp
from common.util import to_gpu, to_cpu


class BaseModel:
    def __init__(self):
        self.params, self.grads = None, None

    def forward(self, *args):
        raise NotImplementedError

    def backward(self, *args):
        raise NotImplementedError

    def save_params(self, file_name=None):
        if file_name is None:
            file_name = self.__class__.__name__ + '.pkl'

        params = [p.astype(cp.float16) for p in self.params]
        params = [to_cpu(p) for p in params]

        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name=None):
        if file_name is None:
            file_name = self.__class__.__name__ + '.pkl'

        if '/' in file_name:
            file_name = file_name.replace('/', os.sep)

        if not os.path.exists(file_name):
            raise IOError('No file: ' + file_name)

        with open(file_name, 'rb') as f:
            params = pickle.load(f)

        params = [p.astype('f') for p in params]
        params = [to_gpu(p) for p in params]

        for i, param in enumerate(self.params):
            param[...] = params[i]
--------------------------------------------------------------------------------
/gpu/common/config.py:
--------------------------------------------------------------------------------
# coding: utf-8

GPU = False
--------------------------------------------------------------------------------
/gpu/common/functions.py:
--------------------------------------------------------------------------------
# coding: utf-8
# from common.np import *
import numpy as np
import cupy as cp


def sigmoid(x):
    return 1 / (1 + cp.exp(-x))


def relu(x):
    return cp.maximum(0, x)


def softmax(x):
    if x.ndim == 2:
        x = x - x.max(axis=1, keepdims=True)
        x = cp.exp(x)
        x /= x.sum(axis=1, keepdims=True)
    elif x.ndim == 1:
        x = x - cp.max(x)
        x = cp.exp(x) / cp.sum(cp.exp(x))

    return x


def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # if the labels are one-hot vectors, convert them to class indices
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]

    return -cp.sum(cp.log(y[cp.arange(batch_size), t] + 1e-7)) / batch_size
--------------------------------------------------------------------------------
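The softmax above subtracts the row maximum before exponentiating; without that shift, exp overflows for large logits even though the result is mathematically identical. A short demonstration (not in the repository), in plain NumPy:

import numpy as np

x = np.array([1000.0, 1001.0])
naive = np.exp(x) / np.exp(x).sum()   # overflows: nan from inf/inf
shifted = np.exp(x - x.max())
shifted /= shifted.sum()
print(naive, shifted)                 # [nan nan] vs ~[0.269 0.731]
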
/gpu/common/layers.py:
--------------------------------------------------------------------------------
import numpy as np
import cupy as cp
import cupyx
from common.functions import softmax, cross_entropy_error


class MatMul:
    def __init__(self, W):
        self.params = [W]
        self.grads = [cp.zeros_like(W)]
        self.x = None

    def forward(self, x):
        W, = self.params
        out = cp.dot(x, W)
        self.x = x
        return out

    def backward(self, dout):
        W, = self.params
        dx = cp.dot(dout, W.T)
        dW = cp.dot(self.x.T, dout)
        self.grads[0][...] = dW
        return dx


# copied from ch1/forward_net.py (it is used in later chapters too), with backward implemented
class Sigmoid:
    def __init__(self):
        self.params = []
        self.grads = []

    def forward(self, x):
        self.out = 1 / (1 + cp.exp(-x))
        return self.out

    def backward(self, dout):
        dx = dout * (1 - self.out) * self.out
        return dx


class SigmoidWithLoss:
    def __init__(self):
        self.params, self.grads = [], []
        self.loss = None
        self.y = None  # output of the sigmoid
        self.t = None  # teacher labels

    def forward(self, x, t):
        self.t = t
        self.y = 1 / (1 + cp.exp(-x))

        self.loss = cross_entropy_error(cp.c_[1 - self.y, self.y], self.t)

        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]

        dx = (self.y - self.t) * dout / batch_size
        return dx


class Affine:
    def __init__(self, W, b):
        self.mm = MatMul(W)  # implemented in terms of MatMul
        self.params = [W, b]
        self.grads = [
            self.mm.grads[0],  # share MatMul's grads so the model keeps referencing the grads list created at initialization
            cp.zeros_like(b),
        ]

    def forward(self, x):
        _, b = self.params
        out = self.mm.forward(x) + b
        return out

    def backward(self, dout):
        dx = self.mm.backward(dout)
        db = cp.sum(dout, axis=0)
        # self.grads[0] is updated by mm.backward
        self.grads[1] = db.copy()
        return dx


class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # initialize weights and biases
        W1 = cp.random.randn(I, H)
        b1 = cp.random.randn(H)
        W2 = cp.random.randn(H, O)
        b2 = cp.random.randn(O)

        # create layers
        self.layers = [
            Affine(W1, b1),
            Sigmoid(),
            Affine(W2, b2)
        ]

        # collect all weights into a list
        self.params = []
        for layer in self.layers:
            self.params += layer.params

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x


# FROM https://github.com/oreilly-japan/deep-learning-from-scratch-2/blob/master/common/layers.py
class Softmax:
    def __init__(self):
        self.params, self.grads = [], []
        self.out = None

    def forward(self, x):
        self.out = softmax(x)
        return self.out

    def backward(self, dout):
        dx = self.out * dout
        sumdx = cp.sum(dx, axis=1, keepdims=True)
        dx -= self.out * sumdx
        return dx


class SoftmaxWithLoss:
    def __init__(self):
        self.params, self.grads = [], []
        self.y = None  # output of the softmax
        self.t = None  # teacher labels

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)

        # if the labels are one-hot vectors, convert them to class indices
        if self.t.size == self.y.size:
            self.t = self.t.argmax(axis=1)

        loss = cross_entropy_error(self.y, self.t)
        return loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]

        dx = self.y.copy()
        dx[cp.arange(batch_size), self.t] -= 1
        dx *= dout
        dx = dx / batch_size

        return dx


class Embedding:
    def __init__(self, W):
        self.params = [W]
        self.grads = [cp.zeros_like(W)]
        self.idx = None

    def forward(self, idx):
        W, = self.params
        self.idx = idx
        out = W[idx]
        return out

    def backward(self, dout):
        dW, = self.grads
        dW[...] = 0
        cupyx.scatter_add(dW, self.idx, dout)
        return None
--------------------------------------------------------------------------------
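Embedding.backward scatters the dout rows back into dW; scatter-adding (rather than plain assignment) matters because the same index can appear several times in one batch, and its gradients must accumulate. A NumPy equivalent of the same idea (not in the repository), using np.add.at:

import numpy as np

dW = np.zeros((4, 3))
idx = np.array([0, 2, 0])   # index 0 appears twice
dout = np.ones((3, 3))
np.add.at(dW, idx, dout)
print(dW[0])                # [2. 2. 2.]: the two rows for index 0 accumulated
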
/gpu/common/np.py:
--------------------------------------------------------------------------------
# coding: utf-8
from common.config import GPU


if GPU:
    import cupy as np
    np.cuda.set_allocator(np.cuda.MemoryPool().malloc)
    np.add.at = np.scatter_add

    print('\033[92m' + '-' * 60 + '\033[0m')
    print(' ' * 23 + '\033[92mGPU Mode (cupy)\033[0m')
    print('\033[92m' + '-' * 60 + '\033[0m\n')
else:
    import numpy as np
--------------------------------------------------------------------------------
/gpu/common/optimizer.py:
--------------------------------------------------------------------------------
import numpy as np
import cupy as cp


class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr  # learning rate

    def update(self, params, grads):
        for i in range(len(params)):
            params[i] -= self.lr * grads[i]


class AdaGrad:
    '''
    AdaGrad
    '''
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = []
            for param in params:
                self.h.append(cp.zeros_like(param))

        for i in range(len(params)):
            self.h[i] += grads[i] * grads[i]
            params[i] -= self.lr * grads[i] / (cp.sqrt(self.h[i]) + 1e-7)


class Adam:
    '''
    Adam (http://arxiv.org/abs/1412.6980v8)
    '''
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = [], []
            for param in params:
                self.m.append(cp.zeros_like(param))
                self.v.append(cp.zeros_like(param))

        self.iter += 1
        lr_t = self.lr * cp.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for i in range(len(params)):
            self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i])
            self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i])

            params[i] -= lr_t * self.m[i] / (cp.sqrt(self.v[i]) + 1e-7)
--------------------------------------------------------------------------------
/gpu/common/trainer.py:
--------------------------------------------------------------------------------
# coding: utf-8
import sys
import numpy
import time
import matplotlib.pyplot as plt
import cupy as cp
sys.path.append('..')
from common.util import clip_grads


class Trainer:
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.loss_list = []
        self.eval_interval = None
        self.current_epoch = 0

    def fit(self, x, t, max_epoch=10, batch_size=32, max_grad=None, eval_interval=20):
        data_size = len(x)
        max_iters = data_size // batch_size
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0

        start_time = time.time()
        for epoch in range(max_epoch):
            # shuffle the data
            idx = numpy.random.permutation(numpy.arange(data_size))
            x = x[idx]
            t = t[idx]

            for iters in range(max_iters):
                batch_x = x[iters*batch_size:(iters+1)*batch_size]
                batch_t = t[iters*batch_size:(iters+1)*batch_size]

                # compute gradients and update parameters
                loss = model.forward(batch_x, batch_t)
                model.backward()
                params, grads = remove_duplicate(model.params, model.grads)  # consolidate shared weights into one
                if max_grad is not None:
                    clip_grads(grads, max_grad)
                optimizer.update(params, grads)
                total_loss += loss
                loss_count += 1

                # evaluation
                if (eval_interval is not None) and (iters % eval_interval) == 0:
                    avg_loss = total_loss / loss_count
                    elapsed_time = time.time() - start_time
                    print('| epoch %d | iter %d / %d | time %d[s] | loss %.2f'
                          % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, avg_loss))
                    self.loss_list.append(float(avg_loss))
                    total_loss, loss_count = 0, 0

            self.current_epoch += 1

    def plot(self, ylim=None):
        x = numpy.arange(len(self.loss_list))
        if ylim is not None:
            plt.ylim(*ylim)
        plt.plot(x, self.loss_list, label='train')
        plt.xlabel('iterations (x' + str(self.eval_interval) + ')')
        plt.ylabel('loss')
        plt.show()


class RnnlmTrainer:
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.time_idx = None
        self.ppl_list = None
        self.eval_interval = None
        self.current_epoch = 0

    def get_batch(self, x, t, batch_size, time_size):
        batch_x = cp.empty((batch_size, time_size), dtype='i')
        batch_t = cp.empty((batch_size, time_size), dtype='i')

        data_size = len(x)
        jump = data_size // batch_size
        offsets = [i * jump for i in range(batch_size)]  # start offset of each sample in the batch

        for time in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, time] = x[(offset + self.time_idx) % data_size]
                batch_t[i, time] = t[(offset + self.time_idx) % data_size]
            self.time_idx += 1
        return batch_x, batch_t

    def fit(self, xs, ts, max_epoch=10, batch_size=20, time_size=35,
            max_grad=None, eval_interval=20):
        data_size = len(xs)
        max_iters = data_size // (batch_size * time_size)
        self.time_idx = 0
        self.ppl_list = []
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0

        start_time = time.time()
        for epoch in range(max_epoch):
            for iters in range(max_iters):
                batch_x, batch_t = self.get_batch(xs, ts, batch_size, time_size)

                # compute gradients and update parameters
                loss = model.forward(batch_x, batch_t)
                model.backward()
                params, grads = remove_duplicate(model.params, model.grads)  # consolidate shared weights into one
                if max_grad is not None:
                    clip_grads(grads, max_grad)
                optimizer.update(params, grads)
                total_loss += loss
                loss_count += 1

                # evaluate perplexity
                if (eval_interval is not None) and (iters % eval_interval) == 0:
                    ppl = cp.exp(total_loss / loss_count)
                    elapsed_time = time.time() - start_time
                    print('| epoch %d | iter %d / %d | time %d[s] | perplexity %.2f'
                          % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, ppl))
                    self.ppl_list.append(float(ppl))
                    total_loss, loss_count = 0, 0

            self.current_epoch += 1

    def plot(self, ylim=None):
        x = numpy.arange(len(self.ppl_list))
        if ylim is not None:
            plt.ylim(*ylim)
        plt.plot(x, self.ppl_list, label='train')
        plt.xlabel('iterations (x' + str(self.eval_interval) + ')')
        plt.ylabel('perplexity')
        plt.show()


def remove_duplicate(params, grads):
    '''
    Consolidate duplicated weights in the parameter list into one entry
    and accumulate the corresponding gradients.
    '''
    params, grads = params[:], grads[:]  # copy lists

    while True:
        find_flg = False
        L = len(params)

        for i in range(0, L - 1):
            for j in range(i + 1, L):
                # the weights are shared
                if params[i] is params[j]:
                    grads[i] += grads[j]  # accumulate gradients
                    find_flg = True
                    params.pop(j)
                    grads.pop(j)
                # the weights are shared as a transposed matrix (weight tying)
                elif params[i].ndim == 2 and params[j].ndim == 2 and \
                     params[i].T.shape == params[j].shape and cp.all(params[i].T == params[j]):
                    grads[i] += grads[j].T
                    find_flg = True
                    params.pop(j)
                    grads.pop(j)

                if find_flg: break
            if find_flg: break

        if not find_flg: break

    return params, grads
--------------------------------------------------------------------------------
self.ppl_list.append(float(ppl)) 126 | total_loss, loss_count = 0, 0 127 | 128 | self.current_epoch += 1 129 | 130 | def plot(self, ylim=None): 131 | x = numpy.arange(len(self.ppl_list)) 132 | if ylim is not None: 133 | plt.ylim(*ylim) 134 | plt.plot(x, self.ppl_list, label='train') 135 | plt.xlabel('iterations (x' + str(self.eval_interval) + ')') 136 | plt.ylabel('perplexity') 137 | plt.show() 138 | 139 | 140 | def remove_duplicate(params, grads): 141 | ''' 142 | パラメータ配列中の重複する重みをひとつに集約し、 143 | その重みに対応する勾配を加算する 144 | ''' 145 | params, grads = params[:], grads[:] # copy list 146 | 147 | while True: 148 | find_flg = False 149 | L = len(params) 150 | 151 | for i in range(0, L - 1): 152 | for j in range(i + 1, L): 153 | # 重みを共有する場合 154 | if params[i] is params[j]: 155 | grads[i] += grads[j] # 勾配の加算 156 | find_flg = True 157 | params.pop(j) 158 | grads.pop(j) 159 | # 転置行列として重みを共有する場合(weight tying) 160 | elif params[i].ndim == 2 and params[j].ndim == 2 and \ 161 | params[i].T.shape == params[j].shape and cp.all(params[i].T == params[j]): 162 | grads[i] += grads[j].T 163 | find_flg = True 164 | params.pop(j) 165 | grads.pop(j) 166 | 167 | if find_flg: break 168 | if find_flg: break 169 | 170 | if not find_flg: break 171 | 172 | return params, grads 173 | -------------------------------------------------------------------------------- /python/20190327/sigmoid.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def sigmoid(x): 5 | return 1 / (1 + np.exp(-x)) 6 | 7 | print(sigmoid(3)) 8 | print(sigmoid(0)) 9 | print(sigmoid(-3)) 10 | 11 | print(sigmoid(3) + sigmoid(-3)) 12 | 13 | # numpyっぽい書き方 14 | print(sigmoid(np.array([3,0,-3]))) 15 | 16 | # P.13の例 17 | x = np.random.randn(10, 2) 18 | W1 = np.random.randn(2, 4) 19 | b1 = np.random.randn(4) 20 | W2 = np.random.randn(4, 3) 21 | b2 = np.random.randn(3) 22 | 23 | h = np.dot(x, W1) + b1 # これで1層の計算 24 | a = sigmoid(h) 25 | s = np.dot(a, W2) + b2 26 | 27 | print("h=", h) 28 | print("a=", a) 29 | print("s=", s) 30 | 31 | print(h.shape) 32 | print(a.shape) 33 | print(s.shape) 34 | 35 | -------------------------------------------------------------------------------- /python/20190327/sigmoid_class.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class Sigmoid: 5 | def __init__(self): 6 | pass # 何もしない 7 | 8 | def forward(self, x): 9 | return 1 / (1 + np.exp(-x)) 10 | 11 | def backward(self): 12 | pass 13 | 14 | 15 | sig = Sigmoid() 16 | 17 | print(sig.forward(3)) 18 | print(sig.forward(0)) 19 | print(sig.forward(-3)) 20 | 21 | print(sig.forward(3) + sig.forward(-3)) 22 | 23 | # numpyっぽい書き方 24 | print(sig.forward(np.array([3,0,-3]))) 25 | 26 | -------------------------------------------------------------------------------- /python/ch1/affine.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class Affine: 5 | def __init__(self, W, b): 6 | self.params = [W, b] 7 | 8 | def forward(self, x): 9 | W, b = self.params 10 | out = np.dot(x, W) + b 11 | return out 12 | 13 | -------------------------------------------------------------------------------- /python/ch1/forward_net.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class Sigmoid: 5 | def __init__(self): 6 | self.params = [] 7 | 8 | def forward(self, x): 9 | return 1 / (1 + np.exp(-x)) 10 | 11 | def backward(self): 12 | pass 13 | 14 | 
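# (Editor's addition, hedged -- not in the original file.) A quick numerical
# sanity check of the identity dy/dx = y*(1 - y) that the Sigmoid backward
# passes elsewhere in this repo rely on; `_check_sigmoid_grad` is our own
# helper name, not part of the book's code.
def _check_sigmoid_grad(x=0.5, eps=1e-6):
    y = 1.0 / (1.0 + np.exp(-x))
    analytic = y * (1.0 - y)  # sigmoid derivative expressed via its own output
    numeric = (1.0 / (1.0 + np.exp(-(x + eps)))
               - 1.0 / (1.0 + np.exp(-(x - eps)))) / (2 * eps)  # central difference
    assert abs(analytic - numeric) < 1e-8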
15 | 16 | class Affine: 17 | def __init__(self, W, b): 18 | self.params = [W, b] 19 | 20 | def forward(self, x): 21 | W, b = self.params 22 | out = np.dot(x, W) + b 23 | return out 24 | 25 | def backward(self): 26 | pass 27 | 28 | 29 | 30 | class TwoLayerNet: 31 | def __init__(self, input_size, hidden_size, output_size): 32 | I, H, O = input_size, hidden_size, output_size 33 | 34 | # 重みとバイアスの初期化 35 | W1 = np.random.randn(I, H) 36 | b1 = np.random.randn(H) 37 | W2 = np.random.randn(H, O) 38 | b2 = np.random.randn(O) 39 | 40 | # レイヤの生成 41 | self.layers = [ 42 | Affine(W1, b1), 43 | Sigmoid(), 44 | Affine(W2, b2) 45 | ] 46 | 47 | # すべての重みをリストにまとめる 48 | self.params = [] 49 | for layer in self.layers: 50 | self.params += layer.params 51 | 52 | def predict(self, x): 53 | for layer in self.layers: 54 | x = layer.forward(x) 55 | return x 56 | 57 | 58 | if __name__ == '__main__': 59 | sig = Sigmoid() 60 | 61 | print(sig.forward(3)) 62 | print(sig.forward(0)) 63 | print(sig.forward(-3)) 64 | 65 | print(sig.forward(3) + sig.forward(-3)) 66 | 67 | # numpyっぽい書き方 68 | print(sig.forward(np.array([3,0,-3]))) 69 | 70 | # text p18 71 | x = np.random.randn(10, 2) 72 | model = TwoLayerNet(2, 4, 3) 73 | s = model.predict(x) 74 | print(s) 75 | 76 | 77 | -------------------------------------------------------------------------------- /python/ch1/show_spiral_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | from upstream.dataset import spiral # 教科書のリポジトリのを使う 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | x, t = spiral.load_data() 8 | 9 | print('x', x.shape) 10 | print('t', t.shape) 11 | 12 | N = 100 13 | CLS_NUM = 3 14 | markers = ['o', 'x', '^'] 15 | for i in range(CLS_NUM): # python3向け。python2ではrangeの挙動が違うので注意。 16 | plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=10, marker=markers[i]) 17 | plt.show() 18 | -------------------------------------------------------------------------------- /python/ch1/sigmoid.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def sigmoid(x): 5 | return 1 / (1 + np.exp(-x)) 6 | 7 | print(sigmoid(3)) 8 | print(sigmoid(0)) 9 | print(sigmoid(-3)) 10 | 11 | print(sigmoid(3) + sigmoid(-3)) 12 | 13 | # numpyっぽい書き方 14 | print(sigmoid(np.array([3,0,-3]))) 15 | 16 | # P.13の例 17 | x = np.random.randn(10, 2) 18 | W1 = np.random.randn(2, 4) 19 | b1 = np.random.randn(4) 20 | W2 = np.random.randn(4, 3) 21 | b2 = np.random.randn(3) 22 | 23 | h = np.dot(x, W1) + b1 # これで1層の計算 24 | a = sigmoid(h) 25 | s = np.dot(a, W2) + b2 26 | 27 | print("h=", h) 28 | print("a=", a) 29 | print("s=", s) 30 | 31 | print(h.shape) 32 | print(a.shape) 33 | print(s.shape) 34 | 35 | -------------------------------------------------------------------------------- /python/ch1/sigmoid_class.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class Sigmoid: 5 | def __init__(self): 6 | pass # 何もしない 7 | 8 | def forward(self, x): 9 | return 1 / (1 + np.exp(-x)) 10 | 11 | def backward(self): 12 | pass 13 | 14 | 15 | sig = Sigmoid() 16 | 17 | print(sig.forward(3)) 18 | print(sig.forward(0)) 19 | print(sig.forward(-3)) 20 | 21 | print(sig.forward(3) + sig.forward(-3)) 22 | 23 | # numpyっぽい書き方 24 | print(sig.forward(np.array([3,0,-3]))) 25 | 26 | -------------------------------------------------------------------------------- /python/ch1/train_custom_loop.py: 
-------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('../upstream')  # dataset
3 | import numpy as np
4 | from common.optimizer import SGD
5 | from dataset import spiral
6 | import matplotlib.pyplot as plt
7 | from two_layer_net import TwoLayerNet
8 | 
9 | max_epoch = 300
10 | batch_size = 30
11 | hidden_size = 10
12 | learning_rate = 1.0
13 | 
14 | x, t = spiral.load_data()
15 | model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
16 | optimizer = SGD(lr=learning_rate)
17 | 
18 | data_size = len(x)
19 | max_iters = data_size // batch_size
20 | total_loss = 0
21 | loss_count = 0
22 | loss_list = []
23 | 
24 | for epoch in range(max_epoch):
25 |     idx = np.random.permutation(data_size)
26 |     x = x[idx]
27 |     t = t[idx]
28 | 
29 |     for iters in range(max_iters):
30 |         batch_x = x[iters*batch_size:(iters+1)*batch_size]
31 |         batch_t = t[iters*batch_size:(iters+1)*batch_size]
32 | 
33 |         loss = model.forward(batch_x, batch_t)
34 |         model.backward()
35 |         optimizer.update(model.params, model.grads)
36 | 
37 |         total_loss += loss
38 |         loss_count += 1
39 | 
40 |         if (iters + 1) % 10 == 0:
41 |             avg_loss = total_loss / loss_count
42 |             print('| epoch %d | iter %d / %d | loss %.2f'
43 |                   % (epoch + 1, iters + 1, max_iters, avg_loss))
44 |             loss_list.append(avg_loss)
45 |             total_loss, loss_count = 0, 0
46 | 
-------------------------------------------------------------------------------- /python/ch1/two_layer_net.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common.layers import Affine, Sigmoid  # use our own Affine and Sigmoid
5 | from upstream.common.layers import SoftmaxWithLoss  # use the textbook's SoftmaxWithLoss
6 | 
7 | class TwoLayerNet:
8 |     def __init__(self, input_size, hidden_size, output_size):
9 |         I, H, O = input_size, hidden_size, output_size
10 |         W1 = 0.01 * np.random.randn(I, H)
11 |         b1 = np.zeros(H)
12 |         W2 = 0.01 * np.random.randn(H, O)
13 |         b2 = np.zeros(O)
14 | 
15 |         self.layers = [
16 |             Affine(W1, b1),
17 |             Sigmoid(),
18 |             Affine(W2, b2),
19 |         ]
20 |         self.loss_layer = SoftmaxWithLoss()
21 | 
22 |         self.params, self.grads = [], []
23 |         for layer in self.layers:
24 |             self.params += layer.params
25 |             self.grads += layer.grads
26 | 
27 |     def predict(self, x):
28 |         for layer in self.layers:
29 |             x = layer.forward(x)
30 |         return x
31 | 
32 |     def forward(self, x, t):
33 |         score = self.predict(x)
34 |         loss = self.loss_layer.forward(score, t)
35 |         return loss
36 | 
37 |     def backward(self, dout=1):
38 |         dout = self.loss_layer.backward(dout)
39 |         for layer in reversed(self.layers):
40 |             dout = layer.backward(dout)
41 |         return dout
42 | 
-------------------------------------------------------------------------------- /python/ch3/cbow.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common.layers import MatMul
5 | 
6 | # context data
7 | c0 = np.array([[1, 0, 0, 0, 0, 0, 0]])
8 | c1 = np.array([[0, 0, 1, 0, 0, 0, 0]])
9 | 
10 | # initialize weights
11 | W_in = np.random.randn(7, 3)
12 | W_out = np.random.randn(3, 7)
13 | 
14 | # generate layers
15 | in_layer0 = MatMul(W_in)
16 | in_layer1 = MatMul(W_in)
17 | out_layer = MatMul(W_out)
18 | 
19 | # propagation
20 | h0 = in_layer0.forward(c0)
21 | h1 = in_layer1.forward(c1)
22 | h = 0.5 * (h0 + h1)
23 | s = out_layer.forward(h)
24 | 
25 | print(s)
26 | 
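# (Editor's addition, hedged -- not in the original file.) `s` above holds raw
# scores over the 7-word vocabulary; one softmax turns them into a probability
# distribution, from which the predicted word id can be read off:
probs = np.exp(s - s.max()) / np.sum(np.exp(s - s.max()))  # numerically stable softmax
print(probs, int(probs.argmax()))  # most probable word id for this context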
-------------------------------------------------------------------------------- /python/ch3/dot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | c = np.array([[1, 0, 0, 0, 0, 0, 0]]) 4 | W = np.random.randn(7, 3) 5 | 6 | print(W) 7 | 8 | h = np.dot(c, W) 9 | 10 | print(h) 11 | -------------------------------------------------------------------------------- /python/ch3/matmul.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | import numpy as np 4 | 5 | from common.layers import MatMul 6 | 7 | c = np.array([[1, 0, 0, 0, 0, 0, 0]]) 8 | W = np.random.randn(7, 3) 9 | layer = MatMul(W) 10 | h = layer.forward(c) 11 | 12 | print(h) 13 | -------------------------------------------------------------------------------- /python/ch3/simple_cbow.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | import numpy as np 4 | from common.layers import MatMul, SoftmaxWithLoss 5 | 6 | class SimpleCBOW: 7 | def __init__(self, vocab_size, hidden_size): 8 | V, H = vocab_size, hidden_size 9 | 10 | W_in = 0.01 * np.random.randn(V, H).astype('f') 11 | W_out = 0.01 * np.random.randn(H, V).astype('f') 12 | 13 | self.in_layer0 = MatMul(W_in) 14 | self.in_layer1 = MatMul(W_in) 15 | self.out_layer = MatMul(W_out) 16 | self.loss_layer = SoftmaxWithLoss() 17 | 18 | layers = [self.in_layer0, self.in_layer1, self.out_layer] 19 | self.params, self.grads = [], [] 20 | for layer in layers: 21 | self.params += layer.params 22 | self.grads += layer.grads 23 | 24 | self.word_vecs = W_in 25 | 26 | def forward(self, contexts, target): 27 | h0 = self.in_layer0.forward(contexts[:, 0]) 28 | h1 = self.in_layer1.forward(contexts[:, 1]) 29 | h = (h0 + h1) * 0.5 30 | score = self.out_layer.forward(h) 31 | loss = self.loss_layer.forward(score, target) 32 | return loss 33 | 34 | def backward(self, dout=1): 35 | ds = self.loss_layer.backward(dout) 36 | da = self.out_layer.backward(ds) 37 | da *= 0.5 38 | self.in_layer0.backward(da) 39 | self.in_layer1.backward(da) 40 | return None 41 | -------------------------------------------------------------------------------- /python/ch3/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | from common.trainer import Trainer 4 | from common.optimizer import Adam 5 | from simple_cbow import SimpleCBOW 6 | from common.util import preprocess, create_contexts_target, convert_one_hot 7 | 8 | window_size = 1 9 | hidden_size = 5 10 | batch_size = 3 11 | max_epoch = 1000 12 | 13 | text = 'You say goodbye and I say hello.' 
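# (Editor's note, added:) with window_size=1, this one-sentence corpus gives
# 7 unique words and 8 tokens; create_contexts_target() below returns contexts
# of shape (6, 2) (left/right neighbor word IDs) plus 6 target IDs, which
# convert_one_hot() then expands into the one-hot arrays SimpleCBOW expects.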
14 | corpus, word_to_id, id_to_word = preprocess(text)
15 | 
16 | vocab_size = len(word_to_id)
17 | contexts, target = create_contexts_target(corpus, window_size)
18 | target = convert_one_hot(target, vocab_size)
19 | contexts = convert_one_hot(contexts, vocab_size)
20 | 
21 | model = SimpleCBOW(vocab_size, hidden_size)
22 | optimizer = Adam()
23 | trainer = Trainer(model, optimizer)
24 | 
25 | trainer.fit(contexts, target, max_epoch, batch_size)
26 | trainer.plot()
27 | 
-------------------------------------------------------------------------------- /python/ch4/cbow.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common.layers import MatMul, SoftmaxWithLoss, Embedding
5 | from ch4.negative_sampling_layer import NegativeSamplingLoss
6 | 
7 | class CBOW:
8 |     def __init__(self, vocab_size, hidden_size, window_size, corpus):
9 |         V, H = vocab_size, hidden_size
10 | 
11 |         W_in = 0.01 * np.random.randn(V, H).astype('f')
12 |         W_out = 0.01 * np.random.randn(V, H).astype('f')
13 | 
14 |         self.in_layers = []
15 |         for i in range(2 * window_size):
16 |             layer = Embedding(W_in)
17 |             self.in_layers.append(layer)
18 |         print('W_out CBOW')  # debug output
19 |         self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)
20 | 
21 | 
22 |         layers = self.in_layers + [self.ns_loss]
23 |         self.params, self.grads = [], []
24 |         for layer in layers:
25 |             self.params += layer.params
26 |             self.grads += layer.grads
27 | 
28 |         self.word_vecs = W_in
29 | 
30 |     def forward(self, contexts, target):
31 |         h = 0  # starts as a scalar; numpy broadcasting turns it into the hidden vector below
32 |         for i, layer in enumerate(self.in_layers):
33 |             h += layer.forward(contexts[:, i])
34 |         h *= 1 / len(self.in_layers)
35 |         loss = self.ns_loss.forward(h, target)
36 |         return loss
37 | 
38 |     def backward(self, dout=1):
39 |         dout = self.ns_loss.backward(dout)
40 |         dout *= 1 / len(self.in_layers)
41 |         for layer in self.in_layers:
42 |             layer.backward(dout)
43 |         return None
44 | 
45 | if __name__ == '__main__':
46 |     cbow = CBOW(5, 10, 1, np.arange(5))  # tiny construction smoke test; SimpleCBOW lives in ch3 and is not defined here
-------------------------------------------------------------------------------- /python/ch4/cbow_params.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch4/cbow_params.pkl -------------------------------------------------------------------------------- /python/ch4/negative_sampling_layer.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common.layers import MatMul, SoftmaxWithLoss, SigmoidWithLoss, Embedding
5 | import collections
6 | 
7 | class EmbeddingDot:
8 |     def __init__(self, W):
9 |         self.embed = Embedding(W)
10 |         self.params = self.embed.params
11 |         self.grads = self.embed.grads
12 |         self.cache = None
13 | 
14 |     def forward(self, h, idx):
15 |         target_W = self.embed.forward(idx)
16 |         out = np.sum(target_W * h, axis=1)
17 | 
18 |         self.cache = (h, target_W)
19 |         return out
20 | 
21 |     def backward(self, dout):
22 |         h, target_W = self.cache
23 |         dout = dout.reshape(dout.shape[0], 1)
24 | 
25 |         dtarget_W = dout * h
26 |         self.embed.backward(dtarget_W)
27 |         dh = dout * target_W
28 |         return dh
29 | 
30 | class UnigramSampler:
31 |     def __init__(self, corpus, power, sample_size):
32 |         self.sample_size = sample_size
33 |         self.vocab_size = None
34 |         self.word_p = None
35 | 
36 |         counts = 
collections.Counter() 37 | for word_id in corpus: 38 | counts[word_id] += 1 39 | 40 | vocab_size = len(counts) 41 | self.vocab_size = vocab_size 42 | 43 | self.word_p = np.zeros(vocab_size) 44 | for i in range(vocab_size): 45 | self.word_p[i] = counts[i] 46 | 47 | self.word_p = np.power(self.word_p, power) 48 | self.word_p /= np.sum(self.word_p) 49 | 50 | def get_negative_sample(self, target): 51 | batch_size = target.shape[0] 52 | 53 | # if not GPU: 54 | negative_sample = np.zeros((batch_size, self.sample_size), dtype=np.int32) 55 | 56 | for i in range(batch_size): 57 | p = self.word_p.copy() 58 | target_idx = target[i] 59 | p[target_idx] = 0 60 | p /= p.sum() 61 | negative_sample[i, :] = np.random.choice(self.vocab_size, size=self.sample_size, replace=False, p=p) 62 | # else: 63 | # # GPU(cupy)で計算するときは、速度を優先 64 | # # 負例にターゲットが含まれるケースがある 65 | # negative_sample = np.random.choice(self.vocab_size, size=(batch_size, self.sample_size), 66 | # replace=True, p=self.word_p) 67 | 68 | return negative_sample 69 | 70 | class NegativeSamplingLoss: 71 | def __init__(self, W, corpus, power=0.75, sample_size=5): 72 | self.sample_size = sample_size 73 | self.sampler = UnigramSampler(corpus, power, sample_size) 74 | self.loss_layers = [SigmoidWithLoss() for _ in range(sample_size + 1)] 75 | self.embed_dot_layers = [EmbeddingDot(W) for _ in range(sample_size + 1)] 76 | print('W in NSLoss', [W.shape for _ in range(sample_size + 1)]) 77 | 78 | self.params, self.grads = [], [] 79 | for layer in self.embed_dot_layers: 80 | self.params += layer.params 81 | self.grads += layer.grads 82 | 83 | def forward(self, h, target): 84 | batch_size = target.shape[0] 85 | negative_sample = self.sampler.get_negative_sample(target) 86 | 87 | # 正例 88 | score = self.embed_dot_layers[0].forward(h, target) 89 | correct_label = np.ones(batch_size, dtype=np.int32) 90 | loss = self.loss_layers[0].forward(score, correct_label) 91 | 92 | # 負例 93 | negative_label = np.zeros(batch_size, dtype=np.int32) 94 | for i in range(self.sample_size): 95 | negative_target = negative_sample[:, i] 96 | score = self.embed_dot_layers[1 + i].forward(h, negative_target) 97 | loss += self.loss_layers[1 + i].forward(score, negative_label) 98 | 99 | return loss 100 | 101 | def backward(self, dout=1): 102 | dh = 0 103 | for l0, l1 in zip(self.loss_layers, self.embed_dot_layers): 104 | dscore = l0.backward(dout) 105 | dh += l1.backward(dscore) 106 | return dh 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /python/ch4/ptb.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | sys.path.append('..') 5 | try: 6 | import urllib.request 7 | except ImportError: 8 | raise ImportError('Use Python3!') 9 | import pickle 10 | import numpy as np 11 | 12 | 13 | url_base = 'https://raw.githubusercontent.com/tomsercu/lstm/master/data/' 14 | key_file = { 15 | 'train':'ptb.train.txt', 16 | 'test':'ptb.test.txt', 17 | 'valid':'ptb.valid.txt' 18 | } 19 | save_file = { 20 | 'train':'ptb.train.npy', 21 | 'test':'ptb.test.npy', 22 | 'valid':'ptb.valid.npy' 23 | } 24 | vocab_file = 'ptb.vocab.pkl' 25 | 26 | dataset_dir = os.path.dirname(os.path.abspath(__file__)) 27 | 28 | 29 | def _download(file_name): 30 | file_path = dataset_dir + '/' + file_name 31 | if os.path.exists(file_path): 32 | return 33 | 34 | print('Downloading ' + file_name + ' ... 
')
35 | 
36 |     try:
37 |         urllib.request.urlretrieve(url_base + file_name, file_path)
38 |     except urllib.error.URLError:
39 |         import ssl
40 |         ssl._create_default_https_context = ssl._create_unverified_context
41 |         urllib.request.urlretrieve(url_base + file_name, file_path)
42 | 
43 |     print('Done')
44 | 
45 | 
46 | def load_vocab():
47 |     vocab_path = dataset_dir + '/' + vocab_file
48 | 
49 |     if os.path.exists(vocab_path):
50 |         with open(vocab_path, 'rb') as f:
51 |             word_to_id, id_to_word = pickle.load(f)
52 |         return word_to_id, id_to_word
53 | 
54 |     word_to_id = {}
55 |     id_to_word = {}
56 |     data_type = 'train'
57 |     file_name = key_file[data_type]
58 |     file_path = dataset_dir + '/' + file_name
59 | 
60 |     _download(file_name)
61 | 
62 |     words = open(file_path).read().replace('\n', '<eos>').strip().split()  # mark line ends with the <eos> token
63 | 
64 |     for i, word in enumerate(words):
65 |         if word not in word_to_id:
66 |             tmp_id = len(word_to_id)
67 |             word_to_id[word] = tmp_id
68 |             id_to_word[tmp_id] = word
69 | 
70 |     with open(vocab_path, 'wb') as f:
71 |         pickle.dump((word_to_id, id_to_word), f)
72 | 
73 |     return word_to_id, id_to_word
74 | 
75 | 
76 | def load_data(data_type='train'):
77 |     '''
78 |     :param data_type: which split to load: 'train', 'test' or 'valid' ('val' is also accepted)
79 |     :return:
80 |     '''
81 |     if data_type == 'val': data_type = 'valid'
82 |     save_path = dataset_dir + '/' + save_file[data_type]
83 | 
84 |     word_to_id, id_to_word = load_vocab()
85 | 
86 |     if os.path.exists(save_path):
87 |         corpus = np.load(save_path)
88 |         return corpus, word_to_id, id_to_word
89 | 
90 |     file_name = key_file[data_type]
91 |     file_path = dataset_dir + '/' + file_name
92 |     _download(file_name)
93 | 
94 |     words = open(file_path).read().replace('\n', '<eos>').strip().split()  # mark line ends with the <eos> token
95 |     corpus = np.array([word_to_id[w] for w in words])
96 | 
97 |     np.save(save_path, corpus)
98 |     return corpus, word_to_id, id_to_word
99 | 
100 | 
101 | if __name__ == '__main__':
102 |     for data_type in ('train', 'val', 'test'):
103 |         load_data(data_type)
-------------------------------------------------------------------------------- /python/ch4/ptb.train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch4/ptb.train.npy -------------------------------------------------------------------------------- /python/ch4/ptb.vocab.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch4/ptb.vocab.pkl -------------------------------------------------------------------------------- /python/ch4/train.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common import config
5 | 
6 | import pickle
7 | from common.trainer import Trainer
8 | from common.optimizer import Adam
9 | from cbow import CBOW
10 | from common.util import create_contexts_target
11 | import ptb
12 | 
13 | # hyperparameters
14 | window_size = 5
15 | hidden_size = 100
16 | batch_size = 100
17 | max_epoch = 10
18 | 
19 | corpus, word_to_id, id_to_word = ptb.load_data('train')
20 | vocab_size = len(word_to_id)
21 | print(len(word_to_id))
22 | 
23 | contexts, target = create_contexts_target(corpus, window_size)
24 | # if config.GPU:
25 | #     contexts, target = to_gpu(contexts), to_gpu(target)
26 | 
27 | model = CBOW(vocab_size, hidden_size, window_size, corpus)
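# (Editor's note, added:) with the PTB vocabulary (V is about 10,000) and
# H = 100, W_in and W_out each hold V*H floats; the float16 word vectors
# pickled at the end of this script therefore come to roughly 2 MB.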
28 | optimizer = Adam()
29 | trainer = Trainer(model, optimizer)
30 | 
31 | trainer.fit(contexts, target, max_epoch, batch_size)
32 | trainer.plot()
33 | 
34 | word_vecs = model.word_vecs
35 | 
36 | params = {}
37 | params['word_vecs'] = word_vecs.astype(np.float16)
38 | params['word_to_id'] = word_to_id
39 | params['id_to_word'] = id_to_word
40 | pkl_file = 'cbow_params.pkl'
41 | with open(pkl_file, 'wb') as f:
42 |     pickle.dump(params, f, -1)
-------------------------------------------------------------------------------- /python/ch4/train_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch4/train_loss.png -------------------------------------------------------------------------------- /python/ch5/simple_rnnlm.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import numpy as np
4 | from common.time_layers import *
5 | from numpy.random import randn as rn
6 | 
7 | class SimpleRnnlm:
8 |     def __init__(self, vocab_size, wordvec_size, hidden_size):
9 |         V, D, H = vocab_size, wordvec_size, hidden_size
10 |         # rn is numpy.random.randn (imported above)
11 | 
12 |         # initialize weights
13 |         embed_W = (rn(V, D) / 100).astype('f')
14 |         rnn_Wx = (rn(D, H) / np.sqrt(D)).astype('f')
15 |         rnn_Wh = (rn(H, H) / np.sqrt(H)).astype('f')
16 |         rnn_b = np.zeros(H).astype('f')
17 |         affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
18 |         affine_b = np.zeros(V).astype('f')
19 | 
20 |         # create layers
21 |         self.layers = [
22 |             TimeEmbedding(embed_W),
23 |             TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True),
24 |             TimeAffine(affine_W, affine_b)
25 |         ]
26 |         self.loss_layer = TimeSoftmaxWithLoss()
27 |         self.rnn_layer = self.layers[1]
28 | 
29 |         self.params, self.grads = [], []
30 |         for layer in self.layers:
31 |             self.params += layer.params
32 |             self.grads += layer.grads
33 | 
34 |     def forward(self, xs, ts):
35 |         for layer in self.layers:
36 |             xs = layer.forward(xs)
37 |         loss = self.loss_layer.forward(xs, ts)
38 |         return loss
39 | 
40 |     def backward(self, dout=1):
41 |         dout = self.loss_layer.backward(dout)
42 |         for layer in reversed(self.layers):
43 |             dout = layer.backward(dout)
44 |         return dout
45 | 
46 |     def reset_state(self):
47 |         self.rnn_layer.reset_state()
48 | 
-------------------------------------------------------------------------------- /python/ch5/train.py: -------------------------------------------------------------------------------- 
1 | import sys
2 | sys.path.append('..')
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | from common.optimizer import SGD
6 | from dataset import ptb
7 | from simple_rnnlm import SimpleRnnlm
8 | from common.trainer import RnnlmTrainer
9 | 
10 | batch_size = 10
11 | wordvec_size = 100
12 | hidden_size = 100
13 | time_size = 5
14 | lr = 0.1
15 | max_epoch = 100
16 | 
17 | corpus, word_to_id, id_to_word = ptb.load_data('train')
18 | corpus_size = 1000
19 | corpus = corpus[:corpus_size]
20 | vocab_size = int(max(corpus) + 1)
21 | 
22 | xs = corpus[:-1]
23 | ts = corpus[1:]
24 | data_size = len(xs)
25 | print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))
26 | 
27 | max_iters = data_size // (batch_size * time_size)
28 | time_idx = 0
29 | total_loss = 0
30 | loss_count = 0
31 | ppl_list = []
32 | 
33 | model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
34 | optimizer = SGD(lr)
35 | 
36 | trainer = RnnlmTrainer(model, optimizer)
37 | trainer.fit(xs, ts, max_epoch, batch_size, 
time_size) 38 | trainer.plot() 39 | -------------------------------------------------------------------------------- /python/ch5/train_custom_loop.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from common.optimizer import SGD 6 | from dataset import ptb 7 | from simple_rnnlm import SimpleRnnlm 8 | 9 | batch_size = 10 10 | wordvec_size = 100 11 | hidden_size = 100 12 | time_size = 5 13 | lr = 0.1 14 | max_epoch = 100 15 | 16 | corpus, word_to_id, id_to_word = ptb.load_data('train') 17 | corpus_size = 1000 18 | corpus = corpus[:corpus_size] 19 | vocab_size = int(max(corpus) + 1) 20 | 21 | xs = corpus[:-1] 22 | ts = corpus[1:] 23 | data_size = len(xs) 24 | print('corpus size: %d, vocabulary size: %d' %(corpus_size, vocab_size)) 25 | 26 | max_iters = data_size // (batch_size * time_size) 27 | time_idx = 0 28 | total_loss = 0 29 | loss_count = 0 30 | ppl_list = [] 31 | 32 | model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size) 33 | optimizer = SGD(lr) 34 | 35 | jump = (corpus_size - 1) // batch_size 36 | offsets = [i * jump for i in range(batch_size)] 37 | 38 | for epoch in range(max_epoch): 39 | for iter_ in range(max_iters): 40 | batch_x = np.empty((batch_size, time_size), dtype='i') 41 | batch_t = np.empty((batch_size, time_size), dtype='i') 42 | 43 | # ミニバッチの取得 44 | for t in range(time_size): 45 | for i, offset in enumerate(offsets): 46 | batch_x[i, t] = xs[(offset + time_idx) % data_size] 47 | batch_t[i, t] = ts[(offset + time_idx) % data_size] 48 | time_idx += 1 49 | # iter=0 50 | # [ ][ ][ ][ ][ ] 51 | # [ ][ ][ ][ ][ |] 52 | # [ ][ ][ ][ ][ | ] 53 | # [ ][ ][ ][ ][ | ] 54 | # [ ][ ][ ][ ][ | ] 55 | # iter=1 56 | # [ ][ ][ ][ ][ | ] 57 | # [ ][ ][ ][ ][ | ] 58 | # [ ][ ][ ][ ][ | ] 59 | # [ ][ ][ ][ ][| ] 60 | # [ ][ ][ ][ ]| ] 61 | 62 | # 勾配を求め、パラメータを更新 63 | loss = model.forward(batch_x, batch_t) 64 | model.backward() 65 | optimizer.update(model.params, model.grads) 66 | total_loss += loss 67 | loss_count += 1 68 | 69 | # エポックごとにパープレキシティの評価 70 | ppl = np.exp(total_loss / loss_count) 71 | print('%d,%.2f' % (epoch + 1, ppl)) 72 | ppl_list.append(float(ppl)) 73 | total_loss, loss_count = 0, 0 74 | 75 | 76 | plt.plot(ppl_list) 77 | plt.show() 78 | -------------------------------------------------------------------------------- /python/ch6/Figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch6/Figure_1.png -------------------------------------------------------------------------------- /python/ch6/Rnnlm.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch6/Rnnlm.pkl -------------------------------------------------------------------------------- /python/ch6/clip_grads.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | dW1 = np.random.rand(3, 3) * 10 4 | dW2 = np.random.rand(3, 3) * 10 5 | grads = [dW1, dW2] 6 | max_norm = 5.0 7 | 8 | print(grads) 9 | 10 | def clip_grads(grads, max_norm): 11 | total_norm = 0 12 | for grad in grads: 13 | total_norm += np.sum(grad ** 2) 14 | total_norm = np.sqrt(total_norm) 15 | 16 | rate = max_norm / (total_norm + 1e-6) 17 | if rate < 1: 
18 | for grad in grads: 19 | grad *= rate 20 | 21 | clip_grads(grads, max_norm) 22 | print(grads) 23 | -------------------------------------------------------------------------------- /python/ch6/lstm_backward_graph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/ch6/lstm_backward_graph.jpg -------------------------------------------------------------------------------- /python/ch6/rnn_gradient_graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | N = 2 # ミニバッチサイズ 5 | H = 3 # 隠れ状態ベクトルの次元数 6 | T = 20 # 時系列データの長さ 7 | 8 | dh = np.ones((N, H)) 9 | np.random.seed(3) # 再現性のため乱数のシードを固定 10 | Wh = np.random.randn(H, H) 11 | #Wh = np.random.randn(H, H) * 0.5 12 | 13 | 14 | max_norm = 5.0 15 | 16 | def clip_grads(grads, max_norm): 17 | total_norm = 0 18 | for grad in grads: 19 | total_norm += np.sum(grad ** 2) 20 | total_norm = np.sqrt(total_norm) 21 | 22 | rate = max_norm / (total_norm + 1e-6) 23 | if rate < 1: 24 | for grad in grads: 25 | grad *= rate 26 | 27 | 28 | norm_list = [] 29 | for t in range(T): 30 | dh = np.dot(dh, Wh.T) 31 | # clip_grads(dh, max_norm) 32 | norm = np.sqrt(np.sum(dh**2)) / N 33 | norm_list.append(norm) 34 | 35 | plt.plot(norm_list) 36 | plt.show() 37 | -------------------------------------------------------------------------------- /python/ch6/rnn_gradient_graph_clip.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | N = 2 # ミニバッチサイズ 5 | H = 3 # 隠れ状態ベクトルの次元数 6 | T = 20 # 時系列データの長さ 7 | 8 | dh = np.ones((N, H)) 9 | np.random.seed(3) # 再現性のため乱数のシードを固定 10 | Wh = np.random.randn(H, H) 11 | #Wh = np.random.randn(H, H) * 0.5 12 | 13 | 14 | max_norm = 5.0 15 | 16 | def clip_grads(grads, max_norm): 17 | total_norm = 0 18 | for grad in grads: 19 | total_norm += np.sum(grad ** 2) 20 | total_norm = np.sqrt(total_norm) 21 | 22 | rate = max_norm / (total_norm + 1e-6) 23 | if rate < 1: 24 | for grad in grads: 25 | grad *= rate 26 | 27 | 28 | norm_list = [] 29 | for t in range(T): 30 | dh = np.dot(dh, Wh.T) 31 | clip_grads(dh, max_norm) 32 | norm = np.sqrt(np.sum(dh**2)) / N 33 | norm_list.append(norm) 34 | 35 | plt.plot(norm_list) 36 | plt.show() 37 | -------------------------------------------------------------------------------- /python/ch6/rnnlm.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | import numpy as np 4 | from common.time_layers import * 5 | import pickle 6 | 7 | 8 | class Rnnlm: 9 | def __init__(self, vocab_size=10000, wordvec_size=100, hidden_size=100): 10 | V, D, H = vocab_size, wordvec_size, hidden_size 11 | rn = np.random.randn 12 | 13 | # 重みの初期化 14 | embed_W = (rn(V, D) / 100).astype('f') 15 | lstm_Wx = (rn(D, 4*H) / np.sqrt(D)).astype('f') # Xavierの初期値 16 | lstm_Wh = (rn(H, 4*H) / np.sqrt(H)).astype('f') 17 | lstm_b = np.zeros(4*H).astype('f') 18 | affine_W = (rn(H,V) / np.sqrt(H)).astype('f') 19 | affine_b = np.zeros(V).astype('f') 20 | 21 | # レイヤの生成 22 | self.layers = [ 23 | TimeEmbedding(embed_W), 24 | TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True), 25 | TimeAffine(affine_W, affine_b) 26 | ] 27 | self.loss_layer = TimeSoftmaxWithLoss() 28 | self.lstm_layer = self.layers[1] 29 | 30 | # すべての重みと勾配をリストにまとめる 31 | self.params, 
self.grads = [],[] 32 | for layer in self.layers: 33 | self.params += layer.params 34 | self.grads += layer.grads 35 | 36 | def predict(self, xs): 37 | for layer in self.layers: 38 | xs = layer.forward(xs) 39 | return xs 40 | 41 | def forward(self, xs, ts): 42 | score = self.predict(xs) 43 | loss = self.loss_layer.forward(score, ts) 44 | return loss 45 | 46 | def backward(self, dout=1): 47 | dout = self.loss_layer.backward(dout) 48 | for layer in reversed(self.layers): 49 | dout = layer.backward(dout) 50 | return dout 51 | 52 | def reset_state(self): 53 | self.lstm_layer.reset_state() 54 | 55 | def save_params(self, file_name='Rnnlm.pkl'): 56 | with open(file_name, 'wb') as f: 57 | pickle.dump(self.params, f) 58 | 59 | def load_params(self, file_name='Rnnlm.pkl'): 60 | with open(file_name, 'rb') as f: 61 | prms = pickle.load(f) 62 | 63 | # 教科書には無いが、これがないと読み込めていなかった 64 | for i, param in enumerate(self.params): 65 | param[...] = prms[i] 66 | -------------------------------------------------------------------------------- /python/ch6/train_rnnlm.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | from common.optimizer import SGD 4 | from common.trainer import RnnlmTrainer 5 | from common.util import eval_perplexity 6 | from dataset import ptb 7 | from rnnlm import Rnnlm 8 | 9 | # ハイパーパラメータの設定 10 | batch_size = 20 11 | wordvec_size = 100 12 | hidden_size = 100 # RNNの隠れ状態ベクトルの要素数 13 | time_size = 35 # RNNを展開するサイズ 14 | lr = 20.0 15 | max_epoch = 4 16 | max_grad = 0.25 17 | 18 | # 学習データの読み込み 19 | corpus, word_to_id, id_to_word = ptb.load_data('train') 20 | corpus_test, _, _ = ptb.load_data('test') 21 | vocab_size = len(word_to_id) 22 | xs = corpus[:-1] 23 | ts = corpus[1:] 24 | 25 | # モデルの生成 26 | model = Rnnlm(vocab_size, wordvec_size, hidden_size) 27 | optimizer = SGD(lr) 28 | trainer = RnnlmTrainer(model, optimizer) 29 | 30 | # パラメータの読み込み 31 | #model.load_params() 32 | 33 | # 勾配クリッピングを適用して学習 34 | trainer.fit(xs, ts, max_epoch, batch_size, time_size, max_grad, eval_interval=20) 35 | trainer.plot(ylim=(0, 500)) 36 | 37 | # テストデータで評価 38 | model.reset_state() 39 | ppl_test = eval_perplexity(model, corpus_test) 40 | print('test perplexity: ', ppl_test) 41 | 42 | # パラメータの保存 43 | model.save_params() 44 | 45 | 46 | # | epoch 4 | iter 1321 / 1327 | time 2590[s] | perplexity 110.01 47 | # evaluating perplexity ... 
48 | # 234 / 235 49 | # test perplexity: 136.49846872347803 50 | -------------------------------------------------------------------------------- /python/common/config.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | GPU = False 4 | -------------------------------------------------------------------------------- /python/common/functions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from common.np import * 3 | 4 | 5 | def sigmoid(x): 6 | return 1 / (1 + np.exp(-x)) 7 | 8 | 9 | def relu(x): 10 | return np.maximum(0, x) 11 | 12 | 13 | def softmax(x): 14 | if x.ndim == 2: 15 | x = x - x.max(axis=1, keepdims=True) 16 | x = np.exp(x) 17 | x /= x.sum(axis=1, keepdims=True) 18 | elif x.ndim == 1: 19 | x = x - np.max(x) 20 | x = np.exp(x) / np.sum(np.exp(x)) 21 | 22 | return x 23 | 24 | 25 | def cross_entropy_error(y, t): 26 | if y.ndim == 1: 27 | t = t.reshape(1, t.size) 28 | y = y.reshape(1, y.size) 29 | 30 | # 教師データがone-hot-vectorの場合、正解ラベルのインデックスに変換 31 | if t.size == y.size: 32 | t = t.argmax(axis=1) 33 | 34 | batch_size = y.shape[0] 35 | 36 | return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size 37 | -------------------------------------------------------------------------------- /python/common/layers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from common.functions import softmax, cross_entropy_error 3 | 4 | class MatMul: 5 | def __init__(self, W): 6 | self.params = [W] 7 | self.grads = [np.zeros_like(W)] 8 | self.x = None 9 | 10 | def forward(self, x): 11 | W, = self.params 12 | out = np.dot(x, W) 13 | self.x = x 14 | return out 15 | 16 | def backward(self, dout): 17 | W, = self.params 18 | dx = np.dot(dout, W.T) 19 | dW = np.dot(self.x.T, dout) 20 | self.grads[0][...] 
= dW
21 |         return dx
22 | 
23 | # Copied from ch1/forward_net.py (later chapters use it too) with backward implemented
24 | class Sigmoid:
25 |     def __init__(self):
26 |         self.params = []
27 |         self.grads = []
28 | 
29 |     def forward(self, x):
30 |         self.out = 1 / (1 + np.exp(-x))
31 |         return self.out
32 | 
33 |     def backward(self, dout):
34 |         dx = dout * (1 - self.out) * self.out
35 |         return dx
36 | 
37 | 
38 | class Affine:
39 |     def __init__(self, W, b):
40 |         self.mm = MatMul(W)  # implemented on top of MatMul as an experiment
41 |         self.params = [W, b]
42 |         self.grads = [
43 |             self.mm.grads[0],  # share MatMul's grad array, so the references the model collects right after init stay valid
44 |             np.zeros_like(b),
45 |         ]
46 | 
47 |     def forward(self, x):
48 |         _, b = self.params
49 |         out = self.mm.forward(x) + b
50 |         return out
51 | 
52 |     def backward(self, dout):
53 |         dx = self.mm.backward(dout)
54 |         db = np.sum(dout, axis=0)
55 |         # self.grads[0] is updated in place by mm.backward
56 |         self.grads[1][...] = db  # write in place: rebinding the slot (self.grads[1] = db.copy()) would leave the array the model collected at init untouched
57 |         return dx
58 | 
59 | class TwoLayerNet:
60 |     def __init__(self, input_size, hidden_size, output_size):
61 |         I, H, O = input_size, hidden_size, output_size
62 | 
63 |         # initialize weights and biases
64 |         W1 = np.random.randn(I, H)
65 |         b1 = np.random.randn(H)
66 |         W2 = np.random.randn(H, O)
67 |         b2 = np.random.randn(O)
68 | 
69 |         # create layers
70 |         self.layers = [
71 |             Affine(W1, b1),
72 |             Sigmoid(),
73 |             Affine(W2, b2)
74 |         ]
75 | 
76 |         # collect all weights into a list
77 |         self.params = []
78 |         for layer in self.layers:
79 |             self.params += layer.params
80 | 
81 |     def predict(self, x):
82 |         for layer in self.layers:
83 |             x = layer.forward(x)
84 |         return x
85 | 
86 | 
87 | # FROM https://github.com/oreilly-japan/deep-learning-from-scratch-2/blob/master/common/layers.py
88 | class Softmax:
89 |     def __init__(self):
90 |         self.params, self.grads = [], []
91 |         self.out = None
92 | 
93 |     def forward(self, x):
94 |         self.out = softmax(x)
95 |         return self.out
96 | 
97 |     def backward(self, dout):
98 |         dx = self.out * dout
99 |         sumdx = np.sum(dx, axis=1, keepdims=True)
100 |         dx -= self.out * sumdx
101 |         return dx
102 | 
103 | 
104 | class SoftmaxWithLoss:
105 |     def __init__(self):
106 |         self.params, self.grads = [], []
107 |         self.y = None  # output of softmax
108 |         self.t = None  # teacher labels
109 | 
110 |     def forward(self, x, t):
111 |         self.t = t
112 |         self.y = softmax(x)
113 | 
114 |         # if the teacher labels are one-hot vectors, convert them to class indices
115 |         if self.t.size == self.y.size:
116 |             self.t = self.t.argmax(axis=1)
117 | 
118 |         loss = cross_entropy_error(self.y, self.t)
119 |         return loss
120 | 
121 |     def backward(self, dout=1):
122 |         batch_size = self.t.shape[0]
123 | 
124 |         dx = self.y.copy()
125 |         dx[np.arange(batch_size), self.t] -= 1
126 |         dx *= dout
127 |         dx = dx / batch_size
128 | 
129 |         return dx
130 | 
131 | 
132 | class Embedding:
133 |     def __init__(self, W):
134 |         self.params = [W]
135 |         self.grads = [np.zeros_like(W)]
136 |         self.idx = None
137 | 
138 |     def forward(self, idx):
139 |         W, = self.params
140 |         self.idx = idx
141 |         out = W[idx]
142 |         return out
143 | 
144 |     def backward(self, dout):
145 |         dW, = self.grads
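        # (Editor's note, added:) np.add.at below accumulates the dout rows into
        # dW even when self.idx contains the same word id more than once, which
        # plain fancy-index assignment (dW[self.idx] = dout) would silently drop.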
146 |         dW[...] = 0
147 |         np.add.at(dW, self.idx, dout)
148 |         return None
149 | 
150 | 
151 | # Added: ch4/negative_sampling_layer.py imports SigmoidWithLoss from this module,
152 | # but no such class was defined here; the implementation below follows the
153 | # textbook repo (oreilly-japan/deep-learning-from-scratch-2, common/layers.py).
154 | class SigmoidWithLoss:
155 |     def __init__(self):
156 |         self.params, self.grads = [], []
157 |         self.loss = None
158 |         self.y = None  # output of sigmoid
159 |         self.t = None  # teacher labels
160 | 
161 |     def forward(self, x, t):
162 |         self.t = t
163 |         self.y = 1 / (1 + np.exp(-x))
164 |         self.loss = cross_entropy_error(np.c_[1 - self.y, self.y], self.t)
165 |         return self.loss
166 | 
167 |     def backward(self, dout=1):
168 |         batch_size = self.t.shape[0]
169 |         dx = (self.y - self.t) * dout / batch_size
170 |         return dx
171 | 
-------------------------------------------------------------------------------- /python/common/np.py: -------------------------------------------------------------------------------- 
1 | # coding: utf-8
2 | from common.config import GPU
3 | 
4 | 
5 | if GPU:
6 |     import cupy as np
7 |     np.cuda.set_allocator(np.cuda.MemoryPool().malloc)
8 |     np.add.at = np.scatter_add
9 | 
10 |     print('\033[92m' + '-' * 60 + '\033[0m')
11 |     print(' ' * 23 + '\033[92mGPU Mode (cupy)\033[0m')
12 |     print('\033[92m' + '-' * 60 + '\033[0m\n')
13 | else:
14 |     import numpy as np
15 | 
-------------------------------------------------------------------------------- /python/common/optimizer.py: -------------------------------------------------------------------------------- 
1 | import numpy as np
2 | 
3 | class SGD:
4 |     def __init__(self, lr=0.01):
5 |         self.lr = lr  # learning rate
6 | 
7 |     def update(self, params, grads):
8 |         for i in range(len(params)):
9 |             params[i] -= self.lr * grads[i]
10 | 
11 | class AdaGrad:
12 |     '''
13 |     AdaGrad
14 |     '''
15 |     def __init__(self, lr=0.01):
16 |         self.lr = lr
17 |         self.h = None
18 | 
19 |     def update(self, params, grads):
20 |         if self.h is None:
21 |             self.h = []
22 |             for param in params:
23 |                 self.h.append(np.zeros_like(param))
24 | 
25 |         for i in range(len(params)):
26 |             self.h[i] += grads[i] * grads[i]
27 |             params[i] -= self.lr * grads[i] / (np.sqrt(self.h[i]) + 1e-7)
28 | 
29 | class Adam:
30 |     '''
31 |     Adam (http://arxiv.org/abs/1412.6980v8)
32 |     '''
33 |     def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
34 |         self.lr = lr
35 |         self.beta1 = beta1
36 |         self.beta2 = beta2
37 |         self.iter = 0
38 |         self.m = None
39 |         self.v = None
40 | 
41 |     def update(self, params, grads):
42 |         if self.m is None:
43 |             self.m, self.v = [], []
44 |             for param in params:
45 |                 self.m.append(np.zeros_like(param))
46 |                 self.v.append(np.zeros_like(param))
47 | 
48 |         self.iter += 1
49 |         lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
50 | 
51 |         for i in range(len(params)):
52 |             self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i])
53 |             self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i])
54 | 
55 |             params[i] -= lr_t * self.m[i] / (np.sqrt(self.v[i]) + 1e-7)
-------------------------------------------------------------------------------- /python/common/trainer.py: -------------------------------------------------------------------------------- 
1 | # coding: utf-8
2 | import sys
3 | sys.path.append('..')
4 | import numpy
5 | import time
6 | import matplotlib.pyplot as plt
7 | from common.np import *  # import numpy as np
8 | from common.util import clip_grads
9 | 
10 | 
11 | class Trainer:
12 |     def __init__(self, model, optimizer):
13 |         self.model = model
14 |         self.optimizer = optimizer
15 |         self.loss_list = []
16 |         self.eval_interval = None
17 |         self.current_epoch = 0
18 | 
19 |     def fit(self, x, t, max_epoch=10, batch_size=32, max_grad=None, eval_interval=20):
20 |         data_size = len(x)
21 |         max_iters = data_size // batch_size
22 |         self.eval_interval = eval_interval
23 |         model, optimizer = self.model, self.optimizer
24 |         total_loss = 0
25 |         loss_count = 0
26 | 
27 |         start_time = time.time()
28 |         for epoch in range(max_epoch):
29 |             # shuffle the data
30 |             idx = numpy.random.permutation(numpy.arange(data_size))
31 |             x = x[idx]
32 |             t = t[idx]
33 | 
34 |             for iters in range(max_iters):
35 |                 batch_x = x[iters*batch_size:(iters+1)*batch_size]
36 |                 batch_t = t[iters*batch_size:(iters+1)*batch_size]
37 | 
38 |                 # compute the gradients and update the parameters
39 |                 loss = 
model.forward(batch_x, batch_t) 40 | model.backward() 41 | params, grads = remove_duplicate(model.params, model.grads) # 共有された重みを1つに集約 42 | if max_grad is not None: 43 | clip_grads(grads, max_grad) 44 | optimizer.update(params, grads) 45 | total_loss += loss 46 | loss_count += 1 47 | 48 | # 評価 49 | if (eval_interval is not None) and (iters % eval_interval) == 0: 50 | avg_loss = total_loss / loss_count 51 | elapsed_time = time.time() - start_time 52 | print('| epoch %d | iter %d / %d | time %d[s] | loss %.2f' 53 | % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, avg_loss)) 54 | self.loss_list.append(float(avg_loss)) 55 | total_loss, loss_count = 0, 0 56 | 57 | self.current_epoch += 1 58 | 59 | def plot(self, ylim=None): 60 | x = numpy.arange(len(self.loss_list)) 61 | if ylim is not None: 62 | plt.ylim(*ylim) 63 | plt.plot(x, self.loss_list, label='train') 64 | plt.xlabel('iterations (x' + str(self.eval_interval) + ')') 65 | plt.ylabel('loss') 66 | plt.show() 67 | 68 | 69 | class RnnlmTrainer: 70 | def __init__(self, model, optimizer): 71 | self.model = model 72 | self.optimizer = optimizer 73 | self.time_idx = None 74 | self.ppl_list = None 75 | self.eval_interval = None 76 | self.current_epoch = 0 77 | 78 | def get_batch(self, x, t, batch_size, time_size): 79 | batch_x = np.empty((batch_size, time_size), dtype='i') 80 | batch_t = np.empty((batch_size, time_size), dtype='i') 81 | 82 | data_size = len(x) 83 | jump = data_size // batch_size 84 | offsets = [i * jump for i in range(batch_size)] # バッチの各サンプルの読み込み開始位置 85 | 86 | for time in range(time_size): 87 | for i, offset in enumerate(offsets): 88 | batch_x[i, time] = x[(offset + self.time_idx) % data_size] 89 | batch_t[i, time] = t[(offset + self.time_idx) % data_size] 90 | self.time_idx += 1 91 | return batch_x, batch_t 92 | 93 | def fit(self, xs, ts, max_epoch=10, batch_size=20, time_size=35, 94 | max_grad=None, eval_interval=20): 95 | data_size = len(xs) 96 | max_iters = data_size // (batch_size * time_size) 97 | self.time_idx = 0 98 | self.ppl_list = [] 99 | self.eval_interval = eval_interval 100 | model, optimizer = self.model, self.optimizer 101 | total_loss = 0 102 | loss_count = 0 103 | 104 | start_time = time.time() 105 | for epoch in range(max_epoch): 106 | for iters in range(max_iters): 107 | batch_x, batch_t = self.get_batch(xs, ts, batch_size, time_size) 108 | 109 | # 勾配を求め、パラメータを更新 110 | loss = model.forward(batch_x, batch_t) 111 | model.backward() 112 | params, grads = remove_duplicate(model.params, model.grads) # 共有された重みを1つに集約 113 | if max_grad is not None: 114 | clip_grads(grads, max_grad) 115 | optimizer.update(params, grads) 116 | total_loss += loss 117 | loss_count += 1 118 | 119 | # パープレキシティの評価 120 | if (eval_interval is not None) and (iters % eval_interval) == 0: 121 | ppl = np.exp(total_loss / loss_count) 122 | elapsed_time = time.time() - start_time 123 | print('| epoch %d | iter %d / %d | time %d[s] | perplexity %.2f' 124 | % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, ppl)) 125 | self.ppl_list.append(float(ppl)) 126 | total_loss, loss_count = 0, 0 127 | 128 | self.current_epoch += 1 129 | 130 | def plot(self, ylim=None): 131 | x = numpy.arange(len(self.ppl_list)) 132 | if ylim is not None: 133 | plt.ylim(*ylim) 134 | plt.plot(x, self.ppl_list, label='train') 135 | plt.xlabel('iterations (x' + str(self.eval_interval) + ')') 136 | plt.ylabel('perplexity') 137 | plt.show() 138 | 139 | 140 | def remove_duplicate(params, grads): 141 | ''' 142 | パラメータ配列中の重複する重みをひとつに集約し、 143 | その重みに対応する勾配を加算する 
144 | ''' 145 | params, grads = params[:], grads[:] # copy list 146 | 147 | while True: 148 | find_flg = False 149 | L = len(params) 150 | 151 | for i in range(0, L - 1): 152 | for j in range(i + 1, L): 153 | # 重みを共有する場合 154 | if params[i] is params[j]: 155 | grads[i] += grads[j] # 勾配の加算 156 | find_flg = True 157 | params.pop(j) 158 | grads.pop(j) 159 | # 転置行列として重みを共有する場合(weight tying) 160 | elif params[i].ndim == 2 and params[j].ndim == 2 and \ 161 | params[i].T.shape == params[j].shape and np.all(params[i].T == params[j]): 162 | grads[i] += grads[j].T 163 | find_flg = True 164 | params.pop(j) 165 | grads.pop(j) 166 | 167 | if find_flg: break 168 | if find_flg: break 169 | 170 | if not find_flg: break 171 | 172 | return params, grads 173 | -------------------------------------------------------------------------------- /python/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/dataset/__init__.py -------------------------------------------------------------------------------- /python/dataset/ptb.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | sys.path.append('..') 5 | try: 6 | import urllib.request 7 | except ImportError: 8 | raise ImportError('Use Python3!') 9 | import pickle 10 | import numpy as np 11 | 12 | 13 | url_base = 'https://raw.githubusercontent.com/tomsercu/lstm/master/data/' 14 | key_file = { 15 | 'train':'ptb.train.txt', 16 | 'test':'ptb.test.txt', 17 | 'valid':'ptb.valid.txt' 18 | } 19 | save_file = { 20 | 'train':'ptb.train.npy', 21 | 'test':'ptb.test.npy', 22 | 'valid':'ptb.valid.npy' 23 | } 24 | vocab_file = 'ptb.vocab.pkl' 25 | 26 | dataset_dir = os.path.dirname(os.path.abspath(__file__)) 27 | 28 | 29 | def _download(file_name): 30 | file_path = dataset_dir + '/' + file_name 31 | if os.path.exists(file_path): 32 | return 33 | 34 | print('Downloading ' + file_name + ' ... 
')
35 | 
36 |     try:
37 |         urllib.request.urlretrieve(url_base + file_name, file_path)
38 |     except urllib.error.URLError:
39 |         import ssl
40 |         ssl._create_default_https_context = ssl._create_unverified_context
41 |         urllib.request.urlretrieve(url_base + file_name, file_path)
42 | 
43 |     print('Done')
44 | 
45 | 
46 | def load_vocab():
47 |     vocab_path = dataset_dir + '/' + vocab_file
48 | 
49 |     if os.path.exists(vocab_path):
50 |         with open(vocab_path, 'rb') as f:
51 |             word_to_id, id_to_word = pickle.load(f)
52 |         return word_to_id, id_to_word
53 | 
54 |     word_to_id = {}
55 |     id_to_word = {}
56 |     data_type = 'train'
57 |     file_name = key_file[data_type]
58 |     file_path = dataset_dir + '/' + file_name
59 | 
60 |     _download(file_name)
61 | 
62 |     words = open(file_path).read().replace('\n', '<eos>').strip().split()  # mark line ends with the <eos> token
63 | 
64 |     for i, word in enumerate(words):
65 |         if word not in word_to_id:
66 |             tmp_id = len(word_to_id)
67 |             word_to_id[word] = tmp_id
68 |             id_to_word[tmp_id] = word
69 | 
70 |     with open(vocab_path, 'wb') as f:
71 |         pickle.dump((word_to_id, id_to_word), f)
72 | 
73 |     return word_to_id, id_to_word
74 | 
75 | 
76 | def load_data(data_type='train'):
77 |     '''
78 |     :param data_type: which split to load: 'train', 'test' or 'valid' ('val' is also accepted)
79 |     :return:
80 |     '''
81 |     if data_type == 'val': data_type = 'valid'
82 |     save_path = dataset_dir + '/' + save_file[data_type]
83 | 
84 |     word_to_id, id_to_word = load_vocab()
85 | 
86 |     if os.path.exists(save_path):
87 |         corpus = np.load(save_path)
88 |         return corpus, word_to_id, id_to_word
89 | 
90 |     file_name = key_file[data_type]
91 |     file_path = dataset_dir + '/' + file_name
92 |     _download(file_name)
93 | 
94 |     words = open(file_path).read().replace('\n', '<eos>').strip().split()  # mark line ends with the <eos> token
95 |     corpus = np.array([word_to_id[w] for w in words])
96 | 
97 |     np.save(save_path, corpus)
98 |     return corpus, word_to_id, id_to_word
99 | 
100 | 
101 | if __name__ == '__main__':
102 |     for data_type in ('train', 'val', 'test'):
103 |         load_data(data_type)
104 | 
-------------------------------------------------------------------------------- /python/dataset/sequence.py: -------------------------------------------------------------------------------- 
1 | # coding: utf-8
2 | import sys
3 | sys.path.append('..')
4 | import os
5 | import numpy
6 | 
7 | 
8 | id_to_char = {}
9 | char_to_id = {}
10 | 
11 | 
12 | def _update_vocab(txt):
13 |     chars = list(txt)
14 | 
15 |     for i, char in enumerate(chars):
16 |         if char not in char_to_id:
17 |             tmp_id = len(char_to_id)
18 |             char_to_id[char] = tmp_id
19 |             id_to_char[tmp_id] = char
20 | 
21 | 
22 | def load_data(file_name='addition.txt', seed=1984):
23 |     file_path = os.path.dirname(os.path.abspath(__file__)) + '/' + file_name
24 | 
25 |     if not os.path.exists(file_path):
26 |         print('No file: %s' % file_name)
27 |         return None
28 | 
29 |     questions, answers = [], []
30 | 
31 |     for line in open(file_path, 'r'):
32 |         idx = line.find('_')
33 |         questions.append(line[:idx])
34 |         answers.append(line[idx:-1])
35 | 
36 |     # create vocab dict
37 |     for i in range(len(questions)):
38 |         q, a = questions[i], answers[i]
39 |         _update_vocab(q)
40 |         _update_vocab(a)
41 | 
42 |     # create numpy array
43 |     x = numpy.zeros((len(questions), len(questions[0])), dtype=int)  # numpy.int is just the builtin int and the alias is gone in NumPy >= 1.24
44 |     t = numpy.zeros((len(questions), len(answers[0])), dtype=int)
45 | 
46 |     for i, sentence in enumerate(questions):
47 |         x[i] = [char_to_id[c] for c in list(sentence)]
48 |     for i, sentence in enumerate(answers):
49 |         t[i] = [char_to_id[c] for c in list(sentence)]
50 | 
51 |     # shuffle
52 |     indices = numpy.arange(len(x))
53 |     if seed is not None:
54 |         numpy.random.seed(seed)
55 |     numpy.random.shuffle(indices)
56 |     x = x[indices]
57 |     t = t[indices]
58 | 
59 |     # 10% for validation set
60 |     split_at = len(x) - len(x) // 10
61 |     (x_train, x_test) = x[:split_at], x[split_at:]
62 |     (t_train, t_test) = t[:split_at], t[split_at:]
63 | 
64 |     return (x_train, t_train), (x_test, t_test)
65 | 
66 | 
67 | def get_vocab():
68 |     return char_to_id, id_to_char
69 | 
-------------------------------------------------------------------------------- /python/dataset/spiral.py: -------------------------------------------------------------------------------- 
1 | # coding: utf-8
2 | import numpy as np
3 | 
4 | 
5 | def load_data(seed=1984):
6 |     np.random.seed(seed)
7 |     N = 100  # samples per class
8 |     DIM = 2  # number of features
9 |     CLS_NUM = 3  # number of classes
10 | 
11 |     x = np.zeros((N*CLS_NUM, DIM))
12 |     t = np.zeros((N*CLS_NUM, CLS_NUM), dtype=int)  # np.int alias removed in NumPy >= 1.24
13 | 
14 |     for j in range(CLS_NUM):
15 |         for i in range(N):  # N*j, N*(j+1)):
16 |             rate = i / N
17 |             radius = 1.0*rate
18 |             theta = j*4.0 + 4.0*rate + np.random.randn()*0.2
19 | 
20 |             ix = N*j + i
21 |             x[ix] = np.array([radius*np.sin(theta),
22 |                               radius*np.cos(theta)]).flatten()
23 |             t[ix, j] = 1
24 | 
25 |     return x, t
26 | 
-------------------------------------------------------------------------------- /python/memo.txt: -------------------------------------------------------------------------------- 
1 | 
2 | ## About this directory
3 | 
4 | * Working directory of the team implementing the samples from the "Deep Learning from Scratch 2: Natural Language Processing" reading group in Python.
5 | 
6 | ## Notes
7 | 
8 | * Environment: Ubuntu 16.04.3 LTS
9 | * Python version: Python 3.5.2
10 | * Mostly implemented by ourselves; some parts are taken from the textbook's repository
11 | * Everything taken from the textbook lives under the upstream/ directory
12 | 
13 | ## ToDo
14 | - [X] Create the directory
15 | - [X] Install numpy
16 | - [ ] Decide what to build
17 | - [X] Sigmoid function (p.13)
18 | - [X] Turn the sigmoid function into a class (p.15)
19 | - [X] Implement the Affine layer (p.15)
20 | - [X] Build TwoLayerNet (p.17)
21 | - [x] Build MatMul (p.33)
22 | - [x] Add backward to Sigmoid (p.36)
23 | - [x] Add backward to Affine (p.37)
24 | - [x] Check the Softmax with Loss layer (p.38)
25 | - [x] Build SGD (p.40)
26 | - [x] Implement the neural network (p.43)
27 | - [ ]
28 | 
29 | ## Setting up the development environment
30 | 
31 | ```
32 | $ sudo apt-get install -y python3-numpy
33 | $ 
34 | $ mkdir python
35 | 
36 | $ # check whether numpy is installed
37 | $ # OK if no error is raised
38 | $ python3
39 | Python 3.5.2 (default, Nov 12 2018, 13:43:14) 
40 | [GCC 5.4.0 20160609] on linux
41 | Type "help", "copyright", "credits" or "license" for more information. 
42 | >>> import numpy 43 | >>> 44 | ``` 45 | 46 | -------------------------------------------------------------------------------- /python/upstream/ch01/forward_net.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import numpy as np 3 | 4 | 5 | class Sigmoid: 6 | def __init__(self): 7 | self.params = [] 8 | 9 | def forward(self, x): 10 | return 1 / (1 + np.exp(-x)) 11 | 12 | 13 | class Affine: 14 | def __init__(self, W, b): 15 | self.params = [W, b] 16 | 17 | def forward(self, x): 18 | W, b = self.params 19 | out = np.dot(x, W) + b 20 | return out 21 | 22 | 23 | class TwoLayerNet: 24 | def __init__(self, input_size, hidden_size, output_size): 25 | I, H, O = input_size, hidden_size, output_size 26 | 27 | # 重みとバイアスの初期化 28 | W1 = np.random.randn(I, H) 29 | b1 = np.random.randn(H) 30 | W2 = np.random.randn(H, O) 31 | b2 = np.random.randn(O) 32 | 33 | # レイヤの生成 34 | self.layers = [ 35 | Affine(W1, b1), 36 | Sigmoid(), 37 | Affine(W2, b2) 38 | ] 39 | 40 | # すべての重みをリストにまとめる 41 | self.params = [] 42 | for layer in self.layers: 43 | self.params += layer.params 44 | 45 | def predict(self, x): 46 | for layer in self.layers: 47 | x = layer.forward(x) 48 | return x 49 | 50 | 51 | x = np.random.randn(10, 2) 52 | model = TwoLayerNet(2, 4, 3) 53 | s = model.predict(x) 54 | print(s) 55 | -------------------------------------------------------------------------------- /python/upstream/ch01/show_spiral_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') # 親ディレクトリのファイルをインポートするための設定 4 | from dataset import spiral 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | x, t = spiral.load_data() 9 | print('x', x.shape) # (300, 2) 10 | print('t', t.shape) # (300, 3) 11 | 12 | # データ点のプロット 13 | N = 100 14 | CLS_NUM = 3 15 | markers = ['o', 'x', '^'] 16 | for i in range(CLS_NUM): 17 | plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i]) 18 | plt.show() 19 | -------------------------------------------------------------------------------- /python/upstream/ch01/train.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') # 親ディレクトリのファイルをインポートするための設定 4 | from common.optimizer import SGD 5 | from common.trainer import Trainer 6 | from dataset import spiral 7 | from two_layer_net import TwoLayerNet 8 | 9 | 10 | # ハイパーパラメータの設定 11 | max_epoch = 300 12 | batch_size = 30 13 | hidden_size = 10 14 | learning_rate = 1.0 15 | 16 | x, t = spiral.load_data() 17 | model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3) 18 | optimizer = SGD(lr=learning_rate) 19 | 20 | trainer = Trainer(model, optimizer) 21 | trainer.fit(x, t, max_epoch, batch_size, eval_interval=10) 22 | trainer.plot() 23 | -------------------------------------------------------------------------------- /python/upstream/ch01/train_custom_loop.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') # 親ディレクトリのファイルをインポートするための設定 4 | import numpy as np 5 | from common.optimizer import SGD 6 | from dataset import spiral 7 | import matplotlib.pyplot as plt 8 | from two_layer_net import TwoLayerNet 9 | 10 | 11 | # ハイパーパラメータの設定 12 | max_epoch = 300 13 | batch_size = 30 14 | hidden_size = 10 15 | learning_rate = 1.0 16 | 17 | x, t = spiral.load_data() 18 | model = TwoLayerNet(input_size=2, 
hidden_size=hidden_size, output_size=3) 19 | optimizer = SGD(lr=learning_rate) 20 | 21 | # 学習で使用する変数 22 | data_size = len(x) 23 | max_iters = data_size // batch_size 24 | total_loss = 0 25 | loss_count = 0 26 | loss_list = [] 27 | 28 | for epoch in range(max_epoch): 29 | # データのシャッフル 30 | idx = np.random.permutation(data_size) 31 | x = x[idx] 32 | t = t[idx] 33 | 34 | for iters in range(max_iters): 35 | batch_x = x[iters*batch_size:(iters+1)*batch_size] 36 | batch_t = t[iters*batch_size:(iters+1)*batch_size] 37 | 38 | # 勾配を求め、パラメータを更新 39 | loss = model.forward(batch_x, batch_t) 40 | model.backward() 41 | optimizer.update(model.params, model.grads) 42 | 43 | total_loss += loss 44 | loss_count += 1 45 | 46 | # 定期的に学習経過を出力 47 | if (iters+1) % 10 == 0: 48 | avg_loss = total_loss / loss_count 49 | print('| epoch %d | iter %d / %d | loss %.2f' 50 | % (epoch + 1, iters + 1, max_iters, avg_loss)) 51 | loss_list.append(avg_loss) 52 | total_loss, loss_count = 0, 0 53 | 54 | 55 | # 学習結果のプロット 56 | plt.plot(np.arange(len(loss_list)), loss_list, label='train') 57 | plt.xlabel('iterations (x10)') 58 | plt.ylabel('loss') 59 | plt.show() 60 | 61 | # 境界領域のプロット 62 | h = 0.001 63 | x_min, x_max = x[:, 0].min() - .1, x[:, 0].max() + .1 64 | y_min, y_max = x[:, 1].min() - .1, x[:, 1].max() + .1 65 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 66 | X = np.c_[xx.ravel(), yy.ravel()] 67 | score = model.predict(X) 68 | predict_cls = np.argmax(score, axis=1) 69 | Z = predict_cls.reshape(xx.shape) 70 | plt.contourf(xx, yy, Z) 71 | plt.axis('off') 72 | 73 | # データ点のプロット 74 | x, t = spiral.load_data() 75 | N = 100 76 | CLS_NUM = 3 77 | markers = ['o', 'x', '^'] 78 | for i in range(CLS_NUM): 79 | plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i]) 80 | plt.show() 81 | -------------------------------------------------------------------------------- /python/upstream/ch01/two_layer_net.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') # 親ディレクトリのファイルをインポートするための設定 4 | import numpy as np 5 | from common.layers import Affine, Sigmoid, SoftmaxWithLoss 6 | 7 | 8 | class TwoLayerNet: 9 | def __init__(self, input_size, hidden_size, output_size): 10 | I, H, O = input_size, hidden_size, output_size 11 | 12 | # 重みとバイアスの初期化 13 | W1 = 0.01 * np.random.randn(I, H) 14 | b1 = np.zeros(H) 15 | W2 = 0.01 * np.random.randn(H, O) 16 | b2 = np.zeros(O) 17 | 18 | # レイヤの生成 19 | self.layers = [ 20 | Affine(W1, b1), 21 | Sigmoid(), 22 | Affine(W2, b2) 23 | ] 24 | self.loss_layer = SoftmaxWithLoss() 25 | 26 | # すべての重みと勾配をリストにまとめる 27 | self.params, self.grads = [], [] 28 | for layer in self.layers: 29 | self.params += layer.params 30 | self.grads += layer.grads 31 | 32 | def predict(self, x): 33 | for layer in self.layers: 34 | x = layer.forward(x) 35 | return x 36 | 37 | def forward(self, x, t): 38 | score = self.predict(x) 39 | loss = self.loss_layer.forward(score, t) 40 | return loss 41 | 42 | def backward(self, dout=1): 43 | dout = self.loss_layer.backward(dout) 44 | for layer in reversed(self.layers): 45 | dout = layer.backward(dout) 46 | return dout 47 | -------------------------------------------------------------------------------- /python/upstream/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/upstream/common/__init__.py 
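Since two_layer_net.py above implements backward() by hand, a finite-difference gradient check is a cheap way to confirm the analytic gradients before training. The sketch below is not part of the repository: it is a hypothetical gradient_check.py assumed to sit next to two_layer_net.py in python/upstream/ch01/, using the same sys.path convention as train.py above.

```
# coding: utf-8
# Hypothetical helper, not in the repo: numerically checks TwoLayerNet.backward().
import sys
sys.path.append('..')  # same convention as train.py above
import numpy as np
from two_layer_net import TwoLayerNet

np.random.seed(0)
model = TwoLayerNet(input_size=2, hidden_size=4, output_size=3)
x = np.random.randn(5, 2)
t = np.eye(3)[np.random.randint(0, 3, size=5)]  # one-hot targets

model.forward(x, t)   # populate the layer caches
model.backward()      # fill model.grads analytically

h = 1e-4
for param, grad in zip(model.params, model.grads):
    it = np.nditer(param, flags=['multi_index'], op_flags=['readwrite'])
    max_diff = 0.0
    while not it.finished:
        i = it.multi_index
        orig = param[i]
        param[i] = orig + h
        loss_plus = model.forward(x, t)
        param[i] = orig - h
        loss_minus = model.forward(x, t)
        param[i] = orig  # restore the weight
        num_grad = (loss_plus - loss_minus) / (2 * h)
        max_diff = max(max_diff, abs(num_grad - grad[i]))
        it.iternext()
    print('max |numerical - analytic|: %e' % max_diff)
```

With float64 parameters the reported differences should be on the order of 1e-8; values around 1e-2 or larger would point at a bug in one of the backward() implementations.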
-------------------------------------------------------------------------------- /python/upstream/common/base_model.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') 4 | import os 5 | import pickle 6 | from common.np import * 7 | from common.util import to_gpu, to_cpu 8 | 9 | 10 | class BaseModel: 11 | def __init__(self): 12 | self.params, self.grads = None, None 13 | 14 | def forward(self, *args): 15 | raise NotImplementedError 16 | 17 | def backward(self, *args): 18 | raise NotImplementedError 19 | 20 | def save_params(self, file_name=None): 21 | if file_name is None: 22 | file_name = self.__class__.__name__ + '.pkl' 23 | 24 | params = [p.astype(np.float16) for p in self.params] 25 | if GPU: 26 | params = [to_cpu(p) for p in params] 27 | 28 | with open(file_name, 'wb') as f: 29 | pickle.dump(params, f) 30 | 31 | def load_params(self, file_name=None): 32 | if file_name is None: 33 | file_name = self.__class__.__name__ + '.pkl' 34 | 35 | if '/' in file_name: 36 | file_name = file_name.replace('/', os.sep) 37 | 38 | if not os.path.exists(file_name): 39 | raise IOError('No file: ' + file_name) 40 | 41 | with open(file_name, 'rb') as f: 42 | params = pickle.load(f) 43 | 44 | params = [p.astype('f') for p in params] 45 | if GPU: 46 | params = [to_gpu(p) for p in params] 47 | 48 | for i, param in enumerate(self.params): 49 | param[...] = params[i] 50 | -------------------------------------------------------------------------------- /python/upstream/common/config.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | GPU = False 4 | -------------------------------------------------------------------------------- /python/upstream/common/functions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from common.np import * 3 | 4 | 5 | def sigmoid(x): 6 | return 1 / (1 + np.exp(-x)) 7 | 8 | 9 | def relu(x): 10 | return np.maximum(0, x) 11 | 12 | 13 | def softmax(x): 14 | if x.ndim == 2: 15 | x = x - x.max(axis=1, keepdims=True) 16 | x = np.exp(x) 17 | x /= x.sum(axis=1, keepdims=True) 18 | elif x.ndim == 1: 19 | x = x - np.max(x) 20 | x = np.exp(x) / np.sum(np.exp(x)) 21 | 22 | return x 23 | 24 | 25 | def cross_entropy_error(y, t): 26 | if y.ndim == 1: 27 | t = t.reshape(1, t.size) 28 | y = y.reshape(1, y.size) 29 | 30 | # 教師データがone-hot-vectorの場合、正解ラベルのインデックスに変換 31 | if t.size == y.size: 32 | t = t.argmax(axis=1) 33 | 34 | batch_size = y.shape[0] 35 | 36 | return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size 37 | -------------------------------------------------------------------------------- /python/upstream/common/layers.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from common.np import * # import numpy as np 3 | from common.config import GPU 4 | from common.functions import softmax, cross_entropy_error 5 | 6 | 7 | class MatMul: 8 | def __init__(self, W): 9 | self.params = [W] 10 | self.grads = [np.zeros_like(W)] 11 | self.x = None 12 | 13 | def forward(self, x): 14 | W, = self.params 15 | out = np.dot(x, W) 16 | self.x = x 17 | return out 18 | 19 | def backward(self, dout): 20 | W, = self.params 21 | dx = np.dot(dout, W.T) 22 | dW = np.dot(self.x.T, dout) 23 | self.grads[0][...] 
= dW 24 | return dx 25 | 26 | 27 | class Affine: 28 | def __init__(self, W, b): 29 | self.params = [W, b] 30 | self.grads = [np.zeros_like(W), np.zeros_like(b)] 31 | self.x = None 32 | 33 | def forward(self, x): 34 | W, b = self.params 35 | out = np.dot(x, W) + b 36 | self.x = x 37 | return out 38 | 39 | def backward(self, dout): 40 | W, b = self.params 41 | dx = np.dot(dout, W.T) 42 | dW = np.dot(self.x.T, dout) 43 | db = np.sum(dout, axis=0) 44 | 45 | self.grads[0][...] = dW 46 | self.grads[1][...] = db 47 | return dx 48 | 49 | 50 | class Softmax: 51 | def __init__(self): 52 | self.params, self.grads = [], [] 53 | self.out = None 54 | 55 | def forward(self, x): 56 | self.out = softmax(x) 57 | return self.out 58 | 59 | def backward(self, dout): 60 | dx = self.out * dout 61 | sumdx = np.sum(dx, axis=1, keepdims=True) 62 | dx -= self.out * sumdx 63 | return dx 64 | 65 | 66 | class SoftmaxWithLoss: 67 | def __init__(self): 68 | self.params, self.grads = [], [] 69 | self.y = None # softmaxの出力 70 | self.t = None # 教師ラベル 71 | 72 | def forward(self, x, t): 73 | self.t = t 74 | self.y = softmax(x) 75 | 76 | # 教師ラベルがone-hotベクトルの場合、正解のインデックスに変換 77 | if self.t.size == self.y.size: 78 | self.t = self.t.argmax(axis=1) 79 | 80 | loss = cross_entropy_error(self.y, self.t) 81 | return loss 82 | 83 | def backward(self, dout=1): 84 | batch_size = self.t.shape[0] 85 | 86 | dx = self.y.copy() 87 | dx[np.arange(batch_size), self.t] -= 1 88 | dx *= dout 89 | dx = dx / batch_size 90 | 91 | return dx 92 | 93 | 94 | class Sigmoid: 95 | def __init__(self): 96 | self.params, self.grads = [], [] 97 | self.out = None 98 | 99 | def forward(self, x): 100 | out = 1 / (1 + np.exp(-x)) 101 | self.out = out 102 | return out 103 | 104 | def backward(self, dout): 105 | dx = dout * (1.0 - self.out) * self.out 106 | return dx 107 | 108 | 109 | class SigmoidWithLoss: 110 | def __init__(self): 111 | self.params, self.grads = [], [] 112 | self.loss = None 113 | self.y = None # sigmoidの出力 114 | self.t = None # 教師データ 115 | 116 | def forward(self, x, t): 117 | self.t = t 118 | self.y = 1 / (1 + np.exp(-x)) 119 | 120 | self.loss = cross_entropy_error(np.c_[1 - self.y, self.y], self.t) 121 | 122 | return self.loss 123 | 124 | def backward(self, dout=1): 125 | batch_size = self.t.shape[0] 126 | 127 | dx = (self.y - self.t) * dout / batch_size 128 | return dx 129 | 130 | 131 | class Dropout: 132 | ''' 133 | http://arxiv.org/abs/1207.0580 134 | ''' 135 | def __init__(self, dropout_ratio=0.5): 136 | self.params, self.grads = [], [] 137 | self.dropout_ratio = dropout_ratio 138 | self.mask = None 139 | 140 | def forward(self, x, train_flg=True): 141 | if train_flg: 142 | self.mask = np.random.rand(*x.shape) > self.dropout_ratio 143 | return x * self.mask 144 | else: 145 | return x * (1.0 - self.dropout_ratio) 146 | 147 | def backward(self, dout): 148 | return dout * self.mask 149 | 150 | 151 | class Embedding: 152 | def __init__(self, W): 153 | self.params = [W] 154 | self.grads = [np.zeros_like(W)] 155 | self.idx = None 156 | 157 | def forward(self, idx): 158 | W, = self.params 159 | self.idx = idx 160 | out = W[idx] 161 | return out 162 | 163 | def backward(self, dout): 164 | dW, = self.grads 165 | dW[...] 
= 0 166 | np.add.at(dW, self.idx, dout) 167 | return None 168 | -------------------------------------------------------------------------------- /python/upstream/common/np.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from common.config import GPU 3 | 4 | 5 | if GPU: 6 | import cupy as np 7 | np.cuda.set_allocator(np.cuda.MemoryPool().malloc) 8 | np.add.at = np.scatter_add 9 | 10 | print('\033[92m' + '-' * 60 + '\033[0m') 11 | print(' ' * 23 + '\033[92mGPU Mode (cupy)\033[0m') 12 | print('\033[92m' + '-' * 60 + '\033[0m\n') 13 | else: 14 | import numpy as np 15 | -------------------------------------------------------------------------------- /python/upstream/common/optimizer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') 4 | from common.np import * 5 | 6 | 7 | class SGD: 8 | ''' 9 | 確率的勾配降下法(Stochastic Gradient Descent) 10 | ''' 11 | def __init__(self, lr=0.01): 12 | self.lr = lr 13 | 14 | def update(self, params, grads): 15 | for i in range(len(params)): 16 | params[i] -= self.lr * grads[i] 17 | 18 | 19 | class Momentum: 20 | ''' 21 | Momentum SGD 22 | ''' 23 | def __init__(self, lr=0.01, momentum=0.9): 24 | self.lr = lr 25 | self.momentum = momentum 26 | self.v = None 27 | 28 | def update(self, params, grads): 29 | if self.v is None: 30 | self.v = [] 31 | for param in params: 32 | self.v.append(np.zeros_like(param)) 33 | 34 | for i in range(len(params)): 35 | self.v[i] = self.momentum * self.v[i] - self.lr * grads[i] 36 | params[i] += self.v[i] 37 | 38 | 39 | class Nesterov: 40 | ''' 41 | Nesterov's Accelerated Gradient (http://arxiv.org/abs/1212.0901) 42 | ''' 43 | def __init__(self, lr=0.01, momentum=0.9): 44 | self.lr = lr 45 | self.momentum = momentum 46 | self.v = None 47 | 48 | def update(self, params, grads): 49 | if self.v is None: 50 | self.v = [] 51 | for param in params: 52 | self.v.append(np.zeros_like(param)) 53 | 54 | for i in range(len(params)): 55 | self.v[i] *= self.momentum 56 | self.v[i] -= self.lr * grads[i] 57 | params[i] += self.momentum * self.momentum * self.v[i] 58 | params[i] -= (1 + self.momentum) * self.lr * grads[i] 59 | 60 | 61 | class AdaGrad: 62 | ''' 63 | AdaGrad 64 | ''' 65 | def __init__(self, lr=0.01): 66 | self.lr = lr 67 | self.h = None 68 | 69 | def update(self, params, grads): 70 | if self.h is None: 71 | self.h = [] 72 | for param in params: 73 | self.h.append(np.zeros_like(param)) 74 | 75 | for i in range(len(params)): 76 | self.h[i] += grads[i] * grads[i] 77 | params[i] -= self.lr * grads[i] / (np.sqrt(self.h[i]) + 1e-7) 78 | 79 | 80 | class RMSprop: 81 | ''' 82 | RMSprop 83 | ''' 84 | def __init__(self, lr=0.01, decay_rate = 0.99): 85 | self.lr = lr 86 | self.decay_rate = decay_rate 87 | self.h = None 88 | 89 | def update(self, params, grads): 90 | if self.h is None: 91 | self.h = [] 92 | for param in params: 93 | self.h.append(np.zeros_like(param)) 94 | 95 | for i in range(len(params)): 96 | self.h[i] *= self.decay_rate 97 | self.h[i] += (1 - self.decay_rate) * grads[i] * grads[i] 98 | params[i] -= self.lr * grads[i] / (np.sqrt(self.h[i]) + 1e-7) 99 | 100 | 101 | class Adam: 102 | ''' 103 | Adam (http://arxiv.org/abs/1412.6980v8) 104 | ''' 105 | def __init__(self, lr=0.001, beta1=0.9, beta2=0.999): 106 | self.lr = lr 107 | self.beta1 = beta1 108 | self.beta2 = beta2 109 | self.iter = 0 110 | self.m = None 111 | self.v = None 112 | 113 | def update(self, params, grads): 114 | if 
self.m is None: 115 | self.m, self.v = [], [] 116 | for param in params: 117 | self.m.append(np.zeros_like(param)) 118 | self.v.append(np.zeros_like(param)) 119 | 120 | self.iter += 1 121 | lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter) 122 | 123 | for i in range(len(params)): 124 | self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i]) 125 | self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i]) 126 | 127 | params[i] -= lr_t * self.m[i] / (np.sqrt(self.v[i]) + 1e-7) 128 | -------------------------------------------------------------------------------- /python/upstream/common/trainer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') 4 | import numpy 5 | import time 6 | import matplotlib.pyplot as plt 7 | from common.np import * # import numpy as np 8 | from common.util import clip_grads 9 | 10 | 11 | class Trainer: 12 | def __init__(self, model, optimizer): 13 | self.model = model 14 | self.optimizer = optimizer 15 | self.loss_list = [] 16 | self.eval_interval = None 17 | self.current_epoch = 0 18 | 19 | def fit(self, x, t, max_epoch=10, batch_size=32, max_grad=None, eval_interval=20): 20 | data_size = len(x) 21 | max_iters = data_size // batch_size 22 | self.eval_interval = eval_interval 23 | model, optimizer = self.model, self.optimizer 24 | total_loss = 0 25 | loss_count = 0 26 | 27 | start_time = time.time() 28 | for epoch in range(max_epoch): 29 | # シャッフル 30 | idx = numpy.random.permutation(numpy.arange(data_size)) 31 | x = x[idx] 32 | t = t[idx] 33 | 34 | for iters in range(max_iters): 35 | batch_x = x[iters*batch_size:(iters+1)*batch_size] 36 | batch_t = t[iters*batch_size:(iters+1)*batch_size] 37 | 38 | # 勾配を求め、パラメータを更新 39 | loss = model.forward(batch_x, batch_t) 40 | model.backward() 41 | params, grads = remove_duplicate(model.params, model.grads) # 共有された重みを1つに集約 42 | if max_grad is not None: 43 | clip_grads(grads, max_grad) 44 | optimizer.update(params, grads) 45 | total_loss += loss 46 | loss_count += 1 47 | 48 | # 評価 49 | if (eval_interval is not None) and (iters % eval_interval) == 0: 50 | avg_loss = total_loss / loss_count 51 | elapsed_time = time.time() - start_time 52 | print('| epoch %d | iter %d / %d | time %d[s] | loss %.2f' 53 | % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, avg_loss)) 54 | self.loss_list.append(float(avg_loss)) 55 | total_loss, loss_count = 0, 0 56 | 57 | self.current_epoch += 1 58 | 59 | def plot(self, ylim=None): 60 | x = numpy.arange(len(self.loss_list)) 61 | if ylim is not None: 62 | plt.ylim(*ylim) 63 | plt.plot(x, self.loss_list, label='train') 64 | plt.xlabel('iterations (x' + str(self.eval_interval) + ')') 65 | plt.ylabel('loss') 66 | plt.show() 67 | 68 | 69 | class RnnlmTrainer: 70 | def __init__(self, model, optimizer): 71 | self.model = model 72 | self.optimizer = optimizer 73 | self.time_idx = None 74 | self.ppl_list = None 75 | self.eval_interval = None 76 | self.current_epoch = 0 77 | 78 | def get_batch(self, x, t, batch_size, time_size): 79 | batch_x = np.empty((batch_size, time_size), dtype='i') 80 | batch_t = np.empty((batch_size, time_size), dtype='i') 81 | 82 | data_size = len(x) 83 | jump = data_size // batch_size 84 | offsets = [i * jump for i in range(batch_size)] # バッチの各サンプルの読み込み開始位置 85 | 86 | for time in range(time_size): 87 | for i, offset in enumerate(offsets): 88 | batch_x[i, time] = x[(offset + self.time_idx) % data_size] 89 | batch_t[i, time] = t[(offset + self.time_idx) % 
data_size] 90 | self.time_idx += 1 91 | return batch_x, batch_t 92 | 93 | def fit(self, xs, ts, max_epoch=10, batch_size=20, time_size=35, 94 | max_grad=None, eval_interval=20): 95 | data_size = len(xs) 96 | max_iters = data_size // (batch_size * time_size) 97 | self.time_idx = 0 98 | self.ppl_list = [] 99 | self.eval_interval = eval_interval 100 | model, optimizer = self.model, self.optimizer 101 | total_loss = 0 102 | loss_count = 0 103 | 104 | start_time = time.time() 105 | for epoch in range(max_epoch): 106 | for iters in range(max_iters): 107 | batch_x, batch_t = self.get_batch(xs, ts, batch_size, time_size) 108 | 109 | # 勾配を求め、パラメータを更新 110 | loss = model.forward(batch_x, batch_t) 111 | model.backward() 112 | params, grads = remove_duplicate(model.params, model.grads) # 共有された重みを1つに集約 113 | if max_grad is not None: 114 | clip_grads(grads, max_grad) 115 | optimizer.update(params, grads) 116 | total_loss += loss 117 | loss_count += 1 118 | 119 | # パープレキシティの評価 120 | if (eval_interval is not None) and (iters % eval_interval) == 0: 121 | ppl = np.exp(total_loss / loss_count) 122 | elapsed_time = time.time() - start_time 123 | print('| epoch %d | iter %d / %d | time %d[s] | perplexity %.2f' 124 | % (self.current_epoch + 1, iters + 1, max_iters, elapsed_time, ppl)) 125 | self.ppl_list.append(float(ppl)) 126 | total_loss, loss_count = 0, 0 127 | 128 | self.current_epoch += 1 129 | 130 | def plot(self, ylim=None): 131 | x = numpy.arange(len(self.ppl_list)) 132 | if ylim is not None: 133 | plt.ylim(*ylim) 134 | plt.plot(x, self.ppl_list, label='train') 135 | plt.xlabel('iterations (x' + str(self.eval_interval) + ')') 136 | plt.ylabel('perplexity') 137 | plt.show() 138 | 139 | 140 | def remove_duplicate(params, grads): 141 | ''' 142 | パラメータ配列中の重複する重みをひとつに集約し、 143 | その重みに対応する勾配を加算する 144 | ''' 145 | params, grads = params[:], grads[:] # copy list 146 | 147 | while True: 148 | find_flg = False 149 | L = len(params) 150 | 151 | for i in range(0, L - 1): 152 | for j in range(i + 1, L): 153 | # 重みを共有する場合 154 | if params[i] is params[j]: 155 | grads[i] += grads[j] # 勾配の加算 156 | find_flg = True 157 | params.pop(j) 158 | grads.pop(j) 159 | # 転置行列として重みを共有する場合(weight tying) 160 | elif params[i].ndim == 2 and params[j].ndim == 2 and \ 161 | params[i].T.shape == params[j].shape and np.all(params[i].T == params[j]): 162 | grads[i] += grads[j].T 163 | find_flg = True 164 | params.pop(j) 165 | grads.pop(j) 166 | 167 | if find_flg: break 168 | if find_flg: break 169 | 170 | if not find_flg: break 171 | 172 | return params, grads 173 | -------------------------------------------------------------------------------- /python/upstream/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retrieva/deep-learning-from-scratch-2/b2142b2b6ad2fb522fd93b90b79cd8109323b9f0/python/upstream/dataset/__init__.py -------------------------------------------------------------------------------- /python/upstream/dataset/ptb.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | sys.path.append('..') 5 | try: 6 | import urllib.request 7 | except ImportError: 8 | raise ImportError('Use Python3!') 9 | import pickle 10 | import numpy as np 11 | 12 | 13 | url_base = 'https://raw.githubusercontent.com/tomsercu/lstm/master/data/' 14 | key_file = { 15 | 'train':'ptb.train.txt', 16 | 'test':'ptb.test.txt', 17 | 'valid':'ptb.valid.txt' 18 | } 19 | save_file = { 20 | 
'train':'ptb.train.npy', 21 | 'test':'ptb.test.npy', 22 | 'valid':'ptb.valid.npy' 23 | } 24 | vocab_file = 'ptb.vocab.pkl' 25 | 26 | dataset_dir = os.path.dirname(os.path.abspath(__file__)) 27 | 28 | 29 | def _download(file_name): 30 | file_path = dataset_dir + '/' + file_name 31 | if os.path.exists(file_path): 32 | return 33 | 34 | print('Downloading ' + file_name + ' ... ') 35 | 36 | try: 37 | urllib.request.urlretrieve(url_base + file_name, file_path) 38 | except urllib.error.URLError: 39 | import ssl 40 | ssl._create_default_https_context = ssl._create_unverified_context 41 | urllib.request.urlretrieve(url_base + file_name, file_path) 42 | 43 | print('Done') 44 | 45 | 46 | def load_vocab(): 47 | vocab_path = dataset_dir + '/' + vocab_file 48 | 49 | if os.path.exists(vocab_path): 50 | with open(vocab_path, 'rb') as f: 51 | word_to_id, id_to_word = pickle.load(f) 52 | return word_to_id, id_to_word 53 | 54 | word_to_id = {} 55 | id_to_word = {} 56 | data_type = 'train' 57 | file_name = key_file[data_type] 58 | file_path = dataset_dir + '/' + file_name 59 | 60 | _download(file_name) 61 | 62 | words = open(file_path).read().replace('\n', '<eos>').strip().split() 63 | 64 | for i, word in enumerate(words): 65 | if word not in word_to_id: 66 | tmp_id = len(word_to_id) 67 | word_to_id[word] = tmp_id 68 | id_to_word[tmp_id] = word 69 | 70 | with open(vocab_path, 'wb') as f: 71 | pickle.dump((word_to_id, id_to_word), f) 72 | 73 | return word_to_id, id_to_word 74 | 75 | 76 | def load_data(data_type='train'): 77 | ''' 78 | :param data_type: データの種類:'train' or 'test' or 'valid (val)' 79 | :return: 80 | ''' 81 | if data_type == 'val': data_type = 'valid' 82 | save_path = dataset_dir + '/' + save_file[data_type] 83 | 84 | word_to_id, id_to_word = load_vocab() 85 | 86 | if os.path.exists(save_path): 87 | corpus = np.load(save_path) 88 | return corpus, word_to_id, id_to_word 89 | 90 | file_name = key_file[data_type] 91 | file_path = dataset_dir + '/' + file_name 92 | _download(file_name) 93 | 94 | words = open(file_path).read().replace('\n', '<eos>').strip().split() 95 | corpus = np.array([word_to_id[w] for w in words]) 96 | 97 | np.save(save_path, corpus) 98 | return corpus, word_to_id, id_to_word 99 | 100 | 101 | if __name__ == '__main__': 102 | for data_type in ('train', 'val', 'test'): 103 | load_data(data_type) 104 | -------------------------------------------------------------------------------- /python/upstream/dataset/sequence.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('..') 4 | import os 5 | import numpy 6 | 7 | 8 | id_to_char = {} 9 | char_to_id = {} 10 | 11 | 12 | def _update_vocab(txt): 13 | chars = list(txt) 14 | 15 | for i, char in enumerate(chars): 16 | if char not in char_to_id: 17 | tmp_id = len(char_to_id) 18 | char_to_id[char] = tmp_id 19 | id_to_char[tmp_id] = char 20 | 21 | 22 | def load_data(file_name='addition.txt', seed=1984): 23 | file_path = os.path.dirname(os.path.abspath(__file__)) + '/' + file_name 24 | 25 | if not os.path.exists(file_path): 26 | print('No file: %s' % file_name) 27 | return None 28 | 29 | questions, answers = [], [] 30 | 31 | for line in open(file_path, 'r'): 32 | idx = line.find('_') 33 | questions.append(line[:idx]) 34 | answers.append(line[idx:-1]) 35 | 36 | # create vocab dict 37 | for i in range(len(questions)): 38 | q, a = questions[i], answers[i] 39 | _update_vocab(q) 40 | _update_vocab(a) 41 | 42 | # create numpy array 43 | x = numpy.zeros((len(questions),
len(questions[0])), dtype=numpy.int) 44 | t = numpy.zeros((len(questions), len(answers[0])), dtype=numpy.int) 45 | 46 | for i, sentence in enumerate(questions): 47 | x[i] = [char_to_id[c] for c in list(sentence)] 48 | for i, sentence in enumerate(answers): 49 | t[i] = [char_to_id[c] for c in list(sentence)] 50 | 51 | # shuffle 52 | indices = numpy.arange(len(x)) 53 | if seed is not None: 54 | numpy.random.seed(seed) 55 | numpy.random.shuffle(indices) 56 | x = x[indices] 57 | t = t[indices] 58 | 59 | # 10% for validation set 60 | split_at = len(x) - len(x) // 10 61 | (x_train, x_test) = x[:split_at], x[split_at:] 62 | (t_train, t_test) = t[:split_at], t[split_at:] 63 | 64 | return (x_train, t_train), (x_test, t_test) 65 | 66 | 67 | def get_vocab(): 68 | return char_to_id, id_to_char 69 | -------------------------------------------------------------------------------- /python/upstream/dataset/spiral.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import numpy as np 3 | 4 | 5 | def load_data(seed=1984): 6 | np.random.seed(seed) 7 | N = 100 # クラスごとのサンプル数 8 | DIM = 2 # データの要素数 9 | CLS_NUM = 3 # クラス数 10 | 11 | x = np.zeros((N*CLS_NUM, DIM)) 12 | t = np.zeros((N*CLS_NUM, CLS_NUM), dtype=np.int) 13 | 14 | for j in range(CLS_NUM): 15 | for i in range(N):#N*j, N*(j+1)): 16 | rate = i / N 17 | radius = 1.0*rate 18 | theta = j*4.0 + 4.0*rate + np.random.randn()*0.2 19 | 20 | ix = N*j + i 21 | x[ix] = np.array([radius*np.sin(theta), 22 | radius*np.cos(theta)]).flatten() 23 | t[ix, j] = 1 24 | 25 | return x, t 26 | -------------------------------------------------------------------------------- /python_team2/.gitignore: -------------------------------------------------------------------------------- 1 | book/ 2 | -------------------------------------------------------------------------------- /python_team2/20190327/sigmoid.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def sigmoid(x): 5 | return 1 / (1 + np.exp(-x)) 6 | 7 | print(sigmoid(3)) 8 | print(sigmoid(0)) 9 | print(sigmoid(-3)) 10 | 11 | print(sigmoid(3) + sigmoid(-3)) 12 | 13 | # numpyっぽい書き方 14 | print(sigmoid(np.array([3,0,-3]))) 15 | 16 | # P.13の例 17 | x = np.random.randn(10, 2) 18 | W1 = np.random.randn(2, 4) 19 | b1 = np.random.randn(4) 20 | W2 = np.random.randn(4, 3) 21 | b2 = np.random.randn(3) 22 | 23 | h = np.dot(x, W1) + b1 # これで1層の計算 24 | a = sigmoid(h) 25 | s = np.dot(a, W2) + b2 26 | 27 | print("h=", h) 28 | print("a=", a) 29 | print("s=", s) 30 | 31 | print(h.shape) 32 | print(a.shape) 33 | print(s.shape) 34 | 35 | -------------------------------------------------------------------------------- /python_team2/20190327/sigmoid_class.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class Sigmoid: 5 | def __init__(self): 6 | pass # 何もしない 7 | 8 | def forward(self, x): 9 | return 1 / (1 + np.exp(-x)) 10 | 11 | def backward(self): 12 | pass 13 | 14 | 15 | sig = Sigmoid() 16 | 17 | print(sig.forward(3)) 18 | print(sig.forward(0)) 19 | print(sig.forward(-3)) 20 | 21 | print(sig.forward(3) + sig.forward(-3)) 22 | 23 | # numpyっぽい書き方 24 | print(sig.forward(np.array([3,0,-3]))) 25 | 26 | -------------------------------------------------------------------------------- /python_team2/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | 
verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | numpy = "*" 10 | matplotlib = "*" 11 | 12 | [requires] 13 | python_version = "3.7" 14 | -------------------------------------------------------------------------------- /python_team2/README.md: -------------------------------------------------------------------------------- 1 | # About this directory 2 | 3 | * Working directory for the team implementing the samples from the "Deep Learning from Scratch 2" (NLP volume) reading group in Python. 4 | 5 | ## Notes 6 | * Python: 3.7.3 7 | * Dependencies are installed with pipenv 8 | 9 | ## ToDo 10 | - [X] Create the directory 11 | - [X] Install numpy 12 | - [X] Decide what to build 13 | - [X] Sigmoid function (P.13) 14 | - [X] Sigmoid function as a class (P.15) 15 | - [X] Implement the Affine layer (P.15) 16 | - [X] Implement TwoLayerNet (P.17) 17 | - [ ] 1.3 Training neural networks 18 | - [X] 1.3.4 Computational graphs 19 | - [X] MatMul 20 | - [X] 1.3.5 Deriving the gradients and implementing backpropagation 21 | - [X] Sigmoid 22 | - [X] Affine 23 | - [X] SoftmaxWithLoss 24 | - [X] 1.3.6 Updating the weights 25 | - [X] 1.4 Solving a problem with a neural network 26 | - [X] 1.4.1 The spiral dataset 27 | - [X] 1.4.2 Implementing the neural network 28 | - [X] 1.4.3 The training code 29 | - [X] 1.4.4 The Trainer class 30 | - [ ] Chapter 3: word2vec 31 | - [X] 3.1 Inference-based methods and neural networks 32 | - [X] 3.1.3 Computation with a fully connected layer 33 | - [X] 3.1.3 Computation with MatMul 34 | - [X] 3.2 A simple word2vec 35 | - [X] 3.2.1 Inference 36 | - [X] 3.3 Preparing the training data 37 | - [X] 3.4 Implementing the CBOW model 38 | - [X] Inference 39 | - [X] Training 40 | 41 | 42 | ## Setting up the development environment 43 | 44 | ``` 45 | # Check out the official source code into book/ 46 | $ git clone https://github.com/oreilly-japan/deep-learning-from-scratch-2.git book 47 | ``` 48 | 49 | ``` 50 | # for mac 51 | $ brew install pipenv 52 | $ cd {CURRENT_DIRECTORY} 53 | $ pipenv sync 54 | $ pipenv shell 55 | ``` 56 | 57 | ### References 58 | 59 | - [Python環境構築ベストプラクティス2019 - ばいおいんふぉっぽいの!](https://www.natsukium.com/blog/2019-02-18/python/) 60 | - [Pipenv で起きる Matplotlib まわりのエラー - Qiita](https://qiita.com/utahkaA/items/ad9aa825832c5909575a) 61 | - On macOS, using pipenv + matplotlib requires the workaround described in the Qiita article above 62 | -------------------------------------------------------------------------------- /python_team2/ch01/forward_net.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | # Every layer has forward() and backward() methods 4 | # Every layer has params and grads instance variables 5 | 6 | 7 | import numpy as np 8 | 9 | class Sigmoid: 10 | def __init__(self): 11 | self.params = [] 12 | 13 | def forward(self, x): 14 | return 1 / (1 + np.exp(-x)) 15 | 16 | def backward(self): 17 | pass 18 | 19 | 20 | class Affine: 21 | def __init__(self, W, b): 22 | self.params = [W, b] 23 | 24 | def forward(self, x): 25 | W, b = self.params 26 | out = np.dot(x, W) + b 27 | return out 28 | 29 | def backward(self): 30 | pass 31 | 32 | 33 | class TwoLayerNet: 34 | def __init__(self, input_size, hidden_size, output_size): 35 | I, H, O = input_size, hidden_size, output_size 36 | 37 | # Initialize the weights and biases 38 | W1 = np.random.randn(I, H) 39 | b1 = np.random.randn(H) 40 | W2 = np.random.randn(H, O) 41 | b2 = np.random.randn(O) 42 | 43 | # Create the layers 44 | self.layers = [ 45 | Affine(W1, b1), 46 | Sigmoid(), 47 | Affine(W2, b2) 48 | ] 49 | 50 | # Collect all weights into a single list 51 | self.params = [] 52 | for layer in self.layers: 53 | self.params += layer.params 54 | 55 | def predict(self, x): 56 | for layer in self.layers: 57 | x = layer.forward(x) 58 | return x 59 | 60 | 61 | if __name__ == '__main__': 62 | x = np.random.randn(10, 2) 63 | model = TwoLayerNet(2, 4, 3) 64 | s = model.predict(x) 65 | print(s) 66 | print(s.shape) 67 | -------------------------------------------------------------------------------- /python_team2/ch01/plots.py:
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | # 参考 4 | # scikit-learn - matplotlib を使って分類問題の決定境界を描画する - Pynote 5 | # http://pynote.hatenablog.com/entry/sklearn-plot-decision-boundary 6 | # 機械学習の分類結果を可視化!決定境界 - 見習いデータサイエンティストの隠れ家 7 | # http://www.dskomei.com/entry/2018/03/04/125249 8 | 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | 12 | 13 | 14 | def plotResults(model, loss_list, x): 15 | # 学習経過をプロット 16 | plt.subplot(1,2,1) 17 | plt.plot(loss_list) 18 | 19 | # 決定境界をプロット 20 | plt.subplot(1,2,2) 21 | plotDecisionBoundary(model, x) 22 | 23 | 24 | # 決定境界のプロット 25 | def plotDecisionBoundary(model, x): 26 | # グリッドの座標を作る 27 | x_min, x_max = x[:, 0].min(), x[:, 0].max() 28 | y_min, y_max = x[:, 1].min(), x[:, 1].max() 29 | x_mesh, y_mesh = np.meshgrid(np.arange(x_min, x_max, 0.01), 30 | np.arange(y_min, y_max, 0.01)) 31 | grid = np.array([x_mesh.ravel(), y_mesh.ravel()]).T 32 | 33 | # グリッドの推論結果を集める 34 | pred = model.predict(grid) 35 | z = np.array(x_mesh.ravel()) 36 | for i in range(len(pred)): 37 | z[i] = pred[i].argmax() 38 | z = z.reshape(x_mesh.shape) 39 | 40 | # 等高線描画 41 | plt.contourf(x_mesh, y_mesh, z, alpha=0.3) 42 | plt.xlim(x_mesh.min(), x_mesh.max()) 43 | plt.ylim(y_mesh.min(), y_mesh.max()) 44 | 45 | # データ点のプロット 46 | N = 100 47 | CLS_NUM = 3 48 | markers = ['o', 'x', '^'] 49 | for i in range(CLS_NUM): 50 | plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i]) 51 | -------------------------------------------------------------------------------- /python_team2/ch01/show_spiral_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('../book') # 公式リポジトリのdatasetを読むため 4 | from dataset import spiral 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | x, t = spiral.load_data() 9 | print('x', x.shape) # (300, 2) 10 | print('t', t.shape) # (300, 3) 11 | 12 | # データ点のプロット 13 | N = 100 14 | CLS_NUM = 3 15 | markers = ['o', 'x', '^'] 16 | for i in range(CLS_NUM): 17 | plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i]) 18 | plt.show() 19 | -------------------------------------------------------------------------------- /python_team2/ch01/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('../book') 5 | from common.optimizer import SGD 6 | from common.trainer import Trainer 7 | from dataset import spiral 8 | from two_layer_net import TwoLayerNet 9 | 10 | max_epoch = 300 11 | batch_size = 30 12 | hidden_size = 10 13 | learning_rate = 1.0 14 | 15 | x, t = spiral.load_data() 16 | model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3) 17 | optimizer = SGD(lr = learning_rate) 18 | 19 | trainer = Trainer(model, optimizer) 20 | trainer.fit(x, t, max_epoch, batch_size, eval_interval = 10) 21 | trainer.plot() 22 | -------------------------------------------------------------------------------- /python_team2/ch01/train_custom_loop.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('..') 5 | import numpy as np 6 | from common.optimizer import SGD 7 | from book.dataset import spiral 8 | import matplotlib.pyplot as plt 9 | from two_layer_net import TwoLayerNet 10 | from plots import plotResults 11 | 12 | # ハイパーパラメータの設定 13 | max_epoch = 300 14 | batch_size = 30 15 | hidden_size = 10 16 | 
learning_rate = 1.0 17 | 18 | # データの読み込み 19 | x, t = spiral.load_data() 20 | 21 | def train(x, t): 22 | # 学習で使用する変数 23 | data_size = len(x) 24 | max_iters = data_size // batch_size 25 | total_loss = 0 26 | loss_count = 0 27 | loss_list = [] 28 | 29 | for epoch in range(max_epoch): 30 | # データのシャッフル 31 | idx = np.random.permutation(data_size) 32 | x = x[idx] 33 | t = t[idx] 34 | 35 | for iters in range(max_iters): 36 | batch_x = x[iters*batch_size:(iters+1)*batch_size] 37 | batch_t = t[iters*batch_size:(iters+1)*batch_size] 38 | 39 | # 勾配を求めパラメターを更新 40 | loss = model.forward(batch_x, batch_t) 41 | model.backward() 42 | optimizer.update(model.params, model.grads) 43 | 44 | total_loss += loss 45 | loss_count += 1 46 | 47 | # 定期的に学習経過を出力 48 | if (iters+1) % 10 == 0: 49 | avg_loss = total_loss / loss_count 50 | print('| epoch %d | iter %d / %d | loss %.2f' 51 | % (epoch + 1, iters + 1, max_iters, avg_loss)) 52 | loss_list.append(avg_loss) 53 | total_loss, loss_count = 0, 0 54 | 55 | return loss_list 56 | 57 | 58 | 59 | 60 | if __name__ == '__main__': 61 | # 学習試行1 62 | 63 | # モデルとオプティマイザの生成 64 | optimizer = SGD(lr=learning_rate) 65 | model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3) 66 | 67 | loss_list = train(x, t) 68 | plt.figure(figsize=(10,4)) 69 | plotResults(model, loss_list, x) 70 | 71 | 72 | # 学習試行2 73 | 74 | # モデルとオプティマイザの生成 75 | #hidden_size = 60 76 | #learning_rate = 0.5 77 | #max_epoch = 500 78 | #optimizer = SGD(lr=learning_rate) 79 | #model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3) 80 | # 81 | #loss_list = train(x, t) 82 | #plt.figure(figsize=(10,4)) 83 | #plotResults(model, loss_list, x) 84 | 85 | 86 | # グラフ表示 87 | plt.show() 88 | -------------------------------------------------------------------------------- /python_team2/ch01/two_layer_net.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('..') 5 | import numpy as np 6 | from common.layers import Affine, Sigmoid, SoftmaxWithLoss, Relu 7 | 8 | class TwoLayerNet: 9 | def __init__(self, input_size, hidden_size, output_size): 10 | I,H,O = input_size, hidden_size, output_size 11 | 12 | # 重みとバイアス 13 | W1 = 0.01 * np.random.randn(I,H) 14 | b1 = np.zeros(H) 15 | W2 = 0.01 * np.random.randn(H,O) 16 | b2 = np.zeros(O) 17 | 18 | # レイヤの生成 19 | self.layers = [ 20 | Affine(W1, b1), 21 | Sigmoid(), 22 | # Relu(), 23 | Affine(W2, b2) 24 | ] 25 | self.loss_layer = SoftmaxWithLoss() 26 | 27 | # すべての重みと勾配をリストにまとめる 28 | self.params, self.grads = [], [] 29 | for layer in self.layers: 30 | self.params += layer.params 31 | self.grads += layer.grads 32 | 33 | def predict(self, x): 34 | for layer in self.layers: 35 | x = layer.forward(x) 36 | return x 37 | 38 | def forward(self, x, t): 39 | score = self.predict(x) 40 | loss = self.loss_layer.forward(score, t) 41 | return loss 42 | 43 | def backward(self, dout=1): 44 | dout = self.loss_layer.backward(dout) 45 | for layer in reversed(self.layers): 46 | dout = layer.backward(dout) 47 | return dout 48 | -------------------------------------------------------------------------------- /python_team2/ch02/co_matrix.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('..') 5 | import numpy as np 6 | from common.util import preprocess, create_co_matrix 7 | 8 | text = 'You say goodbye and I say hello.' 
9 | print(text) 10 | 11 | corpus, word_to_id, id_to_word = preprocess(text) 12 | 13 | print(corpus) 14 | print(id_to_word) 15 | 16 | C = create_co_matrix(corpus, len(id_to_word)) 17 | print(C) 18 | 19 | print(id_to_word[0]) 20 | print(C[0]) 21 | 22 | print('goodbye') 23 | print(C[word_to_id['goodbye']]) 24 | print('say') 25 | print(C[word_to_id['say']]) 26 | -------------------------------------------------------------------------------- /python_team2/ch02/ranking.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('..') 5 | from common.util import preprocess, create_co_matrix, most_similar 6 | 7 | text = 'You say goodbye and I say hello.' 8 | corpus, word_to_id, id_to_word = preprocess(text) 9 | vocab_size = len(word_to_id) 10 | C = create_co_matrix(corpus, vocab_size) 11 | 12 | most_similar('you', word_to_id, id_to_word, C, top=5) 13 | 14 | # [query] you 15 | # goodbye: 0.7071067691154799 16 | # i: 0.7071067691154799 17 | # hello: 0.7071067691154799 18 | # say: 0.0 19 | # and: 0.0 20 | -------------------------------------------------------------------------------- /python_team2/ch02/similarity.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | sys.path.append('..') 5 | from common.util import preprocess, create_co_matrix, cos_similarity 6 | 7 | 8 | text = 'You say goodbye and I say hello.' 9 | corpus, word_to_id, id_to_word = preprocess(text) 10 | vocab_size = len(word_to_id) 11 | C = create_co_matrix(corpus, vocab_size) 12 | 13 | c0 = C[word_to_id['you']] # [you] の単語ベクトル 14 | c1 = C[word_to_id['i']] # [i]の単語ベクトル 15 | 16 | print(cos_similarity(c0, c1)) 17 | # 0.7071067691154799 18 | -------------------------------------------------------------------------------- /python_team2/ch02/words.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | # 2.3 カウントベースの手法 4 | 5 | import numpy as np 6 | 7 | text = "You say goodbye and I say hello." 
8 | if __name__ == '__main__': 9 | print(text) 10 | 11 | # str.lower() returns a new string, so the result must be reassigned 12 | text = text.lower() 13 | # put a space before the period so it becomes its own token 14 | text = text.replace(".", " .") 15 | if __name__ == '__main__': 16 | print(text) 17 | 18 | words = text.split(' ') 19 | if __name__ == '__main__': 20 | print(words) 21 | 22 | 23 | word_to_id = {} 24 | id_to_word = {} 25 | for word in words: 26 | if word not in word_to_id: 27 | new_id = len(word_to_id) 28 | word_to_id[word] = new_id 29 | id_to_word[new_id] = word 30 | 31 | 32 | if __name__ == '__main__': 33 | print(word_to_id) 34 | print(id_to_word) 35 | 36 | 37 | corpus = [word_to_id[w] for w in words] 38 | corpus = np.array(corpus) 39 | if __name__ == '__main__': 40 | print(corpus) 41 | -------------------------------------------------------------------------------- /python_team2/ch03/cbow_predict.py: -------------------------------------------------------------------------------- 1 | # 3.2.1 Inference in the CBOW model 2 | 3 | import sys 4 | sys.path.append('../book') 5 | import numpy as np 6 | from common.layers import MatMul 7 | 8 | # Sample context data 9 | c0 = np.array([[1, 0, 0, 0, 0, 0, 0]]) # input "you" 10 | c1 = np.array([[0, 0, 1, 0, 0, 0, 0]]) # input "goodbye" 11 | 12 | # Initialize the weights 13 | W_in = np.random.randn(7, 3) 14 | W_out = np.random.randn(3, 7) 15 | 16 | # Create the layers 17 | in_layer0 = MatMul(W_in) 18 | in_layer1 = MatMul(W_in) 19 | out_layer = MatMul(W_out) 20 | 21 | # Forward pass 22 | h0 = in_layer0.forward(c0) 23 | h1 = in_layer1.forward(c1) 24 | h = 0.5*(h0 + h1) 25 | s = out_layer.forward(h) 26 | 27 | print(s) 28 | -------------------------------------------------------------------------------- /python_team2/ch03/simple_cbow.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../book') 3 | import numpy as np 4 | from common.layers import MatMul, SoftmaxWithLoss 5 | 6 | class SimpleCBOW: 7 | def __init__(self, vocab_size, hidden_size): 8 | V, H = vocab_size, hidden_size 9 | 10 | # Initialize the weights 11 | W_in = 0.01 * np.random.randn(V, H).astype('f') 12 | W_out = 0.01 * np.random.randn(H, V).astype('f') 13 | 14 | # Create the layers 15 | self.in_layer0 = MatMul(W_in) 16 | self.in_layer1 = MatMul(W_in) 17 | self.out_layer = MatMul(W_out) 18 | self.loss_layer = SoftmaxWithLoss() 19 | 20 | # Collect all weights and gradients into lists 21 | layers = [self.in_layer0, self.in_layer1, self.out_layer] 22 | self.params, self.grads = [], [] 23 | for layer in layers: 24 | self.params += layer.params 25 | self.grads += layer.grads 26 | # Keep the word vectors (distributed representations) as a member variable 27 | self.word_vecs = W_in 28 | 29 | def forward(self, contexts, target): 30 | h0 = self.in_layer0.forward(contexts[:, 0]) 31 | h1 = self.in_layer1.forward(contexts[:, 1]) 32 | h = (h0 + h1) * 0.5 33 | score = self.out_layer.forward(h) 34 | loss = self.loss_layer.forward(score, target) 35 | return loss 36 | 37 | def backward(self, dout=1): 38 | ds = self.loss_layer.backward(dout) 39 | da = self.out_layer.backward(ds) 40 | da *= 0.5 41 | self.in_layer0.backward(da) 42 | self.in_layer1.backward(da) 43 | return None 44 | 45 | 46 | if __name__ == '__main__': 47 | cbow = SimpleCBOW(5, 3) 48 | contexts = np.array([[1, 0, 0, 0, 0], [0, 1, 0, 0, 0]]) 49 | target = np.array([[0, 0, 0, 1, 0]]) 50 | print(cbow.forward(contexts.T, target)) 51 | -------------------------------------------------------------------------------- /python_team2/ch03/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../book') 3 | from common.trainer import Trainer 4 | from common.optimizer import Adam 5 |
from ch03.simple_skip_gram import SimpleSkipGram 6 | from simple_cbow import SimpleCBOW 7 | from common.util import preprocess, create_contexts_target, convert_one_hot, most_similar 8 | 9 | 10 | window_size=1 11 | hidden_size=5 12 | batch_size=3 13 | max_epoch=1000 14 | 15 | text = 'You say goodbye and I say hello.' 16 | #text='Deep learning (also known as deep structured learning or hierarchical learning) is part of a broader family of machine learning methods based on artificial neural networks. Learning can be supervised, semi-supervised or unsupervised. Deep learning architectures such as deep neural networks, deep belief networks, recurrent neural networks and convolutional neural networks have been applied to fields including computer vision, speech recognition, natural language processing, audio recognition, social network filtering, machine translation, bioinformatics, drug design, medical image analysis, material inspection and board game programs, where they have produced results comparable to and in some cases superior to human experts. Artificial Neural Networks (ANNs) were inspired by information processing and distributed communication nodes in biological systems. ANNs have various differences from biological brains. Specifically, neural networks tend to be static and symbolic, while the biological brain of most living organisms is dynamic (plastic) and analog.' 17 | 18 | corpus, word_to_id, id_to_word = preprocess(text) 19 | 20 | vocab_size = len(word_to_id) 21 | contexts, target = create_contexts_target(corpus, window_size) 22 | target = convert_one_hot(target, vocab_size) 23 | contexts = convert_one_hot(contexts, vocab_size) 24 | 25 | model = SimpleCBOW(vocab_size, hidden_size) 26 | #model = SimpleSkipGram(vocab_size, hidden_size) 27 | optimizer = Adam() 28 | trainer = Trainer(model, optimizer) 29 | 30 | trainer.fit(contexts, target, max_epoch, batch_size) 31 | trainer.plot() 32 | 33 | 34 | word_vecs = model.word_vecs 35 | for word_id, word in id_to_word.items(): 36 | print(word, word_vecs[word_id]) 37 | 38 | 39 | most_similar('you', word_to_id, id_to_word, word_vecs) 40 | #most_similar('learning', word_to_id, id_to_word, word_vecs) 41 | -------------------------------------------------------------------------------- /python_team2/ch03/w_in.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # 3.1.3 p.99 3 | 4 | c = np.array([[1, 0, 0, 0, 0, 0, 0]]) # 入力 "you" 5 | W = np.random.randn(7, 3) # 重み 6 | h = np.dot(c, W) # 中間ノード 7 | print(h) 8 | print(W) 9 | -------------------------------------------------------------------------------- /python_team2/ch03/w_in_matmul.py: -------------------------------------------------------------------------------- 1 | # p.100 3.1.3 2 | 3 | import sys 4 | sys.path.append('../book') 5 | import numpy as np 6 | from common.layers import MatMul 7 | 8 | c = np.array([[1, 0, 0, 0, 0, 0, 0]]) # 入力 "you" 9 | W = np.random.randn(7, 3) # 重み 10 | layer = MatMul(W) 11 | h = layer.forward(c) 12 | print(h) 13 | print(W) 14 | -------------------------------------------------------------------------------- /python_team2/common/layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import numpy as np 4 | 5 | class MatMul: 6 | def __init__(self, W): 7 | self.params = [W] 8 | self.grads = [np.zeros_like(W)] 9 | self.x = None 10 | 11 | def forward(self, x): 12 | W, = self.params 13 | out = np.dot(x, W) 14 | self.x = x 15 | return out 
16 | 17 | def backward(self, dout): 18 | W, = self.params 19 | dx = np.dot(dout, W.T) 20 | dW = np.dot(self.x.T, dout) 21 | self.grads[0][...] = dW 22 | return dx 23 | 24 | class Sigmoid: 25 | def __init__(self): 26 | self.params, self.grads = [], [] 27 | 28 | def forward(self, x): 29 | out = 1 / (1 + np.exp(-x)) 30 | self.out = out 31 | return out 32 | 33 | def backward(self, dout): 34 | dx = dout * (1.0 - self.out) * self.out 35 | return dx 36 | 37 | # https://github.com/oreilly-japan/deep-learning-from-scratch/blob/master/common/layers.py 38 | class Relu: 39 | def __init__(self): 40 | self.mask = None 41 | self.params, self.grads = [], [] 42 | 43 | def forward(self, x): 44 | self.mask = (x <= 0) 45 | out = x.copy() 46 | out[self.mask] = 0 47 | 48 | return out 49 | 50 | def backward(self, dout): 51 | dout[self.mask] = 0 52 | dx = dout 53 | 54 | return dx 55 | 56 | class Affine: 57 | def __init__(self, W, b): 58 | self.params = [W, b] 59 | self.grads = [np.zeros_like(W), np.zeros_like(b)] 60 | self.x = None 61 | 62 | def forward(self, x): 63 | W, b = self.params 64 | out = np.dot(x, W) + b 65 | self.x = x 66 | return out 67 | 68 | def backward(self, dout): 69 | W, b = self.params 70 | dx = np.dot(dout, W.T) 71 | dW = np.dot(self.x.T, dout) 72 | db = np.sum(dout, axis=0) 73 | 74 | self.grads[0][...] = dW 75 | self.grads[1][...] = db 76 | return dx 77 | 78 | class AffineMM: 79 | def __init__(self, W, b): 80 | self.params = [W, b] 81 | self.grads = [np.zeros_like(W), np.zeros_like(b)] 82 | self.MM = MatMul(W) 83 | 84 | def forward(self, x): 85 | b = self.params[1] 86 | out = self.MM.forward(x) + b 87 | return out 88 | 89 | def backward(self, dout): 90 | b = self.params[1] 91 | dx = self.MM.backward(dout) 92 | db = np.sum(dout, axis=0) 93 | 94 | self.grads[0][...] = self.MM.grads[0] 95 | self.grads[1][...] 
= db 96 | return dx 97 | 98 | 99 | # https://github.com/oreilly-japan/deep-learning-from-scratch-2/blob/master/common/functions.py 100 | # からパチった 101 | def softmax(x): 102 | if x.ndim == 2: 103 | x = x - x.max(axis=1, keepdims=True) 104 | x = np.exp(x) 105 | x /= x.sum(axis=1, keepdims=True) 106 | elif x.ndim == 1: 107 | x = x - np.max(x) 108 | x = np.exp(x) / np.sum(np.exp(x)) 109 | 110 | return x 111 | 112 | 113 | def cross_entropy_error(y, t): 114 | if y.ndim == 1: 115 | t = t.reshape(1, t.size) 116 | y = y.reshape(1, y.size) 117 | 118 | # 教師データがone-hot-vectorの場合、正解ラベルのインデックスに変換 119 | if t.size == y.size: 120 | t = t.argmax(axis=1) 121 | 122 | batch_size = y.shape[0] 123 | 124 | return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size 125 | 126 | 127 | # https://github.com/oreilly-japan/deep-learning-from-scratch-2/blob/master/common/layers.py 128 | class SoftmaxWithLoss: 129 | def __init__(self): 130 | self.params, self.grads = [], [] 131 | self.y = None 132 | self.t = None 133 | 134 | def forward(self, x, t): 135 | self.t = t 136 | self.y = softmax(x) 137 | 138 | # 教師ラベルがone-hotベクトルの場合、正解のインデックスに変換 139 | if self.t.size == self.y.size: 140 | self.t = self.t.argmax(axis=1) 141 | 142 | loss = cross_entropy_error(self.y, self.t) 143 | return loss 144 | 145 | def backward(self, dout=1): 146 | batch_size = self.t.shape[0] 147 | 148 | dx = self.y.copy() 149 | dx[np.arange(batch_size), self.t] -= 1 150 | dx *= dout 151 | dx = dx / batch_size 152 | 153 | return dx 154 | 155 | 156 | 157 | 158 | if __name__ == '__main__': 159 | print('MatMul 形状チェック') 160 | 161 | W = np.random.randn(3, 4) 162 | mm = MatMul(W) 163 | x = np.random.randn(1, 3) 164 | out = mm.forward(x) 165 | print('mm.forward().shape', out.shape) 166 | grad = mm.backward(out) 167 | print('mm.backward().shape', grad.shape) 168 | 169 | 170 | print("Affine, AffineMM 実装チェック") 171 | 172 | W = np.random.randn(3, 2) 173 | b = np.random.randn(2) 174 | aff = Affine(W, b) 175 | amm = AffineMM(W, b) 176 | 177 | x = np.random.randn(10, 3) 178 | out1 = aff.forward(x) 179 | out2 = amm.forward(x) 180 | print("out equal", (out1 == out2).all()) 181 | 182 | grad1 = aff.backward(out1) 183 | grad2 = amm.backward(out2) 184 | print("grad equal", (grad1 == grad2).all()) 185 | -------------------------------------------------------------------------------- /python_team2/common/optimizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | # 1.3.6 重みの更新 4 | 5 | class SGD: 6 | def __init__(self, lr=0.01): 7 | self.lr = lr 8 | 9 | def update(self, params, grads): 10 | for i in range(len(params)): 11 | params[i] -= self.lr * grads[i] 12 | -------------------------------------------------------------------------------- /python_team2/common/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import numpy as np 4 | 5 | def preprocess(text): 6 | text = text.lower() 7 | text = text.replace('.', ' .') 8 | words = text.split(' ') 9 | 10 | word_to_id = {} 11 | id_to_word = {} 12 | for word in words: 13 | if word not in word_to_id: 14 | new_id = len(word_to_id) 15 | word_to_id[word] = new_id 16 | id_to_word[new_id] = word 17 | 18 | corpus = np.array([word_to_id[w] for w in words]) 19 | return corpus, word_to_id, id_to_word 20 | 21 | 22 | def create_co_matrix(corpus, vocab_size, window_size=1): 23 | corpus_size = len(corpus) 24 | co_matrix = np.zeros((vocab_size, vocab_size), dtype=np.int32) 25 | 26 | for idx, word_id in 
enumerate(corpus): 27 | for i in range(1, window_size + 1): 28 | left_idx = idx - i 29 | right_idx = idx + i 30 | 31 | if left_idx >= 0: 32 | left_word_id = corpus[left_idx] 33 | co_matrix[word_id, left_word_id] += 1 34 | 35 | if right_idx < corpus_size: 36 | right_word_id = corpus[right_idx] 37 | co_matrix[word_id, right_word_id] += 1 38 | 39 | return co_matrix 40 | 41 | def cos_similarity(x, y, eps = 1e-8): 42 | nx = x / (np.sqrt(np.sum(x**2)) + eps) # normalize x 43 | ny = y / (np.sqrt(np.sum(y**2)) + eps) # normalize y 44 | return np.dot(nx, ny) 45 | 46 | def most_similar(query, word_to_id, id_to_word, word_matrix, top=5): 47 | # Look up the query 48 | if query not in word_to_id: 49 | print('%s is not found' % query) 50 | return 51 | 52 | print('\n[query] ' + query) 53 | query_id = word_to_id[query] 54 | query_vec = word_matrix[query_id] 55 | 56 | # Compute the cosine similarity against every word 57 | vocab_size = len(id_to_word) 58 | similarity = np.zeros(vocab_size) 59 | for i in range(vocab_size): 60 | similarity[i] = cos_similarity(word_matrix[i], query_vec) 61 | 62 | # Print the results in descending order of cosine similarity 63 | count = 0 64 | for i in (-1 * similarity).argsort(): 65 | if id_to_word[i] == query: 66 | continue 67 | print(' %s: %s' % (id_to_word[i], similarity[i])) 68 | 69 | count += 1 70 | if count >= top: 71 | return 72 | -------------------------------------------------------------------------------- /ruby/.bundle/config: -------------------------------------------------------------------------------- 1 | --- 2 | BUNDLE_PATH: "vendor/bundle" 3 | -------------------------------------------------------------------------------- /ruby/Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'https://rubygems.org' 4 | 5 | git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } 6 | 7 | gem 'numo-narray' 8 | gem 'matplotlib' 9 | gem 'red-datasets' 10 | gem 'test-unit' 11 | gem 'irb' 12 | gem 'pry-byebug' 13 | gem 'rake' 14 | -------------------------------------------------------------------------------- /ruby/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | byebug (11.0.1) 5 | coderay (1.1.2) 6 | csv (3.1.1) 7 | irb (1.0.0) 8 | matplotlib (1.0.0) 9 | pycall (>= 1.0.0) 10 | method_source (0.9.2) 11 | numo-narray (0.9.1.4) 12 | power_assert (1.1.4) 13 | pry (0.12.2) 14 | coderay (~> 1.1.0) 15 | method_source (~> 0.9.0) 16 | pry-byebug (3.7.0) 17 | byebug (~> 11.0) 18 | pry (~> 0.10) 19 | pycall (1.2.1) 20 | rake (13.0.1) 21 | red-datasets (0.0.8) 22 | csv (>= 3.0.5) 23 | rubyzip 24 | rubyzip (2.0.0) 25 | test-unit (3.3.2) 26 | power_assert 27 | 28 | PLATFORMS 29 | ruby 30 | 31 | DEPENDENCIES 32 | irb 33 | matplotlib 34 | numo-narray 35 | pry-byebug 36 | rake 37 | red-datasets 38 | test-unit 39 | 40 | BUNDLED WITH 41 | 2.0.2 42 | -------------------------------------------------------------------------------- /ruby/Rakefile: -------------------------------------------------------------------------------- 1 | #require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | 4 | Rake::TestTask.new(:test) do |t| 5 | t.libs << "test" 6 | t.libs << "lib" 7 | t.test_files = FileList['test/**/*_test.rb'] 8 | t.warning = false 9 | end 10 | 11 | task :default => :test 12 | -------------------------------------------------------------------------------- /ruby/examples/ch01/show_spiral_dataset.rb: -------------------------------------------------------------------------------- 1 |
require 'matplotlib/pyplot' 2 | require_relative "spiral" 3 | 4 | plt = Matplotlib::Pyplot 5 | 6 | spiral = Spiral.new.to_a 7 | 8 | colors = ['yellow', 'green', 'red'] 9 | 10 | spiral.group_by{|x, t| t.to_a }.each do |t, x| 11 | x = x.map(&:first).map(&:to_a) 12 | plt.scatter(*x.transpose, c: colors[t.find_index(1)]) 13 | end 14 | plt.show() 15 | -------------------------------------------------------------------------------- /ruby/examples/ch01/spiral.rb: -------------------------------------------------------------------------------- 1 | require "numo/narray" 2 | require "datasets" 3 | require "datasets/dataset" 4 | 5 | class Spiral < Datasets::Dataset 6 | N = 100 # number of samples per class 7 | DIM = 2 # number of features per sample 8 | CLS_NUM = 3 # number of classes 9 | attr_reader :x, :t 10 | 11 | def initialize(seed=1984) 12 | super() 13 | @metadata.id = "spiral" 14 | @metadata.name = "Spiral" 15 | @metadata.url = "https://github.com/retrieva/deep-learning-from-scratch-2" 16 | @metadata.description = "Spiral dataset" 17 | 18 | random = Random.new(seed) 19 | 20 | @x = Numo::DFloat.zeros(N * CLS_NUM, DIM) 21 | @t = Numo::Int64.zeros(N * CLS_NUM, CLS_NUM) 22 | 23 | CLS_NUM.times do |j| 24 | N.times do |i| # N*j, N*(j+1)) 25 | rate = i.to_f / N 26 | radius = 1.0 * rate 27 | theta = j * 4.0 + 4.0 * rate + random.rand(0.2) 28 | 29 | ix = N * j + i 30 | @x[ix, true] = [radius * Math.sin(theta), radius * Math.cos(theta)] 31 | @t[ix, j] = 1 32 | end 33 | end 34 | end 35 | 36 | def each 37 | return to_enum(__method__) unless block_given? 38 | 39 | (N * CLS_NUM).times do |ix| 40 | yield [@x[ix, true], @t[ix, true]] 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /ruby/examples/ch01/train.rb: -------------------------------------------------------------------------------- 1 | require_relative '../lib/optimizer' # SGD 2 | require_relative '../lib/trainer' 3 | require_relative 'two_layers_net' 4 | require_relative 'spiral' 5 | 6 | max_epoch = 300 7 | batch_size = 30 8 | hidden_size = 10 9 | learning_rate = 1.0 10 | 11 | spiral = Spiral.new 12 | x = spiral.x 13 | t = spiral.t 14 | model = TwoLayersNet.new(input_size: 2, hidden_size: hidden_size, output_size: 3) 15 | optimizer = SGD.new(learning_rate) 16 | 17 | trainer = Trainer.new(model, optimizer) 18 | trainer.fit(x, t, max_epoch: max_epoch, batch_size: batch_size, eval_interval: 10) 19 | trainer.plot() -------------------------------------------------------------------------------- /ruby/examples/ch01/train_custom_loop.rb: -------------------------------------------------------------------------------- 1 | require_relative 'spiral' 2 | require_relative '../lib/optimizer' 3 | require_relative 'two_layers_net' 4 | 5 | # 1: Set hyperparameters 6 | max_epoch = 300 7 | batch_size = 30 8 | hidden_size = 10 9 | learning_rate = 1.0 10 | 11 | # 2: Load the data, create the model and the optimizer 12 | samples = Spiral.new 13 | x = samples.x # .shape => [300, 2] 14 | t = samples.t # .shape => [300, 3] 15 | model = TwoLayersNet.new(input_size: 2, hidden_size: hidden_size, output_size: 3) 16 | optimizer = SGD.new(learning_rate) 17 | 18 | data_size = x.shape.first # => 300 19 | max_iters = (data_size / batch_size).floor # => 10 20 | total_loss = 0 21 | loss_count = 0 22 | loss_list = [] 23 | 24 | max_epoch.times do |epoch| 25 | # 3: Shuffle the data 26 | # NOTE: Numo has no counterpart to random.permutation, so the index sequence is built from an Array 27 | idx = Numo::Int64.new(data_size).store((0 ... 
data_size).to_a.shuffle) 28 | 29 | # NOTE: the Python sample assigns x = x[idx], but 30 | # in Ruby the left-hand side would become a new local variable inside the loop, so different names are used here 31 | ex = x[idx, true] 32 | et = t[idx, true] 33 | 34 | max_iters.times do |iters| 35 | iter_range = (iters * batch_size) ... ((iters + 1) * batch_size) 36 | batch_x = ex[iter_range, true] 37 | batch_t = et[iter_range, true] 38 | 39 | # 4: Compute the gradients and update the parameters 40 | loss = model.forward(batch_x, batch_t) 41 | model.backward 42 | optimizer.update(model.params, model.grads) 43 | 44 | total_loss += loss 45 | loss_count += 1 46 | 47 | # 5: Periodically (once every 10 iterations) report training progress 48 | if (iters + 1) % 10 == 0 49 | avg_loss = total_loss / loss_count 50 | puts "| epoch #{epoch+1} | iter #{iters+1} / #{max_iters} | loss #{avg_loss}" 51 | loss_list << avg_loss 52 | total_loss = 0 53 | loss_count = 0 54 | end 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /ruby/examples/ch01/two_layers_net.rb: -------------------------------------------------------------------------------- 1 | require 'affine' 2 | require 'sigmoid' 3 | require 'softmax_with_loss' 4 | require 'byebug' 5 | 6 | class TwoLayersNet 7 | attr_reader :layers, :loss_layer 8 | attr_accessor :params, :grads 9 | 10 | def initialize(input_size:, hidden_size:, output_size:) 11 | w1 = 0.01 * Numo::SFloat.new(input_size, hidden_size).rand 12 | b1 = Numo::SFloat.zeros(hidden_size) 13 | w2 = 0.01 * Numo::SFloat.new(hidden_size, output_size).rand 14 | b2 = Numo::SFloat.zeros(output_size) 15 | 16 | @layers = [ 17 | Affine.new(w1, b1), 18 | Sigmoid.new, 19 | Affine.new(w2, b2), 20 | ] 21 | @loss_layer = SoftmaxWithLoss.new 22 | 23 | @params, @grads = @layers.reduce([[], []]) do |acc, layer| 24 | acc[0] += layer.params 25 | acc[1] += layer.grads 26 | acc 27 | end 28 | end 29 | 30 | def predict(x) 31 | @layers.each do |layer| 32 | x = layer.forward(x) 33 | end 34 | x 35 | end 36 | 37 | def forward(x, t) 38 | score = predict(x) 39 | @loss_layer.forward(score, t) 40 | end 41 | 42 | def backward(dout = 1) 43 | dout = @loss_layer.backward(dout) 44 | @layers.reverse.each do |layer| 45 | dout = layer.backward(dout) 46 | end 47 | dout 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /ruby/examples/ch03/cbow_predict.rb: -------------------------------------------------------------------------------- 1 | require "numo/narray" 2 | require "mat_mul.rb" 3 | 4 | # Sample context data 5 | c0 = Numo::NArray[[1, 0, 0, 0, 0, 0, 0]] 6 | c1 = Numo::NArray[[0, 0, 1, 0, 0, 0, 0]] 7 | 8 | # Initialize the weights 9 | w_in = Numo::DFloat.new(7, 3).rand 10 | w_out = Numo::DFloat.new(3, 7).rand 11 | 12 | # Create the layers 13 | in_layer0 = MatMul.new(w_in) 14 | in_layer1 = MatMul.new(w_in) 15 | out_layer = MatMul.new(w_out) 16 | 17 | # Forward pass 18 | h0 = in_layer0.forward(c0) 19 | h1 = in_layer1.forward(c1) 20 | h = 0.5 * (h0 + h1) 21 | s = out_layer.forward(h) 22 | 23 | pp h0.to_a 24 | #pp s.to_a 25 | -------------------------------------------------------------------------------- /ruby/lib/adam.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Adam (http://arxiv.org/abs/1412.6980) 4 | class Adam 5 | def initialize(lr = 0.001, beta1 = 0.9, beta2 = 0.999) 6 | @lr = lr 7 | @beta1 = beta1 8 | @beta2 = beta2 9 | @iter = 0 10 | @m = nil 11 | @v = nil 12 | end 13 | 14 | def update(params, grads) 15 | unless @m 16 | @m = [] 17 | @v = [] 18 | params.each do |param| 19 | @m.append(Numo::SFloat.zeros(param.shape)) 20 | 
@v.append(Numo::SFloat.zeros(param.shape)) 21 | end 22 | end 23 | 24 | @iter += 1 25 | lr_t = @lr * Numo::SFloat::Math.sqrt(1.0 - @beta2**@iter) / 26 | (1.0 - @beta1**@iter) 27 | 28 | params.length.times do |i| 29 | @m[i] += (1 - @beta1) * (grads[i] - @m[i]) 30 | @v[i] += (1 - @beta2) * (grads[i]**2 - @v[i]) 31 | 32 | params[i].inplace - lr_t * @m[i] / (Numo::SFloat::Math.sqrt(@v[i]) + 1e-7) 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /ruby/lib/affine.rb: -------------------------------------------------------------------------------- 1 | require "numo/narray" 2 | 3 | class Affine 4 | attr_accessor :params, :grads, :x 5 | 6 | def initialize(weight, bias) 7 | @params = [weight, bias] 8 | @grads = [weight.new_zeros, bias.new_zeros] 9 | @x = nil 10 | end 11 | 12 | def forward(x) 13 | weight, bias = @params 14 | @x = x 15 | x.dot(weight) + bias 16 | end 17 | 18 | def backward(dout) 19 | weight, _ = @params 20 | dx = dout.dot(weight.transpose) 21 | dW = @x.transpose.dot(dout) 22 | db = dout.sum(axis: 0) 23 | 24 | @grads[0].store dW 25 | @grads[1].store db 26 | dx 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /ruby/lib/embedding.rb: -------------------------------------------------------------------------------- 1 | class Embedding 2 | attr_reader :params, :grads 3 | 4 | def initialize(w) 5 | @params = [w] 6 | @grads = [w.new_zeros] 7 | @idx = nil 8 | end 9 | 10 | def forward(idx) 11 | w = @params.first 12 | @idx = idx 13 | w[idx, true] 14 | end 15 | 16 | def backward(dout) 17 | dw = @grads.first 18 | dw.store(0) 19 | @idx.each_with_index do |word_id, i| 20 | dw[word_id, true].inplace + dout[i, true] 21 | end 22 | nil 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /ruby/lib/embedding_dot.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'numo/narray' 4 | require 'embedding' 5 | 6 | class EmbeddingDot 7 | def initialize(w) 8 | @embed = Embedding.new(w) 9 | @params = @embed.params 10 | @grads = @embed.grads 11 | @cache = nil 12 | end 13 | 14 | def forward(h, idx) 15 | target_w = @embed.forward(idx) 16 | out = (target_w * h).sum(axis: 1) 17 | 18 | @cache = [h, target_w] 19 | out 20 | end 21 | 22 | def backward(dout) 23 | h, target_w = @cache 24 | dout = dout.reshape(dout.shape[0], 1) # couldn't transform be used here instead?
25 | 26 | dtarget_w = dout * h 27 | @embed.backward(dtarget_w) 28 | dh = dout * target_w 29 | dh 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /ruby/lib/mat_mul.rb: -------------------------------------------------------------------------------- 1 | require 'numo/narray' 2 | 3 | class MatMul 4 | attr_accessor :params, :grads, :x 5 | def initialize(w) 6 | @params = [w] 7 | @grads = [w.new_zeros] 8 | @x = nil 9 | end 10 | 11 | def forward(x) 12 | w = @params.first 13 | @x = x 14 | x.dot(w) 15 | end 16 | 17 | def backward(dout) 18 | w = @params.first 19 | dx = dout.dot(w.transpose) 20 | dw = @x.transpose.dot(dout) 21 | @grads[0].store(dw) 22 | dx 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /ruby/lib/negative_sampling_loss.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'numo/narray' 4 | 5 | class NegativeSamplingLoss 6 | def initialize(w, corpus, power = 0.75, sample_size = 5) 7 | @sample_size = sample_size 8 | @sampler = UnigramSampler.new(corpus, power, sample_size) # forward/backward are still unimplemented 9 | end 10 | end 11 | 12 | class UnigramSampler 13 | def initialize(corpus, power, sample_size) 14 | @sample_size = sample_size 15 | @vocab_size = nil 16 | @word_p = nil 17 | 18 | counts = Hash.new(0) 19 | corpus.each do |word_id| 20 | counts[word_id] += 1 21 | end 22 | 23 | @vocab_size = counts.length 24 | 25 | @word_p = Numo::SFloat[*counts.sort.map(&:last)] # order the counts by word id 26 | 27 | @word_p = @word_p ** power 28 | @word_p /= @word_p.sum 29 | end 30 | 31 | def get_negative_sample(target) 32 | batch_size = target.shape[0] 33 | 34 | negative_sample = Numo::UInt32.zeros(batch_size, @sample_size) 35 | 36 | batch_size.times do |i| 37 | p = @word_p.dup 38 | target_idx = target[i] 39 | p[target_idx] = 0 40 | p /= p.sum 41 | negative_sample[i, true] = random_choice_without_replacement( 42 | @vocab_size, size: @sample_size, p: p 43 | ) 44 | end 45 | 46 | negative_sample 47 | end 48 | end 49 | 50 | # Implementation is based on the Weighted Random Sampling from this SO 51 | # https://stackoverflow.com/a/2149533. 52 | def random_choice_without_replacement(a, size: 1, p:) 53 | array = a.class == Integer ? (0...a).to_a : a 54 | items = array.zip(p) 55 | 56 | heap = rws_heap(items) 57 | 58 | size.times.map { rws_heap_pop(heap) } 59 | end 60 | 61 | Node = Struct.new(:w, :v, :tw) 62 | Rand = Random.new 63 | 64 | def rws_heap(items) 65 | h = [nil] 66 | items.each do |v, w| 67 | h.append(Node.new(w, v, w)) 68 | end 69 | 70 | (h.length - 1).downto(2).each do |i| 71 | h[i >> 1].tw += h[i].tw 72 | end 73 | 74 | h 75 | end 76 | 77 | def rws_heap_pop(h) 78 | gas = h[1].tw * Rand.rand 79 | 80 | i = 1 81 | 82 | while gas >= h[i].w 83 | gas -= h[i].w 84 | i <<= 1 85 | if gas >= h[i].tw 86 | gas -= h[i].tw 87 | i += 1 88 | end 89 | end 90 | 91 | w = h[i].w 92 | v = h[i].v 93 | 94 | h[i].w = 0 95 | while i.positive?
96 | h[i].tw -= w 97 | i >>= 1 98 | end 99 | 100 | v 101 | end 102 | -------------------------------------------------------------------------------- /ruby/lib/optimizer.rb: -------------------------------------------------------------------------------- 1 | # Stochastic Gradient Descent 2 | class SGD 3 | def initialize(lr = 0.01) 4 | @lr = lr 5 | end 6 | 7 | def update(params, grads) 8 | params.length.times do |i| 9 | params[i].inplace - @lr * grads[i] 10 | end 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /ruby/lib/rnn.rb: -------------------------------------------------------------------------------- 1 | require 'numo/narray' 2 | 3 | class Rnn 4 | 5 | attr_accessor :params, :grads, :cache 6 | 7 | def initialize(wx, wh, b) 8 | @params = [wx, wh, b] 9 | @grads = [wx.new_zeros, wh.new_zeros, b.new_zeros] 10 | end 11 | 12 | def forward(x, h_prev) 13 | wx, wh, b = @params 14 | t = h_prev.dot(wh) + x.dot(wx) + b 15 | h_next = Numo::NMath::tanh(t) 16 | 17 | @cache = [x, h_prev, h_next] 18 | 19 | h_next 20 | end 21 | 22 | def backward(dh_next) 23 | wx, wh, _b = @params 24 | x, h_prev, h_next = @cache 25 | 26 | dt = dh_next * (1 - h_next**2) 27 | db = dt.sum(axis: 0) 28 | dwh = h_prev.transpose.dot(dt) 29 | dh_prev = dt.dot(wh.transpose) 30 | dwx = x.transpose.dot(dt) 31 | dx = dt.dot(wx.transpose) 32 | 33 | @grads[0].store(dwx) 34 | @grads[1].store(dwh) 35 | @grads[2].store(db) 36 | 37 | [dx, dh_prev] 38 | end 39 | end -------------------------------------------------------------------------------- /ruby/lib/sigmoid.rb: -------------------------------------------------------------------------------- 1 | require 'numo/narray' 2 | 3 | class Sigmoid 4 | attr_accessor :params, :grads, :out 5 | def initialize 6 | @params = [] 7 | @grads = [] 8 | @out = nil 9 | end 10 | 11 | def forward(x) 12 | @out = 1.0 / (1.0 + Numo::NMath.exp(-x)) 13 | end 14 | 15 | def backward(dout) 16 | dout * (1.0 - @out) * @out 17 | end 18 | end -------------------------------------------------------------------------------- /ruby/lib/simple_cbow.rb: -------------------------------------------------------------------------------- 1 | require "numo/narray" 2 | require "mat_mul" 3 | require "softmax_with_loss" 4 | require "embedding" 5 | 6 | class SimpleCBow 7 | attr_reader :params, :grads, :word_vecs 8 | 9 | def initialize(vocab_size, hidden_size) 10 | v, h = vocab_size, hidden_size 11 | 12 | # Initialize the weights 13 | w_in = 0.01 * Numo::DFloat.new(v, h).rand 14 | w_out = 0.01 * Numo::DFloat.new(h, v).rand 15 | 16 | # Create the layers 17 | @in_layer0 = Embedding.new(w_in) 18 | @in_layer1 = Embedding.new(w_in) 19 | # @in_layer0 = MatMul.new(w_in) 20 | # @in_layer1 = MatMul.new(w_in) 21 | @out_layer = MatMul.new(w_out) 22 | @loss_layer = SoftmaxWithLoss.new 23 | 24 | # Collect all the weights and gradients into lists 25 | layers = [@in_layer0, @in_layer1, @out_layer] 26 | @params, @grads = layers.reduce([[], []]) do |acc, layer| 27 | [acc[0] + layer.params, acc[1] + layer.grads] 28 | end 29 | 30 | # Expose the word embeddings as an instance variable 31 | @word_vecs = w_in 32 | end 33 | 34 | def forward(contexts, target) 35 | h0 = @in_layer0.forward(contexts[true, 0]) 36 | h1 = @in_layer1.forward(contexts[true, 1]) 37 | h = (h0 + h1) * 0.5 38 | score = @out_layer.forward(h) 39 | loss = @loss_layer.forward(score, target) 40 | return loss 41 | end 42 | 43 | def backward(dout=1) 44 | ds = @loss_layer.backward(dout) 45 | da = @out_layer.backward(ds) 46 | da *= 0.5 47 | @in_layer1.backward(da) 48 | @in_layer0.backward(da) 49 | nil 50 | end 51 | end 52 | 
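Putting the CBOW pieces together: the following is a minimal smoke-test sketch (not a file in this repo) that mirrors ruby/test/simple_cbow_test.rb, assuming ruby/lib is on the load path. It runs one forward/backward pass through SimpleCBow and applies a single SGD step:

require 'numo/narray'
require 'simple_cbow'   # ruby/lib/simple_cbow.rb
require 'optimizer'     # ruby/lib/optimizer.rb (SGD)

model = SimpleCBow.new(7, 3)                # vocab_size = 7, hidden_size = 3
contexts = Numo::NArray[[1, 0, 0, 0, 0, 0, 0],
                        [0, 0, 1, 0, 0, 0, 0]]
target = Numo::NArray[0, 1, 0, 0, 0, 0, 0]  # one-hot teacher label

loss = model.forward(contexts, target)      # scalar cross-entropy loss
model.backward                              # fills model.grads
SGD.new(0.1).update(model.params, model.grads)  # one SGD step on all params
puts loss

Repeating this forward/update loop over real context/target pairs (see create_contexts_target in ruby/lib/util.rb) is what the Trainer class in ruby/lib/trainer.rb automates.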
-------------------------------------------------------------------------------- /ruby/lib/softmax_with_loss.rb: -------------------------------------------------------------------------------- 1 | require 'numo/narray' 2 | 3 | class SoftmaxWithLoss 4 | def initialize(y=nil, t=nil) 5 | @y = y # output of softmax 6 | @t = t # teacher labels 7 | end 8 | 9 | def forward(x, t) 10 | @t = t 11 | @y = softmax(x) 12 | 13 | return cross_entropy_error(@y, @t) 14 | end 15 | 16 | def backward(dout=1) 17 | t = @t 18 | y = @y 19 | 20 | if y.ndim == 1 21 | t = t.reshape(1, t.size) 22 | y = y.reshape(1, y.size) 23 | end 24 | 25 | # If the teacher data is one-hot, convert it to class-label indices 26 | if t.size == y.size 27 | t = t.max_index(axis: 1) 28 | end 29 | 30 | dx = y.copy() 31 | dx[t].inplace - 1 32 | dx *= dout 33 | dx /= t.size 34 | 35 | return dx 36 | end 37 | 38 | def softmax(x) 39 | if x.ndim == 2 40 | x = x - x.max(axis: 1, keepdims: true) 41 | x = Numo::NMath.exp(x) 42 | x /= x.sum(axis: 1, keepdims: true) 43 | elsif x.ndim == 1 44 | x = x - x.max 45 | x = Numo::NMath.exp(x) 46 | x /= x.sum 47 | end 48 | 49 | return x 50 | end 51 | 52 | def cross_entropy_error(y, t) 53 | if y.ndim == 1 54 | t = t.reshape(1, t.size) 55 | y = y.reshape(1, y.size) 56 | end 57 | 58 | # If the teacher data is one-hot, convert it to class-label indices 59 | if t.size == y.size 60 | t = t.max_index(axis: 1) 61 | end 62 | 63 | return -1 * Numo::NMath.log(y[t] + 1e-7).sum / t.size 64 | end 65 | 66 | def params 67 | [@t, @y] 68 | end 69 | end 70 | -------------------------------------------------------------------------------- /ruby/lib/time_embedding.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'numo/narray' 4 | require 'embedding' 5 | 6 | class TimeEmbedding 7 | attr_accessor :params, :grads 8 | 9 | def initialize(w) 10 | @params = [w] 11 | @grads = [w.new_zeros] 12 | end 13 | 14 | def forward(idx) 15 | w = @params.first 16 | n, t = idx.shape 17 | @idx = idx 18 | out = Numo::SFloat.zeros(n, t, w.shape.last) 19 | @layers = [] 20 | t.times do |ti| 21 | layer = Embedding.new(w) 22 | out[true, ti, true] = layer.forward(idx[true, ti]) 23 | @layers << layer 24 | end 25 | out 26 | end 27 | 28 | def backward(dout) 29 | _n, t, _d = dout.shape 30 | w = @params.first 31 | 32 | grad = w.new_zeros 33 | 34 | (t - 1).downto(0) do |ti| 35 | layer = @layers[ti] 36 | layer.backward(dout[true, ti, true]) 37 | grad.inplace + layer.grads[0] 38 | end 39 | 40 | @grads[0].store(grad) 41 | nil 42 | end 43 | end 44 | 45 | -------------------------------------------------------------------------------- /ruby/lib/time_rnn.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'numo/narray' 4 | require_relative '../lib/rnn' 5 | 6 | class TimeRnn 7 | attr_accessor :params, :grads 8 | 9 | def initialize(wx, wh, b, stateful: false) 10 | @params = [wx, wh, b] 11 | @grads = [wx.new_zeros, wh.new_zeros, b.new_zeros] 12 | @layers = nil 13 | 14 | @h = nil 15 | @dh = nil 16 | @stateful = stateful 17 | end 18 | 19 | def forward(xs) 20 | wx, wh, b = @params 21 | n, t, d = xs.shape 22 | d, h = wx.shape 23 | 24 | @layers = [] 25 | hs = Numo::SFloat.zeros(n, t, h) 26 | 27 | if !@stateful || @h.nil?
28 | @h = Numo::SFloat.zeros(n, h) 29 | end 30 | 31 | t.times do |ti| 32 | layer = Rnn.new(*@params) 33 | @h = layer.forward(xs[true, ti, true], @h) 34 | hs[true, ti, true] = @h 35 | @layers.append(layer) 36 | end 37 | 38 | hs 39 | end 40 | 41 | def backward(dhs) 42 | wx, wh, b = @params 43 | n, t, h = dhs.shape 44 | d, h = wx.shape 45 | 46 | dxs = Numo::SFloat.zeros(n, t, d) 47 | dh = 0 48 | grads = [0, 0, 0] 49 | 50 | (t - 1).downto(0) do |ti| 51 | layer = @layers[ti] 52 | dx, dh = layer.backward(dhs[true, ti, true] + dh) 53 | dxs[true, ti, true] = dx 54 | 55 | layer.grads.each_with_index do |grad, i| 56 | grads[i] += grad 57 | end 58 | end 59 | 60 | grads.each_with_index do |grad, i| 61 | @grads[i].store(grad) 62 | end 63 | 64 | @dh = dh 65 | 66 | dxs 67 | end 68 | 69 | def state=(h) 70 | @h = h 71 | end 72 | 73 | def reset_state 74 | @h = nil 75 | end 76 | end 77 | -------------------------------------------------------------------------------- /ruby/lib/trainer.rb: -------------------------------------------------------------------------------- 1 | require 'matplotlib/pyplot' 2 | require_relative 'util' 3 | 4 | class Trainer 5 | def initialize(model, optimizer) 6 | @model = model 7 | @optimizer = optimizer 8 | @loss_list = [] 9 | @eval_interval = nil 10 | @current_epoch = 0 11 | end 12 | 13 | def fit(x, t, max_epoch: 10, batch_size: 32, max_grad: nil, eval_interval: 20) 14 | data_size = x.shape.first 15 | max_iters = (data_size / batch_size).floor 16 | @eval_interval = eval_interval 17 | total_loss = 0 18 | loss_count = 0 19 | 20 | start_time = Time.now 21 | max_epoch.times do |epoch| 22 | @current_epoch += 1 23 | # Shuffle 24 | idx = Numo::Int64.new(data_size).store((0 ... data_size).to_a.shuffle) 25 | ex = get_at_dim_index(x, 0, idx) 26 | et = get_at_dim_index(t, 0, idx) 27 | 28 | max_iters.times do |iters| 29 | batch_range = (iters * batch_size) ... ((iters + 1) * batch_size) 30 | batch_x = get_at_dim_index(ex, 0, batch_range) 31 | batch_t = get_at_dim_index(et, 0, batch_range) 32 | 33 | # Compute the gradients and update the parameters with the optimizer 34 | loss = @model.forward(batch_x, batch_t) 35 | @model.backward 36 | params, grads = remove_duplicate(@model.params, @model.grads) # consolidate shared weights into one 37 | clip_grads(grads, max_grad) unless max_grad.nil? 38 | @optimizer.update(params, grads) 39 | total_loss += loss 40 | loss_count += 1 41 | 42 | # Evaluation (@current_epoch is already incremented at the top of the loop, so print it as-is) 43 | if !eval_interval.nil? && iters % eval_interval == 0 44 | avg_loss = total_loss / loss_count 45 | elapsed_time = Time.now - start_time 46 | puts "| epoch #{@current_epoch} | iter #{iters + 1} / #{max_iters} | time #{elapsed_time} | loss #{avg_loss}" 47 | @loss_list << avg_loss 48 | total_loss = 0 49 | loss_count = 0 50 | end 51 | end 52 | end 53 | end 54 | 55 | def plot(ylim = nil) 56 | plt = Matplotlib::Pyplot 57 | x = (0 ... @loss_list.length).to_a 58 | plt.ylim(ylim) unless ylim.nil? 59 | plt.plot(x, @loss_list, label: 'train') 60 | plt.xlabel("iterations (x#{@eval_interval})") 61 | plt.ylabel('loss') 62 | plt.show 63 | end 64 | end 65 | 66 | def remove_duplicate(_params, _grads) 67 | # Consolidate duplicated weights in the parameter list into one, accumulating the corresponding gradients 68 | params = _params.clone 69 | grads = _grads.clone 70 | 71 | while true do 72 | find_flg = false 73 | l = params.length 74 | (l - 1).times do |i| 75 | ((i + 1) ... 
l).each do |j| 76 | if params[i] && params[j] 77 | if params[i].equal?(params[j]) # identity check: shared weights are literally the same object 78 | # The weights are shared 79 | grads[i].inplace + grads[j] # accumulate the gradient 80 | find_flg = true 81 | params.delete_at(j) 82 | grads.delete_at(j) 83 | elsif params[i].ndim == 2 && params[j].ndim == 2 && params[i].transpose.shape == params[j].shape && params[i].transpose.eq(params[j]).all? 84 | # The weights are shared as transposed matrices (weight tying) 85 | grads[i].inplace + grads[j].transpose 86 | find_flg = true 87 | params.delete_at(j) 88 | grads.delete_at(j) 89 | end 90 | end 91 | break if find_flg 92 | end 93 | break if find_flg 94 | end 95 | break unless find_flg 96 | end 97 | return params, grads 98 | end 99 | -------------------------------------------------------------------------------- /ruby/lib/util.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | def clip_grads(grads, max_norm) 4 | total_norm = 0 5 | grads.each { |grad| total_norm += (grad ** 2).sum } 6 | total_norm = Math.sqrt(total_norm) # total_norm is a plain Float here 7 | 8 | rate = max_norm / (total_norm + 1e-6) 9 | if rate < 1 10 | grads.each { |grad| grad.inplace * rate } # scale in place so callers see the clipped gradients 11 | end 12 | end 13 | 14 | def preprocess(text) 15 | text = text.downcase 16 | .gsub('.', ' .') 17 | words = text.split(' ') 18 | 19 | word_to_id = {} 20 | id_to_word = {} 21 | 22 | words.each do |word| 23 | unless word_to_id.include?(word) 24 | new_id = word_to_id.length 25 | word_to_id[word] = new_id 26 | id_to_word[new_id] = word 27 | end 28 | end 29 | 30 | corpus = Numo::NArray[*words.map { |w| word_to_id[w] }] 31 | 32 | [corpus, word_to_id, id_to_word] 33 | end 34 | 35 | def create_contexts_target(corpus, window_size: 1) 36 | target = corpus[window_size...-window_size] 37 | contexts = [] 38 | 39 | (window_size...(corpus.length - window_size)).each do |idx| 40 | cs = [] 41 | (-window_size..window_size).each do |t| 42 | next if t.zero?
43 | cs.append(corpus[idx + t]) 44 | end 45 | contexts.append(cs) 46 | end 47 | n_contexts = Numo::UInt32.zeros(contexts.length, contexts[0].length) 48 | n_contexts[] = contexts 49 | 50 | n_target = Numo::UInt32.zeros(target.length) 51 | n_target[] = target 52 | 53 | [n_contexts, n_target] 54 | end 55 | 56 | def convert_one_hot(corpus, vocab_size) 57 | n = corpus.shape[0] 58 | 59 | if corpus.ndim == 1 60 | one_hot = Numo::UInt32.zeros(n, vocab_size) 61 | corpus.each_with_index do |word_id, idx| 62 | one_hot[idx, word_id] = 1 63 | end 64 | elsif corpus.ndim == 2 65 | c = corpus.shape[1] 66 | one_hot = Numo::UInt32.zeros(n, c, vocab_size) 67 | 68 | n.times do |idx0| 69 | word_ids = corpus[idx0, true] 70 | word_ids.each_with_index do |word_id, idx1| 71 | one_hot[idx0, idx1, word_id] = 1 72 | end 73 | end 74 | end 75 | 76 | one_hot 77 | end 78 | 79 | def get_at_dim_index(x, dim_no, idxs) 80 | ind = dim_full_indices(x, dim_no, idxs) 81 | x[*ind] 82 | end 83 | 84 | def dim_full_indices(x, dim_no, idxs) 85 | ind = Array.new(x.ndim, true) 86 | ind[dim_no] = idxs 87 | ind 88 | end 89 | -------------------------------------------------------------------------------- /ruby/test/affine_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "affine" 3 | require "numo/narray" 4 | 5 | class AffineTest < Test::Unit::TestCase 6 | def setup 7 | @weight = Numo::SFloat[[1,2,3],[5,8,13]] # input: 2, hidden: 3 8 | @bias = Numo::SFloat[0.2, 0.3, 0.4] 9 | @target = Affine.new(@weight, @bias) 10 | end 11 | 12 | def test_initialize 13 | assert_equal [@weight, @bias], @target.params 14 | assert_equal [ Numo::SFloat[[0,0,0],[0,0,0]], 15 | Numo::SFloat[0,0,0] 16 | ], @target.grads 17 | end 18 | 19 | def test_forward1 20 | x = Numo::SFloat[[3,1.5]] 21 | # x.dot(W) = [3*1 + 1.5*5, 3*2 + 1.5*8, 3*3 + 1.5*13] 22 | # = [3 + 7.5 , 6 + 12 , 9 + 19.5 ] 23 | # = [10.5, 18 , 28.5] 24 | # x.dot(W) + b = [10.7, 18.3, 28.9] 25 | assert_equal Numo::SFloat[[10.7,18.3,28.9]], @target.forward(x) 26 | end 27 | 28 | def test_forward2 29 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 30 | # x.dot(W) = [[2*1 + 7*5, 2*2 + 7*8, 2*3 + 7*13 ], 31 | # [3*1 + 9*5, 3*2 + 9*8, 3*3 + 9*13 ], 32 | # [11*1 + 13*5, 11*2 + 13*8, 11*3 + 13*13]] 33 | # = [[37, 60, 97], [48, 78, 126], [76, 126, 202]] 34 | assert_equal Numo::SFloat[[37.2, 60.3, 97.4], 35 | [48.2, 78.3, 126.4], 36 | [76.2, 126.3, 202.4]], @target.forward(x) 37 | end 38 | 39 | def test_backward1 40 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 41 | dout = Numo::SFloat[[1,0,0],[0,1,0],[0,0,1]] # hidden = 3, input = 2 42 | @target.forward(x) 43 | dLdx = @target.backward(dout) 44 | assert_equal @weight.transpose, dLdx 45 | assert_equal x.transpose, @target.grads[0] 46 | assert_equal Numo::SFloat[1,1,1], @target.grads[1] 47 | end 48 | 49 | def test_backward2 50 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 51 | dout = Numo::SFloat[[1,0.5,0.5],[0.5,1,0.5],[0.5,0.5,1]] # hidden = 3, input = 2 52 | @target.forward(x) 53 | dLdx = @target.backward(dout) 54 | assert_equal Numo::SFloat[[3.5, 15.5], 55 | [4, 17], 56 | [4.5, 19.5]], dLdx 57 | assert_equal Numo::SFloat[[9, 9.5, 13.5], 58 | [18, 19, 21]], @target.grads[0] 59 | assert_equal Numo::SFloat[2,2,2], @target.grads[1] 60 | end 61 | 62 | def test_backward3 63 | x = Numo::SFloat[[3, 1.5]] 64 | dout = Numo::SFloat[[1],[0.5],[1]] # hidden = 3, input = 2 65 | @target.forward(x) 66 | dLdx = @target.backward(dout) 67 | assert_equal Numo::SFloat[[6,26],[3,13],[6,26]], dLdx #dx 68 | assert_equal 
Numo::SFloat[[7.5, 7.5, 7.5],[3.75, 3.75, 3.75]], @target.grads[0] #dW 69 | assert_equal Numo::SFloat[2.5, 2.5, 2.5], @target.grads[1] #repeat 70 | end 71 | end 72 | -------------------------------------------------------------------------------- /ruby/test/mat_mul_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "mat_mul" 3 | require "numo/narray" 4 | 5 | class MatMulTest < Test::Unit::TestCase 6 | def setup 7 | @weight = Numo::SFloat[[1,2,3],[5,8,13]] # input: 2, hidden: 3 8 | @target = MatMul.new(@weight) 9 | end 10 | 11 | def test_initialize 12 | assert_equal [@weight], @target.params 13 | assert_equal [ Numo::SFloat[[0,0,0],[0,0,0]] ], @target.grads 14 | end 15 | 16 | def test_forward1 17 | x = Numo::SFloat[3,1.5] 18 | # x.dot(W) = [3*1 + 1.5*5, 3*2 + 1.5*8, 3*3 + 1.5*13] 19 | # = [3 + 7.5 , 6 + 12 , 9 + 19.5 ] 20 | # = [10.5, 18 , 28.5] 21 | assert_equal Numo::SFloat[10.5,18,28.5], @target.forward(x) 22 | end 23 | 24 | def test_forward2 25 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 26 | # x.dot(W) = [[2*1 + 7*5, 2*2 + 7*8, 2*3 + 7*13 ], 27 | # [3*1 + 9*5, 3*2 + 9*8, 3*3 + 9*13 ], 28 | # [11*1 + 13*5, 11*2 + 13*8, 11*3 + 13*13]] 29 | # = [[37, 60, 97], [48, 78, 126], [76, 126, 202]] 30 | assert_equal Numo::SFloat[[37, 60, 97], 31 | [48, 78, 126], 32 | [76, 126, 202]], @target.forward(x) 33 | end 34 | 35 | def test_backward1 36 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 37 | dout = Numo::SFloat[[1,0,0],[0,1,0],[0,0,1]] # hidden = 3, input = 2 38 | @target.forward(x) 39 | dLdx = @target.backward(dout) 40 | assert_equal @weight.transpose, dLdx 41 | assert_equal x.transpose, @target.grads[0] 42 | end 43 | 44 | def test_backward2 45 | x = Numo::SFloat[[2,7],[3,9],[11,13]] 46 | dout = Numo::SFloat[[1,0.5,0.5],[0.5,1,0.5],[0.5,0.5,1]] # hidden = 3, input = 2 47 | @target.forward(x) 48 | dLdx = @target.backward(dout) 49 | assert_equal Numo::SFloat[[3.5, 15.5], 50 | [4, 17], 51 | [4.5, 19.5]], dLdx 52 | assert_equal Numo::SFloat[[9, 9.5, 13.5], 53 | [18, 19, 21]], @target.grads[0] 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /ruby/test/optimizer_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "numo/narray" 3 | require "optimizer" 4 | 5 | class SGDTest < Test::Unit::TestCase 6 | def setup 7 | @lr = 0.02 8 | @sgd = SGD.new(@lr) 9 | end 10 | 11 | def test_update 12 | params = [Numo::SFloat[0.1, 0.2, 0.3], Numo::SFloat[0.4, 0.5, 0.6]] 13 | grads = [Numo::SFloat[0.01, 0.02, 0.03], Numo::SFloat[0.04, 0.05, 0.06]] 14 | 15 | @sgd.update(params, grads) 16 | 17 | # [[0.1-0.02*0.01, 0.2-0.02*0.02, 0.3-0.02*0.03] 18 | # [0.4-0.02*0.04, 0.5-0.02*0.05, 0.6-0.02*0.06]] 19 | # = [[0.0998, 0.1996, 0.2994], 20 | # [0.3992, 0.499, 0.5988]] 21 | assert_equal [Numo::SFloat[0.0998, 0.1996, 0.2994], 22 | Numo::SFloat[0.3992, 0.499, 0.5988]], 23 | params 24 | end 25 | end 26 | 27 | -------------------------------------------------------------------------------- /ruby/test/rnn_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "rnn" 3 | require "numo/narray" 4 | 5 | class RnnTest < Test::Unit::TestCase 6 | def setup 7 | # N = 5, D = 2, H = 2 8 | @wx = Numo::SFloat[[0.1, 0.2], [0.5, 0.8]] # D x H 9 | @wh = Numo::SFloat[[0.4, 0.2], [0.1, 0.9]] # H x H 10 | @b = Numo::SFloat[0.1] 11 | 12 | @target = Rnn.new(@wx, @wh, @b) 13 | end 14 | 15 
| def test_initialize 16 | assert_equal @wx, @target.params[0] 17 | assert_equal [ 18 | Numo::SFloat[[0, 0], [0, 0]], 19 | Numo::SFloat[[0, 0], [0, 0]], 20 | Numo::SFloat[0] 21 | ], @target.grads 22 | end 23 | 24 | def test_forward 25 | x = Numo::SFloat[ 26 | [0.1, 0.4], 27 | [0.7, 0.5], 28 | [0.3, 0.5], 29 | [0.3, 0.8], 30 | [0.1, 0.9] 31 | ] # N x D 32 | h_prev = Numo::SFloat[ 33 | [0.3, 0.5], 34 | [0.1, 0.4], 35 | [0.7, 0.5], 36 | [0.3, 0.8], 37 | [0.2, 0.2] 38 | ] # N x H 39 | actual = @target.forward(x, h_prev) 40 | expected = Numo::SFloat[ 41 | [0.446244, 0.739783], 42 | [0.462117, 0.769867], 43 | [0.610677, 0.817754], 44 | [0.623065, 0.918602], 45 | [0.578363, 0.785664] 46 | ] 47 | #assert_equal expected, actual 48 | 49 | assert_delta_array(expected, actual) 50 | end 51 | 52 | def test_backward 53 | dh_next = Numo::SFloat[ 54 | [0.3, 0.5], 55 | [0.1, 0.4], 56 | [0.7, 0.5], 57 | [0.3, 0.8], 58 | [0.2, 0.2] 59 | ] # N x H 60 | x = Numo::SFloat[ 61 | [0.1, 0.4], 62 | [0.7, 0.5], 63 | [0.3, 0.5], 64 | [0.3, 0.8], 65 | [0.1, 0.9] 66 | ] # N x D 67 | h_prev = Numo::SFloat[ 68 | [0.3, 0.5], 69 | [0.1, 0.4], 70 | [0.7, 0.5], 71 | [0.3, 0.8], 72 | [0.2, 0.2] 73 | ] # N x H 74 | @target.forward(x, h_prev) 75 | actual_dx, actual_dh_prev = @target.backward(dh_next) 76 | expected_dx = Numo::SFloat[[0.0692981, 0.301218], 77 | [0.0404489, 0.16966], 78 | [0.077023, 0.351987], 79 | [0.043341, 0.191718], 80 | [0.0286192, 0.127787]] 81 | assert_delta_array(expected_dx, actual_dx) 82 | expected_dh_prev = Numo::SFloat[[0.141376, 0.22775], 83 | [0.0640424, 0.154494], 84 | [0.208708, 0.19297], 85 | [0.0984021, 0.130797], 86 | [0.068549, 0.0822017]] 87 | assert_delta_array(expected_dh_prev, actual_dh_prev) 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /ruby/test/simple_cbow_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "simple_cbow" 3 | 4 | class SimpleCBowTest < Test::Unit::TestCase 5 | def setup 6 | @simple_cbow = SimpleCBow.new(7, 3) 7 | end 8 | 9 | def test_initialize 10 | assert(true) 11 | end 12 | 13 | def test_forward 14 | contexts = Numo::NArray[[1, 0, 0, 0, 0, 0, 0], 15 | [0, 0, 1, 0, 0, 0, 0]] 16 | target = Numo::NArray[0, 1, 0, 0, 0, 0, 0] 17 | 18 | @simple_cbow.forward(contexts, target) 19 | assert(true) 20 | end 21 | 22 | def test_backward 23 | Numo::NArray.srand(1) 24 | 25 | contexts = Numo::NArray[[1, 0, 0, 0, 0, 0, 0], 26 | [0, 0, 1, 0, 0, 0, 0]] 27 | target = Numo::NArray[0, 1, 0, 0, 0, 0, 0] 28 | 29 | @simple_cbow.forward(contexts, target) 30 | @simple_cbow.backward 31 | 32 | expected = [[0.000617545, 0.00373067, 0.00794815], 33 | [0.00201042, 0.00116041, 0.00344032], 34 | [0.00539948, 0.00737815, 0.00165089], 35 | [0.000508827, 0.00108065, 0.000687079], 36 | [0.00904121, 0.00478644, 0.00342969], 37 | [0.00164541, 0.0074603, 0.00138994], 38 | [0.00411576, 0.00292532, 0.00869421]] 39 | 40 | @simple_cbow.word_vecs.to_a.zip(expected).each do |actual_row, expected_row| 41 | actual_row.zip(expected_row) do |actual, expected_value| 42 | assert_in_delta actual, expected_value, 0.00001 43 | end 44 | end 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /ruby/test/softmax_with_loss_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "softmax_with_loss" 3 | require "numo/narray" 4 | 5 | class SoftmaxWithLossTest < 
Test::Unit::TestCase 6 | def setup 7 | @target = SoftmaxWithLoss.new 8 | assert_equal [nil, nil], @target.params 9 | end 10 | 11 | def test_softmax1 12 | # The softmax results were cross-checked with this site: https://keisan.casio.jp/exec/system/1516841458 13 | assert_in_delta 0.86681333219734, @target.softmax(Numo::SFloat[3, 7, 5])[1], 0.00001 14 | assert_in_delta 0.86681333219734, @target.softmax(Numo::SFloat[[3, 7, 5], [1, 9, 2]])[1], 0.00001 15 | assert_in_delta 0.99875420933679, @target.softmax(Numo::SFloat[[3, 7, 5], [1, 9, 2]])[4], 0.00001 16 | end 17 | 18 | def test_softmax2 19 | assert_in_delta 1.0, @target.softmax(Numo::SFloat[3, 7, 5]).sum, 0.00001 20 | end 21 | 22 | def test_cross_entropy_error 23 | assert_in_delta 0.51082562376, @target.cross_entropy_error(Numo::SFloat[0.3, 0.6, 0.1], Numo::SFloat[0, 1, 0]), 0.00001 24 | assert_in_delta 0.10536051565, @target.cross_entropy_error(Numo::SFloat[0.0, 0.1, 0.9], Numo::SFloat[0, 0, 1]), 0.00001 25 | assert_in_delta 0.308093069705, @target.cross_entropy_error(Numo::SFloat[[0.3, 0.6, 0.1], [0.0, 0.1, 0.9]], Numo::SFloat[[0, 1, 0], [0, 0, 1]]), 0.00001 26 | end 27 | 28 | def test_backward 29 | bwtarget = SoftmaxWithLoss.new(Numo::SFloat[3, 1, 9], Numo::SFloat[0, 0, 1]) 30 | a = bwtarget.backward() 31 | assert_in_delta 3, a[0], 0.00001 32 | assert_in_delta 1, a[1], 0.00001 33 | assert_in_delta 8, a[2], 0.00001 34 | bwtarget = SoftmaxWithLoss.new(Numo::SFloat[[3, 1, 9], [2, 6, 5]], Numo::SFloat[[0, 0, 1], [0, 1, 0]]) 35 | a = bwtarget.backward() 36 | assert_in_delta 1.5, a[0], 0.00001 37 | assert_in_delta 0.5, a[1], 0.00001 38 | assert_in_delta 4, a[2], 0.00001 39 | assert_in_delta 1, a[3], 0.00001 40 | assert_in_delta 2.5, a[4], 0.00001 41 | assert_in_delta 2.5, a[5], 0.00001 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /ruby/test/test_helper.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) 2 | require 'test/unit' 3 | 4 | module Test 5 | module Unit 6 | module Assertions 7 | def assert_delta_array(expected, actual, delta = 0.00001, message = nil) 8 | assert_equal(expected.shape, actual.shape) 9 | expected.to_a.flatten.zip(actual.to_a.flatten).each do |expected_value, actual_value| 10 | assert_in_delta expected_value, actual_value, delta, message 11 | end 12 | end 13 | end 14 | end 15 | end 16 | 17 | -------------------------------------------------------------------------------- /ruby/test/time_embedding_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "time_embedding" 3 | require "numo/narray" 4 | 5 | class TimeEmbeddingTest < Test::Unit::TestCase 6 | def setup 7 | @w = Numo::SFloat[[0.20071, -0.210761, 0.21761, 0.20861, 0.203998], 8 | [-0.279034, 0.275155, -0.26858, -0.284655, -0.227953], 9 | [0.257709, -0.26606, 0.270528, 0.259538, 0.264047], 10 | [-0.255427, 0.260104, -0.256653, -0.248147, -0.250488], 11 | [0.245554, -0.246388, 0.249393, 0.23673, 0.24366], 12 | [0.207265, -0.207376, 0.209755, 0.207179, 0.206551], 13 | [-0.213308, 0.195443, -0.139508, -0.207847, 0.102623]] 14 | @target = TimeEmbedding.new(@w) 15 | end 16 | 17 | def test_initialize 18 | assert_equal [@w], @target.params 19 | assert_equal [@w.new_zeros], @target.grads 20 | end 21 | 22 | def test_forward 23 | output = @target.forward(Numo::Int32[[0,1], [2,3], [4,5]]) 24 | expected = [@w[[0,1], true].to_a, @w[[2,3], true].to_a, @w[[4,5], true].to_a] 25 | 
assert_equal(expected, output.to_a) 26 | end 27 | 28 | def test_backward 29 | output = @target.forward(Numo::Int32[[0,1], [2,3], [4,5]]) 30 | @target.backward(output) 31 | expected = Numo::SFloat.zeros(7, 5) 32 | expected = @w[0...6, true].concatenate(Numo::SFloat.zeros(1, 5)) 33 | assert_delta_array expected, @target.grads.first 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /ruby/test/time_rnn_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "time_rnn" 3 | require "numo/narray" 4 | 5 | class TimeRNNTest < Test::Unit::TestCase 6 | def setup 7 | # D = 2, H = 2 8 | @wx = Numo::SFloat[[0.1, 0.2], [0.5, 0.8]] # D x H 9 | @wh = Numo::SFloat[[0.4, 0.2], [0.1, 0.9]] # H x H 10 | @b = Numo::SFloat[0.1] 11 | 12 | @target = TimeRnn.new(@wx, @wh, @b) 13 | end 14 | 15 | def test_initialize 16 | assert_equal @wx, @target.params[0] 17 | assert_equal [ 18 | Numo::SFloat[[0, 0], [0, 0]], 19 | Numo::SFloat[[0, 0], [0, 0]], 20 | Numo::SFloat[0] 21 | ], @target.grads 22 | end 23 | 24 | def test_forward 25 | x = Numo::SFloat[ # N x T x D ( 3 x 2 x 2 ) 26 | [ 27 | [0.1, 0.4], 28 | [0.7, 0.5] 29 | ], 30 | [ 31 | [0.1, 0.4], 32 | [0.7, 0.5] 33 | ], 34 | [ 35 | [0.1, 0.4], 36 | [0.7, 0.5] 37 | ], 38 | ] 39 | 40 | actual = @target.forward(x) 41 | 42 | expected = Numo::SFloat[ 43 | [ 44 | [0.300437, 0.413644], 45 | [0.523783, 0.790352] 46 | ], 47 | [ 48 | [0.300437, 0.413644], 49 | [0.523783, 0.790352] 50 | ], 51 | [ 52 | [0.300437, 0.413644], 53 | [0.523783, 0.790352] 54 | ] 55 | ] 56 | 57 | assert_delta_array(expected, actual) 58 | end 59 | 60 | def test_backward 61 | x = Numo::SFloat[ # N x T x D ( 3 x 2 x 2 ) 62 | [ 63 | [0.1, 0.4], 64 | [0.7, 0.5] 65 | ], 66 | [ 67 | [0.1, 0.4], 68 | [0.7, 0.5] 69 | ], 70 | [ 71 | [0.1, 0.4], 72 | [0.7, 0.5] 73 | ], 74 | ] 75 | 76 | @target.forward(x) 77 | 78 | dh_next = Numo::SFloat[ # N x T x H ( 3 x 2 x 2 ) 79 | [ 80 | [0.3, 0.5], 81 | [0.7, 0.5] 82 | ], 83 | [ 84 | [0.2, 0.2], 85 | [0.7, 0.5] 86 | ], 87 | [ 88 | [0.3, 0.8], 89 | [0.2, 0.2] 90 | ] 91 | ] 92 | 93 | actual_dxs = @target.backward(dh_next) 94 | 95 | expected_dxs = Numo::SFloat[ 96 | [ 97 | [0.168503, 0.723202], 98 | [0.08833, 0.404116] 99 | ], 100 | [ 101 | [0.109671, 0.47878], 102 | [0.08833, 0.404116] 103 | ], 104 | [ 105 | [0.180169, 0.754616], 106 | [0.0295268, 0.13262] 107 | ] 108 | ] 109 | 110 | assert_delta_array(expected_dxs, actual_dxs) 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /ruby/test/two_layers_net_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require_relative "../examples/ch01/two_layers_net" 3 | require "numo/narray" 4 | 5 | class TwoLayersNetTest < Test::Unit::TestCase 6 | def setup 7 | @target = TwoLayersNet.new(input_size: 3, hidden_size: 5, output_size: 4) 8 | end 9 | 10 | def test_initialize 11 | affine1, sigmoid, affine2 = @target.layers 12 | assert_equal [3, 5], affine1.params[0].shape 13 | assert_equal Sigmoid, sigmoid.class 14 | assert_equal [5, 4], affine2.params[0].shape 15 | end 16 | 17 | def test_forward 18 | x = Numo::SFloat[[1,2,3],[4,5,6]] 19 | t = Numo::SFloat[[1,2,3,4],[5,6,7,8]] 20 | cross_entropy_error = @target.forward(x, t) 21 | assert_equal Float, cross_entropy_error.class 22 | end 23 | 24 | def test_backward1 25 | x = Numo::SFloat[[1,2,3],[4,5,6]] 26 | t = Numo::SFloat[[1,2,3,4],[5,6,7,8]] 27 | 
cross_entropy_error = @target.forward(x, t) 28 | dout = 0.8 29 | last_dout = @target.backward(dout) 30 | assert_equal [2,3], last_dout.shape 31 | end 32 | 33 | def test_backward2 34 | x = Numo::SFloat[1,2,3] 35 | t = Numo::SFloat[1,2,3,4] 36 | cross_entropy_error = @target.forward(x, t) 37 | dout = 0.8 38 | last_dout = @target.backward(dout) 39 | assert_equal [1,3], last_dout.shape 40 | end 41 | end 42 | --------------------------------------------------------------------------------
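The expected values in these tests were computed by hand, which is easy to get wrong; a numerical gradient check is a useful independent cross-check. Below is a minimal sketch (hypothetical, not a file in this repo) that compares Affine#backward against a central-difference estimate, assuming it is run from the ruby/ directory:

require 'numo/narray'
require_relative 'lib/affine'

# Central-difference estimate of dL/dx for L = sum(layer.forward(x)).
def numerical_grad(layer, x, eps = 1e-4)
  grad = x.new_zeros
  x.size.times do |i|
    orig = x[i]
    x[i] = orig + eps
    f_plus = layer.forward(x).sum
    x[i] = orig - eps
    f_minus = layer.forward(x).sum
    x[i] = orig
    grad[i] = (f_plus - f_minus) / (2 * eps)
  end
  grad
end

w = Numo::DFloat[[1, 2, 3], [5, 8, 13]]
b = Numo::DFloat[0.2, 0.3, 0.4]
layer = Affine.new(w, b)
x = Numo::DFloat[[3, 1.5]]

numeric = numerical_grad(layer, x)
layer.forward(x)                                   # cache x for backward
analytic = layer.backward(Numo::DFloat.ones(1, 3)) # dL/dy = 1 when L = sum(y)
puts (analytic - numeric).abs.max                  # should be ~1e-8 for DFloat

The same pattern extends to the weight gradients and to the other layers under ruby/lib.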