├── 2
│   ├── 00_hello_world.py
│   ├── 01_if.py
│   ├── 02_if_else.py
│   ├── 03_if_elif.py
│   ├── 04_if_elif_else.py
│   ├── 05_while.py
│   ├── 06_while_break.py
│   ├── 07_while_else.py
│   ├── 08_for.py
│   ├── 09_for_break.py
│   ├── 10_for_dict.py
│   ├── 11_for_dict_key_value.py
│   ├── 12_function_print_x2.py
│   ├── 13_function_return_x2.py
│   ├── 14_function_return_ax2_1.py
│   ├── 15_function_return_ax2_2.py
│   └── 16_class.py
├── 3
│   ├── 01_simple_perceptron.py
│   ├── 02_logistic_regression.py
│   ├── 03_multi_class_logistic_regression.py
│   ├── 04_mlp.py
│   ├── 05_mlp_toy_problem.py
│   ├── 100_logistic_regression_xor.py
│   └── models
│       ├── MLP.py
│       └── __init__.py
├── 4
│   ├── 01_mlp_toy_problem_keras.py
│   ├── 02_mlp_toy_problem_tf.py
│   ├── 03_mlp_toy_problem_torch.py
│   ├── 04_mnist_keras.py
│   ├── 05_mnist_tf.py
│   ├── 06_mnist_torch.py
│   ├── 07_mnist_tanh_keras.py
│   ├── 08_mnist_tanh_tf.py
│   ├── 09_mnist_tanh_torch.py
│   ├── 10_mnist_relu_keras.py
│   ├── 11_mnist_relu_tf.py
│   ├── 12_mnist_relu_torch.py
│   ├── 13_mnist_lrelu_keras.py
│   ├── 14_mnist_lrelu_tf.py
│   ├── 15_mnist_lrelu_torch.py
│   ├── 16_mnist_swish_keras.py
│   ├── 17_mnist_swish_tf.py
│   ├── 18_mnist_swish_torch.py
│   ├── 19_mnist_dropout_keras.py
│   ├── 20_mnist_dropout_tf.py
│   ├── 21_mnist_dropout_torch.py
│   ├── 22_mnist_plot_keras.py
│   ├── 23_mnist_plot_tf.py
│   ├── 24_mnist_plot_torch.py
│   ├── 25_mnist_early_stopping_keras.py
│   ├── 26_mnist_early_stopping_tf.py
│   ├── 27_mnist_early_stopping_torch.py
│   ├── 28_mnist_batch_norm_keras.py
│   ├── 29_mnist_batch_norm_tf.py
│   ├── 30_mnist_batch_norm_torch.py
│   └── callbacks
│       ├── EarlyStopping.py
│       └── __init__.py
├── 5
│   ├── 01_sin_rnn_keras.py
│   ├── 02_sin_rnn_tf.py
│   ├── 03_sin_rnn_torch.py
│   ├── 04_sin_lstm_keras.py
│   ├── 05_sin_lstm_tf.py
│   ├── 06_sin_lstm_torch.py
│   ├── 07_adding_problem_lstm_keras.py
│   ├── 08_adding_problem_lstm_tf.py
│   ├── 09_adding_problem_lstm_torch.py
│   ├── 10_sin_gru_keras.py
│   ├── 11_sin_gru_tf.py
│   ├── 12_sin_gru_torch.py
│   ├── 13_adding_problem_gru_keras.py
│   ├── 14_adding_problem_gru_tf.py
│   ├── 15_adding_problem_gru_torch.py
│   ├── 16_mnist_birnn_keras.py
│   ├── 17_mnist_birnn_tf.py
│   ├── 18_mnist_birnn_torch.py
│   ├── 19_imdb_birnn_keras.py
│   ├── 20_imdb_birnn_tf.py
│   ├── 21_imdb_birnn_torch.py
│   ├── callbacks
│   │   ├── EarlyStopping.py
│   │   └── __init__.py
│   └── sounds
│       ├── sin.mp3
│       └── sin_noise.mp3
├── 6
│   ├── 01_mnist_dataloader_tf.py
│   ├── 02_mnist_dataloader_torch.py
│   ├── 03_encoder_decoder_tf.py
│   ├── 04_encoder_decoder_torch.py
│   ├── 05_attention_tf.py
│   ├── 06_attention_torch.py
│   ├── 07_transformer_tf.py
│   ├── 08_transformer_torch.py
│   ├── callbacks
│   │   ├── EarlyStopping.py
│   │   └── __init__.py
│   ├── data
│   │   └── .keep
│   ├── layers
│   │   ├── __init__.py
│   │   ├── tf
│   │   │   ├── Attention.py
│   │   │   ├── LayerNormalization.py
│   │   │   ├── MultiHeadAttention.py
│   │   │   ├── PositionalEncoding.py
│   │   │   ├── ScaledDotProductAttention.py
│   │   │   └── __init__.py
│   │   └── torch
│   │       ├── Attention.py
│   │       ├── MultiHeadAttention.py
│   │       ├── PositionalEncoding.py
│   │       ├── ScaledDotProductAttention.py
│   │       └── __init__.py
│   └── utils
│       ├── Vocab.py
│       ├── __init__.py
│       ├── tf
│       │   ├── DataLoader.py
│       │   └── __init__.py
│       └── torch
│           ├── DataLoader.py
│           └── __init__.py
├── .gitignore
├── A
│   ├── 01_decorator.py
│   ├── 02_save_load_model_keras.py
│   ├── 03_save_load_model_tf.py
│   └── 04_save_load_model_torch.py
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | **/data/*
3 | !**/data/.keep
4 |
5 | output/*
6 | !output/.keep
7 |
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 | *$py.class
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | -------------------------------------------------------------------------------- /2/00_hello_world.py: -------------------------------------------------------------------------------- 1 | print("hello, world!") 2 | -------------------------------------------------------------------------------- /2/01_if.py: -------------------------------------------------------------------------------- 1 | a = 10 2 | 3 | if a > 1: 4 | print("a > 1") 5 | -------------------------------------------------------------------------------- /2/02_if_else.py: -------------------------------------------------------------------------------- 1 | a = -10 2 | 3 | if a > 1: 4 | print("a > 1") 5 | else: 6 | print("a <= 1") 7 | -------------------------------------------------------------------------------- /2/03_if_elif.py: -------------------------------------------------------------------------------- 1 | a = 0 2 | 3 | if a > 1: 4 | print("a > 1") 5 | elif a > -1: 6 | print("1 >= a > -1") 7 | -------------------------------------------------------------------------------- /2/04_if_elif_else.py: -------------------------------------------------------------------------------- 1 | a = -2 2 | 3 | if a > 1: 4 | print("a > 1") 5 | elif a > -1: 6 | print("1 >= a > -1") 7 | elif a > -3: 8 | print("-1 >= a > -3") 9 | else: 10 | print("a <= -3") 11 | -------------------------------------------------------------------------------- /2/05_while.py: -------------------------------------------------------------------------------- 1 | a = 5 2 | 3 | while a > 0: 4 | print("a =", a) 5 | a -= 1 6 | -------------------------------------------------------------------------------- /2/06_while_break.py: -------------------------------------------------------------------------------- 1 | a = 5 2 | 3 | while a > 0: 4 | print("a =", a) 5 | a -= 1 6 | 7 | if a == 4: 8 | break 9 | -------------------------------------------------------------------------------- /2/07_while_else.py: -------------------------------------------------------------------------------- 1 | a = 5 2 | 3 | while a > 0: 4 
| print("a =", a) 5 | a -= 1 6 | else: 7 | print("end of while") 8 | -------------------------------------------------------------------------------- /2/08_for.py: -------------------------------------------------------------------------------- 1 | data = [0, 1, 2, 3, 4, 5] 2 | 3 | for x in data: 4 | print(x, end=' ') 5 | -------------------------------------------------------------------------------- /2/09_for_break.py: -------------------------------------------------------------------------------- 1 | data = [0, 1, 2, 3, 4, 5] 2 | 3 | for x in data: 4 | print(x, end=' ') 5 | 6 | if x == 1: 7 | break 8 | -------------------------------------------------------------------------------- /2/10_for_dict.py: -------------------------------------------------------------------------------- 1 | data = {'tokyo': 1, 'new york': 2} 2 | 3 | for x in data: 4 | print(x) 5 | -------------------------------------------------------------------------------- /2/11_for_dict_key_value.py: -------------------------------------------------------------------------------- 1 | data = {'tokyo': 1, 'new york': 2} 2 | 3 | for key, value in data.items(): 4 | print(key, end=': ') 5 | print(value) 6 | -------------------------------------------------------------------------------- /2/12_function_print_x2.py: -------------------------------------------------------------------------------- 1 | def f(x): 2 | print(x ** 2) 3 | 4 | 5 | f(1) 6 | f(2) 7 | f(3) 8 | -------------------------------------------------------------------------------- /2/13_function_return_x2.py: -------------------------------------------------------------------------------- 1 | def f(x): 2 | return x ** 2 3 | 4 | 5 | print(f(1) + f(2)) 6 | -------------------------------------------------------------------------------- /2/14_function_return_ax2_1.py: -------------------------------------------------------------------------------- 1 | def f(x, a): 2 | return a * x ** 2, 2 * a * x 3 | 4 | 5 | y, y_prime = f(1, 2) 6 | 7 | print(y) 8 | print(y_prime) 9 | -------------------------------------------------------------------------------- /2/15_function_return_ax2_2.py: -------------------------------------------------------------------------------- 1 | def f(x, a=2): 2 | return a * x ** 2, 2 * a * x 3 | 4 | 5 | y, y_prime = f(1) 6 | 7 | print(y) 8 | print(y_prime) 9 | -------------------------------------------------------------------------------- /2/16_class.py: -------------------------------------------------------------------------------- 1 | class Company: 2 | def __init__(self, sales, cost, persons): 3 | self.sales = sales 4 | self.cost = cost 5 | self.persons = persons 6 | 7 | def get_profit(self): 8 | return self.sales - self.cost 9 | 10 | 11 | company_A = Company(100, 80, 10) 12 | company_B = Company(40, 60, 20) 13 | 14 | print(company_A.sales) 15 | print(company_A.get_profit()) 16 | 17 | company_A.sales = 80 18 | print(company_A.sales) 19 | -------------------------------------------------------------------------------- /3/01_simple_perceptron.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3.3.2 単純パーセプトロン 3 | ''' 4 | 5 | import numpy as np 6 | 7 | 8 | class SimplePerceptron(object): 9 | ''' 10 | 単純パーセプトロン 11 | ''' 12 | def __init__(self, input_dim): 13 | self.input_dim = input_dim 14 | self.w = np.random.normal(size=(input_dim,)) 15 | self.b = 0. 
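    # forward() returns step(w @ x + b); compute_deltas() returns the
    # perceptron updates dw = (y - t) * x and db = (y - t), which the
    # training loop below subtracts from w and b until every sample is
    # classified correctly -- the classic perceptron learning rule with
    # a learning rate of 1.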
16 | 17 | def forward(self, x): 18 | y = step(np.matmul(self.w, x) + self.b) 19 | return y 20 | 21 | def compute_deltas(self, x, t): 22 | y = self.forward(x) 23 | delta = y - t 24 | dw = delta * x 25 | db = delta 26 | 27 | return dw, db 28 | 29 | 30 | def step(x): 31 | return 1 * (x > 0) 32 | 33 | 34 | if __name__ == '__main__': 35 | np.random.seed(123) # 乱数シード 36 | 37 | ''' 38 | 1. データの準備 39 | ''' 40 | d = 2 # 入力次元 41 | N = 20 # 全データ数 42 | 43 | mean = 5 44 | 45 | x1 = np.random.randn(N//2, d) + np.array([0, 0]) 46 | x2 = np.random.randn(N//2, d) + np.array([mean, mean]) 47 | 48 | t1 = np.zeros(N//2) 49 | t2 = np.ones(N//2) 50 | 51 | x = np.concatenate((x1, x2), axis=0) # 入力データ 52 | t = np.concatenate((t1, t2)) # 出力データ 53 | 54 | ''' 55 | 2. モデルの構築 56 | ''' 57 | model = SimplePerceptron(input_dim=d) 58 | 59 | ''' 60 | 3. モデルの学習 61 | ''' 62 | def compute_loss(dw, db): 63 | return all(dw == 0) * (db == 0) 64 | 65 | def train_step(x, t): 66 | dw, db = model.compute_deltas(x, t) 67 | loss = compute_loss(dw, db) 68 | model.w = model.w - dw 69 | model.b = model.b - db 70 | 71 | return loss 72 | 73 | while True: 74 | classified = True 75 | for i in range(N): 76 | loss = train_step(x[i], t[i]) 77 | classified *= loss 78 | if classified: 79 | break 80 | 81 | ''' 82 | 4. モデルの評価 83 | ''' 84 | print('w:', model.w) # => w: [1.660725 1.49465147] 85 | print('b:', model.b) # => b: -10.0 86 | 87 | print('(0, 0) =>', model.forward([0, 0])) # => 0 88 | print('(5, 5) =>', model.forward([5, 5])) # => 1 89 | -------------------------------------------------------------------------------- /3/02_logistic_regression.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3.4.3 ロジスティック回帰 3 | ''' 4 | 5 | import numpy as np 6 | 7 | 8 | class LogisticRegression(object): 9 | ''' 10 | ロジスティック回帰 11 | ''' 12 | def __init__(self, input_dim): 13 | self.input_dim = input_dim 14 | self.w = np.random.normal(size=(input_dim,)) 15 | self.b = 0. 16 | 17 | def __call__(self, x): 18 | return self.forward(x) 19 | 20 | def forward(self, x): 21 | return sigmoid(np.matmul(x, self.w) + self.b) 22 | 23 | def compute_gradients(self, x, t): 24 | y = self.forward(x) 25 | delta = y - t 26 | dw = np.matmul(x.T, delta) 27 | db = np.matmul(np.ones(x.shape[0]), delta) 28 | 29 | return dw, db 30 | 31 | 32 | def sigmoid(x): 33 | return 1 / (1 + np.exp(-x)) 34 | 35 | 36 | if __name__ == '__main__': 37 | np.random.seed(123) 38 | 39 | ''' 40 | 1. データの準備 41 | ''' 42 | # OR 43 | x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) 44 | t = np.array([0, 1, 1, 1]) 45 | 46 | ''' 47 | 2. モデルの構築 48 | ''' 49 | model = LogisticRegression(input_dim=2) 50 | 51 | ''' 52 | 3. モデルの学習 53 | ''' 54 | def compute_loss(t, y): 55 | return (-t * np.log(y) - (1 - t) * np.log(1 - y)).sum() 56 | 57 | def train_step(x, t): 58 | dw, db = model.compute_gradients(x, t) 59 | model.w = model.w - 0.1 * dw 60 | model.b = model.b - 0.1 * db 61 | loss = compute_loss(t, model(x)) 62 | return loss 63 | 64 | epochs = 100 65 | 66 | for epoch in range(epochs): 67 | train_loss = train_step(x, t) # バッチ学習 68 | 69 | if epoch % 10 == 0 or epoch == epochs - 1: 70 | print('epoch: {}, loss: {:.3f}'.format( 71 | epoch+1, 72 | train_loss 73 | )) 74 | 75 | ''' 76 | 4. 
モデルの評価 77 | ''' 78 | for input in x: 79 | print('{} => {:.3f}'.format(input, model(input))) 80 | -------------------------------------------------------------------------------- /3/03_multi_class_logistic_regression.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3.5.3 (多クラス)ロジスティック回帰 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.utils import shuffle 7 | 8 | 9 | class LogisticRegression(object): 10 | ''' 11 | (多クラス)ロジスティック回帰 12 | ''' 13 | def __init__(self, input_dim, output_dim): 14 | self.input_dim = input_dim 15 | self.W = np.random.normal(size=(input_dim, output_dim)) 16 | self.b = np.zeros(output_dim) 17 | 18 | def __call__(self, x): 19 | return self.forward(x) 20 | 21 | def forward(self, x): 22 | return softmax(np.matmul(x, self.W) + self.b) 23 | 24 | def compute_gradients(self, x, t): 25 | y = self.forward(x) 26 | delta = y - t 27 | dW = np.matmul(x.T, delta) 28 | db = np.matmul(np.ones(x.shape[0]), delta) 29 | 30 | return dW, db 31 | 32 | 33 | def softmax(x): 34 | return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True) 35 | 36 | 37 | if __name__ == '__main__': 38 | np.random.seed(123) 39 | 40 | ''' 41 | 1. データの準備 42 | ''' 43 | M = 2 # 入力データの次元 44 | K = 3 # クラス数 45 | n = 100 # クラスごとのデータ数 46 | N = n * K # 全データ数 47 | 48 | x1 = np.random.randn(n, M) + np.array([0, 10]) 49 | x2 = np.random.randn(n, M) + np.array([5, 5]) 50 | x3 = np.random.randn(n, M) + np.array([10, 0]) 51 | t1 = np.array([[1, 0, 0] for i in range(n)]) 52 | t2 = np.array([[0, 1, 0] for i in range(n)]) 53 | t3 = np.array([[0, 0, 1] for i in range(n)]) 54 | 55 | x = np.concatenate((x1, x2, x3), axis=0) 56 | t = np.concatenate((t1, t2, t3), axis=0) 57 | 58 | ''' 59 | 2. モデルの構築 60 | ''' 61 | model = LogisticRegression(input_dim=M, 62 | output_dim=K) 63 | 64 | ''' 65 | 3. モデルの学習 66 | ''' 67 | def compute_loss(t, y): 68 | return (-t * np.log(y)).sum(axis=1).mean() 69 | 70 | def train_step(x, t): 71 | dW, db = model.compute_gradients(x, t) 72 | model.W = model.W - 0.1 * dW 73 | model.b = model.b - 0.1 * db 74 | loss = compute_loss(t, model(x)) 75 | return loss 76 | 77 | epochs = 10 78 | batch_size = 50 79 | n_batches = x.shape[0] // batch_size 80 | 81 | for epoch in range(epochs): 82 | train_loss = 0. 83 | x_, t_ = shuffle(x, t) 84 | 85 | for n_batch in range(n_batches): 86 | start = n_batch * batch_size 87 | end = start + batch_size 88 | 89 | train_loss += train_step(x_[start:end], 90 | t_[start:end]) 91 | 92 | if epoch % 10 == 0 or epoch == epochs - 1: 93 | print('epoch: {}, loss: {:.3f}'.format( 94 | epoch+1, 95 | train_loss 96 | )) 97 | 98 | ''' 99 | 4. 
モデルの評価 100 | ''' 101 | x_, t_ = shuffle(x, t) 102 | preds = model(x_[0:5]) 103 | classified = \ 104 | np.argmax(t_[0:5], axis=1) == np.argmax(preds[0:5], axis=1) 105 | print('Prediction matched:', classified) 106 | -------------------------------------------------------------------------------- /3/04_mlp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3.6.3 多層パーセプトロン 3 | ''' 4 | 5 | import numpy as np 6 | 7 | 8 | class MLP(object): 9 | ''' 10 | 多層パーセプトロン 11 | ''' 12 | def __init__(self, input_dim, hidden_dim, output_dim): 13 | self.l1 = Layer(input_dim=input_dim, 14 | output_dim=hidden_dim, 15 | activation=sigmoid, 16 | dactivation=dsigmoid) 17 | 18 | self.l2 = Layer(input_dim=hidden_dim, 19 | output_dim=output_dim, 20 | activation=sigmoid, 21 | dactivation=dsigmoid) 22 | 23 | self.layers = [self.l1, self.l2] 24 | 25 | def __call__(self, x): 26 | return self.forward(x) 27 | 28 | def forward(self, x): 29 | h = self.l1(x) 30 | y = self.l2(h) 31 | return y 32 | 33 | 34 | class Layer(object): 35 | ''' 36 | 層間の結合 37 | ''' 38 | def __init__(self, input_dim, output_dim, 39 | activation, dactivation): 40 | self.W = np.random.normal(size=(input_dim, output_dim)) 41 | self.b = np.zeros(output_dim) 42 | 43 | self.activation = activation 44 | self.dactivation = dactivation 45 | 46 | def __call__(self, x): 47 | return self.forward(x) 48 | 49 | def forward(self, x): 50 | self._input = x 51 | self._pre_activation = np.matmul(x, self.W) + self.b 52 | return self.activation(self._pre_activation) 53 | 54 | def backward(self, delta, W): 55 | delta = self.dactivation(self._pre_activation) \ 56 | * np.matmul(delta, W.T) 57 | return delta 58 | 59 | def compute_gradients(self, delta): 60 | dW = np.matmul(self._input.T, delta) 61 | db = np.matmul(np.ones(self._input.shape[0]), delta) 62 | 63 | return dW, db 64 | 65 | 66 | def sigmoid(x): 67 | return 1 / (1 + np.exp(-x)) 68 | 69 | 70 | def dsigmoid(x): 71 | return sigmoid(x) * (1 - sigmoid(x)) 72 | 73 | 74 | if __name__ == '__main__': 75 | np.random.seed(123) 76 | 77 | ''' 78 | 1. データの準備 79 | ''' 80 | # XOR 81 | x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) 82 | t = np.array([[0], [1], [1], [0]]) 83 | 84 | ''' 85 | 2. モデルの構築 86 | ''' 87 | model = MLP(2, 2, 1) 88 | 89 | ''' 90 | 3. モデルの学習 91 | ''' 92 | def compute_loss(t, y): 93 | return (-t * np.log(y) - (1 - t) * np.log(1 - y)).sum() 94 | 95 | def train_step(x, t): 96 | y = model(x) 97 | for i, layer in enumerate(model.layers[::-1]): 98 | if i == 0: 99 | delta = y - t 100 | else: 101 | delta = layer.backward(delta, W) 102 | 103 | dW, db = layer.compute_gradients(delta) 104 | layer.W = layer.W - 0.1 * dW 105 | layer.b = layer.b - 0.1 * db 106 | 107 | W = layer.W 108 | 109 | loss = compute_loss(t, y) 110 | return loss 111 | 112 | epochs = 1000 113 | 114 | for epoch in range(epochs): 115 | train_loss = train_step(x, t) 116 | 117 | if epoch % 100 == 0 or epoch == epochs - 1: 118 | print('epoch: {}, loss: {:.3f}'.format( 119 | epoch+1, 120 | train_loss 121 | )) 122 | 123 | ''' 124 | 4. 
モデルの評価 125 | ''' 126 | for input in x: 127 | print('{} => {:.3f}'.format(input, model(input)[0])) 128 | -------------------------------------------------------------------------------- /3/05_mlp_toy_problem.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3.7.3 簡単な実験 3 | ''' 4 | 5 | import numpy as np 6 | from models import MLP 7 | from sklearn import datasets 8 | from sklearn.utils import shuffle 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.metrics import accuracy_score 11 | 12 | 13 | if __name__ == '__main__': 14 | np.random.seed(123) 15 | 16 | ''' 17 | 1. データの準備 18 | ''' 19 | N = 300 20 | x, t = datasets.make_moons(N, noise=0.3) 21 | t = t.reshape(N, 1) 22 | 23 | x_train, x_test, t_train, t_test = \ 24 | train_test_split(x, t, test_size=0.2) 25 | 26 | ''' 27 | 2. モデルの構築 28 | ''' 29 | # model = MLP(2, 2, 1) 30 | model = MLP(2, 3, 1) 31 | 32 | ''' 33 | 3. モデルの学習 34 | ''' 35 | def compute_loss(t, y): 36 | return (-t * np.log(y) - (1 - t) * np.log(1 - y)).sum() 37 | 38 | def train_step(x, t): 39 | y = model(x) 40 | for i, layer in enumerate(model.layers[::-1]): 41 | if i == 0: 42 | delta = y - t 43 | else: 44 | delta = layer.backward(delta, W) 45 | 46 | dW, db = layer.compute_gradients(delta) 47 | layer.W = layer.W - 0.1 * dW 48 | layer.b = layer.b - 0.1 * db 49 | 50 | W = layer.W 51 | 52 | loss = compute_loss(t, y) 53 | return loss 54 | 55 | epochs = 100 56 | batch_size = 30 57 | n_batches = x_train.shape[0] // batch_size 58 | 59 | for epoch in range(epochs): 60 | train_loss = 0. 61 | x_, t_ = shuffle(x_train, t_train) 62 | 63 | for n_batch in range(n_batches): 64 | start = n_batch * batch_size 65 | end = start + batch_size 66 | 67 | train_loss += train_step(x_[start:end], 68 | t_[start:end]) 69 | 70 | if epoch % 10 == 0 or epoch == epochs - 1: 71 | print('epoch: {}, loss: {:.3f}'.format( 72 | epoch+1, 73 | train_loss 74 | )) 75 | 76 | ''' 77 | 4. モデルの評価 78 | ''' 79 | preds = model(x_test) > 0.5 80 | acc = accuracy_score(t_test, preds) 81 | print('acc.: {:.3f}'.format(acc)) 82 | -------------------------------------------------------------------------------- /3/100_logistic_regression_xor.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3.6.1.1 XOR - ロジスティック回帰によるXORの学習の試み 3 | ''' 4 | 5 | import numpy as np 6 | 7 | 8 | class LogisticRegression(object): 9 | ''' 10 | ロジスティック回帰 11 | ''' 12 | def __init__(self, input_dim): 13 | self.input_dim = input_dim 14 | self.w = np.random.normal(size=(input_dim,)) 15 | self.b = 0. 16 | 17 | def __call__(self, x): 18 | return self.forward(x) 19 | 20 | def forward(self, x): 21 | return sigmoid(np.matmul(x, self.w) + self.b) 22 | 23 | def compute_gradients(self, x, t): 24 | y = self.forward(x) 25 | delta = y - t 26 | dw = np.matmul(x.T, delta) 27 | db = np.matmul(np.ones(x.shape[0]), delta) 28 | 29 | return dw, db 30 | 31 | 32 | def sigmoid(x): 33 | return 1 / (1 + np.exp(-x)) 34 | 35 | 36 | if __name__ == '__main__': 37 | np.random.seed(123) 38 | 39 | ''' 40 | 1. データの準備 41 | ''' 42 | # XOR 43 | x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) 44 | t = np.array([0, 1, 1, 0]) 45 | 46 | ''' 47 | 2. モデルの構築 48 | ''' 49 | model = LogisticRegression(input_dim=2) 50 | 51 | ''' 52 | 3. 
モデルの学習 53 | ''' 54 | def compute_loss(t, y): 55 | return (-t * np.log(y) - (1 - t) * np.log(1 - y)).sum() 56 | 57 | def train_step(x, t): 58 | dw, db = model.compute_gradients(x, t) 59 | model.w = model.w - 0.1 * dw 60 | model.b = model.b - 0.1 * db 61 | loss = compute_loss(t, model(x)) 62 | return loss 63 | 64 | epochs = 100 65 | 66 | for epoch in range(epochs): 67 | train_loss = train_step(x, t) # バッチ学習 68 | 69 | if epoch % 10 == 0 or epoch == epochs - 1: 70 | print('epoch: {}, loss: {:.3f}'.format( 71 | epoch+1, 72 | train_loss 73 | )) 74 | 75 | ''' 76 | 4. モデルの評価 77 | ''' 78 | for input in x: 79 | print('{} => {:.3f}'.format(input, model(input))) 80 | -------------------------------------------------------------------------------- /3/models/MLP.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class MLP(object): 5 | ''' 6 | 多層パーセプトロン 7 | ''' 8 | def __init__(self, input_dim, hidden_dim, output_dim): 9 | self.l1 = Layer(input_dim=input_dim, 10 | output_dim=hidden_dim, 11 | activation=sigmoid, 12 | dactivation=dsigmoid) 13 | 14 | self.l2 = Layer(input_dim=hidden_dim, 15 | output_dim=output_dim, 16 | activation=sigmoid, 17 | dactivation=dsigmoid) 18 | 19 | self.layers = [self.l1, self.l2] 20 | 21 | def __call__(self, x): 22 | return self.forward(x) 23 | 24 | def forward(self, x): 25 | h = self.l1(x) 26 | y = self.l2(h) 27 | return y 28 | 29 | 30 | class Layer(object): 31 | ''' 32 | 層間の結合 33 | ''' 34 | def __init__(self, input_dim, output_dim, 35 | activation, dactivation): 36 | self.W = np.random.normal(size=(input_dim, output_dim)) 37 | self.b = np.zeros(output_dim) 38 | 39 | self.activation = activation 40 | self.dactivation = dactivation 41 | 42 | def __call__(self, x): 43 | return self.forward(x) 44 | 45 | def forward(self, x): 46 | self._input = x 47 | self._pre_activation = np.matmul(x, self.W) + self.b 48 | return self.activation(self._pre_activation) 49 | 50 | def backward(self, delta, W): 51 | delta = self.dactivation(self._pre_activation) \ 52 | * np.matmul(delta, W.T) 53 | return delta 54 | 55 | def compute_gradients(self, delta): 56 | dW = np.matmul(self._input.T, delta) 57 | db = np.matmul(np.ones(self._input.shape[0]), delta) 58 | 59 | return dW, db 60 | 61 | 62 | def sigmoid(x): 63 | return 1 / (1 + np.exp(-x)) 64 | 65 | 66 | def dsigmoid(x): 67 | return sigmoid(x) * (1 - sigmoid(x)) 68 | -------------------------------------------------------------------------------- /3/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .MLP import MLP 2 | -------------------------------------------------------------------------------- /4/01_mlp_toy_problem_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.1.1 Keras(トイ・プロブレム) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn import datasets 7 | from sklearn.model_selection import train_test_split 8 | import tensorflow as tf 9 | from tensorflow.keras.models import Sequential 10 | from tensorflow.keras.layers import Dense 11 | from tensorflow.keras import optimizers 12 | 13 | 14 | if __name__ == '__main__': 15 | np.random.seed(123) 16 | tf.random.set_seed(123) 17 | 18 | ''' 19 | 1. データの準備 20 | ''' 21 | N = 300 22 | x, t = datasets.make_moons(N, noise=0.3) 23 | t = t.reshape(N, 1) 24 | 25 | x_train, x_test, t_train, t_test = \ 26 | train_test_split(x, t, test_size=0.2) 27 | 28 | ''' 29 | 2. 
モデルの構築 30 | ''' 31 | model = Sequential() 32 | model.add(Dense(3, activation='sigmoid')) 33 | model.add(Dense(1, activation='sigmoid')) 34 | 35 | ''' 36 | 3. モデルの学習 37 | ''' 38 | optimizer = optimizers.SGD(learning_rate=0.1) 39 | model.compile(optimizer=optimizer, loss='binary_crossentropy', 40 | metrics=['accuracy']) 41 | 42 | model.fit(x_train, t_train, 43 | epochs=100, batch_size=10, 44 | verbose=1) 45 | 46 | ''' 47 | 4. モデルの評価 48 | ''' 49 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 50 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 51 | loss, 52 | acc 53 | )) 54 | -------------------------------------------------------------------------------- /4/02_mlp_toy_problem_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.1.2 TensorFlow(トイ・プロブレム) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn import datasets 7 | from sklearn.utils import shuffle 8 | from sklearn.model_selection import train_test_split 9 | import tensorflow as tf 10 | from tensorflow.keras.models import Model 11 | from tensorflow.keras.layers import Dense 12 | from tensorflow.keras import optimizers 13 | from tensorflow.keras import losses 14 | from tensorflow.keras import metrics 15 | 16 | 17 | class MLP(Model): 18 | ''' 19 | 多層パーセプトロン 20 | ''' 21 | def __init__(self, hidden_dim, output_dim): 22 | super().__init__() 23 | self.l1 = Dense(hidden_dim, activation='sigmoid') 24 | self.l2 = Dense(output_dim, activation='sigmoid') 25 | 26 | def call(self, x): 27 | h = self.l1(x) 28 | y = self.l2(h) 29 | 30 | return y 31 | 32 | 33 | if __name__ == '__main__': 34 | np.random.seed(123) 35 | tf.random.set_seed(123) 36 | 37 | ''' 38 | 1. データの準備 39 | ''' 40 | N = 300 41 | x, t = datasets.make_moons(N, noise=0.3) 42 | t = t.reshape(N, 1) 43 | 44 | x_train, x_test, t_train, t_test = \ 45 | train_test_split(x, t, test_size=0.2) 46 | 47 | ''' 48 | 2. モデルの構築 49 | ''' 50 | model = MLP(3, 1) 51 | 52 | ''' 53 | 3. モデルの学習 54 | ''' 55 | criterion = losses.BinaryCrossentropy() 56 | optimizer = optimizers.SGD(learning_rate=0.1) 57 | 58 | def compute_loss(t, y): 59 | return criterion(t, y) 60 | 61 | def train_step(x, t): 62 | with tf.GradientTape() as tape: 63 | preds = model(x) 64 | loss = compute_loss(t, preds) 65 | grads = tape.gradient(loss, model.trainable_variables) 66 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 67 | 68 | return loss 69 | 70 | epochs = 100 71 | batch_size = 10 72 | n_batches = x_train.shape[0] // batch_size 73 | 74 | for epoch in range(epochs): 75 | train_loss = 0. 76 | x_, t_ = shuffle(x_train, t_train) 77 | 78 | for batch in range(n_batches): 79 | start = batch * batch_size 80 | end = start + batch_size 81 | loss = train_step(x_[start:end], t_[start:end]) 82 | train_loss += loss.numpy() 83 | 84 | print('epoch: {}, loss: {:.3}'.format( 85 | epoch+1, 86 | train_loss 87 | )) 88 | 89 | ''' 90 | 4. 
モデルの評価 91 | ''' 92 | test_loss = metrics.Mean() 93 | test_acc = metrics.BinaryAccuracy() 94 | 95 | def test_step(x, t): 96 | preds = model(x) 97 | loss = compute_loss(t, preds) 98 | test_loss(loss) 99 | test_acc(t, preds) 100 | 101 | return loss 102 | 103 | test_step(x_test, t_test) 104 | 105 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 106 | test_loss.result(), 107 | test_acc.result() 108 | )) 109 | -------------------------------------------------------------------------------- /4/03_mlp_toy_problem_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.1.3 PyTorch(トイ・プロブレム) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn import datasets 7 | from sklearn.utils import shuffle 8 | from sklearn.model_selection import train_test_split 9 | from sklearn.metrics import accuracy_score 10 | import torch 11 | import torch.nn as nn 12 | import torch.optim as optimizers 13 | 14 | 15 | class MLP(nn.Module): 16 | ''' 17 | 多層パーセプトロン 18 | ''' 19 | def __init__(self, input_dim, hidden_dim, output_dim): 20 | super().__init__() 21 | self.l1 = nn.Linear(input_dim, hidden_dim) 22 | self.a1 = nn.Sigmoid() 23 | self.l2 = nn.Linear(hidden_dim, output_dim) 24 | self.a2 = nn.Sigmoid() 25 | 26 | self.layers = [self.l1, self.a1, self.l2, self.a2] 27 | 28 | def forward(self, x): 29 | for layer in self.layers: 30 | x = layer(x) 31 | 32 | return x 33 | 34 | 35 | if __name__ == '__main__': 36 | np.random.seed(123) 37 | torch.manual_seed(123) 38 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 39 | 40 | ''' 41 | 1. データの準備 42 | ''' 43 | N = 300 44 | x, t = datasets.make_moons(N, noise=0.3) 45 | t = t.reshape(N, 1) 46 | 47 | x_train, x_test, t_train, t_test = \ 48 | train_test_split(x, t, test_size=0.2) 49 | 50 | ''' 51 | 2. モデルの構築 52 | ''' 53 | model = MLP(2, 3, 1).to(device) 54 | 55 | ''' 56 | 3. モデルの学習 57 | ''' 58 | criterion = nn.BCELoss() 59 | optimizer = optimizers.SGD(model.parameters(), lr=0.1) 60 | 61 | def compute_loss(t, y): 62 | return criterion(y, t) 63 | 64 | def train_step(x, t): 65 | model.train() 66 | preds = model(x) 67 | loss = compute_loss(t, preds) 68 | optimizer.zero_grad() 69 | loss.backward() 70 | optimizer.step() 71 | 72 | return loss 73 | 74 | epochs = 100 75 | batch_size = 10 76 | n_batches = x_train.shape[0] // batch_size 77 | 78 | for epoch in range(epochs): 79 | train_loss = 0. 80 | x_, t_ = shuffle(x_train, t_train) 81 | x_ = torch.Tensor(x_).to(device) 82 | t_ = torch.Tensor(t_).to(device) 83 | 84 | for n_batch in range(n_batches): 85 | start = n_batch * batch_size 86 | end = start + batch_size 87 | loss = train_step(x_[start:end], t_[start:end]) 88 | train_loss += loss.item() 89 | 90 | print('epoch: {}, loss: {:.3}'.format( 91 | epoch+1, 92 | train_loss 93 | )) 94 | 95 | ''' 96 | 4. 
モデルの評価 97 | ''' 98 | def test_step(x, t): 99 | x = torch.Tensor(x).to(device) 100 | t = torch.Tensor(t).to(device) 101 | model.eval() 102 | preds = model(x) 103 | loss = compute_loss(t, preds) 104 | 105 | return loss, preds 106 | 107 | loss, preds = test_step(x_test, t_test) 108 | test_loss = loss.item() 109 | preds = preds.data.cpu().numpy() > 0.5 110 | test_acc = accuracy_score(t_test, preds) 111 | 112 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 113 | test_loss, 114 | test_acc 115 | )) 116 | -------------------------------------------------------------------------------- /4/04_mnist_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.2.4.1 Keras (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow.keras import datasets 8 | from tensorflow.keras.models import Sequential 9 | from tensorflow.keras.layers import Dense 10 | 11 | 12 | if __name__ == '__main__': 13 | np.random.seed(123) 14 | tf.random.set_seed(123) 15 | 16 | ''' 17 | 1. データの準備 18 | ''' 19 | mnist = datasets.mnist 20 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 21 | 22 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 23 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 24 | t_train = np.eye(10)[t_train].astype(np.float32) 25 | t_test = np.eye(10)[t_test].astype(np.float32) 26 | 27 | ''' 28 | 2. モデルの構築 29 | ''' 30 | model = Sequential() 31 | model.add(Dense(200, activation='sigmoid')) 32 | model.add(Dense(200, activation='sigmoid')) 33 | model.add(Dense(200, activation='sigmoid')) 34 | model.add(Dense(10, activation='softmax')) 35 | 36 | ''' 37 | 3. モデルの学習 38 | ''' 39 | model.compile(optimizer='sgd', loss='categorical_crossentropy', 40 | metrics=['accuracy']) 41 | 42 | model.fit(x_train, t_train, 43 | epochs=30, batch_size=100, 44 | verbose=2) 45 | 46 | ''' 47 | 4. モデルの評価 48 | ''' 49 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 50 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 51 | loss, 52 | acc 53 | )) 54 | -------------------------------------------------------------------------------- /4/05_mnist_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.2.4.2 TensorFlow (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.utils import shuffle 7 | import tensorflow as tf 8 | from tensorflow.keras import datasets 9 | from tensorflow.keras.models import Model 10 | from tensorflow.keras.layers import Dense 11 | from tensorflow.keras import optimizers 12 | from tensorflow.keras import losses 13 | from tensorflow.keras import metrics 14 | 15 | 16 | class DNN(Model): 17 | def __init__(self, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = Dense(hidden_dim, activation='sigmoid') 20 | self.l2 = Dense(hidden_dim, activation='sigmoid') 21 | self.l3 = Dense(hidden_dim, activation='sigmoid') 22 | self.l4 = Dense(output_dim, activation='softmax') 23 | 24 | self.ls = [self.l1, self.l2, self.l3, self.l4] 25 | 26 | def call(self, x): 27 | for layer in self.ls: 28 | x = layer(x) 29 | 30 | return x 31 | 32 | 33 | if __name__ == '__main__': 34 | np.random.seed(123) 35 | tf.random.set_seed(123) 36 | 37 | ''' 38 | 1. 
データの準備 39 | ''' 40 | mnist = datasets.mnist 41 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 42 | 43 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 44 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 45 | t_train = np.eye(10)[t_train].astype(np.float32) 46 | t_test = np.eye(10)[t_test].astype(np.float32) 47 | 48 | ''' 49 | 2. モデルの構築 50 | ''' 51 | model = DNN(200, 10) 52 | 53 | ''' 54 | 3. モデルの学習 55 | ''' 56 | criterion = losses.CategoricalCrossentropy() 57 | optimizer = optimizers.SGD(learning_rate=0.01) 58 | train_loss = metrics.Mean() 59 | train_acc = metrics.CategoricalAccuracy() 60 | 61 | def compute_loss(t, y): 62 | return criterion(t, y) 63 | 64 | def train_step(x, t): 65 | with tf.GradientTape() as tape: 66 | preds = model(x) 67 | loss = compute_loss(t, preds) 68 | grads = tape.gradient(loss, model.trainable_variables) 69 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 70 | train_loss(loss) 71 | train_acc(t, preds) 72 | 73 | return loss 74 | 75 | epochs = 30 76 | batch_size = 100 77 | n_batches = x_train.shape[0] // batch_size 78 | 79 | for epoch in range(epochs): 80 | x_, t_ = shuffle(x_train, t_train) 81 | 82 | for batch in range(n_batches): 83 | start = batch * batch_size 84 | end = start + batch_size 85 | train_step(x_[start:end], t_[start:end]) 86 | 87 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 88 | epoch+1, 89 | train_loss.result(), 90 | train_acc.result() 91 | )) 92 | 93 | ''' 94 | 4. モデルの評価 95 | ''' 96 | test_loss = metrics.Mean() 97 | test_acc = metrics.CategoricalAccuracy() 98 | 99 | def test_step(x, t): 100 | preds = model(x) 101 | loss = compute_loss(t, preds) 102 | test_loss(loss) 103 | test_acc(t, preds) 104 | 105 | return loss 106 | 107 | test_step(x_test, t_test) 108 | 109 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 110 | test_loss.result(), 111 | test_acc.result() 112 | )) 113 | -------------------------------------------------------------------------------- /4/06_mnist_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.2.4.3 PyTorch (MNIST) 3 | ''' 4 | 5 | import os 6 | import numpy as np 7 | from sklearn.metrics import accuracy_score 8 | import torch 9 | import torch.nn as nn 10 | import torch.optim as optimizers 11 | from torch.utils.data import DataLoader 12 | from torchvision import datasets 13 | import torchvision.transforms as transforms 14 | 15 | 16 | class DNN(nn.Module): 17 | def __init__(self, input_dim, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = nn.Linear(input_dim, hidden_dim) 20 | self.a1 = nn.Sigmoid() 21 | self.l2 = nn.Linear(hidden_dim, hidden_dim) 22 | self.a2 = nn.Sigmoid() 23 | self.l3 = nn.Linear(hidden_dim, hidden_dim) 24 | self.a3 = nn.Sigmoid() 25 | self.l4 = nn.Linear(hidden_dim, output_dim) 26 | 27 | self.layers = [self.l1, self.a1, 28 | self.l2, self.a2, 29 | self.l3, self.a3, 30 | self.l4] 31 | 32 | def forward(self, x): 33 | for layer in self.layers: 34 | x = layer(x) 35 | 36 | return x 37 | 38 | 39 | if __name__ == '__main__': 40 | np.random.seed(123) 41 | torch.manual_seed(123) 42 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 43 | 44 | ''' 45 | 1. 
データの準備 46 | ''' 47 | root = os.path.join('~', '.torch', 'mnist') 48 | transform = transforms.Compose([transforms.ToTensor(), 49 | lambda x: x.view(-1)]) 50 | mnist_train = datasets.MNIST(root=root, 51 | download=True, 52 | train=True, 53 | transform=transform) 54 | mnist_test = datasets.MNIST(root=root, 55 | download=True, 56 | train=False, 57 | transform=transform) 58 | 59 | train_dataloader = DataLoader(mnist_train, 60 | batch_size=100, 61 | shuffle=True) 62 | test_dataloader = DataLoader(mnist_test, 63 | batch_size=100, 64 | shuffle=False) 65 | 66 | ''' 67 | 2. モデルの構築 68 | ''' 69 | model = DNN(784, 200, 10).to(device) 70 | 71 | ''' 72 | 3. モデルの学習 73 | ''' 74 | criterion = nn.CrossEntropyLoss() 75 | optimizer = optimizers.SGD(model.parameters(), lr=0.01) 76 | 77 | def compute_loss(t, y): 78 | return criterion(y, t) 79 | 80 | def train_step(x, t): 81 | model.train() 82 | preds = model(x) 83 | loss = compute_loss(t, preds) 84 | optimizer.zero_grad() 85 | loss.backward() 86 | optimizer.step() 87 | 88 | return loss, preds 89 | 90 | epochs = 30 91 | 92 | for epoch in range(epochs): 93 | train_loss = 0. 94 | train_acc = 0. 95 | 96 | for (x, t) in train_dataloader: 97 | x, t = x.to(device), t.to(device) 98 | loss, preds = train_step(x, t) 99 | train_loss += loss.item() 100 | train_acc += \ 101 | accuracy_score(t.tolist(), 102 | preds.argmax(dim=-1).tolist()) 103 | 104 | train_loss /= len(train_dataloader) 105 | train_acc /= len(train_dataloader) 106 | 107 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 108 | epoch+1, 109 | train_loss, 110 | train_acc 111 | )) 112 | 113 | ''' 114 | 4. モデルの評価 115 | ''' 116 | def test_step(x, t): 117 | model.eval() 118 | preds = model(x) 119 | loss = criterion(preds, t) 120 | 121 | return loss, preds 122 | 123 | test_loss = 0. 124 | test_acc = 0. 125 | 126 | for (x, t) in test_dataloader: 127 | x, t = x.to(device), t.to(device) 128 | loss, preds = test_step(x, t) 129 | test_loss += loss.item() 130 | test_acc += \ 131 | accuracy_score(t.tolist(), 132 | preds.argmax(dim=-1).tolist()) 133 | 134 | test_loss /= len(test_dataloader) 135 | test_acc /= len(test_dataloader) 136 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 137 | test_loss, 138 | test_acc 139 | )) 140 | -------------------------------------------------------------------------------- /4/07_mnist_tanh_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.1.2 tanh - Keras (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow.keras import datasets 8 | from tensorflow.keras.models import Sequential 9 | from tensorflow.keras.layers import Dense 10 | 11 | 12 | if __name__ == '__main__': 13 | np.random.seed(123) 14 | tf.random.set_seed(123) 15 | 16 | ''' 17 | 1. データの準備 18 | ''' 19 | mnist = datasets.mnist 20 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 21 | 22 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 23 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 24 | t_train = np.eye(10)[t_train].astype(np.float32) 25 | t_test = np.eye(10)[t_test].astype(np.float32) 26 | 27 | ''' 28 | 2. モデルの構築 29 | ''' 30 | model = Sequential() 31 | model.add(Dense(200, activation='tanh')) 32 | model.add(Dense(200, activation='tanh')) 33 | model.add(Dense(200, activation='tanh')) 34 | model.add(Dense(10, activation='softmax')) 35 | 36 | ''' 37 | 3. 
モデルの学習 38 | ''' 39 | model.compile(optimizer='sgd', loss='categorical_crossentropy', 40 | metrics=['accuracy']) 41 | 42 | model.fit(x_train, t_train, 43 | epochs=30, batch_size=100, 44 | verbose=2) 45 | 46 | ''' 47 | 4. モデルの評価 48 | ''' 49 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 50 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 51 | loss, 52 | acc 53 | )) 54 | -------------------------------------------------------------------------------- /4/08_mnist_tanh_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.1.2 tanh - TensorFlow (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.utils import shuffle 7 | import tensorflow as tf 8 | from tensorflow.keras import datasets 9 | from tensorflow.keras.models import Model 10 | from tensorflow.keras.layers import Dense 11 | from tensorflow.keras import optimizers 12 | from tensorflow.keras import losses 13 | from tensorflow.keras import metrics 14 | 15 | 16 | class DNN(Model): 17 | def __init__(self, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = Dense(hidden_dim, activation='tanh') 20 | self.l2 = Dense(hidden_dim, activation='tanh') 21 | self.l3 = Dense(hidden_dim, activation='tanh') 22 | self.l4 = Dense(output_dim, activation='softmax') 23 | 24 | self.ls = [self.l1, self.l2, self.l3, self.l4] 25 | 26 | def call(self, x): 27 | for layer in self.ls: 28 | x = layer(x) 29 | 30 | return x 31 | 32 | 33 | if __name__ == '__main__': 34 | np.random.seed(123) 35 | tf.random.set_seed(123) 36 | 37 | ''' 38 | 1. データの準備 39 | ''' 40 | mnist = datasets.mnist 41 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 42 | 43 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 44 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 45 | t_train = np.eye(10)[t_train].astype(np.float32) 46 | t_test = np.eye(10)[t_test].astype(np.float32) 47 | 48 | ''' 49 | 2. モデルの構築 50 | ''' 51 | model = DNN(200, 10) 52 | 53 | ''' 54 | 3. モデルの学習 55 | ''' 56 | criterion = losses.CategoricalCrossentropy() 57 | optimizer = optimizers.SGD(learning_rate=0.01) 58 | train_loss = metrics.Mean() 59 | train_acc = metrics.CategoricalAccuracy() 60 | 61 | def compute_loss(t, y): 62 | return criterion(t, y) 63 | 64 | def train_step(x, t): 65 | with tf.GradientTape() as tape: 66 | preds = model(x) 67 | loss = compute_loss(t, preds) 68 | grads = tape.gradient(loss, model.trainable_variables) 69 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 70 | train_loss(loss) 71 | train_acc(t, preds) 72 | 73 | return loss 74 | 75 | epochs = 30 76 | batch_size = 100 77 | n_batches = x_train.shape[0] // batch_size 78 | 79 | for epoch in range(epochs): 80 | x_, t_ = shuffle(x_train, t_train) 81 | 82 | for batch in range(n_batches): 83 | start = batch * batch_size 84 | end = start + batch_size 85 | train_step(x_[start:end], t_[start:end]) 86 | 87 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 88 | epoch+1, 89 | train_loss.result(), 90 | train_acc.result() 91 | )) 92 | 93 | ''' 94 | 4. 
モデルの評価 95 | ''' 96 | test_loss = metrics.Mean() 97 | test_acc = metrics.CategoricalAccuracy() 98 | 99 | def test_step(x, t): 100 | preds = model(x) 101 | loss = compute_loss(t, preds) 102 | test_loss(loss) 103 | test_acc(t, preds) 104 | 105 | return loss 106 | 107 | test_step(x_test, t_test) 108 | 109 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 110 | test_loss.result(), 111 | test_acc.result() 112 | )) 113 | -------------------------------------------------------------------------------- /4/09_mnist_tanh_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.1.2 tanh - PyTorch (MNIST) 3 | ''' 4 | 5 | import os 6 | import numpy as np 7 | from sklearn.metrics import accuracy_score 8 | import torch 9 | import torch.nn as nn 10 | import torch.optim as optimizers 11 | from torch.utils.data import DataLoader 12 | from torchvision import datasets 13 | import torchvision.transforms as transforms 14 | 15 | 16 | class DNN(nn.Module): 17 | def __init__(self, input_dim, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = nn.Linear(input_dim, hidden_dim) 20 | self.a1 = nn.Tanh() 21 | self.l2 = nn.Linear(hidden_dim, hidden_dim) 22 | self.a2 = nn.Tanh() 23 | self.l3 = nn.Linear(hidden_dim, hidden_dim) 24 | self.a3 = nn.Tanh() 25 | self.l4 = nn.Linear(hidden_dim, output_dim) 26 | 27 | self.layers = [self.l1, self.a1, 28 | self.l2, self.a2, 29 | self.l3, self.a3, 30 | self.l4] 31 | 32 | def forward(self, x): 33 | for layer in self.layers: 34 | x = layer(x) 35 | 36 | return x 37 | 38 | 39 | if __name__ == '__main__': 40 | np.random.seed(123) 41 | torch.manual_seed(123) 42 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 43 | 44 | ''' 45 | 1. データの準備 46 | ''' 47 | root = os.path.join('~', '.torch', 'mnist') 48 | transform = transforms.Compose([transforms.ToTensor(), 49 | lambda x: x.view(-1)]) 50 | mnist_train = datasets.MNIST(root=root, 51 | download=True, 52 | train=True, 53 | transform=transform) 54 | mnist_test = datasets.MNIST(root=root, 55 | download=True, 56 | train=False, 57 | transform=transform) 58 | 59 | train_dataloader = DataLoader(mnist_train, 60 | batch_size=100, 61 | shuffle=True) 62 | test_dataloader = DataLoader(mnist_test, 63 | batch_size=100, 64 | shuffle=False) 65 | 66 | ''' 67 | 2. モデルの構築 68 | ''' 69 | model = DNN(784, 200, 10).to(device) 70 | 71 | ''' 72 | 3. モデルの学習 73 | ''' 74 | criterion = nn.CrossEntropyLoss() 75 | optimizer = optimizers.SGD(model.parameters(), lr=0.01) 76 | 77 | def compute_loss(t, y): 78 | return criterion(y, t) 79 | 80 | def train_step(x, t): 81 | model.train() 82 | preds = model(x) 83 | loss = compute_loss(t, preds) 84 | optimizer.zero_grad() 85 | loss.backward() 86 | optimizer.step() 87 | 88 | return loss, preds 89 | 90 | epochs = 30 91 | 92 | for epoch in range(epochs): 93 | train_loss = 0. 94 | train_acc = 0. 95 | 96 | for (x, t) in train_dataloader: 97 | x, t = x.to(device), t.to(device) 98 | loss, preds = train_step(x, t) 99 | train_loss += loss.item() 100 | train_acc += \ 101 | accuracy_score(t.tolist(), 102 | preds.argmax(dim=-1).tolist()) 103 | 104 | train_loss /= len(train_dataloader) 105 | train_acc /= len(train_dataloader) 106 | 107 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 108 | epoch+1, 109 | train_loss, 110 | train_acc 111 | )) 112 | 113 | ''' 114 | 4. モデルの評価 115 | ''' 116 | def test_step(x, t): 117 | model.eval() 118 | preds = model(x) 119 | loss = criterion(preds, t) 120 | 121 | return loss, preds 122 | 123 | test_loss = 0. 
124 | test_acc = 0. 125 | 126 | for (x, t) in test_dataloader: 127 | x, t = x.to(device), t.to(device) 128 | loss, preds = test_step(x, t) 129 | test_loss += loss.item() 130 | test_acc += \ 131 | accuracy_score(t.tolist(), 132 | preds.argmax(dim=-1).tolist()) 133 | 134 | test_loss /= len(test_dataloader) 135 | test_acc /= len(test_dataloader) 136 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 137 | test_loss, 138 | test_acc 139 | )) 140 | -------------------------------------------------------------------------------- /4/10_mnist_relu_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.2.2 ReLU - Keras (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow.keras import datasets 8 | from tensorflow.keras.models import Sequential 9 | from tensorflow.keras.layers import Dense 10 | 11 | 12 | if __name__ == '__main__': 13 | np.random.seed(123) 14 | tf.random.set_seed(123) 15 | 16 | ''' 17 | 1. データの準備 18 | ''' 19 | mnist = datasets.mnist 20 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 21 | 22 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 23 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 24 | t_train = np.eye(10)[t_train].astype(np.float32) 25 | t_test = np.eye(10)[t_test].astype(np.float32) 26 | 27 | ''' 28 | 2. モデルの構築 29 | ''' 30 | model = Sequential() 31 | model.add(Dense(200, activation='relu')) 32 | model.add(Dense(200, activation='relu')) 33 | model.add(Dense(200, activation='relu')) 34 | model.add(Dense(10, activation='softmax')) 35 | 36 | ''' 37 | 3. モデルの学習 38 | ''' 39 | model.compile(optimizer='sgd', loss='categorical_crossentropy', 40 | metrics=['accuracy']) 41 | 42 | model.fit(x_train, t_train, 43 | epochs=30, batch_size=100, 44 | verbose=2) 45 | 46 | ''' 47 | 4. モデルの評価 48 | ''' 49 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 50 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 51 | loss, 52 | acc 53 | )) 54 | -------------------------------------------------------------------------------- /4/11_mnist_relu_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.2.2 ReLU - TensorFlow (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.utils import shuffle 7 | import tensorflow as tf 8 | from tensorflow.keras import datasets 9 | from tensorflow.keras.models import Model 10 | from tensorflow.keras.layers import Dense 11 | from tensorflow.keras import optimizers 12 | from tensorflow.keras import losses 13 | from tensorflow.keras import metrics 14 | 15 | 16 | class DNN(Model): 17 | def __init__(self, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = Dense(hidden_dim, activation='relu') 20 | self.l2 = Dense(hidden_dim, activation='relu') 21 | self.l3 = Dense(hidden_dim, activation='relu') 22 | self.l4 = Dense(output_dim, activation='softmax') 23 | 24 | self.ls = [self.l1, self.l2, self.l3, self.l4] 25 | 26 | def call(self, x): 27 | for layer in self.ls: 28 | x = layer(x) 29 | 30 | return x 31 | 32 | 33 | if __name__ == '__main__': 34 | np.random.seed(123) 35 | tf.random.set_seed(123) 36 | 37 | ''' 38 | 1. 
データの準備 39 | ''' 40 | mnist = datasets.mnist 41 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 42 | 43 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 44 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 45 | t_train = np.eye(10)[t_train].astype(np.float32) 46 | t_test = np.eye(10)[t_test].astype(np.float32) 47 | 48 | ''' 49 | 2. モデルの構築 50 | ''' 51 | model = DNN(200, 10) 52 | 53 | ''' 54 | 3. モデルの学習 55 | ''' 56 | criterion = losses.CategoricalCrossentropy() 57 | optimizer = optimizers.SGD(learning_rate=0.01) 58 | train_loss = metrics.Mean() 59 | train_acc = metrics.CategoricalAccuracy() 60 | 61 | def compute_loss(t, y): 62 | return criterion(t, y) 63 | 64 | def train_step(x, t): 65 | with tf.GradientTape() as tape: 66 | preds = model(x) 67 | loss = compute_loss(t, preds) 68 | grads = tape.gradient(loss, model.trainable_variables) 69 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 70 | train_loss(loss) 71 | train_acc(t, preds) 72 | 73 | return loss 74 | 75 | epochs = 30 76 | batch_size = 100 77 | n_batches = x_train.shape[0] // batch_size 78 | 79 | for epoch in range(epochs): 80 | x_, t_ = shuffle(x_train, t_train) 81 | 82 | for batch in range(n_batches): 83 | start = batch * batch_size 84 | end = start + batch_size 85 | train_step(x_[start:end], t_[start:end]) 86 | 87 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 88 | epoch+1, 89 | train_loss.result(), 90 | train_acc.result() 91 | )) 92 | 93 | ''' 94 | 4. モデルの評価 95 | ''' 96 | test_loss = metrics.Mean() 97 | test_acc = metrics.CategoricalAccuracy() 98 | 99 | def test_step(x, t): 100 | preds = model(x) 101 | loss = compute_loss(t, preds) 102 | test_loss(loss) 103 | test_acc(t, preds) 104 | 105 | return loss 106 | 107 | test_step(x_test, t_test) 108 | 109 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 110 | test_loss.result(), 111 | test_acc.result() 112 | )) 113 | -------------------------------------------------------------------------------- /4/12_mnist_relu_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.2.2 ReLU - PyTorch (MNIST) 3 | ''' 4 | 5 | import os 6 | import numpy as np 7 | from sklearn.metrics import accuracy_score 8 | import torch 9 | import torch.nn as nn 10 | import torch.optim as optimizers 11 | from torch.utils.data import DataLoader 12 | from torchvision import datasets 13 | import torchvision.transforms as transforms 14 | 15 | 16 | class DNN(nn.Module): 17 | def __init__(self, input_dim, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = nn.Linear(input_dim, hidden_dim) 20 | self.a1 = nn.ReLU() 21 | self.l2 = nn.Linear(hidden_dim, hidden_dim) 22 | self.a2 = nn.ReLU() 23 | self.l3 = nn.Linear(hidden_dim, hidden_dim) 24 | self.a3 = nn.ReLU() 25 | self.l4 = nn.Linear(hidden_dim, output_dim) 26 | 27 | self.layers = [self.l1, self.a1, 28 | self.l2, self.a2, 29 | self.l3, self.a3, 30 | self.l4] 31 | 32 | def forward(self, x): 33 | for layer in self.layers: 34 | x = layer(x) 35 | 36 | return x 37 | 38 | 39 | if __name__ == '__main__': 40 | np.random.seed(123) 41 | torch.manual_seed(123) 42 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 43 | 44 | ''' 45 | 1. 
データの準備 46 | ''' 47 | root = os.path.join('~', '.torch', 'mnist') 48 | transform = transforms.Compose([transforms.ToTensor(), 49 | lambda x: x.view(-1)]) 50 | mnist_train = datasets.MNIST(root=root, 51 | download=True, 52 | train=True, 53 | transform=transform) 54 | mnist_test = datasets.MNIST(root=root, 55 | download=True, 56 | train=False, 57 | transform=transform) 58 | 59 | train_dataloader = DataLoader(mnist_train, 60 | batch_size=100, 61 | shuffle=True) 62 | test_dataloader = DataLoader(mnist_test, 63 | batch_size=100, 64 | shuffle=False) 65 | 66 | ''' 67 | 2. モデルの構築 68 | ''' 69 | model = DNN(784, 200, 10).to(device) 70 | 71 | ''' 72 | 3. モデルの学習 73 | ''' 74 | criterion = nn.CrossEntropyLoss() 75 | optimizer = optimizers.SGD(model.parameters(), lr=0.01) 76 | 77 | def compute_loss(t, y): 78 | return criterion(y, t) 79 | 80 | def train_step(x, t): 81 | model.train() 82 | preds = model(x) 83 | loss = compute_loss(t, preds) 84 | optimizer.zero_grad() 85 | loss.backward() 86 | optimizer.step() 87 | 88 | return loss, preds 89 | 90 | epochs = 30 91 | 92 | for epoch in range(epochs): 93 | train_loss = 0. 94 | train_acc = 0. 95 | 96 | for (x, t) in train_dataloader: 97 | x, t = x.to(device), t.to(device) 98 | loss, preds = train_step(x, t) 99 | train_loss += loss.item() 100 | train_acc += \ 101 | accuracy_score(t.tolist(), 102 | preds.argmax(dim=-1).tolist()) 103 | 104 | train_loss /= len(train_dataloader) 105 | train_acc /= len(train_dataloader) 106 | 107 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 108 | epoch+1, 109 | train_loss, 110 | train_acc 111 | )) 112 | 113 | ''' 114 | 4. モデルの評価 115 | ''' 116 | def test_step(x, t): 117 | model.eval() 118 | preds = model(x) 119 | loss = criterion(preds, t) 120 | 121 | return loss, preds 122 | 123 | test_loss = 0. 124 | test_acc = 0. 125 | 126 | for (x, t) in test_dataloader: 127 | x, t = x.to(device), t.to(device) 128 | loss, preds = test_step(x, t) 129 | test_loss += loss.item() 130 | test_acc += \ 131 | accuracy_score(t.tolist(), 132 | preds.argmax(dim=-1).tolist()) 133 | 134 | test_loss /= len(test_dataloader) 135 | test_acc /= len(test_dataloader) 136 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 137 | test_loss, 138 | test_acc 139 | )) 140 | -------------------------------------------------------------------------------- /4/13_mnist_lrelu_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.3.2 Leaky ReLU - Keras (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow.keras import datasets 8 | from tensorflow.keras.models import Sequential 9 | from tensorflow.keras.layers import Dense, LeakyReLU 10 | 11 | 12 | if __name__ == '__main__': 13 | np.random.seed(123) 14 | tf.random.set_seed(123) 15 | 16 | ''' 17 | 1. データの準備 18 | ''' 19 | mnist = datasets.mnist 20 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 21 | 22 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 23 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 24 | t_train = np.eye(10)[t_train].astype(np.float32) 25 | t_test = np.eye(10)[t_test].astype(np.float32) 26 | 27 | ''' 28 | 2. モデルの構築 29 | ''' 30 | model = Sequential() 31 | model.add(Dense(200)) 32 | model.add(LeakyReLU(0.01)) 33 | model.add(Dense(200)) 34 | model.add(LeakyReLU(0.01)) 35 | model.add(Dense(200)) 36 | model.add(LeakyReLU(0.01)) 37 | model.add(Dense(10, activation='softmax')) 38 | 39 | ''' 40 | 3. 
モデルの学習 41 | ''' 42 | model.compile(optimizer='sgd', loss='categorical_crossentropy', 43 | metrics=['accuracy']) 44 | 45 | model.fit(x_train, t_train, 46 | epochs=30, batch_size=100, 47 | verbose=2) 48 | 49 | ''' 50 | 4. モデルの評価 51 | ''' 52 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 53 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 54 | loss, 55 | acc 56 | )) 57 | -------------------------------------------------------------------------------- /4/14_mnist_lrelu_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.3.2 Leaky ReLU - TensorFlow (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.utils import shuffle 7 | import tensorflow as tf 8 | from tensorflow.keras import datasets 9 | from tensorflow.keras.models import Model 10 | from tensorflow.keras.layers import Dense, LeakyReLU 11 | from tensorflow.keras import optimizers 12 | from tensorflow.keras import losses 13 | from tensorflow.keras import metrics 14 | 15 | 16 | class DNN(Model): 17 | def __init__(self, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = Dense(hidden_dim) 20 | self.a1 = LeakyReLU(0.01) 21 | self.l2 = Dense(hidden_dim) 22 | self.a2 = LeakyReLU(0.01) 23 | self.l3 = Dense(hidden_dim) 24 | self.a3 = LeakyReLU(0.01) 25 | self.l4 = Dense(output_dim, activation='softmax') 26 | 27 | self.ls = [self.l1, self.a1, 28 | self.l2, self.a2, 29 | self.l3, self.a3, 30 | self.l4] 31 | 32 | def call(self, x): 33 | for layer in self.ls: 34 | x = layer(x) 35 | 36 | return x 37 | 38 | 39 | if __name__ == '__main__': 40 | np.random.seed(123) 41 | tf.random.set_seed(123) 42 | 43 | ''' 44 | 1. データの準備 45 | ''' 46 | mnist = datasets.mnist 47 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 48 | 49 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 50 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 51 | t_train = np.eye(10)[t_train].astype(np.float32) 52 | t_test = np.eye(10)[t_test].astype(np.float32) 53 | 54 | ''' 55 | 2. モデルの構築 56 | ''' 57 | model = DNN(200, 10) 58 | 59 | ''' 60 | 3. モデルの学習 61 | ''' 62 | criterion = losses.CategoricalCrossentropy() 63 | optimizer = optimizers.SGD(learning_rate=0.01) 64 | train_loss = metrics.Mean() 65 | train_acc = metrics.CategoricalAccuracy() 66 | 67 | def compute_loss(t, y): 68 | return criterion(t, y) 69 | 70 | def train_step(x, t): 71 | with tf.GradientTape() as tape: 72 | preds = model(x) 73 | loss = compute_loss(t, preds) 74 | grads = tape.gradient(loss, model.trainable_variables) 75 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 76 | train_loss(loss) 77 | train_acc(t, preds) 78 | 79 | return loss 80 | 81 | epochs = 30 82 | batch_size = 100 83 | n_batches = x_train.shape[0] // batch_size 84 | 85 | for epoch in range(epochs): 86 | x_, t_ = shuffle(x_train, t_train) 87 | 88 | for batch in range(n_batches): 89 | start = batch * batch_size 90 | end = start + batch_size 91 | train_step(x_[start:end], t_[start:end]) 92 | 93 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 94 | epoch+1, 95 | train_loss.result(), 96 | train_acc.result() 97 | )) 98 | 99 | ''' 100 | 4. 
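Model evaluation (supplementary note)
    '''
    # test_step() below pushes all 10,000 test images through the model
    # in a single forward pass. With a larger model or limited memory,
    # the same stateful metrics can be fed mini-batches instead; the
    # results are identical here because every batch would have equal
    # size (a sketch):
    #
    #   for i in range(x_test.shape[0] // 100):
    #       test_step(x_test[i*100:(i+1)*100], t_test[i*100:(i+1)*100])

    '''
    4. 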
モデルの評価 101 | ''' 102 | test_loss = metrics.Mean() 103 | test_acc = metrics.CategoricalAccuracy() 104 | 105 | def test_step(x, t): 106 | preds = model(x) 107 | loss = compute_loss(t, preds) 108 | test_loss(loss) 109 | test_acc(t, preds) 110 | 111 | return loss 112 | 113 | test_step(x_test, t_test) 114 | 115 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 116 | test_loss.result(), 117 | test_acc.result() 118 | )) 119 | -------------------------------------------------------------------------------- /4/15_mnist_lrelu_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.3.2 Leaky ReLU - PyTorch (MNIST) 3 | ''' 4 | 5 | import os 6 | import numpy as np 7 | from sklearn.metrics import accuracy_score 8 | import torch 9 | import torch.nn as nn 10 | import torch.optim as optimizers 11 | from torch.utils.data import DataLoader 12 | from torchvision import datasets 13 | import torchvision.transforms as transforms 14 | 15 | 16 | class DNN(nn.Module): 17 | def __init__(self, input_dim, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = nn.Linear(input_dim, hidden_dim) 20 | self.a1 = nn.LeakyReLU(0.01) 21 | self.l2 = nn.Linear(hidden_dim, hidden_dim) 22 | self.a2 = nn.LeakyReLU(0.01) 23 | self.l3 = nn.Linear(hidden_dim, hidden_dim) 24 | self.a3 = nn.LeakyReLU(0.01) 25 | self.l4 = nn.Linear(hidden_dim, output_dim) 26 | 27 | self.layers = [self.l1, self.a1, 28 | self.l2, self.a2, 29 | self.l3, self.a3, 30 | self.l4] 31 | 32 | def forward(self, x): 33 | for layer in self.layers: 34 | x = layer(x) 35 | 36 | return x 37 | 38 | 39 | if __name__ == '__main__': 40 | np.random.seed(123) 41 | torch.manual_seed(123) 42 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 43 | 44 | ''' 45 | 1. データの準備 46 | ''' 47 | root = os.path.join('~', '.torch', 'mnist') 48 | transform = transforms.Compose([transforms.ToTensor(), 49 | lambda x: x.view(-1)]) 50 | mnist_train = datasets.MNIST(root=root, 51 | download=True, 52 | train=True, 53 | transform=transform) 54 | mnist_test = datasets.MNIST(root=root, 55 | download=True, 56 | train=False, 57 | transform=transform) 58 | 59 | train_dataloader = DataLoader(mnist_train, 60 | batch_size=100, 61 | shuffle=True) 62 | test_dataloader = DataLoader(mnist_test, 63 | batch_size=100, 64 | shuffle=False) 65 | 66 | ''' 67 | 2. モデルの構築 68 | ''' 69 | model = DNN(784, 200, 10).to(device) 70 | 71 | ''' 72 | 3. モデルの学習 73 | ''' 74 | criterion = nn.CrossEntropyLoss() 75 | optimizer = optimizers.SGD(model.parameters(), lr=0.01) 76 | 77 | def compute_loss(t, y): 78 | return criterion(y, t) 79 | 80 | def train_step(x, t): 81 | model.train() 82 | preds = model(x) 83 | loss = compute_loss(t, preds) 84 | optimizer.zero_grad() 85 | loss.backward() 86 | optimizer.step() 87 | 88 | return loss, preds 89 | 90 | epochs = 30 91 | 92 | for epoch in range(epochs): 93 | train_loss = 0. 94 | train_acc = 0. 95 | 96 | for (x, t) in train_dataloader: 97 | x, t = x.to(device), t.to(device) 98 | loss, preds = train_step(x, t) 99 | train_loss += loss.item() 100 | train_acc += \ 101 | accuracy_score(t.tolist(), 102 | preds.argmax(dim=-1).tolist()) 103 | 104 | train_loss /= len(train_dataloader) 105 | train_acc /= len(train_dataloader) 106 | 107 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 108 | epoch+1, 109 | train_loss, 110 | train_acc 111 | )) 112 | 113 | ''' 114 | 4. 
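Model evaluation (supplementary note)
    '''
    # test_step() below switches to eval mode but still records
    # gradients. Wrapping inference in torch.no_grad() skips the
    # autograd bookkeeping and reduces memory use (a sketch of the same
    # step):
    #
    #   def test_step(x, t):
    #       model.eval()
    #       with torch.no_grad():
    #           preds = model(x)
    #           loss = criterion(preds, t)
    #       return loss, preds

    '''
    4. 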
モデルの評価 115 | ''' 116 | def test_step(x, t): 117 | model.eval() 118 | preds = model(x) 119 | loss = criterion(preds, t) 120 | 121 | return loss, preds 122 | 123 | test_loss = 0. 124 | test_acc = 0. 125 | 126 | for (x, t) in test_dataloader: 127 | x, t = x.to(device), t.to(device) 128 | loss, preds = test_step(x, t) 129 | test_loss += loss.item() 130 | test_acc += \ 131 | accuracy_score(t.tolist(), 132 | preds.argmax(dim=-1).tolist()) 133 | 134 | test_loss /= len(test_dataloader) 135 | test_acc /= len(test_dataloader) 136 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 137 | test_loss, 138 | test_acc 139 | )) 140 | -------------------------------------------------------------------------------- /4/16_mnist_swish_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.4.2 Swish - Keras (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow.keras import datasets 8 | from tensorflow.keras.models import Sequential 9 | from tensorflow.keras.layers import Dense, LeakyReLU 10 | from tensorflow.keras import backend as K 11 | 12 | 13 | if __name__ == '__main__': 14 | np.random.seed(123) 15 | tf.random.set_seed(123) 16 | 17 | ''' 18 | 1. データの準備 19 | ''' 20 | mnist = datasets.mnist 21 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 22 | 23 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 24 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 25 | t_train = np.eye(10)[t_train].astype(np.float32) 26 | t_test = np.eye(10)[t_test].astype(np.float32) 27 | 28 | ''' 29 | 2. モデルの構築 30 | ''' 31 | def swish(x, beta=1.): 32 | return x * K.sigmoid(beta * x) 33 | # return x * tf.nn.sigmoid(beta * x) # こちらでもOK 34 | 35 | model = Sequential() 36 | model.add(Dense(200, activation=swish)) 37 | model.add(Dense(200, activation=swish)) 38 | model.add(Dense(200, activation=swish)) 39 | model.add(Dense(10, activation='softmax')) 40 | 41 | ''' 42 | 3. モデルの学習 43 | ''' 44 | model.compile(optimizer='sgd', loss='categorical_crossentropy', 45 | metrics=['accuracy']) 46 | 47 | model.fit(x_train, t_train, 48 | epochs=30, batch_size=100, 49 | verbose=2) 50 | 51 | ''' 52 | 4. 
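Model evaluation (supplementary note)
    '''
    # swish(x) = x * sigmoid(beta * x); with beta = 1 this is the
    # activation also known as SiLU. Recent TensorFlow releases ship it
    # built in, so the custom function above could be swapped for the
    # following (a sketch, version permitting):
    #
    #   model.add(Dense(200, activation=tf.nn.swish))

    '''
    4. 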
モデルの評価 53 | ''' 54 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 55 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 56 | loss, 57 | acc 58 | )) 59 | -------------------------------------------------------------------------------- /4/17_mnist_swish_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.4.2 Swish - TensorFlow (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.utils import shuffle 7 | import tensorflow as tf 8 | from tensorflow.keras import datasets 9 | from tensorflow.keras.models import Model 10 | from tensorflow.keras.layers import Dense, LeakyReLU 11 | from tensorflow.keras import optimizers 12 | from tensorflow.keras import losses 13 | from tensorflow.keras import metrics 14 | 15 | 16 | class DNN(Model): 17 | def __init__(self, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = Dense(hidden_dim, activation=swish) 20 | self.l2 = Dense(hidden_dim, activation=swish) 21 | self.l3 = Dense(hidden_dim, activation=swish) 22 | self.l4 = Dense(output_dim, activation='softmax') 23 | 24 | self.ls = [self.l1, self.l2, self.l3, self.l4] 25 | 26 | def call(self, x): 27 | for layer in self.ls: 28 | x = layer(x) 29 | 30 | return x 31 | 32 | 33 | def swish(x, beta=1.): 34 | return x * tf.nn.sigmoid(beta * x) 35 | 36 | 37 | if __name__ == '__main__': 38 | np.random.seed(123) 39 | tf.random.set_seed(123) 40 | 41 | ''' 42 | 1. データの準備 43 | ''' 44 | mnist = datasets.mnist 45 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 46 | 47 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 48 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 49 | t_train = np.eye(10)[t_train].astype(np.float32) 50 | t_test = np.eye(10)[t_test].astype(np.float32) 51 | 52 | ''' 53 | 2. モデルの構築 54 | ''' 55 | model = DNN(200, 10) 56 | 57 | ''' 58 | 3. モデルの学習 59 | ''' 60 | criterion = losses.CategoricalCrossentropy() 61 | optimizer = optimizers.SGD(learning_rate=0.01) 62 | train_loss = metrics.Mean() 63 | train_acc = metrics.CategoricalAccuracy() 64 | 65 | def compute_loss(t, y): 66 | return criterion(t, y) 67 | 68 | def train_step(x, t): 69 | with tf.GradientTape() as tape: 70 | preds = model(x) 71 | loss = compute_loss(t, preds) 72 | grads = tape.gradient(loss, model.trainable_variables) 73 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 74 | train_loss(loss) 75 | train_acc(t, preds) 76 | 77 | return loss 78 | 79 | epochs = 30 80 | batch_size = 100 81 | n_batches = x_train.shape[0] // batch_size 82 | 83 | for epoch in range(epochs): 84 | x_, t_ = shuffle(x_train, t_train) 85 | 86 | for batch in range(n_batches): 87 | start = batch * batch_size 88 | end = start + batch_size 89 | train_step(x_[start:end], t_[start:end]) 90 | 91 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 92 | epoch+1, 93 | train_loss.result(), 94 | train_acc.result() 95 | )) 96 | 97 | ''' 98 | 4. 
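Model evaluation (supplementary note)
    '''
    # Note: train_loss and train_acc above are stateful Keras metrics
    # that are never reset, so each per-epoch log line reports a running
    # average over every batch since epoch 1, not over the current epoch
    # alone. True per-epoch values require clearing the state at the top
    # of each epoch (a sketch; the same applies to the other TensorFlow
    # training loops in this chapter):
    #
    #   train_loss.reset_states()   # reset_state() in newer TF
    #   train_acc.reset_states()

    '''
    4. 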
モデルの評価 99 | ''' 100 | test_loss = metrics.Mean() 101 | test_acc = metrics.CategoricalAccuracy() 102 | 103 | def test_step(x, t): 104 | preds = model(x) 105 | loss = compute_loss(t, preds) 106 | test_loss(loss) 107 | test_acc(t, preds) 108 | 109 | return loss 110 | 111 | test_step(x_test, t_test) 112 | 113 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 114 | test_loss.result(), 115 | test_acc.result() 116 | )) 117 | -------------------------------------------------------------------------------- /4/18_mnist_swish_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.3.4.2 Swish - PyTorch (MNIST) 3 | ''' 4 | 5 | import os 6 | import numpy as np 7 | from sklearn.metrics import accuracy_score 8 | import torch 9 | import torch.nn as nn 10 | import torch.optim as optimizers 11 | from torch.utils.data import DataLoader 12 | from torchvision import datasets 13 | import torchvision.transforms as transforms 14 | 15 | 16 | class Swish(nn.Module): 17 | def __init__(self, beta=1.): 18 | super().__init__() 19 | self.beta = beta 20 | 21 | def forward(self, x): 22 | return x * torch.sigmoid(self.beta * x) 23 | 24 | 25 | class DNN(nn.Module): 26 | def __init__(self, input_dim, hidden_dim, output_dim): 27 | super().__init__() 28 | self.l1 = nn.Linear(input_dim, hidden_dim) 29 | self.a1 = Swish() 30 | self.l2 = nn.Linear(hidden_dim, hidden_dim) 31 | self.a2 = Swish() 32 | self.l3 = nn.Linear(hidden_dim, hidden_dim) 33 | self.a3 = Swish() 34 | self.l4 = nn.Linear(hidden_dim, output_dim) 35 | 36 | self.layers = [self.l1, self.a1, 37 | self.l2, self.a2, 38 | self.l3, self.a3, 39 | self.l4] 40 | 41 | def forward(self, x): 42 | for layer in self.layers: 43 | x = layer(x) 44 | 45 | return x 46 | 47 | 48 | if __name__ == '__main__': 49 | np.random.seed(123) 50 | torch.manual_seed(123) 51 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 52 | 53 | ''' 54 | 1. データの準備 55 | ''' 56 | root = os.path.join('~', '.torch', 'mnist') 57 | transform = transforms.Compose([transforms.ToTensor(), 58 | lambda x: x.view(-1)]) 59 | mnist_train = datasets.MNIST(root=root, 60 | download=True, 61 | train=True, 62 | transform=transform) 63 | mnist_test = datasets.MNIST(root=root, 64 | download=True, 65 | train=False, 66 | transform=transform) 67 | 68 | train_dataloader = DataLoader(mnist_train, 69 | batch_size=100, 70 | shuffle=True) 71 | test_dataloader = DataLoader(mnist_test, 72 | batch_size=100, 73 | shuffle=False) 74 | 75 | ''' 76 | 2. モデルの構築 77 | ''' 78 | model = DNN(784, 200, 10).to(device) 79 | 80 | ''' 81 | 3. モデルの学習 82 | ''' 83 | criterion = nn.CrossEntropyLoss() 84 | optimizer = optimizers.SGD(model.parameters(), lr=0.01) 85 | 86 | def compute_loss(t, y): 87 | return criterion(y, t) 88 | 89 | def train_step(x, t): 90 | model.train() 91 | preds = model(x) 92 | loss = compute_loss(t, preds) 93 | optimizer.zero_grad() 94 | loss.backward() 95 | optimizer.step() 96 | 97 | return loss, preds 98 | 99 | epochs = 30 100 | 101 | for epoch in range(epochs): 102 | train_loss = 0. 103 | train_acc = 0. 
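        # Accumulate loss and accuracy over the epoch's mini-batches;
        # the running sums are divided by len(train_dataloader) below to
        # report per-epoch averages.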
104 | 105 | for (x, t) in train_dataloader: 106 | x, t = x.to(device), t.to(device) 107 | loss, preds = train_step(x, t) 108 | train_loss += loss.item() 109 | train_acc += \ 110 | accuracy_score(t.tolist(), 111 | preds.argmax(dim=-1).tolist()) 112 | 113 | train_loss /= len(train_dataloader) 114 | train_acc /= len(train_dataloader) 115 | 116 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 117 | epoch+1, 118 | train_loss, 119 | train_acc 120 | )) 121 | 122 | ''' 123 | 4. モデルの評価 124 | ''' 125 | def test_step(x, t): 126 | model.eval() 127 | preds = model(x) 128 | loss = criterion(preds, t) 129 | 130 | return loss, preds 131 | 132 | test_loss = 0. 133 | test_acc = 0. 134 | 135 | for (x, t) in test_dataloader: 136 | x, t = x.to(device), t.to(device) 137 | loss, preds = test_step(x, t) 138 | test_loss += loss.item() 139 | test_acc += \ 140 | accuracy_score(t.tolist(), 141 | preds.argmax(dim=-1).tolist()) 142 | 143 | test_loss /= len(test_dataloader) 144 | test_acc /= len(test_dataloader) 145 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 146 | test_loss, 147 | test_acc 148 | )) 149 | -------------------------------------------------------------------------------- /4/19_mnist_dropout_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.4.2 Dropout - Keras (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow.keras import datasets 8 | from tensorflow.keras.models import Sequential 9 | from tensorflow.keras.layers import Dense, Dropout 10 | 11 | 12 | if __name__ == '__main__': 13 | np.random.seed(123) 14 | tf.random.set_seed(123) 15 | 16 | ''' 17 | 1. データの準備 18 | ''' 19 | mnist = datasets.mnist 20 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 21 | 22 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 23 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 24 | t_train = np.eye(10)[t_train].astype(np.float32) 25 | t_test = np.eye(10)[t_test].astype(np.float32) 26 | 27 | ''' 28 | 2. モデルの構築 29 | ''' 30 | model = Sequential() 31 | model.add(Dense(200, activation='relu')) 32 | model.add(Dropout(0.5)) 33 | model.add(Dense(200, activation='relu')) 34 | model.add(Dropout(0.5)) 35 | model.add(Dense(200, activation='relu')) 36 | model.add(Dropout(0.5)) 37 | model.add(Dense(10, activation='softmax')) 38 | 39 | ''' 40 | 3. モデルの学習 41 | ''' 42 | model.compile(optimizer='sgd', loss='categorical_crossentropy', 43 | metrics=['accuracy']) 44 | 45 | model.fit(x_train, t_train, 46 | epochs=100, batch_size=100, 47 | verbose=2) 48 | 49 | ''' 50 | 4. 
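Model evaluation (supplementary note)
    '''
    # Keras Dropout layers are active only inside fit(); evaluate() and
    # predict() below run them as identity maps, and because dropout is
    # "inverted" (activations are scaled by 1/(1 - rate) during
    # training) no test-time rescaling is needed. To sample with dropout
    # enabled, e.g. for MC-dropout uncertainty estimates, the model can
    # be called with an explicit flag (a sketch):
    #
    #   y = model(x_test[:1], training=True)

    '''
    4. 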
モデルの評価 51 | ''' 52 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 53 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 54 | loss, 55 | acc 56 | )) 57 | -------------------------------------------------------------------------------- /4/20_mnist_dropout_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.4.2 Dropout - TensorFlow (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.utils import shuffle 7 | import tensorflow as tf 8 | from tensorflow.keras import datasets 9 | from tensorflow.keras.models import Model 10 | from tensorflow.keras.layers import Dense, Dropout 11 | from tensorflow.keras import optimizers 12 | from tensorflow.keras import losses 13 | from tensorflow.keras import metrics 14 | 15 | 16 | class DNN(Model): 17 | def __init__(self, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = Dense(hidden_dim, activation='relu') 20 | self.d1 = Dropout(0.5) 21 | self.l2 = Dense(hidden_dim, activation='relu') 22 | self.d2 = Dropout(0.5) 23 | self.l3 = Dense(hidden_dim, activation='relu') 24 | self.d3 = Dropout(0.5) 25 | self.l4 = Dense(output_dim, activation='softmax') 26 | 27 | self.ls = [self.l1, self.d1, 28 | self.l2, self.d2, 29 | self.l3, self.d3, 30 | self.l4] 31 | 32 | def call(self, x): 33 | for layer in self.ls: 34 | x = layer(x) 35 | 36 | return x 37 | 38 | 39 | if __name__ == '__main__': 40 | np.random.seed(123) 41 | tf.random.set_seed(123) 42 | 43 | ''' 44 | 1. データの準備 45 | ''' 46 | mnist = datasets.mnist 47 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 48 | 49 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 50 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 51 | t_train = np.eye(10)[t_train].astype(np.float32) 52 | t_test = np.eye(10)[t_test].astype(np.float32) 53 | 54 | ''' 55 | 2. モデルの構築 56 | ''' 57 | model = DNN(200, 10) 58 | 59 | ''' 60 | 3. モデルの学習 61 | ''' 62 | criterion = losses.CategoricalCrossentropy() 63 | optimizer = optimizers.SGD(learning_rate=0.01) 64 | train_loss = metrics.Mean() 65 | train_acc = metrics.CategoricalAccuracy() 66 | 67 | def compute_loss(t, y): 68 | return criterion(t, y) 69 | 70 | def train_step(x, t): 71 | with tf.GradientTape() as tape: 72 | preds = model(x) 73 | loss = compute_loss(t, preds) 74 | grads = tape.gradient(loss, model.trainable_variables) 75 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 76 | train_loss(loss) 77 | train_acc(t, preds) 78 | 79 | return loss 80 | 81 | epochs = 100 82 | batch_size = 100 83 | n_batches = x_train.shape[0] // batch_size 84 | 85 | for epoch in range(epochs): 86 | x_, t_ = shuffle(x_train, t_train) 87 | 88 | for batch in range(n_batches): 89 | start = batch * batch_size 90 | end = start + batch_size 91 | train_step(x_[start:end], t_[start:end]) 92 | 93 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 94 | epoch+1, 95 | train_loss.result(), 96 | train_acc.result() 97 | )) 98 | 99 | ''' 100 | 4. 
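Model evaluation (supplementary note)
    '''
    # Caveat: DNN.call() above takes no `training` argument, so the
    # Dropout layers rely on however the installed TF version resolves a
    # missing training flag; it is worth verifying that dropout is
    # really active in train_step() and inactive in test_step().
    # Forwarding the flag explicitly removes the ambiguity (a sketch):
    #
    #   def call(self, x, training=None):
    #       for layer in self.ls:
    #           if isinstance(layer, Dropout):
    #               x = layer(x, training=training)
    #           else:
    #               x = layer(x)
    #       return x
    #
    #   preds = model(x, training=True)    # in train_step
    #   preds = model(x, training=False)   # in test_step

    '''
    4. 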
モデルの評価 101 | ''' 102 | test_loss = metrics.Mean() 103 | test_acc = metrics.CategoricalAccuracy() 104 | 105 | def test_step(x, t): 106 | preds = model(x) 107 | loss = compute_loss(t, preds) 108 | test_loss(loss) 109 | test_acc(t, preds) 110 | 111 | return loss 112 | 113 | test_step(x_test, t_test) 114 | 115 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 116 | test_loss.result(), 117 | test_acc.result() 118 | )) 119 | -------------------------------------------------------------------------------- /4/21_mnist_dropout_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.4.2 Dropout - PyTorch (MNIST) 3 | ''' 4 | 5 | import os 6 | import numpy as np 7 | from sklearn.metrics import accuracy_score 8 | import torch 9 | import torch.nn as nn 10 | import torch.optim as optimizers 11 | from torch.utils.data import DataLoader 12 | from torchvision import datasets 13 | import torchvision.transforms as transforms 14 | 15 | 16 | class DNN(nn.Module): 17 | def __init__(self, input_dim, hidden_dim, output_dim): 18 | super().__init__() 19 | self.l1 = nn.Linear(input_dim, hidden_dim) 20 | self.a1 = nn.ReLU() 21 | self.d1 = nn.Dropout(0.5) 22 | self.l2 = nn.Linear(hidden_dim, hidden_dim) 23 | self.a2 = nn.ReLU() 24 | self.d2 = nn.Dropout(0.5) 25 | self.l3 = nn.Linear(hidden_dim, hidden_dim) 26 | self.a3 = nn.ReLU() 27 | self.d3 = nn.Dropout(0.5) 28 | self.l4 = nn.Linear(hidden_dim, output_dim) 29 | 30 | self.layers = [self.l1, self.a1, self.d1, 31 | self.l2, self.a2, self.d2, 32 | self.l3, self.a3, self.d3, 33 | self.l4] 34 | 35 | def forward(self, x): 36 | for layer in self.layers: 37 | x = layer(x) 38 | 39 | return x 40 | 41 | 42 | if __name__ == '__main__': 43 | np.random.seed(123) 44 | torch.manual_seed(123) 45 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 46 | 47 | ''' 48 | 1. データの準備 49 | ''' 50 | root = os.path.join('~', '.torch', 'mnist') 51 | transform = transforms.Compose([transforms.ToTensor(), 52 | lambda x: x.view(-1)]) 53 | mnist_train = datasets.MNIST(root=root, 54 | download=True, 55 | train=True, 56 | transform=transform) 57 | mnist_test = datasets.MNIST(root=root, 58 | download=True, 59 | train=False, 60 | transform=transform) 61 | 62 | train_dataloader = DataLoader(mnist_train, 63 | batch_size=100, 64 | shuffle=True) 65 | test_dataloader = DataLoader(mnist_test, 66 | batch_size=100, 67 | shuffle=False) 68 | 69 | ''' 70 | 2. モデルの構築 71 | ''' 72 | model = DNN(784, 200, 10).to(device) 73 | 74 | ''' 75 | 3. モデルの学習 76 | ''' 77 | criterion = nn.CrossEntropyLoss() 78 | optimizer = optimizers.SGD(model.parameters(), lr=0.01) 79 | 80 | def compute_loss(t, y): 81 | return criterion(y, t) 82 | 83 | def train_step(x, t): 84 | model.train() 85 | preds = model(x) 86 | loss = compute_loss(t, preds) 87 | optimizer.zero_grad() 88 | loss.backward() 89 | optimizer.step() 90 | 91 | return loss, preds 92 | 93 | epochs = 100 94 | 95 | for epoch in range(epochs): 96 | train_loss = 0. 97 | train_acc = 0. 98 | 99 | for (x, t) in train_dataloader: 100 | x, t = x.to(device), t.to(device) 101 | loss, preds = train_step(x, t) 102 | train_loss += loss.item() 103 | train_acc += \ 104 | accuracy_score(t.tolist(), 105 | preds.argmax(dim=-1).tolist()) 106 | 107 | train_loss /= len(train_dataloader) 108 | train_acc /= len(train_dataloader) 109 | 110 | print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format( 111 | epoch+1, 112 | train_loss, 113 | train_acc 114 | )) 115 | 116 | ''' 117 | 4. 
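Model evaluation (supplementary note)
    '''
    # model.eval() in test_step() below switches nn.Dropout to identity
    # behaviour; PyTorch uses inverted dropout (scaling by 1/(1 - p)
    # during training), so no test-time rescaling is needed. Averaging
    # the batch-wise accuracies is exact here because all test batches
    # have the same size (10,000 images at batch_size 100).

    '''
    4. 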
モデルの評価 118 | ''' 119 | def test_step(x, t): 120 | model.eval() 121 | preds = model(x) 122 | loss = criterion(preds, t) 123 | 124 | return loss, preds 125 | 126 | test_loss = 0. 127 | test_acc = 0. 128 | 129 | for (x, t) in test_dataloader: 130 | x, t = x.to(device), t.to(device) 131 | loss, preds = test_step(x, t) 132 | test_loss += loss.item() 133 | test_acc += \ 134 | accuracy_score(t.tolist(), 135 | preds.argmax(dim=-1).tolist()) 136 | 137 | test_loss /= len(test_dataloader) 138 | test_acc /= len(test_dataloader) 139 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 140 | test_loss, 141 | test_acc 142 | )) 143 | -------------------------------------------------------------------------------- /4/22_mnist_plot_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.5.1.1 学習の可視化 - Keras (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | import tensorflow as tf 9 | from tensorflow.keras import datasets 10 | from tensorflow.keras.models import Sequential 11 | from tensorflow.keras.layers import Dense, Dropout 12 | 13 | 14 | if __name__ == '__main__': 15 | np.random.seed(123) 16 | tf.random.set_seed(123) 17 | 18 | ''' 19 | 1. データの準備 20 | ''' 21 | mnist = datasets.mnist 22 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 23 | 24 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 25 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 26 | 27 | x_train, x_val, t_train, t_val = \ 28 | train_test_split(x_train, t_train, test_size=0.2) 29 | 30 | ''' 31 | 2. モデルの構築 32 | ''' 33 | model = Sequential() 34 | model.add(Dense(200, activation='relu')) 35 | model.add(Dropout(0.5)) 36 | model.add(Dense(200, activation='relu')) 37 | model.add(Dropout(0.5)) 38 | model.add(Dense(200, activation='relu')) 39 | model.add(Dropout(0.5)) 40 | model.add(Dense(10, activation='softmax')) 41 | 42 | ''' 43 | 3. モデルの学習 44 | ''' 45 | model.compile(optimizer='sgd', 46 | loss='sparse_categorical_crossentropy', 47 | metrics=['accuracy']) 48 | 49 | hist = model.fit(x_train, t_train, 50 | epochs=100, batch_size=100, 51 | verbose=2, 52 | validation_data=(x_val, t_val)) 53 | 54 | ''' 55 | 4. 
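Model evaluation (supplementary note)
    '''
    # hist.history also records the training-side series ('loss',
    # 'accuracy'), so training and validation error can be overlaid on
    # one figure to make overfitting visible (a sketch):
    #
    #   plt.plot(hist.history['loss'], color='gray', label='loss')
    #   plt.plot(hist.history['val_loss'], color='black',
    #            label='val_loss')
    #   plt.legend()

    '''
    4. 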
モデルの評価 56 | ''' 57 | # 検証データの誤差の可視化 58 | val_loss = hist.history['val_loss'] 59 | 60 | fig = plt.figure() 61 | plt.rc('font', family='serif') 62 | plt.plot(range(len(val_loss)), val_loss, 63 | color='black', linewidth=1) 64 | plt.xlabel('epochs') 65 | plt.ylabel('loss') 66 | # plt.savefig('output.jpg') 67 | plt.show() 68 | 69 | # 正解率を可視化する場合 70 | # val_acc = hist.history['val_accuracy'] 71 | # 72 | # fig = plt.figure() 73 | # plt.rc('font', family='serif') 74 | # plt.plot(range(len(val_acc)), val_acc, 75 | # color='black', linewidth=1) 76 | # plt.xlabel('epochs') 77 | # plt.ylabel('acc') 78 | # plt.savefig('output_acc.jpg') 79 | # plt.show() 80 | 81 | # テストデータの評価 82 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 83 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 84 | loss, 85 | acc 86 | )) 87 | -------------------------------------------------------------------------------- /4/23_mnist_plot_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.5.1.2 学習の可視化 - TensorFlow (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import tensorflow as tf 10 | from tensorflow.keras import datasets 11 | from tensorflow.keras.models import Model 12 | from tensorflow.keras.layers import Dense, Dropout 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras import losses 15 | from tensorflow.keras import metrics 16 | 17 | 18 | class DNN(Model): 19 | def __init__(self, hidden_dim, output_dim): 20 | super().__init__() 21 | self.l1 = Dense(hidden_dim, activation='relu') 22 | self.d1 = Dropout(0.5) 23 | self.l2 = Dense(hidden_dim, activation='relu') 24 | self.d2 = Dropout(0.5) 25 | self.l3 = Dense(hidden_dim, activation='relu') 26 | self.d3 = Dropout(0.5) 27 | self.l4 = Dense(output_dim, activation='softmax') 28 | 29 | self.ls = [self.l1, self.d1, 30 | self.l2, self.d2, 31 | self.l3, self.d3, 32 | self.l4] 33 | 34 | def call(self, x): 35 | for layer in self.ls: 36 | x = layer(x) 37 | 38 | return x 39 | 40 | 41 | if __name__ == '__main__': 42 | np.random.seed(123) 43 | tf.random.set_seed(123) 44 | 45 | ''' 46 | 1. データの準備 47 | ''' 48 | mnist = datasets.mnist 49 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 50 | 51 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 52 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 53 | 54 | x_train, x_val, t_train, t_val = \ 55 | train_test_split(x_train, t_train, test_size=0.2) 56 | 57 | ''' 58 | 2. モデルの構築 59 | ''' 60 | model = DNN(200, 10) 61 | 62 | ''' 63 | 3. 
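Model training (supplementary note)
    '''
    # Unlike the earlier scripts, the targets here stay as integer class
    # ids (no np.eye one-hot encoding), so the sparse variants of the
    # loss and accuracy metrics are used below. The two formulations are
    # equivalent: SparseCategoricalCrossentropy expects t of shape
    # (batch,) with values in {0, ..., 9}, while CategoricalCrossentropy
    # expects the corresponding one-hot rows.

    '''
    3. 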
モデルの学習 64 | ''' 65 | criterion = losses.SparseCategoricalCrossentropy() 66 | optimizer = optimizers.SGD(learning_rate=0.01) 67 | train_loss = metrics.Mean() 68 | train_acc = metrics.SparseCategoricalAccuracy() 69 | val_loss = metrics.Mean() 70 | val_acc = metrics.SparseCategoricalAccuracy() 71 | 72 | def compute_loss(t, y): 73 | return criterion(t, y) 74 | 75 | def train_step(x, t): 76 | with tf.GradientTape() as tape: 77 | preds = model(x) 78 | loss = compute_loss(t, preds) 79 | grads = tape.gradient(loss, model.trainable_variables) 80 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 81 | train_loss(loss) 82 | train_acc(t, preds) 83 | 84 | return loss 85 | 86 | def val_step(x, t): 87 | preds = model(x) 88 | loss = compute_loss(t, preds) 89 | val_loss(loss) 90 | val_acc(t, preds) 91 | 92 | epochs = 100 93 | batch_size = 100 94 | n_batches_train = x_train.shape[0] // batch_size 95 | n_batches_val = x_val.shape[0] // batch_size 96 | hist = {'val_loss': [], 'val_accuracy': []} 97 | 98 | for epoch in range(epochs): 99 | x_, t_ = shuffle(x_train, t_train) 100 | 101 | for batch in range(n_batches_train): 102 | start = batch * batch_size 103 | end = start + batch_size 104 | train_step(x_[start:end], t_[start:end]) 105 | 106 | for batch in range(n_batches_val): 107 | start = batch * batch_size 108 | end = start + batch_size 109 | val_step(x_val[start:end], t_val[start:end]) 110 | 111 | hist['val_loss'].append(val_loss.result()) 112 | hist['val_accuracy'].append(val_acc.result()) 113 | 114 | print('epoch: {}, loss: {:.3}, acc: {:.3f}' 115 | ', val_loss: {:.3}, val_acc: {:.3f}'.format( 116 | epoch+1, 117 | train_loss.result(), 118 | train_acc.result(), 119 | val_loss.result(), 120 | val_acc.result() 121 | )) 122 | 123 | ''' 124 | 4. モデルの評価 125 | ''' 126 | # 検証データの誤差の可視化 127 | val_loss = hist['val_loss'] 128 | 129 | fig = plt.figure() 130 | plt.rc('font', family='serif') 131 | plt.plot(range(len(val_loss)), val_loss, 132 | color='black', linewidth=1) 133 | plt.xlabel('epochs') 134 | plt.ylabel('loss') 135 | # plt.savefig('output.jpg') 136 | plt.show() 137 | 138 | # 正解率を可視化する場合 139 | # val_acc = hist['val_accuracy'] 140 | # 141 | # fig = plt.figure() 142 | # plt.rc('font', family='serif') 143 | # plt.plot(range(len(val_acc)), val_acc, 144 | # color='black', linewidth=1) 145 | # plt.xlabel('epochs') 146 | # plt.ylabel('acc') 147 | # plt.savefig('output_acc.jpg') 148 | # plt.show() 149 | 150 | # テストデータの評価 151 | test_loss = metrics.Mean() 152 | test_acc = metrics.SparseCategoricalAccuracy() 153 | 154 | def test_step(x, t): 155 | preds = model(x) 156 | loss = compute_loss(t, preds) 157 | test_loss(loss) 158 | test_acc(t, preds) 159 | 160 | return loss 161 | 162 | test_step(x_test, t_test) 163 | 164 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 165 | test_loss.result(), 166 | test_acc.result() 167 | )) 168 | -------------------------------------------------------------------------------- /4/25_mnist_early_stopping_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.5.2.2 早期終了 - Keras (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | import tensorflow as tf 9 | from tensorflow.keras import datasets 10 | from tensorflow.keras.models import Sequential 11 | from tensorflow.keras.layers import Dense, Dropout 12 | from tensorflow.keras.callbacks import EarlyStopping 13 | 14 | 15 | if __name__ == '__main__': 16 | np.random.seed(123) 17 | 
tf.random.set_seed(123) 18 | 19 | ''' 20 | 1. データの準備 21 | ''' 22 | mnist = datasets.mnist 23 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 24 | 25 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 26 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 27 | 28 | x_train, x_val, t_train, t_val = \ 29 | train_test_split(x_train, t_train, test_size=0.2) 30 | 31 | ''' 32 | 2. モデルの構築 33 | ''' 34 | model = Sequential() 35 | model.add(Dense(200, activation='relu')) 36 | model.add(Dropout(0.5)) 37 | model.add(Dense(200, activation='relu')) 38 | model.add(Dropout(0.5)) 39 | model.add(Dense(200, activation='relu')) 40 | model.add(Dropout(0.5)) 41 | model.add(Dense(10, activation='softmax')) 42 | 43 | ''' 44 | 3. モデルの学習 45 | ''' 46 | model.compile(optimizer='sgd', 47 | loss='sparse_categorical_crossentropy', 48 | metrics=['accuracy']) 49 | 50 | es = EarlyStopping(monitor='val_loss', 51 | patience=5, 52 | verbose=1) 53 | 54 | hist = model.fit(x_train, t_train, 55 | epochs=1000, batch_size=100, 56 | verbose=2, 57 | validation_data=(x_val, t_val), 58 | callbacks=[es]) 59 | 60 | ''' 61 | 4. モデルの評価 62 | ''' 63 | # 誤差の可視化 64 | loss = hist.history['loss'] 65 | val_loss = hist.history['val_loss'] 66 | 67 | fig = plt.figure() 68 | plt.rc('font', family='serif') 69 | plt.plot(range(len(loss)), loss, 70 | color='gray', linewidth=1, 71 | label='loss') 72 | plt.plot(range(len(val_loss)), val_loss, 73 | color='black', linewidth=1, 74 | label='val_loss') 75 | plt.xlabel('epochs') 76 | plt.ylabel('loss') 77 | plt.legend() 78 | # plt.savefig('output.jpg') 79 | plt.show() 80 | 81 | # テストデータの評価 82 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 83 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 84 | loss, 85 | acc 86 | )) 87 | -------------------------------------------------------------------------------- /4/26_mnist_early_stopping_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.5.2.2 早期終了 - TensorFlow (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import tensorflow as tf 10 | from tensorflow.keras import datasets 11 | from tensorflow.keras.models import Model 12 | from tensorflow.keras.layers import Dense, Dropout 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras import losses 15 | from tensorflow.keras import metrics 16 | from callbacks import EarlyStopping 17 | 18 | 19 | class DNN(Model): 20 | def __init__(self, hidden_dim, output_dim): 21 | super().__init__() 22 | self.l1 = Dense(hidden_dim, activation='relu') 23 | self.d1 = Dropout(0.5) 24 | self.l2 = Dense(hidden_dim, activation='relu') 25 | self.d2 = Dropout(0.5) 26 | self.l3 = Dense(hidden_dim, activation='relu') 27 | self.d3 = Dropout(0.5) 28 | self.l4 = Dense(output_dim, activation='softmax') 29 | 30 | self.ls = [self.l1, self.d1, 31 | self.l2, self.d2, 32 | self.l3, self.d3, 33 | self.l4] 34 | 35 | def call(self, x): 36 | for layer in self.ls: 37 | x = layer(x) 38 | 39 | return x 40 | 41 | 42 | if __name__ == '__main__': 43 | np.random.seed(123) 44 | tf.random.set_seed(123) 45 | 46 | ''' 47 | 1. 
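Data preparation (supplementary note)
    '''
    # The 20% validation split created below is what early stopping
    # monitors: the custom callbacks.EarlyStopping imported above counts
    # consecutive epochs whose val_loss is worse than the best seen so
    # far, and training halts once that count exceeds `patience`.

    '''
    1. 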
データの準備 48 | ''' 49 | mnist = datasets.mnist 50 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 51 | 52 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 53 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 54 | 55 | x_train, x_val, t_train, t_val = \ 56 | train_test_split(x_train, t_train, test_size=0.2) 57 | 58 | ''' 59 | 2. モデルの構築 60 | ''' 61 | model = DNN(200, 10) 62 | 63 | ''' 64 | 3. モデルの学習 65 | ''' 66 | criterion = losses.SparseCategoricalCrossentropy() 67 | optimizer = optimizers.SGD(learning_rate=0.01) 68 | train_loss = metrics.Mean() 69 | train_acc = metrics.SparseCategoricalAccuracy() 70 | val_loss = metrics.Mean() 71 | val_acc = metrics.SparseCategoricalAccuracy() 72 | 73 | def compute_loss(t, y): 74 | return criterion(t, y) 75 | 76 | def train_step(x, t): 77 | with tf.GradientTape() as tape: 78 | preds = model(x) 79 | loss = compute_loss(t, preds) 80 | grads = tape.gradient(loss, model.trainable_variables) 81 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 82 | train_loss(loss) 83 | train_acc(t, preds) 84 | 85 | return loss 86 | 87 | def val_step(x, t): 88 | preds = model(x) 89 | loss = compute_loss(t, preds) 90 | val_loss(loss) 91 | val_acc(t, preds) 92 | 93 | epochs = 1000 94 | batch_size = 100 95 | n_batches_train = x_train.shape[0] // batch_size 96 | n_batches_val = x_val.shape[0] // batch_size 97 | hist = {'loss': [], 'accuracy': [], 98 | 'val_loss': [], 'val_accuracy': []} 99 | es = EarlyStopping(patience=5, verbose=1) 100 | 101 | for epoch in range(epochs): 102 | x_, t_ = shuffle(x_train, t_train) 103 | 104 | for batch in range(n_batches_train): 105 | start = batch * batch_size 106 | end = start + batch_size 107 | train_step(x_[start:end], t_[start:end]) 108 | 109 | for batch in range(n_batches_val): 110 | start = batch * batch_size 111 | end = start + batch_size 112 | val_step(x_val[start:end], t_val[start:end]) 113 | 114 | hist['loss'].append(train_loss.result()) 115 | hist['accuracy'].append(train_acc.result()) 116 | hist['val_loss'].append(val_loss.result()) 117 | hist['val_accuracy'].append(val_acc.result()) 118 | 119 | print('epoch: {}, loss: {:.3}, acc: {:.3f}' 120 | ', val_loss: {:.3}, val_acc: {:.3f}'.format( 121 | epoch+1, 122 | train_loss.result(), 123 | train_acc.result(), 124 | val_loss.result(), 125 | val_acc.result() 126 | )) 127 | 128 | if es(val_loss.result()): # 早期終了判定 129 | break 130 | 131 | ''' 132 | 4. 
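Model evaluation (supplementary note)
    '''
    # Training above stops as soon as es(...) returns True, so the
    # curves below have len(hist['loss']) points rather than the full
    # 1000 epochs; the tail of the val_loss curve shows the
    # non-improving stretch that triggered the stop. hist holds the
    # tensors returned by result(); matplotlib usually copes, but
    # wrapping them in float() is a cheap safeguard.

    '''
    4. 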
モデルの評価 133 | ''' 134 | # 検証データの誤差の可視化 135 | loss = hist['loss'] 136 | val_loss = hist['val_loss'] 137 | 138 | fig = plt.figure() 139 | plt.rc('font', family='serif') 140 | plt.plot(range(len(loss)), loss, 141 | color='gray', linewidth=1, 142 | label='loss') 143 | plt.plot(range(len(val_loss)), val_loss, 144 | color='black', linewidth=1, 145 | label='val_loss') 146 | plt.xlabel('epochs') 147 | plt.ylabel('loss') 148 | plt.legend() 149 | # plt.savefig('output.jpg') 150 | plt.show() 151 | 152 | # テストデータの評価 153 | test_loss = metrics.Mean() 154 | test_acc = metrics.SparseCategoricalAccuracy() 155 | 156 | def test_step(x, t): 157 | preds = model(x) 158 | loss = compute_loss(t, preds) 159 | test_loss(loss) 160 | test_acc(t, preds) 161 | 162 | return loss 163 | 164 | test_step(x_test, t_test) 165 | 166 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 167 | test_loss.result(), 168 | test_acc.result() 169 | )) 170 | -------------------------------------------------------------------------------- /4/28_mnist_batch_norm_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 4.8.2 バッチ正規化 - Keras (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | import tensorflow as tf 9 | from tensorflow.keras import datasets 10 | from tensorflow.keras.models import Sequential 11 | from tensorflow.keras.layers import \ 12 | Dense, Dropout, Activation, BatchNormalization 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras.callbacks import EarlyStopping 15 | 16 | 17 | if __name__ == '__main__': 18 | np.random.seed(123) 19 | tf.random.set_seed(123) 20 | 21 | ''' 22 | 1. データの準備 23 | ''' 24 | mnist = datasets.mnist 25 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 26 | 27 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 28 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 29 | 30 | x_train, x_val, t_train, t_val = \ 31 | train_test_split(x_train, t_train, test_size=0.2) 32 | 33 | ''' 34 | 2. モデルの構築 35 | ''' 36 | model = Sequential() 37 | model.add(Dense(200, kernel_initializer='he_normal')) 38 | model.add(BatchNormalization()) 39 | model.add(Activation('relu')) 40 | model.add(Dropout(0.5)) 41 | model.add(Dense(200, kernel_initializer='he_normal')) 42 | model.add(BatchNormalization()) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.5)) 45 | model.add(Dense(200, kernel_initializer='he_normal')) 46 | model.add(BatchNormalization()) 47 | model.add(Activation('relu')) 48 | model.add(Dropout(0.5)) 49 | model.add(Dense(10, kernel_initializer='he_normal', 50 | activation='softmax')) 51 | 52 | ''' 53 | 3. モデルの学習 54 | ''' 55 | optimizer = optimizers.Adam(learning_rate=0.001, 56 | beta_1=0.9, beta_2=0.999, amsgrad=True) 57 | 58 | model.compile(optimizer=optimizer, 59 | loss='sparse_categorical_crossentropy', 60 | metrics=['accuracy']) 61 | 62 | es = EarlyStopping(monitor='val_loss', 63 | patience=5, 64 | verbose=1) 65 | 66 | hist = model.fit(x_train, t_train, 67 | epochs=1000, batch_size=100, 68 | verbose=2, 69 | validation_data=(x_val, t_val), 70 | callbacks=[es]) 71 | 72 | ''' 73 | 4. 
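Model evaluation (supplementary note)
    '''
    # Each block above is Dense -> BatchNormalization -> Activation ->
    # Dropout, so pre-activations are normalized before the ReLU. During
    # evaluate()/predict(), BatchNormalization uses its moving averages
    # of mean and variance rather than batch statistics, which is why no
    # extra test-time handling appears below.

    '''
    4. 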
モデルの評価 74 | ''' 75 | # 誤差の可視化 76 | loss = hist.history['loss'] 77 | val_loss = hist.history['val_loss'] 78 | 79 | fig = plt.figure() 80 | plt.rc('font', family='serif') 81 | plt.plot(range(len(loss)), loss, 82 | color='gray', linewidth=1, 83 | label='loss') 84 | plt.plot(range(len(val_loss)), val_loss, 85 | color='black', linewidth=1, 86 | label='val_loss') 87 | plt.xlabel('epochs') 88 | plt.ylabel('loss') 89 | plt.legend() 90 | # plt.savefig('output.jpg') 91 | plt.show() 92 | 93 | # テストデータの評価 94 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 95 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 96 | loss, 97 | acc 98 | )) 99 | -------------------------------------------------------------------------------- /4/callbacks/EarlyStopping.py: -------------------------------------------------------------------------------- 1 | class EarlyStopping: 2 | ''' 3 | 早期終了 (early stopping) 4 | ''' 5 | def __init__(self, patience=0, verbose=0): 6 | self._step = 0 7 | self._loss = float('inf') 8 | self.patience = patience 9 | self.verbose = verbose 10 | 11 | def __call__(self, loss): 12 | if self._loss < loss: 13 | self._step += 1 14 | if self._step > self.patience: 15 | if self.verbose: 16 | print('early stopping') 17 | return True 18 | else: 19 | self._step = 0 20 | self._loss = loss 21 | 22 | return False 23 | -------------------------------------------------------------------------------- /4/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .EarlyStopping import EarlyStopping 2 | -------------------------------------------------------------------------------- /5/01_sin_rnn_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.1.5.2 RNN - Keras (sin波) 3 | ''' 4 | 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.model_selection import train_test_split 9 | import tensorflow as tf 10 | from tensorflow.keras import datasets 11 | from tensorflow.keras.models import Sequential 12 | from tensorflow.keras.layers import Dense, SimpleRNN 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras.callbacks import EarlyStopping 15 | 16 | 17 | if __name__ == '__main__': 18 | np.random.seed(123) 19 | tf.random.set_seed(123) 20 | 21 | ''' 22 | 1. データの準備 23 | ''' 24 | def sin(x, T=100): 25 | return np.sin(2.0 * np.pi * x / T) 26 | 27 | def toy_problem(T=100, ampl=0.05): 28 | x = np.arange(0, 2*T + 1) 29 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, 30 | size=len(x)) 31 | return sin(x) + noise 32 | 33 | T = 100 34 | f = toy_problem(T).astype(np.float32) 35 | length_of_sequences = len(f) 36 | maxlen = 25 37 | 38 | x = [] 39 | t = [] 40 | 41 | for i in range(length_of_sequences - maxlen): 42 | x.append(f[i:i+maxlen]) 43 | t.append(f[i+maxlen]) 44 | 45 | x = np.array(x).reshape(-1, maxlen, 1) 46 | t = np.array(t).reshape(-1, 1) 47 | 48 | x_train, x_val, t_train, t_val = \ 49 | train_test_split(x, t, test_size=0.2, shuffle=False) 50 | 51 | ''' 52 | 2. モデルの構築 53 | ''' 54 | model = Sequential() 55 | model.add(SimpleRNN(50, activation='tanh', 56 | kernel_initializer='glorot_normal', 57 | recurrent_initializer='orthogonal')) 58 | model.add(Dense(1, activation='linear')) 59 | 60 | ''' 61 | 3. 
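Model training (supplementary note)
    '''
    # This is a regression task, so the network ends in a single linear
    # unit and is trained with mean squared error. Early stopping
    # monitors val_loss on the chronologically last 20% of the windows;
    # train_test_split above uses shuffle=False so that no future values
    # leak into the training set.

    '''
    3. 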
モデルの学習 62 | ''' 63 | optimizer = optimizers.Adam(learning_rate=0.001, 64 | beta_1=0.9, beta_2=0.999, amsgrad=True) 65 | 66 | model.compile(optimizer=optimizer, 67 | loss='mean_squared_error') 68 | 69 | es = EarlyStopping(monitor='val_loss', 70 | patience=10, 71 | verbose=1) 72 | 73 | hist = model.fit(x_train, t_train, 74 | epochs=1000, batch_size=100, 75 | verbose=2, 76 | validation_data=(x_val, t_val), 77 | callbacks=[es]) 78 | 79 | ''' 80 | 4. モデルの評価 81 | ''' 82 | # sin波の予測 83 | sin = toy_problem(T, ampl=0.) 84 | gen = [None for i in range(maxlen)] 85 | 86 | z = x[:1] 87 | 88 | # 逐次的に予測値を求める 89 | for i in range(length_of_sequences - maxlen): 90 | preds = model.predict(z[-1:]) 91 | z = np.append(z, preds)[1:] 92 | z = z.reshape(-1, maxlen, 1) 93 | gen.append(preds[0, 0]) 94 | 95 | # 予測値を可視化 96 | fig = plt.figure() 97 | plt.rc('font', family='serif') 98 | plt.xlim([0, 2*T]) 99 | plt.ylim([-1.5, 1.5]) 100 | plt.plot(range(len(f)), sin, 101 | color='gray', 102 | linestyle='--', linewidth=0.5) 103 | plt.plot(range(len(f)), gen, 104 | color='black', linewidth=1, 105 | marker='o', markersize=1, markerfacecolor='black', 106 | markeredgecolor='black') 107 | # plt.savefig('output.jpg') 108 | plt.show() 109 | -------------------------------------------------------------------------------- /5/02_sin_rnn_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.1.5.3 RNN - TensorFlow (sin波) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import tensorflow as tf 10 | from tensorflow.keras import datasets 11 | from tensorflow.keras.models import Model 12 | from tensorflow.keras.layers import Dense, SimpleRNN 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras import losses 15 | from tensorflow.keras import metrics 16 | from callbacks import EarlyStopping 17 | 18 | 19 | class RNN(Model): 20 | def __init__(self, hidden_dim): 21 | super().__init__() 22 | self.l1 = SimpleRNN(hidden_dim, activation='tanh', 23 | kernel_initializer='glorot_normal', 24 | recurrent_initializer='orthogonal') 25 | self.l2 = Dense(1, activation='linear') 26 | 27 | def call(self, x): 28 | h = self.l1(x) 29 | y = self.l2(h) 30 | 31 | return y 32 | 33 | 34 | if __name__ == '__main__': 35 | np.random.seed(123) 36 | tf.random.set_seed(123) 37 | 38 | ''' 39 | 1. データの準備 40 | ''' 41 | def sin(x, T=100): 42 | return np.sin(2.0 * np.pi * x / T) 43 | 44 | def toy_problem(T=100, ampl=0.05): 45 | x = np.arange(0, 2*T + 1) 46 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, 47 | size=len(x)) 48 | return sin(x) + noise 49 | 50 | T = 100 51 | f = toy_problem(T).astype(np.float32) 52 | length_of_sequences = len(f) 53 | maxlen = 25 54 | 55 | x = [] 56 | t = [] 57 | 58 | for i in range(length_of_sequences - maxlen): 59 | x.append(f[i:i+maxlen]) 60 | t.append(f[i+maxlen]) 61 | 62 | x = np.array(x).reshape(-1, maxlen, 1) 63 | t = np.array(t).reshape(-1, 1) 64 | 65 | x_train, x_val, t_train, t_val = \ 66 | train_test_split(x, t, test_size=0.2, shuffle=False) 67 | 68 | ''' 69 | 2. モデルの構築 70 | ''' 71 | model = RNN(50) 72 | 73 | ''' 74 | 3. 
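Model training (supplementary note)
    '''
    # The "+ 1" in the batch counts below keeps the final, smaller
    # mini-batch (the 201 samples yield 176 windows, roughly 140 of them
    # for training, i.e. one batch of 100 plus a remainder); the
    # chapter-4 scripts used exact division and dropped any remainder
    # instead.

    '''
    3. 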
モデルの学習 75 | ''' 76 | criterion = losses.MeanSquaredError() 77 | optimizer = optimizers.Adam(learning_rate=0.001, 78 | beta_1=0.9, beta_2=0.999, amsgrad=True) 79 | train_loss = metrics.Mean() 80 | val_loss = metrics.Mean() 81 | 82 | def compute_loss(t, y): 83 | return criterion(t, y) 84 | 85 | def train_step(x, t): 86 | with tf.GradientTape() as tape: 87 | preds = model(x) 88 | loss = compute_loss(t, preds) 89 | grads = tape.gradient(loss, model.trainable_variables) 90 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 91 | train_loss(loss) 92 | 93 | return loss 94 | 95 | def val_step(x, t): 96 | preds = model(x) 97 | loss = compute_loss(t, preds) 98 | val_loss(loss) 99 | 100 | epochs = 1000 101 | batch_size = 100 102 | n_batches_train = x_train.shape[0] // batch_size + 1 103 | n_batches_val = x_val.shape[0] // batch_size + 1 104 | hist = {'loss': [], 'val_loss': []} 105 | es = EarlyStopping(patience=10, verbose=1) 106 | 107 | for epoch in range(epochs): 108 | x_, t_ = shuffle(x_train, t_train) 109 | 110 | for batch in range(n_batches_train): 111 | start = batch * batch_size 112 | end = start + batch_size 113 | train_step(x_[start:end], t_[start:end]) 114 | 115 | for batch in range(n_batches_val): 116 | start = batch * batch_size 117 | end = start + batch_size 118 | val_step(x_val[start:end], t_val[start:end]) 119 | 120 | hist['loss'].append(train_loss.result()) 121 | hist['val_loss'].append(val_loss.result()) 122 | 123 | print('epoch: {}, loss: {:.3}, val_loss: {:.3f}'.format( 124 | epoch+1, 125 | train_loss.result(), 126 | val_loss.result() 127 | )) 128 | 129 | if es(val_loss.result()): 130 | break 131 | 132 | ''' 133 | 4. モデルの評価 134 | ''' 135 | # sin波の予測 136 | sin = toy_problem(T, ampl=0.) 137 | gen = [None for i in range(maxlen)] 138 | 139 | z = x[:1] 140 | 141 | for i in range(length_of_sequences - maxlen): 142 | preds = model.predict(z[-1:]) 143 | # preds = model(z[-1:]) 144 | z = np.append(z, preds)[1:] 145 | z = z.reshape(-1, maxlen, 1) 146 | gen.append(preds[0, 0]) 147 | 148 | # 予測値を可視化 149 | fig = plt.figure() 150 | plt.rc('font', family='serif') 151 | plt.xlim([0, 2*T]) 152 | plt.ylim([-1.5, 1.5]) 153 | plt.plot(range(len(f)), sin, 154 | color='gray', 155 | linestyle='--', linewidth=0.5) 156 | plt.plot(range(len(f)), gen, 157 | color='black', linewidth=1, 158 | marker='o', markersize=1, markerfacecolor='black', 159 | markeredgecolor='black') 160 | # plt.savefig('output.jpg') 161 | plt.show() 162 | -------------------------------------------------------------------------------- /5/03_sin_rnn_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.1.5.4 RNN - PyTorch (sin波) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optimizers 12 | from callbacks import EarlyStopping 13 | 14 | 15 | class RNN(nn.Module): 16 | def __init__(self, hidden_dim): 17 | super().__init__() 18 | self.l1 = nn.RNN(1, hidden_dim, 19 | nonlinearity='tanh', 20 | batch_first=True) 21 | self.l2 = nn.Linear(hidden_dim, 1) 22 | 23 | nn.init.xavier_normal_(self.l1.weight_ih_l0) 24 | nn.init.orthogonal_(self.l1.weight_hh_l0) 25 | 26 | def forward(self, x): 27 | h, _ = self.l1(x) 28 | y = self.l2(h[:, -1]) 29 | return y 30 | 31 | 32 | if __name__ == '__main__': 33 | np.random.seed(123) 34 | torch.manual_seed(123) 35 | device = torch.device('cuda' if 
torch.cuda.is_available() else 'cpu') 36 | 37 | ''' 38 | 1. データの準備 39 | ''' 40 | def sin(x, T=100): 41 | return np.sin(2.0 * np.pi * x / T) 42 | 43 | def toy_problem(T=100, ampl=0.05): 44 | x = np.arange(0, 2*T + 1) 45 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, 46 | size=len(x)) 47 | return sin(x) + noise 48 | 49 | T = 100 50 | f = toy_problem(T).astype(np.float32) 51 | length_of_sequences = len(f) 52 | maxlen = 25 53 | 54 | x = [] 55 | t = [] 56 | 57 | for i in range(length_of_sequences - maxlen): 58 | x.append(f[i:i+maxlen]) 59 | t.append(f[i+maxlen]) 60 | 61 | x = np.array(x).reshape(-1, maxlen, 1) 62 | t = np.array(t).reshape(-1, 1) 63 | 64 | x_train, x_val, t_train, t_val = \ 65 | train_test_split(x, t, test_size=0.2, shuffle=False) 66 | 67 | ''' 68 | 2. モデルの構築 69 | ''' 70 | model = RNN(50).to(device) 71 | 72 | ''' 73 | 3. モデルの学習 74 | ''' 75 | criterion = nn.MSELoss(reduction='mean') 76 | optimizer = optimizers.Adam(model.parameters(), 77 | lr=0.001, 78 | betas=(0.9, 0.999), amsgrad=True) 79 | 80 | def compute_loss(t, y): 81 | return criterion(y, t) 82 | 83 | def train_step(x, t): 84 | x = torch.Tensor(x).to(device) 85 | t = torch.Tensor(t).to(device) 86 | model.train() 87 | preds = model(x) 88 | loss = compute_loss(t, preds) 89 | optimizer.zero_grad() 90 | loss.backward() 91 | optimizer.step() 92 | 93 | return loss, preds 94 | 95 | def val_step(x, t): 96 | x = torch.Tensor(x).to(device) 97 | t = torch.Tensor(t).to(device) 98 | model.eval() 99 | preds = model(x) 100 | loss = criterion(preds, t) 101 | 102 | return loss, preds 103 | 104 | epochs = 1000 105 | batch_size = 100 106 | n_batches_train = x_train.shape[0] // batch_size + 1 107 | n_batches_val = x_val.shape[0] // batch_size + 1 108 | hist = {'loss': [], 'val_loss': []} 109 | es = EarlyStopping(patience=10, verbose=1) 110 | 111 | for epoch in range(epochs): 112 | train_loss = 0. 113 | val_loss = 0. 114 | x_, t_ = shuffle(x_train, t_train) 115 | 116 | for batch in range(n_batches_train): 117 | start = batch * batch_size 118 | end = start + batch_size 119 | loss, _ = train_step(x_[start:end], t_[start:end]) 120 | train_loss += loss.item() 121 | 122 | for batch in range(n_batches_val): 123 | start = batch * batch_size 124 | end = start + batch_size 125 | loss, _ = val_step(x_val[start:end], t_val[start:end]) 126 | val_loss += loss.item() 127 | 128 | train_loss /= n_batches_train 129 | val_loss /= n_batches_val 130 | 131 | hist['loss'].append(train_loss) 132 | hist['val_loss'].append(val_loss) 133 | 134 | print('epoch: {}, loss: {:.3}, val_loss: {:.3f}'.format( 135 | epoch+1, 136 | train_loss, 137 | val_loss 138 | )) 139 | 140 | if es(val_loss): 141 | break 142 | 143 | ''' 144 | 4. モデルの評価 145 | ''' 146 | model.eval() 147 | 148 | # sin波の予測 149 | sin = toy_problem(T, ampl=0.) 
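    # Closed-loop generation: starting from the first ground-truth
    # window, each prediction is appended to the window (dropping its
    # oldest value) and fed back in, so errors can compound over the
    # sequence. The first `maxlen` entries of `gen` (built below) stay
    # None purely so the plot aligns with the original series.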
150 | gen = [None for i in range(maxlen)] 151 | 152 | z = x[:1] 153 | 154 | for i in range(length_of_sequences - maxlen): 155 | z_ = torch.Tensor(z[-1:]).to(device) 156 | preds = model(z_).data.cpu().numpy() 157 | z = np.append(z, preds)[1:] 158 | z = z.reshape(-1, maxlen, 1) 159 | gen.append(preds[0, 0]) 160 | 161 | # 予測値を可視化 162 | fig = plt.figure() 163 | plt.rc('font', family='serif') 164 | plt.xlim([0, 2*T]) 165 | plt.ylim([-1.5, 1.5]) 166 | plt.plot(range(len(f)), sin, 167 | color='gray', 168 | linestyle='--', linewidth=0.5) 169 | plt.plot(range(len(f)), gen, 170 | color='black', linewidth=1, 171 | marker='o', markersize=1, markerfacecolor='black', 172 | markeredgecolor='black') 173 | # plt.savefig('output.jpg') 174 | plt.show() 175 | -------------------------------------------------------------------------------- /5/04_sin_lstm_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.2.6 LSTM - Keras (sin波) 3 | ''' 4 | 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.model_selection import train_test_split 9 | import tensorflow as tf 10 | from tensorflow.keras import datasets 11 | from tensorflow.keras.models import Sequential 12 | from tensorflow.keras.layers import Dense, LSTM 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras.callbacks import EarlyStopping 15 | 16 | 17 | if __name__ == '__main__': 18 | np.random.seed(123) 19 | tf.random.set_seed(123) 20 | 21 | ''' 22 | 1. データの準備 23 | ''' 24 | def sin(x, T=100): 25 | return np.sin(2.0 * np.pi * x / T) 26 | 27 | def toy_problem(T=100, ampl=0.05): 28 | x = np.arange(0, 2*T + 1) 29 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, 30 | size=len(x)) 31 | return sin(x) + noise 32 | 33 | T = 100 34 | f = toy_problem(T).astype(np.float32) 35 | length_of_sequences = len(f) 36 | maxlen = 25 37 | 38 | x = [] 39 | t = [] 40 | 41 | for i in range(length_of_sequences - maxlen): 42 | x.append(f[i:i+maxlen]) 43 | t.append(f[i+maxlen]) 44 | 45 | x = np.array(x).reshape(-1, maxlen, 1) 46 | t = np.array(t).reshape(-1, 1) 47 | 48 | x_train, x_val, t_train, t_val = \ 49 | train_test_split(x, t, test_size=0.2, shuffle=False) 50 | 51 | ''' 52 | 2. モデルの構築 53 | ''' 54 | model = Sequential() 55 | model.add(LSTM(50, activation='tanh', 56 | recurrent_activation='sigmoid', 57 | kernel_initializer='glorot_normal', 58 | recurrent_initializer='orthogonal')) 59 | model.add(Dense(1, activation='linear')) 60 | 61 | ''' 62 | 3. モデルの学習 63 | ''' 64 | optimizer = optimizers.Adam(learning_rate=0.001, 65 | beta_1=0.9, beta_2=0.999, amsgrad=True) 66 | 67 | model.compile(optimizer=optimizer, 68 | loss='mean_squared_error') 69 | 70 | es = EarlyStopping(monitor='val_loss', 71 | patience=10, 72 | verbose=1) 73 | 74 | hist = model.fit(x_train, t_train, 75 | epochs=1000, batch_size=100, 76 | verbose=2, 77 | validation_data=(x_val, t_val), 78 | callbacks=[es]) 79 | 80 | ''' 81 | 4. モデルの評価 82 | ''' 83 | # sin波の予測 84 | sin = toy_problem(T, ampl=0.) 
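    # Note: this assignment rebinds the name `sin` from the helper
    # function above to the noise-free target sequence. It is harmless
    # here because sin() is not called again afterwards, but worth
    # keeping in mind when reusing the code.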
85 | gen = [None for i in range(maxlen)] 86 | 87 | z = x[:1] 88 | 89 | # 逐次的に予測値を求める 90 | for i in range(length_of_sequences - maxlen): 91 | preds = model.predict(z[-1:]) 92 | z = np.append(z, preds)[1:] 93 | z = z.reshape(-1, maxlen, 1) 94 | gen.append(preds[0, 0]) 95 | 96 | # 予測値を可視化 97 | fig = plt.figure() 98 | plt.rc('font', family='serif') 99 | plt.xlim([0, 2*T]) 100 | plt.ylim([-1.5, 1.5]) 101 | plt.plot(range(len(f)), sin, 102 | color='gray', 103 | linestyle='--', linewidth=0.5) 104 | plt.plot(range(len(f)), gen, 105 | color='black', linewidth=1, 106 | marker='o', markersize=1, markerfacecolor='black', 107 | markeredgecolor='black') 108 | # plt.savefig('output.jpg') 109 | plt.show() 110 | -------------------------------------------------------------------------------- /5/05_sin_lstm_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.2.6 LSTM - TensorFlow (sin波) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import tensorflow as tf 10 | from tensorflow.keras import datasets 11 | from tensorflow.keras.models import Model 12 | from tensorflow.keras.layers import Dense, LSTM 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras import losses 15 | from tensorflow.keras import metrics 16 | from callbacks import EarlyStopping 17 | 18 | 19 | class RNN(Model): 20 | def __init__(self, hidden_dim): 21 | super().__init__() 22 | self.l1 = LSTM(hidden_dim, activation='tanh', 23 | recurrent_activation='sigmoid', 24 | kernel_initializer='glorot_normal', 25 | recurrent_initializer='orthogonal') 26 | self.l2 = Dense(1, activation='linear') 27 | 28 | def call(self, x): 29 | h = self.l1(x) 30 | y = self.l2(h) 31 | 32 | return y 33 | 34 | 35 | if __name__ == '__main__': 36 | np.random.seed(123) 37 | tf.random.set_seed(123) 38 | 39 | ''' 40 | 1. データの準備 41 | ''' 42 | def sin(x, T=100): 43 | return np.sin(2.0 * np.pi * x / T) 44 | 45 | def toy_problem(T=100, ampl=0.05): 46 | x = np.arange(0, 2*T + 1) 47 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, 48 | size=len(x)) 49 | return sin(x) + noise 50 | 51 | T = 100 52 | f = toy_problem(T).astype(np.float32) 53 | length_of_sequences = len(f) 54 | maxlen = 25 55 | 56 | x = [] 57 | t = [] 58 | 59 | for i in range(length_of_sequences - maxlen): 60 | x.append(f[i:i+maxlen]) 61 | t.append(f[i+maxlen]) 62 | 63 | x = np.array(x).reshape(-1, maxlen, 1) 64 | t = np.array(t).reshape(-1, 1) 65 | 66 | x_train, x_val, t_train, t_val = \ 67 | train_test_split(x, t, test_size=0.2, shuffle=False) 68 | 69 | ''' 70 | 2. モデルの構築 71 | ''' 72 | model = RNN(50) 73 | 74 | ''' 75 | 3. 
モデルの学習 76 | ''' 77 | criterion = losses.MeanSquaredError() 78 | optimizer = optimizers.Adam(learning_rate=0.001, 79 | beta_1=0.9, beta_2=0.999, amsgrad=True) 80 | train_loss = metrics.Mean() 81 | val_loss = metrics.Mean() 82 | 83 | def compute_loss(t, y): 84 | return criterion(t, y) 85 | 86 | def train_step(x, t): 87 | with tf.GradientTape() as tape: 88 | preds = model(x) 89 | loss = compute_loss(t, preds) 90 | grads = tape.gradient(loss, model.trainable_variables) 91 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 92 | train_loss(loss) 93 | 94 | return loss 95 | 96 | def val_step(x, t): 97 | preds = model(x) 98 | loss = compute_loss(t, preds) 99 | val_loss(loss) 100 | 101 | epochs = 1000 102 | batch_size = 100 103 | n_batches_train = x_train.shape[0] // batch_size + 1 104 | n_batches_val = x_val.shape[0] // batch_size + 1 105 | hist = {'loss': [], 'val_loss': []} 106 | es = EarlyStopping(patience=10, verbose=1) 107 | 108 | for epoch in range(epochs): 109 | x_, t_ = shuffle(x_train, t_train) 110 | 111 | for batch in range(n_batches_train): 112 | start = batch * batch_size 113 | end = start + batch_size 114 | train_step(x_[start:end], t_[start:end]) 115 | 116 | for batch in range(n_batches_val): 117 | start = batch * batch_size 118 | end = start + batch_size 119 | val_step(x_val[start:end], t_val[start:end]) 120 | 121 | hist['loss'].append(train_loss.result()) 122 | hist['val_loss'].append(val_loss.result()) 123 | 124 | print('epoch: {}, loss: {:.3}, val_loss: {:.3f}'.format( 125 | epoch+1, 126 | train_loss.result(), 127 | val_loss.result() 128 | )) 129 | 130 | if es(val_loss.result()): 131 | break 132 | 133 | ''' 134 | 4. モデルの評価 135 | ''' 136 | # sin波の予測 137 | sin = toy_problem(T, ampl=0.) 138 | gen = [None for i in range(maxlen)] 139 | 140 | z = x[:1] 141 | 142 | for i in range(length_of_sequences - maxlen): 143 | preds = model.predict(z[-1:]) 144 | # preds = model(z[-1:]) 145 | z = np.append(z, preds)[1:] 146 | z = z.reshape(-1, maxlen, 1) 147 | gen.append(preds[0, 0]) 148 | 149 | # 予測値を可視化 150 | fig = plt.figure() 151 | plt.rc('font', family='serif') 152 | plt.xlim([0, 2*T]) 153 | plt.ylim([-1.5, 1.5]) 154 | plt.plot(range(len(f)), sin, 155 | color='gray', 156 | linestyle='--', linewidth=0.5) 157 | plt.plot(range(len(f)), gen, 158 | color='black', linewidth=1, 159 | marker='o', markersize=1, markerfacecolor='black', 160 | markeredgecolor='black') 161 | # plt.savefig('output.jpg') 162 | plt.show() 163 | -------------------------------------------------------------------------------- /5/06_sin_lstm_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.2.6 LSTM - PyTorch (sin波) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optimizers 12 | from callbacks import EarlyStopping 13 | 14 | 15 | class RNN(nn.Module): 16 | def __init__(self, hidden_dim): 17 | super().__init__() 18 | self.l1 = nn.LSTM(1, hidden_dim, 19 | batch_first=True) 20 | self.l2 = nn.Linear(hidden_dim, 1) 21 | 22 | nn.init.xavier_normal_(self.l1.weight_ih_l0) 23 | nn.init.orthogonal_(self.l1.weight_hh_l0) 24 | 25 | def forward(self, x): 26 | h, _ = self.l1(x) 27 | y = self.l2(h[:, -1]) 28 | return y 29 | 30 | 31 | if __name__ == '__main__': 32 | np.random.seed(123) 33 | torch.manual_seed(123) 34 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 35 
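In the `RNN` class above, the initializers are applied to `weight_ih_l0` and `weight_hh_l0`, which in PyTorch stack all four LSTM gate matrices (input, forget, cell, output) into single tensors. Xavier and orthogonal initialization therefore act on `(4 * hidden_dim, input_dim)` and `(4 * hidden_dim, hidden_dim)` matrices rather than gate by gate. A quick standalone shape check:

import torch.nn as nn

lstm = nn.LSTM(1, 50, batch_first=True)
print(lstm.weight_ih_l0.shape)   # torch.Size([200, 1])  -- 4 gates x 50 units
print(lstm.weight_hh_l0.shape)   # torch.Size([200, 50])
print(lstm.bias_ih_l0.shape)     # torch.Size([200])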
| 36 | ''' 37 | 1. データの準備 38 | ''' 39 | def sin(x, T=100): 40 | return np.sin(2.0 * np.pi * x / T) 41 | 42 | def toy_problem(T=100, ampl=0.05): 43 | x = np.arange(0, 2*T + 1) 44 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, 45 | size=len(x)) 46 | return sin(x) + noise 47 | 48 | T = 100 49 | f = toy_problem(T).astype(np.float32) 50 | length_of_sequences = len(f) 51 | maxlen = 25 52 | 53 | x = [] 54 | t = [] 55 | 56 | for i in range(length_of_sequences - maxlen): 57 | x.append(f[i:i+maxlen]) 58 | t.append(f[i+maxlen]) 59 | 60 | x = np.array(x).reshape(-1, maxlen, 1) 61 | t = np.array(t).reshape(-1, 1) 62 | 63 | x_train, x_val, t_train, t_val = \ 64 | train_test_split(x, t, test_size=0.2, shuffle=False) 65 | 66 | ''' 67 | 2. モデルの構築 68 | ''' 69 | model = RNN(50).to(device) 70 | 71 | ''' 72 | 3. モデルの学習 73 | ''' 74 | criterion = nn.MSELoss(reduction='mean') 75 | optimizer = optimizers.Adam(model.parameters(), 76 | lr=0.001, 77 | betas=(0.9, 0.999), amsgrad=True) 78 | 79 | def compute_loss(t, y): 80 | return criterion(y, t) 81 | 82 | def train_step(x, t): 83 | x = torch.Tensor(x).to(device) 84 | t = torch.Tensor(t).to(device) 85 | model.train() 86 | preds = model(x) 87 | loss = compute_loss(t, preds) 88 | optimizer.zero_grad() 89 | loss.backward() 90 | optimizer.step() 91 | 92 | return loss, preds 93 | 94 | def val_step(x, t): 95 | x = torch.Tensor(x).to(device) 96 | t = torch.Tensor(t).to(device) 97 | model.eval() 98 | preds = model(x) 99 | loss = criterion(preds, t) 100 | 101 | return loss, preds 102 | 103 | epochs = 1000 104 | batch_size = 100 105 | n_batches_train = x_train.shape[0] // batch_size + 1 106 | n_batches_val = x_val.shape[0] // batch_size + 1 107 | hist = {'loss': [], 'val_loss': []} 108 | es = EarlyStopping(patience=10, verbose=1) 109 | 110 | for epoch in range(epochs): 111 | train_loss = 0. 112 | val_loss = 0. 113 | x_, t_ = shuffle(x_train, t_train) 114 | 115 | for batch in range(n_batches_train): 116 | start = batch * batch_size 117 | end = start + batch_size 118 | loss, _ = train_step(x_[start:end], t_[start:end]) 119 | train_loss += loss.item() 120 | 121 | for batch in range(n_batches_val): 122 | start = batch * batch_size 123 | end = start + batch_size 124 | loss, _ = val_step(x_val[start:end], t_val[start:end]) 125 | val_loss += loss.item() 126 | 127 | train_loss /= n_batches_train 128 | val_loss /= n_batches_val 129 | 130 | hist['loss'].append(train_loss) 131 | hist['val_loss'].append(val_loss) 132 | 133 | print('epoch: {}, loss: {:.3}, val_loss: {:.3f}'.format( 134 | epoch+1, 135 | train_loss, 136 | val_loss 137 | )) 138 | 139 | if es(val_loss): 140 | break 141 | 142 | ''' 143 | 4. モデルの評価 144 | ''' 145 | model.eval() 146 | 147 | # sin波の予測 148 | sin = toy_problem(T, ampl=0.) 
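The generation loop below moves predictions to NumPy with `model(z_).data.cpu().numpy()`. The `.data` attribute is a legacy idiom; an equivalent and now more common pattern is to disable autograd around the forward pass, as in this sketch (using the `model`, `z` and `device` defined above):

with torch.no_grad():                       # no autograd bookkeeping at inference
    z_ = torch.Tensor(z[-1:]).to(device)
    preds = model(z_).cpu().numpy()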
149 | gen = [None for i in range(maxlen)] 150 | 151 | z = x[:1] 152 | 153 | for i in range(length_of_sequences - maxlen): 154 | z_ = torch.Tensor(z[-1:]).to(device) 155 | preds = model(z_).data.cpu().numpy() 156 | z = np.append(z, preds)[1:] 157 | z = z.reshape(-1, maxlen, 1) 158 | gen.append(preds[0, 0]) 159 | 160 | # 予測値を可視化 161 | fig = plt.figure() 162 | plt.rc('font', family='serif') 163 | plt.xlim([0, 2*T]) 164 | plt.ylim([-1.5, 1.5]) 165 | plt.plot(range(len(f)), sin, 166 | color='gray', 167 | linestyle='--', linewidth=0.5) 168 | plt.plot(range(len(f)), gen, 169 | color='black', linewidth=1, 170 | marker='o', markersize=1, markerfacecolor='black', 171 | markeredgecolor='black') 172 | # plt.savefig('output.jpg') 173 | plt.show() 174 | -------------------------------------------------------------------------------- /5/07_adding_problem_lstm_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.2.7 LSTM - Keras (Adding Problem) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | import tensorflow as tf 9 | from tensorflow.keras import datasets 10 | from tensorflow.keras.models import Sequential 11 | from tensorflow.keras.layers import Dense, LSTM 12 | from tensorflow.keras import optimizers 13 | from tensorflow.keras.callbacks import EarlyStopping 14 | import pickle 15 | 16 | 17 | if __name__ == '__main__': 18 | np.random.seed(123) 19 | tf.random.set_seed(123) 20 | 21 | ''' 22 | 1. データの準備 23 | ''' 24 | def mask(T=200): 25 | mask = np.zeros(T) 26 | indices = np.random.permutation(np.arange(T))[:2] 27 | mask[indices] = 1 28 | return mask 29 | 30 | def toy_problem(N, T=200): 31 | signals = np.random.uniform(low=0.0, high=1.0, 32 | size=(N, T)) 33 | masks = np.zeros((N, T)) 34 | for i in range(N): 35 | masks[i] = mask(T) 36 | 37 | data = np.zeros((N, T, 2)) 38 | data[:, :, 0] = signals[:] 39 | data[:, :, 1] = masks[:] 40 | target = (signals * masks).sum(axis=1).reshape(N, 1) 41 | 42 | return (data.astype(np.float32), 43 | target.astype(np.float32)) 44 | 45 | N = 10000 46 | T = 200 47 | maxlen = T 48 | 49 | x, t = toy_problem(N, T) 50 | x_train, x_val, t_train, t_val = \ 51 | train_test_split(x, t, test_size=0.2, shuffle=False) 52 | 53 | ''' 54 | 2. モデルの構築 55 | ''' 56 | model = Sequential() 57 | model.add(LSTM(50, activation='tanh', 58 | recurrent_activation='sigmoid', 59 | kernel_initializer='glorot_normal', 60 | recurrent_initializer='orthogonal')) 61 | model.add(Dense(1, activation='linear')) 62 | 63 | ''' 64 | 3. モデルの学習 65 | ''' 66 | optimizer = optimizers.Adam(learning_rate=0.001, 67 | beta_1=0.9, beta_2=0.999, amsgrad=True) 68 | 69 | model.compile(optimizer=optimizer, 70 | loss='mean_squared_error') 71 | 72 | hist = model.fit(x_train, t_train, 73 | epochs=500, batch_size=100, 74 | verbose=2, 75 | validation_data=(x_val, t_val)) 76 | 77 | ''' 78 | 4. 
モデルの評価 79 | ''' 80 | # 誤差の可視化 81 | val_loss = hist.history['val_loss'] 82 | 83 | fig = plt.figure() 84 | plt.rc('font', family='serif') 85 | plt.plot(range(len(val_loss)), val_loss, 86 | color='black', linewidth=1, 87 | label='val_loss') 88 | plt.xlabel('epochs') 89 | plt.ylabel('loss') 90 | plt.legend() 91 | # plt.savefig('output.jpg') 92 | plt.show() 93 | -------------------------------------------------------------------------------- /5/08_adding_problem_lstm_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.2.7 LSTM - TensorFlow (Adding Problem) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import tensorflow as tf 10 | from tensorflow.keras import datasets 11 | from tensorflow.keras.models import Model 12 | from tensorflow.keras.layers import Dense, LSTM 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras import losses 15 | from tensorflow.keras import metrics 16 | 17 | 18 | class RNN(Model): 19 | def __init__(self, hidden_dim): 20 | super().__init__() 21 | self.l1 = LSTM(hidden_dim, activation='tanh', 22 | recurrent_activation='sigmoid', 23 | kernel_initializer='glorot_normal', 24 | recurrent_initializer='orthogonal') 25 | self.l2 = Dense(1, activation='linear') 26 | 27 | def call(self, x): 28 | h = self.l1(x) 29 | y = self.l2(h) 30 | 31 | return y 32 | 33 | 34 | if __name__ == '__main__': 35 | np.random.seed(123) 36 | tf.random.set_seed(123) 37 | 38 | ''' 39 | 1. データの準備 40 | ''' 41 | def mask(T=200): 42 | mask = np.zeros(T) 43 | indices = np.random.permutation(np.arange(T))[:2] 44 | mask[indices] = 1 45 | return mask 46 | 47 | def toy_problem(N, T=200): 48 | signals = np.random.uniform(low=0.0, high=1.0, 49 | size=(N, T)) 50 | masks = np.zeros((N, T)) 51 | for i in range(N): 52 | masks[i] = mask(T) 53 | 54 | data = np.zeros((N, T, 2)) 55 | data[:, :, 0] = signals[:] 56 | data[:, :, 1] = masks[:] 57 | target = (signals * masks).sum(axis=1).reshape(N, 1) 58 | 59 | return (data.astype(np.float32), 60 | target.astype(np.float32)) 61 | 62 | N = 10000 63 | T = 200 64 | maxlen = T 65 | 66 | x, t = toy_problem(N, T) 67 | x_train, x_val, t_train, t_val = \ 68 | train_test_split(x, t, test_size=0.2, shuffle=False) 69 | 70 | ''' 71 | 2. モデルの構築 72 | ''' 73 | model = RNN(50) 74 | 75 | ''' 76 | 3. 
モデルの学習 77 | ''' 78 | criterion = losses.MeanSquaredError() 79 | optimizer = optimizers.Adam(learning_rate=0.001, 80 | beta_1=0.9, beta_2=0.999, amsgrad=True) 81 | train_loss = metrics.Mean() 82 | val_loss = metrics.Mean() 83 | 84 | def compute_loss(t, y): 85 | return criterion(t, y) 86 | 87 | def train_step(x, t): 88 | with tf.GradientTape() as tape: 89 | preds = model(x) 90 | loss = compute_loss(t, preds) 91 | grads = tape.gradient(loss, model.trainable_variables) 92 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 93 | train_loss(loss) 94 | 95 | return loss 96 | 97 | def val_step(x, t): 98 | preds = model(x) 99 | loss = compute_loss(t, preds) 100 | val_loss(loss) 101 | 102 | epochs = 500 103 | batch_size = 100 104 | n_batches_train = x_train.shape[0] // batch_size 105 | n_batches_val = x_val.shape[0] // batch_size 106 | hist = {'loss': [], 'val_loss': []} 107 | 108 | for epoch in range(epochs): 109 | x_, t_ = shuffle(x_train, t_train) 110 | 111 | for batch in range(n_batches_train): 112 | start = batch * batch_size 113 | end = start + batch_size 114 | train_step(x_[start:end], t_[start:end]) 115 | 116 | for batch in range(n_batches_val): 117 | start = batch * batch_size 118 | end = start + batch_size 119 | val_step(x_val[start:end], t_val[start:end]) 120 | 121 | hist['loss'].append(train_loss.result()) 122 | hist['val_loss'].append(val_loss.result()) 123 | 124 | print('epoch: {}, loss: {:.3}, val_loss: {:.3f}'.format( 125 | epoch+1, 126 | train_loss.result(), 127 | val_loss.result() 128 | )) 129 | 130 | ''' 131 | 4. モデルの評価 132 | ''' 133 | # 誤差の可視化 134 | val_loss = hist['val_loss'] 135 | 136 | fig = plt.figure() 137 | plt.rc('font', family='serif') 138 | plt.plot(range(len(val_loss)), val_loss, 139 | color='black', linewidth=1, 140 | label='val_loss') 141 | plt.xlabel('epochs') 142 | plt.ylabel('loss') 143 | plt.legend() 144 | # plt.savefig('output.jpg') 145 | plt.show() 146 | -------------------------------------------------------------------------------- /5/09_adding_problem_lstm_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.2.7 LSTM - PyTorch (Adding Problem) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optimizers 12 | 13 | 14 | class RNN(nn.Module): 15 | def __init__(self, hidden_dim): 16 | super().__init__() 17 | self.l1 = nn.LSTM(2, hidden_dim, 18 | batch_first=True) 19 | self.l2 = nn.Linear(hidden_dim, 1) 20 | 21 | nn.init.xavier_normal_(self.l1.weight_ih_l0) 22 | nn.init.orthogonal_(self.l1.weight_hh_l0) 23 | 24 | def forward(self, x): 25 | h, _ = self.l1(x) 26 | y = self.l2(h[:, -1]) 27 | return y 28 | 29 | 30 | if __name__ == '__main__': 31 | np.random.seed(123) 32 | torch.manual_seed(123) 33 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 34 | 35 | ''' 36 | 1. 
データの準備 37 | ''' 38 | def mask(T=200): 39 | mask = np.zeros(T) 40 | indices = np.random.permutation(np.arange(T))[:2] 41 | mask[indices] = 1 42 | return mask 43 | 44 | def toy_problem(N, T=200): 45 | signals = np.random.uniform(low=0.0, high=1.0, 46 | size=(N, T)) 47 | masks = np.zeros((N, T)) 48 | for i in range(N): 49 | masks[i] = mask(T) 50 | 51 | data = np.zeros((N, T, 2)) 52 | data[:, :, 0] = signals[:] 53 | data[:, :, 1] = masks[:] 54 | target = (signals * masks).sum(axis=1).reshape(N, 1) 55 | 56 | return (data.astype(np.float32), 57 | target.astype(np.float32)) 58 | 59 | N = 10000 60 | T = 200 61 | maxlen = T 62 | 63 | x, t = toy_problem(N, T) 64 | x_train, x_val, t_train, t_val = \ 65 | train_test_split(x, t, test_size=0.2, shuffle=False) 66 | 67 | ''' 68 | 2. モデルの構築 69 | ''' 70 | model = RNN(50).to(device) 71 | 72 | ''' 73 | 3. モデルの学習 74 | ''' 75 | criterion = nn.MSELoss(reduction='mean') 76 | optimizer = optimizers.Adam(model.parameters(), 77 | lr=0.001, 78 | betas=(0.9, 0.999), amsgrad=True) 79 | 80 | def compute_loss(t, y): 81 | return criterion(y, t) 82 | 83 | def train_step(x, t): 84 | x = torch.Tensor(x).to(device) 85 | t = torch.Tensor(t).to(device) 86 | model.train() 87 | preds = model(x) 88 | loss = compute_loss(t, preds) 89 | optimizer.zero_grad() 90 | loss.backward() 91 | optimizer.step() 92 | 93 | return loss, preds 94 | 95 | def val_step(x, t): 96 | x = torch.Tensor(x).to(device) 97 | t = torch.Tensor(t).to(device) 98 | model.eval() 99 | preds = model(x) 100 | loss = criterion(preds, t) 101 | 102 | return loss, preds 103 | 104 | epochs = 500 105 | batch_size = 100 106 | n_batches_train = x_train.shape[0] // batch_size 107 | n_batches_val = x_val.shape[0] // batch_size 108 | hist = {'loss': [], 'val_loss': []} 109 | 110 | for epoch in range(epochs): 111 | train_loss = 0. 112 | val_loss = 0. 113 | x_, t_ = shuffle(x_train, t_train) 114 | 115 | for batch in range(n_batches_train): 116 | start = batch * batch_size 117 | end = start + batch_size 118 | loss, _ = train_step(x_[start:end], t_[start:end]) 119 | train_loss += loss.item() 120 | 121 | for batch in range(n_batches_val): 122 | start = batch * batch_size 123 | end = start + batch_size 124 | loss, _ = val_step(x_val[start:end], t_val[start:end]) 125 | val_loss += loss.item() 126 | 127 | train_loss /= n_batches_train 128 | val_loss /= n_batches_val 129 | 130 | hist['loss'].append(train_loss) 131 | hist['val_loss'].append(val_loss) 132 | 133 | print('epoch: {}, loss: {:.3}, val_loss: {:.3f}'.format( 134 | epoch+1, 135 | train_loss, 136 | val_loss 137 | )) 138 | 139 | ''' 140 | 4. 
モデルの評価 141 | ''' 142 | # 誤差の可視化 143 | val_loss = hist['val_loss'] 144 | 145 | fig = plt.figure() 146 | plt.rc('font', family='serif') 147 | plt.plot(range(len(val_loss)), val_loss, 148 | color='black', linewidth=1, 149 | label='val_loss') 150 | plt.xlabel('epochs') 151 | plt.ylabel('loss') 152 | plt.legend() 153 | # plt.savefig('output.jpg') 154 | plt.show() 155 | -------------------------------------------------------------------------------- /5/10_sin_gru_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.3.2 GRU - Keras (sin波) 3 | ''' 4 | 5 | 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.model_selection import train_test_split 9 | import tensorflow as tf 10 | from tensorflow.keras import datasets 11 | from tensorflow.keras.models import Sequential 12 | from tensorflow.keras.layers import Dense, GRU 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras.callbacks import EarlyStopping 15 | 16 | 17 | if __name__ == '__main__': 18 | np.random.seed(123) 19 | tf.random.set_seed(123) 20 | 21 | ''' 22 | 1. データの準備 23 | ''' 24 | def sin(x, T=100): 25 | return np.sin(2.0 * np.pi * x / T) 26 | 27 | def toy_problem(T=100, ampl=0.05): 28 | x = np.arange(0, 2*T + 1) 29 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, 30 | size=len(x)) 31 | return sin(x) + noise 32 | 33 | T = 100 34 | f = toy_problem(T).astype(np.float32) 35 | length_of_sequences = len(f) 36 | maxlen = 25 37 | 38 | x = [] 39 | t = [] 40 | 41 | for i in range(length_of_sequences - maxlen): 42 | x.append(f[i:i+maxlen]) 43 | t.append(f[i+maxlen]) 44 | 45 | x = np.array(x).reshape(-1, maxlen, 1) 46 | t = np.array(t).reshape(-1, 1) 47 | 48 | x_train, x_val, t_train, t_val = \ 49 | train_test_split(x, t, test_size=0.2, shuffle=False) 50 | 51 | ''' 52 | 2. モデルの構築 53 | ''' 54 | model = Sequential() 55 | model.add(GRU(50, activation='tanh', 56 | recurrent_activation='sigmoid', 57 | kernel_initializer='glorot_normal', 58 | recurrent_initializer='orthogonal')) 59 | model.add(Dense(1, activation='linear')) 60 | 61 | ''' 62 | 3. モデルの学習 63 | ''' 64 | optimizer = optimizers.Adam(learning_rate=0.001, 65 | beta_1=0.9, beta_2=0.999, amsgrad=True) 66 | 67 | model.compile(optimizer=optimizer, 68 | loss='mean_squared_error') 69 | 70 | es = EarlyStopping(monitor='val_loss', 71 | patience=10, 72 | verbose=1) 73 | 74 | hist = model.fit(x_train, t_train, 75 | epochs=1000, batch_size=100, 76 | verbose=2, 77 | validation_data=(x_val, t_val), 78 | callbacks=[es]) 79 | 80 | ''' 81 | 4. モデルの評価 82 | ''' 83 | # sin波の予測 84 | sin = toy_problem(T, ampl=0.) 
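Swapping `LSTM` for `GRU` in this script leaves everything else untouched but shrinks the recurrent layer: a GRU has three gate matrices where an LSTM has four. For `input_dim = 1` and 50 units the counts work out as below; the extra `2 * units` bias term comes from Keras' `reset_after=True`, the TF 2.x default GRU variant. A back-of-the-envelope sketch, not output from the script:

units, input_dim = 50, 1
lstm_params = 4 * (units * (units + input_dim) + units)       # 10,400
gru_params = 3 * (units * (units + input_dim) + 2 * units)    # 7,950 with reset_after=True
print(lstm_params, gru_params)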
85 | gen = [None for i in range(maxlen)] 86 | 87 | z = x[:1] 88 | 89 | # 逐次的に予測値を求める 90 | for i in range(length_of_sequences - maxlen): 91 | preds = model.predict(z[-1:]) 92 | z = np.append(z, preds)[1:] 93 | z = z.reshape(-1, maxlen, 1) 94 | gen.append(preds[0, 0]) 95 | 96 | # 予測値を可視化 97 | fig = plt.figure() 98 | plt.rc('font', family='serif') 99 | plt.xlim([0, 2*T]) 100 | plt.ylim([-1.5, 1.5]) 101 | plt.plot(range(len(f)), sin, 102 | color='gray', 103 | linestyle='--', linewidth=0.5) 104 | plt.plot(range(len(f)), gen, 105 | color='black', linewidth=1, 106 | marker='o', markersize=1, markerfacecolor='black', 107 | markeredgecolor='black') 108 | # plt.savefig('output.jpg') 109 | plt.show() 110 | -------------------------------------------------------------------------------- /5/11_sin_gru_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.3.2 GRU - TensorFlow (sin波) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import tensorflow as tf 10 | from tensorflow.keras import datasets 11 | from tensorflow.keras.models import Model 12 | from tensorflow.keras.layers import Dense, GRU 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras import losses 15 | from tensorflow.keras import metrics 16 | from callbacks import EarlyStopping 17 | 18 | 19 | class RNN(Model): 20 | def __init__(self, hidden_dim): 21 | super().__init__() 22 | self.l1 = GRU(hidden_dim, activation='tanh', 23 | recurrent_activation='sigmoid', 24 | kernel_initializer='glorot_normal', 25 | recurrent_initializer='orthogonal') 26 | self.l2 = Dense(1, activation='linear') 27 | 28 | def call(self, x): 29 | h = self.l1(x) 30 | y = self.l2(h) 31 | 32 | return y 33 | 34 | 35 | if __name__ == '__main__': 36 | np.random.seed(123) 37 | tf.random.set_seed(123) 38 | 39 | ''' 40 | 1. データの準備 41 | ''' 42 | def sin(x, T=100): 43 | return np.sin(2.0 * np.pi * x / T) 44 | 45 | def toy_problem(T=100, ampl=0.05): 46 | x = np.arange(0, 2*T + 1) 47 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, 48 | size=len(x)) 49 | return sin(x) + noise 50 | 51 | T = 100 52 | f = toy_problem(T).astype(np.float32) 53 | length_of_sequences = len(f) 54 | maxlen = 25 55 | 56 | x = [] 57 | t = [] 58 | 59 | for i in range(length_of_sequences - maxlen): 60 | x.append(f[i:i+maxlen]) 61 | t.append(f[i+maxlen]) 62 | 63 | x = np.array(x).reshape(-1, maxlen, 1) 64 | t = np.array(t).reshape(-1, 1) 65 | 66 | x_train, x_val, t_train, t_val = \ 67 | train_test_split(x, t, test_size=0.2, shuffle=False) 68 | 69 | ''' 70 | 2. モデルの構築 71 | ''' 72 | model = RNN(50) 73 | 74 | ''' 75 | 3. 
モデルの学習 76 | ''' 77 | criterion = losses.MeanSquaredError() 78 | optimizer = optimizers.Adam(learning_rate=0.001, 79 | beta_1=0.9, beta_2=0.999, amsgrad=True) 80 | train_loss = metrics.Mean() 81 | val_loss = metrics.Mean() 82 | 83 | def compute_loss(t, y): 84 | return criterion(t, y) 85 | 86 | def train_step(x, t): 87 | with tf.GradientTape() as tape: 88 | preds = model(x) 89 | loss = compute_loss(t, preds) 90 | grads = tape.gradient(loss, model.trainable_variables) 91 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 92 | train_loss(loss) 93 | 94 | return loss 95 | 96 | def val_step(x, t): 97 | preds = model(x) 98 | loss = compute_loss(t, preds) 99 | val_loss(loss) 100 | 101 | epochs = 1000 102 | batch_size = 100 103 | n_batches_train = x_train.shape[0] // batch_size + 1 104 | n_batches_val = x_val.shape[0] // batch_size + 1 105 | hist = {'loss': [], 'val_loss': []} 106 | es = EarlyStopping(patience=10, verbose=1) 107 | 108 | for epoch in range(epochs): 109 | x_, t_ = shuffle(x_train, t_train) 110 | 111 | for batch in range(n_batches_train): 112 | start = batch * batch_size 113 | end = start + batch_size 114 | train_step(x_[start:end], t_[start:end]) 115 | 116 | for batch in range(n_batches_val): 117 | start = batch * batch_size 118 | end = start + batch_size 119 | val_step(x_val[start:end], t_val[start:end]) 120 | 121 | hist['loss'].append(train_loss.result()) 122 | hist['val_loss'].append(val_loss.result()) 123 | 124 | print('epoch: {}, loss: {:.3}, val_loss: {:.3f}'.format( 125 | epoch+1, 126 | train_loss.result(), 127 | val_loss.result() 128 | )) 129 | 130 | if es(val_loss.result()): 131 | break 132 | 133 | ''' 134 | 4. モデルの評価 135 | ''' 136 | # sin波の予測 137 | sin = toy_problem(T, ampl=0.) 138 | gen = [None for i in range(maxlen)] 139 | 140 | z = x[:1] 141 | 142 | for i in range(length_of_sequences - maxlen): 143 | preds = model.predict(z[-1:]) 144 | # preds = model(z[-1:]) 145 | z = np.append(z, preds)[1:] 146 | z = z.reshape(-1, maxlen, 1) 147 | gen.append(preds[0, 0]) 148 | 149 | # 予測値を可視化 150 | fig = plt.figure() 151 | plt.rc('font', family='serif') 152 | plt.xlim([0, 2*T]) 153 | plt.ylim([-1.5, 1.5]) 154 | plt.plot(range(len(f)), sin, 155 | color='gray', 156 | linestyle='--', linewidth=0.5) 157 | plt.plot(range(len(f)), gen, 158 | color='black', linewidth=1, 159 | marker='o', markersize=1, markerfacecolor='black', 160 | markeredgecolor='black') 161 | # plt.savefig('output.jpg') 162 | plt.show() 163 | -------------------------------------------------------------------------------- /5/12_sin_gru_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.3.2 GRU - PyTorch (sin波) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optimizers 12 | from callbacks import EarlyStopping 13 | 14 | 15 | class RNN(nn.Module): 16 | def __init__(self, hidden_dim): 17 | super().__init__() 18 | self.l1 = nn.GRU(1, hidden_dim, 19 | batch_first=True) 20 | self.l2 = nn.Linear(hidden_dim, 1) 21 | 22 | nn.init.xavier_normal_(self.l1.weight_ih_l0) 23 | nn.init.orthogonal_(self.l1.weight_hh_l0) 24 | 25 | def forward(self, x): 26 | h, _ = self.l1(x) 27 | y = self.l2(h[:, -1]) 28 | return y 29 | 30 | 31 | if __name__ == '__main__': 32 | np.random.seed(123) 33 | torch.manual_seed(123) 34 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 35 | 
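The same stacked-gate caveat applies to the GRU variant above: `nn.GRU` packs its three gates (reset, update, new) into `weight_ih_l0` and `weight_hh_l0`, so the Xavier and orthogonal initializers see `(3 * hidden_dim, ...)` matrices here. A standalone shape check:

import torch.nn as nn

gru = nn.GRU(1, 50, batch_first=True)
print(gru.weight_ih_l0.shape)   # torch.Size([150, 1])  -- 3 gates x 50 units
print(gru.weight_hh_l0.shape)   # torch.Size([150, 50])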
36 | ''' 37 | 1. データの準備 38 | ''' 39 | def sin(x, T=100): 40 | return np.sin(2.0 * np.pi * x / T) 41 | 42 | def toy_problem(T=100, ampl=0.05): 43 | x = np.arange(0, 2*T + 1) 44 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, 45 | size=len(x)) 46 | return sin(x) + noise 47 | 48 | T = 100 49 | f = toy_problem(T).astype(np.float32) 50 | length_of_sequences = len(f) 51 | maxlen = 25 52 | 53 | x = [] 54 | t = [] 55 | 56 | for i in range(length_of_sequences - maxlen): 57 | x.append(f[i:i+maxlen]) 58 | t.append(f[i+maxlen]) 59 | 60 | x = np.array(x).reshape(-1, maxlen, 1) 61 | t = np.array(t).reshape(-1, 1) 62 | 63 | x_train, x_val, t_train, t_val = \ 64 | train_test_split(x, t, test_size=0.2, shuffle=False) 65 | 66 | ''' 67 | 2. モデルの構築 68 | ''' 69 | model = RNN(50).to(device) 70 | 71 | ''' 72 | 3. モデルの学習 73 | ''' 74 | criterion = nn.MSELoss(reduction='mean') 75 | optimizer = optimizers.Adam(model.parameters(), 76 | lr=0.001, 77 | betas=(0.9, 0.999), amsgrad=True) 78 | 79 | def compute_loss(t, y): 80 | return criterion(y, t) 81 | 82 | def train_step(x, t): 83 | x = torch.Tensor(x).to(device) 84 | t = torch.Tensor(t).to(device) 85 | model.train() 86 | preds = model(x) 87 | loss = compute_loss(t, preds) 88 | optimizer.zero_grad() 89 | loss.backward() 90 | optimizer.step() 91 | 92 | return loss, preds 93 | 94 | def val_step(x, t): 95 | x = torch.Tensor(x).to(device) 96 | t = torch.Tensor(t).to(device) 97 | model.eval() 98 | preds = model(x) 99 | loss = criterion(preds, t) 100 | 101 | return loss, preds 102 | 103 | epochs = 1000 104 | batch_size = 100 105 | n_batches_train = x_train.shape[0] // batch_size + 1 106 | n_batches_val = x_val.shape[0] // batch_size + 1 107 | hist = {'loss': [], 'val_loss': []} 108 | es = EarlyStopping(patience=10, verbose=1) 109 | 110 | for epoch in range(epochs): 111 | train_loss = 0. 112 | val_loss = 0. 113 | x_, t_ = shuffle(x_train, t_train) 114 | 115 | for batch in range(n_batches_train): 116 | start = batch * batch_size 117 | end = start + batch_size 118 | loss, _ = train_step(x_[start:end], t_[start:end]) 119 | train_loss += loss.item() 120 | 121 | for batch in range(n_batches_val): 122 | start = batch * batch_size 123 | end = start + batch_size 124 | loss, _ = val_step(x_val[start:end], t_val[start:end]) 125 | val_loss += loss.item() 126 | 127 | train_loss /= n_batches_train 128 | val_loss /= n_batches_val 129 | 130 | hist['loss'].append(train_loss) 131 | hist['val_loss'].append(val_loss) 132 | 133 | print('epoch: {}, loss: {:.3}, val_loss: {:.3f}'.format( 134 | epoch+1, 135 | train_loss, 136 | val_loss 137 | )) 138 | 139 | if es(val_loss): 140 | break 141 | 142 | ''' 143 | 4. モデルの評価 144 | ''' 145 | model.eval() 146 | 147 | # sin波の予測 148 | sin = toy_problem(T, ampl=0.) 
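The plot at the end gives a visual check only; once the loop below has filled `gen`, the generated wave can also be scored against the noise-free reference. A minimal sketch, assuming `gen`, `sin` and `maxlen` as defined in this script (run it after the loop completes):

pred = np.array(gen[maxlen:], dtype=np.float32)          # drop the None padding
rmse = np.sqrt(np.mean((pred - sin[maxlen:]) ** 2))      # error against the clean wave
print('rmse: {:.4f}'.format(rmse))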
149 | gen = [None for i in range(maxlen)] 150 | 151 | z = x[:1] 152 | 153 | for i in range(length_of_sequences - maxlen): 154 | z_ = torch.Tensor(z[-1:]).to(device) 155 | preds = model(z_).data.cpu().numpy() 156 | z = np.append(z, preds)[1:] 157 | z = z.reshape(-1, maxlen, 1) 158 | gen.append(preds[0, 0]) 159 | 160 | # 予測値を可視化 161 | fig = plt.figure() 162 | plt.rc('font', family='serif') 163 | plt.xlim([0, 2*T]) 164 | plt.ylim([-1.5, 1.5]) 165 | plt.plot(range(len(f)), sin, 166 | color='gray', 167 | linestyle='--', linewidth=0.5) 168 | plt.plot(range(len(f)), gen, 169 | color='black', linewidth=1, 170 | marker='o', markersize=1, markerfacecolor='black', 171 | markeredgecolor='black') 172 | # plt.savefig('output.jpg') 173 | plt.show() 174 | -------------------------------------------------------------------------------- /5/13_adding_problem_gru_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.3.2 GRU - Keras (Adding Problem) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | import tensorflow as tf 9 | from tensorflow.keras import datasets 10 | from tensorflow.keras.models import Sequential 11 | from tensorflow.keras.layers import Dense, GRU 12 | from tensorflow.keras import optimizers 13 | from tensorflow.keras.callbacks import EarlyStopping 14 | import pickle 15 | 16 | 17 | if __name__ == '__main__': 18 | np.random.seed(123) 19 | tf.random.set_seed(123) 20 | 21 | ''' 22 | 1. データの準備 23 | ''' 24 | def mask(T=200): 25 | mask = np.zeros(T) 26 | indices = np.random.permutation(np.arange(T))[:2] 27 | mask[indices] = 1 28 | return mask 29 | 30 | def toy_problem(N, T=200): 31 | signals = np.random.uniform(low=0.0, high=1.0, 32 | size=(N, T)) 33 | masks = np.zeros((N, T)) 34 | for i in range(N): 35 | masks[i] = mask(T) 36 | 37 | data = np.zeros((N, T, 2)) 38 | data[:, :, 0] = signals[:] 39 | data[:, :, 1] = masks[:] 40 | target = (signals * masks).sum(axis=1).reshape(N, 1) 41 | 42 | return (data.astype(np.float32), 43 | target.astype(np.float32)) 44 | 45 | N = 10000 46 | T = 200 47 | maxlen = T 48 | 49 | x, t = toy_problem(N, T) 50 | x_train, x_val, t_train, t_val = \ 51 | train_test_split(x, t, test_size=0.2, shuffle=False) 52 | 53 | ''' 54 | 2. モデルの構築 55 | ''' 56 | model = Sequential() 57 | model.add(GRU(50, activation='tanh', 58 | recurrent_activation='sigmoid', 59 | kernel_initializer='glorot_normal', 60 | recurrent_initializer='orthogonal')) 61 | model.add(Dense(1, activation='linear')) 62 | 63 | ''' 64 | 3. モデルの学習 65 | ''' 66 | optimizer = optimizers.Adam(learning_rate=0.001, 67 | beta_1=0.9, beta_2=0.999, amsgrad=True) 68 | 69 | model.compile(optimizer=optimizer, 70 | loss='mean_squared_error') 71 | 72 | hist = model.fit(x_train, t_train, 73 | epochs=500, batch_size=100, 74 | verbose=2, 75 | validation_data=(x_val, t_val)) 76 | 77 | ''' 78 | 4. 
モデルの評価 79 | ''' 80 | # 誤差の可視化 81 | val_loss = hist.history['val_loss'] 82 | 83 | fig = plt.figure() 84 | plt.rc('font', family='serif') 85 | plt.plot(range(len(val_loss)), val_loss, 86 | color='black', linewidth=1, 87 | label='val_loss') 88 | plt.xlabel('epochs') 89 | plt.ylabel('loss') 90 | plt.legend() 91 | # plt.savefig('output.jpg') 92 | plt.show() 93 | -------------------------------------------------------------------------------- /5/14_adding_problem_gru_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.3.2 GRU - TensorFlow (Adding Problem) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import tensorflow as tf 10 | from tensorflow.keras import datasets 11 | from tensorflow.keras.models import Model 12 | from tensorflow.keras.layers import Dense, GRU 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras import losses 15 | from tensorflow.keras import metrics 16 | 17 | 18 | class RNN(Model): 19 | def __init__(self, hidden_dim): 20 | super().__init__() 21 | self.l1 = GRU(hidden_dim, activation='tanh', 22 | recurrent_activation='sigmoid', 23 | kernel_initializer='glorot_normal', 24 | recurrent_initializer='orthogonal') 25 | self.l2 = Dense(1, activation='linear') 26 | 27 | def call(self, x): 28 | h = self.l1(x) 29 | y = self.l2(h) 30 | 31 | return y 32 | 33 | 34 | if __name__ == '__main__': 35 | np.random.seed(123) 36 | tf.random.set_seed(123) 37 | 38 | ''' 39 | 1. データの準備 40 | ''' 41 | def mask(T=200): 42 | mask = np.zeros(T) 43 | indices = np.random.permutation(np.arange(T))[:2] 44 | mask[indices] = 1 45 | return mask 46 | 47 | def toy_problem(N, T=200): 48 | signals = np.random.uniform(low=0.0, high=1.0, 49 | size=(N, T)) 50 | masks = np.zeros((N, T)) 51 | for i in range(N): 52 | masks[i] = mask(T) 53 | 54 | data = np.zeros((N, T, 2)) 55 | data[:, :, 0] = signals[:] 56 | data[:, :, 1] = masks[:] 57 | target = (signals * masks).sum(axis=1).reshape(N, 1) 58 | 59 | return (data.astype(np.float32), 60 | target.astype(np.float32)) 61 | 62 | N = 10000 63 | T = 200 64 | maxlen = T 65 | 66 | x, t = toy_problem(N, T) 67 | x_train, x_val, t_train, t_val = \ 68 | train_test_split(x, t, test_size=0.2, shuffle=False) 69 | 70 | ''' 71 | 2. モデルの構築 72 | ''' 73 | model = RNN(50) 74 | 75 | ''' 76 | 3. 
モデルの学習 77 | ''' 78 | criterion = losses.MeanSquaredError() 79 | optimizer = optimizers.Adam(learning_rate=0.001, 80 | beta_1=0.9, beta_2=0.999, amsgrad=True) 81 | train_loss = metrics.Mean() 82 | val_loss = metrics.Mean() 83 | 84 | def compute_loss(t, y): 85 | return criterion(t, y) 86 | 87 | def train_step(x, t): 88 | with tf.GradientTape() as tape: 89 | preds = model(x) 90 | loss = compute_loss(t, preds) 91 | grads = tape.gradient(loss, model.trainable_variables) 92 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 93 | train_loss(loss) 94 | 95 | return loss 96 | 97 | def val_step(x, t): 98 | preds = model(x) 99 | loss = compute_loss(t, preds) 100 | val_loss(loss) 101 | 102 | epochs = 500 103 | batch_size = 100 104 | n_batches_train = x_train.shape[0] // batch_size 105 | n_batches_val = x_val.shape[0] // batch_size 106 | hist = {'loss': [], 'val_loss': []} 107 | 108 | for epoch in range(epochs): 109 | x_, t_ = shuffle(x_train, t_train) 110 | 111 | for batch in range(n_batches_train): 112 | start = batch * batch_size 113 | end = start + batch_size 114 | train_step(x_[start:end], t_[start:end]) 115 | 116 | for batch in range(n_batches_val): 117 | start = batch * batch_size 118 | end = start + batch_size 119 | val_step(x_val[start:end], t_val[start:end]) 120 | 121 | hist['loss'].append(train_loss.result()) 122 | hist['val_loss'].append(val_loss.result()) 123 | 124 | print('epoch: {}, loss: {:.3}, val_loss: {:.3f}'.format( 125 | epoch+1, 126 | train_loss.result(), 127 | val_loss.result() 128 | )) 129 | 130 | ''' 131 | 4. モデルの評価 132 | ''' 133 | # 誤差の可視化 134 | val_loss = hist['val_loss'] 135 | 136 | fig = plt.figure() 137 | plt.rc('font', family='serif') 138 | plt.plot(range(len(val_loss)), val_loss, 139 | color='black', linewidth=1, 140 | label='val_loss') 141 | plt.xlabel('epochs') 142 | plt.ylabel('loss') 143 | plt.legend() 144 | # plt.savefig('output.jpg') 145 | plt.show() 146 | -------------------------------------------------------------------------------- /5/15_adding_problem_gru_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.3.2 GRU - PyTorch (Adding Problem) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils import shuffle 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optimizers 12 | 13 | 14 | class RNN(nn.Module): 15 | def __init__(self, hidden_dim): 16 | super().__init__() 17 | self.l1 = nn.GRU(2, hidden_dim, 18 | batch_first=True) 19 | self.l2 = nn.Linear(hidden_dim, 1) 20 | 21 | nn.init.xavier_normal_(self.l1.weight_ih_l0) 22 | nn.init.orthogonal_(self.l1.weight_hh_l0) 23 | 24 | def forward(self, x): 25 | h, _ = self.l1(x) 26 | y = self.l2(h[:, -1]) 27 | return y 28 | 29 | 30 | if __name__ == '__main__': 31 | np.random.seed(123) 32 | torch.manual_seed(123) 33 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 34 | 35 | ''' 36 | 1. 
データの準備 37 | ''' 38 | def mask(T=200): 39 | mask = np.zeros(T) 40 | indices = np.random.permutation(np.arange(T))[:2] 41 | mask[indices] = 1 42 | return mask 43 | 44 | def toy_problem(N, T=200): 45 | signals = np.random.uniform(low=0.0, high=1.0, 46 | size=(N, T)) 47 | masks = np.zeros((N, T)) 48 | for i in range(N): 49 | masks[i] = mask(T) 50 | 51 | data = np.zeros((N, T, 2)) 52 | data[:, :, 0] = signals[:] 53 | data[:, :, 1] = masks[:] 54 | target = (signals * masks).sum(axis=1).reshape(N, 1) 55 | 56 | return (data.astype(np.float32), 57 | target.astype(np.float32)) 58 | 59 | N = 10000 60 | T = 200 61 | maxlen = T 62 | 63 | x, t = toy_problem(N, T) 64 | x_train, x_val, t_train, t_val = \ 65 | train_test_split(x, t, test_size=0.2, shuffle=False) 66 | 67 | ''' 68 | 2. モデルの構築 69 | ''' 70 | model = RNN(50).to(device) 71 | 72 | ''' 73 | 3. モデルの学習 74 | ''' 75 | criterion = nn.MSELoss(reduction='mean') 76 | optimizer = optimizers.Adam(model.parameters(), 77 | lr=0.001, 78 | betas=(0.9, 0.999), amsgrad=True) 79 | 80 | def compute_loss(t, y): 81 | return criterion(y, t) 82 | 83 | def train_step(x, t): 84 | x = torch.Tensor(x).to(device) 85 | t = torch.Tensor(t).to(device) 86 | model.train() 87 | preds = model(x) 88 | loss = compute_loss(t, preds) 89 | optimizer.zero_grad() 90 | loss.backward() 91 | optimizer.step() 92 | 93 | return loss, preds 94 | 95 | def val_step(x, t): 96 | x = torch.Tensor(x).to(device) 97 | t = torch.Tensor(t).to(device) 98 | model.eval() 99 | preds = model(x) 100 | loss = criterion(preds, t) 101 | 102 | return loss, preds 103 | 104 | epochs = 500 105 | batch_size = 100 106 | n_batches_train = x_train.shape[0] // batch_size 107 | n_batches_val = x_val.shape[0] // batch_size 108 | hist = {'loss': [], 'val_loss': []} 109 | 110 | for epoch in range(epochs): 111 | train_loss = 0. 112 | val_loss = 0. 113 | x_, t_ = shuffle(x_train, t_train) 114 | 115 | for batch in range(n_batches_train): 116 | start = batch * batch_size 117 | end = start + batch_size 118 | loss, _ = train_step(x_[start:end], t_[start:end]) 119 | train_loss += loss.item() 120 | 121 | for batch in range(n_batches_val): 122 | start = batch * batch_size 123 | end = start + batch_size 124 | loss, _ = val_step(x_val[start:end], t_val[start:end]) 125 | val_loss += loss.item() 126 | 127 | train_loss /= n_batches_train 128 | val_loss /= n_batches_val 129 | 130 | hist['loss'].append(train_loss) 131 | hist['val_loss'].append(val_loss) 132 | 133 | print('epoch: {}, loss: {:.3}, val_loss: {:.3f}'.format( 134 | epoch+1, 135 | train_loss, 136 | val_loss 137 | )) 138 | 139 | ''' 140 | 4. 
モデルの評価 141 | ''' 142 | # 誤差の可視化 143 | val_loss = hist['val_loss'] 144 | 145 | fig = plt.figure() 146 | plt.rc('font', family='serif') 147 | plt.plot(range(len(val_loss)), val_loss, 148 | color='black', linewidth=1, 149 | label='val_loss') 150 | plt.xlabel('epochs') 151 | plt.ylabel('loss') 152 | plt.legend() 153 | # plt.savefig('output.jpg') 154 | plt.show() 155 | -------------------------------------------------------------------------------- /5/16_mnist_birnn_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.4.3.2 BiRNN - Keras (MNIST) 3 | ''' 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from sklearn.model_selection import train_test_split 8 | import tensorflow as tf 9 | from tensorflow.keras import datasets 10 | from tensorflow.keras.models import Sequential 11 | from tensorflow.keras.layers import Dense, LSTM, Bidirectional 12 | from tensorflow.keras import optimizers 13 | from tensorflow.keras.callbacks import EarlyStopping 14 | 15 | 16 | if __name__ == '__main__': 17 | np.random.seed(123) 18 | tf.random.set_seed(123) 19 | 20 | ''' 21 | 1. データの準備 22 | ''' 23 | mnist = datasets.mnist 24 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 25 | 26 | x_train = (x_train.reshape(-1, 28, 28) / 255).astype(np.float32) 27 | x_test = (x_test.reshape(-1, 28, 28) / 255).astype(np.float32) 28 | 29 | x_train, x_val, t_train, t_val = \ 30 | train_test_split(x_train, t_train, test_size=0.2) 31 | 32 | ''' 33 | 2. モデルの構築 34 | ''' 35 | model = Sequential() 36 | model.add(Bidirectional(LSTM(25, activation='tanh', 37 | recurrent_activation='sigmoid', 38 | kernel_initializer='glorot_normal', 39 | recurrent_initializer='orthogonal'), 40 | merge_mode='concat')) 41 | model.add(Dense(10, kernel_initializer='glorot_normal', 42 | activation='softmax')) 43 | 44 | ''' 45 | 3. モデルの学習 46 | ''' 47 | optimizer = optimizers.Adam(learning_rate=0.001, 48 | beta_1=0.9, beta_2=0.999, amsgrad=True) 49 | 50 | model.compile(optimizer=optimizer, 51 | loss='sparse_categorical_crossentropy', 52 | metrics=['accuracy']) 53 | 54 | es = EarlyStopping(monitor='val_loss', 55 | patience=5, 56 | verbose=1) 57 | 58 | hist = model.fit(x_train, t_train, 59 | epochs=1000, batch_size=100, 60 | verbose=2, 61 | validation_data=(x_val, t_val), 62 | callbacks=[es]) 63 | 64 | ''' 65 | 4. 
モデルの評価 66 | ''' 67 | # 誤差の可視化 68 | loss = hist.history['loss'] 69 | val_loss = hist.history['val_loss'] 70 | 71 | fig = plt.figure() 72 | plt.rc('font', family='serif') 73 | plt.plot(range(len(loss)), loss, 74 | color='gray', linewidth=1, 75 | label='loss') 76 | plt.plot(range(len(val_loss)), val_loss, 77 | color='black', linewidth=1, 78 | label='val_loss') 79 | plt.xlabel('epochs') 80 | plt.ylabel('loss') 81 | plt.legend() 82 | # plt.savefig('output.jpg') 83 | plt.show() 84 | 85 | # テストデータの評価 86 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 87 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 88 | loss, 89 | acc 90 | )) 91 | -------------------------------------------------------------------------------- /5/19_imdb_birnn_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.4.4.2 BiRNN - Keras (IMDb) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.model_selection import train_test_split 7 | import tensorflow as tf 8 | from tensorflow.keras import datasets 9 | from tensorflow.keras.models import Sequential 10 | from tensorflow.keras.layers \ 11 | import Dense, LSTM, Bidirectional, Embedding 12 | from tensorflow.keras import optimizers 13 | from tensorflow.keras.callbacks import EarlyStopping 14 | from tensorflow.keras.preprocessing.sequence import pad_sequences 15 | 16 | 17 | if __name__ == '__main__': 18 | np.random.seed(123) 19 | tf.random.set_seed(123) 20 | 21 | ''' 22 | 1. データの準備 23 | ''' 24 | num_words = 20000 25 | maxlen = 80 26 | 27 | imdb = datasets.imdb 28 | word_index = imdb.get_word_index() 29 | 30 | (x_train, t_train), (x_test, t_test) = imdb.load_data(num_words=num_words, 31 | start_char=1, 32 | oov_char=2, 33 | index_from=3) 34 | 35 | x_train, x_val, t_train, t_val = \ 36 | train_test_split(x_train, t_train, test_size=0.2) 37 | 38 | x_train = pad_sequences(x_train, maxlen=maxlen, padding='pre') 39 | x_val = pad_sequences(x_val, maxlen=maxlen, padding='pre') 40 | x_test = pad_sequences(x_test, maxlen=maxlen, padding='pre') 41 | 42 | ''' 43 | 2. モデルの構築 44 | ''' 45 | model = Sequential() 46 | model.add(Embedding(num_words, 128, mask_zero=True)) 47 | model.add(Bidirectional(LSTM(128, activation='tanh', 48 | recurrent_activation='sigmoid', 49 | kernel_initializer='glorot_normal', 50 | recurrent_initializer='orthogonal'), 51 | merge_mode='concat')) 52 | model.add(Dense(1, kernel_initializer='glorot_normal', 53 | activation='sigmoid')) 54 | 55 | ''' 56 | 3. モデルの学習 57 | ''' 58 | optimizer = optimizers.Adam(learning_rate=0.001, 59 | beta_1=0.9, beta_2=0.999, amsgrad=True) 60 | 61 | model.compile(optimizer=optimizer, 62 | loss='binary_crossentropy', 63 | metrics=['accuracy']) 64 | 65 | es = EarlyStopping(monitor='val_loss', 66 | patience=5, 67 | verbose=1) 68 | 69 | model.fit(x_train, t_train, 70 | epochs=1000, batch_size=100, 71 | verbose=2, 72 | validation_data=(x_val, t_val), 73 | callbacks=[es]) 74 | 75 | ''' 76 | 4. 
モデルの評価 77 | ''' 78 | test_loss, test_acc = model.evaluate(x_test, t_test, verbose=0) 79 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 80 | test_loss, 81 | test_acc 82 | )) 83 | -------------------------------------------------------------------------------- /5/20_imdb_birnn_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 5.4.4.2 BiRNN - TensorFlow (IMDb) 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.utils import shuffle 8 | import tensorflow as tf 9 | from tensorflow.keras import datasets 10 | from tensorflow.keras.models import Model 11 | from tensorflow.keras.layers \ 12 | import Dense, LSTM, Bidirectional, Embedding 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras import losses 15 | from tensorflow.keras import metrics 16 | from callbacks import EarlyStopping 17 | from tensorflow.keras.preprocessing.sequence import pad_sequences 18 | 19 | 20 | class BiRNN(Model): 21 | def __init__(self, num_words, hidden_dim): 22 | super().__init__() 23 | self.emb = Embedding(num_words, hidden_dim, mask_zero=True) 24 | self.lstm = Bidirectional(LSTM(hidden_dim, activation='tanh', 25 | recurrent_activation='sigmoid', 26 | kernel_initializer='glorot_normal', 27 | recurrent_initializer='orthogonal'), 28 | merge_mode='concat') 29 | self.out = Dense(1, kernel_initializer='glorot_normal', 30 | activation='sigmoid') 31 | 32 | def call(self, x): 33 | h = self.emb(x) 34 | h = self.lstm(h) 35 | y = self.out(h) 36 | return tf.reshape(y, [-1]) # (batch_size, 1) => (batch_size,) 37 | 38 | 39 | if __name__ == '__main__': 40 | np.random.seed(123) 41 | tf.random.set_seed(123) 42 | 43 | ''' 44 | 1. データの準備 45 | ''' 46 | num_words = 20000 47 | maxlen = 80 48 | 49 | imdb = datasets.imdb 50 | word_index = imdb.get_word_index() 51 | 52 | (x_train, t_train), (x_test, t_test) = imdb.load_data(num_words=num_words, 53 | start_char=1, 54 | oov_char=2, 55 | index_from=3) 56 | 57 | x_train, x_val, t_train, t_val = \ 58 | train_test_split(x_train, t_train, test_size=0.2) 59 | 60 | x_train = pad_sequences(x_train, maxlen=maxlen, padding='pre') 61 | x_val = pad_sequences(x_val, maxlen=maxlen, padding='pre') 62 | x_test = pad_sequences(x_test, maxlen=maxlen, padding='pre') 63 | 64 | ''' 65 | 2. モデルの構築 66 | ''' 67 | model = BiRNN(num_words, 128) 68 | 69 | ''' 70 | 3. 
モデルの学習 71 | ''' 72 | criterion = losses.BinaryCrossentropy() 73 | optimizer = optimizers.Adam(learning_rate=0.001, 74 | beta_1=0.9, beta_2=0.999, amsgrad=True) 75 | train_loss = metrics.Mean() 76 | train_acc = metrics.BinaryAccuracy() 77 | val_loss = metrics.Mean() 78 | val_acc = metrics.BinaryAccuracy() 79 | 80 | def compute_loss(t, y): 81 | return criterion(t, y) 82 | 83 | def train_step(x, t): 84 | with tf.GradientTape() as tape: 85 | preds = model(x) 86 | loss = compute_loss(t, preds) 87 | grads = tape.gradient(loss, model.trainable_variables) 88 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 89 | train_loss(loss) 90 | train_acc(t, preds) 91 | 92 | return loss 93 | 94 | def val_step(x, t): 95 | preds = model(x) 96 | loss = compute_loss(t, preds) 97 | val_loss(loss) 98 | val_acc(t, preds) 99 | 100 | epochs = 1000 101 | batch_size = 100 102 | n_batches_train = x_train.shape[0] // batch_size 103 | n_batches_val = x_val.shape[0] // batch_size 104 | es = EarlyStopping(patience=5, verbose=1) 105 | 106 | for epoch in range(epochs): 107 | x_, t_ = shuffle(x_train, t_train) 108 | 109 | for batch in range(n_batches_train): 110 | start = batch * batch_size 111 | end = start + batch_size 112 | train_step(x_[start:end], t_[start:end]) 113 | 114 | for batch in range(n_batches_val): 115 | start = batch * batch_size 116 | end = start + batch_size 117 | val_step(x_val[start:end], t_val[start:end]) 118 | 119 | print('epoch: {}, loss: {:.3}, acc: {:.3f}' 120 | ', val_loss: {:.3}, val_acc: {:.3f}'.format( 121 | epoch+1, 122 | train_loss.result(), 123 | train_acc.result(), 124 | val_loss.result(), 125 | val_acc.result() 126 | )) 127 | 128 | if es(val_loss.result()): 129 | break 130 | 131 | ''' 132 | 4. モデルの評価 133 | ''' 134 | test_loss = metrics.Mean() 135 | test_acc = metrics.BinaryAccuracy() 136 | 137 | def test_step(x, t): 138 | preds = model(x) 139 | loss = compute_loss(t, preds) 140 | test_loss(loss) 141 | test_acc(t, preds) 142 | 143 | return loss 144 | 145 | test_step(x_test, t_test) 146 | 147 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 148 | test_loss.result(), 149 | test_acc.result() 150 | )) 151 | -------------------------------------------------------------------------------- /5/callbacks/EarlyStopping.py: -------------------------------------------------------------------------------- 1 | class EarlyStopping: 2 | ''' 3 | 早期終了 (early stopping) 4 | ''' 5 | def __init__(self, patience=0, verbose=0): 6 | self._step = 0 7 | self._loss = float('inf') 8 | self.patience = patience 9 | self.verbose = verbose 10 | 11 | def __call__(self, loss): 12 | if self._loss < loss: 13 | self._step += 1 14 | if self._step > self.patience: 15 | if self.verbose: 16 | print('early stopping') 17 | return True 18 | else: 19 | self._step = 0 20 | self._loss = loss 21 | 22 | return False 23 | -------------------------------------------------------------------------------- /5/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .EarlyStopping import EarlyStopping 2 | -------------------------------------------------------------------------------- /5/sounds/sin.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yusugomori/deeplearning-keras-tf2-torch/08e7b1fbd965eb5d6dbc50fd79c22dc4560ee5ea/5/sounds/sin.mp3 -------------------------------------------------------------------------------- /5/sounds/sin_noise.mp3: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yusugomori/deeplearning-keras-tf2-torch/08e7b1fbd965eb5d6dbc50fd79c22dc4560ee5ea/5/sounds/sin_noise.mp3 -------------------------------------------------------------------------------- /6/01_mnist_dataloader_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.1.3.3 データローダ - TensorFlow 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.utils import shuffle 7 | import tensorflow as tf 8 | from tensorflow.keras import datasets 9 | 10 | 11 | class DataLoader(object): 12 | def __init__(self, dataset, 13 | batch_size=100, 14 | shuffle=False, 15 | random_state=None): 16 | self.dataset = list(zip(dataset[0], dataset[1])) 17 | self.batch_size = batch_size 18 | self.shuffle = shuffle 19 | 20 | if random_state is None: 21 | random_state = np.random.RandomState(123) 22 | 23 | self.random_state = random_state 24 | self._idx = 0 25 | self._reset() 26 | 27 | def __len__(self): 28 | N = len(self.dataset) 29 | b = self.batch_size 30 | return N // b + bool(N % b) 31 | 32 | def __iter__(self): 33 | return self 34 | 35 | def __next__(self): 36 | if self._idx >= len(self.dataset): 37 | self._reset() 38 | raise StopIteration() 39 | 40 | x, t = zip(*self.dataset[self._idx:(self._idx + self.batch_size)]) 41 | 42 | x = tf.convert_to_tensor(x, dtype=tf.float32) 43 | t = tf.convert_to_tensor(t, dtype=tf.float32) 44 | 45 | self._idx += self.batch_size 46 | 47 | return x, t 48 | 49 | def _reset(self): 50 | if self.shuffle: 51 | self.dataset = shuffle(self.dataset, 52 | random_state=self.random_state) 53 | self._idx = 0 54 | 55 | 56 | if __name__ == '__main__': 57 | np.random.seed(123) 58 | tf.random.set_seed(123) 59 | 60 | mnist = datasets.mnist 61 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 62 | 63 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 64 | t_train = np.eye(10)[t_train].astype(np.float32) 65 | 66 | train_dataloader = DataLoader((x_train, t_train), 67 | batch_size=100, 68 | shuffle=True) 69 | 70 | for (x, t) in train_dataloader: 71 | print(x.shape) # => (100, 784) 72 | print(t.shape) # => (100, 10) 73 | break 74 | -------------------------------------------------------------------------------- /6/02_mnist_dataloader_torch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.1.3.3 データローダ - PyTorch 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.utils import shuffle 7 | import tensorflow as tf 8 | import torch 9 | from tensorflow.keras import datasets 10 | 11 | 12 | class DataLoader(object): 13 | def __init__(self, dataset, 14 | batch_size=100, 15 | shuffle=False, 16 | device='cpu', 17 | random_state=None): 18 | self.dataset = list(zip(dataset[0], dataset[1])) 19 | self.batch_size = batch_size 20 | self.shuffle = shuffle 21 | self.device = device 22 | 23 | if random_state is None: 24 | random_state = np.random.RandomState(123) 25 | 26 | self.random_state = random_state 27 | self._idx = 0 28 | self._reset() 29 | 30 | def __len__(self): 31 | N = len(self.dataset) 32 | b = self.batch_size 33 | return N // b + bool(N % b) 34 | 35 | def __iter__(self): 36 | return self 37 | 38 | def __next__(self): 39 | if self._idx >= len(self.dataset): 40 | self._reset() 41 | raise StopIteration() 42 | 43 | x, t = zip(*self.dataset[self._idx:(self._idx + self.batch_size)]) 44 | 45 | x = torch.Tensor(x) 46 | t = torch.Tensor(t) 47 | 48 | self._idx += self.batch_size 49 | 50 | return 
x.to(self.device), t.to(self.device) 51 | 52 | def _reset(self): 53 | if self.shuffle: 54 | self.dataset = shuffle(self.dataset, 55 | random_state=self.random_state) 56 | self._idx = 0 57 | 58 | 59 | if __name__ == '__main__': 60 | np.random.seed(123) 61 | tf.random.set_seed(123) 62 | 63 | mnist = datasets.mnist 64 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 65 | 66 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 67 | t_train = np.eye(10)[t_train].astype(np.float32) 68 | 69 | train_dataloader = DataLoader((x_train, t_train), 70 | batch_size=100, 71 | shuffle=True) 72 | 73 | for (x, t) in train_dataloader: 74 | print(x.shape) # => (100, 784) 75 | print(t.shape) # => (100, 10) 76 | break 77 | -------------------------------------------------------------------------------- /6/callbacks/EarlyStopping.py: -------------------------------------------------------------------------------- 1 | class EarlyStopping: 2 | ''' 3 | Early stopping 4 | ''' 5 | def __init__(self, patience=0, verbose=0): 6 | self._step = 0 7 | self._loss = float('inf') 8 | self.patience = patience 9 | self.verbose = verbose 10 | 11 | def __call__(self, loss): 12 | if self._loss < loss: 13 | self._step += 1 14 | if self._step > self.patience: 15 | if self.verbose: 16 | print('early stopping') 17 | return True 18 | else: 19 | self._step = 0 20 | self._loss = loss 21 | 22 | return False 23 | -------------------------------------------------------------------------------- /6/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .EarlyStopping import EarlyStopping 2 | -------------------------------------------------------------------------------- /6/data/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yusugomori/deeplearning-keras-tf2-torch/08e7b1fbd965eb5d6dbc50fd79c22dc4560ee5ea/6/data/.keep -------------------------------------------------------------------------------- /6/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yusugomori/deeplearning-keras-tf2-torch/08e7b1fbd965eb5d6dbc50fd79c22dc4560ee5ea/6/layers/__init__.py -------------------------------------------------------------------------------- /6/layers/tf/Attention.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.2.4.1 Attention - TensorFlow 3 | ''' 4 | 5 | import tensorflow as tf 6 | from tensorflow.keras.layers import Layer 7 | 8 | 9 | class Attention(Layer): 10 | def __init__(self, output_dim, hidden_dim): 11 | super().__init__() 12 | self.output_dim = output_dim 13 | self.hidden_dim = hidden_dim 14 | 15 | def build(self, input_shape): 16 | self.W_a = self.add_weight(name='W_a', 17 | shape=(self.hidden_dim, 18 | self.hidden_dim), 19 | initializer='glorot_normal', 20 | trainable=True) 21 | 22 | self.W_c = self.add_weight(name='W_c', 23 | shape=(self.hidden_dim + self.hidden_dim, 24 | self.output_dim), 25 | initializer='glorot_normal', 26 | trainable=True) 27 | 28 | self.b = self.add_weight(name='b', 29 | shape=(self.output_dim), 30 | initializer='zeros', 31 | trainable=True) 32 | 33 | super().build(input_shape) 34 | 35 | def call(self, ht, hs, source=None): 36 | score = tf.einsum('ijk,kl->ijl', hs, self.W_a) 37 | score = tf.einsum('ijk,ilk->ijl', ht, score) 38 | 39 | score = score - tf.reduce_max(score, axis=-1, keepdims=True) 40 | score = tf.exp(score) 41 | if
source is not None: 42 | len_source_sequences = \ 43 | tf.reduce_sum(tf.cast(tf.not_equal(source, 0), 44 | tf.int32), axis=1) 45 | mask_source = \ 46 | tf.cast(tf.sequence_mask(len_source_sequences, 47 | tf.shape(score)[-1]), 48 | tf.float32) 49 | 50 | score = score * mask_source[:, tf.newaxis, :] 51 | a = score / tf.reduce_sum(score, axis=-1, keepdims=True) 52 | 53 | c = tf.einsum('ijk,ikl->ijl', a, hs) 54 | h = tf.concat([c, ht], -1) 55 | return tf.nn.tanh(tf.einsum('ijk,kl->ijl', h, self.W_c) + self.b) 56 | -------------------------------------------------------------------------------- /6/layers/tf/LayerNormalization.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.3.4.1 Layer Normalization - TensorFlow 3 | ''' 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow.keras.layers import Layer 8 | 9 | 10 | class LayerNormalization(Layer): 11 | def __init__(self, eps=np.float32(1e-8)): 12 | super().__init__() 13 | self.eps = eps 14 | 15 | def build(self, input_shape): 16 | self.gamma = self.add_weight(name='gamma', 17 | shape=(input_shape[-1]), 18 | initializer='ones') 19 | 20 | self.beta = self.add_weight(name='beta', 21 | shape=(input_shape[-1]), 22 | initializer='zeros') 23 | super().build(input_shape) 24 | 25 | def call(self, x): 26 | mean, var = tf.nn.moments(x, axes=-1, keepdims=True) 27 | std = tf.sqrt(var) + self.eps 28 | 29 | return self.gamma * (x - mean) / std + self.beta 30 | -------------------------------------------------------------------------------- /6/layers/tf/MultiHeadAttention.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.3.4.3 Multi-Head Attention - TensorFlow 3 | ''' 4 | 5 | import tensorflow as tf 6 | from tensorflow.keras.layers import Layer 7 | from .ScaledDotProductAttention import ScaledDotProductAttention 8 | 9 | 10 | class MultiHeadAttention(Layer): 11 | def __init__(self, h, d_model): 12 | super().__init__() 13 | self.h = h 14 | self.d_model = d_model 15 | self.d_k = d_k = d_model // h 16 | self.d_v = d_v = d_model // h 17 | self.attn = ScaledDotProductAttention(d_k) 18 | 19 | def build(self, input_shape): 20 | self.W_q = self.add_weight(name='W_q', 21 | shape=(self.h, 22 | self.d_model, 23 | self.d_k), 24 | initializer='glorot_normal', 25 | trainable=True) 26 | 27 | self.W_k = self.add_weight(name='W_k', 28 | shape=(self.h, 29 | self.d_model, 30 | self.d_k), 31 | initializer='glorot_normal', 32 | trainable=True) 33 | 34 | self.W_v = self.add_weight(name='W_v', 35 | shape=(self.h, 36 | self.d_model, 37 | self.d_v), 38 | initializer='glorot_normal', 39 | trainable=True) 40 | 41 | self.W_o = self.add_weight(name='W_o', 42 | shape=(self.h * self.d_v, 43 | self.d_model), 44 | initializer='glorot_normal', 45 | trainable=True) 46 | 47 | self.b_o = self.add_weight(name='b_o', 48 | shape=(self.d_model), 49 | initializer='zeros', 50 | trainable=True) 51 | 52 | super().build(input_shape) 53 | 54 | def call(self, q, k, v, mask=None): 55 | q = tf.einsum('hijk,hkl->hijl', 56 | tf.tile(q[tf.newaxis, :, :, :], 57 | [self.h, 1, 1, 1]), 58 | self.W_q) 59 | k = tf.einsum('hijk,hkl->hijl', 60 | tf.tile(k[tf.newaxis, :, :, :], 61 | [self.h, 1, 1, 1]), 62 | self.W_k) 63 | v = tf.einsum('hijk,hkl->hijl', 64 | tf.tile(v[tf.newaxis, :, :, :], 65 | [self.h, 1, 1, 1]), 66 | self.W_v) 67 | 68 | q = tf.reshape(q, shape=(-1, q.shape[-2], q.shape[-1])) 69 | k = tf.reshape(k, shape=(-1, k.shape[-2], k.shape[-1])) 70 | v = tf.reshape(v, shape=(-1, v.shape[-2], 
v.shape[-1])) 71 | 72 | if mask is not None: 73 | multiples = [self.h] + [1] * (len(mask.shape) - 1) 74 | mask = tf.tile(mask, multiples=multiples) 75 | 76 | c = self.attn(q, k, v, mask=mask) 77 | c = tf.split(c, self.h, axis=0) 78 | c = tf.concat(c, axis=-1) 79 | 80 | out = tf.einsum('ijk,kl->ijl', c, self.W_o) + self.b_o 81 | 82 | return out 83 | 84 | def compute_output_shape(self, input_shape): 85 | return (input_shape[0], input_shape[1], self.d_model) 86 | 87 | def compute_mask(self, input, mask): 88 | return mask 89 | -------------------------------------------------------------------------------- /6/layers/tf/PositionalEncoding.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.3.4.4 Positional Encoding - TensorFlow 3 | ''' 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow.keras.layers import Layer 8 | 9 | 10 | class PositionalEncoding(Layer): 11 | def __init__(self, output_dim, 12 | maxlen=6000): 13 | super().__init__() 14 | self.output_dim = output_dim 15 | self.maxlen = maxlen 16 | 17 | def build(self, input_shape): 18 | self.PE = self.add_weight(name='PE', 19 | shape=(self.maxlen, 20 | self.output_dim), 21 | initializer=self.initializer, 22 | trainable=False, 23 | dtype=tf.float32) 24 | 25 | super().build(input_shape) 26 | 27 | def call(self, x): 28 | pe = self.PE[tf.newaxis, :tf.shape(x)[1], :] 29 | return x + pe 30 | 31 | def initializer(self, input_shape, dtype=tf.float32): 32 | pe = \ 33 | np.array([[pos / np.power(10000, 2 * (i // 2) / self.output_dim) 34 | for i in range(self.output_dim)] 35 | for pos in range(self.maxlen)]) 36 | 37 | pe[:, 0::2] = np.sin(pe[:, 0::2]) 38 | pe[:, 1::2] = np.cos(pe[:, 1::2]) 39 | 40 | return tf.convert_to_tensor(pe, dtype=tf.float32) 41 | -------------------------------------------------------------------------------- /6/layers/tf/ScaledDotProductAttention.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.3.4.2 Scaled Dot-Product Attention - TensorFlow 3 | ''' 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow.keras.layers import Layer 8 | 9 | 10 | class ScaledDotProductAttention(Layer): 11 | def __init__(self, d_k): 12 | super().__init__() 13 | self.d_k = d_k 14 | self.scaler = np.sqrt(d_k) 15 | 16 | def build(self, input_shape): 17 | super().build(input_shape) 18 | 19 | def call(self, q, k, v, mask=None): 20 | score = tf.einsum('ijk,ilk->ijl', q, k) / self.scaler 21 | score = score - tf.reduce_max(score, axis=-1, keepdims=True) 22 | score = tf.exp(score) 23 | if mask is not None: 24 | if len(mask.shape) == 2: 25 | mask = mask[:, tf.newaxis, :] 26 | mask = tf.cast(mask, tf.float32) 27 | score = score * mask 28 | 29 | a = score / tf.reduce_sum(score, axis=-1, keepdims=True) 30 | c = tf.einsum('ijk,ikl->ijl', a, v) 31 | 32 | return c 33 | 34 | def compute_output_shape(self, input_shape): 35 | return (input_shape[0], input_shape[1], input_shape[2]) # feature dim of v; this class has no d_model attribute 36 | 37 | def compute_mask(self, inputs, mask): 38 | return mask 39 | -------------------------------------------------------------------------------- /6/layers/tf/__init__.py: -------------------------------------------------------------------------------- 1 | from .Attention import Attention 2 | from .PositionalEncoding import PositionalEncoding 3 | from .ScaledDotProductAttention import ScaledDotProductAttention 4 | from .LayerNormalization import LayerNormalization 5 | from .MultiHeadAttention import MultiHeadAttention 6 |
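A minimal shape check for the TensorFlow layers above (an editorial sketch, not a file in the repository; it assumes the interpreter is started inside the 6/ directory so that layers.tf is importable):

import tensorflow as tf
from layers.tf import LayerNormalization, MultiHeadAttention, PositionalEncoding

x = tf.random.normal((2, 10, 128))           # (batch, sequence, d_model)
x = PositionalEncoding(128)(x)               # add position information
mha = MultiHeadAttention(h=8, d_model=128)
h = mha(x, x, x)                             # self-attention: q = k = v
out = LayerNormalization()(x + h)            # residual connection + layer normalization
print(out.shape)                             # => (2, 10, 128)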
-------------------------------------------------------------------------------- /6/layers/torch/Attention.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.2.5.1 Attention - PyTorch 3 | ''' 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class Attention(nn.Module): 10 | def __init__(self, output_dim, hidden_dim, device='cpu'): 11 | super().__init__() 12 | self.device = device 13 | self.output_dim = output_dim 14 | self.hidden_dim = hidden_dim 15 | 16 | self.W_a = nn.Parameter(torch.Tensor(hidden_dim, 17 | hidden_dim)) 18 | 19 | self.W_c = nn.Parameter(torch.Tensor(hidden_dim + hidden_dim, 20 | output_dim)) 21 | 22 | self.b = nn.Parameter(torch.zeros(output_dim)) 23 | 24 | nn.init.xavier_normal_(self.W_a) 25 | nn.init.xavier_normal_(self.W_c) 26 | 27 | def forward(self, ht, hs, source=None): 28 | ''' 29 | # Argument 30 | ht, hs: (sequence, batch, out_features) 31 | source: (sequence, batch) 32 | ''' 33 | score = torch.einsum('jik,kl->jil', (hs, self.W_a)) 34 | score = torch.einsum('jik,lik->jil', (ht, score)) 35 | 36 | score = score - torch.max(score, dim=-1, keepdim=True)[0] 37 | score = torch.exp(score) 38 | if source is not None: 39 | mask_source = source.t().eq(0).unsqueeze(0) 40 | score.data.masked_fill_(mask_source, 0) 41 | a = score / torch.sum(score, dim=-1, keepdim=True) 42 | 43 | c = torch.einsum('jik,kil->jil', (a, hs)) 44 | h = torch.cat((c, ht), -1) 45 | return torch.tanh(torch.einsum('jik,kl->jil', (h, self.W_c)) + self.b) 46 | -------------------------------------------------------------------------------- /6/layers/torch/MultiHeadAttention.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.3.5.2 Multi-Head Attention - PyTorch 3 | ''' 4 | 5 | import torch 6 | import torch.nn as nn 7 | from .ScaledDotProductAttention import ScaledDotProductAttention 8 | 9 | 10 | class MultiHeadAttention(nn.Module): 11 | def __init__(self, 12 | h, 13 | d_model, 14 | device='cpu'): 15 | super().__init__() 16 | self.h = h 17 | self.d_model = d_model 18 | self.d_k = d_k = d_model // h 19 | self.d_v = d_v = d_model // h 20 | self.device = device 21 | 22 | self.W_q = nn.Parameter(torch.Tensor(h, 23 | d_model, 24 | d_k)) 25 | 26 | self.W_k = nn.Parameter(torch.Tensor(h, 27 | d_model, 28 | d_k)) 29 | 30 | self.W_v = nn.Parameter(torch.Tensor(h, 31 | d_model, 32 | d_v)) 33 | 34 | nn.init.xavier_normal_(self.W_q) 35 | nn.init.xavier_normal_(self.W_k) 36 | nn.init.xavier_normal_(self.W_v) 37 | 38 | self.attn = ScaledDotProductAttention(d_k) 39 | self.linear = nn.Linear((h * d_v), d_model) 40 | nn.init.xavier_normal_(self.linear.weight) 41 | 42 | def forward(self, q, k, v, mask=None): 43 | ''' 44 | # Argument 45 | q, k, v: (batch, sequence, out_features) 46 | mask: (batch, sequence) 47 | ''' 48 | batch_size = q.size(0) 49 | 50 | q = torch.einsum('hijk,hkl->hijl', 51 | (q.unsqueeze(0).repeat(self.h, 1, 1, 1), 52 | self.W_q)) 53 | k = torch.einsum('hijk,hkl->hijl', 54 | (k.unsqueeze(0).repeat(self.h, 1, 1, 1), 55 | self.W_k)) 56 | v = torch.einsum('hijk,hkl->hijl', 57 | (v.unsqueeze(0).repeat(self.h, 1, 1, 1), 58 | self.W_v)) 59 | 60 | q = q.view(-1, q.size(-2), q.size(-1)) 61 | k = k.view(-1, k.size(-2), k.size(-1)) 62 | v = v.view(-1, v.size(-2), v.size(-1)) 63 | 64 | if mask is not None: 65 | multiples = [self.h] + [1] * (len(mask.size()) - 1) 66 | mask = mask.repeat(multiples) 67 | 68 | c = self.attn(q, k, v, mask=mask) 69 | c = torch.split(c, batch_size, dim=0) 70 | c = torch.cat(c, dim=-1) 71 | 
72 | out = self.linear(c) 73 | 74 | return out 75 | -------------------------------------------------------------------------------- /6/layers/torch/PositionalEncoding.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.3.5.3 Positional Encoding - PyTorch 3 | ''' 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class PositionalEncoding(nn.Module): 11 | def __init__(self, output_dim, 12 | maxlen=6000, 13 | device='cpu'): 14 | super().__init__() 15 | self.device = device 16 | self.output_dim = output_dim 17 | self.maxlen = maxlen 18 | pe = self.initializer() 19 | self.register_buffer('pe', pe) 20 | 21 | def forward(self, x, mask=None): 22 | ''' 23 | # Argument 24 | x: (batch, sequence) 25 | ''' 26 | pe = self.pe[:x.size(1), :].unsqueeze(0) 27 | return x + pe 28 | 29 | def initializer(self): 30 | pe = \ 31 | np.array([[pos / np.power(10000, 2 * (i // 2) / self.output_dim) 32 | for i in range(self.output_dim)] 33 | for pos in range(self.maxlen)]) 34 | 35 | pe[:, 0::2] = np.sin(pe[:, 0::2]) 36 | pe[:, 1::2] = np.cos(pe[:, 1::2]) 37 | 38 | return torch.from_numpy(pe).float() 39 | -------------------------------------------------------------------------------- /6/layers/torch/ScaledDotProductAttention.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 6.3.5.1 Scaled Dot-Product Attention - PyTorch 3 | ''' 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class ScaledDotProductAttention(nn.Module): 11 | def __init__(self, 12 | d_k, 13 | device='cpu'): 14 | super().__init__() 15 | self.device = device 16 | self.scaler = np.sqrt(d_k) 17 | 18 | def forward(self, q, k, v, mask=None): 19 | ''' 20 | # Argument 21 | q, k, v: (batch, sequence, out_features) 22 | mask: (batch, sequence) 23 | ''' 24 | score = torch.einsum('ijk,ilk->ijl', (q, k)) / self.scaler 25 | score = score - torch.max(score, dim=-1, keepdim=True)[0] 26 | 27 | score = torch.exp(score) 28 | if mask is not None: 29 | if len(mask.size()) == 2: 30 | mask = mask.unsqueeze(1).repeat(1, score.size(1), 1) 31 | score.data.masked_fill_(mask, 0) 32 | 33 | a = score / torch.sum(score, dim=-1, keepdim=True) 34 | c = torch.einsum('ijk,ikl->ijl', (a, v)) 35 | 36 | return c 37 | -------------------------------------------------------------------------------- /6/layers/torch/__init__.py: -------------------------------------------------------------------------------- 1 | from .Attention import Attention 2 | from .PositionalEncoding import PositionalEncoding 3 | from .ScaledDotProductAttention import ScaledDotProductAttention 4 | from .MultiHeadAttention import MultiHeadAttention 5 | -------------------------------------------------------------------------------- /6/utils/Vocab.py: -------------------------------------------------------------------------------- 1 | class Vocab(object): 2 | def __init__(self): 3 | self.w2i = {} 4 | self.i2w = {} 5 | self.special_chars = ['<pad>', '<s>', '</s>', '<unk>'] # pad, bos, eos, oov 6 | self.bos_char = self.special_chars[1] 7 | self.eos_char = self.special_chars[2] 8 | self.oov_char = self.special_chars[3] 9 | 10 | def fit(self, path): 11 | self._words = set() 12 | 13 | with open(path, 'r') as f: 14 | sentences = f.read().splitlines() 15 | 16 | for sentence in sentences: 17 | self._words.update(sentence.split()) 18 | 19 | self.w2i = {w: (i + len(self.special_chars)) 20 | for i, w in enumerate(self._words)} 21 | 22 | for i, w in enumerate(self.special_chars): 23 | self.w2i[w] = i 24 | 25 |
self.i2w = {i: w for w, i in self.w2i.items()} 26 | 27 | def transform(self, path, bos=False, eos=False): 28 | output = [] 29 | 30 | with open(path, 'r') as f: 31 | sentences = f.read().splitlines() 32 | 33 | for sentence in sentences: 34 | sentence = sentence.split() 35 | if bos: 36 | sentence = [self.bos_char] + sentence 37 | if eos: 38 | sentence = sentence + [self.eos_char] 39 | output.append(self.encode(sentence)) 40 | 41 | return output 42 | 43 | def encode(self, sentence): 44 | output = [] 45 | 46 | for w in sentence: 47 | if w not in self.w2i: 48 | idx = self.w2i[self.oov_char] 49 | else: 50 | idx = self.w2i[w] 51 | output.append(idx) 52 | 53 | return output 54 | 55 | def decode(self, sentence): 56 | return [self.i2w[id] for id in sentence] 57 | -------------------------------------------------------------------------------- /6/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .Vocab import Vocab 2 | -------------------------------------------------------------------------------- /6/utils/tf/DataLoader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.utils import shuffle 3 | import tensorflow as tf 4 | from tensorflow.keras.preprocessing.sequence import pad_sequences 5 | 6 | 7 | class DataLoader(object): 8 | def __init__(self, dataset, 9 | batch_size=100, 10 | shuffle=False, 11 | random_state=None): 12 | self.dataset = list(zip(dataset[0], dataset[1])) 13 | self.batch_size = batch_size 14 | self.shuffle = shuffle 15 | 16 | if random_state is None: 17 | random_state = np.random.RandomState(123) 18 | 19 | self.random_state = random_state 20 | self._idx = 0 21 | self._reset() 22 | 23 | def __len__(self): 24 | N = len(self.dataset) 25 | b = self.batch_size 26 | return N // b + bool(N % b) 27 | 28 | def __iter__(self): 29 | return self 30 | 31 | def __next__(self): 32 | if self._idx >= len(self.dataset): 33 | self._reset() 34 | raise StopIteration() 35 | 36 | x, t = zip(*self.dataset[self._idx:(self._idx + self.batch_size)]) 37 | x = pad_sequences(x, padding='post') 38 | t = pad_sequences(t, padding='post') 39 | 40 | x = tf.convert_to_tensor(x, dtype=tf.int32) 41 | t = tf.convert_to_tensor(t, dtype=tf.int32) 42 | 43 | self._idx += self.batch_size 44 | 45 | return x, t 46 | 47 | def _reset(self): 48 | if self.shuffle: 49 | self.dataset = shuffle(self.dataset, 50 | random_state=self.random_state) 51 | self._idx = 0 52 | -------------------------------------------------------------------------------- /6/utils/tf/__init__.py: -------------------------------------------------------------------------------- 1 | from .DataLoader import DataLoader 2 | -------------------------------------------------------------------------------- /6/utils/torch/DataLoader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.utils import shuffle 3 | import torch 4 | from tensorflow.keras.preprocessing.sequence import pad_sequences 5 | 6 | 7 | class DataLoader(object): 8 | def __init__(self, dataset, 9 | batch_size=100, 10 | shuffle=False, 11 | batch_first=False, 12 | device='cpu', 13 | random_state=None): 14 | self.dataset = list(zip(dataset[0], dataset[1])) 15 | self.batch_size = batch_size 16 | self.shuffle = shuffle 17 | self.batch_first = batch_first 18 | self.device = device 19 | 20 | if random_state is None: 21 | random_state = np.random.RandomState(123) 22 | 23 | self.random_state = random_state 24 | 
self._idx = 0 25 | self._reset() 26 | 27 | def __len__(self): 28 | N = len(self.dataset) 29 | b = self.batch_size 30 | return N // b + bool(N % b) 31 | 32 | def __iter__(self): 33 | return self 34 | 35 | def __next__(self): 36 | if self._idx >= len(self.dataset): 37 | self._reset() 38 | raise StopIteration() 39 | 40 | x, t = zip(*self.dataset[self._idx:(self._idx + self.batch_size)]) 41 | x = pad_sequences(x, padding='post') 42 | t = pad_sequences(t, padding='post') 43 | 44 | x = torch.LongTensor(x) 45 | t = torch.LongTensor(t) 46 | 47 | if not self.batch_first: 48 | x = x.t() 49 | t = t.t() 50 | 51 | self._idx += self.batch_size 52 | 53 | return x.to(self.device), t.to(self.device) 54 | 55 | def _reset(self): 56 | if self.shuffle: 57 | self.dataset = shuffle(self.dataset, 58 | random_state=self.random_state) 59 | self._idx = 0 60 | -------------------------------------------------------------------------------- /6/utils/torch/__init__.py: -------------------------------------------------------------------------------- 1 | from .DataLoader import DataLoader 2 | -------------------------------------------------------------------------------- /A/01_decorator.py: -------------------------------------------------------------------------------- 1 | def decorator(fn): 2 | def wrapper(*args, **kwargs): 3 | print('decorator') 4 | fn(*args, **kwargs) 5 | return wrapper 6 | 7 | 8 | @decorator 9 | def main(): 10 | print('main') 11 | 12 | 13 | main() 14 | -------------------------------------------------------------------------------- /A/02_save_load_model_keras.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A.3.1 Saving and loading a model - Keras 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.model_selection import train_test_split 7 | import tensorflow as tf 8 | from tensorflow.keras import datasets 9 | from tensorflow.keras.models import Sequential 10 | from tensorflow.keras.layers import \ 11 | Dense, Dropout, Activation, BatchNormalization 12 | from tensorflow.keras import optimizers 13 | from tensorflow.keras.models import load_model 14 | 15 | 16 | if __name__ == '__main__': 17 | np.random.seed(123) 18 | tf.random.set_seed(123) 19 | 20 | ''' 21 | 1. Prepare the data 22 | ''' 23 | mnist = datasets.mnist 24 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 25 | 26 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 27 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 28 | 29 | x_train, x_val, t_train, t_val = \ 30 | train_test_split(x_train, t_train, test_size=0.2) 31 | 32 | ''' 33 | 2. Build the model 34 | ''' 35 | model = Sequential() 36 | model.add(Dense(200, kernel_initializer='he_normal')) 37 | model.add(BatchNormalization()) 38 | model.add(Activation('relu')) 39 | model.add(Dropout(0.5)) 40 | model.add(Dense(10, kernel_initializer='he_normal', 41 | activation='softmax')) 42 | 43 | ''' 44 | 3. Train and save the model 45 | ''' 46 | optimizer = optimizers.Adam(learning_rate=0.001, 47 | beta_1=0.9, beta_2=0.999, amsgrad=True) 48 | 49 | model.compile(optimizer=optimizer, 50 | loss='sparse_categorical_crossentropy', 51 | metrics=['accuracy']) 52 | 53 | model.fit(x_train, t_train, 54 | epochs=10, batch_size=100, 55 | verbose=2, 56 | validation_data=(x_val, t_val)) 57 | 58 | model.save('model_keras.h5') # save the model 59 | 60 | print('model saved to: {}'.format('model_keras.h5')) 61 | 62 | ''' 63 | 4.
Load and evaluate the model 64 | ''' 65 | del model # delete the model trained so far 66 | 67 | model = load_model('model_keras.h5') # load the trained model 68 | 69 | print('-' * 20) 70 | print('model loaded.') 71 | 72 | # evaluate on the test data 73 | loss, acc = model.evaluate(x_test, t_test, verbose=0) 74 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 75 | loss, 76 | acc 77 | )) 78 | -------------------------------------------------------------------------------- /A/03_save_load_model_tf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A.3.2 Saving and loading a model - TensorFlow 3 | ''' 4 | 5 | import numpy as np 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.utils import shuffle 8 | import tensorflow as tf 9 | from tensorflow.keras import datasets 10 | from tensorflow.keras.models import Model 11 | from tensorflow.keras.layers import \ 12 | Dense, Dropout, Activation, BatchNormalization 13 | from tensorflow.keras import optimizers 14 | from tensorflow.keras import losses 15 | from tensorflow.keras import metrics 16 | 17 | 18 | class DNN(Model): 19 | def __init__(self, hidden_dim, output_dim): 20 | super().__init__() 21 | self.l1 = Dense(hidden_dim, kernel_initializer='he_normal') 22 | self.b1 = BatchNormalization() 23 | self.a1 = Activation('relu') 24 | self.d1 = Dropout(0.5) 25 | self.l2 = Dense(output_dim, kernel_initializer='he_normal', 26 | activation='softmax') 27 | 28 | self.ls = [self.l1, self.b1, self.a1, self.d1, 29 | self.l2] 30 | 31 | def call(self, x): 32 | for layer in self.ls: 33 | x = layer(x) 34 | 35 | return x 36 | 37 | 38 | if __name__ == '__main__': 39 | np.random.seed(123) 40 | tf.random.set_seed(123) 41 | 42 | ''' 43 | 1. Prepare the data 44 | ''' 45 | mnist = datasets.mnist 46 | (x_train, t_train), (x_test, t_test) = mnist.load_data() 47 | 48 | x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32) 49 | x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32) 50 | 51 | x_train, x_val, t_train, t_val = \ 52 | train_test_split(x_train, t_train, test_size=0.2) 53 | 54 | ''' 55 | 2. Build the model 56 | ''' 57 | model = DNN(200, 10) 58 | 59 | ''' 60 | 3.
Train and save the model 61 | ''' 62 | criterion = losses.SparseCategoricalCrossentropy() 63 | optimizer = optimizers.Adam(learning_rate=0.001, 64 | beta_1=0.9, beta_2=0.999, amsgrad=True) 65 | train_loss = metrics.Mean() 66 | train_acc = metrics.SparseCategoricalAccuracy() 67 | val_loss = metrics.Mean() 68 | val_acc = metrics.SparseCategoricalAccuracy() 69 | 70 | def compute_loss(t, y): 71 | return criterion(t, y) 72 | 73 | @tf.function 74 | def train_step(x, t): 75 | with tf.GradientTape() as tape: 76 | preds = model(x) 77 | loss = compute_loss(t, preds) 78 | grads = tape.gradient(loss, model.trainable_variables) 79 | optimizer.apply_gradients(zip(grads, model.trainable_variables)) 80 | train_loss(loss) 81 | train_acc(t, preds) 82 | 83 | return loss 84 | 85 | @tf.function 86 | def val_step(x, t): 87 | preds = model(x) 88 | loss = compute_loss(t, preds) 89 | val_loss(loss) 90 | val_acc(t, preds) 91 | 92 | epochs = 10 93 | batch_size = 100 94 | n_batches_train = x_train.shape[0] // batch_size 95 | n_batches_val = x_val.shape[0] // batch_size 96 | 97 | for epoch in range(epochs): 98 | x_, t_ = shuffle(x_train, t_train) 99 | 100 | for batch in range(n_batches_train): 101 | start = batch * batch_size 102 | end = start + batch_size 103 | train_step(x_[start:end], t_[start:end]) 104 | 105 | for batch in range(n_batches_val): 106 | start = batch * batch_size 107 | end = start + batch_size 108 | val_step(x_val[start:end], t_val[start:end]) 109 | 110 | print('epoch: {}, loss: {:.3}, acc: {:.3f}' 111 | ', val_loss: {:.3}, val_acc: {:.3f}'.format( 112 | epoch+1, 113 | train_loss.result(), 114 | train_acc.result(), 115 | val_loss.result(), 116 | val_acc.result() 117 | )) 118 | 119 | model.save_weights('model_tf.h5') # save the model weights 120 | 121 | print('model weights saved to: {}'.format('model_tf.h5')) 122 | 123 | ''' 124 | 4. Load and evaluate the model 125 | ''' 126 | del model # delete the model trained so far 127 | 128 | model = DNN(200, 10) # initialize a new model 129 | model.build(input_shape=(None, 784)) # build the model 130 | model.load_weights('model_tf.h5') # load the trained weights 131 | 132 | print('-' * 20) 133 | print('model loaded.') 134 | 135 | # evaluate on the test data 136 | test_loss = metrics.Mean() 137 | test_acc = metrics.SparseCategoricalAccuracy() 138 | 139 | def test_step(x, t): 140 | preds = model(x) 141 | loss = compute_loss(t, preds) 142 | test_loss(loss) 143 | test_acc(t, preds) 144 | 145 | return loss 146 | 147 | test_step(x_test, t_test) 148 | 149 | print('test_loss: {:.3f}, test_acc: {:.3f}'.format( 150 | test_loss.result(), 151 | test_acc.result() 152 | )) 153 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 詳解ディープラーニング 第2版 2 | 3 | A collection of the code introduced in the deep learning book [詳解ディープラーニング \~TensorFlow/Keras・PyTorchによる時系列データ処理\~](https://book.mynavi.jp/ec/products/detail/id=109454). 4 | --------------------------------------------------------------------------------
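A usage sketch for the EarlyStopping callback defined in 5/callbacks and 6/callbacks (an editorial example, not a file in the repository; the loss values are made up): the callback is called once per epoch with the current validation loss, and the training loop breaks as soon as it returns True.

from callbacks import EarlyStopping  # assumes the 5/ or 6/ directory is the working directory

es = EarlyStopping(patience=2, verbose=1)
val_losses = [1.0, 0.8, 0.7, 0.75, 0.74, 0.73]  # hypothetical per-epoch validation losses

for epoch, loss in enumerate(val_losses, 1):
    print('epoch: {}, val_loss: {:.2f}'.format(epoch, loss))
    if es(loss):  # True once the loss has failed to improve more than patience times in a row
        break     # for these values, 'early stopping' is printed at epoch 6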