├── .gitignore ├── README.md ├── ch01 ├── keras_MINST_V1.py ├── keras_MINST_V2.py ├── keras_MINST_V3.py ├── keras_MINST_V4.py ├── make_tensorboard.py ├── requirements.txt └── requirements_gpu.txt ├── ch02 ├── quiver.py ├── requirements-windows.txt └── requirements.txt ├── ch03 ├── cifar10_deep_net.py ├── cifar10_deep_with_aug.py ├── cifar10_net.py ├── cifar10_predict.py ├── keras_augs.py ├── lenet.py ├── pretrain_inception_v3_cifar10_with_aug.py ├── pretrain_vgg.py ├── pretrain_vgg_feature_extract.py ├── requirements.txt ├── requirements_gpu.txt ├── sample_images │ ├── cat.png │ └── dog.png └── sample_images_pretrain │ ├── elephant.jpg │ └── steaming_train.png ├── ch04 ├── dataset.py ├── dcgan.py ├── example_gan_cifar10.py ├── example_gan_convolutional.py ├── image_utils.py ├── requirements.txt ├── requirements_gpu.txt ├── requirements_gpu_wavenet.txt ├── wavenet.py └── wavenet_utils.py ├── ch05 ├── data │ └── .gitkeep ├── finetune_glove_embeddings.py ├── finetune_word2vec_embeddings.py ├── keras_cbow.py ├── keras_skipgram.py ├── learn_embedding_from_scratch.py ├── models.py ├── requirements.txt ├── requirements_gpu.txt ├── skipgram_example.py ├── transfer_glove_embeddings.py ├── transfer_word2vec_embeddings.py ├── utils.py ├── word2vec_cbow.py ├── word2vec_gensim.py └── word2vec_skipgram.py ├── ch06 ├── alice_chargen_rnn.py ├── data │ └── .gitkeep ├── econs_data.py ├── econs_stateful.py ├── pos_tagging_gru.py ├── requirements.txt ├── requirements_gpu.txt └── umich_sentiment_lstm.py ├── ch07 ├── composite_qa_net.py ├── custom_layer_lambda.py ├── custom_layer_normalize.py ├── data │ └── .gitkeep ├── deep_dream.py ├── functional_api.py ├── lstm_autoencoder.py ├── regression_net.py ├── requirements.txt ├── requirements_gpu.txt └── style_transfer.py ├── ch08 ├── log.txt ├── model │ ├── agent_network.h5 │ └── events.out.tfevents.1505884941.smap6.local ├── play_rl_network.py ├── requirements.txt ├── requirements_gpu.txt └── rl_network.py └── deep-learning-with-keras-ja.png /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *~ 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 直感 Deep Learning 2 | 3 | --- 4 | 5 | ![表紙](deep-learning-with-keras-ja.png) 6 | 7 | --- 8 | 9 | 本リポジトリはオライリー・ジャパン発行書籍『[直感 Deep Learning](http://www.oreilly.co.jp/books/9784873118260/)』(原書名『[Deep Learning with Keras](https://www.packtpub.com/big-data-and-business-intelligence/deep-learning-keras)』)のサポートサイトです。 10 | 11 | ## サンプルコード 12 | 13 | ### ファイル構成 14 | 15 | |フォルダ名 |説明 | 16 | |:-- |:-- | 17 | |ch01 |1章で使用するソースコードとライブラリ導入に必要なrequirements.txt | 18 | |ch02 |2章で使用するソースコードとライブラリ導入に必要なrequirements.txt | 19 | |... |... 
| 20 | |ch08 |8章で使用するソースコードとライブラリ導入に必要なrequirements.txt | 21 | 22 | サンプルコードの解説は本書籍をご覧ください。 23 | 24 | ## 仮想環境の準備 25 | 26 | 環境構築の方法は読者に任せていますが、一般的には仮想環境を構築する方法を推奨されるのでその方法を記述しておきます。 27 | 28 | `pyenv`と`virtualenv`の導入をします。 29 | 30 | **linux** 31 | ``` 32 | apt-get install pyenv 33 | apt-get install virtualenv 34 | ``` 35 | **Mac** 36 | ``` 37 | brew install pyenv 38 | brew install virtualenv 39 | ``` 40 | 41 | `pyenv`と`virtualenv`を用いて仮想環境を構築します。 42 | 43 | ``` 44 | pyenv install 3.6.0 45 | pyenv rehash 46 | pyenv local 3.6.0 47 | virtualenv -p ~/.pyenv/versions/3.6.0/bin/python3.6 my_env 48 | source my_env/bin/activate 49 | ``` 50 | 51 | **Windows** 52 | 53 | [Anaconda](https://www.anaconda.com/download/#windows)を導入して下さい。 54 | 55 | ### 使用方法 56 | 57 | サンプルを実行する前に、必要なライブラリをインストールする必要があります。 58 | 59 | ```bash 60 | $ pip install -r requirements.txt 61 | ``` 62 | 63 | CPU用の`requirements.txt`とGPU用の`requirements_gpu.txt`を章ごとに用意してあります。 64 | 65 | > 巻末の付録Aで、GPUを考慮した開発環境の構築について補足していますので参考にしてください。 66 | 67 | 各章のフォルダへ移動して、Pythonコマンドを実行します。書籍にどのpythonコードを動作させているか明記しています。 68 | 69 | 下記は一例です。 70 | 71 | ``` 72 | cd ch03 73 | python cifar10_deep_with_aug.py 74 | ``` 75 | 76 | ## 実行環境 77 | 78 | 日本語版で検証に使用した各ソフトウェアのバージョン、およびハードウェアは次のとおりです。 79 | 80 | #### ソフトウェア 81 | 82 | * Pycharm-community-2017.1 83 | * Python 3.6.0(10章ではDocker環境での簡易的な確認のため3.5.2) 84 | * TensorFlow 1.8.0 85 | * Keras 2.1.6(4章では他のライブラリとの関係があるため2.1.2) 86 | * h5py 2.7.1 87 | * numpy 1.14.0 88 | * scipy 1.0.0 89 | * quiver-engine 0.1.4.1.4 90 | * matplotlib 2.1.1 91 | * picklable\_itertools 0.1.1以上 92 | * sacred 0.6.10以上 93 | * tqdm 4.8.4以上 94 | * q 2.6以上 95 | * gensim 3.2.0 96 | * nltk 3.2.5 97 | * scikit-learn 0.19.1 98 | * pandas 0.22.0 99 | * Pillow 4.3.0 100 | * gym 0.10.5 101 | * pygame 1.9.3 102 | * html5lib 0.9999999 103 | * keras-adversarial 0.0.3 104 | * PyYAML 3.12 105 | * requests 2.14.2 106 | 107 | ##### GPUを使用する場合 108 | 109 | * tensorflow-gpu 1.8.0 110 | * cuda 9.0 111 | * cuDNN 7.0.5 112 | 113 | ##### Macで使用する場合の注意点 114 | 115 | `matplotlib`を標準設定のまま使用すると下記のようなエラーが発生します。これはMacで設定されている標準の画像レンダリングAPIが`matplotlib`で使用するものと異なるためです。 116 | 117 | ``` 118 | RuntimeError: Python is not installed as a framework. The Mac OS X backend will not be able to function correctly if Python is not installed as a framework. See the Python documentation for more information on installing Python as a framework on Mac OS X. Please either reinstall Python as a framework, or try one of the other backends. If you are using (Ana)Conda please install python.app and replace the use of 'python' with 'pythonw'. See 'Working with Matplotlib on OSX' in the Matplotlib FAQ for more information. 
119 | ``` 120 | 121 | `matplotlib`を使用する前に下記のように画像レンダリングAPIを設定して使用してください。 122 | 123 | ```py 124 | import matplotlib as mpl 125 | mpl.use('TkAgg') 126 | ``` 127 | 128 | #### 動作確認済みハードウェア 129 | 130 | * Ubuntu 16.04 LTS(GPU:GeForce GTX 1080) 131 | * 64ビットアーキテクチャ 132 | * Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz 133 | * 16GBのRAM 134 | * ハードディスクの空き容量は少なくとも10GB 135 | 136 | ## 正誤表 137 | 138 | 下記の誤りがありました。お詫びして訂正いたします。 139 | 140 | 本ページに掲載されていない誤植など間違いを見つけた方は、japan@oreilly.co.jpまでお知らせください。 141 | 142 | 143 | ### 第2刷まで 144 | 145 | #### ■1章 P.27 10行目 146 | **誤** 147 | ``` 148 | シグモイド関数は連続関数なので、微分可能です。 149 | ``` 150 | **正** 151 | ``` 152 | シグモイドは連続関数であり、微分可能です。 153 | ``` 154 | 155 | 156 | ### 第1刷 157 | 158 | #### ■1章 P.22 1行目 159 | **誤** 160 | ``` 161 | from keras.layersimport Dense, Dropout, Activation 162 | ``` 163 | **正** 164 | ``` 165 | from keras.layers import Dense, Dropout, Activation 166 | ``` 167 | 168 | #### ■2章 P.67 脚注 169 | **誤** 170 | ``` 171 | 前:package_data=’quiver_engine’: ’quiverboard/dist/*’, 172 | 後:package_data=’quiver_engine’: [’quiverboard/dist/*’], 173 | ``` 174 | **正** 175 | ``` 176 | 前:package_data={'quiver_engine': 'quiverboard/dist/*'}, 177 | 後:package_data={'quiver_engine': ['quiverboard/dist/*']}, 178 | ``` 179 | 180 | #### ■3章 P.96 8~13行目 181 | **誤** 182 | ``` 183 | from pathlib import Path 184 | import pathlib 185 | import numpy as np 186 | from PIL import Image 187 | from keras.models import load_model 188 | 189 | model_path = "logdir/model_file.hdf5" 190 | ``` 191 | **正** 192 | ``` 193 | from pathlib import Path 194 | import numpy as np 195 | from PIL import Image 196 | from keras.models import load_model 197 | 198 | model_path = "logdir_cifar10_deep_with_aug/model_file.hdf5" 199 | ``` 200 | 201 | #### ■4章 P.112 1行目 202 | **誤** 203 | ``` 204 | DCGGAN生成モデルは、以下のKerasコードで実装できます。 205 | ``` 206 | **正** 207 | ``` 208 | DCGAN生成モデルは、以下のKerasコードで実装できます。 209 | ``` 210 | 211 | #### ■6章 P.177 10行目 212 | **誤** 213 | ``` 214 | 今回の場合、語彙数は42です。 215 | ``` 216 | **正** 217 | ``` 218 | 今回の場合、語彙数は60です。 219 | ``` 220 | 221 | #### ■6章 P.179 35~36行目 222 | **誤** 223 | ``` 224 | for i, ch in enumerate(test_chars): 225 | Xtest[0, i, char2index[ch]] = 1 226 | ``` 227 | **正** 228 | ``` 229 | for j, ch in enumerate(test_chars): 230 | Xtest[0, j, char2index[ch]] = 1 231 | ``` 232 | -------------------------------------------------------------------------------- /ch01/keras_MINST_V1.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from keras.datasets import mnist 4 | from keras.models import Sequential 5 | from keras.layers.core import Dense, Activation 6 | from keras.optimizers import SGD 7 | from keras.utils import np_utils 8 | from make_tensorboard import make_tensorboard 9 | 10 | 11 | np.random.seed(1671) # for reproducibility 12 | 13 | # network and training 14 | NB_EPOCH = 200 15 | BATCH_SIZE = 128 16 | VERBOSE = 1 17 | NB_CLASSES = 10 # number of outputs = number of digits 18 | OPTIMIZER = SGD() # SGD optimizer, explained later in this chapter 19 | N_HIDDEN = 128 20 | VALIDATION_SPLIT = 0.2 # how much TRAIN is reserved for VALIDATION 21 | 22 | # data: shuffled and split between train and test sets 23 | # 24 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 25 | 26 | # X_train is 60000 rows of 28x28 values --> reshaped in 60000 x 784 27 | RESHAPED = 784 28 | # 29 | X_train = X_train.reshape(60000, RESHAPED) 30 | X_test = X_test.reshape(10000, RESHAPED) 31 | X_train = 
X_train.astype('float32') 32 | X_test = X_test.astype('float32') 33 | 34 | # normalize 35 | # 36 | X_train /= 255 37 | X_test /= 255 38 | print(X_train.shape[0], 'train samples') 39 | print(X_test.shape[0], 'test samples') 40 | 41 | # convert class vectors to binary class matrices 42 | Y_train = np_utils.to_categorical(y_train, NB_CLASSES) 43 | Y_test = np_utils.to_categorical(y_test, NB_CLASSES) 44 | 45 | # 10 outputs 46 | # final stage is softmax 47 | 48 | model = Sequential() 49 | model.add(Dense(NB_CLASSES, input_shape=(RESHAPED,))) 50 | model.add(Activation('softmax')) 51 | 52 | model.summary() 53 | 54 | model.compile(loss='categorical_crossentropy', 55 | optimizer=OPTIMIZER, 56 | metrics=['accuracy']) 57 | 58 | callbacks = [make_tensorboard(set_dir_name='keras_MINST_V1')] 59 | 60 | model.fit(X_train, Y_train, 61 | batch_size=BATCH_SIZE, epochs=NB_EPOCH, 62 | callbacks=callbacks, 63 | verbose=VERBOSE, validation_split=VALIDATION_SPLIT) 64 | 65 | score = model.evaluate(X_test, Y_test, verbose=VERBOSE) 66 | print("\nTest score:", score[0]) 67 | print('Test accuracy:', score[1]) 68 | -------------------------------------------------------------------------------- /ch01/keras_MINST_V2.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from keras.datasets import mnist 4 | from keras.models import Sequential 5 | from keras.layers.core import Dense, Activation 6 | from keras.optimizers import SGD 7 | from keras.utils import np_utils 8 | from make_tensorboard import make_tensorboard 9 | 10 | 11 | np.random.seed(1671) # for reproducibility 12 | 13 | # network and training 14 | NB_EPOCH = 20 15 | BATCH_SIZE = 128 16 | VERBOSE = 1 17 | NB_CLASSES = 10 # number of outputs = number of digits 18 | OPTIMIZER = SGD() # optimizer, explained later in this chapter 19 | N_HIDDEN = 128 20 | VALIDATION_SPLIT = 0.2 # how much TRAIN is reserved for VALIDATION 21 | 22 | # data: shuffled and split between train and test sets 23 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 24 | 25 | # X_train is 60000 rows of 28x28 values --> reshaped in 60000 x 784 26 | RESHAPED = 784 27 | # 28 | X_train = X_train.reshape(60000, RESHAPED) 29 | X_test = X_test.reshape(10000, RESHAPED) 30 | X_train = X_train.astype('float32') 31 | X_test = X_test.astype('float32') 32 | 33 | # normalize 34 | X_train /= 255 35 | X_test /= 255 36 | print(X_train.shape[0], 'train samples') 37 | print(X_test.shape[0], 'test samples') 38 | 39 | # convert class vectors to binary class matrices 40 | Y_train = np_utils.to_categorical(y_train, NB_CLASSES) 41 | Y_test = np_utils.to_categorical(y_test, NB_CLASSES) 42 | 43 | # M_HIDDEN hidden layers 44 | # 10 outputs 45 | # final stage is softmax 46 | 47 | model = Sequential() 48 | model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,))) 49 | model.add(Activation('relu')) 50 | model.add(Dense(N_HIDDEN)) 51 | model.add(Activation('relu')) 52 | model.add(Dense(NB_CLASSES)) 53 | model.add(Activation('softmax')) 54 | model.summary() 55 | 56 | model.compile(loss='categorical_crossentropy', 57 | optimizer=OPTIMIZER, 58 | metrics=['accuracy']) 59 | 60 | callbacks = [make_tensorboard(set_dir_name='keras_MINST_V2')] 61 | 62 | model.fit(X_train, Y_train, 63 | batch_size=BATCH_SIZE, epochs=NB_EPOCH, 64 | callbacks=callbacks, 65 | verbose=VERBOSE, validation_split=VALIDATION_SPLIT) 66 | 67 | score = model.evaluate(X_test, Y_test, verbose=VERBOSE) 68 | print("\nTest score:", score[0]) 69 | print('Test 
accuracy:', score[1]) 70 | -------------------------------------------------------------------------------- /ch01/keras_MINST_V3.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from keras.datasets import mnist 4 | from keras.models import Sequential 5 | from keras.layers.core import Dense, Dropout, Activation 6 | from keras.optimizers import SGD 7 | from keras.utils import np_utils 8 | from make_tensorboard import make_tensorboard 9 | 10 | 11 | np.random.seed(1671) # for reproducibility 12 | 13 | # network and training 14 | NB_EPOCH = 20 15 | BATCH_SIZE = 128 16 | VERBOSE = 1 17 | NB_CLASSES = 10 # number of outputs = number of digits 18 | OPTIMIZER = SGD() # optimizer, explained later in this chapter 19 | N_HIDDEN = 128 20 | VALIDATION_SPLIT = 0.2 # how much TRAIN is reserved for VALIDATION 21 | DROPOUT = 0.3 22 | 23 | # data: shuffled and split between train and test sets 24 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 25 | 26 | # X_train is 60000 rows of 28x28 values --> reshaped in 60000 x 784 27 | RESHAPED = 784 28 | # 29 | X_train = X_train.reshape(60000, RESHAPED) 30 | X_test = X_test.reshape(10000, RESHAPED) 31 | X_train = X_train.astype('float32') 32 | X_test = X_test.astype('float32') 33 | 34 | # normalize 35 | X_train /= 255 36 | X_test /= 255 37 | print(X_train.shape[0], 'train samples') 38 | print(X_test.shape[0], 'test samples') 39 | 40 | # convert class vectors to binary class matrices 41 | Y_train = np_utils.to_categorical(y_train, NB_CLASSES) 42 | Y_test = np_utils.to_categorical(y_test, NB_CLASSES) 43 | 44 | # M_HIDDEN hidden layers 45 | # 10 outputs 46 | # final stage is softmax 47 | 48 | model = Sequential() 49 | model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,))) 50 | model.add(Activation('relu')) 51 | model.add(Dropout(DROPOUT)) 52 | model.add(Dense(N_HIDDEN)) 53 | model.add(Activation('relu')) 54 | model.add(Dropout(DROPOUT)) 55 | model.add(Dense(NB_CLASSES)) 56 | model.add(Activation('softmax')) 57 | model.summary() 58 | 59 | model.compile(loss='categorical_crossentropy', 60 | optimizer=OPTIMIZER, 61 | metrics=['accuracy']) 62 | 63 | callbacks = [make_tensorboard(set_dir_name='keras_MINST_V3')] 64 | 65 | model.fit(X_train, Y_train, 66 | batch_size=BATCH_SIZE, epochs=NB_EPOCH, 67 | callbacks=callbacks, 68 | verbose=VERBOSE, validation_split=VALIDATION_SPLIT) 69 | 70 | score = model.evaluate(X_test, Y_test, verbose=VERBOSE) 71 | print("\nTest score:", score[0]) 72 | print('Test accuracy:', score[1]) 73 | -------------------------------------------------------------------------------- /ch01/keras_MINST_V4.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from keras.datasets import mnist 4 | from keras.models import Sequential 5 | from keras.layers.core import Dense, Dropout, Activation 6 | from keras.optimizers import RMSprop 7 | from keras.utils import np_utils 8 | from make_tensorboard import make_tensorboard 9 | 10 | 11 | np.random.seed(1671) # for reproducibility 12 | 13 | # network and training 14 | NB_EPOCH = 20 15 | BATCH_SIZE = 128 16 | VERBOSE = 1 17 | NB_CLASSES = 10 # number of outputs = number of digits 18 | OPTIMIZER = RMSprop() # optimizer, explainedin this chapter 19 | N_HIDDEN = 128 20 | VALIDATION_SPLIT = 0.2 # how much TRAIN is reserved for VALIDATION 21 | DROPOUT = 0.3 22 | 23 | # data: shuffled and split between train and test 
sets 24 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 25 | 26 | # X_train is 60000 rows of 28x28 values --> reshaped in 60000 x 784 27 | RESHAPED = 784 28 | # 29 | X_train = X_train.reshape(60000, RESHAPED) 30 | X_test = X_test.reshape(10000, RESHAPED) 31 | X_train = X_train.astype('float32') 32 | X_test = X_test.astype('float32') 33 | 34 | # normalize 35 | X_train /= 255 36 | X_test /= 255 37 | print(X_train.shape[0], 'train samples') 38 | print(X_test.shape[0], 'test samples') 39 | 40 | # convert class vectors to binary class matrices 41 | Y_train = np_utils.to_categorical(y_train, NB_CLASSES) 42 | Y_test = np_utils.to_categorical(y_test, NB_CLASSES) 43 | 44 | # M_HIDDEN hidden layers 45 | # 10 outputs 46 | # final stage is softmax 47 | 48 | model = Sequential() 49 | model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,))) 50 | model.add(Activation('relu')) 51 | model.add(Dropout(DROPOUT)) 52 | model.add(Dense(N_HIDDEN)) 53 | model.add(Activation('relu')) 54 | model.add(Dropout(DROPOUT)) 55 | model.add(Dense(NB_CLASSES)) 56 | model.add(Activation('softmax')) 57 | model.summary() 58 | 59 | callbacks = [make_tensorboard(set_dir_name='keras_MINST_V4')] 60 | 61 | model.compile(loss='categorical_crossentropy', 62 | optimizer=OPTIMIZER, 63 | metrics=['accuracy']) 64 | 65 | model.fit(X_train, Y_train, 66 | batch_size=BATCH_SIZE, epochs=NB_EPOCH, 67 | callbacks=callbacks, 68 | verbose=VERBOSE, validation_split=VALIDATION_SPLIT) 69 | 70 | score = model.evaluate(X_test, Y_test, verbose=VERBOSE) 71 | print("\nTest score:", score[0]) 72 | print('Test accuracy:', score[1]) 73 | -------------------------------------------------------------------------------- /ch01/make_tensorboard.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | from time import gmtime, strftime 5 | from keras.callbacks import TensorBoard 6 | import os 7 | 8 | 9 | def make_tensorboard(set_dir_name=''): 10 | tictoc = strftime("%a_%d_%b_%Y_%H_%M_%S", gmtime()) 11 | directory_name = tictoc 12 | log_dir = set_dir_name + '_' + directory_name 13 | os.mkdir(log_dir) 14 | tensorboard = TensorBoard(log_dir=log_dir, write_graph=True, ) 15 | return tensorboard 16 | -------------------------------------------------------------------------------- /ch01/requirements.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow==1.8.0 3 | h5py==2.7.1 4 | -------------------------------------------------------------------------------- /ch01/requirements_gpu.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow-gpu==1.8.0 3 | h5py==2.7.1 4 | -------------------------------------------------------------------------------- /ch02/quiver.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from keras.applications.vgg16 import VGG16 3 | from quiver_engine import server 4 | model = VGG16() 5 | 6 | server.launch(model, 7 | input_folder='./sample_images', temp_folder='./tmp', port=8000) 8 | -------------------------------------------------------------------------------- /ch02/requirements-windows.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow==1.8.0 3 | gevent==1.2.2 4 | git+https://github.com/SnowMasaya/quiver 5 | h5py==2.7.1 6 | 
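The chapter 1 scripts above all obtain their TensorBoard callback from `make_tensorboard.py`. As a minimal usage sketch (not part of the repository), this is how the helper plugs into `model.fit` alongside another standard Keras callback; the `EarlyStopping` line is an optional addition, and the resulting logs can be viewed with `tensorboard --logdir=.` from inside `ch01`:

```py
from keras.callbacks import EarlyStopping
from make_tensorboard import make_tensorboard

# make_tensorboard() creates a time-stamped log directory such as
# "keras_MINST_V1_<timestamp>" and returns a configured TensorBoard callback
callbacks = [
    make_tensorboard(set_dir_name='keras_MINST_V1'),
    EarlyStopping(monitor='val_loss', patience=3),  # optional: stop early
]

# model.fit(X_train, Y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH,
#           callbacks=callbacks, verbose=VERBOSE,
#           validation_split=VALIDATION_SPLIT)
```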
-------------------------------------------------------------------------------- /ch02/requirements.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow==1.8.0 3 | gevent==1.2.2 4 | quiver-engine==0.1.4.1.4 5 | h5py==2.7.1 6 | -------------------------------------------------------------------------------- /ch03/cifar10_deep_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import keras 3 | from keras.models import Sequential 4 | from keras.layers.convolutional import Conv2D 5 | from keras.layers.convolutional import MaxPooling2D 6 | from keras.layers.core import Activation 7 | from keras.layers.core import Flatten, Dropout 8 | from keras.layers.core import Dense 9 | from keras.datasets import cifar10 10 | from keras.optimizers import RMSprop 11 | from keras.callbacks import TensorBoard, ModelCheckpoint 12 | 13 | 14 | def network(input_shape, num_classes): 15 | model = Sequential() 16 | 17 | # extract image features by convolution and max pooling layers 18 | model.add(Conv2D( 19 | 32, kernel_size=3, padding="same", 20 | input_shape=input_shape, activation="relu" 21 | )) 22 | model.add(Conv2D(32, kernel_size=3, activation="relu")) 23 | model.add(MaxPooling2D(pool_size=(2, 2))) 24 | model.add(Dropout(0.25)) 25 | model.add(Conv2D(64, kernel_size=3, padding="same", activation="relu")) 26 | model.add(Conv2D(64, kernel_size=3, activation="relu")) 27 | model.add(MaxPooling2D(pool_size=(2, 2))) 28 | model.add(Dropout(0.25)) 29 | # classify the class by fully-connected layers 30 | model.add(Flatten()) 31 | model.add(Dense(512, activation="relu")) 32 | model.add(Dropout(0.5)) 33 | model.add(Dense(num_classes)) 34 | model.add(Activation("softmax")) 35 | return model 36 | 37 | 38 | class CIFAR10Dataset(): 39 | 40 | def __init__(self): 41 | self.image_shape = (32, 32, 3) 42 | self.num_classes = 10 43 | 44 | def get_batch(self): 45 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 46 | 47 | x_train, x_test = [self.preprocess(d) for d in [x_train, x_test]] 48 | y_train, y_test = [self.preprocess(d, label_data=True) for d in 49 | [y_train, y_test]] 50 | 51 | return x_train, y_train, x_test, y_test 52 | 53 | def preprocess(self, data, label_data=False): 54 | if label_data: 55 | # convert class vectors to binary class matrices 56 | data = keras.utils.to_categorical(data, self.num_classes) 57 | else: 58 | data = data.astype("float32") 59 | data /= 255 # convert the value to 0~1 scale 60 | shape = (data.shape[0],) + self.image_shape # add dataset length 61 | data = data.reshape(shape) 62 | 63 | return data 64 | 65 | 66 | class Trainer(): 67 | 68 | def __init__(self, model, loss, optimizer): 69 | self._target = model 70 | self._target.compile( 71 | loss=loss, optimizer=optimizer, metrics=["accuracy"] 72 | ) 73 | self.verbose = 1 74 | logdir = "logdir_cifar10_deep_net" 75 | self.log_dir = os.path.join(os.path.dirname(__file__), logdir) 76 | self.model_file_name = "model_file.hdf5" 77 | 78 | def train(self, x_train, y_train, batch_size, epochs, validation_split): 79 | if os.path.exists(self.log_dir): 80 | import shutil 81 | shutil.rmtree(self.log_dir) # remove previous execution 82 | os.mkdir(self.log_dir) 83 | 84 | model_path = os.path.join(self.log_dir, self.model_file_name) 85 | self._target.fit( 86 | x_train, y_train, 87 | batch_size=batch_size, epochs=epochs, 88 | validation_split=validation_split, 89 | callbacks=[ 90 | TensorBoard(log_dir=self.log_dir), 91 | 
ModelCheckpoint(model_path, save_best_only=True) 92 | ], 93 | verbose=self.verbose 94 | ) 95 | 96 | 97 | dataset = CIFAR10Dataset() 98 | 99 | # make model 100 | model = network(dataset.image_shape, dataset.num_classes) 101 | 102 | # train the model 103 | x_train, y_train, x_test, y_test = dataset.get_batch() 104 | trainer = Trainer(model, loss="categorical_crossentropy", optimizer=RMSprop()) 105 | trainer.train( 106 | x_train, y_train, batch_size=128, epochs=12, validation_split=0.2 107 | ) 108 | 109 | # show result 110 | score = model.evaluate(x_test, y_test, verbose=0) 111 | print("Test loss:", score[0]) 112 | print("Test accuracy:", score[1]) 113 | -------------------------------------------------------------------------------- /ch03/cifar10_deep_with_aug.py: -------------------------------------------------------------------------------- 1 | import os 2 | import keras 3 | from keras.models import Sequential 4 | from keras.layers.convolutional import Conv2D 5 | from keras.layers.convolutional import MaxPooling2D 6 | from keras.layers.core import Activation 7 | from keras.layers.core import Flatten, Dropout 8 | from keras.layers.core import Dense 9 | from keras.datasets import cifar10 10 | from keras.optimizers import RMSprop 11 | from keras.callbacks import TensorBoard, ModelCheckpoint 12 | from keras.preprocessing.image import ImageDataGenerator 13 | import numpy as np 14 | 15 | 16 | def network(input_shape, num_classes): 17 | model = Sequential() 18 | 19 | # extract image features by convolution and max pooling layers 20 | model.add(Conv2D( 21 | 32, kernel_size=3, padding="same", 22 | input_shape=input_shape, activation="relu" 23 | )) 24 | model.add(Conv2D(32, kernel_size=3, activation="relu")) 25 | model.add(MaxPooling2D(pool_size=(2, 2))) 26 | model.add(Dropout(0.25)) 27 | model.add(Conv2D(64, kernel_size=3, padding="same", activation="relu")) 28 | model.add(Conv2D(64, kernel_size=3, activation="relu")) 29 | model.add(MaxPooling2D(pool_size=(2, 2))) 30 | model.add(Dropout(0.25)) 31 | # classify the class by fully-connected layers 32 | model.add(Flatten()) 33 | model.add(Dense(512, activation="relu")) 34 | model.add(Dropout(0.5)) 35 | model.add(Dense(num_classes)) 36 | model.add(Activation("softmax")) 37 | return model 38 | 39 | 40 | class CIFAR10Dataset(): 41 | 42 | def __init__(self): 43 | self.image_shape = (32, 32, 3) 44 | self.num_classes = 10 45 | 46 | def get_batch(self): 47 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 48 | 49 | x_train, x_test = [self.preprocess(d) for d in [x_train, x_test]] 50 | y_train, y_test = [self.preprocess(d, label_data=True) for d in 51 | [y_train, y_test]] 52 | 53 | return x_train, y_train, x_test, y_test 54 | 55 | def preprocess(self, data, label_data=False): 56 | if label_data: 57 | # convert class vectors to binary class matrices 58 | data = keras.utils.to_categorical(data, self.num_classes) 59 | else: 60 | data = data.astype("float32") 61 | data /= 255 # convert the value to 0~1 scale 62 | shape = (data.shape[0],) + self.image_shape # add dataset length 63 | data = data.reshape(shape) 64 | 65 | return data 66 | 67 | 68 | class Trainer(): 69 | 70 | def __init__(self, model, loss, optimizer): 71 | self._target = model 72 | self._target.compile( 73 | loss=loss, optimizer=optimizer, metrics=["accuracy"] 74 | ) 75 | self.verbose = 1 76 | logdir = "logdir_cifar10_deep_with_aug" 77 | self.log_dir = os.path.join(os.path.dirname(__file__), logdir) 78 | self.model_file_name = "model_file.hdf5" 79 | 80 | def train(self, x_train, y_train, 
batch_size, epochs, validation_split): 81 | if os.path.exists(self.log_dir): 82 | import shutil 83 | shutil.rmtree(self.log_dir) # remove previous execution 84 | os.mkdir(self.log_dir) 85 | 86 | datagen = ImageDataGenerator( 87 | featurewise_center=False, # set input mean to 0 over the dataset 88 | samplewise_center=False, # set each sample mean to 0 89 | featurewise_std_normalization=False, # divide inputs by std 90 | samplewise_std_normalization=False, # divide each input by its std 91 | zca_whitening=False, # apply ZCA whitening 92 | rotation_range=0, # randomly rotate images in the range (0~180) 93 | width_shift_range=0.1, # randomly shift images horizontally 94 | height_shift_range=0.1, # randomly shift images vertically 95 | horizontal_flip=True, # randomly flip images 96 | vertical_flip=False) # randomly flip images 97 | 98 | # compute quantities for normalization (mean, std etc) 99 | datagen.fit(x_train) 100 | 101 | # split for validation data 102 | indices = np.arange(x_train.shape[0]) 103 | np.random.shuffle(indices) 104 | validation_size = int(x_train.shape[0] * validation_split) 105 | x_train, x_valid = \ 106 | x_train[indices[:-validation_size], :], \ 107 | x_train[indices[-validation_size:], :] 108 | y_train, y_valid = \ 109 | y_train[indices[:-validation_size], :], \ 110 | y_train[indices[-validation_size:], :] 111 | 112 | model_path = os.path.join(self.log_dir, self.model_file_name) 113 | self._target.fit_generator( 114 | datagen.flow(x_train, y_train, batch_size=batch_size), 115 | steps_per_epoch=x_train.shape[0] // batch_size, 116 | epochs=epochs, 117 | validation_data=(x_valid, y_valid), 118 | callbacks=[ 119 | TensorBoard(log_dir=self.log_dir), 120 | ModelCheckpoint(model_path, save_best_only=True) 121 | ], 122 | verbose=self.verbose, 123 | workers=4 124 | ) 125 | 126 | 127 | dataset = CIFAR10Dataset() 128 | 129 | # make model 130 | model = network(dataset.image_shape, dataset.num_classes) 131 | 132 | # train the model 133 | x_train, y_train, x_test, y_test = dataset.get_batch() 134 | trainer = Trainer(model, loss="categorical_crossentropy", optimizer=RMSprop()) 135 | trainer.train( 136 | x_train, y_train, batch_size=128, epochs=15, validation_split=0.2 137 | ) 138 | 139 | # show result 140 | score = model.evaluate(x_test, y_test, verbose=0) 141 | print("Test loss:", score[0]) 142 | print("Test accuracy:", score[1]) 143 | -------------------------------------------------------------------------------- /ch03/cifar10_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import keras 3 | from keras.models import Sequential 4 | from keras.layers.convolutional import Conv2D 5 | from keras.layers.convolutional import MaxPooling2D 6 | from keras.layers.core import Activation 7 | from keras.layers.core import Flatten, Dropout 8 | from keras.layers.core import Dense 9 | from keras.datasets import cifar10 10 | from keras.optimizers import RMSprop 11 | from keras.callbacks import TensorBoard, ModelCheckpoint 12 | 13 | 14 | def network(input_shape, num_classes): 15 | model = Sequential() 16 | 17 | # extract image features by convolution and max pooling layers 18 | model.add(Conv2D( 19 | 32, kernel_size=3, padding="same", 20 | input_shape=input_shape, activation="relu" 21 | )) 22 | model.add(MaxPooling2D(pool_size=(2, 2))) 23 | model.add(Dropout(0.25)) 24 | model.add(Conv2D(64, kernel_size=3, padding="same", activation="relu")) 25 | model.add(MaxPooling2D(pool_size=(2, 2))) 26 | # classify the class by fully-connected layers 27 | 
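# after the two 2x2 max-pooling steps the 32x32 input has been reduced to 8x8x64 feature maps, so Flatten() below yields a 4096-dimensional vector for the dense classifier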
model.add(Flatten()) 28 | model.add(Dense(512, activation="relu")) 29 | model.add(Dropout(0.5)) 30 | model.add(Dense(num_classes)) 31 | model.add(Activation("softmax")) 32 | return model 33 | 34 | 35 | class CIFAR10Dataset(): 36 | 37 | def __init__(self): 38 | self.image_shape = (32, 32, 3) 39 | self.num_classes = 10 40 | 41 | def get_batch(self): 42 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 43 | 44 | x_train, x_test = [self.preprocess(d) for d in [x_train, x_test]] 45 | y_train, y_test = [self.preprocess(d, label_data=True) for d in 46 | [y_train, y_test]] 47 | 48 | return x_train, y_train, x_test, y_test 49 | 50 | def preprocess(self, data, label_data=False): 51 | if label_data: 52 | # convert class vectors to binary class matrices 53 | data = keras.utils.to_categorical(data, self.num_classes) 54 | else: 55 | data = data.astype("float32") 56 | data /= 255 # convert the value to 0~1 scale 57 | shape = (data.shape[0],) + self.image_shape # add dataset length 58 | data = data.reshape(shape) 59 | 60 | return data 61 | 62 | 63 | class Trainer(): 64 | 65 | def __init__(self, model, loss, optimizer): 66 | self._target = model 67 | self._target.compile( 68 | loss=loss, optimizer=optimizer, metrics=["accuracy"] 69 | ) 70 | self.verbose = 1 71 | logdir = "logdir_cifar10_net" 72 | self.log_dir = os.path.join(os.path.dirname(__file__), logdir) 73 | self.model_file_name = "model_file.hdf5" 74 | 75 | def train(self, x_train, y_train, batch_size, epochs, validation_split): 76 | if os.path.exists(self.log_dir): 77 | import shutil 78 | shutil.rmtree(self.log_dir) # remove previous execution 79 | os.mkdir(self.log_dir) 80 | 81 | model_path = os.path.join(self.log_dir, self.model_file_name) 82 | self._target.fit( 83 | x_train, y_train, 84 | batch_size=batch_size, epochs=epochs, 85 | validation_split=validation_split, 86 | callbacks=[ 87 | TensorBoard(log_dir=self.log_dir), 88 | ModelCheckpoint(model_path, save_best_only=True) 89 | ], 90 | verbose=self.verbose 91 | ) 92 | 93 | 94 | dataset = CIFAR10Dataset() 95 | 96 | # make model 97 | model = network(dataset.image_shape, dataset.num_classes) 98 | 99 | # train the model 100 | x_train, y_train, x_test, y_test = dataset.get_batch() 101 | trainer = Trainer(model, loss="categorical_crossentropy", optimizer=RMSprop()) 102 | trainer.train( 103 | x_train, y_train, batch_size=128, epochs=12, validation_split=0.2 104 | ) 105 | 106 | # show result 107 | score = model.evaluate(x_test, y_test, verbose=0) 108 | print("Test loss:", score[0]) 109 | print("Test accuracy:", score[1]) 110 | -------------------------------------------------------------------------------- /ch03/cifar10_predict.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import numpy as np 3 | from PIL import Image 4 | from keras.models import load_model 5 | 6 | 7 | model_path = "logdir_cifar10_deep_with_aug/model_file.hdf5" 8 | images_folder = "sample_images" 9 | 10 | # load model 11 | model = load_model(model_path) 12 | image_shape = (32, 32, 3) 13 | 14 | 15 | # load images 16 | def crop_resize(image_path): 17 | image = Image.open(image_path) 18 | length = min(image.size) 19 | crop = image.crop((0, 0, length, length)) 20 | resized = crop.resize(image_shape[:2]) # use width x height 21 | img = np.array(resized).astype("float32") 22 | img /= 255 23 | return img 24 | 25 | 26 | folder = Path(images_folder) 27 | image_paths = [str(f) for f in folder.glob("*.png")] 28 | images = [crop_resize(p) for p in image_paths] 29 | 
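# stack the individual (32, 32, 3) arrays into a single batch of shape (N, 32, 32, 3) as expected by predict_classes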
images = np.asarray(images) 30 | 31 | predicted = model.predict_classes(images) 32 | 33 | assert predicted[0] == 3, "image should be cat." 34 | assert predicted[1] == 5, "image should be dog." 35 | 36 | print("You can detect cat & dog!") 37 | -------------------------------------------------------------------------------- /ch03/keras_augs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from keras.datasets import cifar10 4 | from keras.preprocessing.image import ImageDataGenerator 5 | import numpy as np 6 | 7 | path = os.path.join(os.path.dirname(__file__), "augs") 8 | if os.path.exists(path): 9 | shutil.rmtree(path) 10 | 11 | os.mkdir(path) 12 | 13 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 14 | 15 | indices = np.arange(x_train.shape[0]) 16 | np.random.shuffle(indices) 17 | 18 | samples = x_train[indices[:5], :] 19 | 20 | datagen = ImageDataGenerator( 21 | rotation_range=40, 22 | width_shift_range=0.2, 23 | height_shift_range=0.2, 24 | horizontal_flip=True, 25 | vertical_flip=False) 26 | 27 | g = datagen.flow( 28 | samples, 29 | np.arange(len(samples)), batch_size=1, 30 | save_to_dir=path, save_prefix="auged_" 31 | ) 32 | 33 | for i in range(18): 34 | g.next() 35 | -------------------------------------------------------------------------------- /ch03/lenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import keras 3 | from keras.models import Sequential 4 | from keras.layers.convolutional import Conv2D 5 | from keras.layers.convolutional import MaxPooling2D 6 | from keras.layers.core import Activation 7 | from keras.layers.core import Flatten 8 | from keras.layers.core import Dense 9 | from keras.datasets import mnist 10 | from keras.optimizers import Adam 11 | from keras.callbacks import TensorBoard 12 | 13 | 14 | def lenet(input_shape, num_classes): 15 | model = Sequential() 16 | 17 | # extract image features by convolution and max pooling layers 18 | model.add(Conv2D( 19 | 20, kernel_size=5, padding="same", 20 | input_shape=input_shape, activation="relu" 21 | )) 22 | model.add(MaxPooling2D(pool_size=(2, 2))) 23 | model.add(Conv2D(50, kernel_size=5, padding="same", activation="relu")) 24 | model.add(MaxPooling2D(pool_size=(2, 2))) 25 | # classify the class by fully-connected layers 26 | model.add(Flatten()) 27 | model.add(Dense(500, activation="relu")) 28 | model.add(Dense(num_classes)) 29 | model.add(Activation("softmax")) 30 | return model 31 | 32 | 33 | class MNISTDataset(): 34 | 35 | def __init__(self): 36 | self.image_shape = (28, 28, 1) # image is 28x28x1 (grayscale) 37 | self.num_classes = 10 38 | 39 | def get_batch(self): 40 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 41 | 42 | x_train, x_test = [self.preprocess(d) for d in [x_train, x_test]] 43 | y_train, y_test = [self.preprocess(d, label_data=True) for d in 44 | [y_train, y_test]] 45 | 46 | return x_train, y_train, x_test, y_test 47 | 48 | def preprocess(self, data, label_data=False): 49 | if label_data: 50 | # convert class vectors to binary class matrices 51 | data = keras.utils.to_categorical(data, self.num_classes) 52 | else: 53 | data = data.astype("float32") 54 | data /= 255 # convert the value to 0~1 scale 55 | shape = (data.shape[0],) + self.image_shape # add dataset length 56 | data = data.reshape(shape) 57 | 58 | return data 59 | 60 | 61 | class Trainer(): 62 | 63 | def __init__(self, model, loss, optimizer): 64 | self._target = model 65 | 
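# compile the wrapped model with the given loss and optimizer, tracking accuracy during training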
self._target.compile( 66 | loss=loss, optimizer=optimizer, metrics=["accuracy"] 67 | ) 68 | self.verbose = 1 69 | logdir = "logdir_lenet" 70 | self.log_dir = os.path.join(os.path.dirname(__file__), logdir) 71 | 72 | def train(self, x_train, y_train, batch_size, epochs, validation_split): 73 | if os.path.exists(self.log_dir): 74 | import shutil 75 | shutil.rmtree(self.log_dir) # remove previous execution 76 | os.mkdir(self.log_dir) 77 | 78 | self._target.fit( 79 | x_train, y_train, 80 | batch_size=batch_size, epochs=epochs, 81 | validation_split=validation_split, 82 | callbacks=[TensorBoard(log_dir=self.log_dir)], 83 | verbose=self.verbose 84 | ) 85 | 86 | 87 | dataset = MNISTDataset() 88 | 89 | # make model 90 | model = lenet(dataset.image_shape, dataset.num_classes) 91 | 92 | # train the model 93 | x_train, y_train, x_test, y_test = dataset.get_batch() 94 | trainer = Trainer(model, loss="categorical_crossentropy", optimizer=Adam()) 95 | trainer.train( 96 | x_train, y_train, batch_size=128, epochs=12, validation_split=0.2 97 | ) 98 | 99 | # show result 100 | score = model.evaluate(x_test, y_test, verbose=0) 101 | print("Test loss:", score[0]) 102 | print("Test accuracy:", score[1]) 103 | -------------------------------------------------------------------------------- /ch03/pretrain_inception_v3_cifar10_with_aug.py: -------------------------------------------------------------------------------- 1 | import os 2 | import keras 3 | from keras.applications.inception_v3 import InceptionV3 4 | from keras.models import Model, load_model 5 | from keras.layers import Dense, GlobalAveragePooling2D 6 | from keras.datasets import cifar10 7 | from keras.optimizers import RMSprop 8 | from keras.optimizers import SGD 9 | from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping 10 | from keras.preprocessing.image import ImageDataGenerator 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def network(): 16 | base_model = InceptionV3(weights="imagenet", include_top=False) 17 | for layer in base_model.layers: 18 | layer.trainable = False 19 | x = base_model.output 20 | x = GlobalAveragePooling2D()(x) 21 | x = Dense(1024, activation="relu")(x) 22 | prediction = Dense(10, activation="softmax")(x) 23 | model = Model(inputs=base_model.input, outputs=prediction) 24 | return model 25 | 26 | 27 | class CIFAR10Dataset(): 28 | 29 | def __init__(self): 30 | """ 31 | Setting image size for inceptionv3 32 | Reference 33 | https://keras.io/applications/#inceptionv3 34 | """ 35 | self.image_shape = (190, 190, 3) 36 | self.num_classes = 10 37 | self.train_data_size = 5000 38 | self.test_data_size = 5000 39 | 40 | def upscale(self, x, data_size): 41 | data_upscaled = np.zeros((data_size, 42 | self.image_shape[0], 43 | self.image_shape[1], 44 | self.image_shape[2])) 45 | for i, img in enumerate(x): 46 | large_img = cv2.resize(img, dsize=(self.image_shape[0], 47 | self.image_shape[1]),) 48 | data_upscaled[i] = large_img 49 | return data_upscaled 50 | 51 | def get_batch(self): 52 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 53 | 54 | x_train = x_train[:self.train_data_size] 55 | y_train = y_train[:self.train_data_size] 56 | x_test = x_test[:self.test_data_size] 57 | y_test = y_test[:self.test_data_size] 58 | x_train = self.upscale(x_train, x_train.shape[0]) 59 | x_test = self.upscale(x_test, x_test.shape[0]) 60 | 61 | x_train, x_test = [self.preprocess(d) for d in [x_train, x_test]] 62 | y_train, y_test = [self.preprocess(d, True) for d in [y_train, y_test]] 63 | 64 | return x_train, 
y_train, x_test, y_test 65 | 66 | def preprocess(self, data, label_data=False): 67 | if label_data: 68 | # convert class vectors to binary class matrices 69 | data = keras.utils.to_categorical(data, self.num_classes) 70 | else: 71 | data = data.astype("float32") 72 | data /= 255 # convert the value to 0~1 scale 73 | shape = (data.shape[0],) + self.image_shape # add dataset length 74 | data = data.reshape(shape) 75 | 76 | return data 77 | 78 | 79 | class Trainer(): 80 | 81 | def __init__(self, model, loss, optimizer): 82 | self._target = model 83 | self._target.compile( 84 | loss=loss, optimizer=optimizer, metrics=["accuracy"] 85 | ) 86 | self.verbose = 1 87 | logdir = "logdir_" + os.path.basename(__file__).replace('.py', '') 88 | self.log_dir = os.path.join(os.path.dirname(__file__), logdir) 89 | self.model_file_name = "model_file.hdf5" 90 | 91 | def train(self, x_train, y_train, batch_size, epochs, validation_split): 92 | if os.path.exists(self.log_dir): 93 | import shutil 94 | shutil.rmtree(self.log_dir) # remove previous execution 95 | os.mkdir(self.log_dir) 96 | 97 | datagen = ImageDataGenerator( 98 | featurewise_center=False, # set input mean to 0 over the dataset 99 | samplewise_center=False, # set each sample mean to 0 100 | featurewise_std_normalization=False, # divide inputs by std 101 | samplewise_std_normalization=False, # divide each input by its std 102 | zca_whitening=False, # apply ZCA whitening 103 | rotation_range=0, # randomly rotate images in the range (0~180) 104 | width_shift_range=0.1, # randomly shift images horizontally 105 | height_shift_range=0.1, # randomly shift images vertically 106 | horizontal_flip=True, # randomly flip images 107 | vertical_flip=False) # randomly flip images 108 | 109 | # compute quantities for normalization (mean, std etc) 110 | datagen.fit(x_train) 111 | 112 | # split for validation data 113 | indices = np.arange(x_train.shape[0]) 114 | np.random.shuffle(indices) 115 | validation_size = int(x_train.shape[0] * validation_split) 116 | x_train, x_valid = \ 117 | x_train[indices[:-validation_size], :], \ 118 | x_train[indices[-validation_size:], :] 119 | y_train, y_valid = \ 120 | y_train[indices[:-validation_size], :], \ 121 | y_train[indices[-validation_size:], :] 122 | 123 | model_path = os.path.join(self.log_dir, self.model_file_name) 124 | self._target.fit_generator( 125 | datagen.flow(x_train, y_train, batch_size=batch_size), 126 | steps_per_epoch=x_train.shape[0] // batch_size, 127 | epochs=epochs, 128 | validation_data=(x_valid, y_valid), 129 | callbacks=[ 130 | TensorBoard(log_dir=self.log_dir), 131 | ModelCheckpoint(model_path, save_best_only=True), 132 | EarlyStopping(), 133 | ], 134 | verbose=self.verbose, 135 | workers=4 136 | ) 137 | 138 | 139 | dataset = CIFAR10Dataset() 140 | 141 | # make model 142 | model = network() 143 | 144 | # train the model 145 | x_train, y_train, x_test, y_test = dataset.get_batch() 146 | trainer = Trainer(model, loss="categorical_crossentropy", optimizer=RMSprop()) 147 | trainer.train( 148 | x_train, y_train, batch_size=26, epochs=8, validation_split=0.2 149 | ) 150 | model = load_model(os.path.join(trainer.log_dir, trainer.model_file_name)) 151 | 152 | for layer in model.layers[:249]: 153 | layer.trainable = False 154 | for layer in model.layers[249:]: 155 | layer.trainable = True 156 | 157 | trainer = Trainer(model, loss="categorical_crossentropy", 158 | optimizer=SGD(lr=0.001, momentum=0.9)) 159 | trainer.train( 160 | x_train, y_train, batch_size=26, epochs=8, validation_split=0.2 161 | ) 162 | 
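# reload the checkpoint with the best validation loss from the fine-tuning stage (ModelCheckpoint with save_best_only=True monitors val_loss by default) before the final test-set evaluation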
model = load_model(os.path.join(trainer.log_dir, trainer.model_file_name)) 163 | 164 | # show result 165 | score = model.evaluate(x_test, y_test, verbose=0) 166 | print("Test loss:", score[0]) 167 | print("Test accuracy:", score[1]) 168 | -------------------------------------------------------------------------------- /ch03/pretrain_vgg.py: -------------------------------------------------------------------------------- 1 | from keras.applications.vgg16 import VGG16 2 | from keras.applications.vgg16 import preprocess_input, decode_predictions 3 | import keras.preprocessing.image as Image 4 | import numpy as np 5 | 6 | 7 | model = VGG16(weights="imagenet", include_top=True) 8 | 9 | image_path = "sample_images_pretrain/steaming_train.png" 10 | image = Image.load_img(image_path, target_size=(224, 224)) # imagenet size 11 | x = Image.img_to_array(image) 12 | x = np.expand_dims(x, axis=0) # add batch size dim 13 | x = preprocess_input(x) 14 | 15 | result = model.predict(x) 16 | result = decode_predictions(result, top=3)[0] 17 | print(result[0][1]) # show description 18 | -------------------------------------------------------------------------------- /ch03/pretrain_vgg_feature_extract.py: -------------------------------------------------------------------------------- 1 | from keras.applications.vgg19 import VGG19 2 | from keras.preprocessing import image 3 | from keras.applications.vgg19 import preprocess_input 4 | from keras.models import Model 5 | import numpy as np 6 | 7 | base_model = VGG19(weights="imagenet") 8 | model = Model(inputs=base_model.input, outputs=base_model.get_layer("block4_pool").output) 9 | 10 | img_path = "sample_images_pretrain/elephant.jpg" 11 | img = image.load_img(img_path, target_size=(224, 224)) 12 | x = image.img_to_array(img) 13 | x = np.expand_dims(x, axis=0) 14 | x = preprocess_input(x) 15 | 16 | block4_pool_features = model.predict(x) 17 | -------------------------------------------------------------------------------- /ch03/requirements.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow==1.8.0 3 | h5py==2.7.1 4 | Pillow==4.3.0 5 | opencv-python==3.3.1.11 6 | -------------------------------------------------------------------------------- /ch03/requirements_gpu.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow-gpu==1.8.0 3 | h5py==2.7.1 4 | Pillow==4.3.0 5 | opencv-python==3.3.1.11 6 | -------------------------------------------------------------------------------- /ch03/sample_images/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oreilly-japan/deep-learning-with-keras-ja/3180b9c64b2317de1bf0b8441fde914f9926b9a1/ch03/sample_images/cat.png -------------------------------------------------------------------------------- /ch03/sample_images/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oreilly-japan/deep-learning-with-keras-ja/3180b9c64b2317de1bf0b8441fde914f9926b9a1/ch03/sample_images/dog.png -------------------------------------------------------------------------------- /ch03/sample_images_pretrain/elephant.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oreilly-japan/deep-learning-with-keras-ja/3180b9c64b2317de1bf0b8441fde914f9926b9a1/ch03/sample_images_pretrain/elephant.jpg 
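A usage note for `pretrain_vgg_feature_extract.py` above (not part of the repository): the `block4_pool_features` array returned by `model.predict(x)` is a stack of spatial feature maps that can be pooled into fixed-length descriptors and passed to any conventional classifier. A minimal sketch, assuming scikit-learn is installed and that the hypothetical `feature_batches` and `labels` variables have already been collected for a set of images:

```py
import numpy as np
from sklearn.linear_model import LogisticRegression


def to_descriptor(block4_pool_features):
    # collapse the spatial dimensions of each (H, W, 512) VGG19 feature map
    # into a single 512-dimensional vector by global average pooling
    return block4_pool_features.mean(axis=(1, 2))


# feature_batches: list of model.predict() outputs, one per image
# labels: the matching class labels (both are hypothetical placeholders)
X = np.vstack([to_descriptor(f) for f in feature_batches])
clf = LogisticRegression()
clf.fit(X, labels)
```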
-------------------------------------------------------------------------------- /ch03/sample_images_pretrain/steaming_train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oreilly-japan/deep-learning-with-keras-ja/3180b9c64b2317de1bf0b8441fde914f9926b9a1/ch03/sample_images_pretrain/steaming_train.png -------------------------------------------------------------------------------- /ch04/dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | from __future__ import division 4 | 5 | import math 6 | import os 7 | import warnings 8 | 9 | import numpy as np 10 | import scipy.io.wavfile 11 | import scipy.signal 12 | from picklable_itertools import cycle 13 | from picklable_itertools.extras import partition_all 14 | from tqdm import tqdm 15 | 16 | 17 | # TODO: make SACRED ingredient. 18 | def one_hot(x): 19 | return np.eye(256, dtype='uint8')[x.astype('uint8')] 20 | 21 | 22 | def fragment_indices(full_sequences, fragment_length, batch_size, fragment_stride, nb_output_bins): 23 | for seq_i, sequence in enumerate(full_sequences): 24 | # range_values = np.linspace(np.iinfo(sequence.dtype).min, np.iinfo(sequence.dtype).max, nb_output_bins) 25 | # digitized = np.digitize(sequence, range_values).astype('uint8') 26 | for i in range(0, sequence.shape[0] - fragment_length, fragment_stride): 27 | yield seq_i, i 28 | 29 | 30 | def select_generator(set_name, random_train_batches, full_sequences, fragment_length, batch_size, fragment_stride, 31 | nb_output_bins, randomize_batch_order, _rnd): 32 | if random_train_batches and set_name == 'train': 33 | bg = random_batch_generator 34 | else: 35 | bg = batch_generator 36 | return bg(full_sequences, fragment_length, batch_size, fragment_stride, nb_output_bins, randomize_batch_order, _rnd) 37 | 38 | 39 | def batch_generator(full_sequences, fragment_length, batch_size, fragment_stride, nb_output_bins, randomize_batch_order, _rnd): 40 | indices = list(fragment_indices(full_sequences, fragment_length, batch_size, fragment_stride, nb_output_bins)) 41 | if randomize_batch_order: 42 | _rnd.shuffle(indices) 43 | 44 | batches = cycle(partition_all(batch_size, indices)) 45 | for batch in batches: 46 | if len(batch) < batch_size: 47 | continue 48 | yield np.array( 49 | [one_hot(full_sequences[e[0]][e[1]:e[1] + fragment_length]) for e in batch], dtype='uint8'), np.array( 50 | [one_hot(full_sequences[e[0]][e[1] + 1:e[1] + fragment_length + 1]) for e in batch], dtype='uint8') 51 | 52 | 53 | def random_batch_generator(full_sequences, fragment_length, batch_size, fragment_stride, nb_output_bins, 54 | randomize_batch_order, _rnd): 55 | lengths = [x.shape[0] for x in full_sequences] 56 | nb_sequences = len(full_sequences) 57 | while True: 58 | sequence_indices = _rnd.randint(0, nb_sequences, batch_size) 59 | batch_inputs = [] 60 | batch_outputs = [] 61 | for i, seq_i in enumerate(sequence_indices): 62 | l = lengths[seq_i] 63 | offset = np.squeeze(_rnd.randint(0, l - fragment_length, 1)) 64 | batch_inputs.append(full_sequences[seq_i][offset:offset + fragment_length]) 65 | batch_outputs.append(full_sequences[seq_i][offset + 1:offset + fragment_length + 1]) 66 | yield one_hot(np.array(batch_inputs, dtype='uint8')), one_hot(np.array(batch_outputs, dtype='uint8')) 67 | 68 | 69 | def generators(dirname, desired_sample_rate, fragment_length, batch_size, fragment_stride, nb_output_bins, 70 | learn_all_outputs, use_ulaw, randomize_batch_order, _rnd, 
random_train_batches): 71 | fragment_generators = {} 72 | nb_examples = {} 73 | for set_name in ['train', 'test']: 74 | set_dirname = os.path.join(dirname, set_name) 75 | full_sequences = load_set(desired_sample_rate, set_dirname, use_ulaw) 76 | fragment_generators[set_name] = select_generator(set_name, random_train_batches, full_sequences, 77 | fragment_length, 78 | batch_size, fragment_stride, nb_output_bins, 79 | randomize_batch_order, _rnd) 80 | nb_examples[set_name] = int(sum( 81 | [len(range(0, x.shape[0] - fragment_length, fragment_stride)) for x in 82 | full_sequences]) / batch_size) * batch_size 83 | 84 | return fragment_generators, nb_examples 85 | 86 | 87 | def generators_vctk(dirname, desired_sample_rate, fragment_length, batch_size, fragment_stride, nb_output_bins, 88 | learn_all_outputs, use_ulaw, test_factor, randomize_batch_order, _rnd, random_train_batches): 89 | fragment_generators = {} 90 | nb_examples = {} 91 | speaker_dirs = os.listdir(dirname) 92 | train_full_sequences = [] 93 | test_full_sequences = [] 94 | for speaker_dir in speaker_dirs: 95 | full_sequences = load_set(desired_sample_rate, os.path.join(dirname, speaker_dir), use_ulaw) 96 | nb_examples_train = int(math.ceil(len(full_sequences) * (1 - test_factor))) 97 | train_full_sequences.extend(full_sequences[0:nb_examples_train]) 98 | test_full_sequences.extend(full_sequences[nb_examples_train:]) 99 | 100 | for set_name, set_sequences in zip(['train', 'test'], [train_full_sequences, test_full_sequences]): 101 | fragment_generators[set_name] = select_generator(set_name, random_train_batches, full_sequences, 102 | fragment_length, 103 | batch_size, fragment_stride, nb_output_bins, 104 | randomize_batch_order, _rnd) 105 | nb_examples[set_name] = int(sum( 106 | [len(range(0, x.shape[0] - fragment_length, fragment_stride)) for x in 107 | full_sequences]) / batch_size) * batch_size 108 | 109 | return fragment_generators, nb_examples 110 | 111 | 112 | def load_set(desired_sample_rate, set_dirname, use_ulaw): 113 | ulaw_str = '_ulaw' if use_ulaw else '' 114 | cache_fn = os.path.join(set_dirname, 'processed_%d%s.npy' % (desired_sample_rate, ulaw_str)) 115 | if os.path.isfile(cache_fn): 116 | full_sequences = np.load(cache_fn) 117 | else: 118 | file_names = [fn for fn in os.listdir(set_dirname) if fn.endswith('.wav')] 119 | full_sequences = [] 120 | for fn in tqdm(file_names): 121 | sequence = process_wav(desired_sample_rate, os.path.join(set_dirname, fn), use_ulaw) 122 | full_sequences.append(sequence) 123 | np.save(cache_fn, full_sequences) 124 | 125 | return full_sequences 126 | 127 | 128 | def process_wav(desired_sample_rate, filename, use_ulaw): 129 | with warnings.catch_warnings(): 130 | warnings.simplefilter("error") 131 | channels = scipy.io.wavfile.read(filename) 132 | file_sample_rate, audio = channels 133 | audio = ensure_mono(audio) 134 | audio = wav_to_float(audio) 135 | if use_ulaw: 136 | audio = ulaw(audio) 137 | audio = ensure_sample_rate(desired_sample_rate, file_sample_rate, audio) 138 | audio = float_to_uint8(audio) 139 | return audio 140 | 141 | 142 | def ulaw(x, u=255): 143 | x = np.sign(x) * (np.log(1 + u * np.abs(x)) / np.log(1 + u)) 144 | return x 145 | 146 | 147 | def float_to_uint8(x): 148 | x += 1. 149 | x /= 2. 
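# inputs in [-1, 1] have been shifted into [0, 1]; next, scale to the full uint8 range and cast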
150 | uint8_max_value = np.iinfo('uint8').max 151 | x *= uint8_max_value 152 | x = x.astype('uint8') 153 | return x 154 | 155 | 156 | def wav_to_float(x): 157 | try: 158 | max_value = np.iinfo(x.dtype).max 159 | min_value = np.iinfo(x.dtype).min 160 | except: 161 | max_value = np.finfo(x.dtype).max 162 | min_value = np.iinfo(x.dtype).min 163 | x = x.astype('float64', casting='safe') 164 | x -= min_value 165 | x /= ((max_value - min_value) / 2.) 166 | x -= 1. 167 | return x 168 | 169 | 170 | def ulaw2lin(x, u=255.): 171 | max_value = np.iinfo('uint8').max 172 | min_value = np.iinfo('uint8').min 173 | x = x.astype('float64', casting='safe') 174 | x -= min_value 175 | x /= ((max_value - min_value) / 2.) 176 | x -= 1. 177 | x = np.sign(x) * (1 / u) * (((1 + u) ** np.abs(x)) - 1) 178 | x = float_to_uint8(x) 179 | return x 180 | 181 | def ensure_sample_rate(desired_sample_rate, file_sample_rate, mono_audio): 182 | if file_sample_rate != desired_sample_rate: 183 | mono_audio = scipy.signal.resample_poly(mono_audio, desired_sample_rate, file_sample_rate) 184 | return mono_audio 185 | 186 | 187 | def ensure_mono(raw_audio): 188 | """ 189 | Just use first channel. 190 | """ 191 | if raw_audio.ndim == 2: 192 | raw_audio = raw_audio[:, 0] 193 | return raw_audio 194 | 195 | -------------------------------------------------------------------------------- /ch04/dcgan.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential 2 | from keras.layers import Dense 3 | from keras.layers import Reshape 4 | from keras.layers.core import Activation 5 | from keras.layers.normalization import BatchNormalization 6 | from keras.layers.convolutional import UpSampling2D 7 | from keras.layers.convolutional import Conv2D, MaxPooling2D 8 | from keras.layers.core import Flatten 9 | from keras.optimizers import SGD 10 | from keras.datasets import mnist 11 | import numpy as np 12 | from PIL import Image 13 | import argparse 14 | import math 15 | 16 | 17 | def generator_model(): 18 | model = Sequential() 19 | model.add(Dense(1024, input_shape=(100, ), activation="tanh")) 20 | model.add(Dense(128 * 7 * 7)) 21 | model.add(BatchNormalization()) 22 | model.add(Activation("tanh")) 23 | model.add(Reshape((7, 7, 128), input_shape=(7 * 7 * 128,))) 24 | model.add(UpSampling2D(size=(2, 2))) 25 | model.add(Conv2D(64, (5, 5), 26 | padding="same", 27 | activation="tanh", 28 | data_format="channels_last")) 29 | model.add(UpSampling2D(size=(2, 2))) 30 | model.add(Conv2D(1, (5, 5), 31 | padding="same", 32 | activation="tanh", 33 | data_format="channels_last")) 34 | return model 35 | 36 | 37 | def discriminator_model(): 38 | model = Sequential() 39 | model.add(Conv2D(64, (5, 5), 40 | padding="same", 41 | input_shape=(28, 28, 1), 42 | activation="tanh", 43 | data_format="channels_last")) 44 | model.add(MaxPooling2D(pool_size=(2, 2))) 45 | model.add(Conv2D(128, (5, 5), 46 | activation="tanh", 47 | data_format="channels_last")) 48 | model.add(MaxPooling2D(pool_size=(2, 2))) 49 | model.add(Flatten()) 50 | model.add(Dense(1024, activation="tanh")) 51 | model.add(Dense(1, activation="sigmoid")) 52 | return model 53 | 54 | 55 | def generator_containing_discriminator(generator, discriminator): 56 | model = Sequential() 57 | model.add(generator) 58 | discriminator.trainable = False 59 | model.add(discriminator) 60 | return model 61 | 62 | 63 | def combine_images(generated_images): 64 | generated_images = generated_images.reshape(generated_images.shape[0], 65 | generated_images.shape[3], 
66 | generated_images.shape[1], 67 | generated_images.shape[2]) 68 | num = generated_images.shape[0] 69 | width = int(math.sqrt(num)) 70 | height = int(math.ceil(float(num) / width)) 71 | shape = generated_images.shape[2:] 72 | image = np.zeros((height*shape[0], width*shape[1]), 73 | dtype=generated_images.dtype) 74 | for index, img in enumerate(generated_images): 75 | i = int(index/width) 76 | j = index % width 77 | image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \ 78 | img[0, :, :] 79 | return image 80 | 81 | 82 | def train(BATCH_SIZE): 83 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 84 | X_train = (X_train.astype(np.float32) - 127.5)/127.5 85 | X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:]) 86 | discriminator = discriminator_model() 87 | generator = generator_model() 88 | discriminator_on_generator = \ 89 | generator_containing_discriminator(generator, discriminator) 90 | d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True) 91 | g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True) 92 | generator.compile(loss="binary_crossentropy", optimizer="SGD") 93 | discriminator_on_generator.compile( 94 | loss="binary_crossentropy", optimizer=g_optim) 95 | discriminator.trainable = True 96 | discriminator.compile(loss="binary_crossentropy", optimizer=d_optim) 97 | noise = np.zeros((BATCH_SIZE, 100)) 98 | for epoch in range(100): 99 | print("Epoch is", epoch) 100 | print("Number of batches", int(X_train.shape[0]/BATCH_SIZE)) 101 | for index in range(int(X_train.shape[0]/BATCH_SIZE)): 102 | for i in range(BATCH_SIZE): 103 | noise[i, :] = np.random.uniform(-1, 1, 100) 104 | image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE] 105 | image_batch = image_batch.reshape(image_batch.shape[0], 106 | image_batch.shape[2], 107 | image_batch.shape[3], 108 | image_batch.shape[1]) 109 | generated_images = generator.predict(noise, verbose=0) 110 | if index % 20 == 0: 111 | image = combine_images(generated_images) 112 | image = image*127.5+127.5 113 | Image.fromarray(image.astype(np.uint8)).save( 114 | str(epoch)+"_"+str(index)+".png") 115 | X = np.concatenate((image_batch, generated_images)) 116 | y = [1] * BATCH_SIZE + [0] * BATCH_SIZE 117 | d_loss = discriminator.train_on_batch(X, y) 118 | print("batch %d d_loss : %f" % (index, d_loss)) 119 | for i in range(BATCH_SIZE): 120 | noise[i, :] = np.random.uniform(-1, 1, 100) 121 | discriminator.trainable = False 122 | g_loss = discriminator_on_generator.train_on_batch( 123 | noise, [1] * BATCH_SIZE) 124 | discriminator.trainable = True 125 | print("batch %d g_loss : %f" % (index, g_loss)) 126 | if index % 10 == 9: 127 | generator.save_weights("generator", True) 128 | discriminator.save_weights("discriminator", True) 129 | 130 | 131 | def generate(BATCH_SIZE, nice=False): 132 | generator = generator_model() 133 | generator.compile(loss="binary_crossentropy", optimizer="SGD") 134 | generator.load_weights("generator") 135 | if nice: 136 | discriminator = discriminator_model() 137 | discriminator.compile(loss="binary_crossentropy", optimizer="SGD") 138 | discriminator.load_weights("discriminator") 139 | noise = np.zeros((BATCH_SIZE*20, 100)) 140 | for i in range(BATCH_SIZE*20): 141 | noise[i, :] = np.random.uniform(-1, 1, 100) 142 | generated_images = generator.predict(noise, verbose=1) 143 | d_pret = discriminator.predict(generated_images, verbose=1) 144 | index = np.arange(0, BATCH_SIZE*20) 145 | index.resize((BATCH_SIZE*20, 1)) 146 | pre_with_index = list(np.append(d_pret, index, axis=1)) 147 | 
pre_with_index.sort(key=lambda x: x[0], reverse=True) 148 | nice_images = np.zeros((BATCH_SIZE, 1) + 149 | (generated_images.shape[2:]), dtype=np.float32) 150 | for i in range(int(BATCH_SIZE)): 151 | idx = int(pre_with_index[i][1]) 152 | nice_images[i, 0, :, :] = generated_images[idx, 0, :, :] 153 | image = combine_images(nice_images) 154 | else: 155 | noise = np.zeros((BATCH_SIZE, 100)) 156 | for i in range(BATCH_SIZE): 157 | noise[i, :] = np.random.uniform(-1, 1, 100) 158 | generated_images = generator.predict(noise, verbose=1) 159 | image = combine_images(generated_images) 160 | image = image*127.5+127.5 161 | Image.fromarray(image.astype(np.uint8)).save( 162 | "generated_image.png") 163 | 164 | 165 | def get_args(): 166 | parser = argparse.ArgumentParser() 167 | parser.add_argument("--mode", type=str) 168 | parser.add_argument("--batch_size", type=int, default=128) 169 | parser.add_argument("--nice", dest="nice", action="store_true") 170 | parser.set_defaults(nice=False) 171 | args = parser.parse_args() 172 | return args 173 | 174 | 175 | if __name__ == "__main__": 176 | args = get_args() 177 | if args.mode == "train": 178 | train(BATCH_SIZE=args.batch_size) 179 | elif args.mode == "generate": 180 | generate(BATCH_SIZE=args.batch_size, nice=args.nice) 181 | -------------------------------------------------------------------------------- /ch04/example_gan_cifar10.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib as mpl 5 | import keras.backend as K 6 | from keras.layers import Reshape, Flatten, LeakyReLU, Activation, Dense, BatchNormalization, SpatialDropout2D 7 | from keras.layers.convolutional import Conv2D, UpSampling2D, MaxPooling2D, AveragePooling2D 8 | from keras.regularizers import L1L2 9 | from keras.models import Sequential, Model 10 | from keras.optimizers import Adam 11 | from keras.callbacks import TensorBoard 12 | from keras.datasets import cifar10 13 | from keras_adversarial.image_grid_callback import ImageGridCallback 14 | from keras_adversarial import AdversarialModel, simple_gan, gan_targets, fix_names 15 | from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling 16 | from image_utils import dim_ordering_unfix, dim_ordering_shape 17 | 18 | 19 | # This line allows mpl to run with no DISPLAY defined 20 | mpl.use("Agg") 21 | 22 | 23 | def model_generator(): 24 | model = Sequential() 25 | nch = 256 26 | reg = lambda: L1L2(l1=1e-7, l2=1e-7) 27 | h = 5 28 | model.add(Dense(nch * 4 * 4, input_dim=100, kernel_regularizer=reg())) 29 | model.add(BatchNormalization()) 30 | model.add(Reshape(dim_ordering_shape((nch, 4, 4)))) 31 | model.add(Conv2D(int(nch / 2), (h, h), padding="same", kernel_regularizer=reg())) 32 | model.add(BatchNormalization()) 33 | model.add(LeakyReLU(0.2)) 34 | model.add(UpSampling2D(size=(2, 2))) 35 | model.add(Conv2D(int(nch / 2), (h, h), padding="same", kernel_regularizer=reg())) 36 | model.add(BatchNormalization()) 37 | model.add(LeakyReLU(0.2)) 38 | model.add(UpSampling2D(size=(2, 2))) 39 | model.add(Conv2D(int(nch / 4), (h, h), padding="same", kernel_regularizer=reg())) 40 | model.add(BatchNormalization()) 41 | model.add(LeakyReLU(0.2)) 42 | model.add(UpSampling2D(size=(2, 2))) 43 | model.add(Conv2D(3, (h, h), padding="same", kernel_regularizer=reg())) 44 | model.add(Activation("sigmoid")) 45 | return model 46 | 47 | 48 | def model_discriminator(): 49 | nch = 256 50 | h = 5 51 | reg = lambda: L1L2(l1=1e-7, 
l2=1e-7) 52 | 53 | c1 = Conv2D(int(nch / 4), 54 | (h, h), 55 | padding="same", 56 | kernel_regularizer=reg(), 57 | input_shape=dim_ordering_shape((3, 32, 32))) 58 | c2 = Conv2D(int(nch / 2), 59 | (h, h), 60 | padding="same", 61 | kernel_regularizer=reg()) 62 | c3 = Conv2D(nch, 63 | (h, h), 64 | padding="same", 65 | kernel_regularizer=reg()) 66 | c4 = Conv2D(1, 67 | (h, h), 68 | padding="same", 69 | kernel_regularizer=reg()) 70 | 71 | def m(dropout): 72 | model = Sequential() 73 | model.add(c1) 74 | model.add(SpatialDropout2D(dropout)) 75 | model.add(MaxPooling2D(pool_size=(2, 2))) 76 | model.add(LeakyReLU(0.2)) 77 | model.add(c2) 78 | model.add(SpatialDropout2D(dropout)) 79 | model.add(MaxPooling2D(pool_size=(2, 2))) 80 | model.add(LeakyReLU(0.2)) 81 | model.add(c3) 82 | model.add(SpatialDropout2D(dropout)) 83 | model.add(MaxPooling2D(pool_size=(2, 2))) 84 | model.add(LeakyReLU(0.2)) 85 | model.add(c4) 86 | model.add(AveragePooling2D(pool_size=(4, 4), padding="valid")) 87 | model.add(Flatten()) 88 | model.add(Activation("sigmoid")) 89 | return model 90 | return m 91 | 92 | 93 | def cifar10_process(x): 94 | x = x.astype(np.float32) / 255.0 95 | return x 96 | 97 | 98 | def cifar10_data(): 99 | (xtrain, ytrain), (xtest, ytest) = cifar10.load_data() 100 | return cifar10_process(xtrain), cifar10_process(xtest) 101 | 102 | 103 | def example_gan(adversarial_optimizer, path, opt_g, opt_d, nb_epoch, 104 | generator, discriminator, latent_dim, 105 | targets=gan_targets, loss="binary_crossentropy"): 106 | csvpath = os.path.join(path, "history.csv") 107 | if os.path.exists(csvpath): 108 | print("Already exists: {}".format(csvpath)) 109 | return 110 | 111 | print("Training: {}".format(csvpath)) 112 | # gan (x - > yfake, yreal), z is gaussian generated on GPU 113 | # can also experiment with uniform_latent_sampling 114 | d_g = discriminator(0) 115 | d_d = discriminator(0.5) 116 | generator.summary() 117 | d_d.summary() 118 | gan_g = simple_gan(generator, d_g, None) 119 | gan_d = simple_gan(generator, d_d, None) 120 | x = gan_g.inputs[1] 121 | z = normal_latent_sampling((latent_dim,))(x) 122 | # estiminate z from inputs 123 | gan_g = Model([x], fix_names(gan_g([z, x]), gan_g.output_names)) 124 | gan_d = Model([x], fix_names(gan_d([z, x]), gan_d.output_names)) 125 | 126 | # build adversarial model 127 | model = AdversarialModel(player_models=[gan_g, gan_d], 128 | player_params=[generator.trainable_weights, 129 | d_d.trainable_weights], 130 | player_names=["generator", "discriminator"]) 131 | model.adversarial_compile(adversarial_optimizer=adversarial_optimizer, 132 | player_optimizers=[opt_g, opt_d], 133 | loss=loss) 134 | 135 | # create callback to generate images 136 | zsamples = np.random.normal(size=(10 * 10, latent_dim)) 137 | 138 | def generator_sampler(): 139 | xpred = generator.predict(zsamples) 140 | xpred = dim_ordering_unfix(xpred.transpose((0, 2, 3, 1))) 141 | return xpred.reshape((10, 10) + xpred.shape[1:]) 142 | 143 | generator_cb = ImageGridCallback( 144 | os.path.join(path, "epoch-{:03d}.png"), 145 | generator_sampler, cmap=None) 146 | 147 | callbacks = [generator_cb] 148 | if K.backend() == "tensorflow": 149 | callbacks.append( 150 | TensorBoard(log_dir=os.path.join(path, "logs"), 151 | histogram_freq=0, write_graph=True, write_images=True)) 152 | 153 | # train model 154 | xtrain, xtest = cifar10_data() 155 | y = targets(xtrain.shape[0]) 156 | ytest = targets(xtest.shape[0]) 157 | history = model.fit(x=xtrain, y=y, validation_data=(xtest, ytest), 158 | callbacks=callbacks, 
epochs=nb_epoch, 159 | batch_size=32) 160 | 161 | # save history to CSV 162 | df = pd.DataFrame(history.history) 163 | df.to_csv(csvpath) 164 | 165 | # save models 166 | generator.save(os.path.join(path, "generator.h5")) 167 | d_d.save(os.path.join(path, "discriminator.h5")) 168 | 169 | 170 | def main(): 171 | # z \in R^100 172 | latent_dim = 100 173 | # x \in R^{28x28} 174 | # generator (z -> x) 175 | generator = model_generator() 176 | # discriminator (x -> y) 177 | discriminator = model_discriminator() 178 | if not os.path.exists("output/gan-cifar10"): 179 | os.mkdir("output/gan-cifar10") 180 | example_gan(AdversarialOptimizerSimultaneous(), "output/gan-cifar10", 181 | opt_g=Adam(1e-4, decay=1e-5), 182 | opt_d=Adam(1e-3, decay=1e-5), 183 | nb_epoch=100, generator=generator, discriminator=discriminator, 184 | latent_dim=latent_dim) 185 | 186 | 187 | if __name__ == "__main__": 188 | main() 189 | -------------------------------------------------------------------------------- /ch04/example_gan_convolutional.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib as mpl 5 | import keras.backend as K 6 | from keras.layers import Flatten, Dropout, LeakyReLU, Input, Activation, Dense, BatchNormalization 7 | from keras.layers.convolutional import UpSampling2D, Conv2D 8 | from keras.models import Model 9 | from keras.optimizers import Adam 10 | from keras.callbacks import TensorBoard 11 | from keras.datasets import mnist 12 | from keras_adversarial.image_grid_callback import ImageGridCallback 13 | from keras_adversarial import AdversarialModel, simple_gan, gan_targets 14 | from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling 15 | from image_utils import dim_ordering_fix, dim_ordering_input, dim_ordering_reshape, dim_ordering_unfix 16 | 17 | 18 | # This line allows mpl to run with no DISPLAY defined 19 | mpl.use("Agg") 20 | 21 | 22 | def model_generator(): 23 | nch = 256 24 | g_input = Input(shape=[100]) 25 | H = Dense(nch * 14 * 14)(g_input) 26 | H = BatchNormalization()(H) 27 | H = Activation("relu")(H) 28 | H = dim_ordering_reshape(nch, 14)(H) 29 | H = UpSampling2D(size=(2, 2))(H) 30 | H = Conv2D(int(nch / 2), (3, 3), padding="same")(H) 31 | H = BatchNormalization()(H) 32 | H = Activation("relu")(H) 33 | H = Conv2D(int(nch / 4), (3, 3), padding="same")(H) 34 | H = BatchNormalization()(H) 35 | H = Activation("relu")(H) 36 | H = Conv2D(1, (1, 1), padding="same")(H) 37 | g_V = Activation("sigmoid")(H) 38 | return Model(g_input, g_V) 39 | 40 | 41 | def model_discriminator(input_shape=(1, 28, 28), dropout_rate=0.5): 42 | d_input = dim_ordering_input(input_shape, name="input_x") 43 | nch = 512 44 | # nch = 128 45 | H = Conv2D(int(nch / 2), (5, 5), 46 | strides=(2, 2), 47 | padding="same", 48 | activation="relu", 49 | )(d_input) 50 | H = LeakyReLU(0.2)(H) 51 | H = Dropout(dropout_rate)(H) 52 | H = Conv2D(nch, (5, 5), 53 | strides=(2, 2), 54 | padding="same", 55 | activation="relu", 56 | )(H) 57 | H = LeakyReLU(0.2)(H) 58 | H = Dropout(dropout_rate)(H) 59 | H = Flatten()(H) 60 | H = Dense(int(nch / 2))(H) 61 | H = LeakyReLU(0.2)(H) 62 | H = Dropout(dropout_rate)(H) 63 | d_V = Dense(1, activation="sigmoid")(H) 64 | return Model(d_input, d_V) 65 | 66 | 67 | def mnist_process(x): 68 | x = x.astype(np.float32) / 255.0 69 | return x 70 | 71 | 72 | def mnist_data(): 73 | (xtrain, ytrain), (xtest, ytest) = mnist.load_data() 74 | return mnist_process(xtrain), 
mnist_process(xtest) 75 | 76 | 77 | def generator_sampler(latent_dim, generator): 78 | def fun(): 79 | zsamples = np.random.normal(size=(10 * 10, latent_dim)) 80 | gen = dim_ordering_unfix(generator.predict(zsamples)) 81 | return gen.reshape((10, 10, 28, 28)) 82 | 83 | return fun 84 | 85 | 86 | if __name__ == "__main__": 87 | # z \in R^100 88 | latent_dim = 100 89 | # x \in R^{28x28} 90 | input_shape = (1, 28, 28) 91 | 92 | # generator (z -> x) 93 | generator = model_generator() 94 | # discriminator (x -> y) 95 | discriminator = model_discriminator(input_shape=input_shape) 96 | # gan (x - > yfake, yreal), z generated on GPU 97 | gan = simple_gan(generator, discriminator, 98 | normal_latent_sampling((latent_dim,))) 99 | 100 | # print summary of models 101 | generator.summary() 102 | discriminator.summary() 103 | gan.summary() 104 | 105 | # build adversarial model 106 | model = AdversarialModel(base_model=gan, 107 | player_params=[generator.trainable_weights, 108 | discriminator.trainable_weights], 109 | player_names=["generator", "discriminator"]) 110 | model.adversarial_compile(adversarial_optimizer=AdversarialOptimizerSimultaneous(), 111 | player_optimizers=[Adam(1e-4, decay=1e-4), 112 | Adam(1e-3, decay=1e-4)], 113 | loss="binary_crossentropy") 114 | 115 | # train model 116 | generator_cb = ImageGridCallback("output/gan_convolutional/epoch-{:03d}.png", 117 | generator_sampler(latent_dim, generator)) 118 | callbacks = [generator_cb] 119 | if K.backend() == "tensorflow": 120 | callbacks.append( 121 | TensorBoard(log_dir=os.path.join("output/gan_convolutional/", "logs/"), 122 | histogram_freq=0, write_graph=True, write_images=True)) 123 | 124 | xtrain, xtest = mnist_data() 125 | xtrain = dim_ordering_fix(xtrain.reshape((-1, 1, 28, 28))) 126 | xtest = dim_ordering_fix(xtest.reshape((-1, 1, 28, 28))) 127 | y = gan_targets(xtrain.shape[0]) 128 | ytest = gan_targets(xtest.shape[0]) 129 | history = model.fit(x=xtrain, y=y, validation_data=(xtest, ytest), 130 | callbacks=[generator_cb], epochs=100, 131 | batch_size=32) 132 | df = pd.DataFrame(history.history) 133 | df.to_csv("output/gan_convolutional/history.csv") 134 | 135 | generator.save("output/gan_convolutional/generator.h5") 136 | discriminator.save("output/gan_convolutional/discriminator.h5") 137 | -------------------------------------------------------------------------------- /ch04/image_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import keras.backend as K 3 | from keras.layers import Input, Reshape 4 | 5 | 6 | def dim_ordering_fix(x): 7 | if K.image_dim_ordering() == 'th': 8 | return x 9 | else: 10 | return np.transpose(x, (0, 2, 3, 1)) 11 | 12 | 13 | def dim_ordering_unfix(x): 14 | if K.image_dim_ordering() == 'th': 15 | return x 16 | else: 17 | return np.transpose(x, (0, 3, 1, 2)) 18 | 19 | 20 | def dim_ordering_shape(input_shape): 21 | if K.image_dim_ordering() == 'th': 22 | return input_shape 23 | else: 24 | return (input_shape[1], input_shape[2], input_shape[0]) 25 | 26 | 27 | def dim_ordering_input(input_shape, name): 28 | if K.image_dim_ordering() == 'th': 29 | return Input(input_shape, name=name) 30 | else: 31 | return Input((input_shape[1], input_shape[2], input_shape[0]), name=name) 32 | 33 | 34 | def dim_ordering_reshape(k, w, **kwargs): 35 | if K.image_dim_ordering() == 'th': 36 | return Reshape((k, w, w), **kwargs) 37 | else: 38 | return Reshape((w, w, k), **kwargs) 39 | 40 | 41 | def channel_axis(): 42 | if K.image_dim_ordering() == 'th': 43 | return 
1 44 | else: 45 | return 3 -------------------------------------------------------------------------------- /ch04/requirements.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.2 2 | tensorflow==1.8.0 3 | h5py==2.7.1 4 | matplotlib==2.1.1 5 | PIllow==5.1.0 6 | pandas==0.22.0 7 | -------------------------------------------------------------------------------- /ch04/requirements_gpu.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.2 2 | tensorflow-gpu==1.8.0 3 | h5py==2.7.1 4 | matplotlib==2.1.1 5 | PIllow==5.1.0 6 | pandas==0.22.0 7 | -------------------------------------------------------------------------------- /ch04/requirements_gpu_wavenet.txt: -------------------------------------------------------------------------------- 1 | picklable_itertools~=0.1.1 2 | sacred~=0.6.10 3 | tqdm~=4.8.4 4 | q~=2.6 5 | keras==2.1.2 6 | tensorflow-gpu==1.8.0 7 | h5py==2.7.1 8 | scipy==1.0.0 9 | matplotlib==2.1.1 10 | -------------------------------------------------------------------------------- /ch04/wavenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import datetime 4 | import json 5 | import os 6 | import re 7 | import wave 8 | 9 | import keras.backend as K 10 | import numpy as np 11 | import scipy.io.wavfile 12 | import scipy.signal 13 | from keras import layers 14 | from keras import metrics 15 | from keras import objectives 16 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 17 | from keras.engine import Input 18 | from keras.engine import Model 19 | from keras.optimizers import Adam, SGD 20 | from keras.regularizers import l2 21 | from sacred import Experiment 22 | from sacred.commands import print_config 23 | from tqdm import tqdm 24 | from time import gmtime, strftime 25 | from keras.callbacks import TensorBoard 26 | 27 | import dataset 28 | from wavenet_utils import CausalAtrousConvolution1D, categorical_mean_squared_error 29 | 30 | ex = Experiment('wavenet') 31 | 32 | 33 | @ex.config 34 | def config(): 35 | data_dir = 'data' 36 | data_dir_structure = 'flat' # Or 'vctk' for a speakerdir structure 37 | test_factor = 0.1 # For 'vctk' structure, take test_factor amount of sequences for test set. 38 | nb_epoch = 1000 39 | run_dir = None 40 | early_stopping_patience = 20 41 | desired_sample_rate = 4410 42 | batch_size = 16 43 | nb_output_bins = 256 44 | nb_filters = 256 45 | dilation_depth = 9 # 46 | nb_stacks = 1 47 | use_bias = False 48 | use_ulaw = True 49 | res_l2 = 0 50 | final_l2 = 0 51 | fragment_length = 128 + compute_receptive_field_(desired_sample_rate, dilation_depth, nb_stacks)[0] 52 | fragment_stride = 128 53 | use_skip_connections = True 54 | optimizer = { 55 | 'optimizer': 'sgd', 56 | 'lr': 0.001, 57 | 'momentum': 0.9, 58 | 'decay': 0., 59 | 'nesterov': True, 60 | 'epsilon': None 61 | } 62 | learn_all_outputs = True 63 | random_train_batches = False 64 | randomize_batch_order = True # Only effective if not using random train batches 65 | train_with_soft_target_stdev = None # float to make targets a gaussian with stdev. 66 | 67 | # The temporal-first outputs are computed from zero-padding. 
Setting below to True ignores these inputs: 68 | train_only_in_receptive_field = True 69 | 70 | keras_verbose = 1 71 | debug = False 72 | 73 | 74 | @ex.named_config 75 | def book(): 76 | desired_sample_rate = 4000 77 | data_dir = 'data_book' 78 | dilation_depth = 8 79 | nb_stacks = 1 80 | fragment_length = 2 ** 10 81 | nb_filters = 256 82 | batch_size = 16 83 | fragment_stride = compute_receptive_field_(desired_sample_rate, dilation_depth, nb_stacks)[0] 84 | 85 | 86 | @ex.named_config 87 | def small(): 88 | desired_sample_rate = 4410 89 | nb_filters = 16 90 | dilation_depth = 8 91 | nb_stacks = 1 92 | fragment_length = 128 + (compute_receptive_field_(desired_sample_rate, dilation_depth, nb_stacks)[0]) 93 | fragment_stride = int(desired_sample_rate / 10) 94 | 95 | 96 | @ex.named_config 97 | def soft_targets(): 98 | train_with_soft_target_stdev = 0.5 99 | # TODO: smooth decay of stdev per epoch. 100 | 101 | 102 | @ex.named_config 103 | def vctkdata(): 104 | assert os.path.isdir(os.path.join('vctk', 'VCTK-Corpus')), "Please download vctk by running vctk/download_vctk.sh." 105 | desired_sample_rate = 4000 106 | data_dir = 'vctk/VCTK-Corpus/wav48' 107 | data_dir_structure = 'vctk' 108 | test_factor = 0.01 109 | 110 | 111 | @ex.named_config 112 | def vctkmod(desired_sample_rate): 113 | nb_filters = 32 114 | dilation_depth = 7 115 | nb_stacks = 4 116 | fragment_length = 1 + (compute_receptive_field_(desired_sample_rate, dilation_depth, nb_stacks)[0]) 117 | fragment_stride = int(desired_sample_rate / 10) 118 | random_train_batches = True 119 | 120 | 121 | @ex.named_config 122 | def length32(desired_sample_rate, dilation_depth, nb_stacks): 123 | fragment_length = 32 + (compute_receptive_field_(desired_sample_rate, dilation_depth, nb_stacks)[0]) 124 | 125 | 126 | @ex.named_config 127 | def adam(): 128 | optimizer = { 129 | 'optimizer': 'adam', 130 | 'lr': 0.001, 131 | 'decay': 0., 132 | 'epsilon': 1e-8 133 | } 134 | 135 | 136 | @ex.named_config 137 | def adam2(): 138 | optimizer = { 139 | 'optimizer': 'adam', 140 | 'lr': 0.01, 141 | 'decay': 0., 142 | 'epsilon': 1e-10 143 | } 144 | 145 | 146 | @ex.config 147 | def predict_config(): 148 | predict_seconds = 1 149 | sample_argmax = False 150 | sample_temperature = 1.0 # Temperature for sampling. > 1.0 for more exploring, < 1.0 for conservative samples. 151 | predict_use_softmax_as_input = False # Uses the softmax rather than the argmax as in input for the next step. 152 | predict_initial_input = None 153 | 154 | 155 | @ex.named_config 156 | def batch_run(): 157 | keras_verbose = 2 158 | 159 | 160 | def skip_out_of_receptive_field(func): 161 | # TODO: consider using keras masking for this? 162 | receptive_field, _ = compute_receptive_field() 163 | 164 | def wrapper(y_true, y_pred): 165 | y_true = y_true[:, receptive_field - 1:, :] 166 | y_pred = y_pred[:, receptive_field - 1:, :] 167 | return func(y_true, y_pred) 168 | 169 | wrapper.__name__ = func.__name__ 170 | 171 | return wrapper 172 | 173 | 174 | def print_t(tensor, label): 175 | tensor.name = label 176 | # tensor = theano.printing.Print(tensor.name, attrs=('__str__', 'shape'))(tensor) 177 | return tensor 178 | 179 | 180 | @ex.capture 181 | def make_soft(y_true, fragment_length, nb_output_bins, train_with_soft_target_stdev, with_prints=False): 182 | receptive_field, _ = compute_receptive_field() 183 | n_outputs = fragment_length - receptive_field + 1 184 | 185 | # Make a gaussian kernel. 
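# The lines that follow build a length-9 Gaussian window with
# std=train_with_soft_target_stdev and use it, via K.conv2d, to smear each
# one-hot target over its neighbouring amplitude bins (see the
# make_targets_soft wrapper below: "Turns one-hot into gaussian distributed").
# The final division by K.sum(..., keepdims=True) renormalizes every timestep
# so the softened target is again a probability distribution over the
# nb_output_bins classes.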
186 | kernel_v = scipy.signal.gaussian(9, std=train_with_soft_target_stdev) 187 | print(kernel_v) 188 | kernel_v = np.reshape(kernel_v, [1, 1, -1, 1]) 189 | kernel = K.variable(kernel_v) 190 | 191 | if with_prints: 192 | y_true = print_t(y_true, 'y_true initial') 193 | 194 | # y_true: [batch, timesteps, input_dim] 195 | y_true = K.reshape(y_true, (-1, 1, nb_output_bins, 1)) # Same filter for all output; combine with batch. 196 | # y_true: [batch*timesteps, n_channels=1, input_dim, dummy] 197 | y_true = K.conv2d(y_true, kernel, padding='same') 198 | y_true = K.reshape(y_true, (-1, n_outputs, nb_output_bins)) # Same filter for all output; combine with batch. 199 | # y_true: [batch, timesteps, input_dim] 200 | y_true /= K.sum(y_true, axis=-1, keepdims=True) 201 | 202 | if with_prints: 203 | y_true = print_t(y_true, 'y_true after') 204 | return y_true 205 | 206 | 207 | def make_targets_soft(func): 208 | """Turns one-hot into gaussian distributed.""" 209 | 210 | def wrapper(y_true, y_pred): 211 | y_true = make_soft(y_true) 212 | y_pred = y_pred 213 | return func(y_true, y_pred) 214 | 215 | wrapper.__name__ = func.__name__ 216 | 217 | return wrapper 218 | 219 | 220 | @ex.capture() 221 | def build_model(fragment_length, nb_filters, nb_output_bins, dilation_depth, nb_stacks, use_skip_connections, 222 | learn_all_outputs, _log, desired_sample_rate, use_bias, res_l2, final_l2): 223 | def residual_block(x): 224 | original_x = x 225 | # TODO: initalization, regularization? 226 | # Note: The AtrousConvolution1D with the 'causal' flag is implemented in github.com/basveeling/keras#@wavenet. 227 | tanh_out = CausalAtrousConvolution1D(nb_filters, 2, dilation_rate=2 ** i, padding='valid', causal=True, 228 | use_bias=use_bias, 229 | name='dilated_conv_%d_tanh_s%d' % (2 ** i, s), activation='tanh', 230 | kernel_regularizer=l2(res_l2))(x) 231 | sigm_out = CausalAtrousConvolution1D(nb_filters, 2, dilation_rate=2 ** i, padding='valid', causal=True, 232 | use_bias=use_bias, 233 | name='dilated_conv_%d_sigm_s%d' % (2 ** i, s), activation='sigmoid', 234 | kernel_regularizer=l2(res_l2))(x) 235 | x = layers.Multiply(name='gated_activation_%d_s%d' % (i, s))([tanh_out, sigm_out]) 236 | 237 | res_x = layers.Convolution1D(nb_filters, 1, padding='same', use_bias=use_bias, 238 | kernel_regularizer=l2(res_l2))(x) 239 | skip_x = layers.Convolution1D(nb_filters, 1, padding='same', use_bias=use_bias, 240 | kernel_regularizer=l2(res_l2))(x) 241 | res_x = layers.Add()([original_x, res_x]) 242 | return res_x, skip_x 243 | 244 | input = Input(shape=(fragment_length, nb_output_bins), name='input_part') 245 | out = input 246 | skip_connections = [] 247 | out = CausalAtrousConvolution1D(nb_filters, 2, 248 | dilation_rate=1, 249 | padding='valid', 250 | causal=True, 251 | name='initial_causal_conv' 252 | )(out) 253 | for s in range(nb_stacks): 254 | for i in range(0, dilation_depth + 1): 255 | out, skip_out = residual_block(out) 256 | skip_connections.append(skip_out) 257 | 258 | if use_skip_connections: 259 | out = layers.Add()(skip_connections) 260 | out = layers.Activation('relu')(out) 261 | out = layers.Convolution1D(nb_output_bins, 1, padding='same', 262 | kernel_regularizer=l2(final_l2))(out) 263 | out = layers.Activation('relu')(out) 264 | out = layers.Convolution1D(nb_output_bins, 1, padding='same')(out) 265 | 266 | if not learn_all_outputs: 267 | raise DeprecationWarning('Learning on just all outputs is wasteful, now learning only inside receptive field.') 268 | out = layers.Lambda(lambda x: x[:, -1, :], 
output_shape=(out._keras_shape[-1],))( 269 | out) # Based on gif in deepmind blog: take last output? 270 | 271 | out = layers.Activation('softmax', name="output_softmax")(out) 272 | model = Model(input, out) 273 | 274 | receptive_field, receptive_field_ms = compute_receptive_field() 275 | 276 | _log.info('Receptive Field: %d (%dms)' % (receptive_field, int(receptive_field_ms))) 277 | return model 278 | 279 | 280 | @ex.capture 281 | def compute_receptive_field(desired_sample_rate, dilation_depth, nb_stacks): 282 | return compute_receptive_field_(desired_sample_rate, dilation_depth, nb_stacks) 283 | 284 | 285 | def compute_receptive_field_(desired_sample_rate, dilation_depth, nb_stacks): 286 | receptive_field = nb_stacks * (2 ** dilation_depth * 2) - (nb_stacks - 1) 287 | receptive_field_ms = (receptive_field * 1000) / desired_sample_rate 288 | return receptive_field, receptive_field_ms 289 | 290 | 291 | @ex.capture(prefix='optimizer') 292 | def make_optimizer(optimizer, lr, momentum, decay, nesterov, epsilon): 293 | if optimizer == 'sgd': 294 | optim = SGD(lr, momentum, decay, nesterov) 295 | elif optimizer == 'adam': 296 | optim = Adam(lr=lr, decay=decay, epsilon=epsilon) 297 | else: 298 | raise ValueError('Invalid config for optimizer.optimizer: ' + optimizer) 299 | return optim 300 | 301 | 302 | @ex.command 303 | def predict(desired_sample_rate, fragment_length, _log, seed, _seed, _config, predict_seconds, data_dir, batch_size, 304 | fragment_stride, nb_output_bins, learn_all_outputs, run_dir, predict_use_softmax_as_input, use_ulaw, 305 | predict_initial_input, 306 | **kwargs): 307 | fragment_length = compute_receptive_field()[0] 308 | _config['fragment_length'] = fragment_length 309 | 310 | checkpoint_dir = os.path.join(run_dir, 'checkpoints') 311 | last_checkpoint = sorted(os.listdir(checkpoint_dir))[-1] 312 | epoch = int(re.match(r'checkpoint\.(\d+?)-.*', last_checkpoint).group(1)) 313 | _log.info('Using checkpoint from epoch: %s' % epoch) 314 | 315 | sample_dir = os.path.join(run_dir, 'samples') 316 | if not os.path.exists(sample_dir): 317 | os.mkdir(sample_dir) 318 | 319 | sample_name = make_sample_name(epoch) 320 | sample_filename = os.path.join(sample_dir, sample_name) 321 | 322 | _log.info('Saving to "%s"' % sample_filename) 323 | 324 | sample_stream = make_sample_stream(desired_sample_rate, sample_filename) 325 | 326 | model = build_model() 327 | model.load_weights(os.path.join(checkpoint_dir, last_checkpoint)) 328 | model.summary() 329 | 330 | if predict_initial_input is None: 331 | outputs = list(dataset.one_hot(np.zeros(fragment_length) + nb_output_bins / 2)) 332 | elif predict_initial_input != '': 333 | _log.info('Taking first %d (%.2fs) from \'%s\' as initial input.' 
% ( 334 | fragment_length, fragment_length / desired_sample_rate, predict_initial_input)) 335 | wav = dataset.process_wav(desired_sample_rate, predict_initial_input, use_ulaw) 336 | outputs = list(dataset.one_hot(wav[0:fragment_length])) 337 | else: 338 | _log.info('Taking sample from test dataset as initial input.') 339 | data_generators, _ = get_generators() 340 | outputs = list(data_generators['test'].next()[0][-1]) 341 | 342 | # write_samples(sample_stream, outputs) 343 | warned_repetition = False 344 | for i in tqdm(range(int(desired_sample_rate * predict_seconds))): 345 | if not warned_repetition: 346 | if np.argmax(outputs[-1]) == np.argmax(outputs[-2]) and np.argmax(outputs[-2]) == np.argmax(outputs[-3]): 347 | warned_repetition = True 348 | _log.warning('Last three predicted outputs where %d' % np.argmax(outputs[-1])) 349 | else: 350 | warned_repetition = False 351 | prediction_seed = np.expand_dims(np.array(outputs[i:i + fragment_length]), 0) 352 | output = model.predict(prediction_seed) 353 | output_dist = output[0][-1] 354 | output_val = draw_sample(output_dist) 355 | if predict_use_softmax_as_input: 356 | outputs.append(output_dist) 357 | else: 358 | outputs.append(output_val) 359 | write_samples(sample_stream, [output_val]) 360 | 361 | sample_stream.close() 362 | 363 | _log.info("Done!") 364 | 365 | 366 | @ex.capture 367 | def make_sample_name(epoch, predict_seconds, predict_use_softmax_as_input, sample_argmax, sample_temperature, seed): 368 | sample_str = '' 369 | if predict_use_softmax_as_input: 370 | sample_str += '_soft-in' 371 | if sample_argmax: 372 | sample_str += '_argmax' 373 | else: 374 | sample_str += '_sample' 375 | if sample_temperature: 376 | sample_str += '-temp-%s' % sample_temperature 377 | sample_name = 'sample_epoch-%05d_%02ds_%s_seed-%d.wav' % (epoch, int(predict_seconds), sample_str, seed) 378 | return sample_name 379 | 380 | 381 | @ex.capture 382 | def write_samples(sample_file, out_val, use_ulaw): 383 | s = np.argmax(out_val, axis=-1).astype('uint8') 384 | # print out_val, 385 | if use_ulaw: 386 | s = dataset.ulaw2lin(s) 387 | # print s, 388 | s = bytearray(list(s)) 389 | # print s[0] 390 | sample_file.writeframes(s) 391 | sample_file._file.flush() 392 | 393 | 394 | @ex.capture 395 | def get_generators(batch_size, data_dir, desired_sample_rate, fragment_length, fragment_stride, learn_all_outputs, 396 | nb_output_bins, use_ulaw, test_factor, data_dir_structure, randomize_batch_order, _rnd, 397 | random_train_batches): 398 | if data_dir_structure == 'flat': 399 | return dataset.generators(data_dir, desired_sample_rate, fragment_length, batch_size, 400 | fragment_stride, nb_output_bins, learn_all_outputs, use_ulaw, randomize_batch_order, 401 | _rnd, random_train_batches) 402 | 403 | elif data_dir_structure == 'vctk': 404 | return dataset.generators_vctk(data_dir, desired_sample_rate, fragment_length, batch_size, 405 | fragment_stride, nb_output_bins, learn_all_outputs, use_ulaw, test_factor, 406 | randomize_batch_order, _rnd, random_train_batches) 407 | else: 408 | raise ValueError('data_dir_structure must be "flat" or "vctk", is %s' % data_dir_structure) 409 | 410 | 411 | @ex.command 412 | def test_make_soft(_log, train_with_soft_target_stdev, _config): 413 | if train_with_soft_target_stdev is None: 414 | _config['train_with_soft_target_stdev'] = 1 415 | y_true = K.reshape(K.eye(512)[:129, :256], (2, 129, 256)) 416 | y_soft = make_soft(y_true) 417 | f = K.function([], y_soft) 418 | _log.info('Output of soft:') 419 | f1 = f([]) 420 | 421 | _log.info(f1[0, 
0]) 422 | _log.info(f1[-1, -1]) 423 | 424 | 425 | @ex.command 426 | def test_preprocess(desired_sample_rate, batch_size, use_ulaw): 427 | sample_dir = os.path.join('preprocess_test') 428 | if not os.path.exists(sample_dir): 429 | os.mkdir(sample_dir) 430 | 431 | ulaw_str = '_ulaw' if use_ulaw else '' 432 | sample_filename = os.path.join(sample_dir, 'test1%s.wav' % ulaw_str) 433 | sample_stream = make_sample_stream(desired_sample_rate, sample_filename) 434 | 435 | data_generators, _ = get_generators() 436 | outputs = data_generators['test'].next()[0][1].astype('uint8') 437 | 438 | write_samples(sample_stream, outputs) 439 | scipy.io.wavfile.write(os.path.join(sample_dir, 'test2%s.wav' % ulaw_str), desired_sample_rate, 440 | np.argmax(outputs, axis=-1).astype('uint8')) 441 | 442 | 443 | def make_sample_stream(desired_sample_rate, sample_filename): 444 | sample_file = wave.open(sample_filename, mode='w') 445 | sample_file.setnchannels(1) 446 | sample_file.setframerate(desired_sample_rate) 447 | sample_file.setsampwidth(1) 448 | return sample_file 449 | 450 | 451 | def softmax(x, temp, mod=np): 452 | x = mod.log(x) / temp 453 | e_x = mod.exp(x - mod.max(x, axis=-1)) 454 | return e_x / mod.sum(e_x, axis=-1) 455 | 456 | 457 | @ex.capture 458 | def draw_sample(output_dist, sample_temperature, sample_argmax, _rnd): 459 | if sample_argmax: 460 | output_dist = np.eye(256)[np.argmax(output_dist, axis=-1)] 461 | else: 462 | if sample_temperature is not None: 463 | output_dist = softmax(output_dist, sample_temperature) 464 | output_dist = output_dist / np.sum(output_dist + 1e-7) 465 | output_dist = _rnd.multinomial(1, output_dist) 466 | return output_dist 467 | 468 | 469 | @ex.automain 470 | def main(run_dir, data_dir, nb_epoch, early_stopping_patience, desired_sample_rate, fragment_length, batch_size, 471 | fragment_stride, nb_output_bins, keras_verbose, _log, seed, _config, debug, learn_all_outputs, 472 | train_only_in_receptive_field, _run, use_ulaw, train_with_soft_target_stdev): 473 | if run_dir is None: 474 | if not os.path.exists("models"): 475 | os.mkdir("models") 476 | run_dir = os.path.join('models', datetime.datetime.now().strftime('run_%Y%m%d_%H%M%S')) 477 | _config['run_dir'] = run_dir 478 | 479 | print_config(_run) 480 | 481 | _log.info('Running with seed %d' % seed) 482 | 483 | if not debug: 484 | if os.path.exists(run_dir): 485 | raise EnvironmentError('Run with seed %d already exists' % seed) 486 | os.mkdir(run_dir) 487 | checkpoint_dir = os.path.join(run_dir, 'checkpoints') 488 | json.dump(_config, open(os.path.join(run_dir, 'config.json'), 'w')) 489 | 490 | _log.info('Loading data...') 491 | data_generators, nb_examples = get_generators() 492 | 493 | _log.info('Building model...') 494 | model = build_model(fragment_length) 495 | _log.info(model.summary()) 496 | 497 | optim = make_optimizer() 498 | _log.info('Compiling Model...') 499 | 500 | loss = objectives.categorical_crossentropy 501 | all_metrics = [ 502 | metrics.categorical_accuracy, 503 | categorical_mean_squared_error 504 | ] 505 | if train_with_soft_target_stdev: 506 | loss = make_targets_soft(loss) 507 | if train_only_in_receptive_field: 508 | loss = skip_out_of_receptive_field(loss) 509 | all_metrics = [skip_out_of_receptive_field(m) for m in all_metrics] 510 | 511 | model.compile(optimizer=optim, loss=loss, metrics=all_metrics) 512 | # TODO: Consider gradient weighting making last outputs more important. 
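# With the defaults in config() above (nb_stacks=1, dilation_depth=9,
# desired_sample_rate=4410), compute_receptive_field_() returns
# 1 * (2**9 * 2) - 0 = 1024 samples, i.e. about 232 ms of audio, and
# fragment_length = 128 + 1024 = 1152. Because train_only_in_receptive_field
# defaults to True, skip_out_of_receptive_field() trims the first
# receptive_field - 1 timesteps from y_true and y_pred, so the loss and
# metrics compiled above are evaluated only on outputs whose full receptive
# field lies inside the training fragment.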
513 | 514 | tictoc = strftime("%a_%d_%b_%Y_%H_%M_%S", gmtime()) 515 | directory_name = tictoc 516 | log_dir = 'wavenet_' + directory_name 517 | os.mkdir(log_dir) 518 | tensorboard = TensorBoard(log_dir=log_dir) 519 | 520 | callbacks = [ 521 | tensorboard, 522 | ReduceLROnPlateau(patience=early_stopping_patience / 2, cooldown=early_stopping_patience / 4, verbose=1), 523 | EarlyStopping(patience=early_stopping_patience, verbose=1), 524 | ] 525 | if not debug: 526 | callbacks.extend([ 527 | ModelCheckpoint(os.path.join(checkpoint_dir, 'checkpoint.{epoch:05d}-{val_loss:.3f}.hdf5'), 528 | save_best_only=True), 529 | CSVLogger(os.path.join(run_dir, 'history.csv')), 530 | ]) 531 | 532 | if not debug: 533 | os.mkdir(checkpoint_dir) 534 | _log.info('Starting Training...') 535 | 536 | print("nb_examples['train'] {0}".format(nb_examples['train'])) 537 | print("nb_examples['test'] {0}".format(nb_examples['test'])) 538 | 539 | model.fit_generator(data_generators['train'], 540 | steps_per_epoch=nb_examples['train'] // batch_size, 541 | epochs=nb_epoch, 542 | validation_data=data_generators['test'], 543 | validation_steps=nb_examples['test'] // batch_size, 544 | callbacks=callbacks, 545 | verbose=keras_verbose) 546 | -------------------------------------------------------------------------------- /ch04/wavenet_utils.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | from keras.layers.convolutional import Conv1D 3 | from keras.utils.conv_utils import conv_output_length 4 | import tensorflow as tf 5 | 6 | 7 | def asymmetric_temporal_padding(x, left_pad=1, right_pad=1): 8 | '''Pad the middle dimension of a 3D tensor 9 | with "left_pad" zeros left and "right_pad" right. 10 | ''' 11 | pattern = [[0, 0], [left_pad, right_pad], [0, 0]] 12 | return tf.pad(x, pattern) 13 | 14 | 15 | def categorical_mean_squared_error(y_true, y_pred): 16 | """MSE for categorical variables.""" 17 | return K.mean(K.square(K.argmax(y_true, axis=-1) - 18 | K.argmax(y_pred, axis=-1))) 19 | 20 | 21 | class CausalAtrousConvolution1D(Conv1D): 22 | def __init__(self, filters, kernel_size, init='glorot_uniform', activation=None, 23 | padding='valid', strides=1, dilation_rate=1, bias_regularizer=None, 24 | activity_regularizer=None, kernel_constraint=None, bias_constraint=None, use_bias=True, causal=False, **kwargs): 25 | super(CausalAtrousConvolution1D, self).__init__(filters, 26 | kernel_size=kernel_size, 27 | strides=strides, 28 | padding=padding, 29 | dilation_rate=dilation_rate, 30 | activation=activation, 31 | use_bias=use_bias, 32 | kernel_initializer=init, 33 | activity_regularizer=activity_regularizer, 34 | bias_regularizer=bias_regularizer, 35 | kernel_constraint=kernel_constraint, 36 | bias_constraint=bias_constraint, 37 | **kwargs) 38 | 39 | self.causal = causal 40 | if self.causal and padding != 'valid': 41 | raise ValueError("Causal mode dictates border_mode=valid.") 42 | 43 | def compute_output_shape(self, input_shape): 44 | input_length = input_shape[1] 45 | 46 | if self.causal: 47 | input_length += self.dilation_rate[0] * (self.kernel_size[0] - 1) 48 | 49 | length = conv_output_length(input_length, 50 | self.kernel_size[0], 51 | self.padding, 52 | self.strides[0], 53 | dilation=self.dilation_rate[0]) 54 | 55 | return (input_shape[0], length, self.filters) 56 | 57 | def call(self, x): 58 | if self.causal: 59 | x = asymmetric_temporal_padding(x, self.dilation_rate[0] * (self.kernel_size[0] - 1), 0) 60 | return super(CausalAtrousConvolution1D, self).call(x) 61 
| -------------------------------------------------------------------------------- /ch05/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oreilly-japan/deep-learning-with-keras-ja/3180b9c64b2317de1bf0b8441fde914f9926b9a1/ch05/data/.gitkeep -------------------------------------------------------------------------------- /ch05/finetune_glove_embeddings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function 3 | import os 4 | import collections 5 | 6 | import nltk 7 | import numpy as np 8 | from keras.callbacks import TensorBoard 9 | from keras.layers import Dense, Dropout, Conv1D, Embedding, GlobalMaxPooling1D 10 | from keras.models import Sequential 11 | from keras.preprocessing.sequence import pad_sequences 12 | from keras.utils import np_utils 13 | from sklearn.model_selection import train_test_split 14 | import codecs 15 | 16 | np.random.seed(42) 17 | 18 | INPUT_FILE = os.path.join(os.path.dirname(__file__), 19 | "data/umich-sentiment-train.txt") 20 | GLOVE_MODEL = os.path.join(os.path.dirname(__file__), 21 | "./data/glove.6B.300d.txt") 22 | LOG_DIR = os.path.join(os.path.dirname(__file__), "logs") 23 | VOCAB_SIZE = 5000 24 | EMBED_SIZE = 300 25 | NUM_FILTERS = 256 26 | NUM_WORDS = 3 27 | BATCH_SIZE = 64 28 | NUM_EPOCHS = 10 29 | 30 | counter = collections.Counter() 31 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as fin: 32 | maxlen = 0 33 | for line in fin: 34 | _, sent = line.strip().split("\t") 35 | try: 36 | words = [x.lower() for x in nltk.word_tokenize(sent)] 37 | except LookupError: 38 | print("Englisth tokenize does not downloaded. So download it.") 39 | nltk.download("punkt") 40 | words = [x.lower() for x in nltk.word_tokenize(sent)] 41 | if len(words) > maxlen: 42 | maxlen = len(words) 43 | for word in words: 44 | counter[word] += 1 45 | 46 | word2index = collections.defaultdict(int) 47 | for wid, word in enumerate(counter.most_common(VOCAB_SIZE)): 48 | word2index[word[0]] = wid + 1 49 | vocab_sz = len(word2index) + 1 50 | index2word = {v: k for k, v in word2index.items()} 51 | 52 | xs, ys = [], [] 53 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as fin: 54 | for line in fin: 55 | label, sent = line.strip().split("\t") 56 | ys.append(int(label)) 57 | words = [x.lower() for x in nltk.word_tokenize(sent)] 58 | wids = [word2index[word] for word in words] 59 | xs.append(wids) 60 | 61 | X = pad_sequences(xs, maxlen=maxlen) 62 | Y = np_utils.to_categorical(ys) 63 | 64 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, 65 | random_state=42) 66 | print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape) 67 | 68 | # load GloVe vectors 69 | word2emb = {} 70 | with codecs.open(GLOVE_MODEL, "r", encoding="utf-8") as fglove: 71 | for line in fglove: 72 | cols = line.strip().split() 73 | word = cols[0] 74 | embedding = np.array(cols[1:], dtype="float32") 75 | word2emb[word] = embedding 76 | 77 | embedding_weights = np.zeros((vocab_sz, EMBED_SIZE)) 78 | for word, index in word2index.items(): 79 | try: 80 | embedding_weights[index, :] = word2emb[word] 81 | except KeyError: 82 | pass 83 | 84 | model = Sequential() 85 | model.add(Embedding(vocab_sz, EMBED_SIZE, input_length=maxlen, 86 | weights=[embedding_weights], 87 | trainable=True)) 88 | model.add(Dropout(0.2)) 89 | model.add(Conv1D(filters=NUM_FILTERS, kernel_size=NUM_WORDS, 90 | activation="relu")) 91 | 
model.add(GlobalMaxPooling1D()) 92 | model.add(Dense(2, activation="softmax")) 93 | 94 | model.compile(optimizer="adam", loss="categorical_crossentropy", 95 | metrics=["accuracy"]) 96 | 97 | history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, 98 | epochs=NUM_EPOCHS, 99 | callbacks=[TensorBoard(LOG_DIR)], 100 | validation_data=(Xtest, Ytest)) 101 | 102 | # evaluate model 103 | score = model.evaluate(Xtest, Ytest, verbose=1) 104 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score[0], score[1])) 105 | -------------------------------------------------------------------------------- /ch05/finetune_word2vec_embeddings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function 3 | import os 4 | import collections 5 | 6 | import nltk 7 | import numpy as np 8 | from gensim.models import KeyedVectors 9 | from keras.callbacks import TensorBoard 10 | from keras.layers import Dense, Dropout, Conv1D, Embedding, GlobalMaxPooling1D 11 | from keras.models import Sequential 12 | from keras.preprocessing.sequence import pad_sequences 13 | from keras.utils import np_utils 14 | from sklearn.model_selection import train_test_split 15 | import codecs 16 | 17 | np.random.seed(42) 18 | 19 | INPUT_FILE = os.path.join(os.path.dirname(__file__), 20 | "data/umich-sentiment-train.txt") 21 | WORD2VEC_MODEL = os.path.join(os.path.dirname(__file__), 22 | "data/GoogleNews-vectors-negative300.bin.gz") 23 | LOG_DIR = os.path.join(os.path.dirname(__file__), "logs") 24 | VOCAB_SIZE = 5000 25 | EMBED_SIZE = 300 26 | NUM_FILTERS = 256 27 | NUM_WORDS = 3 28 | BATCH_SIZE = 64 29 | NUM_EPOCHS = 10 30 | 31 | counter = collections.Counter() 32 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as fin: 33 | maxlen = 0 34 | for line in fin: 35 | _, sent = line.strip().split("\t") 36 | try: 37 | words = [x.lower() for x in nltk.word_tokenize(sent)] 38 | except LookupError: 39 | print("Englisth tokenize does not downloaded. 
So download it.") 40 | nltk.download("punkt") 41 | words = [x.lower() for x in nltk.word_tokenize(sent)] 42 | 43 | maxlen = max(maxlen, len(words)) 44 | for word in words: 45 | counter[word] += 1 46 | 47 | word2index = collections.defaultdict(int) 48 | for wid, word in enumerate(counter.most_common(VOCAB_SIZE)): 49 | word2index[word[0]] = wid + 1 50 | vocab_sz = len(word2index) + 1 51 | index2word = {v: k for k, v in word2index.items()} 52 | 53 | xs, ys = [], [] 54 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as fin: 55 | for line in fin: 56 | label, sent = line.strip().split("\t") 57 | ys.append(int(label)) 58 | words = [x.lower() for x in nltk.word_tokenize(sent)] 59 | wids = [word2index[word] for word in words] 60 | xs.append(wids) 61 | 62 | X = pad_sequences(xs, maxlen=maxlen) 63 | Y = np_utils.to_categorical(ys) 64 | 65 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, 66 | random_state=42) 67 | print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape) 68 | 69 | # load word2vec model 70 | word2vec = KeyedVectors.load_word2vec_format(WORD2VEC_MODEL, binary=True) 71 | embedding_weights = np.zeros((vocab_sz, EMBED_SIZE)) 72 | for word, index in word2index.items(): 73 | try: 74 | embedding_weights[index, :] = word2vec[word] 75 | except KeyError: 76 | pass 77 | 78 | model = Sequential() 79 | model.add(Embedding(vocab_sz, EMBED_SIZE, input_length=maxlen, 80 | weights=[embedding_weights], 81 | trainable=True)) 82 | model.add(Dropout(0.2)) 83 | model.add(Conv1D(filters=NUM_FILTERS, kernel_size=NUM_WORDS, 84 | activation="relu")) 85 | model.add(GlobalMaxPooling1D()) 86 | model.add(Dense(2, activation="softmax")) 87 | 88 | model.compile(optimizer="adam", loss="categorical_crossentropy", 89 | metrics=["accuracy"]) 90 | history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, 91 | epochs=NUM_EPOCHS, 92 | callbacks=[TensorBoard(LOG_DIR)], 93 | validation_data=(Xtest, Ytest)) 94 | 95 | # evaluate model 96 | score = model.evaluate(Xtest, Ytest, verbose=1) 97 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score[0], score[1])) 98 | -------------------------------------------------------------------------------- /ch05/keras_cbow.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function 3 | 4 | from keras.models import Sequential 5 | from keras.layers import Dense, Lambda, Embedding 6 | import keras.backend as K 7 | 8 | vocab_size = 5000 9 | embed_size = 300 10 | window_size = 1 11 | 12 | model = Sequential() 13 | model.add(Embedding(input_dim=vocab_size, output_dim=embed_size, 14 | embeddings_initializer='glorot_uniform', 15 | input_length=window_size*2)) 16 | model.add(Lambda(lambda x: K.mean(x, axis=1), output_shape=(embed_size,))) 17 | model.add(Dense(vocab_size, kernel_initializer='glorot_uniform', 18 | activation='softmax')) 19 | 20 | model.compile(loss='categorical_crossentropy', optimizer="adadelta") 21 | 22 | # get weights 23 | weights = model.layers[0].get_weights()[0] 24 | -------------------------------------------------------------------------------- /ch05/keras_skipgram.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function 3 | 4 | from keras.layers import Merge, Dense, Reshape, Embedding 5 | from keras.models import Sequential 6 | 7 | vocab_size = 5000 8 | embed_size = 300 9 | 10 | word_model = Sequential() 11 | 
word_model.add(Embedding(vocab_size, embed_size, 12 | embeddings_initializer="glorot_uniform", 13 | input_length=1)) 14 | word_model.add(Reshape((embed_size,))) 15 | 16 | context_model = Sequential() 17 | context_model.add(Embedding(vocab_size, embed_size, 18 | embeddings_initializer="glorot_uniform", 19 | input_length=1)) 20 | context_model.add(Reshape((embed_size,))) 21 | 22 | model = Sequential() 23 | model.add(Merge([word_model, context_model], mode="dot", dot_axes=0)) 24 | model.add(Dense(1, kernel_initializer="glorot_uniform", activation="sigmoid")) 25 | 26 | model.compile(loss="mean_squared_error", optimizer="adam") 27 | 28 | 29 | merge_layer = model.layers[0] 30 | word_model = merge_layer.layers[0] 31 | word_embed_layer = word_model.layers[0] 32 | weights = word_embed_layer.get_weights()[0] 33 | -------------------------------------------------------------------------------- /ch05/learn_embedding_from_scratch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | import os 4 | import collections 5 | 6 | import nltk 7 | import numpy as np 8 | from keras.callbacks import TensorBoard 9 | from keras.layers import Dense, Dropout, Conv1D, Embedding, GlobalMaxPooling1D 10 | from keras.models import Sequential 11 | from keras.preprocessing.sequence import pad_sequences 12 | from keras.utils import np_utils 13 | from sklearn.model_selection import train_test_split 14 | import codecs 15 | 16 | np.random.seed(42) 17 | 18 | INPUT_FILE = os.path.join(os.path.dirname(__file__), 19 | "data/umich-sentiment-train.txt") 20 | LOG_DIR = os.path.join(os.path.dirname(__file__), "logs") 21 | VOCAB_SIZE = 5000 22 | EMBED_SIZE = 100 23 | NUM_FILTERS = 256 24 | NUM_WORDS = 3 25 | BATCH_SIZE = 64 26 | NUM_EPOCHS = 20 27 | 28 | counter = collections.Counter() 29 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as fin: 30 | maxlen = 0 31 | for line in fin: 32 | _, sent = line.strip().split("\t") 33 | try: 34 | words = [x.lower() for x in nltk.word_tokenize(sent)] 35 | except LookupError: 36 | print("Englisth tokenize does not downloaded. 
So download it.") 37 | nltk.download("punkt") 38 | words = [x.lower() for x in nltk.word_tokenize(sent)] 39 | maxlen = max(maxlen, len(words)) 40 | for word in words: 41 | counter[word] += 1 42 | 43 | word2index = collections.defaultdict(int) 44 | for wid, word in enumerate(counter.most_common(VOCAB_SIZE)): 45 | word2index[word[0]] = wid + 1 46 | vocab_sz = len(word2index) + 1 47 | index2word = {v: k for k, v in word2index.items()} 48 | 49 | xs, ys = [], [] 50 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as fin: 51 | for line in fin: 52 | label, sent = line.strip().split("\t") 53 | ys.append(int(label)) 54 | words = [x.lower() for x in nltk.word_tokenize(sent)] 55 | wids = [word2index[word] for word in words] 56 | xs.append(wids) 57 | 58 | X = pad_sequences(xs, maxlen=maxlen) 59 | Y = np_utils.to_categorical(ys) 60 | 61 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, 62 | random_state=42) 63 | print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape) 64 | 65 | model = Sequential() 66 | model.add(Embedding(vocab_sz, EMBED_SIZE, input_length=maxlen)) 67 | model.add(Dropout(0.2)) 68 | model.add(Conv1D(filters=NUM_FILTERS, kernel_size=NUM_WORDS, activation="relu")) 69 | model.add(GlobalMaxPooling1D()) 70 | model.add(Dense(2, activation="softmax")) 71 | 72 | model.compile(optimizer="adam", loss="categorical_crossentropy", 73 | metrics=["accuracy"]) 74 | history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, 75 | epochs=NUM_EPOCHS, 76 | callbacks=[TensorBoard(LOG_DIR)], 77 | validation_data=(Xtest, Ytest)) 78 | 79 | # evaluate model 80 | score = model.evaluate(Xtest, Ytest, verbose=1) 81 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score[0], score[1])) 82 | -------------------------------------------------------------------------------- /ch05/models.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Dense, Dropout, Conv1D, Embedding, GlobalMaxPooling1D 2 | from keras.models import Sequential 3 | 4 | 5 | def build_sentiment_model(vocab_size, embed_size, maxlen, 6 | num_filters, num_words, embedding_weights=None): 7 | model = Sequential() 8 | if embedding_weights is None: 9 | model.add(Embedding(vocab_size, embed_size, input_length=maxlen)) 10 | else: 11 | model.add(Embedding(vocab_size, embed_size, 12 | input_length=maxlen, 13 | weights=[embedding_weights])) 14 | model.add(Dropout(0.2)) 15 | model.add(Conv1D(filters=num_filters, kernel_size=num_words, 16 | activation="relu")) 17 | model.add(GlobalMaxPooling1D()) 18 | model.add(Dense(2, activation="softmax")) 19 | 20 | return model 21 | -------------------------------------------------------------------------------- /ch05/requirements.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow==1.8.0 3 | h5py==2.7.1 4 | matplotlib==2.1.1 5 | gensim==3.2.0 6 | nltk==3.2.5 7 | scikit-learn==0.19.1 8 | -------------------------------------------------------------------------------- /ch05/requirements_gpu.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow-gpu==1.8.0 3 | h5py==2.7.1 4 | matplotlib==2.1.1 5 | gensim==3.2.0 6 | nltk==3.2.5 7 | scikit-learn==0.19.1 8 | -------------------------------------------------------------------------------- /ch05/skipgram_example.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 
| 4 | from keras.preprocessing.text import Tokenizer, text_to_word_sequence 5 | from keras.preprocessing.sequence import skipgrams 6 | 7 | text = "I love green eggs and ham ." 8 | 9 | tokenizer = Tokenizer() 10 | tokenizer.fit_on_texts([text]) 11 | 12 | word2id = tokenizer.word_index 13 | id2word = {v: k for k, v in word2id.items()} 14 | 15 | wids = [word2id[w] for w in text_to_word_sequence(text)] 16 | pairs, labels = skipgrams(wids, len(word2id), window_size=1) 17 | print(len(pairs), len(labels)) 18 | for i in range(10): 19 | print("({:s} ({:d}), {:s} ({:d})) -> {:d}".format( 20 | id2word[pairs[i][0]], pairs[i][0], 21 | id2word[pairs[i][1]], pairs[i][1], 22 | labels[i])) 23 | -------------------------------------------------------------------------------- /ch05/transfer_glove_embeddings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | import os 4 | import collections 5 | 6 | import nltk 7 | import numpy as np 8 | from keras.callbacks import TensorBoard 9 | from keras.layers import Dense, Dropout 10 | from keras.models import Sequential 11 | from keras.preprocessing.sequence import pad_sequences 12 | from keras.utils import np_utils 13 | from sklearn.model_selection import train_test_split 14 | import codecs 15 | 16 | np.random.seed(42) 17 | 18 | INPUT_FILE = os.path.join(os.path.dirname(__file__), 19 | "data/umich-sentiment-train.txt") 20 | GLOVE_MODEL = os.path.join(os.path.dirname(__file__), 21 | "data/glove.6B.100d.txt") 22 | LOG_DIR = os.path.join(os.path.dirname(__file__), "logs") 23 | VOCAB_SIZE = 5000 24 | EMBED_SIZE = 100 25 | BATCH_SIZE = 64 26 | NUM_EPOCHS = 10 27 | 28 | print("reading data...") 29 | counter = collections.Counter() 30 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as fin: 31 | maxlen = 0 32 | for line in fin: 33 | _, sent = line.strip().split("\t") 34 | try: 35 | words = [x.lower() for x in nltk.word_tokenize(sent)] 36 | except LookupError: 37 | print("Englisth tokenize does not downloaded. 
So download it.") 38 | nltk.download("punkt") 39 | words = [x.lower() for x in nltk.word_tokenize(sent)] 40 | 41 | if len(words) > maxlen: 42 | maxlen = len(words) 43 | for word in words: 44 | counter[word] += 1 45 | 46 | 47 | print("creating vocabulary...") 48 | word2index = collections.defaultdict(int) 49 | for wid, word in enumerate(counter.most_common(VOCAB_SIZE)): 50 | word2index[word[0]] = wid + 1 51 | vocab_sz = len(word2index) + 1 52 | index2word = {v: k for k, v in word2index.items()} 53 | index2word[0] = "_UNK_" 54 | 55 | print("creating word sequences...") 56 | ws, ys = [], [] 57 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as fin: 58 | for line in fin: 59 | label, sent = line.strip().split("\t") 60 | ys.append(int(label)) 61 | words = [x.lower() for x in nltk.word_tokenize(sent)] 62 | wids = [word2index[word] for word in words] 63 | ws.append(wids) 64 | 65 | W = pad_sequences(ws, maxlen=maxlen) 66 | Y = np_utils.to_categorical(ys) 67 | 68 | # load GloVe vectors 69 | print("loading GloVe vectors...") 70 | word2emb = collections.defaultdict(int) 71 | with codecs.open(GLOVE_MODEL, "r", encoding="utf-8") as fglove: 72 | for line in fglove: 73 | cols = line.strip().split() 74 | word = cols[0] 75 | embedding = np.array(cols[1:], dtype="float32") 76 | word2emb[word] = embedding 77 | 78 | 79 | print("transferring embeddings...") 80 | X = np.zeros((W.shape[0], EMBED_SIZE)) 81 | for i in range(W.shape[0]): 82 | E = np.zeros((EMBED_SIZE, maxlen)) 83 | words = [index2word[wid] for wid in W[i].tolist()] 84 | for j in range(maxlen): 85 | E[:, j] = word2emb[words[j]] 86 | X[i, :] = np.sum(E, axis=1) 87 | 88 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, 89 | random_state=42) 90 | print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape) 91 | 92 | model = Sequential() 93 | model.add(Dense(32, input_dim=EMBED_SIZE, activation="relu")) 94 | model.add(Dropout(0.2)) 95 | model.add(Dense(2, activation="softmax")) 96 | 97 | model.compile(optimizer="adam", loss="categorical_crossentropy", 98 | metrics=["accuracy"]) 99 | history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, 100 | epochs=NUM_EPOCHS, 101 | callbacks=[TensorBoard(LOG_DIR)], 102 | validation_data=(Xtest, Ytest)) 103 | 104 | # evaluate model 105 | score = model.evaluate(Xtest, Ytest, verbose=1) 106 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score[0], score[1])) 107 | -------------------------------------------------------------------------------- /ch05/transfer_word2vec_embeddings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | import os 4 | import collections 5 | 6 | import nltk 7 | import numpy as np 8 | from gensim.models import KeyedVectors 9 | from keras.callbacks import TensorBoard 10 | from keras.layers import Dense, Dropout 11 | from keras.models import Sequential 12 | from keras.preprocessing.sequence import pad_sequences 13 | from keras.utils import np_utils 14 | from sklearn.model_selection import train_test_split 15 | import codecs 16 | 17 | 18 | np.random.seed(42) 19 | 20 | INPUT_FILE = os.path.join(os.path.dirname(__file__), 21 | "data/umich-sentiment-train.txt") 22 | WORD2VEC_MODEL = os.path.join(os.path.dirname(__file__), 23 | "data/GoogleNews-vectors-negative300.bin.gz") 24 | LOG_DIR = os.path.join(os.path.dirname(__file__), "logs") 25 | VOCAB_SIZE = 5000 26 | EMBED_SIZE = 300 27 | BATCH_SIZE = 64 28 | NUM_EPOCHS = 10 29 | 30 | print("reading data...") 31 | 
counter = collections.Counter() 32 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as f: 33 | maxlen = 0 34 | for line in f: 35 | _, sent = line.strip().split("\t") 36 | try: 37 | words = [x.lower() for x in nltk.word_tokenize(sent)] 38 | except LookupError: 39 | print("Englisth tokenize does not downloaded. So download it.") 40 | nltk.download("punkt") 41 | words = [x.lower() for x in nltk.word_tokenize(sent)] 42 | 43 | if len(words) > maxlen: 44 | maxlen = len(words) 45 | for word in words: 46 | counter[word] += 1 47 | 48 | print("creating vocabulary...") 49 | word2index = collections.defaultdict(int) 50 | for wid, word in enumerate(counter.most_common(VOCAB_SIZE)): 51 | word2index[word[0]] = wid + 1 52 | vocab_sz = len(word2index) + 1 53 | index2word = {v: k for k, v in word2index.items()} 54 | index2word[0] = "_UNK_" 55 | 56 | print("creating word sequences...") 57 | ws, ys = [], [] 58 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as fin: 59 | for line in fin: 60 | label, sent = line.strip().split("\t") 61 | ys.append(int(label)) 62 | words = [x.lower() for x in nltk.word_tokenize(sent)] 63 | wids = [word2index[word] for word in words] 64 | ws.append(wids) 65 | 66 | W = pad_sequences(ws, maxlen=maxlen) 67 | Y = np_utils.to_categorical(ys) 68 | 69 | # load GloVe vectors 70 | print("loading word2vec vectors...") 71 | word2vec = KeyedVectors.load_word2vec_format(WORD2VEC_MODEL, binary=True) 72 | 73 | print("transferring embeddings...") 74 | X = np.zeros((W.shape[0], EMBED_SIZE)) 75 | for i in range(W.shape[0]): 76 | E = np.zeros((EMBED_SIZE, maxlen)) 77 | words = [index2word[wid] for wid in W[i].tolist()] 78 | for j in range(maxlen): 79 | try: 80 | E[:, j] = word2vec[words[j]] 81 | except KeyError: 82 | pass 83 | X[i, :] = np.sum(E, axis=1) 84 | 85 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, 86 | random_state=42) 87 | print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape) 88 | 89 | model = Sequential() 90 | model.add(Dense(32, input_dim=EMBED_SIZE, activation="relu")) 91 | model.add(Dropout(0.2)) 92 | model.add(Dense(2, activation="softmax")) 93 | 94 | model.compile(optimizer="adam", loss="categorical_crossentropy", 95 | metrics=["accuracy"]) 96 | history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, 97 | epochs=NUM_EPOCHS, 98 | callbacks=[TensorBoard(LOG_DIR)], 99 | validation_data=(Xtest, Ytest)) 100 | 101 | # evaluate model 102 | score = model.evaluate(Xtest, Ytest, verbose=1) 103 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score[0], score[1])) 104 | -------------------------------------------------------------------------------- /ch05/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from collections import Counter 3 | 4 | import numpy as np 5 | 6 | PAD = "_PAD_" 7 | UNK = "_UNK_" 8 | 9 | 10 | def tokenize(text): 11 | return text.split() 12 | 13 | 14 | def decode_tsv(line): 15 | label, text = line.strip().split("\t") 16 | return text, int(label) 17 | 18 | 19 | def load_texts_and_labels(file): 20 | with open(file) as f: 21 | for line in f: 22 | text, label = decode_tsv(line) 23 | yield text, label 24 | 25 | 26 | def create_vocab(texts, n): 27 | freq = Counter() 28 | for text in texts: 29 | words = tokenize(text) 30 | freq.update(words) 31 | freq = freq.most_common(n) 32 | vocab = {w: i+2 for i, (w, _) in enumerate(freq)} 33 | vocab[PAD] = 0 34 | vocab[UNK] = 1 35 | 36 | return vocab 37 | 38 | 39 | def get_max_seq_len(data): 40 | max_seq_len = 0 41 | for 
text, _ in data: 42 | words = tokenize(text) 43 | max_seq_len = max(max_seq_len, len(words)) 44 | 45 | return max_seq_len 46 | 47 | 48 | def load_glove_vectors(file): 49 | word2emb = {} 50 | with open(file) as f: 51 | for line in f: 52 | cols = line.strip().split() 53 | word = cols[0] 54 | embedding = np.array(cols[1:], dtype="float32") 55 | word2emb[word] = embedding 56 | 57 | return word2emb 58 | 59 | 60 | def make_weight_matrix(word2index, word2emb, vocab_size, embed_size): 61 | embedding_weights = np.zeros((vocab_size, embed_size)) 62 | for word, index in word2index.items(): 63 | try: 64 | embedding_weights[index, :] = word2emb[word] 65 | except KeyError: 66 | pass 67 | 68 | return embedding_weights 69 | -------------------------------------------------------------------------------- /ch05/word2vec_cbow.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | import operator 4 | 5 | import nltk 6 | import numpy as np 7 | from keras.callbacks import TensorBoard 8 | from keras.layers import Dense, Dropout, Activation 9 | from keras.models import Sequential 10 | from keras.preprocessing.text import Tokenizer, one_hot 11 | from sklearn.model_selection import train_test_split 12 | from sklearn.metrics.pairwise import cosine_distances 13 | from sklearn.preprocessing import OneHotEncoder 14 | import codecs 15 | 16 | np.random.seed(42) 17 | 18 | LOG_DIR = './logs' 19 | BATCH_SIZE = 128 20 | NUM_EPOCHS = 20 21 | 22 | with codecs.open("./data/alice_in_wonderland.txt", "r", encoding="utf-8") as f: 23 | lines = [line.strip() for line in f if len(line) != 0] 24 | 25 | try: 26 | sents = nltk.sent_tokenize(" ".join(lines)) 27 | except LookupError: 28 | print("Englisth tokenize does not downloaded. 
So download it.") 29 | nltk.download("punkt") 30 | sents = nltk.sent_tokenize(" ".join(lines)) 31 | 32 | 33 | tokenizer = Tokenizer(5000) # use top 5000 words only 34 | tokens = tokenizer.fit_on_texts(sents) 35 | vocab_size = len(tokenizer.word_index) + 1 36 | 37 | w_lefts, w_centers, w_rights = [], [], [] 38 | for sent in sents: 39 | embedding = one_hot(sent, vocab_size) 40 | triples = list(nltk.trigrams(embedding)) 41 | w_lefts.extend([x[0] for x in triples]) 42 | w_centers.extend([x[1] for x in triples]) 43 | w_rights.extend([x[2] for x in triples]) 44 | 45 | ohe = OneHotEncoder(n_values=vocab_size) 46 | Xleft = ohe.fit_transform(np.array(w_lefts).reshape(-1, 1)).todense() 47 | Xright = ohe.fit_transform(np.array(w_rights).reshape(-1, 1)).todense() 48 | X = (Xleft + Xright) / 2.0 49 | Y = ohe.fit_transform(np.array(w_centers).reshape(-1, 1)).todense() 50 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, 51 | random_state=42) 52 | print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape) 53 | 54 | model = Sequential() 55 | model.add(Dense(300, input_shape=(Xtrain.shape[1],))) 56 | model.add(Activation("relu")) 57 | model.add(Dropout(0.5)) 58 | model.add(Dense(Ytrain.shape[1])) 59 | model.add(Activation("softmax")) 60 | 61 | model.compile(optimizer="rmsprop", loss="categorical_crossentropy", 62 | metrics=["accuracy"]) 63 | history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, 64 | epochs=NUM_EPOCHS, verbose=1, 65 | callbacks=[TensorBoard(LOG_DIR)], 66 | validation_data=(Xtest, Ytest)) 67 | 68 | # evaluate model 69 | score = model.evaluate(Xtest, Ytest, verbose=1) 70 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score[0], score[1])) 71 | 72 | # using the word2vec model 73 | word2idx = tokenizer.word_index 74 | idx2word = {v: k for k, v in word2idx.items()} 75 | 76 | # retrieve the weights from the first dense layer. 
This will convert 77 | # the input vector from a one-hot sum of two words to a dense 300 78 | # dimensional representation 79 | W, b = model.layers[0].get_weights() 80 | 81 | idx2emb = {} 82 | for word in word2idx.keys(): 83 | wid = word2idx[word] 84 | vec_in = ohe.fit_transform(np.array(wid)).todense() 85 | vec_emb = np.dot(vec_in, W) 86 | idx2emb[wid] = vec_emb 87 | 88 | for word in ["stupid", "alice", "succeeded"]: 89 | wid = word2idx[word] 90 | source_emb = idx2emb[wid] 91 | distances = [] 92 | for i in range(1, vocab_size): 93 | if i == wid: 94 | continue 95 | target_emb = idx2emb[i] 96 | distances.append(((wid, i), 97 | cosine_distances(source_emb, target_emb))) 98 | sorted_distances = sorted(distances, key=operator.itemgetter(1))[0:10] 99 | predictions = [idx2word[x[0][1]] for x in sorted_distances] 100 | print("{:s} => {:s}".format(word, ", ".join(predictions))) 101 | -------------------------------------------------------------------------------- /ch05/word2vec_gensim.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | import logging 4 | import os 5 | from gensim.models import word2vec 6 | 7 | 8 | logging.basicConfig(format="%(asctime)s : %(levelname)s : %(message)s", 9 | level=logging.INFO) 10 | 11 | DATA_DIR = os.path.join(os.path.dirname(__file__), "data") 12 | sentences = word2vec.Text8Corpus(os.path.join(DATA_DIR, "text8"), 50) 13 | model = word2vec.Word2Vec(sentences, size=300, min_count=30) 14 | 15 | print("model.most_similar('woman')") 16 | print(model.most_similar("woman")) 17 | 18 | 19 | print("model.most_similar(positive=['woman', 'king'], negative=['man'], topn=10)") 20 | print(model.most_similar(positive=["woman", "king"], 21 | negative=["man"], 22 | topn=10)) 23 | 24 | print("model.similarity('girl', 'woman')") 25 | print(model.similarity("girl", "woman")) 26 | print("model.similarity('girl', 'man')") 27 | print(model.similarity("girl", "man")) 28 | print("model.similarity('girl', 'car')") 29 | print(model.similarity("girl", "car")) 30 | print("model.similarity('bus', 'car')") 31 | print(model.similarity("bus", "car")) 32 | -------------------------------------------------------------------------------- /ch05/word2vec_skipgram.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | import operator 4 | 5 | import nltk 6 | import numpy as np 7 | from keras.callbacks import TensorBoard 8 | from keras.layers import Dense, Dropout, Activation 9 | from keras.models import Sequential 10 | from keras.preprocessing.text import Tokenizer, one_hot 11 | from sklearn.metrics.pairwise import cosine_distances 12 | from sklearn.model_selection import train_test_split 13 | from sklearn.preprocessing import OneHotEncoder 14 | import codecs 15 | 16 | np.random.seed(42) 17 | 18 | LOG_DIR = './logs' 19 | BATCH_SIZE = 128 20 | NUM_EPOCHS = 20 21 | 22 | with codecs.open("./data/alice_in_wonderland.txt", "r", encoding="utf-8") as f: 23 | lines = [line.strip() for line in f if len(line) != 0] 24 | 25 | try: 26 | sents = nltk.sent_tokenize(" ".join(lines)) 27 | except LookupError: 28 | print("Englisth tokenize does not downloaded. 
So download it.") 29 | nltk.download("punkt") 30 | sents = nltk.sent_tokenize(" ".join(lines)) 31 | 32 | 33 | tokenizer = Tokenizer(5000) # use top 5000 words only 34 | tokens = tokenizer.fit_on_texts(sents) 35 | vocab_size = len(tokenizer.word_counts) + 1 36 | 37 | xs = [] 38 | ys = [] 39 | for sent in sents: 40 | embedding = one_hot(sent, vocab_size) 41 | triples = list(nltk.trigrams(embedding)) 42 | w_lefts = [x[0] for x in triples] 43 | w_centers = [x[1] for x in triples] 44 | w_rights = [x[2] for x in triples] 45 | xs.extend(w_centers) 46 | ys.extend(w_lefts) 47 | xs.extend(w_centers) 48 | ys.extend(w_rights) 49 | 50 | ohe = OneHotEncoder(n_values=vocab_size) 51 | X = ohe.fit_transform(np.array(xs).reshape(-1, 1)).todense() 52 | Y = ohe.fit_transform(np.array(ys).reshape(-1, 1)).todense() 53 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, 54 | random_state=42) 55 | print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape) 56 | 57 | model = Sequential() 58 | model.add(Dense(300, input_shape=(Xtrain.shape[1],))) 59 | model.add(Activation("relu")) 60 | model.add(Dropout(0.5)) 61 | model.add(Dense(Ytrain.shape[1])) 62 | model.add(Activation("softmax")) 63 | 64 | model.compile(optimizer="rmsprop", loss="categorical_crossentropy", 65 | metrics=["accuracy"]) 66 | history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, 67 | epochs=NUM_EPOCHS, verbose=1, 68 | callbacks=[TensorBoard(LOG_DIR)], 69 | validation_data=(Xtest, Ytest)) 70 | 71 | # evaluate model 72 | score = model.evaluate(Xtest, Ytest, verbose=1) 73 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score[0], score[1])) 74 | 75 | # using the word2vec model 76 | word2idx = tokenizer.word_index 77 | idx2word = {v: k for k, v in word2idx.items()} 78 | 79 | # retrieve the weights from the first dense layer. 
This will convert 80 | # the input vector from a one-hot sum of two words to a dense 300 81 | # dimensional representation 82 | W, b = model.layers[0].get_weights() 83 | 84 | idx2emb = {} 85 | for word in word2idx.keys(): 86 | wid = word2idx[word] 87 | vec_in = ohe.fit_transform(np.array(wid)).todense() 88 | vec_emb = np.dot(vec_in, W) 89 | idx2emb[wid] = vec_emb 90 | 91 | for word in ["stupid", "alice", "succeeded"]: 92 | wid = word2idx[word] 93 | source_emb = idx2emb[wid] 94 | distances = [] 95 | for i in range(1, vocab_size): 96 | if i == wid: 97 | continue 98 | target_emb = idx2emb[i] 99 | distances.append(((wid, i), 100 | cosine_distances(source_emb, target_emb))) 101 | sorted_distances = sorted(distances, key=operator.itemgetter(1))[0:10] 102 | predictions = [idx2word[x[0][1]] for x in sorted_distances] 103 | print("{:s} => {:s}".format(word, ", ".join(predictions))) 104 | -------------------------------------------------------------------------------- /ch06/alice_chargen_rnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | from keras.layers import Dense, Activation, SimpleRNN 6 | from keras.models import Sequential 7 | import codecs 8 | 9 | 10 | INPUT_FILE = "./data/alice_in_wonderland.txt" 11 | 12 | # extract the input as a stream of characters 13 | print("Extracting text from input...") 14 | with codecs.open(INPUT_FILE, "r", encoding="utf-8") as f: 15 | lines = [line.strip().lower() for line in f 16 | if len(line) != 0] 17 | text = " ".join(lines) 18 | 19 | # creating lookup tables 20 | # Here chars is the number of features in our character "vocabulary" 21 | chars = set(text) 22 | nb_chars = len(chars) 23 | char2index = dict((c, i) for i, c in enumerate(chars)) 24 | index2char = dict((i, c) for i, c in enumerate(chars)) 25 | 26 | # create inputs and labels from the text. We do this by stepping 27 | # through the text ${step} character at a time, and extracting a 28 | # sequence of size ${seqlen} and the next output char. For example, 29 | # assuming an input text "The sky was falling", we would get the 30 | # following sequence of input_chars and label_chars (first 5 only) 31 | # The sky wa -> s 32 | # he sky was -> 33 | # e sky was -> f 34 | # sky was f -> a 35 | # sky was fa -> l 36 | print("Creating input and label text...") 37 | SEQLEN = 10 38 | STEP = 1 39 | 40 | input_chars = [] 41 | label_chars = [] 42 | for i in range(0, len(text) - SEQLEN, STEP): 43 | input_chars.append(text[i:i + SEQLEN]) 44 | label_chars.append(text[i + SEQLEN]) 45 | 46 | # vectorize the input and label chars 47 | # Each row of the input is represented by seqlen characters, each 48 | # represented as a 1-hot encoding of size len(char). There are 49 | # len(input_chars) such rows, so shape(X) is (len(input_chars), 50 | # seqlen, nb_chars). 51 | # Each row of output is a single character, also represented as a 52 | # dense encoding of size len(char). Hence shape(y) is (len(input_chars), 53 | # nb_chars). 54 | print("Vectorizing input and label text...") 55 | X = np.zeros((len(input_chars), SEQLEN, nb_chars), dtype=np.bool) 56 | y = np.zeros((len(input_chars), nb_chars), dtype=np.bool) 57 | for i, input_char in enumerate(input_chars): 58 | for j, ch in enumerate(input_char): 59 | X[i, j, char2index[ch]] = 1 60 | y[i, char2index[label_chars[i]]] = 1 61 | 62 | # Build the model. 
We use a single RNN with a fully connected layer 63 | # to compute the most likely predicted output char 64 | HIDDEN_SIZE = 128 65 | BATCH_SIZE = 128 66 | NUM_ITERATIONS = 25 67 | NUM_EPOCHS_PER_ITERATION = 1 68 | NUM_PREDS_PER_EPOCH = 100 69 | 70 | model = Sequential() 71 | model.add(SimpleRNN(HIDDEN_SIZE, return_sequences=False, 72 | input_shape=(SEQLEN, nb_chars), 73 | unroll=True)) 74 | model.add(Dense(nb_chars)) 75 | model.add(Activation("softmax")) 76 | 77 | model.compile(loss="categorical_crossentropy", optimizer="rmsprop") 78 | 79 | # We train the model in batches and test output generated at each step 80 | for iteration in range(NUM_ITERATIONS): 81 | print("=" * 50) 82 | print("Iteration #: {}".format(iteration)) 83 | model.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION) 84 | 85 | # testing model 86 | # randomly choose a row from input_chars, then use it to 87 | # generate text from model for next 100 chars 88 | test_idx = np.random.randint(len(input_chars)) 89 | test_chars = input_chars[test_idx] 90 | print("Generating from seed: {}".format(test_chars)) 91 | print(test_chars, end="") 92 | for i in range(NUM_PREDS_PER_EPOCH): 93 | Xtest = np.zeros((1, SEQLEN, nb_chars)) 94 | for j, ch in enumerate(test_chars): 95 | Xtest[0, j, char2index[ch]] = 1 96 | pred = model.predict(Xtest, verbose=0)[0] 97 | ypred = index2char[np.argmax(pred)] 98 | print(ypred, end="") 99 | # move forward with test_chars + ypred 100 | test_chars = test_chars[1:] + ypred 101 | print() 102 | -------------------------------------------------------------------------------- /ch06/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oreilly-japan/deep-learning-with-keras-ja/3180b9c64b2317de1bf0b8441fde914f9926b9a1/ch06/data/.gitkeep -------------------------------------------------------------------------------- /ch06/econs_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function 3 | import os 4 | import re 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | 9 | 10 | DATA_DIR = "./data" 11 | 12 | with open(os.path.join(DATA_DIR, "LD2011_2014.txt"), "r") as fld: 13 | data = [] 14 | cid = 250 15 | for line_num, line in enumerate(fld): 16 | if line.startswith("\"\";"): 17 | continue 18 | if line_num % 100 == 0: 19 | print("{:d} lines read".format(line_num)) 20 | cols = [float(re.sub(",", ".", x)) for x in 21 | line.strip().split(";")[1:]] 22 | data.append(cols[cid]) 23 | 24 | NUM_ENTRIES = 1000 25 | plt.plot(range(NUM_ENTRIES), data[0:NUM_ENTRIES]) 26 | plt.ylabel("electricity consumption") 27 | plt.xlabel("time (1pt = 15 mins)") 28 | plt.show() 29 | 30 | np.save(os.path.join(DATA_DIR, "LD_250.npy"), np.array(data)) 31 | -------------------------------------------------------------------------------- /ch06/econs_stateful.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function 3 | import math 4 | import os 5 | 6 | import numpy as np 7 | from keras.layers import Dense, LSTM 8 | from keras.models import Sequential 9 | from sklearn.preprocessing import MinMaxScaler 10 | 11 | 12 | DATA_DIR = "./data" 13 | 14 | data = np.load(os.path.join(DATA_DIR, "LD_250.npy")) 15 | 16 | STATELESS = True 17 | 18 | NUM_TIMESTEPS = 20 19 | HIDDEN_SIZE = 10 20 | BATCH_SIZE = 96 # 24 hours (15 min intervals) 21 | 
NUM_EPOCHS = 5 22 | 23 | # scale the data to be in the range (0, 1) 24 | data = data.reshape(-1, 1) 25 | scaler = MinMaxScaler(feature_range=(0, 1), copy=False) 26 | data = scaler.fit_transform(data) 27 | 28 | # transform to 4 inputs -> 1 label format 29 | X = np.zeros((data.shape[0], NUM_TIMESTEPS)) 30 | Y = np.zeros((data.shape[0], 1)) 31 | for i in range(len(data) - NUM_TIMESTEPS - 1): 32 | X[i] = data[i:i + NUM_TIMESTEPS].T 33 | Y[i] = data[i + NUM_TIMESTEPS + 1] 34 | 35 | # reshape X to three dimensions (samples, timesteps, features) 36 | X = np.expand_dims(X, axis=2) 37 | 38 | # split into training and test sets (add the extra offsets so 39 | # we can use batch size of 5) 40 | sp = int(0.7 * len(data)) 41 | Xtrain, Xtest, Ytrain, Ytest = X[0:sp], X[sp:], Y[0:sp], Y[sp:] 42 | print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape) 43 | 44 | if STATELESS: 45 | # stateless 46 | model = Sequential() 47 | model.add(LSTM(HIDDEN_SIZE, input_shape=(NUM_TIMESTEPS, 1), 48 | return_sequences=False)) 49 | model.add(Dense(1)) 50 | else: 51 | # stateful 52 | model = Sequential() 53 | model.add(LSTM(HIDDEN_SIZE, stateful=True, 54 | batch_input_shape=(BATCH_SIZE, NUM_TIMESTEPS, 1), 55 | return_sequences=False)) 56 | model.add(Dense(1)) 57 | 58 | model.compile(loss="mean_squared_error", optimizer="adam", 59 | metrics=["mean_squared_error"]) 60 | 61 | if STATELESS: 62 | # stateless 63 | model.fit(Xtrain, Ytrain, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE, 64 | validation_data=(Xtest, Ytest), 65 | shuffle=False) 66 | else: 67 | # stateful 68 | # need to make training and test data to multiple of BATCH_SIZE 69 | train_size = (Xtrain.shape[0] // BATCH_SIZE) * BATCH_SIZE 70 | test_size = (Xtest.shape[0] // BATCH_SIZE) * BATCH_SIZE 71 | Xtrain, Ytrain = Xtrain[0:train_size], Ytrain[0:train_size] 72 | Xtest, Ytest = Xtest[0:test_size], Ytest[0:test_size] 73 | print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape) 74 | for i in range(NUM_EPOCHS): 75 | print("Epoch {:d}/{:d}".format(i+1, NUM_EPOCHS)) 76 | model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, epochs=1, 77 | validation_data=(Xtest, Ytest), 78 | shuffle=False) 79 | model.reset_states() 80 | 81 | score, _ = model.evaluate(Xtest, Ytest, batch_size=BATCH_SIZE) 82 | rmse = math.sqrt(score) 83 | print("\nMSE: {:.3f}, RMSE: {:.3f}".format(score, rmse)) 84 | -------------------------------------------------------------------------------- /ch06/pos_tagging_gru.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function 3 | import collections 4 | import os 5 | 6 | import nltk 7 | import numpy as np 8 | from keras.layers import Activation, Dense, Dropout, RepeatVector, Embedding, \ 9 | GRU, LSTM, TimeDistributed, Bidirectional 10 | from keras.models import Sequential 11 | from keras.preprocessing import sequence 12 | from keras.utils import np_utils 13 | from sklearn.model_selection import train_test_split 14 | 15 | 16 | def parse_sentences(filename): 17 | word_freqs = collections.Counter() 18 | num_recs, maxlen = 0, 0 19 | with open(filename, "r") as fin: 20 | for line in fin: 21 | words = line.strip().lower().split() 22 | for word in words: 23 | word_freqs[word] += 1 24 | maxlen = max(maxlen, len(words)) 25 | num_recs += 1 26 | return word_freqs, maxlen, num_recs 27 | 28 | 29 | def build_tensor(filename, numrecs, word2index, maxlen): 30 | data = np.empty((numrecs, ), dtype=list) 31 | with open(filename, "r") as fin: 32 | for i, line in 
enumerate(fin): 33 | wids = [] 34 | for word in line.strip().lower().split(): 35 | if word in word2index: 36 | wids.append(word2index[word]) 37 | else: 38 | wids.append(word2index["UNK"]) 39 | data[i] = wids 40 | pdata = sequence.pad_sequences(data, maxlen=maxlen) 41 | return pdata 42 | 43 | 44 | DATA_DIR = "./data" 45 | 46 | with open(os.path.join(DATA_DIR, "treebank_sents.txt"), "w") as fedata, \ 47 | open(os.path.join(DATA_DIR, "treebank_poss.txt"), "w") as ffdata: 48 | sents = nltk.corpus.treebank.tagged_sents() 49 | for sent in sents: 50 | words, poss = [], [] 51 | for word, pos in sent: 52 | if pos == "-NONE-": 53 | continue 54 | words.append(word) 55 | poss.append(pos) 56 | fedata.write("{:s}\n".format(" ".join(words))) 57 | ffdata.write("{:s}\n".format(" ".join(poss))) 58 | 59 | 60 | s_wordfreqs, s_maxlen, s_numrecs = \ 61 | parse_sentences(os.path.join(DATA_DIR, "treebank_sents.txt")) 62 | t_wordfreqs, t_maxlen, t_numrecs = \ 63 | parse_sentences(os.path.join(DATA_DIR, "treebank_poss.txt")) 64 | print("# records: {:d}".format(s_numrecs)) 65 | print("# unique words: {:d}".format(len(s_wordfreqs))) 66 | print("# unique POS tags: {:d}".format(len(t_wordfreqs))) 67 | print("# words/sentence: max: {:d}".format(s_maxlen)) 68 | 69 | 70 | MAX_SEQLEN = 250 71 | S_MAX_FEATURES = 5000 72 | T_MAX_FEATURES = 45 73 | 74 | 75 | s_vocabsize = min(len(s_wordfreqs), S_MAX_FEATURES) + 2 76 | s_word2index = {x[0]: i+2 for i, x in 77 | enumerate(s_wordfreqs.most_common(S_MAX_FEATURES))} 78 | s_word2index["PAD"] = 0 79 | s_word2index["UNK"] = 1 80 | s_index2word = {v: k for k, v in s_word2index.items()} 81 | 82 | t_vocabsize = len(t_wordfreqs) + 1 83 | t_word2index = {x[0]: i for i, x in 84 | enumerate(t_wordfreqs.most_common(T_MAX_FEATURES))} 85 | t_word2index["PAD"] = 0 86 | t_index2word = {v: k for k, v in t_word2index.items()} 87 | 88 | 89 | X = build_tensor(os.path.join(DATA_DIR, "treebank_sents.txt"), 90 | s_numrecs, s_word2index, MAX_SEQLEN) 91 | Y = build_tensor(os.path.join(DATA_DIR, "treebank_poss.txt"), 92 | t_numrecs, t_word2index, MAX_SEQLEN) 93 | Y = np.array([np_utils.to_categorical(d, t_vocabsize) for d in Y]) 94 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, 95 | test_size=0.2, random_state=42) 96 | 97 | 98 | EMBED_SIZE = 128 99 | HIDDEN_SIZE = 64 100 | BATCH_SIZE = 32 101 | NUM_EPOCHS = 1 102 | 103 | # GRU 104 | model = Sequential() 105 | model.add(Embedding(s_vocabsize, EMBED_SIZE, input_length=MAX_SEQLEN)) 106 | model.add(Dropout(0.2)) 107 | model.add(GRU(HIDDEN_SIZE, dropout=0.2, recurrent_dropout=0.2)) 108 | model.add(RepeatVector(MAX_SEQLEN)) 109 | model.add(GRU(HIDDEN_SIZE, return_sequences=True)) 110 | model.add(TimeDistributed(Dense(t_vocabsize))) 111 | model.add(Activation("softmax")) 112 | model.compile(loss="categorical_crossentropy", 113 | optimizer="adam", 114 | metrics=["accuracy"]) 115 | model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, 116 | epochs=NUM_EPOCHS, validation_data=[Xtest, Ytest]) 117 | score, acc = model.evaluate(Xtest, Ytest, batch_size=BATCH_SIZE) 118 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score, acc)) 119 | 120 | 121 | # LSTM 122 | model = Sequential() 123 | model.add(Embedding(s_vocabsize, EMBED_SIZE, input_length=MAX_SEQLEN)) 124 | model.add(Dropout(0.2)) 125 | model.add(LSTM(HIDDEN_SIZE, dropout=0.2, recurrent_dropout=0.2)) 126 | model.add(RepeatVector(MAX_SEQLEN)) 127 | model.add(LSTM(HIDDEN_SIZE, return_sequences=True)) 128 | model.add(TimeDistributed(Dense(t_vocabsize))) 129 | model.add(Activation("softmax")) 130 | 
model.compile(loss="categorical_crossentropy", 131 | optimizer="adam", metrics=["accuracy"]) 132 | model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, 133 | epochs=NUM_EPOCHS, validation_data=[Xtest, Ytest]) 134 | score, acc = model.evaluate(Xtest, Ytest, batch_size=BATCH_SIZE) 135 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score, acc)) 136 | 137 | 138 | # Bidirectional LSTM 139 | model = Sequential() 140 | model.add(Embedding(s_vocabsize, EMBED_SIZE, input_length=MAX_SEQLEN)) 141 | model.add(Dropout(0.2)) 142 | model.add(Bidirectional(LSTM(HIDDEN_SIZE, dropout=0.2, recurrent_dropout=0.2))) 143 | model.add(RepeatVector(MAX_SEQLEN)) 144 | model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True))) 145 | model.add(TimeDistributed(Dense(t_vocabsize))) 146 | model.add(Activation("softmax")) 147 | model.compile(loss="categorical_crossentropy", 148 | optimizer="adam", metrics=["accuracy"]) 149 | model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, 150 | epochs=NUM_EPOCHS, validation_data=[Xtest, Ytest]) 151 | score, acc = model.evaluate(Xtest, Ytest, batch_size=BATCH_SIZE) 152 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score, acc)) 153 | -------------------------------------------------------------------------------- /ch06/requirements.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow==1.8.0 3 | h5py==2.7.1 4 | nltk==3.2.5 5 | sklearn==0.0 6 | matplotlib==2.1.1 7 | -------------------------------------------------------------------------------- /ch06/requirements_gpu.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow-gpu==1.8.0 3 | h5py==2.7.1 4 | nltk==3.2.5 5 | sklearn==0.0 6 | matplotlib==2.1.1 7 | -------------------------------------------------------------------------------- /ch06/umich_sentiment_lstm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function 3 | import collections 4 | import os 5 | 6 | import nltk 7 | import numpy as np 8 | from keras.callbacks import TensorBoard 9 | from keras.layers import Activation, Dense, Dropout, Embedding, LSTM 10 | from keras.models import Sequential 11 | from keras.preprocessing import sequence 12 | from sklearn.model_selection import train_test_split 13 | import codecs 14 | 15 | 16 | DATA_DIR = "./data" 17 | LOG_DIR = "./logs" 18 | 19 | MAX_FEATURES = 2000 20 | MAX_SENTENCE_LENGTH = 40 21 | 22 | EMBEDDING_SIZE = 128 23 | HIDDEN_LAYER_SIZE = 64 24 | BATCH_SIZE = 32 25 | NUM_EPOCHS = 10 26 | 27 | # Read training data and generate vocabulary 28 | maxlen = 0 29 | word_freqs = collections.Counter() 30 | num_recs = 0 31 | with codecs.open(os.path.join(DATA_DIR, "umich-sentiment-train.txt"), "r", 32 | 'utf-8') as ftrain: 33 | for line in ftrain: 34 | label, sentence = line.strip().split("\t") 35 | try: 36 | words = nltk.word_tokenize(sentence.lower()) 37 | except LookupError: 38 | print("Englisth tokenize does not downloaded. 
So download it.") 39 | nltk.download("punkt") 40 | words = nltk.word_tokenize(sentence.lower()) 41 | maxlen = max(maxlen, len(words)) 42 | for word in words: 43 | word_freqs[word] += 1 44 | num_recs += 1 45 | 46 | # Get some information about our corpus 47 | print(maxlen) # 42 48 | print(len(word_freqs)) # 2313 49 | 50 | # 1 is UNK, 0 is PAD 51 | # We take MAX_FEATURES-1 features to account for PAD 52 | vocab_size = min(MAX_FEATURES, len(word_freqs)) + 2 53 | word2index = {x[0]: i+2 for i, x in 54 | enumerate(word_freqs.most_common(MAX_FEATURES))} 55 | word2index["PAD"] = 0 56 | word2index["UNK"] = 1 57 | index2word = {v: k for k, v in word2index.items()} 58 | 59 | # convert sentences to sequences 60 | X = np.empty((num_recs, ), dtype=list) 61 | y = np.zeros((num_recs, )) 62 | i = 0 63 | with codecs.open(os.path.join(DATA_DIR, "umich-sentiment-train.txt"), 64 | 'r', 'utf-8') as ftrain: 65 | for line in ftrain: 66 | label, sentence = line.strip().split("\t") 67 | words = nltk.word_tokenize(sentence.lower()) 68 | seqs = [] 69 | for word in words: 70 | if word in word2index: 71 | seqs.append(word2index[word]) 72 | else: 73 | seqs.append(word2index["UNK"]) 74 | X[i] = seqs 75 | y[i] = int(label) 76 | i += 1 77 | 78 | # Pad the sequences (left padded with zeros) 79 | X = sequence.pad_sequences(X, maxlen=MAX_SENTENCE_LENGTH) 80 | 81 | # Split input into training and test 82 | Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, 83 | random_state=42) 84 | print(Xtrain.shape, Xtest.shape, ytrain.shape, ytest.shape) 85 | 86 | # Build model 87 | model = Sequential() 88 | model.add(Embedding(vocab_size, EMBEDDING_SIZE, 89 | input_length=MAX_SENTENCE_LENGTH)) 90 | model.add(Dropout(0.5)) 91 | model.add(LSTM(HIDDEN_LAYER_SIZE, dropout=0.5, recurrent_dropout=0.5)) 92 | model.add(Dense(1)) 93 | model.add(Activation("sigmoid")) 94 | 95 | model.compile(loss="binary_crossentropy", optimizer="adam", 96 | metrics=["accuracy"]) 97 | 98 | 99 | history = model.fit(Xtrain, ytrain, batch_size=BATCH_SIZE, 100 | epochs=NUM_EPOCHS, 101 | callbacks=[TensorBoard(LOG_DIR)], 102 | validation_data=(Xtest, ytest)) 103 | 104 | # evaluate 105 | score, acc = model.evaluate(Xtest, ytest, batch_size=BATCH_SIZE) 106 | print("Test score: {:.3f}, accuracy: {:.3f}".format(score, acc)) 107 | 108 | for i in range(5): 109 | idx = np.random.randint(len(Xtest)) 110 | xtest = Xtest[idx].reshape(1, 40) 111 | ylabel = ytest[idx] 112 | ypred = model.predict(xtest)[0][0] 113 | sent = " ".join([index2word[x] for x in xtest[0].tolist() if x != 0]) 114 | print("{:.0f}\t{:.0f}\t{}".format(ypred, ylabel, sent)) 115 | -------------------------------------------------------------------------------- /ch07/composite_qa_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tarfile 3 | from collections import Counter 4 | from urllib.request import urlretrieve 5 | import numpy as np 6 | from keras.layers import Input, add, concatenate, dot 7 | from keras.layers.core import Activation, Dense, Dropout, Permute 8 | from keras.layers.embeddings import Embedding 9 | from keras.layers.recurrent import LSTM 10 | from keras.models import Model 11 | from keras.preprocessing.text import text_to_word_sequence 12 | from keras.utils import to_categorical 13 | from keras.callbacks import TensorBoard 14 | 15 | 16 | class bAbI(): 17 | 18 | def __init__(self, use_10k=True, data_root="", padding="PAD"): 19 | self.url = "http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2.tar.gz" # noqa 
20 | self.vocab = [] 21 | self.story_size = -1 22 | self.question_size = -1 23 | self.data_root = data_root 24 | self.use_10k = use_10k 25 | if not self.data_root: 26 | self.data_root = os.path.join(os.path.dirname(__file__), "data") 27 | self.PAD = padding 28 | 29 | @property 30 | def vocab_size(self): 31 | return len(self.vocab) 32 | 33 | @property 34 | def data_dir(self): 35 | _dir = "tasks_1-20_v1-2/" 36 | _dir += "en-10k" if self.use_10k else "en" 37 | return _dir 38 | 39 | def _get_location(self, kind="train"): 40 | file_name = "qa1_single-supporting-fact_{}.txt".format(kind.lower()) 41 | return self.data_dir + "/" + file_name 42 | 43 | def download(self): 44 | tar_file = os.path.basename(self.url) 45 | if os.path.exists(os.path.join(self.data_root, self.data_dir)): 46 | return 47 | if not os.path.exists(self.data_root): 48 | os.mkdir(self.data_root) 49 | 50 | file_path = os.path.join(self.data_root, tar_file) 51 | if not os.path.isfile(file_path): 52 | print("Download the bABI data...") 53 | urlretrieve(self.url, file_path) 54 | with tarfile.open(file_path, mode="r:gz") as gz: 55 | for kind in ["train", "test"]: 56 | target = self._get_location(kind) 57 | gz.extract(target, self.data_root) 58 | os.remove(file_path) 59 | 60 | def _read_qa(self, kind="train"): 61 | path = os.path.join(self.data_root, self._get_location(kind)) 62 | stories, questions, answers = [], [], [] 63 | with open(path, "r", encoding="utf-8") as f: 64 | story_lines = [] 65 | for line in f: 66 | line = line.strip() 67 | index, text = line.split(" ", 1) 68 | if "\t" in text: 69 | question, answer, _ = text.split("\t") 70 | stories.append(" ".join(story_lines)) 71 | questions.append(question.strip()) 72 | answers.append(answer.strip()) 73 | story_lines = [] 74 | else: 75 | story_lines.append(text) 76 | 77 | return stories, questions, answers 78 | 79 | def make_vocab(self): 80 | train_s, train_q, train_a = self._read_qa(kind="train") 81 | test_s, test_q, test_a = self._read_qa(kind="test") 82 | 83 | all_s = train_s + test_s 84 | all_q = train_q + test_q 85 | 86 | # Make vocabulary from all stories and questions 87 | words = [] 88 | for s, q in zip(all_s, all_q): 89 | s_words = self.tokenize(s) 90 | if len(s_words) > self.story_size: 91 | self.story_size = len(s_words) 92 | 93 | q_words = self.tokenize(q) 94 | if len(q_words) > self.question_size: 95 | self.question_size = len(q_words) 96 | 97 | words += s_words 98 | words += q_words 99 | 100 | word_count = Counter(words) 101 | words = [w_c[0] for w_c in word_count.most_common()] 102 | words.insert(0, self.PAD) # add pad 103 | self.vocab = words 104 | 105 | def tokenize(self, string): 106 | words = text_to_word_sequence(string, lower=True) 107 | return words 108 | 109 | def get_batch(self, kind="train"): 110 | if self.vocab_size == 0: 111 | self.make_vocab() 112 | stories, questions, answers = self._read_qa(kind) 113 | s_indices = [self.to_indices(s, self.story_size) for s in stories] 114 | q_indices = [self.to_indices(q, self.question_size) 115 | for q in questions] 116 | a_indices = [self.vocab.index(a) for a in answers] 117 | a_categorical = to_categorical(a_indices, num_classes=self.vocab_size) 118 | 119 | return np.array(s_indices), np.array(q_indices), a_categorical 120 | 121 | def to_indices(self, string, fit_length=-1): 122 | if self.vocab_size == 0: 123 | raise Exception("You have to execute make_vocab") 124 | words = self.tokenize(string) 125 | indices = [self.vocab.index(w) for w in words] 126 | if fit_length > 0: 127 | indices = indices[:fit_length] 128 | 
pad_size = fit_length - len(indices) 129 | if pad_size > 0: 130 | indices += [self.vocab.index(self.PAD)] * pad_size 131 | return indices 132 | 133 | def to_string(self, indices): 134 | words = [self.vocab[i] for i in indices] 135 | string = " ".join([w for w in words if w != self.PAD]) 136 | return string 137 | 138 | 139 | def make_model(story_size, question_size, vocab_size, 140 | embedding_size=64, latent_size=32, drop_rate=0.3): 141 | story_input = Input(shape=(story_size,)) 142 | question_input = Input(shape=(question_size,)) 143 | 144 | story_embed_for_a = Embedding( 145 | input_dim=vocab_size, 146 | output_dim=embedding_size, 147 | input_length=story_size) 148 | question_embed = Embedding( 149 | input_dim=vocab_size, 150 | output_dim=embedding_size, 151 | input_length=question_size) 152 | story_encoder_for_a = Dropout(drop_rate)(story_embed_for_a(story_input)) 153 | question_encoder = Dropout(drop_rate)(question_embed(question_input)) 154 | 155 | # match story & question along seq_size to make attention on story 156 | # (axes=[batch, seq_size, embed_size] after encoding) 157 | match = dot([story_encoder_for_a, question_encoder], axes=[2, 2]) 158 | 159 | story_embed_for_c = Embedding( 160 | input_dim=vocab_size, 161 | output_dim=question_size, 162 | input_length=story_size 163 | ) 164 | story_encoder_for_c = Dropout(drop_rate)(story_embed_for_c(story_input)) 165 | 166 | # merge match and story context 167 | response = add([match, story_encoder_for_c]) 168 | # (question_size x story_size) => (story_size x question_size) 169 | response = Permute((2, 1))(response) 170 | 171 | answer = concatenate([response, question_encoder], axis=-1) 172 | answer = LSTM(latent_size)(answer) 173 | answer = Dropout(drop_rate)(answer) 174 | answer = Dense(vocab_size)(answer) 175 | output = Activation("softmax")(answer) 176 | model = Model(inputs=[story_input, question_input], outputs=output) 177 | 178 | return model 179 | 180 | 181 | def main(batch_size, epochs, show_result_count): 182 | log_dir = os.path.join(os.path.dirname(__file__), "logs") 183 | if not os.path.exists(log_dir): 184 | os.mkdir(log_dir) 185 | corpus = bAbI() 186 | corpus.download() 187 | corpus.make_vocab() 188 | train_s, train_q, train_a = corpus.get_batch(kind="train") 189 | test_s, test_q, test_a = corpus.get_batch(kind="test") 190 | print("{} train data, {} test data.".format(len(train_s), len(test_s))) 191 | print("vocab size is {}.".format(corpus.vocab_size)) 192 | 193 | model = make_model( 194 | corpus.story_size, corpus.question_size, corpus.vocab_size) 195 | 196 | # train the model 197 | model.compile(optimizer="rmsprop", loss="categorical_crossentropy", 198 | metrics=["accuracy"]) 199 | model.fit([train_s, train_q], [train_a], 200 | validation_data=([test_s, test_q], [test_a]), 201 | batch_size=batch_size, epochs=epochs, 202 | callbacks=[TensorBoard(log_dir=log_dir)] 203 | ) 204 | 205 | answer = np.argmax(test_a, axis=1) 206 | predicted = model.predict([test_s, test_q]) 207 | predicted = np.argmax(predicted, axis=1) 208 | 209 | for i in range(show_result_count): 210 | story = corpus.to_string(test_s[i].tolist()) 211 | question = corpus.to_string(test_q[i].tolist()) 212 | a = corpus.to_string([answer[i]]) 213 | p = corpus.to_string([predicted[i]]) 214 | ox = "o" if a == p else "x" 215 | print(story + "\n", question + "\n", 216 | "{} True: {}, Predicted: {}".format(ox, a, p)) 217 | 218 | 219 | if __name__ == "__main__": 220 | main(batch_size=64, epochs=50, show_result_count=10) 221 | 
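# A minimal inference sketch (not part of the original script), assuming the
# trained `model` and the built `corpus` from main() above, and that every
# word of the story/question already occurs in the bAbI vocabulary
# (bAbI.to_indices raises ValueError for out-of-vocabulary words).
def answer_single_question(model, corpus, story, question):
    # encode the raw strings the same way get_batch() prepares the test set
    s = np.array([corpus.to_indices(story, corpus.story_size)])
    q = np.array([corpus.to_indices(question, corpus.question_size)])
    # the network outputs a distribution over the vocabulary; take the argmax
    probs = model.predict([s, q])
    return corpus.to_string([int(np.argmax(probs, axis=1)[0])])
# Hypothetical usage, with strings in the style of bAbI task 1:
#   answer_single_question(model, corpus,
#                          "mary moved to the bathroom . john went to the hallway .",
#                          "where is mary ?")  # expected answer: "bathroom"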
-------------------------------------------------------------------------------- /ch07/custom_layer_lambda.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | from keras.layers import Input, Lambda 4 | from keras.models import Model 5 | 6 | 7 | def euclidean_distance(vecs): 8 | x, y = vecs 9 | return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True)) 10 | 11 | 12 | def euclidean_distance_output_shape(input_shapes): 13 | shape1, shape2 = input_shapes 14 | assert shape1 == shape2 # shape have to be equal 15 | return (shape1[0], 1) 16 | 17 | 18 | def measure_model(row_count=4, vec_size=3): 19 | left = Input(shape=(vec_size,)) 20 | right = Input(shape=(vec_size,)) 21 | 22 | distance = Lambda(euclidean_distance, 23 | output_shape=euclidean_distance_output_shape 24 | )([left, right]) 25 | model = Model([left, right], distance) 26 | 27 | size = row_count * vec_size 28 | left_mx = np.random.randint(9, size=size).reshape((row_count, vec_size)) 29 | right_mx = np.random.randint(9, size=size).reshape((row_count, vec_size)) 30 | 31 | output = model.predict([left_mx, right_mx]) 32 | print("Distance between\n {} \nand\n {} \nis\n {}".format( 33 | left_mx, right_mx, output 34 | )) 35 | 36 | 37 | if __name__ == "__main__": 38 | measure_model() 39 | 40 | -------------------------------------------------------------------------------- /ch07/custom_layer_normalize.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from keras.engine.topology import Layer 3 | from keras.layers.core import Dropout, Reshape 4 | from keras.layers.convolutional import ZeroPadding2D 5 | from keras.models import Sequential 6 | import numpy as np 7 | 8 | 9 | def test_layer(layer, x): 10 | # Adjust layer input_shape to x.shape 11 | layer_config = layer.get_config() 12 | layer_config["input_shape"] = x.shape 13 | layer = layer.__class__.from_config(layer_config) 14 | model = Sequential() 15 | model.add(layer) 16 | # 1. Test building the computation graph process 17 | model.compile("rmsprop", "mse") 18 | _x = np.expand_dims(x, axis=0) # Add dimension for batch size 19 | 20 | # 2. Test run the graph process 21 | return model.predict(_x)[0] 22 | 23 | 24 | class LocalResponseNormalization(Layer): 25 | 26 | def __init__(self, n=5, alpha=0.0005, beta=0.75, k=2, **kwargs): 27 | self.n = n 28 | self.alpha = alpha 29 | self.beta = beta 30 | self.k = k 31 | super(LocalResponseNormalization, self).__init__(**kwargs) 32 | 33 | def build(self, input_shape): 34 | # In this layer, no trainable weight is used. 
35 | super(LocalResponseNormalization, self).build(input_shape) 36 | 37 | def call(self, x): 38 | squared = K.square(x) 39 | # WITHIN_CHANNEL Normalization 40 | average = K.pool2d(squared, (self.n, self.n), strides=(1, 1), 41 | padding="same", pool_mode="avg") 42 | denom = K.pow(self.k + self.alpha * average, self.beta) 43 | return x / denom 44 | 45 | def compute_output_shape(self, input_shape): 46 | return input_shape 47 | 48 | 49 | # test the test harness 50 | x = np.random.randn(10, 10) 51 | layer = Dropout(0.5) 52 | y = test_layer(layer, x) 53 | assert(x.shape == y.shape) 54 | 55 | x = np.random.randn(10, 10, 3) 56 | layer = ZeroPadding2D(padding=(1, 1)) 57 | y = test_layer(layer, x) 58 | assert(x.shape[0] + 2 == y.shape[0]) 59 | assert(x.shape[1] + 2 == y.shape[1]) 60 | 61 | x = np.random.randn(10, 10) 62 | layer = Reshape((5, 20)) 63 | y = test_layer(layer, x) 64 | assert(y.shape == (5, 20)) 65 | 66 | # test custom layer 67 | x = np.random.randn(225, 225, 3) 68 | layer = LocalResponseNormalization() 69 | y = test_layer(layer, x) 70 | assert(x.shape == y.shape) 71 | -------------------------------------------------------------------------------- /ch07/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oreilly-japan/deep-learning-with-keras-ja/3180b9c64b2317de1bf0b8441fde914f9926b9a1/ch07/data/.gitkeep -------------------------------------------------------------------------------- /ch07/deep_dream.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from urllib.request import urlretrieve 4 | from keras.preprocessing.image import load_img, img_to_array 5 | from keras import backend as K 6 | from keras.applications import vgg16 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | 11 | def preprocess_image(img_path): 12 | img = load_img(img_path) 13 | img = img_to_array(img) 14 | img = np.expand_dims(img, axis=0) 15 | img = vgg16.preprocess_input(img) 16 | return img 17 | 18 | 19 | def deprocess_image(x, gradient=False): 20 | img = x.copy() 21 | # Util function to convert a tensor into a valid image. 
22 | img = img.reshape(x.shape[1], x.shape[2], x.shape[3]) # H, W, C 23 | if gradient: 24 | img = (img - img.mean()) * 255 / img.std() 25 | else: 26 | # Remove zero-center by mean pixel 27 | img[:, :, 0] += 103.939 28 | img[:, :, 1] += 116.779 29 | img[:, :, 2] += 123.68 30 | 31 | # 'BGR'->'RGB' 32 | img = img[:, :, ::-1] 33 | img = np.clip(img, 0, 255).astype("uint8") 34 | return img 35 | 36 | 37 | def main(image_path, num_pool_layers=5, iter_count=3, learning_rate=10, 38 | show_rand=False): 39 | image = preprocess_image(image_path) 40 | model = vgg16.VGG16(weights="imagenet", include_top=False) 41 | layer_dict = dict([(layer.name, layer) for layer in model.layers]) 42 | 43 | fig = plt.figure(figsize=(17, 8)) 44 | dream = model.input 45 | for i in range(num_pool_layers): 46 | _image = image.copy() 47 | rand_input = np.random.randint( 48 | 100, 150, size=_image.shape, dtype=np.uint8) 49 | layer_name = "block{:d}_pool".format(i + 1) 50 | layer_output = layer_dict[layer_name].output 51 | loss = K.mean(layer_output) 52 | grads = K.gradients(loss, dream)[0] 53 | grads /= K.maximum(K.mean(K.abs(grads)), 1e-5) # normalize grad 54 | converter = K.function([dream], [loss, grads]) 55 | 56 | grad_sum = None 57 | for j in range(iter_count): 58 | _loss_value, _grads_value = converter([_image]) 59 | _image += _grads_value * learning_rate # gradient "ascent" 60 | if show_rand: 61 | _, _grads_value = converter([rand_input]) 62 | if grad_sum is None: 63 | grad_sum = _grads_value 64 | else: 65 | grad_sum += _grads_value 66 | grad_mean = grad_sum / iter_count 67 | 68 | ax = plt.subplot(2, num_pool_layers, i + 1) 69 | ax.imshow(deprocess_image(_image)) 70 | ax.axis("off") 71 | ax.set_title("dream from {}".format(layer_name)) 72 | 73 | ax = plt.subplot(2, num_pool_layers, num_pool_layers + i + 1) 74 | ax.imshow(deprocess_image(grad_mean, gradient=True)) 75 | ax.axis("off") 76 | ax.set_title("{}'s gradient".format(layer_name)) 77 | 78 | plt.tight_layout() 79 | dir_name, file_name = os.path.split(image_path) 80 | file_root, ext = os.path.splitext(file_name) 81 | plt.savefig(os.path.join(dir_name, file_root + "_deep_dream.png")) 82 | plt.show() 83 | 84 | 85 | if __name__ == "__main__": 86 | # cat image url 87 | image_url = "http://farm2.static.flickr.com/1200/525304657_c59f741aac.jpg" 88 | data_path = os.path.join(os.path.dirname(__file__), "data/cat.jpg") 89 | urlretrieve(image_url, data_path) 90 | if len(sys.argv) > 1 and sys.argv[1] == "--rand": 91 | main(data_path, show_rand=True) 92 | else: 93 | main(data_path) 94 | -------------------------------------------------------------------------------- /ch07/functional_api.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Input, Dense, Activation 2 | from keras.models import Model 3 | 4 | x = Input(shape=(784,)) 5 | 6 | g = Dense(32) # 1 7 | s_2 = Activation("sigmoid") # 2 8 | f = Dense(10) # 3 9 | s_K = Activation("softmax") # 4 10 | y = s_K(f(s_2(g(x)))) 11 | 12 | model = Model(inputs=x, outputs=y) 13 | model.compile(loss="categorical_crossentropy", optimizer="adam") -------------------------------------------------------------------------------- /ch07/lstm_autoencoder.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from zipfile import ZipFile 4 | from urllib.request import urlretrieve 5 | from collections import Counter 6 | import re 7 | import numpy as np 8 | import nltk 9 | from nltk.corpus import reuters 10 | from 
nltk.corpus import stopwords 11 | from keras.layers import Input, LSTM, Bidirectional, RepeatVector 12 | from keras.models import Model 13 | from keras.callbacks import TensorBoard, ModelCheckpoint 14 | from keras.models import load_model 15 | 16 | 17 | class ReutersCorpus(): 18 | 19 | def __init__(self, padding="PAD", unknown="UNK"): 20 | self.documents = [] 21 | self.stopwords = [] 22 | self.vocab = [] 23 | self._ignores = re.compile("[.,-/\"'>()&;:]") 24 | self.PAD = padding 25 | self.UNK = unknown 26 | try: 27 | self.documents = reuters.fileids() 28 | except LookupError: 29 | print("Reuters corpus does not downloaded. So download it.") 30 | nltk.download("reuters") 31 | self.documents = reuters.fileids() 32 | 33 | try: 34 | self.stopwords = stopwords.words("english") 35 | except LookupError: 36 | print("Englisth stopword does not downloaded. So download it.") 37 | nltk.download("stopwords") 38 | self.stopwords = stopwords.words("english") 39 | 40 | def build(self, vocab_size=5000): 41 | words = reuters.words() 42 | words = [self.trim(w) for w in words] 43 | words = [w for w in words if w] 44 | freq = Counter(words) 45 | freq = freq.most_common(vocab_size) 46 | self.vocab = [w_c[0] for w_c in freq] 47 | self.vocab = [self.PAD, self.UNK] + self.vocab 48 | 49 | def trim(self, word): 50 | w = word.lower().strip() 51 | if w in self.stopwords or self._ignores.match(w): 52 | return "" 53 | if w.replace(".", "").isdigit(): 54 | return "9" 55 | return w 56 | 57 | def batch_iter(self, embedding, kind="train", batch_size=64, seq_size=50): 58 | if len(self.vocab) == 0: 59 | raise Exception( 60 | "Vocabulary hasn't made yet. Please execute 'build' method." 61 | ) 62 | 63 | steps = self.get_step_count(kind, batch_size) 64 | docs = self.get_documents(kind) 65 | docs_i = self.docs_to_matrix(docs, seq_size) 66 | docs = None # free memory 67 | 68 | while True: 69 | indices = np.random.permutation(np.arange(len(docs_i))) 70 | for s in range(steps): 71 | index = s * batch_size 72 | x = docs_i[indices[index:(index + batch_size)]] 73 | x_vec = embedding[x] 74 | # input = output 75 | yield x_vec, x_vec 76 | 77 | def docs_to_matrix(self, docs, seq_size): 78 | docs_i = [] 79 | for d in docs: 80 | words = reuters.words(d) 81 | words = self.sentence_to_ids(words, seq_size) 82 | docs_i.append(words) 83 | docs_i = np.array(docs_i) 84 | return docs_i 85 | 86 | def sentence_to_ids(self, sentence, seq_size): 87 | v = self.vocab 88 | UNK = v.index(self.UNK) 89 | PAD = v.index(self.PAD) 90 | words = [self.trim(w) for w in sentence][:seq_size] 91 | words = [v.index(w) if w in v else UNK for w in words if w] 92 | if len(words) < seq_size: 93 | words += [PAD] * (seq_size - len(words)) 94 | return words 95 | 96 | def get_step_count(self, kind="train", batch_size=64): 97 | size = len(self.get_documents(kind)) 98 | return size // batch_size 99 | 100 | def get_documents(self, kind="train"): 101 | docs = list(filter(lambda doc: doc.startswith(kind), self.documents)) 102 | return docs 103 | 104 | 105 | class EmbeddingLoader(): 106 | 107 | def __init__(self, embed_dir="", size=100): 108 | self.embed_dir = embed_dir 109 | self.size = size 110 | if not self.embed_dir: 111 | self.embed_dir = os.path.join(os.path.dirname(__file__), "embed") 112 | 113 | def load(self, seq_size, corpus, download=True): 114 | url = "http://nlp.stanford.edu/data/wordvecs/glove.6B.zip" 115 | embed_name = "glove.6B.{}d.txt".format(self.size) 116 | embed_path = os.path.join(self.embed_dir, embed_name) 117 | if not os.path.isfile(embed_path): 118 | if not 
download: 119 | raise Exception( 120 | "Can't load embedding from {}.".format(embed_path) 121 | ) 122 | else: 123 | print("Download the GloVe embedding.") 124 | file_name = os.path.basename(url) 125 | if not os.path.isdir(self.embed_dir): 126 | os.mkdir(self.embed_dir) 127 | zip_path = os.path.join(self.embed_dir, file_name) 128 | urlretrieve(url, zip_path) 129 | with ZipFile(zip_path) as z: 130 | z.extractall(self.embed_dir) 131 | os.remove(zip_path) 132 | 133 | vocab = corpus.vocab 134 | if len(vocab) == 0: 135 | raise Exception("You have to make vocab by 'build' method.") 136 | embed_matrix = np.zeros((len(vocab), self.size)) 137 | UNK = vocab.index(corpus.UNK) 138 | with open(embed_path, mode="r", encoding="utf-8") as f: 139 | for line in f: 140 | values = line.strip().split() 141 | word = values[0].strip() 142 | vector = np.asarray(values[1:], dtype="float32") 143 | if word in vocab: 144 | index = vocab.index(word) 145 | embed_matrix[index] = vector 146 | embed_matrix[UNK] = np.random.uniform(-1, 1, self.size) 147 | return embed_matrix 148 | 149 | 150 | class AutoEncoder(): 151 | 152 | def __init__(self, seq_size=50, embed_size=100, latent_size=256): 153 | self.seq_size = seq_size 154 | self.embed_size = embed_size 155 | self.latent_size = latent_size 156 | self.model = None 157 | 158 | def build(self): 159 | inputs = Input(shape=(self.seq_size, self.embed_size), name="input") 160 | encoded = Bidirectional( 161 | LSTM(self.latent_size), 162 | merge_mode="concat", name="encoder")(inputs) 163 | encoded = RepeatVector(self.seq_size, name="replicate")(encoded) 164 | decoded = Bidirectional( 165 | LSTM(self.embed_size, return_sequences=True), 166 | merge_mode="sum", name="decoder")(encoded) 167 | 168 | self.model = Model(inputs, decoded) 169 | 170 | @classmethod 171 | def load(cls, path): 172 | model = load_model(path) 173 | _, seq_size, embed_size = model.input.shape # top is batch size 174 | latent_size = model.get_layer("encoder").input_shape[1] 175 | ae = AutoEncoder(seq_size, embed_size, latent_size) 176 | ae.model = model 177 | return ae 178 | 179 | def get_encoder(self): 180 | if self.model: 181 | m = self.model 182 | encoder = Model(m.input, m.get_layer("encoder").output) 183 | return encoder 184 | else: 185 | raise Exception("Model is not built/loaded") 186 | 187 | 188 | def main(log_dir, model_name="autoencoder.h5"): 189 | print("1. Prepare the corpus.") 190 | corpus = ReutersCorpus() 191 | corpus.build(vocab_size=5000) 192 | 193 | print("2. Make autoencoder model.") 194 | ae = AutoEncoder(seq_size=50, embed_size=100, latent_size=512) 195 | ae.build() 196 | 197 | print("3. Load GloVe embeddings.") 198 | embed_loader = EmbeddingLoader(size=ae.embed_size) 199 | embedding = embed_loader.load(ae.seq_size, corpus) 200 | 201 | print("4. 
Train the model (trained model is saved to {}).".format(log_dir)) 202 | batch_size = 64 203 | ae.model.compile(optimizer="sgd", loss="mse") 204 | model_file = os.path.join(log_dir, model_name) 205 | train_iter = corpus.batch_iter(embedding, "train", batch_size, ae.seq_size) 206 | test_iter = corpus.batch_iter(embedding, "test", batch_size, ae.seq_size) 207 | train_steps = corpus.get_step_count("train", batch_size) 208 | test_steps = corpus.get_step_count("test", batch_size) 209 | 210 | ae.model.fit_generator( 211 | train_iter, train_steps, 212 | epochs=20, 213 | validation_data=test_iter, 214 | validation_steps=test_steps, 215 | callbacks=[ 216 | TensorBoard(log_dir=log_dir), 217 | ModelCheckpoint(filepath=model_file, save_best_only=True) 218 | ] 219 | ) 220 | 221 | 222 | def predict(log_dir, model_name="autoencoder.h5"): 223 | print("1. Load the trained model.") 224 | model_file = os.path.join(log_dir, model_name) 225 | ae = AutoEncoder.load(model_file) 226 | 227 | print("2. Prepare the corpus.") 228 | corpus = ReutersCorpus() 229 | test_docs = corpus.get_documents("test") 230 | labels = [reuters.categories(f)[0] for f in test_docs] 231 | categories = Counter(labels).most_common() 232 | # Use categories that has more than 30 documents 233 | categories = [c[0] for c in categories if c[1] > 50] 234 | filtered = [i for i, lb in enumerate(labels) if lb in categories] 235 | labels = [categories.index(labels[i]) for i in filtered] 236 | test_docs = [test_docs[i] for i in filtered] 237 | corpus.build(vocab_size=5000) 238 | 239 | print("3. Load GloVe embeddings.") 240 | embed_loader = EmbeddingLoader(size=ae.embed_size) 241 | embedding = embed_loader.load(ae.seq_size, corpus) 242 | 243 | print("4. Use model's encoder to classify the documents.") 244 | from sklearn.cluster import KMeans 245 | docs = corpus.docs_to_matrix(test_docs, ae.seq_size) 246 | doc_vecs = embedding[docs] 247 | features = ae.get_encoder().predict(doc_vecs) 248 | clf = KMeans(n_clusters=len(categories)) 249 | clf.fit(features) 250 | ae_dist = clf.inertia_ 251 | 252 | from sklearn.feature_extraction.text import CountVectorizer 253 | test_doc_words = [" ".join(reuters.words(d)) for d in test_docs] 254 | vectorizer = CountVectorizer(vocabulary=corpus.vocab) 255 | c_features = vectorizer.fit_transform(test_doc_words) 256 | clf.fit(c_features) 257 | cnt_dist = clf.inertia_ 258 | print(" Sum of distances^2 of samples to their closest center is") 259 | print(" Autoencoder: {}".format(ae_dist)) 260 | print(" Word count base: {}".format(cnt_dist)) 261 | 262 | 263 | if __name__ == "__main__": 264 | parser = argparse.ArgumentParser( 265 | description="Try text autoencoder by reuters corpus") 266 | parser.add_argument( 267 | "--predict", action="store_const", const=True, default=False, 268 | help="Classify the sentences by trained model") 269 | 270 | args = parser.parse_args() 271 | log_dir = os.path.join(os.path.dirname(__file__), "logs") 272 | if args.predict: 273 | predict(log_dir) 274 | else: 275 | main(log_dir) 276 | -------------------------------------------------------------------------------- /ch07/regression_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib.request 3 | from zipfile import ZipFile 4 | from keras.layers import Input 5 | from keras.layers.core import Dense 6 | from keras.models import Model 7 | from keras.callbacks import TensorBoard 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.model_selection import train_test_split 10 
| import matplotlib.pyplot as plt 11 | import pandas as pd 12 | 13 | 14 | def download_data(): 15 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00360/" 16 | zip_file = "AirQualityUCI.zip" 17 | file_name = "AirQualityUCI.csv" 18 | data_root = os.path.join(os.path.dirname(__file__), "data") 19 | file_path = os.path.join(data_root, file_name) 20 | 21 | if not os.path.isfile(file_path): 22 | print("Download the data for regression...") 23 | url += zip_file 24 | zip_path = os.path.join(data_root, zip_file) 25 | urllib.request.urlretrieve(url, zip_path) 26 | with ZipFile(zip_path) as z: 27 | z.extract(file_name, data_root) 28 | os.remove(zip_path) 29 | 30 | return file_path 31 | 32 | 33 | def load_dataset(file_path): 34 | dataset = pd.read_csv(file_path, sep=";", decimal=",") 35 | 36 | # Drop nameless columns 37 | unnamed = [c for c in dataset.columns if "Unnamed" in c] 38 | dataset.drop(unnamed, axis=1, inplace=True) 39 | 40 | # Drop unused columns 41 | dataset.drop(["Date", "Time"], axis=1, inplace=True) 42 | 43 | # Fill NaN by its column mean 44 | dataset.fillna(dataset.mean(), inplace=True) 45 | 46 | # Separate the data to label and features 47 | X = dataset.drop(["C6H6(GT)"], axis=1).values 48 | y = dataset["C6H6(GT)"].values.reshape(-1, 1) # get benzene values 49 | return X, y 50 | 51 | 52 | def make_model(input_size): 53 | inputs = Input(shape=(input_size,)) 54 | hidden = Dense(8, activation="relu", kernel_initializer="glorot_uniform") 55 | output = Dense(1, kernel_initializer="glorot_uniform") 56 | 57 | pred = output(hidden(inputs)) 58 | model = Model(inputs=[inputs], outputs=[pred]) 59 | return model 60 | 61 | 62 | def main(): 63 | file_path = download_data() 64 | X, y = load_dataset(file_path) 65 | 66 | # Normalize the numerical values 67 | yScaler = StandardScaler() 68 | xScaler = StandardScaler() 69 | y = yScaler.fit_transform(y) 70 | X = xScaler.fit_transform(X) 71 | 72 | # Split the data to train and test 73 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 74 | 75 | # Make model 76 | input_size = X.shape[1] # number of features 77 | model = make_model(input_size) 78 | 79 | # Train model 80 | log_dir = os.path.join(os.path.dirname(__file__), "logs") 81 | NUM_EPOCHS = 20 82 | BATCH_SIZE = 10 83 | model.compile(loss="mse", optimizer="adam") 84 | model.fit( 85 | X_train, y_train, 86 | batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, 87 | validation_split=0.2, 88 | callbacks=[TensorBoard(log_dir=log_dir)]) 89 | 90 | # Make prediction 91 | y_pred = model.predict(X_test) 92 | 93 | # Show prediction 94 | y_pred = yScaler.inverse_transform(y_pred) 95 | y_test = yScaler.inverse_transform(y_test) 96 | result = pd.DataFrame({ 97 | "prediction": pd.Series(y_pred.flatten()), 98 | "actual": pd.Series(y_test.flatten()) 99 | }) 100 | 101 | fig, ax = plt.subplots(nrows=2) 102 | ax0 = result.plot.line(ax=ax[0]) 103 | ax0.set(xlabel="time", ylabel="C6H6 concentrations") 104 | diff = result["prediction"].subtract(result["actual"]) 105 | ax1 = diff.plot.line(ax=ax[1], colormap="Accent") 106 | ax1.set(xlabel="time", ylabel="difference") 107 | plt.tight_layout() 108 | plt.show() 109 | 110 | 111 | if __name__ == "__main__": 112 | main() 113 | -------------------------------------------------------------------------------- /ch07/requirements.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow==1.8.0 3 | h5py==2.7.1 4 | matplotlib==2.1.1 5 | scikit-learn==0.19.1 6 | pandas==0.22.0 7 | nltk==3.2.5 8 | 
Pillow==4.3.0 9 | -------------------------------------------------------------------------------- /ch07/requirements_gpu.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow-gpu==1.8.0 3 | h5py==2.7.1 4 | matplotlib==2.1.1 5 | scikit-learn==0.19.1 6 | pandas==0.22.0 7 | nltk==3.2.5 8 | Pillow==4.3.0 9 | -------------------------------------------------------------------------------- /ch07/style_transfer.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib.request import urlretrieve 3 | from keras.preprocessing.image import load_img, img_to_array 4 | from keras.applications import vgg19 5 | from keras import backend as K 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | 9 | 10 | class TransferDefinition(): 11 | 12 | def __init__(self, content_image_path, style_image_path, img_nrows=400): 13 | self.width, self.height = load_img(content_image_path).size 14 | self.img_nrows = img_nrows 15 | self.img_ncols = int(self.width * self.img_nrows / self.height) 16 | 17 | def preprocess_image(self, image_path): 18 | img = load_img(image_path, target_size=(self.img_nrows, self.img_ncols)) 19 | img = img_to_array(img) 20 | img = np.expand_dims(img, axis=0) 21 | img = vgg19.preprocess_input(img) 22 | return img 23 | 24 | def deprocess_image(self, x): 25 | img = x.copy() 26 | img = img.reshape(self.img_nrows, self.img_ncols, 3) 27 | # Remove zero-center by mean pixel 28 | img[:, :, 0] += 103.939 29 | img[:, :, 1] += 116.779 30 | img[:, :, 2] += 123.68 31 | # "BGR"->"RGB" 32 | img = img[:, :, ::-1] 33 | img = np.clip(img, 0, 255).astype("uint8") 34 | return img 35 | 36 | 37 | def gram_matrix(x): 38 | assert K.ndim(x) == 3 39 | features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1))) 40 | gram = K.dot(features, K.transpose(features)) 41 | return gram 42 | 43 | 44 | def content_loss(content, combination): 45 | return K.sum(K.square(combination - content)) 46 | 47 | 48 | def style_loss(tdef, style, combination): 49 | assert K.ndim(style) == 3 50 | assert K.ndim(combination) == 3 51 | S = gram_matrix(style) 52 | C = gram_matrix(combination) 53 | channels = 3 54 | size = tdef.img_nrows * tdef.img_ncols 55 | return K.sum(K.square(S - C)) / (4. 
* (channels ** 2) * (size ** 2)) 56 | 57 | 58 | def total_variation_loss(tdef, x): 59 | assert K.ndim(x) == 4 60 | a = K.square(x[:, :tdef.img_nrows - 1, :tdef.img_ncols - 1, :] - x[:, 1:, :tdef.img_ncols - 1, :]) # noqa 61 | b = K.square(x[:, :tdef.img_nrows - 1, :tdef.img_ncols - 1, :] - x[:, :tdef.img_nrows - 1, 1:, :]) # noqa 62 | return K.sum(K.pow(a + b, 1.25)) 63 | 64 | 65 | def main(content_image_path, style_image_path, iter_count=10, 66 | content_weight=1.0, style_weight=0.1, total_variation_weight=0.001, 67 | learning_rate=0.001): 68 | tdef = TransferDefinition(content_image_path, style_image_path) 69 | 70 | # inputs 71 | content_image = K.variable(tdef.preprocess_image(content_image_path)) 72 | style_image = K.variable(tdef.preprocess_image(style_image_path)) 73 | # generated image 74 | combination_image = K.placeholder((1, tdef.img_nrows, tdef.img_ncols, 3)) 75 | input_tensor = K.concatenate([content_image, 76 | style_image, 77 | combination_image], axis=0) 78 | 79 | # load pre-trained model 80 | model = vgg19.VGG19(input_tensor=input_tensor, 81 | weights="imagenet", include_top=False) 82 | outputs_dict = dict([(layer.name, layer.output) 83 | for layer in model.layers]) 84 | 85 | # define loss 86 | loss = K.variable(0.) 87 | feature_map = outputs_dict["block5_conv2"] 88 | feature_of_content = feature_map[0, :, :, :] 89 | feature_of_combination = feature_map[2, :, :, :] 90 | 91 | loss += content_weight * content_loss( 92 | feature_of_content, 93 | feature_of_combination) 94 | 95 | feature_layers = ["block1_conv1", "block2_conv1", 96 | "block3_conv1", "block4_conv1", 97 | "block5_conv1"] 98 | 99 | for layer_name in feature_layers: 100 | feature_map = outputs_dict[layer_name] 101 | feature_of_style = feature_map[1, :, :, :] 102 | feature_of_combination = feature_map[2, :, :, :] 103 | sl = style_loss(tdef, feature_of_style, feature_of_combination) 104 | loss += (style_weight / len(feature_layers)) * sl 105 | 106 | loss += total_variation_weight * total_variation_loss(tdef, combination_image) # noqa 107 | grads = K.gradients(loss, combination_image)[0] 108 | style_transfer = K.function([combination_image], [loss, grads]) 109 | 110 | image = tdef.preprocess_image(content_image_path) 111 | for i in range(iter_count): 112 | print("Start of iteration {}".format(i + 1)) 113 | loss_value, grad_values = style_transfer([image]) 114 | image -= grad_values * learning_rate 115 | 116 | fig = plt.figure(figsize=(10, 5)) 117 | for kind in ["original", "style", "styled"]: 118 | if kind == "original": 119 | img = load_img(content_image_path, 120 | target_size=(tdef.img_nrows, tdef.img_ncols)) 121 | ax = plt.subplot(1, 3, 1) 122 | elif kind == "style": 123 | img = load_img(style_image_path, 124 | target_size=(tdef.img_nrows, tdef.img_ncols)) 125 | ax = plt.subplot(1, 3, 2) 126 | elif kind == "styled": 127 | img = tdef.deprocess_image(image) 128 | ax = plt.subplot(1, 3, 3) 129 | ax.set_title(kind) 130 | ax.imshow(img) 131 | ax.axis("off") 132 | 133 | plt.tight_layout() 134 | dir_name, file_name = os.path.split(content_image_path) 135 | file_root, ext = os.path.splitext(file_name) 136 | plt.savefig(os.path.join(dir_name, file_root + "_styled.png")) 137 | plt.show() 138 | 139 | 140 | if __name__ == "__main__": 141 | image_url = "http://farm2.static.flickr.com/1200/525304657_c59f741aac.jpg" 142 | content_path = os.path.join(os.path.dirname(__file__), "data/content.jpg") 143 | urlretrieve(image_url, content_path) 144 | 145 | image_url = 
"https://upload.wikimedia.org/wikipedia/commons/e/ed/Cats_forming_the_caracters_for_catfish.jpg" # noqa 146 | style_path = os.path.join(os.path.dirname(__file__), "data/style.jpg") 147 | urlretrieve(image_url, style_path) 148 | 149 | main(content_path, style_path) 150 | -------------------------------------------------------------------------------- /ch08/model/agent_network.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oreilly-japan/deep-learning-with-keras-ja/3180b9c64b2317de1bf0b8441fde914f9926b9a1/ch08/model/agent_network.h5 -------------------------------------------------------------------------------- /ch08/model/events.out.tfevents.1505884941.smap6.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oreilly-japan/deep-learning-with-keras-ja/3180b9c64b2317de1bf0b8441fde914f9926b9a1/ch08/model/events.out.tfevents.1505884941.smap6.local -------------------------------------------------------------------------------- /ch08/play_rl_network.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from keras.models import load_model 4 | from PIL import Image 5 | import numpy as np 6 | import gym 7 | from gym import wrappers 8 | import gym_ple # noqa 9 | 10 | 11 | class AgentProxy(object): 12 | INPUT_SHAPE = (80, 80, 4) 13 | 14 | def __init__(self, model_path): 15 | self.model = load_model(model_path) 16 | 17 | def evaluate(self, state): 18 | _state = np.expand_dims(state, axis=0) # add batch size dimension 19 | return self.model.predict(_state)[0] 20 | 21 | def act(self, state): 22 | q = self.evaluate(state) 23 | a = np.argmax(q) 24 | return a 25 | 26 | 27 | class Observer(object): 28 | 29 | def __init__(self, input_shape): 30 | self.size = input_shape[:2] # width x height 31 | self.num_frames = input_shape[2] # number of frames 32 | self._frames = [] 33 | 34 | def observe(self, state): 35 | g_state = Image.fromarray(state).convert("L") # to gray scale 36 | g_state = g_state.resize(self.size) # resize game screen to input size 37 | g_state = np.array(g_state).astype("float") 38 | g_state /= 255 # scale to 0~1 39 | if len(self._frames) == 0: 40 | # full fill the frame cache 41 | self._frames = [g_state] * self.num_frames 42 | else: 43 | self._frames.append(g_state) 44 | self._frames.pop(0) # remove most old state 45 | 46 | input_state = np.array(self._frames) 47 | # change frame_num x width x height => width x height x frame_num 48 | input_state = np.transpose(input_state, (1, 2, 0)) 49 | return input_state 50 | 51 | 52 | def play(epochs): 53 | model_file = "model/agent_network.h5" 54 | model_path = os.path.join(os.path.dirname(__file__), model_file) 55 | if not os.path.isfile(model_path): 56 | raise Exception( 57 | "Agent Network does not exist at {}).".format(model_file) 58 | ) 59 | 60 | movie_dir = os.path.join(os.path.dirname(__file__), "movie") 61 | 62 | agent = AgentProxy(model_path) 63 | observer = Observer(agent.INPUT_SHAPE) 64 | 65 | env = gym.make("Catcher-v0") 66 | env = wrappers.Monitor(env, directory=movie_dir, force=True) 67 | 68 | for e in range(epochs): 69 | rewards = [] 70 | initial_state = env.reset() 71 | state = observer.observe(initial_state) 72 | game_over = False 73 | 74 | # let's play the game 75 | while not game_over: 76 | env.render() 77 | action = agent.act(state) 78 | next_state, reward, game_over, info = env.step(action) 79 | next_state = 
observer.observe(next_state) 80 | rewards.append(reward) 81 | state = next_state 82 | 83 | score = sum(rewards) 84 | print("Game: {}/{} | Score: {}".format(e, epochs, score)) 85 | 86 | env.close() 87 | 88 | 89 | if __name__ == "__main__": 90 | epochs = 10 if len(sys.argv) < 2 else int(sys.argv[1]) 91 | play(epochs) 92 | -------------------------------------------------------------------------------- /ch08/requirements.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow==1.8.0 3 | gym==0.10.5 4 | pygame==1.9.3 5 | h5py==2.7.1 6 | doom_py==0.0.15 -------------------------------------------------------------------------------- /ch08/requirements_gpu.txt: -------------------------------------------------------------------------------- 1 | keras==2.1.6 2 | tensorflow-gpu==1.8.0 3 | gym==0.10.5 4 | pygame==1.9.3 5 | h5py==2.7.1 6 | doom_py==0.0.15 7 | -------------------------------------------------------------------------------- /ch08/rl_network.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from collections import deque 4 | from keras.models import Sequential 5 | from keras.layers.core import Dense, Flatten 6 | from keras.layers.convolutional import Conv2D 7 | from keras.optimizers import Adam 8 | from keras.models import clone_model 9 | from keras.callbacks import TensorBoard 10 | import tensorflow as tf 11 | from PIL import Image 12 | import numpy as np 13 | import gym 14 | import gym_ple # noqa 15 | 16 | 17 | class Agent(object): 18 | INPUT_SHAPE = (80, 80, 4) 19 | 20 | def __init__(self, num_actions): 21 | self.num_actions = num_actions 22 | model = Sequential() 23 | model.add(Conv2D( 24 | 32, kernel_size=8, strides=4, padding="same", 25 | input_shape=self.INPUT_SHAPE, kernel_initializer="normal", 26 | activation="relu")) 27 | model.add(Conv2D( 28 | 64, kernel_size=4, strides=2, padding="same", 29 | kernel_initializer="normal", 30 | activation="relu")) 31 | model.add(Conv2D( 32 | 64, kernel_size=3, strides=1, padding="same", 33 | kernel_initializer="normal", 34 | activation="relu")) 35 | model.add(Flatten()) 36 | model.add(Dense(512, kernel_initializer="normal", activation="relu")) 37 | model.add(Dense(num_actions, kernel_initializer="normal")) 38 | self.model = model 39 | 40 | def evaluate(self, state, model=None): 41 | _model = model if model else self.model 42 | _state = np.expand_dims(state, axis=0) # add batch size dimension 43 | return _model.predict(_state)[0] 44 | 45 | def act(self, state, epsilon=0): 46 | if np.random.rand() <= epsilon: 47 | a = np.random.randint(low=0, high=self.num_actions, size=1)[0] 48 | else: 49 | q = self.evaluate(state) 50 | a = np.argmax(q) 51 | return a 52 | 53 | 54 | class Observer(object): 55 | 56 | def __init__(self, input_shape): 57 | self.size = input_shape[:2] # width x height 58 | self.num_frames = input_shape[2] # number of frames 59 | self._frames = [] 60 | 61 | def observe(self, state): 62 | g_state = Image.fromarray(state).convert("L") # to gray scale 63 | g_state = g_state.resize(self.size) # resize game screen to input size 64 | g_state = np.array(g_state).astype("float") 65 | g_state /= 255 # scale to 0~1 66 | if len(self._frames) == 0: 67 | # full fill the frame cache 68 | self._frames = [g_state] * self.num_frames 69 | else: 70 | self._frames.append(g_state) 71 | self._frames.pop(0) # remove most old state 72 | 73 | input_state = np.array(self._frames) 74 | # change frame_num x width x height => width x 
height x frame_num 75 | input_state = np.transpose(input_state, (1, 2, 0)) 76 | return input_state 77 | 78 | 79 | class Trainer(object): 80 | 81 | def __init__(self, env, agent, optimizer, model_dir=""): 82 | self.env = env 83 | self.agent = agent 84 | self.experience = [] 85 | self._target_model = clone_model(self.agent.model) 86 | self.observer = Observer(agent.INPUT_SHAPE) 87 | self.model_dir = model_dir 88 | if not self.model_dir: 89 | self.model_dir = os.path.join(os.path.dirname(__file__), "model") 90 | if not os.path.isdir(self.model_dir): 91 | os.mkdir(self.model_dir) 92 | 93 | self.agent.model.compile(optimizer=optimizer, loss="mse") 94 | self.callback = TensorBoard(self.model_dir) 95 | self.callback.set_model(self.agent.model) 96 | 97 | def get_batch(self, batch_size, gamma): 98 | batch_indices = np.random.randint( 99 | low=0, high=len(self.experience), size=batch_size) 100 | X = np.zeros((batch_size,) + self.agent.INPUT_SHAPE) 101 | y = np.zeros((batch_size, self.agent.num_actions)) 102 | for i, b_i in enumerate(batch_indices): 103 | s, a, r, next_s, game_over = self.experience[b_i] 104 | X[i] = s 105 | y[i] = self.agent.evaluate(s) 106 | # future reward 107 | Q_sa = np.max(self.agent.evaluate(next_s, 108 | model=self._target_model)) 109 | if game_over: 110 | y[i, a] = r 111 | else: 112 | y[i, a] = r + gamma * Q_sa 113 | return X, y 114 | 115 | def write_log(self, index, loss, score): 116 | for name, value in zip(("loss", "score"), (loss, score)): 117 | summary = tf.Summary() 118 | summary_value = summary.value.add() 119 | summary_value.simple_value = value 120 | summary_value.tag = name 121 | self.callback.writer.add_summary(summary, index) 122 | self.callback.writer.flush() 123 | 124 | def train(self, 125 | gamma=0.99, 126 | initial_epsilon=0.1, final_epsilon=0.0001, 127 | memory_size=50000, 128 | observation_epochs=100, training_epochs=2000, 129 | batch_size=32, render=True): 130 | 131 | self.experience = deque(maxlen=memory_size) 132 | epochs = observation_epochs + training_epochs 133 | epsilon = initial_epsilon 134 | model_path = os.path.join(self.model_dir, "agent_network.h5") 135 | fmt = "Epoch {:04d}/{:d} | Loss {:.5f} | Score: {} | e={:.4f} train={}" 136 | 137 | for e in range(epochs): 138 | loss = 0.0 139 | rewards = [] 140 | initial_state = self.env.reset() 141 | state = self.observer.observe(initial_state) 142 | game_over = False 143 | is_training = True if e > observation_epochs else False 144 | 145 | # let's play the game 146 | while not game_over: 147 | if render: 148 | self.env.render() 149 | 150 | if not is_training: 151 | action = self.agent.act(state, epsilon=1) 152 | else: 153 | action = self.agent.act(state, epsilon) 154 | 155 | next_state, reward, game_over, info = self.env.step(action) 156 | next_state = self.observer.observe(next_state) 157 | self.experience.append( 158 | (state, action, reward, next_state, game_over) 159 | ) 160 | 161 | rewards.append(reward) 162 | 163 | if is_training: 164 | X, y = self.get_batch(batch_size, gamma) 165 | loss += self.agent.model.train_on_batch(X, y) 166 | 167 | state = next_state 168 | 169 | loss = loss / len(rewards) 170 | score = sum(rewards) 171 | 172 | if is_training: 173 | self.write_log(e - observation_epochs, loss, score) 174 | self._target_model.set_weights(self.agent.model.get_weights()) 175 | 176 | if epsilon > final_epsilon: 177 | epsilon -= (initial_epsilon - final_epsilon) / epochs 178 | 179 | print(fmt.format(e + 1, epochs, loss, score, epsilon, is_training)) 180 | 181 | if e % 100 == 0: 182 | 
self.agent.model.save(model_path, overwrite=True) 183 | 184 | self.agent.model.save(model_path, overwrite=True) 185 | 186 | 187 | def main(render): 188 | env = gym.make("Catcher-v0") 189 | num_actions = env.action_space.n 190 | agent = Agent(num_actions) 191 | trainer = Trainer(env, agent, Adam(lr=1e-6)) 192 | trainer.train(render=render) 193 | 194 | 195 | if __name__ == "__main__": 196 | render = False if len(sys.argv) < 2 else True 197 | main(render) 198 | -------------------------------------------------------------------------------- /deep-learning-with-keras-ja.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oreilly-japan/deep-learning-with-keras-ja/3180b9c64b2317de1bf0b8441fde914f9926b9a1/deep-learning-with-keras-ja.png --------------------------------------------------------------------------------
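The autoencoder script above (ch07/lstm_autoencoder.py) exposes its trained encoder through `AutoEncoder.get_encoder()`. Below is a minimal usage sketch, not part of the repository, for turning one Reuters test document into a fixed-length vector; it assumes the script has already been trained once (so `ch07/logs/autoencoder.h5` exists) and that the snippet is run from inside `ch07`.

```python
# Hypothetical sketch: embed a single Reuters test document with the encoder
# trained by ch07/lstm_autoencoder.py. Assumes logs/autoencoder.h5 was created
# by a previous `python lstm_autoencoder.py` run and that we run inside ch07/.
from lstm_autoencoder import AutoEncoder, ReutersCorpus, EmbeddingLoader

corpus = ReutersCorpus()
corpus.build(vocab_size=5000)                          # rebuild the training vocabulary

ae = AutoEncoder.load("logs/autoencoder.h5")
embedding = EmbeddingLoader(size=ae.embed_size).load(ae.seq_size, corpus)

doc = corpus.get_documents("test")[0]                  # one Reuters file id
doc_ids = corpus.docs_to_matrix([doc], ae.seq_size)    # (1, seq_size) word ids
doc_vecs = embedding[doc_ids]                          # (1, seq_size, embed_size)
feature = ae.get_encoder().predict(doc_vecs)           # fixed-length document vector
print(feature.shape)
```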
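Because ch07/regression_net.py standardizes the benzene target with `StandardScaler` before training, `model.predict` returns values in standardized units, and they only become concentrations again after `yScaler.inverse_transform`. A short round-trip sketch of that detail with toy numbers (not the Air Quality data; `y_pred_scaled` is just a stand-in for the network output):

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

y = np.array([[2.0], [4.0], [9.0], [5.0]])        # toy target values
scaler = StandardScaler()
y_scaled = scaler.fit_transform(y)                # zero mean, unit variance: what the net is fit on

y_pred_scaled = 0.9 * y_scaled                    # stand-in for model.predict(X_test)
y_pred = scaler.inverse_transform(y_pred_scaled)  # back to the original units for plotting
print(y_scaled.ravel())
print(y_pred.ravel())
```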
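In ch07/style_transfer.py, `gram_matrix` turns a `(rows, cols, channels)` feature map into a `(channels, rows * cols)` matrix and multiplies it by its transpose, so `style_loss` compares channel correlation patterns instead of raw activations. The following NumPy sketch mirrors that computation on a toy feature map (NumPy is only a stand-in here for the Keras backend functions used in the script, and the shapes are made up):

```python
import numpy as np

def gram_matrix_np(feature_map):
    # Same idea as gram_matrix() above: channels first, flatten space, F @ F.T.
    rows, cols, channels = feature_map.shape
    features = np.transpose(feature_map, (2, 0, 1)).reshape(channels, rows * cols)
    return features @ features.T                       # (channels, channels)

style = np.random.rand(4, 4, 3)                        # toy 4x4 map, 3 channels
combination = np.random.rand(4, 4, 3)

S, C = gram_matrix_np(style), gram_matrix_np(combination)
channels, size = 3, 4 * 4                              # toy stand-ins for the image size
loss = np.sum((S - C) ** 2) / (4.0 * channels ** 2 * size ** 2)
print(S.shape, loss)                                   # (3, 3) and a small scalar
```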
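The heart of `Trainer.get_batch` in ch08/rl_network.py is the Q-learning target: the network is regressed toward its own current estimates except for the action actually taken, whose value becomes `r + gamma * max Q_target(s')`, or just `r` when the game is over. A self-contained NumPy sketch of that update for a single transition (the Q-values and the three-action setting are made-up illustrative numbers, not output of the trained agent):

```python
import numpy as np

gamma = 0.99
q_current = np.array([0.2, 0.5, 0.1])      # stand-in for agent.evaluate(s)
q_next_target = np.array([0.3, 0.4, 0.6])  # stand-in for agent.evaluate(next_s, model=target_model)
action, reward, game_over = 1, 1.0, False

y = q_current.copy()                       # keep current estimates for untaken actions
if game_over:
    y[action] = reward                     # terminal state: no future reward
else:
    y[action] = reward + gamma * np.max(q_next_target)

print(y)                                   # [0.2, 1.594, 0.1]
```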