├── .gitattributes
├── Chapter01
│   ├── chapter_01_001.py
│   └── pytorch_iris.py
├── Chapter02
│   ├── chapter_02_001.py
│   └── chapter_02_002.py
├── Chapter03
│   ├── chapter_03_001.py
│   └── chapter_03_002.py
├── Chapter04
│   ├── chapter_04_001.py
│   ├── chapter_04_002.py
│   └── chapter_04_003.py
├── Chapter05
│   ├── chapter_05_001.py
│   ├── chapter_05_002.py
│   └── chapter_05_003.py
├── Chapter06
│   ├── chapter_06_001.py
│   └── chapter_06_002.py
├── Chapter07
│   ├── chapter_07_001.py
│   └── language model
│       ├── data_processing.py
│       ├── data_reader.py
│       ├── model.py
│       ├── wap.txt
│       └── war_and_peace.txt
├── Chapter08
│   └── chapter_08_001.py
├── Chapter09
│   ├── chapter_09_001_ddqn.py
│   ├── chapter_09_001_dqn.py
│   └── chapter_09_002_a2c.py
├── Chapter10
│   ├── __init__.py
│   ├── data
│   │   ├── data.gzip
│   │   └── model.pt
│   ├── keyboard_agent.py
│   ├── main.py
│   ├── nn_agent.py
│   ├── train.py
│   └── util.py
├── LICENSE
└── README.md

--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto

# Custom for Visual Studio
*.cs diff=csharp

# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain

--------------------------------------------------------------------------------
/Chapter01/chapter_01_001.py:
--------------------------------------------------------------------------------
import pandas as pd

dataset = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
                      names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'])

dataset['species'] = pd.Categorical(dataset['species']).codes

dataset = dataset.sample(frac=1, random_state=1234)

train_input = dataset.values[:120, :4]
train_target = dataset.values[:120, 4]

test_input = dataset.values[120:, :4]
test_target = dataset.values[120:, 4]

import torch

torch.manual_seed(1234)

hidden_units = 5

net = torch.nn.Sequential(
    torch.nn.Linear(4, hidden_units),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_units, 3)
)

# choose optimizer and loss function
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)

# train
epochs = 50

for epoch in range(epochs):
    inputs = torch.autograd.Variable(torch.Tensor(train_input).float())
    targets = torch.autograd.Variable(torch.Tensor(train_target).long())

    optimizer.zero_grad()
    out = net(inputs)
    loss = criterion(out, targets)
    loss.backward()
    optimizer.step()

    if epoch == 0 or (epoch + 1) % 10 == 0:
        print('Epoch %d Loss: %.4f' % (epoch + 1, loss.item()))

# test
import numpy as np

inputs = torch.autograd.Variable(torch.Tensor(test_input).float())
targets = torch.autograd.Variable(torch.Tensor(test_target).long())

optimizer.zero_grad()
out = net(inputs)
_, predicted = torch.max(out.data, 1)

error_count = test_target.size - np.count_nonzero((targets == predicted).numpy())
print('Errors: %d; Accuracy: %d%%' % (error_count, 100 * torch.sum(targets == predicted) / test_target.size))
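Note that torch.autograd.Variable has been a no-op wrapper since PyTorch 0.4, so the training loop above can be written with plain tensors. A minimal sketch of the equivalent loop, assuming the net, criterion, optimizer, and NumPy arrays defined in the script:

inputs = torch.tensor(train_input, dtype=torch.float32)
targets = torch.tensor(train_target, dtype=torch.long)

for epoch in range(epochs):
    optimizer.zero_grad()                   # reset accumulated gradients
    loss = criterion(net(inputs), targets)  # forward pass and loss
    loss.backward()                         # backpropagate
    optimizer.step()                        # update the weights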
--------------------------------------------------------------------------------
/Chapter01/pytorch_iris.py:
--------------------------------------------------------------------------------
import pandas as pd

dataset = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
                      names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'])

dataset['species'] = pd.Categorical(dataset['species']).codes

dataset = dataset.sample(frac=1, random_state=1234)

train_input = dataset.values[:120, :4]
train_target = dataset.values[:120, 4]

test_input = dataset.values[120:, :4]
test_target = dataset.values[120:, 4]

import torch

torch.manual_seed(1234)

hidden_units = 5

net = torch.nn.Sequential(
    torch.nn.Linear(4, hidden_units),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_units, 3)
)

# choose optimizer and loss function
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)

# train
epochs = 50

for epoch in range(epochs):
    inputs = torch.autograd.Variable(torch.Tensor(train_input).float())
    targets = torch.autograd.Variable(torch.Tensor(train_target).long())

    optimizer.zero_grad()
    out = net(inputs)
    loss = criterion(out, targets)
    loss.backward()
    optimizer.step()

    if epoch == 0 or (epoch + 1) % 10 == 0:
        # loss.data[0] is PyTorch 0.3 style and raises an error on 0-dim
        # tensors in PyTorch >= 0.4; loss.item() is the correct accessor
        print('Epoch %d Loss: %.4f' % (epoch + 1, loss.item()))

# test
import numpy as np

inputs = torch.autograd.Variable(torch.Tensor(test_input).float())
targets = torch.autograd.Variable(torch.Tensor(test_target).long())

optimizer.zero_grad()
out = net(inputs)
_, predicted = torch.max(out.data, 1)

error_count = test_target.size - np.count_nonzero((targets == predicted).numpy())
print('Errors: %d; Accuracy: %d%%' % (error_count, 100 * torch.sum(targets == predicted) / test_target.size))

--------------------------------------------------------------------------------
/Chapter02/chapter_02_001.py:
--------------------------------------------------------------------------------
# The user can modify the values of the weight w
# as well as bias_value_1 and bias_value_2 to observe
# how this maps to different step functions

import matplotlib.pyplot as plt
import numpy

weight_value = 1000

# modify to change where the step function starts
bias_value_1 = 5000

# modify to change where the step function ends
bias_value_2 = -5000

# set up the plot axes
plt.axis([-10, 10, -1, 10])

print("The step function starts at {0} and ends at {1}"
      .format(-bias_value_1 / weight_value,
              -bias_value_2 / weight_value))

inputs = numpy.arange(-10, 10, 0.01)
outputs = list()

# iterate over a range of inputs
for x in inputs:
    y1 = 1.0 / (1.0 + numpy.exp(-weight_value * x - bias_value_1))
    y2 = 1.0 / (1.0 + numpy.exp(-weight_value * x - bias_value_2))

    # modify to change the height of the step function
    w = 7

    # network output
    y = y1 * w - y2 * w

    outputs.append(y)

plt.plot(inputs, outputs, lw=2, color='black')
plt.show()
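As a quick numeric check of the construction above (the difference of two steep sigmoids approximates a box-shaped step), a minimal sketch reusing the same constants; the output should be close to the step height w = 7 inside (-5, 5) and close to 0 outside. numpy may emit the same overflow warnings in exp as the script itself:

import numpy

def step_output(x, weight_value=1000, bias_1=5000, bias_2=-5000, w=7):
    y1 = 1.0 / (1.0 + numpy.exp(-weight_value * x - bias_1))
    y2 = 1.0 / (1.0 + numpy.exp(-weight_value * x - bias_2))
    return (y1 - y2) * w

print(step_output(0.0))  # ~7.0, inside the step
print(step_output(9.0))  # ~0.0, outside the step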
--------------------------------------------------------------------------------
/Chapter02/chapter_02_002.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy
from matplotlib.colors import ListedColormap


def tanh(x):
    return (1.0 - numpy.exp(-2 * x)) / (1.0 + numpy.exp(-2 * x))


def tanh_derivative(x):
    return (1 + tanh(x)) * (1 - tanh(x))


class NeuralNetwork:
    # net_arch consists of a list of integers, indicating
    # the number of neurons in each layer
    def __init__(self, net_arch):
        self.activation_func = tanh
        self.activation_derivative = tanh_derivative
        self.layers = len(net_arch)
        self.steps_per_epoch = 1000
        self.net_arch = net_arch

        # initialize the weights with random values in the range (-1,1)
        self.weights = []
        for layer in range(len(net_arch) - 1):
            w = 2 * numpy.random.rand(net_arch[layer] + 1, net_arch[layer + 1]) - 1
            self.weights.append(w)

    def fit(self, data, labels, learning_rate=0.1, epochs=10):
        """
        :param data: the set of all possible pairs of booleans
               True or False, indicated by the integers 1 or 0
        :param labels: the result of the logical operation 'xor'
               on each of those input pairs; an array of 0/1 for each datum
        """

        # Add bias units to the input layer
        ones = numpy.ones((1, data.shape[0]))
        Z = numpy.concatenate((ones.T, data), axis=1)

        training = epochs * self.steps_per_epoch
        for k in range(training):
            if k % self.steps_per_epoch == 0:
                print('epochs: {}'.format(k // self.steps_per_epoch))
                for s in data:
                    print(s, self.predict(s))

            sample = numpy.random.randint(data.shape[0])
            y = [Z[sample]]

            for i in range(len(self.weights) - 1):
                activation = numpy.dot(y[i], self.weights[i])
                activation_f = self.activation_func(activation)

                # add the bias for the next layer
                activation_f = numpy.concatenate((numpy.ones(1), numpy.array(activation_f)))
                y.append(activation_f)

            # last layer
            activation = numpy.dot(y[-1], self.weights[-1])
            activation_f = self.activation_func(activation)
            y.append(activation_f)

            # error for the output layer
            error = labels[sample] - y[-1]
            delta_vec = [error * self.activation_derivative(y[-1])]

            # we need to begin from the back, from the next-to-last layer
            for i in range(self.layers - 2, 0, -1):
                error = delta_vec[-1].dot(self.weights[i][1:].T)
                error = error * self.activation_derivative(y[i][1:])
                delta_vec.append(error)

            # reverse
            # [level3(output)->level2(hidden)] => [level2(hidden)->level3(output)]
            delta_vec.reverse()

            # backpropagation
            # 1. Multiply its output delta and input activation
            #    to get the gradient of the weight.
            # 2. Subtract a ratio (percentage) of the gradient from the weight
            for i in range(len(self.weights)):
                layer = y[i].reshape(1, self.net_arch[i] + 1)
                delta = delta_vec[i].reshape(1, self.net_arch[i + 1])
                self.weights[i] += learning_rate * layer.T.dot(delta)

    def predict(self, x):
        val = numpy.concatenate((numpy.ones(1).T, numpy.array(x)))
        for i in range(0, len(self.weights)):
            val = self.activation_func(numpy.dot(val, self.weights[i]))
            val = numpy.concatenate((numpy.ones(1).T, numpy.array(val)))

        return val[1]

    def plot_decision_regions(self, X, y, points=200):
        markers = ('o', '^')
        colors = ('red', 'blue')
        cmap = ListedColormap(colors)

        x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1

        # To produce zoomed-out figures, you can replace the preceding 2 lines with:
        # x1_min, x1_max = -10, 11
        # x2_min, x2_max = -10, 11

        resolution = max(x1_max - x1_min, x2_max - x2_min) / float(points)

        xx1, xx2 = numpy.meshgrid(numpy.arange(x1_min, x1_max, resolution),
                                  numpy.arange(x2_min, x2_max, resolution))
        input = numpy.array([xx1.ravel(), xx2.ravel()]).T
        Z = numpy.empty(0)
        for i in range(input.shape[0]):
            val = self.predict(numpy.array(input[i]))
            if val < 0.5:
                val = 0
            if val >= 0.5:
                val = 1
            Z = numpy.append(Z, val)

        Z = Z.reshape(xx1.shape)

        plt.pcolormesh(xx1, xx2, Z, cmap=cmap)
        plt.xlim(xx1.min(), xx1.max())
        plt.ylim(xx2.min(), xx2.max())

        # plot all samples
        classes = ["False", "True"]

        for idx, cl in enumerate(numpy.unique(y)):
            plt.scatter(x=X[y == cl, 0],
                        y=X[y == cl, 1],
                        alpha=1.0,
                        c=colors[idx],
                        edgecolors='black',
                        marker=markers[idx],
                        s=80,
                        label=classes[idx])

        plt.xlabel('x-axis')
        plt.ylabel('y-axis')
        plt.legend(loc='upper left')
        plt.show()


if __name__ == '__main__':
    numpy.random.seed(0)

    # Initialize the NeuralNetwork with 2 input, 2 hidden, and 1 output neurons
    nn = NeuralNetwork([2, 2, 1])

    X = numpy.array([[0, 0],
                     [0, 1],
                     [1, 0],
                     [1, 1]])

    y = numpy.array([0, 1, 1, 0])

    nn.fit(X, y, epochs=10)

    print("Final prediction")
    for s in X:
        print(s, nn.predict(s))

    nn.plot_decision_regions(X, y)
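With the methods referring to self rather than the module-level nn instance, the class is not tied to the 2-2-1 topology used in __main__; a hedged usage sketch (hypothetical, not part of the original repo) with a wider hidden layer, assuming the X and y arrays defined above:

numpy.random.seed(0)
wide_nn = NeuralNetwork([2, 4, 1])  # 2 inputs, 4 hidden neurons, 1 output
wide_nn.fit(X, y, learning_rate=0.1, epochs=10)
for s in X:
    print(s, wide_nn.predict(s))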
--------------------------------------------------------------------------------
/Chapter03/chapter_03_001.py:
--------------------------------------------------------------------------------
from keras.datasets import mnist
from keras.layers.core import Dense, Activation
from keras.models import Sequential
from keras.utils import np_utils

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)

classes = 10
Y_train = np_utils.to_categorical(Y_train, classes)
Y_test = np_utils.to_categorical(Y_test, classes)

input_size = 784
batch_size = 100
hidden_neurons = 100
epochs = 100

model = Sequential([
    Dense(hidden_neurons, input_dim=input_size),
    Activation('sigmoid'),
    Dense(classes),
    Activation('softmax')
])

model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='sgd')

model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=1)

score = model.evaluate(X_test, Y_test, verbose=1)
print('Test accuracy:', score[1])

weights = model.layers[0].get_weights()

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy

fig = plt.figure()

w = weights[0].T
for neuron in range(hidden_neurons):
    ax = fig.add_subplot(10, 10, neuron + 1)
    ax.axis("off")
    ax.imshow(numpy.reshape(w[neuron], (28, 28)), cmap=cm.Greys_r)

plt.savefig("neuron_images.png", dpi=300)
plt.show()

--------------------------------------------------------------------------------
/Chapter03/chapter_03_002.py:
--------------------------------------------------------------------------------
from keras.datasets import cifar10
from keras.layers.core import Dense, Activation
from keras.models import Sequential
from keras.utils import np_utils

(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

X_train = X_train.reshape(50000, 3072)
X_test = X_test.reshape(10000, 3072)

classes = 10
Y_train = np_utils.to_categorical(Y_train, classes)
Y_test = np_utils.to_categorical(Y_test, classes)

input_size = 3072
batch_size = 100
epochs = 100

model = Sequential([
    Dense(1024, input_dim=input_size),
    Activation('relu'),
    Dense(512),
    Activation('relu'),
    Dense(512),
    Activation('sigmoid'),
    Dense(classes),
    Activation('softmax')
])

model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='sgd')

model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, Y_test), verbose=1)

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.gridspec as gridspec
import numpy
import random

fig = plt.figure()
outer_grid = gridspec.GridSpec(10, 10, wspace=0.0, hspace=0.0)

weights = model.layers[0].get_weights()

w = weights[0].T

# display 100 randomly chosen hidden neurons
# (the original indexed w[i], which would always show the first 100 neurons
# and ignore the random sample; w[neuron] uses the sampled indices)
for i, neuron in enumerate(random.sample(range(1024), 100)):
    ax = plt.Subplot(fig, outer_grid[i])
    ax.imshow(numpy.mean(numpy.reshape(w[neuron], (32, 32, 3)), axis=2), cmap=cm.Greys_r)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.add_subplot(ax)

plt.show()
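chapter_03_002.py monitors the test set via validation_data during fit but never scores it separately; if a final number is wanted, the same two lines used in chapter_03_001.py apply here as well:

score = model.evaluate(X_test, Y_test, verbose=1)
print('Test accuracy:', score[1])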
--------------------------------------------------------------------------------
/Chapter04/chapter_04_001.py:
--------------------------------------------------------------------------------
import numpy as np


def conv(image, im_filter):
    """
    :param image: grayscale image as a 2-dimensional numpy array
    :param im_filter: 2-dimensional numpy array
    """

    # input dimensions
    height = image.shape[0]
    width = image.shape[1]

    # output image with reduced dimensions
    im_c = np.zeros((height - len(im_filter) + 1,
                     width - len(im_filter) + 1))

    # iterate over all rows and columns
    for row in range(len(im_c)):
        for col in range(len(im_c[0])):
            # apply the filter
            for i in range(len(im_filter)):
                for j in range(len(im_filter[0])):
                    im_c[row, col] += image[row + i, col + j] * im_filter[i][j]

    # fix out-of-bounds values
    im_c[im_c > 255] = 255
    im_c[im_c < 0] = 0

    # plot images for comparison
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    plt.figure()
    plt.imshow(image, cmap=cm.Greys_r)
    plt.show()

    plt.imshow(im_c, cmap=cm.Greys_r)
    plt.show()


import requests
from PIL import Image
from io import BytesIO

# load the image
url = "https://upload.wikimedia.org/wikipedia/commons/thumb/8/88/Commander_Eileen_Collins_-_GPN-2000-001177.jpg/382px-Commander_Eileen_Collins_-_GPN-2000-001177.jpg?download"
resp = requests.get(url)
image_rgb = np.asarray(Image.open(BytesIO(resp.content)).convert("RGB"))

# convert to grayscale
image_grayscale = np.mean(image_rgb, axis=2, dtype=np.uint)

# blur filter
blur = np.full([10, 10], 1. / 100)
conv(image_grayscale, blur)

# sobel filters
sobel_x = [[-1, -2, -1],
           [0, 0, 0],
           [1, 2, 1]]
conv(image_grayscale, sobel_x)

sobel_y = [[-1, 0, 1],
           [-2, 0, 2],
           [-1, 0, 1]]
conv(image_grayscale, sobel_y)
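conv() is filter-agnostic, so other kernels can be explored the same way; for example, a common 3x3 sharpening kernel (an illustration, not part of the original script):

sharpen = [[0, -1, 0],
           [-1, 5, -1],
           [0, -1, 0]]
conv(image_grayscale, sharpen)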
--------------------------------------------------------------------------------
/Chapter04/chapter_04_002.py:
--------------------------------------------------------------------------------
# for reproducibility
from numpy.random import seed

seed(1)
from tensorflow import set_random_seed

set_random_seed(1)

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Flatten

from keras.utils import np_utils

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

X_train = X_train.reshape(60000, 28, 28, 1)
X_test = X_test.reshape(10000, 28, 28, 1)

Y_train = np_utils.to_categorical(Y_train, 10)
Y_test = np_utils.to_categorical(Y_test, 10)

model = Sequential([
    Convolution2D(filters=32,
                  kernel_size=(3, 3),
                  input_shape=(28, 28, 1)),  # first conv layer
    Activation('relu'),
    Convolution2D(filters=32,
                  kernel_size=(3, 3)),  # second conv layer
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),  # max pooling layer
    Flatten(),  # flatten the output tensor
    Dense(64),  # fully-connected hidden layer
    Activation('relu'),
    Dense(10),  # output layer
    Activation('softmax')])

print(model.summary())

model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adadelta')

model.fit(X_train, Y_train, batch_size=100, epochs=5, validation_split=0.1, verbose=1)

score = model.evaluate(X_test, Y_test, verbose=1)
print('Test accuracy:', score[1])

--------------------------------------------------------------------------------
/Chapter04/chapter_04_003.py:
--------------------------------------------------------------------------------
import keras
from keras.datasets import cifar10
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator

batch_size = 50

(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

Y_train = keras.utils.to_categorical(Y_train, 10)
Y_test = keras.utils.to_categorical(Y_test, 10)

data_generator = ImageDataGenerator(rotation_range=90,
                                    width_shift_range=0.1,
                                    height_shift_range=0.1,
                                    featurewise_center=True,
                                    featurewise_std_normalization=True,
                                    horizontal_flip=True)

data_generator.fit(X_train)

# standardize the test set
for i in range(len(X_test)):
    X_test[i] = data_generator.standardize(X_test[i])

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=X_train.shape[1:]))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(32, (3, 3), padding='same'))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

model.add(Conv2D(128, (3, 3), padding='same'))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(128, (3, 3), padding='same'))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit_generator(
    generator=data_generator.flow(x=X_train,
                                  y=Y_train,
                                  batch_size=batch_size),
    steps_per_epoch=len(X_train) // batch_size,
    epochs=100,
    validation_data=(X_test, Y_test),
    workers=4)
--------------------------------------------------------------------------------
/Chapter05/chapter_05_001.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import models, transforms

batch_size = 50

# training data
train_data_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_set = torchvision.datasets.CIFAR10(root='./data',
                                         train=True,
                                         download=True,
                                         transform=train_data_transform)

train_loader = torch.utils.data.DataLoader(train_set,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=2)

# validation data
val_data_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_set = torchvision.datasets.CIFAR10(root='./data',
                                       train=False,
                                       download=True,
                                       transform=val_data_transform)

val_loader = torch.utils.data.DataLoader(val_set,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=2)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def train_model(model, loss_function, optimizer, data_loader):
    # set model to training mode
    model.train()

    current_loss = 0.0
    current_acc = 0

    # iterate over the training data
    for i, (inputs, labels) in enumerate(data_loader):
        # send the input/labels to the GPU
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        with torch.set_grad_enabled(True):
            # forward
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            loss = loss_function(outputs, labels)

            # backward
            loss.backward()
            optimizer.step()

        # statistics
        current_loss += loss.item() * inputs.size(0)
        current_acc += torch.sum(predictions == labels.data)

    total_loss = current_loss / len(data_loader.dataset)
    total_acc = current_acc.double() / len(data_loader.dataset)

    print('Train Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss, total_acc))


def test_model(model, loss_function, data_loader):
    # set model in evaluation mode
    model.eval()

    current_loss = 0.0
    current_acc = 0

    # iterate over the validation data
    for i, (inputs, labels) in enumerate(data_loader):
        # send the input/labels to the GPU
        inputs = inputs.to(device)
        labels = labels.to(device)

        # forward
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            loss = loss_function(outputs, labels)

        # statistics
        current_loss += loss.item() * inputs.size(0)
        current_acc += torch.sum(predictions == labels.data)

    total_loss = current_loss / len(data_loader.dataset)
    total_acc = current_acc.double() / len(data_loader.dataset)

    print('Test Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss, total_acc))


def tl_feature_extractor(epochs=3):
    # load the pre-trained model
    model = torchvision.models.resnet18(pretrained=True)

    # exclude existing parameters from backward pass
    # for performance
    for param in model.parameters():
        param.requires_grad = False

    # newly constructed layers have requires_grad=True by default
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 10)

    # transfer to GPU (if available)
    model = model.to(device)

    loss_function = nn.CrossEntropyLoss()

    # only parameters of the final layer are being optimized
    optimizer = optim.Adam(model.fc.parameters())

    # train
    for epoch in range(epochs):
        print('Epoch {}/{}'.format(epoch + 1, epochs))

        train_model(model, loss_function, optimizer, train_loader)
        test_model(model, loss_function, val_loader)


def tl_fine_tuning(epochs=3):
    # load the pre-trained model
    model = models.resnet18(pretrained=True)

    # replace the last layer
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 10)

    # transfer the model to the GPU
    model = model.to(device)

    # loss function
    loss_function = nn.CrossEntropyLoss()

    # We'll optimize all parameters
    optimizer = optim.Adam(model.parameters())

    # train
    for epoch in range(epochs):
        print('Epoch {}/{}'.format(epoch + 1, epochs))

        train_model(model, loss_function, optimizer, train_loader)
        test_model(model, loss_function, val_loader)


if __name__ == '__main__':
    tl_feature_extractor(epochs=5)
    # tl_fine_tuning(epochs=5)
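Only feature extraction runs in __main__ above. To compare the two transfer-learning modes, one can simply call both; feature extraction optimizes only the new fully-connected layer, while fine-tuning updates every parameter and is slower per epoch:

if __name__ == '__main__':
    tl_feature_extractor(epochs=5)  # train only the new final layer
    tl_fine_tuning(epochs=5)        # train all layers end to end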
--------------------------------------------------------------------------------
/Chapter05/chapter_05_002.py:
--------------------------------------------------------------------------------
# VGG16
from keras.applications.vgg16 import VGG16

vgg16_model = VGG16(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)

# VGG19
from keras.applications.vgg19 import VGG19

vgg19_model = VGG19(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)

import torchvision.models as models

model = models.vgg16(pretrained=True)

--------------------------------------------------------------------------------
/Chapter05/chapter_05_003.py:
--------------------------------------------------------------------------------
import os.path
import cv2  # opencv import
import numpy as np
import requests

# Download YOLO net config file
# We'll download it from the YOLO author's github repo
yolo_config = 'yolov3.cfg'
if not os.path.isfile(yolo_config):
    url = 'https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg'
    r = requests.get(url)
    with open(yolo_config, 'wb') as f:
        f.write(r.content)

# Download YOLO net weights
# We'll download it from the YOLO author's website
yolo_weights = 'yolov3.weights'
if not os.path.isfile(yolo_weights):
    url = 'https://pjreddie.com/media/files/yolov3.weights'
    r = requests.get(url)
    with open(yolo_weights, 'wb') as f:
        f.write(r.content)

# Download class names file
# Contains the names of the classes the network can detect
classes_file = 'coco.names'
if not os.path.isfile(classes_file):
    url = 'https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names'
    r = requests.get(url)
    with open(classes_file, 'wb') as f:
        f.write(r.content)

# load class names
with open(classes_file, 'r') as f:
    classes = [line.strip() for line in f.readlines()]

# Download object detection image
image_file = 'source.jpg'
if not os.path.isfile(image_file):
    url = "https://upload.wikimedia.org/wikipedia/commons/c/c7/Abbey_Road_Zebra_crossing_2004-01.jpg"
    r = requests.get(url)
    with open(image_file, 'wb') as f:
        f.write(r.content)

# read and normalize image
image = cv2.imread(image_file)
blob = cv2.dnn.blobFromImage(image, 1 / 255, (416, 416), (0, 0, 0), True, crop=False)

# load the network
net = cv2.dnn.readNet(yolo_weights, yolo_config)

# set as input to the net
net.setInput(blob)

# get network output layers
# (note: newer OpenCV versions return plain ints from getUnconnectedOutLayers,
# in which case i[0] becomes just i)
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]

# inference
# the network outputs multiple lists of anchor boxes,
# one for each of its output scales
outs = net.forward(output_layers)

# extract bounding boxes
class_ids = list()
confidences = list()
boxes = list()

# iterate over the output scales
for out in outs:
    # iterate over the candidate boxes at this scale
    for detection in out:
        # bounding box
        center_x = int(detection[0] * image.shape[1])
        center_y = int(detection[1] * image.shape[0])
        w = int(detection[2] * image.shape[1])
        h = int(detection[3] * image.shape[0])
        x = center_x - w // 2
        y = center_y - h // 2
        boxes.append([x, y, w, h])

        # class
        class_id = np.argmax(detection[5:])
        class_ids.append(class_id)

        # confidence
        confidence = detection[4]
        confidences.append(float(confidence))
# non-max suppression
ids = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.3, nms_threshold=0.5)

# draw the bounding boxes on the image
colors = np.random.uniform(0, 255, size=(len(classes), 3))

for i in ids:
    i = i[0]
    x, y, w, h = boxes[i]
    class_id = class_ids[i]

    color = colors[class_id]

    cv2.rectangle(image, (round(x), round(y)), (round(x + w), round(y + h)), color, 2)

    label = "%s: %.2f" % (classes[class_id], confidences[i])
    cv2.putText(image, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

cv2.imshow("Object detection", image)
cv2.waitKey()
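For reference, each detection row produced by net.forward() above is a vector of length 85 for COCO: four box coordinates, one objectness score, and 80 per-class scores. This is the layout the slicing in the extraction loop relies on; a small helper (an illustration, not part of the original script) makes it explicit:

def split_detection(detection):
    """Split one YOLO output row into its components."""
    box = detection[:4]           # center_x, center_y, w, h (relative to input size)
    objectness = detection[4]     # confidence that the box contains an object
    class_scores = detection[5:]  # one score per class (80 for COCO)
    return box, objectness, class_scores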
--------------------------------------------------------------------------------
/Chapter06/chapter_06_001.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
from matplotlib.markers import MarkerStyle
import numpy as np
from keras import backend as K
from keras.datasets import mnist
from keras.layers import Lambda, Input, Dense
from keras.losses import binary_crossentropy
from keras.models import Model

(x_train, y_train), (x_test, y_test) = mnist.load_data()

image_size = x_train.shape[1] * x_train.shape[1]
x_train = np.reshape(x_train, [-1, image_size])
x_test = np.reshape(x_test, [-1, image_size])
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255


def build_vae(intermediate_dim=512, latent_dim=2):
    """
    Build VAE
    :param intermediate_dim: size of hidden layers of the encoder/decoder
    :param latent_dim: latent space size
    :returns tuple: the encoder, the decoder, and the full vae
    """

    # encoder first
    inputs = Input(shape=(image_size,), name='encoder_input')
    x = Dense(intermediate_dim, activation='relu')(inputs)

    # latent mean and variance
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # reparameterization trick for random sampling
    # Note the use of the Lambda layer
    # At runtime, it will call the sampling function
    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    # full encoder model
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()

    # decoder
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(intermediate_dim, activation='relu')(latent_inputs)
    outputs = Dense(image_size, activation='sigmoid')(x)

    # full decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()

    # VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae')

    # Loss function
    # we start with the reconstruction loss
    reconstruction_loss = binary_crossentropy(inputs, outputs) * image_size

    # next is the KL divergence
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5

    # we combine them in a total loss
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)

    return encoder, decoder, vae


def sampling(args: tuple):
    """
    Reparameterization trick by sampling z from unit Gaussian
    :param args: (tensor, tensor) mean and log of variance of q(z|x)
    :returns tensor: sampled latent vector z
    """

    # unpack the input tuple
    z_mean, z_log_var = args

    # mini-batch size
    mb_size = K.shape(z_mean)[0]

    # latent space size
    dim = K.int_shape(z_mean)[1]

    # random normal vector with mean=0 and std=1.0
    epsilon = K.random_normal(shape=(mb_size, dim))

    return z_mean + K.exp(0.5 * z_log_var) * epsilon


def plot_latent_distribution(encoder,
                             x_test,
                             y_test,
                             batch_size=128):
    """
    Display a 2D plot of the digit classes in the latent space.
    We are interested only in z, so we only need the encoder here.
    :param encoder: the encoder network
    :param x_test: test images
    :param y_test: test labels
    :param batch_size: size of the mini-batch
    """
    z_mean, _, _ = encoder.predict(x_test, batch_size=batch_size)
    plt.figure(figsize=(6, 6))

    markers = ('o', 'x', '^', '<', '>', '*', 'h', 'H', 'D', 'd', 'P', 'X', '8', 's', 'p')

    for i in np.unique(y_test):
        plt.scatter(z_mean[y_test == i, 0], z_mean[y_test == i, 1],
                    marker=MarkerStyle(markers[i], fillstyle='none'),
                    edgecolors='black')

    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.show()


def plot_generated_images(decoder):
    """
    Display a 2D plot of the generated images.
    We only need the decoder, because we'll manually sample the distribution z
    :param decoder: the decoder network
    """

    # display a nxn 2D manifold of digits
    n = 15
    digit_size = 28

    figure = np.zeros((digit_size * n, digit_size * n))

    # linearly spaced coordinates corresponding to the 2D plot
    # of digit classes in the latent space
    grid_x = np.linspace(-4, 4, n)
    grid_y = np.linspace(-4, 4, n)[::-1]

    # start sampling z1 and z2 in the ranges grid_x and grid_y
    for i, yi in enumerate(grid_y):
        for j, xi in enumerate(grid_x):
            z_sample = np.array([[xi, yi]])
            x_decoded = decoder.predict(z_sample)
            digit = x_decoded[0].reshape(digit_size, digit_size)
            slice_i = slice(i * digit_size, (i + 1) * digit_size)
            slice_j = slice(j * digit_size, (j + 1) * digit_size)
            figure[slice_i, slice_j] = digit

    # plot the results
    plt.figure(figsize=(6, 5))
    start_range = digit_size // 2
    end_range = n * digit_size + start_range + 1
    pixel_range = np.arange(start_range, end_range, digit_size)
    sample_range_x = np.round(grid_x, 1)
    sample_range_y = np.round(grid_y, 1)
    plt.xticks(pixel_range, sample_range_x)
    plt.yticks(pixel_range, sample_range_y)
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.imshow(figure, cmap='Greys_r')
    plt.show()


if __name__ == '__main__':
    encoder, decoder, vae = build_vae()

    vae.compile(optimizer='adam')
    vae.summary()

    vae.fit(x_train,
            epochs=50,
            batch_size=128,
            validation_data=(x_test, None))

    plot_latent_distribution(encoder,
                             x_test,
                             y_test,
                             batch_size=128)

    plot_generated_images(decoder)
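For reference, the kl_loss expression in build_vae is the closed-form KL divergence between the learned diagonal Gaussian q(z|x) = N(z_mean, exp(z_log_var)) and the unit Gaussian prior:

D_KL(N(mu, sigma^2) || N(0, 1)) = -0.5 * sum_j (1 + log(sigma_j^2) - mu_j^2 - sigma_j^2)

with z_log_var standing in for log(sigma^2), which is exactly what the three kl_loss lines compute before the term is added to the reconstruction loss.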
--------------------------------------------------------------------------------
/Chapter06/chapter_06_002.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
from keras.datasets import mnist
from keras.layers import BatchNormalization, Input, Dense, Reshape, Flatten
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential, Model
from keras.optimizers import Adam


def build_generator(latent_dim: int):
    """
    Build generator network
    :param latent_dim: latent vector size
    """

    model = Sequential([
        Dense(128, input_dim=latent_dim),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        Dense(256),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        Dense(512),
        LeakyReLU(alpha=0.2),
        BatchNormalization(momentum=0.8),
        Dense(np.prod((28, 28, 1)), activation='tanh'),
        # reshape to MNIST image size
        Reshape((28, 28, 1))
    ])

    model.summary()

    # the latent input vector z
    z = Input(shape=(latent_dim,))
    generated = model(z)

    # build model from the input and output
    return Model(z, generated)


def build_discriminator():
    """
    Build discriminator network
    """

    model = Sequential([
        Flatten(input_shape=(28, 28, 1)),
        Dense(256),
        LeakyReLU(alpha=0.2),
        Dense(128),
        LeakyReLU(alpha=0.2),
        Dense(1, activation='sigmoid'),
    ], name='discriminator')

    model.summary()

    image = Input(shape=(28, 28, 1))
    output = model(image)

    return Model(image, output)


def train(generator, discriminator, combined, steps, batch_size):
    """
    Train the GAN system
    :param generator: generator
    :param discriminator: discriminator
    :param combined: stacked generator and discriminator
           we'll use the combined network when we train the generator
    :param steps: number of alternating steps for training
    :param batch_size: size of the minibatch
    """

    # Load the dataset
    (x_train, _), _ = mnist.load_data()

    # Rescale in [-1, 1] interval
    x_train = (x_train.astype(np.float32) - 127.5) / 127.5
    x_train = np.expand_dims(x_train, axis=-1)

    # Discriminator ground truths
    real = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    latent_dim = generator.input_shape[1]

    for step in range(steps):
        # Train the discriminator

        # Select a random batch of images
        real_images = x_train[np.random.randint(0, x_train.shape[0], batch_size)]

        # Random batch of noise
        noise = np.random.normal(0, 1, (batch_size, latent_dim))

        # Generate a batch of new images
        generated_images = generator.predict(noise)

        # Train the discriminator
        discriminator_real_loss = discriminator.train_on_batch(real_images, real)
        discriminator_fake_loss = discriminator.train_on_batch(generated_images, fake)
        discriminator_loss = 0.5 * np.add(discriminator_real_loss, discriminator_fake_loss)

        # Train the generator
        # random latent vector z
        noise = np.random.normal(0, 1, (batch_size, latent_dim))

        # Train the generator
        # Note that we use the "real" labels for the generated images,
        # because we try to maximize the discriminator loss
        generator_loss = combined.train_on_batch(noise, real)

        # Display progress
        print("%d [Discriminator loss: %.4f, acc.: %.2f%%] [Generator loss: %.4f]" %
              (step, discriminator_loss[0], 100 * discriminator_loss[1], generator_loss))
def plot_generated_images(generator):
    """
    Display a nxn 2D manifold of digits
    :param generator: the generator
    """
    n = 10
    digit_size = 28

    # big array containing all images
    figure = np.zeros((digit_size * n, digit_size * n))

    latent_dim = generator.input_shape[1]

    # n*n random latent distributions
    noise = np.random.normal(0, 1, (n * n, latent_dim))

    # generate the images
    generated_images = generator.predict(noise)

    # fill the big array with images
    for i in range(n):
        for j in range(n):
            slice_i = slice(i * digit_size, (i + 1) * digit_size)
            slice_j = slice(j * digit_size, (j + 1) * digit_size)
            figure[slice_i, slice_j] = np.reshape(generated_images[i * n + j], (28, 28))

    # plot the results
    plt.figure(figsize=(6, 5))
    plt.axis('off')
    plt.imshow(figure, cmap='Greys_r')
    plt.show()


if __name__ == '__main__':
    latent_dim = 64

    # Build and compile the discriminator
    discriminator = build_discriminator()
    discriminator.compile(loss='binary_crossentropy',
                          optimizer=Adam(lr=0.0002, beta_1=0.5),
                          metrics=['accuracy'])

    # Build the generator
    generator = build_generator(latent_dim)

    # Generator input z
    z = Input(shape=(latent_dim,))
    generated_image = generator(z)

    # Only train the generator for the combined model
    discriminator.trainable = False

    # The discriminator takes generated image as input and determines validity
    real_or_fake = discriminator(generated_image)

    # Stack the generator and discriminator in a combined model
    # Trains the generator to deceive the discriminator
    combined = Model(z, real_or_fake)
    combined.compile(loss='binary_crossentropy',
                     optimizer=Adam(lr=0.0002, beta_1=0.5))

    # train the GAN system
    train(generator=generator,
          discriminator=discriminator,
          combined=combined,
          steps=15000,
          batch_size=128)

    # display some random generated images
    plot_generated_images(generator)
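A subtlety worth noting in __main__ above: discriminator.trainable = False is set after the discriminator has already been compiled. In Keras, the trainable flag is captured at compile time, so the standalone discriminator (compiled first) keeps learning from its own train_on_batch calls, while the combined model (compiled after the flag is cleared) treats the discriminator as frozen and only updates the generator — exactly the alternating scheme train() relies on.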
--------------------------------------------------------------------------------
/Chapter07/chapter_07_001.py:
--------------------------------------------------------------------------------
import numpy as np


def step(s, x, U, W):
    return x * U + s * W


def forward(x, U, W):
    # Number of samples in the mini-batch
    number_of_samples = len(x)

    # Length of each sample
    sequence_length = len(x[0])

    # Initialize the state activation for each sample along the sequence
    s = np.zeros((number_of_samples, sequence_length + 1))

    # Update the states over the sequence
    for t in range(0, sequence_length):
        s[:, t + 1] = step(s[:, t], x[:, t], U, W)  # step function

    return s


def backward(x, s, y, W):
    sequence_length = len(x[0])

    # The network output is just the last activation of the sequence
    s_t = s[:, -1]

    # Compute the gradient of the output w.r.t. MSE cost function at final state
    gS = 2 * (s_t - y)

    # Set the gradient accumulations to 0
    gU, gW = 0, 0

    # Accumulate gradients backwards
    for k in range(sequence_length, 0, -1):
        # Compute the parameter gradients and accumulate the results.
        gU += np.sum(gS * x[:, k - 1])
        gW += np.sum(gS * s[:, k - 1])

        # Compute the gradient at the output of the previous layer
        gS = gS * W

    return gU, gW


def train(x, y, epochs, learning_rate=0.0005):
    """Train the network"""

    # Set initial parameters
    weights = (-2, 0)  # (U, W)

    # Accumulate the losses and their respective weights
    losses = list()
    weights_u = list()
    weights_w = list()

    # Perform iterative gradient descent
    for i in range(epochs):
        # Perform forward and backward pass to get the gradients
        s = forward(x, weights[0], weights[1])

        # Compute the loss
        loss = (y[0] - s[-1, -1]) ** 2

        # Store the loss and weights values for later display
        losses.append(loss)
        weights_u.append(weights[0])
        weights_w.append(weights[1])

        gradients = backward(x, s, y, weights[1])

        # Update each parameter `p` by p = p - (gradient * learning_rate).
        # `gp` is the gradient of parameter `p`
        weights = tuple((p - gp * learning_rate) for p, gp in zip(weights, gradients))

        print(weights)

    return np.array(losses), np.array(weights_u), np.array(weights_w)


def plot_training(losses, weights_u, weights_w):
    import matplotlib.pyplot as plt

    # remove nan and inf values
    losses = losses[~np.isnan(losses)][:-1]
    weights_u = weights_u[~np.isnan(weights_u)][:-1]
    weights_w = weights_w[~np.isnan(weights_w)][:-1]

    # plot the weights U and W
    fig, ax1 = plt.subplots(figsize=(5, 3.4))

    ax1.set_ylim(-3, 2)
    ax1.set_xlabel('epochs')
    ax1.plot(weights_w, label='W', color='red', linestyle='--')
    ax1.plot(weights_u, label='U', color='blue', linestyle=':')
    ax1.legend(loc='upper left')

    # instantiate a second axis that shares the same x-axis
    # plot the loss on the second axis
    ax2 = ax1.twinx()

    # uncomment to plot exploding gradients
    ax2.set_ylim(-3, 200)
    ax2.plot(losses, label='Loss', color='green')
    ax2.tick_params(axis='y', labelcolor='green')
    ax2.legend(loc='upper right')

    fig.tight_layout()

    plt.show()


# Use these inputs for normal training
# The first dimension represents the mini-batch
x = np.array([[0, 0, 0, 0, 1, 0, 1, 0, 1, 0]])
y = np.array([3])

# Use these inputs to reproduce the exploding gradients scenario
# x = np.array([[0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0]])
# y = np.array([12])

losses, weights_u, weights_w = train(x, y, epochs=150)
plot_training(losses, weights_u, weights_w)
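The three updates inside the loop in backward() follow directly from the recurrence s[:, k] = U * x[:, k - 1] + W * s[:, k - 1] implemented by step(). By the chain rule, with gS holding dL/ds_k:

gU += sum(gS * x[:, k - 1])   # since ds_k/dU = x_{k-1}
gW += sum(gS * s[:, k - 1])   # since ds_k/dW = s_{k-1}
gS = gS * W                   # since ds_k/ds_{k-1} = W

Each pass through the loop multiplies gS by W, so over a sequence of length T the gradient scales roughly like W**T — the exploding (|W| > 1) or vanishing (|W| < 1) gradient behavior that the commented-out longer inputs at the bottom of the script reproduce.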
--------------------------------------------------------------------------------
/Chapter07/language model/data_processing.py:
--------------------------------------------------------------------------------
"""Process text file for language model training."""
from __future__ import print_function, division

import codecs
import re

filepath = 'war_and_peace.txt'  # in
out_file = 'wap.txt'  # out

# Regexes used to clean up the text
NEW_LINE_IN_PARAGRAPH_REGEX = re.compile(r'(\S)\n(\S)')
MULTIPLE_NEWLINES_REGEX = re.compile(r'(\n)(\n)+')

# Read text as string
with codecs.open(filepath, encoding='utf-8', mode='r') as f_input:
    book_str = f_input.read()

# Cleanup
book_str = NEW_LINE_IN_PARAGRAPH_REGEX.sub(r'\g<1> \g<2>', book_str)
book_str = MULTIPLE_NEWLINES_REGEX.sub('\n\n', book_str)

# Write processed text to file
with codecs.open(out_file, encoding='utf-8', mode='w') as f_output:
    f_output.write(book_str)

--------------------------------------------------------------------------------
/Chapter07/language model/data_reader.py:
--------------------------------------------------------------------------------
from __future__ import print_function, division

import codecs

import numpy as np
from six.moves import range


class DataReader(object):
    """Data reader used for training language model."""

    def __init__(self, filepath, batch_length, batch_size):
        self.batch_length = batch_length
        self.batch_size = batch_size
        # Read data into string
        with codecs.open(filepath, encoding='utf-8', mode='r') as f:
            self.data_str = f.read()
        self.data_length = len(self.data_str)
        print('data_length: ', self.data_length)
        # Create a list of characters, indices are class indices for softmax
        char_set = set()
        for ch in self.data_str:
            char_set.add(ch)
        self.char_list = sorted(list(char_set))
        print('char_list: ', len(self.char_list), self.char_list)
        # Create reverse mapping to look up the index based on the character
        self.char_dict = {val: idx for idx, val in enumerate(self.char_list)}
        print('char_dict: ', self.char_dict)
        # Initialise random start indices
        self.reset_indices()

    def reset_indices(self):
        # np.random.random_integers is deprecated; randint's upper bound is
        # exclusive, but get_sample wraps around the data string anyway
        self.start_idxs = np.random.randint(
            0, self.data_length, self.batch_size)

    def get_sample(self, start_idx, length):
        # Get a sample and wrap around the data string
        return [self.char_dict[self.data_str[i % self.data_length]]
                for i in range(start_idx, start_idx + length)]

    def get_input_target_sample(self, start_idx):
        sample = self.get_sample(start_idx, self.batch_length + 1)
        inpt = sample[0:self.batch_length]
        trgt = sample[1:self.batch_length + 1]
        return inpt, trgt

    def get_batch(self, start_idxs):
        input_batch = np.zeros((self.batch_size, self.batch_length),
                               dtype=np.int32)
        target_batch = np.zeros((self.batch_size, self.batch_length),
                                dtype=np.int32)
        for i, start_idx in enumerate(start_idxs):
            inpt, trgt = self.get_input_target_sample(start_idx)
            input_batch[i, :] = inpt
            target_batch[i, :] = trgt
        return input_batch, target_batch

    def __iter__(self):
        while True:
            input_batch, target_batch = self.get_batch(self.start_idxs)
            self.start_idxs = (
                self.start_idxs + self.batch_length) % self.data_length
            yield input_batch, target_batch


def main():
    filepath = './wap.txt'
    batch_length = 10
    batch_size = 2
    reader = DataReader(filepath, batch_length, batch_size)
    s = 'As in the question of astronomy then, so in the question of history now,'
    print([reader.char_dict[c] for c in s])


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/Chapter07/language model/model.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import print_function, division

import time

import data_reader
import numpy as np
import tensorflow as tf


class Model(object):
    """RNN language model."""

    def __init__(self, batch_size, sequence_length, lstm_sizes, dropout,
                 labels, save_path):
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.lstm_sizes = lstm_sizes
        self.labels = labels
        self.label_map = {val: idx for idx, val in enumerate(labels)}
        self.number_of_characters = len(labels)
        self.save_path = save_path
        self.dropout = dropout

    def init_graph(self):
        # Variable sequence length
        self.inputs = tf.placeholder(
            tf.int32, [self.batch_size, self.sequence_length])
        self.targets = tf.placeholder(
            tf.int32, [self.batch_size, self.sequence_length])
        self.init_architecture()
        self.saver = tf.train.Saver(tf.trainable_variables())

    def init_architecture(self):
        # Define a multilayer LSTM cell
        self.one_hot_inputs = tf.one_hot(
            self.inputs, depth=self.number_of_characters)
        cell_list = [tf.nn.rnn_cell.LSTMCell(lstm_size) for lstm_size in self.lstm_sizes]
        self.multi_cell_lstm = tf.nn.rnn_cell.MultiRNNCell(cell_list)
        # Initial state of the LSTM memory.
        # Keep state in graph memory to use between batches
        self.initial_state = self.multi_cell_lstm.zero_state(
            self.batch_size, tf.float32)
        # Convert to variables so that the state can be stored between batches.
        # Note that the LSTM state is a tuple of tensors; this structure has to
        # be re-created in order to use it as the LSTM state.
        self.state_variables = tf.contrib.framework.nest.pack_sequence_as(
            self.initial_state,
            [tf.Variable(var, trainable=False)
             for var in tf.contrib.framework.nest.flatten(self.initial_state)])
        # Define the RNN through time
        lstm_output, final_state = tf.nn.dynamic_rnn(
            cell=self.multi_cell_lstm, inputs=self.one_hot_inputs,
            initial_state=self.state_variables)
        # Force the initial state to be set to the new state for the next batch
        # before returning the output
        store_states = [
            state_variable.assign(new_state)
            for (state_variable, new_state) in zip(
                tf.contrib.framework.nest.flatten(self.state_variables),
                tf.contrib.framework.nest.flatten(final_state))]
        with tf.control_dependencies(store_states):
            lstm_output = tf.identity(lstm_output)
        # Reshape so that we can apply the linear transformation to all outputs
        output_flat = tf.reshape(lstm_output, (-1, self.lstm_sizes[-1]))
        # Define output layer
        self.logit_weights = tf.Variable(
            tf.truncated_normal(
                (self.lstm_sizes[-1], self.number_of_characters), stddev=0.01),
            name='logit_weights')
        self.logit_bias = tf.Variable(
            tf.zeros((self.number_of_characters)), name='logit_bias')
        # Apply last layer transformation
        self.logits_flat = tf.matmul(
            output_flat, self.logit_weights) + self.logit_bias
        probabilities_flat = tf.nn.softmax(self.logits_flat)
        self.probabilities = tf.reshape(
            probabilities_flat,
            (self.batch_size, -1, self.number_of_characters))

    def init_train_op(self, optimizer):
        # Flatten the targets to be compatible with the flattened logits
        targets_flat = tf.reshape(self.targets, (-1,))
        # Get the loss over all outputs
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.logits_flat, labels=targets_flat, name='x_entropy')
        self.loss = tf.reduce_mean(loss)
        trainable_variables = tf.trainable_variables()
        gradients = tf.gradients(loss, trainable_variables)
        gradients, _ = tf.clip_by_global_norm(gradients, 5)
        self.train_op = optimizer.apply_gradients(zip(gradients, trainable_variables))
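    # A note on the clipping above: tf.clip_by_global_norm rescales the whole
    # gradient list so that its joint L2 norm is at most 5. This bounds the
    # update magnitude and mitigates exploding gradients during
    # backpropagation through time, without changing the gradients' direction.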
    def sample(self, session, prime_string, sample_length):
        self.reset_state(session)
        # Prime state
        print('prime_string: ', prime_string)
        for character in prime_string:
            character_idx = self.label_map[character]
            out = session.run(
                self.probabilities,
                feed_dict={self.inputs: np.asarray([[character_idx]])})
        output_sample = prime_string
        print('start sampling')
        # Sample for sample_length steps
        for _ in range(sample_length):
            sample_label = np.random.choice(
                self.labels, size=(1), p=out[0, 0])[0]
            output_sample += sample_label
            sample_idx = self.label_map[sample_label]
            out = session.run(
                self.probabilities,
                feed_dict={self.inputs: np.asarray([[sample_idx]])})

        return output_sample

    def reset_state(self, session):
        for state in tf.contrib.framework.nest.flatten(self.state_variables):
            session.run(state.initializer)

    def save(self, sess):
        self.saver.save(sess, self.save_path)

    def restore(self, sess):
        self.saver.restore(sess, self.save_path)


def train_and_sample(minibatch_iterations, restore):
    tf.reset_default_graph()
    batch_size = 64
    lstm_sizes = [512, 512]
    batch_len = 100
    learning_rate = 2e-3

    filepath = './wap.txt'

    data_feed = data_reader.DataReader(
        filepath, batch_len, batch_size)
    labels = data_feed.char_list
    print('labels: ', labels)

    save_path = './model.tf'
    model = Model(
        batch_size, batch_len, lstm_sizes, 0.8, labels,
        save_path)
    model.init_graph()
    optimizer = tf.train.AdamOptimizer(learning_rate)
    model.init_train_op(optimizer)

    # tf.initialize_all_variables is long deprecated;
    # tf.global_variables_initializer is the supported equivalent
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        if restore:
            print('Restoring model')
            model.restore(sess)
        model.reset_state(sess)
        start_time = time.time()
        for i in range(minibatch_iterations):
            input_batch, target_batch = next(iter(data_feed))
            loss, _ = sess.run(
                [model.loss, model.train_op],
                feed_dict={model.inputs: input_batch, model.targets: target_batch})
            if i % 50 == 0 and i != 0:
                print('i: ', i)
                duration = time.time() - start_time
                print('loss: {} ({} sec.)'.format(loss, duration))
                start_time = time.time()
            if i % 1000 == 0 and i != 0:
                model.save(sess)
            if i % 100 == 0 and i != 0:
                print('Reset initial state')
                model.reset_state(sess)
            if i % 1000 == 0 and i != 0:
                print('Reset minibatch feeder')
                data_feed.reset_indices()
        model.save(sess)

    print('\n sampling after {} iterations'.format(minibatch_iterations))
    tf.reset_default_graph()
    model = Model(
        1, None, lstm_sizes, 1.0, labels, save_path)
    model.init_graph()
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        model.restore(sess)
        print('\nSample 1:')
        sample = model.sample(
            sess, prime_string=u'\n\nThis feeling was ', sample_length=500)
        print(u'sample: \n{}'.format(sample))
        print('\nSample 2:')
        sample = model.sample(
            sess, prime_string=u'She was born in the year ', sample_length=500)
        print(u'sample: \n{}'.format(sample))
        print('\nSample 3:')
194 |         print('\nSample 3:')
195 |         sample = model.sample(
196 |             sess, prime_string=u'The meaning of this all is ',
197 |             sample_length=500)
198 |         print(u'sample: \n{}'.format(sample))
199 |         print('\nSample 4:')
200 |         sample = model.sample(
201 |             sess,
202 |             prime_string=u'In the midst of a conversation on political matters Anna Pávlovna burst out:,',
203 |             sample_length=500)
204 |         print(u'sample: \n{}'.format(sample))
205 |         print('\nSample 5:')
206 |         sample = model.sample(
207 |             sess, prime_string=u'\n\nCHAPTER X\n\n',
208 |             sample_length=500)
209 |         print(u'sample: \n{}'.format(sample))
210 |         print('\nSample 6:')
211 |         sample = model.sample(
212 |             sess, prime_string=u'"If only you knew,"',
213 |             sample_length=500)
214 |         print(u'sample: \n{}'.format(sample))
215 | 
216 | 
217 | def main():
218 |     total_iterations = 500
219 |     print('\n\n\nTrain for {}'.format(total_iterations))
220 |     print('Total iters: {}'.format(total_iterations))
221 |     train_and_sample(500, restore=False)
222 |     for i in [500, 1000, 3000, 5000, 10000, 30000, 50000, 100000, 300000]:
223 |         total_iterations += i
224 |         print('\n\n\nTrain for {}'.format(i))
225 |         print('Total iters: {}'.format(total_iterations))
226 |         train_and_sample(i, restore=True)
227 | 
228 | 
229 | if __name__ == "__main__":
230 |     main()
231 | 
--------------------------------------------------------------------------------
/Chapter08/chapter_08_001.py:
--------------------------------------------------------------------------------
1 | import random
2 | from collections import deque
3 | 
4 | import gym
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import tensorflow as tf
8 | 
9 | env = gym.make('CartPole-v0')
10 | 
11 | # Build the network
12 | input_size = env.observation_space.shape[0]
13 | 
14 | input_placeholder = tf.placeholder("float", [None, input_size])
15 | 
16 | # weights and bias of the hidden layer
17 | weights_1 = tf.Variable(tf.truncated_normal([input_size, 20], stddev=0.01))
18 | bias_1 = tf.Variable(tf.constant(0.0, shape=[20]))
19 | 
20 | # weights and bias of the output layer
21 | weights_2 = tf.Variable(tf.truncated_normal([20, env.action_space.n], stddev=0.01))
22 | bias_2 = tf.Variable(tf.constant(0.0, shape=[env.action_space.n]))
23 | 
24 | hidden_layer = tf.nn.tanh(tf.matmul(input_placeholder, weights_1) + bias_1)
25 | output_layer = tf.matmul(hidden_layer, weights_2) + bias_2
26 | 
27 | action_placeholder = tf.placeholder("float", [None, 2])
28 | target_placeholder = tf.placeholder("float", [None])
29 | 
30 | # network estimation
31 | q_estimation = tf.reduce_sum(tf.multiply(output_layer, action_placeholder), reduction_indices=1)
32 | 
33 | # loss function
34 | loss = tf.reduce_mean(tf.square(target_placeholder - q_estimation))
35 | 
36 | # Use Adam
37 | train_operation = tf.train.AdamOptimizer().minimize(loss)
38 | 
39 | # initialize TF variables
40 | session = tf.Session()
41 | session.run(tf.global_variables_initializer())
42 | 
43 | 
44 | def choose_next_action(state, rand_action_prob):
45 |     """
46 |     Simplified epsilon-greedy policy
47 |     :param state: current state
48 |     :param rand_action_prob: probability to select random action
49 |     """
50 | 
51 |     new_action = np.zeros([env.action_space.n])
52 | 
53 |     if random.random() <= rand_action_prob:
54 |         # choose an action randomly
55 |         action_index = random.randrange(env.action_space.n)
56 |     else:
57 |         # choose an action given our state
58 |         action_values = session.run(output_layer, feed_dict={input_placeholder: [state]})[0]
59 |         # we will take the highest value action
60 |         action_index = np.argmax(action_values)
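    # The chosen action is returned one-hot (e.g. index 1 of 2 -> [0, 1]) so
    # that q_estimation = reduce_sum(output_layer * action_placeholder) above
    # selects exactly the Q-value of the taken action. A made-up check:
    #     np.sum(np.array([1.7, 0.3]) * np.array([0, 1]))  # == 0.3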
61 | 
62 |     new_action[action_index] = 1
63 |     return new_action
64 | 
65 | 
66 | def train(mini_batch):
67 |     """
68 |     Train the network on a single minibatch
69 |     :param mini_batch: the mini-batch
70 |     """
71 | 
72 |     last_state, last_action, reward, current_state, terminal = range(5)
73 | 
74 |     # get the batch variables
75 |     previous_states = [d[last_state] for d in mini_batch]
76 |     actions = [d[last_action] for d in mini_batch]
77 |     rewards = [d[reward] for d in mini_batch]
78 |     current_states = [d[current_state] for d in mini_batch]
79 |     agents_expected_reward = []
80 | 
81 |     # this gives us the agent's expected reward for each action we might take
82 |     agents_reward_per_action = session.run(output_layer,
83 |                                            feed_dict={input_placeholder: current_states})
84 |     for i in range(len(mini_batch)):
85 |         if mini_batch[i][terminal]:
86 |             # this was a terminal frame so there is no future reward...
87 |             agents_expected_reward.append(rewards[i])
88 |         else:
89 |             # otherwise compute expected reward
90 |             discount_factor = 0.9
91 |             agents_expected_reward.append(
92 |                 rewards[i] + discount_factor * np.max(agents_reward_per_action[i]))
93 | 
94 |     # learn that these actions in these states lead to this reward
95 |     session.run(train_operation, feed_dict={
96 |         input_placeholder: previous_states,
97 |         action_placeholder: actions,
98 |         target_placeholder: agents_expected_reward})
99 | 
100 | 
101 | def q_learning():
102 |     """The Q-learning method"""
103 | 
104 |     episode_lengths = list()
105 | 
106 |     # Experience replay buffer definition
107 |     observations = deque(maxlen=200000)
108 | 
109 |     # Set the first action to nothing
110 |     last_action = np.zeros(env.action_space.n)
111 |     last_action[1] = 1
112 |     last_state = env.reset()
113 | 
114 |     total_reward = 0
115 |     episode = 1
116 | 
117 |     time_step = 0
118 | 
119 |     # Initial chance to select random action
120 |     rand_action_prob = 1.0
121 | 
122 |     while episode <= 400:
123 |         # render the cart pole on the screen
124 |         # (commented out for faster execution)
125 |         # env.render()
126 | 
127 |         # select action following the policy
128 |         last_action = choose_next_action(last_state, rand_action_prob)
129 | 
130 |         # take action and receive new state and reward
131 |         current_state, reward, terminal, info = env.step(np.argmax(last_action))
132 |         total_reward += reward
133 | 
134 |         if terminal:
135 |             reward = -1.
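            # A worked example of the targets that train() will build from
            # these rewards (Q-values are made up): with next-state Q-values
            # [0.2, 0.5], a non-terminal step with reward 1 gets the target
            # 1 + 0.9 * 0.5 = 1.45, while a terminal step's target is just
            # this -1, since there is no future reward to discount.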
136 |             episode_lengths.append(time_step)
137 | 
138 |             print("Episode: %s; Steps before fail: %s; Epsilon: %.2f; reward: %s" %
139 |                   (episode, time_step, rand_action_prob, total_reward))
140 |             total_reward = 0
141 | 
142 |         # store the transition in previous_observations
143 |         observations.append((last_state, last_action, reward, current_state, terminal))
144 | 
145 |         # only train if done observing
146 |         min_experience_replay_size = 5000
147 |         if len(observations) > min_experience_replay_size:
148 |             # mini-batch of 128 from the experience replay observations
149 |             mini_batch = random.sample(observations, 128)
150 | 
151 |             # train the network
152 |             train(mini_batch)
153 | 
154 |         time_step += 1
155 | 
156 |         # reset the environment
157 |         if terminal:
158 |             last_state = env.reset()
159 |             time_step = 0
160 |             episode += 1
161 |         else:
162 |             last_state = current_state
163 | 
164 |         # gradually reduce the probability of a random action
165 |         # starting from 1 and going to 0
166 |         if rand_action_prob > 0 and len(observations) > min_experience_replay_size:
167 |             rand_action_prob -= 1.0 / 15000
168 | 
169 |     # display episodes length
170 |     plt.xlabel("Episode")
171 |     plt.ylabel("Length (steps)")
172 |     plt.plot(episode_lengths, label='Episode length')
173 |     plt.show()
174 | 
175 | 
176 | q_learning()
177 | 
--------------------------------------------------------------------------------
/Chapter09/chapter_09_001_ddqn.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import random
4 | import zlib
5 | from collections import deque
6 | from collections import namedtuple
7 | 
8 | import gym
9 | import matplotlib.pyplot as plt
10 | import numpy as np
11 | import tensorflow as tf
12 | 
13 | resume = True  # resume training from checkpoint (if exists)
14 | CHECKPOINT_PATH = 'deep_q_breakout_path_8'
15 | MB_SIZE = 32  # mini batch size
16 | ER_BUFFER_SIZE = 1000000  # experience replay (ER) buffer size
17 | COMPRESS_ER = True  # compress episodes in the ER buffer
18 | EXPLORE_STEPS = 1000000  # frames over which to anneal epsilon
19 | EPSILON_START = 1.0  # starting chance of an action being random
20 | EPSILON_END = 0.01  # final chance of an action being random
21 | STATE_FRAMES = 4  # number of frames to store in the state
22 | SAVE_EVERY_X_STEPS = 10000  # how often to save the model on the disk
23 | UPDATE_Q_NET_FREQ = 1  # how often to update the q network
24 | UPDATE_TARGET_NET_EVERY_X_STEPS = 10000  # copy the q-net weights to the target net
25 | DISCOUNT_FACTOR = 0.99  # discount factor
26 | 
27 | 
28 | def initialize():
29 |     """Initialize the session, the networks, and the environment"""
30 |     # Create environment
31 |     env = gym.envs.make("BreakoutDeterministic-v4")
32 | 
33 |     tf.reset_default_graph()
34 | 
35 |     session = tf.Session()
36 | 
37 |     # Tracks the total number of training steps
38 |     tf.Variable(0, name='global_step', trainable=False)
39 | 
40 |     # Create q- and target- networks
41 |     q_network = build_network("q_network")
42 |     t_network = build_network("target_network")
43 | 
44 |     # create the operations to copy the q-net weights to the t-net
45 |     q_net_weights = [t for t in tf.trainable_variables() if t.name.startswith(q_network.scope)]
46 |     q_net_weights = sorted(q_net_weights, key=lambda v: v.name)
47 |     t_net_weights = [t for t in tf.trainable_variables() if t.name.startswith(t_network.scope)]
48 |     t_net_weights = sorted(t_net_weights, key=lambda v: v.name)
49 | 
50 |     t_net_updates = \
51 |         [n2_v.assign(n1_v) for n1_v, n2_v in zip(q_net_weights, t_net_weights)]
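    # The assigns above implement a hard update: every target-net weight is
    # overwritten with its q-net counterpart whenever t_net_updates runs.
    # A common alternative (not used in this file) is a soft/Polyak update,
    # e.g. with tau = 0.001:
    #     t_net_updates = [t_v.assign(tau * q_v + (1 - tau) * t_v)
    #                      for q_v, t_v in zip(q_net_weights, t_net_weights)]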
52 | 53 | # pre-processor of game frames 54 | frame_proc = frame_preprocessor() 55 | 56 | optimizer = tf.train.AdamOptimizer(0.00025) 57 | # optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6) 58 | 59 | # training op 60 | train_op = optimizer.minimize(q_network.loss, global_step=tf.train.get_global_step()) 61 | 62 | # restore checkpoint 63 | saver = tf.train.Saver() 64 | 65 | if not os.path.exists(CHECKPOINT_PATH): 66 | os.mkdir(CHECKPOINT_PATH) 67 | 68 | checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_PATH) 69 | if resume and checkpoint: 70 | session.run(tf.global_variables_initializer()) 71 | session.run(tf.local_variables_initializer()) 72 | 73 | print("\nRestoring checkpoint...") 74 | saver.restore(session, checkpoint.model_checkpoint_path) 75 | else: 76 | session.run(tf.global_variables_initializer()) 77 | session.run(tf.local_variables_initializer()) 78 | 79 | return session, \ 80 | q_network, \ 81 | t_network, \ 82 | t_net_updates, \ 83 | frame_proc, \ 84 | saver, \ 85 | train_op, \ 86 | env 87 | 88 | 89 | def build_network(scope: str, input_size=84, num_actions=4): 90 | """Builds the network graph.""" 91 | 92 | with tf.variable_scope(scope): 93 | # Our input are STATE_FRAMES greyscale frames of shape 84, 84 each 94 | input_placeholder = tf.placeholder(dtype=np.float32, 95 | shape=[None, input_size, input_size, STATE_FRAMES]) 96 | 97 | normalized_input = tf.to_float(input_placeholder) / 255.0 98 | 99 | # action prediction 100 | action_placeholder = tf.placeholder(dtype=tf.int32, shape=[None]) 101 | 102 | # target action 103 | target_placeholder = tf.placeholder(dtype=np.float32, shape=[None]) 104 | 105 | # Convolutional layers 106 | conv_1 = tf.layers.conv2d(normalized_input, 32, 8, 4, 107 | activation=tf.nn.relu) 108 | conv_2 = tf.layers.conv2d(conv_1, 64, 4, 2, 109 | activation=tf.nn.relu) 110 | conv_3 = tf.layers.conv2d(conv_2, 64, 3, 1, 111 | activation=tf.nn.relu) 112 | 113 | # Fully connected layers 114 | flattened = tf.layers.flatten(conv_3) 115 | fc_1 = tf.layers.dense(flattened, 512, 116 | activation=tf.nn.relu) 117 | 118 | q_estimation = tf.layers.dense(fc_1, num_actions) 119 | 120 | # Get the predictions for the chosen actions only 121 | batch_size = tf.shape(normalized_input)[0] 122 | gather_indices = tf.range(batch_size) * tf.shape(q_estimation)[1] + action_placeholder 123 | action_predictions = tf.gather(tf.reshape(q_estimation, [-1]), gather_indices) 124 | 125 | # Calculate the loss 126 | # loss = tf.reduce_mean(tf.squared_difference(target_placeholder, action_predictions)) 127 | loss = tf.losses.huber_loss(labels=target_placeholder, predictions=action_predictions, reduction=tf.losses.Reduction.MEAN) 128 | 129 | Network = namedtuple('Network', 130 | 'scope ' 131 | 'input_placeholder ' 132 | 'action_placeholder ' 133 | 'target_placeholder ' 134 | 'q_estimation ' 135 | 'action_predictions ' 136 | 'loss ') 137 | 138 | return Network(scope=scope, 139 | input_placeholder=input_placeholder, 140 | action_placeholder=action_placeholder, 141 | target_placeholder=target_placeholder, 142 | q_estimation=q_estimation, 143 | action_predictions=action_predictions, 144 | loss=loss) 145 | 146 | 147 | def choose_next_action(state, net, epsilon): 148 | """Epsilon-greedy policy""" 149 | 150 | # choose an action given our last state 151 | tmp = np.ones(env.action_space.n, dtype=float) * epsilon / env.action_space.n 152 | q_estimations = session.run(net.q_estimation, 153 | feed_dict={net.input_placeholder: np.reshape(state, (1,) + state.shape)})[0] 154 | 155 | 
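    # tmp starts as the uniform exploration part, epsilon / n per action; the
    # next line adds the remaining 1 - epsilon mass to the greedy action, so
    # the probabilities sum to 1. Illustrative numbers: epsilon = 0.1 and
    # n = 4 actions with greedy index 2 give [0.025, 0.025, 0.925, 0.025].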
tmp[np.argmax(q_estimations)] += (1.0 - epsilon) 156 | 157 | new_action = np.random.choice(np.arange(len(tmp)), p=tmp) 158 | 159 | return new_action 160 | 161 | 162 | def frame_preprocessor(): 163 | """Pre-processing the input data""" 164 | 165 | with tf.variable_scope("frame_processor"): 166 | input_placeholder = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8) 167 | processed_frame = tf.image.rgb_to_grayscale(input_placeholder) 168 | processed_frame = tf.image.crop_to_bounding_box(processed_frame, 34, 0, 160, 160) 169 | processed_frame = tf.image.resize_images( 170 | processed_frame, 171 | [84, 84], 172 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) 173 | 174 | processed_frame = tf.squeeze(processed_frame) 175 | 176 | FramePreprocessor = namedtuple('FramePreprocessor', 'input_placeholder processed_frame') 177 | 178 | return FramePreprocessor( 179 | input_placeholder=input_placeholder, 180 | processed_frame=processed_frame) 181 | 182 | 183 | def populate_experience_replay_buffer(buffer: deque, initial_buffer_size: int): 184 | """Initial population of the experience replay buffer""" 185 | 186 | # Initialize epsilon based on the current step 187 | epsilon_step = (EPSILON_START - EPSILON_END) / EXPLORE_STEPS 188 | epsilon = max(EPSILON_END, 189 | EPSILON_START - 190 | session.run(tf.train.get_global_step()) * epsilon_step) 191 | 192 | # Populate the replay memory with initial experience 193 | state = env.reset() 194 | state = session.run(frame_proc.processed_frame, 195 | feed_dict={frame_proc.input_placeholder: state}) 196 | 197 | state = np.stack([state] * STATE_FRAMES, axis=2) 198 | 199 | for i in range(initial_buffer_size): 200 | 201 | # Sample next state with the q_network 202 | action = choose_next_action(state, q_network, epsilon) 203 | 204 | # Perform one action step 205 | next_state, reward, terminal, info = env.step(action) 206 | next_state = session.run(frame_proc.processed_frame, 207 | feed_dict={frame_proc.input_placeholder: next_state}) 208 | 209 | # Stack the game frames in a single array 210 | next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2) 211 | 212 | # Store the experience in ER 213 | if COMPRESS_ER: 214 | buffer.append( 215 | zlib.compress( 216 | pickle.dumps((state, action, reward, next_state, terminal), 2), 2)) 217 | else: 218 | buffer.append((state, action, reward, next_state, terminal)) 219 | 220 | # Set next state as current 221 | if terminal: 222 | state = env.reset() 223 | state = session.run(frame_proc.processed_frame, 224 | feed_dict={frame_proc.input_placeholder: state}) 225 | 226 | state = np.stack([state] * STATE_FRAMES, axis=2) 227 | else: 228 | state = next_state 229 | 230 | print("\rExperience replay buffer: {} / {} initial ({} total)".format( 231 | len(buffer), initial_buffer_size, buffer.maxlen), end="") 232 | 233 | 234 | def plot_stats(stats): 235 | """Plot the stats""" 236 | plt.figure() 237 | 238 | plt.xlabel("Episode") 239 | 240 | # plot the rewards 241 | # rolling mean of 50 242 | cumsum = np.cumsum(np.insert(stats.rewards, 0, 0)) 243 | rewards = (cumsum[50:] - cumsum[:-50]) / float(50) 244 | 245 | fig, ax1 = plt.subplots() 246 | 247 | color = 'tab:red' 248 | 249 | ax1.set_ylabel('Reward', color=color) 250 | ax1.plot(rewards, color=color) 251 | ax1.tick_params(axis='y', labelcolor=color) 252 | 253 | # plot the episode lengths 254 | # rolling mean of 50 255 | cumsum = np.cumsum(np.insert(stats.lengths, 0, 0)) 256 | lengths = (cumsum[50:] - cumsum[:-50]) / float(50) 257 | 258 | ax2 = ax1.twinx() 259 | 260 | color = 
'tab:blue' 261 | ax2.set_ylabel('Length', color=color) 262 | ax2.plot(lengths, color=color) 263 | ax2.tick_params(axis='y', labelcolor=color) 264 | 265 | 266 | def deep_q_learning(): 267 | """The Q-learning training process""" 268 | 269 | # build experience replay 270 | observations = deque(maxlen=ER_BUFFER_SIZE) 271 | 272 | print("Populating replay memory...") 273 | populate_experience_replay_buffer(observations, 100000) 274 | 275 | # initialize statistics 276 | stats = namedtuple('Stats', 'rewards lengths')(rewards=list(), lengths=list()) 277 | global_time = session.run(tf.train.get_global_step()) 278 | time = 0 279 | 280 | episode = 1 281 | 282 | episode_reward = 0 283 | global_reward = 0 284 | 285 | # Start the training with an initial state 286 | state = env.reset() 287 | state = session.run(frame_proc.processed_frame, 288 | feed_dict={frame_proc.input_placeholder: state}) 289 | state = np.stack([state] * STATE_FRAMES, axis=2) 290 | 291 | while True: 292 | # env.render() 293 | 294 | # Initialize epsilon based on the current step 295 | epsilon_step = (EPSILON_START - EPSILON_END) / EXPLORE_STEPS 296 | epsilon = max(EPSILON_END, EPSILON_START - (global_time - 1) * epsilon_step) 297 | 298 | # Copy q-net weights to the target-net 299 | if global_time % UPDATE_TARGET_NET_EVERY_X_STEPS == 0: 300 | session.run(t_net_updates) 301 | print("\nCopied model parameters to target network.") 302 | 303 | # Sample next action 304 | action = choose_next_action(state, q_network, epsilon) 305 | 306 | # Perform one step with the selected action 307 | next_state, reward, terminal, info = env.step(action) 308 | 309 | # This is how we pre-process 310 | next_state = session.run(frame_proc.processed_frame, 311 | feed_dict={frame_proc.input_placeholder: next_state}) 312 | 313 | # Stack the game frames in a single array 314 | next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2) 315 | 316 | # Store the experience in ER 317 | if COMPRESS_ER: 318 | observations.append( 319 | zlib.compress(pickle.dumps((state, action, reward, next_state, terminal), 2), 2)) 320 | else: 321 | observations.append((state, action, reward, next_state, terminal)) 322 | 323 | # Sample a mini-batch from the experience replay memory 324 | mini_batch = random.sample(observations, MB_SIZE) 325 | if COMPRESS_ER: 326 | mini_batch = [pickle.loads(zlib.decompress(comp_item)) for comp_item in mini_batch] 327 | 328 | states_batch, action_batch, reward_batch, next_states_batch, terminal_batch = \ 329 | map(np.array, zip(*mini_batch)) 330 | 331 | # Double Q-learning 332 | if global_time % UPDATE_Q_NET_FREQ == 0: 333 | # First predict the next q values with the q-network 334 | q_values_next = session.run(q_network.q_estimation, 335 | feed_dict={q_network.input_placeholder: next_states_batch}) 336 | 337 | # The best action according to the q-network 338 | best_actions = np.argmax(q_values_next, axis=1) 339 | 340 | # Next, predict the next q values with the target-network 341 | q_values_next_target = session.run(t_network.q_estimation, 342 | feed_dict={t_network.input_placeholder: next_states_batch}) 343 | 344 | # Calculate q values and targets 345 | # Use the t-network estimations 346 | # But with the best action, selected by the q-network (Double Q-learning) 347 | targets_batch = reward_batch + \ 348 | np.invert(terminal_batch).astype(np.float32) * \ 349 | DISCOUNT_FACTOR * \ 350 | q_values_next_target[np.arange(MB_SIZE), best_actions] 351 | 352 | _, loss = session.run([train_op, q_network.loss], 353 | feed_dict={ 354 | 
q_network.input_placeholder: states_batch,
355 |                                   q_network.action_placeholder: action_batch,
356 |                                   q_network.target_placeholder: targets_batch})
357 | 
358 |         episode_reward += reward
359 |         global_reward += reward
360 |         time += 1
361 |         global_time += 1
362 | 
363 |         print("\rEpisode {}: "
364 |               "time {:5}; "
365 |               "reward {}; "
366 |               "epsilon: {:.4f}; "
367 |               "loss: {:.6f}; "
368 |               "@ global step {} "
369 |               "with total reward {}".format(
370 |             episode,
371 |             time,
372 |             episode_reward,
373 |             epsilon,
374 |             loss,
375 |             global_time,
376 |             global_reward), end="")
377 | 
378 |         if terminal:
379 |             # Episode end
380 | 
381 |             print()
382 | 
383 |             stats.rewards.append(int(episode_reward))
384 |             stats.lengths.append(time)
385 | 
386 |             time = 0
387 |             episode_reward = 0
388 |             episode += 1
389 | 
390 |             state = env.reset()
391 |             state = session.run(frame_proc.processed_frame,
392 |                                 feed_dict={frame_proc.input_placeholder: state})
393 |             state = np.stack([state] * STATE_FRAMES, axis=2)
394 |         else:
395 |             # Set next state as current
396 |             state = next_state
397 | 
398 |         # save checkpoints for later
399 |         if global_time % SAVE_EVERY_X_STEPS == 0:
400 |             saver.save(session, CHECKPOINT_PATH + '/network',
401 |                        global_step=tf.train.get_global_step())
402 | 
403 |             # plot the results and save the figure
404 |             plot_stats(stats)
405 | 
406 |             fig_file = CHECKPOINT_PATH + '/stats.png'
407 |             if os.path.isfile(fig_file):
408 |                 os.remove(fig_file)
409 | 
410 |             plt.savefig(fig_file)
411 |             plt.close()
412 | 
413 |             # save the stats
414 |             with open(CHECKPOINT_PATH + '/stats.arr', 'wb') as f:
415 |                 pickle.dump((stats.rewards, stats.lengths), f)
416 | 
417 | 
418 | if __name__ == '__main__':
419 |     session, q_network, t_network, t_net_updates, frame_proc, saver, train_op, env = \
420 |         initialize()
421 |     deep_q_learning()
422 | 
--------------------------------------------------------------------------------
/Chapter09/chapter_09_001_dqn.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import random
4 | import zlib
5 | from collections import deque
6 | from collections import namedtuple
7 | 
8 | import gym
9 | import matplotlib.pyplot as plt
10 | import numpy as np
11 | import tensorflow as tf
12 | 
13 | resume = True  # resume training from checkpoint (if exists)
14 | CHECKPOINT_PATH = 'deep_q_breakout_path_7'
15 | MB_SIZE = 32  # mini batch size
16 | ER_BUFFER_SIZE = 1000000  # experience replay (ER) buffer size
17 | COMPRESS_ER = True  # compress episodes in the ER buffer
18 | EXPLORE_STEPS = 1000000  # frames over which to anneal epsilon
19 | EPSILON_START = 1.0  # starting chance of an action being random
20 | EPSILON_END = 0.1  # final chance of an action being random
21 | STATE_FRAMES = 4  # number of frames to store in the state
22 | SAVE_EVERY_X_STEPS = 10000  # how often to save the model on the disk
23 | UPDATE_Q_NET_FREQ = 1  # how often to update the q network
24 | UPDATE_TARGET_NET_EVERY_X_STEPS = 10000  # copy the q-net weights to the target net
25 | DISCOUNT_FACTOR = 0.99  # discount factor
26 | 
27 | 
28 | def initialize():
29 |     """Initialize the session, the networks, and the environment"""
30 |     # Create environment
31 |     env = gym.envs.make("BreakoutDeterministic-v4")
32 | 
33 |     tf.reset_default_graph()
34 | 
35 |     session = tf.Session()
36 | 
37 |     # Tracks the total number of training steps
38 |     tf.Variable(0, name='global_step', trainable=False)
39 | 
40 |     # Create q- and target- networks
41 |     q_network = build_network("q_network")
42 |     t_network =
build_network("target_network") 43 | 44 | # create the operations to copy the q-net weights to the t-net 45 | q_net_weights = [t for t in tf.trainable_variables() 46 | if t.name.startswith(q_network.scope)] 47 | q_net_weights = sorted(q_net_weights, key=lambda v: v.name) 48 | t_net_weights = [t for t in tf.trainable_variables() 49 | if t.name.startswith(t_network.scope)] 50 | t_net_weights = sorted(t_net_weights, key=lambda v: v.name) 51 | 52 | t_net_updates = \ 53 | [n2_v.assign(n1_v) for n1_v, n2_v in zip(q_net_weights, t_net_weights)] 54 | 55 | # pre-processor of game frames 56 | frame_proc = frame_preprocessor() 57 | 58 | optimizer = tf.train.AdamOptimizer(0.00025) 59 | # optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6) 60 | 61 | # training op 62 | train_op = optimizer.minimize(q_network.loss, global_step=tf.train.get_global_step()) 63 | 64 | # restore checkpoint 65 | saver = tf.train.Saver() 66 | 67 | if not os.path.exists(CHECKPOINT_PATH): 68 | os.mkdir(CHECKPOINT_PATH) 69 | 70 | checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_PATH) 71 | if resume and checkpoint: 72 | session.run(tf.global_variables_initializer()) 73 | session.run(tf.local_variables_initializer()) 74 | 75 | print("\nRestoring checkpoint...") 76 | saver.restore(session, checkpoint.model_checkpoint_path) 77 | else: 78 | session.run(tf.global_variables_initializer()) 79 | session.run(tf.local_variables_initializer()) 80 | 81 | return session, \ 82 | q_network, \ 83 | t_network, \ 84 | t_net_updates, \ 85 | frame_proc, \ 86 | saver, \ 87 | train_op, \ 88 | env 89 | 90 | 91 | def build_network(scope: str, input_size=84, num_actions=4): 92 | """Builds the network graph.""" 93 | 94 | with tf.variable_scope(scope): 95 | # Our input are STATE_FRAMES greyscale frames of shape 84, 84 each 96 | input_placeholder = tf.placeholder(dtype=np.float32, 97 | shape=[None, input_size, input_size, STATE_FRAMES]) 98 | 99 | normalized_input = tf.to_float(input_placeholder) / 255.0 100 | 101 | # action prediction 102 | action_placeholder = tf.placeholder(dtype=tf.int32, shape=[None]) 103 | 104 | # target action 105 | target_placeholder = tf.placeholder(dtype=np.float32, shape=[None]) 106 | 107 | # Convolutional layers 108 | conv_1 = tf.layers.conv2d(normalized_input, 32, 8, 4, 109 | activation=tf.nn.relu) 110 | conv_2 = tf.layers.conv2d(conv_1, 64, 4, 2, 111 | activation=tf.nn.relu) 112 | conv_3 = tf.layers.conv2d(conv_2, 64, 3, 1, 113 | activation=tf.nn.relu) 114 | 115 | # Fully connected layers 116 | flattened = tf.layers.flatten(conv_3) 117 | fc_1 = tf.layers.dense(flattened, 512, 118 | activation=tf.nn.relu) 119 | 120 | q_estimation = tf.layers.dense(fc_1, num_actions) 121 | 122 | # Get the predictions for the chosen actions only 123 | batch_size = tf.shape(normalized_input)[0] 124 | gather_indices = tf.range(batch_size) * tf.shape(q_estimation)[1] + action_placeholder 125 | action_predictions = tf.gather(tf.reshape(q_estimation, [-1]), gather_indices) 126 | 127 | # Calculate the loss 128 | # loss = tf.reduce_mean(tf.squared_difference(target_placeholder, action_predictions)) 129 | loss = tf.losses.huber_loss(labels=target_placeholder, 130 | predictions=action_predictions, 131 | reduction=tf.losses.Reduction.MEAN) 132 | 133 | Network = namedtuple('Network', 134 | 'scope ' 135 | 'input_placeholder ' 136 | 'action_placeholder ' 137 | 'target_placeholder ' 138 | 'q_estimation ' 139 | 'action_predictions ' 140 | 'loss ') 141 | 142 | return Network(scope=scope, 143 | input_placeholder=input_placeholder, 144 | 
action_placeholder=action_placeholder, 145 | target_placeholder=target_placeholder, 146 | q_estimation=q_estimation, 147 | action_predictions=action_predictions, 148 | loss=loss) 149 | 150 | 151 | def choose_next_action(state, net, epsilon): 152 | """Epsilon-greedy policy""" 153 | 154 | # choose an action given our last state 155 | tmp = np.ones(env.action_space.n, dtype=float) * epsilon / env.action_space.n 156 | q_estimations = session.run(net.q_estimation, 157 | feed_dict={net.input_placeholder: np.reshape(state, (1,) + state.shape)})[0] 158 | 159 | tmp[np.argmax(q_estimations)] += (1.0 - epsilon) 160 | 161 | new_action = np.random.choice(np.arange(len(tmp)), p=tmp) 162 | 163 | return new_action 164 | 165 | 166 | def frame_preprocessor(): 167 | """Pre-processing the input data""" 168 | 169 | with tf.variable_scope("frame_processor"): 170 | input_placeholder = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8) 171 | processed_frame = tf.image.rgb_to_grayscale(input_placeholder) 172 | processed_frame = tf.image.crop_to_bounding_box(processed_frame, 34, 0, 160, 160) 173 | processed_frame = tf.image.resize_images( 174 | processed_frame, 175 | [84, 84], 176 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) 177 | 178 | processed_frame = tf.squeeze(processed_frame) 179 | 180 | FramePreprocessor = namedtuple('FramePreprocessor', 'input_placeholder processed_frame') 181 | 182 | return FramePreprocessor( 183 | input_placeholder=input_placeholder, 184 | processed_frame=processed_frame) 185 | 186 | 187 | def populate_experience_replay_buffer(buffer: deque, initial_buffer_size: int): 188 | """Initial population of the experience replay buffer""" 189 | 190 | # Initialize epsilon based on the current step 191 | epsilon_step = (EPSILON_START - EPSILON_END) / EXPLORE_STEPS 192 | epsilon = max(EPSILON_END, 193 | EPSILON_START - 194 | session.run(tf.train.get_global_step()) * epsilon_step) 195 | 196 | # Populate the replay memory with initial experience 197 | state = env.reset() 198 | state = session.run(frame_proc.processed_frame, 199 | feed_dict={frame_proc.input_placeholder: state}) 200 | 201 | state = np.stack([state] * STATE_FRAMES, axis=2) 202 | 203 | for i in range(initial_buffer_size): 204 | 205 | # Sample next state with the q_network 206 | action = choose_next_action(state, q_network, epsilon) 207 | 208 | # Perform one action step 209 | next_state, reward, terminal, info = env.step(action) 210 | next_state = session.run(frame_proc.processed_frame, 211 | feed_dict={frame_proc.input_placeholder: next_state}) 212 | 213 | # Stack the game frames in a single array 214 | next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2) 215 | 216 | # Store the experience in ER 217 | if COMPRESS_ER: 218 | buffer.append( 219 | zlib.compress( 220 | pickle.dumps((state, action, reward, next_state, terminal), 2), 2)) 221 | else: 222 | buffer.append((state, action, reward, next_state, terminal)) 223 | 224 | # Set next state as current 225 | if terminal: 226 | state = env.reset() 227 | state = session.run(frame_proc.processed_frame, 228 | feed_dict={frame_proc.input_placeholder: state}) 229 | 230 | state = np.stack([state] * STATE_FRAMES, axis=2) 231 | else: 232 | state = next_state 233 | 234 | print("\rExperience replay buffer: {} / {} initial ({} total)".format( 235 | len(buffer), initial_buffer_size, buffer.maxlen), end="") 236 | 237 | 238 | def plot_stats(stats): 239 | """Plot the stats""" 240 | plt.figure() 241 | 242 | plt.xlabel("Episode") 243 | 244 | # plot the rewards 245 | # rolling mean 
of 50 246 | cumsum = np.cumsum(np.insert(stats.rewards, 0, 0)) 247 | rewards = (cumsum[50:] - cumsum[:-50]) / float(50) 248 | 249 | fig, ax1 = plt.subplots() 250 | 251 | color = 'tab:red' 252 | 253 | ax1.set_ylabel('Reward', color=color) 254 | ax1.plot(rewards, color=color) 255 | ax1.tick_params(axis='y', labelcolor=color) 256 | 257 | # plot the episode lengths 258 | # rolling mean of 50 259 | cumsum = np.cumsum(np.insert(stats.lengths, 0, 0)) 260 | lengths = (cumsum[50:] - cumsum[:-50]) / float(50) 261 | 262 | ax2 = ax1.twinx() 263 | 264 | color = 'tab:blue' 265 | ax2.set_ylabel('Length', color=color) 266 | ax2.plot(lengths, color=color) 267 | ax2.tick_params(axis='y', labelcolor=color) 268 | 269 | 270 | def deep_q_learning(): 271 | """The Q-learning training process""" 272 | 273 | # build experience replay 274 | observations = deque(maxlen=ER_BUFFER_SIZE) 275 | 276 | print("Populating replay memory...") 277 | populate_experience_replay_buffer(observations, 100000) 278 | 279 | # initialize statistics 280 | stats = namedtuple('Stats', 'rewards lengths')(rewards=list(), lengths=list()) 281 | global_time = session.run(tf.train.get_global_step()) 282 | time = 0 283 | 284 | episode = 1 285 | 286 | episode_reward = 0 287 | global_reward = 0 288 | 289 | # Start the training with an initial state 290 | state = env.reset() 291 | state = session.run(frame_proc.processed_frame, 292 | feed_dict={frame_proc.input_placeholder: state}) 293 | state = np.stack([state] * STATE_FRAMES, axis=2) 294 | 295 | while True: 296 | # env.render() 297 | 298 | # Initialize epsilon based on the current step 299 | epsilon_step = (EPSILON_START - EPSILON_END) / EXPLORE_STEPS 300 | epsilon = max(EPSILON_END, EPSILON_START - (global_time - 1) * epsilon_step) 301 | 302 | # Copy q-net weights to the target-net 303 | if global_time % UPDATE_TARGET_NET_EVERY_X_STEPS == 0: 304 | session.run(t_net_updates) 305 | print("\nCopied model parameters to target network.") 306 | 307 | # Sample next action 308 | action = choose_next_action(state, q_network, epsilon) 309 | 310 | # Perform one step with the selected action 311 | next_state, reward, terminal, info = env.step(action) 312 | 313 | # This is how we pre-process 314 | next_state = session.run(frame_proc.processed_frame, 315 | feed_dict={frame_proc.input_placeholder: next_state}) 316 | 317 | # Stack the game frames in a single array 318 | next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2) 319 | 320 | # Store the experience in ER 321 | if COMPRESS_ER: 322 | observations.append( 323 | zlib.compress(pickle.dumps((state, action, reward, next_state, terminal), 2), 2)) 324 | else: 325 | observations.append((state, action, reward, next_state, terminal)) 326 | 327 | # Sample a mini-batch from the experience replay memory 328 | mini_batch = random.sample(observations, MB_SIZE) 329 | if COMPRESS_ER: 330 | mini_batch = [pickle.loads(zlib.decompress(comp_item)) for comp_item in mini_batch] 331 | 332 | states_batch, action_batch, reward_batch, next_states_batch, terminal_batch = \ 333 | map(np.array, zip(*mini_batch)) 334 | 335 | if global_time % UPDATE_Q_NET_FREQ == 0: 336 | # Compute next q values using the target network 337 | q_values_next = session.run(t_network.q_estimation, 338 | feed_dict={t_network.input_placeholder: next_states_batch}) 339 | 340 | # Calculate q values and targets 341 | targets_batch = reward_batch + \ 342 | np.invert(terminal_batch).astype(np.float32) * \ 343 | DISCOUNT_FACTOR * \ 344 | np.amax(q_values_next, axis=1) 345 | 346 | # Perform 
gradient descent update 347 | states_batch = np.array(states_batch) 348 | 349 | _, loss = session.run([train_op, q_network.loss], 350 | feed_dict={ 351 | q_network.input_placeholder: states_batch, 352 | q_network.action_placeholder: action_batch, 353 | q_network.target_placeholder: targets_batch}) 354 | 355 | episode_reward += reward 356 | global_reward += reward 357 | time += 1 358 | global_time += 1 359 | 360 | print("\rEpisode {}: " 361 | "time {:5}; " 362 | "reward {}; " 363 | "epsilon: {:.4f}; " 364 | "loss: {:.6f}; " 365 | "@ global step {} " 366 | "with total reward {}".format( 367 | episode, 368 | time, 369 | episode_reward, 370 | epsilon, 371 | loss, 372 | global_time, 373 | global_reward), end="") 374 | 375 | if terminal: 376 | # Episode end 377 | 378 | print() 379 | 380 | stats.rewards.append(int(episode_reward)) 381 | stats.lengths.append(time) 382 | 383 | time = 0 384 | episode_reward = 0 385 | episode += 1 386 | 387 | state = env.reset() 388 | state = session.run(frame_proc.processed_frame, 389 | feed_dict={frame_proc.input_placeholder: state}) 390 | state = np.stack([state] * STATE_FRAMES, axis=2) 391 | else: 392 | # Set next state as current 393 | state = next_state 394 | 395 | # save checkpoints for later 396 | if global_time % SAVE_EVERY_X_STEPS == 0: 397 | saver.save(session, CHECKPOINT_PATH + '/network', 398 | global_step=tf.train.get_global_step()) 399 | 400 | # plot the results and save the figure 401 | plot_stats(stats) 402 | 403 | fig_file = CHECKPOINT_PATH + '/stats.png' 404 | if os.path.isfile(fig_file): 405 | os.remove(fig_file) 406 | 407 | plt.savefig(fig_file) 408 | plt.close() 409 | 410 | # save the stats 411 | with open(CHECKPOINT_PATH + '/stats.arr', 'wb') as f: 412 | pickle.dump((stats.rewards, stats.lengths), f) 413 | 414 | 415 | if __name__ == '__main__': 416 | session, q_network, t_network, t_net_updates, frame_proc, saver, train_op, env = \ 417 | initialize() 418 | deep_q_learning() 419 | -------------------------------------------------------------------------------- /Chapter09/chapter_09_002_a2c.py: -------------------------------------------------------------------------------- 1 | # note must import tensorflow before gym 2 | from collections import deque 3 | 4 | import gym 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | env = gym.make('CartPole-v0') 9 | 10 | ACTIONS_COUNT = 2 11 | FUTURE_REWARD_DISCOUNT = 0.9 12 | LEARN_RATE_ACTOR = 0.01 13 | LEARN_RATE_CRITIC = 0.01 14 | STORE_SCORES_LEN = 5 15 | GAMES_PER_TRAINING = 3 16 | INPUT_NODES = env.observation_space.shape[0] 17 | 18 | ACTOR_HIDDEN = 20 19 | CRITIC_HIDDEN = 20 20 | 21 | session = tf.Session() 22 | 23 | actor_feed_forward_weights_1 = tf.Variable(tf.truncated_normal([INPUT_NODES, ACTOR_HIDDEN], stddev=0.01)) 24 | actor_feed_forward_bias_1 = tf.Variable(tf.constant(0.0, shape=[ACTOR_HIDDEN])) 25 | 26 | actor_feed_forward_weights_2 = tf.Variable(tf.truncated_normal([ACTOR_HIDDEN, ACTIONS_COUNT], stddev=0.01)) 27 | actor_feed_forward_bias_2 = tf.Variable(tf.constant(0.1, shape=[ACTIONS_COUNT])) 28 | 29 | actor_input_placeholder = tf.placeholder("float", [None, INPUT_NODES]) 30 | actor_hidden_layer = tf.nn.tanh( 31 | tf.matmul(actor_input_placeholder, actor_feed_forward_weights_1) + actor_feed_forward_bias_1) 32 | actor_output_layer = tf.nn.softmax( 33 | tf.matmul(actor_hidden_layer, actor_feed_forward_weights_2) + actor_feed_forward_bias_2) 34 | 35 | actor_action_placeholder = tf.placeholder("float", [None, ACTIONS_COUNT]) 36 | actor_advantage_placeholder = tf.placeholder("float", 
[None, 1])
37 | 
38 | policy_gradient = tf.reduce_mean(actor_advantage_placeholder * actor_action_placeholder * tf.log(actor_output_layer))
39 | actor_train_operation = tf.train.AdamOptimizer(LEARN_RATE_ACTOR).minimize(-policy_gradient)
40 | 
41 | critic_feed_forward_weights_1 = tf.Variable(tf.truncated_normal([INPUT_NODES, CRITIC_HIDDEN], stddev=0.01))
42 | critic_feed_forward_bias_1 = tf.Variable(tf.constant(0.0, shape=[CRITIC_HIDDEN]))
43 | 
44 | critic_feed_forward_weights_2 = tf.Variable(tf.truncated_normal([CRITIC_HIDDEN, 1], stddev=0.01))
45 | critic_feed_forward_bias_2 = tf.Variable(tf.constant(0.0, shape=[1]))
46 | 
47 | critic_input_placeholder = tf.placeholder("float", [None, INPUT_NODES])
48 | critic_hidden_layer = tf.nn.tanh(
49 |     tf.matmul(critic_input_placeholder, critic_feed_forward_weights_1) + critic_feed_forward_bias_1)
50 | critic_output_layer = tf.matmul(critic_hidden_layer, critic_feed_forward_weights_2) + critic_feed_forward_bias_2
51 | 
52 | critic_target_placeholder = tf.placeholder("float", [None, 1])
53 | 
54 | critic_cost = tf.reduce_mean(tf.square(critic_target_placeholder - critic_output_layer))
55 | critic_train_operation = tf.train.AdamOptimizer(LEARN_RATE_CRITIC).minimize(critic_cost)
56 | 
57 | critic_baseline = critic_target_placeholder - critic_output_layer
58 | 
59 | scores = deque(maxlen=STORE_SCORES_LEN)
60 | 
61 | # set the first action to do nothing
62 | last_action = np.zeros(ACTIONS_COUNT)
63 | last_action[1] = 1
64 | 
65 | time = 0
66 | 
67 | session.run(tf.global_variables_initializer())
68 | 
69 | 
70 | def choose_next_action(state):
71 |     probability_of_actions = session.run(actor_output_layer, feed_dict={actor_input_placeholder: [state]})[0]
72 |     try:
73 |         move = np.random.multinomial(1, probability_of_actions)
74 |     except ValueError:
75 |         # sometimes because of rounding errors we end up with probability_of_actions summing to greater than 1.
76 | # so need to reduce slightly to be a valid value 77 | move = np.random.multinomial(1, probability_of_actions / (sum(probability_of_actions) + 1e-6)) 78 | return move 79 | 80 | 81 | def train(states, actions_taken, advantages): 82 | # learn that these actions in these states lead to this reward 83 | session.run(actor_train_operation, feed_dict={ 84 | actor_input_placeholder: states, 85 | actor_action_placeholder: actions_taken, 86 | actor_advantage_placeholder: advantages}) 87 | 88 | 89 | last_state = env.reset() 90 | total_reward = 0 91 | current_game_observations = [] 92 | current_game_rewards = [] 93 | current_game_actions = [] 94 | 95 | episode_observation = [] 96 | episode_rewards = [] 97 | episode_actions = [] 98 | games = 0 99 | plot_x = [] 100 | plot_y = [] 101 | 102 | critic_costs = deque(maxlen=10) 103 | 104 | while True: 105 | env.render() 106 | last_action = choose_next_action(last_state) 107 | current_state, reward, terminal, info = env.step(np.argmax(last_action)) 108 | total_reward += reward 109 | 110 | if terminal: 111 | reward = -.10 112 | else: 113 | reward = 0.1 114 | 115 | current_game_observations.append(last_state) 116 | current_game_rewards.append(reward) 117 | current_game_actions.append(last_action) 118 | 119 | if terminal: 120 | games += 1 121 | scores.append(total_reward) 122 | 123 | if games % STORE_SCORES_LEN == 0: 124 | plot_x.append(games) 125 | plot_y.append(np.mean(scores)) 126 | 127 | # get temporal difference values for critic 128 | cumulative_reward = 0 129 | for i in reversed(range(len(current_game_observations))): 130 | cumulative_reward = current_game_rewards[i] + FUTURE_REWARD_DISCOUNT * cumulative_reward 131 | current_game_rewards[i] = [cumulative_reward] 132 | 133 | values_t = session.run(critic_output_layer, { 134 | critic_input_placeholder: current_game_observations}) 135 | advantages = [] 136 | 137 | for i in range(len(current_game_observations) - 1): 138 | advantages.append([current_game_rewards[i][0] + FUTURE_REWARD_DISCOUNT * values_t[i + 1][0] - values_t[i][0]]) 139 | 140 | advantages.append([current_game_rewards[-1][0] - values_t[-1][0]]) 141 | 142 | _, cost = session.run([critic_train_operation, critic_cost], { 143 | critic_input_placeholder: current_game_observations, 144 | critic_target_placeholder: current_game_rewards}) 145 | 146 | critic_costs.append(cost) 147 | 148 | print("Game: %s reward %s average scores %s critic cost %s" % 149 | (games, total_reward, 150 | np.mean(scores), np.mean(critic_costs))) 151 | 152 | episode_observation.extend(current_game_observations) 153 | episode_actions.extend(current_game_actions) 154 | episode_rewards.extend(advantages) 155 | 156 | total_reward = 0 157 | current_game_observations = [] 158 | current_game_rewards = [] 159 | current_game_actions = [] 160 | 161 | if games % GAMES_PER_TRAINING == 0: 162 | episode_rewards = np.array(episode_rewards) 163 | normalized_rewards = episode_rewards - np.mean(episode_rewards) 164 | normalized_rewards /= np.std(normalized_rewards) 165 | 166 | train(episode_observation, episode_actions, normalized_rewards) 167 | 168 | episode_observation = [] 169 | episode_actions = [] 170 | episode_rewards = [] 171 | 172 | time += 1 173 | 174 | # update the old values 175 | if terminal: 176 | last_state = env.reset() 177 | else: 178 | last_state = current_state 179 | -------------------------------------------------------------------------------- /Chapter10/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Python-Deep-Learning-Second-Edition/a44db1a21d101009dd610ad3adafba475a648f0e/Chapter10/__init__.py -------------------------------------------------------------------------------- /Chapter10/data/data.gzip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Deep-Learning-Second-Edition/a44db1a21d101009dd610ad3adafba475a648f0e/Chapter10/data/data.gzip -------------------------------------------------------------------------------- /Chapter10/data/model.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Deep-Learning-Second-Edition/a44db1a21d101009dd610ad3adafba475a648f0e/Chapter10/data/model.pt -------------------------------------------------------------------------------- /Chapter10/keyboard_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | import gzip 5 | import os 6 | import pickle 7 | import time 8 | 9 | import gym 10 | import numpy as np 11 | 12 | from util import DATA_DIR, DATA_FILE 13 | 14 | 15 | def key_press(key, mod): 16 | global human_agent_action, human_wants_restart, human_wants_exit, human_sets_pause, acceleration 17 | if key == 0xff0d: # enter 18 | human_wants_restart = True 19 | 20 | if key == 0xff1b: # escape 21 | human_wants_exit = True 22 | 23 | if key == 0x020: # space 24 | human_sets_pause = not human_sets_pause 25 | 26 | if key == 0xff52: # up 27 | acceleration = True 28 | human_agent_action[1] = 1.0 29 | human_agent_action[2] = 0 30 | if key == 0xff54: # down 31 | human_agent_action[2] = 1 # stronger brakes 32 | 33 | if key == 0xff51: # left 34 | human_agent_action[0] = -1.0 35 | 36 | # no acceleration while turning 37 | human_agent_action[1] = 0.0 38 | 39 | if key == 0xff53: # right 40 | human_agent_action[0] = +1.0 41 | 42 | # no acceleration when turning 43 | human_agent_action[1] = 0.0 44 | 45 | 46 | def key_release(key, mod): 47 | global human_agent_action, acceleration 48 | if key == 0xff52: # up 49 | acceleration = False 50 | human_agent_action[1] = 0.0 51 | 52 | if key == 0xff54: # down 53 | human_agent_action[2] = 0.0 54 | 55 | if key == 0xff51: # left 56 | human_agent_action[0] = 0 57 | 58 | # restore acceleration 59 | human_agent_action[1] = acceleration 60 | 61 | if key == 0xff53: # right 62 | human_agent_action[0] = 0 63 | 64 | # restore acceleration 65 | human_agent_action[1] = acceleration 66 | 67 | 68 | def rollout(env): 69 | global human_wants_restart, human_agent_action, human_wants_exit, human_sets_pause 70 | 71 | ACTIONS = env.action_space.shape[0] 72 | human_agent_action = np.zeros(ACTIONS, dtype=np.float32) 73 | human_wants_exit = False 74 | human_sets_pause = False 75 | 76 | human_wants_restart = False 77 | 78 | # if the file exists, append 79 | if os.path.exists(os.path.join(DATA_DIR, DATA_FILE)): 80 | with gzip.open(os.path.join(DATA_DIR, DATA_FILE), 'rb') as f: 81 | observations = pickle.load(f) 82 | else: 83 | observations = list() 84 | 85 | state = env.reset() 86 | total_reward = 0 87 | total_timesteps = 0 88 | episode = 1 89 | while 1: 90 | env.render() 91 | 92 | a = np.copy(human_agent_action) 93 | 94 | old_state = state 95 | 96 | if human_agent_action[2] != 0: 97 | human_agent_action[2] = 0.1 98 | 99 | state, r, terminal, info = env.step(human_agent_action) 100 | 101 | 
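        # Each recorded transition is a (state, action, next_state, reward,
        # terminal) tuple; train.py later keeps only the first two fields to
        # fit the behavioral cloning network. A sketch with made-up data:
        #     observations = [(s0, [0, 1, 0], s1, 0.1, False)]  # accelerated in s0
        #     states = [o[0] for o in observations]
        #     actions = [o[1] for o in observations]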
observations.append((old_state, a, state, r, terminal)) 102 | 103 | total_reward += r 104 | 105 | if human_wants_exit: 106 | env.close() 107 | return 108 | 109 | if human_wants_restart: 110 | human_wants_restart = False 111 | state = env.reset() 112 | continue 113 | 114 | if terminal: 115 | if episode % 5 == 0: 116 | # store generated data 117 | data_file_path = os.path.join(DATA_DIR, DATA_FILE) 118 | print("Saving observations to " + data_file_path) 119 | 120 | if not os.path.exists(DATA_DIR): 121 | os.mkdir(DATA_DIR) 122 | 123 | with gzip.open(data_file_path, 'wb') as f: 124 | pickle.dump(observations, f) 125 | 126 | print("timesteps %i reward %0.2f" % (total_timesteps, total_reward)) 127 | 128 | episode += 1 129 | 130 | state = env.reset() 131 | 132 | while human_sets_pause: 133 | env.render() 134 | time.sleep(0.1) 135 | 136 | 137 | if __name__ == '__main__': 138 | env = gym.make('CarRacing-v0') 139 | 140 | env.render() 141 | env.unwrapped.viewer.window.on_key_press = key_press 142 | env.unwrapped.viewer.window.on_key_release = key_release 143 | 144 | print("ACTIONS={}".format(env.action_space.shape[0])) 145 | print("Press keys 1 2 3 ... to take actions 1 2 3 ...") 146 | print("No keys pressed is taking action 0") 147 | 148 | rollout(env) 149 | -------------------------------------------------------------------------------- /Chapter10/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | from nn_agent import nn_agent_play 6 | from train import \ 7 | DATA_DIR, \ 8 | MODEL_FILE, \ 9 | build_network, \ 10 | train 11 | 12 | if __name__ == '__main__': 13 | # create cuda device 14 | dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 15 | 16 | # create the network 17 | model = build_network() 18 | 19 | # if true, try to restore the network from the data file 20 | restore = False 21 | if restore: 22 | model_path = os.path.join(DATA_DIR, MODEL_FILE) 23 | model.load_state_dict(torch.load(model_path)) 24 | 25 | # set the model to evaluation (and not training) mode 26 | model.eval() 27 | 28 | # transfer to the gpu 29 | model = model.to(dev) 30 | 31 | # train 32 | train(model, dev) 33 | 34 | # agent play 35 | nn_agent_play(model, dev) 36 | -------------------------------------------------------------------------------- /Chapter10/nn_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | import os 5 | 6 | import gym 7 | import numpy as np 8 | import torch 9 | 10 | from train \ 11 | import \ 12 | data_transform, \ 13 | available_actions, \ 14 | build_network, \ 15 | DATA_DIR, MODEL_FILE 16 | 17 | 18 | def nn_agent_play(model, device): 19 | """ 20 | Let the agent play 21 | :param model: the network 22 | :param device: the cuda device 23 | """ 24 | 25 | env = gym.make('CarRacing-v0') 26 | 27 | # use ESC to exit 28 | global human_wants_exit 29 | human_wants_exit = False 30 | 31 | def key_press(key, mod): 32 | """Capture ESC key""" 33 | global human_wants_exit 34 | if key == 0xff1b: # escape 35 | human_wants_exit = True 36 | 37 | # initialize environment 38 | state = env.reset() 39 | env.unwrapped.viewer.window.on_key_press = key_press 40 | 41 | while 1: 42 | env.render() 43 | 44 | state = np.moveaxis(state, 2, 0) # channel first image 45 | 46 | # numpy to tensor 47 | state = torch.from_numpy(np.flip(state, axis=0).copy()) 48 | state = data_transform(state) # apply transformations 49 | state = 
state.unsqueeze(0) # add additional dimension 50 | state = state.to(device) # transfer to GPU 51 | 52 | # forward 53 | with torch.set_grad_enabled(False): 54 | outputs = model(state) 55 | 56 | normalized = torch.nn.functional.softmax(outputs, dim=1) 57 | 58 | # translate from net output to env action 59 | max_action = np.argmax(normalized.cpu().numpy()[0]) 60 | action = available_actions[max_action] 61 | 62 | # adjust brake power 63 | if action[2] != 0: 64 | action[2] = 0.3 65 | 66 | state, _, terminal, _ = env.step(action) # one step 67 | 68 | if terminal: 69 | state = env.reset() 70 | 71 | if human_wants_exit: 72 | env.close() 73 | return 74 | 75 | 76 | if __name__ == '__main__': 77 | dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 78 | m = build_network() 79 | m.load_state_dict(torch.load(os.path.join(DATA_DIR, MODEL_FILE))) 80 | m.eval() 81 | m = m.to(dev) 82 | nn_agent_play(m, dev) 83 | -------------------------------------------------------------------------------- /Chapter10/train.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os 3 | import pickle 4 | import random 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | 11 | from util import \ 12 | available_actions, \ 13 | data_transform, \ 14 | DATA_DIR, \ 15 | DATA_FILE, \ 16 | MODEL_FILE 17 | 18 | restore = False # restore from file if exists 19 | BATCH_SIZE = 32 # mb size 20 | EPOCHS = 30 # number of epochs 21 | TRAIN_VAL_SPLIT = 0.85 # train/val ratio 22 | 23 | # balance the dataset by multiplying rare events 24 | MULTIPLY_RARE_EVENTS = 20 25 | 26 | 27 | def read_data(): 28 | """Read the data generated by keyboard_agent.py""" 29 | with gzip.open(os.path.join(DATA_DIR, DATA_FILE), 'rb') as f: 30 | data = pickle.load(f) 31 | 32 | # balance dataset by multiplying 33 | # brake, right+brake, left+brake events 34 | # since they are too few 35 | if MULTIPLY_RARE_EVENTS > 1: 36 | data_copy = data.copy() 37 | for d in data: 38 | for a in ([[-1, 0, 1], [1, 0, 1], [0, 0, 1]]): 39 | if np.array_equal(d[1], a): 40 | data_copy += (d,) * MULTIPLY_RARE_EVENTS 41 | 42 | data = data_copy 43 | 44 | random.shuffle(data) 45 | 46 | # to numpy arrays 47 | states, actions, _, _, _ = map(np.array, zip(*data)) 48 | 49 | # reverse one-hot, actions to classes 50 | act_classes = np.full((len(actions)), -1, dtype=np.int) 51 | for i, a in enumerate(available_actions): 52 | act_classes[np.all(actions == a, axis=1)] = i 53 | 54 | # drop unsupported actions 55 | states = np.array(states) 56 | states = states[act_classes != -1] 57 | act_classes = act_classes[act_classes != -1] 58 | 59 | # drop some of the acceleration actions to balance the dataset 60 | non_accel = act_classes != available_actions.index([0, 1, 0]) 61 | drop_mask = np.random.rand(act_classes[~non_accel].size) > 0.7 62 | non_accel[~non_accel] = drop_mask 63 | states = states[non_accel] 64 | act_classes = act_classes[non_accel] 65 | 66 | # drop some of the non-action actions to balance the dataset 67 | non_act = act_classes != available_actions.index([0, 0, 0]) 68 | drop_mask = np.random.rand(act_classes[~non_act].size) > 0.3 69 | non_act[~non_act] = drop_mask 70 | states = states[non_act] 71 | act_classes = act_classes[non_act] 72 | 73 | for i, a in enumerate(available_actions): 74 | print("Actions of type {}: {}" 75 | .format(str(a), str(act_classes[act_classes == i].size))) 76 | 77 | print("Total transitions: " + str(len(act_classes))) 78 | 79 | return states, 
act_classes 80 | 81 | 82 | def create_datasets(): 83 | """Create training and validation datasets""" 84 | 85 | class TensorDatasetTransforms(torch.utils.data.TensorDataset): 86 | """ 87 | Helper class to allow transformations 88 | by default TensorDataset doesn't support them 89 | """ 90 | 91 | def __init__(self, x, y): 92 | super().__init__(x, y) 93 | 94 | def __getitem__(self, index): 95 | tensor = data_transform(self.tensors[0][index]) 96 | return (tensor,) + tuple(t[index] for t in self.tensors[1:]) 97 | 98 | x, y = read_data() 99 | x = np.moveaxis(x, 3, 1) # channel first (torch requirement) 100 | 101 | # train dataset 102 | x_train = x[:int(len(x) * TRAIN_VAL_SPLIT)] 103 | y_train = y[:int(len(y) * TRAIN_VAL_SPLIT)] 104 | 105 | train_set = TensorDatasetTransforms( 106 | torch.tensor(x_train), 107 | torch.tensor(y_train)) 108 | 109 | train_loader = torch.utils.data.DataLoader(train_set, 110 | batch_size=BATCH_SIZE, 111 | shuffle=True, 112 | num_workers=2) 113 | 114 | # test dataset 115 | x_val, y_val = x[int(len(x_train)):], y[int(len(y_train)):] 116 | 117 | val_set = TensorDatasetTransforms( 118 | torch.tensor(x_val), 119 | torch.tensor(y_val)) 120 | 121 | val_loader = torch.utils.data.DataLoader(val_set, 122 | batch_size=BATCH_SIZE, 123 | shuffle=False, 124 | num_workers=2) 125 | 126 | return train_loader, val_loader 127 | 128 | 129 | def build_network(): 130 | """Build the torch network""" 131 | 132 | class Flatten(nn.Module): 133 | """ 134 | Helper class to flatten the tensor 135 | between the last conv and first fc layer 136 | """ 137 | 138 | def forward(self, x): 139 | return x.view(x.size()[0], -1) 140 | 141 | # Same network as with the DQN example 142 | model = torch.nn.Sequential( 143 | torch.nn.Conv2d(1, 32, 8, 4), 144 | torch.nn.BatchNorm2d(32), 145 | torch.nn.ELU(), 146 | torch.nn.Dropout2d(0.5), 147 | torch.nn.Conv2d(32, 64, 4, 2), 148 | torch.nn.BatchNorm2d(64), 149 | torch.nn.ELU(), 150 | torch.nn.Dropout2d(0.5), 151 | torch.nn.Conv2d(64, 64, 3, 1), 152 | torch.nn.ELU(), 153 | Flatten(), 154 | torch.nn.BatchNorm1d(64 * 7 * 7), 155 | torch.nn.Dropout(), 156 | torch.nn.Linear(64 * 7 * 7, 120), 157 | torch.nn.ELU(), 158 | torch.nn.BatchNorm1d(120), 159 | torch.nn.Dropout(), 160 | torch.nn.Linear(120, len(available_actions)), 161 | ) 162 | 163 | return model 164 | 165 | 166 | def train(model, device): 167 | """ 168 | Training main method 169 | :param model: the network 170 | :param device: the cuda device 171 | """ 172 | 173 | loss_function = nn.CrossEntropyLoss() 174 | 175 | optimizer = optim.Adam(model.parameters()) 176 | 177 | train_loader, val_order = create_datasets() # read datasets 178 | 179 | # train 180 | for epoch in range(EPOCHS): 181 | print('Epoch {}/{}'.format(epoch + 1, EPOCHS)) 182 | 183 | train_epoch(model, 184 | device, 185 | loss_function, 186 | optimizer, 187 | train_loader) 188 | 189 | test(model, device, loss_function, val_order) 190 | 191 | # save model 192 | model_path = os.path.join(DATA_DIR, MODEL_FILE) 193 | torch.save(model.state_dict(), model_path) 194 | 195 | 196 | def train_epoch(model, device, loss_function, optimizer, data_loader): 197 | """Train for a single epoch""" 198 | 199 | # set model to training mode 200 | model.train() 201 | 202 | current_loss = 0.0 203 | current_acc = 0 204 | 205 | # iterate over the training data 206 | for i, (inputs, labels) in enumerate(data_loader): 207 | # send the input/labels to the GPU 208 | inputs = inputs.to(device) 209 | labels = labels.to(device) 210 | 211 | # zero the parameter gradients 212 | 
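        # PyTorch accumulates gradients across backward() calls, so without
        # this reset each minibatch would add its gradients on top of the
        # previous one's. A standalone illustration (not part of this pipeline):
        #     w = torch.ones(1, requires_grad=True)
        #     (w * 2).backward()
        #     (w * 2).backward()
        #     w.grad  # tensor([4.]) - doubled because nothing was zeroed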
212 |         optimizer.zero_grad()
213 | 
214 |         with torch.set_grad_enabled(True):
215 |             # forward
216 |             outputs = model(inputs)
217 |             _, predictions = torch.max(outputs, 1)
218 |             loss = loss_function(outputs, labels)
219 | 
220 |             # backward
221 |             loss.backward()
222 |             optimizer.step()
223 | 
224 |         # statistics
225 |         current_loss += loss.item() * inputs.size(0)
226 |         current_acc += torch.sum(predictions == labels.data)
227 | 
228 |     total_loss = current_loss / len(data_loader.dataset)
229 |     total_acc = current_acc.double() / len(data_loader.dataset)
230 | 
231 |     print('Train Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss, total_acc))
232 | 
233 | 
234 | def test(model, device, loss_function, data_loader):
235 |     """Evaluate over the whole validation set"""
236 | 
237 |     model.eval()  # set the model to evaluation mode
238 | 
239 |     current_loss = 0.0
240 |     current_acc = 0
241 | 
242 |     # iterate over the validation data
243 |     for i, (inputs, labels) in enumerate(data_loader):
244 |         # send the inputs/labels to the device
245 |         inputs = inputs.to(device)
246 |         labels = labels.to(device)
247 | 
248 |         # forward
249 |         with torch.set_grad_enabled(False):
250 |             outputs = model(inputs)
251 |             _, predictions = torch.max(outputs, 1)
252 |             loss = loss_function(outputs, labels)
253 | 
254 |         # statistics
255 |         current_loss += loss.item() * inputs.size(0)
256 |         current_acc += torch.sum(predictions == labels.data)
257 | 
258 |     total_loss = current_loss / len(data_loader.dataset)
259 |     total_acc = current_acc.double() / len(data_loader.dataset)
260 | 
261 |     print('Test Loss: {:.4f}; Accuracy: {:.4f}'
262 |           .format(total_loss, total_acc))
263 | 
264 | 
265 | if __name__ == '__main__':
266 |     dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
267 |     m = build_network()
268 | 
269 |     if restore:
270 |         model_path = os.path.join(DATA_DIR, MODEL_FILE)
271 |         m.load_state_dict(torch.load(model_path))
272 | 
273 |     m.eval()  # train_epoch() and test() switch the train/eval modes as needed
274 |     m = m.to(dev)
275 |     train(m, dev)
276 | 
--------------------------------------------------------------------------------
/Chapter10/util.py:
--------------------------------------------------------------------------------
1 | from torchvision import transforms
2 | 
3 | DATA_DIR = 'data'
4 | DATA_FILE = 'data.gzip'
5 | MODEL_FILE = 'model.pt'
6 | 
7 | # available actions: [steering, acceleration, brake]
8 | available_actions = [[0, 0, 0],  # no action
9 |                      [-1, 0, 0],  # left
10 |                      [-1, 0, 1],  # left+brake
11 |                      [1, 0, 0],  # right
12 |                      [1, 0, 1],  # right+brake
13 |                      [0, 1, 0],  # acceleration
14 |                      [0, 0, 1], ]  # brake
15 | 
16 | # transformations for training/testing
17 | data_transform = transforms.Compose([
18 |     transforms.ToPILImage(),
19 |     transforms.Grayscale(1),
20 |     transforms.Pad((12, 12, 12, 0)),  # pad left, top, and right by 12 pixels
21 |     transforms.CenterCrop(84),  # crop to 84x84
22 |     transforms.ToTensor(),
23 |     transforms.Normalize((0,), (1,)),
24 | ])
25 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 Packt
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Python Deep Learning - Second Edition
2 | 
3 | 
4 | 
5 | This is the code repository for [Python Deep Learning - Second Edition](https://www.packtpub.com/big-data-and-business-intelligence/python-deep-learning-second-edition?utm_source=github&utm_medium=repository&utm_campaign=), published by Packt.
6 | 
7 | **Exploring deep learning techniques and neural network architectures with PyTorch, Keras, and TensorFlow**
8 | 
9 | ## About the Book
10 | With the surge of artificial intelligence applications catering to both business and consumer needs, deep learning is more important than ever for meeting current and future market demands. With Python Deep Learning, Second Edition, you'll explore deep learning and learn how to put machine learning to use in your projects.
11 | 
12 | This book covers the following exciting features:
13 | * Grasp the mathematical theory behind neural networks and deep learning processes
14 | * Investigate and resolve computer vision challenges using convolutional networks and capsule networks
15 | * Solve generative tasks using variational autoencoders and Generative Adversarial Networks
16 | * Implement complex NLP tasks using recurrent networks (LSTM and GRU) and attention models
17 | * Explore reinforcement learning and understand how agents behave in a complex environment
18 | * Get up to date with applications of deep learning in autonomous vehicles
19 | 
20 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/B07KQ29CQ3/) today!
21 | 
22 | 
23 | 
24 | 
25 | ## Instructions and Navigations
26 | All of the code is organized into folders. For example, Chapter02.
27 | 
28 | The code will look like the following:
29 | ```
30 | import torch
31 | 
32 | torch.manual_seed(1234)
33 | 
34 | hidden_units = 5
35 | 
36 | net = torch.nn.Sequential(
37 |     torch.nn.Linear(4, hidden_units),
38 |     torch.nn.ReLU(),
39 |     torch.nn.Linear(hidden_units, 3)
40 | )
41 | ```
42 | 
43 | **The following is what you need for this book:**
44 | This book is for data science practitioners, machine learning engineers, and those interested in deep learning who have a basic foundation in machine learning and some Python programming experience. A background in mathematics and a conceptual understanding of calculus and statistics will help you gain maximum benefit from this book.
45 | 
46 | With the following software and hardware list, you can run all of the code files present in the book (Chapters 1-10).
47 | ### Software and Hardware List
48 | | Chapter  | Software required                          | OS required                        |
49 | | -------- | ------------------------------------------ | ---------------------------------- |
50 | | All      | Python 3.6, Anaconda 5.2, Jupyter Notebook | Windows, Mac OS X, and Linux (Any) |
51 | 
52 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](https://www.packtpub.com/sites/default/files/downloads/9781789348460_ColorImages.pdf).
53 | 
54 | ### Related products
55 | * Python Deep Learning Projects [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/python-deep-learning-projects?utm_source=github&utm_medium=repository&utm_campaign=) [[Amazon]](https://www.amazon.com/dp/9781788997096)
56 | 
57 | * Advanced Deep Learning with Keras [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/advanced-deep-learning-keras?utm_source=github&utm_medium=repository&utm_campaign=9781788629416) [[Amazon]](https://www.amazon.com/dp/9781788629416)
58 | 
59 | ## Get to Know the Authors
60 | **Ivan Vasilev** started working on the first open source Java deep learning library with GPU support in 2013. The library was acquired by a German company, where he continued its development. He has also worked as a machine learning engineer and researcher in the area of medical image classification and segmentation with deep neural networks. Since 2017, he has focused on financial machine learning. He is working on a Python open source algorithmic trading library, which provides the infrastructure to experiment with different ML algorithms. He holds an MSc degree in artificial intelligence from the University of Sofia, St. Kliment Ohridski.
61 | 
62 | **Daniel Slater** started programming at age 11, developing mods for the id Software game Quake. His obsession led him to become a developer working in the gaming industry on the hit computer game series Championship Manager. He then moved into finance, working on risk and high-performance messaging systems. He is now a staff engineer working on big data at Skimlinks to understand online user behavior. He spends his spare time training AI to beat computer games and talks at tech conferences about deep learning and reinforcement learning; his blog is called Daniel Slater's blog. His work in this field has been cited by Google.
63 | 
64 | **Gianmario Spacagna** is a senior data scientist at Pirelli, processing sensor and telemetry data for Internet of Things (IoT) and connected-vehicle applications. He works closely with tire mechanics, engineers, and business units to analyze and formulate hybrid, physics-driven, and data-driven automotive models. His main expertise is in building ML systems and end-to-end solutions for data products. He holds a master's degree in telematics from the Polytechnic of Turin, as well as one in software engineering of distributed systems from KTH, Stockholm. Prior to Pirelli, he worked in retail and business banking (Barclays), cyber security (Cisco), and predictive marketing (AgilOne), and did some occasional freelancing.
65 | 
66 | **Peter Roelants** holds a master's in computer science with a specialization in AI from KU Leuven. He works on applying deep learning to a variety of problems, such as spectral imaging, speech recognition, text understanding, and document information extraction. He currently works at Onfido as a team leader for the data extraction research team, focusing on data extraction from official documents.
67 | 
68 | ## Other books by the authors
69 | [Python Deep Learning Projects](https://www.packtpub.com/big-data-and-business-intelligence/python-deep-learning-projects)
70 | 
71 | [Advanced Deep Learning with Keras](https://www.packtpub.com/big-data-and-business-intelligence/advanced-deep-learning-keras)
72 | 
73 | 
74 | ### Suggestions and Feedback
75 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSdy7dATC6QmEL81FIUuymZ0Wy9vH1jHkvpY57OiMeKGqib_Ow/viewform) if you have any feedback or suggestions.
76 | 
77 | 
78 | 
--------------------------------------------------------------------------------
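### A quick look at the Chapter10 input pipeline

The `data_transform` pipeline in `Chapter10/util.py` converts a raw environment frame to grayscale, pads it, center-crops it to 84x84, and turns it into a tensor. A minimal sketch of what it produces, assuming it is run from inside the `Chapter10` folder; an all-zeros dummy array stands in for a real CarRacing-v0 observation (which has the same 96x96x3 shape):

```
import numpy as np

from util import data_transform

# dummy stand-in for a CarRacing-v0 frame: height x width x RGB channels
frame = np.zeros((96, 96, 3), dtype=np.uint8)

# grayscale -> pad -> center-crop -> tensor
state = data_transform(frame)
print(state.shape)  # torch.Size([1, 84, 84])

# the networks in train.py and nn_agent.py expect batched input,
# so a batch dimension is added before a forward pass
batch = state.unsqueeze(0)  # torch.Size([1, 1, 84, 84])
```

The single-channel 84x84 output matches the `Conv2d(1, 32, 8, 4)` input layer built in `train.py`, and after the three convolutions it shrinks to the 64x7x7 feature map consumed by the `Linear(64 * 7 * 7, 120)` layer.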