├── .gitattributes
├── Chapter01
│   ├── chapter_01_001.py
│   └── pytorch_iris.py
├── Chapter02
│   ├── chapter_02_001.py
│   └── chapter_02_002.py
├── Chapter03
│   ├── chapter_03_001.py
│   └── chapter_03_002.py
├── Chapter04
│   ├── chapter_04_001.py
│   ├── chapter_04_002.py
│   └── chapter_04_003.py
├── Chapter05
│   ├── chapter_05_001.py
│   ├── chapter_05_002.py
│   └── chapter_05_003.py
├── Chapter06
│   ├── chapter_06_001.py
│   └── chapter_06_002.py
├── Chapter07
│   ├── chapter_07_001.py
│   └── language model
│       ├── data_processing.py
│       ├── data_reader.py
│       ├── model.py
│       ├── wap.txt
│       └── war_and_peace.txt
├── Chapter08
│   └── chapter_08_001.py
├── Chapter09
│   ├── chapter_09_001_ddqn.py
│   ├── chapter_09_001_dqn.py
│   └── chapter_09_002_a2c.py
├── Chapter10
│   ├── __init__.py
│   ├── data
│   │   ├── data.gzip
│   │   └── model.pt
│   ├── keyboard_agent.py
│   ├── main.py
│   ├── nn_agent.py
│   ├── train.py
│   └── util.py
├── LICENSE
└── README.md
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 |
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
18 |
--------------------------------------------------------------------------------
/Chapter01/chapter_01_001.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | dataset = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
4 | names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'])
5 |
6 | dataset['species'] = pd.Categorical(dataset['species']).codes
7 |
8 | dataset = dataset.sample(frac=1, random_state=1234)
9 |
10 | train_input = dataset.values[:120, :4]
11 | train_target = dataset.values[:120, 4]
12 |
13 | test_input = dataset.values[120:, :4]
14 | test_target = dataset.values[120:, 4]
15 |
16 | import torch
17 |
18 | torch.manual_seed(1234)
19 |
20 | hidden_units = 5
21 |
22 | net = torch.nn.Sequential(
23 | torch.nn.Linear(4, hidden_units),
24 | torch.nn.ReLU(),
25 | torch.nn.Linear(hidden_units, 3)
26 | )
27 |
28 | # choose optimizer and loss function
29 | criterion = torch.nn.CrossEntropyLoss()
30 | optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)
31 |
32 | # train
33 | epochs = 50
34 |
35 | for epoch in range(epochs):
36 | inputs = torch.autograd.Variable(torch.Tensor(train_input).float())
37 | targets = torch.autograd.Variable(torch.Tensor(train_target).long())
38 |
39 | optimizer.zero_grad()
40 | out = net(inputs)
41 | loss = criterion(out, targets)
42 | loss.backward()
43 | optimizer.step()
44 |
45 | if epoch == 0 or (epoch + 1) % 10 == 0:
46 | print('Epoch %d Loss: %.4f' % (epoch + 1, loss.item()))
47 |
48 | # test
49 | import numpy as np
50 |
51 | inputs = torch.autograd.Variable(torch.Tensor(test_input).float())
52 | targets = torch.autograd.Variable(torch.Tensor(test_target).long())
53 |
54 | optimizer.zero_grad()
55 | out = net(inputs)
56 | _, predicted = torch.max(out.data, 1)
57 |
58 | error_count = test_target.size - np.count_nonzero((targets == predicted).numpy())
59 | print('Errors: %d; Accuracy: %d%%' % (error_count, 100 * torch.sum(targets == predicted).item() / test_target.size))
60 |
--------------------------------------------------------------------------------
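
A minimal sketch (not part of the book code; assumes only PyTorch and illustrative logit values) of how `torch.max` in the test step above turns raw network outputs into class predictions:

import torch

# two samples, three classes; values are made up for illustration
logits = torch.tensor([[2.0, 0.5, -1.0],
                       [0.1, 0.2, 3.0]])
values, predicted = torch.max(logits, 1)  # max over the class dimension
print(predicted)  # tensor([0, 2]): index of the largest logit per row
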
/Chapter01/pytorch_iris.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | dataset = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
4 | names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'])
5 |
6 | dataset['species'] = pd.Categorical(dataset['species']).codes
7 |
8 | dataset = dataset.sample(frac=1, random_state=1234)
9 |
10 | train_input = dataset.values[:120, :4]
11 | train_target = dataset.values[:120, 4]
12 |
13 | test_input = dataset.values[120:, :4]
14 | test_target = dataset.values[120:, 4]
15 |
16 | import torch
17 |
18 | torch.manual_seed(1234)
19 |
20 | hidden_units = 5
21 |
22 | net = torch.nn.Sequential(
23 | torch.nn.Linear(4, hidden_units),
24 | torch.nn.ReLU(),
25 | torch.nn.Linear(hidden_units, 3)
26 | )
27 |
28 | # choose optimizer and loss function
29 | criterion = torch.nn.CrossEntropyLoss()
30 | optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)
31 |
32 | # train
33 | epochs = 50
34 |
35 | for epoch in range(epochs):
36 | inputs = torch.autograd.Variable(torch.Tensor(train_input).float())
37 | targets = torch.autograd.Variable(torch.Tensor(train_target).long())
38 |
39 | optimizer.zero_grad()
40 | out = net(inputs)
41 | loss = criterion(out, targets)
42 | loss.backward()
43 | optimizer.step()
44 |
45 | if epoch == 0 or (epoch + 1) % 10 == 0:
46 |         print('Epoch %d Loss: %.4f' % (epoch + 1, loss.item()))
47 |
48 | # test
49 | import numpy as np
50 |
51 | inputs = torch.autograd.Variable(torch.Tensor(test_input).float())
52 | targets = torch.autograd.Variable(torch.Tensor(test_target).long())
53 |
54 | optimizer.zero_grad()
55 | out = net(inputs)
56 | _, predicted = torch.max(out.data, 1)
57 |
58 | error_count = test_target.size - np.count_nonzero((targets == predicted).numpy())
59 | print('Errors: %d; Accuracy: %d%%' % (error_count, 100 * torch.sum(targets == predicted).item() / test_target.size))
60 |
--------------------------------------------------------------------------------
/Chapter02/chapter_02_001.py:
--------------------------------------------------------------------------------
1 | # The user can modify the values of the weight w
2 | # as well as bias_value_1 and bias_value_2 to observe
3 | # how this plots to different step functions
4 |
5 | import matplotlib.pyplot as plt
6 | import numpy
7 |
8 | weight_value = 1000
9 |
10 | # modify to change where the step function starts
11 | bias_value_1 = 5000
12 |
13 | # modify to change where the step function ends
14 | bias_value_2 = -5000
15 |
16 | # set up the plot axes
17 | plt.axis([-10, 10, -1, 10])
18 |
19 | print("The step function starts at {0} and ends at {1}"
20 | .format(-bias_value_1 / weight_value,
21 | -bias_value_2 / weight_value))
22 |
23 | inputs = numpy.arange(-10, 10, 0.01)
24 | outputs = list()
25 |
26 | # iterate over a range of inputs
27 | for x in inputs:
28 | y1 = 1.0 / (1.0 + numpy.exp(-weight_value * x - bias_value_1))
29 | y2 = 1.0 / (1.0 + numpy.exp(-weight_value * x - bias_value_2))
30 |
31 | # modify to change the height of the step function
32 | w = 7
33 |
34 | # network output
35 | y = y1 * w - y2 * w
36 |
37 | outputs.append(y)
38 |
39 | plt.plot(inputs, outputs, lw=2, color='black')
40 | plt.show()
41 |
--------------------------------------------------------------------------------
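
A quick numeric check (illustrative point x = 0; the saturated exponentials are silenced explicitly) that the difference of the two steep sigmoids above forms a plateau of height w between the two boundaries:

import numpy

weight_value, bias_value_1, bias_value_2, w = 1000, 5000, -5000, 7
x = 0.0  # a point inside the interval (-5, 5)
with numpy.errstate(over='ignore'):  # exp saturates for these large arguments
    y1 = 1.0 / (1.0 + numpy.exp(-weight_value * x - bias_value_1))
    y2 = 1.0 / (1.0 + numpy.exp(-weight_value * x - bias_value_2))
print(y1 * w - y2 * w)  # ~7.0: y1 has saturated at 1, y2 at 0
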
/Chapter02/chapter_02_002.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy
3 | from matplotlib.colors import ListedColormap
4 |
5 |
6 | def tanh(x):
7 | return (1.0 - numpy.exp(-2 * x)) / (1.0 + numpy.exp(-2 * x))
8 |
9 |
10 | def tanh_derivative(x):
11 |     return (1 + x) * (1 - x)  # x is the stored tanh output, so this is 1 - tanh(s)**2
12 |
13 |
14 | class NeuralNetwork:
15 | # net_arch consists of a list of integers, indicating
16 | # the number of neurons in each layer
17 | def __init__(self, net_arch):
18 | self.activation_func = tanh
19 | self.activation_derivative = tanh_derivative
20 | self.layers = len(net_arch)
21 | self.steps_per_epoch = 1000
22 | self.net_arch = net_arch
23 |
24 | # initialize the weights with random values in the range (-1,1)
25 | self.weights = []
26 | for layer in range(len(net_arch) - 1):
27 | w = 2 * numpy.random.rand(net_arch[layer] + 1, net_arch[layer + 1]) - 1
28 | self.weights.append(w)
29 |
30 | def fit(self, data, labels, learning_rate=0.1, epochs=10):
31 |         """
32 |         :param data: the set of all possible pairs of input booleans,
33 |                      True or False, encoded as the integers 1 or 0
34 |         :param labels: the result of the logical operation 'xor' on each
35 |                        of those input pairs; an array with one 0/1 value
36 |                        per datum
37 |         """
38 |
39 | # Add bias units to the input layer
40 | ones = numpy.ones((1, data.shape[0]))
41 | Z = numpy.concatenate((ones.T, data), axis=1)
42 | training = epochs * self.steps_per_epoch
43 | for k in range(training):
44 | if k % self.steps_per_epoch == 0:
45 | # print ('epochs:', k/self.steps_per_epoch)
46 |                 print('epochs: {}'.format(k // self.steps_per_epoch))
47 |                 for s in data:
48 |                     print(s, self.predict(s))
49 |
50 | sample = numpy.random.randint(data.shape[0])
51 | y = [Z[sample]]
52 |
53 | for i in range(len(self.weights) - 1):
54 | activation = numpy.dot(y[i], self.weights[i])
55 | activation_f = self.activation_func(activation)
56 | # add the bias for the next layer
57 | activation_f = numpy.concatenate((numpy.ones(1), numpy.array(activation_f)))
58 | y.append(activation_f)
59 |
60 | # last layer
61 | activation = numpy.dot(y[-1], self.weights[-1])
62 | activation_f = self.activation_func(activation)
63 | y.append(activation_f)
64 |
65 | # error for the output layer
66 | error = labels[sample] - y[-1]
67 | delta_vec = [error * self.activation_derivative(y[-1])]
68 |
69 | # we need to begin from the back from the next to last layer
70 | for i in range(self.layers - 2, 0, -1):
71 | error = delta_vec[-1].dot(self.weights[i][1:].T)
72 | error = error * self.activation_derivative(y[i][1:])
73 | delta_vec.append(error)
74 |
75 | # reverse
76 | # [level3(output)->level2(hidden)] => [level2(hidden)->level3(output)]
77 | delta_vec.reverse()
78 |
79 | # backpropagation
80 | # 1. Multiply its output delta and input activation
81 | # to get the gradient of the weight.
82 | # 2. Subtract a ratio (percentage) of the gradient from the weight
83 | for i in range(len(self.weights)):
84 |             layer = y[i].reshape(1, self.net_arch[i] + 1)
85 | 
86 |             delta = delta_vec[i].reshape(1, self.net_arch[i + 1])
87 | self.weights[i] += learning_rate * layer.T.dot(delta)
88 |
89 | def predict(self, x):
90 | val = numpy.concatenate((numpy.ones(1).T, numpy.array(x)))
91 | for i in range(0, len(self.weights)):
92 | val = self.activation_func(numpy.dot(val, self.weights[i]))
93 | val = numpy.concatenate((numpy.ones(1).T, numpy.array(val)))
94 |
95 | return val[1]
96 |
97 | def plot_decision_regions(self, X, y, points=200):
98 | markers = ('o', '^')
99 | colors = ('red', 'blue')
100 | cmap = ListedColormap(colors)
101 |
102 | x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
103 | x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
104 |
105 | # To produce zoomed-out figures, you can replace the preceding 2 lines with:
106 | # x1_min, x1_max = -10, 11
107 | # x2_min, x2_max = -10, 11
108 |
109 | resolution = max(x1_max - x1_min, x2_max - x2_min) / float(points)
110 |
111 | xx1, xx2 = numpy.meshgrid(numpy.arange(x1_min,
112 | x1_max,
113 | resolution),
114 | numpy.arange(x2_min, x2_max, resolution))
115 | input = numpy.array([xx1.ravel(), xx2.ravel()]).T
116 | Z = numpy.empty(0)
117 | for i in range(input.shape[0]):
118 |             val = self.predict(numpy.array(input[i]))
119 | if val < 0.5:
120 | val = 0
121 | if val >= 0.5:
122 | val = 1
123 | Z = numpy.append(Z, val)
124 |
125 | Z = Z.reshape(xx1.shape)
126 |
127 | plt.pcolormesh(xx1, xx2, Z, cmap=cmap)
128 | plt.xlim(xx1.min(), xx1.max())
129 | plt.ylim(xx2.min(), xx2.max())
130 | # plot all samples
131 |
132 | classes = ["False", "True"]
133 |
134 | for idx, cl in enumerate(numpy.unique(y)):
135 | plt.scatter(x=X[y == cl, 0],
136 | y=X[y == cl, 1],
137 | alpha=1.0,
138 | c=colors[idx],
139 | edgecolors='black',
140 | marker=markers[idx],
141 | s=80,
142 | label=classes[idx])
143 |
144 | plt.xlabel('x-axis')
145 | plt.ylabel('y-axis')
146 | plt.legend(loc='upper left')
147 | plt.show()
148 |
149 |
150 | if __name__ == '__main__':
151 | numpy.random.seed(0)
152 |
153 | # Initialize the NeuralNetwork with 2 input, 2 hidden, and 1 output neurons
154 | nn = NeuralNetwork([2, 2, 1])
155 |
156 | X = numpy.array([[0, 0],
157 | [0, 1],
158 | [1, 0],
159 | [1, 1]])
160 |
161 | y = numpy.array([0, 1, 1, 0])
162 |
163 | nn.fit(X, y, epochs=10)
164 |
165 | print("Final prediction")
166 | for s in X:
167 | print(s, nn.predict(s))
168 |
169 | nn.plot_decision_regions(X, y)
170 |
--------------------------------------------------------------------------------
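
A quick numerical check, using only numpy and an arbitrary test value, that the derivative form used in backpropagation above (applied to the stored tanh output) matches a finite-difference estimate of d/ds tanh(s):

import numpy

s = 0.37                       # arbitrary pre-activation value
t = numpy.tanh(s)              # what the forward pass stores in y
analytic = (1 + t) * (1 - t)   # 1 - tanh(s)**2, computed from the output
eps = 1e-6
numeric = (numpy.tanh(s + eps) - numpy.tanh(s - eps)) / (2 * eps)
print(analytic, numeric)       # both ~0.875, agreeing to ~1e-10
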
/Chapter03/chapter_03_001.py:
--------------------------------------------------------------------------------
1 | from keras.datasets import mnist
2 | from keras.layers.core import Dense, Activation
3 | from keras.models import Sequential
4 | from keras.utils import np_utils
5 |
6 | (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
7 |
8 | X_train = X_train.reshape(60000, 784)
9 | X_test = X_test.reshape(10000, 784)
10 |
11 | classes = 10
12 | Y_train = np_utils.to_categorical(Y_train, classes)
13 | Y_test = np_utils.to_categorical(Y_test, classes)
14 |
15 | input_size = 784
16 | batch_size = 100
17 | hidden_neurons = 100
18 | epochs = 100
19 |
20 | model = Sequential([
21 | Dense(hidden_neurons, input_dim=input_size),
22 | Activation('sigmoid'),
23 | Dense(classes),
24 | Activation('softmax')
25 | ])
26 |
27 | model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='sgd')
28 |
29 | model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=1)
30 |
31 | score = model.evaluate(X_test, Y_test, verbose=1)
32 | print('Test accuracy:', score[1])
33 |
34 | weights = model.layers[0].get_weights()
35 |
36 | import matplotlib.pyplot as plt
37 | import matplotlib.cm as cm
38 | import numpy
39 |
40 | fig = plt.figure()
41 |
42 | w = weights[0].T
43 | for neuron in range(hidden_neurons):
44 | ax = fig.add_subplot(10, 10, neuron + 1)
45 | ax.axis("off")
46 | ax.imshow(numpy.reshape(w[neuron], (28, 28)), cmap=cm.Greys_r)
47 |
48 | plt.savefig("neuron_images.png", dpi=300)
49 | plt.show()
50 |
--------------------------------------------------------------------------------
/Chapter03/chapter_03_002.py:
--------------------------------------------------------------------------------
1 | from keras.datasets import cifar10
2 | from keras.layers.core import Dense, Activation
3 | from keras.models import Sequential
4 | from keras.utils import np_utils
5 |
6 | (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
7 |
8 | X_train = X_train.reshape(50000, 3072)
9 | X_test = X_test.reshape(10000, 3072)
10 |
11 | classes = 10
12 | Y_train = np_utils.to_categorical(Y_train, classes)
13 | Y_test = np_utils.to_categorical(Y_test, classes)
14 |
15 | input_size = 3072
16 | batch_size = 100
17 | epochs = 100
18 |
19 | model = Sequential([
20 | Dense(1024, input_dim=input_size),
21 | Activation('relu'),
22 | Dense(512),
23 | Activation('relu'),
24 | Dense(512),
25 | Activation('sigmoid'),
26 | Dense(classes),
27 | Activation('softmax')
28 | ])
29 |
30 | model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='sgd')
31 |
32 | model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, Y_test), verbose=1)
33 |
34 | import matplotlib.pyplot as plt
35 | import matplotlib.cm as cm
36 | import matplotlib.gridspec as gridspec
37 | import numpy
38 | import random
39 |
40 | fig = plt.figure()
41 | outer_grid = gridspec.GridSpec(10, 10, wspace=0.0, hspace=0.0)
42 |
43 | weights = model.layers[0].get_weights()
44 |
45 | w = weights[0].T
46 |
47 | for i, neuron in enumerate(random.sample(range(1024), 100)):
48 |     ax = plt.Subplot(fig, outer_grid[i])
49 |     ax.imshow(numpy.mean(numpy.reshape(w[neuron], (32, 32, 3)), axis=2), cmap=cm.Greys_r)
50 | ax.set_xticks([])
51 | ax.set_yticks([])
52 | fig.add_subplot(ax)
53 |
54 | plt.show()
55 |
56 |
--------------------------------------------------------------------------------
/Chapter04/chapter_04_001.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def conv(image, im_filter):
5 | """
6 | :param image: grayscale image as a 2-dimensional numpy array
7 | :param im_filter: 2-dimensional numpy array
8 | """
9 |
10 | # input dimensions
11 | height = image.shape[0]
12 | width = image.shape[1]
13 |
14 | # output image with reduced dimensions
15 | im_c = np.zeros((height - len(im_filter) + 1,
16 | width - len(im_filter) + 1))
17 |
18 | # iterate over all rows and columns
19 | for row in range(len(im_c)):
20 | for col in range(len(im_c[0])):
21 | # apply the filter
22 | for i in range(len(im_filter)):
23 | for j in range(len(im_filter[0])):
24 | im_c[row, col] += image[row + i, col + j] * im_filter[i][j]
25 |
26 | # fix out-of-bounds values
27 | im_c[im_c > 255] = 255
28 | im_c[im_c < 0] = 0
29 |
30 | # plot images for comparison
31 | import matplotlib.pyplot as plt
32 | import matplotlib.cm as cm
33 |
34 | plt.figure()
35 | plt.imshow(image, cmap=cm.Greys_r)
36 | plt.show()
37 |
38 | plt.imshow(im_c, cmap=cm.Greys_r)
39 | plt.show()
40 |
41 |
42 | import requests
43 | from PIL import Image
44 | from io import BytesIO
45 |
46 | # load the image
47 | url = "https://upload.wikimedia.org/wikipedia/commons/thumb/8/88/Commander_Eileen_Collins_-_GPN-2000-001177.jpg/382px-Commander_Eileen_Collins_-_GPN-2000-001177.jpg?download"
48 | resp = requests.get(url)
49 | image_rgb = np.asarray(Image.open(BytesIO(resp.content)).convert("RGB"))
50 |
51 | # convert to grayscale
52 | image_grayscale = np.mean(image_rgb, axis=2, dtype=np.uint)
53 |
54 | # blur filter
55 | blur = np.full([10, 10], 1. / 100)
56 | conv(image_grayscale, blur)
57 |
58 | # sobel filters
59 | sobel_x = [[-1, -2, -1],
60 | [0, 0, 0],
61 | [1, 2, 1]]
62 | conv(image_grayscale, sobel_x)
63 |
64 | sobel_y = [[-1, 0, 1],
65 | [-2, 0, 2],
66 | [-1, 0, 1]]
67 | conv(image_grayscale, sobel_y)
68 |
--------------------------------------------------------------------------------
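
A small worked example (toy values only) of the "valid" convolution output size used by conv() above: an H x W input and an F x F filter produce an (H - F + 1) x (W - F + 1) output:

import numpy as np

image = np.arange(25, dtype=float).reshape(5, 5)  # 5x5 input
im_filter = np.full((3, 3), 1. / 9)               # 3x3 mean filter
out_h = image.shape[0] - len(im_filter) + 1       # 5 - 3 + 1 = 3
out_w = image.shape[1] - len(im_filter[0]) + 1
print(out_h, out_w)  # 3 3
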
/Chapter04/chapter_04_002.py:
--------------------------------------------------------------------------------
1 | # for reproducibility
2 | from numpy.random import seed
3 |
4 | seed(1)
5 | from tensorflow import set_random_seed
6 |
7 | set_random_seed(1)
8 |
9 | from keras.datasets import mnist
10 | from keras.models import Sequential
11 | from keras.layers import Dense, Activation
12 | from keras.layers import Convolution2D, MaxPooling2D
13 | from keras.layers import Flatten
14 |
15 | from keras.utils import np_utils
16 |
17 | (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
18 |
19 | X_train = X_train.reshape(60000, 28, 28, 1)
20 | X_test = X_test.reshape(10000, 28, 28, 1)
21 |
22 | Y_train = np_utils.to_categorical(Y_train, 10)
23 | Y_test = np_utils.to_categorical(Y_test, 10)
24 |
25 | model = Sequential([
26 | Convolution2D(filters=32,
27 | kernel_size=(3, 3),
28 | input_shape=(28, 28, 1)), # first conv layer
29 | Activation('relu'),
30 | Convolution2D(filters=32,
31 | kernel_size=(3, 3)), # second conv layer
32 | Activation('relu'),
33 | MaxPooling2D(pool_size=(2, 2)), # max pooling layer
34 | Flatten(), # flatten the output tensor
35 | Dense(64), # fully-connected hidden layer
36 | Activation('relu'),
37 | Dense(10), # output layer
38 | Activation('softmax')])
39 |
40 | print(model.summary())
41 |
42 | model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adadelta')
43 |
44 | model.fit(X_train, Y_train, batch_size=100, epochs=5, validation_split=0.1, verbose=1)
45 |
46 | score = model.evaluate(X_test, Y_test, verbose=1)
47 | print('Test accuracy:', score[1])
48 |
--------------------------------------------------------------------------------
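
A back-of-the-envelope sketch (assuming the default 'valid' padding and stride 1) of the tensor sizes flowing through the model above, which explains the size of the Flatten() output feeding Dense(64):

# 28x28 input -> conv 3x3 -> 26x26 -> conv 3x3 -> 24x24 -> max pool 2x2 -> 12x12
h = 28 - 3 + 1        # after the first convolutional layer: 26
h = h - 3 + 1         # after the second convolutional layer: 24
h = h // 2            # after max pooling: 12
print(h, h * h * 32)  # 12 4608: the flattened size, with 32 feature maps
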
/Chapter04/chapter_04_003.py:
--------------------------------------------------------------------------------
1 | import keras
2 | from keras.datasets import cifar10
3 | from keras.layers import Conv2D, MaxPooling2D
4 | from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
5 | from keras.models import Sequential
6 | from keras.preprocessing.image import ImageDataGenerator
7 |
8 | batch_size = 50
9 |
10 | (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
11 |
12 | X_train = X_train.astype('float32')
13 | X_test = X_test.astype('float32')
14 | X_train /= 255
15 | X_test /= 255
16 |
17 | Y_train = keras.utils.to_categorical(Y_train, 10)
18 | Y_test = keras.utils.to_categorical(Y_test, 10)
19 |
20 | data_generator = ImageDataGenerator(rotation_range=90,
21 | width_shift_range=0.1,
22 | height_shift_range=0.1,
23 | featurewise_center=True,
24 | featurewise_std_normalization=True,
25 | horizontal_flip=True)
26 |
27 | data_generator.fit(X_train)
28 |
29 | # standardize the test set
30 | for i in range(len(X_test)):
31 | X_test[i] = data_generator.standardize(X_test[i])
32 |
33 | model = Sequential()
34 | model.add(Conv2D(32, (3, 3), padding='same', input_shape=X_train.shape[1:]))
35 | model.add(Activation('elu'))
36 | model.add(BatchNormalization())
37 | model.add(Conv2D(32, (3, 3), padding='same'))
38 | model.add(Activation('elu'))
39 | model.add(BatchNormalization())
40 | model.add(MaxPooling2D(pool_size=(2, 2)))
41 | model.add(Dropout(0.2))
42 |
43 | model.add(Conv2D(64, (3, 3), padding='same'))
44 | model.add(Activation('elu'))
45 | model.add(BatchNormalization())
46 | model.add(Conv2D(64, (3, 3), padding='same'))
47 | model.add(Activation('elu'))
48 | model.add(BatchNormalization())
49 | model.add(MaxPooling2D(pool_size=(2, 2)))
50 | model.add(Dropout(0.2))
51 |
52 | model.add(Conv2D(128, (3, 3), padding='same'))
53 | model.add(Activation('elu'))
54 | model.add(BatchNormalization())
55 | model.add(Conv2D(128, (3, 3), padding='same'))
56 | model.add(Activation('elu'))
57 | model.add(BatchNormalization())
58 | model.add(MaxPooling2D(pool_size=(2, 2)))
59 | model.add(Dropout(0.5))
60 |
61 | model.add(Flatten())
62 | model.add(Dense(10, activation='softmax'))
63 |
64 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
65 |
66 | model.fit_generator(
67 | generator=data_generator.flow(x=X_train,
68 | y=Y_train,
69 | batch_size=batch_size),
70 | steps_per_epoch=len(X_train) // batch_size,
71 | epochs=100,
72 | validation_data=(X_test, Y_test),
73 | workers=4)
74 |
--------------------------------------------------------------------------------
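
A minimal numpy sketch of the idea behind featurewise_center and featurewise_std_normalization above (a conceptual illustration with random data, not Keras's exact implementation): each channel is shifted and scaled by statistics computed over the whole training set, which is also what standardize() applies to the test images:

import numpy as np

X = np.random.rand(100, 32, 32, 3).astype('float32')  # illustrative "dataset"
mean = X.mean(axis=(0, 1, 2))       # per-channel mean over the dataset
std = X.std(axis=(0, 1, 2))         # per-channel standard deviation
X_std = (X - mean) / (std + 1e-6)
print(X_std.mean(axis=(0, 1, 2)))   # ~[0, 0, 0]
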
/Chapter05/chapter_05_001.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import torchvision
5 | from torchvision import models, transforms
6 |
7 | batch_size = 50
8 |
9 | # training data
10 | train_data_transform = transforms.Compose([
11 | transforms.Resize(224),
12 | transforms.RandomHorizontalFlip(),
13 | transforms.RandomVerticalFlip(),
14 | transforms.ToTensor(),
15 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
16 | ])
17 |
18 | train_set = torchvision.datasets.CIFAR10(root='./data',
19 | train=True,
20 | download=True,
21 | transform=train_data_transform)
22 |
23 | train_loader = torch.utils.data.DataLoader(train_set,
24 | batch_size=batch_size,
25 | shuffle=True,
26 | num_workers=2)
27 |
28 | # validation data
29 | val_data_transform = transforms.Compose([
30 | transforms.Resize(224),
31 | transforms.ToTensor(),
32 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
33 | ])
34 |
35 | val_set = torchvision.datasets.CIFAR10(root='./data',
36 | train=False,
37 | download=True,
38 | transform=val_data_transform)
39 |
40 | val_loader = torch.utils.data.DataLoader(val_set,
41 |                                          batch_size=batch_size,
42 |                                          shuffle=False,
43 |                                          num_workers=2)
44 |
45 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
46 |
47 |
48 | def train_model(model, loss_function, optimizer, data_loader):
49 | # set model to training mode
50 | model.train()
51 |
52 | current_loss = 0.0
53 | current_acc = 0
54 |
55 | # iterate over the training data
56 | for i, (inputs, labels) in enumerate(data_loader):
57 | # send the input/labels to the GPU
58 | inputs = inputs.to(device)
59 | labels = labels.to(device)
60 |
61 | # zero the parameter gradients
62 | optimizer.zero_grad()
63 |
64 | with torch.set_grad_enabled(True):
65 | # forward
66 | outputs = model(inputs)
67 | _, predictions = torch.max(outputs, 1)
68 | loss = loss_function(outputs, labels)
69 |
70 | # backward
71 | loss.backward()
72 | optimizer.step()
73 |
74 | # statistics
75 | current_loss += loss.item() * inputs.size(0)
76 | current_acc += torch.sum(predictions == labels.data)
77 |
78 | total_loss = current_loss / len(data_loader.dataset)
79 | total_acc = current_acc.double() / len(data_loader.dataset)
80 |
81 | print('Train Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss, total_acc))
82 |
83 |
84 | def test_model(model, loss_function, data_loader):
85 | # set model in evaluation mode
86 | model.eval()
87 |
88 | current_loss = 0.0
89 | current_acc = 0
90 |
91 | # iterate over the validation data
92 | for i, (inputs, labels) in enumerate(data_loader):
93 | # send the input/labels to the GPU
94 | inputs = inputs.to(device)
95 | labels = labels.to(device)
96 |
97 | # forward
98 | with torch.set_grad_enabled(False):
99 | outputs = model(inputs)
100 | _, predictions = torch.max(outputs, 1)
101 | loss = loss_function(outputs, labels)
102 |
103 | # statistics
104 | current_loss += loss.item() * inputs.size(0)
105 | current_acc += torch.sum(predictions == labels.data)
106 |
107 | total_loss = current_loss / len(data_loader.dataset)
108 | total_acc = current_acc.double() / len(data_loader.dataset)
109 |
110 | print('Test Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss, total_acc))
111 |
112 |
113 | def tl_feature_extractor(epochs=3):
114 | # load the pre-trained model
115 | model = torchvision.models.resnet18(pretrained=True)
116 |
117 | # exclude existing parameters from backward pass
118 | # for performance
119 | for param in model.parameters():
120 | param.requires_grad = False
121 |
122 | # newly constructed layers have requires_grad=True by default
123 | num_features = model.fc.in_features
124 | model.fc = nn.Linear(num_features, 10)
125 |
126 | # transfer to GPU (if available)
127 | model = model.to(device)
128 |
129 | loss_function = nn.CrossEntropyLoss()
130 |
131 | # only parameters of the final layer are being optimized
132 | optimizer = optim.Adam(model.fc.parameters())
133 |
134 | # train
135 | for epoch in range(epochs):
136 | print('Epoch {}/{}'.format(epoch + 1, epochs))
137 |
138 | train_model(model, loss_function, optimizer, train_loader)
139 |         test_model(model, loss_function, val_loader)
140 |
141 |
142 | def tl_fine_tuning(epochs=3):
143 | # load the pre-trained model
144 | model = models.resnet18(pretrained=True)
145 |
146 | # replace the last layer
147 | num_features = model.fc.in_features
148 | model.fc = nn.Linear(num_features, 10)
149 |
150 | # transfer the model to the GPU
151 | model = model.to(device)
152 |
153 | # loss function
154 | loss_function = nn.CrossEntropyLoss()
155 |
156 | # We'll optimize all parameters
157 | optimizer = optim.Adam(model.parameters())
158 |
159 | # train
160 | for epoch in range(epochs):
161 | print('Epoch {}/{}'.format(epoch + 1, epochs))
162 |
163 | train_model(model, loss_function, optimizer, train_loader)
164 |         test_model(model, loss_function, val_loader)
165 |
166 |
167 | if __name__ == '__main__':
168 | tl_feature_extractor(epochs=5)
169 | #tl_fine_tuning(epochs=5)
170 |
--------------------------------------------------------------------------------
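
A short sketch (assumes torchvision is installed and the pretrained weights can be downloaded) showing that in the feature-extractor setup above only the newly added fc layer remains trainable:

import torch.nn as nn
import torchvision.models as models

model = models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False            # freeze the pretrained layers
model.fc = nn.Linear(model.fc.in_features, 10)  # new layer, trainable by default

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(trainable)  # 5130 = 512 * 10 weights + 10 biases
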
/Chapter05/chapter_05_002.py:
--------------------------------------------------------------------------------
1 | # VGG16
2 | from keras.applications.vgg16 import VGG16
3 |
4 | vgg16_model = VGG16(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
5 |
6 | # VGG19
7 | from keras.applications.vgg19 import VGG19
8 |
9 | vgg19_model = VGG19(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
10 |
11 | import torchvision.models as models
12 |
13 | model = models.vgg16(pretrained=True)
14 |
--------------------------------------------------------------------------------
/Chapter05/chapter_05_003.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import cv2 # opencv import
3 | import numpy as np
4 | import requests
5 |
6 | # Download YOLO net config file
7 | # We'll download it from the YOLO author's GitHub repo
8 | yolo_config = 'yolov3.cfg'
9 | if not os.path.isfile(yolo_config):
10 | url = 'https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg'
11 | r = requests.get(url)
12 | with open(yolo_config, 'wb') as f:
13 | f.write(r.content)
14 |
15 | # Download YOLO net weights
16 | # We'll download it from the YOLO author's website
17 | yolo_weights = 'yolov3.weights'
18 | if not os.path.isfile(yolo_weights):
19 | url = 'https://pjreddie.com/media/files/yolov3.weights'
20 | r = requests.get(url)
21 | with open(yolo_weights, 'wb') as f:
22 | f.write(r.content)
23 |
24 | # Download class names file
25 | # Contains the names of the classes the network can detect
26 | classes_file = 'coco.names'
27 | if not os.path.isfile(classes_file):
28 | url = 'https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names'
29 | r = requests.get(url)
30 | with open(classes_file, 'wb') as f:
31 | f.write(r.content)
32 |
33 | # load class names
34 | with open(classes_file, 'r') as f:
35 | classes = [line.strip() for line in f.readlines()]
36 |
37 | # Download object detection image
38 | image_file = 'source.jpg'
39 | if not os.path.isfile(image_file):
40 | url = "https://upload.wikimedia.org/wikipedia/commons/c/c7/Abbey_Road_Zebra_crossing_2004-01.jpg"
41 | r = requests.get(url)
42 | with open(image_file, 'wb') as f:
43 | f.write(r.content)
44 |
45 | # read and normalize image
46 | image = cv2.imread(image_file)
47 | blob = cv2.dnn.blobFromImage(image, 1 / 255, (416, 416), (0, 0, 0), True, crop=False)
48 |
49 | # load the network
50 | net = cv2.dnn.readNet(yolo_weights, yolo_config)
51 |
52 | # set as input to the net
53 | net.setInput(blob)
54 |
55 | # get network output layers
56 | layer_names = net.getLayerNames()
57 | output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
58 |
59 | # inference
60 | # the network outputs multiple lists of anchor boxes,
61 | # one for each detected class
62 | outs = net.forward(output_layers)
63 |
64 | # extract bounding boxes
65 | class_ids = list()
66 | confidences = list()
67 | boxes = list()
68 |
69 | # iterate over all classes
70 | for out in outs:
71 | # iterate over the anchor boxes for each class
72 | for detection in out:
73 | # bounding box
74 | center_x = int(detection[0] * image.shape[1])
75 | center_y = int(detection[1] * image.shape[0])
76 | w = int(detection[2] * image.shape[1])
77 | h = int(detection[3] * image.shape[0])
78 | x = center_x - w // 2
79 | y = center_y - h // 2
80 | boxes.append([x, y, w, h])
81 |
82 | # class
83 | class_id = np.argmax(detection[5:])
84 | class_ids.append(class_id)
85 |
86 | # confidence
87 | confidence = detection[4]
88 | confidences.append(float(confidence))
89 |
90 | # non-max suppression
91 | ids = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.3, nms_threshold=0.5)
92 |
93 | # draw the bounding boxes on the image
94 | colors = np.random.uniform(0, 255, size=(len(classes), 3))
95 |
96 | for i in ids:
97 | i = i[0]
98 | x, y, w, h = boxes[i]
99 | class_id = class_ids[i]
100 |
101 | color = colors[class_id]
102 |
103 | cv2.rectangle(image, (round(x), round(y)), (round(x + w), round(y + h)), color, 2)
104 |
105 | label = "%s: %.2f" % (classes[class_id], confidences[i])
106 | cv2.putText(image, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
107 |
108 | cv2.imshow("Object detection", image)
109 | cv2.waitKey()
110 |
--------------------------------------------------------------------------------
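
A tiny standalone example (illustrative box coordinates; the shape of the returned index array varies across OpenCV versions) of the non-max suppression step used above: overlapping boxes with lower confidence are suppressed.

import cv2

boxes = [[10, 10, 100, 100], [12, 12, 100, 100], [300, 300, 50, 50]]
confidences = [0.9, 0.6, 0.8]
ids = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.3, nms_threshold=0.5)
print(ids)  # keeps box 0 (0.9) and box 2 (0.8); the overlapping 0.6 box is dropped
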
/Chapter06/chapter_06_001.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib.markers import MarkerStyle
3 | import numpy as np
4 | from keras import backend as K
5 | from keras.datasets import mnist
6 | from keras.layers import Lambda, Input, Dense
7 | from keras.losses import binary_crossentropy
8 | from keras.models import Model
9 |
10 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
11 |
12 | image_size = x_train.shape[1] * x_train.shape[2]
13 | x_train = np.reshape(x_train, [-1, image_size])
14 | x_test = np.reshape(x_test, [-1, image_size])
15 | x_train = x_train.astype('float32') / 255
16 | x_test = x_test.astype('float32') / 255
17 |
18 |
19 | def build_vae(intermediate_dim=512, latent_dim=2):
20 | """
21 | Build VAE
22 | :param intermediate_dim: size of hidden layers of the encoder/decoder
23 | :param latent_dim: latent space size
24 | :returns tuple: the encoder, the decoder, and the full vae
25 | """
26 |
27 | # encoder first
28 | inputs = Input(shape=(image_size,), name='encoder_input')
29 | x = Dense(intermediate_dim, activation='relu')(inputs)
30 |
31 | # latent mean and variance
32 | z_mean = Dense(latent_dim, name='z_mean')(x)
33 | z_log_var = Dense(latent_dim, name='z_log_var')(x)
34 |
35 | # reparametrization trick for random sampling
36 | # Note the use of the Lambda layer
37 | # At runtime, it will call the sampling function
38 | z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
39 |
40 |     # full encoder model
41 | encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
42 | encoder.summary()
43 |
44 | # decoder
45 | latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
46 | x = Dense(intermediate_dim, activation='relu')(latent_inputs)
47 | outputs = Dense(image_size, activation='sigmoid')(x)
48 |
49 | # full decoder model
50 | decoder = Model(latent_inputs, outputs, name='decoder')
51 | decoder.summary()
52 |
53 | # VAE model
54 | outputs = decoder(encoder(inputs)[2])
55 | vae = Model(inputs, outputs, name='vae')
56 |
57 | # Loss function
58 |     # we start with the reconstruction loss
59 | reconstruction_loss = binary_crossentropy(inputs, outputs) * image_size
60 |
61 | # next is the KL divergence
62 | kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
63 | kl_loss = K.sum(kl_loss, axis=-1)
64 | kl_loss *= -0.5
65 |
66 | # we combine them in a total loss
67 | vae_loss = K.mean(reconstruction_loss + kl_loss)
68 | vae.add_loss(vae_loss)
69 |
70 | return encoder, decoder, vae
71 |
72 |
73 | def sampling(args: tuple):
74 | """
75 | Reparameterization trick by sampling z from unit Gaussian
76 | :param args: (tensor, tensor) mean and log of variance of q(z|x)
77 | :returns tensor: sampled latent vector z
78 | """
79 |
80 | # unpack the input tuple
81 | z_mean, z_log_var = args
82 |
83 | # mini-batch size
84 | mb_size = K.shape(z_mean)[0]
85 |
86 | # latent space size
87 | dim = K.int_shape(z_mean)[1]
88 |
89 | # random normal vector with mean=0 and std=1.0
90 | epsilon = K.random_normal(shape=(mb_size, dim))
91 |
92 | return z_mean + K.exp(0.5 * z_log_var) * epsilon
93 |
94 |
95 | def plot_latent_distribution(encoder,
96 | x_test,
97 | y_test,
98 | batch_size=128):
99 | """
100 | Display a 2D plot of the digit classes in the latent space.
101 | We are interested only in z, so we only need the encoder here.
102 | :param encoder: the encoder network
103 | :param x_test: test images
104 | :param y_test: test labels
105 | :param batch_size: size of the mini-batch
106 | """
107 | z_mean, _, _ = encoder.predict(x_test, batch_size=batch_size)
108 | plt.figure(figsize=(6, 6))
109 |
110 | markers = ('o', 'x', '^', '<', '>', '*', 'h', 'H', 'D', 'd', 'P', 'X', '8', 's', 'p')
111 |
112 | for i in np.unique(y_test):
113 | plt.scatter(z_mean[y_test == i, 0], z_mean[y_test == i, 1],
114 | marker=MarkerStyle(markers[i], fillstyle='none'),
115 | edgecolors='black')
116 |
117 | plt.xlabel("z[0]")
118 | plt.ylabel("z[1]")
119 | plt.show()
120 |
121 |
122 | def plot_generated_images(decoder):
123 | """
124 | Display a 2D plot of the generated images.
125 | We only need the decoder, because we'll manually sample the distribution z
126 | :param decoder: the decoder network
127 | """
128 |
129 | # display a nxn 2D manifold of digits
130 | n = 15
131 | digit_size = 28
132 |
133 | figure = np.zeros((digit_size * n, digit_size * n))
134 | # linearly spaced coordinates corresponding to the 2D plot
135 | # of digit classes in the latent space
136 | grid_x = np.linspace(-4, 4, n)
137 | grid_y = np.linspace(-4, 4, n)[::-1]
138 |
139 | # start sampling z1 and z2 in the ranges grid_x and grid_y
140 | for i, yi in enumerate(grid_y):
141 | for j, xi in enumerate(grid_x):
142 | z_sample = np.array([[xi, yi]])
143 | x_decoded = decoder.predict(z_sample)
144 | digit = x_decoded[0].reshape(digit_size, digit_size)
145 | slice_i = slice(i * digit_size, (i + 1) * digit_size)
146 | slice_j = slice(j * digit_size, (j + 1) * digit_size)
147 | figure[slice_i, slice_j] = digit
148 |
149 | # plot the results
150 | plt.figure(figsize=(6, 5))
151 | start_range = digit_size // 2
152 | end_range = n * digit_size + start_range + 1
153 | pixel_range = np.arange(start_range, end_range, digit_size)
154 | sample_range_x = np.round(grid_x, 1)
155 | sample_range_y = np.round(grid_y, 1)
156 | plt.xticks(pixel_range, sample_range_x)
157 | plt.yticks(pixel_range, sample_range_y)
158 | plt.xlabel("z[0]")
159 | plt.ylabel("z[1]")
160 | plt.imshow(figure, cmap='Greys_r')
161 | plt.show()
162 |
163 |
164 | if __name__ == '__main__':
165 | encoder, decoder, vae = build_vae()
166 |
167 | vae.compile(optimizer='adam')
168 | vae.summary()
169 |
170 | vae.fit(x_train,
171 | epochs=50,
172 | batch_size=128,
173 | validation_data=(x_test, None))
174 |
175 | plot_latent_distribution(encoder,
176 | x_test,
177 | y_test,
178 | batch_size=128)
179 |
180 | plot_generated_images(decoder)
181 |
--------------------------------------------------------------------------------
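
A standalone numpy sketch (illustrative values) of the reparameterization trick that sampling() implements above: instead of sampling z directly, sample eps from a unit Gaussian and compute z = mu + sigma * eps, which keeps the path from the mean and log-variance differentiable:

import numpy as np

np.random.seed(0)
z_mean = np.array([[0.5, -1.0]])                # illustrative encoder outputs
z_log_var = np.array([[0.2, 0.0]])
epsilon = np.random.normal(size=z_mean.shape)   # eps ~ N(0, I)
z = z_mean + np.exp(0.5 * z_log_var) * epsilon  # z ~ N(z_mean, exp(z_log_var))
print(z)
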
/Chapter06/chapter_06_002.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | from keras.datasets import mnist
4 | from keras.layers import BatchNormalization, Input, Dense, Reshape, Flatten
5 | from keras.layers.advanced_activations import LeakyReLU
6 | from keras.models import Sequential, Model
7 | from keras.optimizers import Adam
8 |
9 |
10 | def build_generator(latent_dim: int):
11 | """
12 |     Build generator network
13 | :param latent_dim: latent vector size
14 | """
15 |
16 | model = Sequential([
17 | Dense(128, input_dim=latent_dim),
18 | LeakyReLU(alpha=0.2),
19 | BatchNormalization(momentum=0.8),
20 | Dense(256),
21 | LeakyReLU(alpha=0.2),
22 | BatchNormalization(momentum=0.8),
23 | Dense(512),
24 | LeakyReLU(alpha=0.2),
25 | BatchNormalization(momentum=0.8),
26 | Dense(np.prod((28, 28, 1)), activation='tanh'),
27 | # reshape to MNIST image size
28 | Reshape((28, 28, 1))
29 | ])
30 |
31 | model.summary()
32 |
33 | # the latent input vector z
34 | z = Input(shape=(latent_dim,))
35 | generated = model(z)
36 |
37 | # build model from the input and output
38 | return Model(z, generated)
39 |
40 |
41 | def build_discriminator():
42 | """
43 | Build discriminator network
44 | """
45 |
46 | model = Sequential([
47 | Flatten(input_shape=(28, 28, 1)),
48 | Dense(256),
49 | LeakyReLU(alpha=0.2),
50 | Dense(128),
51 | LeakyReLU(alpha=0.2),
52 | Dense(1, activation='sigmoid'),
53 | ], name='discriminator')
54 |
55 | model.summary()
56 |
57 | image = Input(shape=(28, 28, 1))
58 | output = model(image)
59 |
60 | return Model(image, output)
61 |
62 |
63 | def train(generator, discriminator, combined, steps, batch_size):
64 | """
65 | Train the GAN system
66 | :param generator: generator
67 | :param discriminator: discriminator
68 | :param combined: stacked generator and discriminator
69 | we'll use the combined network when we train the generator
70 | :param steps: number of alternating steps for training
71 | :param batch_size: size of the minibatch
72 | """
73 |
74 | # Load the dataset
75 | (x_train, _), _ = mnist.load_data()
76 |
77 | # Rescale in [-1, 1] interval
78 | x_train = (x_train.astype(np.float32) - 127.5) / 127.5
79 | x_train = np.expand_dims(x_train, axis=-1)
80 |
81 | # Discriminator ground truths
82 | real = np.ones((batch_size, 1))
83 | fake = np.zeros((batch_size, 1))
84 |
85 | latent_dim = generator.input_shape[1]
86 |
87 | for step in range(steps):
88 | # Train the discriminator
89 |
90 | # Select a random batch of images
91 | real_images = x_train[np.random.randint(0, x_train.shape[0], batch_size)]
92 |
93 | # Random batch of noise
94 | noise = np.random.normal(0, 1, (batch_size, latent_dim))
95 |
96 | # Generate a batch of new images
97 | generated_images = generator.predict(noise)
98 |
99 | # Train the discriminator
100 | discriminator_real_loss = discriminator.train_on_batch(real_images, real)
101 | discriminator_fake_loss = discriminator.train_on_batch(generated_images, fake)
102 | discriminator_loss = 0.5 * np.add(discriminator_real_loss, discriminator_fake_loss)
103 |
104 | # Train the generator
105 | # random latent vector z
106 | noise = np.random.normal(0, 1, (batch_size, latent_dim))
107 |
108 | # Train the generator
109 | # Note that we use the "valid" labels for the generated images
110 | # That's because we try to maximize the discriminator loss
111 | generator_loss = combined.train_on_batch(noise, real)
112 |
113 | # Display progress
114 |         print("%d [Discriminator loss: %.4f, acc.: %.2f%%] [Generator loss: %.4f]" %
115 |               (step, discriminator_loss[0], 100 * discriminator_loss[1], generator_loss))
116 |
117 |
118 | def plot_generated_images(generator):
119 | """
120 | Display a nxn 2D manifold of digits
121 | :param generator: the generator
122 | """
123 | n = 10
124 | digit_size = 28
125 |
126 | # big array containing all images
127 | figure = np.zeros((digit_size * n, digit_size * n))
128 |
129 | latent_dim = generator.input_shape[1]
130 |
131 | # n*n random latent distributions
132 | noise = np.random.normal(0, 1, (n * n, latent_dim))
133 |
134 | # generate the images
135 | generated_images = generator.predict(noise)
136 |
137 | # fill the big array with images
138 | for i in range(n):
139 | for j in range(n):
140 | slice_i = slice(i * digit_size, (i + 1) * digit_size)
141 | slice_j = slice(j * digit_size, (j + 1) * digit_size)
142 | figure[slice_i, slice_j] = np.reshape(generated_images[i * n + j], (28, 28))
143 |
144 | # plot the results
145 | plt.figure(figsize=(6, 5))
146 | plt.axis('off')
147 | plt.imshow(figure, cmap='Greys_r')
148 | plt.show()
149 |
150 |
151 | if __name__ == '__main__':
152 | latent_dim = 64
153 |
154 | # Build and compile the discriminator
155 | discriminator = build_discriminator()
156 | discriminator.compile(loss='binary_crossentropy',
157 | optimizer=Adam(lr=0.0002, beta_1=0.5),
158 | metrics=['accuracy'])
159 |
160 | # Build the generator
161 | generator = build_generator(latent_dim)
162 |
163 | # Generator input z
164 | z = Input(shape=(latent_dim,))
165 | generated_image = generator(z)
166 |
167 | # Only train the generator for the combined model
168 | discriminator.trainable = False
169 |
170 | # The discriminator takes generated image as input and determines validity
171 | real_or_fake = discriminator(generated_image)
172 |
173 | # Stack the generator and discriminator in a combined model
174 | # Trains the generator to deceive the discriminator
175 | combined = Model(z, real_or_fake)
176 | combined.compile(loss='binary_crossentropy',
177 | optimizer=Adam(lr=0.0002, beta_1=0.5))
178 |
179 | # train the GAN system
180 | train(generator=generator,
181 | discriminator=discriminator,
182 | combined=combined,
183 | steps=15000,
184 | batch_size=128)
185 |
186 | # display some random generated images
187 | plot_generated_images(generator)
188 |
--------------------------------------------------------------------------------
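
A small numeric illustration (plain numpy, not part of the training code) of why train() pairs generated images with the "real" label when updating the generator: binary cross-entropy against a target of 1 is large exactly when the discriminator confidently rejects the fake, so minimizing it pushes the generator to fool the discriminator.

import numpy as np

def bce(target, prediction):
    return -(target * np.log(prediction) + (1 - target) * np.log(1 - prediction))

# discriminator output for a generated image (illustrative values)
print(bce(1.0, 0.1))  # ~2.30: discriminator rejects the fake -> large generator loss
print(bce(1.0, 0.9))  # ~0.11: discriminator is fooled -> small generator loss
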
/Chapter07/chapter_07_001.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def step(s, x, U, W):
5 | return x * U + s * W
6 |
7 |
8 | def forward(x, U, W):
9 | # Number of samples in the mini-batch
10 | number_of_samples = len(x)
11 |
12 | # Length of each sample
13 | sequence_length = len(x[0])
14 |
15 | # Initialize the state activation for each sample along the sequence
16 | s = np.zeros((number_of_samples, sequence_length + 1))
17 |
18 | # Update the states over the sequence
19 | for t in range(0, sequence_length):
20 | s[:, t + 1] = step(s[:, t], x[:, t], U, W) # step function
21 |
22 | return s
23 |
24 |
25 | def backward(x, s, y, W):
26 | sequence_length = len(x[0])
27 |
28 | # The network output is just the last activation of sequence
29 | s_t = s[:, -1]
30 |
31 | # Compute the gradient of the output w.r.t. MSE cost function at final state
32 | gS = 2 * (s_t - y)
33 |
34 | # Set the gradient accumulations to 0
35 | gU, gW = 0, 0
36 |
37 | # Accumulate gradients backwards
38 | for k in range(sequence_length, 0, -1):
39 | # Compute the parameter gradients and accumulate the results.
40 | gU += np.sum(gS * x[:, k - 1])
41 | gW += np.sum(gS * s[:, k - 1])
42 |
43 | # Compute the gradient at the output of the previous layer
44 | gS = gS * W
45 |
46 | return gU, gW
47 |
48 |
49 | def train(x, y, epochs, learning_rate=0.0005):
50 | """Train the network"""
51 |
52 | # Set initial parameters
53 | weights = (-2, 0) # (U, W)
54 |
55 | # Accumulate the losses and their respective weights
56 | losses = list()
57 | weights_u = list()
58 | weights_w = list()
59 |
60 | # Perform iterative gradient descent
61 | for i in range(epochs):
62 | # Perform forward and backward pass to get the gradients
63 | s = forward(x, weights[0], weights[1])
64 |
65 | # Compute the loss
66 | loss = (y[0] - s[-1, -1]) ** 2
67 |
68 | # Store the loss and weights values for later display
69 | losses.append(loss)
70 |
71 | weights_u.append(weights[0])
72 | weights_w.append(weights[1])
73 |
74 | gradients = backward(x, s, y, weights[1])
75 |
76 | # Update each parameter `p` by p = p - (gradient * learning_rate).
77 | # `gp` is the gradient of parameter `p`
78 | weights = tuple((p - gp * learning_rate) for p, gp in zip(weights, gradients))
79 |
80 | print(weights)
81 |
82 | return np.array(losses), np.array(weights_u), np.array(weights_w)
83 |
84 |
85 | def plot_training(losses, weights_u, weights_w):
86 | import matplotlib.pyplot as plt
87 |
88 | # remove nan and inf values
89 | losses = losses[~np.isnan(losses)][:-1]
90 | weights_u = weights_u[~np.isnan(weights_u)][:-1]
91 | weights_w = weights_w[~np.isnan(weights_w)][:-1]
92 |
93 | # plot the weights U and W
94 | fig, ax1 = plt.subplots(figsize=(5, 3.4))
95 |
96 | ax1.set_ylim(-3, 2)
97 | ax1.set_xlabel('epochs')
98 | ax1.plot(weights_w, label='W', color='red', linestyle='--')
99 | ax1.plot(weights_u, label='U', color='blue', linestyle=':')
100 | ax1.legend(loc='upper left')
101 |
102 | # instantiate a second axis that shares the same x-axis
103 | # plot the loss on the second axis
104 | ax2 = ax1.twinx()
105 |
106 |     # increase the y-limit to keep exploding-gradient losses visible
107 | ax2.set_ylim(-3, 200)
108 | ax2.plot(losses, label='Loss', color='green')
109 | ax2.tick_params(axis='y', labelcolor='green')
110 | ax2.legend(loc='upper right')
111 |
112 | fig.tight_layout()
113 |
114 | plt.show()
115 |
116 |
117 | # Use these inputs for normal training
118 | # The first dimension represents the mini-batch
119 | x = np.array([[0, 0, 0, 0, 1, 0, 1, 0, 1, 0]])
120 | y = np.array([3])
121 |
122 | # Use these inputs to reproduce the exploding gradients scenario
123 | # x = np.array([[0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0]])
124 | # y = np.array([12])
125 |
126 | losses, weights_u, weights_w = train(x, y, epochs=150)
127 | plot_training(losses, weights_u, weights_w)
128 |
--------------------------------------------------------------------------------
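
An illustrative finite-difference check (meant to run in the same module, so that forward() and backward() above are in scope; the input values are arbitrary) that the gradients accumulated by backpropagation through time match a numerical estimate:

import numpy as np

x_check = np.array([[0., 1., 0., 1.]])
y_check = np.array([2.0])
U, W = 0.3, 0.6

s = forward(x_check, U, W)
gU, gW = backward(x_check, s, y_check, W)

def loss_at(u, w):
    return (y_check[0] - forward(x_check, u, w)[-1, -1]) ** 2

eps = 1e-6
num_gU = (loss_at(U + eps, W) - loss_at(U - eps, W)) / (2 * eps)
num_gW = (loss_at(U, W + eps) - loss_at(U, W - eps)) / (2 * eps)
print(gU, num_gU)  # analytic vs. numeric gradient for U: should agree closely
print(gW, num_gW)  # analytic vs. numeric gradient for W
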
/Chapter07/language model/data_processing.py:
--------------------------------------------------------------------------------
1 | """Process text file for language model training."""
2 | from __future__ import print_function, division
3 |
4 | import codecs
5 | import re
6 |
7 | filepath = 'war_and_peace.txt' # in
8 | out_file = 'wap.txt' # out
9 |
10 | # Regexes used to clean up the text
11 | NEW_LINE_IN_PARAGRAPH_REGEX = re.compile(r'(\S)\n(\S)')
12 | MULTIPLE_NEWLINES_REGEX = re.compile(r'(\n)(\n)+')
13 |
14 | # Read text as string
15 | with codecs.open(filepath, encoding='utf-8', mode='r') as f_input:
16 | book_str = f_input.read()
17 |
18 | # Cleanup
19 | book_str = NEW_LINE_IN_PARAGRAPH_REGEX.sub(r'\g<1> \g<2>', book_str)
20 | book_str = MULTIPLE_NEWLINES_REGEX.sub('\n\n', book_str)
21 |
22 | # Write processed text to file
23 | with codecs.open(out_file, encoding='utf-8', mode='w') as f_output:
24 | f_output.write(book_str)
25 |
--------------------------------------------------------------------------------
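
A self-contained demo (toy string) of what the two cleanup regexes above do: line breaks inside a paragraph become spaces, and runs of blank lines collapse to a single blank line:

import re

NEW_LINE_IN_PARAGRAPH_REGEX = re.compile(r'(\S)\n(\S)')
MULTIPLE_NEWLINES_REGEX = re.compile(r'(\n)(\n)+')

text = 'first line\nsecond line\n\n\n\nnext paragraph'
text = NEW_LINE_IN_PARAGRAPH_REGEX.sub(r'\g<1> \g<2>', text)
text = MULTIPLE_NEWLINES_REGEX.sub('\n\n', text)
print(repr(text))  # 'first line second line\n\nnext paragraph'
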
/Chapter07/language model/data_reader.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 |
3 | import codecs
4 |
5 | import numpy as np
6 | from six.moves import range
7 |
8 |
9 | class DataReader(object):
10 | """Data reader used for training language model."""
11 |
12 | def __init__(self, filepath, batch_length, batch_size):
13 | self.batch_length = batch_length
14 | self.batch_size = batch_size
15 | # Read data into string
16 | with codecs.open(filepath, encoding='utf-8', mode='r') as f:
17 | self.data_str = f.read()
18 | self.data_length = len(self.data_str)
19 | print('data_length: ', self.data_length)
20 | # Create a list of characters, indices are class indices for softmax
21 | char_set = set()
22 | for ch in self.data_str:
23 | char_set.add(ch)
24 | self.char_list = sorted(list(char_set))
25 | print('char_list: ', len(self.char_list), self.char_list)
26 | # Create reverse mapping to look up the index based on the character
27 | self.char_dict = {val: idx for idx, val in enumerate(self.char_list)}
28 | print('char_dict: ', self.char_dict)
29 |         # Initialise random start indices
30 | self.reset_indices()
31 |
32 |     def reset_indices(self):
33 |         self.start_idxs = np.random.randint(
34 |             0, self.data_length, self.batch_size)
35 |
36 | def get_sample(self, start_idx, length):
37 | # Get a sample and wrap around the data string
38 | return [self.char_dict[self.data_str[i % self.data_length]]
39 | for i in range(start_idx, start_idx + length)]
40 |
41 | def get_input_target_sample(self, start_idx):
42 | sample = self.get_sample(start_idx, self.batch_length + 1)
43 | inpt = sample[0:self.batch_length]
44 | trgt = sample[1:self.batch_length + 1]
45 | return inpt, trgt
46 |
47 | def get_batch(self, start_idxs):
48 | input_batch = np.zeros((self.batch_size, self.batch_length),
49 | dtype=np.int32)
50 | target_batch = np.zeros((self.batch_size, self.batch_length),
51 | dtype=np.int32)
52 | for i, start_idx in enumerate(start_idxs):
53 | inpt, trgt = self.get_input_target_sample(start_idx)
54 | input_batch[i, :] = inpt
55 | target_batch[i, :] = trgt
56 | return input_batch, target_batch
57 |
58 | def __iter__(self):
59 | while True:
60 | input_batch, target_batch = self.get_batch(self.start_idxs)
61 | self.start_idxs = (
62 | self.start_idxs + self.batch_length) % self.data_length
63 | yield input_batch, target_batch
64 |
65 |
66 | def main():
67 | filepath = './wap.txt'
68 | batch_length = 10
69 | batch_size = 2
70 | reader = DataReader(filepath, batch_length, batch_size)
71 | s = 'As in the question of astronomy then, so in the question of history now,'
72 | print([reader.char_dict[c] for c in s])
73 |
74 |
75 | if __name__ == "__main__":
76 | main()
77 |
--------------------------------------------------------------------------------
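
A minimal usage sketch (assumes wap.txt exists, i.e. data_processing.py has been run, and that the DataReader class above is in scope), showing the next-character setup where each target sequence is its input sequence shifted by one character:

reader = DataReader('./wap.txt', batch_length=10, batch_size=2)
input_batch, target_batch = next(iter(reader))
print(input_batch.shape, target_batch.shape)  # (2, 10) (2, 10)
print(input_batch[0, 1:])    # characters 1..9 of the input match...
print(target_batch[0, :-1])  # ...characters 0..8 of the target
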
/Chapter07/language model/model.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import print_function, division
3 |
4 | import time
5 |
6 | import data_reader
7 | import numpy as np
8 | import tensorflow as tf
9 |
10 |
11 | class Model(object):
12 | """RNN language model."""
13 |
14 | def __init__(self, batch_size, sequence_length, lstm_sizes, dropout,
15 | labels, save_path):
16 | self.batch_size = batch_size
17 | self.sequence_length = sequence_length
18 | self.lstm_sizes = lstm_sizes
19 | self.labels = labels
20 | self.label_map = {val: idx for idx, val in enumerate(labels)}
21 | self.number_of_characters = len(labels)
22 | self.save_path = save_path
23 | self.dropout = dropout
24 |
25 | def init_graph(self):
26 | # Variable sequence length
27 | self.inputs = tf.placeholder(
28 | tf.int32, [self.batch_size, self.sequence_length])
29 | self.targets = tf.placeholder(
30 | tf.int32, [self.batch_size, self.sequence_length])
31 | self.init_architecture()
32 | self.saver = tf.train.Saver(tf.trainable_variables())
33 |
34 | def init_architecture(self):
35 | # Define a multilayer LSTM cell
36 | self.one_hot_inputs = tf.one_hot(
37 | self.inputs, depth=self.number_of_characters)
38 | cell_list = [tf.nn.rnn_cell.LSTMCell(lstm_size) for lstm_size in self.lstm_sizes]
39 | self.multi_cell_lstm = tf.nn.rnn_cell.MultiRNNCell(cell_list)
40 | # Initial state of the LSTM memory.
41 | # Keep state in graph memory to use between batches
42 | self.initial_state = self.multi_cell_lstm.zero_state(
43 | self.batch_size, tf.float32)
44 | # Convert to variables so that the state can be stored between batches
45 | # Note that LSTM states is a tuple of tensors, this structure has to be
46 | # re-created in order to use as LSTM state.
47 | self.state_variables = tf.contrib.framework.nest.pack_sequence_as(
48 | self.initial_state,
49 | [tf.Variable(var, trainable=False)
50 | for var in tf.contrib.framework.nest.flatten(self.initial_state)])
51 | # Define the rnn through time
52 | lstm_output, final_state = tf.nn.dynamic_rnn(
53 | cell=self.multi_cell_lstm, inputs=self.one_hot_inputs,
54 | initial_state=self.state_variables)
55 | # Force the initial state to be set to the new state for the next batch
56 | # before returning the output
57 | store_states = [
58 | state_variable.assign(new_state)
59 | for (state_variable, new_state) in zip(
60 | tf.contrib.framework.nest.flatten(self.state_variables),
61 | tf.contrib.framework.nest.flatten(final_state))]
62 | with tf.control_dependencies(store_states):
63 | lstm_output = tf.identity(lstm_output)
64 | # Reshape so that we can apply the linear transformation to all outputs
65 | output_flat = tf.reshape(lstm_output, (-1, self.lstm_sizes[-1]))
66 | # Define output layer
67 | self.logit_weights = tf.Variable(
68 | tf.truncated_normal(
69 | (self.lstm_sizes[-1], self.number_of_characters), stddev=0.01),
70 | name='logit_weights')
71 | self.logit_bias = tf.Variable(
72 | tf.zeros((self.number_of_characters)), name='logit_bias')
73 | # Apply last layer transformation
74 | self.logits_flat = tf.matmul(
75 | output_flat, self.logit_weights) + self.logit_bias
76 | probabilities_flat = tf.nn.softmax(self.logits_flat)
77 | self.probabilities = tf.reshape(
78 | probabilities_flat,
79 | (self.batch_size, -1, self.number_of_characters))
80 |
81 | def init_train_op(self, optimizer):
82 | # Flatten the targets to be compatible with the flattened logits
83 | targets_flat = tf.reshape(self.targets, (-1,))
84 | # Get the loss over all outputs
85 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
86 | logits=self.logits_flat, labels=targets_flat, name='x_entropy')
87 | self.loss = tf.reduce_mean(loss)
88 | trainable_variables = tf.trainable_variables()
89 | gradients = tf.gradients(loss, trainable_variables)
90 | gradients, _ = tf.clip_by_global_norm(gradients, 5)
91 | self.train_op = optimizer.apply_gradients(zip(gradients, trainable_variables))
92 |
93 | def sample(self, session, prime_string, sample_length):
94 | self.reset_state(session)
95 | # Prime state
96 | print('prime_string: ', prime_string)
97 | for character in prime_string:
98 | character_idx = self.label_map[character]
99 | out = session.run(
100 | self.probabilities,
101 | feed_dict={self.inputs: np.asarray([[character_idx]])})
102 | output_sample = prime_string
103 | print('start sampling')
104 | # Sample for sample_length steps
105 | for _ in range(sample_length):
106 | sample_label = np.random.choice(
107 | self.labels, size=(1), p=out[0, 0])[0]
108 | output_sample += sample_label
109 | sample_idx = self.label_map[sample_label]
110 | out = session.run(
111 | self.probabilities,
112 | feed_dict={self.inputs: np.asarray([[sample_idx]])})
113 |
114 | return output_sample
115 |
116 | def reset_state(self, session):
117 | for state in tf.contrib.framework.nest.flatten(self.state_variables):
118 | session.run(state.initializer)
119 |
120 | def save(self, sess):
121 | self.saver.save(sess, self.save_path)
122 |
123 | def restore(self, sess):
124 | self.saver.restore(sess, self.save_path)
125 |
126 |
127 | def train_and_sample(minibatch_iterations, restore):
128 | tf.reset_default_graph()
129 | batch_size = 64
130 | lstm_sizes = [512, 512]
131 | batch_len = 100
132 | learning_rate = 2e-3
133 |
134 | filepath = './wap.txt'
135 |
136 | data_feed = data_reader.DataReader(
137 | filepath, batch_len, batch_size)
138 | labels = data_feed.char_list
139 | print('labels: ', labels)
140 |
141 | save_path = './model.tf'
142 | model = Model(
143 | batch_size, batch_len, lstm_sizes, 0.8, labels,
144 | save_path)
145 | model.init_graph()
146 | optimizer = tf.train.AdamOptimizer(learning_rate)
147 | model.init_train_op(optimizer)
148 |
149 |     init_op = tf.global_variables_initializer()
150 | with tf.Session() as sess:
151 | sess.run(init_op)
152 | if restore:
153 | print('Restoring model')
154 | model.restore(sess)
155 | model.reset_state(sess)
156 | start_time = time.time()
157 | for i in range(minibatch_iterations):
158 | input_batch, target_batch = next(iter(data_feed))
159 | loss, _ = sess.run(
160 | [model.loss, model.train_op],
161 | feed_dict={model.inputs: input_batch, model.targets: target_batch})
162 | if i % 50 == 0 and i != 0:
163 | print('i: ', i)
164 | duration = time.time() - start_time
165 | print('loss: {} ({} sec.)'.format(loss, duration))
166 | start_time = time.time()
167 | if i % 1000 == 0 and i != 0:
168 | model.save(sess)
169 | if i % 100 == 0 and i != 0:
170 | print('Reset initial state')
171 | model.reset_state(sess)
172 | if i % 1000 == 0 and i != 0:
173 | print('Reset minibatch feeder')
174 | data_feed.reset_indices()
175 | model.save(sess)
176 |
177 |     print('\nSampling after {} iterations'.format(minibatch_iterations))
178 | tf.reset_default_graph()
179 | model = Model(
180 | 1, None, lstm_sizes, 1.0, labels, save_path)
181 | model.init_graph()
182 |     init_op = tf.global_variables_initializer()
183 | with tf.Session() as sess:
184 | sess.run(init_op)
185 | model.restore(sess)
186 | print('\nSample 1:')
187 | sample = model.sample(
188 | sess, prime_string=u'\n\nThis feeling was ', sample_length=500)
189 | print(u'sample: \n{}'.format(sample))
190 | print('\nSample 2:')
191 | sample = model.sample(
192 | sess, prime_string=u'She was born in the year ', sample_length=500)
193 | print(u'sample: \n{}'.format(sample))
194 | print('\nSample 3:')
195 | sample = model.sample(
196 | sess, prime_string=u'The meaning of this all is ',
197 | sample_length=500)
198 | print(u'sample: \n{}'.format(sample))
199 | print('\nSample 4:')
200 | sample = model.sample(
201 | sess,
202 |             prime_string=u'In the midst of a conversation on political matters Anna Pávlovna burst out: ',
203 | sample_length=500)
204 | print(u'sample: \n{}'.format(sample))
205 | print('\nSample 5:')
206 | sample = model.sample(
207 | sess, prime_string=u'\n\nCHAPTER X\n\n',
208 | sample_length=500)
209 | print(u'sample: \n{}'.format(sample))
210 |         print('\nSample 6:')
211 | sample = model.sample(
212 | sess, prime_string=u'"If only you knew,"',
213 | sample_length=500)
214 | print(u'sample: \n{}'.format(sample))
215 |
216 |
217 | def main():
218 |     total_iterations = 500
219 |     print('\n\n\nTrain for {}'.format(total_iterations))
220 |     print('Total iters: {}'.format(total_iterations))
221 |     train_and_sample(total_iterations, restore=False)
222 | for i in [500, 1000, 3000, 5000, 10000, 30000, 50000, 100000, 300000]:
223 | total_iterations += i
224 | print('\n\n\nTrain for {}'.format(i))
225 | print('Total iters: {}'.format(total_iterations))
226 | train_and_sample(i, restore=True)
227 |
228 |
229 | if __name__ == "__main__":
230 | main()
231 |
--------------------------------------------------------------------------------
/Chapter08/chapter_08_001.py:
--------------------------------------------------------------------------------
1 | import random
2 | from collections import deque
3 |
4 | import gym
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import tensorflow as tf
8 |
9 | env = gym.make('CartPole-v0')
10 |
11 | # Build the network
12 | input_size = env.observation_space.shape[0]
13 |
14 | input_placeholder = tf.placeholder("float", [None, input_size])
15 |
16 | # weights and bias of the hidden layer
17 | weights_1 = tf.Variable(tf.truncated_normal([input_size, 20], stddev=0.01))
18 | bias_1 = tf.Variable(tf.constant(0.0, shape=[20]))
19 |
20 | # weights and bias of the output layer
21 | weights_2 = tf.Variable(tf.truncated_normal([20, env.action_space.n], stddev=0.01))
22 | bias_2 = tf.Variable(tf.constant(0.0, shape=[env.action_space.n]))
23 |
24 | hidden_layer = tf.nn.tanh(tf.matmul(input_placeholder, weights_1) + bias_1)
25 | output_layer = tf.matmul(hidden_layer, weights_2) + bias_2
26 |
27 | action_placeholder = tf.placeholder("float", [None, env.action_space.n])
28 | target_placeholder = tf.placeholder("float", [None])
29 |
30 | # network estimation
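# action_placeholder is one-hot, so multiply + reduce_sum extracts the
# Q-value estimate of the action that was actually taken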
31 | q_estimation = tf.reduce_sum(tf.multiply(output_layer, action_placeholder), axis=1)
32 |
33 | # loss function
34 | loss = tf.reduce_mean(tf.square(target_placeholder - q_estimation))
35 |
36 | # Use Adam
37 | train_operation = tf.train.AdamOptimizer().minimize(loss)
38 |
39 | # initialize TF variables
40 | session = tf.Session()
41 | session.run(tf.global_variables_initializer())
42 |
43 |
44 | def choose_next_action(state, rand_action_prob):
45 | """
46 | Simplified e-greedy policy
47 | :param state: current state
48 | :param rand_action_prob: probability to select random action
49 | """
50 |
51 | new_action = np.zeros([env.action_space.n])
52 |
53 | if random.random() <= rand_action_prob:
54 | # choose an action randomly
55 | action_index = random.randrange(env.action_space.n)
56 | else:
57 | # choose an action given our state
58 | action_values = session.run(output_layer, feed_dict={input_placeholder: [state]})[0]
59 | # we will take the highest value action
60 | action_index = np.argmax(action_values)
61 |
62 | new_action[action_index] = 1
63 | return new_action
64 |
65 |
66 | def train(mini_batch):
67 | """
68 | Train the network on a single minibatch
69 | :param mini_batch: the mini-batch
70 | """
71 |
72 | last_state, last_action, reward, current_state, terminal = range(5)
73 |
74 | # get the batch variables
75 | previous_states = [d[last_state] for d in mini_batch]
76 | actions = [d[last_action] for d in mini_batch]
77 | rewards = [d[reward] for d in mini_batch]
78 | current_states = [d[current_state] for d in mini_batch]
79 | agents_expected_reward = []
80 |
81 |     # this gives us the agent's expected reward for each action we might take
82 | agents_reward_per_action = session.run(output_layer,
83 | feed_dict={input_placeholder: current_states})
84 | for i in range(len(mini_batch)):
85 | if mini_batch[i][terminal]:
86 | # this was a terminal frame so there is no future reward...
87 | agents_expected_reward.append(rewards[i])
88 | else:
89 | # otherwise compute expected reward
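# using the Q-learning target: r + gamma * max_a' Q(s', a')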
90 | discount_factor = 0.9
91 | agents_expected_reward.append(
92 | rewards[i] + discount_factor * np.max(agents_reward_per_action[i]))
93 |
94 | # learn that these actions in these states lead to this reward
95 | session.run(train_operation, feed_dict={
96 | input_placeholder: previous_states,
97 | action_placeholder: actions,
98 | target_placeholder: agents_expected_reward})
99 |
100 |
101 | def q_learning():
102 | """The Q-learning method"""
103 |
104 | episode_lengths = list()
105 |
106 | # Experience replay buffer and definition
107 | observations = deque(maxlen=200000)
108 |
109 | # Set the first action to nothing
110 | last_action = np.zeros(env.action_space.n)
111 | last_action[1] = 1
112 | last_state = env.reset()
113 |
114 | total_reward = 0
115 | episode = 1
116 |
117 | time_step = 0
118 |
119 | # Initial chance to select random action
120 | rand_action_prob = 1.0
121 |
122 | while episode <= 400:
123 | # render the cart pole on the screen
124 | # comment this for faster execution
125 | # env.render()
126 |
127 | # select action following the policy
128 | last_action = choose_next_action(last_state, rand_action_prob)
129 |
130 | # take action and receive new state and reward
131 | current_state, reward, terminal, info = env.step(np.argmax(last_action))
132 | total_reward += reward
133 |
134 | if terminal:
135 | reward = -1.
136 | episode_lengths.append(time_step)
137 |
138 | print("Episode: %s; Steps before fail: %s; Epsilon: %.2f reward %s" %
139 | (episode, time_step, rand_action_prob, total_reward))
140 | total_reward = 0
141 |
142 | # store the transition in previous_observations
143 | observations.append((last_state, last_action, reward, current_state, terminal))
144 |
145 | # only train if done observing
146 | min_experience_replay_size = 5000
147 | if len(observations) > min_experience_replay_size:
148 | # mini-batch of 128 from the experience replay observations
149 | mini_batch = random.sample(observations, 128)
150 |
151 | # train the network
152 | train(mini_batch)
153 |
154 | time_step += 1
155 |
156 | # reset the environment
157 | if terminal:
158 | last_state = env.reset()
159 | time_step = 0
160 | episode += 1
161 | else:
162 | last_state = current_state
163 |
164 | # gradually reduce the probability of a random action
165 | # starting from 1 and going to 0
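# (linearly, over 15,000 steps once training has begun)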
166 | if rand_action_prob > 0 and len(observations) > min_experience_replay_size:
167 | rand_action_prob -= 1.0 / 15000
168 |
169 | # display episodes length
170 | plt.xlabel("Episode")
171 | plt.ylabel("Length (steps)")
172 | plt.plot(episode_lengths, label='Episode length')
173 | plt.show()
174 |
175 |
176 | q_learning()
177 |
--------------------------------------------------------------------------------
/Chapter09/chapter_09_001_ddqn.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import random
4 | import zlib
5 | from collections import deque
6 | from collections import namedtuple
7 |
8 | import gym
9 | import matplotlib.pyplot as plt
10 | import numpy as np
11 | import tensorflow as tf
12 |
13 | resume = True # resume training from checkpoint (if it exists)
14 | CHECKPOINT_PATH = 'deep_q_breakout_path_8'
15 | MB_SIZE = 32 # mini-batch size
16 | ER_BUFFER_SIZE = 1000000 # experience replay (ER) buffer size
17 | COMPRESS_ER = True # compress transitions in the ER buffer
18 | EXPLORE_STEPS = 1000000 # frames over which to anneal epsilon
19 | EPSILON_START = 1.0 # starting chance of an action being random
20 | EPSILON_END = 0.01 # final chance of an action being random
21 | STATE_FRAMES = 4 # number of frames to store in the state
22 | SAVE_EVERY_X_STEPS = 10000 # how often to save the model on the disk
23 | UPDATE_Q_NET_FREQ = 1 # how often to update the q network
24 | UPDATE_TARGET_NET_EVERY_X_STEPS = 10000 # copy the q-net weights to the target net
25 | DISCOUNT_FACTOR = 0.99 # discount factor
26 |
27 |
28 | def initialize():
29 | """Initialize the session, the networks, and the environment"""
30 | # Create environment
31 | env = gym.envs.make("BreakoutDeterministic-v4")
32 |
33 | tf.reset_default_graph()
34 |
35 | session = tf.Session()
36 |
37 |     # Tracks the total number of training steps
38 | tf.Variable(0, name='global_step', trainable=False)
39 |
40 | # Create q- and target- networks
41 | q_network = build_network("q_network")
42 | t_network = build_network("target_network")
43 |
44 | # create the operations to copy the q-net weights to the t-net
45 | q_net_weights = [t for t in tf.trainable_variables() if t.name.startswith(q_network.scope)]
46 | q_net_weights = sorted(q_net_weights, key=lambda v: v.name)
47 | t_net_weights = [t for t in tf.trainable_variables() if t.name.startswith(t_network.scope)]
48 | t_net_weights = sorted(t_net_weights, key=lambda v: v.name)
49 |
50 | t_net_updates = \
51 | [n2_v.assign(n1_v) for n1_v, n2_v in zip(q_net_weights, t_net_weights)]
52 |
53 | # pre-processor of game frames
54 | frame_proc = frame_preprocessor()
55 |
56 | optimizer = tf.train.AdamOptimizer(0.00025)
57 | # optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
58 |
59 | # training op
60 | train_op = optimizer.minimize(q_network.loss, global_step=tf.train.get_global_step())
61 |
62 | # restore checkpoint
63 | saver = tf.train.Saver()
64 |
65 | if not os.path.exists(CHECKPOINT_PATH):
66 | os.mkdir(CHECKPOINT_PATH)
67 |
68 | checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_PATH)
69 | if resume and checkpoint:
70 | session.run(tf.global_variables_initializer())
71 | session.run(tf.local_variables_initializer())
72 |
73 | print("\nRestoring checkpoint...")
74 | saver.restore(session, checkpoint.model_checkpoint_path)
75 | else:
76 | session.run(tf.global_variables_initializer())
77 | session.run(tf.local_variables_initializer())
78 |
79 | return session, \
80 | q_network, \
81 | t_network, \
82 | t_net_updates, \
83 | frame_proc, \
84 | saver, \
85 | train_op, \
86 | env
87 |
88 |
89 | def build_network(scope: str, input_size=84, num_actions=4):
90 | """Builds the network graph."""
91 |
92 | with tf.variable_scope(scope):
93 |         # Our input is STATE_FRAMES grayscale frames of shape 84x84 each
94 | input_placeholder = tf.placeholder(dtype=np.float32,
95 | shape=[None, input_size, input_size, STATE_FRAMES])
96 |
97 | normalized_input = tf.to_float(input_placeholder) / 255.0
98 |
99 | # action prediction
100 | action_placeholder = tf.placeholder(dtype=tf.int32, shape=[None])
101 |
102 | # target action
103 | target_placeholder = tf.placeholder(dtype=np.float32, shape=[None])
104 |
105 | # Convolutional layers
106 | conv_1 = tf.layers.conv2d(normalized_input, 32, 8, 4,
107 | activation=tf.nn.relu)
108 | conv_2 = tf.layers.conv2d(conv_1, 64, 4, 2,
109 | activation=tf.nn.relu)
110 | conv_3 = tf.layers.conv2d(conv_2, 64, 3, 1,
111 | activation=tf.nn.relu)
112 |
113 | # Fully connected layers
114 | flattened = tf.layers.flatten(conv_3)
115 | fc_1 = tf.layers.dense(flattened, 512,
116 | activation=tf.nn.relu)
117 |
118 | q_estimation = tf.layers.dense(fc_1, num_actions)
119 |
120 | # Get the predictions for the chosen actions only
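# (flatten q_estimation and gather at offsets i * n_actions + action[i],
# which selects q_estimation[i, action[i]] for each sample i)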
121 | batch_size = tf.shape(normalized_input)[0]
122 | gather_indices = tf.range(batch_size) * tf.shape(q_estimation)[1] + action_placeholder
123 | action_predictions = tf.gather(tf.reshape(q_estimation, [-1]), gather_indices)
124 |
125 | # Calculate the loss
126 | # loss = tf.reduce_mean(tf.squared_difference(target_placeholder, action_predictions))
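# the Huber loss is quadratic for small errors and linear for large ones,
# which makes training less sensitive to outlier targets than the
# squared loss commented out above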
127 | loss = tf.losses.huber_loss(labels=target_placeholder, predictions=action_predictions, reduction=tf.losses.Reduction.MEAN)
128 |
129 | Network = namedtuple('Network',
130 | 'scope '
131 | 'input_placeholder '
132 | 'action_placeholder '
133 | 'target_placeholder '
134 | 'q_estimation '
135 | 'action_predictions '
136 | 'loss ')
137 |
138 | return Network(scope=scope,
139 | input_placeholder=input_placeholder,
140 | action_placeholder=action_placeholder,
141 | target_placeholder=target_placeholder,
142 | q_estimation=q_estimation,
143 | action_predictions=action_predictions,
144 | loss=loss)
145 |
146 |
147 | def choose_next_action(state, net, epsilon):
148 | """Epsilon-greedy policy"""
149 |
150 | # choose an action given our last state
151 | tmp = np.ones(env.action_space.n, dtype=float) * epsilon / env.action_space.n
152 | q_estimations = session.run(net.q_estimation,
153 | feed_dict={net.input_placeholder: np.reshape(state, (1,) + state.shape)})[0]
154 |
155 | tmp[np.argmax(q_estimations)] += (1.0 - epsilon)
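# tmp now holds the epsilon-greedy distribution: each action gets
# epsilon / n, and the greedy action receives the remaining 1 - epsilon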
156 |
157 | new_action = np.random.choice(np.arange(len(tmp)), p=tmp)
158 |
159 | return new_action
160 |
161 |
162 | def frame_preprocessor():
163 | """Pre-processing the input data"""
164 |
165 | with tf.variable_scope("frame_processor"):
166 | input_placeholder = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8)
167 | processed_frame = tf.image.rgb_to_grayscale(input_placeholder)
168 | processed_frame = tf.image.crop_to_bounding_box(processed_frame, 34, 0, 160, 160)
169 | processed_frame = tf.image.resize_images(
170 | processed_frame,
171 | [84, 84],
172 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
173 |
174 | processed_frame = tf.squeeze(processed_frame)
175 |
176 | FramePreprocessor = namedtuple('FramePreprocessor', 'input_placeholder processed_frame')
177 |
178 | return FramePreprocessor(
179 | input_placeholder=input_placeholder,
180 | processed_frame=processed_frame)
181 |
182 |
183 | def populate_experience_replay_buffer(buffer: deque, initial_buffer_size: int):
184 | """Initial population of the experience replay buffer"""
185 |
186 | # Initialize epsilon based on the current step
187 | epsilon_step = (EPSILON_START - EPSILON_END) / EXPLORE_STEPS
188 | epsilon = max(EPSILON_END,
189 | EPSILON_START -
190 | session.run(tf.train.get_global_step()) * epsilon_step)
191 |
192 | # Populate the replay memory with initial experience
193 | state = env.reset()
194 | state = session.run(frame_proc.processed_frame,
195 | feed_dict={frame_proc.input_placeholder: state})
196 |
197 | state = np.stack([state] * STATE_FRAMES, axis=2)
198 |
199 | for i in range(initial_buffer_size):
200 |
201 | # Sample next state with the q_network
202 | action = choose_next_action(state, q_network, epsilon)
203 |
204 | # Perform one action step
205 | next_state, reward, terminal, info = env.step(action)
206 | next_state = session.run(frame_proc.processed_frame,
207 | feed_dict={frame_proc.input_placeholder: next_state})
208 |
209 | # Stack the game frames in a single array
210 | next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)
211 |
212 | # Store the experience in ER
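# compressing each transition (pickle + zlib) trades CPU time for a much
# smaller memory footprint across the million-entry buffer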
213 | if COMPRESS_ER:
214 | buffer.append(
215 | zlib.compress(
216 | pickle.dumps((state, action, reward, next_state, terminal), 2), 2))
217 | else:
218 | buffer.append((state, action, reward, next_state, terminal))
219 |
220 | # Set next state as current
221 | if terminal:
222 | state = env.reset()
223 | state = session.run(frame_proc.processed_frame,
224 | feed_dict={frame_proc.input_placeholder: state})
225 |
226 | state = np.stack([state] * STATE_FRAMES, axis=2)
227 | else:
228 | state = next_state
229 |
230 | print("\rExperience replay buffer: {} / {} initial ({} total)".format(
231 | len(buffer), initial_buffer_size, buffer.maxlen), end="")
232 |
233 |
234 | def plot_stats(stats):
235 | """Plot the stats"""
236 | plt.figure()
237 |
238 | plt.xlabel("Episode")
239 |
240 | # plot the rewards
241 | # rolling mean of 50
242 | cumsum = np.cumsum(np.insert(stats.rewards, 0, 0))
243 | rewards = (cumsum[50:] - cumsum[:-50]) / float(50)
244 |
245 | fig, ax1 = plt.subplots()
246 |
247 | color = 'tab:red'
248 |
249 | ax1.set_ylabel('Reward', color=color)
250 | ax1.plot(rewards, color=color)
251 | ax1.tick_params(axis='y', labelcolor=color)
252 |
253 | # plot the episode lengths
254 | # rolling mean of 50
255 | cumsum = np.cumsum(np.insert(stats.lengths, 0, 0))
256 | lengths = (cumsum[50:] - cumsum[:-50]) / float(50)
257 |
258 | ax2 = ax1.twinx()
259 |
260 | color = 'tab:blue'
261 | ax2.set_ylabel('Length', color=color)
262 | ax2.plot(lengths, color=color)
263 | ax2.tick_params(axis='y', labelcolor=color)
264 |
265 |
266 | def deep_q_learning():
267 | """The Q-learning training process"""
268 |
269 | # build experience replay
270 | observations = deque(maxlen=ER_BUFFER_SIZE)
271 |
272 | print("Populating replay memory...")
273 | populate_experience_replay_buffer(observations, 100000)
274 |
275 | # initialize statistics
276 | stats = namedtuple('Stats', 'rewards lengths')(rewards=list(), lengths=list())
277 | global_time = session.run(tf.train.get_global_step())
278 | time = 0
279 |
280 | episode = 1
281 |
282 | episode_reward = 0
283 | global_reward = 0
284 |
285 | # Start the training with an initial state
286 | state = env.reset()
287 | state = session.run(frame_proc.processed_frame,
288 | feed_dict={frame_proc.input_placeholder: state})
289 | state = np.stack([state] * STATE_FRAMES, axis=2)
290 |
291 | while True:
292 | # env.render()
293 |
294 | # Initialize epsilon based on the current step
295 | epsilon_step = (EPSILON_START - EPSILON_END) / EXPLORE_STEPS
296 | epsilon = max(EPSILON_END, EPSILON_START - (global_time - 1) * epsilon_step)
297 |
298 | # Copy q-net weights to the target-net
299 | if global_time % UPDATE_TARGET_NET_EVERY_X_STEPS == 0:
300 | session.run(t_net_updates)
301 | print("\nCopied model parameters to target network.")
302 |
303 | # Sample next action
304 | action = choose_next_action(state, q_network, epsilon)
305 |
306 | # Perform one step with the selected action
307 | next_state, reward, terminal, info = env.step(action)
308 |
309 | # This is how we pre-process
310 | next_state = session.run(frame_proc.processed_frame,
311 | feed_dict={frame_proc.input_placeholder: next_state})
312 |
313 | # Stack the game frames in a single array
314 | next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)
315 |
316 | # Store the experience in ER
317 | if COMPRESS_ER:
318 | observations.append(
319 | zlib.compress(pickle.dumps((state, action, reward, next_state, terminal), 2), 2))
320 | else:
321 | observations.append((state, action, reward, next_state, terminal))
322 |
323 | # Sample a mini-batch from the experience replay memory
324 | mini_batch = random.sample(observations, MB_SIZE)
325 | if COMPRESS_ER:
326 | mini_batch = [pickle.loads(zlib.decompress(comp_item)) for comp_item in mini_batch]
327 |
328 | states_batch, action_batch, reward_batch, next_states_batch, terminal_batch = \
329 | map(np.array, zip(*mini_batch))
330 |
331 | # Double Q-learning
332 | if global_time % UPDATE_Q_NET_FREQ == 0:
333 | # First predict the next q values with the q-network
334 | q_values_next = session.run(q_network.q_estimation,
335 | feed_dict={q_network.input_placeholder: next_states_batch})
336 |
337 | # The best action according to the q-network
338 | best_actions = np.argmax(q_values_next, axis=1)
339 |
340 | # Next, predict the next q values with the target-network
341 | q_values_next_target = session.run(t_network.q_estimation,
342 | feed_dict={t_network.input_placeholder: next_states_batch})
343 |
344 | # Calculate q values and targets
345 | # Use the t-network estimations
346 | # But with the best action, selected by the q-network (Double Q-learning)
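# i.e. targets = r + gamma * Q_target(s', argmax_a Q_q(s', a)),
# which reduces the overestimation bias of vanilla DQN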
347 | targets_batch = reward_batch + \
348 | np.invert(terminal_batch).astype(np.float32) * \
349 | DISCOUNT_FACTOR * \
350 | q_values_next_target[np.arange(MB_SIZE), best_actions]
351 |
352 | _, loss = session.run([train_op, q_network.loss],
353 | feed_dict={
354 | q_network.input_placeholder: states_batch,
355 | q_network.action_placeholder: action_batch,
356 | q_network.target_placeholder: targets_batch})
357 |
358 | episode_reward += reward
359 | global_reward += reward
360 | time += 1
361 | global_time += 1
362 |
363 | print("\rEpisode {}: "
364 | "time {:5}; "
365 | "reward {}; "
366 | "epsilon: {:.4f}; "
367 | "loss: {:.6f}; "
368 | "@ global step {} "
369 | "with total reward {}".format(
370 | episode,
371 | time,
372 | episode_reward,
373 | epsilon,
374 | loss,
375 | global_time,
376 | global_reward), end="")
377 |
378 | if terminal:
379 | # Episode end
380 |
381 | print()
382 |
383 | stats.rewards.append(int(episode_reward))
384 | stats.lengths.append(time)
385 |
386 | time = 0
387 | episode_reward = 0
388 | episode += 1
389 |
390 | state = env.reset()
391 | state = session.run(frame_proc.processed_frame,
392 | feed_dict={frame_proc.input_placeholder: state})
393 | state = np.stack([state] * STATE_FRAMES, axis=2)
394 | else:
395 | # Set next state as current
396 | state = next_state
397 |
398 | # save checkpoints for later
399 | if global_time % SAVE_EVERY_X_STEPS == 0:
400 | saver.save(session, CHECKPOINT_PATH + '/network',
401 | global_step=tf.train.get_global_step())
402 |
403 | # plot the results and save the figure
404 | plot_stats(stats)
405 |
406 | fig_file = CHECKPOINT_PATH + '/stats.png'
407 | if os.path.isfile(fig_file):
408 | os.remove(fig_file)
409 |
410 | plt.savefig(fig_file)
411 | plt.close()
412 |
413 | # save the stats
414 | with open(CHECKPOINT_PATH + '/stats.arr', 'wb') as f:
415 | pickle.dump((stats.rewards, stats.lengths), f)
416 |
417 |
418 | if __name__ == '__main__':
419 | session, q_network, t_network, t_net_updates, frame_proc, saver, train_op, env = \
420 | initialize()
421 | deep_q_learning()
422 |
--------------------------------------------------------------------------------
/Chapter09/chapter_09_001_dqn.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import random
4 | import zlib
5 | from collections import deque
6 | from collections import namedtuple
7 |
8 | import gym
9 | import matplotlib.pyplot as plt
10 | import numpy as np
11 | import tensorflow as tf
12 |
13 | resume = True # resume training from checkpoint (if it exists)
14 | CHECKPOINT_PATH = 'deep_q_breakout_path_7'
15 | MB_SIZE = 32 # mini-batch size
16 | ER_BUFFER_SIZE = 1000000 # experience replay (ER) buffer size
17 | COMPRESS_ER = True # compress transitions in the ER buffer
18 | EXPLORE_STEPS = 1000000 # frames over which to anneal epsilon
19 | EPSILON_START = 1.0 # starting chance of an action being random
20 | EPSILON_END = 0.1 # final chance of an action being random
21 | STATE_FRAMES = 4 # number of frames to store in the state
22 | SAVE_EVERY_X_STEPS = 10000 # how often to save the model on the disk
23 | UPDATE_Q_NET_FREQ = 1 # how often to update the q network
24 | UPDATE_TARGET_NET_EVERY_X_STEPS = 10000 # copy the q-net weights to the target net
25 | DISCOUNT_FACTOR = 0.99 # discount factor
26 |
27 |
28 | def initialize():
29 | """Initialize the session, the networks, and the environment"""
30 | # Create environment
31 | env = gym.envs.make("BreakoutDeterministic-v4")
32 |
33 | tf.reset_default_graph()
34 |
35 | session = tf.Session()
36 |
37 |     # Tracks the total number of training steps
38 | tf.Variable(0, name='global_step', trainable=False)
39 |
40 | # Create q- and target- networks
41 | q_network = build_network("q_network")
42 | t_network = build_network("target_network")
43 |
44 | # create the operations to copy the q-net weights to the t-net
45 | q_net_weights = [t for t in tf.trainable_variables()
46 | if t.name.startswith(q_network.scope)]
47 | q_net_weights = sorted(q_net_weights, key=lambda v: v.name)
48 | t_net_weights = [t for t in tf.trainable_variables()
49 | if t.name.startswith(t_network.scope)]
50 | t_net_weights = sorted(t_net_weights, key=lambda v: v.name)
51 |
52 | t_net_updates = \
53 | [n2_v.assign(n1_v) for n1_v, n2_v in zip(q_net_weights, t_net_weights)]
54 |
55 | # pre-processor of game frames
56 | frame_proc = frame_preprocessor()
57 |
58 | optimizer = tf.train.AdamOptimizer(0.00025)
59 | # optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
60 |
61 | # training op
62 | train_op = optimizer.minimize(q_network.loss, global_step=tf.train.get_global_step())
63 |
64 | # restore checkpoint
65 | saver = tf.train.Saver()
66 |
67 | if not os.path.exists(CHECKPOINT_PATH):
68 | os.mkdir(CHECKPOINT_PATH)
69 |
70 | checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_PATH)
71 | if resume and checkpoint:
72 | session.run(tf.global_variables_initializer())
73 | session.run(tf.local_variables_initializer())
74 |
75 | print("\nRestoring checkpoint...")
76 | saver.restore(session, checkpoint.model_checkpoint_path)
77 | else:
78 | session.run(tf.global_variables_initializer())
79 | session.run(tf.local_variables_initializer())
80 |
81 | return session, \
82 | q_network, \
83 | t_network, \
84 | t_net_updates, \
85 | frame_proc, \
86 | saver, \
87 | train_op, \
88 | env
89 |
90 |
91 | def build_network(scope: str, input_size=84, num_actions=4):
92 | """Builds the network graph."""
93 |
94 | with tf.variable_scope(scope):
95 |         # Our input is STATE_FRAMES grayscale frames of shape 84x84 each
96 | input_placeholder = tf.placeholder(dtype=np.float32,
97 | shape=[None, input_size, input_size, STATE_FRAMES])
98 |
99 | normalized_input = tf.to_float(input_placeholder) / 255.0
100 |
101 | # action prediction
102 | action_placeholder = tf.placeholder(dtype=tf.int32, shape=[None])
103 |
104 | # target action
105 | target_placeholder = tf.placeholder(dtype=np.float32, shape=[None])
106 |
107 | # Convolutional layers
108 | conv_1 = tf.layers.conv2d(normalized_input, 32, 8, 4,
109 | activation=tf.nn.relu)
110 | conv_2 = tf.layers.conv2d(conv_1, 64, 4, 2,
111 | activation=tf.nn.relu)
112 | conv_3 = tf.layers.conv2d(conv_2, 64, 3, 1,
113 | activation=tf.nn.relu)
114 |
115 | # Fully connected layers
116 | flattened = tf.layers.flatten(conv_3)
117 | fc_1 = tf.layers.dense(flattened, 512,
118 | activation=tf.nn.relu)
119 |
120 | q_estimation = tf.layers.dense(fc_1, num_actions)
121 |
122 | # Get the predictions for the chosen actions only
123 | batch_size = tf.shape(normalized_input)[0]
124 | gather_indices = tf.range(batch_size) * tf.shape(q_estimation)[1] + action_placeholder
125 | action_predictions = tf.gather(tf.reshape(q_estimation, [-1]), gather_indices)
126 |
127 | # Calculate the loss
128 | # loss = tf.reduce_mean(tf.squared_difference(target_placeholder, action_predictions))
129 | loss = tf.losses.huber_loss(labels=target_placeholder,
130 | predictions=action_predictions,
131 | reduction=tf.losses.Reduction.MEAN)
132 |
133 | Network = namedtuple('Network',
134 | 'scope '
135 | 'input_placeholder '
136 | 'action_placeholder '
137 | 'target_placeholder '
138 | 'q_estimation '
139 | 'action_predictions '
140 | 'loss ')
141 |
142 | return Network(scope=scope,
143 | input_placeholder=input_placeholder,
144 | action_placeholder=action_placeholder,
145 | target_placeholder=target_placeholder,
146 | q_estimation=q_estimation,
147 | action_predictions=action_predictions,
148 | loss=loss)
149 |
150 |
151 | def choose_next_action(state, net, epsilon):
152 | """Epsilon-greedy policy"""
153 |
154 | # choose an action given our last state
155 | tmp = np.ones(env.action_space.n, dtype=float) * epsilon / env.action_space.n
156 | q_estimations = session.run(net.q_estimation,
157 | feed_dict={net.input_placeholder: np.reshape(state, (1,) + state.shape)})[0]
158 |
159 | tmp[np.argmax(q_estimations)] += (1.0 - epsilon)
160 |
161 | new_action = np.random.choice(np.arange(len(tmp)), p=tmp)
162 |
163 | return new_action
164 |
165 |
166 | def frame_preprocessor():
167 | """Pre-processing the input data"""
168 |
169 | with tf.variable_scope("frame_processor"):
170 | input_placeholder = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8)
171 | processed_frame = tf.image.rgb_to_grayscale(input_placeholder)
172 | processed_frame = tf.image.crop_to_bounding_box(processed_frame, 34, 0, 160, 160)
173 | processed_frame = tf.image.resize_images(
174 | processed_frame,
175 | [84, 84],
176 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
177 |
178 | processed_frame = tf.squeeze(processed_frame)
179 |
180 | FramePreprocessor = namedtuple('FramePreprocessor', 'input_placeholder processed_frame')
181 |
182 | return FramePreprocessor(
183 | input_placeholder=input_placeholder,
184 | processed_frame=processed_frame)
185 |
186 |
187 | def populate_experience_replay_buffer(buffer: deque, initial_buffer_size: int):
188 | """Initial population of the experience replay buffer"""
189 |
190 | # Initialize epsilon based on the current step
191 | epsilon_step = (EPSILON_START - EPSILON_END) / EXPLORE_STEPS
192 | epsilon = max(EPSILON_END,
193 | EPSILON_START -
194 | session.run(tf.train.get_global_step()) * epsilon_step)
195 |
196 | # Populate the replay memory with initial experience
197 | state = env.reset()
198 | state = session.run(frame_proc.processed_frame,
199 | feed_dict={frame_proc.input_placeholder: state})
200 |
201 | state = np.stack([state] * STATE_FRAMES, axis=2)
202 |
203 | for i in range(initial_buffer_size):
204 |
205 | # Sample next state with the q_network
206 | action = choose_next_action(state, q_network, epsilon)
207 |
208 | # Perform one action step
209 | next_state, reward, terminal, info = env.step(action)
210 | next_state = session.run(frame_proc.processed_frame,
211 | feed_dict={frame_proc.input_placeholder: next_state})
212 |
213 | # Stack the game frames in a single array
214 | next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)
215 |
216 | # Store the experience in ER
217 | if COMPRESS_ER:
218 | buffer.append(
219 | zlib.compress(
220 | pickle.dumps((state, action, reward, next_state, terminal), 2), 2))
221 | else:
222 | buffer.append((state, action, reward, next_state, terminal))
223 |
224 | # Set next state as current
225 | if terminal:
226 | state = env.reset()
227 | state = session.run(frame_proc.processed_frame,
228 | feed_dict={frame_proc.input_placeholder: state})
229 |
230 | state = np.stack([state] * STATE_FRAMES, axis=2)
231 | else:
232 | state = next_state
233 |
234 | print("\rExperience replay buffer: {} / {} initial ({} total)".format(
235 | len(buffer), initial_buffer_size, buffer.maxlen), end="")
236 |
237 |
238 | def plot_stats(stats):
239 | """Plot the stats"""
240 | plt.figure()
241 |
242 | plt.xlabel("Episode")
243 |
244 | # plot the rewards
245 | # rolling mean of 50
246 | cumsum = np.cumsum(np.insert(stats.rewards, 0, 0))
247 | rewards = (cumsum[50:] - cumsum[:-50]) / float(50)
248 |
249 | fig, ax1 = plt.subplots()
250 |
251 | color = 'tab:red'
252 |
253 | ax1.set_ylabel('Reward', color=color)
254 | ax1.plot(rewards, color=color)
255 | ax1.tick_params(axis='y', labelcolor=color)
256 |
257 | # plot the episode lengths
258 | # rolling mean of 50
259 | cumsum = np.cumsum(np.insert(stats.lengths, 0, 0))
260 | lengths = (cumsum[50:] - cumsum[:-50]) / float(50)
261 |
262 | ax2 = ax1.twinx()
263 |
264 | color = 'tab:blue'
265 | ax2.set_ylabel('Length', color=color)
266 | ax2.plot(lengths, color=color)
267 | ax2.tick_params(axis='y', labelcolor=color)
268 |
269 |
270 | def deep_q_learning():
271 | """The Q-learning training process"""
272 |
273 | # build experience replay
274 | observations = deque(maxlen=ER_BUFFER_SIZE)
275 |
276 | print("Populating replay memory...")
277 | populate_experience_replay_buffer(observations, 100000)
278 |
279 | # initialize statistics
280 | stats = namedtuple('Stats', 'rewards lengths')(rewards=list(), lengths=list())
281 | global_time = session.run(tf.train.get_global_step())
282 | time = 0
283 |
284 | episode = 1
285 |
286 | episode_reward = 0
287 | global_reward = 0
288 |
289 | # Start the training with an initial state
290 | state = env.reset()
291 | state = session.run(frame_proc.processed_frame,
292 | feed_dict={frame_proc.input_placeholder: state})
293 | state = np.stack([state] * STATE_FRAMES, axis=2)
294 |
295 | while True:
296 | # env.render()
297 |
298 | # Initialize epsilon based on the current step
299 | epsilon_step = (EPSILON_START - EPSILON_END) / EXPLORE_STEPS
300 | epsilon = max(EPSILON_END, EPSILON_START - (global_time - 1) * epsilon_step)
301 |
302 | # Copy q-net weights to the target-net
303 | if global_time % UPDATE_TARGET_NET_EVERY_X_STEPS == 0:
304 | session.run(t_net_updates)
305 | print("\nCopied model parameters to target network.")
306 |
307 | # Sample next action
308 | action = choose_next_action(state, q_network, epsilon)
309 |
310 | # Perform one step with the selected action
311 | next_state, reward, terminal, info = env.step(action)
312 |
313 | # This is how we pre-process
314 | next_state = session.run(frame_proc.processed_frame,
315 | feed_dict={frame_proc.input_placeholder: next_state})
316 |
317 | # Stack the game frames in a single array
318 | next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)
319 |
320 | # Store the experience in ER
321 | if COMPRESS_ER:
322 | observations.append(
323 | zlib.compress(pickle.dumps((state, action, reward, next_state, terminal), 2), 2))
324 | else:
325 | observations.append((state, action, reward, next_state, terminal))
326 |
327 | # Sample a mini-batch from the experience replay memory
328 | mini_batch = random.sample(observations, MB_SIZE)
329 | if COMPRESS_ER:
330 | mini_batch = [pickle.loads(zlib.decompress(comp_item)) for comp_item in mini_batch]
331 |
332 | states_batch, action_batch, reward_batch, next_states_batch, terminal_batch = \
333 | map(np.array, zip(*mini_batch))
334 |
335 | if global_time % UPDATE_Q_NET_FREQ == 0:
336 | # Compute next q values using the target network
337 | q_values_next = session.run(t_network.q_estimation,
338 | feed_dict={t_network.input_placeholder: next_states_batch})
339 |
340 | # Calculate q values and targets
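# vanilla DQN target: r + gamma * max_a Q_target(s', a); the target
# network both selects and evaluates the next action (cf. the double
# DQN version in chapter_09_001_ddqn.py)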
341 | targets_batch = reward_batch + \
342 | np.invert(terminal_batch).astype(np.float32) * \
343 | DISCOUNT_FACTOR * \
344 | np.amax(q_values_next, axis=1)
345 |
346 | # Perform gradient descent update
347 | states_batch = np.array(states_batch)
348 |
349 | _, loss = session.run([train_op, q_network.loss],
350 | feed_dict={
351 | q_network.input_placeholder: states_batch,
352 | q_network.action_placeholder: action_batch,
353 | q_network.target_placeholder: targets_batch})
354 |
355 | episode_reward += reward
356 | global_reward += reward
357 | time += 1
358 | global_time += 1
359 |
360 | print("\rEpisode {}: "
361 | "time {:5}; "
362 | "reward {}; "
363 | "epsilon: {:.4f}; "
364 | "loss: {:.6f}; "
365 | "@ global step {} "
366 | "with total reward {}".format(
367 | episode,
368 | time,
369 | episode_reward,
370 | epsilon,
371 | loss,
372 | global_time,
373 | global_reward), end="")
374 |
375 | if terminal:
376 | # Episode end
377 |
378 | print()
379 |
380 | stats.rewards.append(int(episode_reward))
381 | stats.lengths.append(time)
382 |
383 | time = 0
384 | episode_reward = 0
385 | episode += 1
386 |
387 | state = env.reset()
388 | state = session.run(frame_proc.processed_frame,
389 | feed_dict={frame_proc.input_placeholder: state})
390 | state = np.stack([state] * STATE_FRAMES, axis=2)
391 | else:
392 | # Set next state as current
393 | state = next_state
394 |
395 | # save checkpoints for later
396 | if global_time % SAVE_EVERY_X_STEPS == 0:
397 | saver.save(session, CHECKPOINT_PATH + '/network',
398 | global_step=tf.train.get_global_step())
399 |
400 | # plot the results and save the figure
401 | plot_stats(stats)
402 |
403 | fig_file = CHECKPOINT_PATH + '/stats.png'
404 | if os.path.isfile(fig_file):
405 | os.remove(fig_file)
406 |
407 | plt.savefig(fig_file)
408 | plt.close()
409 |
410 | # save the stats
411 | with open(CHECKPOINT_PATH + '/stats.arr', 'wb') as f:
412 | pickle.dump((stats.rewards, stats.lengths), f)
413 |
414 |
415 | if __name__ == '__main__':
416 | session, q_network, t_network, t_net_updates, frame_proc, saver, train_op, env = \
417 | initialize()
418 | deep_q_learning()
419 |
--------------------------------------------------------------------------------
/Chapter09/chapter_09_002_a2c.py:
--------------------------------------------------------------------------------
1 | # note: must import tensorflow before gym
2 | from collections import deque
3 |
4 | import tensorflow as tf
5 | import gym
6 | import numpy as np
7 |
8 | env = gym.make('CartPole-v0')
9 |
10 | ACTIONS_COUNT = 2
11 | FUTURE_REWARD_DISCOUNT = 0.9
12 | LEARN_RATE_ACTOR = 0.01
13 | LEARN_RATE_CRITIC = 0.01
14 | STORE_SCORES_LEN = 5
15 | GAMES_PER_TRAINING = 3
16 | INPUT_NODES = env.observation_space.shape[0]
17 |
18 | ACTOR_HIDDEN = 20
19 | CRITIC_HIDDEN = 20
20 |
21 | session = tf.Session()
22 |
23 | actor_feed_forward_weights_1 = tf.Variable(tf.truncated_normal([INPUT_NODES, ACTOR_HIDDEN], stddev=0.01))
24 | actor_feed_forward_bias_1 = tf.Variable(tf.constant(0.0, shape=[ACTOR_HIDDEN]))
25 |
26 | actor_feed_forward_weights_2 = tf.Variable(tf.truncated_normal([ACTOR_HIDDEN, ACTIONS_COUNT], stddev=0.01))
27 | actor_feed_forward_bias_2 = tf.Variable(tf.constant(0.1, shape=[ACTIONS_COUNT]))
28 |
29 | actor_input_placeholder = tf.placeholder("float", [None, INPUT_NODES])
30 | actor_hidden_layer = tf.nn.tanh(
31 | tf.matmul(actor_input_placeholder, actor_feed_forward_weights_1) + actor_feed_forward_bias_1)
32 | actor_output_layer = tf.nn.softmax(
33 | tf.matmul(actor_hidden_layer, actor_feed_forward_weights_2) + actor_feed_forward_bias_2)
34 |
35 | actor_action_placeholder = tf.placeholder("float", [None, ACTIONS_COUNT])
36 | actor_advantage_placeholder = tf.placeholder("float", [None, 1])
37 |
38 | policy_gradient = tf.reduce_mean(actor_advantage_placeholder * actor_action_placeholder * tf.log(tf.clip_by_value(actor_output_layer, 1e-10, 1.0)))
39 | actor_train_operation = tf.train.AdamOptimizer(LEARN_RATE_ACTOR).minimize(-policy_gradient)
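# the actor maximizes E[A(s, a) * log pi(a|s)] - the policy-gradient
# objective - by minimizing its negative; the clip above keeps tf.log
# away from log(0)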
40 |
41 | critic_feed_forward_weights_1 = tf.Variable(tf.truncated_normal([INPUT_NODES, CRITIC_HIDDEN], stddev=0.01))
42 | critic_feed_forward_bias_1 = tf.Variable(tf.constant(0.0, shape=[CRITIC_HIDDEN]))
43 |
44 | critic_feed_forward_weights_2 = tf.Variable(tf.truncated_normal([CRITIC_HIDDEN, 1], stddev=0.01))
45 | critic_feed_forward_bias_2 = tf.Variable(tf.constant(0.0, shape=[1]))
46 |
47 | critic_input_placeholder = tf.placeholder("float", [None, INPUT_NODES])
48 | critic_hidden_layer = tf.nn.tanh(
49 | tf.matmul(critic_input_placeholder, critic_feed_forward_weights_1) + critic_feed_forward_bias_1)
50 | critic_output_layer = tf.matmul(critic_hidden_layer, critic_feed_forward_weights_2) + critic_feed_forward_bias_2
51 |
52 | critic_target_placeholder = tf.placeholder("float", [None, 1])
53 |
54 | critic_cost = tf.reduce_mean(tf.square(critic_target_placeholder - critic_output_layer))
55 | critic_train_operation = tf.train.AdamOptimizer(LEARN_RATE_CRITIC).minimize(critic_cost)
56 |
57 | critic_baseline = critic_target_placeholder - critic_output_layer
58 |
59 | scores = deque(maxlen=STORE_SCORES_LEN)
60 |
61 | # set the first action to do nothing
62 | last_action = np.zeros(ACTIONS_COUNT)
63 | last_action[1] = 1
64 |
65 | time = 0
66 |
67 | session.run(tf.global_variables_initializer())
68 |
69 |
70 | def choose_next_action(state):
71 | probability_of_actions = session.run(actor_output_layer, feed_dict={actor_input_placeholder: [state]})[0]
72 | try:
73 | move = np.random.multinomial(1, probability_of_actions)
74 | except ValueError:
75 |         # floating-point rounding can make probability_of_actions sum to
76 |         # slightly more than 1, which np.random.multinomial rejects - renormalize
77 | move = np.random.multinomial(1, probability_of_actions / (sum(probability_of_actions) + 1e-6))
78 | return move
79 |
80 |
81 | def train(states, actions_taken, advantages):
82 | # learn that these actions in these states lead to this reward
83 | session.run(actor_train_operation, feed_dict={
84 | actor_input_placeholder: states,
85 | actor_action_placeholder: actions_taken,
86 | actor_advantage_placeholder: advantages})
87 |
88 |
89 | last_state = env.reset()
90 | total_reward = 0
91 | current_game_observations = []
92 | current_game_rewards = []
93 | current_game_actions = []
94 |
95 | episode_observation = []
96 | episode_rewards = []
97 | episode_actions = []
98 | games = 0
99 | plot_x = []
100 | plot_y = []
101 |
102 | critic_costs = deque(maxlen=10)
103 |
104 | while True:
105 | env.render()
106 | last_action = choose_next_action(last_state)
107 | current_state, reward, terminal, info = env.step(np.argmax(last_action))
108 | total_reward += reward
109 |
110 | if terminal:
111 |         reward = -0.1
112 | else:
113 | reward = 0.1
114 |
115 | current_game_observations.append(last_state)
116 | current_game_rewards.append(reward)
117 | current_game_actions.append(last_action)
118 |
119 | if terminal:
120 | games += 1
121 | scores.append(total_reward)
122 |
123 | if games % STORE_SCORES_LEN == 0:
124 | plot_x.append(games)
125 | plot_y.append(np.mean(scores))
126 |
127 | # get temporal difference values for critic
128 | cumulative_reward = 0
129 | for i in reversed(range(len(current_game_observations))):
130 | cumulative_reward = current_game_rewards[i] + FUTURE_REWARD_DISCOUNT * cumulative_reward
131 | current_game_rewards[i] = [cumulative_reward]
132 |
133 | values_t = session.run(critic_output_layer, {
134 | critic_input_placeholder: current_game_observations})
135 | advantages = []
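# advantage estimate used here: A_i = G_i + gamma * V(s_{i+1}) - V(s_i),
# where G_i is the discounted return computed above; the final step
# bootstraps with a next-state value of 0, i.e. A_T = G_T - V(s_T)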
136 |
137 | for i in range(len(current_game_observations) - 1):
138 | advantages.append([current_game_rewards[i][0] + FUTURE_REWARD_DISCOUNT * values_t[i + 1][0] - values_t[i][0]])
139 |
140 | advantages.append([current_game_rewards[-1][0] - values_t[-1][0]])
141 |
142 | _, cost = session.run([critic_train_operation, critic_cost], {
143 | critic_input_placeholder: current_game_observations,
144 | critic_target_placeholder: current_game_rewards})
145 |
146 | critic_costs.append(cost)
147 |
148 | print("Game: %s reward %s average scores %s critic cost %s" %
149 | (games, total_reward,
150 | np.mean(scores), np.mean(critic_costs)))
151 |
152 | episode_observation.extend(current_game_observations)
153 | episode_actions.extend(current_game_actions)
154 | episode_rewards.extend(advantages)
155 |
156 | total_reward = 0
157 | current_game_observations = []
158 | current_game_rewards = []
159 | current_game_actions = []
160 |
161 | if games % GAMES_PER_TRAINING == 0:
162 | episode_rewards = np.array(episode_rewards)
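# standardize the advantages to zero mean and unit variance to reduce
# the variance of the policy-gradient updates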
163 | normalized_rewards = episode_rewards - np.mean(episode_rewards)
164 | normalized_rewards /= np.std(normalized_rewards)
165 |
166 | train(episode_observation, episode_actions, normalized_rewards)
167 |
168 | episode_observation = []
169 | episode_actions = []
170 | episode_rewards = []
171 |
172 | time += 1
173 |
174 | # update the old values
175 | if terminal:
176 | last_state = env.reset()
177 | else:
178 | last_state = current_state
179 |
--------------------------------------------------------------------------------
/Chapter10/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Python-Deep-Learning-Second-Edition/a44db1a21d101009dd610ad3adafba475a648f0e/Chapter10/__init__.py
--------------------------------------------------------------------------------
/Chapter10/data/data.gzip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Python-Deep-Learning-Second-Edition/a44db1a21d101009dd610ad3adafba475a648f0e/Chapter10/data/data.gzip
--------------------------------------------------------------------------------
/Chapter10/data/model.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Python-Deep-Learning-Second-Edition/a44db1a21d101009dd610ad3adafba475a648f0e/Chapter10/data/model.pt
--------------------------------------------------------------------------------
/Chapter10/keyboard_agent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 |
4 | import gzip
5 | import os
6 | import pickle
7 | import time
8 |
9 | import gym
10 | import numpy as np
11 |
12 | from util import DATA_DIR, DATA_FILE
13 |
14 |
15 | def key_press(key, mod):
16 | global human_agent_action, human_wants_restart, human_wants_exit, human_sets_pause, acceleration
17 | if key == 0xff0d: # enter
18 | human_wants_restart = True
19 |
20 | if key == 0xff1b: # escape
21 | human_wants_exit = True
22 |
23 | if key == 0x020: # space
24 | human_sets_pause = not human_sets_pause
25 |
26 | if key == 0xff52: # up
27 | acceleration = True
28 | human_agent_action[1] = 1.0
29 | human_agent_action[2] = 0
30 | if key == 0xff54: # down
31 | human_agent_action[2] = 1 # stronger brakes
32 |
33 | if key == 0xff51: # left
34 | human_agent_action[0] = -1.0
35 |
36 | # no acceleration while turning
37 | human_agent_action[1] = 0.0
38 |
39 | if key == 0xff53: # right
40 | human_agent_action[0] = +1.0
41 |
42 | # no acceleration when turning
43 | human_agent_action[1] = 0.0
44 |
45 |
46 | def key_release(key, mod):
47 | global human_agent_action, acceleration
48 | if key == 0xff52: # up
49 | acceleration = False
50 | human_agent_action[1] = 0.0
51 |
52 | if key == 0xff54: # down
53 | human_agent_action[2] = 0.0
54 |
55 | if key == 0xff51: # left
56 | human_agent_action[0] = 0
57 |
58 | # restore acceleration
59 | human_agent_action[1] = acceleration
60 |
61 | if key == 0xff53: # right
62 | human_agent_action[0] = 0
63 |
64 | # restore acceleration
65 | human_agent_action[1] = acceleration
66 |
67 |
68 | def rollout(env):
69 |     global human_wants_restart, human_agent_action, human_wants_exit, human_sets_pause, acceleration
70 |
71 | ACTIONS = env.action_space.shape[0]
72 | human_agent_action = np.zeros(ACTIONS, dtype=np.float32)
73 | human_wants_exit = False
74 | human_sets_pause = False
75 |
76 |     human_wants_restart = False
77 |     acceleration = False
78 | # if the file exists, append
79 | if os.path.exists(os.path.join(DATA_DIR, DATA_FILE)):
80 | with gzip.open(os.path.join(DATA_DIR, DATA_FILE), 'rb') as f:
81 | observations = pickle.load(f)
82 | else:
83 | observations = list()
84 |
85 | state = env.reset()
86 | total_reward = 0
87 | total_timesteps = 0
88 | episode = 1
89 | while 1:
90 | env.render()
91 |
92 | a = np.copy(human_agent_action)
93 |
94 | old_state = state
95 |
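# weaken the brake before stepping so the recorded runs stay controllable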
96 | if human_agent_action[2] != 0:
97 | human_agent_action[2] = 0.1
98 |
99 | state, r, terminal, info = env.step(human_agent_action)
100 |
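# each stored tuple is (state, action, next_state, reward, terminal) -
# the raw material for the imitation-learning dataset used by train.py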
101 | observations.append((old_state, a, state, r, terminal))
102 |
103 |         total_reward += r
104 |         total_timesteps += 1
105 | if human_wants_exit:
106 | env.close()
107 | return
108 |
109 | if human_wants_restart:
110 | human_wants_restart = False
111 | state = env.reset()
112 | continue
113 |
114 | if terminal:
115 | if episode % 5 == 0:
116 | # store generated data
117 | data_file_path = os.path.join(DATA_DIR, DATA_FILE)
118 | print("Saving observations to " + data_file_path)
119 |
120 | if not os.path.exists(DATA_DIR):
121 | os.mkdir(DATA_DIR)
122 |
123 | with gzip.open(data_file_path, 'wb') as f:
124 | pickle.dump(observations, f)
125 |
126 | print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
127 |
128 | episode += 1
129 |
130 | state = env.reset()
131 |
132 | while human_sets_pause:
133 | env.render()
134 | time.sleep(0.1)
135 |
136 |
137 | if __name__ == '__main__':
138 | env = gym.make('CarRacing-v0')
139 |
140 | env.render()
141 | env.unwrapped.viewer.window.on_key_press = key_press
142 | env.unwrapped.viewer.window.on_key_release = key_release
143 |
144 | print("ACTIONS={}".format(env.action_space.shape[0]))
145 | print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
146 | print("No keys pressed is taking action 0")
147 |
148 | rollout(env)
149 |
--------------------------------------------------------------------------------
/Chapter10/main.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 |
5 | from nn_agent import nn_agent_play
6 | from train import \
7 | DATA_DIR, \
8 | MODEL_FILE, \
9 | build_network, \
10 | train
11 |
12 | if __name__ == '__main__':
13 | # create cuda device
14 | dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
15 |
16 | # create the network
17 | model = build_network()
18 |
19 | # if true, try to restore the network from the data file
20 | restore = False
21 | if restore:
22 | model_path = os.path.join(DATA_DIR, MODEL_FILE)
23 | model.load_state_dict(torch.load(model_path))
24 |
25 | # set the model to evaluation (and not training) mode
26 | model.eval()
27 |
28 | # transfer to the gpu
29 | model = model.to(dev)
30 |
31 | # train
32 | train(model, dev)
33 |
34 | # agent play
35 | nn_agent_play(model, dev)
36 |
--------------------------------------------------------------------------------
/Chapter10/nn_agent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 |
4 | import os
5 |
6 | import gym
7 | import numpy as np
8 | import torch
9 |
10 | from train \
11 | import \
12 | data_transform, \
13 | available_actions, \
14 | build_network, \
15 | DATA_DIR, MODEL_FILE
16 |
17 |
18 | def nn_agent_play(model, device):
19 | """
20 | Let the agent play
21 | :param model: the network
22 | :param device: the cuda device
23 | """
24 |
25 | env = gym.make('CarRacing-v0')
26 |
27 | # use ESC to exit
28 | global human_wants_exit
29 | human_wants_exit = False
30 |
31 | def key_press(key, mod):
32 | """Capture ESC key"""
33 | global human_wants_exit
34 | if key == 0xff1b: # escape
35 | human_wants_exit = True
36 |
37 | # initialize environment
38 | state = env.reset()
39 | env.unwrapped.viewer.window.on_key_press = key_press
40 |
41 | while 1:
42 | env.render()
43 |
44 | state = np.moveaxis(state, 2, 0) # channel first image
45 |
46 | # numpy to tensor
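# (np.flip returns a negative-stride view, which torch.from_numpy cannot
# consume - hence the .copy())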
47 | state = torch.from_numpy(np.flip(state, axis=0).copy())
48 | state = data_transform(state) # apply transformations
49 | state = state.unsqueeze(0) # add additional dimension
50 | state = state.to(device) # transfer to GPU
51 |
52 | # forward
53 | with torch.set_grad_enabled(False):
54 | outputs = model(state)
55 |
56 | normalized = torch.nn.functional.softmax(outputs, dim=1)
57 |
58 | # translate from net output to env action
59 | max_action = np.argmax(normalized.cpu().numpy()[0])
60 | action = available_actions[max_action]
61 |
62 | # adjust brake power
63 | if action[2] != 0:
64 | action[2] = 0.3
65 |
66 | state, _, terminal, _ = env.step(action) # one step
67 |
68 | if terminal:
69 | state = env.reset()
70 |
71 | if human_wants_exit:
72 | env.close()
73 | return
74 |
75 |
76 | if __name__ == '__main__':
77 | dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
78 | m = build_network()
79 | m.load_state_dict(torch.load(os.path.join(DATA_DIR, MODEL_FILE)))
80 | m.eval()
81 | m = m.to(dev)
82 | nn_agent_play(m, dev)
83 |
--------------------------------------------------------------------------------
/Chapter10/train.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | import os
3 | import pickle
4 | import random
5 |
6 | import numpy as np
7 | import torch
8 | import torch.nn as nn
9 | import torch.optim as optim
10 |
11 | from util import \
12 | available_actions, \
13 | data_transform, \
14 | DATA_DIR, \
15 | DATA_FILE, \
16 | MODEL_FILE
17 |
18 | restore = False # restore from file if exists
19 | BATCH_SIZE = 32 # mb size
20 | EPOCHS = 30 # number of epochs
21 | TRAIN_VAL_SPLIT = 0.85 # train/val ratio
22 |
23 | # balance the dataset by multiplying rare events
24 | MULTIPLY_RARE_EVENTS = 20
25 |
26 |
27 | def read_data():
28 | """Read the data generated by keyboard_agent.py"""
29 | with gzip.open(os.path.join(DATA_DIR, DATA_FILE), 'rb') as f:
30 | data = pickle.load(f)
31 |
32 | # balance dataset by multiplying
33 | # brake, right+brake, left+brake events
34 | # since they are too few
35 | if MULTIPLY_RARE_EVENTS > 1:
36 | data_copy = data.copy()
37 | for d in data:
38 | for a in ([[-1, 0, 1], [1, 0, 1], [0, 0, 1]]):
39 | if np.array_equal(d[1], a):
40 | data_copy += (d,) * MULTIPLY_RARE_EVENTS
41 |
42 | data = data_copy
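# together with the undersampling of acceleration / no-op actions below,
# this gives the classifier a much more balanced action distribution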
43 |
44 | random.shuffle(data)
45 |
46 | # to numpy arrays
47 | states, actions, _, _, _ = map(np.array, zip(*data))
48 |
49 | # reverse one-hot, actions to classes
50 |     act_classes = np.full(len(actions), -1, dtype=np.int64)
51 | for i, a in enumerate(available_actions):
52 | act_classes[np.all(actions == a, axis=1)] = i
53 |
54 | # drop unsupported actions
55 | states = np.array(states)
56 | states = states[act_classes != -1]
57 | act_classes = act_classes[act_classes != -1]
58 |
59 | # drop some of the acceleration actions to balance the dataset
60 | non_accel = act_classes != available_actions.index([0, 1, 0])
61 | drop_mask = np.random.rand(act_classes[~non_accel].size) > 0.7
62 | non_accel[~non_accel] = drop_mask
63 | states = states[non_accel]
64 | act_classes = act_classes[non_accel]
65 |
66 | # drop some of the non-action actions to balance the dataset
67 | non_act = act_classes != available_actions.index([0, 0, 0])
68 | drop_mask = np.random.rand(act_classes[~non_act].size) > 0.3
69 | non_act[~non_act] = drop_mask
70 | states = states[non_act]
71 | act_classes = act_classes[non_act]
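# after these masks, roughly 30% of the pure-acceleration samples and
# 70% of the no-op samples remain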
72 |
73 | for i, a in enumerate(available_actions):
74 | print("Actions of type {}: {}"
75 | .format(str(a), str(act_classes[act_classes == i].size)))
76 |
77 | print("Total transitions: " + str(len(act_classes)))
78 |
79 | return states, act_classes
80 |
81 |
82 | def create_datasets():
83 | """Create training and validation datasets"""
84 |
85 | class TensorDatasetTransforms(torch.utils.data.TensorDataset):
86 | """
87 | Helper class to allow transformations
88 | by default TensorDataset doesn't support them
89 | """
90 |
91 | def __init__(self, x, y):
92 | super().__init__(x, y)
93 |
94 | def __getitem__(self, index):
95 | tensor = data_transform(self.tensors[0][index])
96 | return (tensor,) + tuple(t[index] for t in self.tensors[1:])
97 |
98 | x, y = read_data()
99 | x = np.moveaxis(x, 3, 1) # channel first (torch requirement)
100 |
101 | # train dataset
102 | x_train = x[:int(len(x) * TRAIN_VAL_SPLIT)]
103 | y_train = y[:int(len(y) * TRAIN_VAL_SPLIT)]
104 |
105 | train_set = TensorDatasetTransforms(
106 | torch.tensor(x_train),
107 | torch.tensor(y_train))
108 |
109 | train_loader = torch.utils.data.DataLoader(train_set,
110 | batch_size=BATCH_SIZE,
111 | shuffle=True,
112 | num_workers=2)
113 |
114 | # validation dataset
115 | x_val, y_val = x[len(x_train):], y[len(y_train):]
116 |
117 | val_set = TensorDatasetTransforms(
118 | torch.tensor(x_val),
119 | torch.tensor(y_val))
120 |
121 | val_loader = torch.utils.data.DataLoader(val_set,
122 | batch_size=BATCH_SIZE,
123 | shuffle=False,
124 | num_workers=2)
125 |
126 | return train_loader, val_loader
127 |
128 |
129 | def build_network():
130 | """Build the torch network"""
131 |
132 | class Flatten(nn.Module):
133 | """
134 | Helper class to flatten the tensor
135 | between the last conv and first fc layer
136 | """
137 |
138 | def forward(self, x):
139 | return x.view(x.size()[0], -1)
140 |
141 | # Same network as with the DQN example
142 | model = torch.nn.Sequential(
143 | torch.nn.Conv2d(1, 32, 8, 4), # 1x84x84 -> 32x20x20
144 | torch.nn.BatchNorm2d(32),
145 | torch.nn.ELU(),
146 | torch.nn.Dropout2d(0.5),
147 | torch.nn.Conv2d(32, 64, 4, 2), # -> 64x9x9
148 | torch.nn.BatchNorm2d(64),
149 | torch.nn.ELU(),
150 | torch.nn.Dropout2d(0.5),
151 | torch.nn.Conv2d(64, 64, 3, 1), # -> 64x7x7
152 | torch.nn.ELU(),
153 | Flatten(), # -> 64 * 7 * 7 = 3136
154 | torch.nn.BatchNorm1d(64 * 7 * 7),
155 | torch.nn.Dropout(),
156 | torch.nn.Linear(64 * 7 * 7, 120),
157 | torch.nn.ELU(),
158 | torch.nn.BatchNorm1d(120),
159 | torch.nn.Dropout(),
160 | torch.nn.Linear(120, len(available_actions)),
161 | )
162 |
163 | return model
164 |
165 |
166 | def train(model, device):
167 | """
168 | Training main method
169 | :param model: the network
170 | :param device: the torch device (CPU or GPU)
171 | """
172 |
173 | loss_function = nn.CrossEntropyLoss()
174 |
175 | optimizer = optim.Adam(model.parameters())
176 |
177 | train_loader, val_loader = create_datasets() # create the datasets
178 |
179 | # train
180 | for epoch in range(EPOCHS):
181 | print('Epoch {}/{}'.format(epoch + 1, EPOCHS))
182 |
183 | train_epoch(model,
184 | device,
185 | loss_function,
186 | optimizer,
187 | train_loader)
188 |
189 | test(model, device, loss_function, val_loader)
190 |
191 | # save model
192 | model_path = os.path.join(DATA_DIR, MODEL_FILE)
193 | torch.save(model.state_dict(), model_path)
194 |
195 |
196 | def train_epoch(model, device, loss_function, optimizer, data_loader):
197 | """Train for a single epoch"""
198 |
199 | # set model to training mode
200 | model.train()
201 |
202 | current_loss = 0.0
203 | current_acc = 0
204 |
205 | # iterate over the training data
206 | for i, (inputs, labels) in enumerate(data_loader):
207 | # send the input/labels to the GPU
208 | inputs = inputs.to(device)
209 | labels = labels.to(device)
210 |
211 | # zero the parameter gradients
212 | optimizer.zero_grad()
213 |
214 | with torch.set_grad_enabled(True):
215 | # forward
216 | outputs = model(inputs)
217 | _, predictions = torch.max(outputs, 1)
218 | loss = loss_function(outputs, labels)
219 |
220 | # backward
221 | loss.backward()
222 | optimizer.step()
223 |
224 | # statistics
225 | current_loss += loss.item() * inputs.size(0)
226 | current_acc += torch.sum(predictions == labels.data)
227 |
228 | total_loss = current_loss / len(data_loader.dataset)
229 | total_acc = current_acc.double() / len(data_loader.dataset)
230 |
231 | print('Train Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss, total_acc))
232 |
233 |
234 | def test(model, device, loss_function, data_loader):
235 | """Test over the whole dataset"""
236 |
237 | model.eval() # set model in evaluation mode
238 |
239 | current_loss = 0.0
240 | current_acc = 0
241 |
242 | # iterate over the validation data
243 | for i, (inputs, labels) in enumerate(data_loader):
244 | # send the input/labels to the GPU
245 | inputs = inputs.to(device)
246 | labels = labels.to(device)
247 |
248 | # forward
249 | with torch.set_grad_enabled(False):
250 | outputs = model(inputs)
251 | _, predictions = torch.max(outputs, 1)
252 | loss = loss_function(outputs, labels)
253 |
254 | # statistics
255 | current_loss += loss.item() * inputs.size(0)
256 | current_acc += torch.sum(predictions == labels.data)
257 |
258 | total_loss = current_loss / len(data_loader.dataset)
259 | total_acc = current_acc.double() / len(data_loader.dataset)
260 |
261 | print('Test Loss: {:.4f}; Accuracy: {:.4f}'
262 | .format(total_loss, total_acc))
263 |
264 |
265 | if __name__ == '__main__':
266 | dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
267 | m = build_network()
268 |
269 | if restore:
270 | model_path = os.path.join(DATA_DIR, MODEL_FILE)
271 | m.load_state_dict(torch.load(model_path, map_location=dev))
272 |
273 | # train_epoch() and test() set the train/eval mode for each phase
274 | m = m.to(dev)
275 | train(m, dev)
276 |
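277 | # Editor's note (pipeline sketch): keyboard_agent.py records human play into
278 | # data/data.gzip; this script fits the network on those (frame, action) pairs
279 | # and saves data/model.pt; nn_agent.py then loads the model to drive the car.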
--------------------------------------------------------------------------------
/Chapter10/util.py:
--------------------------------------------------------------------------------
1 | from torchvision import transforms
2 |
3 | DATA_DIR = 'data'
4 | DATA_FILE = 'data.gzip'
5 | MODEL_FILE = 'model.pt'
6 |
7 | # available actions
8 | available_actions = [[0, 0, 0], # no action
9 | [-1, 0, 0], # left
10 | [-1, 0, 1], # left+brake
11 | [1, 0, 0], # right
12 | [1, 0, 1], # right+brake
13 | [0, 1, 0], # acceleration
14 | [0, 0, 1]] # brake
15 |
16 | # transformations for training/testing
17 | data_transform = transforms.Compose([
18 | transforms.ToPILImage(),
19 | transforms.Grayscale(1),
20 | transforms.Pad((12, 12, 12, 0)),
21 | transforms.CenterCrop(84),
22 | transforms.ToTensor(),
23 | transforms.Normalize((0,), (1,)),
24 | ])
25 |
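26 | # Editor's sketch of the transform's effect (assumes a raw 96x96x3 CarRacing frame):
27 | #
28 | # import numpy as np
29 | # frame = np.zeros((96, 96, 3), dtype=np.uint8)
30 | # tensor = data_transform(frame) # grayscale, pad, center-crop -> torch.Size([1, 84, 84])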
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Packt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Python Deep Learning - Second Edition
2 |
3 |
4 |
5 | This is the code repository for [Python Deep Learning - Second Edition](https://www.packtpub.com/big-data-and-business-intelligence/python-deep-learning-second-edition?utm_source=github&utm_medium=repository&utm_campaign=), published by Packt.
6 |
7 | **Exploring deep learning techniques and neural network architectures with PyTorch, Keras, and TensorFlow**
8 |
9 | ## About the Book
10 | With the surge in artificial intelligence in applications catering to both business and consumer needs, deep learning is more important than ever for meeting current and future market demands. With Python Deep Learning - Second Edition, you’ll explore deep learning and learn how to put machine learning to use in your projects.
11 |
12 | This book covers the following exciting features:
13 | * Grasp the mathematical theory behind neural networks and deep learning processes
14 | * Investigate and resolve computer vision challenges using convolutional networks and capsule networks
15 | * Solve generative tasks using variational autoencoders and Generative Adversarial Networks
16 | * Implement complex NLP tasks using recurrent networks (LSTM and GRU) and attention models
17 | * Explore reinforcement learning and understand how agents behave in a complex environment
18 | * Get up to date with applications of deep learning in autonomous vehicles
19 |
20 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/B07KQ29CQ3/) today!
21 |
22 |
24 |
25 | ## Instructions and Navigations
26 | All of the code is organized into folders. For example, Chapter02.
27 |
28 | The code will look like the following:
29 | ```
30 | import torch
31 |
32 | torch.manual_seed(1234)
33 |
34 | hidden_units = 5
35 |
36 | net = torch.nn.Sequential(
37 | torch.nn.Linear(4, hidden_units),
38 | torch.nn.ReLU(),
39 | torch.nn.Linear(hidden_units, 3)
40 | )
41 | ```
42 |
43 | **Following is what you need for this book:**
44 | This book is for data science practitioners, machine learning engineers, and those interested in deep learning who have a basic foundation in machine learning and some Python programming experience. A background in mathematics and a conceptual understanding of calculus and statistics will help you gain maximum benefit from this book.
45 |
46 | With the following software and hardware list, you can run all of the code files present in the book (Chapters 1-10).
47 | ### Software and Hardware List
48 | | Chapter | Software required | OS required |
49 | | -------- | ------------------------------------ | ----------------------------------- |
50 | | All | Python 3.6, Anaconda 5.2, Jupyter Notebook | Windows, Mac OS X, and Linux (Any) |
51 |
52 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](https://www.packtpub.com/sites/default/files/downloads/9781789348460_ColorImages.pdf).
53 |
54 | ### Related products
55 | * Python Deep Learning Projects [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/python-deep-learning-projects?utm_source=github&utm_medium=repository&utm_campaign=) [[Amazon]](https://www.amazon.com/dp/9781788997096)
56 |
57 | * Advanced Deep Learning with Keras [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/advanced-deep-learning-keras?utm_source=github&utm_medium=repository&utm_campaign=9781788629416) [[Amazon]](https://www.amazon.com/dp/9781788629416)
58 |
59 | ## Get to Know the Authors
60 | **Ivan Vasilev** started working on the first open source Java Deep Learning library with GPU support in 2013. The library was acquired by a German company, where he continued its development. He has also worked as a machine learning engineer and researcher in the area of medical image classification and segmentation with deep neural networks. Since 2017 he has focused on financial machine learning. He is working on a Python open source algorithmic trading library, which provides the infrastructure to experiment with different ML algorithms. He holds an MSc degree in Artificial Intelligence from The University of Sofia, St. Kliment Ohridski.
61 |
62 | **Daniel Slater** started programming at age 11, developing mods for the id Software game Quake. His obsession led him to become a developer working in the gaming industry on the hit computer game series Championship Manager. He then moved into finance, working on risk and high-performance messaging systems. He is now a staff engineer working on big data at Skimlinks to understand online user behavior. He spends his spare time training AI to beat computer games, speaks at tech conferences about deep learning and reinforcement learning, and writes a blog, Daniel Slater's blog. His work in this field has been cited by Google.
63 |
64 | **Gianmario Spacagna** is a senior data scientist at Pirelli, processing sensor and telemetry data for the internet of things (IoT) and connected-vehicle applications. He works closely with tire mechanics, engineers, and business units to analyze and formulate hybrid, physics-driven, and data-driven automotive models. His main expertise is in building ML systems and end-to-end solutions for data products. He holds a master's degree in telematics from the Polytechnic of Turin, as well as one in software engineering of distributed systems from KTH, Stockholm. Prior to Pirelli, he worked in retail and business banking (Barclays), cyber security (Cisco), predictive marketing (AgilOne), and did some occasional freelancing.
65 |
66 | **Peter Roelants** holds a master's in computer science with a specialization in AI from KU Leuven. He works on applying deep learning to a variety of problems, such as spectral imaging, speech recognition, text understanding, and document information extraction. He currently works at Onfido as a team leader for the data extraction research team, focusing on data extraction from official documents.
67 |
68 | ## Other books by the authors
69 | [Python Deep Learning Projects](https://www.packtpub.com/big-data-and-business-intelligence/python-deep-learning-projects)
70 |
71 | [Advanced Deep Learning with Keras](https://www.packtpub.com/big-data-and-business-intelligence/advanced-deep-learning-keras)
72 |
73 |
74 | ### Suggestions and Feedback
75 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSdy7dATC6QmEL81FIUuymZ0Wy9vH1jHkvpY57OiMeKGqib_Ow/viewform) if you have any feedback or suggestions.
76 |
77 |
78 |
--------------------------------------------------------------------------------