├── img
├── MLP_XOR.png
├── autoencoder1.png
├── autoencoder2.png
├── cnn1.png
├── cnn2.png
├── cnn3.png
├── cnn4.png
├── cnn_architecture.png
├── cnn_architecture1.png
├── cnn_filter.png
├── cnn_practice.png
├── cnn_train.png
├── dropout.png
├── early_stop.png
├── elem_mul.png
├── filter_diff.png
├── igoogleatwork.png
├── iworkatgoogle.png
├── lstm_cell.png
├── lstm_cell2.png
├── lstm_classifier_diagram.png
├── lstm_detail.png
├── lstm_model_diagram.png
├── lstm_model_overview.png
├── max_pool.png
├── mlp_4.png
├── mlp_5.png
├── mlp_drop_out.png
├── mlp_input1.png
├── mlp_input2.png
├── mlp_input3.png
├── mlp_overview.png
├── mlp_overview2.png
├── mnist_sample.png
├── perceptron_structure.png
├── practice_cnn.png
├── relu.png
├── reshape_mnist.png
├── rgb.png
├── rgb1.png
├── rgb2.png
├── rnn_simple_diagram.png
├── rnn_single.png
├── sgd.png
├── simple_mlp_mnist.png
├── simple_rnn.png
├── stride.png
├── stride_result.png
├── truth_table.png
├── w2v_diagram.png
├── w2v_lookup.png
├── zeropadding.png
└── zeropadding1.png
└── src
├── AutoKeras.ipynb
├── CNN_Tensorflow.ipynb
├── CNN_Tensorflow_colab.ipynb
├── LSTM_Tensorflow.ipynb
├── LSTM_paragraph_classifier.ipynb
├── MLP_MNIST_Tensorflow.ipynb
├── MLP_MNIST_Tensorflow_Early_Stopping.ipynb
├── MLP_MNIST_Tensorflow_Early_Stopping_DropOut.ipynb
├── MLP_XOR_Soution_Tensorflow.ipynb
├── Vanilla_RNN_Tensorflow.ipynb
├── autoencoder.ipynb
├── single_neuron_perceptron.ipynb
└── word2vec_tensorflow.ipynb
/img/MLP_XOR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/MLP_XOR.png
--------------------------------------------------------------------------------
/img/autoencoder1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/autoencoder1.png
--------------------------------------------------------------------------------
/img/autoencoder2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/autoencoder2.png
--------------------------------------------------------------------------------
/img/cnn1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/cnn1.png
--------------------------------------------------------------------------------
/img/cnn2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/cnn2.png
--------------------------------------------------------------------------------
/img/cnn3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/cnn3.png
--------------------------------------------------------------------------------
/img/cnn4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/cnn4.png
--------------------------------------------------------------------------------
/img/cnn_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/cnn_architecture.png
--------------------------------------------------------------------------------
/img/cnn_architecture1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/cnn_architecture1.png
--------------------------------------------------------------------------------
/img/cnn_filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/cnn_filter.png
--------------------------------------------------------------------------------
/img/cnn_practice.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/cnn_practice.png
--------------------------------------------------------------------------------
/img/cnn_train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/cnn_train.png
--------------------------------------------------------------------------------
/img/dropout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/dropout.png
--------------------------------------------------------------------------------
/img/early_stop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/early_stop.png
--------------------------------------------------------------------------------
/img/elem_mul.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/elem_mul.png
--------------------------------------------------------------------------------
/img/filter_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/filter_diff.png
--------------------------------------------------------------------------------
/img/igoogleatwork.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/igoogleatwork.png
--------------------------------------------------------------------------------
/img/iworkatgoogle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/iworkatgoogle.png
--------------------------------------------------------------------------------
/img/lstm_cell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/lstm_cell.png
--------------------------------------------------------------------------------
/img/lstm_cell2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/lstm_cell2.png
--------------------------------------------------------------------------------
/img/lstm_classifier_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/lstm_classifier_diagram.png
--------------------------------------------------------------------------------
/img/lstm_detail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/lstm_detail.png
--------------------------------------------------------------------------------
/img/lstm_model_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/lstm_model_diagram.png
--------------------------------------------------------------------------------
/img/lstm_model_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/lstm_model_overview.png
--------------------------------------------------------------------------------
/img/max_pool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/max_pool.png
--------------------------------------------------------------------------------
/img/mlp_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/mlp_4.png
--------------------------------------------------------------------------------
/img/mlp_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/mlp_5.png
--------------------------------------------------------------------------------
/img/mlp_drop_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/mlp_drop_out.png
--------------------------------------------------------------------------------
/img/mlp_input1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/mlp_input1.png
--------------------------------------------------------------------------------
/img/mlp_input2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/mlp_input2.png
--------------------------------------------------------------------------------
/img/mlp_input3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/mlp_input3.png
--------------------------------------------------------------------------------
/img/mlp_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/mlp_overview.png
--------------------------------------------------------------------------------
/img/mlp_overview2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/mlp_overview2.png
--------------------------------------------------------------------------------
/img/mnist_sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/mnist_sample.png
--------------------------------------------------------------------------------
/img/perceptron_structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/perceptron_structure.png
--------------------------------------------------------------------------------
/img/practice_cnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/practice_cnn.png
--------------------------------------------------------------------------------
/img/relu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/relu.png
--------------------------------------------------------------------------------
/img/reshape_mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/reshape_mnist.png
--------------------------------------------------------------------------------
/img/rgb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/rgb.png
--------------------------------------------------------------------------------
/img/rgb1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/rgb1.png
--------------------------------------------------------------------------------
/img/rgb2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/rgb2.png
--------------------------------------------------------------------------------
/img/rnn_simple_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/rnn_simple_diagram.png
--------------------------------------------------------------------------------
/img/rnn_single.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/rnn_single.png
--------------------------------------------------------------------------------
/img/sgd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/sgd.png
--------------------------------------------------------------------------------
/img/simple_mlp_mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/simple_mlp_mnist.png
--------------------------------------------------------------------------------
/img/simple_rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/simple_rnn.png
--------------------------------------------------------------------------------
/img/stride.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/stride.png
--------------------------------------------------------------------------------
/img/stride_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/stride_result.png
--------------------------------------------------------------------------------
/img/truth_table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/truth_table.png
--------------------------------------------------------------------------------
/img/w2v_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/w2v_diagram.png
--------------------------------------------------------------------------------
/img/w2v_lookup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/w2v_lookup.png
--------------------------------------------------------------------------------
/img/zeropadding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/zeropadding.png
--------------------------------------------------------------------------------
/img/zeropadding1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/minsuk-heo/deeplearning/5cfec3c028383ae1b279c1003e70f456fd2c5269/img/zeropadding1.png
--------------------------------------------------------------------------------
/src/AutoKeras.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# AutoKeras Practice\n",
8 | "We will practice Auto Keras with simple example. \n",
9 | "You don't need experties nor GPU for this practice. \n",
10 | "All you need is just importing autokeras for image classification."
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 9,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "data": {
20 | "text/html": [
21 | "
"
22 | ],
23 | "text/plain": [
24 | ""
25 | ]
26 | },
27 | "execution_count": 9,
28 | "metadata": {},
29 | "output_type": "execute_result"
30 | }
31 | ],
32 | "source": [
33 | "from IPython.display import Image\n",
34 | "Image(url= \"https://github.com/jhfjhfj1/autokeras/blob/master/logo.png?raw=true\", width=500, height=250)"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "Auto-Keras is an open source software library for automated machine learning (AutoML). \n",
42 | "The ultimate goal of AutoML is to allow domain experts with limited data science or machine learning background easily accessible to deep learning models. Auto-Keras provides functions to automatically search for architecture and hyperparameters of deep learning models. \n",
43 | "http://autokeras.com/"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "# Citing this work\n",
51 | "If you use Auto-Keras in a scientific publication, you are highly encouraged (though not required) to cite the following paper:\n",
52 | "\n",
53 | "Efficient Neural Architecture Search with Network Morphism. Haifeng Jin, Qingquan Song, and Xia Hu. arXiv:1806.10282."
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "# Why Auto Keras than other AutoML?\n",
61 | "### Don't spend time for hyperparameter tuning or playing with different layers.\n",
62 | "Auto Keras will find it for you automatically.\n",
63 | "### Auto Keras doesn't have vendor nor cloud platform dependencies. \n",
64 | "For example, if you use Google Cloud AutoML, you will have Google Cloud dependency. \n",
65 | "With Auto Keras, you can practice AutoML with your laptop or with your GPU cluster if you have GPU cluster."
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "# Practice\n",
73 | "We will practice MNIST image classifier on personal laptop."
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 1,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "name": "stderr",
83 | "output_type": "stream",
84 | "text": [
85 | "Using TensorFlow backend.\n"
86 | ]
87 | }
88 | ],
89 | "source": [
90 | "from keras.datasets import mnist\n",
91 | "from autokeras.classifier import ImageClassifier"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {},
97 | "source": [
98 | "## Load MNIST data\n",
99 | "We will practice with MNIST data from keras dataset."
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 2,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
109 | "x_train = x_train.reshape(x_train.shape + (1,))\n",
110 | "x_test = x_test.reshape(x_test.shape + (1,))"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 | "## Train\n"
118 | ]
119 | },
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {},
123 | "source": [
124 | "The main advantage of using Auto Keras is that you even don't need to know about \n",
125 | "which Neural Network you will use for your image classifier. \n",
126 | "While Auto Keras will try multiple CNN based neural network with different layers and find best one for you."
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "Simply running **clf = ImageClassifier()** will work. however in order to see \n",
134 | "1) how train is going \n",
135 | "2) shorten maximum iteration for fast training \n",
136 | "I gave few arguments in this practice. \n",
137 | "That said, you even don't need to know iteration for your image classifier training."
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": 3,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "clf = ImageClassifier(verbose=True, searcher_args={'trainer_args':{'max_iter_num':5}})"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | "importantly, I gave 5 hours time limit, in order to finish this practice in 5 hours. \n",
154 | "By default, Auto Keras has default time limit as 24 hours in current version."
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": 4,
160 | "metadata": {},
161 | "outputs": [
162 | {
163 | "name": "stdout",
164 | "output_type": "stream",
165 | "text": [
166 | "Initializing search.\n",
167 | "Initialization finished.\n",
168 | "Training model 0\n",
169 | "Saving model.\n",
170 | "Model ID: 0\n",
171 | "Loss: tensor(5.2479)\n",
172 | "Accuracy 96.00399999999999\n",
173 | "Training model 1\n",
174 | "Father ID: 0\n",
175 | "[('to_wider_model', 1, 64)]\n",
176 | "Saving model.\n",
177 | "Model ID: 1\n",
178 | "Loss: tensor(5.0132)\n",
179 | "Accuracy 96.25600000000001\n",
180 | "Training model 2\n",
181 | "Father ID: 1\n",
182 | "[('to_wider_model', 19, 64)]\n",
183 | "Saving model.\n",
184 | "Model ID: 2\n",
185 | "Loss: tensor(3.0112)\n",
186 | "Accuracy 97.64000000000001\n",
187 | "Training model 3\n",
188 | "Father ID: 2\n",
189 | "[('to_wider_model', 1, 128)]\n",
190 | "Saving model.\n",
191 | "Model ID: 3\n",
192 | "Loss: tensor(2.3075)\n",
193 | "Accuracy 98.296\n"
194 | ]
195 | }
196 | ],
197 | "source": [
198 | "clf.fit(x_train, y_train, time_limit=5 * 60 * 60)"
199 | ]
200 | },
201 | {
202 | "cell_type": "markdown",
203 | "metadata": {},
204 | "source": [
205 | "from the above result, you can find the auto keras is searching the best model by adjusting CNN model with multiple approach."
206 | ]
207 | },
208 | {
209 | "cell_type": "markdown",
210 | "metadata": {},
211 | "source": [
212 | "## Train the best model\n",
213 | "final_fit function will choose best model and fit the model with your data. \n",
214 | "in this example. I gave 10 more iteration to make the model be trained more with data. \n",
215 | "if you give retrain=True, the model architecture will initialize weights and bias and retrain again."
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 5,
221 | "metadata": {},
222 | "outputs": [
223 | {
224 | "name": "stdout",
225 | "output_type": "stream",
226 | "text": [
227 | "...............................................\n",
228 | "Epoch 1: loss 3.737567901611328, accuracy 98.43\n",
229 | "...............................................\n",
230 | "Epoch 2: loss 3.925536870956421, accuracy 98.34\n",
231 | "...............................................\n",
232 | "Epoch 3: loss 3.422757148742676, accuracy 98.53\n",
233 | "...............................................\n",
234 | "Epoch 4: loss 3.3036224842071533, accuracy 98.62\n",
235 | "...............................................\n",
236 | "Epoch 5: loss 4.0281524658203125, accuracy 98.45\n",
237 | "...............................................\n",
238 | "Epoch 6: loss 3.3080132007598877, accuracy 98.63\n",
239 | "...............................................\n",
240 | "Epoch 7: loss 3.359560966491699, accuracy 98.6\n",
241 | "...............................................\n",
242 | "Epoch 8: loss 3.4960057735443115, accuracy 98.59\n",
243 | "...............................................\n",
244 | "Epoch 9: loss 3.6699087619781494, accuracy 98.51\n",
245 | "...............................................\n",
246 | "Epoch 10: loss 3.0567498207092285, accuracy 98.74\n"
247 | ]
248 | }
249 | ],
250 | "source": [
251 | "clf.final_fit(x_train, y_train, x_test, y_test, retrain=False, trainer_args={'max_iter_num':10})"
252 | ]
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {},
257 | "source": [
258 | "## Test\n",
259 | "Testing your best model with test dataset."
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": 6,
265 | "metadata": {},
266 | "outputs": [
267 | {
268 | "name": "stdout",
269 | "output_type": "stream",
270 | "text": [
271 | "98.58\n"
272 | ]
273 | }
274 | ],
275 | "source": [
276 | "y = clf.evaluate(x_test, y_test)\n",
277 | "print(y * 100)"
278 | ]
279 | },
280 | {
281 | "cell_type": "markdown",
282 | "metadata": {},
283 | "source": [
284 | "# Best Model Architecture Overview\n",
285 | "Let's take a look a the best image classifier model's architecture"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 10,
291 | "metadata": {},
292 | "outputs": [],
293 | "source": [
294 | "best_model = clf.load_searcher().load_best_model()"
295 | ]
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {},
300 | "source": [
301 | "we can find the total number of layers by command below,"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 19,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "data": {
311 | "text/plain": [
312 | "51"
313 | ]
314 | },
315 | "execution_count": 19,
316 | "metadata": {},
317 | "output_type": "execute_result"
318 | }
319 | ],
320 | "source": [
321 | "best_model.n_layers"
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "metadata": {},
327 | "source": [
328 | "You can find model architecture by command below,"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": 22,
334 | "metadata": {},
335 | "outputs": [
336 | {
337 | "name": "stdout",
338 | "output_type": "stream",
339 | "text": [
340 | "TorchModel(\n",
341 | " (0): ReLU()\n",
342 | " (1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1.5, 1.5))\n",
343 | " (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
344 | " (3): Dropout2d(p=0.25)\n",
345 | " (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
346 | " (5): ReLU()\n",
347 | " (6): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1.5, 1.5))\n",
348 | " (7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
349 | " (8): Dropout2d(p=0.25)\n",
350 | " (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
351 | " (10): ReLU()\n",
352 | " (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1.5, 1.5))\n",
353 | " (12): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
354 | " (13): Dropout2d(p=0.25)\n",
355 | " (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
356 | " (15): TorchFlatten()\n",
357 | " (16): Linear(in_features=576, out_features=10, bias=True)\n",
358 | " (17): LogSoftmax()\n",
359 | " (18): ReLU()\n",
360 | " (19): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1.5, 1.5))\n",
361 | " (20): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
362 | " (21): Dropout2d(p=0.25)\n",
363 | " (22): ReLU()\n",
364 | " (23): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1.5, 1.5))\n",
365 | " (24): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
366 | " (25): Dropout2d(p=0.25)\n",
367 | " (26): ReLU()\n",
368 | " (27): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1.5, 1.5))\n",
369 | " (28): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
370 | " (29): Dropout2d(p=0.25)\n",
371 | " (30): ReLU()\n",
372 | " (31): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1.5, 1.5))\n",
373 | " (32): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
374 | " (33): Dropout2d(p=0.25)\n",
375 | " (34): ReLU()\n",
376 | " (35): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), padding=(0.5, 0.5))\n",
377 | " (36): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
378 | " (37): Dropout2d(p=0.25)\n",
379 | " (38): TorchAdd()\n",
380 | " (39): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
381 | " (40): ReLU()\n",
382 | " (41): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), padding=(0.5, 0.5))\n",
383 | " (42): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
384 | " (43): Dropout2d(p=0.25)\n",
385 | " (44): TorchAdd()\n",
386 | " (45): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
387 | " (46): ReLU()\n",
388 | " (47): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), padding=(0.5, 0.5))\n",
389 | " (48): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
390 | " (49): Dropout2d(p=0.25)\n",
391 | " (50): TorchAdd()\n",
392 | ")\n"
393 | ]
394 | }
395 | ],
396 | "source": [
397 | "from torchvision import models\n",
398 | "print(best_model.produce_model())"
399 | ]
400 | }
401 | ],
402 | "metadata": {
403 | "kernelspec": {
404 | "display_name": "Python 3",
405 | "language": "python",
406 | "name": "python3"
407 | },
408 | "language_info": {
409 | "codemirror_mode": {
410 | "name": "ipython",
411 | "version": 3
412 | },
413 | "file_extension": ".py",
414 | "mimetype": "text/x-python",
415 | "name": "python",
416 | "nbconvert_exporter": "python",
417 | "pygments_lexer": "ipython3",
418 | "version": "3.6.4"
419 | }
420 | },
421 | "nbformat": 4,
422 | "nbformat_minor": 2
423 | }
424 |
--------------------------------------------------------------------------------
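For quick reference, the notebook's whole AutoKeras workflow condenses into the short script below. This is a sketch against the same 0.x-era autokeras package the notebook imports (autokeras.classifier); later AutoKeras releases renamed this API, so treat the import path and searcher_args as version-specific.

```python
# Condensed sketch of the notebook's AutoKeras 0.x workflow (version-specific API).
from keras.datasets import mnist
from autokeras.classifier import ImageClassifier

# Load MNIST and add a channel axis: (N, 28, 28) -> (N, 28, 28, 1)
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape + (1,))
x_test = x_test.reshape(x_test.shape + (1,))

# Search for an architecture (5 training iterations per candidate, 5-hour budget)
clf = ImageClassifier(verbose=True,
                      searcher_args={'trainer_args': {'max_iter_num': 5}})
clf.fit(x_train, y_train, time_limit=5 * 60 * 60)

# Train the best model found for 10 more iterations, then evaluate
clf.final_fit(x_train, y_train, x_test, y_test,
              retrain=False, trainer_args={'max_iter_num': 10})
print(clf.evaluate(x_test, y_test) * 100)  # test accuracy in percent
```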
/src/LSTM_Tensorflow.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "from IPython.display import Image"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "inputs = np.array([\n",
20 | " [ [1,0] ]\n",
21 | "])"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "# LSTM\n",
29 | "An RNN composed of LSTM units is often called an LSTM network. A common LSTM unit is composed of a cell, an input gate, an output gate and a forget gate. The cell remembers values over arbitrary time intervals and the three gates regulate the flow of information into and out of the cell. LSTM networks are well-suited to classifying, processing and making predictions based on time series data, since there can be lags of unknown duration between important events in a time series. LSTMs were developed to deal with the exploding and vanishing gradient problems that can be encountered when training traditional RNNs. Relative insensitivity to gap length is an advantage of LSTM over RNNs, hidden Markov models and other sequence learning methods in numerous applications\n",
30 | "https://en.wikipedia.org/wiki/Long_short-term_memory"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 3,
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/html": [
41 | "
"
42 | ],
43 | "text/plain": [
44 | ""
45 | ]
46 | },
47 | "execution_count": 3,
48 | "metadata": {},
49 | "output_type": "execute_result"
50 | }
51 | ],
52 | "source": [
53 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/lstm_detail.png\", width=500, height=250)"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "Tensorflow already has prebuild LSTM cell, so we don't have to implement above picture, instead we just remember below picture. All we need to know is there is input and output and there are two states, one is hidden state and the other is memory cell."
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 4,
66 | "metadata": {},
67 | "outputs": [
68 | {
69 | "data": {
70 | "text/html": [
71 | "
"
72 | ],
73 | "text/plain": [
74 | ""
75 | ]
76 | },
77 | "execution_count": 4,
78 | "metadata": {},
79 | "output_type": "execute_result"
80 | }
81 | ],
82 | "source": [
83 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/lstm_cell2.png\", width=500, height=250)"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "from below code, _state.c is the orange line and _state.h is the blue line."
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 6,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "name": "stdout",
100 | "output_type": "stream",
101 | "text": [
102 | "output values\n",
103 | "[[[0.09927537]]]\n",
104 | "\n",
105 | "memory cell value \n",
106 | "[[0.18134572]]\n",
107 | "\n",
108 | "hidden state value \n",
109 | "[[0.09927537]]\n"
110 | ]
111 | }
112 | ],
113 | "source": [
114 | "import tensorflow as tf\n",
115 | "tf.reset_default_graph()\n",
116 | "tf.set_random_seed(777)\n",
117 | "\n",
118 | "tf_inputs = tf.constant(inputs, dtype=tf.float32)\n",
119 | "lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=1)\n",
120 | "outputs, state = tf.nn.dynamic_rnn(cell=lstm_cell, dtype=tf.float32, inputs=tf_inputs)\n",
121 | "\n",
122 | "with tf.Session() as sess:\n",
123 | " sess.run(tf.global_variables_initializer())\n",
124 | " _output, _state = sess.run([outputs, state])\n",
125 | " print(\"output values\")\n",
126 | " print(_output)\n",
127 | " print(\"\\nmemory cell value \")\n",
128 | " print(_state.c)\n",
129 | " print(\"\\nhidden state value \")\n",
130 | " print(_state.h)"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": []
139 | }
140 | ],
141 | "metadata": {
142 | "kernelspec": {
143 | "display_name": "Python 3",
144 | "language": "python",
145 | "name": "python3"
146 | },
147 | "language_info": {
148 | "codemirror_mode": {
149 | "name": "ipython",
150 | "version": 3
151 | },
152 | "file_extension": ".py",
153 | "mimetype": "text/x-python",
154 | "name": "python",
155 | "nbconvert_exporter": "python",
156 | "pygments_lexer": "ipython3",
157 | "version": "3.6.4"
158 | }
159 | },
160 | "nbformat": 4,
161 | "nbformat_minor": 2
162 | }
163 |
--------------------------------------------------------------------------------
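To make the two states concrete, here is a minimal NumPy sketch of a single LSTM step following the standard gate equations the notebook describes (forget, input, and output gates plus a candidate memory). The random weights are illustrative, not the ones TensorFlow's LSTMCell learned, so the printed numbers will differ from the output above.

```python
# One LSTM step in NumPy: c is the memory cell (_state.c), h is the hidden state (_state.h).
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x, h_prev, c_prev, W, b):
    # Project the concatenated [x, h_prev] onto the four gate pre-activations.
    z = np.concatenate([x, h_prev]) @ W + b
    f, i, o, g = np.split(z, 4)
    f, i, o = sigmoid(f), sigmoid(i), sigmoid(o)  # forget, input, output gates
    g = np.tanh(g)                                # candidate memory
    c = f * c_prev + i * g                        # new memory cell
    h = o * np.tanh(c)                            # new hidden state
    return h, c

rng = np.random.RandomState(777)
n_in, n_units = 2, 1                              # matches the notebook's input [1, 0]
W = rng.uniform(-1.0, 1.0, size=(n_in + n_units, 4 * n_units))
b = np.zeros(4 * n_units)
h, c = lstm_step(np.array([1.0, 0.0]), np.zeros(n_units), np.zeros(n_units), W, b)
print("hidden state:", h, "memory cell:", c)
```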
/src/MLP_MNIST_Tensorflow.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# MLP (MNIST, Tensorflow)\n",
8 | "In this tutorial, we will use MNIST data to practice Multi Layer Perceptron with Tensorflow."
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import tensorflow as tf\n",
18 | "import numpy as np\n",
19 | "from IPython.display import Image"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "# MLP Architecture\n",
27 | "here is the overview of MLP architecture we will implement with Tensorflow"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "data": {
37 | "text/html": [
38 | "
"
39 | ],
40 | "text/plain": [
41 | ""
42 | ]
43 | },
44 | "execution_count": 2,
45 | "metadata": {},
46 | "output_type": "execute_result"
47 | }
48 | ],
49 | "source": [
50 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/simple_mlp_mnist.png\", width=500, height=250)"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "# Collect MNIST Data"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 3,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 4,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "name": "stdout",
76 | "output_type": "stream",
77 | "text": [
78 | "(60000, 28, 28)\n",
79 | "(10000, 28, 28)\n"
80 | ]
81 | }
82 | ],
83 | "source": [
84 | "print(x_train.shape)\n",
85 | "print(x_test.shape)"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "train data has **60000** samples \n",
93 | "test data has **10000** samples \n",
94 | "every data is **28 * 28** pixels \n",
95 | "\n",
96 | "below image shows 28*28 pixel image sample for hand written number '0' from MNIST data. \n",
97 | "MNIST is gray scale image [0 to 255] for hand written number."
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | ""
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "# Split train data into train and validation data\n",
112 | "Validation during training gives advantages below, \n",
113 | "1) check if train goes well based on validation score \n",
114 | "2) apply **early stopping** when validation score doesn't improve while train score goes up (overcome **overfitting**)"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 5,
120 | "metadata": {},
121 | "outputs": [],
122 | "source": [
123 | "x_val = x_train[50000:60000]\n",
124 | "x_train = x_train[0:50000]\n",
125 | "y_val = y_train[50000:60000]\n",
126 | "y_train = y_train[0:50000]"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 6,
132 | "metadata": {},
133 | "outputs": [
134 | {
135 | "name": "stdout",
136 | "output_type": "stream",
137 | "text": [
138 | "train data has 50000 samples\n",
139 | "every train data is 28 * 28 image\n"
140 | ]
141 | }
142 | ],
143 | "source": [
144 | "print(\"train data has \" + str(x_train.shape[0]) + \" samples\")\n",
145 | "print(\"every train data is \" + str(x_train.shape[1]) \n",
146 | " + \" * \" + str(x_train.shape[2]) + \" image\")"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 7,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "validation data has 10000 samples\n",
159 | "every train data is 28 * 28 image\n"
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "print(\"validation data has \" + str(x_val.shape[0]) + \" samples\")\n",
165 | "print(\"every train data is \" + str(x_val.shape[1]) \n",
166 | " + \" * \" + str(x_train.shape[2]) + \" image\")"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {},
172 | "source": [
173 | "28 * 28 pixels has gray scale value from **0** to **255**"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": 8,
179 | "metadata": {},
180 | "outputs": [
181 | {
182 | "name": "stdout",
183 | "output_type": "stream",
184 | "text": [
185 | "[ 0 0 0 0 0 0 0 18 219 253 253 253 253 253 198 182 247 241\n",
186 | " 0 0 0 0 0 0 0 0 0 0]\n"
187 | ]
188 | }
189 | ],
190 | "source": [
191 | "# sample to show gray scale values\n",
192 | "print(x_train[0][8])"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "each train data has its label **0** to **9**"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 9,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "[5 0 4 1 9 2 1 3 1]\n"
212 | ]
213 | }
214 | ],
215 | "source": [
216 | "# sample to show labels for first train data to 10th train data\n",
217 | "print(y_train[0:9])"
218 | ]
219 | },
220 | {
221 | "cell_type": "markdown",
222 | "metadata": {},
223 | "source": [
224 | "test data has **10000** samples \n",
225 | "every test data is **28 * 28** image "
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 10,
231 | "metadata": {},
232 | "outputs": [
233 | {
234 | "name": "stdout",
235 | "output_type": "stream",
236 | "text": [
237 | "test data has 10000 samples\n",
238 | "every test data is 28 * 28 image\n"
239 | ]
240 | }
241 | ],
242 | "source": [
243 | "print(\"test data has \" + str(x_test.shape[0]) + \" samples\")\n",
244 | "print(\"every test data is \" + str(x_test.shape[1]) \n",
245 | " + \" * \" + str(x_test.shape[2]) + \" image\")"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {},
251 | "source": [
252 | "# Reshape\n",
253 | "In order to fully connect all pixels to hidden layer, \n",
254 | "we will reshape (28, 28) into (28x28,1) shape. \n",
255 | "It means we flatten row x column shape to an array having 28x28 (756) items."
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": 11,
261 | "metadata": {},
262 | "outputs": [
263 | {
264 | "data": {
265 | "text/html": [
266 | "
"
267 | ],
268 | "text/plain": [
269 | ""
270 | ]
271 | },
272 | "execution_count": 11,
273 | "metadata": {},
274 | "output_type": "execute_result"
275 | }
276 | ],
277 | "source": [
278 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/reshape_mnist.png\", width=500, height=250)"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": 12,
284 | "metadata": {},
285 | "outputs": [
286 | {
287 | "name": "stdout",
288 | "output_type": "stream",
289 | "text": [
290 | "(50000, 784)\n",
291 | "(10000, 784)\n"
292 | ]
293 | }
294 | ],
295 | "source": [
296 | "x_train = x_train.reshape(50000, 784)\n",
297 | "x_val = x_val.reshape(10000, 784)\n",
298 | "x_test = x_test.reshape(10000, 784)\n",
299 | "\n",
300 | "print(x_train.shape)\n",
301 | "print(x_test.shape)"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 13,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "data": {
311 | "text/plain": [
312 | "array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
313 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
314 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
315 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
316 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
317 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
318 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
319 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
320 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
321 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
322 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
323 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 18, 18, 18,\n",
324 | " 126, 136, 175, 26, 166, 255, 247, 127, 0, 0, 0, 0, 0,\n",
325 | " 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253,\n",
326 | " 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0, 0,\n",
327 | " 0, 0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253,\n",
328 | " 253, 253, 253, 253, 253, 251, 93, 82, 82, 56, 39, 0, 0,\n",
329 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 219, 253,\n",
330 | " 253, 253, 253, 253, 198, 182, 247, 241, 0, 0, 0, 0, 0,\n",
331 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
332 | " 80, 156, 107, 253, 253, 205, 11, 0, 43, 154, 0, 0, 0,\n",
333 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
334 | " 0, 0, 0, 14, 1, 154, 253, 90, 0, 0, 0, 0, 0,\n",
335 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
336 | " 0, 0, 0, 0, 0, 0, 0, 139, 253, 190, 2, 0, 0,\n",
337 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
338 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 190, 253, 70,\n",
339 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
340 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35,\n",
341 | " 241, 225, 160, 108, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n",
342 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
343 | " 0, 0, 81, 240, 253, 253, 119, 25, 0, 0, 0, 0, 0,\n",
344 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
345 | " 0, 0, 0, 0, 0, 45, 186, 253, 253, 150, 27, 0, 0,\n",
346 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
347 | " 0, 0, 0, 0, 0, 0, 0, 0, 16, 93, 252, 253, 187,\n",
348 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
349 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249,\n",
350 | " 253, 249, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
351 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 130,\n",
352 | " 183, 253, 253, 207, 2, 0, 0, 0, 0, 0, 0, 0, 0,\n",
353 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 148,\n",
354 | " 229, 253, 253, 253, 250, 182, 0, 0, 0, 0, 0, 0, 0,\n",
355 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 114,\n",
356 | " 221, 253, 253, 253, 253, 201, 78, 0, 0, 0, 0, 0, 0,\n",
357 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 66,\n",
358 | " 213, 253, 253, 253, 253, 198, 81, 2, 0, 0, 0, 0, 0,\n",
359 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 171,\n",
360 | " 219, 253, 253, 253, 253, 195, 80, 9, 0, 0, 0, 0, 0,\n",
361 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 172,\n",
362 | " 226, 253, 253, 253, 253, 244, 133, 11, 0, 0, 0, 0, 0,\n",
363 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
364 | " 136, 253, 253, 253, 212, 135, 132, 16, 0, 0, 0, 0, 0,\n",
365 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
366 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
367 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
368 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
369 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
370 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
371 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
372 | " 0, 0, 0, 0], dtype=uint8)"
373 | ]
374 | },
375 | "execution_count": 13,
376 | "metadata": {},
377 | "output_type": "execute_result"
378 | }
379 | ],
380 | "source": [
381 | "x_train[0]"
382 | ]
383 | },
384 | {
385 | "cell_type": "markdown",
386 | "metadata": {},
387 | "source": [
388 | "# Normalize data\n",
389 | "normalization usually helps faster learning speed, better performance \n",
390 | "by reducing variance and giving same range to all input features. \n",
391 | "since MNIST data set all input has 0 to 255, normalization only helps reducing variances. \n",
392 | "it turned out normalization is better than standardization for MNIST data with my MLP architeture, \n",
393 | "I believe this is because relu handles 0 differently on both feed forward and back propagation. \n",
394 | "handling 0 differently is important for MNIST, since 1-255 means there is some hand written, \n",
395 | "while 0 means no hand written on that pixel."
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 14,
401 | "metadata": {},
402 | "outputs": [],
403 | "source": [
404 | "x_train = x_train.astype('float32')\n",
405 | "x_val = x_val.astype('float32')\n",
406 | "x_test = x_test.astype('float32')\n",
407 | "\n",
408 | "gray_scale = 255\n",
409 | "x_train /= gray_scale\n",
410 | "x_val /= gray_scale\n",
411 | "x_test /= gray_scale"
412 | ]
413 | },
414 | {
415 | "cell_type": "markdown",
416 | "metadata": {},
417 | "source": [
418 | "# label to one hot encoding value"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": 15,
424 | "metadata": {},
425 | "outputs": [],
426 | "source": [
427 | "num_classes = 10\n",
428 | "y_train = tf.keras.utils.to_categorical(y_train, num_classes)\n",
429 | "y_val = tf.keras.utils.to_categorical(y_val, num_classes)\n",
430 | "y_test = tf.keras.utils.to_categorical(y_test, num_classes)"
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": 16,
436 | "metadata": {},
437 | "outputs": [
438 | {
439 | "data": {
440 | "text/plain": [
441 | "array([[0., 0., 0., ..., 0., 0., 0.],\n",
442 | " [1., 0., 0., ..., 0., 0., 0.],\n",
443 | " [0., 0., 0., ..., 0., 0., 0.],\n",
444 | " ...,\n",
445 | " [0., 0., 0., ..., 0., 1., 0.],\n",
446 | " [0., 0., 0., ..., 0., 0., 0.],\n",
447 | " [0., 0., 0., ..., 0., 1., 0.]])"
448 | ]
449 | },
450 | "execution_count": 16,
451 | "metadata": {},
452 | "output_type": "execute_result"
453 | }
454 | ],
455 | "source": [
456 | "y_train"
457 | ]
458 | },
459 | {
460 | "cell_type": "markdown",
461 | "metadata": {},
462 | "source": [
463 | "# Tensorflow MLP Graph\n",
464 | "Let's implement the MLP graph with Tensorflow"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 17,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/html": [
475 | "
"
476 | ],
477 | "text/plain": [
478 | ""
479 | ]
480 | },
481 | "execution_count": 17,
482 | "metadata": {},
483 | "output_type": "execute_result"
484 | }
485 | ],
486 | "source": [
487 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/simple_mlp_mnist.png\", width=500, height=250)"
488 | ]
489 | },
490 | {
491 | "cell_type": "code",
492 | "execution_count": 18,
493 | "metadata": {},
494 | "outputs": [],
495 | "source": [
496 | "x = tf.placeholder(tf.float32, [None, 784])\n",
497 | "y = tf.placeholder(tf.float32, [None, 10])"
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": 19,
503 | "metadata": {},
504 | "outputs": [],
505 | "source": [
506 | "def mlp(x):\n",
507 | " # hidden layer1\n",
508 | " w1 = tf.Variable(tf.random_uniform([784,256]))\n",
509 | " b1 = tf.Variable(tf.zeros([256]))\n",
510 | " h1 = tf.nn.relu(tf.matmul(x, w1) + b1)\n",
511 | " # hidden layer2\n",
512 | " w2 = tf.Variable(tf.random_uniform([256,128]))\n",
513 | " b2 = tf.Variable(tf.zeros([128]))\n",
514 | " h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)\n",
515 | " # output layer\n",
516 | " w3 = tf.Variable(tf.random_uniform([128,10]))\n",
517 | " b3 = tf.Variable(tf.zeros([10]))\n",
518 | " logits= tf.matmul(h2, w3) + b3\n",
519 | " \n",
520 | " return logits"
521 | ]
522 | },
523 | {
524 | "cell_type": "code",
525 | "execution_count": 20,
526 | "metadata": {},
527 | "outputs": [],
528 | "source": [
529 | "logits = mlp(x)"
530 | ]
531 | },
532 | {
533 | "cell_type": "code",
534 | "execution_count": 21,
535 | "metadata": {},
536 | "outputs": [],
537 | "source": [
538 | "loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(\n",
539 | " logits=logits, labels=y))"
540 | ]
541 | },
542 | {
543 | "cell_type": "code",
544 | "execution_count": 22,
545 | "metadata": {},
546 | "outputs": [],
547 | "source": [
548 | "train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss_op)"
549 | ]
550 | },
551 | {
552 | "cell_type": "code",
553 | "execution_count": 23,
554 | "metadata": {},
555 | "outputs": [
556 | {
557 | "name": "stdout",
558 | "output_type": "stream",
559 | "text": [
560 | "epoch: 0, validation accuracy: 0.1064, loss: 9479.23924316406\n",
561 | "epoch: 1, validation accuracy: 0.7404, loss: 487.9066563034058\n",
562 | "epoch: 2, validation accuracy: 0.8683, loss: 20.24389074325562\n",
563 | "epoch: 3, validation accuracy: 0.8761, loss: 11.892984285354613\n",
564 | "epoch: 4, validation accuracy: 0.8858, loss: 9.276838760375973\n",
565 | "epoch: 5, validation accuracy: 0.8785, loss: 8.25918293952942\n",
566 | "epoch: 6, validation accuracy: 0.8832, loss: 7.402374687194823\n",
567 | "epoch: 7, validation accuracy: 0.906, loss: 6.622725062370303\n",
568 | "epoch: 8, validation accuracy: 0.9034, loss: 5.537717547416686\n",
569 | "epoch: 9, validation accuracy: 0.8971, loss: 4.807866144180299\n",
570 | "epoch: 10, validation accuracy: 0.8396, loss: 7.349521398544313\n",
571 | "epoch: 11, validation accuracy: 0.9066, loss: 6.607095966339113\n",
572 | "epoch: 12, validation accuracy: 0.8217, loss: 52.06143003463745\n",
573 | "epoch: 13, validation accuracy: 0.8922, loss: 15.170511302948\n",
574 | "epoch: 14, validation accuracy: 0.9016, loss: 6.205790314674376\n",
575 | "epoch: 15, validation accuracy: 0.9036, loss: 4.978821859359741\n",
576 | "epoch: 16, validation accuracy: 0.9083, loss: 4.454537310600279\n",
577 | "epoch: 17, validation accuracy: 0.9112, loss: 3.8163385868072504\n",
578 | "epoch: 18, validation accuracy: 0.9137, loss: 3.665376300811767\n",
579 | "epoch: 19, validation accuracy: 0.9171, loss: 3.8778756713867173\n",
580 | "epoch: 20, validation accuracy: 0.9159, loss: 3.3953911519050597\n",
581 | "epoch: 21, validation accuracy: 0.9175, loss: 3.1974300575256356\n",
582 | "epoch: 22, validation accuracy: 0.8809, loss: 4.378022892475128\n",
583 | "epoch: 23, validation accuracy: 0.8764, loss: 4.933798418045042\n",
584 | "epoch: 24, validation accuracy: 0.912, loss: 4.379148626327513\n",
585 | "epoch: 25, validation accuracy: 0.9154, loss: 3.86887104511261\n",
586 | "epoch: 26, validation accuracy: 0.9196, loss: 3.1193934822082525\n",
587 | "epoch: 27, validation accuracy: 0.9205, loss: 2.864556527137757\n",
588 | "epoch: 28, validation accuracy: 0.9095, loss: 2.6170078659057614\n",
589 | "epoch: 29, validation accuracy: 0.9123, loss: 2.499971570968628\n",
590 | "[Test Accuracy] : 0.9089\n"
591 | ]
592 | }
593 | ],
594 | "source": [
595 | "# initialize\n",
596 | "init = tf.global_variables_initializer()\n",
597 | "\n",
598 | "# train hyperparameters\n",
599 | "epoch_cnt = 30\n",
600 | "batch_size = 1000\n",
601 | "iteration = len(x_train) // batch_size\n",
602 | "\n",
603 | "# Start training\n",
604 | "with tf.Session() as sess:\n",
605 | " # Run the initializer\n",
606 | " sess.run(init)\n",
607 | " for epoch in range(epoch_cnt):\n",
608 | " avg_loss = 0.\n",
609 | " start = 0; end = batch_size\n",
610 | " \n",
611 | " for i in range(iteration):\n",
612 | " _, loss = sess.run([train_op, loss_op], \n",
613 | " feed_dict={x: x_train[start: end], y: y_train[start: end]})\n",
614 | " start += batch_size; end += batch_size\n",
615 | " # Compute average loss\n",
616 | " avg_loss += loss / iteration\n",
617 | " \n",
618 | " # Validate model\n",
619 | " preds = tf.nn.softmax(logits) # Apply softmax to logits\n",
620 | " correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1))\n",
621 | " # Calculate accuracy\n",
622 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
623 | " cur_val_acc = accuracy.eval({x: x_val, y: y_val})\n",
624 | " print(\"epoch: \"+str(epoch)+\", validation accuracy: \" \n",
625 | " + str(cur_val_acc) +', loss: '+str(avg_loss))\n",
626 | " \n",
627 | " # Test model\n",
628 | " preds = tf.nn.softmax(logits) # Apply softmax to logits\n",
629 | " correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1))\n",
630 | " # Calculate accuracy\n",
631 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
632 | " print(\"[Test Accuracy] :\", accuracy.eval({x: x_test, y: y_test}))"
633 | ]
634 | }
635 | ],
636 | "metadata": {
637 | "kernelspec": {
638 | "display_name": "Python 3",
639 | "language": "python",
640 | "name": "python3"
641 | },
642 | "language_info": {
643 | "codemirror_mode": {
644 | "name": "ipython",
645 | "version": 3
646 | },
647 | "file_extension": ".py",
648 | "mimetype": "text/x-python",
649 | "name": "python",
650 | "nbconvert_exporter": "python",
651 | "pygments_lexer": "ipython3",
652 | "version": "3.6.5"
653 | }
654 | },
655 | "nbformat": 4,
656 | "nbformat_minor": 2
657 | }
658 |
--------------------------------------------------------------------------------
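One note on the notebook above: tf.random_uniform([784, 256]) draws every weight from [0, 1), which is why the epoch-0 loss explodes to ~9479 before recovering. As a suggestion of mine (not from the notebook), a variance-scaled initializer such as He initialization keeps the early epochs stable for ReLU layers:

```python
# Variant of the notebook's mlp() with He-scaled initialization (suggestion, not original).
import tensorflow as tf

def mlp(x):
    # hidden layer1: 784 -> 256, stddev = sqrt(2 / fan_in) suits ReLU
    w1 = tf.Variable(tf.random_normal([784, 256], stddev=(2.0 / 784) ** 0.5))
    b1 = tf.Variable(tf.zeros([256]))
    h1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    # hidden layer2: 256 -> 128
    w2 = tf.Variable(tf.random_normal([256, 128], stddev=(2.0 / 256) ** 0.5))
    b2 = tf.Variable(tf.zeros([128]))
    h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
    # output layer: 128 -> 10 logits (softmax is applied by the loss op)
    w3 = tf.Variable(tf.random_normal([128, 10], stddev=(2.0 / 128) ** 0.5))
    b3 = tf.Variable(tf.zeros([10]))
    return tf.matmul(h2, w3) + b3
```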
/src/MLP_MNIST_Tensorflow_Early_Stopping.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# MLP (MNIST, Tensorflow) with Early Stopping\n",
8 | "In this tutorial, we will apply early stopping on MNIST MLP tensorflow code. "
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import tensorflow as tf\n",
18 | "import numpy as np\n",
19 | "from IPython.display import Image"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "# MLP Architecture\n",
27 | "here is the overview of MLP architecture we will implement with Tensorflow"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "data": {
37 | "text/html": [
38 | "
"
39 | ],
40 | "text/plain": [
41 | ""
42 | ]
43 | },
44 | "execution_count": 2,
45 | "metadata": {},
46 | "output_type": "execute_result"
47 | }
48 | ],
49 | "source": [
50 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/simple_mlp_mnist.png\", width=500, height=250)"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "# Collect MNIST Data"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 3,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 4,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "name": "stdout",
76 | "output_type": "stream",
77 | "text": [
78 | "(60000, 28, 28)\n",
79 | "(10000, 28, 28)\n"
80 | ]
81 | }
82 | ],
83 | "source": [
84 | "print(x_train.shape)\n",
85 | "print(x_test.shape)"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "train data has **60000** samples \n",
93 | "test data has **10000** samples \n",
94 | "every data is **28 * 28** pixels \n",
95 | "\n",
96 | "below image shows 28*28 pixel image sample for hand written number '0' from MNIST data. \n",
97 | "MNIST is gray scale image [0 to 255] for hand written number."
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | ""
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "# Split train data into train and validation data\n",
112 | "Validation during training gives advantages below, \n",
113 | "1) check if train goes well based on validation score \n",
114 | "2) apply **early stopping** when validation score doesn't improve while train score goes up (overcome **overfitting**)"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 5,
120 | "metadata": {},
121 | "outputs": [],
122 | "source": [
123 | "x_val = x_train[50000:60000]\n",
124 | "x_train = x_train[0:50000]\n",
125 | "y_val = y_train[50000:60000]\n",
126 | "y_train = y_train[0:50000]"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 6,
132 | "metadata": {},
133 | "outputs": [
134 | {
135 | "name": "stdout",
136 | "output_type": "stream",
137 | "text": [
138 | "train data has 50000 samples\n",
139 | "every train data is 28 * 28 image\n"
140 | ]
141 | }
142 | ],
143 | "source": [
144 | "print(\"train data has \" + str(x_train.shape[0]) + \" samples\")\n",
145 | "print(\"every train data is \" + str(x_train.shape[1]) \n",
146 | " + \" * \" + str(x_train.shape[2]) + \" image\")"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 7,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "validation data has 10000 samples\n",
159 | "every train data is 28 * 28 image\n"
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "print(\"validation data has \" + str(x_val.shape[0]) + \" samples\")\n",
165 | "print(\"every train data is \" + str(x_val.shape[1]) \n",
166 | " + \" * \" + str(x_train.shape[2]) + \" image\")"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {},
172 | "source": [
173 | "28 * 28 pixels has gray scale value from **0** to **255**"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": 8,
179 | "metadata": {},
180 | "outputs": [
181 | {
182 | "name": "stdout",
183 | "output_type": "stream",
184 | "text": [
185 | "[ 0 0 0 0 0 0 0 18 219 253 253 253 253 253 198 182 247 241\n",
186 | " 0 0 0 0 0 0 0 0 0 0]\n"
187 | ]
188 | }
189 | ],
190 | "source": [
191 | "# sample to show gray scale values\n",
192 | "print(x_train[0][8])"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "each train data has its label **0** to **9**"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 9,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "[5 0 4 1 9 2 1 3 1]\n"
212 | ]
213 | }
214 | ],
215 | "source": [
216 | "# sample to show labels for first train data to 10th train data\n",
217 | "print(y_train[0:9])"
218 | ]
219 | },
220 | {
221 | "cell_type": "markdown",
222 | "metadata": {},
223 | "source": [
224 | "test data has **10000** samples \n",
225 | "every test data is **28 * 28** image "
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 10,
231 | "metadata": {},
232 | "outputs": [
233 | {
234 | "name": "stdout",
235 | "output_type": "stream",
236 | "text": [
237 | "test data has 10000 samples\n",
238 | "every test data is 28 * 28 image\n"
239 | ]
240 | }
241 | ],
242 | "source": [
243 | "print(\"test data has \" + str(x_test.shape[0]) + \" samples\")\n",
244 | "print(\"every test data is \" + str(x_test.shape[1]) \n",
245 | " + \" * \" + str(x_test.shape[2]) + \" image\")"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {},
251 | "source": [
252 | "# Reshape\n",
253 | "In order to fully connect all pixels to hidden layer, \n",
254 | "we will reshape (28, 28) into (28x28,1) shape. \n",
255 | "It means we flatten row x column shape to an array having 28x28 (756) items."
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": 11,
261 | "metadata": {},
262 | "outputs": [
263 | {
264 | "data": {
265 | "text/html": [
266 | "
"
267 | ],
268 | "text/plain": [
269 | ""
270 | ]
271 | },
272 | "execution_count": 11,
273 | "metadata": {},
274 | "output_type": "execute_result"
275 | }
276 | ],
277 | "source": [
278 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/reshape_mnist.png\", width=500, height=250)"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": 12,
284 | "metadata": {},
285 | "outputs": [
286 | {
287 | "name": "stdout",
288 | "output_type": "stream",
289 | "text": [
290 | "(50000, 784)\n",
291 | "(10000, 784)\n"
292 | ]
293 | }
294 | ],
295 | "source": [
296 | "x_train = x_train.reshape(50000, 784)\n",
297 | "x_val = x_val.reshape(10000, 784)\n",
298 | "x_test = x_test.reshape(10000, 784)\n",
299 | "\n",
300 | "print(x_train.shape)\n",
301 | "print(x_test.shape)"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 13,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "data": {
311 | "text/plain": [
312 | "array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
313 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
314 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
315 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
316 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
317 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
318 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
319 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
320 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
321 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
322 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
323 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 18, 18, 18,\n",
324 | " 126, 136, 175, 26, 166, 255, 247, 127, 0, 0, 0, 0, 0,\n",
325 | " 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253,\n",
326 | " 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0, 0,\n",
327 | " 0, 0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253,\n",
328 | " 253, 253, 253, 253, 253, 251, 93, 82, 82, 56, 39, 0, 0,\n",
329 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 219, 253,\n",
330 | " 253, 253, 253, 253, 198, 182, 247, 241, 0, 0, 0, 0, 0,\n",
331 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
332 | " 80, 156, 107, 253, 253, 205, 11, 0, 43, 154, 0, 0, 0,\n",
333 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
334 | " 0, 0, 0, 14, 1, 154, 253, 90, 0, 0, 0, 0, 0,\n",
335 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
336 | " 0, 0, 0, 0, 0, 0, 0, 139, 253, 190, 2, 0, 0,\n",
337 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
338 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 190, 253, 70,\n",
339 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
340 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35,\n",
341 | " 241, 225, 160, 108, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n",
342 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
343 | " 0, 0, 81, 240, 253, 253, 119, 25, 0, 0, 0, 0, 0,\n",
344 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
345 | " 0, 0, 0, 0, 0, 45, 186, 253, 253, 150, 27, 0, 0,\n",
346 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
347 | " 0, 0, 0, 0, 0, 0, 0, 0, 16, 93, 252, 253, 187,\n",
348 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
349 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249,\n",
350 | " 253, 249, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
351 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 130,\n",
352 | " 183, 253, 253, 207, 2, 0, 0, 0, 0, 0, 0, 0, 0,\n",
353 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 148,\n",
354 | " 229, 253, 253, 253, 250, 182, 0, 0, 0, 0, 0, 0, 0,\n",
355 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 114,\n",
356 | " 221, 253, 253, 253, 253, 201, 78, 0, 0, 0, 0, 0, 0,\n",
357 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 66,\n",
358 | " 213, 253, 253, 253, 253, 198, 81, 2, 0, 0, 0, 0, 0,\n",
359 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 171,\n",
360 | " 219, 253, 253, 253, 253, 195, 80, 9, 0, 0, 0, 0, 0,\n",
361 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 172,\n",
362 | " 226, 253, 253, 253, 253, 244, 133, 11, 0, 0, 0, 0, 0,\n",
363 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
364 | " 136, 253, 253, 253, 212, 135, 132, 16, 0, 0, 0, 0, 0,\n",
365 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
366 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
367 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
368 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
369 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
370 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
371 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
372 | " 0, 0, 0, 0], dtype=uint8)"
373 | ]
374 | },
375 | "execution_count": 13,
376 | "metadata": {},
377 | "output_type": "execute_result"
378 | }
379 | ],
380 | "source": [
381 | "x_train[0]"
382 | ]
383 | },
384 | {
385 | "cell_type": "markdown",
386 | "metadata": {},
387 | "source": [
388 | "# Normalize data\n",
389 | "normalization usually helps faster learning speed, better performance \n",
390 | "by reducing variance and giving same range to all input features. \n",
391 | "since MNIST data set all input has 0 to 255, normalization only helps reducing variances. \n",
392 | "it turned out normalization is better than standardization for MNIST data with my MLP architeture, \n",
393 | "I believe this is because relu handles 0 differently on both feed forward and back propagation. \n",
394 | "handling 0 differently is important for MNIST, since 1-255 means there is some hand written, \n",
395 | "while 0 means no hand written on that pixel."
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 14,
401 | "metadata": {},
402 | "outputs": [],
403 | "source": [
404 | "x_train = x_train.astype('float32')\n",
405 | "x_val = x_val.astype('float32')\n",
406 | "x_test = x_test.astype('float32')\n",
407 | "\n",
408 | "gray_scale = 255\n",
409 | "x_train /= gray_scale\n",
410 | "x_val /= gray_scale\n",
411 | "x_test /= gray_scale"
412 | ]
413 | },
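  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "For contrast, below is a minimal sketch of the standardization alternative mentioned above \n",
  | "(zero mean, unit variance per pixel, with statistics computed on the train split only). \n",
  | "The names mean_px and std_px are illustrative; the cell is commented out and is not \n",
  | "used by the rest of the notebook."
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# illustrative sketch only: standardization instead of 0-1 normalization\n",
  | "# mean_px = x_train.mean(axis=0)\n",
  | "# std_px = x_train.std(axis=0) + 1e-8 # avoid division by zero on constant pixels\n",
  | "# x_train_std = (x_train - mean_px) / std_px\n",
  | "# x_val_std = (x_val - mean_px) / std_px\n",
  | "# x_test_std = (x_test - mean_px) / std_px"
  | ]
  | },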
414 | {
415 | "cell_type": "markdown",
416 | "metadata": {},
417 | "source": [
418 | "# label to one hot encoding value"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": 15,
424 | "metadata": {},
425 | "outputs": [],
426 | "source": [
427 | "num_classes = 10\n",
428 | "y_train = tf.keras.utils.to_categorical(y_train, num_classes)\n",
429 | "y_val = tf.keras.utils.to_categorical(y_val, num_classes)\n",
430 | "y_test = tf.keras.utils.to_categorical(y_test, num_classes)"
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": 16,
436 | "metadata": {},
437 | "outputs": [
438 | {
439 | "data": {
440 | "text/plain": [
441 | "array([[0., 0., 0., ..., 0., 0., 0.],\n",
442 | " [1., 0., 0., ..., 0., 0., 0.],\n",
443 | " [0., 0., 0., ..., 0., 0., 0.],\n",
444 | " ...,\n",
445 | " [0., 0., 0., ..., 0., 1., 0.],\n",
446 | " [0., 0., 0., ..., 0., 0., 0.],\n",
447 | " [0., 0., 0., ..., 0., 1., 0.]])"
448 | ]
449 | },
450 | "execution_count": 16,
451 | "metadata": {},
452 | "output_type": "execute_result"
453 | }
454 | ],
455 | "source": [
456 | "y_train"
457 | ]
458 | },
459 | {
460 | "cell_type": "markdown",
461 | "metadata": {},
462 | "source": [
463 | "# Tensorflow MLP Graph\n",
464 | "Let's implement the MLP graph with Tensorflow"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 17,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/html": [
475 | "
"
476 | ],
477 | "text/plain": [
478 | ""
479 | ]
480 | },
481 | "execution_count": 17,
482 | "metadata": {},
483 | "output_type": "execute_result"
484 | }
485 | ],
486 | "source": [
487 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/simple_mlp_mnist.png\", width=500, height=250)"
488 | ]
489 | },
490 | {
491 | "cell_type": "code",
492 | "execution_count": 18,
493 | "metadata": {},
494 | "outputs": [],
495 | "source": [
496 | "x = tf.placeholder(tf.float32, [None, 784])\n",
497 | "y = tf.placeholder(tf.float32, [None, 10])"
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": 19,
503 | "metadata": {},
504 | "outputs": [],
505 | "source": [
506 | "def mlp(x):\n",
507 | " # hidden layer1\n",
508 | " w1 = tf.Variable(tf.random_uniform([784,256]))\n",
509 | " b1 = tf.Variable(tf.zeros([256]))\n",
510 | " h1 = tf.nn.relu(tf.matmul(x, w1) + b1)\n",
511 | " # hidden layer2\n",
512 | " w2 = tf.Variable(tf.random_uniform([256,128]))\n",
513 | " b2 = tf.Variable(tf.zeros([128]))\n",
514 | " h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)\n",
515 | " # output layer\n",
516 | " w3 = tf.Variable(tf.random_uniform([128,10]))\n",
517 | " b3 = tf.Variable(tf.zeros([10]))\n",
518 | " logits= tf.matmul(h2, w3) + b3\n",
519 | " \n",
520 | " return logits"
521 | ]
522 | },
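  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "Note that tf.random_uniform defaults to the [0, 1) range, so every initial weight is \n",
  | "positive and fairly large, which is one likely reason the first epochs below are unstable. \n",
  | "A common alternative (a sketch, not what this tutorial uses) is a small zero-centered \n",
  | "initialization, shown here for hidden layer1 only:"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# illustrative sketch only: a small zero-centered initialization for w1\n",
  | "# w1 = tf.Variable(tf.truncated_normal([784, 256], stddev=0.1))"
  | ]
  | },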
523 | {
524 | "cell_type": "code",
525 | "execution_count": 20,
526 | "metadata": {},
527 | "outputs": [],
528 | "source": [
529 | "logits = mlp(x)"
530 | ]
531 | },
532 | {
533 | "cell_type": "code",
534 | "execution_count": 21,
535 | "metadata": {},
536 | "outputs": [],
537 | "source": [
538 | "loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(\n",
539 | " logits=logits, labels=y))"
540 | ]
541 | },
542 | {
543 | "cell_type": "code",
544 | "execution_count": 22,
545 | "metadata": {},
546 | "outputs": [],
547 | "source": [
548 | "train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss_op)"
549 | ]
550 | },
551 | {
552 | "cell_type": "markdown",
553 | "metadata": {},
554 | "source": [
555 | "# Early Stopping\n",
556 | "When validation accuracy doesn't improve while train accuracy keep improves, \n",
557 | "we can early stop the train in order to avoid overfitting."
558 | ]
559 | },
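  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "Concretely, the training loop below implements the rule roughly as follows: \n",
  | "if the current validation accuracy is below the best seen so far while train accuracy \n",
  | "is still rising (or is already above 0.99), count an overfitting warning; once the warning \n",
  | "count reaches earlystop_threshold, stop training. \n",
  | "Whenever validation accuracy reaches a new best, reset the counter and checkpoint the model."
  | ]
  | },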
560 | {
561 | "cell_type": "code",
562 | "execution_count": 23,
563 | "metadata": {},
564 | "outputs": [
565 | {
566 | "data": {
567 | "text/html": [
568 | "
"
569 | ],
570 | "text/plain": [
571 | ""
572 | ]
573 | },
574 | "execution_count": 23,
575 | "metadata": {},
576 | "output_type": "execute_result"
577 | }
578 | ],
579 | "source": [
580 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/early_stop.png\", width=500, height=250)"
581 | ]
582 | },
583 | {
584 | "cell_type": "code",
585 | "execution_count": 24,
586 | "metadata": {},
587 | "outputs": [],
588 | "source": [
589 | "# initialize\n",
590 | "init = tf.global_variables_initializer()\n",
591 | "\n",
592 | "# Add ops to save and restore all the variables.\n",
593 | "saver = tf.train.Saver()\n",
594 | "\n",
595 | "# train hyperparameters\n",
596 | "epoch_cnt = 300\n",
597 | "batch_size = 1000\n",
598 | "iteration = len(x_train) // batch_size\n",
599 | "\n",
600 | "earlystop_threshold = 5\n",
601 | "earlystop_cnt = 0"
602 | ]
603 | },
604 | {
605 | "cell_type": "code",
606 | "execution_count": 25,
607 | "metadata": {},
608 | "outputs": [
609 | {
610 | "name": "stdout",
611 | "output_type": "stream",
612 | "text": [
613 | "epoch: 0, train acc: 0.2051, val acc: 0.2073\n",
614 | "epoch: 1, train acc: 0.71618, val acc: 0.7463\n",
615 | "epoch: 2, train acc: 0.85374, val acc: 0.8714\n",
616 | "epoch: 3, train acc: 0.86732, val acc: 0.8775\n",
617 | "epoch: 4, train acc: 0.87164, val acc: 0.8805\n",
618 | "epoch: 5, train acc: 0.88568, val acc: 0.8936\n",
619 | "epoch: 6, train acc: 0.89468, val acc: 0.9053\n",
620 | "epoch: 7, train acc: 0.89692, val acc: 0.9068\n",
621 | "epoch: 8, train acc: 0.88106, val acc: 0.893\n",
622 | "epoch: 9, train acc: 0.87626, val acc: 0.8858\n",
623 | "epoch: 10, train acc: 0.8929, val acc: 0.8976\n",
624 | "overfitting warning: 0\n",
625 | "epoch: 11, train acc: 0.90986, val acc: 0.9135\n",
626 | "epoch: 12, train acc: 0.60314, val acc: 0.6213\n",
627 | "epoch: 13, train acc: 0.89902, val acc: 0.9051\n",
628 | "overfitting warning: 0\n",
629 | "epoch: 14, train acc: 0.90816, val acc: 0.9083\n",
630 | "overfitting warning: 1\n",
631 | "epoch: 15, train acc: 0.90432, val acc: 0.9064\n",
632 | "epoch: 16, train acc: 0.91086, val acc: 0.9091\n",
633 | "overfitting warning: 0\n",
634 | "epoch: 17, train acc: 0.90958, val acc: 0.9075\n",
635 | "epoch: 18, train acc: 0.8864, val acc: 0.8913\n",
636 | "epoch: 19, train acc: 0.90518, val acc: 0.9037\n",
637 | "overfitting warning: 0\n",
638 | "epoch: 20, train acc: 0.91278, val acc: 0.912\n",
639 | "overfitting warning: 1\n",
640 | "epoch: 21, train acc: 0.9124, val acc: 0.9124\n",
641 | "epoch: 22, train acc: 0.89056, val acc: 0.8923\n",
642 | "epoch: 23, train acc: 0.92114, val acc: 0.9195\n",
643 | "epoch: 24, train acc: 0.89118, val acc: 0.8911\n",
644 | "epoch: 25, train acc: 0.91298, val acc: 0.9142\n",
645 | "overfitting warning: 0\n",
646 | "epoch: 26, train acc: 0.91998, val acc: 0.9144\n",
647 | "overfitting warning: 1\n",
648 | "epoch: 27, train acc: 0.91468, val acc: 0.9109\n",
649 | "epoch: 28, train acc: 0.9189, val acc: 0.9142\n",
650 | "overfitting warning: 0\n",
651 | "epoch: 29, train acc: 0.912, val acc: 0.9079\n",
652 | "epoch: 30, train acc: 0.92502, val acc: 0.92\n",
653 | "epoch: 31, train acc: 0.92216, val acc: 0.9144\n",
654 | "epoch: 32, train acc: 0.93306, val acc: 0.9253\n",
655 | "epoch: 33, train acc: 0.92712, val acc: 0.9203\n",
656 | "epoch: 34, train acc: 0.93412, val acc: 0.9226\n",
657 | "overfitting warning: 0\n",
658 | "epoch: 35, train acc: 0.92746, val acc: 0.9172\n",
659 | "epoch: 36, train acc: 0.93478, val acc: 0.9231\n",
660 | "overfitting warning: 0\n",
661 | "epoch: 37, train acc: 0.93384, val acc: 0.9263\n",
662 | "epoch: 38, train acc: 0.93596, val acc: 0.9237\n",
663 | "overfitting warning: 0\n",
664 | "epoch: 39, train acc: 0.91672, val acc: 0.907\n",
665 | "epoch: 40, train acc: 0.92634, val acc: 0.9168\n",
666 | "overfitting warning: 0\n",
667 | "epoch: 41, train acc: 0.94104, val acc: 0.9286\n",
668 | "epoch: 42, train acc: 0.91084, val acc: 0.9037\n",
669 | "epoch: 43, train acc: 0.93574, val acc: 0.9221\n",
670 | "overfitting warning: 0\n",
671 | "epoch: 44, train acc: 0.95308, val acc: 0.938\n",
672 | "epoch: 45, train acc: 0.93942, val acc: 0.9265\n",
673 | "epoch: 46, train acc: 0.92026, val acc: 0.9082\n",
674 | "epoch: 47, train acc: 0.95724, val acc: 0.9394\n",
675 | "epoch: 48, train acc: 0.95712, val acc: 0.9398\n",
676 | "epoch: 49, train acc: 0.95174, val acc: 0.9324\n",
677 | "epoch: 50, train acc: 0.95126, val acc: 0.929\n",
678 | "epoch: 51, train acc: 0.9623, val acc: 0.9418\n",
679 | "epoch: 52, train acc: 0.95804, val acc: 0.9384\n",
680 | "epoch: 53, train acc: 0.96124, val acc: 0.9406\n",
681 | "overfitting warning: 0\n",
682 | "epoch: 54, train acc: 0.9504, val acc: 0.928\n",
683 | "epoch: 55, train acc: 0.95066, val acc: 0.9315\n",
684 | "overfitting warning: 0\n",
685 | "epoch: 56, train acc: 0.95204, val acc: 0.9323\n",
686 | "overfitting warning: 1\n",
687 | "epoch: 57, train acc: 0.9556, val acc: 0.9345\n",
688 | "overfitting warning: 2\n",
689 | "epoch: 58, train acc: 0.97104, val acc: 0.9491\n",
690 | "epoch: 59, train acc: 0.96274, val acc: 0.9414\n",
691 | "epoch: 60, train acc: 0.95818, val acc: 0.9363\n",
692 | "epoch: 61, train acc: 0.96462, val acc: 0.9403\n",
693 | "overfitting warning: 0\n",
694 | "epoch: 62, train acc: 0.97752, val acc: 0.952\n",
695 | "epoch: 63, train acc: 0.97924, val acc: 0.9516\n",
696 | "overfitting warning: 0\n",
697 | "epoch: 64, train acc: 0.96168, val acc: 0.9397\n",
698 | "epoch: 65, train acc: 0.97306, val acc: 0.949\n",
699 | "overfitting warning: 0\n",
700 | "epoch: 66, train acc: 0.97876, val acc: 0.9525\n",
701 | "epoch: 67, train acc: 0.98102, val acc: 0.9528\n",
702 | "epoch: 68, train acc: 0.9851, val acc: 0.9549\n",
703 | "epoch: 69, train acc: 0.9785, val acc: 0.9501\n",
704 | "epoch: 70, train acc: 0.97738, val acc: 0.9498\n",
705 | "epoch: 71, train acc: 0.97242, val acc: 0.9477\n",
706 | "epoch: 72, train acc: 0.97876, val acc: 0.9497\n",
707 | "overfitting warning: 0\n",
708 | "epoch: 73, train acc: 0.98862, val acc: 0.9584\n",
709 | "epoch: 74, train acc: 0.98528, val acc: 0.956\n",
710 | "epoch: 75, train acc: 0.97704, val acc: 0.9492\n",
711 | "epoch: 76, train acc: 0.98174, val acc: 0.9525\n",
712 | "overfitting warning: 0\n",
713 | "epoch: 77, train acc: 0.9823, val acc: 0.9541\n",
714 | "overfitting warning: 1\n",
715 | "epoch: 78, train acc: 0.98594, val acc: 0.9558\n",
716 | "overfitting warning: 2\n",
717 | "epoch: 79, train acc: 0.98658, val acc: 0.9578\n",
718 | "overfitting warning: 3\n",
719 | "epoch: 80, train acc: 0.98428, val acc: 0.9566\n",
720 | "epoch: 81, train acc: 0.98738, val acc: 0.9596\n",
721 | "epoch: 82, train acc: 0.9808, val acc: 0.9518\n",
722 | "epoch: 83, train acc: 0.97794, val acc: 0.9489\n",
723 | "epoch: 84, train acc: 0.98024, val acc: 0.9492\n",
724 | "overfitting warning: 0\n",
725 | "epoch: 85, train acc: 0.99022, val acc: 0.9569\n",
726 | "overfitting warning: 1\n",
727 | "epoch: 86, train acc: 0.9932, val acc: 0.9604\n",
728 | "epoch: 87, train acc: 0.99268, val acc: 0.9597\n",
729 | "overfitting warning: 0\n",
730 | "epoch: 88, train acc: 0.99124, val acc: 0.9579\n",
731 | "overfitting warning: 1\n",
732 | "epoch: 89, train acc: 0.99336, val acc: 0.9606\n",
733 | "epoch: 90, train acc: 0.99146, val acc: 0.9573\n",
734 | "overfitting warning: 0\n",
735 | "epoch: 91, train acc: 0.99264, val acc: 0.9614\n",
736 | "epoch: 92, train acc: 0.99172, val acc: 0.9597\n",
737 | "overfitting warning: 0\n",
738 | "epoch: 93, train acc: 0.99026, val acc: 0.9575\n",
739 | "overfitting warning: 1\n",
740 | "epoch: 94, train acc: 0.9983, val acc: 0.9656\n",
741 | "epoch: 95, train acc: 0.99646, val acc: 0.9643\n",
742 | "overfitting warning: 0\n",
743 | "epoch: 96, train acc: 0.99716, val acc: 0.966\n",
744 | "epoch: 97, train acc: 0.99562, val acc: 0.9632\n",
745 | "overfitting warning: 0\n",
746 | "epoch: 98, train acc: 0.9987, val acc: 0.9647\n",
747 | "overfitting warning: 1\n",
748 | "epoch: 99, train acc: 0.99992, val acc: 0.967\n",
749 | "epoch: 100, train acc: 0.99984, val acc: 0.9669\n",
750 | "overfitting warning: 0\n",
751 | "epoch: 101, train acc: 0.99996, val acc: 0.9679\n",
752 | "epoch: 102, train acc: 0.99996, val acc: 0.9673\n",
753 | "overfitting warning: 0\n",
754 | "epoch: 103, train acc: 1.0, val acc: 0.9682\n",
755 | "epoch: 104, train acc: 1.0, val acc: 0.9684\n",
756 | "epoch: 105, train acc: 1.0, val acc: 0.9684\n",
757 | "epoch: 106, train acc: 1.0, val acc: 0.9685\n",
758 | "epoch: 107, train acc: 1.0, val acc: 0.9684\n",
759 | "overfitting warning: 0\n",
760 | "epoch: 108, train acc: 1.0, val acc: 0.9685\n",
761 | "epoch: 109, train acc: 1.0, val acc: 0.9685\n",
762 | "epoch: 110, train acc: 1.0, val acc: 0.9685\n",
763 | "epoch: 111, train acc: 1.0, val acc: 0.9685\n",
764 | "epoch: 112, train acc: 1.0, val acc: 0.9685\n",
765 | "epoch: 113, train acc: 1.0, val acc: 0.9685\n",
766 | "epoch: 114, train acc: 1.0, val acc: 0.9685\n",
767 | "epoch: 115, train acc: 1.0, val acc: 0.9685\n",
768 | "epoch: 116, train acc: 1.0, val acc: 0.9685\n",
769 | "epoch: 117, train acc: 1.0, val acc: 0.9685\n",
770 | "epoch: 118, train acc: 1.0, val acc: 0.9685\n",
771 | "epoch: 119, train acc: 1.0, val acc: 0.9686\n",
772 | "epoch: 120, train acc: 1.0, val acc: 0.9686\n",
773 | "epoch: 121, train acc: 1.0, val acc: 0.9687\n",
774 | "epoch: 122, train acc: 1.0, val acc: 0.9689\n",
775 | "epoch: 123, train acc: 1.0, val acc: 0.9688\n",
776 | "overfitting warning: 0\n",
777 | "epoch: 124, train acc: 1.0, val acc: 0.9689\n",
778 | "epoch: 125, train acc: 1.0, val acc: 0.9689\n",
779 | "epoch: 126, train acc: 1.0, val acc: 0.969\n",
780 | "epoch: 127, train acc: 1.0, val acc: 0.969\n",
781 | "epoch: 128, train acc: 1.0, val acc: 0.969\n",
782 | "epoch: 129, train acc: 1.0, val acc: 0.969\n",
783 | "epoch: 130, train acc: 1.0, val acc: 0.969\n",
784 | "epoch: 131, train acc: 1.0, val acc: 0.969\n",
785 | "epoch: 132, train acc: 1.0, val acc: 0.969\n",
786 | "epoch: 133, train acc: 1.0, val acc: 0.969\n",
787 | "epoch: 134, train acc: 1.0, val acc: 0.9692\n",
788 | "epoch: 135, train acc: 1.0, val acc: 0.9693\n",
789 | "epoch: 136, train acc: 1.0, val acc: 0.9693\n",
790 | "epoch: 137, train acc: 1.0, val acc: 0.9693\n",
791 | "epoch: 138, train acc: 1.0, val acc: 0.9695\n",
792 | "epoch: 139, train acc: 1.0, val acc: 0.9694\n",
793 | "overfitting warning: 0\n",
794 | "epoch: 140, train acc: 1.0, val acc: 0.9694\n",
795 | "overfitting warning: 1\n",
796 | "epoch: 141, train acc: 1.0, val acc: 0.9694\n",
797 | "overfitting warning: 2\n",
798 | "epoch: 142, train acc: 1.0, val acc: 0.9694\n",
799 | "overfitting warning: 3\n",
800 | "epoch: 143, train acc: 1.0, val acc: 0.9694\n",
801 | "overfitting warning: 4\n",
802 | "epoch: 144, train acc: 1.0, val acc: 0.9694\n",
803 | "early stopped on 144\n"
804 | ]
805 | }
806 | ],
807 | "source": [
808 | "# Start training\n",
809 | "with tf.Session() as sess:\n",
810 | " # Run the initializer\n",
811 | " sess.run(init)\n",
812 | " prev_train_acc = 0.0\n",
813 | " max_val_acc = 0.0\n",
814 | " \n",
815 | " for epoch in range(epoch_cnt):\n",
816 | " avg_loss = 0.\n",
817 | " start = 0; end = batch_size\n",
818 | " \n",
819 | " for i in range(iteration):\n",
820 | " _, loss = sess.run([train_op, loss_op], \n",
821 | " feed_dict={x: x_train[start: end], y: y_train[start: end]})\n",
822 | " start += batch_size; end += batch_size\n",
823 | " # Compute train average loss\n",
824 | " avg_loss += loss / iteration\n",
825 | " \n",
826 | " # Validate model\n",
827 | " preds = tf.nn.softmax(logits) # Apply softmax to logits\n",
828 | " correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1))\n",
829 | " # Calculate accuracy\n",
830 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
831 | " # train accuracy\n",
832 | " cur_train_acc = accuracy.eval({x: x_train, y: y_train})\n",
833 | " # validation accuarcy\n",
834 | " cur_val_acc = accuracy.eval({x: x_val, y: y_val})\n",
835 | " # validation loss\n",
836 | " cur_val_loss = loss_op.eval({x: x_val, y: y_val})\n",
837 | " \n",
838 | " print(\"epoch: \"+str(epoch)+\n",
839 | " \", train acc: \" + str(cur_train_acc) +\n",
840 | " \", val acc: \" + str(cur_val_acc) )\n",
841 | " #', train loss: '+str(avg_loss)+\n",
842 | " #', val loss: '+str(cur_val_loss))\n",
843 | " \n",
844 | " if cur_val_acc < max_val_acc:\n",
845 | " if cur_train_acc > prev_train_acc or cur_train_acc > 0.99:\n",
846 | " if earlystop_cnt == earlystop_threshold:\n",
847 | " print(\"early stopped on \"+str(epoch))\n",
848 | " break\n",
849 | " else:\n",
850 | " print(\"overfitting warning: \"+str(earlystop_cnt))\n",
851 | " earlystop_cnt += 1\n",
852 | " else:\n",
853 | " earlystop_cnt = 0\n",
854 | " else:\n",
855 | " earlystop_cnt = 0\n",
856 | " max_val_acc = cur_val_acc\n",
857 | " # Save the variables to file.\n",
858 | " save_path = saver.save(sess, \"model/model.ckpt\")\n",
859 | " prev_train_acc = cur_train_acc"
860 | ]
861 | },
862 | {
863 | "cell_type": "markdown",
864 | "metadata": {},
865 | "source": [
866 | "# Testing with the best epoch"
867 | ]
868 | },
869 | {
870 | "cell_type": "code",
871 | "execution_count": 26,
872 | "metadata": {},
873 | "outputs": [
874 | {
875 | "name": "stdout",
876 | "output_type": "stream",
877 | "text": [
878 | "INFO:tensorflow:Restoring parameters from model/model.ckpt\n",
879 | "[Test Accuracy] : 0.9673\n"
880 | ]
881 | }
882 | ],
883 | "source": [
884 | "# Start testing\n",
885 | "with tf.Session() as sess:\n",
886 | " # Restore variables from disk.\n",
887 | " saver.restore(sess, \"model/model.ckpt\")\n",
888 | " correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1))\n",
889 | " # Calculate accuracy\n",
890 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
891 | " print(\"[Test Accuracy] :\", accuracy.eval({x: x_test, y: y_test}))"
892 | ]
893 | }
894 | ],
895 | "metadata": {
896 | "kernelspec": {
897 | "display_name": "Python 3",
898 | "language": "python",
899 | "name": "python3"
900 | },
901 | "language_info": {
902 | "codemirror_mode": {
903 | "name": "ipython",
904 | "version": 3
905 | },
906 | "file_extension": ".py",
907 | "mimetype": "text/x-python",
908 | "name": "python",
909 | "nbconvert_exporter": "python",
910 | "pygments_lexer": "ipython3",
911 | "version": "3.6.4"
912 | }
913 | },
914 | "nbformat": 4,
915 | "nbformat_minor": 2
916 | }
917 |
--------------------------------------------------------------------------------
/src/MLP_MNIST_Tensorflow_Early_Stopping_DropOut.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# MLP (MNIST, Tensorflow) with Early Stopping, DropOut\n",
8 | "In this tutorial, we will apply early stopping, dropout on MNIST MLP tensorflow code. "
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import tensorflow as tf\n",
18 | "import numpy as np\n",
19 | "from IPython.display import Image"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "# MLP Architecture\n",
27 | "here is the overview of MLP architecture we will implement with Tensorflow"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "data": {
37 | "text/html": [
38 | "
"
39 | ],
40 | "text/plain": [
41 | ""
42 | ]
43 | },
44 | "execution_count": 2,
45 | "metadata": {},
46 | "output_type": "execute_result"
47 | }
48 | ],
49 | "source": [
50 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/dropout.png\", width=500, height=250)"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "# Collect MNIST Data"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 3,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 4,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "name": "stdout",
76 | "output_type": "stream",
77 | "text": [
78 | "(60000, 28, 28)\n",
79 | "(10000, 28, 28)\n"
80 | ]
81 | }
82 | ],
83 | "source": [
84 | "print(x_train.shape)\n",
85 | "print(x_test.shape)"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "train data has **60000** samples \n",
93 | "test data has **10000** samples \n",
94 | "every data is **28 * 28** pixels \n",
95 | "\n",
96 | "below image shows 28*28 pixel image sample for hand written number '0' from MNIST data. \n",
97 | "MNIST is gray scale image [0 to 255] for hand written number."
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | ""
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "# Split train data into train and validation data\n",
112 | "Validation during training gives advantages below, \n",
113 | "1) check if train goes well based on validation score \n",
114 | "2) apply **early stopping** when validation score doesn't improve while train score goes up (overcome **overfitting**)"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 5,
120 | "metadata": {},
121 | "outputs": [],
122 | "source": [
123 | "x_val = x_train[50000:60000]\n",
124 | "x_train = x_train[0:50000]\n",
125 | "y_val = y_train[50000:60000]\n",
126 | "y_train = y_train[0:50000]"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 6,
132 | "metadata": {},
133 | "outputs": [
134 | {
135 | "name": "stdout",
136 | "output_type": "stream",
137 | "text": [
138 | "train data has 50000 samples\n",
139 | "every train data is 28 * 28 image\n"
140 | ]
141 | }
142 | ],
143 | "source": [
144 | "print(\"train data has \" + str(x_train.shape[0]) + \" samples\")\n",
145 | "print(\"every train data is \" + str(x_train.shape[1]) \n",
146 | " + \" * \" + str(x_train.shape[2]) + \" image\")"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 7,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "validation data has 10000 samples\n",
159 | "every train data is 28 * 28 image\n"
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "print(\"validation data has \" + str(x_val.shape[0]) + \" samples\")\n",
165 | "print(\"every train data is \" + str(x_val.shape[1]) \n",
166 | " + \" * \" + str(x_train.shape[2]) + \" image\")"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {},
172 | "source": [
173 | "28 * 28 pixels has gray scale value from **0** to **255**"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": 8,
179 | "metadata": {},
180 | "outputs": [
181 | {
182 | "name": "stdout",
183 | "output_type": "stream",
184 | "text": [
185 | "[ 0 0 0 0 0 0 0 18 219 253 253 253 253 253 198 182 247 241\n",
186 | " 0 0 0 0 0 0 0 0 0 0]\n"
187 | ]
188 | }
189 | ],
190 | "source": [
191 | "# sample to show gray scale values\n",
192 | "print(x_train[0][8])"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "each train data has its label **0** to **9**"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 9,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "[5 0 4 1 9 2 1 3 1]\n"
212 | ]
213 | }
214 | ],
215 | "source": [
216 | "# sample to show labels for first train data to 10th train data\n",
217 | "print(y_train[0:9])"
218 | ]
219 | },
220 | {
221 | "cell_type": "markdown",
222 | "metadata": {},
223 | "source": [
224 | "test data has **10000** samples \n",
225 | "every test data is **28 * 28** image "
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 10,
231 | "metadata": {},
232 | "outputs": [
233 | {
234 | "name": "stdout",
235 | "output_type": "stream",
236 | "text": [
237 | "test data has 10000 samples\n",
238 | "every test data is 28 * 28 image\n"
239 | ]
240 | }
241 | ],
242 | "source": [
243 | "print(\"test data has \" + str(x_test.shape[0]) + \" samples\")\n",
244 | "print(\"every test data is \" + str(x_test.shape[1]) \n",
245 | " + \" * \" + str(x_test.shape[2]) + \" image\")"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {},
251 | "source": [
252 | "# Reshape\n",
253 | "In order to fully connect all pixels to hidden layer, \n",
254 | "we will reshape (28, 28) into (28x28,1) shape. \n",
255 | "It means we flatten row x column shape to an array having 28x28 (756) items."
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": 11,
261 | "metadata": {},
262 | "outputs": [
263 | {
264 | "data": {
265 | "text/html": [
266 | "
"
267 | ],
268 | "text/plain": [
269 | ""
270 | ]
271 | },
272 | "execution_count": 11,
273 | "metadata": {},
274 | "output_type": "execute_result"
275 | }
276 | ],
277 | "source": [
278 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/reshape_mnist.png\", width=500, height=250)"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": 12,
284 | "metadata": {},
285 | "outputs": [
286 | {
287 | "name": "stdout",
288 | "output_type": "stream",
289 | "text": [
290 | "(50000, 784)\n",
291 | "(10000, 784)\n"
292 | ]
293 | }
294 | ],
295 | "source": [
296 | "x_train = x_train.reshape(50000, 784)\n",
297 | "x_val = x_val.reshape(10000, 784)\n",
298 | "x_test = x_test.reshape(10000, 784)\n",
299 | "\n",
300 | "print(x_train.shape)\n",
301 | "print(x_test.shape)"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 13,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "data": {
311 | "text/plain": [
312 | "array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
313 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
314 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
315 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
316 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
317 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
318 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
319 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
320 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
321 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
322 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
323 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 18, 18, 18,\n",
324 | " 126, 136, 175, 26, 166, 255, 247, 127, 0, 0, 0, 0, 0,\n",
325 | " 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253,\n",
326 | " 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0, 0,\n",
327 | " 0, 0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253,\n",
328 | " 253, 253, 253, 253, 253, 251, 93, 82, 82, 56, 39, 0, 0,\n",
329 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 219, 253,\n",
330 | " 253, 253, 253, 253, 198, 182, 247, 241, 0, 0, 0, 0, 0,\n",
331 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
332 | " 80, 156, 107, 253, 253, 205, 11, 0, 43, 154, 0, 0, 0,\n",
333 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
334 | " 0, 0, 0, 14, 1, 154, 253, 90, 0, 0, 0, 0, 0,\n",
335 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
336 | " 0, 0, 0, 0, 0, 0, 0, 139, 253, 190, 2, 0, 0,\n",
337 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
338 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 190, 253, 70,\n",
339 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
340 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35,\n",
341 | " 241, 225, 160, 108, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n",
342 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
343 | " 0, 0, 81, 240, 253, 253, 119, 25, 0, 0, 0, 0, 0,\n",
344 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
345 | " 0, 0, 0, 0, 0, 45, 186, 253, 253, 150, 27, 0, 0,\n",
346 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
347 | " 0, 0, 0, 0, 0, 0, 0, 0, 16, 93, 252, 253, 187,\n",
348 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
349 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249,\n",
350 | " 253, 249, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
351 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 130,\n",
352 | " 183, 253, 253, 207, 2, 0, 0, 0, 0, 0, 0, 0, 0,\n",
353 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 148,\n",
354 | " 229, 253, 253, 253, 250, 182, 0, 0, 0, 0, 0, 0, 0,\n",
355 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 114,\n",
356 | " 221, 253, 253, 253, 253, 201, 78, 0, 0, 0, 0, 0, 0,\n",
357 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 66,\n",
358 | " 213, 253, 253, 253, 253, 198, 81, 2, 0, 0, 0, 0, 0,\n",
359 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 171,\n",
360 | " 219, 253, 253, 253, 253, 195, 80, 9, 0, 0, 0, 0, 0,\n",
361 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 172,\n",
362 | " 226, 253, 253, 253, 253, 244, 133, 11, 0, 0, 0, 0, 0,\n",
363 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
364 | " 136, 253, 253, 253, 212, 135, 132, 16, 0, 0, 0, 0, 0,\n",
365 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
366 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
367 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
368 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
369 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
370 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
371 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
372 | " 0, 0, 0, 0], dtype=uint8)"
373 | ]
374 | },
375 | "execution_count": 13,
376 | "metadata": {},
377 | "output_type": "execute_result"
378 | }
379 | ],
380 | "source": [
381 | "x_train[0]"
382 | ]
383 | },
384 | {
385 | "cell_type": "markdown",
386 | "metadata": {},
387 | "source": [
388 | "# Normalize data\n",
389 | "normalization usually helps faster learning speed, better performance \n",
390 | "by reducing variance and giving same range to all input features. \n",
391 | "since MNIST data set all input has 0 to 255, normalization only helps reducing variances. \n",
392 | "it turned out normalization is better than standardization for MNIST data with my MLP architeture, \n",
393 | "I believe this is because relu handles 0 differently on both feed forward and back propagation. \n",
394 | "handling 0 differently is important for MNIST, since 1-255 means there is some hand written, \n",
395 | "while 0 means no hand written on that pixel."
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 14,
401 | "metadata": {},
402 | "outputs": [],
403 | "source": [
404 | "x_train = x_train.astype('float32')\n",
405 | "x_val = x_val.astype('float32')\n",
406 | "x_test = x_test.astype('float32')\n",
407 | "\n",
408 | "gray_scale = 255\n",
409 | "x_train /= gray_scale\n",
410 | "x_val /= gray_scale\n",
411 | "x_test /= gray_scale"
412 | ]
413 | },
414 | {
415 | "cell_type": "markdown",
416 | "metadata": {},
417 | "source": [
418 | "# label to one hot encoding value"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": 15,
424 | "metadata": {},
425 | "outputs": [],
426 | "source": [
427 | "num_classes = 10\n",
428 | "y_train = tf.keras.utils.to_categorical(y_train, num_classes)\n",
429 | "y_val = tf.keras.utils.to_categorical(y_val, num_classes)\n",
430 | "y_test = tf.keras.utils.to_categorical(y_test, num_classes)"
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": 16,
436 | "metadata": {},
437 | "outputs": [
438 | {
439 | "data": {
440 | "text/plain": [
441 | "array([[0., 0., 0., ..., 0., 0., 0.],\n",
442 | " [1., 0., 0., ..., 0., 0., 0.],\n",
443 | " [0., 0., 0., ..., 0., 0., 0.],\n",
444 | " ...,\n",
445 | " [0., 0., 0., ..., 0., 1., 0.],\n",
446 | " [0., 0., 0., ..., 0., 0., 0.],\n",
447 | " [0., 0., 0., ..., 0., 1., 0.]])"
448 | ]
449 | },
450 | "execution_count": 16,
451 | "metadata": {},
452 | "output_type": "execute_result"
453 | }
454 | ],
455 | "source": [
456 | "y_train"
457 | ]
458 | },
459 | {
460 | "cell_type": "markdown",
461 | "metadata": {},
462 | "source": [
463 | "# Tensorflow MLP Graph\n",
464 | "Let's implement the MLP graph with Tensorflow"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 17,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/html": [
475 | "
"
476 | ],
477 | "text/plain": [
478 | ""
479 | ]
480 | },
481 | "execution_count": 17,
482 | "metadata": {},
483 | "output_type": "execute_result"
484 | }
485 | ],
486 | "source": [
487 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/simple_mlp_mnist.png\", width=500, height=250)"
488 | ]
489 | },
490 | {
491 | "cell_type": "markdown",
492 | "metadata": {},
493 | "source": [
494 | "# Drop Out\n",
495 | "We will apply dropout at the last hidden layer. \n",
496 | "Dropout is a avoiding overfitting technic by randomly deactivate some nodes in the hidden layer. \n",
497 | "Dropout has ensemble effect since every mini batch has different activated nodes. \n",
498 | "Adjusting variables in the deep learning model with drop out mini batch result is similar to random forest approach. \n",
499 | "keep_prob is the tensorflow placeholder for dropout ratio. "
500 | ]
501 | },
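  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "A quick sketch of what tf.nn.dropout does in TensorFlow 1.x (illustrative, commented out): \n",
  | "each element is kept with probability keep_prob, and kept elements are scaled by 1/keep_prob \n",
  | "so that the expected activation is unchanged between training and testing."
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# illustrative sketch only: kept entries become 1/0.9 = 1.11..., dropped entries become 0\n",
  | "# with tf.Session() as demo_sess:\n",
  | "# print(demo_sess.run(tf.nn.dropout(tf.ones([1, 10]), keep_prob=0.9)))"
  | ]
  | },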
502 | {
503 | "cell_type": "code",
504 | "execution_count": 18,
505 | "metadata": {},
506 | "outputs": [],
507 | "source": [
508 | "x = tf.placeholder(tf.float32, [None, 784])\n",
509 | "y = tf.placeholder(tf.float32, [None, 10])\n",
510 | "keep_prob = tf.placeholder(tf.float32)"
511 | ]
512 | },
513 | {
514 | "cell_type": "markdown",
515 | "metadata": {},
516 | "source": [
517 | "In the hidden layer2, you can see we use dropout. \n",
518 | "keep_prob will be filled when we train or test."
519 | ]
520 | },
521 | {
522 | "cell_type": "code",
523 | "execution_count": 19,
524 | "metadata": {},
525 | "outputs": [],
526 | "source": [
527 | "def mlp(x):\n",
528 | " # hidden layer1\n",
529 | " w1 = tf.Variable(tf.random_uniform([784,256]))\n",
530 | " b1 = tf.Variable(tf.zeros([256]))\n",
531 | " h1 = tf.nn.relu(tf.matmul(x, w1) + b1)\n",
532 | " # hidden layer2\n",
533 | " w2 = tf.Variable(tf.random_uniform([256,128]))\n",
534 | " b2 = tf.Variable(tf.zeros([128]))\n",
535 | " h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)\n",
536 | " h2_drop = tf.nn.dropout(h2, keep_prob)\n",
537 | " # output layer\n",
538 | " w3 = tf.Variable(tf.random_uniform([128,10]))\n",
539 | " b3 = tf.Variable(tf.zeros([10]))\n",
540 | " logits= tf.matmul(h2_drop, w3) + b3\n",
541 | " \n",
542 | " return logits"
543 | ]
544 | },
545 | {
546 | "cell_type": "code",
547 | "execution_count": 20,
548 | "metadata": {},
549 | "outputs": [],
550 | "source": [
551 | "logits = mlp(x)"
552 | ]
553 | },
554 | {
555 | "cell_type": "code",
556 | "execution_count": 21,
557 | "metadata": {},
558 | "outputs": [],
559 | "source": [
560 | "loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(\n",
561 | " logits=logits, labels=y))"
562 | ]
563 | },
564 | {
565 | "cell_type": "code",
566 | "execution_count": 22,
567 | "metadata": {},
568 | "outputs": [],
569 | "source": [
570 | "train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss_op)"
571 | ]
572 | },
573 | {
574 | "cell_type": "markdown",
575 | "metadata": {},
576 | "source": [
577 | "# Early Stopping\n",
578 | "When validation accuracy doesn't improve while train accuracy keep improves, \n",
579 | "we can early stop the train in order to avoid overfitting."
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
584 | "execution_count": 23,
585 | "metadata": {},
586 | "outputs": [
587 | {
588 | "data": {
589 | "text/html": [
590 | "
"
591 | ],
592 | "text/plain": [
593 | ""
594 | ]
595 | },
596 | "execution_count": 23,
597 | "metadata": {},
598 | "output_type": "execute_result"
599 | }
600 | ],
601 | "source": [
602 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/early_stop.png\", width=500, height=250)"
603 | ]
604 | },
605 | {
606 | "cell_type": "code",
607 | "execution_count": 24,
608 | "metadata": {},
609 | "outputs": [],
610 | "source": [
611 | "# initialize\n",
612 | "init = tf.global_variables_initializer()\n",
613 | "\n",
614 | "# Add ops to save and restore all the variables.\n",
615 | "saver = tf.train.Saver()\n",
616 | "\n",
617 | "# train hyperparameters\n",
618 | "epoch_cnt = 300\n",
619 | "batch_size = 1000\n",
620 | "iteration = len(x_train) // batch_size\n",
621 | "\n",
622 | "earlystop_threshold = 5\n",
623 | "earlystop_cnt = 0"
624 | ]
625 | },
626 | {
627 | "cell_type": "markdown",
628 | "metadata": {},
629 | "source": [
630 | "Whenever we feed data, we must make sure to fill keep_prob for drop out ratio. \n",
631 | "I used 0.9 for train, since I want to dropout 10% of nodes in the hidden layer2. \n",
632 | "I used 1.0 for test, since I don't want to dropout during testing."
633 | ]
634 | },
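  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "Concretely, the feeds in the loop below look like this sketch (batch_x and batch_y \n",
  | "stand for the current mini batch slices; the other names match the cells above):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# illustrative sketch only: keep_prob joins every feed_dict\n",
  | "# sess.run([train_op, loss_op],\n",
  | "# feed_dict={x: batch_x, y: batch_y, keep_prob: 0.9}) # train: drop 10% of h2\n",
  | "# accuracy.eval({x: x_val, y: y_val, keep_prob: 1.0}) # evaluate: keep all nodes"
  | ]
  | },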
635 | {
636 | "cell_type": "code",
637 | "execution_count": 25,
638 | "metadata": {},
639 | "outputs": [
640 | {
641 | "name": "stdout",
642 | "output_type": "stream",
643 | "text": [
644 | "epoch: 0, train acc: 0.14934, val acc: 0.1518\n",
645 | "epoch: 1, train acc: 0.5617, val acc: 0.585\n",
646 | "epoch: 2, train acc: 0.57466, val acc: 0.5973\n",
647 | "epoch: 3, train acc: 0.62728, val acc: 0.6573\n",
648 | "epoch: 4, train acc: 0.66994, val acc: 0.6933\n",
649 | "epoch: 5, train acc: 0.71234, val acc: 0.734\n",
650 | "epoch: 6, train acc: 0.74974, val acc: 0.7668\n",
651 | "epoch: 7, train acc: 0.77604, val acc: 0.7897\n",
652 | "epoch: 8, train acc: 0.79734, val acc: 0.8072\n",
653 | "epoch: 9, train acc: 0.81464, val acc: 0.8209\n",
654 | "epoch: 10, train acc: 0.82668, val acc: 0.8333\n",
655 | "epoch: 11, train acc: 0.83912, val acc: 0.8436\n",
656 | "epoch: 12, train acc: 0.84832, val acc: 0.852\n",
657 | "epoch: 13, train acc: 0.8569, val acc: 0.8591\n",
658 | "epoch: 14, train acc: 0.86394, val acc: 0.8664\n",
659 | "epoch: 15, train acc: 0.87184, val acc: 0.8728\n",
660 | "epoch: 16, train acc: 0.87796, val acc: 0.878\n",
661 | "epoch: 17, train acc: 0.883, val acc: 0.8847\n",
662 | "epoch: 18, train acc: 0.88768, val acc: 0.8886\n",
663 | "epoch: 19, train acc: 0.89228, val acc: 0.894\n",
664 | "epoch: 20, train acc: 0.89612, val acc: 0.8959\n",
665 | "epoch: 21, train acc: 0.9003, val acc: 0.8985\n",
666 | "epoch: 22, train acc: 0.90356, val acc: 0.9004\n",
667 | "epoch: 23, train acc: 0.90676, val acc: 0.9027\n",
668 | "epoch: 24, train acc: 0.90954, val acc: 0.905\n",
669 | "epoch: 25, train acc: 0.91176, val acc: 0.9063\n",
670 | "epoch: 26, train acc: 0.91406, val acc: 0.9101\n",
671 | "epoch: 27, train acc: 0.9165, val acc: 0.9109\n",
672 | "epoch: 28, train acc: 0.91912, val acc: 0.9118\n",
673 | "epoch: 29, train acc: 0.92116, val acc: 0.9139\n",
674 | "epoch: 30, train acc: 0.92358, val acc: 0.9167\n",
675 | "epoch: 31, train acc: 0.92558, val acc: 0.9188\n",
676 | "epoch: 32, train acc: 0.92692, val acc: 0.9181\n",
677 | "overfitting warning: 0\n",
678 | "epoch: 33, train acc: 0.92858, val acc: 0.92\n",
679 | "epoch: 34, train acc: 0.93106, val acc: 0.9206\n",
680 | "epoch: 35, train acc: 0.93248, val acc: 0.9217\n",
681 | "epoch: 36, train acc: 0.9345, val acc: 0.923\n",
682 | "epoch: 37, train acc: 0.93538, val acc: 0.9227\n",
683 | "overfitting warning: 0\n",
684 | "epoch: 38, train acc: 0.93724, val acc: 0.9237\n",
685 | "epoch: 39, train acc: 0.93872, val acc: 0.9241\n",
686 | "epoch: 40, train acc: 0.93954, val acc: 0.9261\n",
687 | "epoch: 41, train acc: 0.9408, val acc: 0.9264\n",
688 | "epoch: 42, train acc: 0.94226, val acc: 0.9278\n",
689 | "epoch: 43, train acc: 0.94298, val acc: 0.9289\n",
690 | "epoch: 44, train acc: 0.94458, val acc: 0.93\n",
691 | "epoch: 45, train acc: 0.94582, val acc: 0.9312\n",
692 | "epoch: 46, train acc: 0.94666, val acc: 0.9313\n",
693 | "epoch: 47, train acc: 0.94762, val acc: 0.9314\n",
694 | "epoch: 48, train acc: 0.9493, val acc: 0.932\n",
695 | "epoch: 49, train acc: 0.95, val acc: 0.9324\n",
696 | "epoch: 50, train acc: 0.95104, val acc: 0.9323\n",
697 | "overfitting warning: 0\n",
698 | "epoch: 51, train acc: 0.95226, val acc: 0.9331\n",
699 | "epoch: 52, train acc: 0.9532, val acc: 0.9328\n",
700 | "overfitting warning: 0\n",
701 | "epoch: 53, train acc: 0.95378, val acc: 0.9341\n",
702 | "epoch: 54, train acc: 0.95542, val acc: 0.9353\n",
703 | "epoch: 55, train acc: 0.95566, val acc: 0.9344\n",
704 | "overfitting warning: 0\n",
705 | "epoch: 56, train acc: 0.95618, val acc: 0.9339\n",
706 | "overfitting warning: 1\n",
707 | "epoch: 57, train acc: 0.95778, val acc: 0.9354\n",
708 | "epoch: 58, train acc: 0.95866, val acc: 0.9356\n",
709 | "epoch: 59, train acc: 0.9585, val acc: 0.9351\n",
710 | "epoch: 60, train acc: 0.95996, val acc: 0.9357\n",
711 | "epoch: 61, train acc: 0.96062, val acc: 0.9358\n",
712 | "epoch: 62, train acc: 0.96002, val acc: 0.9349\n",
713 | "epoch: 63, train acc: 0.96178, val acc: 0.9356\n",
714 | "overfitting warning: 0\n",
715 | "epoch: 64, train acc: 0.96248, val acc: 0.9379\n",
716 | "epoch: 65, train acc: 0.96358, val acc: 0.9379\n",
717 | "epoch: 66, train acc: 0.9637, val acc: 0.9373\n",
718 | "overfitting warning: 0\n",
719 | "epoch: 67, train acc: 0.96504, val acc: 0.9374\n",
720 | "overfitting warning: 1\n",
721 | "epoch: 68, train acc: 0.96534, val acc: 0.9375\n",
722 | "overfitting warning: 2\n",
723 | "epoch: 69, train acc: 0.9657, val acc: 0.9378\n",
724 | "overfitting warning: 3\n",
725 | "epoch: 70, train acc: 0.96624, val acc: 0.9391\n",
726 | "epoch: 71, train acc: 0.9666, val acc: 0.9397\n",
727 | "epoch: 72, train acc: 0.968, val acc: 0.94\n",
728 | "epoch: 73, train acc: 0.96786, val acc: 0.9402\n",
729 | "epoch: 74, train acc: 0.96946, val acc: 0.941\n",
730 | "epoch: 75, train acc: 0.96952, val acc: 0.9414\n",
731 | "epoch: 76, train acc: 0.96924, val acc: 0.9401\n",
732 | "epoch: 77, train acc: 0.9698, val acc: 0.9397\n",
733 | "overfitting warning: 0\n",
734 | "epoch: 78, train acc: 0.9707, val acc: 0.9428\n",
735 | "epoch: 79, train acc: 0.9711, val acc: 0.9423\n",
736 | "overfitting warning: 0\n",
737 | "epoch: 80, train acc: 0.9727, val acc: 0.9431\n",
738 | "epoch: 81, train acc: 0.97234, val acc: 0.9434\n",
739 | "epoch: 82, train acc: 0.97266, val acc: 0.9425\n",
740 | "overfitting warning: 0\n",
741 | "epoch: 83, train acc: 0.97346, val acc: 0.9432\n",
742 | "overfitting warning: 1\n",
743 | "epoch: 84, train acc: 0.97446, val acc: 0.9438\n",
744 | "epoch: 85, train acc: 0.97454, val acc: 0.9428\n",
745 | "overfitting warning: 0\n",
746 | "epoch: 86, train acc: 0.97474, val acc: 0.9445\n",
747 | "epoch: 87, train acc: 0.97552, val acc: 0.945\n",
748 | "epoch: 88, train acc: 0.97578, val acc: 0.9431\n",
749 | "overfitting warning: 0\n",
750 | "epoch: 89, train acc: 0.9757, val acc: 0.9444\n",
751 | "epoch: 90, train acc: 0.9769, val acc: 0.9434\n",
752 | "overfitting warning: 0\n",
753 | "epoch: 91, train acc: 0.97744, val acc: 0.945\n",
754 | "epoch: 92, train acc: 0.9787, val acc: 0.9455\n",
755 | "epoch: 93, train acc: 0.97788, val acc: 0.9446\n",
756 | "epoch: 94, train acc: 0.97854, val acc: 0.9453\n",
757 | "overfitting warning: 0\n",
758 | "epoch: 95, train acc: 0.9785, val acc: 0.9457\n",
759 | "epoch: 96, train acc: 0.97986, val acc: 0.946\n",
760 | "epoch: 97, train acc: 0.97984, val acc: 0.946\n",
761 | "epoch: 98, train acc: 0.98062, val acc: 0.9462\n",
762 | "epoch: 99, train acc: 0.98024, val acc: 0.9469\n",
763 | "epoch: 100, train acc: 0.98068, val acc: 0.9471\n",
764 | "epoch: 101, train acc: 0.98176, val acc: 0.947\n",
765 | "overfitting warning: 0\n",
766 | "epoch: 102, train acc: 0.9814, val acc: 0.9471\n",
767 | "epoch: 103, train acc: 0.98242, val acc: 0.9487\n",
768 | "epoch: 104, train acc: 0.9824, val acc: 0.9471\n",
769 | "epoch: 105, train acc: 0.9818, val acc: 0.9485\n",
770 | "epoch: 106, train acc: 0.98302, val acc: 0.95\n",
771 | "epoch: 107, train acc: 0.98298, val acc: 0.9488\n",
772 | "epoch: 108, train acc: 0.98378, val acc: 0.9498\n",
773 | "overfitting warning: 0\n",
774 | "epoch: 109, train acc: 0.98264, val acc: 0.9482\n",
775 | "epoch: 110, train acc: 0.98354, val acc: 0.9485\n",
776 | "overfitting warning: 0\n",
777 | "epoch: 111, train acc: 0.98332, val acc: 0.9484\n",
778 | "epoch: 112, train acc: 0.98318, val acc: 0.9496\n",
779 | "epoch: 113, train acc: 0.98458, val acc: 0.9501\n",
780 | "epoch: 114, train acc: 0.98398, val acc: 0.9491\n",
781 | "epoch: 115, train acc: 0.9848, val acc: 0.9495\n",
782 | "overfitting warning: 0\n",
783 | "epoch: 116, train acc: 0.98544, val acc: 0.9491\n",
784 | "overfitting warning: 1\n",
785 | "epoch: 117, train acc: 0.98564, val acc: 0.9512\n",
786 | "epoch: 118, train acc: 0.98624, val acc: 0.9516\n",
787 | "epoch: 119, train acc: 0.98528, val acc: 0.9506\n",
788 | "epoch: 120, train acc: 0.98508, val acc: 0.9496\n",
789 | "epoch: 121, train acc: 0.9842, val acc: 0.9488\n",
790 | "epoch: 122, train acc: 0.98288, val acc: 0.9473\n",
791 | "epoch: 123, train acc: 0.98246, val acc: 0.948\n",
792 | "epoch: 124, train acc: 0.98518, val acc: 0.9516\n",
793 | "epoch: 125, train acc: 0.9846, val acc: 0.9498\n",
794 | "epoch: 126, train acc: 0.98614, val acc: 0.9497\n",
795 | "overfitting warning: 0\n",
796 | "epoch: 127, train acc: 0.98482, val acc: 0.9508\n",
797 | "epoch: 128, train acc: 0.98524, val acc: 0.9498\n",
798 | "overfitting warning: 0\n",
799 | "epoch: 129, train acc: 0.9863, val acc: 0.9501\n",
800 | "overfitting warning: 1\n",
801 | "epoch: 130, train acc: 0.98562, val acc: 0.9513\n",
802 | "epoch: 131, train acc: 0.98658, val acc: 0.9508\n",
803 | "overfitting warning: 0\n",
804 | "epoch: 132, train acc: 0.98414, val acc: 0.9493\n",
805 | "epoch: 133, train acc: 0.9857, val acc: 0.9487\n",
806 | "overfitting warning: 0\n",
807 | "epoch: 134, train acc: 0.9867, val acc: 0.9501\n",
808 | "overfitting warning: 1\n",
809 | "epoch: 135, train acc: 0.98732, val acc: 0.9519\n",
810 | "epoch: 136, train acc: 0.98798, val acc: 0.9506\n",
811 | "overfitting warning: 0\n",
812 | "epoch: 137, train acc: 0.98624, val acc: 0.9491\n",
813 | "epoch: 138, train acc: 0.987, val acc: 0.9527\n",
814 | "epoch: 139, train acc: 0.9863, val acc: 0.9523\n",
815 | "epoch: 140, train acc: 0.98806, val acc: 0.9548\n",
816 | "epoch: 141, train acc: 0.98792, val acc: 0.9526\n",
817 | "epoch: 142, train acc: 0.98784, val acc: 0.9519\n",
818 | "epoch: 143, train acc: 0.98722, val acc: 0.9518\n",
819 | "epoch: 144, train acc: 0.9884, val acc: 0.9514\n",
820 | "overfitting warning: 0\n",
821 | "epoch: 145, train acc: 0.98562, val acc: 0.952\n",
822 | "epoch: 146, train acc: 0.9823, val acc: 0.9485\n",
823 | "epoch: 147, train acc: 0.98936, val acc: 0.9536\n",
824 | "overfitting warning: 0\n",
825 | "epoch: 148, train acc: 0.98716, val acc: 0.953\n",
826 | "epoch: 149, train acc: 0.98628, val acc: 0.9525\n",
827 | "epoch: 150, train acc: 0.98576, val acc: 0.9521\n",
828 | "epoch: 151, train acc: 0.98664, val acc: 0.9494\n",
829 | "overfitting warning: 0\n",
830 | "epoch: 152, train acc: 0.9884, val acc: 0.9552\n",
831 | "epoch: 153, train acc: 0.9892, val acc: 0.9531\n",
832 | "overfitting warning: 0\n",
833 | "epoch: 154, train acc: 0.98852, val acc: 0.9528\n",
834 | "epoch: 155, train acc: 0.98786, val acc: 0.9529\n",
835 | "epoch: 156, train acc: 0.98874, val acc: 0.9538\n",
836 | "overfitting warning: 0\n",
837 | "epoch: 157, train acc: 0.98928, val acc: 0.9549\n",
838 | "overfitting warning: 1\n"
839 | ]
840 | },
841 | {
842 | "name": "stdout",
843 | "output_type": "stream",
844 | "text": [
845 | "epoch: 158, train acc: 0.99028, val acc: 0.9553\n",
846 | "epoch: 159, train acc: 0.99028, val acc: 0.9561\n",
847 | "epoch: 160, train acc: 0.99018, val acc: 0.9562\n",
848 | "epoch: 161, train acc: 0.9893, val acc: 0.9547\n",
849 | "epoch: 162, train acc: 0.98958, val acc: 0.9554\n",
850 | "overfitting warning: 0\n",
851 | "epoch: 163, train acc: 0.98898, val acc: 0.9534\n",
852 | "epoch: 164, train acc: 0.98744, val acc: 0.9508\n",
853 | "epoch: 165, train acc: 0.9909, val acc: 0.9559\n",
854 | "overfitting warning: 0\n",
855 | "epoch: 166, train acc: 0.98936, val acc: 0.9542\n",
856 | "epoch: 167, train acc: 0.98948, val acc: 0.9559\n",
857 | "overfitting warning: 0\n",
858 | "epoch: 168, train acc: 0.98962, val acc: 0.9566\n",
859 | "epoch: 169, train acc: 0.99068, val acc: 0.9553\n",
860 | "overfitting warning: 0\n",
861 | "epoch: 170, train acc: 0.98922, val acc: 0.953\n",
862 | "epoch: 171, train acc: 0.99092, val acc: 0.9581\n",
863 | "epoch: 172, train acc: 0.9866, val acc: 0.9522\n",
864 | "epoch: 173, train acc: 0.99192, val acc: 0.9564\n",
865 | "overfitting warning: 0\n",
866 | "epoch: 174, train acc: 0.99118, val acc: 0.9571\n",
867 | "overfitting warning: 1\n",
868 | "epoch: 175, train acc: 0.98826, val acc: 0.9536\n",
869 | "epoch: 176, train acc: 0.99054, val acc: 0.956\n",
870 | "overfitting warning: 0\n",
871 | "epoch: 177, train acc: 0.99044, val acc: 0.9565\n",
872 | "overfitting warning: 1\n",
873 | "epoch: 178, train acc: 0.99214, val acc: 0.9577\n",
874 | "overfitting warning: 2\n",
875 | "epoch: 179, train acc: 0.99076, val acc: 0.9561\n",
876 | "overfitting warning: 3\n",
877 | "epoch: 180, train acc: 0.99002, val acc: 0.9553\n",
878 | "overfitting warning: 4\n",
879 | "epoch: 181, train acc: 0.99154, val acc: 0.9589\n",
880 | "epoch: 182, train acc: 0.98936, val acc: 0.9544\n",
881 | "epoch: 183, train acc: 0.9904, val acc: 0.9573\n",
882 | "overfitting warning: 0\n",
883 | "epoch: 184, train acc: 0.98904, val acc: 0.9556\n",
884 | "epoch: 185, train acc: 0.98958, val acc: 0.9555\n",
885 | "overfitting warning: 0\n",
886 | "epoch: 186, train acc: 0.99036, val acc: 0.9553\n",
887 | "overfitting warning: 1\n",
888 | "epoch: 187, train acc: 0.99202, val acc: 0.9578\n",
889 | "overfitting warning: 2\n",
890 | "epoch: 188, train acc: 0.99204, val acc: 0.9579\n",
891 | "overfitting warning: 3\n",
892 | "epoch: 189, train acc: 0.99112, val acc: 0.9573\n",
893 | "overfitting warning: 4\n",
894 | "epoch: 190, train acc: 0.99126, val acc: 0.9562\n",
895 | "early stopped on 190\n"
896 | ]
897 | }
898 | ],
899 | "source": [
900 | "# Start training\n",
901 | "with tf.Session() as sess:\n",
902 | " # Run the initializer\n",
903 | " sess.run(init)\n",
904 | " prev_train_acc = 0.0\n",
905 | " max_val_acc = 0.0\n",
906 | " \n",
907 | " for epoch in range(epoch_cnt):\n",
908 | " avg_loss = 0.\n",
909 | " start = 0; end = batch_size\n",
910 | " \n",
911 | " for i in range(iteration):\n",
912 | " _, loss = sess.run([train_op, loss_op], \n",
913 | " feed_dict={x: x_train[start: end], y: y_train[start: end], \n",
914 | " keep_prob: 0.9})\n",
915 | " start += batch_size; end += batch_size\n",
916 | " # Compute train average loss\n",
917 | " avg_loss += loss / iteration\n",
918 | " \n",
919 | " # Validate model\n",
920 | " preds = tf.nn.softmax(logits) # Apply softmax to logits\n",
921 | " correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1))\n",
922 | " # Calculate accuracy\n",
923 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
924 | " # train accuracy\n",
925 | " cur_train_acc = accuracy.eval({x: x_train, y: y_train,keep_prob: 1.0})\n",
926 | " # validation accuarcy\n",
927 | " cur_val_acc = accuracy.eval({x: x_val, y: y_val, keep_prob: 1.0})\n",
928 | " # validation loss\n",
929 | " cur_val_loss = loss_op.eval({x: x_val, y: y_val,keep_prob: 1.0})\n",
930 | " \n",
931 | " print(\"epoch: \"+str(epoch)+\n",
932 | " \", train acc: \" + str(cur_train_acc) +\n",
933 | " \", val acc: \" + str(cur_val_acc) )\n",
934 | " #', train loss: '+str(avg_loss)+\n",
935 | " #', val loss: '+str(cur_val_loss))\n",
936 | " \n",
937 | " if cur_val_acc < max_val_acc:\n",
938 | " if cur_train_acc > prev_train_acc or cur_train_acc > 0.99:\n",
939 | " if earlystop_cnt == earlystop_threshold:\n",
940 | " print(\"early stopped on \"+str(epoch))\n",
941 | " break\n",
942 | " else:\n",
943 | " print(\"overfitting warning: \"+str(earlystop_cnt))\n",
944 | " earlystop_cnt += 1\n",
945 | " else:\n",
946 | " earlystop_cnt = 0\n",
947 | " else:\n",
948 | " earlystop_cnt = 0\n",
949 | " max_val_acc = cur_val_acc\n",
950 | " # Save the variables to file.\n",
951 | " save_path = saver.save(sess, \"model/model.ckpt\")\n",
952 | " prev_train_acc = cur_train_acc"
953 | ]
954 | },
955 | {
956 | "cell_type": "markdown",
957 | "metadata": {},
958 | "source": [
959 | "# Testing with the best epoch"
960 | ]
961 | },
962 | {
963 | "cell_type": "code",
964 | "execution_count": 26,
965 | "metadata": {},
966 | "outputs": [
967 | {
968 | "name": "stdout",
969 | "output_type": "stream",
970 | "text": [
971 | "INFO:tensorflow:Restoring parameters from model/model.ckpt\n",
972 | "[Test Accuracy] : 0.9554\n"
973 | ]
974 | }
975 | ],
976 | "source": [
977 | "# Start testing\n",
978 | "with tf.Session() as sess:\n",
979 | " # Restore variables from disk.\n",
980 | " saver.restore(sess, \"model/model.ckpt\")\n",
981 | " correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1))\n",
982 | " # Calculate accuracy\n",
983 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
984 | " print(\"[Test Accuracy] :\", accuracy.eval({x: x_test, y: y_test, keep_prob: 1.0}))"
985 | ]
986 | }
987 | ],
988 | "metadata": {
989 | "kernelspec": {
990 | "display_name": "Python 3",
991 | "language": "python",
992 | "name": "python3"
993 | },
994 | "language_info": {
995 | "codemirror_mode": {
996 | "name": "ipython",
997 | "version": 3
998 | },
999 | "file_extension": ".py",
1000 | "mimetype": "text/x-python",
1001 | "name": "python",
1002 | "nbconvert_exporter": "python",
1003 | "pygments_lexer": "ipython3",
1004 | "version": "3.6.4"
1005 | }
1006 | },
1007 | "nbformat": 4,
1008 | "nbformat_minor": 2
1009 | }
1010 |
--------------------------------------------------------------------------------
/src/MLP_XOR_Soution_Tensorflow.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# MLP XOR Solution"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "We will implement tensorflow code for XOR operation using Multi Layer Perceptron (a.k.a MLP)"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## single perceptron only works on linearly separable classification\n",
22 | "One perceptron is one decision boundary, so it only solve linearly separable problem."
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | ""
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "MLP (multi layer perceptron) with two neurons in hidden layer can solve XOR. \n",
37 | "Two neurons in hidden layer will draw two boundary lines (z1, z2), \n",
38 | "\n",
39 | "we can make z1, z2 truth table like below,\n",
40 | "z1, z2, value\n",
41 | "0, 0, 0\n",
42 | "0, 1, 1\n",
43 | "1, 0, 1\n",
44 | "\n",
45 | "As you can see from below upper 2d chart, now it is linearly separable on z1, z2 axis, \n",
46 | "one perceptron in the next layer can classify output from hidden layer."
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | ""
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "Because step function is hard to optimize using back propagation due to Non-differentiable, \n",
61 | "We will use sigmoid as its activation instead of step function."
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | " "
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "# Practice with Tensorflow"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 1,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "import tensorflow as tf"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "# Define Tensorflow Graph\n",
92 | "firstly, we will define train data shape. \n",
93 | "XOR train data has input X and output Y. \n",
94 | "\n",
95 | "X is [4,2] shape like below, \n",
96 | "[0, 0], [0, 1], [1, 0], [1, 1] \n",
97 | "\n",
98 | "Y is [4,1] shape like below, \n",
99 | "[[0], [1], [1], [0]] "
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 2,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "X = tf.placeholder(tf.float32, shape=[4,2])\n",
109 | "Y = tf.placeholder(tf.float32, shape=[4,1])"
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {},
115 | "source": [
116 | "# First Layer"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 3,
122 | "metadata": {},
123 | "outputs": [],
124 | "source": [
125 | "# we define first layer has two neurons taking two input values. \n",
126 | "W1 = tf.Variable(tf.random_uniform([2,2]))\n",
127 | "# each neuron has one bias.\n",
128 | "B1 = tf.Variable(tf.zeros([2]))\n",
129 | "# First Layer's output is Z which is the sigmoid(W1 * X + B1)\n",
130 | "Z = tf.sigmoid(tf.matmul(X, W1) + B1)"
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "metadata": {},
136 | "source": [
137 | "# Second Layer"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": 4,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "# we define second layer has one neurons taking two input values. \n",
147 | "W2 = tf.Variable(tf.random_uniform([2,1]))\n",
148 | "# one neuron has one bias.\n",
149 | "B2 = tf.Variable(tf.zeros([1]))\n",
150 | "# Second Layer's output is Y_hat which is the sigmoid(W2 * Z + B2)\n",
151 | "Y_hat = tf.sigmoid(tf.matmul(Z, W2) + B2)"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {},
157 | "source": [
158 | "# Loss Function"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 5,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "# cross entropy\n",
168 | "loss = tf.reduce_mean(-1*((Y*tf.log(Y_hat))+((1-Y)*tf.log(1.0-Y_hat))))"
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "metadata": {},
174 | "source": [
175 | "# Optimizer"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 6,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "# Gradient Descent\n",
185 | "train_step = tf.train.GradientDescentOptimizer(0.05).minimize(loss)"
186 | ]
187 | },
188 | {
189 | "cell_type": "markdown",
190 | "metadata": {},
191 | "source": [
192 | "# Train"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 7,
198 | "metadata": {},
199 | "outputs": [],
200 | "source": [
201 | "# train data\n",
202 | "train_X = [[0,0],[0,1],[1,0],[1,1]]\n",
203 | "train_Y = [[0],[1],[1],[0]]"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 10,
209 | "metadata": {},
210 | "outputs": [
211 | {
212 | "name": "stdout",
213 | "output_type": "stream",
214 | "text": [
215 | "train data: [[0, 0], [0, 1], [1, 0], [1, 1]]\n",
216 | "Epoch : 0\n",
217 | "Output : [[0.5759099]\n",
218 | " [0.5988261]\n",
219 | " [0.5978792]\n",
220 | " [0.617011 ]]\n",
221 | "Epoch : 5000\n",
222 | "Output : [[0.24778686]\n",
223 | " [0.63629276]\n",
224 | " [0.55345565]\n",
225 | " [0.59068745]]\n",
226 | "Epoch : 10000\n",
227 | "Output : [[0.075223 ]\n",
228 | " [0.9185553 ]\n",
229 | " [0.91934305]\n",
230 | " [0.10548425]]\n",
231 | "Epoch : 15000\n",
232 | "Output : [[0.0316886 ]\n",
233 | " [0.97331434]\n",
234 | " [0.9735103 ]\n",
235 | " [0.03062824]]\n",
236 | "Final Output : [[0.01958315]\n",
237 | " [0.9845865 ]\n",
238 | " [0.98467547]\n",
239 | " [0.01702813]]\n"
240 | ]
241 | }
242 | ],
243 | "source": [
244 | "# initialize\n",
245 | "init = tf.global_variables_initializer()\n",
246 | "# Start training\n",
247 | "with tf.Session() as sess:\n",
248 | " # Run the initializer\n",
249 | " sess.run(init)\n",
250 | " print(\"train data: \"+str(train_X))\n",
251 | " for i in range(20000):\n",
252 | " sess.run(train_step, feed_dict={X: train_X, Y: train_Y})\n",
253 | " if i % 5000 == 0:\n",
254 | " print('Epoch : ', i)\n",
255 | " print('Output : ', sess.run(Y_hat, feed_dict={X: train_X, Y: train_Y}))\n",
256 | " \n",
257 | " print('Final Output : ', sess.run(Y_hat, feed_dict={X: train_X, Y: train_Y}))"
258 | ]
259 | }
260 | ],
261 | "metadata": {
262 | "kernelspec": {
263 | "display_name": "Python 3",
264 | "language": "python",
265 | "name": "python3"
266 | },
267 | "language_info": {
268 | "codemirror_mode": {
269 | "name": "ipython",
270 | "version": 3
271 | },
272 | "file_extension": ".py",
273 | "mimetype": "text/x-python",
274 | "name": "python",
275 | "nbconvert_exporter": "python",
276 | "pygments_lexer": "ipython3",
277 | "version": "3.6.4"
278 | }
279 | },
280 | "nbformat": 4,
281 | "nbformat_minor": 2
282 | }
283 |
--------------------------------------------------------------------------------
/src/Vanilla_RNN_Tensorflow.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "We will take a look how tensorflow basic RNN works!"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "from IPython.display import Image"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "From below example, pos tag can be different for same word according to sequence of words. "
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "I work at google => (pronoun) (verb) (preposition) (noun) \n",
32 | "I google at work => (pronoun) (verb) (preposition) (noun)"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "RNN is the neural network which takes previous state and current input to output current state. Therefore RNN is the greate candidate for our example. \n",
40 | "\n",
41 | "Below diagram shows how RNN output pos tagging for \"I work at google\"."
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/html": [
52 | "
"
53 | ],
54 | "text/plain": [
55 | ""
56 | ]
57 | },
58 | "execution_count": 2,
59 | "metadata": {},
60 | "output_type": "execute_result"
61 | }
62 | ],
63 | "source": [
64 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/iworkatgoogle.png\", width=500, height=250)"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | "Below diagram shows how RNN output pos tagging for \"I google at work\"."
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 3,
77 | "metadata": {},
78 | "outputs": [
79 | {
80 | "data": {
81 | "text/html": [
82 | "
"
83 | ],
84 | "text/plain": [
85 | ""
86 | ]
87 | },
88 | "execution_count": 3,
89 | "metadata": {},
90 | "output_type": "execute_result"
91 | }
92 | ],
93 | "source": [
94 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/igoogleatwork.png\", width=500, height=250)"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "Vanilla RNN architecture is basically like below diagram and we will take a look tensorflow BasicRNNCell for clear understanding of below diagram."
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 4,
107 | "metadata": {},
108 | "outputs": [
109 | {
110 | "data": {
111 | "text/html": [
112 | "
"
113 | ],
114 | "text/plain": [
115 | ""
116 | ]
117 | },
118 | "execution_count": 4,
119 | "metadata": {},
120 | "output_type": "execute_result"
121 | }
122 | ],
123 | "source": [
124 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/rnn_simple_diagram.png\", width=500, height=250)"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "## What is difference between output and state?"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "from our diagram, y is the output and arrows going to next RNN cell is the state or a.k.a hidden state. if you print state, you will get the last hidden state value which is the most right arrow from diagram. \n",
139 | "\n",
140 | "if you have just one cell in your RNN, the output and state have same value. Why? because you can see there are two lines outgoing from tanh from each cell. these two lines have same value. "
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": 5,
146 | "metadata": {},
147 | "outputs": [
148 | {
149 | "data": {
150 | "text/html": [
151 | "
"
152 | ],
153 | "text/plain": [
154 | ""
155 | ]
156 | },
157 | "execution_count": 5,
158 | "metadata": {},
159 | "output_type": "execute_result"
160 | }
161 | ],
162 | "source": [
163 | "Image(url= \"https://raw.githubusercontent.com/minsuk-heo/deeplearning/master/img/rnn_single.png\", width=500, height=250)"
164 | ]
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "above diagram also shows how many weights and bias are exist when input shape is [1,2], and the rnn cell shape is [1,3]. \n",
171 | "in order to have [1,3] from input and **W**xh's matrix multiplication, since input is [1,2], **W**xh must have [2,3] \n",
172 | "in order to have [1,3] from previous state and **W**hh's matrix multiplication, since previous state is [1,3], **W**hh must have [3,3]"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 6,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "import tensorflow as tf\n"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 7,
187 | "metadata": {},
188 | "outputs": [],
189 | "source": [
190 | "inputs = np.array([\n",
191 | " [ [1,2] ]\n",
192 | "])"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 8,
198 | "metadata": {},
199 | "outputs": [
200 | {
201 | "name": "stdout",
202 | "output_type": "stream",
203 | "text": [
204 | "Tensor(\"rnn/transpose_1:0\", shape=(1, 1, 3), dtype=float32)\n",
205 | "Tensor(\"rnn/while/Exit_3:0\", shape=(1, 3), dtype=float32)\n",
206 | "weights\n",
207 | "\n",
208 | "\n",
209 | "output values\n",
210 | "[[[-0.9314169 0.75578666 -0.6819246 ]]]\n",
211 | "\n",
212 | "state value\n",
213 | "[[-0.9314169 0.75578666 -0.6819246 ]]\n",
214 | "weights\n",
215 | "rnn/basic_rnn_cell/kernel:0 [[-0.62831575 0.38538355 0.79733914]\n",
216 | " [-0.5203329 0.30046564 -0.8150209 ]\n",
217 | " [ 0.39399797 0.16670114 0.4062907 ]\n",
218 | " [-0.6391754 0.8460203 0.5266966 ]\n",
219 | " [ 0.41124135 0.66347724 -0.0210759 ]]\n",
220 | "rnn/basic_rnn_cell/bias:0 [0. 0. 0.]\n"
221 | ]
222 | }
223 | ],
224 | "source": [
225 | "tf.reset_default_graph()\n",
226 | "tf.set_random_seed(777)\n",
227 | "tf_inputs = tf.constant(inputs, dtype=tf.float32)\n",
228 | "rnn_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=3)\n",
229 | "outputs, state = tf.nn.dynamic_rnn(cell=rnn_cell, dtype=tf.float32, inputs=tf_inputs)\n",
230 | "variables_names =[v.name for v in tf.trainable_variables()]\n",
231 | "\n",
232 | "print(outputs)\n",
233 | "print(state)\n",
234 | "print(\"weights\")\n",
235 | "for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):\n",
236 | " print(v)\n",
237 | "\n",
238 | "with tf.Session() as sess:\n",
239 | " sess.run(tf.global_variables_initializer())\n",
240 | " output_run, state_run = sess.run([outputs, state])\n",
241 | " print(\"output values\")\n",
242 | " print(output_run)\n",
243 | " print(\"\\nstate value\")\n",
244 | " print(state_run)\n",
245 | " print(\"weights\")\n",
246 | " values = sess.run(variables_names)\n",
247 | " for k,v in zip(variables_names, values):\n",
248 | " print(k, v)"
249 | ]
250 | },
251 | {
252 | "cell_type": "markdown",
253 | "metadata": {},
254 | "source": [
255 | "# Practice rnn cell with sentence\n",
256 | "here we practice with our example \"I work at google\" and \"I google at work\". each word represented with one hot encoding."
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "execution_count": 9,
262 | "metadata": {},
263 | "outputs": [],
264 | "source": [
265 | "# I [1,0,0,0]\n",
266 | "# work [0,1,0,0]\n",
267 | "# at [0,0,1,0]\n",
268 | "# google [0,0,0,1]\n",
269 | "#\n",
270 | "# I work at google = [ [1,0,0,0], [0,1,0,0], [0,0,1,0], [0,0,0,1] ]\n",
271 | "# I google at work = [ [1,0,0,0], [0,0,0,1], [0,0,1,0], [0,1,0,0] ]\n",
272 | "\n",
273 | "inputs = np.array([\n",
274 | " [ [1,0,0,0], [0,1,0,0], [0,0,1,0], [0,0,0,1] ],\n",
275 | " [ [1,0,0,0], [0,0,0,1], [0,0,1,0], [0,1,0,0] ]\n",
276 | "])"
277 | ]
278 | },
279 | {
280 | "cell_type": "markdown",
281 | "metadata": {},
282 | "source": [
283 | "# RNN\n",
284 | "By running below code block, you can see every word's output are different except for first word. This is because current output is generated not only from input but also previous state. This is reason why RNN can differentiate same word into different pos tag using the word sequence in a sentence."
285 | ]
286 | },
287 | {
288 | "cell_type": "code",
289 | "execution_count": 10,
290 | "metadata": {},
291 | "outputs": [
292 | {
293 | "name": "stdout",
294 | "output_type": "stream",
295 | "text": [
296 | "Tensor(\"rnn/transpose_1:0\", shape=(2, 4, 3), dtype=float32)\n",
297 | "Tensor(\"rnn/while/Exit_3:0\", shape=(2, 3), dtype=float32)\n",
298 | "weights\n",
299 | "\n",
300 | "\n",
301 | "output values\n",
302 | "[[[-0.50944704 0.33166462 0.6126557 ]\n",
303 | " [-0.20793891 0.24406303 -0.75278705]\n",
304 | " [-0.06346128 -0.52844936 0.68356085]\n",
305 | " [-0.36491966 0.8857268 -0.02324398]]\n",
306 | "\n",
307 | " [[-0.50944704 0.33166462 0.6126557 ]\n",
308 | " [-0.30707452 0.62735885 0.21719742]\n",
309 | " [ 0.5043804 -0.14038289 0.3744523 ]\n",
310 | " [-0.11641283 0.70696247 -0.7512605 ]]]\n",
311 | "\n",
312 | "state value\n",
313 | "[[-0.36491966 0.8857268 -0.02324398]\n",
314 | " [-0.11641283 0.70696247 -0.7512605 ]]\n",
315 | "weights\n",
316 | "rnn/basic_rnn_cell/kernel:0 [[-0.56198275 0.34469748 0.7131618 ]\n",
317 | " [-0.4653999 0.2687447 -0.7289769 ]\n",
318 | " [ 0.35240245 0.14910203 0.36339748]\n",
319 | " [-0.57169586 0.7567036 0.47109187]\n",
320 | " [ 0.3678255 0.5934322 -0.01885086]\n",
321 | " [ 0.31208777 -0.40880746 0.22867584]\n",
322 | " [ 0.5521256 0.682691 -0.5481483 ]]\n",
323 | "rnn/basic_rnn_cell/bias:0 [0. 0. 0.]\n"
324 | ]
325 | }
326 | ],
327 | "source": [
328 | "tf.reset_default_graph()\n",
329 | "tf.set_random_seed(777)\n",
330 | "tf_inputs = tf.constant(inputs, dtype=tf.float32)\n",
331 | "rnn_cell = tf.contrib.rnn.BasicRNNCell(num_units=3)\n",
332 | "outputs, state = tf.nn.dynamic_rnn(cell=rnn_cell, dtype=tf.float32, inputs=tf_inputs)\n",
333 | "variables_names =[v.name for v in tf.trainable_variables()]\n",
334 | "\n",
335 | "print(outputs)\n",
336 | "print(state)\n",
337 | "print(\"weights\")\n",
338 | "for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):\n",
339 | " print(v)\n",
340 | " \n",
341 | "with tf.Session() as sess:\n",
342 | " sess.run(tf.global_variables_initializer())\n",
343 | " output_run, state_run = sess.run([outputs, state])\n",
344 | " print(\"output values\")\n",
345 | " print(output_run)\n",
346 | " print(\"\\nstate value\")\n",
347 | " print(state_run)\n",
348 | " print(\"weights\")\n",
349 | " values = sess.run(variables_names)\n",
350 | " for k,v in zip(variables_names, values):\n",
351 | " print(k, v)"
352 | ]
353 | },
354 | {
355 | "cell_type": "markdown",
356 | "metadata": {},
357 | "source": [
358 | "When input was \"I work at google\", \"work\" output was [-0.20793891 0.24406303 -0.75278705] \n",
359 | "When input was \"I google at work\" \"work\" outpu was [-0.11641283 0.70696247 -0.7512605 ] \n",
360 | "Also you can see state is exactly same with last output value."
361 | ]
362 | }
363 | ],
364 | "metadata": {
365 | "kernelspec": {
366 | "display_name": "Python 3",
367 | "language": "python",
368 | "name": "python3"
369 | },
370 | "language_info": {
371 | "codemirror_mode": {
372 | "name": "ipython",
373 | "version": 3
374 | },
375 | "file_extension": ".py",
376 | "mimetype": "text/x-python",
377 | "name": "python",
378 | "nbconvert_exporter": "python",
379 | "pygments_lexer": "ipython3",
380 | "version": "3.6.4"
381 | }
382 | },
383 | "nbformat": 4,
384 | "nbformat_minor": 2
385 | }
386 |
--------------------------------------------------------------------------------
/src/single_neuron_perceptron.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Single Neuron Perceptron\n",
8 | "Neural Network (a.k.a deep learning) is a set of layers, and the layers are set of neurons. \n",
9 | "In order to understand how deep learning works, \n",
10 | "understanding of single neuron (a.k.a node or perceptron) is very important."
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "Multi Layer Perceptron (a.k.a MLP) which is basic model of deep learning works like below."
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | " "
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "Neuron (a.k.a node, perceptron) works like below picture."
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | ""
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "There are various activation functions used by many type of deep learnings, \n",
46 | "Traditionally, Single Neuron Perceptron used step function as activation function. \n",
47 | "Single Perceptron could solve AND, OR operation, but it couldn't solve XOR operation.\n",
48 | "XOR can be solved by Multi Layer Perceptron (MLP), even the MLP has perceptron in its name, \n",
49 | "any activation function can be used for MLP."
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | ""
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "# Practice with Tensorflow"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 1,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "import tensorflow as tf"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "# Constants\n",
80 | "We will practice perceptron with AND, OR, XOR operation. \n",
81 | "For Truth table and bias of the perceptron, we created constant here."
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 2,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "T = 1.0\n",
91 | "F = 0.0\n",
92 | "bias = 1.0"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "# Collect Data\n",
100 | "Here is the truth table we will solve using perceptron.\n",
101 | "\n",
102 | "We will choose data for any operation from these functions."
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 3,
108 | "metadata": {},
109 | "outputs": [],
110 | "source": [
111 | "def get_AND_data():\n",
112 | " X = [\n",
113 | " [F, F, bias],\n",
114 | " [F, T, bias],\n",
115 | " [T, F, bias],\n",
116 | " [T, T, bias]\n",
117 | " ]\n",
118 | " \n",
119 | " Y = [\n",
120 | " [F],\n",
121 | " [F],\n",
122 | " [F],\n",
123 | " [T]\n",
124 | " ]\n",
125 | " \n",
126 | " return X, Y\n",
127 | "\n",
128 | "def get_OR_data():\n",
129 | " X = [\n",
130 | " [F, F, bias],\n",
131 | " [F, T, bias],\n",
132 | " [T, F, bias],\n",
133 | " [T, T, bias]\n",
134 | " ]\n",
135 | " \n",
136 | " Y = [\n",
137 | " [F],\n",
138 | " [T],\n",
139 | " [T],\n",
140 | " [T]\n",
141 | " ]\n",
142 | " \n",
143 | " return X, Y\n",
144 | "\n",
145 | "def get_XOR_data():\n",
146 | " X = [\n",
147 | " [F, F, bias],\n",
148 | " [F, T, bias],\n",
149 | " [T, F, bias],\n",
150 | " [T, T, bias]\n",
151 | " ]\n",
152 | " \n",
153 | " Y = [\n",
154 | " [F],\n",
155 | " [T],\n",
156 | " [T],\n",
157 | " [F]\n",
158 | " ]\n",
159 | " \n",
160 | " return X, Y"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "Choose data for your practice."
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 4,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": [
176 | "X, Y = get_AND_data()\n",
177 | "#X, Y = get_OR_data()\n",
178 | "#X, Y = get_XOR_data()"
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {},
184 | "source": [
185 | "# Initialize weights\n",
186 | "We will initialize weight with random number. \n",
187 | "since the truth table has two inputs with one bias, \n",
188 | "and we just have single neuron, the shape of weight is [3,1]"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 5,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "W = tf.Variable(tf.random_normal([3, 1]))"
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {},
203 | "source": [
204 | "# Activation Function\n",
205 | "Perceptron uses step function as its activation function. \n",
206 | "step(x) = { 1 if x > 0; 0 otherwise }"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 6,
212 | "metadata": {},
213 | "outputs": [],
214 | "source": [
215 | "def step(x):\n",
216 | " return tf.to_float(tf.greater(x, 0))"
217 | ]
218 | },
219 | {
220 | "cell_type": "markdown",
221 | "metadata": {},
222 | "source": [
223 | "# Loss Function\n",
224 | "We will simply use Mean Square Error as its loss function"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": 7,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "f = tf.matmul(X, W)\n",
234 | "output = step(f)\n",
235 | "error = tf.subtract(Y, output)\n",
236 | "mse = tf.reduce_mean(tf.square(error))"
237 | ]
238 | },
239 | {
240 | "cell_type": "markdown",
241 | "metadata": {},
242 | "source": [
243 | "# Optimize weights\n",
244 | "Here is how we update weights using unified learning rule which derived from below concept. \n",
245 | "\n",
246 | "if target == 1 and activation == 0: \n",
247 | " w_new = w_old + input \n",
248 | " \n",
249 | "if target == 0 and activation == 1: \n",
250 | " w_new = w_old - input "
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": 8,
256 | "metadata": {},
257 | "outputs": [],
258 | "source": [
259 | "delta = tf.matmul(X, error, transpose_a=True)\n",
260 | "train = tf.assign(W, tf.add(W, delta))"
261 | ]
262 | },
263 | {
264 | "cell_type": "markdown",
265 | "metadata": {},
266 | "source": [
267 | "# Training and Testing\n",
268 | "Start Trainin and Testing"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 9,
274 | "metadata": {},
275 | "outputs": [
276 | {
277 | "name": "stdout",
278 | "output_type": "stream",
279 | "text": [
280 | "epoch: 1 mse: 0.75\n",
281 | "epoch: 2 mse: 0.25\n",
282 | "epoch: 3 mse: 0.25\n",
283 | "epoch: 4 mse: 0.25\n",
284 | "epoch: 5 mse: 0.5\n",
285 | "epoch: 6 mse: 0.25\n",
286 | "epoch: 7 mse: 0.0\n",
287 | "\n",
288 | "Testing Result:\n",
289 | "[array([[0.],\n",
290 | " [0.],\n",
291 | " [0.],\n",
292 | " [1.]], dtype=float32)]\n"
293 | ]
294 | }
295 | ],
296 | "source": [
297 | "# Initialize the variables (i.e. assign their default value)\n",
298 | "init = tf.global_variables_initializer()\n",
299 | "\n",
300 | "# Start training\n",
301 | "with tf.Session() as sess:\n",
302 | " # Run the initializer\n",
303 | " sess.run(init)\n",
304 | " err = 1\n",
305 | " epoch, max_epochs = 0, 20\n",
306 | " while err > 0.0 and epoch < max_epochs:\n",
307 | " epoch += 1\n",
308 | " err = sess.run(mse)\n",
309 | " sess.run(train)\n",
310 | " print('epoch:', epoch, 'mse:', err)\n",
311 | " \n",
312 | " print(\"\\nTesting Result:\")\n",
313 | " print(sess.run([output]))"
314 | ]
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {},
319 | "source": [
320 | "## single perceptron only works on linearly separable classification\n",
321 | "One perceptron is one decision boundary, so it only solve linearly separable problem."
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "metadata": {},
327 | "source": [
328 | ""
329 | ]
330 | },
331 | {
332 | "cell_type": "markdown",
333 | "metadata": {},
334 | "source": [
335 | "MLP (multi layer perceptron) with two neurons in hidden layer can solve XOR. \n",
336 | "Two neurons in hidden layer will draw two boundary lines (z1, z2), \n",
337 | "\n",
338 | "we can make z1, z2 truth table like below,\n",
339 | "z1, z2, value\n",
340 | "0, 0, 0\n",
341 | "0, 1, 1\n",
342 | "1, 0, 1\n",
343 | "\n",
344 | "As you can see from below upper 2d chart, now it is linearly separable on z1, z2 axis, \n",
345 | "one perceptron in the next layer can classify output from hidden layer."
346 | ]
347 | },
348 | {
349 | "cell_type": "markdown",
350 | "metadata": {},
351 | "source": [
352 | ""
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": null,
358 | "metadata": {},
359 | "outputs": [],
360 | "source": []
361 | }
362 | ],
363 | "metadata": {
364 | "kernelspec": {
365 | "display_name": "Python 3",
366 | "language": "python",
367 | "name": "python3"
368 | },
369 | "language_info": {
370 | "codemirror_mode": {
371 | "name": "ipython",
372 | "version": 3
373 | },
374 | "file_extension": ".py",
375 | "mimetype": "text/x-python",
376 | "name": "python",
377 | "nbconvert_exporter": "python",
378 | "pygments_lexer": "ipython3",
379 | "version": "3.6.4"
380 | }
381 | },
382 | "nbformat": 4,
383 | "nbformat_minor": 2
384 | }
385 |
--------------------------------------------------------------------------------
/src/word2vec_tensorflow.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Word2Vec\n",
8 | "here I implement word2vec with very simple example using tensorflow \n",
9 | "word2vec is vector representation for words with similarity"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "# Collect Data\n",
17 | "we will use only 10 sentences to create word vectors"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 64,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "corpus = ['king is a strong man', \n",
27 | " 'queen is a wise woman', \n",
28 | " 'boy is a young man',\n",
29 | " 'girl is a young woman',\n",
30 | " 'prince is a young king',\n",
31 | " 'princess is a young queen',\n",
32 | " 'man is strong', \n",
33 | " 'woman is pretty',\n",
34 | " 'prince is a boy will be king',\n",
35 | " 'princess is a girl will be queen']"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "# Remove stop words\n",
43 | "In order for efficiency of creating word vector, we will remove commonly used words"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 65,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "def remove_stop_words(corpus):\n",
53 | " stop_words = ['is', 'a', 'will', 'be']\n",
54 | " results = []\n",
55 | " for text in corpus:\n",
56 | " tmp = text.split(' ')\n",
57 | " for stop_word in stop_words:\n",
58 | " if stop_word in tmp:\n",
59 | " tmp.remove(stop_word)\n",
60 | " results.append(\" \".join(tmp))\n",
61 | " \n",
62 | " return results"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 66,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "corpus = remove_stop_words(corpus)"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 67,
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "words = []\n",
81 | "for text in corpus:\n",
82 | " for word in text.split(' '):\n",
83 | " words.append(word)\n",
84 | "\n",
85 | "words = set(words)"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "here we have word set by which we will have word vector"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 68,
98 | "metadata": {},
99 | "outputs": [
100 | {
101 | "data": {
102 | "text/plain": [
103 | "{'boy',\n",
104 | " 'girl',\n",
105 | " 'king',\n",
106 | " 'man',\n",
107 | " 'pretty',\n",
108 | " 'prince',\n",
109 | " 'princess',\n",
110 | " 'queen',\n",
111 | " 'strong',\n",
112 | " 'wise',\n",
113 | " 'woman',\n",
114 | " 'young'}"
115 | ]
116 | },
117 | "execution_count": 68,
118 | "metadata": {},
119 | "output_type": "execute_result"
120 | }
121 | ],
122 | "source": [
123 | "words"
124 | ]
125 | },
126 | {
127 | "cell_type": "markdown",
128 | "metadata": {},
129 | "source": [
130 | "# data generation\n",
131 | "we will generate label for each word using skip gram. "
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 69,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "word2int = {}\n",
141 | "\n",
142 | "for i,word in enumerate(words):\n",
143 | " word2int[word] = i\n",
144 | "\n",
145 | "sentences = []\n",
146 | "for sentence in corpus:\n",
147 | " sentences.append(sentence.split())\n",
148 | " \n",
149 | "WINDOW_SIZE = 2\n",
150 | "\n",
151 | "data = []\n",
152 | "for sentence in sentences:\n",
153 | " for idx, word in enumerate(sentence):\n",
154 | " for neighbor in sentence[max(idx - WINDOW_SIZE, 0) : min(idx + WINDOW_SIZE, len(sentence)) + 1] : \n",
155 | " if neighbor != word:\n",
156 | " data.append([word, neighbor])"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": 70,
162 | "metadata": {},
163 | "outputs": [
164 | {
165 | "name": "stdout",
166 | "output_type": "stream",
167 | "text": [
168 | "king strong man\n",
169 | "queen wise woman\n",
170 | "boy young man\n",
171 | "girl young woman\n",
172 | "prince young king\n",
173 | "princess young queen\n",
174 | "man strong\n",
175 | "woman pretty\n",
176 | "prince boy king\n",
177 | "princess girl queen\n"
178 | ]
179 | }
180 | ],
181 | "source": [
182 | "import pandas as pd\n",
183 | "for text in corpus:\n",
184 | " print(text)\n",
185 | "\n",
186 | "df = pd.DataFrame(data, columns = ['input', 'label'])"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 71,
192 | "metadata": {},
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/html": [
197 | "\n",
198 | "\n",
211 | "
\n",
212 | " \n",
213 | " \n",
214 | " | \n",
215 | " input | \n",
216 | " label | \n",
217 | "
\n",
218 | " \n",
219 | " \n",
220 | " \n",
221 | " 0 | \n",
222 | " king | \n",
223 | " strong | \n",
224 | "
\n",
225 | " \n",
226 | " 1 | \n",
227 | " king | \n",
228 | " man | \n",
229 | "
\n",
230 | " \n",
231 | " 2 | \n",
232 | " strong | \n",
233 | " king | \n",
234 | "
\n",
235 | " \n",
236 | " 3 | \n",
237 | " strong | \n",
238 | " man | \n",
239 | "
\n",
240 | " \n",
241 | " 4 | \n",
242 | " man | \n",
243 | " king | \n",
244 | "
\n",
245 | " \n",
246 | " 5 | \n",
247 | " man | \n",
248 | " strong | \n",
249 | "
\n",
250 | " \n",
251 | " 6 | \n",
252 | " queen | \n",
253 | " wise | \n",
254 | "
\n",
255 | " \n",
256 | " 7 | \n",
257 | " queen | \n",
258 | " woman | \n",
259 | "
\n",
260 | " \n",
261 | " 8 | \n",
262 | " wise | \n",
263 | " queen | \n",
264 | "
\n",
265 | " \n",
266 | " 9 | \n",
267 | " wise | \n",
268 | " woman | \n",
269 | "
\n",
270 | " \n",
271 | "
\n",
272 | "
"
273 | ],
274 | "text/plain": [
275 | " input label\n",
276 | "0 king strong\n",
277 | "1 king man\n",
278 | "2 strong king\n",
279 | "3 strong man\n",
280 | "4 man king\n",
281 | "5 man strong\n",
282 | "6 queen wise\n",
283 | "7 queen woman\n",
284 | "8 wise queen\n",
285 | "9 wise woman"
286 | ]
287 | },
288 | "execution_count": 71,
289 | "metadata": {},
290 | "output_type": "execute_result"
291 | }
292 | ],
293 | "source": [
294 | "df.head(10)"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": 72,
300 | "metadata": {},
301 | "outputs": [
302 | {
303 | "data": {
304 | "text/plain": [
305 | "(52, 2)"
306 | ]
307 | },
308 | "execution_count": 72,
309 | "metadata": {},
310 | "output_type": "execute_result"
311 | }
312 | ],
313 | "source": [
314 | "df.shape"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": 73,
320 | "metadata": {},
321 | "outputs": [
322 | {
323 | "data": {
324 | "text/plain": [
325 | "{'boy': 5,\n",
326 | " 'girl': 3,\n",
327 | " 'king': 8,\n",
328 | " 'man': 0,\n",
329 | " 'pretty': 9,\n",
330 | " 'prince': 4,\n",
331 | " 'princess': 2,\n",
332 | " 'queen': 10,\n",
333 | " 'strong': 6,\n",
334 | " 'wise': 11,\n",
335 | " 'woman': 7,\n",
336 | " 'young': 1}"
337 | ]
338 | },
339 | "execution_count": 73,
340 | "metadata": {},
341 | "output_type": "execute_result"
342 | }
343 | ],
344 | "source": [
345 | "word2int"
346 | ]
347 | },
348 | {
349 | "cell_type": "markdown",
350 | "metadata": {},
351 | "source": [
352 | "# Define Tensorflow Graph"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": 75,
358 | "metadata": {},
359 | "outputs": [],
360 | "source": [
361 | "import tensorflow as tf\n",
362 | "import numpy as np\n",
363 | "\n",
364 | "ONE_HOT_DIM = len(words)\n",
365 | "\n",
366 | "# function to convert numbers to one hot vectors\n",
367 | "def to_one_hot_encoding(data_point_index):\n",
368 | " one_hot_encoding = np.zeros(ONE_HOT_DIM)\n",
369 | " one_hot_encoding[data_point_index] = 1\n",
370 | " return one_hot_encoding\n",
371 | "\n",
372 | "X = [] # input word\n",
373 | "Y = [] # target word\n",
374 | "\n",
375 | "for x, y in zip(df['input'], df['label']):\n",
376 | " X.append(to_one_hot_encoding(word2int[ x ]))\n",
377 | " Y.append(to_one_hot_encoding(word2int[ y ]))\n",
378 | "\n",
379 | "# convert them to numpy arrays\n",
380 | "X_train = np.asarray(X)\n",
381 | "Y_train = np.asarray(Y)\n",
382 | "\n",
383 | "# making placeholders for X_train and Y_train\n",
384 | "x = tf.placeholder(tf.float32, shape=(None, ONE_HOT_DIM))\n",
385 | "y_label = tf.placeholder(tf.float32, shape=(None, ONE_HOT_DIM))\n",
386 | "\n",
387 | "# word embedding will be 2 dimension for 2d visualization\n",
388 | "EMBEDDING_DIM = 2 \n",
389 | "\n",
390 | "# hidden layer: which represents word vector eventually\n",
391 | "W1 = tf.Variable(tf.random_normal([ONE_HOT_DIM, EMBEDDING_DIM]))\n",
392 | "b1 = tf.Variable(tf.random_normal([1])) #bias\n",
393 | "hidden_layer = tf.add(tf.matmul(x,W1), b1)\n",
394 | "\n",
395 | "# output layer\n",
396 | "W2 = tf.Variable(tf.random_normal([EMBEDDING_DIM, ONE_HOT_DIM]))\n",
397 | "b2 = tf.Variable(tf.random_normal([1]))\n",
398 | "prediction = tf.nn.softmax(tf.add( tf.matmul(hidden_layer, W2), b2))\n",
399 | "\n",
400 | "# loss function: cross entropy\n",
401 | "loss = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(prediction), axis=[1]))\n",
402 | "\n",
403 | "# training operation\n",
404 | "train_op = tf.train.GradientDescentOptimizer(0.05).minimize(loss)"
405 | ]
406 | },
407 | {
408 | "cell_type": "markdown",
409 | "metadata": {},
410 | "source": [
411 | "# Train"
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": 76,
417 | "metadata": {},
418 | "outputs": [
419 | {
420 | "name": "stdout",
421 | "output_type": "stream",
422 | "text": [
423 | "iteration 0 loss is : 4.2683053\n",
424 | "iteration 3000 loss is : 1.8142775\n",
425 | "iteration 6000 loss is : 1.7689418\n",
426 | "iteration 9000 loss is : 1.7487823\n",
427 | "iteration 12000 loss is : 1.7366564\n",
428 | "iteration 15000 loss is : 1.7282344\n",
429 | "iteration 18000 loss is : 1.7217968\n"
430 | ]
431 | }
432 | ],
433 | "source": [
434 | "sess = tf.Session()\n",
435 | "init = tf.global_variables_initializer()\n",
436 | "sess.run(init) \n",
437 | "\n",
438 | "iteration = 20000\n",
439 | "for i in range(iteration):\n",
440 | " # input is X_train which is one hot encoded word\n",
441 | " # label is Y_train which is one hot encoded neighbor word\n",
442 | " sess.run(train_op, feed_dict={x: X_train, y_label: Y_train})\n",
443 | " if i % 3000 == 0:\n",
444 | " print('iteration '+str(i)+' loss is : ', sess.run(loss, feed_dict={x: X_train, y_label: Y_train}))"
445 | ]
446 | },
447 | {
448 | "cell_type": "code",
449 | "execution_count": 77,
450 | "metadata": {},
451 | "outputs": [
452 | {
453 | "name": "stdout",
454 | "output_type": "stream",
455 | "text": [
456 | "[[ 0.15244952 0.9640051 ]\n",
457 | " [ 0.02790108 -0.06382209]\n",
458 | " [-5.340872 -2.4975982 ]\n",
459 | " [-1.1164831 -0.90147173]\n",
460 | " [-0.3675601 2.7438807 ]\n",
461 | " [-0.7659049 2.3928514 ]\n",
462 | " [-1.4453228 6.2860384 ]\n",
463 | " [-1.7536867 -0.6607232 ]\n",
464 | " [-0.1546773 1.0085508 ]\n",
465 | " [ 0.5907395 -2.8868985 ]\n",
466 | " [-0.61234784 -0.536076 ]\n",
467 | " [-3.745981 -3.1733289 ]]\n"
468 | ]
469 | }
470 | ],
471 | "source": [
472 | "# Now the hidden layer (W1 + b1) is actually the word look up table\n",
473 | "vectors = sess.run(W1 + b1)\n",
474 | "print(vectors)"
475 | ]
476 | },
477 | {
478 | "cell_type": "markdown",
479 | "metadata": {},
480 | "source": [
481 | "# word vector in table"
482 | ]
483 | },
484 | {
485 | "cell_type": "code",
486 | "execution_count": 78,
487 | "metadata": {},
488 | "outputs": [
489 | {
490 | "data": {
491 | "text/html": [
492 | "\n",
493 | "\n",
506 | "
\n",
507 | " \n",
508 | " \n",
509 | " | \n",
510 | " word | \n",
511 | " x1 | \n",
512 | " x2 | \n",
513 | "
\n",
514 | " \n",
515 | " \n",
516 | " \n",
517 | " 0 | \n",
518 | " man | \n",
519 | " 0.152450 | \n",
520 | " 0.964005 | \n",
521 | "
\n",
522 | " \n",
523 | " 1 | \n",
524 | " young | \n",
525 | " 0.027901 | \n",
526 | " -0.063822 | \n",
527 | "
\n",
528 | " \n",
529 | " 2 | \n",
530 | " princess | \n",
531 | " -5.340872 | \n",
532 | " -2.497598 | \n",
533 | "
\n",
534 | " \n",
535 | " 3 | \n",
536 | " girl | \n",
537 | " -1.116483 | \n",
538 | " -0.901472 | \n",
539 | "
\n",
540 | " \n",
541 | " 4 | \n",
542 | " prince | \n",
543 | " -0.367560 | \n",
544 | " 2.743881 | \n",
545 | "
\n",
546 | " \n",
547 | " 5 | \n",
548 | " boy | \n",
549 | " -0.765905 | \n",
550 | " 2.392851 | \n",
551 | "
\n",
552 | " \n",
553 | " 6 | \n",
554 | " strong | \n",
555 | " -1.445323 | \n",
556 | " 6.286038 | \n",
557 | "
\n",
558 | " \n",
559 | " 7 | \n",
560 | " woman | \n",
561 | " -1.753687 | \n",
562 | " -0.660723 | \n",
563 | "
\n",
564 | " \n",
565 | " 8 | \n",
566 | " king | \n",
567 | " -0.154677 | \n",
568 | " 1.008551 | \n",
569 | "
\n",
570 | " \n",
571 | " 9 | \n",
572 | " pretty | \n",
573 | " 0.590739 | \n",
574 | " -2.886899 | \n",
575 | "
\n",
576 | " \n",
577 | " 10 | \n",
578 | " queen | \n",
579 | " -0.612348 | \n",
580 | " -0.536076 | \n",
581 | "
\n",
582 | " \n",
583 | " 11 | \n",
584 | " wise | \n",
585 | " -3.745981 | \n",
586 | " -3.173329 | \n",
587 | "
\n",
588 | " \n",
589 | "
\n",
590 | "
"
591 | ],
592 | "text/plain": [
593 | " word x1 x2\n",
594 | "0 man 0.152450 0.964005\n",
595 | "1 young 0.027901 -0.063822\n",
596 | "2 princess -5.340872 -2.497598\n",
597 | "3 girl -1.116483 -0.901472\n",
598 | "4 prince -0.367560 2.743881\n",
599 | "5 boy -0.765905 2.392851\n",
600 | "6 strong -1.445323 6.286038\n",
601 | "7 woman -1.753687 -0.660723\n",
602 | "8 king -0.154677 1.008551\n",
603 | "9 pretty 0.590739 -2.886899\n",
604 | "10 queen -0.612348 -0.536076\n",
605 | "11 wise -3.745981 -3.173329"
606 | ]
607 | },
608 | "execution_count": 78,
609 | "metadata": {},
610 | "output_type": "execute_result"
611 | }
612 | ],
613 | "source": [
614 | "w2v_df = pd.DataFrame(vectors, columns = ['x1', 'x2'])\n",
615 | "w2v_df['word'] = words\n",
616 | "w2v_df = w2v_df[['word', 'x1', 'x2']]\n",
617 | "w2v_df"
618 | ]
619 | },
620 | {
621 | "cell_type": "markdown",
622 | "metadata": {},
623 | "source": [
624 | "# word vector in 2d chart"
625 | ]
626 | },
627 | {
628 | "cell_type": "code",
629 | "execution_count": 79,
630 | "metadata": {},
631 | "outputs": [
632 | {
633 | "data": {
634 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAJCCAYAAADp1TKRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xu01nWd9//XR1AY8bgGajTJ7ayf5oG9gc22UUE85aFEvfHW1J+2QKaxmJymVlrj2KihNXdFZo13eucYOlpKajpTNg6iltIw6eagEoGH7h2V3TP483CLpAl8f39Ie1miqHzg2hsej7VYy+u6vtfn+/5+F8v95HsddmmaJgAAbJitWj0AAMDmQFQBAFQgqgAAKhBVAAAViCoAgApEFQBABaIKAKACUQUAUIGoAgCoYGArdjp06NCmra2tFbsGAHhT5s2b92TTNMPWt11LoqqtrS3d3d2t2DUAwJtSSvn5G9nOy38AABWIKgCACkQVAEAFogoAoAJRBQBQgagCAKhAVAEAVCCqAAAqEFUAABWIKgCACkQVAEAFogoAoAJRBQBQgagCAKhAVAEAVCCqAAAqEFUAABWIKgCACkQVAEAFogoAoAJRBQBQgagCAKhAVAEAVCCqAAAqEFUAABWIKgCACkQVAEAFogoAoAJRBQBQgagCAKhAVAEAVCCqAAAqEFUAABWIKgCACkQVAEAFogoAoAJRBQBQgagCAKhAVAFsAS677LKsXLmy1WPAZk1UAWwBXi+qVq9evYmngc2TqALYzDz//PM59thjM3LkyIwYMSKf+cxn8sQTT+Swww7LYYcdliTZbrvtcsEFF+TP/uzPMnfu3Nx1110ZPXp02tvbM2XKlLz44otJkra2tlx44YXp7OxMe3t7lixZkiRZvnx5jjzyyHR2duZDH/pQdt999zz55JMtO2boC0QVwGbmjjvuyK677poHH3wwixYtysc+9rHsuuuuueeee3LPPfckeTm8RowYkR//+Mfp6urK5MmTM3PmzDz88MNZtWpVrrjiit71hg4dmvnz52fq1KmZPn16kuQzn/lMDj/88MyfPz8TJ07MsmXLWnKs0JeIKoDNTHt7e2bPnp1PfepTue+++7Ljjju+apsBAwbkv//3/54kWbp0afbYY4/stddeSZJJkybl3nvv7d32xBNPTJKMGTMmPT09SZI5c+bk1FNPTZIcc8wx2XnnnTfmIUG/MLDVAwBQ11577ZV58+bl+9//fs4777wcddRRr9pm8ODBGTBgQJKkaZrXXW/QoEFJXg6xVatWvaHnwJbIlSqAzcwTTzyRbbfdNmeccUbOOeeczJ8/P9tvv32ee+65dW6/9957p6enJ4899liS5LrrrsshhxzyuvsYN25cvv3tbydJZs2alaeffrruQUA/5EoVwGbm4YcfzrnnnputttoqW2+9da644orMnTs3733ve7PLLrv0vq/qdwYPHpwZM2bk5JNPzqpVq7L//vvnwx/+8Ovu48ILL8xpp52WmTNn5pBDDskuu+yS7bfffmMeFvR5pRWXcLu6upru7u5Nvl8A6njxxRczYMCADBw4MHPnzs3UqVOzcOHCVo8FG0UpZV7TNF3r286VKgDetGXLluX9739/1qxZk2222SZXXXVVq0eClhNVALxpe+65ZxYsWNDqMaBP8UZ1AIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACqoElWllJ1KKTeXUpaUUn5aSjmwxroAAP1FrS///EqSO5qmOamUsk2SbSutCwDQL2xwVJVSdkgyPsnkJGma5rdJfruh6wIA9Cc1Xv770yTLk8wopSwopfxjKWXIH25USjmrlNJdSulevnx5hd0CAPQdNaJqYJLOJFc0TTM6yfNJ/uYPN2qa5utN03Q1TdM1bNiwCrsFAOg7akTVL5P8smmaH6+9fXNejiwAgC3GBkdV0zT/J8kvSinvWnvXEUkWb+i6AAD9Sa1P//1Vkm+u/eTfz5KcWWldAIB+oUpUNU2zMElXjbUAAPoj36gOAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKqkVVKWVAKWVBKeV7tdYEAOgval6p+uskP624HgBAv1ElqkopuyU5Nsk/1lgPAKC/qXWl6rIkn0yy5rU2KKWcVUrpLqV0L1++vNJuAQD6hg2OqlLKhCT/1TTNvNfbrmmarzdN09U0TdewYcM2dLcAAH1KjStVY5McX0rpSXJjksNLKddXWBcAoN/Y4Khqmua8pml2a5qmLcmpSe5umuaMDZ4MAKAf8T1VAAAVDKy5WNM0P0jyg5prAgD0B65UAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAFCBqAIAqEBUAQBUIKoAACoQVQAAFYgqAIAKRBUAQAWiCgCgAlEFAG/ABRdckNmzZ7d6DPqwga0eAAD6utWrV2fatGmtHoM+zpUqALZoPT092XvvvTNp0qR0dHTkpJNOysqVK9PW1pZp06Zl3LhxuemmmzJ58uTcfPPNSZK2trZceOGF6ezsTHt7e5YsWZIkWbFiRc4888y0t7eno6Mjt9xyS5Jk1qxZOfDAA9PZ2ZmTTz45K1asaNnxsvGIKgC2eEuXLs1ZZ52Vhx56KDvssEO+9rWvJUkGDx6cOXPm5NRTT33Vc4YOHZr58+dn6tSpmT59epLk4osvzo477piHH344Dz30UA4//PA8+eSTueSSSzJ79uzMnz8/XV1dufTSSzfp8bFpePkPgC3e8OHDM3bs2CTJGWecka9+9atJklN
[... base64-encoded PNG truncated: the display_data image for this cell, a matplotlib scatter plot of the learned 2-D word2vec embeddings with each vocabulary word annotated at its (x1, x2) coordinate ...]\n",
635 | "text/plain": [
636 | ""
637 | ]
638 | },
639 | "metadata": {},
640 | "output_type": "display_data"
641 | }
642 | ],
643 | "source": [
644 | "import matplotlib.pyplot as plt\n",
645 | "\n",
646 | "fig, ax = plt.subplots()\n",
647 | "\n",
648 | "for word, x1, x2 in zip(w2v_df['word'], w2v_df['x1'], w2v_df['x2']):\n",
649 | " ax.annotate(word, (x1,x2 ))\n",
650 | " \n",
651 | "PADDING = 1.0\n",
652 | "x_axis_min = np.amin(vectors, axis=0)[0] - PADDING\n",
653 | "y_axis_min = np.amin(vectors, axis=0)[1] - PADDING\n",
654 | "x_axis_max = np.amax(vectors, axis=0)[0] + PADDING\n",
655 | "y_axis_max = np.amax(vectors, axis=0)[1] + PADDING\n",
656 | " \n",
657 | "plt.xlim(x_axis_min,x_axis_max)\n",
658 | "plt.ylim(y_axis_min,y_axis_max)\n",
659 | "plt.rcParams[\"figure.figsize\"] = (10,10)\n",
660 | "\n",
661 | "plt.show()"
662 | ]
  },
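  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A minimal follow-up sketch, assuming `vectors` is the (vocab_size, 2)\n",
    "# array plotted above and that w2v_df['word'] is row-aligned with it\n",
    "# (both defined earlier in this notebook): rank each word's nearest\n",
    "# neighbours in the embedding space by cosine similarity, the measure\n",
    "# that visual proximity in the scatter plot is hinting at.\n",
    "import numpy as np\n",
    "\n",
    "def nearest_neighbors(vecs, words, k=2):\n",
    "    vecs = np.asarray(vecs, dtype=float)\n",
    "    # Normalize rows so a plain dot product equals cosine similarity.\n",
    "    unit = vecs / np.maximum(np.linalg.norm(vecs, axis=1, keepdims=True), 1e-12)\n",
    "    sims = unit @ unit.T\n",
    "    np.fill_diagonal(sims, -np.inf)  # a word is not its own neighbour\n",
    "    top = np.argsort(-sims, axis=1)[:, :k]\n",
    "    return {w: [words[j] for j in row] for w, row in zip(words, top)}\n",
    "\n",
    "nearest_neighbors(vectors, list(w2v_df['word']))"
   ]
  },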
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

--------------------------------------------------------------------------------