├── test_images ├── labels.csv~ ├── 1.jpeg ├── 2.jpeg ├── 3.jpeg ├── 4.jpeg ├── 5.jpeg ├── 6.jpeg ├── 7.jpeg ├── 8.jpeg ├── 9.jpeg └── 10.jpeg ├── result_images ├── lenet.png ├── alexnet.png ├── testset.jpg ├── trainset.jpg ├── validset.jpg ├── GoogLeNet.png ├── inception.jpg ├── distribution.jpg ├── exploratory.jpg └── online image.jpg ├── signnames.csv ├── README.md ├── LeNet.ipynb └── GoogLeNet.ipynb /test_images/labels.csv~: -------------------------------------------------------------------------------- 1 | 00001_00005.ppm,32,32,6,6,27,27,0 2 | -------------------------------------------------------------------------------- /test_images/1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/test_images/1.jpeg -------------------------------------------------------------------------------- /test_images/2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/test_images/2.jpeg -------------------------------------------------------------------------------- /test_images/3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/test_images/3.jpeg -------------------------------------------------------------------------------- /test_images/4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/test_images/4.jpeg -------------------------------------------------------------------------------- /test_images/5.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/test_images/5.jpeg 
-------------------------------------------------------------------------------- /test_images/6.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/test_images/6.jpeg -------------------------------------------------------------------------------- /test_images/7.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/test_images/7.jpeg -------------------------------------------------------------------------------- /test_images/8.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/test_images/8.jpeg -------------------------------------------------------------------------------- /test_images/9.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/test_images/9.jpeg -------------------------------------------------------------------------------- /test_images/10.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/test_images/10.jpeg -------------------------------------------------------------------------------- /result_images/lenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/result_images/lenet.png -------------------------------------------------------------------------------- /result_images/alexnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/result_images/alexnet.png 
-------------------------------------------------------------------------------- /result_images/testset.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/result_images/testset.jpg -------------------------------------------------------------------------------- /result_images/trainset.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/result_images/trainset.jpg -------------------------------------------------------------------------------- /result_images/validset.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/result_images/validset.jpg -------------------------------------------------------------------------------- /result_images/GoogLeNet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/result_images/GoogLeNet.png -------------------------------------------------------------------------------- /result_images/inception.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/result_images/inception.jpg -------------------------------------------------------------------------------- /result_images/distribution.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/result_images/distribution.jpg -------------------------------------------------------------------------------- /result_images/exploratory.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/result_images/exploratory.jpg -------------------------------------------------------------------------------- /result_images/online image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liferlisiqi/Traffic-Sign-Classifier/HEAD/result_images/online image.jpg -------------------------------------------------------------------------------- /signnames.csv: -------------------------------------------------------------------------------- 1 | ClassId,SignName 2 | 0,Speed limit (20km/h) 3 | 1,Speed limit (30km/h) 4 | 2,Speed limit (50km/h) 5 | 3,Speed limit (60km/h) 6 | 4,Speed limit (70km/h) 7 | 5,Speed limit (80km/h) 8 | 6,End of speed limit (80km/h) 9 | 7,Speed limit (100km/h) 10 | 8,Speed limit (120km/h) 11 | 9,No passing 12 | 10,No passing for vehicles over 3.5 metric tons 13 | 11,Right-of-way at the next intersection 14 | 12,Priority road 15 | 13,Yield 16 | 14,Stop 17 | 15,No vehicles 18 | 16,Vehicles over 3.5 metric tons prohibited 19 | 17,No entry 20 | 18,General caution 21 | 19,Dangerous curve to the left 22 | 20,Dangerous curve to the right 23 | 21,Double curve 24 | 22,Bumpy road 25 | 23,Slippery road 26 | 24,Road narrows on the right 27 | 25,Road work 28 | 26,Traffic signals 29 | 27,Pedestrians 30 | 28,Children crossing 31 | 29,Bicycles crossing 32 | 30,Beware of ice/snow 33 | 31,Wild animals crossing 34 | 32,End of all speed and passing limits 35 | 33,Turn right ahead 36 | 34,Turn left ahead 37 | 35,Ahead only 38 | 36,Go straight or right 39 | 37,Go straight or left 40 | 38,Keep right 41 | 39,Keep left 42 | 40,Roundabout mandatory 43 | 41,End of no passing 44 | 42,End of no passing by vehicles over 3.5 metric tons 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Traffic Sign 
Classification 2 | 3 | ### Overview 4 | In this project, I used deep neural networks and three classic convolutional neural network architectures (LeNet, AlexNet and GoogLeNet) to classify traffic signs. I will train and validate a model so it can classify traffic sign images using the [German Traffic Sign Dataset](http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset). After the model is trained, I will then try out my model on images of German traffic signs that I find on the web. 5 | 6 | ### The goals / steps of this project are the following: 7 | * Load and explore the data set. 8 | * Implement the LeNet architecture and use `ReLU`, `mini-batch gradient descent` and `dropout`. 9 | * Implement AlexNet with some modifications, and use `learning rate decay`, `Adam optimization` and `L2 regularization`. 10 | * Use GoogLeNet to classify traffic signs with some modifications, and use `inception`, `overlapping pooling` and `average pooling`. 11 | * Analyze the softmax probabilities of the new images. 12 | * Summarize the results. 13 | 14 | ### Dependencies 15 | python3.5 16 | matplotlib (2.1.1) 17 | opencv-python (3.3.1.11) 18 | numpy (1.13.3) 19 | tensorflow-gpu (1.4.1) 20 | sklearn (0.19.1) 21 | 22 | ### Dataset 23 | Download the [data set](https://d17h27t6h515a5.cloudfront.net/topher/2017/February/5898cd6f_traffic-signs-data/traffic-signs-data.zip). This is a pickled dataset in which the images are already resized to 32x32. It contains a training, validation and test set. 
24 | 25 | 26 | [//]: # (Image References) 27 | [exploratory]: ./result_images/exploratory.jpg "exploratory" 28 | [distribution]: ./result_images/distribution.jpg "distribution" 29 | [lenet]: ./result_images/lenet.png "lenet" 30 | [alexnet]: ./result_images/alexnet.png "alexnet" 31 | [inception]: ./result_images/inception.jpg "inception" 32 | [googlenet]: ./result_images/GoogLeNet.png "googlenet" 33 | [image2]: ./test_images/1.jpeg "Traffic Sign 1" 34 | [image3]: ./test_images/2.jpeg "Traffic Sign 2" 35 | [image4]: ./test_images/3.jpeg "Traffic Sign 3" 36 | [image5]: ./test_images/4.jpeg "Traffic Sign 4" 37 | [image6]: ./test_images/5.jpeg "Traffic Sign 5" 38 | [image7]: ./test_images/6.jpeg "Traffic Sign 6" 39 | [image8]: ./test_images/7.jpeg "Traffic Sign 7" 40 | [image9]: ./test_images/8.jpeg "Traffic Sign 8" 41 | [image10]: ./test_images/9.jpeg "Traffic Sign 9" 42 | [image11]: ./test_images/10.jpeg "Traffic Sign 10" 43 | 44 | [Data pre-process.ipynb](https://github.com/liferlisiqi/Traffic-Sign-Classifier/blob/master/Data%20pre-process.ipynb) 45 | --- 46 | 47 | I used the numpy library to calculate summary statistics of the traffic signs data set: 48 | * The size of the training set is: 34799 49 | * The size of the validation set is: 4410 50 | * The size of the test set is: 12630 51 | * The shape of a traffic sign image is: (32, 32, 3) 52 | * The number of unique classes/labels in the data set is: 43 53 | 54 | Here is an exploratory visualization of the training data set. 55 | ![alt text][exploratory] 56 | 57 | The distributions of the training, validation and testing sets are shown in the following bar charts. 
58 | ![alt text][distribution] 59 | 60 | [LeNet.ipynb](https://github.com/liferlisiqi/Traffic-Sign-Classifier/blob/master/LeNet.ipynb) 61 | --- 62 | The [LeNet](http://219.216.82.193/cache/10/03/yann.lecun.com/b1a1c4acb57f1b447bfe36e103910875/lecun-01a.pdf) model was proposed by Yann LeCun in 1998; it is the most classic CNN model for image recognition. Its architecture is as follows: 63 | 64 | ![alt text][lenet] 65 | 66 | In the LeNet architecture I implemented for traffic sign recognition, three techniques are used: 67 | 68 | - 1 ReLU 69 | The ReLU nonlinear function is used as the activation function after the convolutional layers. More information about ReLU and other activation functions can be found at [Lecture 6 | Training Neural Networks I](https://www.youtube.com/watch?v=wEoyxE0GP2M&index=6&list=PLC1qU-LWwrF64f4QKQT-Vg5Wr4qEE1Zxk&t=0s). 70 | - 2 Mini-batch gradient descent 71 | Mini-batch gradient descent is a combination of batch gradient descent and stochastic gradient descent; it estimates the average gradient over all the training data from a batch of selected samples. 72 | - 3 Dropout 73 | Dropout is a regularization technique for reducing overfitting in neural networks by preventing complex co-adaptations on training data. It was proposed in the paper [Dropout: A Simple Way to Prevent Neural Networks from Overfitting](http://219.216.82.193/cache/2/03/jmlr.org/9b2dcdb089f9b8f19cea175c9d6b5150/srivastava14a.pdf). It is usually applied after fully connected layers. Awkwardly, there is a small problem: LeNet sometimes does not overfit the training set. Thus dropout will not play a big role, or may even make the model worse, for a simple model like LeNet, and the training set error may be higher than the validation set error while training. 
74 | 75 | My LeNet consists of the following layers: 76 | 77 | | Layer | Description | Input | Output | 78 | |:---------------------:|:---------------------------------------------:|:---------:|:-----------:| 79 | | Convolution | kernel: 5x5; stride:1x1; padding: valid | 32x32x3 | 28x28x6 | 80 | | Max pooling | kernel: 2x2; stride:2x2; | 28x28x6 | 14x14x6 | 81 | | Convolution | kernel: 5x5; stride:1x1; padding: valid | 14x14x6 | 10x10x16 | 82 | | Max pooling | kernel: 2x2; stride:2x2; | 10x10x16 | 5x5x16 | 83 | | Flatten | Input 5x5x16 -> Output 400 | 5x5x16 | 400 | 84 | | Fully connected | connect every neuron with next layer | 400 | 120 | 85 | | Fully connected | connect every neuron with next layer | 120 | 84 | 86 | | Fully connected | output a probability for each of the 43 labels | 84 | 43 | 87 | 88 | 89 | ### Training 90 | I have tuned the following three hyperparameters to train my model. 91 | LEARNING_RATE = 1e-2 92 | EPOCHS = 50 93 | BATCH_SIZE = 128 94 | It takes about 2 minutes to train the model on a GeForce 750 Ti. 95 | 96 | The results are: 97 | * accuracy of training set: 96.6% 98 | * accuracy of validation set: 92.0% 99 | * accuracy of test set: 89.7% 100 | 101 | We can see that the model is overfitting to the training data and the accuracy on the validation set is a little lower than on the training set. The LeNet model is efficient and simple; many CNN architectures are inspired by it, like AlexNet. 102 | 103 | [AlexNet.ipynb](https://github.com/liferlisiqi/Traffic-Sign-Classifier/blob/master/AlexNet.ipynb) 104 | --- 105 | 106 | [AlexNet](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) is the first popularized CNN architecture in computer vision, developed by Alex Krizhevsky, Geoffrey Hinton, and Ilya Sutskever. It was the champion of the ImageNet ILSVRC challenge in 2012 and significantly outperformed the runner-up. AlexNet has a similar architecture to LeNet, but it is deeper and bigger. 
107 | 108 | ![alt text][alexnet] 109 | 110 | Because the input and output dimensions of traffic sign recognition on GTSRB are 32x32x3 and 43, which differ from the original dimensions of AlexNet, I made some changes to fit the requirement. The architecture I implemented for recognizing traffic signs is shown in the following table: 111 | 112 | | Layer | Description | Input | Output | 113 | |:---------------------:|:---------------------------------------------:|:---------:|:-----------:| 114 | | Convolution | kernel: 5x5; stride:1x1; padding: valid | 32x32x3 | 28x28x9 | 115 | | Max pooling | kernel: 2x2; stride:2x2; | 28x28x9 | 14x14x9 | 116 | | Convolution | kernel: 3x3; stride:1x1; padding: valid | 14x14x9 | 12x12x32 | 117 | | Max pooling | kernel: 2x2; stride:2x2; | 12x12x32 | 6x6x32 | 118 | | Convolution | kernel: 3x3; stride:1x1; padding: same | 6x6x32 | 6x6x48 | 119 | | Convolution | kernel: 3x3; stride:1x1; padding: same | 6x6x48 | 6x6x64 | 120 | | Convolution | kernel: 3x3; stride:1x1; padding: same | 6x6x64 | 6x6x96 | 121 | | Max pooling | kernel: 2x2; stride:2x2; | 6x6x96 | 3x3x96 | 122 | | Flatten | Input 3x3x96 -> Output 864 | 3x3x96 | 864 | 123 | | Fully connected | connect every neuron with next layer | 864 | 400 | 124 | | Fully connected | connect every neuron with next layer | 400 | 160 | 125 | | Fully connected | output a probability for each of the 43 labels | 160 | 43 | 126 | 127 | 128 | Apart from this, I have used the following methods to make the model work better: 129 | 130 | - Learning rate decay 131 | In training deep networks, when the learning rate is large, the system contains too much kinetic energy and the parameter vector bounces around chaotically, unable to settle down into deeper parts of the loss function; when the learning rate is small, you will be wasting computation making little improvement for a long time. 
If the learning rate decays from large to small during training, the network will move fast at the beginning and improve little by little at the end. There are three commonly used methods: step decay, exponential decay and 1/t decay; more information can be found [here](http://cs231n.github.io/neural-networks-3/#anneal) and [here](https://zhuanlan.zhihu.com/p/32923584). Because I use TensorFlow to implement AlexNet and it provides exponential decay for learning rate decay, I chose it as my method; its usage can be found [here](https://www.tensorflow.org/api_docs/python/tf/train/exponential_decay). Maybe it is not a good method, since there are two more hyperparameters (decay_step and decay_rate) to tune. 132 | - Adam optimization 133 | [Adam](https://arxiv.org/abs/1412.6980) is a popular optimization method recently proposed by Diederik P. Kingma and Jimmy Ba; like the previously proposed Adagrad and RMSprop, it is an adaptive learning rate method. With Adam, we don't have to use learning rate decay and tune three parameters to get a good learning rate. It is fabulous, so I will use it most of the time. After adopting Adam, the accuracies for the training set, validation set and testing set are 99.9%, 96.9% and 94.2% respectively. The model overfits the training set a little, so some regularization methods are used to reduce it. 134 | - L2 regularization 135 | L2 regularization is used to reduce overfitting by adding a regularization loss to the loss function; it is based on the assumption that the bigger the regularization loss is, the more complex the model is. It is well known that a complex model overfits the training set more easily; thus, reducing the regularization loss makes the model simpler. 136 | The regularization loss is, in most cases, the sum of the L2 norms of the weights of each layer multiplied by the regularization parameter `lambda`; `lambda` is a small positive number that controls the degree of regularization. 
The TensorFlow documentation on how to use L2 regularization can be found [here](https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss). 137 | 138 | ### Training 139 | I have tuned the following five hyperparameters to train my model. 140 | * LEARNING_RATE = 5e-4 141 | * EPOCHS = 30 142 | * BATCH_SIZE = 128 143 | * keep_prop = 0.5 144 | * LAMBDA = 1e-5 145 | 146 | The results are: 147 | * accuracy of training set: 100.0% 148 | * accuracy of validation set: 96.0% 149 | * accuracy of test set: 94.6% 150 | 151 | [GoogLeNet.ipynb](https://github.com/liferlisiqi/Traffic-Sign-Classifier/blob/master/GoogLeNet.ipynb) 152 | --- 153 | [GoogLeNet](https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf) was the winner of ILSVRC 2014; its main contribution was the development of the `Inception Module`, which dramatically reduced the number of parameters in the network. 154 | ![alt text][inception] 155 | Additionally, this paper uses `Average Pooling` instead of a `Fully connected layer` at the top of the ConvNet, eliminating a large number of parameters that do not seem to matter much. The overall architecture of GoogLeNet is shown in the following table. 156 | 157 | ![alt text][googlenet] 158 | 159 | The original architecture of GoogLeNet is a little hard to train on my GPU, so I chose to reduce the number of layers from 22 to 14; the details of the network are shown in the following table. 
160 | 161 | | Type | Kernel/Stride | Output | Parameters | 162 | |:-------------:|:-------------:|:---------:|:-----------:| 163 | | conv | 3x3/2x2 | 16x16x64 | 1,792 | 164 | | inception(2a) | | 16x16x256 | 137,072 | 165 | | inception(2b) | | 16x16x480 | 388,736 | 166 | | max pool | 3x3/2x2 | 7x7x480 | | 167 | | inception(3a) | | 7x7x512 | 433,792 | 168 | | inception(3b) | | 7x7x512 | 449,160 | 169 | | max pool | 3x3/2x2 | 3x3x512 | | 170 | | inception(4a) | | 3x3x832 | 859,136 | 171 | | inception(4b) | | 3x3x1024 | 1,444,080 | 172 | | avg pool | 3x3/1x1 | 1x1x1024 | | 173 | | flatten | 864 | 1024 | | 174 | | full | 43 | 43 | 44,032 | 175 | 176 | Some details of this architecture are as follows: 177 | - Inception Module 178 | The inception module is the core of this architecture; it is driven by two disadvantages of previous architectures: a large number of parameters, which leads to overfitting, and a dramatic use of computational resources. Its naive implementation doesn't have a 1x1 conv before/after the 3x3 conv, 5x5 conv and max pooling layers. The reason for adding a 1x1 convolutional layer is that it can reduce the depth of the output from the previous layer; therefore, the number of operations can be significantly reduced. More details can be found in [Going deeper with convolutions](https://arxiv.org/pdf/1409.4842.pdf). Since max pooling will reduce the shape of the input feature map, I implement it by padding with zeros; another implementation can be found [here](https://hacktilldawn.com/2016/09/25/inception-modules-explained-and-implemented/). 179 | - Overlapping pooling 180 | The normal pooling operation uses kernel size = 2 and stride = 2, while overlapping pooling means kernel size > stride, like kernel size = 3 and stride = 2, so there will be overlapping fields. 
According to [Traffic Sign Recognition with Multi-Scale Convolutional Networks](http://219.216.82.193/cache/13/03/yann.lecun.com/a46bf8e4b17c2a9e46a2a899a68a0a0d/sermanet-ijcnn-11.pdf), overlapping pooling can slightly reduce the error rates compared to non-overlapping pooling and make the model more difficult to overfit. 181 | 182 | ### Training 183 | I have tuned the following four hyperparameters to train my model. 184 | * LEARNING_RATE = 5e-4 185 | * EPOCHS = 35 186 | * BATCH_SIZE = 128 187 | * keep_prop = 0.5 188 | 189 | The results are: 190 | * accuracy of training set: 100.0% 191 | * accuracy of validation set: 98.5% 192 | * accuracy of test set: 98.1% 193 | 194 | Summary 195 | --- 196 | In this project, I use three classic CNN architectures to recognize traffic signs from GTSRB: LeNet, AlexNet and GoogLeNet. Since the original architectures may not be suitable for images from GTSRB, I made some changes to them. In addition, I use some methods and tricks to train the model, like mini-batch gradient descent, Adam optimization, L2 regularization, learning rate decay and so on. Finally, ten online traffic images are used to test my model; the result shows that it works very well, and all ten signs are perfectly recognized. 197 | 198 | References 199 | --- 200 | [The German Traffic Sign Recognition Benchmark](http://benchmark.ini.rub.de/?section=gtsrb&subsection=news) 201 | [Man vs. 
computer: Benchmarking machine learning algorithms for traffic sign recognition](https://www.sciencedirect.com/science/article/pii/S0893608012000457?via%3Dihub) 202 | [Traffic Sign Recognition with Multi-Scale Convolutional Networks](http://219.216.82.193/cache/13/03/yann.lecun.com/a46bf8e4b17c2a9e46a2a899a68a0a0d/sermanet-ijcnn-11.pdf) 203 | [The German Traffic Sign Recognition Benchmark: A multi-class classification competition](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6033395) 204 | [Gradient-Based Learning Applied to Document Recognition](http://219.216.82.193/cache/10/03/yann.lecun.com/b1a1c4acb57f1b447bfe36e103910875/lecun-01a.pdf) 205 | [ImageNet Classification with Deep Convolutional Neural Networks](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) 206 | [Going deeper with convolutions](https://arxiv.org/pdf/1409.4842.pdf) 207 | -------------------------------------------------------------------------------- /LeNet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "All modules imported.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import os\n", 18 | "import pickle\n", 19 | "import math\n", 20 | "import random\n", 21 | "import csv\n", 22 | "from PIL import Image\n", 23 | "\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import cv2\n", 26 | "import numpy as np\n", 27 | "import tensorflow as tf\n", 28 | "from sklearn.utils import shuffle\n", 29 | "from tensorflow.contrib.layers import flatten\n", 30 | "\n", 31 | "print('All modules imported.')" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "# 1 Reload the preprocessed data" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | 
"metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "(34799, 32, 32, 3) (34799,)\n", 51 | "(4410, 32, 32, 3) (4410,)\n", 52 | "(12630, 32, 32, 3) (12630,)\n", 53 | "43\n", 54 | "Data loaded.\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "# 1 Reload the preprocessed data\n", 60 | "\n", 61 | "pickle_file = './pre-traffic-signs-data/pre-data.pickle'\n", 62 | "with open(pickle_file, 'rb') as f:\n", 63 | " pickle_data = pickle.load(f)\n", 64 | " X_train = pickle_data['train_features']\n", 65 | " y_train = pickle_data['train_labels']\n", 66 | " X_valid = pickle_data['valid_features']\n", 67 | " y_valid = pickle_data['valid_labels']\n", 68 | " X_test = pickle_data['test_features']\n", 69 | " y_test = pickle_data['test_labels']\n", 70 | " signnames = pickle_data['signnames']\n", 71 | " del pickle_data # Free up memory\n", 72 | " \n", 73 | "# Shuffle the data set\n", 74 | "X_train, y_train = shuffle(X_train, y_train)\n", 75 | "X_valid, y_valid = shuffle(X_valid, y_valid)\n", 76 | "X_test, y_test = shuffle(X_test, y_test)\n", 77 | "\n", 78 | "print(X_train.shape, y_train.shape)\n", 79 | "print(X_valid.shape, y_valid.shape)\n", 80 | "print(X_test.shape, y_test.shape)\n", 81 | "print(len(signnames))\n", 82 | "print('Data loaded.')" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "# 2 LeNet Architecture" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "def LeNet(x, KEEP_PROB):\n", 99 | " # Arguments used for tf.truncated_normal, randomly defines variables for the weights and biases for each layer\n", 100 | " mu = 0\n", 101 | " sigma = 0.1\n", 102 | " \n", 103 | " # Layer 1: Input = 32x32x3. Output = 28x28x6.\n", 104 | " # Convolutional. 
\n", 105 | " conv1_w = tf.Variable(tf.truncated_normal((5, 5, 3, 6), mu, sigma))\n", 106 | " conv1_b = tf.Variable(tf.zeros(6))\n", 107 | " conv1 = tf.nn.conv2d(x, conv1_w, [1, 1, 1, 1], 'VALID') + conv1_b\n", 108 | " # Activation.\n", 109 | " conv1 = tf.nn.relu(conv1)\n", 110 | " # Pooling. Input = 28x28x6. Output = 14x14x6.\n", 111 | " pool1 = tf.nn.max_pool(conv1, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')\n", 112 | " \n", 113 | " # Layer 2: Input = 14x14x6. Output = 10x10x16.\n", 114 | " # Convolutional. \n", 115 | " conv2_w = tf.Variable(tf.truncated_normal((5, 5, 6, 16), mu, sigma))\n", 116 | " conv2_b = tf.Variable(tf.zeros(16))\n", 117 | " conv2 = tf.nn.conv2d(pool1, conv2_w, [1, 1, 1, 1], 'VALID') + conv2_b\n", 118 | " # Activation.\n", 119 | " conv2 = tf.nn.relu(conv2)\n", 120 | " # Pooling. Input = 10x10x16. Output = 5x5x16.\n", 121 | " pool2 = tf.nn.max_pool(conv2, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')\n", 122 | " \n", 123 | " # Flatten. Input = 5x5x16. Output = 400.\n", 124 | " flat = flatten(pool2) \n", 125 | " \n", 126 | " # Layer 3: Input = 400. Output = 120.\n", 127 | " # Fully Connected. \n", 128 | " full1_w = tf.Variable(tf.truncated_normal((400, 120), mu, sigma))\n", 129 | " full1_b = tf.Variable(tf.zeros(120))\n", 130 | " full1 = tf.matmul(flat, full1_w) + full1_b\n", 131 | " # Activation.\n", 132 | " full1 = tf.nn.relu(full1) \n", 133 | " # Dropout\n", 134 | " full1 = tf.nn.dropout(full1, KEEP_PROB)\n", 135 | " \n", 136 | " # Layer 4: Input = 120. Output = 84.\n", 137 | " # Fully Connected. \n", 138 | " full2_w = tf.Variable(tf.truncated_normal((120, 84), mu, sigma))\n", 139 | " full2_b = tf.Variable(tf.zeros(84))\n", 140 | " full2 = tf.matmul(full1, full2_w) + full2_b\n", 141 | " # Activation.\n", 142 | " full2 = tf.nn.relu(full2)\n", 143 | " # Dropout\n", 144 | " full2 = tf.nn.dropout(full2, KEEP_PROB)\n", 145 | " \n", 146 | " # Layer 5: Fully Connected. Input = 84. 
Output = 43.\n", 147 | " full3_w = tf.Variable(tf.truncated_normal((84, 43), mu, sigma))\n", 148 | " full3_b = tf.Variable(tf.zeros(43))\n", 149 | " logits = tf.matmul(full2, full3_w) + full3_b\n", 150 | " \n", 151 | " return logits" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "# 3 Training" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "## 3.1 Strategy" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 7, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "# Placeholder\n", 175 | "x = tf.placeholder(tf.float32, (None, 32, 32, 3))\n", 176 | "y = tf.placeholder(tf.int32, (None))\n", 177 | "one_hot_y = tf.one_hot(y, 43)\n", 178 | "keep_prob = tf.placeholder_with_default(1.0, shape=())\n", 179 | "\n", 180 | "# Hyperparameters\n", 181 | "LEARNING_RATE = 1e-2\n", 182 | "EPOCHS = 50\n", 183 | "BATCH_SIZE = 128\n", 184 | "\n", 185 | "# Train method\n", 186 | "logits = LeNet(x, keep_prob)\n", 187 | "cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits)\n", 188 | "loss_operation = tf.reduce_mean(cross_entropy)\n", 189 | "optimizer = tf.train.GradientDescentOptimizer(learning_rate = LEARNING_RATE)\n", 190 | "training_operation = optimizer.minimize(loss_operation)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "## 3.2 Evaluation" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 8, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n", 207 | "accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 208 | "\n", 209 | "def evaluate(X_data, y_data):\n", 210 | " num_examples = len(X_data)\n", 211 | " total_accuracy = 0\n", 212 | " sess = tf.get_default_session()\n", 213 | " for offset in 
range(0, num_examples, BATCH_SIZE):\n", 214 | " batch_x, batch_y = X_data[offset:offset + BATCH_SIZE], y_data[offset:offset + BATCH_SIZE]\n", 215 | " accuracy, loss = sess.run([accuracy_operation, loss_operation], feed_dict={x: batch_x, y: batch_y})\n", 216 | " total_accuracy += (accuracy * len(batch_x))\n", 217 | " return total_accuracy / num_examples, loss" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "## 3.3 Train the Model\n", 225 | "A validation set can be used to assess how well the model is performing. A low accuracy on the training and validation sets imply underfitting. A high accuracy on the training set but low accuracy on the validation set implies overfitting. The optimizer used is mini-batch gradient descent, each select a sub batch of all the training data." 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 9, 231 | "metadata": { 232 | "scrolled": false 233 | }, 234 | "outputs": [ 235 | { 236 | "name": "stdout", 237 | "output_type": "stream", 238 | "text": [ 239 | "Training...\n", 240 | "EPOCH 1 : Validation Accuracy = 0.084\n", 241 | "EPOCH 2 : Validation Accuracy = 0.172\n", 242 | "EPOCH 3 : Validation Accuracy = 0.255\n", 243 | "EPOCH 4 : Validation Accuracy = 0.315\n", 244 | "EPOCH 5 : Validation Accuracy = 0.357\n", 245 | "EPOCH 6 : Validation Accuracy = 0.392\n", 246 | "EPOCH 7 : Validation Accuracy = 0.426\n", 247 | "EPOCH 8 : Validation Accuracy = 0.452\n", 248 | "EPOCH 9 : Validation Accuracy = 0.463\n", 249 | "EPOCH 10 : Validation Accuracy = 0.487\n", 250 | "EPOCH 11 : Validation Accuracy = 0.515\n", 251 | "EPOCH 12 : Validation Accuracy = 0.542\n", 252 | "EPOCH 13 : Validation Accuracy = 0.564\n", 253 | "EPOCH 14 : Validation Accuracy = 0.590\n", 254 | "EPOCH 15 : Validation Accuracy = 0.602\n", 255 | "EPOCH 16 : Validation Accuracy = 0.644\n", 256 | "EPOCH 17 : Validation Accuracy = 0.658\n", 257 | "EPOCH 18 : Validation Accuracy = 0.675\n", 258 | "EPOCH 
19 : Validation Accuracy = 0.713\n", 259 | "EPOCH 20 : Validation Accuracy = 0.720\n", 260 | "EPOCH 21 : Validation Accuracy = 0.746\n", 261 | "EPOCH 22 : Validation Accuracy = 0.740\n", 262 | "EPOCH 23 : Validation Accuracy = 0.767\n", 263 | "EPOCH 24 : Validation Accuracy = 0.771\n", 264 | "EPOCH 25 : Validation Accuracy = 0.793\n", 265 | "EPOCH 26 : Validation Accuracy = 0.798\n", 266 | "EPOCH 27 : Validation Accuracy = 0.809\n", 267 | "EPOCH 28 : Validation Accuracy = 0.822\n", 268 | "EPOCH 29 : Validation Accuracy = 0.829\n", 269 | "EPOCH 30 : Validation Accuracy = 0.836\n", 270 | "EPOCH 31 : Validation Accuracy = 0.841\n", 271 | "EPOCH 32 : Validation Accuracy = 0.848\n", 272 | "EPOCH 33 : Validation Accuracy = 0.840\n", 273 | "EPOCH 34 : Validation Accuracy = 0.854\n", 274 | "EPOCH 35 : Validation Accuracy = 0.865\n", 275 | "EPOCH 36 : Validation Accuracy = 0.873\n", 276 | "EPOCH 37 : Validation Accuracy = 0.874\n", 277 | "EPOCH 38 : Validation Accuracy = 0.871\n", 278 | "EPOCH 39 : Validation Accuracy = 0.877\n", 279 | "EPOCH 40 : Validation Accuracy = 0.877\n", 280 | "EPOCH 41 : Validation Accuracy = 0.881\n", 281 | "EPOCH 42 : Validation Accuracy = 0.887\n", 282 | "EPOCH 43 : Validation Accuracy = 0.889\n", 283 | "EPOCH 44 : Validation Accuracy = 0.894\n", 284 | "EPOCH 45 : Validation Accuracy = 0.894\n", 285 | "EPOCH 46 : Validation Accuracy = 0.898\n", 286 | "EPOCH 47 : Validation Accuracy = 0.900\n", 287 | "EPOCH 48 : Validation Accuracy = 0.905\n", 288 | "EPOCH 49 : Validation Accuracy = 0.896\n", 289 | "EPOCH 50 : Validation Accuracy = 0.901\n", 290 | "Model saved\n" 291 | ] 292 | }, 293 | { 294 | "data": { 295 | "image/png": "\n", 296 | "text/plain": [ 297 | "" 298 | ] 299 | }, 300 | "metadata": {}, 301 | "output_type": "display_data" 302 | }, 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "CPU times: user 2min 40s, sys: 21 s, total: 3min 1s\n", 308 | "Wall time: 3min 21s\n" 309 | ] 310 | } 311 | ], 312 | 
"source": [ 313 | "%%time\n", 314 | "# 2min 7s\n", 315 | "train_losses = []\n", 316 | "valid_losses = []\n", 317 | "saver = tf.train.Saver()\n", 318 | "with tf.Session() as sess:\n", 319 | " sess.run(tf.global_variables_initializer())\n", 320 | " num_examples = len(X_train)\n", 321 | "\n", 322 | " print(\"Training...\")\n", 323 | " for i in range(EPOCHS):\n", 324 | " X_train, y_train = shuffle(X_train, y_train)\n", 325 | " print(\"EPOCH {} :\".format(i+1), end=' ')\n", 326 | " for offset in range(0, num_examples, BATCH_SIZE):\n", 327 | " end = offset + BATCH_SIZE\n", 328 | " batch_x, batch_y = X_train[offset:end], y_train[offset:end]\n", 329 | " _, train_loss = sess.run([training_operation, loss_operation], feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})\n", 330 | " train_losses.append(train_loss)\n", 331 | " validation_accuracy, valid_loss = evaluate(X_valid, y_valid)\n", 332 | " print(\"Validation Accuracy = {:.3f}\".format(validation_accuracy))\n", 333 | " valid_losses.append(valid_loss)\n", 334 | " \n", 335 | " saver.save(sess, './model/lenet.ckpt')\n", 336 | " print(\"Model saved\")\n", 337 | " \n", 338 | "plt.subplot(2, 1, 2)\n", 339 | "plt.plot(train_losses, label='train')\n", 340 | "plt.plot([(i+1) * int(num_examples / BATCH_SIZE) for i in range(EPOCHS)], valid_losses, label='val')\n", 341 | "plt.title('training and validation loss history')\n", 342 | "plt.xlabel('Epoch')\n", 343 | "plt.ylabel('Loss')\n", 344 | "plt.gcf().set_size_inches(15, 12)\n", 345 | "plt.legend()\n", 346 | "plt.show()" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "# 4 Testing" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "## 4.1 Test the model on testing dataset" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 12, 366 | "metadata": {}, 367 | "outputs": [ 368 | { 369 | "name": "stdout", 370 | "output_type": "stream", 371 | "text": [ 372 | 
"INFO:tensorflow:Restoring parameters from ./model/lenet.ckpt\n" 373 | ] 374 | }, 375 | { 376 | "data": { 377 | "image/png": "\n", 378 | "text/plain": [ 379 | "" 380 | ] 381 | }, 382 | "metadata": {}, 383 | "output_type": "display_data" 384 | } 385 | ], 386 | "source": [ 387 | "with tf.Session() as sess:\n", 388 | " saver.restore(sess, './model/lenet.ckpt')\n", 389 | " train_accuracy,_ = evaluate(X_train, y_train)\n", 390 | " valid_accuracy,_ = evaluate(X_valid, y_valid)\n", 391 | " test_accuracy,_ = evaluate(X_test, y_test)\n", 392 | " \n", 393 | "accuracys = [train_accuracy, valid_accuracy, test_accuracy]\n", 394 | "tick_labels = [\"training set\", \"validation set\", \"testing set\"]\n", 395 | "plt.bar(range(3), accuracys)\n", 396 | "plt.xlabel('data set')\n", 397 | "plt.ylabel('accuracy')\n", 398 | "plt.xticks(range(3), tick_labels)\n", 399 | "for x_,y_ in zip(range(3), accuracys):\n", 400 | " plt.text(x_ - 0.1, y_, '%.3f'%y_)\n", 401 | "plt.show()" 402 | ] 403 | } 404 | ], 405 | "metadata": { 406 | "kernelspec": { 407 | "display_name": "Python 3", 408 | "language": "python", 409 | "name": "python3" 410 | }, 411 | "language_info": { 412 | "codemirror_mode": { 413 | "name": "ipython", 414 | "version": 3 415 | }, 416 | "file_extension": ".py", 417 | "mimetype": "text/x-python", 418 | "name": "python", 419 | "nbconvert_exporter": "python", 420 | "pygments_lexer": "ipython3", 421 | "version": "3.5.2" 422 | } 423 | }, 424 | "nbformat": 4, 425 | "nbformat_minor": 2 426 | } 427 | -------------------------------------------------------------------------------- /GoogLeNet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "All modules imported.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import os\n", 18 | "import pickle\n", 19 | "import 
math\n", 20 | "import random\n", 21 | "import csv\n", 22 | "from PIL import Image\n", 23 | "\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import cv2\n", 26 | "import numpy as np\n", 27 | "import tensorflow as tf\n", 28 | "import tensorflow.contrib.layers as layers\n", 29 | "from sklearn.utils import shuffle\n", 30 | "\n", 31 | "print('All modules imported.')" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "# 1 Reload the preprocessed data" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "(34799, 32, 32, 3) (34799,)\n", 51 | "(4410, 32, 32, 3) (4410,)\n", 52 | "(12630, 32, 32, 3) (12630,)\n", 53 | "43\n", 54 | "Data loaded.\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "# 1 Reload the preprocessed data\n", 60 | "\n", 61 | "pickle_file = './pre-traffic-signs-data/pre-data.pickle'\n", 62 | "with open(pickle_file, 'rb') as f:\n", 63 | " pickle_data = pickle.load(f)\n", 64 | " X_train = pickle_data['train_features']\n", 65 | " y_train = pickle_data['train_labels']\n", 66 | " X_valid = pickle_data['valid_features']\n", 67 | " y_valid = pickle_data['valid_labels']\n", 68 | " X_test = pickle_data['test_features']\n", 69 | " y_test = pickle_data['test_labels']\n", 70 | " signnames = pickle_data['signnames']\n", 71 | " del pickle_data # Free up memory\n", 72 | " \n", 73 | "# Shuffle the data set\n", 74 | "X_train, y_train = shuffle(X_train, y_train)\n", 75 | "X_valid, y_valid = shuffle(X_valid, y_valid)\n", 76 | "X_test, y_test = shuffle(X_test, y_test)\n", 77 | "\n", 78 | "print(X_train.shape, y_train.shape)\n", 79 | "print(X_valid.shape, y_valid.shape)\n", 80 | "print(X_test.shape, y_test.shape)\n", 81 | "print(len(signnames))\n", 82 | "print('Data loaded.')" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "# 2 Model 
Architecture" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## 2.1 Inception model and GoogLeNet" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 3, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "def Inception(inputs, conv11_size, conv33_11_size, conv33_size,conv55_11_size, conv55_size, pool11_size):\n", 106 | " conv11 = layers.conv2d(inputs, conv11_size, [1, 1])\n", 107 | " conv33_reduce = layers.conv2d(inputs, conv33_11_size, [1, 1])\n", 108 | " conv33 = layers.conv2d(conv33_reduce, conv33_size, [3, 3])\n", 109 | " conv55_reduce = layers.conv2d(inputs, conv55_11_size, [1, 1])\n", 110 | " conv55 = layers.conv2d(conv55_reduce, conv55_size, [5, 5])\n", 111 | " pool_proj = layers.max_pool2d(inputs, [3, 3], stride = 1, padding='SAME')\n", 112 | " pool11 = layers.conv2d(pool_proj, pool11_size, [1, 1])\n", 113 | " return tf.concat([conv11, conv33, conv55, pool11], 3)\n", 114 | "\n", 115 | "def GoogLeNet(inputs, dropout_keep_prob): # inputs size:32x32x3\n", 116 | " conv1 = layers.conv2d(inputs, 64, [3, 3], stride = 2) # 16x16x64\n", 117 | " \n", 118 | " inception_2a = Inception(conv1, 64, 96, 128, 16, 32, 32) # 16x16x256\n", 119 | " inception_2b = Inception(inception_2a, 128, 128, 192, 32, 96, 64) # 16x16x480\n", 120 | " pool2 = layers.max_pool2d(inception_2b, [3, 3]) # 7x7x480 ? 
why\n", 121 | " \n", 122 | " inception_3a = Inception(pool2, 192, 96, 208, 16, 48, 64) # 7x7x512\n", 123 | " inception_3b = Inception(inception_3a, 160, 112, 224, 24, 64, 64) # 7x7x512\n", 124 | " pool3 = layers.max_pool2d(inception_3b, [3, 3]) # 3x3x512\n", 125 | " \n", 126 | " inception_4a = Inception(pool3, 256, 160, 320, 32, 128, 128) # 3x3x832\n", 127 | " inception_4b = Inception(inception_4a, 384, 192, 384, 48, 128, 128) # 3x3x1024\n", 128 | " pool4 = layers.avg_pool2d(inception_4b, [3, 3], stride = 1) \n", 129 | "\n", 130 | " reshape = tf.reshape(pool4, [-1, 1024])\n", 131 | " dropout = layers.dropout(reshape, dropout_keep_prob)\n", 132 | " logits = layers.fully_connected(dropout, 43, activation_fn=None)\n", 133 | " \n", 134 | " return logits" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "# 3 Training" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "## 3.1 Strategy" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 4, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# Placeholder\n", 158 | "x = tf.placeholder(tf.float32, (None, 32, 32, 3))\n", 159 | "y = tf.placeholder(tf.int32, (None))\n", 160 | "one_hot_y = tf.one_hot(y, 43)\n", 161 | "keep_prob = tf.placeholder_with_default(1.0, shape=())\n", 162 | "\n", 163 | "# Hyperparameters\n", 164 | "LEARNING_RATE = 4e-4\n", 165 | "EPOCHS = 35\n", 166 | "BATCH_SIZE = 128\n", 167 | "\n", 168 | "# Train method\n", 169 | "logits = GoogLeNet(x, keep_prob)\n", 170 | "cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits)\n", 171 | "loss_op = tf.reduce_mean(cross_entropy)\n", 172 | "optimizer = tf.train.AdamOptimizer(learning_rate = LEARNING_RATE)\n", 173 | "train_op = optimizer.minimize(loss_op)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "## 3.2 Evaluation" 181 | ] 182 | }, 
183 | { 184 | "cell_type": "code", 185 | "execution_count": 5, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n", 190 | "accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 191 | "\n", 192 | "def evaluate(X_data, y_data):\n", 193 | " num_examples = len(X_data)\n", 194 | " total_accuracy = 0\n", 195 | " sess = tf.get_default_session()\n", 196 | "# with tf.Session() as sess:\n", 197 | "# sess.run(tf.global_variables_initializer())\n", 198 | " for offset in range(0, num_examples, BATCH_SIZE):\n", 199 | " batch_x, batch_y = X_data[offset:offset + BATCH_SIZE], y_data[offset:offset + BATCH_SIZE]\n", 200 | " accuracy = sess.run(accuracy_op, feed_dict={x: batch_x, y: batch_y})\n", 201 | " total_accuracy += (accuracy * len(batch_x))\n", 202 | " return total_accuracy / num_examples" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "## 3.3 Train the Model\n", 210 | "A validation set can be used to assess how well the model is performing. A low accuracy on both the training and validation sets implies underfitting. A high accuracy on the training set but low accuracy on the validation set implies overfitting." 
211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 6, 216 | "metadata": { 217 | "scrolled": false 218 | }, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "Training...\n", 225 | "EPOCH 1 : Validation Accuracy = 0.460\n", 226 | "EPOCH 2 : Validation Accuracy = 0.878\n", 227 | "EPOCH 3 : Validation Accuracy = 0.903\n", 228 | "EPOCH 4 : Validation Accuracy = 0.948\n", 229 | "EPOCH 5 : Validation Accuracy = 0.952\n", 230 | "EPOCH 6 : Validation Accuracy = 0.951\n", 231 | "EPOCH 7 : Validation Accuracy = 0.951\n", 232 | "EPOCH 8 : Validation Accuracy = 0.948\n", 233 | "EPOCH 9 : Validation Accuracy = 0.950\n", 234 | "EPOCH 10 : Validation Accuracy = 0.968\n", 235 | "EPOCH 11 : Validation Accuracy = 0.959\n", 236 | "EPOCH 12 : Validation Accuracy = 0.956\n", 237 | "EPOCH 13 : Validation Accuracy = 0.965\n", 238 | "EPOCH 14 : Validation Accuracy = 0.972\n", 239 | "EPOCH 15 : Validation Accuracy = 0.968\n", 240 | "EPOCH 16 : Validation Accuracy = 0.952\n", 241 | "EPOCH 17 : Validation Accuracy = 0.973\n", 242 | "EPOCH 18 : Validation Accuracy = 0.975\n", 243 | "EPOCH 19 : Validation Accuracy = 0.959\n", 244 | "EPOCH 20 : Validation Accuracy = 0.977\n", 245 | "EPOCH 21 : Validation Accuracy = 0.978\n", 246 | "EPOCH 22 : Validation Accuracy = 0.973\n", 247 | "EPOCH 23 : Validation Accuracy = 0.983\n", 248 | "EPOCH 24 : Validation Accuracy = 0.977\n", 249 | "EPOCH 25 : Validation Accuracy = 0.979\n", 250 | "EPOCH 26 : Validation Accuracy = 0.981\n", 251 | "EPOCH 27 : Validation Accuracy = 0.977\n", 252 | "EPOCH 28 : Validation Accuracy = 0.984\n", 253 | "EPOCH 29 : Validation Accuracy = 0.985\n", 254 | "EPOCH 30 : Validation Accuracy = 0.987\n", 255 | "EPOCH 31 : Validation Accuracy = 0.987\n", 256 | "EPOCH 32 : Validation Accuracy = 0.973\n", 257 | "EPOCH 33 : Validation Accuracy = 0.963\n", 258 | "EPOCH 34 : Validation Accuracy = 0.968\n", 259 | "EPOCH 35 : Validation Accuracy = 0.979\n", 260 
| "Model saved\n", 261 | "CPU times: user 28min 52s, sys: 4min 17s, total: 33min 10s\n", 262 | "Wall time: 44min 41s\n" 263 | ] 264 | } 265 | ], 266 | "source": [ 267 | "%%time\n", 268 | "saver = tf.train.Saver()\n", 269 | "train_accuracy = []\n", 270 | "valid_accuracy = []\n", 271 | "with tf.Session() as sess:\n", 272 | " sess.run(tf.global_variables_initializer())\n", 273 | " num_examples = len(X_train)\n", 274 | " print(\"Training...\")\n", 275 | " for i in range(EPOCHS):\n", 276 | " X_train, y_train = shuffle(X_train, y_train)\n", 277 | " total_train_acc = 0\n", 278 | " print(\"EPOCH {} :\".format(i+1), end=' ')\n", 279 | " for offset in range(0, num_examples, BATCH_SIZE):\n", 280 | " end = offset + BATCH_SIZE\n", 281 | " batch_x, batch_y = X_train[offset:end], y_train[offset:end]\n", 282 | " _, train_acc = sess.run([train_op, accuracy_op], feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})\n", 283 | " total_train_acc += (train_acc * len(batch_x))\n", 284 | " train_accuracy.append(total_train_acc / num_examples)\n", 285 | " valid_acc = evaluate(X_valid, y_valid)\n", 286 | " valid_accuracy.append(valid_acc)\n", 287 | " print(\"Validation Accuracy = {:.3f}\".format(valid_acc))\n", 288 | " \n", 289 | " saver.save(sess, './model/googlenet.ckpt')\n", 290 | " print(\"Model saved\")" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "#### Plot the accuracy of training and validation operation" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 7, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "image/png": "\n", 308 | "text/plain": [ 309 | "" 310 | ] 311 | }, 312 | "metadata": {}, 313 | "output_type": "display_data" 314 | } 315 | ], 316 | "source": [ 317 | "plt.subplot(2, 1, 2)\n", 318 | "plt.plot(train_accuracy, label='train')\n", 319 | "plt.plot(valid_accuracy, label='valid')\n", 320 | "plt.title('Accuracy history')\n", 321 | "plt.xlabel('Epoch')\n", 322 | 
"plt.ylabel('Loss')\n", 323 | "plt.gcf().set_size_inches(15, 15)\n", 324 | "plt.legend()\n", 325 | "plt.show()" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "# 4 Testing" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "## 4.1 Test the model on testing dataset" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 6, 345 | "metadata": {}, 346 | "outputs": [ 347 | { 348 | "name": "stdout", 349 | "output_type": "stream", 350 | "text": [ 351 | "INFO:tensorflow:Restoring parameters from ./model/googlenet.ckpt\n" 352 | ] 353 | } 354 | ], 355 | "source": [ 356 | "saver = tf.train.import_meta_graph('./model/googlenet.ckpt.meta')\n", 357 | "with tf.Session() as sess:\n", 358 | " saver.restore(sess, './model/googlenet.ckpt')" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 10, 364 | "metadata": {}, 365 | "outputs": [ 366 | { 367 | "name": "stdout", 368 | "output_type": "stream", 369 | "text": [ 370 | "INFO:tensorflow:Restoring parameters from ./model/googlenet.ckpt\n" 371 | ] 372 | }, 373 | { 374 | "data": { 375 | "image/png": "\n", 376 | "text/plain": [ 377 | "" 378 | ] 379 | }, 380 | "metadata": {}, 381 | "output_type": "display_data" 382 | } 383 | ], 384 | "source": [ 385 | "with tf.Session() as sess:\n", 386 | " saver.restore(sess, './model/googlenet.ckpt')\n", 387 | "# sess.run(tf.global_variables_initializer())\n", 388 | " train_accuracy = evaluate(X_train, y_train)\n", 389 | " valid_accuracy = evaluate(X_valid, y_valid)\n", 390 | " test_accuracy = evaluate(X_test, y_test)\n", 391 | " \n", 392 | "accuracys = [train_accuracy, valid_accuracy, test_accuracy]\n", 393 | "tick_labels = [\"training set\", \"validation set\", \"testing set\"]\n", 394 | "plt.bar(range(3), accuracys)\n", 395 | "plt.xlabel('data set')\n", 396 | "plt.ylabel('accuracy')\n", 397 | "plt.xticks(range(3), tick_labels)\n", 398 | "for x_,y_ in 
zip(range(3), accuracys):\n", 399 | " plt.text(x_ - 0.1, y_, '%.3f'%y_)\n", 400 | "plt.show()" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "## 4.2 Test the Model on New Images" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 15, 413 | "metadata": {}, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "image/png": "\n", 418 | "text/plain": [ 419 | "" 420 | ] 421 | }, 422 | "metadata": {}, 423 | "output_type": "display_data" 424 | } 425 | ], 426 | "source": [ 427 | "online_images = []\n", 428 | "online_labels = [0, 9, 12, 14, 17, 22, 25, 26, 31, 35]\n", 429 | "\n", 430 | "for i in range(1, 11):\n", 431 | " image = plt.imread('./test_images/' + str(i) +'.jpeg')\n", 432 | " image_reshape = cv2.resize(image,(32, 32), interpolation = cv2.INTER_CUBIC)\n", 433 | " online_images.append(image_reshape)\n", 434 | "\n", 435 | "plt.figure(figsize=(25, 10))\n", 436 | "plt.subplots_adjust(hspace = .1, wspace=.1)\n", 437 | "for i in range(len(online_images)):\n", 438 | " plt.subplot(1, 10, i + 1)\n", 439 | " plt.imshow(online_images[i])\n", 440 | " plt.title(signnames[int(online_labels[i])])\n", 441 | " plt.xticks([]), plt.yticks([])\n", 442 | "plt.savefig('./result_images/online image.jpg')\n", 443 | "\n", 444 | "online_images = np.array(online_images)\n", 445 | "online_images = online_images.astype(np.float32) / 128. 
- 1.\n", 446 | "online_labels = np.array(online_labels)" 447 | ] 448 | }, 449 | { 450 | "cell_type": "markdown", 451 | "metadata": {}, 452 | "source": [ 453 | "### Predict the Sign Type for Each Image" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 16, 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "name": "stdout", 463 | "output_type": "stream", 464 | "text": [ 465 | "INFO:tensorflow:Restoring parameters from ./model/googlenet.ckpt\n", 466 | "Test Accuracy = 1.000\n", 467 | "Predict the Sign Type for Each Image\n", 468 | "[ 0 9 12 14 17 22 25 26 31 35]\n" 469 | ] 470 | } 471 | ], 472 | "source": [ 473 | "with tf.Session() as sess:\n", 474 | " saver.restore(sess, './model/googlenet.ckpt')\n", 475 | " test_accuracy = evaluate(online_images, online_labels)\n", 476 | " print(\"Test Accuracy = {:.3f}\".format(test_accuracy)) \n", 477 | " logits_value = sess.run(logits, feed_dict={x: online_images})\n", 478 | " probabilities = sess.run(tf.nn.softmax(logits_value))\n", 479 | " \n", 480 | "predict = probabilities.argmax(axis=1)\n", 481 | "print(\"Predict the Sign Type for Each Image\")\n", 482 | "print(predict)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "### Output Top 5 Softmax Probabilities For Each Image Found on the Web" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 52, 495 | "metadata": {}, 496 | "outputs": [ 497 | { 498 | "data": { 499 | "image/png": "\n", 500 | "text/plain": [ 501 | "" 502 | ] 503 | }, 504 | "metadata": {}, 505 | "output_type": "display_data" 506 | } 507 | ], 508 | "source": [ 509 | "### Print out the top five softmax probabilities for the predictions on the German traffic sign images found on the web. 
\n", 510 | "with tf.Session() as sess:\n", 511 | " top5 = sess.run(tf.nn.top_k(tf.constant(probabilities), k=5))\n", 512 | "\n", 513 | "values = top5.values\n", 514 | "indices = top5.indices\n", 515 | "fig, axes = plt.subplots(2, 5, figsize=(25, 8))\n", 516 | "for i in range(2):\n", 517 | " for j in range(5):\n", 518 | " axes[i][j].bar(range(5), values[i*5+j])\n", 519 | " axes[i][j].set_xticklabels(indices[i*5+j])\n", 520 | " axes[i][j].set_title(\"answer: \"+str(online_labels[i*5+j]))\n", 521 | " for x_,y_ in zip(range(5), values[i*5+j]):\n", 522 | " axes[i][j].text(x_ - 0.25, y_, '%.3f'%y_)" 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": null, 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [] 531 | } 532 | ], 533 | "metadata": { 534 | "kernelspec": { 535 | "display_name": "Python 3", 536 | "language": "python", 537 | "name": "python3" 538 | }, 539 | "language_info": { 540 | "codemirror_mode": { 541 | "name": "ipython", 542 | "version": 3 543 | }, 544 | "file_extension": ".py", 545 | "mimetype": "text/x-python", 546 | "name": "python", 547 | "nbconvert_exporter": "python", 548 | "pygments_lexer": "ipython3", 549 | "version": "3.5.2" 550 | } 551 | }, 552 | "nbformat": 4, 553 | "nbformat_minor": 2 554 | } 555 | --------------------------------------------------------------------------------