├── etc
│   ├── data
│   │   ├── meta.mat
│   │   └── filelist.mat
│   ├── untar1.sh
│   ├── untar.sh
│   └── spring10_new_synsets.txt
├── reference
│   └── 4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
├── LICENSE
├── .gitignore
├── codes
│   ├── classify.py
│   ├── test.py
│   ├── models
│   │   └── alexnet.py
│   ├── train.py
│   └── util.py
├── README.md
└── spring10_new_synsets.txt

--------------------------------------------------------------------------------
/etc/data/meta.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryujaehun/alexnet/HEAD/etc/data/meta.mat
--------------------------------------------------------------------------------
/etc/untar1.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# extract every .tar archive in the current directory, in place
for file in *.tar
do
    tar xvf "${file}"
done
--------------------------------------------------------------------------------
/etc/data/filelist.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryujaehun/alexnet/HEAD/etc/data/filelist.mat
--------------------------------------------------------------------------------
/etc/untar.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# extract every .tar archive (e.g. the per-class ILSVRC2012 training tars)
# into a directory named after the archive
for file in *.tar
do
    mkdir "${file%.*}" && tar xvf "${file}" -C "${file%.*}"
done
--------------------------------------------------------------------------------
/reference/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryujaehun/alexnet/HEAD/reference/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Ryujaehun

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
--------------------------------------------------------------------------------
/codes/classify.py:
--------------------------------------------------------------------------------
"""
Written by Matteo Dunnhofer - 2017

Classify an input image
"""
import sys
import os.path
import tensorflow as tf
import util as tu
from models import alexnet
import numpy as np

def classify(
    image,
    top_k,
    k_patches,
    ckpt_path,
    imagenet_path):
    """ Procedure to classify the image given through the command line

    Args:
        image: path to the image to classify
        top_k: integer representing the number of predictions with highest
            probability to retrieve
        k_patches: number of crops taken from the image and fed to the model
        ckpt_path: path to the model's TensorFlow checkpoint
        imagenet_path: path to the ILSVRC12 ImageNet folder containing train
            images, validation images, annotations and the metadata file

    """
    wnids, words = tu.load_imagenet_meta(os.path.join(imagenet_path, 'data/meta.mat'))

    # take a few crops from the image
    image_patches = tu.read_k_patches(image, k_patches)

    x = tf.placeholder(tf.float32, [None, 224, 224, 3])

    _, pred = alexnet.classifier(x, dropout=1.0)

    # average the predicted class distributions over the crops
    avg_prediction = tf.div(tf.reduce_sum(pred, 0), k_patches)
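    # with k_patches crops, `pred` holds one softmax row per crop; summing
    # over axis 0 and dividing by k_patches yields a single averaged class
    # distribution for the whole image, which is more robust than scoring
    # a single crop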
    # retrieve the top_k scores
    scores, indexes = tf.nn.top_k(avg_prediction, k=top_k)

    saver = tf.train.Saver()

    with tf.Session(config=tf.ConfigProto()) as sess:
        saver.restore(sess, os.path.join(ckpt_path, 'alexnet-cnn.ckpt'))

        s, i = sess.run([scores, indexes], feed_dict={x: image_patches})
        s, i = np.squeeze(s), np.squeeze(i)

        print('AlexNet saw:')
        for idx in range(top_k):
            print('{} - score: {}'.format(words[i[idx]], s[idx]))


if __name__ == '__main__':
    TOP_K = 5
    K_CROPS = 5
    IMAGENET_PATH = '/media/desktop/F64E50644E502023/ILSVRC2012'
    CKPT_PATH = 'ckpt-alexnet'

    image_path = sys.argv[1]

    classify(
        image_path,
        TOP_K,
        K_CROPS,
        CKPT_PATH,
        IMAGENET_PATH)

--------------------------------------------------------------------------------
/codes/test.py:
--------------------------------------------------------------------------------

import os.path
import tensorflow as tf
import util as tu
from models import alexnet
import numpy as np

def test(
    top_k,
    k_patches,
    display_step,
    imagenet_path,
    ckpt_path):
    """
    Procedure to evaluate top-1 and top-k accuracy (and error rate) on the
    ILSVRC2012 validation (test) set.

    Args:
        top_k: integer representing the number of predictions with highest
            probability to retrieve
        k_patches: number of crops taken from an image and fed to the model
        display_step: how often (in examples) to print the running test accuracy
        imagenet_path: path to the ILSVRC12 ImageNet folder containing train
            images, validation images, annotations and the metadata file
        ckpt_path: path to the model's TensorFlow checkpoint
    """

    test_images = sorted(os.listdir(os.path.join(imagenet_path, 'ILSVRC2012_img_val')))
    test_labels = tu.read_test_labels(os.path.join(imagenet_path, 'data/ILSVRC2012_validation_ground_truth.txt'))

    test_examples = len(test_images)

    x = tf.placeholder(tf.float32, [None, 224, 224, 3])
    y = tf.placeholder(tf.float32, [None, 1000])

    _, pred = alexnet.classifier(x, 1.0)

    # average the predicted class distributions over the crops of the image
    avg_prediction = tf.div(tf.reduce_sum(pred, 0), k_patches)

    # accuracy
    top1_correct = tf.equal(tf.argmax(avg_prediction, 0), tf.argmax(y, 1))
    top1_accuracy = tf.reduce_mean(tf.cast(top1_correct, tf.float32))

    topk_correct = tf.nn.in_top_k(tf.stack([avg_prediction]), tf.argmax(y, 1), k=top_k)
    topk_accuracy = tf.reduce_mean(tf.cast(topk_correct, tf.float32))
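    # in_top_k marks an example as correct when the true class index is among
    # the k highest entries of avg_prediction; e.g. with scores
    # [0.05, 0.5, 0.1, 0.2, 0.15] and k=2, classes 1 and 3 would count as correct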
    saver = tf.train.Saver()

    with tf.Session(config=tf.ConfigProto()) as sess:
        saver.restore(sess, os.path.join(ckpt_path, 'alexnet-cnn.ckpt'))

        total_top1_accuracy = 0.
        total_topk_accuracy = 0.

        for i in range(test_examples):
            # take a few patches from the image
            image_patches = tu.read_k_patches(os.path.join(imagenet_path, 'ILSVRC2012_img_val', test_images[i]), k_patches)
            label = test_labels[i]

            top1_a, topk_a = sess.run([top1_accuracy, topk_accuracy], feed_dict={x: image_patches, y: [label]})
            total_top1_accuracy += top1_a
            total_topk_accuracy += topk_a

            if i % display_step == 0:
                print('Examples done: {:5d}/{} ---- Top-1: {:.4f} -- Top-{}: {:.4f}'.format(i + 1, test_examples, total_top1_accuracy / (i + 1), top_k, total_topk_accuracy / (i + 1)))

        print('---- Final accuracy ----')
        print('Top-1: {:.4f} -- Top-{}: {:.4f}'.format(total_top1_accuracy / test_examples, top_k, total_topk_accuracy / test_examples))
        print('Top-1 error rate: {:.4f} -- Top-{} error rate: {:.4f}'.format(1 - (total_top1_accuracy / test_examples), top_k, 1 - (total_topk_accuracy / test_examples)))

if __name__ == '__main__':
    TOP_K = 5
    K_PATCHES = 5
    DISPLAY_STEP = 10
    IMAGENET_PATH = '/media/desktop/F64E50644E502023/ILSVRC2012'
    CKPT_PATH = 'ckpt-alexnet'

    test(
        TOP_K,
        K_PATCHES,
        DISPLAY_STEP,
        IMAGENET_PATH,
        CKPT_PATH)

--------------------------------------------------------------------------------
/codes/models/alexnet.py:
--------------------------------------------------------------------------------


import tensorflow as tf
import util as tu

def alexnet(x):
    """
    AlexNet convolutional layers definition

    Args:
        x: tensor of shape [batch_size, width, height, channels]

    Returns:
        pool5: the tensor obtained after applying all convolution, pooling
            and LRN operations

    """
    with tf.name_scope('alexnetwork') as scope:
        with tf.name_scope('conv1') as inner_scope:
            wcnn1 = tu.weight([11, 11, 3, 96], name='wcnn1')
            bcnn1 = tu.bias(0.0, [96], name='bcnn1')
            conv1 = tf.add(tu.conv2d(x, wcnn1, stride=(4, 4), padding='SAME'), bcnn1)
            #conv1 = tu.batch_norm(conv1)
            conv1 = tu.relu(conv1)
            norm1 = tu.lrn(conv1, depth_radius=5, bias=1.0, alpha=1e-04, beta=0.75)
            pool1 = tu.max_pool2d(norm1, kernel=[1, 3, 3, 1], stride=[1, 2, 2, 1], padding='VALID')

        with tf.name_scope('conv2') as inner_scope:
            wcnn2 = tu.weight([5, 5, 96, 256], name='wcnn2')
            bcnn2 = tu.bias(1.0, [256], name='bcnn2')
            conv2 = tf.add(tu.conv2d(pool1, wcnn2, stride=(1, 1), padding='SAME'), bcnn2)
            #conv2 = tu.batch_norm(conv2)
            conv2 = tu.relu(conv2)
            norm2 = tu.lrn(conv2, depth_radius=5, bias=1.0, alpha=1e-04, beta=0.75)
            pool2 = tu.max_pool2d(norm2, kernel=[1, 3, 3, 1], stride=[1, 2, 2, 1], padding='VALID')

        with tf.name_scope('conv3') as inner_scope:
            wcnn3 = tu.weight([3, 3, 256, 384], name='wcnn3')
            bcnn3 = tu.bias(0.0, [384], name='bcnn3')
            conv3 = tf.add(tu.conv2d(pool2, wcnn3, stride=(1, 1), padding='SAME'), bcnn3)
            #conv3 = tu.batch_norm(conv3)
            conv3 = tu.relu(conv3)

        with tf.name_scope('conv4') as inner_scope:
            wcnn4 = tu.weight([3, 3, 384, 384], name='wcnn4')
            bcnn4 = tu.bias(1.0, [384], name='bcnn4')
            conv4 = tf.add(tu.conv2d(conv3, wcnn4, stride=(1, 1), padding='SAME'), bcnn4)
            #conv4 = tu.batch_norm(conv4)
            conv4 = tu.relu(conv4)

        with tf.name_scope('conv5') as inner_scope:
            wcnn5 = tu.weight([3, 3, 384, 256], name='wcnn5')
            bcnn5 = tu.bias(1.0, [256], name='bcnn5')
            conv5 = tf.add(tu.conv2d(conv4, wcnn5, stride=(1, 1), padding='SAME'), bcnn5)
            #conv5 = tu.batch_norm(conv5)
            conv5 = tu.relu(conv5)
            pool5 = tu.max_pool2d(conv5, kernel=[1, 3, 3, 1], stride=[1, 2, 2, 1], padding='VALID')

    return pool5
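# Shape bookkeeping for a 224x224x3 input (a sketch, following this file's
# 'SAME' convolution padding and 'VALID' 3x3/stride-2 pooling):
#   conv1 (11x11/4) -> 56x56x96,  pool1 -> 27x27x96
#   conv2 (5x5/1)   -> 27x27x256, pool2 -> 13x13x256
#   conv3/conv4     -> 13x13x384, conv5 -> 13x13x256, pool5 -> 6x6x256
# which is where the 6 * 6 * 256 flattened dimension below comes from.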
def classifier(x, dropout):
    """
    AlexNet fully connected layers definition

    Args:
        x: tensor of shape [batch_size, width, height, channels]
        dropout: probability of keeping a unit (dropout keep-prob)

    Returns:
        fc3: 1000-way linear tensor (logits) taken just before the softmax
            operation; it is needed to feed tf.nn.softmax_cross_entropy_with_logits()
        softmax: 1000-way tensor representing the output class probabilities
            of the image to classify

    """
    pool5 = alexnet(x)

    dim = pool5.get_shape().as_list()
    flat_dim = dim[1] * dim[2] * dim[3] # 6 * 6 * 256
    flat = tf.reshape(pool5, [-1, flat_dim])

    with tf.name_scope('classifier') as scope:
        with tf.name_scope('fullyconnected1') as inner_scope:
            wfc1 = tu.weight([flat_dim, 4096], name='wfc1')
            bfc1 = tu.bias(0.0, [4096], name='bfc1')
            fc1 = tf.add(tf.matmul(flat, wfc1), bfc1)
            #fc1 = tu.batch_norm(fc1)
            fc1 = tu.relu(fc1)
            fc1 = tf.nn.dropout(fc1, dropout)

        with tf.name_scope('fullyconnected2') as inner_scope:
            wfc2 = tu.weight([4096, 4096], name='wfc2')
            bfc2 = tu.bias(0.0, [4096], name='bfc2')
            fc2 = tf.add(tf.matmul(fc1, wfc2), bfc2)
            #fc2 = tu.batch_norm(fc2)
            fc2 = tu.relu(fc2)
            fc2 = tf.nn.dropout(fc2, dropout)

        with tf.name_scope('classifier_output') as inner_scope:
            wfc3 = tu.weight([4096, 1000], name='wfc3')
            bfc3 = tu.bias(0.0, [1000], name='bfc3')
            fc3 = tf.add(tf.matmul(fc2, wfc3), bfc3)
            softmax = tf.nn.softmax(fc3)

    return fc3, softmax

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# alexnet
___


## about

>AlexNet is the name of a [convolutional neural network](https://en.wikipedia.org/wiki/Convolutional_neural_network), originally written with [CUDA](https://en.wikipedia.org/wiki/CUDA) to run with [GPU](https://en.wikipedia.org/wiki/GPU) support, which competed in the [ImageNet Large Scale Visual Recognition Challenge](https://en.wikipedia.org/wiki/ImageNet_Large_Scale_Visual_Recognition_Challenge) in 2012. The network achieved a top-5 error of 15.3%, more than 10.8 percentage points ahead of the runner-up. AlexNet was designed by the SuperVision group, consisting of Alex Krizhevsky, Geoffrey Hinton, and Ilya Sutskever. -wikipedia


## architecture

The neural network, which has 60 million parameters and 650,000 neurons, consists
of five convolutional layers, some of which are followed by max-pooling layers,
and three fully-connected layers with a final 1000-way softmax. To make training
faster, we used non-saturating neurons and a very efficient GPU implementation
of the convolution operation. To reduce overfitting in the fully-connected
layers we employed a recently-developed regularization method called “dropout”
that proved to be very effective.
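This stack is what `codes/models/alexnet.py` implements. As a quick sanity check of the resulting graph (a sketch, not part of the repository; it assumes you run it from `codes/` so that `models.alexnet` and its `util` import resolve):

```
import numpy as np
import tensorflow as tf
from models import alexnet

x = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, probs = alexnet.classifier(x, dropout=1.0)  # fc3 logits and softmax output

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(probs, feed_dict={x: np.random.rand(2, 224, 224, 3)})
    print(out.shape)  # (2, 1000): one 1000-way distribution per image
```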
![](https://kratzert.github.io/images/finetune_alexnet/alexnet.png)

## batch normalization

[Batch normalization](https://arxiv.org/abs/1502.03167) is a technique that mitigates gradient vanishing and gradient exploding. In this code the `tu.batch_norm` calls are left commented out; the network instead uses the paper's local response normalization (LRN), with the parameters below.
![](http://nmhkahn.github.io/assets/Casestudy-CNN/alex-norm1.png)


### k = 2, n = 5, α = 10⁻⁴, β = 0.75

![](https://shuuki4.files.wordpress.com/2016/01/bn1.png)
![](https://shuuki4.files.wordpress.com/2016/01/bn2.png)
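For reference, this is roughly what `tu.lrn` (a thin wrapper around `tf.nn.local_response_normalization`) computes, written out in plain numpy as a sketch. Note that TensorFlow's `depth_radius` r sums over 2r + 1 adjacent channels, so the paper's n = 5 corresponds to `depth_radius=2`, while `codes/models/alexnet.py` passes `depth_radius=5`:

```
import numpy as np

def lrn(a, depth_radius=2, bias=2.0, alpha=1e-4, beta=0.75):
    """Local response normalization across channels; a has shape [h, w, c]."""
    channels = a.shape[-1]
    out = np.empty_like(a)
    for i in range(channels):
        lo = max(0, i - depth_radius)
        hi = min(channels - 1, i + depth_radius)
        denom = (bias + alpha * np.sum(a[..., lo:hi + 1] ** 2, axis=-1)) ** beta
        out[..., i] = a[..., i] / denom
    return out
```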
## optimizer

Apply AdamOptimizer (instead of the momentum SGD used in the paper).
![](http://i.imgur.com/2dKCQHh.gif?1)
![](http://i.imgur.com/pD0hWu5.gif?1)
![](http://i.imgur.com/NKsFHJb.gif?1)

## requirement

* tensorflow-gpu (ver. 1.3.1)
* cv2 (ver. 3.3.0)
* numpy (ver. 1.13.3)
* scipy (ver. 0.19.1)


## Usage
1. Download the image files from the link below (LSVRC2012 train, val, test, and Development kit (Task 1)).
1. Untar them (there are scripts in `etc`).
1. Modify `IMAGENET_PATH` and, if needed, the hyperparameters in train.py.

## train
___

#### From the beginning

```
python3 train.py
```

#### resume training

```
python3 train.py -resume
```

## test

```
python3 test.py
```

## Classify

```
python classify.py image
```

## tensorboard

```
tensorboard --logdir path/to/summary/train/
```

![](https://galoismilk.org/storage/etc/graph-large_attrs_key=_too_large_attrs&limit_attr_size=1024&run=.png)


## TODO

* ~~apply another optimizer~~
* ~~apply tensorboard~~
* ~~Fit to a GPU~~
* ~~Application of the technique to the paper~~
* Eliminate bottlenecks



## file_architecture

```
The ILSVRC 2012 training set folder should be structured like this:
ILSVRC2012_img_train
|_n01440764
|_n01443537
|_n01484850
|_n01491361
|_ ...
```

#### You must untar the training archives with `untar.sh`


## download

[download LSVRC 2012 image data file](http://www.image-net.org/challenges/LSVRC/2012/nonpub-downloads)



## Remove log

If you do not want to see the device-placement log at startup, remove
`allow_soft_placement=True, log_device_placement=True` from the `tf.Session`
config in train.py.

## references

[optimizer](http://ruder.io/optimizing-gradient-descent/)

[AlexNet training on ImageNet LSVRC 2012](https://github.com/dontfollowmeimcrazy/imagenet)

[Tensorflow Models](https://github.com/tensorflow/models)

[Tensorflow API](https://www.tensorflow.org/versions/r1.2/api_docs/)

## Licence

[MIT Licence](LICENSE)

--------------------------------------------------------------------------------
/codes/train.py:
--------------------------------------------------------------------------------

import sys
import os.path
import time
from models import alexnet
import tensorflow as tf
import util as tu
import numpy as np
import threading

def train(
    threads_numbers,
    epochs,
    batch_size,
    learning_rate,
    dropout,
    lmbda,
    resume,
    imagenet_path,
    display_step,
    test_step,
    ckpt_path,
    summary_path):
    """ Procedure to train the model on the ImageNet ILSVRC 2012 training set

    Args:
        threads_numbers: number of threads that fill the example queue
        epochs: number of training epochs
        batch_size: number of examples per training batch
        learning_rate: initial learning rate
        dropout: dropout keep probability for the fully connected layers
        lmbda: weight-decay coefficient for the L2 loss term
        resume: boolean variable, true to resume training, false to train from scratch
        imagenet_path: path to the ILSVRC12 ImageNet folder containing train
            images, validation images, annotations and the metadata file
        display_step: how often (in steps) to print the current training accuracy
        test_step: how often (in steps) to evaluate and print the validation accuracy
        ckpt_path: path where to save the model's TensorFlow checkpoint (or from where to resume)
        summary_path: path where to save logs for TensorBoard

    """
    train_img_path = os.path.join(imagenet_path, 'ILSVRC2012_img_train')
    ts_size = tu.imagenet_size(train_img_path)
    num_batches = int(float(ts_size) / batch_size)

    wnid_labels, _ = tu.load_imagenet_meta(os.path.join(imagenet_path, 'data/meta.mat'))
    with tf.device('/gpu:0'):
        x = tf.placeholder(tf.float32, [None, 224, 224, 3])
        y = tf.placeholder(tf.float32, [None, 1000])
        lr = tf.placeholder(tf.float32)
        keep_prob = tf.placeholder(tf.float32)

    # queue of examples being filled on the cpu
    with tf.device('/cpu:0'):
        q = tf.FIFOQueue(batch_size * 3, [tf.float32, tf.float32], shapes=[[224, 224, 3], [1000]])
        enqueue_op = q.enqueue_many([x, y])
        x_b, y_b = q.dequeue_many(batch_size)
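    # note on the pipeline above: each loader thread started below pushes a
    # whole batch of preprocessed examples at a time via enqueue_many, while
    # dequeue_many blocks until batch_size examples are available, so the
    # optimizer never runs on a partial batch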
    pred, _ = alexnet.classifier(x_b, keep_prob)
    with tf.device('/gpu:0'):
        # cross-entropy and weight decay
        with tf.name_scope('cross_entropy'):
            cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_b, name='cross-entropy'))

        with tf.name_scope('l2_loss'):
            l2_loss = tf.reduce_sum(lmbda * tf.stack([tf.nn.l2_loss(v) for v in tf.get_collection('weights')]))
            tf.summary.scalar('l2_loss', l2_loss)

        with tf.name_scope('loss'):
            loss = cross_entropy + l2_loss
            tf.summary.scalar('loss', loss)

        # accuracy
        with tf.name_scope('accuracy'):
            correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y_b, 1))
            accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
            tf.summary.scalar('accuracy', accuracy)

        global_step = tf.Variable(0, trainable=False)
        epoch = tf.div(global_step, num_batches)

        # Adam optimizer (the paper used SGD with momentum)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss, global_step=global_step)

    # merge summaries to write them to file
    merged = tf.summary.merge_all()

    # checkpoint saver
    saver = tf.train.Saver()

    coord = tf.train.Coordinator()

    #init = tf.initialize_all_variables()
    init = tf.global_variables_initializer()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
        if resume:
            saver.restore(sess, os.path.join(ckpt_path, 'alexnet-cnn.ckpt'))
        else:
            sess.run(init)

        # enqueuing batches procedure
        def enqueue_batches():
            while not coord.should_stop():
                im, l = tu.read_batch(batch_size, train_img_path, wnid_labels)
                sess.run(enqueue_op, feed_dict={x: im, y: l})

        # creating and starting parallel threads to fill the queue
        num_threads = threads_numbers
        for i in range(num_threads):
            t = threading.Thread(target=enqueue_batches)
            t.setDaemon(True)
            t.start()

        # operation to write logs for tensorboard visualization
        train_writer = tf.summary.FileWriter(os.path.join(summary_path, 'train'), sess.graph)

        start_time = time.time()
        for e in range(sess.run(epoch), epochs):
            for i in range(num_batches):

                _, step = sess.run([optimizer, global_step], feed_dict={lr: learning_rate, keep_prob: dropout})
                #train_writer.add_summary(summary, step)

                # decaying learning rate
                if step == 170000 or step == 350000:
                    learning_rate /= 10
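                # (with BATCH_SIZE = 128 and the ~1.28M ILSVRC2012 training
                # images, one epoch is roughly 10,000 steps, so these fixed
                # cut points land around epochs 17 and 35 -- a hand-tuned
                # stand-in for the paper's "divide by 10 when the validation
                # error stops improving")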
                # display current training information
                if step % display_step == 0:
                    temp_time = time.time()
                    c, a = sess.run([loss, accuracy], feed_dict={lr: learning_rate, keep_prob: 1.0})
                    print("time: ", temp_time - start_time, 'Epoch: {:03d} Step/Batch: {:09d} --- Loss: {:.7f} Training accuracy: {:.4f}'.format(e, step, c, a))

                # run a test and evaluate the validation accuracy
                if step % test_step == 0:
                    val_im, val_cls = tu.read_validation_batch(batch_size, os.path.join(imagenet_path, 'ILSVRC2012_img_val'), os.path.join(imagenet_path, 'data/ILSVRC2012_validation_ground_truth.txt'))
                    v_a = sess.run(accuracy, feed_dict={x_b: val_im, y_b: val_cls, lr: learning_rate, keep_prob: 1.0})
                    # intermediate time
                    int_time = time.time()
                    print('Elapsed time: {}'.format(tu.format_time(int_time - start_time)))
                    print('Validation accuracy: {:.04f}'.format(v_a))
                    # save weights to file
                    save_path = saver.save(sess, os.path.join(ckpt_path, 'alexnet-cnn.ckpt'))
                    print('Variables saved in file: %s' % save_path)

        end_time = time.time()
        print('Elapsed time: {}'.format(tu.format_time(end_time - start_time)))
        save_path = saver.save(sess, os.path.join(ckpt_path, 'alexnet-cnn.ckpt'))
        print('Variables saved in file: %s' % save_path)

        coord.request_stop()


if __name__ == '__main__':
    THREADS_NUMBERS = 4
    DROPOUT = 0.5
    LAMBDA = 5e-05 # for weight decay
    LEARNING_RATE = 1e-03
    EPOCHS = 90
    BATCH_SIZE = 128
    CKPT_PATH = 'ckpt-alexnet'
    if not os.path.exists(CKPT_PATH):
        os.makedirs(CKPT_PATH)
    SUMMARY = 'summary'
    if not os.path.exists(SUMMARY):
        os.makedirs(SUMMARY)

    IMAGENET_PATH = '/media/desktop/F64E50644E502023/ILSVRC2012'
    DISPLAY_STEP = 10
    TEST_STEP = 500
    if len(sys.argv) == 1:
        resume = False
    elif sys.argv[1] == '-resume':
        resume = True
    else:
        sys.exit('usage: python3 train.py [-resume]')

    train(
        THREADS_NUMBERS,
        EPOCHS,
        BATCH_SIZE,
        LEARNING_RATE,
        DROPOUT,
        LAMBDA,
        resume,
        IMAGENET_PATH,
        DISPLAY_STEP,
        TEST_STEP,
        CKPT_PATH,
        SUMMARY)

--------------------------------------------------------------------------------
/codes/util.py:
--------------------------------------------------------------------------------
"""
Written by Matteo Dunnhofer - 2017

Helper functions and procedures
"""
import os
import random
import tensorflow as tf
import numpy as np
from scipy.io import loadmat
import cv2

################ TensorFlow standard operations wrappers #####################
def weight(shape, name):
    initial = tf.truncated_normal(shape, stddev=0.01)
    w = tf.Variable(initial, name=name)
    tf.add_to_collection('weights', w)
    return w

def bias(value, shape, name):
    initial = tf.constant(value, shape=shape)
    return tf.Variable(initial, name=name)

def conv2d(x, W, stride, padding):
    return tf.nn.conv2d(x, W, strides=[1, stride[0], stride[1], 1], padding=padding)

def max_pool2d(x, kernel, stride, padding):
    return tf.nn.max_pool(x, ksize=kernel, strides=stride, padding=padding)

def lrn(x, depth_radius, bias, alpha, beta):
    return tf.nn.local_response_normalization(x, depth_radius, bias, alpha, beta)

def relu(x):
    return tf.nn.relu(x)

def batch_norm(x):
    epsilon = 1e-3
    batch_mean, batch_var = tf.nn.moments(x, [0])
    return tf.nn.batch_normalization(x, batch_mean, batch_var, None, None, epsilon)

################ batch creation functions #####################

def onehot(index):
    """ It creates a one-hot vector with a 1.0 in
        the position represented by index
    """
    onehot = np.zeros(1000)
    onehot[index] = 1.0
    return onehot
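# e.g. onehot(2)[:4] -> array([0., 0., 1., 0.]); each label is a 1000-way
# one-hot row aligned with the network's softmax output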

def read_batch(batch_size, images_source, wnid_labels):
    """ It returns a batch of single images (no data augmentation)

        The ILSVRC 2012 training set folder should be structured like this:
        ILSVRC2012_img_train
            |_n01440764
            |_n01443537
            |_n01484850
            |_n01491361
            |_ ...

    Args:
        batch_size: number of images per batch
        images_source: path to the ILSVRC 2012 training set folder
        wnid_labels: list of ImageNet wnids, lexicographically ordered

    Returns:
        batch_images: a numpy array of images of shape [batch_size, width, height, channels]
        batch_labels: a numpy array of one-hot vectors of shape [batch_size, 1000]
    """
    batch_images = []
    batch_labels = []

    for i in range(batch_size):
        # random class choice
        # (randomly choose a folder of images of the same class from the list of previously sorted wnids)
        class_index = random.randint(0, 999)

        folder = wnid_labels[class_index]
        batch_images.append(read_image(os.path.join(images_source, folder)))
        batch_labels.append(onehot(class_index))

    # stack the lists into [batch_size, 224, 224, 3] and [batch_size, 1000] arrays
    batch_images = np.stack(batch_images)
    batch_labels = np.stack(batch_labels)
    return batch_images, batch_labels

def read_image(images_folder):
    """ It reads a single image file into a numpy array and preprocesses it

    Args:
        images_folder: path of the folder from which an image is chosen at random

    Returns:
        im_array: the numpy array of the image [width, height, channels]
    """
    # randomly choose an image inside the folder
    image_path = os.path.join(images_folder, random.choice(os.listdir(images_folder)))

    # load and normalize the image
    im_array = preprocess_image(image_path)

    return im_array

def preprocess_image(image_path):
    """ It reads an image, resizes it so that its lowest dimension is 256px,
        randomly chooses a 224x224 crop inside the resized image and normalizes
        the numpy array by subtracting the ImageNet training set mean

    Args:
        image_path: path of the image

    Returns:
        cropped_im_array: the normalized numpy array of the image [width, height, channels]
    """
    IMAGENET_MEAN = np.array([104., 117., 124.])

    img = cv2.imread(image_path)
    height, width, _ = img.shape
    #if np.random.random() < 0.5:
    #    img = cv2.flip(img, 1)
    # resize of the image (setting lowest dimension to 256px)
    if width