├── data
│   └── cifar10
│       └── test.txt
├── figure
│   ├── net_block.png
│   ├── stem_block.png
│   ├── dense_block.png
│   ├── loss_and_accuracy.png
│   └── architecture_peleenet_classification.png
├── main.py
├── config.py
├── README.md
├── cache.py
├── python3
│   ├── cache.py
│   ├── cifar10.py
│   └── dataset.py
├── layers.py
├── utils.py
├── cifar10.py
├── dataset.py
└── PeleeNet.py

/data/cifar10/test.txt:
--------------------------------------------------------------------------------
1 | 
2 | 

--------------------------------------------------------------------------------
/figure/net_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nnUyi/PeleeNet/HEAD/figure/net_block.png

--------------------------------------------------------------------------------
/figure/stem_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nnUyi/PeleeNet/HEAD/figure/stem_block.png

--------------------------------------------------------------------------------
/figure/dense_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nnUyi/PeleeNet/HEAD/figure/dense_block.png

--------------------------------------------------------------------------------
/figure/loss_and_accuracy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nnUyi/PeleeNet/HEAD/figure/loss_and_accuracy.png

--------------------------------------------------------------------------------
/figure/architecture_peleenet_classification.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nnUyi/PeleeNet/HEAD/figure/architecture_peleenet_classification.png

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | # coding='utf-8'
2 | '''
3 |     author: Youzhao Yang
4 |     date: 05/08/2018
5 |     github: https://github.com/nnuyi
6 | '''
7 | 
8 | import tensorflow as tf
9 | from config import Config
10 | from PeleeNet import PeleeNet
11 | 
12 | def main():
13 |     config = Config()
14 |     config.check_dir()
15 |     config.print_config()
16 | 
17 |     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
18 |     gpu_options.allow_growth = True
19 | 
20 |     sess_config = tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)
21 |     with tf.Session(config=sess_config) as sess:
22 |         peleenet = PeleeNet(config=config.config, sess=sess)
23 |         peleenet.build_model()
24 |         if config.config.is_training:
25 |             peleenet.train_model()
26 |         if config.config.is_testing:
27 |             peleenet.test_model()
28 | 
29 | if __name__=='__main__':
30 |     main()

--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | # coding='utf-8'
2 | '''
3 |     author: Youzhao Yang
4 |     date: 05/08/2018
5 |     github: https://github.com/nnuyi
6 | '''
7 | 
8 | import tensorflow as tf
9 | import os
10 | 
11 | class Config:
12 |     def __init__(self):
13 |         self.flags = tf.app.flags
14 |         self.flags.DEFINE_integer('epochs', 500, 'training epochs')
15 |         self.flags.DEFINE_integer('batchsize', 64, 'training batchsize')
16 |         self.flags.DEFINE_integer('input_height', 32, 'input height')
17 |         self.flags.DEFINE_integer('input_width', 32, 'input width')
18 |         self.flags.DEFINE_integer('input_channel', 3, 'input channel')
19 |         self.flags.DEFINE_integer('num_class', 10, 'number of classes')
20 |         self.flags.DEFINE_float('learning_rate', 0.001, 'learning rate')
21 |         self.flags.DEFINE_float('beta1', 0.9, 'beta1')
22 |         self.flags.DEFINE_float('beta2', 0.999, 'beta2')
23 |         self.flags.DEFINE_float('momentum', 0.9, 'momentum for rmsprop optimizer')
24 |         self.flags.DEFINE_float('weight_decay', 5e-4, 'weight decay')
25 |         self.flags.DEFINE_string('checkpoint_dir', 'checkpoint', 'checkpoint directory')
26 |         self.flags.DEFINE_string('logs_dir', 'logs', 'logs directory')
27 |         self.flags.DEFINE_string('dataset', 'cifar10', 'dataset type')
28 |         self.flags.DEFINE_bool('is_training', False, 'whether to run training')
29 |         self.flags.DEFINE_bool('is_testing', False, 'whether to run testing')
30 | 
31 |         self.config = self.flags.FLAGS
32 | 
33 |     def check_dir(self):
34 |         if not os.path.exists(self.config.checkpoint_dir):
35 |             os.mkdir(self.config.checkpoint_dir)
36 |         if not os.path.exists(self.config.logs_dir):
37 |             os.mkdir(self.config.logs_dir)
38 | 
39 |     def print_config(self):
40 |         print('Config Proto:')
41 |         print('-'*30)
42 |         print('dataset:{}'.format(self.config.dataset))
43 |         print('epochs:{}'.format(self.config.epochs))
44 |         print('batchsize:{}'.format(self.config.batchsize))
45 |         print('learning_rate:{}'.format(self.config.learning_rate))
46 |         print('beta1:{}'.format(self.config.beta1))
47 |         print('beta2:{}'.format(self.config.beta2))
48 |         print('-'*30)
49 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PeleeNet
2 | - A TensorFlow implementation of PeleeNet, described in [Pelee: A Real-Time Object Detection System on Mobile Devices](https://arxiv.org/pdf/1804.06882.pdf). This repo mainly focuses on the **classification part**.
3 | 
4 | ## Architecture in paper
5 | - Architecture of PeleeNet
6 | 
7 | 
![architecture of PeleeNet](./figure/architecture_peleenet_classification.png)
8 | 
9 | - Net block: stem block & dense block
10 | 
11 | 
![net block](./figure/net_block.png)
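
The stem and dense blocks above are implemented in layers.py. One detail worth noting: each dense layer there concatenates k feature maps from each of its two branches onto its input, so a dense block grows its channel count by 2*k per layer. A minimal sketch of that bookkeeping (the helper function is hypothetical, not part of this repo):

```python
# Each dense layer in layers.py concatenates k channels from the left branch
# and k channels from the right branch onto its input tensor.
def dense_block_out_channels(in_channels, num_block, k):
    return in_channels + 2 * k * num_block

# e.g. the self-test in layers.py: 32 input channels, 3 layers, k=16 -> 128
assert dense_block_out_channels(32, 3, 16) == 128
```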
12 | 
13 | ## Architecture in TensorBoard
14 | - stem block
15 | 
![stem block](./figure/stem_block.png)
16 | 
17 | - dense block
18 | 
![dense block](./figure/dense_block.png)
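
PeleeNet.py itself is not reproduced in this dump, so the following is only a rough sketch of how the building blocks from layers.py could be chained into the classification network. The function name is hypothetical, and the stage depths, growth rate and channel widths follow the paper's classification table rather than this repo's code:

```python
import tensorflow as tf
from layers import Layer

def peleenet_sketch(input_x, num_class=10, is_training=True):
    layer = Layer()
    # stem block: compresses the input into 32 feature maps
    net = layer._stem_block(input_x, num_init_channel=32, is_training=is_training)
    # four dense stages with growth rate k=32, as in the paper
    num_blocks = [3, 4, 8, 6]
    bottleneck_widths = [1, 2, 4, 4]
    output_channels = [128, 256, 512, 704]
    for stage in range(4):
        net = layer._dense_block(net, stage, num_blocks[stage], 32,
                                 bottleneck_widths[stage], is_training=is_training)
        # the last transition layer keeps the spatial resolution (no avg-pooling)
        net = layer._transition_layer(net, stage, output_channels[stage],
                                      is_avgpool=(stage != 3), is_training=is_training)
    # global average pooling + fully connected classifier
    return layer._classification_layer(net, num_class, is_training=is_training)
```

For CIFAR-10 the expected input is a [batchsize, 32, 32, 3] float tensor; the classification layer derives its pooling size from the static shape, so the batch size must be fixed.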
19 | 
20 | # Configuration
21 | - GPU: GeForce GTX 1080Ti
22 | - Ubuntu 16.04
23 | 
24 | # Requirements
25 | - tensorflow >= 1.0
26 | - python 2.* or python 3.*
27 | - numpy
28 | - scipy
29 | - cPickle (Python 2 only; the python3 files use the built-in pickle)
30 | - Pillow
31 | 
32 | **Notes**: **Python 2 is the default**. If you use Python 3.*, cifar10.py, cache.py and dataset.py should be **replaced by the files stored in the python3 folder**. If you run into any problems, you can email me!
33 | 
34 | # Repo Structure
35 | The following tree shows the main structure of this repo.
36 | 
37 | ```text
38 | PeleeNet
39 | |———— data/              # stores the cifar10 dataset
40 |       |———— cifar10/
41 | |———— python3/           # python3 support
42 |       |———— cifar10.py
43 |       |———— cache.py
44 |       |———— dataset.py
45 | |———— main.py            # repo entry point
46 | |———— PeleeNet.py        # PeleeNet model class
47 | |———— layers.py          # stem block, dense block, transition layer
48 | |———— config.py          # parameter settings
49 | |———— utils.py           # generates the datasource
50 | |———— cifar10.py         # cifar10.py, cache.py and dataset.py read the cifar10 data
51 | |———— cache.py
52 | |———— dataset.py
53 | 
54 | # If you want to use your own dataset, add your dataset type at line 38 in utils.py.
55 | # Images are in [input_height, input_width, input_channel] format, labels are one-hot encoded.
56 | ```
57 | 
58 | # Usages
59 | ## Download Repo
60 |     $ git clone https://github.com/nnuyi/PeleeNet.git
61 |     $ cd PeleeNet
62 | 
63 | ## Datasets
64 | In this repo, due to limited computation, I mainly focus on the ***CIFAR10*** dataset.
65 | 
66 | - **CIFAR10:** You are required to download the *CIFAR10* dataset [here](https://www.cs.toronto.edu/~kriz/cifar.html), unzip it and store it in ***'./data/cifar10/'***; note that the **CIFAR-10 python version** is required. You can unzip it in ***'./data/cifar10/'*** using the following command:
67 | 
68 |       $ tar -zxvf cifar-10-python.tar.gz
69 |       # you will see that data_batch_* are stored in './data/cifar10/cifar-10-batches-py/'
70 | 
71 | ## Training
72 | ### [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html)
73 | 
74 |       $ python main.py --batchsize=128 \
75 |                        --is_training=True \
76 |                        --is_testing=False \
77 |                        --dataset=cifar10 \
78 |                        --input_height=32 \
79 |                        --input_width=32 \
80 |                        --input_channel=3 \
81 |                        --num_class=10
82 | 
83 |       # If a GPU is available, you can select it as the instruction below shows:
84 |       $ CUDA_VISIBLE_DEVICES=[no] \
85 |         python main.py --batchsize=128 \
86 |                        --is_training=True \
87 |                        --is_testing=False \
88 |                        --dataset=cifar10 \
89 |                        --input_height=32 \
90 |                        --input_width=32 \
91 |                        --input_channel=3 \
92 |                        --num_class=10
93 | 
94 |       # notes: [no] is the GPU device number; set it according to your machine, e.g.:
95 |       $ CUDA_VISIBLE_DEVICES=0 \
96 |         python main.py --batchsize=128 \
97 |                        --is_training=True \
98 |                        --is_testing=False \
99 |                        --dataset=cifar10 \
100 |                        --input_height=32 \
101 |                        --input_width=32 \
102 |                        --input_channel=3 \
103 |                        --num_class=10
104 | 
105 | ## Results
106 | ### Classification
107 | - After training, the test accuracy reaches **89.83%**.
108 | - The loss and training accuracy curves are shown below:
109 | 
110 | 
![loss and accuracy](./figure/loss_and_accuracy.png)
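
- Testing goes through the same entry point (main.py dispatches on the is_testing flag). Assuming a trained checkpoint already exists under './checkpoint', a test run might look like this (flag names taken from config.py):

      $ python main.py --batchsize=128 \
                       --is_training=False \
                       --is_testing=True \
                       --dataset=cifar10 \
                       --input_height=32 \
                       --input_width=32 \
                       --input_channel=3 \
                       --num_class=10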
111 | 
112 | # TODO
113 | - Continue fine-tuning hyperparameters to improve accuracy
114 | - Train on CIFAR-100
115 | - Train on Caltech-101
116 | 
117 | # References
118 | - [Pelee: A Real-Time Object Detection System on Mobile Devices](https://arxiv.org/pdf/1804.06882.pdf)
119 | - cifar10.py is derived from [Hvass-Labs's code](https://github.com/Hvass-Labs/TensorFlow-Tutorials)
120 | 
121 | # Contact
122 | Email: computerscienceyyz@163.com
123 | 

--------------------------------------------------------------------------------
/cache.py:
--------------------------------------------------------------------------------
1 | ########################################################################
2 | #
3 | # Cache-wrapper for a function or class.
4 | #
5 | # Save the result of calling a function or creating an object-instance
6 | # to harddisk. This is used to persist the data so it can be reloaded
7 | # very quickly and easily.
8 | #
9 | # Implemented in Python 3.5
10 | #
11 | ########################################################################
12 | #
13 | # This file is part of the TensorFlow Tutorials available at:
14 | #
15 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials
16 | #
17 | # Published under the MIT License. See the file LICENSE for details.
18 | #
19 | # Copyright 2016 by Magnus Erik Hvass Pedersen
20 | #
21 | ########################################################################
22 | 
23 | import os
24 | import pickle
25 | import numpy as np
26 | 
27 | ########################################################################
28 | 
29 | 
30 | def cache(cache_path, fn, *args, **kwargs):
31 |     """
32 |     Cache-wrapper for a function or class. If the cache-file exists
33 |     then the data is reloaded and returned, otherwise the function
34 |     is called and the result is saved to cache. The fn-argument can
35 |     also be a class instead, in which case an object-instance is
36 |     created and saved to the cache-file.
37 |     :param cache_path:
38 |         File-path for the cache-file.
39 |     :param fn:
40 |         Function or class to be called.
41 |     :param args:
42 |         Arguments to the function or class-init.
43 |     :param kwargs:
44 |         Keyword arguments to the function or class-init.
45 |     :return:
46 |         The result of calling the function or creating the object-instance.
47 |     """
48 | 
49 |     # If the cache-file exists.
50 |     if os.path.exists(cache_path):
51 |         # Load the cached data from the file.
52 |         with open(cache_path, mode='rb') as file:
53 |             obj = pickle.load(file)
54 | 
55 |         print("- Data loaded from cache-file: " + cache_path)
56 |     else:
57 |         # The cache-file does not exist.
58 | 
59 |         # Call the function / class-init with the supplied arguments.
60 |         obj = fn(*args, **kwargs)
61 | 
62 |         # Save the data to a cache-file.
63 |         with open(cache_path, mode='wb') as file:
64 |             pickle.dump(obj, file)
65 | 
66 |         print("- Data saved to cache-file: " + cache_path)
67 | 
68 |     return obj
69 | 
70 | 
71 | ########################################################################
72 | 
73 | 
74 | def convert_numpy2pickle(in_path, out_path):
75 |     """
76 |     Convert a numpy-file to pickle-file.
77 |     The first version of the cache-function used numpy for saving the data.
78 |     Instead of re-calculating all the data, you can just convert the
79 |     cache-file using this function.
80 |     :param in_path:
81 |         Input file in numpy-format written using numpy.save().
82 |     :param out_path:
83 |         Output file written as a pickle-file.
84 |     :return:
85 |         Nothing.
86 |     """
87 | 
88 |     # Load the data using numpy.
89 | data = np.load(in_path) 90 | 91 | # Save the data using pickle. 92 | with open(out_path, mode='wb') as file: 93 | pickle.dump(data, file) 94 | 95 | 96 | ######################################################################## 97 | 98 | if __name__ == '__main__': 99 | # This is a short example of using a cache-file. 100 | 101 | # This is the function that will only get called if the result 102 | # is not already saved in the cache-file. This would normally 103 | # be a function that takes a long time to compute, or if you 104 | # need persistent data for some other reason. 105 | def expensive_function(a, b): 106 | return a * b 107 | 108 | print('Computing expensive_function() ...') 109 | 110 | # Either load the result from a cache-file if it already exists, 111 | # otherwise calculate expensive_function(a=123, b=456) and 112 | # save the result to the cache-file for next time. 113 | result = cache(cache_path='cache_expensive_function.pkl', 114 | fn=expensive_function, a=123, b=456) 115 | 116 | print('result =', result) 117 | 118 | # Newline. 119 | print() 120 | 121 | # This is another example which saves an object to a cache-file. 122 | 123 | # We want to cache an object-instance of this class. 124 | # The motivation is to do an expensive computation only once, 125 | # or if we need to persist the data for some other reason. 126 | class ExpensiveClass: 127 | def __init__(self, c, d): 128 | self.c = c 129 | self.d = d 130 | self.result = c * d 131 | 132 | def print_result(self): 133 | print('c =', self.c) 134 | print('d =', self.d) 135 | print('result = c * d =', self.result) 136 | 137 | print('Creating object from ExpensiveClass() ...') 138 | 139 | # Either load the object from a cache-file if it already exists, 140 | # otherwise make an object-instance ExpensiveClass(c=123, d=456) 141 | # and save the object to the cache-file for the next time. 142 | obj = cache(cache_path='cache_ExpensiveClass.pkl', 143 | fn=ExpensiveClass, c=123, d=456) 144 | 145 | obj.print_result() 146 | 147 | ######################################################################## 148 | 149 | -------------------------------------------------------------------------------- /python3/cache.py: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | # 3 | # Cache-wrapper for a function or class. 4 | # 5 | # Save the result of calling a function or creating an object-instance 6 | # to harddisk. This is used to persist the data so it can be reloaded 7 | # very quickly and easily. 8 | # 9 | # Implemented in Python 3.5 10 | # 11 | ######################################################################## 12 | # 13 | # This file is part of the TensorFlow Tutorials available at: 14 | # 15 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials 16 | # 17 | # Published under the MIT License. See the file LICENSE for details. 18 | # 19 | # Copyright 2016 by Magnus Erik Hvass Pedersen 20 | # 21 | ######################################################################## 22 | 23 | import os 24 | import pickle 25 | import numpy as np 26 | 27 | ######################################################################## 28 | 29 | 30 | def cache(cache_path, fn, *args, **kwargs): 31 | """ 32 | Cache-wrapper for a function or class. If the cache-file exists 33 | then the data is reloaded and returned, otherwise the function 34 | is called and the result is saved to cache. 
The fn-argument can 35 | also be a class instead, in which case an object-instance is 36 | created and saved to the cache-file. 37 | :param cache_path: 38 | File-path for the cache-file. 39 | :param fn: 40 | Function or class to be called. 41 | :param args: 42 | Arguments to the function or class-init. 43 | :param kwargs: 44 | Keyword arguments to the function or class-init. 45 | :return: 46 | The result of calling the function or creating the object-instance. 47 | """ 48 | 49 | # If the cache-file exists. 50 | if os.path.exists(cache_path): 51 | # Load the cached data from the file. 52 | with open(cache_path, mode='rb') as file: 53 | obj = pickle.load(file) 54 | 55 | print("- Data loaded from cache-file: " + cache_path) 56 | else: 57 | # The cache-file does not exist. 58 | 59 | # Call the function / class-init with the supplied arguments. 60 | obj = fn(*args, **kwargs) 61 | 62 | # Save the data to a cache-file. 63 | with open(cache_path, mode='wb') as file: 64 | pickle.dump(obj, file) 65 | 66 | print("- Data saved to cache-file: " + cache_path) 67 | 68 | return obj 69 | 70 | 71 | ######################################################################## 72 | 73 | 74 | def convert_numpy2pickle(in_path, out_path): 75 | """ 76 | Convert a numpy-file to pickle-file. 77 | The first version of the cache-function used numpy for saving the data. 78 | Instead of re-calculating all the data, you can just convert the 79 | cache-file using this function. 80 | :param in_path: 81 | Input file in numpy-format written using numpy.save(). 82 | :param out_path: 83 | Output file written as a pickle-file. 84 | :return: 85 | Nothing. 86 | """ 87 | 88 | # Load the data using numpy. 89 | data = np.load(in_path) 90 | 91 | # Save the data using pickle. 92 | with open(out_path, mode='wb') as file: 93 | pickle.dump(data, file) 94 | 95 | 96 | ######################################################################## 97 | 98 | if __name__ == '__main__': 99 | # This is a short example of using a cache-file. 100 | 101 | # This is the function that will only get called if the result 102 | # is not already saved in the cache-file. This would normally 103 | # be a function that takes a long time to compute, or if you 104 | # need persistent data for some other reason. 105 | def expensive_function(a, b): 106 | return a * b 107 | 108 | print('Computing expensive_function() ...') 109 | 110 | # Either load the result from a cache-file if it already exists, 111 | # otherwise calculate expensive_function(a=123, b=456) and 112 | # save the result to the cache-file for next time. 113 | result = cache(cache_path='cache_expensive_function.pkl', 114 | fn=expensive_function, a=123, b=456) 115 | 116 | print('result =', result) 117 | 118 | # Newline. 119 | print() 120 | 121 | # This is another example which saves an object to a cache-file. 122 | 123 | # We want to cache an object-instance of this class. 124 | # The motivation is to do an expensive computation only once, 125 | # or if we need to persist the data for some other reason. 
126 | class ExpensiveClass: 127 | def __init__(self, c, d): 128 | self.c = c 129 | self.d = d 130 | self.result = c * d 131 | 132 | def print_result(self): 133 | print('c =', self.c) 134 | print('d =', self.d) 135 | print('result = c * d =', self.result) 136 | 137 | print('Creating object from ExpensiveClass() ...') 138 | 139 | # Either load the object from a cache-file if it already exists, 140 | # otherwise make an object-instance ExpensiveClass(c=123, d=456) 141 | # and save the object to the cache-file for the next time. 142 | obj = cache(cache_path='cache_ExpensiveClass.pkl', 143 | fn=ExpensiveClass, c=123, d=456) 144 | 145 | obj.print_result() 146 | 147 | ######################################################################## 148 | 149 | -------------------------------------------------------------------------------- /layers.py: -------------------------------------------------------------------------------- 1 | # coding='utf-8' 2 | ''' 3 | author: Youzhao Yang 4 | date: 05/08/2018 5 | github: https://github.com/nnuyi 6 | ''' 7 | 8 | import tensorflow as tf 9 | import tensorflow.contrib.slim as slim 10 | 11 | class Layer: 12 | # stem_block 13 | def _stem_block(self, input_x, num_init_channel=32, is_training=True, reuse=False): 14 | block_name = 'stem_block' 15 | with tf.variable_scope(block_name) as scope: 16 | if reuse: 17 | scope.reuse_variables() 18 | with slim.arg_scope([slim.conv2d], weights_initializer=tf.truncated_normal_initializer(stddev=0.02), 19 | normalizer_fn=slim.batch_norm, 20 | activation_fn=tf.nn.relu) as s: 21 | conv0 = slim.conv2d(input_x, num_init_channel, 3, 1, scope='stem_block_conv0') 22 | 23 | conv1_l0 = slim.conv2d(conv0, int(num_init_channel/2), 1, 1, scope='stem_block_conv1_l0') 24 | conv1_l1 = slim.conv2d(conv1_l0, num_init_channel, 3, 1, scope='stem_block_conv1_l1') 25 | 26 | maxpool1_r0 = slim.max_pool2d(conv0, 2, 1, padding='SAME', scope='stem_block_maxpool1_r0') 27 | 28 | filter_concat = tf.concat([conv1_l1, maxpool1_r0], axis=-1) 29 | 30 | output = slim.conv2d(filter_concat, num_init_channel, 1, 1, scope='stem_block_output') 31 | 32 | return output 33 | 34 | def _dense_block(self, input_x, stage, num_block, k, bottleneck_width, is_training=True, reuse=False): 35 | with slim.arg_scope([slim.conv2d], weights_initializer=tf.truncated_normal_initializer(stddev=0.02), 36 | normalizer_fn=slim.batch_norm, 37 | activation_fn=tf.nn.relu) as s: 38 | output = input_x 39 | 40 | for index in range(num_block): 41 | dense_block_name = 'stage_{}_dense_block_{}'.format(stage, index) 42 | with tf.variable_scope(dense_block_name) as scope: 43 | if reuse: 44 | scope.reuse_variables() 45 | 46 | inter_channel = k*bottleneck_width 47 | # left channel 48 | conv_left_0 = slim.conv2d(output, inter_channel, 1, 1, scope='conv_left_0') 49 | conv_left_1 = slim.conv2d(conv_left_0, k, 3, 1, scope='conv_left_1') 50 | # right channel 51 | conv_right_0 = slim.conv2d(output, inter_channel, 1, 1, scope='conv_right_0') 52 | conv_right_1 = slim.conv2d(conv_right_0, k, 3, 1, scope='conv_right_1') 53 | conv_right_2 = slim.conv2d(conv_right_1, k, 3, 1, scope='conv_right_2') 54 | 55 | output = tf.concat([output, conv_left_1, conv_right_2], axis=3) 56 | return output 57 | 58 | def _transition_layer(self, input_x, stage, output_channel, is_avgpool=True, is_training=True, reuse=False): 59 | transition_layer_name = 'stage_{}_transition_layer'.format(stage) 60 | 61 | with tf.variable_scope(transition_layer_name) as scope: 62 | if reuse: 63 | scope.reuse_variables() 64 | with 
slim.arg_scope([slim.conv2d], weights_initializer=tf.truncated_normal_initializer(stddev=0.02),
65 |                                 normalizer_fn=slim.batch_norm,
66 |                                 activation_fn=tf.nn.relu) as s:
67 |                 conv0 = slim.conv2d(input_x, output_channel, 1, 1, scope='transition_layer_conv0')
68 |                 if is_avgpool:
69 |                     output = slim.avg_pool2d(conv0, 2, 2, scope='transition_layer_avgpool')
70 |                 else:
71 |                     output = conv0
72 |             return output
73 | 
74 |     def _classification_layer(self, input_x, num_class, keep_prob=0.5, is_training=True, reuse=False):
75 |         classification_layer_name = 'classification_layer'
76 |         with tf.variable_scope(classification_layer_name) as scope:
77 |             if reuse:
78 |                 scope.reuse_variables()
79 |             with slim.arg_scope([slim.fully_connected], weights_initializer=tf.truncated_normal_initializer(stddev=0.02),
80 |                                 normalizer_fn=None,
81 |                                 activation_fn=None), \
82 |                  slim.arg_scope([slim.dropout], keep_prob=keep_prob) as s:
83 | 
84 |                 shape = input_x.get_shape().as_list()
85 |                 filter_size = [shape[1], shape[2]]
86 |                 global_avgpool = slim.avg_pool2d(input_x, filter_size, scope='global_avgpool')
87 | 
88 |                 # dropout
89 |                 # dropout = slim.dropout(global_avgpool)
90 |                 flatten = tf.reshape(global_avgpool, [shape[0], -1])
91 |                 logits = slim.fully_connected(flatten, num_class, scope='fc')
92 | 
93 |         return logits
94 | 
95 | if __name__=='__main__':
96 |     input_x = tf.Variable(tf.random_normal([64,224,224,32]))
97 |     layer = Layer()
98 |     stem_block_output = layer._stem_block(input_x, 32)
99 |     dense_block_output = layer._dense_block(input_x, 0, 3, 16, 2)
100 |     transition_layer_output = layer._transition_layer(dense_block_output, 0, 128, is_avgpool=False)  # output_channel=128 matches the dense block output (32 + 3*2*16)
101 |     print(stem_block_output.get_shape().as_list())
102 |     print(dense_block_output.get_shape().as_list())
103 |     print(transition_layer_output.get_shape().as_list())
104 | 

--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | # coding='utf-8'
2 | '''
3 |     author: Youzhao Yang
4 |     date: 05/08/2018
5 |     github: https://github.com/nnuyi
6 | '''
7 | 
8 | import random
9 | import numpy as np
10 | import cifar10
11 | from tensorflow.examples.tutorials.mnist import input_data
12 | 
13 | from PIL import Image, ImageEnhance, ImageOps, ImageFile
14 | 
15 | import matplotlib.pyplot as plt
16 | class Datasource:
17 |     def __init__(self, images, labels):
18 |         self.images = images
19 |         self.labels = labels
20 | 
21 | def get_data(data_type='mnist', is_training=True):
22 |     if data_type == 'mnist':
23 |         raw_data = input_data.read_data_sets('./data/mnist/', one_hot=True)
24 |         shape = [28,28,1]
25 |         if is_training:
26 |             size = len(raw_data.train.images)
27 |             images = np.reshape(raw_data.train.images, [size]+shape)
28 |             labels = raw_data.train.labels
29 |         else:
30 |             size = len(raw_data.test.images)
31 |             images = np.reshape(raw_data.test.images, [size]+shape)
32 |             labels = raw_data.test.labels
33 |     elif data_type == 'cifar10':
34 |         if is_training:
35 |             images, _, labels = cifar10.load_training_data()
36 |         else:
37 |             images, _, labels = cifar10.load_test_data()
38 |     else:
39 |         raise Exception('data type error: {}'.format(data_type))
40 | 
41 |     datasource = Datasource(images, labels)
42 |     return datasource
43 | 
44 | def gen_data(datasource, is_training=True):
45 |     while True:
46 |         indices = list(range(len(datasource.images)))
47 |         random.shuffle(indices)
48 |         if is_training:
49 |             for i in indices:
50 |                 image = pre_process(datasource.images[i])
51 |                 # image = datasource.images[i]
52 |                 label = datasource.labels[i]
53 |                 yield image, label
54 |         else:
55 |             for i in indices:
56 |                 image = datasource.images[i]
57 |                 label = datasource.labels[i]
58 |                 yield image, label
59 | 
60 | def gen_batch_data(datasource, batchsize, is_training=True):
61 |     data_gen = gen_data(datasource, is_training=is_training)
62 |     while True:
63 |         images = []
64 |         labels = []
65 |         for i in range(batchsize):
66 |             image, label = next(data_gen)
67 |             images.append(image)
68 |             labels.append(label)
69 |         yield np.array(images), np.array(labels)
70 | 
71 | def data_augment(image):
72 |     shape = image.shape
73 |     is_colorful = shape[-1]==3
74 |     # numpy.ndarray to PIL
75 |     if not is_colorful:
76 |         image = Image.fromarray(np.squeeze(np.uint8(image*255)))
77 |     else:
78 |         image = Image.fromarray(np.uint8(image*255))
79 | 
80 |     def distort_color(image):
81 |         # saturation
82 |         random_factor = np.random.randint(0, 31) / 10.
83 |         color_image = ImageEnhance.Color(image).enhance(random_factor)
84 |         # brightness
85 |         random_factor = np.random.randint(10, 21) / 10.
86 |         brightness_image = ImageEnhance.Brightness(color_image).enhance(random_factor)
87 |         # contrast
88 |         random_factor = np.random.randint(10, 21) / 10.
89 |         contrast_image = ImageEnhance.Contrast(brightness_image).enhance(random_factor)
90 |         # sharpness
91 |         random_factor = np.random.randint(0, 31) / 10.
92 |         sharpness_image = ImageEnhance.Sharpness(contrast_image).enhance(random_factor)
93 | 
94 |         return sharpness_image
95 | 
96 |     def distort(image):
97 |         distort_image = image
98 |         # random rotation: angle drawn from 0 to 14 degrees
99 |         random_angle = np.random.randint(0,15)
100 |         distort_image = image.rotate(random_angle, Image.BICUBIC)
101 |         '''
102 |         # random center crop
103 |         random_scale = np.random.uniform(0.7,1)
104 |         width, height = distort_image.size[0], distort_image.size[1]
105 |         random_width, random_height = width*random_scale, height*random_scale
106 |         width_offset, height_offset = (width-random_width)/2, (height-random_height)/2
107 |         # (left, top, right, bottom)
108 |         bounding_box = (width_offset, height_offset, width_offset+random_width, height_offset+random_height)
109 |         distort_image = distort_image.crop(bounding_box)
110 |         # resize to original size
111 |         distort_image = distort_image.resize((width, height))
112 |         '''
113 |         # random flip
114 |         random_flip = np.random.randint(0,3)
115 |         if random_flip == 0:
116 |             distort_image = distort_image.transpose(Image.FLIP_LEFT_RIGHT)
117 |         elif random_flip == 1:
118 |             distort_image = distort_image.transpose(Image.FLIP_TOP_BOTTOM)
119 |         else:
120 |             pass
121 | 
122 |         # color jittering
123 |         if is_colorful:
124 |             distort_image = distort_color(distort_image)
125 | 
126 |         return distort_image
127 | 
128 |     # data augment
129 |     distort_image = distort(image)
130 |     # PIL to numpy.ndarray
131 |     # plt.imshow(np.array(distort_image).astype(np.float32)/255.)
132 |     # plt.show()
133 |     if not is_colorful:
134 |         distort_image = np.expand_dims(np.array(distort_image).astype(np.float32)/255., -1)
135 |     else:
136 |         distort_image = np.array(distort_image).astype(np.float32)/255.
137 | 
138 |     return distort_image
139 | 
140 | def pre_process(image):
141 |     image = data_augment(image)
142 |     return image
143 | 
144 | # test
145 | if __name__=='__main__':
146 |     # get_data() expects the dataset type string and reads mnist from './data/mnist/' itself
147 |     datasource = get_data('mnist')
148 |     data_gen = gen_batch_data(datasource, 10)
149 |     for i in range(10):
150 |         images, labels = next(data_gen)
151 |         print(images.shape)
152 |         print(labels.shape)
153 | 

--------------------------------------------------------------------------------
/cifar10.py:
--------------------------------------------------------------------------------
1 | ########################################################################
2 | #
3 | # Functions for downloading the CIFAR-10 data-set from the internet
4 | # and loading it into memory.
5 | #
6 | # Implemented in Python 2.7
7 | #
8 | # Usage:
9 | # 1) Set the variable data_path with the desired storage path.
10 | # 2) Call maybe_download_and_extract() to download the data-set
11 | #    if it is not already located in the given data_path.
12 | # 3) Call load_class_names() to get an array of the class-names.
13 | # 4) Call load_training_data() and load_test_data() to get
14 | #    the images, class-numbers and one-hot encoded class-labels
15 | #    for the training-set and test-set.
16 | # 5) Use the returned data in your own program.
17 | #
18 | # Format:
19 | # The images for the training- and test-sets are returned as 4-dim numpy
20 | # arrays each with the shape: [image_number, height, width, channel]
21 | # where the individual pixels are floats between 0.0 and 1.0.
22 | #
23 | ########################################################################
24 | #
25 | # This file is part of the TensorFlow Tutorials available at:
26 | #
27 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials
28 | #
29 | # Published under the MIT License. See the file LICENSE for details.
30 | #
31 | # Copyright 2016 by Magnus Erik Hvass Pedersen
32 | #
33 | ########################################################################
34 | # -*- coding=utf-8 -*-
35 | import numpy as np
36 | import cPickle as pickle
37 | import os
38 | from dataset import one_hot_encoded
39 | 
40 | ########################################################################
41 | 
42 | # Directory where you want to download and save the data-set.
43 | # Set this before you start calling any of the functions below.
44 | data_path = "data/cifar10/"
45 | 
46 | # URL for the data-set on the internet.
47 | data_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
48 | 
49 | ########################################################################
50 | # Various constants for the size of the images.
51 | # Use these constants in your own program.
52 | 
53 | # Width and height of each image.
54 | img_size = 32
55 | 
56 | # Number of channels in each image, 3 channels: Red, Green, Blue.
57 | num_channels = 3
58 | 
59 | # Length of an image when flattened to a 1-dim array.
60 | img_size_flat = img_size * img_size * num_channels
61 | 
62 | # Number of classes.
63 | num_classes = 10
64 | 
65 | ########################################################################
66 | # Various constants used to allocate arrays of the correct size.
67 | 
68 | # Number of files for the training-set.
69 | _num_files_train = 5
70 | 
71 | # Number of images for each batch-file in the training-set.
72 | _images_per_file = 10000
73 | 
74 | # Total number of images in the training-set.
75 | # This is used to pre-allocate arrays for efficiency.
76 | _num_images_train = _num_files_train * _images_per_file 77 | 78 | ######################################################################## 79 | # Private functions for downloading, unpacking and loading data-files. 80 | 81 | 82 | def _get_file_path(filename=""): 83 | """ 84 | Return the full path of a data-file for the data-set. 85 | If filename=="" then return the directory of the files. 86 | """ 87 | 88 | return os.path.join(data_path, "cifar-10-batches-py/", filename) 89 | 90 | 91 | def _unpickle(filename): 92 | """ 93 | Unpickle the given file and return the data. 94 | Note that the appropriate dir-name is prepended the filename. 95 | """ 96 | 97 | # Create full path for the file. 98 | file_path = _get_file_path(filename) 99 | 100 | print("Loading data: " + file_path) 101 | 102 | with open(file_path, mode='rb') as file: 103 | # In Python 3.X it is important to set the encoding, 104 | # otherwise an exception is raised here. 105 | data = pickle.load(file) 106 | 107 | return data 108 | 109 | 110 | def _convert_images(raw): 111 | """ 112 | Convert images from the CIFAR-10 format and 113 | return a 4-dim array with shape: [image_number, height, width, channel] 114 | where the pixels are floats between 0.0 and 1.0. 115 | """ 116 | 117 | # Convert the raw images from the data-files to floating-points. 118 | raw_float = np.array(raw, dtype=float) / 255.0 119 | 120 | # Reshape the array to 4-dimensions. 121 | images = raw_float.reshape([-1, num_channels, img_size, img_size]) 122 | 123 | # Reorder the indices of the array. 124 | images = images.transpose([0, 2, 3, 1]) 125 | 126 | return images 127 | 128 | 129 | def _load_data(filename): 130 | """ 131 | Load a pickled data-file from the CIFAR-10 data-set 132 | and return the converted images (see above) and the class-number 133 | for each image. 134 | """ 135 | 136 | # Load the pickled data-file. 137 | data = _unpickle(filename) 138 | 139 | # Get the raw images. 140 | raw_images = data[b'data'] 141 | 142 | # Get the class-numbers for each image. Convert to numpy-array. 143 | cls = np.array(data[b'labels']) 144 | 145 | # Convert the images. 146 | images = _convert_images(raw_images) 147 | 148 | return images, cls 149 | 150 | 151 | ######################################################################## 152 | def load_class_names(): 153 | """ 154 | Load the names for the classes in the CIFAR-10 data-set. 155 | Returns a list with the names. Example: names[3] is the name 156 | associated with class-number 3. 157 | """ 158 | 159 | # Load the class-names from the pickled file. 160 | raw = _unpickle(filename="batches.meta")[b'label_names'] 161 | 162 | # Convert from binary strings. 163 | names = [x.decode('utf-8') for x in raw] 164 | 165 | return names 166 | 167 | 168 | def load_training_data(): 169 | """ 170 | Load all the training-data for the CIFAR-10 data-set. 171 | The data-set is split into 5 data-files which are merged here. 172 | Returns the images, class-numbers and one-hot encoded class-labels. 173 | """ 174 | 175 | # Pre-allocate the arrays for the images and class-numbers for efficiency. 176 | images = np.zeros(shape=[_num_images_train, img_size, img_size, num_channels], dtype=float) 177 | cls = np.zeros(shape=[_num_images_train], dtype=int) 178 | 179 | # Begin-index for the current batch. 180 | begin = 0 181 | 182 | # For each data-file. 183 | for i in range(_num_files_train): 184 | # Load the images and class-numbers from the data-file. 
185 | images_batch, cls_batch = _load_data(filename="data_batch_" + str(i + 1)) 186 | 187 | # Number of images in this batch. 188 | num_images = len(images_batch) 189 | 190 | # End-index for the current batch. 191 | end = begin + num_images 192 | 193 | # Store the images into the array. 194 | images[begin:end, :] = images_batch 195 | 196 | # Store the class-numbers into the array. 197 | cls[begin:end] = cls_batch 198 | 199 | # The begin-index for the next batch is the current end-index. 200 | begin = end 201 | 202 | return images, cls, one_hot_encoded(class_numbers=cls, num_classes=num_classes) 203 | 204 | 205 | def load_test_data(): 206 | """ 207 | Load all the test-data for the CIFAR-10 data-set. 208 | Returns the images, class-numbers and one-hot encoded class-labels. 209 | """ 210 | 211 | images, cls = _load_data(filename="test_batch") 212 | 213 | return images, cls, one_hot_encoded(class_numbers=cls, num_classes=num_classes) 214 | 215 | ######################################################################## 216 | -------------------------------------------------------------------------------- /python3/cifar10.py: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | # 3 | # Functions for downloading the CIFAR-10 data-set from the internet 4 | # and loading it into memory. 5 | # 6 | # Implemented in Python 3.5 7 | # 8 | # Usage: 9 | # 1) Set the variable data_path with the desired storage path. 10 | # 2) Call maybe_download_and_extract() to download the data-set 11 | # if it is not already located in the given data_path. 12 | # 3) Call load_class_names() to get an array of the class-names. 13 | # 4) Call load_training_data() and load_test_data() to get 14 | # the images, class-numbers and one-hot encoded class-labels 15 | # for the training-set and test-set. 16 | # 5) Use the returned data in your own program. 17 | # 18 | # Format: 19 | # The images for the training- and test-sets are returned as 4-dim numpy 20 | # arrays each with the shape: [image_number, height, width, channel] 21 | # where the individual pixels are floats between 0.0 and 1.0. 22 | # 23 | ######################################################################## 24 | # 25 | # This file is part of the TensorFlow Tutorials available at: 26 | # 27 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials 28 | # 29 | # Published under the MIT License. See the file LICENSE for details. 30 | # 31 | # Copyright 2016 by Magnus Erik Hvass Pedersen 32 | # 33 | ######################################################################## 34 | 35 | import numpy as np 36 | import pickle 37 | import os 38 | from dataset import one_hot_encoded 39 | 40 | ######################################################################## 41 | 42 | # Directory where you want to download and save the data-set. 43 | # Set this before you start calling any of the functions below. 44 | data_path = "data/cifar10/" 45 | 46 | # URL for the data-set on the internet. 47 | data_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 48 | 49 | ######################################################################## 50 | # Various constants for the size of the images. 51 | # Use these constants in your own program. 52 | 53 | # Width and height of each image. 54 | img_size = 32 55 | 56 | # Number of channels in each image, 3 channels: Red, Green, Blue. 57 | num_channels = 3 58 | 59 | # Length of an image when flattened to a 1-dim array. 
60 | img_size_flat = img_size * img_size * num_channels
61 | 
62 | # Number of classes.
63 | num_classes = 10
64 | 
65 | ########################################################################
66 | # Various constants used to allocate arrays of the correct size.
67 | 
68 | # Number of files for the training-set.
69 | _num_files_train = 5
70 | 
71 | # Number of images for each batch-file in the training-set.
72 | _images_per_file = 10000
73 | 
74 | # Total number of images in the training-set.
75 | # This is used to pre-allocate arrays for efficiency.
76 | _num_images_train = _num_files_train * _images_per_file
77 | 
78 | ########################################################################
79 | # Private functions for downloading, unpacking and loading data-files.
80 | 
81 | 
82 | def _get_file_path(filename=""):
83 |     """
84 |     Return the full path of a data-file for the data-set.
85 |     If filename=="" then return the directory of the files.
86 |     """
87 | 
88 |     return os.path.join(data_path, "cifar-10-batches-py/", filename)
89 | 
90 | 
91 | def _unpickle(filename):
92 |     """
93 |     Unpickle the given file and return the data.
94 |     Note that the appropriate dir-name is prepended to the filename.
95 |     """
96 | 
97 |     # Create full path for the file.
98 |     file_path = _get_file_path(filename)
99 | 
100 |     print("Loading data: " + file_path)
101 | 
102 |     with open(file_path, mode='rb') as file:
103 |         # In Python 3.X it is important to set the encoding,
104 |         # otherwise an exception is raised here.
105 |         data = pickle.load(file, encoding='bytes')
106 | 
107 |     return data
108 | 
109 | 
110 | def _convert_images(raw):
111 |     """
112 |     Convert images from the CIFAR-10 format and
113 |     return a 4-dim array with shape: [image_number, height, width, channel]
114 |     where the pixels are floats between 0.0 and 1.0.
115 |     """
116 | 
117 |     # Convert the raw images from the data-files to floating-points.
118 |     raw_float = np.array(raw, dtype=float) / 255.0
119 | 
120 |     # Reshape the array to 4-dimensions.
121 |     images = raw_float.reshape([-1, num_channels, img_size, img_size])
122 | 
123 |     # Reorder the indices of the array.
124 |     images = images.transpose([0, 2, 3, 1])
125 | 
126 |     return images
127 | 
128 | 
129 | def _load_data(filename):
130 |     """
131 |     Load a pickled data-file from the CIFAR-10 data-set
132 |     and return the converted images (see above) and the class-number
133 |     for each image.
134 |     """
135 | 
136 |     # Load the pickled data-file.
137 |     data = _unpickle(filename)
138 | 
139 |     # Get the raw images.
140 |     raw_images = data[b'data']
141 | 
142 |     # Get the class-numbers for each image. Convert to numpy-array.
143 |     cls = np.array(data[b'labels'])
144 | 
145 |     # Convert the images.
146 |     images = _convert_images(raw_images)
147 | 
148 |     return images, cls
149 | 
150 | 
151 | ########################################################################
152 | # Public functions that you may call to download the data-set from
153 | # the internet and load the data into memory.
154 | 
155 | 
156 | def maybe_download_and_extract():
157 |     """
158 |     Download and extract the CIFAR-10 data-set if it doesn't already exist
159 |     in data_path (set this variable first to the desired path). NOTE: this relies on the 'download' helper module from the Hvass-Labs TensorFlow-Tutorials repo, which is not bundled with this repo.
160 |     """
161 | 
162 |     download.maybe_download_and_extract(url=data_url, download_dir=data_path)
163 | 
164 | 
165 | def load_class_names():
166 |     """
167 |     Load the names for the classes in the CIFAR-10 data-set.
168 |     Returns a list with the names. Example: names[3] is the name
169 |     associated with class-number 3.
170 |     """
171 | 
172 |     # Load the class-names from the pickled file.
173 |     raw = _unpickle(filename="batches.meta")[b'label_names']
174 | 
175 |     # Convert from binary strings.
176 |     names = [x.decode('utf-8') for x in raw]
177 | 
178 |     return names
179 | 
180 | 
181 | def load_training_data():
182 |     """
183 |     Load all the training-data for the CIFAR-10 data-set.
184 |     The data-set is split into 5 data-files which are merged here.
185 |     Returns the images, class-numbers and one-hot encoded class-labels.
186 |     """
187 | 
188 |     # Pre-allocate the arrays for the images and class-numbers for efficiency.
189 |     images = np.zeros(shape=[_num_images_train, img_size, img_size, num_channels], dtype=float)
190 |     cls = np.zeros(shape=[_num_images_train], dtype=int)
191 | 
192 |     # Begin-index for the current batch.
193 |     begin = 0
194 | 
195 |     # For each data-file.
196 |     for i in range(_num_files_train):
197 |         # Load the images and class-numbers from the data-file.
198 |         images_batch, cls_batch = _load_data(filename="data_batch_" + str(i + 1))
199 | 
200 |         # Number of images in this batch.
201 |         num_images = len(images_batch)
202 | 
203 |         # End-index for the current batch.
204 |         end = begin + num_images
205 | 
206 |         # Store the images into the array.
207 |         images[begin:end, :] = images_batch
208 | 
209 |         # Store the class-numbers into the array.
210 |         cls[begin:end] = cls_batch
211 | 
212 |         # The begin-index for the next batch is the current end-index.
213 |         begin = end
214 | 
215 |     return images, cls, one_hot_encoded(class_numbers=cls, num_classes=num_classes)
216 | 
217 | 
218 | def load_test_data():
219 |     """
220 |     Load all the test-data for the CIFAR-10 data-set.
221 |     Returns the images, class-numbers and one-hot encoded class-labels.
222 |     """
223 | 
224 |     images, cls = _load_data(filename="test_batch")
225 | 
226 |     return images, cls, one_hot_encoded(class_numbers=cls, num_classes=num_classes)
227 | 
228 | ########################################################################
229 | 

--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | ########################################################################
2 | #
3 | # Class for creating a data-set consisting of all files in a directory.
4 | #
5 | # Example usage is shown in the file knifey.py and Tutorial #09.
6 | #
7 | # Implemented in Python 3.5
8 | #
9 | ########################################################################
10 | #
11 | # This file is part of the TensorFlow Tutorials available at:
12 | #
13 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials
14 | #
15 | # Published under the MIT License. See the file LICENSE for details.
16 | #
17 | # Copyright 2016 by Magnus Erik Hvass Pedersen
18 | #
19 | ########################################################################
20 | 
21 | import numpy as np
22 | import os
23 | from cache import cache
24 | 
25 | ########################################################################
26 | 
27 | 
28 | def one_hot_encoded(class_numbers, num_classes=None):
29 |     """
30 |     Generate the One-Hot encoded class-labels from an array of integers.
31 |     For example, if class_number=2 and num_classes=4 then
32 |     the one-hot encoded label is the float array: [0. 0. 1. 0.]
33 |     :param class_numbers:
34 |         Array of integers with class-numbers.
35 |         Assume the integers are from zero to num_classes-1 inclusive.
36 |     :param num_classes:
37 |         Number of classes. If None then use max(cls)+1.
38 |     :return:
39 |         2-dim array of shape: [len(cls), num_classes]
40 |     """
41 | 
42 |     # Find the number of classes if None is provided.
43 |     if num_classes is None:
44 |         num_classes = np.max(class_numbers) + 1
45 | 
46 |     return np.eye(num_classes, dtype=float)[class_numbers]
47 | 
48 | 
49 | ########################################################################
50 | 
51 | 
52 | class DataSet:
53 |     def __init__(self, in_dir, exts='.jpg'):
54 |         """
55 |         Create a data-set consisting of the filenames in the given directory
56 |         and sub-dirs that match the given filename-extensions.
57 |         For example, the knifey-spoony data-set (see knifey.py) has the
58 |         following dir-structure:
59 |         knifey-spoony/forky/
60 |         knifey-spoony/knifey/
61 |         knifey-spoony/spoony/
62 |         knifey-spoony/forky/test/
63 |         knifey-spoony/knifey/test/
64 |         knifey-spoony/spoony/test/
65 |         This means there are 3 classes called: forky, knifey, and spoony.
66 |         If we set in_dir = "knifey-spoony/" and create a new DataSet-object
67 |         then it will scan through these directories and create a training-set
68 |         and test-set for each of these classes.
69 |         The training-set will contain a list of all the *.jpg filenames
70 |         in the following directories:
71 |         knifey-spoony/forky/
72 |         knifey-spoony/knifey/
73 |         knifey-spoony/spoony/
74 |         The test-set will contain a list of all the *.jpg filenames
75 |         in the following directories:
76 |         knifey-spoony/forky/test/
77 |         knifey-spoony/knifey/test/
78 |         knifey-spoony/spoony/test/
79 |         See the TensorFlow Tutorial #09 for a usage example.
80 |         :param in_dir:
81 |             Root-dir for the files in the data-set.
82 |             This would be 'knifey-spoony/' in the example above.
83 |         :param exts:
84 |             String or tuple of strings with valid filename-extensions.
85 |             Not case-sensitive.
86 |         :return:
87 |             Object instance.
88 |         """
89 | 
90 |         # Extend the input directory to the full path.
91 |         in_dir = os.path.abspath(in_dir)
92 | 
93 |         # Input directory.
94 |         self.in_dir = in_dir
95 | 
96 |         # Convert all file-extensions to lower-case.
97 |         self.exts = tuple(ext.lower() for ext in exts)
98 | 
99 |         # Names for the classes.
100 |         self.class_names = []
101 | 
102 |         # Filenames for all the files in the training-set.
103 |         self.filenames = []
104 | 
105 |         # Filenames for all the files in the test-set.
106 |         self.filenames_test = []
107 | 
108 |         # Class-number for each file in the training-set.
109 |         self.class_numbers = []
110 | 
111 |         # Class-number for each file in the test-set.
112 |         self.class_numbers_test = []
113 | 
114 |         # Total number of classes in the data-set.
115 |         self.num_classes = 0
116 | 
117 |         # For all files/dirs in the input directory.
118 |         for name in os.listdir(in_dir):
119 |             # Full path for the file / dir.
120 |             current_dir = os.path.join(in_dir, name)
121 | 
122 |             # If it is a directory.
123 |             if os.path.isdir(current_dir):
124 |                 # Add the dir-name to the list of class-names.
125 |                 self.class_names.append(name)
126 | 
127 |                 # Training-set.
128 | 
129 |                 # Get all the valid filenames in the dir (not sub-dirs).
130 |                 filenames = self._get_filenames(current_dir)
131 | 
132 |                 # Append them to the list of all filenames for the training-set.
133 |                 self.filenames.extend(filenames)
134 | 
135 |                 # The class-number for this class.
136 |                 class_number = self.num_classes
137 | 
138 |                 # Create an array of class-numbers.
139 |                 class_numbers = [class_number] * len(filenames)
140 | 
141 |                 # Append them to the list of all class-numbers for the training-set.
142 |                 self.class_numbers.extend(class_numbers)
143 | 
144 |                 # Test-set.
145 | 146 | # Get all the valid filenames in the sub-dir named 'test'. 147 | filenames_test = self._get_filenames(os.path.join(current_dir, 'test')) 148 | 149 | # Append them to the list of all filenames for the test-set. 150 | self.filenames_test.extend(filenames_test) 151 | 152 | # Create an array of class-numbers. 153 | class_numbers = [class_number] * len(filenames_test) 154 | 155 | # Append them to the list of all class-numbers for the test-set. 156 | self.class_numbers_test.extend(class_numbers) 157 | 158 | # Increase the total number of classes in the data-set. 159 | self.num_classes += 1 160 | 161 | def _get_filenames(self, dir): 162 | """ 163 | Create and return a list of filenames with matching extensions in the given directory. 164 | :param dir: 165 | Directory to scan for files. Sub-dirs are not scanned. 166 | :return: 167 | List of filenames. Only filenames. Does not include the directory. 168 | """ 169 | 170 | # Initialize empty list. 171 | filenames = [] 172 | 173 | # If the directory exists. 174 | if os.path.exists(dir): 175 | # Get all the filenames with matching extensions. 176 | for filename in os.listdir(dir): 177 | if filename.lower().endswith(self.exts): 178 | filenames.append(filename) 179 | 180 | return filenames 181 | 182 | def get_paths(self, test=False): 183 | """ 184 | Get the full paths for the files in the data-set. 185 | :param test: 186 | Boolean. Return the paths for the test-set (True) or training-set (False). 187 | :return: 188 | Iterator with strings for the path-names. 189 | """ 190 | 191 | if test: 192 | # Use the filenames and class-numbers for the test-set. 193 | filenames = self.filenames_test 194 | class_numbers = self.class_numbers_test 195 | 196 | # Sub-dir for test-set. 197 | test_dir = "test/" 198 | else: 199 | # Use the filenames and class-numbers for the training-set. 200 | filenames = self.filenames 201 | class_numbers = self.class_numbers 202 | 203 | # Don't use a sub-dir for test-set. 204 | test_dir = "" 205 | 206 | for filename, cls in zip(filenames, class_numbers): 207 | # Full path-name for the file. 208 | path = os.path.join(self.in_dir, self.class_names[cls], test_dir, filename) 209 | 210 | yield path 211 | 212 | def get_training_set(self): 213 | """ 214 | Return the list of paths for the files in the training-set, 215 | and the list of class-numbers as integers, 216 | and the class-numbers as one-hot encoded arrays. 217 | """ 218 | 219 | return list(self.get_paths()), \ 220 | np.asarray(self.class_numbers), \ 221 | one_hot_encoded(class_numbers=self.class_numbers, 222 | num_classes=self.num_classes) 223 | 224 | def get_test_set(self): 225 | """ 226 | Return the list of paths for the files in the test-set, 227 | and the list of class-numbers as integers, 228 | and the class-numbers as one-hot encoded arrays. 229 | """ 230 | 231 | return list(self.get_paths(test=True)), \ 232 | np.asarray(self.class_numbers_test), \ 233 | one_hot_encoded(class_numbers=self.class_numbers_test, 234 | num_classes=self.num_classes) 235 | 236 | 237 | ######################################################################## 238 | 239 | 240 | def load_cached(cache_path, in_dir): 241 | """ 242 | Wrapper-function for creating a DataSet-object, which will be 243 | loaded from a cache-file if it already exists, otherwise a new 244 | object will be created and saved to the cache-file. 
This is useful if you need to ensure the ordering of the
245 |     filenames is consistent every time you load the data-set,
246 |     for example if you use the DataSet-object in combination
247 |     with Transfer Values saved to another cache-file, see e.g.
248 |     Tutorial #09 for an example of this.
249 |     :param cache_path:
250 |         File-path for the cache-file.
251 |     :param in_dir:
252 |         Root-dir for the files in the data-set.
253 |         This is an argument for the DataSet-init function.
254 |     :return:
255 |         The DataSet-object.
256 |     """
257 | 
258 |     print("Creating dataset from the files in: " + in_dir)
259 | 
260 |     # If the object-instance for DataSet(in_dir=data_dir) already
261 |     # exists in the cache-file then reload it, otherwise create
262 |     # an object instance and save it to the cache-file for next time.
263 |     dataset = cache(cache_path=cache_path,
264 |                     fn=DataSet, in_dir=in_dir)
265 | 
266 |     return dataset
267 | 
268 | 
269 | ########################################################################
270 | 

--------------------------------------------------------------------------------
/python3/dataset.py:
--------------------------------------------------------------------------------
1 | ########################################################################
2 | #
3 | # Class for creating a data-set consisting of all files in a directory.
4 | #
5 | # Example usage is shown in the file knifey.py and Tutorial #09.
6 | #
7 | # Implemented in Python 3.5
8 | #
9 | ########################################################################
10 | #
11 | # This file is part of the TensorFlow Tutorials available at:
12 | #
13 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials
14 | #
15 | # Published under the MIT License. See the file LICENSE for details.
16 | #
17 | # Copyright 2016 by Magnus Erik Hvass Pedersen
18 | #
19 | ########################################################################
20 | 
21 | import numpy as np
22 | import os
23 | from cache import cache
24 | 
25 | ########################################################################
26 | 
27 | 
28 | def one_hot_encoded(class_numbers, num_classes=None):
29 |     """
30 |     Generate the One-Hot encoded class-labels from an array of integers.
31 |     For example, if class_number=2 and num_classes=4 then
32 |     the one-hot encoded label is the float array: [0. 0. 1. 0.]
33 |     :param class_numbers:
34 |         Array of integers with class-numbers.
35 |         Assume the integers are from zero to num_classes-1 inclusive.
36 |     :param num_classes:
37 |         Number of classes. If None then use max(cls)+1.
38 |     :return:
39 |         2-dim array of shape: [len(cls), num_classes]
40 |     """
41 | 
42 |     # Find the number of classes if None is provided.
43 |     if num_classes is None:
44 |         num_classes = np.max(class_numbers) + 1
45 | 
46 |     return np.eye(num_classes, dtype=float)[class_numbers]
47 | 
48 | 
49 | ########################################################################
50 | 
51 | 
52 | class DataSet:
53 |     def __init__(self, in_dir, exts='.jpg'):
54 |         """
55 |         Create a data-set consisting of the filenames in the given directory
56 |         and sub-dirs that match the given filename-extensions.
57 |         For example, the knifey-spoony data-set (see knifey.py) has the
58 |         following dir-structure:
59 |         knifey-spoony/forky/
60 |         knifey-spoony/knifey/
61 |         knifey-spoony/spoony/
62 |         knifey-spoony/forky/test/
63 |         knifey-spoony/knifey/test/
64 |         knifey-spoony/spoony/test/
65 |         This means there are 3 classes called: forky, knifey, and spoony.
48 | 
49 | ########################################################################
50 | 
51 | 
52 | class DataSet:
53 |     def __init__(self, in_dir, exts='.jpg'):
54 |         """
55 |         Create a data-set consisting of the filenames in the given directory
56 |         and sub-dirs that match the given filename-extensions.
57 |         For example, the knifey-spoony data-set (see knifey.py) has the
58 |         following dir-structure:
59 |         knifey-spoony/forky/
60 |         knifey-spoony/knifey/
61 |         knifey-spoony/spoony/
62 |         knifey-spoony/forky/test/
63 |         knifey-spoony/knifey/test/
64 |         knifey-spoony/spoony/test/
65 |         This means there are 3 classes called: forky, knifey, and spoony.
66 |         If we set in_dir = "knifey-spoony/" and create a new DataSet-object
67 |         then it will scan through these directories and create a training-set
68 |         and test-set for each of these classes.
69 |         The training-set will contain a list of all the *.jpg filenames
70 |         in the following directories:
71 |         knifey-spoony/forky/
72 |         knifey-spoony/knifey/
73 |         knifey-spoony/spoony/
74 |         The test-set will contain a list of all the *.jpg filenames
75 |         in the following directories:
76 |         knifey-spoony/forky/test/
77 |         knifey-spoony/knifey/test/
78 |         knifey-spoony/spoony/test/
79 |         See the TensorFlow Tutorial #09 for a usage example.
80 |         :param in_dir:
81 |             Root-dir for the files in the data-set.
82 |             This would be 'knifey-spoony/' in the example above.
83 |         :param exts:
84 |             String or tuple of strings with valid filename-extensions.
85 |             Not case-sensitive.
86 |         :return:
87 |             Object instance.
88 |         """
89 | 
90 |         # Extend the input directory to the full path.
91 |         in_dir = os.path.abspath(in_dir)
92 | 
93 |         # Input directory.
94 |         self.in_dir = in_dir
95 | 
96 |         # Convert all file-extensions to lower-case. A single extension-string is wrapped in a tuple first, so iterating does not split it into characters.
97 |         self.exts = (exts.lower(),) if isinstance(exts, str) else tuple(ext.lower() for ext in exts)
98 | 
99 |         # Names for the classes.
100 |         self.class_names = []
101 | 
102 |         # Filenames for all the files in the training-set.
103 |         self.filenames = []
104 | 
105 |         # Filenames for all the files in the test-set.
106 |         self.filenames_test = []
107 | 
108 |         # Class-number for each file in the training-set.
109 |         self.class_numbers = []
110 | 
111 |         # Class-number for each file in the test-set.
112 |         self.class_numbers_test = []
113 | 
114 |         # Total number of classes in the data-set.
115 |         self.num_classes = 0
116 | 
117 |         # For all files/dirs in the input directory.
118 |         for name in os.listdir(in_dir):
119 |             # Full path for the file / dir.
120 |             current_dir = os.path.join(in_dir, name)
121 | 
122 |             # If it is a directory.
123 |             if os.path.isdir(current_dir):
124 |                 # Add the dir-name to the list of class-names.
125 |                 self.class_names.append(name)
126 | 
127 |                 # Training-set.
128 | 
129 |                 # Get all the valid filenames in the dir (not sub-dirs).
130 |                 filenames = self._get_filenames(current_dir)
131 | 
132 |                 # Append them to the list of all filenames for the training-set.
133 |                 self.filenames.extend(filenames)
134 | 
135 |                 # The class-number for this class.
136 |                 class_number = self.num_classes
137 | 
138 |                 # Create an array of class-numbers.
139 |                 class_numbers = [class_number] * len(filenames)
140 | 
141 |                 # Append them to the list of all class-numbers for the training-set.
142 |                 self.class_numbers.extend(class_numbers)
143 | 
144 |                 # Test-set.
145 | 
146 |                 # Get all the valid filenames in the sub-dir named 'test'.
147 |                 filenames_test = self._get_filenames(os.path.join(current_dir, 'test'))
148 | 
149 |                 # Append them to the list of all filenames for the test-set.
150 |                 self.filenames_test.extend(filenames_test)
151 | 
152 |                 # Create an array of class-numbers.
153 |                 class_numbers = [class_number] * len(filenames_test)
154 | 
155 |                 # Append them to the list of all class-numbers for the test-set.
156 |                 self.class_numbers_test.extend(class_numbers)
157 | 
158 |                 # Increase the total number of classes in the data-set.
159 |                 self.num_classes += 1
160 | 
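    # Example (the knifey-spoony layout from the docstring above): after
    #
    #     dataset = DataSet(in_dir='knifey-spoony/', exts=('.jpg', '.jpeg'))
    #
    # dataset.class_names holds ['forky', 'knifey', 'spoony'] (in os.listdir()
    # order), dataset.num_classes == 3, the class dirs fill the training-set
    # lists, and their 'test' sub-dirs fill the *_test lists.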
168 | """ 169 | 170 | # Initialize empty list. 171 | filenames = [] 172 | 173 | # If the directory exists. 174 | if os.path.exists(dir): 175 | # Get all the filenames with matching extensions. 176 | for filename in os.listdir(dir): 177 | if filename.lower().endswith(self.exts): 178 | filenames.append(filename) 179 | 180 | return filenames 181 | 182 | def get_paths(self, test=False): 183 | """ 184 | Get the full paths for the files in the data-set. 185 | :param test: 186 | Boolean. Return the paths for the test-set (True) or training-set (False). 187 | :return: 188 | Iterator with strings for the path-names. 189 | """ 190 | 191 | if test: 192 | # Use the filenames and class-numbers for the test-set. 193 | filenames = self.filenames_test 194 | class_numbers = self.class_numbers_test 195 | 196 | # Sub-dir for test-set. 197 | test_dir = "test/" 198 | else: 199 | # Use the filenames and class-numbers for the training-set. 200 | filenames = self.filenames 201 | class_numbers = self.class_numbers 202 | 203 | # Don't use a sub-dir for test-set. 204 | test_dir = "" 205 | 206 | for filename, cls in zip(filenames, class_numbers): 207 | # Full path-name for the file. 208 | path = os.path.join(self.in_dir, self.class_names[cls], test_dir, filename) 209 | 210 | yield path 211 | 212 | def get_training_set(self): 213 | """ 214 | Return the list of paths for the files in the training-set, 215 | and the list of class-numbers as integers, 216 | and the class-numbers as one-hot encoded arrays. 217 | """ 218 | 219 | return list(self.get_paths()), \ 220 | np.asarray(self.class_numbers), \ 221 | one_hot_encoded(class_numbers=self.class_numbers, 222 | num_classes=self.num_classes) 223 | 224 | def get_test_set(self): 225 | """ 226 | Return the list of paths for the files in the test-set, 227 | and the list of class-numbers as integers, 228 | and the class-numbers as one-hot encoded arrays. 229 | """ 230 | 231 | return list(self.get_paths(test=True)), \ 232 | np.asarray(self.class_numbers_test), \ 233 | one_hot_encoded(class_numbers=self.class_numbers_test, 234 | num_classes=self.num_classes) 235 | 236 | 237 | ######################################################################## 238 | 239 | 240 | def load_cached(cache_path, in_dir): 241 | """ 242 | Wrapper-function for creating a DataSet-object, which will be 243 | loaded from a cache-file if it already exists, otherwise a new 244 | object will be created and saved to the cache-file. 245 | This is useful if you need to ensure the ordering of the 246 | filenames is consistent every time you load the data-set, 247 | for example if you use the DataSet-object in combination 248 | with Transfer Values saved to another cache-file, see e.g. 249 | Tutorial #09 for an example of this. 250 | :param cache_path: 251 | File-path for the cache-file. 252 | :param in_dir: 253 | Root-dir for the files in the data-set. 254 | This is an argument for the DataSet-init function. 255 | :return: 256 | The DataSet-object. 257 | """ 258 | 259 | print("Creating dataset from the files in: " + in_dir) 260 | 261 | # If the object-instance for DataSet(in_dir=data_dir) already 262 | # exists in the cache-file then reload it, otherwise create 263 | # an object instance and save it to the cache-file for next time. 
239 | 
240 | def load_cached(cache_path, in_dir):
241 |     """
242 |     Wrapper-function for creating a DataSet-object, which will be
243 |     loaded from a cache-file if it already exists, otherwise a new
244 |     object will be created and saved to the cache-file.
245 |     This is useful if you need to ensure the ordering of the
246 |     filenames is consistent every time you load the data-set,
247 |     for example if you use the DataSet-object in combination
248 |     with Transfer Values saved to another cache-file, see e.g.
249 |     Tutorial #09 for an example of this.
250 |     :param cache_path:
251 |         File-path for the cache-file.
252 |     :param in_dir:
253 |         Root-dir for the files in the data-set.
254 |         This is an argument for the DataSet-init function.
255 |     :return:
256 |         The DataSet-object.
257 |     """
258 | 
259 |     print("Creating dataset from the files in: " + in_dir)
260 | 
261 |     # If the object-instance for DataSet(in_dir=data_dir) already
262 |     # exists in the cache-file then reload it, otherwise create
263 |     # an object instance and save it to the cache-file for next time.
264 |     dataset = cache(cache_path=cache_path,
265 |                     fn=DataSet, in_dir=in_dir)
266 | 
267 |     return dataset
268 | 
269 | 
270 | ########################################################################
271 | 
--------------------------------------------------------------------------------
/PeleeNet.py:
--------------------------------------------------------------------------------
1 | # coding='utf-8'
2 | '''
3 |     author: Youzhao Yang
4 |     date: 05/08/2018
5 |     github: https://github.com/nnuyi
6 | '''
7 | 
8 | import tensorflow as tf
9 | import numpy as np
10 | import time
11 | import os
12 | 
13 | from tqdm import tqdm
14 | from layers import Layer
15 | from utils import get_data, gen_batch_data
16 | 
17 | class PeleeNet:
18 |     model_name = 'PeleeNet'
19 |     '''
20 |         PeleeNet Class
21 |     '''
22 |     def __init__(self, config=None, sess=None):
23 |         self.sess = sess
24 |         self.config = config
25 | 
26 |         self.num_class = self.config.num_class
27 |         self.input_height = self.config.input_height
28 |         self.input_width = self.config.input_width
29 |         self.input_channel = self.config.input_channel
30 | 
31 |         self.batchsize = self.config.batchsize
32 | 
33 |         self.layer = Layer()
34 | 
35 |     def peleenet(self, input_x, k=32, num_init_channel=64, block_config=[3,4,8,6], bottleneck_width=[2,2,4,4], is_training=True, reuse=False):
36 |         with tf.variable_scope(self.model_name) as scope:
37 |             if reuse:
38 |                 scope.reuse_variables()
39 | 
40 |             '''
41 |             --------------------------------------------------------------------
42 |             feature extraction
43 |             --------------------------------------------------------------------
44 |             '''
45 |             # _stem_block(self, input_x, num_init_channel=32, is_training=True, reuse=False):
46 |             from_layer = self.layer._stem_block(input_x,
47 |                                                 num_init_channel=num_init_channel,
48 |                                                 is_training=is_training,
49 |                                                 reuse=reuse)
50 | 
51 |             # _dense_block(self, input_x, stage, num_block, k, bottleneck_width, is_training=True, reuse=False):
52 |             # _transition_layer(self, input_x, stage, output_channel, is_avgpool=True, is_training=True, reuse=False):
53 |             stage = 0
54 |             for num_block, bottleneck_coeff in zip(block_config, bottleneck_width):
55 |                 stage = stage + 1
56 |                 # dense_block
57 |                 from_layer = self.layer._dense_block(from_layer,
58 |                                                      stage,
59 |                                                      num_block,
60 |                                                      k,
61 |                                                      bottleneck_coeff,
62 |                                                      is_training=is_training,
63 |                                                      reuse=reuse)
64 | 
65 |                 is_avgpool = stage < 4    # no average-pooling in the last (4th) transition layer
66 |                 output_channel = from_layer.get_shape().as_list()[-1]
67 |                 # transition_layer
68 |                 from_layer = self.layer._transition_layer(from_layer,
69 |                                                           stage,
70 |                                                           output_channel=output_channel,
71 |                                                           is_avgpool=is_avgpool,
72 |                                                           is_training=is_training,
73 |                                                           reuse=reuse)
74 | 
75 |             '''
76 |             --------------------------------------------------------------------
77 |             classification
78 |             --------------------------------------------------------------------
79 |             '''
80 |             # _classification_layer(self, input_x, num_class, keep_prob=0.5, is_training=True, reuse=False):
81 |             logits = self.layer._classification_layer(from_layer, self.num_class, is_training=is_training, reuse=reuse)
82 |             return logits
83 | 
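    # Weight-sharing note: build_model() below instantiates this network twice,
    # once for training and once (with reuse=True) for testing. Both calls enter
    # the same tf.variable_scope(self.model_name), so tf.get_variable() returns
    # the identical weight tensors for both graphs. A minimal illustration of
    # the TF1 reuse mechanism this relies on:
    #
    #     with tf.variable_scope('PeleeNet') as scope:
    #         w1 = tf.get_variable('w', shape=[3, 3, 3, 32])
    #         scope.reuse_variables()
    #         w2 = tf.get_variable('w', shape=[3, 3, 3, 32])
    #     assert w1 is w2    # same variable object, shared weights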
84 |     def build_model(self):
85 |         self.input_train = tf.placeholder(tf.float32, [self.batchsize, self.input_height, self.input_width, self.input_channel], name='input_train')
86 |         self.input_test = tf.placeholder(tf.float32, [self.batchsize, self.input_height, self.input_width, self.input_channel], name='input_test')
87 |         self.one_hot_labels = tf.placeholder(tf.float32, [self.batchsize, self.num_class], name='one_hot_labels')
88 | 
89 |         # logits for the training graph and the weight-sharing test graph
90 |         self.logits_train = self.peleenet(self.input_train, is_training=True, reuse=False)
91 |         self.logits_test = self.peleenet(self.input_test, is_training=False, reuse=True)
92 |         # self.one_hot_labels = tf.one_hot(self.input_label, self.num_class)
93 | 
94 |         # loss function (older TF versions take (logits, labels) positionally)
95 |         def softmax_cross_entropy_with_logits(x, y):
96 |             try:
97 |                 return tf.nn.softmax_cross_entropy_with_logits(logits=x, labels=y)
98 |             except TypeError:
99 |                 return tf.nn.softmax_cross_entropy_with_logits(x, y)
100 |         # weights regularization
101 |         self.weights_reg = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
102 |         self.loss = tf.reduce_mean(softmax_cross_entropy_with_logits(self.logits_train, self.one_hot_labels)) + self.config.weight_decay*self.weights_reg
103 | 
104 |         # optimizer
105 |         '''
106 |         self.adam_optim = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate,
107 |                                                  beta1=self.config.beta1,
108 |                                                  beta2=self.config.beta2).minimize(self.loss)
109 |         '''
110 |         self.rmsprop_optim = tf.train.RMSPropOptimizer(learning_rate=self.config.learning_rate,
111 |                                                        momentum=self.config.momentum).minimize(self.loss)
112 | 
113 |         # accuracy (evaluated on the test graph)
114 |         self.prediction = tf.nn.softmax(self.logits_test, 1)
115 |         self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(self.prediction, 1), tf.argmax(self.one_hot_labels, 1)), tf.float32))
116 | 
117 |         # summary
118 |         self.loss_summary = tf.summary.scalar('entropy_loss', self.loss)
119 |         self.accuracy_summary = tf.summary.scalar('accuracy', self.accuracy)
120 | 
121 |         self.summaries = tf.summary.merge_all()
122 |         self.summary_writer = tf.summary.FileWriter(self.config.logs_dir, self.sess.graph)
123 | 
124 |         # saver
125 |         self.saver = tf.train.Saver()
126 | 
127 |     def train_model(self):
128 |         # initialize variables
129 |         tf.global_variables_initializer().run()
130 | 
131 |         # load model
132 |         if self.load_model():
133 |             print('load model successfully')
134 |         else:
135 |             print('fail to load model')
136 | 
137 |         # get datasource
138 |         datasource = get_data(self.config.dataset, is_training=True)
139 |         gen_data = gen_batch_data(datasource, self.batchsize, is_training=True)
140 |         iters_per_epoch = int(len(datasource.images)/self.batchsize)
141 | 
142 |         step = 0
143 |         for epoch in range(self.config.epochs):
144 |             for ite in tqdm(range(iters_per_epoch)):
145 |                 images, labels = next(gen_data)
146 |                 _, loss, accuracy, summaries = self.sess.run([self.rmsprop_optim, self.loss, self.accuracy, self.summaries], feed_dict={
147 |                     self.input_train:images,
148 |                     self.input_test:images,
149 |                     self.one_hot_labels:labels
150 |                 })
151 | 
152 |                 step = step + 1
153 |                 self.summary_writer.add_summary(summaries, global_step=step)
154 | 
155 |             # test model every epoch
156 |             if np.mod(epoch, 1) == 0:
157 |                 print('--epoch_{} -- training accuracy:{}'.format(epoch, accuracy))
158 |                 self.test_model()
159 | 
160 |             # save model every 5 epochs
161 |             if np.mod(epoch, 5) == 0:
162 |                 self.save_model()
163 | 
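    # Note: iters_per_epoch is computed with integer division, so any final
    # partial batch (len(datasource.images) % batchsize samples) is skipped;
    # the accuracy reported by test_model() below is therefore the mean over
    # full batches only.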
164 |     def test_model(self):
165 |         if not self.config.is_training:
166 |             # initialize variables
167 |             tf.global_variables_initializer().run()
168 |             # load model
169 |             if self.load_model():
170 |                 print('load model successfully')
171 |             else:
172 |                 print('fail to load model')
173 | 
174 |         datasource = get_data(self.config.dataset, is_training=False)
175 |         gen_data = gen_batch_data(datasource, self.batchsize, is_training=False)
176 |         iters_per_epoch = int(len(datasource.images)/self.batchsize)
177 | 
178 |         accuracy = []
179 |         for ite in range(iters_per_epoch):
180 |             images, labels = next(gen_data)
181 |             accuracy_per_batch = self.sess.run([self.accuracy], feed_dict={
182 |                 self.input_test:images,
183 |                 self.one_hot_labels:labels
184 |             })
185 |             accuracy.append(accuracy_per_batch[0])
186 | 
187 |         acc = np.mean(accuracy)
188 |         print('--test epoch -- accuracy:{:.4f}'.format(acc))
189 | 
190 |     # load model
191 |     def load_model(self):
192 |         if not os.path.isfile(os.path.join(self.model_dir, 'checkpoint')):
193 |             return False
194 |         self.saver.restore(self.sess, self.model_pos)
195 |         return True
196 | 
197 |     # save model
198 |     def save_model(self):
199 |         if not os.path.exists(self.model_dir):
200 |             os.makedirs(self.model_dir)
201 |         self.saver.save(self.sess, self.model_pos)
202 | 
203 |     @property
204 |     def model_dir(self):
205 |         return '{}/{}'.format(self.config.checkpoint_dir, self.config.dataset)
206 | 
207 |     @property
208 |     def model_pos(self):
209 |         return '{}/{}/{}'.format(self.config.checkpoint_dir, self.config.dataset, self.model_name)
210 | 
211 | if __name__=='__main__':
212 |     # quick graph-construction check; PeleeNet needs a config for num_class etc.
213 |     from config import Config
214 |     input_x = tf.placeholder(tf.float32, [64, 224, 224, 3], name='input_train')
215 |     peleenet = PeleeNet(config=Config().config)
216 |     start_time = time.time()
217 |     output = peleenet.peleenet(input_x)
218 |     end_time = time.time()
219 |     # this times building the graph, not running a forward pass
220 |     print('total time:{}'.format(end_time-start_time))
221 |     print(output.get_shape().as_list())
222 | 
--------------------------------------------------------------------------------
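# Usage (flags defined in config.py, entry point in main.py):
#
#     python main.py --is_training=True    # train PeleeNet on cifar10
#     python main.py --is_testing=True     # evaluate the saved checkpoint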