├── .gitignore ├── README.md ├── cifar-100_example.py ├── datasets ├── cars.py ├── cifar10.py ├── cifar100.py ├── imagenet.py ├── mnist.py ├── penn_treebank.py └── svhn.py ├── example_train.py ├── imagenet_example.py ├── libs ├── components.py └── custom_ops.py ├── nets ├── bn_conv.py ├── deep_roots.py ├── highway_uniform.py ├── resnet_old_reference.py └── resnet_uniform.py ├── pip3_requirements.txt ├── slim_deep_roots_eval.py ├── slim_deep_roots_train.py ├── slim_eval.py ├── slim_highway_eval.py ├── slim_highway_train.py ├── slim_old_resnet_eval.py ├── slim_old_resnet_train.py ├── slim_resnet_eval.py ├── slim_resnet_train.py ├── slim_train.py ├── test.py └── utils ├── cars.py ├── cifar10.py ├── cifar100.py ├── download.py ├── download.pyc ├── imagenet.py ├── imagenet_download ├── preprocess_imagenet_validation_data.py ├── process_bounding_boxes.py └── run_me.sh ├── penn_treebank.py └── svhn.py /.gitignore: -------------------------------------------------------------------------------- 1 | # logs and downloaded data 2 | logs/ 3 | data/ 4 | env/ 5 | 6 | # pycharm 7 | .idea/ 8 | 9 | # compiled 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TensorFlow Input Pipelines 2 | 3 | Use these TensorFlow(v0.11) pipelines to automatically download and easily fetch batches of data and labels from some of the most used datasets in Deep Learning. The implementations are threaded, efficient, can be randomized and also include large datasets such as imagenet. 4 | 5 | ### Supported Datasets 6 | - MNIST 7 | - CIFAR-10 8 | - CIFAR-100 9 | - SVHN 10 | - Stanford Cars 196 11 | - Imagenet (no automatic data download, but a shell script is provided in utils/imagenet_download/) 12 | - Penn Treebank 13 | 14 | (more datasets will be added soon ...) 
15 | 16 | ### Example 17 | ```python 18 | import tensorflow as tf 19 | sess = tf.Session() 20 | 21 | with tf.device('/cpu:0'): 22 | from datasets.svhn import svhn_data 23 | d = svhn_data(batch_size=256, sess=sess) 24 | image_batch_tensor, target_batch_tensor = d.build_train_data_tensor() 25 | 26 | for i in range(5): 27 | print("batch ", i) 28 | image_batch, target_batch = sess.run([image_batch_tensor, target_batch_tensor]) 29 | # logits = model(image_batch, target_batch) 30 | # ... 31 | print(image_batch.shape) 32 | print(target_batch.shape) 33 | 34 | d.close() 35 | sess.close() 36 | ``` 37 | 38 | ### Installation and Running the Cifar-100 Example 39 | ``` 40 | schlag@box:~/MyStuff/input_pipelines$ mkvirtualenv $(pwd | awk '{print $1"/env"}') 41 | Using base prefix '/usr' 42 | New python executable in /home/schlag/MyStuff/input_pipelines/env/bin/python3 43 | Also creating executable in /home/schlag/MyStuff/input_pipelines/env/bin/python 44 | Installing setuptools, pip, wheel...done. 45 | schlag@box:~/MyStuff/input_pipelines$ source env/bin/activate 46 | (env) schlag@box:~/MyStuff/input_pipelines$ pip3 install -r pip3_requirements.txt 47 | Collecting numpy==1.11.2 (from -r pip3_requirements.txt (line 1)) 48 | Using cached numpy-1.11.2-cp35-cp35m-manylinux1_x86_64.whl 49 | Collecting pickleshare==0.7.4 (from -r pip3_requirements.txt (line 2)) 50 | Using cached pickleshare-0.7.4-py2.py3-none-any.whl 51 | Collecting protobuf==3.0.0 (from -r pip3_requirements.txt (line 3)) 52 | Using cached protobuf-3.0.0-py2.py3-none-any.whl 53 | Collecting scipy==0.18.1 (from -r pip3_requirements.txt (line 4)) 54 | Using cached scipy-0.18.1-cp35-cp35m-manylinux1_x86_64.whl 55 | Collecting six==1.10.0 (from -r pip3_requirements.txt (line 5)) 56 | Using cached six-1.10.0-py2.py3-none-any.whl 57 | Requirement already satisfied: setuptools in ./env/lib/python3.5/site-packages (from protobuf==3.0.0->-r pip3_requirements.txt (line 3)) 58 | Installing collected packages: numpy, pickleshare, 
six, protobuf, scipy 59 | Successfully installed numpy-1.11.2 pickleshare-0.7.4 protobuf-3.0.0 scipy-0.18.1 six-1.10.0 60 | (env) schlag@box:~/MyStuff/input_pipelines$ pip3 install ../tf-builds/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl 61 | Processing /home/schlag/MyStuff/tf-builds/tensorflow-0.11.0rc2-cp35-cp35m-linux_x86_64.whl 62 | Requirement already satisfied: wheel>=0.26 in ./env/lib/python3.5/site-packages (from tensorflow==0.11.0rc2) 63 | Requirement already satisfied: six>=1.10.0 in ./env/lib/python3.5/site-packages (from tensorflow==0.11.0rc2) 64 | Collecting protobuf==3.1.0 (from tensorflow==0.11.0rc2) 65 | Using cached protobuf-3.1.0-py2.py3-none-any.whl 66 | Requirement already satisfied: numpy>=1.11.0 in ./env/lib/python3.5/site-packages (from tensorflow==0.11.0rc2) 67 | Requirement already satisfied: setuptools in ./env/lib/python3.5/site-packages (from protobuf==3.1.0->tensorflow==0.11.0rc2) 68 | Installing collected packages: protobuf, tensorflow 69 | Found existing installation: protobuf 3.0.0 70 | Uninstalling protobuf-3.0.0: 71 | Successfully uninstalled protobuf-3.0.0 72 | Successfully installed protobuf-3.1.0 tensorflow-0.11.0rc2 73 | (env) schlag@box:~/MyStuff/input_pipelines$ python cifar-100_example.py 74 | I tensorflow/stream_executor/dso_loader.cc:128] successfully opened CUDA library libcublas.so.8.0.27 locally 75 | I tensorflow/stream_executor/dso_loader.cc:128] successfully opened CUDA library libcudnn.so.5.1.5 locally 76 | I tensorflow/stream_executor/dso_loader.cc:128] successfully opened CUDA library libcufft.so.8.0.27 locally 77 | I tensorflow/stream_executor/dso_loader.cc:128] successfully opened CUDA library libcuda.so.1 locally 78 | I tensorflow/stream_executor/dso_loader.cc:128] successfully opened CUDA library libcurand.so.8.0.27 locally 79 | I tensorflow/core/common_runtime/gpu/gpu_device.cc:885] Found device 0 with properties: 80 | name: GeForce GTX 1080 81 | major: 6 minor: 1 memoryClockRate (GHz) 1.7335 82 | 
pciBusID 0000:05:00.0 83 | Total memory: 7.92GiB 84 | Free memory: 6.63GiB 85 | I tensorflow/core/common_runtime/gpu/gpu_device.cc:906] DMA: 0 86 | I tensorflow/core/common_runtime/gpu/gpu_device.cc:916] 0: Y 87 | I tensorflow/core/common_runtime/gpu/gpu_device.cc:975] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1080, pci bus id: 0000:05:00.0) 88 | Loading CIFAR-100 data 89 | - Download progress: 100.0% 90 | Download finished. Extracting files. 91 | Extracting finished. Cleaning up. 92 | Done. 93 | Loading data: data/CIFAR-100/cifar-100-python/train 94 | batch 0 95 | (256, 32, 32, 3) 96 | (256, 100) 97 | batch 1 98 | (256, 32, 32, 3) 99 | (256, 100) 100 | batch 2 101 | (256, 32, 32, 3) 102 | (256, 100) 103 | batch 3 104 | (256, 32, 32, 3) 105 | (256, 100) 106 | batch 4 107 | (256, 32, 32, 3) 108 | (256, 100) 109 | batch 5 110 | (256, 32, 32, 3) 111 | (256, 100) 112 | batch 6 113 | (256, 32, 32, 3) 114 | (256, 100) 115 | batch 7 116 | (256, 32, 32, 3) 117 | (256, 100) 118 | batch 8 119 | (256, 32, 32, 3) 120 | (256, 100) 121 | batch 9 122 | (256, 32, 32, 3) 123 | (256, 100) 124 | done! 125 | 126 | ``` 127 | 128 | 129 | ### Download the Imagenet Data 130 | You need to use the supplied shell script in order to download the imagenet data. This can take a long time. The train archive is almost 150GB in size. 
131 | 132 | ``` 133 | (env) schlag@box:~/MyStuff/input_pipelines$ cd utils/imagenet_download/ 134 | (env) schlag@box:~/MyStuff/input_pipelines/utils/imagenet_download$ sh run_me.sh 135 | ** snip (this will take a while) ** 136 | (env) schlag@box:~/MyStuff/input_pipelines/utils/imagenet_download$ cd ../../ 137 | (env) schlag@box:~/MyStuff/input_pipelines$ python imagenet_example.py 138 | I tensorflow/stream_executor/dso_loader.cc:128] successfully opened CUDA library libcublas.so.8.0.27 locally 139 | I tensorflow/stream_executor/dso_loader.cc:128] successfully opened CUDA library libcudnn.so.5.1.5 locally 140 | I tensorflow/stream_executor/dso_loader.cc:128] successfully opened CUDA library libcufft.so.8.0.27 locally 141 | I tensorflow/stream_executor/dso_loader.cc:128] successfully opened CUDA library libcuda.so.1 locally 142 | I tensorflow/stream_executor/dso_loader.cc:128] successfully opened CUDA library libcurand.so.8.0.27 locally 143 | I tensorflow/core/common_runtime/gpu/gpu_device.cc:885] Found device 0 with properties: 144 | name: GeForce GTX 1080 145 | major: 6 minor: 1 memoryClockRate (GHz) 1.7335 146 | pciBusID 0000:05:00.0 147 | Total memory: 7.92GiB 148 | Free memory: 6.61GiB 149 | I tensorflow/core/common_runtime/gpu/gpu_device.cc:906] DMA: 0 150 | I tensorflow/core/common_runtime/gpu/gpu_device.cc:916] 0: Y 151 | I tensorflow/core/common_runtime/gpu/gpu_device.cc:975] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1080, pci bus id: 0000:05:00.0) 152 | Successfully read 615299 bounding boxes across 544546 images. 153 | Determining list of input files and labels from data/imagenet/validation/. 154 | Finished finding files in 100 of 1000 classes. 155 | Finished finding files in 200 of 1000 classes. 156 | Finished finding files in 300 of 1000 classes. 157 | Finished finding files in 400 of 1000 classes. 158 | Finished finding files in 500 of 1000 classes. 159 | Finished finding files in 600 of 1000 classes. 
160 | Finished finding files in 700 of 1000 classes. 161 | Finished finding files in 800 of 1000 classes. 162 | Finished finding files in 900 of 1000 classes. 163 | Finished finding files in 1000 of 1000 classes. 164 | Found 50000 JPEG files across 1000 labels inside data/imagenet/validation/. 165 | Determining list of input files and labels from data/imagenet/train/. 166 | Finished finding files in 100 of 1000 classes. 167 | Finished finding files in 200 of 1000 classes. 168 | Finished finding files in 300 of 1000 classes. 169 | Finished finding files in 400 of 1000 classes. 170 | Finished finding files in 500 of 1000 classes. 171 | Finished finding files in 600 of 1000 classes. 172 | Finished finding files in 700 of 1000 classes. 173 | Finished finding files in 800 of 1000 classes. 174 | Finished finding files in 900 of 1000 classes. 175 | Finished finding files in 1000 of 1000 classes. 176 | Found 1281167 JPEG files across 1000 labels inside data/imagenet/train/. 177 | Loading imagenet data 178 | Train directory seems to exist 179 | Validation directory seems to exist 180 | batch 0 181 | (64, 299, 299, 3) 182 | (64, 1000) 183 | batch 1 184 | (64, 299, 299, 3) 185 | (64, 1000) 186 | batch 2 187 | (64, 299, 299, 3) 188 | (64, 1000) 189 | batch 3 190 | (64, 299, 299, 3) 191 | (64, 1000) 192 | batch 4 193 | (64, 299, 299, 3) 194 | (64, 1000) 195 | batch 5 196 | (64, 299, 299, 3) 197 | (64, 1000) 198 | batch 6 199 | (64, 299, 299, 3) 200 | (64, 1000) 201 | batch 7 202 | (64, 299, 299, 3) 203 | (64, 1000) 204 | batch 8 205 | (64, 299, 299, 3) 206 | (64, 1000) 207 | batch 9 208 | (64, 299, 299, 3) 209 | (64, 1000) 210 | done! 
211 | 212 | ``` 213 | 214 | ### Train Script Template 215 | A CNN training script template is provided with the following features: 216 | - easy switching of datasets 217 | - separate training and testing streams 218 | - continuous console log 219 | - test-set evaluation after every epoch 220 | - automatically saves the best performing model parameters 221 | - automatically decreases the learning rate if there is no improvement in accuracy 222 | - evaluate top 1 and top n accuracies 223 | - easy parameter loading from a previous save point to continue training 224 | - prints a confusion matrix in your console 225 | 226 | -------------------------------------------------------------------------------- /cifar-100_example.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Author: Imanol Schlag (more info on ischlag.github.io) 3 | # Description: example of how to use the cifar-100 input pipeline 4 | # Date: 11.2016 5 | # 6 | # TODO: How can we prevent the Enqueue operation was cancelled error? 
import time

import tensorflow as tf


def main():
    """ Fetches ten CIFAR-100 batches from the input pipeline and feeds them back into the graph. """
    sess = tf.Session()

    input_image_batch = tf.placeholder(tf.float32, shape=[256, 32, 32, 3], name="input_image_batch")
    input_label_batch = tf.placeholder(tf.float32, shape=[None, 100], name="input_label_batch")

    with tf.device('/cpu:0'):
        from datasets.cifar100 import cifar100_data
        # cifar100_data requires the session as second argument; without it
        # the constructor raises a TypeError before any data is loaded.
        d = cifar100_data(batch_size=256, sess=sess)
        image_batch_tensor, target_batch_tensor = d.build_train_data_tensor()

    for i in range(10):
        print("batch ", i)
        image_batch, target_batch = d.sess.run([image_batch_tensor, target_batch_tensor])

        print(image_batch.shape)
        print(target_batch.shape)

        # feed the fetched numpy batch back into the graph through the placeholders
        res = sess.run(input_image_batch, feed_dict={input_image_batch: image_batch,
                                                     input_label_batch: target_batch})
        print(type(res))

    print("done!")
    print("Well, almost. Closing the queue and the session. This will lead to the following warning/error ...")
    time.sleep(8)
    d.close()


if __name__ == "__main__":
    main()
class cars_data:
    """
    Downloads the stanford cars 196 dataset and creates an input pipeline ready to be fed into a model.

    - decodes jpg images
    - scales images into a uniform size
    - standardizes every image
    - shuffles the input if specified
    - builds batches
    """

    NUM_THREADS = 8
    NUMBER_OF_CLASSES = 196
    # The Stanford Cars split has 8144 training and 8041 test images
    # (the two numbers used to be swapped).
    TRAIN_SET_SIZE = 8144
    TEST_SET_SIZE = 8041

    IMAGE_HEIGHT = 75
    IMAGE_WIDTH = 100
    NUM_OF_CHANNELS = 3

    def __init__(self, batch_size, sess,
                 filename_feed_size=200,
                 filename_queue_capacity=800,
                 batch_queue_capacity=1150,
                 min_after_dequeue=1150,  # 100MB RAM ~=1150 images
                 image_height=IMAGE_HEIGHT,
                 image_width=IMAGE_WIDTH):
        """
        Downloads the data if necessary.

        batch_size:              number of samples per produced batch
        sess:                    session used to run the queue threads
        filename_feed_size:      number of (path, target) pairs fed per enqueue call
        filename_queue_capacity: capacity of the path/target queue
        batch_queue_capacity:    base capacity of the batch queue (3 * batch_size is added)
        min_after_dequeue:       passed to tf.train.shuffle_batch when shuffling
        image_height/width:      size every image is resized to
        """
        self.batch_size = batch_size
        self.filename_feed_size = filename_feed_size
        self.filename_queue_capacity = filename_queue_capacity
        self.batch_queue_capacity = batch_queue_capacity + 3 * batch_size  # add some extra
        self.min_after_dequeue = min_after_dequeue
        self.sess = sess
        self.IMAGE_HEIGHT = image_height
        self.IMAGE_WIDTH = image_width
        cars.download_data()

    def build_train_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns the (image batch, target batch) tensors of the training set. """
        img_path, bbox_x1, bbox_y1, bbox_x2, bbox_y2, cls, targets = cars.load_training_data()
        return self.__build_generic_data_tensor(img_path, targets, shuffle, augmentation)

    def build_test_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns the (image batch, target batch) tensors of the test set. """
        img_path, bbox_x1, bbox_y1, bbox_x2, bbox_y2, cls, targets = cars.load_test_data()
        return self.__build_generic_data_tensor(img_path, targets, shuffle, augmentation)

    @staticmethod
    def _next_feed_block(data, targets, start, feed_size):
        """
        Cuts the next feed_size samples out of data/targets, wrapping around at the end.

        Returns (data block, target block, start index of the following block).
        """
        total = len(data)
        stop = start + feed_size
        if stop <= total:
            return data[start:stop], targets[start:stop], stop

        # the block runs over the end: take the tail and continue at the front
        rest = stop - total
        data_block = np.concatenate((data[start:total], data[0:rest]))
        target_block = np.concatenate((targets[start:total], targets[0:rest]))
        return data_block, target_block, rest

    def _feed_queue(self, coord, enqueue_op, data_input, target_input, data, targets):
        """ Feeds blocks of samples into the queue until the coordinator requests a stop. """
        position = 0
        while not coord.should_stop():
            data_block, target_block, position = self._next_feed_block(
                data, targets, position, self.filename_feed_size)
            try:
                self.sess.run(enqueue_op, feed_dict={data_input: data_block,
                                                     target_input: target_block})
            except (tf.errors.CancelledError, tf.errors.AbortedError):
                # close() cancelled the pending enqueue / closed the queue
                break

    def __build_generic_data_tensor(self, all_img_paths, all_targets, shuffle, augmentation):
        """
        Creates the input pipeline and performs some preprocessing.
        The full dataset needs to fit into memory for this version.
        """
        imagepath_input = tf.placeholder(tf.string, shape=[self.filename_feed_size])
        target_input = tf.placeholder(tf.float32, shape=[self.filename_feed_size, self.NUMBER_OF_CLASSES])

        self.filename_queue = tf.FIFOQueue(self.filename_queue_capacity, [tf.string, tf.float32],
                                           shapes=[[], [self.NUMBER_OF_CLASSES]])
        enqueue_op = self.filename_queue.enqueue_many([imagepath_input, target_input])
        # queue.close() only builds an op; keep it so close() can actually run it
        self._close_op = self.filename_queue.close(cancel_pending_enqueues=True)
        single_path, single_target = self.filename_queue.dequeue()

        file_content = tf.read_file(single_path)
        single_image = tf.image.decode_jpeg(file_content, channels=self.NUM_OF_CHANNELS)

        # convert to [0, 1]
        single_image = tf.image.convert_image_dtype(single_image,
                                                    dtype=tf.float32,
                                                    saturate=True)

        single_image = tf.image.resize_images(single_image, [self.IMAGE_HEIGHT, self.IMAGE_WIDTH])

        # Data Augmentation
        if augmentation:
            single_image = tf.image.resize_image_with_crop_or_pad(single_image, self.IMAGE_HEIGHT+4, self.IMAGE_WIDTH+4)
            single_image = tf.random_crop(single_image, [self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.NUM_OF_CHANNELS])
            single_image = tf.image.random_flip_left_right(single_image)

        single_image = tf.image.per_image_standardization(single_image)

        # memory calculation:
        # 1 image uses 75*100*3*4 bytes = ~90kb
        # 100MB RAM ~=1150 images
        if shuffle:
            images_batch, target_batch = tf.train.shuffle_batch([single_image, single_target],
                                                                batch_size=self.batch_size,
                                                                capacity=self.batch_queue_capacity,
                                                                min_after_dequeue=self.min_after_dequeue,
                                                                num_threads=self.NUM_THREADS)
        else:
            images_batch, target_batch = tf.train.batch([single_image, single_target],
                                                        batch_size=self.batch_size,
                                                        capacity=self.batch_queue_capacity,
                                                        num_threads=self.NUM_THREADS)

        self.coord = tf.train.Coordinator()
        self.threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

        feeder = threading.Thread(target=self._feed_queue,
                                  args=(self.coord, enqueue_op, imagepath_input, target_input,
                                        all_img_paths, all_targets))
        # isDaemon() only reads the flag; it has to be set so that a blocked
        # feeder can never keep the interpreter alive
        feeder.daemon = True
        feeder.start()

        return images_batch, target_batch

    def __del__(self):
        self.close()

    def close(self):
        """ Stops the pipeline threads. Safe to call repeatedly or before a pipeline was built. """
        close_op = getattr(self, "_close_op", None)
        if close_op is None:
            return
        self._close_op = None

        self.coord.request_stop()
        # run the close op so that a feeder blocked on a full queue is cancelled
        self.sess.run(close_op)
        self.coord.join(self.threads)
class cifar10_data:
    """
    Downloads the CIFAR10 dataset and creates an input pipeline ready to be fed into a model.

    - feeds the images in blocks of feed_size samples into a queue
    - pads, randomly crops and flips the images if augmentation is requested
    - standardizes every image
    - shuffles the input if specified
    - builds batches
    """
    NUM_THREADS = 8
    NUMBER_OF_CLASSES = 10

    TRAIN_SET_SIZE = 50000
    TEST_SET_SIZE = 10000
    IMAGE_WIDTH = 32
    IMAGE_HEIGHT = 32
    NUM_OF_CHANNELS = 3

    def __init__(self, batch_size, sess,
                 feed_size=200,
                 feed_queue_capacity=800,
                 batch_queue_capacity=1000,
                 min_after_dequeue=1000):
        """
        Downloads the cifar10 data if necessary.

        batch_size:           number of samples per produced batch
        sess:                 session used to run the queue threads
        feed_size:            number of samples fed per enqueue call
        feed_queue_capacity:  capacity of the sample queue
        batch_queue_capacity: base capacity of the batch queue (3 * batch_size is added)
        min_after_dequeue:    passed to tf.train.shuffle_batch when shuffling
        """
        print("Loading CIFAR-10 data")
        self.batch_size = batch_size
        self.feed_size = feed_size
        self.feed_queue_capacity = feed_queue_capacity
        self.batch_queue_capacity = batch_queue_capacity + 3 * batch_size
        self.min_after_dequeue = min_after_dequeue
        self.sess = sess
        cifar10.maybe_download_and_extract()

    def build_train_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns the (image batch, target batch) tensors of the training set. """
        images, _, targets = cifar10.load_training_data()
        return self.__build_generic_data_tensor(images,
                                                targets,
                                                shuffle,
                                                augmentation)

    def build_test_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns the (image batch, target batch) tensors of the test set. """
        images, _, targets = cifar10.load_test_data()
        return self.__build_generic_data_tensor(images,
                                                targets,
                                                shuffle,
                                                augmentation)

    @staticmethod
    def _next_feed_block(data, targets, start, feed_size):
        """
        Cuts the next feed_size samples out of data/targets, wrapping around at the end.

        Returns (data block, target block, start index of the following block).
        """
        total = len(data)
        stop = start + feed_size
        if stop <= total:
            return data[start:stop], targets[start:stop], stop

        # the block runs over the end: take the tail and continue at the front
        rest = stop - total
        data_block = np.concatenate((data[start:total], data[0:rest]))
        target_block = np.concatenate((targets[start:total], targets[0:rest]))
        return data_block, target_block, rest

    def _feed_queue(self, coord, enqueue_op, data_input, target_input, data, targets):
        """ Feeds blocks of samples into the queue until the coordinator requests a stop. """
        position = 0
        while not coord.should_stop():
            data_block, target_block, position = self._next_feed_block(
                data, targets, position, self.feed_size)
            try:
                self.sess.run(enqueue_op, feed_dict={data_input: data_block,
                                                     target_input: target_block})
            except (tf.errors.CancelledError, tf.errors.AbortedError):
                # close() cancelled the pending enqueue / closed the queue
                break

    def __build_generic_data_tensor(self, raw_images, raw_targets, shuffle, augmentation):
        """ Creates the input pipeline and performs some preprocessing. """

        # load the data from numpy into our queue in blocks of feed_size samples
        _, rows, cols, channels = raw_images.shape

        image_input = tf.placeholder(tf.float32, shape=[self.feed_size, rows, cols, channels])
        target_input = tf.placeholder(tf.float32, shape=[self.feed_size, self.NUMBER_OF_CLASSES])

        self.queue = tf.FIFOQueue(self.feed_queue_capacity, [tf.float32, tf.float32],
                                  shapes=[[rows, cols, channels], [self.NUMBER_OF_CLASSES]])
        enqueue_op = self.queue.enqueue_many([image_input, target_input])
        # queue.close() only builds an op; keep it so close() can actually run it
        self._close_op = self.queue.close(cancel_pending_enqueues=True)
        image, target = self.queue.dequeue()

        # Data Augmentation
        if augmentation:
            image = tf.image.resize_image_with_crop_or_pad(image, self.IMAGE_HEIGHT+4, self.IMAGE_WIDTH+4)
            image = tf.random_crop(image, [self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.NUM_OF_CHANNELS])
            image = tf.image.random_flip_left_right(image)

        image = tf.image.per_image_standardization(image)

        if shuffle:
            images_batch, target_batch = tf.train.shuffle_batch([image, target],
                                                                batch_size=self.batch_size,
                                                                capacity=self.batch_queue_capacity,
                                                                min_after_dequeue=self.min_after_dequeue)
        else:
            images_batch, target_batch = tf.train.batch([image, target],
                                                        batch_size=self.batch_size,
                                                        capacity=self.batch_queue_capacity)

        self.coord = tf.train.Coordinator()
        self.threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

        feeder = threading.Thread(target=self._feed_queue,
                                  args=(self.coord, enqueue_op, image_input, target_input,
                                        raw_images, raw_targets))
        # isDaemon() only reads the flag; it has to be set so that a blocked
        # feeder can never keep the interpreter alive
        feeder.daemon = True
        feeder.start()

        return images_batch, target_batch

    def __del__(self):
        self.close()

    def close(self):
        """ Stops the pipeline threads. Safe to call repeatedly or before a pipeline was built. """
        close_op = getattr(self, "_close_op", None)
        if close_op is None:
            return
        self._close_op = None

        self.coord.request_stop()
        # run the close op so that a feeder blocked on a full queue is cancelled
        self.sess.run(close_op)
        self.coord.join(self.threads)
class cifar100_data:
    """
    Downloads the CIFAR100 dataset and creates an input pipeline ready to be fed into a model.

    - feeds the images in blocks of feed_size samples into a queue
    - pads, randomly crops and flips the images if augmentation is requested
    - standardizes every image
    - shuffles the input if specified
    - builds batches
    """
    NUM_THREADS = 8
    NUMBER_OF_CLASSES = 100

    TRAIN_SET_SIZE = 50000
    TEST_SET_SIZE = 10000
    IMAGE_WIDTH = 32
    IMAGE_HEIGHT = 32
    NUM_OF_CHANNELS = 3

    def __init__(self, batch_size, sess,
                 feed_size=200,
                 feed_queue_capacity=800,
                 batch_queue_capacity=1000,
                 min_after_dequeue=1000):
        """
        Downloads the cifar100 data if necessary.

        batch_size:           number of samples per produced batch
        sess:                 session used to run the queue threads
        feed_size:            number of samples fed per enqueue call
        feed_queue_capacity:  capacity of the sample queue
        batch_queue_capacity: base capacity of the batch queue (3 * batch_size is added)
        min_after_dequeue:    passed to tf.train.shuffle_batch when shuffling
        """
        print("Loading CIFAR-100 data")
        self.batch_size = batch_size
        self.feed_size = feed_size
        self.feed_queue_capacity = feed_queue_capacity
        self.batch_queue_capacity = batch_queue_capacity + 3 * batch_size
        self.min_after_dequeue = min_after_dequeue
        self.sess = sess
        cifar100.maybe_download_and_extract()

    def build_train_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns the (image batch, target batch) tensors of the training set. """
        images, _, targets = cifar100.load_training_data()
        return self.__build_generic_data_tensor(images,
                                                targets,
                                                shuffle,
                                                augmentation)

    def build_test_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns the (image batch, target batch) tensors of the test set. """
        images, _, targets = cifar100.load_test_data()
        return self.__build_generic_data_tensor(images,
                                                targets,
                                                shuffle,
                                                augmentation)

    @staticmethod
    def _next_feed_block(data, targets, start, feed_size):
        """
        Cuts the next feed_size samples out of data/targets, wrapping around at the end.

        Returns (data block, target block, start index of the following block).
        """
        total = len(data)
        stop = start + feed_size
        if stop <= total:
            return data[start:stop], targets[start:stop], stop

        # the block runs over the end: take the tail and continue at the front
        rest = stop - total
        data_block = np.concatenate((data[start:total], data[0:rest]))
        target_block = np.concatenate((targets[start:total], targets[0:rest]))
        return data_block, target_block, rest

    def _feed_queue(self, coord, enqueue_op, data_input, target_input, data, targets):
        """ Feeds blocks of samples into the queue until the coordinator requests a stop. """
        position = 0
        while not coord.should_stop():
            data_block, target_block, position = self._next_feed_block(
                data, targets, position, self.feed_size)
            try:
                self.sess.run(enqueue_op, feed_dict={data_input: data_block,
                                                     target_input: target_block})
            except (tf.errors.CancelledError, tf.errors.AbortedError):
                # close() cancelled the pending enqueue / closed the queue
                break

    def __build_generic_data_tensor(self, raw_images, raw_targets, shuffle, augmentation):
        """ Creates the input pipeline and performs some preprocessing. """

        # load the data from numpy into our queue in blocks of feed_size samples
        _, rows, cols, channels = raw_images.shape

        image_input = tf.placeholder(tf.float32, shape=[self.feed_size, rows, cols, channels])
        target_input = tf.placeholder(tf.float32, shape=[self.feed_size, self.NUMBER_OF_CLASSES])

        self.queue = tf.FIFOQueue(self.feed_queue_capacity, [tf.float32, tf.float32],
                                  shapes=[[rows, cols, channels], [self.NUMBER_OF_CLASSES]])
        enqueue_op = self.queue.enqueue_many([image_input, target_input])
        # queue.close() only builds an op; keep it so close() can actually run it
        self._close_op = self.queue.close(cancel_pending_enqueues=True)
        image, target = self.queue.dequeue()

        # Data Augmentation
        if augmentation:
            image = tf.image.resize_image_with_crop_or_pad(image, self.IMAGE_HEIGHT+4, self.IMAGE_WIDTH+4)
            image = tf.random_crop(image, [self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.NUM_OF_CHANNELS])
            image = tf.image.random_flip_left_right(image)

        image = tf.image.per_image_standardization(image)

        if shuffle:
            images_batch, target_batch = tf.train.shuffle_batch([image, target],
                                                                batch_size=self.batch_size,
                                                                capacity=self.batch_queue_capacity,
                                                                min_after_dequeue=self.min_after_dequeue)
        else:
            images_batch, target_batch = tf.train.batch([image, target],
                                                        batch_size=self.batch_size,
                                                        capacity=self.batch_queue_capacity)

        self.coord = tf.train.Coordinator()
        self.threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

        feeder = threading.Thread(target=self._feed_queue,
                                  args=(self.coord, enqueue_op, image_input, target_input,
                                        raw_images, raw_targets))
        # isDaemon() only reads the flag; it has to be set so that a blocked
        # feeder can never keep the interpreter alive
        feeder.daemon = True
        feeder.start()

        return images_batch, target_batch

    def __del__(self):
        self.close()

    def close(self):
        """
        Stops the pipeline threads and closes the session.
        Safe to call repeatedly or before a pipeline was built.
        """
        close_op = getattr(self, "_close_op", None)
        if close_op is None:
            return
        self._close_op = None

        self.coord.request_stop()
        # run the close op so that a feeder blocked on a full queue is cancelled
        self.sess.run(close_op)
        self.coord.join(self.threads)
        # kept for backward compatibility: this pipeline has always closed
        # the session it was given
        self.sess.close()
class imagenet_data:
    """
    Downloads the imagenet dataset and creates an input pipeline ready to be fed into a model.

    memory calculation:
      1 image is 299*299*3*4 bytes = ~1MB
      1024MB RAM = ~1000 images

    empirical memory usage with default config:
      TensorFlow +500MB
      imagenet_utils (loading all paths and labels) +400MB
      build input pipeline and fill queues +2.2GB

    - decodes jpg images
    - scales images into a uniform size
    - standardizes every image
    - shuffles the input if specified
    - builds batches
    """
    NUM_THREADS = 8
    NUMBER_OF_CLASSES = 1000
    TRAIN_SET_SIZE = len(imagenet.data.train_filenames)  # 1281167 # ~250MB for string with paths
    TEST_SET_SIZE = len(imagenet.data.val_filenames)  # 50000
    IMAGE_HEIGHT = 299
    IMAGE_WIDTH = 299
    NUM_OF_CHANNELS = 3

    def __init__(self, batch_size, sess,
                 filename_feed_size=200,
                 filename_queue_capacity=800,
                 batch_queue_capacity=1000,
                 min_after_dequeue=1000,
                 image_height=IMAGE_HEIGHT,
                 image_width=IMAGE_WIDTH):
        """
        Checks that the data is available.

        batch_size:              number of samples per produced batch
        sess:                    session used to run the queue threads
        filename_feed_size:      number of (path, label) pairs fed per enqueue call
        filename_queue_capacity: capacity of the path/label queue
        batch_queue_capacity:    base capacity of the batch queue (3 * batch_size is added)
        min_after_dequeue:       passed to tf.train.shuffle_batch when shuffling
        image_height/width:      size every image is resized to
        """
        print("Loading imagenet data")
        self.batch_size = batch_size
        self.filename_feed_size = filename_feed_size
        self.filename_queue_capacity = filename_queue_capacity
        self.batch_queue_capacity = batch_queue_capacity + 3 * batch_size
        self.min_after_dequeue = min_after_dequeue
        self.sess = sess
        self.IMAGE_HEIGHT = image_height
        self.IMAGE_WIDTH = image_width
        imagenet.check_if_downloaded()

    def build_train_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns the (image batch, target batch) tensors of the training set. """
        img_path, cls = imagenet.load_training_data()
        return self.__build_generic_data_tensor(img_path, cls, shuffle, augmentation)

    def build_test_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns the (image batch, target batch) tensors of the validation set. """
        img_path, cls = imagenet.load_test_data()
        return self.__build_generic_data_tensor(img_path, cls, shuffle, augmentation)

    @staticmethod
    def _next_feed_block(data, targets, start, feed_size):
        """
        Cuts the next feed_size samples out of data/targets, wrapping around at the end.

        Returns (data block, target block, start index of the following block).
        """
        total = len(data)
        stop = start + feed_size
        if stop <= total:
            return data[start:stop], targets[start:stop], stop

        # the block runs over the end: take the tail and continue at the front
        rest = stop - total
        data_block = np.concatenate((data[start:total], data[0:rest]))
        target_block = np.concatenate((targets[start:total], targets[0:rest]))
        return data_block, target_block, rest

    def _feed_queue(self, coord, enqueue_op, data_input, target_input, data, targets):
        """ Feeds blocks of samples into the queue until the coordinator requests a stop. """
        position = 0
        while not coord.should_stop():
            data_block, target_block, position = self._next_feed_block(
                data, targets, position, self.filename_feed_size)
            try:
                self.sess.run(enqueue_op, feed_dict={data_input: data_block,
                                                     target_input: target_block})
            except (tf.errors.CancelledError, tf.errors.AbortedError):
                # close() cancelled the pending enqueue / closed the queue
                break

    def __build_generic_data_tensor(self, all_img_paths, all_targets, shuffle, augmentation):
        """
        Creates the input pipeline and performs some preprocessing.
        The full dataset needs to fit into memory for this version.
        """

        ## filename queue
        imagepath_input = tf.placeholder(tf.string, shape=[self.filename_feed_size])
        target_input = tf.placeholder(tf.float32, shape=[self.filename_feed_size])

        self.filename_queue = tf.FIFOQueue(self.filename_queue_capacity, [tf.string, tf.float32],
                                           shapes=[[], []])
        enqueue_op = self.filename_queue.enqueue_many([imagepath_input, target_input])
        # queue.close() only builds an op; keep it so close() can actually run it
        self._close_op = self.filename_queue.close(cancel_pending_enqueues=True)
        single_path, single_target = self.filename_queue.dequeue()

        # one hot encode the target (the label is shifted down by one first)
        single_target = tf.cast(tf.sub(single_target, tf.constant(1.0)), tf.int32)
        single_target = tf.one_hot(single_target, depth=self.NUMBER_OF_CLASSES)

        # load the jpg image according to path
        file_content = tf.read_file(single_path)
        single_image = tf.image.decode_jpeg(file_content, channels=self.NUM_OF_CHANNELS)

        # convert to [0, 1]
        single_image = tf.image.convert_image_dtype(single_image,
                                                    dtype=tf.float32,
                                                    saturate=True)

        single_image = tf.image.resize_images(single_image, [self.IMAGE_HEIGHT, self.IMAGE_WIDTH])

        # Data Augmentation
        if augmentation:
            single_image = tf.image.resize_image_with_crop_or_pad(single_image, self.IMAGE_HEIGHT+4, self.IMAGE_WIDTH+4)
            single_image = tf.random_crop(single_image, [self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.NUM_OF_CHANNELS])
            single_image = tf.image.random_flip_left_right(single_image)

        single_image = tf.image.per_image_standardization(single_image)

        if shuffle:
            images_batch, target_batch = tf.train.shuffle_batch([single_image, single_target],
                                                                batch_size=self.batch_size,
                                                                capacity=self.batch_queue_capacity,
                                                                min_after_dequeue=self.min_after_dequeue,
                                                                num_threads=self.NUM_THREADS)
        else:
            images_batch, target_batch = tf.train.batch([single_image, single_target],
                                                        batch_size=self.batch_size,
                                                        capacity=self.batch_queue_capacity,
                                                        num_threads=1)

        self.coord = tf.train.Coordinator()
        self.threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

        feeder = threading.Thread(target=self._feed_queue,
                                  args=(self.coord, enqueue_op, imagepath_input, target_input,
                                        all_img_paths, all_targets))
        # isDaemon() only reads the flag; it has to be set so that a blocked
        # feeder can never keep the interpreter alive
        feeder.daemon = True
        feeder.start()

        return images_batch, target_batch

    def __del__(self):
        self.close()

    def close(self):
        """ Stops the pipeline threads. Safe to call repeatedly or before a pipeline was built. """
        close_op = getattr(self, "_close_op", None)
        if close_op is None:
            return
        self._close_op = None

        self.coord.request_stop()
        # run the close op so that a feeder blocked on a full queue is cancelled
        self.sess.run(close_op)
        self.coord.join(self.threads)
class mnist_data:
    """
    Downloads the MNIST dataset and creates an input pipeline ready to be fed into a model.

    - reshapes flat images into IMAGE_HEIGHT x IMAGE_WIDTH x NUM_OF_CHANNELS
    - optionally augments (pad + random crop + random horizontal flip)
    - standardizes every image (tf.image.per_image_standardization)
    - shuffles the input if specified
    - builds batches
    """
    NUM_THREADS = 8
    NUMBER_OF_CLASSES = 10
    IMAGE_WIDTH = 28
    IMAGE_HEIGHT = 28
    NUM_OF_CHANNELS = 1

    def __init__(self, batch_size):
        """ Downloads the mnist data if necessary and records the set sizes.

        Args:
            batch_size: number of examples per batch produced by the
                build_*_data_tensor methods.
        """
        print("Loading MNIST data")
        self.batch_size = batch_size
        self.mnist = input_data.read_data_sets('data/MNIST', one_hot=True)

        self.TRAIN_SET_SIZE = self.mnist.train.images.shape[0]
        self.TEST_SET_SIZE = self.mnist.test.images.shape[0]
        self.VALIDATION_SET_SIZE = self.mnist.validation.images.shape[0]

    def build_train_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns (images_batch, labels_batch) tensors for the training split. """
        return self.__build_generic_data_tensor(self.mnist.train.images,
                                                self.mnist.train.labels,
                                                shuffle,
                                                augmentation)

    # `shuffle` now defaults to False on the test and validation builders as
    # well, so all three builders share one signature (existing positional
    # and keyword callers are unaffected).
    def build_test_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns (images_batch, labels_batch) tensors for the test split. """
        return self.__build_generic_data_tensor(self.mnist.test.images,
                                                self.mnist.test.labels,
                                                shuffle,
                                                augmentation)

    def build_validation_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns (images_batch, labels_batch) tensors for the validation split. """
        return self.__build_generic_data_tensor(self.mnist.validation.images,
                                                self.mnist.validation.labels,
                                                shuffle,
                                                augmentation)

    def __build_generic_data_tensor(self, raw_images, raw_targets, shuffle, augmentation):
        """ Creates the input pipeline and performs some preprocessing.

        Args:
            raw_images: array of flat images, one row per example.
            raw_targets: array of targets aligned with raw_images.
            shuffle: forwarded to tf.train.slice_input_producer.
            augmentation: if true, pad by 4 pixels, random-crop back to the
                original size and randomly flip left/right.

        Returns:
            Tuple (images_batch, labels_batch) of batch tensors.
        """
        images = ops.convert_to_tensor(raw_images)
        targets = ops.convert_to_tensor(raw_targets)

        set_size = raw_images.shape[0]

        # use the class constants instead of repeating the literals 28/28/1
        images = tf.reshape(images, [set_size,
                                     self.IMAGE_HEIGHT,
                                     self.IMAGE_WIDTH,
                                     self.NUM_OF_CHANNELS])
        image, label = tf.train.slice_input_producer([images, targets], shuffle=shuffle)

        # Data Augmentation
        if augmentation:
            image = tf.image.resize_image_with_crop_or_pad(image,
                                                           self.IMAGE_HEIGHT + 4,
                                                           self.IMAGE_WIDTH + 4)
            image = tf.random_crop(image, [self.IMAGE_HEIGHT,
                                           self.IMAGE_WIDTH,
                                           self.NUM_OF_CHANNELS])
            image = tf.image.random_flip_left_right(image)

        image = tf.image.per_image_standardization(image)

        images_batch, labels_batch = tf.train.batch([image, label],
                                                    batch_size=self.batch_size,
                                                    num_threads=self.NUM_THREADS)

        return images_batch, labels_batch
class penn_treebank_data:
    """
    Downloads the Penn Treebank dataset and creates an input pipeline ready to be fed into a model.

    - reshapes the raw id sequence into batch_size rows
    - yields consecutive windows of num_steps ids (inputs) together with the
      same windows shifted by one position (targets)
    """

    NUM_THREADS = 8

    def __init__(self, batch_size, num_steps):
        """ Downloads the data if necessary.

        Args:
            batch_size: number of parallel sequences per batch.
            num_steps: number of time steps (unrolls) per batch.
        """
        print("Loading penn treebank data")
        self.batch_size = batch_size
        self.num_steps = num_steps
        penn_treebank.download_data()

    def build_train_data_tensor(self):
        """ Returns the (x, y) tensor pair for the training split. """
        data, _ = penn_treebank.load_training_data()
        return self.__build_generic_data_tensor(data)

    def build_test_data_tensor(self):
        """ Returns the (x, y) tensor pair for the test split. """
        # Previously this loaded the *training* data, so "test" results were
        # computed on the training set.
        # NOTE(review): assumes utils.penn_treebank exposes load_test_data()
        # with the same return shape as load_training_data() - confirm.
        data, _ = penn_treebank.load_test_data()
        return self.__build_generic_data_tensor(data)

    def build_validation_data_tensor(self):
        """ Returns the (x, y) tensor pair for the validation split. """
        data, _ = penn_treebank.load_validation_data()
        return self.__build_generic_data_tensor(data)

    def __build_generic_data_tensor(self, raw_data):
        """Iterate on the raw PTB data.

        This chunks up raw_data into batches of examples and returns Tensors
        that are drawn from these batches.  The batch size and the number of
        unrolls are taken from self.batch_size and self.num_steps.

        Args:
            raw_data: one of the raw data outputs of the loader, convertible
                to an int32 tensor.
        Returns:
            A pair of Tensors, each shaped [batch_size, num_steps]. The second
            element of the tuple is the same data time-shifted to the right
            by one.
        Raises:
            tf.errors.InvalidArgumentError: if batch_size or num_steps are too
                high (raised when the graph is run).
        """
        raw_data = tf.convert_to_tensor(raw_data, name="raw_data", dtype=tf.int32)

        data_len = tf.size(raw_data)
        batch_len = data_len // self.batch_size
        # drop the tail that does not fill a complete row
        data = tf.reshape(raw_data[0: self.batch_size * batch_len],
                          [self.batch_size, batch_len])

        # -1 because the targets need one extra element to the right
        epoch_size = (batch_len - 1) // self.num_steps
        assertion = tf.assert_positive(
            epoch_size,
            message="epoch_size == 0, decrease batch_size or num_steps")
        with tf.control_dependencies([assertion]):
            epoch_size = tf.identity(epoch_size, name="epoch_size")

        i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()
        x = tf.slice(data, [0, i * self.num_steps],
                     [self.batch_size, self.num_steps])
        y = tf.slice(data, [0, i * self.num_steps + 1],
                     [self.batch_size, self.num_steps])
        return x, y
class svhn_data:
    """
    Downloads the SVHN dataset and creates an input pipeline ready to be fed into a model.

    - feeds the in-memory arrays into a queue from a background thread
    - optionally augments (pad + random crop + random horizontal flip)
    - standardizes every image (tf.image.per_image_standardization)
    - shuffles the input if specified
    - builds batches
    """
    NUM_THREADS = 8
    NUMBER_OF_CLASSES = 10

    TRAIN_SET_SIZE = 73257
    TEST_SET_SIZE = 26032
    IMAGE_WIDTH = 32
    IMAGE_HEIGHT = 32
    NUM_OF_CHANNELS = 3

    # Pipeline state, filled in by __build_generic_data_tensor.  The class
    # level defaults make close() / __del__ safe on an instance whose
    # pipeline was never built.
    queue = None
    coord = None
    threads = None
    _close_op = None

    def __init__(self, batch_size, sess, feed_size=200, feed_queue_capacity=800,
                 batch_queue_capacity=5, min_after_dequeue=4):
        """ Downloads the SVHN data if necessary.

        Args:
            batch_size: number of examples per produced batch.
            sess: session used to start the queue runners and to feed data.
            feed_size: number of examples pushed into the feed queue at once.
            feed_queue_capacity: capacity of the feed queue.
            batch_queue_capacity: base capacity of the batch queue;
                3 * batch_size is added on top.
            min_after_dequeue: forwarded to tf.train.shuffle_batch.
        """
        print("Loading SVHN data")
        self.batch_size = batch_size
        self.feed_size = feed_size
        self.feed_queue_capacity = feed_queue_capacity
        self.batch_queue_capacity = batch_queue_capacity + 3 * batch_size
        self.min_after_dequeue = min_after_dequeue
        self.sess = sess
        svhn.download_data()

    def build_train_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns (images_batch, target_batch) tensors for the training split. """
        images, _, targets = svhn.load_training_data()
        return self.__build_generic_data_tensor(images,
                                                targets,
                                                shuffle,
                                                augmentation)

    def build_test_data_tensor(self, shuffle=False, augmentation=False):
        """ Returns (images_batch, target_batch) tensors for the test split. """
        images, _, targets = svhn.load_test_data()
        return self.__build_generic_data_tensor(images,
                                                targets,
                                                shuffle,
                                                augmentation)

    def __build_generic_data_tensor(self, raw_images, raw_targets, shuffle, augmentation):
        """
        Creates the input pipeline and performs some preprocessing.
        The full dataset needs to fit into memory for this version.

        Args:
            raw_images: 4-d array of images, first axis indexes the examples.
            raw_targets: array of targets with NUMBER_OF_CLASSES columns,
                aligned with raw_images.
            shuffle: if true, use tf.train.shuffle_batch, else tf.train.batch.
            augmentation: if true, pad by 4 pixels, random-crop back to the
                original size and randomly flip left/right.

        Returns:
            Tuple (images_batch, target_batch) of batch tensors.

        Note:
            Each call overwrites self.queue / self.coord / self.threads, so
            close() only tears down the most recently built pipeline.
        """
        # load the data from numpy into our queue in blocks of feed_size samples
        _, dim1, dim2, channels = raw_images.shape

        image_input = tf.placeholder(tf.float32,
                                     shape=[self.feed_size, dim1, dim2, channels])
        target_input = tf.placeholder(tf.float32,
                                      shape=[self.feed_size, self.NUMBER_OF_CLASSES])

        self.queue = tf.FIFOQueue(self.feed_queue_capacity, [tf.float32, tf.float32],
                                  shapes=[[dim1, dim2, channels],
                                          [self.NUMBER_OF_CLASSES]])
        enqueue_op = self.queue.enqueue_many([image_input, target_input])
        # Build the close op now: FIFOQueue.close() only *creates* an op, and
        # creating it here keeps close() free of graph modifications.
        self._close_op = self.queue.close(cancel_pending_enqueues=True)
        image, target = self.queue.dequeue()

        # Data Augmentation
        if augmentation:
            image = tf.image.resize_image_with_crop_or_pad(image,
                                                           self.IMAGE_HEIGHT + 4,
                                                           self.IMAGE_WIDTH + 4)
            image = tf.random_crop(image, [self.IMAGE_HEIGHT,
                                           self.IMAGE_WIDTH,
                                           self.NUM_OF_CHANNELS])
            image = tf.image.random_flip_left_right(image)

        image = tf.image.per_image_standardization(image)

        if shuffle:
            images_batch, target_batch = tf.train.shuffle_batch(
                [image, target],
                batch_size=self.batch_size,
                capacity=self.batch_queue_capacity,
                min_after_dequeue=self.min_after_dequeue)
        else:
            images_batch, target_batch = tf.train.batch(
                [image, target],
                batch_size=self.batch_size,
                capacity=self.batch_queue_capacity)

        def enqueue(sess):
            """Feed blocks of feed_size examples until asked to stop."""
            start = 0
            total = len(raw_images)
            try:
                while not self.coord.should_stop():
                    stop = start + self.feed_size
                    if stop <= total:
                        curr_data = raw_images[start:stop]
                        curr_target = raw_targets[start:stop]
                        start = stop
                    else:
                        # wrap around: tail of the set plus the first `rest` items
                        rest = stop - total
                        curr_data = np.concatenate((raw_images[start:total],
                                                    raw_images[0:rest]))
                        curr_target = np.concatenate((raw_targets[start:total],
                                                      raw_targets[0:rest]))
                        start = rest

                    sess.run(enqueue_op, feed_dict={image_input: curr_data,
                                                    target_input: curr_target})
            except (tf.errors.CancelledError, tf.errors.AbortedError):
                # queue/session closed while an enqueue was pending: this is
                # the regular shutdown path, not an error
                return

        enqueue_thread = threading.Thread(target=enqueue, args=[self.sess])
        # Was `enqueue_thread.isDaemon()`, which only reads the flag; the
        # feeder has to be a daemon so it cannot block interpreter exit.
        enqueue_thread.daemon = True

        self.coord = tf.train.Coordinator()
        self.threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

        enqueue_thread.start()

        return images_batch, target_batch

    def __del__(self):
        self.close()

    def close(self):
        """ Stops the feeder and the queue runners.

        Safe to call more than once and on an instance whose pipeline was
        never built (both happen when close() is called explicitly and then
        again from __del__).
        """
        if self._close_op is None:
            return
        close_op, self._close_op = self._close_op, None

        self.coord.request_stop()
        try:
            # Actually execute the close op; previously the op was created
            # but never run, so the feed queue stayed open.
            self.sess.run(close_op)
        except RuntimeError:
            # The session was already closed by the caller; closing it has
            # cancelled the pending enqueues, nothing left to do here.
            pass
        self.coord.join(self.threads)
def dense(data,
          n_units,
          phase_train,
          activation,
          scope,
          initializer,
          dropout=True):
    """ Fully-connected layer: activation(data * W + b), optionally followed by dropout.

    Args:
      data: 2-d input tensor; its second dimension is the input width.
      n_units: number of output units.
      phase_train: boolean tensor; dropout is only applied when it is true.
      activation: callable applied to the affine output.
      scope: variable scope name for the layer.
      initializer: accepted for interface compatibility; the weights are
        currently drawn from a normal distribution with stddev 0.01.
      dropout: whether to add dropout (keep probability 0.5).
    Return:
      the output tensor of the layer
    """
    in_shape = data.get_shape().as_list()
    print("DENSE IN: ", data)
    with tf.variable_scope(scope):
        weights = tf.Variable(
            tf.random_normal([in_shape[1], n_units], stddev=0.01),
            name='dense-weights')
        offset = tf.get_variable('dense-bias',
                                 [n_units],
                                 initializer=tf.zeros_initializer)
        activated = activation(tf.matmul(data, weights) + offset)
        if dropout:
            # only drop units while training; pass through otherwise
            result = tf.cond(phase_train,
                             lambda: tf.nn.dropout(activated, 0.5),
                             lambda: activated)
        else:
            result = activated

    print("DENSE OUT:", result)
    return result
def conv2d(data,
           n_filters,
           scope,
           initializer,
           k_h=3, k_w=3,
           stride_h=1, stride_w=1,
           bias=True,
           padding='SAME'):
    """ Convolutional layer implementation without an activation function.

    Args:
      data: 4-d input tensor; the last dimension is the number of channels.
      n_filters: number of output filters.
      scope: variable scope name for the layer.
      initializer: accepted for interface compatibility; the weights are
        currently drawn from a normal distribution with stddev 0.01.
      k_h, k_w: kernel height and width.
      stride_h, stride_w: strides along height and width.
      bias: whether to add a (zero-initialized) bias to the convolution.
      padding: padding mode forwarded to tf.nn.conv2d.
    Return:
      the convolution output (plus bias when requested)
    """
    with tf.variable_scope(scope):
        print("CONV IN: ", data)
        in_channels = int(data.get_shape()[-1])
        w = tf.Variable(
            tf.random_normal([int(k_h), int(k_w), in_channels, int(n_filters)],
                             stddev=0.01),
            name='conv-weights')
        conv = tf.nn.conv2d(data, w,
                            strides=[1, stride_h, stride_w, 1],
                            padding=padding)
        # The `bias` flag used to be ignored and a bias was always added;
        # honor it so bias=False really yields a bias-free convolution.
        if bias:
            b = tf.get_variable('conv-bias',
                                [n_filters],
                                initializer=tf.zeros_initializer)
            conv = tf.nn.bias_add(conv, b)
        print("CONV OUT: ", conv)
    return conv
def push_into_queue(value, queue, tag, step, writer):
    """Replace the oldest entry of a fixed-length queue and log the running average.

    The right-most element of `queue` is dropped, `value` is inserted on the
    left, and the mean over the whole queue is written to `writer` as a
    scalar summary named `tag` at global step `step`.

    Return:
      the new average as a Python scalar
    """
    # drop the oldest entry, then insert the newest one at the front
    queue.pop()
    queue.appendleft(value)

    running_mean = np.mean(queue).item()
    summary_value = tf.Summary.Value(tag=tag, simple_value=running_mean)
    writer.add_summary(tf.Summary(value=[summary_value]), global_step=step)
    return running_mean
def conv_block(data, n_filter, is_training, scope, stride=1):
    """A ConvBlock is the repetitive composition used in the model.

    It applies conv3x3 -> batch norm -> relu twice; both convolutions use
    `n_filter` filters and the given `stride`.
    """
    net = data
    with tf.variable_scope(scope):
        # two identical conv/bn/relu stages, created in the same order as before
        for _ in range(2):
            net = slim.layers.conv2d(
                net, n_filter, [3, 3], stride=stride, padding='SAME',
                activation_fn=None,
                weights_initializer=tf.random_normal_initializer(stddev=0.01))
            net = slim.layers.batch_norm(net, scale=False, decay=0.9,
                                         epsilon=0.001, is_training=is_training)
            net = tf.nn.relu(net)

    return net
class ResNet(object):
    """ResNet model with a tree ("deep roots") layout.

    After an initial convolution the feature maps are repeatedly split in two
    along the channel axis; every half is processed by its own stage of
    residual units.  After three levels the eight branches are concatenated
    again and fed to the classifier.
    """

    def __init__(self, hps, images, labels, mode):
        """ResNet constructor.

        Args:
          hps: Hyperparameters.
          images: Batches of images. [batch_size, image_size, image_size, 3]
          labels: Batches of labels. [batch_size, num_classes]
          mode: One of 'train' and 'eval'.
        """
        self.hps = hps
        self._images = images
        self.labels = labels
        self.mode = mode

        self._extra_train_ops = []

    def build_graph(self):
        """Build a whole graph for the model."""
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self._build_model()
        custom_ops.log_number_of_params()
        if self.mode == 'train':
            self._build_train_op()
        self.summaries = tf.merge_all_summaries()

    def _build_model(self):
        """Build the core model within the graph."""
        x = self._images
        tf.logging.info('Image Shape: %s', x.get_shape())

        with tf.variable_scope('init'):
            x = self._conv('init_conv', x, 16, stride=1)

        tf.logging.info('Initial Output: %s', x.get_shape())

        # (split scope, block scope, stride of the first residual unit).
        # The second and third split scopes share the name 'stage2-split';
        # that is kept as-is so existing op names stay stable.
        levels = (('block1-split', 'block1', 1),
                  ('stage2-split', 'block2', 2),
                  ('stage2-split', 'block3', 2))
        branches = [('', x)]
        for number, (split_scope, block_scope, stride) in enumerate(levels, start=1):
            branches = self._grow_level(branches, number, split_scope,
                                        block_scope, stride)

        with tf.variable_scope('unify'):
            x = tf.concat(3, [tensor for _, tensor in branches])
            tf.logging.info('Concat Output: %s', x.get_shape())

        with tf.variable_scope('final'):
            x = self._conv('unify', x, 40, 1)
            x = self._batch_norm(x)
            x = self._relu(x, self.hps.relu_leakiness)
            # avg pool
            x = self._global_avg_pool(x)
            tf.logging.info('Final Layer Output: %s', x.get_shape())

        with tf.variable_scope('logit'):
            x = slim.layers.flatten(x)
            tf.logging.info('Flatten Output: %s', x.get_shape())
            self.logits = self._fully_connected(x, self.hps.num_classes)
            self.predictions = tf.nn.softmax(self.logits)

        with tf.variable_scope('costs'):
            xent = tf.nn.softmax_cross_entropy_with_logits(
                self.logits, self.labels)
            self.cost = tf.reduce_mean(xent, name='xent')
            self.cost += self._decay()

            tf.scalar_summary(self.mode + '/cost', self.cost)

    def _grow_level(self, branches, number, split_scope, block_scope, stride):
        """Split every branch in two along channels and run a stage on each half.

        Args:
          branches: list of (name, tensor) pairs, in tree order.
          number: 1-based level number, only used for logging.
          split_scope: variable scope wrapping the split ops.
          block_scope: variable scope wrapping the stages of this level.
          stride: stride of the first residual unit of every stage.
        Returns:
          list of (name, tensor) pairs, twice as long as `branches`; child
          names are the parent name with 'A' / 'B' appended.
        """
        with tf.variable_scope(split_scope):
            halves = []
            for name, tensor in branches:
                left, right = tf.split(3, 2, tensor)
                halves.append((name + 'A', left))
                halves.append((name + 'B', right))

        with tf.variable_scope(block_scope):
            tf.logging.info("Block %d, input: %s", number,
                            branches[0][1].get_shape())
            return [(name,
                     self.stage(tensor, self.hps.num_residual_units, 8,
                                first_layer_stride=stride, scope=name))
                    for name, tensor in halves]

    def stage(self, x, n_residuals, out_filter, first_layer_stride=2, scope='default_stage'):
        """Stack residual units; only the first one uses `first_layer_stride`.

        At least one unit is always created, also for n_residuals < 1.
        """
        with tf.variable_scope(scope):
            tf.logging.info("Stage " + scope)
            for i in range(max(1, n_residuals)):
                with tf.variable_scope('residual_' + str(i)):
                    stride = first_layer_stride if i == 0 else 1
                    x = self._residual(x, out_filter, stride=stride)
            return x

    def _classic(self, x, out_filter, stride=1):
        """Plain pre-activation layer: batch norm -> relu -> conv."""
        x = self._batch_norm(x)
        x = self._relu(x, self.hps.relu_leakiness)
        x = self._conv('conv', x, out_filter, stride=stride)
        tf.logging.info('Classic Block Output: %s', x.get_shape())
        return x

    def _two_sub_layers(self, x, out_filter, stride):
        """Two pre-activation conv layers; only the first one is strided."""
        with tf.variable_scope('sub1'):
            x = self._batch_norm(x)
            x = self._relu(x, self.hps.relu_leakiness)
            x = self._conv('conv1', x, out_filter, stride=stride)

        with tf.variable_scope('sub2'):
            x = self._batch_norm(x)
            x = self._relu(x, self.hps.relu_leakiness)
            x = self._conv('conv2', x, out_filter, stride=1)
        return x

    def _match_shortcut(self, orig_x, x, out_filter, stride):
        """Pool and zero-pad the shortcut so it can be combined with `x`.

        Nothing is done when channel count and spatial size already agree.

        Raises:
          ValueError: if the shortcut has more channels than `out_filter`.
        """
        in_filter = orig_x.get_shape()[-1].value
        in_size = orig_x.get_shape()[-2].value
        out_size = x.get_shape()[-2].value
        if in_filter == out_filter and in_size == out_size:
            return orig_x

        missing = out_filter - in_filter
        if missing < 0:
            raise ValueError(
                'shortcut has %d channels but the unit outputs only %d'
                % (in_filter, out_filter))
        # split the padding so that odd differences also add up exactly
        pad_front = missing // 2
        pad_back = missing - pad_front

        orig_x = tf.nn.avg_pool(orig_x, [1, stride, stride, 1],
                                [1, stride, stride, 1], 'VALID')
        orig_x = tf.pad(orig_x,
                        [[0, 0], [0, 0], [0, 0], [pad_front, pad_back]])
        tf.logging.info("avg pooling to fit dimensions. Add out: %s", x.get_shape())
        return orig_x

    def _residual(self, x, out_filter, stride=1):
        """Residual unit with 2 sub layers."""
        orig_x = x
        x = self._two_sub_layers(x, out_filter, stride)

        with tf.variable_scope('sub_add'):
            x += self._match_shortcut(orig_x, x, out_filter, stride)

        tf.logging.info('Residual Block Output: %s', x.get_shape())
        return x

    def _highway(self, x, out_filter, bias_init, stride=1):
        """Highway unit with 2 sub layers.

        Args:
          bias_init: initial bias of the transform gate.
        """
        orig_x = x
        x = self._two_sub_layers(x, out_filter, stride)

        with tf.variable_scope('sub_add'):
            # Same adaptation as in _residual; it now also covers the case
            # where only the spatial size changed (stride > 1, equal filters).
            orig_x = self._match_shortcut(orig_x, x, out_filter, stride)

        filter_size = 3
        n = filter_size * filter_size * out_filter
        T = slim.conv2d(x, out_filter, [3, 3], stride=1,
                        weights_initializer=tf.random_normal_initializer(
                            stddev=np.sqrt(2.0 / n)),
                        biases_initializer=tf.constant_initializer(bias_init),
                        activation_fn=tf.nn.sigmoid,
                        scope='transform_gate_1')

        # bias_init leads the network initially to be biased towards carry
        # behaviour (i.e. T = 0)
        x = T * x + (1.0 - T) * orig_x

        tf.logging.info('Highway Block Output: %s', x.get_shape())
        return x

    def _build_train_op(self):
        """Build training specific ops for the graph.

        Raises:
          ValueError: if hps.optimizer is neither 'sgd' nor 'mom'.
        """
        # Fail early with a clear message; an unknown name used to surface
        # as an unbound local variable further down.
        if self.hps.optimizer not in ('sgd', 'mom'):
            raise ValueError("Unknown optimizer %r; expected 'sgd' or 'mom'"
                             % (self.hps.optimizer,))

        self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
        tf.scalar_summary(self.mode + '/learning rate', self.lrn_rate)

        trainable_variables = tf.trainable_variables()
        grads = tf.gradients(self.cost, trainable_variables)

        if self.hps.optimizer == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
        else:
            optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)

        apply_op = optimizer.apply_gradients(
            zip(grads, trainable_variables),
            global_step=self.global_step, name='train_step')

        # run the batch-norm moving-average updates together with the step
        train_ops = ([apply_op] + self._extra_train_ops +
                     tf.get_collection(tf.GraphKeys.UPDATE_OPS))
        self.train_op = tf.group(*train_ops)

    def _decay(self):
        """L2 weight decay loss over all variables with 'weights' in their name."""
        costs = [tf.nn.l2_loss(var)
                 for var in tf.trainable_variables()
                 if var.op.name.find(r'weights') > 0]

        return tf.mul(self.hps.weight_decay_rate, tf.add_n(costs))

    def _batch_norm(self, x):
        """Batch normalization; batch statistics are only used in 'train' mode."""
        return slim.layers.batch_norm(x, scale=False, decay=0.9, scope='bn_2',
                                      is_training=(self.mode == 'train'))

    def _relu(self, x, leakiness=0.0):
        """Relu, with optional leaky support."""
        return tf.select(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')

    def _conv(self, name, x, out_filters, stride):
        """3x3 'SAME' convolution without activation, He-style normal init."""
        filter_size = 3
        n = filter_size * filter_size * out_filters
        return slim.layers.conv2d(
            x, out_filters, [filter_size, filter_size], stride=stride,
            padding='SAME', activation_fn=None,
            weights_initializer=tf.random_normal_initializer(
                stddev=np.sqrt(2.0 / n)),
            scope=name)

    def _fully_connected(self, x, out_dim):
        """Linear layer without activation."""
        return slim.layers.fully_connected(
            x, out_dim,
            activation_fn=None,
            weights_initializer=tf.uniform_unit_scaling_initializer(factor=1.0))

    def _max_pool(self, x):
        """2x2 max pooling with stride 2."""
        x = tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
        tf.logging.info('Max-Pool Output: %s', x.get_shape())
        return x

    def _global_avg_pool(self, x):
        """Average over the two spatial axes of a 4-d tensor."""
        assert x.get_shape().ndims == 4
        return tf.reduce_mean(x, [1, 2])
"""ResNet model.""" 23 | 24 | def __init__(self, hps, images, labels, mode): 25 | """ResNet constructor. 26 | 27 | Args: 28 | hps: Hyperparameters. 29 | images: Batches of images. [batch_size, image_size, image_size, 3] 30 | labels: Batches of labels. [batch_size, num_classes] 31 | mode: One of 'train' and 'eval'. 32 | """ 33 | self.hps = hps 34 | self._images = images 35 | self.labels = labels 36 | self.mode = mode 37 | 38 | self._extra_train_ops = [] 39 | 40 | def build_graph(self): 41 | """Build a whole graph for the model.""" 42 | self.global_step = tf.Variable(0, name='global_step', trainable=False) 43 | self._build_model() 44 | custom_ops.log_number_of_params() 45 | if self.mode == 'train': 46 | self._build_train_op() 47 | self.summaries = tf.merge_all_summaries() 48 | 49 | def _build_model(self): 50 | """Build the core model within the graph.""" 51 | x = self._images 52 | tf.logging.info('Image Shape: %s', x.get_shape()) 53 | 54 | with tf.variable_scope('init'): 55 | x = self._conv('init_conv', x, 10, stride=1) 56 | 57 | tf.logging.info('Initial Output: %s', x.get_shape()) 58 | 59 | with tf.variable_scope('stage1'): 60 | tf.logging.info("Stage 1") 61 | x = self.stage(x, self.hps.num_residual_units, 10, first_layer_stride=1) 62 | 63 | with tf.variable_scope('stage2'): 64 | tf.logging.info("Stage 2") 65 | x = self.stage(x, self.hps.num_residual_units, 20, first_layer_stride=2) 66 | 67 | with tf.variable_scope('stage3'): 68 | tf.logging.info("Stage 3") 69 | x = self.stage(x, self.hps.num_residual_units, 40, first_layer_stride=2) 70 | 71 | 72 | with tf.variable_scope('final'): 73 | x = self._batch_norm(x) 74 | x = self._relu(x, self.hps.relu_leakiness) 75 | # avg pool 76 | x = self._global_avg_pool(x) 77 | 78 | with tf.variable_scope('logit'): 79 | x = slim.layers.flatten(x) 80 | tf.logging.info('Flatten Output: %s', x.get_shape()) 81 | self.logits = self._fully_connected(x, self.hps.num_classes) 82 | self.predictions = tf.nn.softmax(self.logits) 83 | 84 | with 
tf.variable_scope('costs'): 85 | xent = tf.nn.softmax_cross_entropy_with_logits( 86 | self.logits, self.labels) 87 | self.cost = tf.reduce_mean(xent, name='xent') 88 | self.cost += self._decay() 89 | 90 | tf.scalar_summary(self.mode + '/cost', self.cost) 91 | 92 | def stage(self, x, n_residuals, out_filter, first_layer_stride=2): 93 | #with tf.variable_scope("classic"): 94 | # x = self._classic(x, out_filter) 95 | with tf.variable_scope('residual_' + str(0)): 96 | x = self._highway(x, out_filter, bias_init=-2, stride=first_layer_stride) 97 | for i in range(1, n_residuals): 98 | with tf.variable_scope('residual_' + str(i)): 99 | x = self._highway(x, out_filter, bias_init=-2, stride=1) 100 | return x 101 | 102 | def _classic(self, x, out_filter, stride=1): 103 | x = self._batch_norm(x) 104 | x = self._relu(x, self.hps.relu_leakiness) 105 | x = self._conv('conv', x, out_filter, stride=stride) 106 | tf.logging.info('Classic Block Output: %s', x.get_shape()) 107 | return x 108 | 109 | def _residual(self, x, out_filter, stride=1): 110 | """Residual unit with 2 sub layers.""" 111 | orig_x = x 112 | 113 | with tf.variable_scope('sub1'): 114 | x = self._batch_norm(x) 115 | x = self._relu(x, self.hps.relu_leakiness) 116 | x = self._conv('conv1', x, out_filter, stride=stride) 117 | 118 | with tf.variable_scope('sub2'): 119 | x = self._batch_norm(x) 120 | x = self._relu(x, self.hps.relu_leakiness) 121 | x = self._conv('conv2', x, out_filter, stride=1) 122 | 123 | with tf.variable_scope('sub_add'): 124 | in_filter = orig_x.get_shape()[-1].value 125 | if in_filter != out_filter: 126 | orig_x = tf.nn.avg_pool(orig_x, [1, stride, stride, 1], [1, stride, stride, 1], 'VALID') 127 | orig_x = tf.pad( 128 | orig_x, [[0, 0], [0, 0], [0, 0], 129 | [(out_filter-in_filter)//2, (out_filter-in_filter)//2]]) 130 | tf.logging.info("avg pooling to fit dimensions. 
Add out: %s", x.get_shape()) 131 | x += orig_x 132 | 133 | tf.logging.info('Residual Block Output: %s', x.get_shape()) 134 | return x 135 | 136 | def _highway(self, x, out_filter, bias_init, stride=1): 137 | """Highway unit with 2 sub layers.""" 138 | orig_x = x 139 | 140 | with tf.variable_scope('sub1'): 141 | x = self._batch_norm(x) 142 | x = self._relu(x, self.hps.relu_leakiness) 143 | x = self._conv('conv1', x, out_filter, stride=stride) 144 | 145 | with tf.variable_scope('sub2'): 146 | x = self._batch_norm(x) 147 | x = self._relu(x, self.hps.relu_leakiness) 148 | x = self._conv('conv2', x, out_filter, stride=1) 149 | 150 | with tf.variable_scope('sub_add'): 151 | in_filter = orig_x.get_shape()[-1].value 152 | if in_filter != out_filter: 153 | orig_x = tf.nn.avg_pool(orig_x, [1, stride, stride, 1], [1, stride, stride, 1], 'VALID') 154 | orig_x = tf.pad(orig_x, [[0, 0], [0, 0], [0, 0], 155 | [(out_filter-in_filter)//2, (out_filter-in_filter)//2]]) 156 | tf.logging.info("avg pooling to fit dimensions. Add out: %s", x.get_shape()) 157 | 158 | filter_size = 3 159 | n = filter_size * filter_size * out_filter 160 | T = slim.conv2d(x, out_filter, [3, 3], stride=1, 161 | weights_initializer=tf.contrib.layers.xavier_initializer(), 162 | biases_initializer=tf.constant_initializer(bias_init), 163 | activation_fn=tf.nn.sigmoid, 164 | scope='transform_gate_1') 165 | 166 | # bias_init leads the network initially to be biased towards carry behaviour (i.e. 
T = 0) 167 | x = T * x + (1.0 - T) * orig_x 168 | 169 | tf.logging.info('Highway Block Output: %s', x.get_shape()) 170 | return x 171 | 172 | def _build_train_op(self): 173 | """Build training specific ops for the graph.""" 174 | self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32) 175 | tf.scalar_summary(self.mode + '/learning rate', self.lrn_rate) 176 | 177 | trainable_variables = tf.trainable_variables() 178 | grads = tf.gradients(self.cost, trainable_variables) 179 | 180 | if self.hps.optimizer == 'sgd': 181 | optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate) 182 | elif self.hps.optimizer == 'mom': 183 | #optimizer = tf.train.AdamOptimizer(0.001) 184 | #ooptimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9, use_nesterov=True) 185 | optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9) 186 | 187 | clipped_grads, _ = tf.clip_by_global_norm(grads, 1) 188 | apply_op = optimizer.apply_gradients( 189 | zip(clipped_grads, trainable_variables), 190 | global_step=self.global_step, name='train_step') 191 | 192 | train_ops = [apply_op] + self._extra_train_ops + tf.get_collection(tf.GraphKeys.UPDATE_OPS) 193 | self.train_op = tf.group(*train_ops) 194 | 195 | def _decay(self): 196 | """L2 weight decay loss.""" 197 | costs = [] 198 | for var in tf.trainable_variables(): 199 | if var.op.name.find(r'weights') > 0: 200 | costs.append(tf.nn.l2_loss(var)) 201 | #tf.histogram_summary(self.mode + '/' + var.op.name, var) 202 | 203 | return tf.mul(self.hps.weight_decay_rate, tf.add_n(costs)) 204 | 205 | def _batch_norm(self, x): 206 | if self.mode == 'train': 207 | return slim.layers.batch_norm(x, scale=False, decay=0.9, scope='bn_2', is_training=True) 208 | else: 209 | return slim.layers.batch_norm(x, scale=False, decay=0.9, scope='bn_2', is_training=False) 210 | 211 | def _relu(self, x, leakiness=0.0): 212 | """Relu, with optional leaky support.""" 213 | return tf.select(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu') 214 | 215 | def _conv(self, 
name, x, out_filters, stride): 216 | filter_size = 3 217 | n = filter_size * filter_size * out_filters 218 | return slim.layers.conv2d(x, out_filters, [filter_size, filter_size], stride=stride, 219 | padding='SAME', activation_fn=None, 220 | weights_initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0/n)), 221 | #weights_initializer=tf.random_normal_initializer(stddev=0.01), 222 | #weights_initializer=tf.contrib.layers.variance_scaling_initializer(), 223 | scope=name) 224 | 225 | def _fully_connected(self, x, out_dim): 226 | return slim.layers.fully_connected(x, out_dim, 227 | activation_fn=None, 228 | #weights_initializer=tf.uniform_unit_scaling_initializer(factor=1.0) 229 | weights_initializer=tf.uniform_unit_scaling_initializer(factor=1.0) 230 | #weights_initializer=tf.random_normal_initializer(stddev=0.01) 231 | #weights_initializer=tf.contrib.layers.variance_scaling_initializer() 232 | ) 233 | 234 | def _max_pool(self, x): 235 | x = tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") 236 | tf.logging.info('Max-Pool Output: %s', x.get_shape()) 237 | return x 238 | 239 | def _global_avg_pool(self, x): 240 | assert x.get_shape().ndims == 4 241 | return tf.reduce_mean(x, [1, 2]) -------------------------------------------------------------------------------- /nets/resnet_old_reference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """ResNet model. 17 | 18 | Related papers: 19 | https://arxiv.org/pdf/1603.05027v2.pdf 20 | https://arxiv.org/pdf/1512.03385v1.pdf 21 | https://arxiv.org/pdf/1605.07146v1.pdf 22 | """ 23 | from collections import namedtuple 24 | 25 | import numpy as np 26 | import tensorflow as tf 27 | 28 | from tensorflow.python.training import moving_averages 29 | 30 | 31 | HParams = namedtuple('HParams', 32 | 'batch_size, num_classes, min_lrn_rate, lrn_rate, ' 33 | 'num_residual_units, use_bottleneck, weight_decay_rate, ' 34 | 'relu_leakiness, optimizer') 35 | 36 | 37 | class ResNet(object): 38 | """ResNet model.""" 39 | 40 | def __init__(self, hps, images, labels, mode): 41 | """ResNet constructor. 42 | 43 | Args: 44 | hps: Hyperparameters. 45 | images: Batches of images. [batch_size, image_size, image_size, 3] 46 | labels: Batches of labels. [batch_size, num_classes] 47 | mode: One of 'train' and 'eval'. 
48 | """ 49 | self.hps = hps 50 | self._images = images 51 | self.labels = labels 52 | self.mode = mode 53 | 54 | self._extra_train_ops = [] 55 | 56 | def build_graph(self): 57 | """Build a whole graph for the model.""" 58 | self.global_step = tf.Variable(0, name='global_step', trainable=False) 59 | self._build_model() 60 | if self.mode == 'train': 61 | self._build_train_op() 62 | self.summaries = tf.merge_all_summaries() 63 | 64 | def _stride_arr(self, stride): 65 | """Map a stride scalar to the stride array for tf.nn.conv2d.""" 66 | return [1, stride, stride, 1] 67 | 68 | def _build_model(self): 69 | """Build the core model within the graph.""" 70 | with tf.variable_scope('init'): 71 | x = self._images 72 | x = self._conv('init_conv', x, 3, 3, 10, self._stride_arr(1)) 73 | 74 | strides = [1, 2, 2] 75 | activate_before_residual = [True, False, False] 76 | if self.hps.use_bottleneck: 77 | res_func = self._bottleneck_residual 78 | filters = [16, 64, 128, 256] 79 | else: 80 | res_func = self._residual 81 | filters = [10, 10, 20, 40] 82 | # Uncomment the following codes to use w28-10 wide residual network. 83 | # It is more memory efficient than very deep residual network and has 84 | # comparably good performance. 
85 | # https://arxiv.org/pdf/1605.07146v1.pdf 86 | #filters = [16, 160, 320, 640] 87 | # Update hps.num_residual_units to 9 88 | 89 | with tf.variable_scope('unit_1_0'): 90 | tf.logging.info("unit_1") 91 | x = res_func(x, filters[0], filters[1], self._stride_arr(strides[0]), 92 | activate_before_residual[0]) 93 | for i in range(1, self.hps.num_residual_units): 94 | with tf.variable_scope('unit_1_%d' % i): 95 | x = res_func(x, filters[1], filters[1], self._stride_arr(1), False) 96 | 97 | with tf.variable_scope('unit_2_0'): 98 | tf.logging.info("unit_2") 99 | x = res_func(x, filters[1], filters[2], self._stride_arr(strides[1]), 100 | activate_before_residual[1]) 101 | for i in range(1, self.hps.num_residual_units): 102 | with tf.variable_scope('unit_2_%d' % i): 103 | x = res_func(x, filters[2], filters[2], self._stride_arr(1), False) 104 | 105 | with tf.variable_scope('unit_3_0'): 106 | tf.logging.info("unit_3") 107 | x = res_func(x, filters[2], filters[3], self._stride_arr(strides[2]), 108 | activate_before_residual[2]) 109 | for i in range(1, self.hps.num_residual_units): 110 | with tf.variable_scope('unit_3_%d' % i): 111 | x = res_func(x, filters[3], filters[3], self._stride_arr(1), False) 112 | 113 | with tf.variable_scope('unit_last'): 114 | x = self._batch_norm('final_bn', x) 115 | x = self._relu(x, self.hps.relu_leakiness) 116 | x = self._global_avg_pool(x) 117 | 118 | with tf.variable_scope('logit'): 119 | self.logits = self._fully_connected(x, self.hps.num_classes) 120 | self.predictions = tf.nn.softmax(self.logits) 121 | 122 | with tf.variable_scope('costs'): 123 | xent = tf.nn.softmax_cross_entropy_with_logits( 124 | self.logits, self.labels) 125 | self.cost = tf.reduce_mean(xent, name='xent') 126 | self.cost += self._decay() 127 | 128 | tf.scalar_summary('cost', self.cost) 129 | 130 | def _build_train_op(self): 131 | """Build training specific ops for the graph.""" 132 | self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32) 133 | 
tf.scalar_summary('learning rate', self.lrn_rate) 134 | 135 | trainable_variables = tf.trainable_variables() 136 | grads = tf.gradients(self.cost, trainable_variables) 137 | 138 | if self.hps.optimizer == 'sgd': 139 | optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate) 140 | elif self.hps.optimizer == 'mom': 141 | optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9) 142 | 143 | apply_op = optimizer.apply_gradients( 144 | zip(grads, trainable_variables), 145 | global_step=self.global_step, name='train_step') 146 | 147 | train_ops = [apply_op] + self._extra_train_ops 148 | self.train_op = tf.group(*train_ops) 149 | 150 | # TODO(xpan): Consider batch_norm in contrib/layers/python/layers/layers.py 151 | def _batch_norm(self, name, x): 152 | """Batch normalization.""" 153 | with tf.variable_scope(name): 154 | params_shape = [x.get_shape()[-1]] 155 | 156 | beta = tf.get_variable( 157 | 'beta', params_shape, tf.float32, 158 | initializer=tf.constant_initializer(0.0, tf.float32)) 159 | gamma = tf.get_variable( 160 | 'gamma', params_shape, tf.float32, 161 | initializer=tf.constant_initializer(1.0, tf.float32)) 162 | 163 | if self.mode == 'train': 164 | mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments') 165 | 166 | moving_mean = tf.get_variable( 167 | 'moving_mean', params_shape, tf.float32, 168 | initializer=tf.constant_initializer(0.0, tf.float32), 169 | trainable=False) 170 | moving_variance = tf.get_variable( 171 | 'moving_variance', params_shape, tf.float32, 172 | initializer=tf.constant_initializer(1.0, tf.float32), 173 | trainable=False) 174 | 175 | self._extra_train_ops.append(moving_averages.assign_moving_average( 176 | moving_mean, mean, 0.9)) 177 | self._extra_train_ops.append(moving_averages.assign_moving_average( 178 | moving_variance, variance, 0.9)) 179 | else: 180 | mean = tf.get_variable( 181 | 'moving_mean', params_shape, tf.float32, 182 | initializer=tf.constant_initializer(0.0, tf.float32), 183 | trainable=False) 184 | variance = 
tf.get_variable( 185 | 'moving_variance', params_shape, tf.float32, 186 | initializer=tf.constant_initializer(1.0, tf.float32), 187 | trainable=False) 188 | tf.histogram_summary(mean.op.name, mean) 189 | tf.histogram_summary(variance.op.name, variance) 190 | # elipson used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net. 191 | y = tf.nn.batch_normalization( 192 | x, mean, variance, beta, gamma, 0.001) 193 | y.set_shape(x.get_shape()) 194 | return y 195 | 196 | def _residual(self, x, in_filter, out_filter, stride, 197 | activate_before_residual=False): 198 | """Residual unit with 2 sub layers.""" 199 | if activate_before_residual: 200 | with tf.variable_scope('shared_activation'): 201 | x = self._batch_norm('init_bn', x) 202 | x = self._relu(x, self.hps.relu_leakiness) 203 | orig_x = x 204 | else: 205 | with tf.variable_scope('residual_only_activation'): 206 | orig_x = x 207 | x = self._batch_norm('init_bn', x) 208 | x = self._relu(x, self.hps.relu_leakiness) 209 | 210 | with tf.variable_scope('sub1'): 211 | x = self._conv('conv1', x, 3, in_filter, out_filter, stride) 212 | 213 | with tf.variable_scope('sub2'): 214 | x = self._batch_norm('bn2', x) 215 | x = self._relu(x, self.hps.relu_leakiness) 216 | x = self._conv('conv2', x, 3, out_filter, out_filter, [1, 1, 1, 1]) 217 | 218 | with tf.variable_scope('sub_add'): 219 | if in_filter != out_filter: 220 | orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'VALID') 221 | orig_x = tf.pad( 222 | orig_x, [[0, 0], [0, 0], [0, 0], 223 | [(out_filter-in_filter)//2, (out_filter-in_filter)//2]]) 224 | tf.logging.info("avg pooling to fit dimensions. 
Add out: %s", x.get_shape()) 225 | x += orig_x 226 | 227 | tf.logging.info('image after unit %s', x.get_shape()) 228 | return x 229 | 230 | def _bottleneck_residual(self, x, in_filter, out_filter, stride, 231 | activate_before_residual=False): 232 | """Bottleneck resisual unit with 3 sub layers.""" 233 | if activate_before_residual: 234 | with tf.variable_scope('common_bn_relu'): 235 | x = self._batch_norm('init_bn', x) 236 | x = self._relu(x, self.hps.relu_leakiness) 237 | orig_x = x 238 | else: 239 | with tf.variable_scope('residual_bn_relu'): 240 | orig_x = x 241 | x = self._batch_norm('init_bn', x) 242 | x = self._relu(x, self.hps.relu_leakiness) 243 | 244 | with tf.variable_scope('sub1'): 245 | x = self._conv('conv1', x, 1, in_filter, out_filter/4, stride) 246 | 247 | with tf.variable_scope('sub2'): 248 | x = self._batch_norm('bn2', x) 249 | x = self._relu(x, self.hps.relu_leakiness) 250 | x = self._conv('conv2', x, 3, out_filter/4, out_filter/4, [1, 1, 1, 1]) 251 | 252 | with tf.variable_scope('sub3'): 253 | x = self._batch_norm('bn3', x) 254 | x = self._relu(x, self.hps.relu_leakiness) 255 | x = self._conv('conv3', x, 1, out_filter/4, out_filter, [1, 1, 1, 1]) 256 | 257 | with tf.variable_scope('sub_add'): 258 | if in_filter != out_filter: 259 | orig_x = self._conv('project', orig_x, 1, in_filter, out_filter, stride) 260 | x += orig_x 261 | 262 | tf.logging.info('image after unit %s', x.get_shape()) 263 | return x 264 | 265 | def _decay(self): 266 | """L2 weight decay loss.""" 267 | costs = [] 268 | for var in tf.trainable_variables(): 269 | if var.op.name.find(r'DW') > 0: 270 | costs.append(tf.nn.l2_loss(var)) 271 | # tf.histogram_summary(var.op.name, var) 272 | 273 | return tf.mul(self.hps.weight_decay_rate, tf.add_n(costs)) 274 | 275 | def _conv(self, name, x, filter_size, in_filters, out_filters, strides): 276 | """Convolution.""" 277 | with tf.variable_scope(name): 278 | n = filter_size * filter_size * out_filters 279 | kernel = tf.get_variable( 280 | 
'DW', [filter_size, filter_size, in_filters, out_filters], 281 | tf.float32, initializer=tf.random_normal_initializer( 282 | stddev=np.sqrt(2.0/n))) 283 | return tf.nn.conv2d(x, kernel, strides, padding='SAME') 284 | 285 | def _relu(self, x, leakiness=0.0): 286 | """Relu, with optional leaky support.""" 287 | return tf.select(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu') 288 | 289 | def _fully_connected(self, x, out_dim): 290 | """FullyConnected layer for final output.""" 291 | x = tf.reshape(x, [self.hps.batch_size, -1]) 292 | w = tf.get_variable( 293 | 'DW', [x.get_shape()[1], out_dim], 294 | initializer=tf.uniform_unit_scaling_initializer(factor=1.0)) 295 | b = tf.get_variable('biases', [out_dim], 296 | initializer=tf.constant_initializer()) 297 | return tf.nn.xw_plus_b(x, w, b) 298 | 299 | def _global_avg_pool(self, x): 300 | assert x.get_shape().ndims == 4 301 | return tf.reduce_mean(x, [1, 2]) -------------------------------------------------------------------------------- /nets/resnet_uniform.py: -------------------------------------------------------------------------------- 1 | """ResNet model implemented using slim components 2 | 3 | Related ResNet papers: 4 | https://arxiv.org/pdf/1603.05027v2.pdf 5 | https://arxiv.org/pdf/1512.03385v1.pdf 6 | https://arxiv.org/pdf/1605.07146v1.pdf 7 | """ 8 | from collections import namedtuple 9 | from libs import custom_ops 10 | 11 | import numpy as np 12 | import tensorflow as tf 13 | import tensorflow.contrib.slim as slim 14 | 15 | HParams = namedtuple('HParams', 16 | 'batch_size, num_classes, min_lrn_rate, lrn_rate, ' 17 | 'num_residual_units, use_bottleneck, weight_decay_rate, ' 18 | 'relu_leakiness, optimizer') 19 | 20 | 21 | class ResNet(object): 22 | """ResNet model.""" 23 | 24 | def __init__(self, hps, images, labels, mode): 25 | """ResNet constructor. 26 | 27 | Args: 28 | hps: Hyperparameters. 29 | images: Batches of images. [batch_size, image_size, image_size, 3] 30 | labels: Batches of labels. 
[batch_size, num_classes] 31 | mode: One of 'train' and 'eval'. 32 | """ 33 | self.hps = hps 34 | self._images = images 35 | self.labels = labels 36 | self.mode = mode 37 | 38 | self._extra_train_ops = [] 39 | 40 | def build_graph(self): 41 | """Build a whole graph for the model.""" 42 | self.global_step = tf.Variable(0, name='global_step', trainable=False) 43 | self._build_model() 44 | custom_ops.log_number_of_params() 45 | if self.mode == 'train': 46 | self._build_train_op() 47 | self.summaries = tf.merge_all_summaries() 48 | 49 | def _build_model(self): 50 | """Build the core model within the graph.""" 51 | x = self._images 52 | tf.logging.info('Image Shape: %s', x.get_shape()) 53 | 54 | with tf.variable_scope('init'): 55 | x = self._conv('init_conv', x, 10, stride=1) 56 | 57 | tf.logging.info('Initial Output: %s', x.get_shape()) 58 | 59 | with tf.variable_scope('stage1'): 60 | tf.logging.info("Stage 1") 61 | x = self.stage(x, self.hps.num_residual_units, 10, first_layer_stride=1) 62 | 63 | #x = self._max_pool(x) 64 | 65 | with tf.variable_scope('stage2'): 66 | tf.logging.info("Stage 2") 67 | x = self.stage(x, self.hps.num_residual_units, 20, first_layer_stride=2) 68 | 69 | #x = self._max_pool(x) 70 | 71 | with tf.variable_scope('stage3'): 72 | tf.logging.info("Stage 3") 73 | x = self.stage(x, self.hps.num_residual_units, 40, first_layer_stride=2) 74 | 75 | # snip 76 | #x = self._max_pool(x) 77 | """ 78 | with tf.variable_scope('stage4'): 79 | tf.logging.info("Stage 4") 80 | x = self.stage(x, self.hps.num_residual_units, 64) 81 | 82 | with tf.variable_scope('stage5'): 83 | tf.logging.info("Stage 5") 84 | x = self.stage(x, self.hps.num_residual_units, 64) 85 | 86 | with tf.variable_scope('stage6'): 87 | tf.logging.info("Stage 6") 88 | x = self.stage(x, self.hps.num_residual_units, 64) 89 | 90 | 91 | with tf.variable_scope('stage7'): 92 | tf.logging.info("Stage 7") 93 | x = self.stage(x, self.hps.num_residual_units, 64) 94 | 95 | with tf.variable_scope('stage8'): 
96 | tf.logging.info("Stage 8") 97 | x = self.stage(x, self.hps.num_residual_units, 64) 98 | 99 | with tf.variable_scope('stage9'): 100 | tf.logging.info("Stage 9") 101 | x = self.stage(x, self.hps.num_residual_units, 64) 102 | 103 | with tf.variable_scope('stage10'): 104 | tf.logging.info("Stage 10") 105 | x = self.stage(x, self.hps.num_residual_units, 64) 106 | """ 107 | 108 | with tf.variable_scope('final'): 109 | x = self._batch_norm(x) 110 | x = self._relu(x, self.hps.relu_leakiness) 111 | #x = self._max_pool(x) 112 | # avg pool 113 | x = self._global_avg_pool(x) 114 | 115 | with tf.variable_scope('logit'): 116 | x = slim.layers.flatten(x) 117 | tf.logging.info('Flatten Output: %s', x.get_shape()) 118 | self.logits = self._fully_connected(x, self.hps.num_classes) 119 | self.predictions = tf.nn.softmax(self.logits) 120 | 121 | with tf.variable_scope('costs'): 122 | xent = tf.nn.softmax_cross_entropy_with_logits( 123 | self.logits, self.labels) 124 | self.cost = tf.reduce_mean(xent, name='xent') 125 | self.cost += self._decay() 126 | 127 | tf.scalar_summary(self.mode + '/cost', self.cost) 128 | 129 | def stage(self, x, n_residuals, out_filter, first_layer_stride=2): 130 | #with tf.variable_scope("classic"): 131 | # x = self._classic(x, out_filter) 132 | with tf.variable_scope('residual_' + str(0)): 133 | x = self._residual(x, out_filter, stride=first_layer_stride) 134 | for i in range(1, n_residuals): 135 | with tf.variable_scope('residual_' + str(i)): 136 | x = self._residual(x, out_filter, stride=1) 137 | return x 138 | 139 | def _classic(self, x, out_filter, stride=1): 140 | x = self._batch_norm(x) 141 | x = self._relu(x, self.hps.relu_leakiness) 142 | x = self._conv('conv', x, out_filter, stride=stride) 143 | tf.logging.info('Classic Block Output: %s', x.get_shape()) 144 | return x 145 | 146 | def _residual(self, x, out_filter, stride=1): 147 | """Residual unit with 2 sub layers.""" 148 | orig_x = x 149 | 150 | with tf.variable_scope('sub1'): 151 | x = 
self._batch_norm(x) 152 | x = self._relu(x, self.hps.relu_leakiness) 153 | x = self._conv('conv1', x, out_filter, stride=stride) 154 | 155 | with tf.variable_scope('sub2'): 156 | x = self._batch_norm(x) 157 | x = self._relu(x, self.hps.relu_leakiness) 158 | x = self._conv('conv2', x, out_filter, stride=1) 159 | 160 | with tf.variable_scope('sub_add'): 161 | in_filter = orig_x.get_shape()[-1].value 162 | if in_filter != out_filter: 163 | orig_x = tf.nn.avg_pool(orig_x, [1, stride, stride, 1], [1, stride, stride, 1], 'VALID') 164 | orig_x = tf.pad( 165 | orig_x, [[0, 0], [0, 0], [0, 0], 166 | [(out_filter-in_filter)//2, (out_filter-in_filter)//2]]) 167 | tf.logging.info("avg pooling to fit dimensions. Add out: %s", x.get_shape()) 168 | x += orig_x 169 | 170 | tf.logging.info('Residual Block Output: %s', x.get_shape()) 171 | return x 172 | 173 | def _highway(self, x, out_filter, bias_init, stride=1): 174 | """Residual unit with 2 sub layers.""" 175 | orig_x = x 176 | 177 | with tf.variable_scope('sub1'): 178 | x = self._batch_norm(x) 179 | x = self._relu(x, self.hps.relu_leakiness) 180 | x = self._conv('conv1', x, out_filter, stride=stride) 181 | 182 | with tf.variable_scope('sub2'): 183 | x = self._batch_norm(x) 184 | x = self._relu(x, self.hps.relu_leakiness) 185 | x = self._conv('conv2', x, out_filter, stride=1) 186 | 187 | with tf.variable_scope('sub_add'): 188 | in_filter = orig_x.get_shape()[-1].value 189 | if in_filter != out_filter: 190 | orig_x = tf.nn.avg_pool(orig_x, [1, stride, stride, 1], [1, stride, stride, 1], 'VALID') 191 | orig_x = tf.pad(orig_x, [[0, 0], [0, 0], [0, 0], 192 | [(out_filter-in_filter)//2, (out_filter-in_filter)//2]]) 193 | tf.logging.info("avg pooling to fit dimensions. 
def _build_train_op(self):
    """Build the training ops for the graph and store them on ``self``.

    Creates ``self.lrn_rate`` (a float32 constant holding
    ``self.hps.lrn_rate``) together with a scalar summary, computes the
    gradients of ``self.cost`` with respect to every trainable variable and
    sets ``self.train_op`` to a group made of the gradient-application op,
    ``self._extra_train_ops`` and the ops in ``tf.GraphKeys.UPDATE_OPS``.

    Raises:
        ValueError: if ``self.hps.optimizer`` is neither ``'sgd'`` nor
            ``'mom'``.
    """
    optimizer_name = self.hps.optimizer
    # Validate up front: previously an unknown name fell through both
    # branches and surfaced later as an unrelated NameError on `optimizer`.
    if optimizer_name not in ('sgd', 'mom'):
        raise ValueError(
            "Unsupported optimizer %r (expected 'sgd' or 'mom')" % (optimizer_name,))

    self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
    tf.scalar_summary(self.mode + '/learning rate', self.lrn_rate)

    trainable_variables = tf.trainable_variables()
    grads = tf.gradients(self.cost, trainable_variables)

    if optimizer_name == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
    else:
        # 'mom': momentum optimizer with a momentum of 0.9.
        optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)

    apply_op = optimizer.apply_gradients(
        zip(grads, trainable_variables),
        global_step=self.global_step, name='train_step')

    train_ops = [apply_op] + self._extra_train_ops + tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    self.train_op = tf.group(*train_ops)


def _decay(self):
    """Return the L2 weight-decay loss term.

    Sums ``tf.nn.l2_loss`` over every trainable variable whose op name
    contains ``'weights'`` and scales the sum by
    ``self.hps.weight_decay_rate``.  When no variable matches, a zero
    constant is returned instead of calling ``tf.add_n`` on an empty list.
    """
    # A substring test also matches a variable whose name *starts* with
    # 'weights'; the previous `find(...) > 0` silently skipped that case.
    costs = [tf.nn.l2_loss(var)
             for var in tf.trainable_variables()
             if 'weights' in var.op.name]
    if not costs:
        return tf.constant(0.0, tf.float32)
    return tf.mul(self.hps.weight_decay_rate, tf.add_n(costs))


def _batch_norm(self, x):
    """Apply slim batch normalisation (no scale, decay 0.9) to ``x``.

    Batch statistics are used only when ``self.mode == 'train'``.
    """
    # NOTE(review): the scope name is hard-coded to 'bn_2', so distinct
    # layers presumably rely on the caller's enclosing variable scope to keep
    # their variables apart -- confirm against the call sites.
    return slim.layers.batch_norm(x, scale=False, decay=0.9, scope='bn_2',
                                  is_training=(self.mode == 'train'))


def _relu(self, x, leakiness=0.0):
    """Relu with optional leak: ``leakiness * x`` where ``x < 0``, else ``x``."""
    return tf.select(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')


def _conv(self, name, x, out_filters, stride):
    """3x3 'SAME' convolution without activation, built in scope ``name``.

    The weights use a normal initializer with
    ``stddev = sqrt(2 / (3 * 3 * out_filters))``.
    """
    filter_size = 3
    n = filter_size * filter_size * out_filters
    return slim.layers.conv2d(
        x, out_filters, [filter_size, filter_size], stride=stride,
        padding='SAME', activation_fn=None,
        weights_initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)),
        scope=name)


def _fully_connected(self, x, out_dim):
    """Linear layer (no activation) with ``out_dim`` outputs.

    Weights use ``tf.uniform_unit_scaling_initializer(factor=1.0)``.
    """
    return slim.layers.fully_connected(
        x, out_dim,
        activation_fn=None,
        weights_initializer=tf.uniform_unit_scaling_initializer(factor=1.0))


def _max_pool(self, x):
    """2x2 max pooling with stride 2 and 'SAME' padding; logs the output shape."""
    x = tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    tf.logging.info('Max-Pool Output: %s', x.get_shape())
    return x


def _global_avg_pool(self, x):
    """Average a rank-4 tensor over axes 1 and 2, returning a rank-2 tensor."""
    # presumably x is (batch, height, width, channels) -- TODO confirm
    assert x.get_shape().ndims == 4
    return tf.reduce_mean(x, [1, 2])
-------------------------------------------------------------------------------- /slim_deep_roots_eval.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | import os 5 | import time 6 | import math 7 | from datetime import datetime 8 | 9 | from datasets.cifar10 import cifar10_data 10 | from datasets.cifar100 import cifar100_data 11 | 12 | from libs import custom_ops 13 | from nets import bn_conv 14 | 15 | log_dir = "logs/cifar10/deep_roots/" 16 | eval_dir = log_dir 17 | batch_size = 128 18 | num_classes = 10 19 | epoch_size = 10000.0 20 | num_iter = int(math.ceil(epoch_size/batch_size)) 21 | load_latest_checkpoint = False 22 | 23 | eval_interval_secs = 3 24 | run_once = False 25 | 26 | tf.logging.set_verbosity(tf.logging.INFO) 27 | sess = tf.Session() 28 | 29 | ## Data 30 | with tf.device('/cpu:0'): 31 | d = cifar10_data(batch_size=batch_size, sess=sess) 32 | image_batch_tensor, target_batch_tensor = d.build_test_data_tensor(shuffle=False, augmentation=False) 33 | 34 | ## Model 35 | #logits = bn_conv.inference(image_batch_tensor, num_classes=num_classes, is_training=True) 36 | #from tensorflow.contrib.slim.nets import resnet_v2 37 | #with slim.arg_scope(custom_ops.resnet_arg_scope(is_training=True)): 38 | # net, end_points = resnet_v2.resnet_v2_101(image_batch_tensor, 39 | # num_classes=num_classes, 40 | # global_pool=True)# reduce output to rank 2 (not working) 41 | #logits = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=False) 42 | import nets.deep_roots 43 | hps = nets.deep_roots.HParams(batch_size=batch_size, 44 | num_classes=num_classes, 45 | min_lrn_rate=None, 46 | lrn_rate=None, 47 | num_residual_units=4, 48 | use_bottleneck=False, 49 | weight_decay_rate=0.0002, 50 | relu_leakiness=0.1, 51 | optimizer='mom') 52 | model = nets.deep_roots.ResNet(hps, image_batch_tensor, target_batch_tensor, 'eval') 53 | model.build_graph() 54 
def _eval_model_checkpoint(model_checkpoint_path):
    """Restore one checkpoint and evaluate it for up to ``num_iter`` batches.

    Fetches ``model.cost``, ``model.predictions`` and ``model.labels`` from
    the module-level session, logs top-1 accuracy and the average loss, and
    writes both as ``test/accuracy`` / ``test/avg_loss`` summaries tagged
    with the global step parsed from the checkpoint file name.

    Args:
        model_checkpoint_path: checkpoint path of the form
            ``<dir>/<prefix>-<global_step>``.  A falsy value is logged as an
            error and the function returns without evaluating.
    """
    if not model_checkpoint_path:
        tf.logging.error("Couldn't find checkpoint to restore from. Exiting.")
        return

    tf.logging.info("Restoring from checkpoint %s" % model_checkpoint_path)
    saver.restore(sess, model_checkpoint_path)

    # The text after the last '-' of the file name is the global step.  If it
    # is not numeric the summaries are written without a step instead of
    # failing inside add_summary after the whole evaluation has run.
    step_label = os.path.basename(model_checkpoint_path).split('-')[-1]
    global_step = int(step_label) if step_label.isdigit() else None

    log_every = 200  # batches between two throughput log lines
    total_loss = 0.0
    loss_terms = 0
    correct_prediction, total_prediction = 0, 0
    step = 0

    tf.logging.info('%s: starting evaluation.' % (datetime.now()))
    start_time = time.time()
    while step < num_iter and not coord.should_stop():
        loss_value, predictions, truth = sess.run(
            [model.cost, model.predictions, model.labels])

        # Average over however many values the cost fetch yields, so the
        # result is a true mean whether the cost is one scalar per batch or
        # one value per example.
        # NOTE(review): model.cost is defined outside this file -- confirm
        # which of the two it is.
        total_loss += np.sum(loss_value)
        loss_terms += np.size(loss_value)

        hits = np.argmax(truth, axis=1) == np.argmax(predictions, axis=1)
        correct_prediction += np.sum(hits)
        total_prediction += hits.shape[0]

        step += 1
        if step % log_every == 0:
            duration = time.time() - start_time
            # Divide by the number of batches actually covered by `duration`.
            sec_per_batch = duration / float(log_every)
            examples_per_sec = batch_size / sec_per_batch
            tf.logging.info('[%d batches out of %d] (%.1f examples/sec; %.3f '
                            'sec/batch)' % (step, num_iter, examples_per_sec, sec_per_batch))
            start_time = time.time()

    if total_prediction == 0:
        # The coordinator asked to stop before a single batch was evaluated.
        tf.logging.error('No batches were evaluated for checkpoint %s' % model_checkpoint_path)
        return

    # compute test set accuracy over the examples that were really evaluated
    accuracy = float(correct_prediction * 100.0 / total_prediction)
    avg_loss = float(total_loss / loss_terms)

    tf.logging.info('%s: top_1_acc: %6.3f%%, avg_loss: %.7f [%d examples]' %
                    (step_label, accuracy, avg_loss, total_prediction))

    accuracy_sum = tf.Summary(value=[tf.Summary.Value(tag="test/accuracy", simple_value=accuracy)])
    avg_loss_summary = tf.Summary(value=[tf.Summary.Value(tag="test/avg_loss", simple_value=avg_loss)])
    summary_writer.add_summary(accuracy_sum, global_step)
    summary_writer.add_summary(avg_loss_summary, global_step)
    summary_writer.flush()
load_latest_checkpoint = False 23 | step = 0 24 | lrn_rate = 0.1 25 | 26 | sess = tf.Session() 27 | 28 | ## Data 29 | with tf.device('/cpu:0'): 30 | d = cifar10_data(batch_size=batch_size, sess=sess) 31 | image_batch_tensor, target_batch_tensor = d.build_train_data_tensor(shuffle=True, augmentation=True) 32 | 33 | ## Model 34 | #logits = bn_conv.inference(image_batch_tensor, num_classes=num_classes, is_training=True) 35 | #logits = highway_test.inference(image_batch_tensor, num_classes=num_classes, is_training=True) 36 | #from tensorflow.contrib.slim.nets import resnet_v2 37 | #with slim.arg_scope(custom_ops.resnet_arg_scope(is_training=True)): 38 | # net, end_points = resnet_v2.resnet_v2_101(image_batch_tensor, 39 | # num_classes=num_classes, 40 | # global_pool=True)# reduce output to rank 2 (not working) 41 | #logits = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=False) 42 | 43 | #import nets.resnet 44 | import nets.deep_roots 45 | hps = nets.deep_roots.HParams(batch_size=batch_size, 46 | num_classes=num_classes, 47 | min_lrn_rate=0.0001, 48 | lrn_rate=0.1, 49 | num_residual_units=4, 50 | use_bottleneck=False, 51 | weight_decay_rate=0.0002, 52 | relu_leakiness=0.1, 53 | optimizer='mom') 54 | model = nets.deep_roots.ResNet(hps, image_batch_tensor, target_batch_tensor, 'train') 55 | model.build_graph() 56 | 57 | ## Losses and Accuracies 58 | avg_loss_queue = deque(epoch_in_steps * [0]) 59 | avg_top1_queue = deque(epoch_in_steps * [0]) 60 | 61 | ## Optimizer 62 | 63 | ## Summaries 64 | summary_op = model.summaries 65 | 66 | ## Initialization 67 | saver = tf.train.Saver(max_to_keep=10000000) 68 | summary_writer = tf.train.SummaryWriter(log_dir, graph=sess.graph) 69 | coord = tf.train.Coordinator() 70 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 71 | sess.run(tf.global_variables_initializer()) 72 | 73 | ## Load Pretrained 74 | if load_latest_checkpoint: 75 | checkpoint = tf.train.latest_checkpoint(log_dir) 76 | if checkpoint: 77 | 
## Train
tf.logging.info('start training ...')
total_start_time = time.time()

# Piecewise-constant learning-rate schedule as (exclusive upper bound on the
# global step, learning rate); beyond the last bound `final_lrn_rate` is used.
lrn_rate_schedule = ((20000, 0.1), (40000, 0.01), (50000, 0.001))
final_lrn_rate = 0.0001

while not coord.should_stop():
    start_time = time.time()

    # Pick the rate *before* running the step.  Previously it was derived from
    # the step fetched by the previous iteration, so a run resumed from a late
    # checkpoint took its first update with the initial rate of 0.1.
    lrn_rate = final_lrn_rate
    for upper_bound, rate in lrn_rate_schedule:
        if step < upper_bound:
            lrn_rate = rate
            break

    (_, summaries, loss, train_step, predictions, truth) = sess.run(
        [model.train_op, summary_op, model.cost, model.global_step, model.predictions, model.labels],
        feed_dict={model.lrn_rate: lrn_rate})

    duration = time.time() - start_time
    truth = np.argmax(truth, axis=1)
    predictions = np.argmax(predictions, axis=1)
    accuracy = np.sum(truth == predictions) * 100 / batch_size
    # push_into_queue is a project helper; its return value is used here as
    # the running average of the pushed values.
    avg_accuracy = components.push_into_queue(accuracy, avg_top1_queue, "train/avg_accuracy", train_step, summary_writer)
    avg_loss = components.push_into_queue(loss, avg_loss_queue, "train/avg_loss", train_step, summary_writer)

    if step % 100 == 0:
        total_duration = (time.time() - total_start_time) / 60.0
        examples_per_sec = batch_size / float(duration)
        accuracy_sum = tf.Summary(value=[tf.Summary.Value(tag="train/accuracy", simple_value=accuracy)])

        if step == 500:
            # One-off throughput report.
            format_str = ('%4.2fmin, step %4.d, lr: %.4f, loss: %4.3f, top-1: %5.2f%% (%.1f examples/sec; %.3f sec/batch)')
            tf.logging.info(format_str % (total_duration, step, lrn_rate, loss, accuracy, examples_per_sec, duration))
        else:
            format_str = ('%4.2fmin, step %4.d, lr: %.4f, loss: %4.3f (%4.3f), top-1: %5.2f%% (%5.2f%%)')
            tf.logging.info(format_str % (total_duration, step, lrn_rate, loss, avg_loss, accuracy, avg_accuracy))

        summary_writer.add_summary(accuracy_sum, train_step)
        summary_writer.add_summary(summaries, train_step)
        summary_writer.flush()

    if step % 1000 == 0:
        tf.logging.info("saving checkpoint")
        checkpoint_path = os.path.join(log_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=model.global_step)

    step += 1

coord.join(threads)

print("done!")
def _eval_model_checkpoint(model_checkpoint_path):
    """Restore one checkpoint and evaluate it for up to ``num_iter`` batches.

    Fetches the module-level ``top_1_correct``, ``top_5_correct`` and
    ``loss`` tensors, prints top-1/top-5 accuracy and the average loss, and
    writes them as ``test/top_1_accuracy``, ``test/top_5_accuracy`` and
    ``test/avg_loss`` summaries tagged with the global step parsed from the
    checkpoint file name.

    Args:
        model_checkpoint_path: checkpoint path of the form
            ``<dir>/<prefix>-<global_step>``.  A falsy value is reported and
            the function returns without evaluating.
    """
    if not model_checkpoint_path:
        print("Couldn't find checkpoint to restore from. Exiting.")
        return

    print("Restoring from checkpoint", model_checkpoint_path)
    saver.restore(sess, model_checkpoint_path)

    # The text after the last '-' of the file name is the global step.  If it
    # is not numeric the summaries are written without a step instead of
    # failing inside add_summary after the whole evaluation has run.
    step_label = os.path.basename(model_checkpoint_path).split('-')[-1]
    global_step = int(step_label) if step_label.isdigit() else None

    log_every = 40  # batches between two throughput log lines
    count_top_1 = 0.0
    count_top_5 = 0.0
    loss_sum = 0.0
    samples_seen = 0
    step = 0

    print('%s: starting evaluation.' % (datetime.now()))
    start_time = time.time()
    while step < num_iter and not coord.should_stop():
        top_1_val, top_5_val, loss_value = sess.run([top_1_correct, top_5_correct, loss])
        count_top_1 += np.sum(top_1_val)
        count_top_5 += np.sum(top_5_val)
        loss_sum += np.mean(loss_value)
        samples_seen += np.size(top_1_val)
        step += 1
        if step % log_every == 0:
            duration = time.time() - start_time
            # Divide by the number of batches actually covered by `duration`.
            sec_per_batch = duration / float(log_every)
            examples_per_sec = batch_size / sec_per_batch
            print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f '
                  'sec/batch)' % (datetime.now(), step, num_iter,
                                  examples_per_sec, sec_per_batch))
            start_time = time.time()

    if step == 0 or samples_seen == 0:
        # The coordinator asked to stop before a single batch was evaluated.
        print('No batches were evaluated for checkpoint', model_checkpoint_path)
        return

    # compute test set accuracy over the examples that were really evaluated
    top_1_accuracy = float(count_top_1 / samples_seen)
    top_5_accuracy = float(count_top_5 / samples_seen)
    # `loss` is already the mean over one batch, so the average over the run
    # is the sum of batch means divided by the number of batches -- not by
    # the number of samples.
    avg_loss = float(loss_sum / step)
    print('%s: top_1_acc=%.4f, top_5_acc=%.4f avg_loss=%.7f [%d examples]' %
          (datetime.now(), top_1_accuracy, top_5_accuracy, avg_loss, samples_seen))

    top_1_summary = tf.Summary(value=[tf.Summary.Value(tag="test/top_1_accuracy", simple_value=top_1_accuracy)])
    top_5_summary = tf.Summary(value=[tf.Summary.Value(tag="test/top_5_accuracy", simple_value=top_5_accuracy)])
    avg_loss_summary = tf.Summary(value=[tf.Summary.Value(tag="test/avg_loss", simple_value=avg_loss)])
    summary_writer.add_summary(top_1_summary, global_step)
    summary_writer.add_summary(top_5_summary, global_step)
    summary_writer.add_summary(avg_loss_summary, global_step)
    summary_writer.flush()
if ckpt and ckpt.model_checkpoint_path: 119 | for path in ckpt.all_model_checkpoint_paths: 120 | if not path in done: 121 | done.append(path) 122 | _eval_model_checkpoint(path) 123 | else: 124 | print('No checkpoint file found') 125 | time.sleep(eval_interval_secs) 126 | 127 | 128 | print("done!") 129 | -------------------------------------------------------------------------------- /slim_highway_eval.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | import os 5 | import time 6 | import math 7 | from datetime import datetime 8 | 9 | from datasets.cifar10 import cifar10_data 10 | from datasets.cifar100 import cifar100_data 11 | 12 | from libs import custom_ops 13 | from nets import bn_conv 14 | 15 | log_dir = "logs/cifar10/hw_uniform/" 16 | eval_dir = log_dir 17 | batch_size = 128 18 | num_classes = 10 19 | epoch_size = 10000.0 20 | num_iter = int(math.ceil(epoch_size/batch_size)) 21 | load_latest_checkpoint = False 22 | 23 | eval_interval_secs = 3 24 | run_once = False 25 | 26 | tf.logging.set_verbosity(tf.logging.INFO) 27 | sess = tf.Session() 28 | 29 | ## Data 30 | with tf.device('/cpu:0'): 31 | d = cifar10_data(batch_size=batch_size, sess=sess) 32 | image_batch_tensor, target_batch_tensor = d.build_test_data_tensor(shuffle=False, augmentation=False) 33 | 34 | ## Model 35 | #logits = bn_conv.inference(image_batch_tensor, num_classes=num_classes, is_training=True) 36 | #from tensorflow.contrib.slim.nets import resnet_v2 37 | #with slim.arg_scope(custom_ops.resnet_arg_scope(is_training=True)): 38 | # net, end_points = resnet_v2.resnet_v2_101(image_batch_tensor, 39 | # num_classes=num_classes, 40 | # global_pool=True)# reduce output to rank 2 (not working) 41 | #logits = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=False) 42 | import nets.highway_uniform 43 | hps = nets.highway_uniform.HParams(batch_size=batch_size, 44 | 
def _eval_model_checkpoint(model_checkpoint_path):
    """Restore one checkpoint and evaluate it for up to ``num_iter`` batches.

    Fetches ``model.cost``, ``model.predictions`` and ``model.labels`` from
    the module-level session, logs top-1 accuracy and the average loss, and
    writes both as ``test/accuracy`` / ``test/avg_loss`` summaries tagged
    with the global step parsed from the checkpoint file name.

    Args:
        model_checkpoint_path: checkpoint path of the form
            ``<dir>/<prefix>-<global_step>``.  A falsy value is logged as an
            error and the function returns without evaluating.
    """
    if not model_checkpoint_path:
        tf.logging.error("Couldn't find checkpoint to restore from. Exiting.")
        return

    tf.logging.info("Restoring from checkpoint %s" % model_checkpoint_path)
    saver.restore(sess, model_checkpoint_path)

    # The text after the last '-' of the file name is the global step.  If it
    # is not numeric the summaries are written without a step instead of
    # failing inside add_summary after the whole evaluation has run.
    step_label = os.path.basename(model_checkpoint_path).split('-')[-1]
    global_step = int(step_label) if step_label.isdigit() else None

    log_every = 200  # batches between two throughput log lines
    total_loss = 0.0
    loss_terms = 0
    correct_prediction, total_prediction = 0, 0
    step = 0

    tf.logging.info('%s: starting evaluation.' % (datetime.now()))
    start_time = time.time()
    while step < num_iter and not coord.should_stop():
        loss_value, predictions, truth = sess.run(
            [model.cost, model.predictions, model.labels])

        # Average over however many values the cost fetch yields, so the
        # result is a true mean whether the cost is one scalar per batch or
        # one value per example.
        # NOTE(review): model.cost is defined outside this file -- confirm
        # which of the two it is.
        total_loss += np.sum(loss_value)
        loss_terms += np.size(loss_value)

        hits = np.argmax(truth, axis=1) == np.argmax(predictions, axis=1)
        correct_prediction += np.sum(hits)
        total_prediction += hits.shape[0]

        step += 1
        if step % log_every == 0:
            duration = time.time() - start_time
            # Divide by the number of batches actually covered by `duration`.
            sec_per_batch = duration / float(log_every)
            examples_per_sec = batch_size / sec_per_batch
            tf.logging.info('[%d batches out of %d] (%.1f examples/sec; %.3f '
                            'sec/batch)' % (step, num_iter, examples_per_sec, sec_per_batch))
            start_time = time.time()

    if total_prediction == 0:
        # The coordinator asked to stop before a single batch was evaluated.
        tf.logging.error('No batches were evaluated for checkpoint %s' % model_checkpoint_path)
        return

    # compute test set accuracy over the examples that were really evaluated
    accuracy = float(correct_prediction * 100.0 / total_prediction)
    avg_loss = float(total_loss / loss_terms)

    tf.logging.info('%s: top_1_acc: %6.3f%%, avg_loss: %.7f [%d examples]' %
                    (step_label, accuracy, avg_loss, total_prediction))

    accuracy_sum = tf.Summary(value=[tf.Summary.Value(tag="test/accuracy", simple_value=accuracy)])
    avg_loss_summary = tf.Summary(value=[tf.Summary.Value(tag="test/avg_loss", simple_value=avg_loss)])
    summary_writer.add_summary(accuracy_sum, global_step)
    summary_writer.add_summary(avg_loss_summary, global_step)
    summary_writer.flush()
" % log_dir) 130 | ckpt = tf.train.get_checkpoint_state(log_dir) 131 | if ckpt and ckpt.model_checkpoint_path: 132 | for path in ckpt.all_model_checkpoint_paths: 133 | if not path in done: 134 | done.append(path) 135 | _eval_model_checkpoint(path) 136 | else: 137 | tf.logging.error('No checkpoint file found') 138 | time.sleep(eval_interval_secs) 139 | 140 | print("done!") 141 | -------------------------------------------------------------------------------- /slim_highway_train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | import os 5 | import time 6 | from collections import deque 7 | 8 | from datasets.cifar10 import cifar10_data 9 | from datasets.cifar100 import cifar100_data 10 | 11 | from libs import components 12 | from libs import custom_ops 13 | from nets import bn_conv 14 | 15 | tf.logging.set_verbosity(tf.logging.INFO) 16 | 17 | log_dir = "logs/cifar10/hw_uniform/" 18 | batch_size = 64 19 | num_classes = 10 20 | epoch_in_steps = int(50000.0/batch_size) 21 | max_step = epoch_in_steps * 15 22 | load_latest_checkpoint = False 23 | step = 0 24 | lrn_rate = 0.1 25 | 26 | sess = tf.Session() 27 | 28 | ## Data 29 | with tf.device('/cpu:0'): 30 | d = cifar10_data(batch_size=batch_size, sess=sess) 31 | image_batch_tensor, target_batch_tensor = d.build_train_data_tensor(shuffle=True, augmentation=True) 32 | 33 | ## Model 34 | #logits = bn_conv.inference(image_batch_tensor, num_classes=num_classes, is_training=True) 35 | #logits = highway_test.inference(image_batch_tensor, num_classes=num_classes, is_training=True) 36 | #from tensorflow.contrib.slim.nets import resnet_v2 37 | #with slim.arg_scope(custom_ops.resnet_arg_scope(is_training=True)): 38 | # net, end_points = resnet_v2.resnet_v2_101(image_batch_tensor, 39 | # num_classes=num_classes, 40 | # global_pool=True)# reduce output to rank 2 (not working) 41 | #logits = 
tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=False) 42 | 43 | #import nets.resnet 44 | import nets.highway_uniform 45 | hps = nets.highway_uniform.HParams(batch_size=batch_size, 46 | num_classes=num_classes, 47 | min_lrn_rate=0.0001, 48 | lrn_rate=0.1, 49 | num_residual_units=4, 50 | use_bottleneck=False, 51 | weight_decay_rate=0.0002, 52 | relu_leakiness=0.1, 53 | optimizer='mom') 54 | model = nets.highway_uniform.ResNet(hps, image_batch_tensor, target_batch_tensor, 'train') 55 | model.build_graph() 56 | 57 | ## Losses and Accuracies 58 | avg_loss_queue = deque(epoch_in_steps * [0]) 59 | avg_top1_queue = deque(epoch_in_steps * [0]) 60 | 61 | ## Optimizer 62 | 63 | ## Summaries 64 | summary_op = model.summaries 65 | 66 | ## Initialization 67 | saver = tf.train.Saver(max_to_keep=10000000) 68 | summary_writer = tf.train.SummaryWriter(log_dir, graph=sess.graph) 69 | coord = tf.train.Coordinator() 70 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 71 | sess.run(tf.global_variables_initializer()) 72 | 73 | ## Load Pretrained 74 | if load_latest_checkpoint: 75 | checkpoint = tf.train.latest_checkpoint(log_dir) 76 | if checkpoint: 77 | tf.logging.info("Restoring from checkpoint %s" % checkpoint) 78 | saver.restore(sess, checkpoint) 79 | step = sess.run(model.global_step) 80 | else: 81 | tf.logging.error("Couldn't find checkpoint to restore from. 
## Train
tf.logging.info('start training ...')
total_start_time = time.time()

# Piecewise-constant learning-rate schedule as (exclusive upper bound on the
# global step, learning rate); beyond the last bound `final_lrn_rate` is used.
lrn_rate_schedule = ((20000, 0.1), (40000, 0.01), (50000, 0.001))
final_lrn_rate = 0.0001

while not coord.should_stop():
    start_time = time.time()

    # Pick the rate *before* running the step.  Previously it was derived from
    # the step fetched by the previous iteration, so a run resumed from a late
    # checkpoint took its first update with the initial rate of 0.1.
    lrn_rate = final_lrn_rate
    for upper_bound, rate in lrn_rate_schedule:
        if step < upper_bound:
            lrn_rate = rate
            break

    (_, summaries, loss, train_step, predictions, truth) = sess.run(
        [model.train_op, summary_op, model.cost, model.global_step, model.predictions, model.labels],
        feed_dict={model.lrn_rate: lrn_rate})

    duration = time.time() - start_time
    truth = np.argmax(truth, axis=1)
    predictions = np.argmax(predictions, axis=1)
    accuracy = np.sum(truth == predictions) * 100 / batch_size
    # push_into_queue is a project helper; its return value is used here as
    # the running average of the pushed values.
    avg_accuracy = components.push_into_queue(accuracy, avg_top1_queue, "train/avg_accuracy", train_step, summary_writer)
    avg_loss = components.push_into_queue(loss, avg_loss_queue, "train/avg_loss", train_step, summary_writer)

    if step % 100 == 0:
        total_duration = (time.time() - total_start_time) / 60.0
        examples_per_sec = batch_size / float(duration)
        accuracy_sum = tf.Summary(value=[tf.Summary.Value(tag="train/accuracy", simple_value=accuracy)])

        if step == 500:
            # One-off throughput report.
            format_str = ('%4.2fmin, step %4.d, lr: %.4f, loss: %4.3f, top-1: %5.2f%% (%.1f examples/sec; %.3f sec/batch)')
            tf.logging.info(format_str % (total_duration, step, lrn_rate, loss, accuracy, examples_per_sec, duration))
        else:
            format_str = ('%4.2fmin, step %4.d, lr: %.4f, loss: %4.3f (%4.3f), top-1: %5.2f%% (%5.2f%%)')
            tf.logging.info(format_str % (total_duration, step, lrn_rate, loss, avg_loss, accuracy, avg_accuracy))

        summary_writer.add_summary(accuracy_sum, train_step)
        summary_writer.add_summary(summaries, train_step)
        summary_writer.flush()

    if step % 1000 == 0:
        tf.logging.info("saving checkpoint")
        checkpoint_path = os.path.join(log_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=model.global_step)

    step += 1

coord.join(threads)

print("done!")
def _eval_model_checkpoint(model_checkpoint_path):
    """Restore one checkpoint and evaluate it for up to ``num_iter`` batches.

    Fetches ``model.cost``, ``model.predictions`` and ``model.labels`` from
    the module-level session, logs top-1 accuracy and the average loss, and
    writes both as ``test/accuracy`` / ``test/avg_loss`` summaries tagged
    with the global step parsed from the checkpoint file name.

    Args:
        model_checkpoint_path: checkpoint path of the form
            ``<dir>/<prefix>-<global_step>``.  A falsy value is logged as an
            error and the function returns without evaluating.
    """
    if not model_checkpoint_path:
        tf.logging.error("Couldn't find checkpoint to restore from. Exiting.")
        return

    tf.logging.info("Restoring from checkpoint %s" % model_checkpoint_path)
    saver.restore(sess, model_checkpoint_path)

    # The text after the last '-' of the file name is the global step.  If it
    # is not numeric the summaries are written without a step instead of
    # failing inside add_summary after the whole evaluation has run.
    step_label = os.path.basename(model_checkpoint_path).split('-')[-1]
    global_step = int(step_label) if step_label.isdigit() else None

    log_every = 200  # batches between two throughput log lines
    total_loss = 0.0
    loss_terms = 0
    correct_prediction, total_prediction = 0, 0
    step = 0

    tf.logging.info('%s: starting evaluation.' % (datetime.now()))
    start_time = time.time()
    while step < num_iter and not coord.should_stop():
        loss_value, predictions, truth = sess.run(
            [model.cost, model.predictions, model.labels])

        # Average over however many values the cost fetch yields, so the
        # result is a true mean whether the cost is one scalar per batch or
        # one value per example.
        # NOTE(review): model.cost is defined outside this file -- confirm
        # which of the two it is.
        total_loss += np.sum(loss_value)
        loss_terms += np.size(loss_value)

        hits = np.argmax(truth, axis=1) == np.argmax(predictions, axis=1)
        correct_prediction += np.sum(hits)
        total_prediction += hits.shape[0]

        step += 1
        if step % log_every == 0:
            duration = time.time() - start_time
            # Divide by the number of batches actually covered by `duration`.
            sec_per_batch = duration / float(log_every)
            examples_per_sec = batch_size / sec_per_batch
            tf.logging.info('[%d batches out of %d] (%.1f examples/sec; %.3f '
                            'sec/batch)' % (step, num_iter, examples_per_sec, sec_per_batch))
            start_time = time.time()

    if total_prediction == 0:
        # The coordinator asked to stop before a single batch was evaluated.
        tf.logging.error('No batches were evaluated for checkpoint %s' % model_checkpoint_path)
        return

    # compute test set accuracy over the examples that were really evaluated
    accuracy = float(correct_prediction * 100.0 / total_prediction)
    avg_loss = float(total_loss / loss_terms)

    tf.logging.info('%s: top_1_acc: %6.3f%%, avg_loss: %.7f [%d examples]' %
                    (step_label, accuracy, avg_loss, total_prediction))

    accuracy_sum = tf.Summary(value=[tf.Summary.Value(tag="test/accuracy", simple_value=accuracy)])
    avg_loss_summary = tf.Summary(value=[tf.Summary.Value(tag="test/avg_loss", simple_value=avg_loss)])
    summary_writer.add_summary(accuracy_sum, global_step)
    summary_writer.add_summary(avg_loss_summary, global_step)
    summary_writer.flush()
" % log_dir) 130 | ckpt = tf.train.get_checkpoint_state(log_dir) 131 | if ckpt and ckpt.model_checkpoint_path: 132 | for path in ckpt.all_model_checkpoint_paths: 133 | if not path in done: 134 | done.append(path) 135 | _eval_model_checkpoint(path) 136 | else: 137 | tf.logging.error('No checkpoint file found') 138 | time.sleep(eval_interval_secs) 139 | 140 | print("done!") 141 | -------------------------------------------------------------------------------- /slim_old_resnet_train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | import os 5 | import time 6 | from collections import deque 7 | 8 | from datasets.cifar10 import cifar10_data 9 | from datasets.cifar100 import cifar100_data 10 | 11 | from libs import components 12 | from libs import custom_ops 13 | from nets import bn_conv 14 | 15 | tf.logging.set_verbosity(tf.logging.INFO) 16 | 17 | log_dir = "logs/cifar10/old_small/" 18 | ckpt_dir = log_dir # "logs/cifar10/wrn_1/" 19 | batch_size = 64 20 | num_classes = 10 21 | epoch_in_steps = int(50000.0/batch_size) 22 | max_step = epoch_in_steps * 15 23 | load_latest_checkpoint = False 24 | step = 0 25 | lrn_rate = 0.1 26 | 27 | sess = tf.Session() 28 | 29 | ## Data 30 | with tf.device('/cpu:0'): 31 | d = cifar10_data(batch_size=batch_size, sess=sess) 32 | image_batch_tensor, target_batch_tensor = d.build_train_data_tensor(shuffle=True, augmentation=True) 33 | 34 | ## Model 35 | #logits = bn_conv.inference(image_batch_tensor, num_classes=num_classes, is_training=True) 36 | #logits = highway_test.inference(image_batch_tensor, num_classes=num_classes, is_training=True) 37 | #from tensorflow.contrib.slim.nets import resnet_v2 38 | #with slim.arg_scope(custom_ops.resnet_arg_scope(is_training=True)): 39 | # net, end_points = resnet_v2.resnet_v2_101(image_batch_tensor, 40 | # num_classes=num_classes, 41 | # global_pool=True)# reduce output to 
# rank 2 (not working)
#logits = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=False)

# Build the reference ResNet in 'train' mode on top of the input tensors
# created earlier in this script.
#import nets.resnet
import nets.resnet_old_reference
hps = nets.resnet_old_reference.HParams(batch_size=batch_size,
                                        num_classes=num_classes,
                                        min_lrn_rate=0.0001,
                                        lrn_rate=0.1,
                                        num_residual_units=4,
                                        use_bottleneck=False,
                                        weight_decay_rate=0.0002,
                                        relu_leakiness=0.1,
                                        optimizer='mom')
model = nets.resnet_old_reference.ResNet(hps, image_batch_tensor, target_batch_tensor, 'train')
model.build_graph()

## Losses and Accuracies
# One slot per step of an epoch, pre-filled with zeros; both queues are handed
# to components.push_into_queue below (presumably a running average - see
# libs/components to confirm).
avg_loss_queue = deque(epoch_in_steps * [0])
avg_top1_queue = deque(epoch_in_steps * [0])

## Optimizer

## Summaries
summary_op = model.summaries

## Initialization
saver = tf.train.Saver(max_to_keep=10000000)
summary_writer = tf.train.SummaryWriter(log_dir, graph=sess.graph)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
sess.run(tf.global_variables_initializer())

## Load Pretrained
if load_latest_checkpoint:
  checkpoint = tf.train.latest_checkpoint(ckpt_dir)
  if checkpoint:
    tf.logging.info("Restoring from checkpoint %s" % checkpoint)
    saver.restore(sess, checkpoint)
    # Resume the local step counter from the restored graph step.
    step = sess.run(model.global_step)
  else:
    tf.logging.error("Couldn't find checkpoint to restore from. Exiting.")
    exit()

## Train
tf.logging.info('start training ...')
total_start_time = time.time()
# NOTE(review): nothing visible in this loop calls coord.request_stop() and
# max_step is not consulted, so the loop only ends if the coordinator is
# stopped elsewhere - confirm that this is intended.
while not coord.should_stop():
  start_time = time.time()
  correct_prediction, total_prediction = 0, 0  # not read again in this script
  (_, summaries, loss, train_step, predictions, truth) = sess.run(
    [model.train_op, summary_op, model.cost, model.global_step, model.predictions, model.labels],
    feed_dict={model.lrn_rate: lrn_rate})

  # Step-wise learning-rate schedule. It is evaluated after the run above, so
  # a new rate is first fed on the following step.
  if train_step < 20000:  # 15000: # 40000
    lrn_rate = 0.1
  elif train_step < 40000:  #30000: # 60000
    lrn_rate = 0.01
  elif train_step < 50000:  # # 80000
    lrn_rate = 0.001
  else:
    lrn_rate = 0.0001

  duration = time.time() - start_time
  # Reduce labels and predictions to class indices, then score the batch.
  truth = np.argmax(truth, axis=1)
  predictions = np.argmax(predictions, axis=1)
  accuracy = np.sum(truth == predictions) * 100 / batch_size
  avg_accuracy = components.push_into_queue(accuracy, avg_top1_queue, "train/avg_accuracy", train_step, summary_writer)
  avg_loss = components.push_into_queue(loss, avg_loss_queue, "train/avg_loss", train_step, summary_writer)

  # `step` counts iterations of this loop; `train_step` is the graph's
  # global step and is what the summaries are tagged with.
  if step % 100 == 0:
    total_duration = (time.time() - total_start_time) / 60.0
    examples_per_sec = batch_size / float(duration)
    accuracy_sum = tf.Summary(value=[tf.Summary.Value(tag="train/accuracy", simple_value=accuracy)])

    # Only the message at step 500 reports throughput; every other message
    # reports the averaged loss and accuracy instead.
    if step == 500:
      format_str = ('%4.2fmin, step %4.d, lr: %.4f, loss: %4.3f, top-1: %5.2f%% (%.1f examples/sec; %.3f sec/batch)')
      tf.logging.info(format_str % (total_duration, step, lrn_rate, loss, accuracy, examples_per_sec, duration))
    else:
      format_str = ('%4.2fmin, step %4.d, lr: %.4f, loss: %4.3f (%4.3f), top-1: %5.2f%% (%5.2f%%)')
      tf.logging.info(format_str % (total_duration, step, lrn_rate, loss, avg_loss, accuracy, avg_accuracy))

    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # listing; these writes sit in this branch because accuracy_sum is only
    # defined here - confirm.
    summary_writer.add_summary(accuracy_sum, train_step)
    summary_writer.add_summary(summaries, train_step)
    summary_writer.flush()

  if step % 1000 == 0:
    tf.logging.info("saving checkpoint")
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    saver.save(sess, checkpoint_path, global_step=model.global_step)

  step += 1

coord.join(threads)

print("done!")
def _eval_model_checkpoint(model_checkpoint_path):
  """Restore one checkpoint and record its accuracy and loss on the eval data.

  Runs up to `num_iter` batches through `model`, logs the top-1 accuracy and
  the average loss, and writes both as `test/accuracy` and `test/avg_loss`
  summaries tagged with the step parsed from the checkpoint file name.

  Args:
    model_checkpoint_path: checkpoint path such as ".../model.ckpt-1000". The
      text after the last '-' of the file name is used as the summary step.
      A falsy value logs an error and returns without evaluating.
  """
  if not model_checkpoint_path:
    tf.logging.error("Couldn't find checkpoint to restore from. Exiting.")
    return

  tf.logging.info("Restoring from checkpoint %s" % model_checkpoint_path)
  saver.restore(sess, model_checkpoint_path)

  log_every = 200  # number of batches between two progress messages
  total_loss = 0.0
  total_sample_count = num_iter * batch_size
  step = 0
  # Kept as text, exactly as before; it is what gets handed to add_summary.
  global_step = model_checkpoint_path.split('/')[-1].split('-')[-1]

  tf.logging.info('%s: starting evaluation.' % (datetime.now()))
  start_time = time.time()
  correct_prediction, total_prediction = 0, 0
  while step < num_iter and not coord.should_stop():
    loss_value, predictions, truth = sess.run([model.cost, model.predictions, model.labels])

    total_loss += np.sum(loss_value)
    # Labels and predictions are reduced to class indices before comparing.
    truth = np.argmax(truth, axis=1)
    predictions = np.argmax(predictions, axis=1)
    correct_prediction += np.sum(truth == predictions)
    total_prediction += predictions.shape[0]

    step += 1
    if step % log_every == 0:
      duration = time.time() - start_time
      # The timer spans the last `log_every` batches, so that is the divisor
      # (the previous hard-coded 20.0 overstated sec/batch tenfold).
      sec_per_batch = duration / float(log_every)
      examples_per_sec = batch_size / sec_per_batch
      tf.logging.info('[%d batches out of %d] (%.1f examples/sec; %.3f '
                      'sec/batch)' % (step, num_iter, examples_per_sec, sec_per_batch))
      start_time = time.time()

  if total_prediction == 0:
    # The coordinator stopped before a single batch was evaluated; there is
    # nothing to average, and dividing below would raise ZeroDivisionError.
    tf.logging.error('No batches were evaluated for %s' % model_checkpoint_path)
    return

  # compute test set accuracy
  accuracy = correct_prediction * 100.0 / total_prediction
  # NOTE(review): the divisor is the planned sample count. If model.cost is
  # already a per-batch mean this divides by batch_size once too often -
  # confirm against the model definition.
  avg_loss = total_loss / total_sample_count

  tf.logging.info('%s: top_1_acc: %6.3f%%, avg_loss: %.7f [%d examples]' %
                  (global_step, accuracy, avg_loss, total_sample_count))

  accuracy_sum = tf.Summary(value=[tf.Summary.Value(tag="test/accuracy", simple_value=accuracy)])
  avg_loss_summary = tf.Summary(value=[tf.Summary.Value(tag="test/avg_loss", simple_value=avg_loss)])
  summary_writer.add_summary(accuracy_sum, global_step)
  summary_writer.add_summary(avg_loss_summary, global_step)
  summary_writer.flush()
" % log_dir) 130 | ckpt = tf.train.get_checkpoint_state(log_dir) 131 | if ckpt and ckpt.model_checkpoint_path: 132 | for path in ckpt.all_model_checkpoint_paths: 133 | if not path in done: 134 | done.append(path) 135 | _eval_model_checkpoint(path) 136 | else: 137 | tf.logging.error('No checkpoint file found') 138 | time.sleep(eval_interval_secs) 139 | 140 | print("done!") 141 | -------------------------------------------------------------------------------- /slim_resnet_train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | import os 5 | import time 6 | from collections import deque 7 | 8 | from datasets.cifar10 import cifar10_data 9 | from datasets.cifar100 import cifar100_data 10 | 11 | from libs import components 12 | from libs import custom_ops 13 | from nets import bn_conv 14 | 15 | tf.logging.set_verbosity(tf.logging.INFO) 16 | 17 | log_dir = "logs/cifar10/resnet_uniform/" 18 | ckpt_dir = log_dir # "logs/cifar10/wrn_1/" 19 | batch_size = 64 20 | num_classes = 10 21 | epoch_in_steps = int(50000.0/batch_size) 22 | max_step = epoch_in_steps * 15 23 | load_latest_checkpoint = False 24 | step = 0 25 | lrn_rate = 0.1 26 | 27 | sess = tf.Session() 28 | 29 | ## Data 30 | with tf.device('/cpu:0'): 31 | d = cifar10_data(batch_size=batch_size, sess=sess) 32 | image_batch_tensor, target_batch_tensor = d.build_train_data_tensor(shuffle=True, augmentation=True) 33 | 34 | ## Model 35 | #logits = bn_conv.inference(image_batch_tensor, num_classes=num_classes, is_training=True) 36 | #logits = highway_test.inference(image_batch_tensor, num_classes=num_classes, is_training=True) 37 | #from tensorflow.contrib.slim.nets import resnet_v2 38 | #with slim.arg_scope(custom_ops.resnet_arg_scope(is_training=True)): 39 | # net, end_points = resnet_v2.resnet_v2_101(image_batch_tensor, 40 | # num_classes=num_classes, 41 | # global_pool=True)# reduce output 
# to rank 2 (not working)
#logits = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=False)

# Build the uniform ResNet in 'train' mode on top of the input tensors
# created earlier in this script.
#import nets.resnet
import nets.resnet_uniform
hps = nets.resnet_uniform.HParams(batch_size=batch_size,
                                  num_classes=num_classes,
                                  min_lrn_rate=0.0001,
                                  lrn_rate=0.1,
                                  num_residual_units=4,
                                  use_bottleneck=False,
                                  weight_decay_rate=0.0002,
                                  relu_leakiness=0.1,
                                  optimizer='mom')
model = nets.resnet_uniform.ResNet(hps, image_batch_tensor, target_batch_tensor, 'train')
model.build_graph()

## Losses and Accuracies
# One slot per step of an epoch, pre-filled with zeros; both queues are handed
# to components.push_into_queue below (presumably a running average - see
# libs/components to confirm).
avg_loss_queue = deque(epoch_in_steps * [0])
avg_top1_queue = deque(epoch_in_steps * [0])

## Optimizer

## Summaries
summary_op = model.summaries

## Initialization
saver = tf.train.Saver(max_to_keep=10000000)
summary_writer = tf.train.SummaryWriter(log_dir, graph=sess.graph)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
sess.run(tf.global_variables_initializer())

## Load Pretrained
if load_latest_checkpoint:
  checkpoint = tf.train.latest_checkpoint(ckpt_dir)
  if checkpoint:
    tf.logging.info("Restoring from checkpoint %s" % checkpoint)
    saver.restore(sess, checkpoint)
    # Resume the local step counter from the restored graph step.
    step = sess.run(model.global_step)
  else:
    tf.logging.error("Couldn't find checkpoint to restore from. Exiting.")
    exit()

## Train
tf.logging.info('start training ...')
total_start_time = time.time()
# NOTE(review): nothing visible in this loop calls coord.request_stop() and
# max_step is not consulted, so the loop only ends if the coordinator is
# stopped elsewhere - confirm that this is intended.
while not coord.should_stop():
  start_time = time.time()
  correct_prediction, total_prediction = 0, 0  # not read again in this script
  (_, summaries, loss, train_step, predictions, truth) = sess.run(
    [model.train_op, summary_op, model.cost, model.global_step, model.predictions, model.labels],
    feed_dict={model.lrn_rate: lrn_rate})

  # Step-wise learning-rate schedule. It is evaluated after the run above, so
  # a new rate is first fed on the following step.
  if train_step < 20000:  # 15000: # 40000
    lrn_rate = 0.1
  elif train_step < 40000:  #30000: # 60000
    lrn_rate = 0.01
  elif train_step < 50000:  # # 80000
    lrn_rate = 0.001
  else:
    lrn_rate = 0.0001

  duration = time.time() - start_time
  # Reduce labels and predictions to class indices, then score the batch.
  truth = np.argmax(truth, axis=1)
  predictions = np.argmax(predictions, axis=1)
  accuracy = np.sum(truth == predictions) * 100 / batch_size
  avg_accuracy = components.push_into_queue(accuracy, avg_top1_queue, "train/avg_accuracy", train_step, summary_writer)
  avg_loss = components.push_into_queue(loss, avg_loss_queue, "train/avg_loss", train_step, summary_writer)

  # `step` counts iterations of this loop; `train_step` is the graph's
  # global step and is what the summaries are tagged with.
  if step % 100 == 0:
    total_duration = (time.time() - total_start_time) / 60.0
    examples_per_sec = batch_size / float(duration)
    accuracy_sum = tf.Summary(value=[tf.Summary.Value(tag="train/accuracy", simple_value=accuracy)])

    # Only the message at step 500 reports throughput; every other message
    # reports the averaged loss and accuracy instead.
    if step == 500:
      format_str = ('%4.2fmin, step %4.d, lr: %.4f, loss: %4.3f, top-1: %5.2f%% (%.1f examples/sec; %.3f sec/batch)')
      tf.logging.info(format_str % (total_duration, step, lrn_rate, loss, accuracy, examples_per_sec, duration))
    else:
      format_str = ('%4.2fmin, step %4.d, lr: %.4f, loss: %4.3f (%4.3f), top-1: %5.2f%% (%5.2f%%)')
      tf.logging.info(format_str % (total_duration, step, lrn_rate, loss, avg_loss, accuracy, avg_accuracy))

    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # listing; these writes sit in this branch because accuracy_sum is only
    # defined here - confirm.
    summary_writer.add_summary(accuracy_sum, train_step)
    summary_writer.add_summary(summaries, train_step)
    summary_writer.flush()

  if step % 1000 == 0:
    tf.logging.info("saving checkpoint")
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    saver.save(sess, checkpoint_path, global_step=model.global_step)

  step += 1

coord.join(threads)

print("done!")
## Losses and Accuracies
# Per-example softmax cross-entropy against the targets cast to float32,
# averaged over the batch to give the training loss.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
                                                        tf.cast(target_batch_tensor, tf.float32),
                                                        name="cross-entropy")
loss = tf.reduce_mean(cross_entropy, name='loss')

# The class index is taken with argmax over axis 1 of the target tensor.
top_1_correct = tf.nn.in_top_k(logits, tf.argmax(target_batch_tensor, 1), 1)
top_5_correct = tf.nn.in_top_k(logits, tf.argmax(target_batch_tensor, 1), 5)

# Batch accuracies in percent.
top_1_batch_accuracy = tf.reduce_sum(tf.cast(top_1_correct, tf.float32)) * 100.0 / batch_size
top_5_batch_accuracy = tf.reduce_sum(tf.cast(top_5_correct, tf.float32)) * 100.0 / batch_size

## Optimizer
# The learning rate is a placeholder so the Python loop below can change it.
global_step = tf.Variable(0, name='global_step', trainable=False)
learning_rate = tf.placeholder(tf.float32, name="learning_rate")
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
train_op = optimizer.minimize(loss, global_step=global_step)

## Summaries
tf.scalar_summary('train/loss', loss)
tf.scalar_summary('train/learning_rate', learning_rate)
tf.scalar_summary('train/top_1_batch_acc', top_1_batch_accuracy)
tf.scalar_summary('train/top_5_batch_acc', top_5_batch_accuracy)
summary_op = tf.merge_all_summaries()

## Initialization
saver = tf.train.Saver(max_to_keep=10000000,)
summary_writer = tf.train.SummaryWriter(log_dir)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
sess.run(tf.global_variables_initializer())

if load_latest_checkpoint:
  checkpoint = tf.train.latest_checkpoint(log_dir)
  if checkpoint:
    print("Restoring from checkpoint", checkpoint)
    saver.restore(sess, checkpoint)
  else:
    print("Couldn't find checkpoint to restore from. Exiting.")
    exit()

## Training
epoch_count = 0
lr = 0.01
for step in range(max_step):
  start_time = time.time()

  # Divide the learning rate by 10 every 10 epochs; `step > 100` keeps
  # step 0 from triggering the decay.
  if step % (epoch_in_steps*10) == 0 and step > 100:
    lr /= 10
    print("learning rate decrased to ", lr)

  if step % epoch_in_steps == 0:
    epoch_count += 1
    print("epoch: ", epoch_count)

  _, summary_str, loss_val = sess.run([train_op, summary_op, loss], feed_dict={learning_rate: lr})
  duration = time.time() - start_time

  assert not np.isnan(loss_val), 'Model diverged with loss = NaN'

  if step % 50 == 0:
    examples_per_sec = batch_size / float(duration)
    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
    print(format_str % (datetime.now(), step, loss_val, examples_per_sec, duration))
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # listing; the summary write may belong at loop level - confirm.
    summary_writer.add_summary(summary_str, step)

  # Checkpoint every 390 steps and once more on the final step.
  if step % 390 == 0 or step == max_step-1:
    print("saving model checkpoint")
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    saver.save(sess, checkpoint_path, global_step=global_step)

print("done!")
def transform(pts, t, residual=False):
  """Apply the point map `t` to every (x, y) pair in `pts`.

  Args:
    pts: iterable of (x, y) pairs.
    t: callable taking (x, y) and returning the transformed (x, y).
    residual: when true, each input point is added to its transformed point
      (a skip connection); otherwise the transformed point is used alone.

  Returns:
    A new list with one (x, y) tuple per input point.
  """
  out = []
  for (px, py) in pts:
    tx, ty = t(px, py)
    #out.append(((tx*0.5 + px*0.5) , (ty*0.5 + py*0.5) ))
    out.append((tx + px, ty + py) if residual else (tx, ty))
  return out


def activation(pts):
  """Clamp both coordinates of every point at zero (ReLU); returns a list."""
  return [(np.maximum(0, px), np.maximum(0, py)) for (px, py) in pts]


def get_mean_and_variance(pts):
  """Return the per-coordinate mean and variance of `pts` as two arrays."""
  arr = np.array(pts)
  return arr.mean(axis=0), arr.var(axis=0)


# operations
def batch_norm(mean, var, x, y, eps=0.0):
  """Shift (x, y) by -mean and divide by the standard deviation.

  Args:
    mean: pair of per-coordinate means.
    var: pair of per-coordinate variances.
    x, y: the point to normalise.
    eps: added to each variance before the square root. The default 0.0
      keeps the previous behaviour; pass a small positive value to avoid
      dividing by zero when a coordinate has no spread.

  Returns:
    The normalised (x, y).
  """
  x, y = translate(-mean[0], -mean[1], x, y)

  scale_x = 1.0 / np.sqrt(var[0] + eps)
  scale_y = 1.0 / np.sqrt(var[1] + eps)
  return scale(scale_x, scale_y, x, y)


def translate(nx, ny, x, y):
  """Move (x, y) by (nx, ny) using a homogeneous 3x3 translation matrix."""
  T = np.array([[1, 0, nx],
                [0, 1, ny],
                [0, 0, 1]])
  v_out = T.dot(np.array([x, y, 1]))
  return v_out[0], v_out[1]


def rand(stddev, x, y):
  """Multiply (x, y) by a fresh 2x2 matrix of N(0, stddev) entries.

  A new matrix is drawn on every call, so each point gets its own map.
  """
  R = np.random.normal(scale=stddev, size=(2, 2))
  v_out = R.dot(np.array([x, y]))
  return v_out[0], v_out[1]


def rot(rot, x, y):
  """Rotate (x, y) counter-clockwise about the origin by `rot` degrees."""
  theta = np.radians(rot)
  R = np.array([[np.cos(theta), -np.sin(theta)],
                [np.sin(theta), np.cos(theta)]])
  v_out = R.dot(np.array([x, y]))
  return v_out[0], v_out[1]


def scale(tx, ty, x, y):
  """Scale x by tx and y by ty."""
  return tx * x, ty * y


def spez(x,y):
  """Rotate (x, y) by 70 degrees, then shift it by (0.5, 0.5)."""
  x, y = rot(70, x, y)
  x, y = translate(0.5, 0.5, x, y)
  return x, y


def id(x,y):
  """Identity map: return (x, y) unchanged (shadows the builtin `id`)."""
  return x, y


def softmax(x, y):
  """Return the softmax of the scores in x, and y unchanged.

  The maximum of x is subtracted before exponentiating for numerical
  stability. For a scalar x the first result is always 1.0.
  """
  e_x = np.exp(x - np.max(x))
  return e_x / e_x.sum(), y
f, axarr = plt.subplots(3, 2, figsize=(10, 14))

all_pts = []

print()
# Panel 0: the untouched grid, with its statistics printed for reference.
pts = orig_pts
mean, var = get_mean_and_variance(pts)
print("Mean: ", mean," Var: ", var)
all_pts.append(orig_pts)

# Panels 1-5: a small random linear map followed by batch normalisation.
# Each flag says whether that stage adds its input back (residual); only
# the third stage does not.
for use_residual in (True, True, False, True, True):
  pts = transform(pts, partial(rand, 0.01), residual=use_residual)
  mean, var = get_mean_and_variance(pts)
  print("Mean: ", mean, " Var: ", var)
  pts = transform(pts, partial(batch_norm, mean, var), residual=False)
  # pts = activation(pts)
  all_pts.append(pts)

# One scatter panel per recorded point set, all on the same fixed axes.
for panel, panel_pts in zip(axarr.flatten().tolist(), all_pts):
  xs, ys = zip(*panel_pts)
  panel.scatter(xs, ys, marker='o', s=1, color='blue')
  panel.axis([-4, 4, -4, 4])
  panel.grid(True)


plt.show()
data_url = "http://imagenet.stanford.edu/internal/car196/car_ims.tgz"
mat_url = "http://imagenet.stanford.edu/internal/car196/cars_annos.mat"

label_file = "cars_annos.mat"
num_classes = 196


def download_data():
  """Fetch the image archive (extracted) and the annotation file into data_path."""
  download.maybe_download_and_extract(url=data_url, download_dir=data_path)
  download.maybe_download(url=mat_url, download_dir=data_path)


def load_class_names():
  """Return the class names stored in the annotation file as a numpy array."""
  label_data = scipy.io.loadmat(data_path + label_file)
  return np.array([q[0] for q in label_data["annotations" if False else "class_names"][0]])


def _load_split(split_flag):
  """Load every annotation row whose 7th field equals `split_flag`.

  Returns:
    (img_path, bbox_x1, bbox_y1, bbox_x2, bbox_y2, cls, one_hot) where the
    first six are numpy arrays with one entry per image. img_path has
    data_path prepended, cls is the stored class number minus one, and
    one_hot is download.one_hot_encoded applied to cls.
  """
  label_data = scipy.io.loadmat(data_path + label_file)
  rows = [(data_path + q[0][0], int(q[1][0][0]), int(q[2][0][0]), int(q[3][0][0]), int(q[4][0][0]), int(q[5][0][0])-1)
          for q in label_data["annotations"][0] if q[6][0][0] == split_flag]

  # Transpose the row tuples into one array per column.
  img_path, bbox_x1, bbox_y1, bbox_x2, bbox_y2, cls = (np.array(col) for col in zip(*rows))

  return img_path, bbox_x1, bbox_y1, bbox_x2, bbox_y2, cls, download.one_hot_encoded(class_numbers=cls, num_classes=num_classes)


def load_training_data():
  """Return the rows whose 7th annotation field is 1; see _load_split.

  NOTE(review): in the published cars_annos.mat the 7th field appears to be
  a `test` flag. If so, this selects the test images and load_test_data the
  training images - confirm against the dataset devkit before relying on it.
  """
  return _load_split(1)


def load_test_data():
  """Return the rows whose 7th annotation field is 0; see _load_split."""
  return _load_split(0)
-------------------------------------------------------------------------------- /utils/cifar10.py: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | # 3 | # Functions for downloading the CIFAR-10 data-set from the internet 4 | # and loading it into memory. 5 | # 6 | # Implemented in Python 3.5 7 | # 8 | # Usage: 9 | # 1) Set the variable data_path with the desired storage path. 10 | # 2) Call maybe_download_and_extract() to download the data-set 11 | # if it is not already located in the given data_path. 12 | # 3) Call load_class_names() to get an array of the class-names. 13 | # 4) Call load_training_data() and load_test_data() to get 14 | # the images, class-numbers and one-hot encoded class-labels 15 | # for the training-set and test-set. 16 | # 5) Use the returned data in your own program. 17 | # 18 | # Format: 19 | # The images for the training- and test-sets are returned as 4-dim numpy 20 | # arrays each with the shape: [image_number, height, width, channel] 21 | # where the individual pixels are floats between 0.0 and 1.0. 22 | # 23 | ######################################################################## 24 | # 25 | # This file is part of the TensorFlow Tutorials available at: 26 | # 27 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials 28 | # 29 | # Published under the MIT License. See the file LICENSE for details. 30 | # 31 | # Copyright 2016 by Magnus Erik Hvass Pedersen 32 | # 33 | ######################################################################## 34 | 35 | import numpy as np 36 | import pickle 37 | import os 38 | from utils import download 39 | 40 | ######################################################################## 41 | 42 | # Directory where you want to download and save the data-set. 43 | # Set this before you start calling any of the functions below. 
44 | data_path = "data/CIFAR-10/" 45 | 46 | # URL for the data-set on the internet. 47 | data_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 48 | 49 | ######################################################################## 50 | # Various constants for the size of the images. 51 | # Use these constants in your own program. 52 | 53 | # Width and height of each image. 54 | img_size = 32 55 | 56 | # Number of channels in each image, 3 channels: Red, Green, Blue. 57 | num_channels = 3 58 | 59 | # Length of an image when flattened to a 1-dim array. 60 | img_size_flat = img_size * img_size * num_channels 61 | 62 | # Number of classes. 63 | num_classes = 10 64 | 65 | ######################################################################## 66 | # Various constants used to allocate arrays of the correct size. 67 | 68 | # Number of files for the training-set. 69 | _num_files_train = 5 70 | 71 | # Number of images for each batch-file in the training-set. 72 | _images_per_file = 10000 73 | 74 | # Total number of images in the training-set. 75 | # This is used to pre-allocate arrays for efficiency. 76 | _num_images_train = _num_files_train * _images_per_file 77 | 78 | ######################################################################## 79 | # Private functions for downloading, unpacking and loading data-files. 80 | 81 | 82 | def _get_file_path(filename=""): 83 | """ 84 | Return the full path of a data-file for the data-set. 85 | If filename=="" then return the directory of the files. 86 | """ 87 | 88 | return os.path.join(data_path, "cifar-10-batches-py/", filename) 89 | 90 | 91 | def _unpickle(filename): 92 | """ 93 | Unpickle the given file and return the data. 94 | Note that the appropriate dir-name is prepended the filename. 95 | """ 96 | 97 | # Create full path for the file. 
def _convert_images(raw):
    """
    Turn raw CIFAR-10 image data into a 4-dim float array.

    The raw values are divided by 255.0, so 8-bit pixel values end up
    between 0.0 and 1.0. The returned array has the shape:
    [image_number, height, width, channel]
    """

    # Scale the raw pixel values into floating-point numbers.
    scaled = np.array(raw, dtype=float) / 255.0

    # Split each flat image into its channels, rows and columns.
    channels_first = scaled.reshape([-1, num_channels, img_size, img_size])

    # Move the channel axis to the end.
    return np.transpose(channels_first, (0, 2, 3, 1))
def load_training_data():
    """
    Read every training batch-file of the CIFAR-10 data-set and merge
    them into single arrays.

    Returns the images, the class-numbers and the one-hot encoded
    class-labels.
    """

    # Allocate the full-size result arrays once instead of growing them.
    images = np.zeros(shape=[_num_images_train, img_size, img_size, num_channels], dtype=float)
    cls = np.zeros(shape=[_num_images_train], dtype=int)

    # Position in the result arrays where the next file's data is stored.
    offset = 0

    # The batch-files are numbered starting from 1.
    for file_number in range(1, _num_files_train + 1):
        batch_images, batch_cls = _load_data(filename="data_batch_" + str(file_number))

        # Copy this file's images and class-numbers into their slice.
        stop = offset + len(batch_images)
        images[offset:stop, :] = batch_images
        cls[offset:stop] = batch_cls

        # The next file continues where this one ended.
        offset = stop

    labels = download.one_hot_encoded(class_numbers=cls, num_classes=num_classes)

    return images, cls, labels
221 | Returns the images, class-numbers and one-hot encoded class-labels. 222 | """ 223 | 224 | images, cls = _load_data(filename="test_batch") 225 | 226 | return images, cls, download.one_hot_encoded(class_numbers=cls, num_classes=num_classes) 227 | 228 | ######################################################################## -------------------------------------------------------------------------------- /utils/cifar100.py: -------------------------------------------------------------------------------- 1 | # 2 | # cifar-10 file edited such that it works with cifar-100 - Imanol Schlag 3 | # 4 | ######################################################################## 5 | # 6 | # Functions for downloading the CIFAR-10 data-set from the internet 7 | # and loading it into memory. 8 | # 9 | # Implemented in Python 3.5 10 | # 11 | # Usage: 12 | # 1) Set the variable data_path with the desired storage path. 13 | # 2) Call maybe_download_and_extract() to download the data-set 14 | # if it is not already located in the given data_path. 15 | # 3) Call load_class_names() to get an array of the class-names. 16 | # 4) Call load_training_data() and load_test_data() to get 17 | # the images, class-numbers and one-hot encoded class-labels 18 | # for the training-set and test-set. 19 | # 5) Use the returned data in your own program. 20 | # 21 | # Format: 22 | # The images for the training- and test-sets are returned as 4-dim numpy 23 | # arrays each with the shape: [image_number, height, width, channel] 24 | # where the individual pixels are floats between 0.0 and 1.0. 25 | # 26 | ######################################################################## 27 | # 28 | # This file is part of the TensorFlow Tutorials available at: 29 | # 30 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials 31 | # 32 | # Published under the MIT License. See the file LICENSE for details. 
def _get_file_path(filename=""):
    """
    Return the location of a data-file of the CIFAR-100 data-set.

    The location is data_path, then the folder the archive unpacks
    into, then filename. An empty filename (the default) gives the
    location of that folder.
    """

    path_parts = (data_path, "cifar-100-python/", filename)

    return os.path.join(*path_parts)
def _load_data(filename):
    """
    Load one pickled data-file of the CIFAR-100 data-set.

    Returns the converted images (see _convert_images) and a numpy
    array with the fine class-number of each image.
    """

    contents = _unpickle(filename)

    # The entries are looked up with byte-string keys.
    raw_images = contents[b'data']
    fine_cls = np.array(contents[b'fine_labels'])

    return _convert_images(raw_images), fine_cls
def load_class_names():
    """
    Return the names of the fine classes of the CIFAR-100 data-set.

    The result is a list of strings in the order stored in the
    meta-file, e.g. names[3] is the name of class-number 3.
    """

    # The meta-file holds the class-names as byte-strings.
    meta = _unpickle(filename="meta")
    encoded_names = meta[b'fine_label_names']

    # Decode every name into a regular string.
    return [encoded.decode('utf-8') for encoded in encoded_names]
194 | """ 195 | 196 | images, cls = _load_data(filename="test") 197 | 198 | return images, cls, download.one_hot_encoded(class_numbers=cls, num_classes=num_classes) 199 | 200 | ######################################################################## -------------------------------------------------------------------------------- /utils/download.py: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | # 3 | # Minor changes for it to work with my repo - Imanol Schlag 4 | # 5 | ######################################################################## 6 | # 7 | # Functions for downloading and extracting data-files from the internet. 8 | # 9 | # Implemented in Python 3.5 10 | # 11 | ######################################################################## 12 | # 13 | # This file is part of the TensorFlow Tutorials available at: 14 | # 15 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials 16 | # 17 | # Published under the MIT License. See the file LICENSE for details. 18 | # 19 | # Copyright 2016 by Magnus Erik Hvass Pedersen 20 | # 21 | ######################################################################## 22 | 23 | import sys 24 | import os 25 | import urllib.request 26 | import tarfile 27 | import numpy as np 28 | 29 | ######################################################################## 30 | 31 | def _print_download_progress(count, block_size, total_size): 32 | """ 33 | Function used for printing the download progress. 34 | Used as a call-back function in maybe_download_and_extract(). 35 | """ 36 | 37 | # Percentage completion. 38 | pct_complete = float(count * block_size) / total_size 39 | 40 | # Status-message. Note the \r which means the line should overwrite itself. 41 | msg = "\r- Download progress: {0:.1%}".format(pct_complete) 42 | 43 | # Print it. 
def maybe_download_and_extract(url, download_dir):
    """
    Download and extract the data if it doesn't already exist.
    Assumes the url is a gzip-compressed tar-ball file.

    The existence of download_dir is used as the marker that the data
    has already been downloaded and extracted. If the download or the
    extraction fails, download_dir is removed again so that the next
    call retries instead of wrongly reporting the data as present.

    :param url:
        Internet URL for the tar-file to download.
        Example: "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    :param download_dir:
        Directory where the downloaded file is saved and extracted.
        Example: "data/CIFAR-10/"
    :return:
        True if the data was downloaded and extracted by this call,
        False if download_dir already existed and nothing was done.
    """
    import shutil

    # If the directory exists we assume it has also been extracted.
    if os.path.exists(download_dir):
        print("Data has apparently already been downloaded and unpacked.")
        return False

    # Filename for saving the file downloaded from the internet.
    # Use the filename from the URL and add it to the download_dir.
    filename = url.split('/')[-1]
    file_path = os.path.join(download_dir, filename)

    os.makedirs(download_dir)

    try:
        # Download the file from the internet.
        file_path, _ = urllib.request.urlretrieve(url=url,
                                                  filename=file_path,
                                                  reporthook=_print_download_progress)

        print()
        print("Download finished. Extracting files.")

        # Unpack the tar-ball; the with-block closes the archive again.
        # NOTE(review): extractall trusts the member paths stored in the
        # archive, so this should only be used with trusted URLs.
        with tarfile.open(name=file_path, mode="r:gz") as archive:
            archive.extractall(download_dir)

        print("Extracting finished. Cleaning up.")
        os.remove(file_path)
    except BaseException:
        # Also covers Ctrl-C during a long download. The directory was
        # created by this call, so removing it only discards our own
        # partial data and lets a later call start over.
        shutil.rmtree(download_dir, ignore_errors=True)
        raise

    print("Done.")
    return True
def one_hot_encoded(class_numbers, num_classes=None):
    """
    Generate the One-Hot encoded class-labels from an array of integers.

    For example, if class_number=2 and num_classes=4 then
    the one-hot encoded label is the float array: [0. 0. 1. 0.]

    :param class_numbers:
        Array of integers with class-numbers.
        Assume the integers are from zero to num_classes-1 inclusive.

    :param num_classes:
        Number of classes. If None then use max(class_numbers)+1.

    :return:
        2-dim array of shape: [len(class_numbers), num_classes]
    """

    # Find the number of classes if None is provided.
    if num_classes is None:
        # Class-numbers start at zero, so the number of classes is
        # one more than the largest class-number.
        num_classes = int(np.max(class_numbers)) + 1

    # Row i of the identity matrix is the one-hot vector for class i,
    # so indexing with the class-numbers picks one row per label.
    return np.eye(num_classes, dtype=float)[class_numbers]
30 | data_dir/n01440764/ILSVRC2012_val_00000293.JPEG 31 | data_dir/n01440764/ILSVRC2012_val_00000543.JPEG 32 | where 'n01440764' is the unique synset label associated with these images. 33 | labels_file: string, path to the labels file. 34 | The list of valid labels are held in this file. Assumes that the file 35 | contains entries as such: 36 | n01440764 37 | n01443537 38 | n01484850 39 | where each line corresponds to a label expressed as a synset. We map 40 | each synset contained in the file to an integer (based on the alphabetical 41 | ordering) starting with the integer 1 corresponding to the synset 42 | contained in the first line. 43 | The reason we start the integer labels at 1 is to reserve label 0 as an 44 | unused background class. 45 | Returns: 46 | filenames: list of strings; each string is a path to an image file. 47 | synsets: list of strings; each string is a unique WordNet ID. 48 | labels: list of integer; each integer identifies the ground truth. 49 | """ 50 | print('Determining list of input files and labels from %s.' % data_dir) 51 | challenge_synsets = [l.strip() for l in 52 | tf.gfile.FastGFile(labels_file, 'r').readlines()] 53 | 54 | labels = [] 55 | filenames = [] 56 | synsets = [] 57 | 58 | # Leave label index 0 empty as a background class. 59 | label_index = 1 60 | 61 | # Construct the list of JPEG files and labels. 62 | for synset in challenge_synsets: 63 | jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset) 64 | matching_files = tf.gfile.Glob(jpeg_file_path) 65 | 66 | labels.extend([label_index] * len(matching_files)) 67 | synsets.extend([synset] * len(matching_files)) 68 | filenames.extend(matching_files) 69 | 70 | if not label_index % 100: 71 | print('Finished finding files in %d of %d classes.' % ( 72 | label_index, len(challenge_synsets))) 73 | label_index += 1 74 | 75 | # Shuffle the ordering of all image files in order to guarantee 76 | # random ordering of the images with respect to label in the 77 | # saved TFRecord files. 
Make the randomization repeatable. 78 | shuffled_index = list(range(len(filenames))) 79 | random.seed(12345) 80 | random.shuffle(shuffled_index) 81 | 82 | filenames = [filenames[i] for i in shuffled_index] 83 | synsets = [synsets[i] for i in shuffled_index] 84 | labels = [labels[i] for i in shuffled_index] 85 | 86 | print('Found %d JPEG files across %d labels inside %s.' % 87 | (len(filenames), len(challenge_synsets), data_dir)) 88 | return filenames, synsets, labels 89 | 90 | def _find_human_readable_labels(synsets, synset_to_human): 91 | """Build a list of human-readable labels. 92 | Args: 93 | synsets: list of strings; each string is a unique WordNet ID. 94 | synset_to_human: dict of synset to human labels, e.g., 95 | 'n02119022' --> 'red fox, Vulpes vulpes' 96 | Returns: 97 | List of human-readable strings corresponding to each synset. 98 | """ 99 | humans = [] 100 | for s in synsets: 101 | assert s in synset_to_human, ('Failed to find: %s' % s) 102 | humans.append(synset_to_human[s]) 103 | return humans 104 | 105 | def _find_image_bounding_boxes(filenames, image_to_bboxes): 106 | """Find the bounding boxes for a given image file. 107 | Args: 108 | filenames: list of strings; each string is a path to an image file. 109 | image_to_bboxes: dictionary mapping image file names to a list of 110 | bounding boxes. This list contains 0+ bounding boxes. 111 | Returns: 112 | List of bounding boxes for each image. Note that each entry in this 113 | list might contain from 0+ entries corresponding to the number of bounding 114 | box annotations for the image. 
def _build_synset_lookup(imagenet_metadata_file):
    """Build lookup for synset to human-readable label.
    Args:
      imagenet_metadata_file: string, path to file containing mapping from
        synset to human-readable label.
        Assumes each line of the file looks like:
          n02119247    black fox
          n02119359    silver fox
          n02119477    red fox, Vulpes fulva
        where each line corresponds to a unique mapping. Each line holds
        the synset and the human-readable label separated by a single
        tab character. Blank lines are skipped.
    Returns:
      Dictionary of synset to human labels, such as:
        'n02119022' --> 'red fox, Vulpes vulpes'
    """
    # The with-block closes the file as soon as it has been read.
    with tf.gfile.FastGFile(imagenet_metadata_file, 'r') as metadata:
        lines = metadata.readlines()

    synset_to_human = {}
    for line in lines:
        entry = line.strip()
        # Lines from readlines() keep their newline and are therefore
        # never empty; test the stripped text to skip blank lines.
        if not entry:
            continue
        parts = entry.split('\t')
        assert len(parts) == 2, ('Failed to parse: %s' % line)
        synset, human = parts
        synset_to_human[synset] = human
    return synset_to_human
def check_if_downloaded():
    """Verify that the imagenet train and validation directories exist.

    Prints a confirmation for each directory that is found. Raises an
    Exception for the first directory that is missing; the train
    directory is checked before the validation directory.
    """
    if not os.path.exists(train_dir):
        raise Exception("Train directory doesn't seem to exist.")
    print("Train directory seems to exist")

    if not os.path.exists(validation_dir):
        raise Exception("Validation directory doesn't seem to exist.")
    print("Validation directory seems to exist")
data.val_filenames, data.val_labels 225 | 226 | data = imagenet_data() -------------------------------------------------------------------------------- /utils/imagenet_download/preprocess_imagenet_validation_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | """Process the ImageNet Challenge bounding boxes for TensorFlow model training. 17 | 18 | Associate the ImageNet 2012 Challenge validation data set with labels. 19 | 20 | The raw ImageNet validation data set is expected to reside in JPEG files 21 | located in the following directory structure. 22 | 23 | data_dir/ILSVRC2012_val_00000001.JPEG 24 | data_dir/ILSVRC2012_val_00000002.JPEG 25 | ... 26 | data_dir/ILSVRC2012_val_00050000.JPEG 27 | 28 | This script moves the files into a directory structure like such: 29 | data_dir/n01440764/ILSVRC2012_val_00000293.JPEG 30 | data_dir/n01440764/ILSVRC2012_val_00000543.JPEG 31 | ... 32 | where 'n01440764' is the unique synset label associated with 33 | these images. 34 | 35 | This directory reorganization requires a mapping from validation image 36 | number (i.e. suffix of the original file) to the associated label. This 37 | is provided in the ImageNet development kit via a Matlab file. 
if __name__ == '__main__':
    # Expects two arguments: the directory holding the flat validation
    # images and the text file with one synset label per image.
    if len(sys.argv) < 3:
        print('Invalid usage\n'
              'usage: preprocess_imagenet_validation_data.py '
              '<validation data dir> <validation labels file>')
        sys.exit(-1)
    data_dir = sys.argv[1]
    validation_labels_file = sys.argv[2]

    # Read in the synsets associated with the validation data set.
    # Line i holds the label of validation image number i + 1.
    with open(validation_labels_file) as labels_handle:
        labels = [line.strip() for line in labels_handle]
    unique_labels = set(labels)

    # Make all sub-directories in the validation data dir. Existing
    # directories are kept so that the script can be started again.
    for label in unique_labels:
        labeled_data_dir = os.path.join(data_dir, label)
        if not os.path.isdir(labeled_data_dir):
            os.makedirs(labeled_data_dir)

    # Move all of the images to the appropriate sub-directory.
    for image_index, label in enumerate(labels):
        # Image numbers start at 1 and are zero-padded to 8 digits.
        basename = 'ILSVRC2012_val_000%.5d.JPEG' % (image_index + 1)
        original_filename = os.path.join(data_dir, basename)
        if not os.path.exists(original_filename):
            # The format string needs a %s placeholder; without it the
            # % operator raises a TypeError instead of printing.
            print('Failed to find: %s' % original_filename)
            sys.exit(-1)
        new_filename = os.path.join(data_dir, label, basename)
        os.rename(original_filename, new_filename)
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | """Process the ImageNet Challenge bounding boxes for TensorFlow model training. 17 | 18 | This script is called as 19 | 20 | process_bounding_boxes.py <dir> [synsets-file] 21 | 22 | Where <dir> is a directory containing the downloaded and unpacked bounding box 23 | data. If [synsets-file] is supplied, then only the bounding boxes whose 24 | synsets are contained within this file are returned. Note that the 25 | [synsets-file] file contains synset ids, one per line. 26 | 27 | The script dumps out a CSV text file in which each line contains an entry. 28 | n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940 29 | 30 | The entry can be read as: 31 | <JPEG file name>, <xmin>, <ymin>, <xmax>, <ymax> 32 | 33 | The bounding box for <JPEG file name> contains two points (xmin, ymin) and 34 | (xmax, ymax) specifying the lower-left corner and upper-right corner of a 35 | bounding box in *relative* coordinates. 36 | 37 | The user supplies a directory where the XML files reside. The directory 38 | structure in the directory <dir> is assumed to look like this: 39 | 40 | <dir>/nXXXXXXXX/nXXXXXXXX_YYYY.xml 41 | 42 | Each XML file contains a bounding box annotation. The script: 43 | 44 | (1) Parses the XML file and extracts the filename, label and bounding box info. 45 | 46 | (2) The bounding box is specified in the XML files as integer (xmin, ymin) and 47 | (xmax, ymax) *relative* to image size displayed to the human annotator.
The 48 | size of the image displayed to the human annotator is stored in the XML file 49 | as integer (height, width). 50 | 51 | Note that the displayed size will differ from the actual size of the image 52 | downloaded from image-net.org. To make the bounding box annotation useable, 53 | we convert bounding box to floating point numbers relative to displayed 54 | height and width of the image. 55 | 56 | Note that each XML file might contain N bounding box annotations. 57 | 58 | Note that the points are all clamped at a range of [0.0, 1.0] because some 59 | human annotations extend outside the range of the supplied image. 60 | 61 | See details here: http://image-net.org/download-bboxes 62 | 63 | (3) By default, the script outputs all valid bounding boxes. If a 64 | [synsets-file] is supplied, only the subset of bounding boxes associated 65 | with those synsets are outputted. Importantly, one can supply a list of 66 | synsets in the ImageNet Challenge and output the list of bounding boxes 67 | associated with the training images of the ILSVRC. 68 | 69 | We use these bounding boxes to inform the random distortion of images 70 | supplied to the network. 71 | 72 | If you run this script successfully, you will see the following output 73 | to stderr: 74 | > Finished processing 544546 XML files. 75 | > Skipped 0 XML files not in ImageNet Challenge. 76 | > Skipped 0 bounding boxes not in ImageNet Challenge. 77 | > Wrote 615299 bounding boxes from 544546 annotated images. 78 | """ 79 | 80 | from __future__ import absolute_import 81 | from __future__ import division 82 | from __future__ import print_function 83 | 84 | import glob 85 | import os.path 86 | import sys 87 | import xml.etree.ElementTree as ET 88 | 89 | 90 | class BoundingBox(object): 91 | pass 92 | 93 | 94 | def GetItem(name, root, index=0): 95 | count = 0 96 | for item in root.iter(name): 97 | if count == index: 98 | return item.text 99 | count += 1 100 | # Failed to find "index" occurrence of item. 
101 | return -1 102 | 103 | 104 | def GetInt(name, root, index=0): 105 | return int(GetItem(name, root, index)) 106 | 107 | 108 | def FindNumberBoundingBoxes(root): 109 | index = 0 110 | while True: 111 | if GetInt('xmin', root, index) == -1: 112 | break 113 | index += 1 114 | return index 115 | 116 | 117 | def ProcessXMLAnnotation(xml_file): 118 | """Process a single XML file containing a bounding box.""" 119 | # pylint: disable=broad-except 120 | try: 121 | tree = ET.parse(xml_file) 122 | except Exception: 123 | print('Failed to parse: ' + xml_file, file=sys.stderr) 124 | return None 125 | # pylint: enable=broad-except 126 | root = tree.getroot() 127 | 128 | num_boxes = FindNumberBoundingBoxes(root) 129 | boxes = [] 130 | 131 | for index in range(num_boxes): 132 | box = BoundingBox() 133 | # Grab the 'index' annotation. 134 | box.xmin = GetInt('xmin', root, index) 135 | box.ymin = GetInt('ymin', root, index) 136 | box.xmax = GetInt('xmax', root, index) 137 | box.ymax = GetInt('ymax', root, index) 138 | 139 | box.width = GetInt('width', root) 140 | box.height = GetInt('height', root) 141 | box.filename = GetItem('filename', root) + '.JPEG' 142 | box.label = GetItem('name', root) 143 | 144 | xmin = float(box.xmin) / float(box.width) 145 | xmax = float(box.xmax) / float(box.width) 146 | ymin = float(box.ymin) / float(box.height) 147 | ymax = float(box.ymax) / float(box.height) 148 | 149 | # Some images contain bounding box annotations that 150 | # extend outside of the supplied image. See, e.g. 151 | # n03127925/n03127925_147.xml 152 | # Additionally, for some bounding boxes, the min > max 153 | # or the box is entirely outside of the image. 
154 | min_x = min(xmin, xmax) 155 | max_x = max(xmin, xmax) 156 | box.xmin_scaled = min(max(min_x, 0.0), 1.0) 157 | box.xmax_scaled = min(max(max_x, 0.0), 1.0) 158 | 159 | min_y = min(ymin, ymax) 160 | max_y = max(ymin, ymax) 161 | box.ymin_scaled = min(max(min_y, 0.0), 1.0) 162 | box.ymax_scaled = min(max(max_y, 0.0), 1.0) 163 | 164 | boxes.append(box) 165 | 166 | return boxes 167 | 168 | if __name__ == '__main__': 169 | if len(sys.argv) < 2 or len(sys.argv) > 3: 170 | print('Invalid usage\n' 171 | 'usage: process_bounding_boxes.py [synsets-file]', 172 | file=sys.stderr) 173 | sys.exit(-1) 174 | 175 | xml_files = glob.glob(sys.argv[1] + '/*/*.xml') 176 | print('Identified %d XML files in %s' % (len(xml_files), sys.argv[1]), 177 | file=sys.stderr) 178 | 179 | if len(sys.argv) == 3: 180 | labels = set([l.strip() for l in open(sys.argv[2]).readlines()]) 181 | print('Identified %d synset IDs in %s' % (len(labels), sys.argv[2]), 182 | file=sys.stderr) 183 | else: 184 | labels = None 185 | 186 | skipped_boxes = 0 187 | skipped_files = 0 188 | saved_boxes = 0 189 | saved_files = 0 190 | for file_index, one_file in enumerate(xml_files): 191 | # Example: <...>/n06470073/n00141669_6790.xml 192 | label = os.path.basename(os.path.dirname(one_file)) 193 | 194 | # Determine if the annotation is from an ImageNet Challenge label. 195 | if labels is not None and label not in labels: 196 | skipped_files += 1 197 | continue 198 | 199 | bboxes = ProcessXMLAnnotation(one_file) 200 | assert bboxes is not None, 'No bounding boxes found in ' + one_file 201 | 202 | found_box = False 203 | for bbox in bboxes: 204 | if labels is not None: 205 | if bbox.label != label: 206 | # Note: There is a slight bug in the bounding box annotation data. 207 | # Many of the dog labels have the human label 'Scottish_deerhound' 208 | # instead of the synset ID 'n02092002' in the bbox.label field. 
As a 209 | # simple hack to overcome this issue, we only exclude bbox labels 210 | # *which are synset ID's* that do not match original synset label for 211 | # the XML file. 212 | if bbox.label in labels: 213 | skipped_boxes += 1 214 | continue 215 | 216 | # Guard against improperly specified boxes. 217 | if (bbox.xmin_scaled >= bbox.xmax_scaled or 218 | bbox.ymin_scaled >= bbox.ymax_scaled): 219 | skipped_boxes += 1 220 | continue 221 | 222 | # Note bbox.filename occasionally contains '%s' in the name. This is 223 | # data set noise that is fixed by just using the basename of the XML file. 224 | image_filename = os.path.splitext(os.path.basename(one_file))[0] 225 | print('%s.JPEG,%.4f,%.4f,%.4f,%.4f' % 226 | (image_filename, 227 | bbox.xmin_scaled, bbox.ymin_scaled, 228 | bbox.xmax_scaled, bbox.ymax_scaled)) 229 | 230 | saved_boxes += 1 231 | found_box = True 232 | if found_box: 233 | saved_files += 1 234 | else: 235 | skipped_files += 1 236 | 237 | if not file_index % 5000: 238 | print('--> processed %d of %d XML files.' % 239 | (file_index + 1, len(xml_files)), 240 | file=sys.stderr) 241 | print('--> skipped %d boxes and %d XML files.' % 242 | (skipped_boxes, skipped_files), file=sys.stderr) 243 | 244 | print('Finished processing %d XML files.' % len(xml_files), file=sys.stderr) 245 | print('Skipped %d XML files not in ImageNet Challenge.' % skipped_files, 246 | file=sys.stderr) 247 | print('Skipped %d bounding boxes not in ImageNet Challenge.' % skipped_boxes, 248 | file=sys.stderr) 249 | print('Wrote %d bounding boxes from %d annotated images.' 
#!/bin/bash
# Download, extract, and preprocess the imagenet data using TensorFlows imagenet scripts
# The original script didn't work for me because of different reasons.
# Imanol Schlag, 11.2016
# original: https://github.com/tensorflow/models/blob/master/inception/inception/data/download_imagenet.sh
#
# Size Info:
# ILSVRC2012_img_train.tar is about 147.9 GB
# ILSVRC2012_img_val.tar is about 6.7 GB
# bounding_boxes/ is about 324.5 MB
#
# Usage:
# Copy this shell script and all python scripts in the same folder into your imagenet data folder.
# Run this shell script.

# download bounding boxes
OUTDIR="../../data/imagenet"
echo "Saving imagenet data to $OUTDIR"
# "wget -O" does not create missing parent directories, so the target
# directory of the annotation archive has to exist before the download.
mkdir -p "${OUTDIR}/bounding_boxes"
wget -nc "http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_bbox_train_v2.tar.gz" -O "${OUTDIR}/bounding_boxes/annotations.tar.gz"

# extract bounding box annotations
tar xzf "${OUTDIR}/bounding_boxes/annotations.tar.gz" -C "${OUTDIR}/bounding_boxes"

# download validation data
wget -nd -c "http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar" -O "${OUTDIR}/ILSVRC2012_img_val.tar"

# extract validation data
mkdir -p "${OUTDIR}/validation/"
tar xf "${OUTDIR}/ILSVRC2012_img_val.tar" -C "${OUTDIR}/validation/"

# download train data
wget -nd -c "http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_train.tar" -O "${OUTDIR}/ILSVRC2012_img_train.tar"

# extract individual train tar files
# (-N is not used for the text files below: timestamping has no effect
# together with -O)
SYNSET_FILE="${OUTDIR}/imagenet_lsvrc_2015_synsets.txt"
wget "https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_lsvrc_2015_synsets.txt" -O "${SYNSET_FILE}"
while IFS= read -r SYNSET; do
  # A blank line would otherwise make the cleanup below target the whole
  # train directory.
  [ -n "${SYNSET}" ] || continue

  echo "Processing: ${SYNSET}"
  # Start from an empty directory. A quoted "*" is never expanded by the
  # shell, so the directory itself is removed and recreated instead.
  rm -rf "${OUTDIR}/train/${SYNSET}"
  mkdir -p "${OUTDIR}/train/${SYNSET}"

  # Pull the per-synset archive out of the big tar into the current
  # directory, unpack it, then delete the temporary archive.
  tar xf "${OUTDIR}/ILSVRC2012_img_train.tar" "${SYNSET}.tar"
  tar xf "${SYNSET}.tar" -C "${OUTDIR}/train/${SYNSET}/"
  rm -f "${SYNSET}.tar"

  echo "Finished processing: ${SYNSET}"
done < "${SYNSET_FILE}"

# put validation data into directories just as the training data
wget "https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_2012_validation_synset_labels.txt" -O "${OUTDIR}/imagenet_2012_validation_synset_labels.txt"
python preprocess_imagenet_validation_data.py "${OUTDIR}/validation/" "${OUTDIR}/imagenet_2012_validation_synset_labels.txt"

# extract bounding box info into a CSV file
# (process_bounding_boxes.py prints the CSV lines to stdout and its progress
# to stderr, so stdout is redirected straight into the target file)
python process_bounding_boxes.py "${OUTDIR}/bounding_boxes/" "${SYNSET_FILE}" > "${OUTDIR}/imagenet_2012_bounding_boxes.csv"

# download the metadata text file
wget "https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_metadata.txt" -O "${OUTDIR}/imagenet_metadata.txt"
###############################################################################
# Author: Imanol Schlag (more info on ischlag.github.io)
# Description: downloads and loads the penn treebank dataset into memory.
# Date: 11.2016
#
#

import os
import tensorflow as tf
import collections

from utils import download

########################################################################

# Directory where you want to download and save the data-set.
# Set this before you start calling any of the functions below.
data_path = "data/penn_treebank/"

# URL for the data-set on the internet.
data_url = "http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz"

train_data_file = data_path + "ptb.char.train.txt"
test_data_file = data_path + "ptb.char.test.txt"
valid_data_file = data_path + "ptb.char.valid.txt"

########################################################################
# some useful functions from TensorFlow


def _read_words(filename):
    """Return the whitespace-separated tokens of `filename` as a list.

    Newlines are removed (not replaced by a separator) before splitting.
    """
    with tf.gfile.GFile(filename, "r") as f:
        text = f.read()
    # Depending on the Python/TensorFlow version read() yields bytes or text;
    # calling .decode() unconditionally fails when it is already text.
    if isinstance(text, bytes):
        text = text.decode("utf-8")
    return text.replace("\n", "").split()


def _build_vocab(filename):
    """Map every token of `filename` to an integer id.

    Ids are assigned by descending frequency; ties are broken by the token's
    sort order, so the mapping is deterministic. An empty file yields an
    empty dict.
    """
    counter = collections.Counter(_read_words(filename))
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
    return {word: word_id for word_id, (word, _) in enumerate(count_pairs)}


def _file_to_word_ids(filename, word_to_id):
    """Return the ids of the tokens of `filename`; unknown tokens are dropped."""
    return [word_to_id[word] for word in _read_words(filename)
            if word in word_to_id]

########################################################################


def download_data():
    """Download the penn treebank data if it doesn't exist yet."""
    # Local imports: only this function needs them.
    import glob
    import shutil

    # NOTE(review): presumably maybe_download_and_extract returns a truthy
    # value only when it actually downloaded something -- confirm in
    # utils/download.py.
    if download.maybe_download_and_extract(url=data_url, download_dir=data_path):
        # Flatten <data_path>/simple-examples/data/* into <data_path> and
        # drop the rest of the extracted archive. Unlike shelling out to
        # mv/rm, failures raise instead of being silently ignored.
        extracted_dir = os.path.join(data_path, "simple-examples")
        for entry in glob.glob(os.path.join(extracted_dir, "data", "*")):
            shutil.move(entry, os.path.join(data_path, os.path.basename(entry)))
        shutil.rmtree(extracted_dir)


def _load_split(data_file):
    """Return (token ids of `data_file`, vocabulary size).

    The vocabulary is always built from the training file, so ids are
    consistent across the training, test and validation splits.
    """
    word_to_id = _build_vocab(train_data_file)
    return _file_to_word_ids(data_file, word_to_id), len(word_to_id)


def load_training_data():
    """Return (training token ids, vocabulary size)."""
    return _load_split(train_data_file)


def load_test_data():
    """Return (test token ids, vocabulary size)."""
    return _load_split(test_data_file)


def load_validation_data():
    """Return (validation token ids, vocabulary size)."""
    return _load_split(valid_data_file)
###############################################################################
# Author: Imanol Schlag (more info on ischlag.github.io)
# Description: Functions to download and load SVHN into memory.
# Date: 10.11.2016
#
#

from utils import download
import scipy.io

data_path = "data/SVHN/"

data_url = 'http://ufldl.stanford.edu/housenumbers/'
train_data = 'train_32x32.mat'
test_data = 'test_32x32.mat'
extra_data = 'extra_32x32.mat'

num_classes = 10


def download_data():
    """Download the SVHN data if it doesn't exist yet."""
    for filename in (train_data, test_data, extra_data):
        download.maybe_download(url=data_url + filename, download_dir=data_path)


def _load_split(filename):
    """Load one SVHN .mat file from `data_path`.

    Returns the images, class-numbers and one-hot encoded class-labels.
    """
    # Read both variables in a single pass instead of parsing the file twice.
    mat = scipy.io.loadmat(data_path + filename, variable_names=['X', 'y'])

    # Move the last axis of X to the front and scale the values by 1/255.
    # (presumably X is stored as height x width x channels x samples --
    # confirm against the data-set description)
    images = mat['X'].transpose((3, 0, 1, 2)) / 255.0

    # Labels are stored as a column; class 10 is remapped to class 0.
    cls = mat['y'][:, 0]
    cls[cls == 10] = 0

    return images, cls, download.one_hot_encoded(class_numbers=cls, num_classes=num_classes)


def load_training_data():
    """
    Load all the training-data for the SVHN data-set.
    Returns the images, class-numbers and one-hot encoded class-labels.
    """
    return _load_split(train_data)


def load_test_data():
    """
    Load all the test-data for the SVHN data-set.
    Returns the images, class-numbers and one-hot encoded class-labels.
    """
    return _load_split(test_data)


def load_extra_data():
    """
    Load all the extra-data for the SVHN data-set.
    Returns the images, class-numbers and one-hot encoded class-labels.
    """
    return _load_split(extra_data)