├── .gitignore ├── LICENSE ├── README.md ├── litterbox ├── __init__.py ├── data │ ├── __init__.py │ ├── build_image_data.py │ ├── build_imagenet_data.py │ ├── imagenet_2012_validation_synset_labels.txt │ ├── imagenet_lsvrc_2015_synsets.txt │ ├── imagenet_metadata.txt │ ├── labels_file.txt │ ├── preprocess_imagenet_validation_data.py │ └── process_bounding_boxes.py ├── example_predict.py ├── fabric │ ├── __init__.py │ ├── dataset.py │ ├── dataset_file.py │ ├── dataset_record.py │ ├── exec_eval.py │ ├── exec_predict.py │ ├── exec_train.py │ ├── feed.py │ ├── image_processing_common.py │ ├── loss.py │ ├── model.py │ ├── opt_param_scheduler.py │ ├── processor.py │ └── util.py ├── feeds │ ├── __init__.py │ └── image │ │ ├── __init__.py │ │ └── feed_image.py ├── imagenet_data.py ├── imagenet_eval.py ├── imagenet_train.py ├── layers │ ├── __init__.py │ ├── compact_bilinear_pooling.py │ ├── compact_bilinear_pooling_test.py │ ├── lstm.py │ └── preact_conv.py ├── models │ ├── __init__.py │ ├── google │ │ ├── __init__.py │ │ ├── model_google_slim.py │ │ └── nets │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── alexnet.py │ │ │ ├── alexnet_test.py │ │ │ ├── cifarnet.py │ │ │ ├── inception.py │ │ │ ├── inception_resnet_v2.py │ │ │ ├── inception_resnet_v2_test.py │ │ │ ├── inception_utils.py │ │ │ ├── inception_v1.py │ │ │ ├── inception_v1_test.py │ │ │ ├── inception_v2.py │ │ │ ├── inception_v2_test.py │ │ │ ├── inception_v3.py │ │ │ ├── inception_v3_test.py │ │ │ ├── inception_v4.py │ │ │ ├── inception_v4_test.py │ │ │ ├── lenet.py │ │ │ ├── nets_factory.py │ │ │ ├── nets_factory_test.py │ │ │ ├── overfeat.py │ │ │ ├── overfeat_test.py │ │ │ ├── resnet_utils.py │ │ │ ├── resnet_v1.py │ │ │ ├── resnet_v1_test.py │ │ │ ├── resnet_v2.py │ │ │ ├── resnet_v2_test.py │ │ │ ├── vgg.py │ │ │ └── vgg_test.py │ ├── my_slim │ │ ├── __init__.py │ │ ├── build_inception_v4.py │ │ ├── build_resnet.py │ │ ├── build_vgg.py │ │ ├── model_my_slim.py │ │ └── nets_factory.py │ └── sdc │ │ ├── __init__.py │ │ ├── build_inception_resnet_sdc.py │ │ ├── build_nvidia_sdc.py │ │ ├── build_resnet_sdc.py │ │ └── model_sdc.py ├── processors │ ├── __init__.py │ ├── imagenet │ │ ├── __init__.py │ │ ├── image_processing_imagenet.py │ │ ├── parse_proto_imagenet.py │ │ └── processor_imagenet.py │ └── sdc │ │ ├── __init__.py │ │ ├── image_processing_sdc.py │ │ ├── mu_law.py │ │ ├── parse_proto_sdc.py │ │ └── processor_sdc.py ├── sdc_eval.py ├── sdc_export_graph.py ├── sdc_pred.py ├── sdc_run_graph.py └── sdc_train.py └── utils ├── compare_csv.py ├── ensemble_csv.py ├── torch.py └── torchfile.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.iml 3 | output/ 4 | log/ 5 | __pycache__/ 6 | *.py[cod] 7 | *.csv 8 | -------------------------------------------------------------------------------- /litterbox/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /litterbox/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rwightman/tensorflow-litterbox/ddeeb3a6c7de64e5391050ffbb5948feca65ad3c/litterbox/data/__init__.py -------------------------------------------------------------------------------- /litterbox/data/labels_file.txt: -------------------------------------------------------------------------------- 1 | c0 2 | c1 3 | c2 4 | c3 5 | c4 6 | c5 7 | c6 8 | c7 9 | c8 
10 | c9 11 | -------------------------------------------------------------------------------- /litterbox/data/preprocess_imagenet_validation_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | """Preprocess the ImageNet Challenge validation data for TensorFlow model training. 17 | 18 | Associate the ImageNet 2012 Challenge validation data set with labels. 19 | 20 | The raw ImageNet validation data set is expected to reside in JPEG files 21 | located in the following directory structure. 22 | 23 | data_dir/ILSVRC2012_val_00000001.JPEG 24 | data_dir/ILSVRC2012_val_00000002.JPEG 25 | ... 26 | data_dir/ILSVRC2012_val_00050000.JPEG 27 | 28 | This script moves the files into a directory structure like the following: 29 | data_dir/n01440764/ILSVRC2012_val_00000293.JPEG 30 | data_dir/n01440764/ILSVRC2012_val_00000543.JPEG 31 | ... 32 | where 'n01440764' is the unique synset label associated with 33 | these images. 34 | 35 | This directory reorganization requires a mapping from validation image 36 | number (i.e. suffix of the original file) to the associated label. This 37 | is provided in the ImageNet development kit via a Matlab file. 38 | 39 | In order to make life easier and divorce ourselves from Matlab, we instead 40 | supply a custom text file that provides this mapping for us. 41 | 42 | Sample usage: 43 | ./preprocess_imagenet_validation_data.py ILSVRC2012_img_val \ 44 | imagenet_2012_validation_synset_labels.txt 45 | """ 46 | 47 | from __future__ import absolute_import 48 | from __future__ import division 49 | from __future__ import print_function 50 | 51 | import os 52 | import os.path 53 | import sys 54 | 55 | 56 | if __name__ == '__main__': 57 | if len(sys.argv) < 3: 58 | print('Invalid usage\n' 59 | 'usage: preprocess_imagenet_validation_data.py ' 60 | '<validation data dir> <validation labels file>') 61 | sys.exit(-1) 62 | data_dir = sys.argv[1] 63 | validation_labels_file = sys.argv[2] 64 | 65 | # Read in the 50000 synsets associated with the validation data set. 66 | labels = [l.strip() for l in open(validation_labels_file).readlines()] 67 | unique_labels = set(labels) 68 | 69 | # Make all sub-directories in the validation data dir. 70 | for label in unique_labels: 71 | labeled_data_dir = os.path.join(data_dir, label) 72 | os.makedirs(labeled_data_dir, exist_ok=True) 73 | 74 | # Move all of the images to the appropriate sub-directory.
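# Validation images are numbered 1..50000; '000%.5d' zero-pads each index to the 8-digit suffix used by the original filenames (loop index 0 maps to ILSVRC2012_val_00000001.JPEG).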
75 | for i in range(len(labels)): 76 | basename = 'ILSVRC2012_val_000%.5d.JPEG' % (i + 1) 77 | original_filename = os.path.join(data_dir, basename) 78 | if not os.path.exists(original_filename): 79 | print('Failed to find: %s, dest %s' % (original_filename, labels[i])) 80 | sys.exit(-1) 81 | new_filename = os.path.join(data_dir, labels[i], basename) 82 | os.rename(original_filename, new_filename) 83 | -------------------------------------------------------------------------------- /litterbox/example_predict.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ Predict classes for test data 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import pandas as pd 16 | import numpy as np 17 | import os 18 | 19 | import tensorflow as tf 20 | from fabric import util, exec_predict 21 | from fabric.dataset_file import DatasetFile 22 | from feeds import FeedImagesWithLabels 23 | from processors import ProcessorImagenet 24 | from models import ModelMySlim, ModelGoogleSlim 25 | 26 | FLAGS = tf.app.flags.FLAGS 27 | 28 | tf.app.flags.DEFINE_boolean( 29 | 'output_prob', False, 30 | """Set true to output per-class softmax output probabilities instead of class id.""") 31 | 32 | tf.app.flags.DEFINE_integer( 33 | 'num_classes', 1001, 34 | """Number of class labels""") 35 | 36 | tf.app.flags.DEFINE_integer( 37 | 'output_offset', 0, 38 | """Offset of output prediction. Set to 1 if network trained with background and you want output without.""") 39 | 40 | tf.app.flags.DEFINE_boolean( 41 | 'my', False, 42 | """Enable my variants of the image classification models""") 43 | 44 | tf.app.flags.DEFINE_string( 45 | 'network', 'resnet_v1_50', 46 | """See models/google/nets/nets_factory.py or models/my_slim/nets_factory.py""") 47 | 48 | 49 | class ExampleData(DatasetFile): 50 | # Example dataset for feeding folder of images into model 51 | 52 | def __init__(self, subset): 53 | super(ExampleData, self).__init__('Example', subset) 54 | 55 | def num_classes(self): 56 | return FLAGS.num_classes 57 | 58 | 59 | def main(_): 60 | util.check_tensorflow_version() 61 | 62 | dataset = ExampleData(subset='') 63 | 64 | processor = ProcessorImagenet() 65 | processor.output_offset = FLAGS.output_offset 66 | 67 | feed = FeedImagesWithLabels(dataset=dataset, processor=processor) 68 | 69 | model_params = { 70 | 'num_classes': feed.num_classes_for_network(), 71 | 'network': FLAGS.network, 72 | } 73 | if FLAGS.my: 74 | # My variants of Resnet, Inception, and VGG networks 75 | model = ModelMySlim(params=model_params) 76 | else: 77 | # Google's tf.slim models 78 | model = ModelGoogleSlim(params=model_params) 79 | model.check_norm(processor.normalize) 80 | 81 | output, num_entries = exec_predict.predict(feed, model) 82 | 83 | output_columns = ['Img'] 84 | if FLAGS.output_prob: 85 | # Dump class probabilities to CSV file. 
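# One probability column per class, named c0..c(num_classes-1) to match the class naming convention in data/labels_file.txt.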
86 | class_labels = [] 87 | for c in range(dataset.num_classes()): 88 | class_labels.append("c%s" % c) 89 | output_columns += class_labels 90 | output = np.vstack([np.column_stack([o[1], o[0]]) for o in output]) 91 | else: 92 | # Dump class index to CSV file 93 | output_columns += ['Class'] 94 | output = np.vstack([np.column_stack([o[1], np.argmax(o[0], axis=1)]) for o in output]) 95 | 96 | df = pd.DataFrame(output, columns=output_columns) 97 | df.Img = df.Img.apply(lambda x: os.path.basename(x.decode())) 98 | df.to_csv('./output.csv', index=False) 99 | 100 | if __name__ == '__main__': 101 | tf.app.run() -------------------------------------------------------------------------------- /litterbox/fabric/__init__.py: -------------------------------------------------------------------------------- 1 | from fabric.dataset_file import DatasetFile 2 | from fabric.dataset_record import DatasetRecord 3 | from fabric.feed import Feed 4 | from fabric.loss import * 5 | from fabric.model import Model 6 | from fabric.processor import Processor 7 | from fabric.util import * 8 | 9 | #from fabric.exec_train import train 10 | #from fabric.exec_eval import evaluate 11 | #from fabric.exec_predict import predict 12 | -------------------------------------------------------------------------------- /litterbox/fabric/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | # Based on original Work Copyright 2016 Google Inc. All Rights Reserved. 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | # ============================================================================== 23 | """ 24 | """ 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | from abc import ABCMeta 30 | from abc import abstractmethod 31 | import os 32 | 33 | import tensorflow as tf 34 | 35 | FLAGS = tf.app.flags.FLAGS 36 | 37 | # Basic model parameters. 
38 | tf.app.flags.DEFINE_string('data_dir', '/tmp/mydata', """Path to the data.""") 39 | 40 | 41 | class Dataset(object): 42 | """A simple class for handling data sets.""" 43 | __metaclass__ = ABCMeta 44 | 45 | def __init__(self, name='Unknown', subset='', is_record=False): 46 | """Initialize dataset using a subset and the path to the data.""" 47 | assert subset in self.available_subsets(), self.available_subsets() 48 | self.name = name 49 | self.subset = subset 50 | self.is_record = is_record 51 | self.has_background_class = False 52 | 53 | def num_classes(self): 54 | """Returns the number of classes in the data set.""" 55 | return 0 56 | 57 | def available_subsets(self): 58 | """Returns the list of available subsets.""" 59 | return ['train', 'validation'] 60 | 61 | @abstractmethod 62 | def num_examples_per_epoch(self): 63 | """Returns the number of examples in the data subset.""" 64 | pass 65 | 66 | @abstractmethod 67 | def data_files(self): 68 | """Returns a python list of all (sharded) data subset files. 69 | 70 | Returns: 71 | python list of all (sharded) data set files. 72 | Raises: 73 | ValueError: if there are no data_files matching the subset. 74 | """ 75 | pass 76 | 77 | @abstractmethod 78 | def reader(self): 79 | """Return a reader for a single entry from the data set. 80 | 81 | See io_ops.py for details of Reader class. 82 | 83 | Returns: 84 | Reader object that reads the data set. 85 | """ 86 | pass -------------------------------------------------------------------------------- /litterbox/fabric/dataset_file.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import os 16 | import tensorflow as tf 17 | from collections import Counter 18 | #import pandas as pd 19 | from abc import ABCMeta 20 | from abc import abstractmethod 21 | 22 | from .dataset import Dataset 23 | 24 | FLAGS = tf.app.flags.FLAGS 25 | 26 | 27 | def get_image_files_and_labels(folder, types=('.jpg', '.jpeg')): 28 | label_counts = Counter() 29 | labels = [] 30 | filenames = [] 31 | for root, subdirs, files in os.walk(folder, topdown=False): 32 | label = os.path.relpath(root, folder) if (root != folder) else '' 33 | for f in files: 34 | if os.path.splitext(f)[1].lower() in types: 35 | label_counts.update([label]) 36 | filenames.append(os.path.join(root, f)) 37 | labels.append(label) 38 | return label_counts, labels, filenames 39 | 40 | 41 | class DatasetFile(Dataset): 42 | """A simple class for handling file (non-record) data sets.""" 43 | __metaclass__ = ABCMeta 44 | 45 | def __init__(self, name, subset, types=('.jpg', '.jpeg'), add_background_class=False): 46 | """Initialize dataset using a subset and the path to the data.""" 47 | super(DatasetFile, self).__init__(name, subset, is_record=False) 48 | self.file_folder = os.path.join(FLAGS.data_dir, subset) 49 | self.label_counts, self.image_label_names, self.image_filenames = \ 50 | get_image_files_and_labels(self.file_folder, types=types) 51 | self.num_examples = sum(self.label_counts.values()) 52 | 53 | self.label_names = [] 54 | if add_background_class: 55 | self.label_names += ['background'] 56 | self.has_background_class = True 57 | 58 | # Generate label mappings 59 | # TODO This could be passed in if defined externally? 60 | # TODO make label part of dataset more generic, ie general target value handling 61 | # NOTE Currently assumes lexical order for labels (aside from 'background') 62 | self.label_names += sorted(self.label_counts.keys()) 63 | self.label_name_to_index = {v: k for (k, v) in enumerate(self.label_names)} 64 | self.image_label_indices = [self.label_name_to_index[x] for x in self.image_label_names] 65 | 66 | def num_examples_per_epoch(self): 67 | """Returns the number of examples in the data subset.""" 68 | return self.num_examples 69 | 70 | def available_subsets(self): 71 | """Returns the list of available subsets.""" 72 | return ['train', 'validation', 'test', ''] 73 | 74 | def data_files(self): 75 | """Returns a python list of all data files. 76 | 77 | Returns: 78 | python list of all data set files. 79 | Raises: 80 | ValueError: if there are no data files matching the subset. 81 | """ 82 | return self.image_filenames 83 | 84 | def file_label_names(self): 85 | """Return label names for list of files""" 86 | return self.image_label_names 87 | 88 | def label_indices(self): 89 | """Return label indices for list of files""" 90 | return self.image_label_indices 91 | 92 | def reader(self): 93 | """Return a reader for a single entry from the data set. 94 | 95 | See io_ops.py for details of Reader class. 96 | 97 | Returns: 98 | Reader object that reads the data set.
99 | """ 100 | return tf.WholeFileReader() 101 | -------------------------------------------------------------------------------- /litterbox/fabric/dataset_record.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | # Based on original Work Copyright 2016 Google Inc. All Rights Reserved. 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | # ============================================================================== 23 | """ 24 | """ 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | import os 30 | import tensorflow as tf 31 | from abc import ABCMeta 32 | from abc import abstractmethod 33 | 34 | from .dataset import Dataset 35 | from .dataset import FLAGS 36 | 37 | 38 | class DatasetRecord(Dataset): 39 | """A simple class for handling data sets.""" 40 | __metaclass__ = ABCMeta 41 | 42 | def __init__(self, name, subset): 43 | super(DatasetRecord, self).__init__(name, subset, is_record=True) 44 | 45 | def data_files(self): 46 | """Returns a python list of all (sharded) data subset files. 47 | 48 | Returns: 49 | python list of all (sharded) data set files. 50 | Raises: 51 | ValueError: if there are not data_files matching the subset. 52 | """ 53 | tf_record_pattern = os.path.join(FLAGS.data_dir, '%s-*' % self.subset) 54 | data_files = tf.gfile.Glob(tf_record_pattern) 55 | if not data_files: 56 | print('No files found for dataset %s/%s at %s' % 57 | (self.name, self.subset, FLAGS.data_dir)) 58 | exit(-1) 59 | return data_files 60 | 61 | def reader(self): 62 | """Return a reader for a single entry from the data set. 63 | 64 | See io_ops.py for details of Reader class. 65 | 66 | Returns: 67 | Reader object that reads the data set. 68 | """ 69 | return tf.TFRecordReader() 70 | -------------------------------------------------------------------------------- /litterbox/fabric/exec_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | # Based on original Work Copyright 2016 Google Inc. All Rights Reserved. 
10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | # ============================================================================== 23 | """A library to evaluate Inception on a single GPU. 24 | """ 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | import math 30 | import time 31 | from datetime import datetime 32 | 33 | import numpy as np 34 | import tensorflow as tf 35 | 36 | from fabric import util 37 | from fabric.feed import Feed 38 | 39 | FLAGS = tf.app.flags.FLAGS 40 | 41 | tf.app.flags.DEFINE_string( 42 | 'eval_dir', '/tmp/imagenet_eval', 43 | """Directory where to write event logs.""") 44 | 45 | # Flags governing the frequency of the eval. 46 | tf.app.flags.DEFINE_integer( 47 | 'eval_interval_secs', 60 * 5, 48 | """How often to run the eval.""") 49 | 50 | tf.app.flags.DEFINE_boolean( 51 | 'run_once', False, 52 | """Whether to run eval only once.""") 53 | 54 | tf.app.flags.DEFINE_string( 55 | 'checkpoint_path', '/tmp/imagenet_train', 56 | """Directory or file where to read model checkpoint(s).""") 57 | 58 | tf.app.flags.DEFINE_float( 59 | 'moving_average_decay', None, 60 | 'The decay to use for the moving average.' 61 | 'If left as None, then moving averages are not used.') 62 | 63 | 64 | def _eval_once(feed, saver, summary_writer, eval_ops, summary_op): 65 | """Runs Eval once. 66 | 67 | Args: 68 | saver: Saver. 69 | summary_writer: Summary writer. 70 | eval_ops: dict of evaluation metric ops 71 | summary_op: Summary op. 72 | """ 73 | with tf.Session() as sess: 74 | init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) 75 | sess.run(init_op) 76 | 77 | checkpoint_path, global_step = util.resolve_checkpoint_path(FLAGS.checkpoint_path) 78 | if not checkpoint_path: 79 | print('No checkpoint file found at %s' % FLAGS.checkpoint_path) 80 | return 81 | saver.restore(sess, checkpoint_path) 82 | print('Successfully loaded model from %s at step=%d.' % (checkpoint_path, global_step)) 83 | 84 | # Start the queue runners. 85 | coord = tf.train.Coordinator() 86 | try: 87 | threads = [] 88 | for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS): 89 | threads.extend(qr.create_threads(sess, coord=coord, daemon=True, start=True)) 90 | 91 | eval_ops_list = [] 92 | eval_names_list = [] 93 | if isinstance(eval_ops, dict): 94 | for name, op in eval_ops.items(): 95 | eval_ops_list.append(op) 96 | eval_names_list.append(name) 97 | else: 98 | assert isinstance(eval_ops, list) 99 | eval_ops_list = eval_ops 100 | for op in eval_ops: 101 | eval_names_list.append(op.name) 102 | 103 | num_examples = feed.num_examples_per_epoch() 104 | num_iter = int(math.ceil(num_examples / feed.batch_size)) 105 | eval_totals = [np.float64(0.0)] * len(eval_ops_list) 106 | example_count = 0 107 | step = 0 108 | print('%s: starting evaluation on (%s).' 
% (datetime.now(), feed.dataset.subset)) 109 | start_time = time.time() 110 | try: 111 | while step < num_iter and not coord.should_stop(): 112 | eval_results = sess.run(eval_ops_list) 113 | remaining_count = num_examples - example_count 114 | example_count += min(feed.batch_size, remaining_count) 115 | 116 | for i, result in enumerate(eval_results): 117 | if remaining_count < feed.batch_size: 118 | result = result[:remaining_count] 119 | eval_totals[i] += np.sum(result, dtype=np.float64) 120 | step += 1 121 | 122 | if step % 20 == 0: 123 | duration = time.time() - start_time 124 | sec_per_batch = duration / 20.0 125 | examples_per_sec = feed.batch_size / sec_per_batch 126 | print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f sec/batch)' 127 | % (datetime.now(), step, num_iter, examples_per_sec, sec_per_batch)) 128 | start_time = time.time() 129 | except KeyboardInterrupt: 130 | pass 131 | 132 | summary = tf.Summary() 133 | summary.ParseFromString(sess.run(summary_op)) 134 | print('%s:' % datetime.now(), end=" ") 135 | for i, val in enumerate(eval_totals): 136 | mean_val = val / example_count 137 | print('%s = %.6f' % (eval_names_list[i], mean_val), end=" ") 138 | summary.value.add(tag=eval_names_list[i], simple_value=mean_val) 139 | print('[%d examples]' % example_count) 140 | summary_writer.add_summary(summary, global_step) 141 | 142 | except Exception as e: # pylint: disable=broad-except 143 | coord.request_stop(e) 144 | 145 | coord.request_stop() 146 | coord.join(threads, stop_grace_period_secs=10) 147 | 148 | 149 | def evaluate(feed, model): 150 | """Evaluate model on Dataset for a number of steps.""" 151 | 152 | if tf.gfile.Exists(FLAGS.eval_dir): 153 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 154 | tf.gfile.MakeDirs(FLAGS.eval_dir) 155 | 156 | with tf.Graph().as_default(): 157 | 158 | # Get images and labels examples 159 | inputs, labels = feed.inputs_for_eval() 160 | 161 | # Build a Graph that computes the logits predictions from the 162 | # inference model. 163 | outputs = model.build_tower(inputs) 164 | 165 | # Calculate predictions. 166 | eval_ops = model.eval_ops(outputs, labels, processor=feed.processor) 167 | 168 | # Restore the moving average version of the learned variables for eval. 169 | if FLAGS.moving_average_decay: 170 | variable_averages = tf.train.ExponentialMovingAverage(model.MOVING_AVERAGE_DECAY) 171 | variables_to_restore = variable_averages.variables_to_restore() 172 | else: 173 | variables_to_restore = tf.contrib.framework.get_model_variables() 174 | 175 | saver = tf.train.Saver(variables_to_restore) 176 | 177 | # Build the summary operation based on the TF collection of Summaries. 178 | summary_op = tf.summary.merge_all() 179 | summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph=tf.get_default_graph()) 180 | 181 | while True: 182 | _eval_once(feed, saver, summary_writer, eval_ops, summary_op) 183 | if FLAGS.run_once: 184 | break 185 | time.sleep(FLAGS.eval_interval_secs) 186 | -------------------------------------------------------------------------------- /litterbox/fabric/exec_predict.py: -------------------------------------------------------------------------------- 1 | # 2 | """A library to predict using Inception on a single GPU. 
3 | """ 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import math 9 | import time 10 | from datetime import datetime 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | 15 | from fabric import util 16 | from .feed import Feed 17 | 18 | FLAGS = tf.app.flags.FLAGS 19 | 20 | tf.app.flags.DEFINE_string( 21 | 'predict_dir', '/tmp/imagenet_predict', 22 | """Directory where to write event logs.""") 23 | 24 | tf.app.flags.DEFINE_string( 25 | 'checkpoint_path', '/tmp/imagenet_train', 26 | """Directory or file where to read model checkpoint(s).""") 27 | 28 | tf.app.flags.DEFINE_float( 29 | 'moving_average_decay', None, 30 | 'The decay to use for the moving average.' 31 | 'If left as None, then moving averages are not used.') 32 | 33 | 34 | def truncate_batch(batch_outputs, remaining): 35 | truncated_outputs = [] 36 | for o in batch_outputs: 37 | if isinstance(o, list): 38 | truncated_outputs.append([v[:remaining] for v in o]) 39 | elif isinstance(o, dict): 40 | dict_out = {k: v[:remaining] for k, v in o.items()} 41 | truncated_outputs.append(dict_out) 42 | else: 43 | truncated_outputs.append(o[:remaining]) 44 | return truncated_outputs 45 | 46 | 47 | def _predict(feed, saver, output_op, names_op): 48 | """Runs prediction 49 | """ 50 | predictions = [] 51 | with tf.Session() as sess: 52 | init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) 53 | sess.run(init_op) 54 | 55 | checkpoint_path, global_step = util.resolve_checkpoint_path(FLAGS.checkpoint_path) 56 | if not checkpoint_path: 57 | print('No checkpoint file found at %s' % FLAGS.checkpoint_path) 58 | return predictions, 0 59 | saver.restore(sess, checkpoint_path) 60 | print('Successfully loaded model from %s at step=%d.' % (checkpoint_path, global_step)) 61 | 62 | # Start the queue runners. 63 | coord = tf.train.Coordinator() 64 | threads = [] 65 | examples_count = 0 66 | try: 67 | for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS): 68 | threads.extend(qr.create_threads(sess, coord=coord, daemon=True, start=True)) 69 | batch_size = feed.batch_size 70 | if FLAGS.sample: 71 | batch_size //= FLAGS.sample 72 | num_examples = feed.num_examples_per_epoch() 73 | num_iter = int(math.ceil(num_examples / batch_size)) 74 | print('%s: starting inference on %d examples in (%s).' 
% 75 | (datetime.now(), num_examples, feed.dataset.subset)) 76 | step = 0 77 | start_time = time.time() 78 | while step < num_iter and not coord.should_stop(): 79 | batch_outputs = sess.run([output_op, names_op]) 80 | remaining_count = num_examples - examples_count 81 | examples_count += min(batch_size, remaining_count) 82 | step += 1 83 | if step % 20 == 0: 84 | duration = time.time() - start_time 85 | sec_per_batch = duration / 20.0 86 | examples_per_sec = batch_size / sec_per_batch 87 | print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f sec/batch)' 88 | % (datetime.now(), step, num_iter, examples_per_sec, sec_per_batch)) 89 | start_time = time.time() 90 | if remaining_count < batch_size: 91 | batch_outputs = truncate_batch(batch_outputs, remaining_count) 92 | predictions.append(batch_outputs) 93 | except KeyboardInterrupt: 94 | pass 95 | except Exception as e: # pylint: disable=broad-except 96 | coord.request_stop(e) 97 | 98 | coord.request_stop() 99 | coord.join(threads, stop_grace_period_secs=10) 100 | 101 | return predictions, examples_count 102 | 103 | 104 | def _reduce_vals(values, factor): 105 | v_split = tf.split(0, values.get_shape()[0] // factor, values) 106 | v_mean = [tf.reduce_mean(x, reduction_indices=[0], keep_dims=True) for x in v_split] 107 | return tf.concat(0, v_mean) 108 | 109 | 110 | def _reduce_batch(outputs, identities, batch_size, f=8): 111 | ratio = batch_size // f 112 | if isinstance(outputs, list): 113 | outputs = [_reduce_vals(v, f) for v in outputs] 114 | elif isinstance(outputs, dict): 115 | outputs = {k: _reduce_vals(v, f) for k, v in outputs.items()} 116 | else: 117 | outputs = _reduce_vals(outputs, f) 118 | idx = f * np.arange(0, ratio) 119 | return outputs, tf.gather(identities, idx) 120 | 121 | 122 | def predict(feed, model, raw_outputs=False): 123 | """Predict/infer outputs for dataset using model.""" 124 | with tf.Graph().as_default(): 125 | # Get images and labels from the dataset. 126 | inputs, identities = feed.inputs_for_predict() 127 | 128 | # Build a Graph that computes the predictions from the inference model. 129 | outputs = model.build_tower(inputs) 130 | if raw_outputs: 131 | predictions = outputs 132 | else: 133 | predictions = model.get_predictions(outputs, processor=feed.processor) 134 | 135 | if feed.sample: 136 | predictions, identities = _reduce_batch( 137 | predictions, identities, feed.batch_size, f=feed.sample) 138 | 139 | if FLAGS.moving_average_decay: 140 | variable_averages = tf.train.ExponentialMovingAverage(model.MOVING_AVERAGE_DECAY) 141 | variables_to_restore = variable_averages.variables_to_restore() 142 | else: 143 | variables_to_restore = tf.contrib.framework.get_model_variables() 144 | saver = tf.train.Saver(variables_to_restore) 145 | 146 | prediction_values = _predict(feed, saver, predictions, identities) 147 | return prediction_values 148 | -------------------------------------------------------------------------------- /litterbox/fabric/feed.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | # Based on original Work Copyright 2016 Google Inc. All Rights Reserved. 
10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | # ============================================================================== 23 | """Feed class 24 | """ 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | import math 30 | import abc 31 | import tensorflow as tf 32 | 33 | FLAGS = tf.app.flags.FLAGS 34 | 35 | tf.app.flags.DEFINE_integer('batch_size', 32, 36 | """Number of images to process in a batch.""") 37 | 38 | tf.app.flags.DEFINE_integer('num_preprocess_threads', 4, 39 | """Number of preprocessing threads per tower. """ 40 | """Please make this a multiple of 4.""") 41 | 42 | tf.app.flags.DEFINE_integer('num_readers', 4, 43 | """Number of parallel readers during train.""") 44 | 45 | tf.app.flags.DEFINE_integer('num_examples', 0, 46 | """Number of examples to run. Note that the eval """ 47 | """ImageNet dataset contains 50000 examples.""") 48 | 49 | tf.app.flags.DEFINE_integer('sample', 0, '') 50 | 51 | 52 | class Feed(object): 53 | 54 | def __init__( 55 | self, dataset, processor, batch_size=None, sample=None, 56 | num_preprocess_threads=None, num_readers=None): 57 | 58 | if not dataset: 59 | raise ValueError('Please provide a dataset') 60 | self.dataset = dataset 61 | 62 | if not processor: 63 | raise ValueError('Please provide a data preprocessor') 64 | self.processor = processor 65 | 66 | self.batch_size = FLAGS.batch_size if not batch_size else batch_size 67 | 68 | self.sample = FLAGS.sample if sample is None else sample 69 | 70 | self.num_preprocess_threads = FLAGS.num_preprocess_threads \ 71 | if not num_preprocess_threads else num_preprocess_threads 72 | 73 | if self.num_preprocess_threads % 4: 74 | raise ValueError('Please make num_preprocess_threads a multiple ' 75 | 'of 4 (%d %% 4 != 0).' % self.num_preprocess_threads) 76 | 77 | self.num_readers = FLAGS.num_readers if not num_readers else num_readers 78 | if self.num_readers < 1: 79 | raise ValueError('Please make num_readers at least 1') 80 | 81 | def num_batches_per_epoch(self): 82 | return math.ceil(self.num_examples_per_epoch() / self.batch_size) 83 | 84 | def num_examples_per_epoch(self): 85 | return FLAGS.num_examples if FLAGS.num_examples else self.dataset.num_examples_per_epoch() 86 | 87 | def inputs_for_eval(self, num_splits=0): 88 | """Generate batches of undistorted examples with labels for evaluation. 89 | See _batch_inputs. 90 | """ 91 | # Force all input processing onto CPU in order to reserve the GPU for 92 | # the forward inference and back-propagation. 93 | with tf.device('/cpu:0'): 94 | inputs, _, labels = self._batch_inputs(num_splits, mode='eval') 95 | return inputs, labels 96 | 97 | def inputs_for_train(self, num_splits=0): 98 | """Generate batches of distorted examples with labels for training. 99 | See _batch_inputs 100 | """ 101 | # Force all input processing onto CPU in order to reserve the GPU for 102 | # the forward inference and back-propagation.
103 | with tf.device('/cpu:0'): 104 | inputs, _, labels = self._batch_inputs(num_splits, mode='train') 105 | return inputs, labels 106 | 107 | def inputs_for_predict(self): 108 | """Generate batches of undistorted examples for inference 109 | """ 110 | # Force all input processing onto CPU in order to reserve the GPU for 111 | # the forward inference and back-propagation. 112 | with tf.device('/cpu:0'): 113 | inputs, identities, _ = self._batch_inputs(0, mode='pred') 114 | return inputs, identities 115 | 116 | def _batch_inputs(self, num_splits=0, mode='eval'): 117 | """Construct batches of training or evaluation examples from the image dataset. 118 | 119 | Returns: 120 | tuple of lists of tensors/lists (of tensors)/dicts (of tensors) containing a 121 | batch of input examples 122 | """ 123 | with tf.name_scope('batch_processing'): 124 | if self.dataset.is_record: 125 | inputs = self._batch_inputs_record(mode) 126 | else: 127 | inputs = self._batch_inputs_file(mode) 128 | 129 | batch_queue_capacity = 2 * self.num_preprocess_threads * self.batch_size 130 | batch_data = tf.train.batch_join( 131 | inputs, 132 | enqueue_many=self.sample > 0, 133 | batch_size=self.batch_size, 134 | capacity=batch_queue_capacity) 135 | 136 | return self.processor.reshape_batch(batch_data, self.batch_size, num_splits) 137 | 138 | @abc.abstractmethod 139 | def _batch_inputs_record(self, mode): 140 | """Construct batches of training or evaluation examples from the dataset TF records. 141 | """ 142 | assert False, 'Calling virtual method' 143 | 144 | @abc.abstractmethod 145 | def _batch_inputs_file(self, mode): 146 | """Construct batches of training or evaluation examples from dataset files. 147 | """ 148 | assert False, 'Calling virtual method' 149 | -------------------------------------------------------------------------------- /litterbox/fabric/loss.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | 7 | 8 | def loss_softmax_cross_entropy_with_aux(logits, labels, aux_logits=None): 9 | # Reshape the labels into a dense Tensor of 10 | # shape [batch_size, num_classes]. 11 | num_classes = logits.get_shape()[-1].value 12 | dense_labels = tf.contrib.layers.one_hot_encoding(labels, num_classes) 13 | 14 | # Cross entropy loss for the main softmax prediction. 15 | tf.contrib.losses.softmax_cross_entropy(logits, dense_labels, label_smoothing=0.1, weights=1.0) 16 | 17 | if aux_logits is not None: 18 | # Cross entropy loss for the auxiliary head. 
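# The auxiliary loss is down-weighted (0.4) relative to the main softmax loss, consistent with how auxiliary classifiers are typically weighted in the Inception papers.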
19 | tf.contrib.losses.softmax_cross_entropy( 20 | aux_logits, dense_labels, label_smoothing=0.1, weights=0.4, scope='aux_loss') 21 | 22 | 23 | # Math for calculating huber loss 24 | def _compute_huber(predictions, labels, delta=1.0): 25 | predictions.get_shape().assert_is_compatible_with(labels.get_shape()) 26 | predictions = tf.to_float(predictions) 27 | labels = tf.to_float(labels) 28 | delta = tf.to_float(delta) 29 | 30 | diff = predictions - labels 31 | diff_abs = tf.abs(diff) 32 | delta_fact = 0.5 * tf.square(delta) 33 | condition = tf.less(diff_abs, delta) 34 | left_opt = 0.5 * tf.square(diff) 35 | right_opt = delta * diff_abs - delta_fact 36 | losses_val = tf.select(condition, left_opt, right_opt) 37 | return losses_val 38 | 39 | 40 | # Returns non-reduced tensor of unweighted losses with batch dimension matching inputs 41 | def metric_huber(predictions, labels, delta=1.0, scope=None): 42 | with tf.name_scope(scope, "huber_metric", [predictions, labels]): 43 | return _compute_huber(predictions, labels, delta) 44 | 45 | 46 | # Returns reduced loss, applies weights, and adds loss to collections 47 | def loss_huber(predictions, labels, delta=1.0, weights=1.0, scope=None): 48 | with tf.name_scope(scope, "huber_loss", [predictions, labels]): 49 | losses_val = _compute_huber(predictions, labels, delta) 50 | return tf.contrib.losses.compute_weighted_loss(losses_val, weights=weights) 51 | 52 | 53 | def loss_huber_with_aux(predictions, labels, delta=1.0, weight=1.0, aux_predictions=None): 54 | loss_huber(predictions, labels, delta=delta, weights=weight) 55 | if aux_predictions is not None: 56 | loss_huber(aux_predictions, labels, delta=delta, weights=weight*0.4, scope='aux_huber_loss') 57 | 58 | 59 | def loss_mse_with_aux(predictions, labels, aux_predictions=None): 60 | tf.contrib.losses.mean_squared_error(predictions, labels=labels, weights=1.0) 61 | if aux_predictions is not None: 62 | tf.contrib.losses.mean_squared_error(aux_predictions, labels=labels, weights=0.4, scope='aux_loss') -------------------------------------------------------------------------------- /litterbox/fabric/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | import abc 17 | import re 18 | from copy import deepcopy 19 | 20 | 21 | def merge_params(default, args): 22 | params = deepcopy(default) 23 | params.update(args) 24 | return params 25 | 26 | 27 | class ModelTower(object): 28 | def __init__(self, name, endpoints, outputs, aux_outputs=None): 29 | self.name = name 30 | self.endpoints = endpoints 31 | self.outputs = outputs 32 | self.aux_outputs = aux_outputs 33 | 34 | 35 | class Model(object): 36 | __metaclass__ = abc.ABCMeta 37 | 38 | # If a model is trained using multiple GPUs, prefix all Op names with tower_name 39 | # to differentiate the operations. Note that this prefix is removed from the 40 | # names of the summaries when visualizing a model. 
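# e.g. ops in the first tower are created under the 'tower_0' scope (see scope_name below).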
41 | TOWER_PREFIX = 'tower' 42 | 43 | def __init__(self): 44 | self.model_variable_scope = None 45 | self._last_tower = None 46 | self._towers = {} 47 | 48 | def add_tower(self, name, endpoints, outputs, aux_outputs=None): 49 | self._last_tower = ModelTower( 50 | name, 51 | endpoints, 52 | outputs, 53 | aux_outputs 54 | ) 55 | self._towers[name] = self._last_tower 56 | 57 | def tower(self, name=None): 58 | tower = self.last_tower() if name is None else self._towers[name] 59 | if not tower: 60 | raise RuntimeError('Invalid tower %s' % name) 61 | return tower 62 | 63 | def last_tower(self): 64 | if not self._last_tower: 65 | raise RuntimeError('A valid model tower is required, please build one first') 66 | return self._last_tower 67 | 68 | def last_scope(self): 69 | return self._last_tower.name if self._last_tower else '' 70 | 71 | # Return scopes (strings) for output variables to allow filtering for save/restore 72 | @abc.abstractmethod 73 | def output_scopes(self, prefix_scope): 74 | assert False, 'abstract method not implemented' 75 | return [] 76 | 77 | # Return list of 'get/create variable' functions used by the model (used for variable scoping). 78 | # Makes it easier to abstract train code from models using different variable helpers 79 | def get_variable_fns(self): 80 | return [tf.contrib.framework.variable] 81 | 82 | # Hook to let the model make variable name remapping decisions, especially helpful for 83 | # handling old or pretrained checkpoints that don't match all current variable names 84 | def _remap_variable_names(self, variables, checkpoint_variable_set, prefix_scope): 85 | return variables 86 | 87 | # Return a list of model variables to restore for a Saver 88 | def variables_to_restore(self, restore_outputs=True, checkpoint_variable_set=set(), prefix_scope=''): 89 | scope = prefix_scope or None 90 | restore_variables = tf.contrib.framework.variables.get_model_variables(scope=scope) 91 | exclude_variables = self.output_scopes(prefix_scope=prefix_scope) 92 | if not restore_outputs: 93 | # Filter out variables in model output scopes by name if the outputs are not being restored 94 | model_variable_names = [x.op.name for x in restore_variables] 95 | filtered_variables = [] 96 | for var in restore_variables: 97 | excluded = False 98 | for exclusion in exclude_variables: 99 | if var.op.name.startswith(exclusion): 100 | excluded = True 101 | break 102 | if not excluded: 103 | filtered_variables.append(var) 104 | restore_variables = filtered_variables 105 | diff = set(model_variable_names).difference({x.op.name for x in restore_variables}) 106 | if diff: 107 | print('INFO: %d variables were explicitly omitted from restore.' % len(diff)) 108 | [print(x) for x in diff] 109 | 110 | restore_variables = self._remap_variable_names( 111 | restore_variables, checkpoint_variable_set, prefix_scope) 112 | 113 | if checkpoint_variable_set: 114 | matched = {} 115 | missing = [] 116 | if isinstance(restore_variables, dict): 117 | for name, var in restore_variables.items(): 118 | if name in checkpoint_variable_set: 119 | matched[name] = var 120 | else: 121 | missing += [name] 122 | else: 123 | for var in restore_variables: 124 | if var.op.name in checkpoint_variable_set: 125 | matched[var.op.name] = var 126 | else: 127 | missing += [var.op.name] 128 | if missing: 129 | print("WARNING: %d variables not explicitly omitted were missing from the checkpoint " 130 | "file. Using default initialization."
% len(missing)) 131 | [print(x) for x in missing if not x.endswith('/Momentum')] 132 | restore_variables = matched 133 | 134 | return restore_variables 135 | 136 | def activation_summaries(self, tower_name=None): 137 | tower = self.tower(tower_name) 138 | with tf.name_scope('summaries'): 139 | act_ops = {} 140 | for x in tower.endpoints.values(): 141 | if isinstance(x, dict): 142 | for y in x.values(): 143 | act_ops[y] = y.op.name 144 | elif isinstance(x, list): 145 | for y in x: 146 | act_ops[y] = y.op.name 147 | else: 148 | act_ops[x] = x.op.name 149 | for endpoint, op_name in act_ops.items(): 150 | # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training 151 | # session. This helps the clarity of presentation on tensorboard. 152 | tensor_name = self.strip_common_scope(op_name) 153 | tf.summary.histogram(tensor_name + '/activations', endpoint) 154 | tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(endpoint)) 155 | 156 | def strip_common_scope(self, input_name): 157 | # strip tower scope, present in ops 158 | output_name = re.sub('%s_[0-9]*/' % self.TOWER_PREFIX, '', input_name) 159 | # strip extra model variable scope, present in ops and variables 160 | if self.model_variable_scope: 161 | output_name = re.sub('%s/' % self.model_variable_scope, '', output_name) 162 | return output_name 163 | 164 | 165 | @staticmethod 166 | def default_optimizer_params(): 167 | opt_type = 'momentum' 168 | opt_params = { 169 | 'learning_rate': 0.1, 170 | 'momentum': 0.9, 171 | 'use_nesterov': True 172 | } 173 | return opt_type, opt_params 174 | 175 | @staticmethod 176 | def scope_name(tower_id=0): 177 | return '%s_%d' % (Model.TOWER_PREFIX, tower_id) 178 | 179 | -------------------------------------------------------------------------------- /litterbox/fabric/processor.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | import abc 17 | 18 | 19 | def select_split(inputs, split_index=None): 20 | if split_index is None: 21 | return inputs 22 | inputs_out = [] 23 | for x in inputs: 24 | if isinstance(x, list): 25 | assert x 26 | if isinstance(x[0], list): 27 | inputs_out.append([t[split_index] for t in x]) 28 | else: 29 | inputs_out.append(x[split_index]) 30 | elif isinstance(x, dict): 31 | inputs_out.append({tk: tv[split_index] for tk, tv in x.items()}) 32 | else: 33 | assert False, 'Unexpected split format, ' \ 34 | 'expecting list of splits, list of list of splits, or dict of splits' 35 | return inputs_out 36 | 37 | 38 | class Processor(object): 39 | __metaclass__ = abc.ABCMeta 40 | 41 | def __init__(self): 42 | pass 43 | 44 | @abc.abstractmethod 45 | def get_input_shape(self, batch_size): 46 | pass 47 | 48 | @abc.abstractmethod 49 | def parse_example(self, serialized_example): 50 | pass 51 | 52 | @abc.abstractmethod 53 | def process_example(self, tensors, mode, thread_id): 54 | pass 55 | 56 | @abc.abstractmethod 57 | def reshape_batch(self, batch_tensors, batch_size, num_splits): 58 | pass 59 | -------------------------------------------------------------------------------- /litterbox/fabric/util.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import os 4 | 5 | 6 | def resolve_checkpoint_path(input_path): 7 | global_step = 0 8 | checkpoint_path = input_path 9 | if os.path.isdir(checkpoint_path): 10 | ckpt = tf.train.get_checkpoint_state(checkpoint_path) 11 | if ckpt and ckpt.model_checkpoint_path: 12 | checkpoint_path = ckpt.model_checkpoint_path 13 | if not os.path.isabs(checkpoint_path): 14 | checkpoint_path = os.path.join(input_path, checkpoint_path) 15 | else: 16 | return '', global_step 17 | 18 | # Assuming model_checkpoint_path looks something like: 19 | # /my-favorite-path/imagenet_train/model.ckpt-0, 20 | # extract global_step from it. 21 | try: 22 | global_step = int(checkpoint_path.split('/')[-1].split('-')[-1]) 23 | except ValueError: 24 | pass 25 | 26 | return checkpoint_path, global_step 27 | 28 | 29 | def check_tensorflow_version(min_version=12): 30 | assert int(str.split(tf.__version__, '.')[1]) >= min_version, \ 31 | 'Installed TensorFlow version (%s) must be >= 0.%s.0' % (tf.__version__, min_version) 32 | -------------------------------------------------------------------------------- /litterbox/feeds/__init__.py: -------------------------------------------------------------------------------- 1 | from .image.feed_image import FeedImagesWithLabels -------------------------------------------------------------------------------- /litterbox/feeds/image/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rwightman/tensorflow-litterbox/ddeeb3a6c7de64e5391050ffbb5948feca65ad3c/litterbox/feeds/image/__init__.py -------------------------------------------------------------------------------- /litterbox/imagenet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ImageNet data set. 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | from fabric.dataset_record import DatasetRecord 16 | 17 | 18 | class ImagenetData(DatasetRecord): 19 | """ImageNet data set.""" 20 | 21 | def __init__(self, subset, background=True): 22 | super(ImagenetData, self).__init__('Imagenet', subset) 23 | self.has_background_class = background 24 | 25 | def num_classes(self): 26 | return 1001 27 | 28 | def num_examples_per_epoch(self): 29 | """Returns the number of examples in the data subset.""" 30 | if self.subset == 'train': 31 | return 1281167 32 | elif self.subset == 'validation': 33 | return 50000 34 | 35 | -------------------------------------------------------------------------------- /litterbox/imagenet_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | from fabric import util 17 | from fabric import exec_eval 18 | from feeds import FeedImagesWithLabels 19 | from processors import ProcessorImagenet 20 | from models import ModelMySlim, ModelGoogleSlim 21 | from imagenet_data import ImagenetData 22 | 23 | FLAGS = tf.app.flags.FLAGS 24 | 25 | tf.app.flags.DEFINE_string( 26 | 'subset', 'validation', 27 | """Either 'validation', 'train', 'test'""") 28 | 29 | tf.app.flags.DEFINE_boolean( 30 | 'my', False, 31 | """Enable my variants of the image classification models""") 32 | 33 | tf.app.flags.DEFINE_string( 34 | 'network', 'resnet_v1_50', 35 | """See models/google/nets/nets_factory.py or models/my_slim/nets_factory.py""") 36 | 37 | tf.app.flags.DEFINE_integer( 38 | 'label_offset', 0, 39 | """Offset of labels in dataset.
Set to 1 if network trained without background but dataset includes it.""") 40 | 41 | 42 | def main(_): 43 | util.check_tensorflow_version() 44 | 45 | dataset = ImagenetData(subset=FLAGS.subset) 46 | 47 | processor = ProcessorImagenet() 48 | processor.label_offset = FLAGS.label_offset 49 | 50 | feed = FeedImagesWithLabels(dataset=dataset, processor=processor) 51 | 52 | model_params = { 53 | 'num_classes': feed.num_classes_for_network(), 54 | 'network': FLAGS.network, 55 | } 56 | 57 | if FLAGS.my: 58 | # My variants of Resnet, Inception, and VGG networks 59 | model = ModelMySlim(params=model_params) 60 | else: 61 | # Google's tf.slim models 62 | model = ModelGoogleSlim(params=model_params) 63 | model.check_norm(processor.normalize) 64 | 65 | exec_eval.evaluate(feed=feed, model=model) 66 | 67 | if __name__ == '__main__': 68 | tf.app.run() 69 | -------------------------------------------------------------------------------- /litterbox/imagenet_train.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | from fabric import util 17 | from fabric import exec_train 18 | from feeds import FeedImagesWithLabels 19 | from processors import ProcessorImagenet 20 | from models import ModelMySlim 21 | from models import ModelGoogleSlim 22 | from imagenet_data import ImagenetData 23 | 24 | FLAGS = tf.app.flags.FLAGS 25 | 26 | tf.app.flags.DEFINE_string( 27 | 'subset', 'train', 28 | """Either 'validation', 'train', 'test'""") 29 | 30 | tf.app.flags.DEFINE_boolean( 31 | 'my', False, 32 | """Enable my variants of the image classification models""") 33 | 34 | tf.app.flags.DEFINE_string( 35 | 'network', 'inception_v4', 36 | """See models/google/nets/nets_factory.py or models/my_slim/nets_factory.py""") 37 | 38 | tf.app.flags.DEFINE_integer( 39 | 'label_offset', 0, 40 | """Offset of labels in dataset. 
Set to 1 if network trained without background but dataset includes it.""") 41 | 42 | 43 | def main(_): 44 | util.check_tensorflow_version() 45 | 46 | dataset = ImagenetData(subset=FLAGS.subset) 47 | 48 | processor = ProcessorImagenet() 49 | processor.label_offset = FLAGS.label_offset 50 | 51 | feed = FeedImagesWithLabels(dataset=dataset, processor=processor) 52 | 53 | model_params = { 54 | 'num_classes': feed.num_classes_for_network(), 55 | 'network': FLAGS.network, 56 | } 57 | 58 | if FLAGS.my: 59 | # My variants of Resnet, Inception, and VGG networks 60 | model = ModelMySlim(params=model_params) 61 | else: 62 | # Google's tf.slim models 63 | model = ModelGoogleSlim(params=model_params) 64 | model.check_norm(processor.normalize) 65 | 66 | exec_train.train(feed=feed, model=model) 67 | 68 | if __name__ == '__main__': 69 | tf.app.run() 70 | -------------------------------------------------------------------------------- /litterbox/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .preact_conv import preact_conv2d 2 | from .compact_bilinear_pooling import compact_bilinear_pooling -------------------------------------------------------------------------------- /litterbox/layers/compact_bilinear_pooling.py: -------------------------------------------------------------------------------- 1 | # UC Berkeley's Standard Copyright and Disclaimer Notice: 2 | # 3 | # Copyright (c) 2016. The Regents of the University of California (Regents). All 4 | # Rights Reserved. Permission to use, copy, modify, and distribute this software 5 | # and its documentation for educational, research, and not-for-profit purposes, 6 | # without fee and without a signed licensing agreement, is hereby granted, 7 | # provided that the above copyright notice, this paragraph and the following 8 | # two paragraphs appear in all copies, modifications, and distributions. 9 | # Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, 10 | # Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, for commercial licensing 11 | # opportunities. 12 | # 13 | # Ronghang Hu, University of California, Berkeley. 14 | # 15 | # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, 16 | # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF 17 | # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN 18 | # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 19 | # 20 | # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 22 | # THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS 23 | # PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, 24 | # UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 25 | 26 | from __future__ import absolute_import, division, print_function 27 | 28 | import numpy as np 29 | import tensorflow as tf 30 | 31 | def _generate_sketch_matrix(rand_h, rand_s, output_dim): 32 | """ 33 | Return a sparse matrix used for tensor sketch operation in compact bilinear 34 | pooling 35 | 36 | Args: 37 | rand_h: an 1D numpy array containing indices in interval `[0, output_dim)`. 38 | rand_s: an 1D numpy array of 1 and -1, having the same shape as `rand_h`. 39 | output_dim: the output dimensions of compact bilinear pooling. 40 | 41 | Returns: 42 | a sparse matrix of shape [input_dim, output_dim] for tensor sketch. 
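    Example (an illustrative addition; values chosen arbitrarily): for
    input_dim=4, output_dim=3, rand_h=[0, 2, 1, 2] and rand_s=[1, -1, -1, 1],
    row i of the returned matrix holds its single nonzero entry rand_s[i] at
    column rand_h[i]:

        [[ 1,  0,  0],
         [ 0,  0, -1],
         [ 0, -1,  0],
         [ 0,  0,  1]]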
43 | """ 44 | 45 | # Generate a sparse matrix for tensor count sketch 46 | rand_h = rand_h.astype(np.int64) 47 | rand_s = rand_s.astype(np.float32) 48 | assert(rand_h.ndim==1 and rand_s.ndim==1 and len(rand_h)==len(rand_s)) 49 | assert(np.all(rand_h >= 0) and np.all(rand_h < output_dim)) 50 | 51 | input_dim = len(rand_h) 52 | indices = np.concatenate((np.arange(input_dim)[..., np.newaxis], 53 | rand_h[..., np.newaxis]), axis=1) 54 | sparse_sketch_matrix = tf.sparse_reorder( 55 | tf.SparseTensor(indices, rand_s, [input_dim, output_dim])) 56 | return sparse_sketch_matrix 57 | 58 | def compact_bilinear_pooling(bottom1, bottom2, output_dim, sum_pool=True, 59 | rand_h_1=None, rand_s_1=None, rand_h_2=None, rand_s_2=None, 60 | seed_h_1=1, seed_s_1=3, seed_h_2=5, seed_s_2=7): 61 | """ 62 | Compute compact bilinear pooling over two bottom inputs. Reference: 63 | 64 | Yang Gao, et al. "Compact Bilinear Pooling." in Proceedings of IEEE 65 | Conference on Computer Vision and Pattern Recognition (2016). 66 | Akira Fukui, et al. "Multimodal Compact Bilinear Pooling for Visual Question 67 | Answering and Visual Grounding." arXiv preprint arXiv:1606.01847 (2016). 68 | 69 | Args: 70 | bottom1: 1st input, 4D Tensor of shape [batch_size, height, width, input_dim1]. 71 | bottom2: 2nd input, 4D Tensor of shape [batch_size, height, width, input_dim2]. 72 | 73 | output_dim: output dimension for compact bilinear pooling. 74 | 75 | sum_pool: (Optional) If True, sum the output along height and width 76 | dimensions and return output shape [batch_size, output_dim]. 77 | Otherwise return [batch_size, height, width, output_dim]. 78 | Default: True. 79 | 80 | rand_h_1: (Optional) an 1D numpy array containing indices in interval 81 | `[0, output_dim)`. Automatically generated from `seed_h_1` 82 | if is None. 83 | rand_s_1: (Optional) an 1D numpy array of 1 and -1, having the same shape 84 | as `rand_h_1`. Automatically generated from `seed_s_1` if is 85 | None. 86 | rand_h_2: (Optional) an 1D numpy array containing indices in interval 87 | `[0, output_dim)`. Automatically generated from `seed_h_2` 88 | if is None. 89 | rand_s_2: (Optional) an 1D numpy array of 1 and -1, having the same shape 90 | as `rand_h_2`. Automatically generated from `seed_s_2` if is 91 | None. 92 | 93 | Returns: 94 | Compact bilinear pooled results of shape [batch_size, output_dim] or 95 | [batch_size, height, width, output_dim], depending on `sum_pool`. 
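    Example usage (a minimal sketch; the 7x7x512 feature map shapes are
    assumed for illustration, not required by this function):

        x1 = tf.placeholder(tf.float32, [None, 7, 7, 512])
        x2 = tf.placeholder(tf.float32, [None, 7, 7, 512])
        cbp = compact_bilinear_pooling(x1, x2, output_dim=8000)
        # cbp is [batch_size, 8000] since sum_pool defaults to True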
96 | """ 97 | 98 | # Static shapes are needed to construct the count sketch matrix 99 | input_dim1 = bottom1.get_shape().as_list()[-1] 100 | input_dim2 = bottom2.get_shape().as_list()[-1] 101 | 102 | # Step 0: Generate vectors and sketch matrix for tensor count sketch 103 | # This is only done once during graph construction, and fixed during each 104 | # operation 105 | if rand_h_1 is None: 106 | np.random.seed(seed_h_1) 107 | rand_h_1 = np.random.randint(output_dim, size=input_dim1) 108 | if rand_s_1 is None: 109 | np.random.seed(seed_s_1) 110 | rand_s_1 = 2*np.random.randint(2, size=input_dim1) - 1 111 | sparse_sketch_matrix1 = _generate_sketch_matrix(rand_h_1, rand_s_1, output_dim) 112 | if rand_h_2 is None: 113 | np.random.seed(seed_h_2) 114 | rand_h_2 = np.random.randint(output_dim, size=input_dim2) 115 | if rand_s_2 is None: 116 | np.random.seed(seed_s_2) 117 | rand_s_2 = 2*np.random.randint(2, size=input_dim2) - 1 118 | sparse_sketch_matrix2 = _generate_sketch_matrix(rand_h_2, rand_s_2, output_dim) 119 | 120 | # Step 1: Flatten the input tensors and count sketch 121 | bottom1_flat = tf.reshape(bottom1, [-1, input_dim1]) 122 | bottom2_flat = tf.reshape(bottom2, [-1, input_dim2]) 123 | # Essentially: 124 | # sketch1 = bottom1 * sparse_sketch_matrix 125 | # sketch2 = bottom2 * sparse_sketch_matrix 126 | # But tensorflow only supports left multiplying a sparse matrix, so: 127 | # sketch1 = (sparse_sketch_matrix.T * bottom1.T).T 128 | # sketch2 = (sparse_sketch_matrix.T * bottom2.T).T 129 | sketch1 = tf.transpose(tf.sparse_tensor_dense_matmul(sparse_sketch_matrix1, 130 | bottom1_flat, adjoint_a=True, adjoint_b=True)) 131 | sketch2 = tf.transpose(tf.sparse_tensor_dense_matmul(sparse_sketch_matrix2, 132 | bottom2_flat, adjoint_a=True, adjoint_b=True)) 133 | 134 | # Step 2: FFT 135 | zeros = tf.zeros_like(sketch1) 136 | fft1 = tf.batch_fft(tf.complex(real=sketch1, imag=zeros)) 137 | fft2 = tf.batch_fft(tf.complex(real=sketch2, imag=zeros)) 138 | 139 | # Step 3: Elementwise product 140 | fft_product = tf.mul(fft1, fft2) 141 | 142 | # Step 4: Inverse FFT and reshape back 143 | # Compute output shape dynamically: [batch_size, height, width, output_dim] 144 | cbp_flat = tf.real(tf.batch_ifft(fft_product)) 145 | output_shape = tf.add(tf.mul(tf.shape(bottom1), [1, 1, 1, 0]), 146 | [0, 0, 0, output_dim]) 147 | cbp = tf.reshape(cbp_flat, output_shape) 148 | 149 | # Step 5: Sum pool over spatial dimensions, if specified 150 | if sum_pool: 151 | cbp = tf.reduce_sum(cbp, reduction_indices=[1, 2]) 152 | 153 | return cbp 154 | -------------------------------------------------------------------------------- /litterbox/layers/compact_bilinear_pooling_test.py: -------------------------------------------------------------------------------- 1 | # UC Berkeley's Standard Copyright and Disclaimer Notice: 2 | # 3 | # Copyright (c) 2016. The Regents of the University of California (Regents). All 4 | # Rights Reserved. Permission to use, copy, modify, and distribute this software 5 | # and its documentation for educational, research, and not-for-profit purposes, 6 | # without fee and without a signed licensing agreement, is hereby granted, 7 | # provided that the above copyright notice, this paragraph and the following 8 | # two paragraphs appear in all copies, modifications, and distributions. 9 | # Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck Avenue, 10 | # Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, for commercial licensing 11 | # opportunities. 
12 | # 13 | # Ronghang Hu, University of California, Berkeley. 14 | # 15 | # IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, 16 | # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF 17 | # THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS BEEN 18 | # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 19 | # 20 | # REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 22 | # THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS 23 | # PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, 24 | # UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 25 | from __future__ import absolute_import, division, print_function 26 | 27 | import numpy as np 28 | import tensorflow as tf 29 | from compact_bilinear_pooling import compact_bilinear_pooling 30 | 31 | def bp(bottom1, bottom2, sum_pool=True): 32 | assert(np.all(bottom1.shape[:3] == bottom2.shape[:3])) 33 | batch_size, height, width = bottom1.shape[:3] 34 | output_dim = bottom1.shape[-1] * bottom2.shape[-1] 35 | 36 | bottom1_flat = bottom1.reshape((-1, bottom1.shape[-1])) 37 | bottom2_flat = bottom2.reshape((-1, bottom2.shape[-1])) 38 | 39 | output = np.empty((batch_size*height*width, output_dim), np.float32) 40 | for n in range(len(output)): 41 | output[n, ...] = np.outer(bottom1_flat[n], bottom2_flat[n]).reshape(-1) 42 | output = output.reshape((batch_size, height, width, output_dim)) 43 | 44 | if sum_pool: 45 | output = np.sum(output, axis=(1, 2)) 46 | return output 47 | 48 | # Input and output tensors 49 | # Input channels need to be specified for shape inference 50 | input_dim1 = 2048 51 | input_dim2 = 2048 52 | output_dim = 8000 53 | bottom1 = tf.placeholder(tf.float32, [None, None, None, input_dim1]) 54 | bottom2 = tf.placeholder(tf.float32, [None, None, None, input_dim2]) 55 | top = compact_bilinear_pooling(bottom1, bottom2, output_dim, sum_pool=True) 56 | def cbp(bottom1_value, bottom2_value): 57 | sess = tf.get_default_session() 58 | return sess.run(top, feed_dict={bottom1: bottom1_value, 59 | bottom2: bottom2_value}) 60 | 61 | def run_kernel_approximation_test(batch_size, height, width): 62 | # Input values 63 | x = np.random.rand(batch_size, height, width, input_dim1).astype(np.float32) 64 | y = np.random.rand(batch_size, height, width, input_dim2).astype(np.float32) 65 | 66 | z = np.random.rand(batch_size, height, width, input_dim1).astype(np.float32) 67 | w = np.random.rand(batch_size, height, width, input_dim2).astype(np.float32) 68 | 69 | # Compact Bilinear Pooling results 70 | cbp_xy = cbp(x, y) 71 | cbp_zw = cbp(z, w) 72 | 73 | # (Original) Bilinear Pooling results 74 | bp_xy = bp(x, y) 75 | bp_zw = bp(z, w) 76 | 77 | # Check the kernel results of Compact Bilinear Pooling 78 | # against Bilinear Pooling 79 | cbp_kernel = np.sum(cbp_xy*cbp_zw, axis=1) 80 | bp_kernel = np.sum(bp_xy*bp_zw, axis=1) 81 | 82 | print("ratio between Compact Bilinear Pooling kernel and (original) Bilinear Pooling kernel:") 83 | print(cbp_kernel / bp_kernel) 84 | 85 | def run_large_input_test(batch_size, height, width): 86 | # Input values 87 | x = np.random.rand(batch_size, height, width, input_dim1).astype(np.float32) 88 | y = np.random.rand(batch_size, height, width, input_dim2).astype(np.float32) 89 | 90 | # Compact Bilinear Pooling results 91 | cbp_xy = cbp(x, y) 92 | 93 | def main(): 94 | sess = tf.InteractiveSession()
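    # Added commentary, not in the original test: count sketch projections
    # approximately preserve inner products, so the kernel check below relies
    # on <CBP(x, y), CBP(z, w)> approximating the exact bilinear kernel
    # <vec(x y^T), vec(z w^T)> = <x, z> * <y, w>. The printed ratios should
    # therefore hover around 1.0.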
95 | run_kernel_approximation_test(batch_size=2, height=3, width=4) 96 | run_large_input_test(batch_size=16, height=7, width=7) 97 | sess.close() 98 | 99 | if __name__ == '__main__': 100 | main() -------------------------------------------------------------------------------- /litterbox/layers/lstm.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | import tensorflow as tf 10 | 11 | _default_initializer_params = { 12 | 'stddev': 0.1, 13 | 'dtype': tf.float32, 14 | } 15 | 16 | 17 | def bidir_lstm( 18 | inputs, 19 | num_units, 20 | num_layers=1, 21 | initializer_fn=tf.truncated_normal, 22 | initializer_params=_default_initializer_params, 23 | dtype=tf.float32, 24 | scope=None 25 | ): 26 | shape = inputs.get_shape().as_list() 27 | batch_size = shape[0] 28 | inputs_unpacked = tf.unpack(inputs, axis=1) 29 | 30 | cell_fw = tf.contrib.rnn.python.ops.lstm_ops.LSTMBlockCell(num_units=num_units) 31 | cell_bw = tf.contrib.rnn.python.ops.lstm_ops.LSTMBlockCell(num_units=num_units) 32 | 33 | if num_layers > 1: 34 | cell_fw = tf.nn.rnn_cell.MultiRNNCell([cell_fw] * num_layers) 35 | cell_bw = tf.nn.rnn_cell.MultiRNNCell([cell_bw] * num_layers) 36 | 37 | initializer_params = initializer_params or {} 38 | initializer_params['dtype'] = dtype 39 | if isinstance(cell_fw.state_size, tuple): 40 | initial_state_fw = tuple( 41 | initializer_fn([batch_size, s], **initializer_params) for s in cell_fw.state_size) 42 | initial_state_bw = tuple( 43 | initializer_fn([batch_size, s], **initializer_params) for s in cell_bw.state_size) 44 | else: 45 | initial_state_fw = initializer_fn(shape=[batch_size, cell_fw.state_size], **initializer_params) 46 | initial_state_bw = initializer_fn(shape=[batch_size, cell_bw.state_size], **initializer_params) 47 | 48 | outputs, _, _ = tf.nn.bidirectional_rnn( 49 | cell_fw, 50 | cell_bw, 51 | inputs_unpacked, 52 | initial_state_fw=initial_state_fw, 53 | initial_state_bw=initial_state_bw, 54 | dtype=dtype, 55 | scope=scope) 56 | 57 | outputs = tf.pack(outputs, axis=1) 58 | return outputs 59 | 60 | 61 | def lstm( 62 | inputs, 63 | num_units, 64 | num_layers=1, 65 | initializer_fn=tf.truncated_normal, 66 | initializer_params=_default_initializer_params, 67 | dtype=tf.float32, 68 | scope=None 69 | ): 70 | print('input shape', inputs.get_shape()) 71 | shape = inputs.get_shape().as_list() 72 | batch_size = shape[0] 73 | inputs_unpacked = tf.unpack(inputs, axis=1) 74 | 75 | cell = tf.contrib.rnn.python.ops.lstm_ops.LSTMBlockCell(num_units=num_units) 76 | print('cell state size', cell.state_size) 77 | 78 | if num_layers > 1: 79 | cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers) 80 | 81 | initializer_params = initializer_params or {} 82 | initializer_params['dtype'] = dtype 83 | if isinstance(cell.state_size, tuple): 84 | initial_state = tuple(initializer_fn([batch_size, s], **initializer_params) for s in cell.state_size) 85 | else: 86 | initial_state = initializer_fn(shape=[batch_size, cell.state_size], **initializer_params) 87 | 88 | outputs, _ = tf.nn.rnn( 89 | cell, 90 | inputs_unpacked, 91 | initial_state=initial_state, 92 | dtype=dtype, 93 | scope=scope) 94 | 95 | outputs = 
tf.pack(outputs, axis=1) 96 | print('output shape', outputs.get_shape()) 97 | 98 | return outputs -------------------------------------------------------------------------------- /litterbox/layers/preact_conv.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | from tensorflow.contrib.framework.python.ops import add_arg_scope 10 | from tensorflow.contrib.framework.python.ops import variables 11 | from tensorflow.contrib.layers.python.layers import initializers 12 | from tensorflow.contrib.layers.python.layers import utils 13 | from tensorflow.python.framework import ops 14 | from tensorflow.python.ops import variable_scope 15 | from tensorflow.python.ops import nn 16 | 17 | 18 | @add_arg_scope 19 | def preact_conv2d( 20 | inputs, 21 | num_outputs, 22 | kernel_size, 23 | stride=1, 24 | padding='SAME', 25 | activation_fn=nn.relu, 26 | normalizer_fn=None, 27 | normalizer_params=None, 28 | weights_initializer=initializers.xavier_initializer(), 29 | weights_regularizer=None, 30 | reuse=None, 31 | variables_collections=None, 32 | outputs_collections=None, 33 | trainable=True, 34 | scope=None): 35 | """Adds a 2D convolution preceded by batch normalization and activation. 36 | """ 37 | with variable_scope.variable_scope(scope, 'Conv', values=[inputs], reuse=reuse) as sc: 38 | inputs = ops.convert_to_tensor(inputs) 39 | dtype = inputs.dtype.base_dtype 40 | if normalizer_fn: 41 | normalizer_params = normalizer_params or {} 42 | inputs = normalizer_fn(inputs, activation_fn=activation_fn, **normalizer_params) 43 | kernel_h, kernel_w = utils.two_element_tuple(kernel_size) 44 | stride_h, stride_w = utils.two_element_tuple(stride) 45 | num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4) 46 | weights_shape = [kernel_h, kernel_w, num_filters_in, num_outputs] 47 | weights_collections = utils.get_variable_collections(variables_collections, 'weights') 48 | weights = variables.model_variable('weights', 49 | shape=weights_shape, 50 | dtype=dtype, 51 | initializer=weights_initializer, 52 | regularizer=weights_regularizer, 53 | collections=weights_collections, 54 | trainable=trainable) 55 | outputs = nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1], padding=padding) 56 | return utils.collect_named_outputs(outputs_collections, sc.name, outputs) -------------------------------------------------------------------------------- /litterbox/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .google.model_google_slim import ModelGoogleSlim 2 | from .my_slim.model_my_slim import ModelMySlim 3 | from .sdc.model_sdc import ModelSdc 4 | 5 | -------------------------------------------------------------------------------- /litterbox/models/google/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /litterbox/models/google/model_google_slim.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """Model wrapper for Google's tensorflow/model/slim models. 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import re 16 | import tensorflow as tf 17 | from collections import OrderedDict 18 | from fabric import model 19 | from models.google.nets import nets_factory 20 | slim = tf.contrib.slim 21 | 22 | google_default_params = { 23 | 'network': 'inception_resnet_v2', 24 | 'num_classes': 1000, 25 | } 26 | 27 | 28 | class ModelGoogleSlim(model.Model): 29 | 30 | def __init__(self, params=google_default_params): 31 | super(ModelGoogleSlim, self).__init__() 32 | params = model.merge_params(google_default_params, params) 33 | 34 | # model_name must correspond to one of google's network names in nets package, 35 | # see nets_factory.py for valid names. 36 | self.network = params['network'] 37 | assert self.network in nets_factory.networks_map 38 | self.num_classes = params['num_classes'] 39 | assert self.num_classes > 1 40 | 41 | def build_tower(self, images, is_training=False, scope=None): 42 | weight_decay = 0.0001 43 | network_fn = nets_factory.get_network_fn( 44 | self.network, 45 | num_classes=self.num_classes, 46 | weight_decay=weight_decay, 47 | is_training=is_training) 48 | logits, endpoints = network_fn(images) 49 | 50 | # HACK get mode variable scope set by google net code from logits op name so it can 51 | # be removed for smaller Tensorboard tags 52 | scope_search = re.search('%s_[0-9]*/(\w+)/' % self.TOWER_PREFIX, logits.op.name) 53 | if scope_search: 54 | self.model_variable_scope = scope_search.group(1) 55 | 56 | if 'AuxLogits' in endpoints: 57 | # Grab the logits associated with the side head. Employed during training. 58 | aux_logits = endpoints['AuxLogits'] 59 | else: 60 | aux_logits = None 61 | 62 | self.add_tower( 63 | scope, 64 | endpoints, 65 | logits, 66 | aux_logits 67 | ) 68 | 69 | # Add summaries for viewing model statistics on TensorBoard. 70 | self.activation_summaries() 71 | 72 | return logits 73 | 74 | def add_tower_loss(self, labels, scope=None): 75 | tower = self.tower(scope) 76 | num_classes = tower.outputs.get_shape()[-1].value 77 | labels = slim.one_hot_encoding(labels, num_classes=num_classes) 78 | 79 | slim.losses.softmax_cross_entropy( 80 | tower.outputs, labels, label_smoothing=0.1, weights=1.0) 81 | 82 | if 'AuxLogits' in tower.endpoints: 83 | slim.losses.softmax_cross_entropy( 84 | tower.aux_outputs, labels, 85 | label_smoothing=0.1, weights=0.4, scope='aux_loss') 86 | 87 | def output_scopes(self, prefix_scope=''): 88 | scopes = ['logits', 'Logits', 'AuxLogits/Aux_logits', 'AuxLogits/Logits', 'AuxLogits/Conv2d_2b_1x1'] 89 | prefix = prefix_scope + '/' if prefix_scope else '' 90 | prefix += self.model_variable_scope + '/' 91 | return [prefix + x for x in scopes] 92 | 93 | def get_predictions(self, outputs, processor): 94 | if processor is not None: 95 | logits = processor.decode_output(outputs) 96 | else: 97 | logits = outputs 98 | return tf.nn.softmax(logits) 99 | 100 | @staticmethod 101 | def eval_ops(logits, labels, processor): 102 | """Generate a simple (non tower based) loss op for use in evaluation. 
103 | 104 | Args: 105 | logits: List of logits from inference(). Shape [batch_size, num_classes], dtype float32/64 106 | labels: Labels from distorted_inputs or inputs(). batch_size vector with int32/64 values in [0, num_classes). 107 | """ 108 | top_1_op = tf.nn.in_top_k(logits, labels, 1) 109 | top_5_op = tf.nn.in_top_k(logits, labels, 5) 110 | loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels, name='xentropy_eval') 111 | return OrderedDict([('top 5', top_5_op), ('top 1', top_1_op), ('loss', loss_op)]) 112 | 113 | def check_norm(self, norm): 114 | if ('vgg' in self.network or 'resnet' in self.network) and norm != 'caffe_rgb': 115 | print("WARNING: If you are using the pre-trained weights for Google VGG and Resnet models, " 116 | "they were imported from Caffe and expect [0, 255] inputs, not the default [-1, 1]. " 117 | "It is recommended to change the image norm method from '%s' to 'caffe_rgb' with " 118 | "the --image_norm param." % norm) 119 | 120 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/README.md: -------------------------------------------------------------------------------- 1 | # Models from Google Tensorflow 2 | 3 | These models were taken from tensorflow/models/slim/nets 4 | 5 | They are also in the main tensorflow repository at tensorflow/contrib/slim/python/slim/nets/ minus the factory. 6 | 7 | These networks were last update from revision https://github.com/tensorflow/models/commit/253ca4ab8101dc2c2404530680b4a12e7518572d 8 | 9 | ## TODO 10 | 11 | Remove model code and just use the tf.contrib.slim versions with same factory? Will this make it harder to do model surgery? TBD 12 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rwightman/tensorflow-litterbox/ddeeb3a6c7de64e5391050ffbb5948feca65ad3c/litterbox/models/google/nets/__init__.py -------------------------------------------------------------------------------- /litterbox/models/google/nets/alexnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a model definition for AlexNet. 16 | 17 | This work was first described in: 18 | ImageNet Classification with Deep Convolutional Neural Networks 19 | Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton 20 | 21 | and later refined in: 22 | One weird trick for parallelizing convolutional neural networks 23 | Alex Krizhevsky, 2014 24 | 25 | Here we provide the implementation proposed in "One weird trick" and not 26 | "ImageNet Classification", as per the paper, the LRN layers have been removed. 
27 | 28 | Usage: 29 | with slim.arg_scope(alexnet.alexnet_v2_arg_scope()): 30 | outputs, end_points = alexnet.alexnet_v2(inputs) 31 | 32 | @@alexnet_v2 33 | """ 34 | 35 | from __future__ import absolute_import 36 | from __future__ import division 37 | from __future__ import print_function 38 | 39 | import tensorflow as tf 40 | 41 | slim = tf.contrib.slim 42 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 43 | 44 | 45 | def alexnet_v2_arg_scope(weight_decay=0.0005): 46 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 47 | activation_fn=tf.nn.relu, 48 | biases_initializer=tf.constant_initializer(0.1), 49 | weights_regularizer=slim.l2_regularizer(weight_decay)): 50 | with slim.arg_scope([slim.conv2d], padding='SAME'): 51 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 52 | return arg_sc 53 | 54 | 55 | def alexnet_v2(inputs, 56 | num_classes=1000, 57 | is_training=True, 58 | dropout_keep_prob=0.5, 59 | spatial_squeeze=True, 60 | scope='alexnet_v2'): 61 | """AlexNet version 2. 62 | 63 | Described in: http://arxiv.org/pdf/1404.5997v2.pdf 64 | Parameters from: 65 | github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ 66 | layers-imagenet-1gpu.cfg 67 | 68 | Note: All the fully_connected layers have been transformed to conv2d layers. 69 | To use in classification mode, resize input to 224x224. To use in fully 70 | convolutional mode, set spatial_squeeze to false. 71 | The LRN layers have been removed and change the initializers from 72 | random_normal_initializer to xavier_initializer. 73 | 74 | Args: 75 | inputs: a tensor of size [batch_size, height, width, channels]. 76 | num_classes: number of predicted classes. 77 | is_training: whether or not the model is being trained. 78 | dropout_keep_prob: the probability that activations are kept in the dropout 79 | layers during training. 80 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 81 | outputs. Useful to remove unnecessary dimensions for classification. 82 | scope: Optional scope for the variables. 83 | 84 | Returns: 85 | the last op containing the log predictions and end_points dict. 86 | """ 87 | with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: 88 | end_points_collection = sc.name + '_end_points' 89 | # Collect outputs for conv2d, fully_connected and max_pool2d. 90 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 91 | outputs_collections=[end_points_collection]): 92 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 93 | scope='conv1') 94 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool1') 95 | net = slim.conv2d(net, 192, [5, 5], scope='conv2') 96 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool2') 97 | net = slim.conv2d(net, 384, [3, 3], scope='conv3') 98 | net = slim.conv2d(net, 384, [3, 3], scope='conv4') 99 | net = slim.conv2d(net, 256, [3, 3], scope='conv5') 100 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool5') 101 | 102 | # Use conv2d instead of fully_connected layers. 
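      # Added commentary: with a 224x224 input, pool5 comes out 5x5 spatially,
      # so the 5x5 VALID convolution below reproduces a fully connected layer
      # exactly (a 1x1x4096 output). On larger inputs the same weights slide
      # convolutionally, which is what makes the fully convolutional mode
      # described in the docstring work.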
103 | with slim.arg_scope([slim.conv2d], 104 | weights_initializer=trunc_normal(0.005), 105 | biases_initializer=tf.constant_initializer(0.1)): 106 | net = slim.conv2d(net, 4096, [5, 5], padding='VALID', 107 | scope='fc6') 108 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 109 | scope='dropout6') 110 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 111 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 112 | scope='dropout7') 113 | net = slim.conv2d(net, num_classes, [1, 1], 114 | activation_fn=None, 115 | normalizer_fn=None, 116 | biases_initializer=tf.zeros_initializer, 117 | scope='fc8') 118 | 119 | # Convert end_points_collection into a end_point dict. 120 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 121 | if spatial_squeeze: 122 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 123 | end_points[sc.name + '/fc8'] = net 124 | return net, end_points 125 | alexnet_v2.default_image_size = 224 126 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/alexnet_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for slim.nets.alexnet.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from models.google.nets import alexnet 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class AlexnetV2Test(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 224, 224 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = alexnet.alexnet_v2(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 300, 400 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 4, 7, num_classes]) 50 | 51 | def testEndPoints(self): 52 | batch_size = 5 53 | height, width = 224, 224 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | _, end_points = alexnet.alexnet_v2(inputs, num_classes) 58 | expected_names = ['alexnet_v2/conv1', 59 | 'alexnet_v2/pool1', 60 | 'alexnet_v2/conv2', 61 | 'alexnet_v2/pool2', 62 | 'alexnet_v2/conv3', 63 | 'alexnet_v2/conv4', 64 | 'alexnet_v2/conv5', 65 | 'alexnet_v2/pool5', 66 | 'alexnet_v2/fc6', 67 | 'alexnet_v2/fc7', 68 | 'alexnet_v2/fc8' 69 | ] 70 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 71 | 72 | def testModelVariables(self): 73 | batch_size = 5 74 | height, width = 224, 224 75 | num_classes = 1000 76 | with self.test_session(): 77 | inputs = tf.random_uniform((batch_size, height, width, 3)) 78 | alexnet.alexnet_v2(inputs, num_classes) 79 | expected_names = ['alexnet_v2/conv1/weights', 80 | 'alexnet_v2/conv1/biases', 81 | 'alexnet_v2/conv2/weights', 82 | 'alexnet_v2/conv2/biases', 83 | 'alexnet_v2/conv3/weights', 84 | 'alexnet_v2/conv3/biases', 85 | 'alexnet_v2/conv4/weights', 86 | 'alexnet_v2/conv4/biases', 87 | 'alexnet_v2/conv5/weights', 88 | 'alexnet_v2/conv5/biases', 89 | 'alexnet_v2/fc6/weights', 90 | 'alexnet_v2/fc6/biases', 91 | 'alexnet_v2/fc7/weights', 92 | 'alexnet_v2/fc7/biases', 93 | 'alexnet_v2/fc8/weights', 94 | 'alexnet_v2/fc8/biases', 95 | ] 96 | model_variables = [v.op.name for v in slim.get_model_variables()] 97 | self.assertSetEqual(set(model_variables), set(expected_names)) 98 | 99 | def testEvaluation(self): 100 | batch_size = 2 101 | height, width = 224, 224 102 | num_classes = 1000 103 | with self.test_session(): 104 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 105 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False) 106 | self.assertListEqual(logits.get_shape().as_list(), 107 | [batch_size, num_classes]) 108 | predictions = tf.argmax(logits, 1) 109 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 110 | 111 | def testTrainEvalWithReuse(self): 112 | train_batch_size = 2 113 | eval_batch_size = 1 114 | train_height, train_width = 224, 224 115 | eval_height, eval_width = 300, 400 116 | num_classes = 1000 117 | with 
self.test_session(): 118 | train_inputs = tf.random_uniform( 119 | (train_batch_size, train_height, train_width, 3)) 120 | logits, _ = alexnet.alexnet_v2(train_inputs) 121 | self.assertListEqual(logits.get_shape().as_list(), 122 | [train_batch_size, num_classes]) 123 | tf.get_variable_scope().reuse_variables() 124 | eval_inputs = tf.random_uniform( 125 | (eval_batch_size, eval_height, eval_width, 3)) 126 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False, 127 | spatial_squeeze=False) 128 | self.assertListEqual(logits.get_shape().as_list(), 129 | [eval_batch_size, 4, 7, num_classes]) 130 | logits = tf.reduce_mean(logits, [1, 2]) 131 | predictions = tf.argmax(logits, 1) 132 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 133 | 134 | def testForward(self): 135 | batch_size = 1 136 | height, width = 224, 224 137 | with self.test_session() as sess: 138 | inputs = tf.random_uniform((batch_size, height, width, 3)) 139 | logits, _ = alexnet.alexnet_v2(inputs) 140 | sess.run(tf.initialize_all_variables()) 141 | output = sess.run(logits) 142 | self.assertTrue(output.any()) 143 | 144 | if __name__ == '__main__': 145 | tf.test.main() 146 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/cifarnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a variant of the CIFAR-10 model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev) 26 | 27 | 28 | def cifarnet(images, num_classes=10, is_training=False, 29 | dropout_keep_prob=0.5, 30 | prediction_fn=slim.softmax, 31 | scope='CifarNet'): 32 | """Creates a variant of the CifarNet model. 33 | 34 | Note that since the output is a set of 'logits', the values fall in the 35 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 36 | probability distribution over the characters, one will need to convert them 37 | using the softmax function: 38 | 39 | logits = cifarnet.cifarnet(images, is_training=False) 40 | probabilities = tf.nn.softmax(logits) 41 | predictions = tf.argmax(logits, 1) 42 | 43 | Args: 44 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 45 | num_classes: the number of classes in the dataset. 46 | is_training: specifies whether or not we're currently training the model. 47 | This variable will determine the behaviour of the dropout layer. 48 | dropout_keep_prob: the percentage of activation values that are retained. 
49 | prediction_fn: a function to get predictions out of logits. 50 | scope: Optional variable_scope. 51 | 52 | Returns: 53 | logits: the pre-softmax activations, a tensor of size 54 | [batch_size, `num_classes`] 55 | end_points: a dictionary from components of the network to the corresponding 56 | activation. 57 | """ 58 | end_points = {} 59 | 60 | with tf.variable_scope(scope, 'CifarNet', [images, num_classes]): 61 | net = slim.conv2d(images, 64, [5, 5], scope='conv1') 62 | end_points['conv1'] = net 63 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 64 | end_points['pool1'] = net 65 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1') 66 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 67 | end_points['conv2'] = net 68 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2') 69 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 70 | end_points['pool2'] = net 71 | net = slim.flatten(net) 72 | end_points['Flatten'] = net 73 | net = slim.fully_connected(net, 384, scope='fc3') 74 | end_points['fc3'] = net 75 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 76 | scope='dropout3') 77 | net = slim.fully_connected(net, 192, scope='fc4') 78 | end_points['fc4'] = net 79 | logits = slim.fully_connected(net, num_classes, 80 | biases_initializer=tf.zeros_initializer, 81 | weights_initializer=trunc_normal(1/192.0), 82 | weights_regularizer=None, 83 | activation_fn=None, 84 | scope='logits') 85 | 86 | end_points['Logits'] = logits 87 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 88 | 89 | return logits, end_points 90 | cifarnet.default_image_size = 32 91 | 92 | 93 | def cifarnet_arg_scope(weight_decay=0.004): 94 | """Defines the default cifarnet argument scope. 95 | 96 | Args: 97 | weight_decay: The weight decay to use for regularizing the model. 98 | 99 | Returns: 100 | An `arg_scope` to use for the inception v3 model. 101 | """ 102 | with slim.arg_scope( 103 | [slim.conv2d], 104 | weights_initializer=tf.truncated_normal_initializer(stddev=5e-2), 105 | activation_fn=tf.nn.relu): 106 | with slim.arg_scope( 107 | [slim.fully_connected], 108 | biases_initializer=tf.constant_initializer(0.1), 109 | weights_initializer=trunc_normal(0.04), 110 | weights_regularizer=slim.l2_regularizer(weight_decay), 111 | activation_fn=tf.nn.relu) as sc: 112 | return sc 113 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/inception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Brings inception_v1, inception_v2 and inception_v3 under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from models.google.nets.inception_resnet_v2 import inception_resnet_v2 23 | from models.google.nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from models.google.nets.inception_v1 import inception_v1 25 | from models.google.nets.inception_v1 import inception_v1_arg_scope 26 | from models.google.nets.inception_v1 import inception_v1_base 27 | from models.google.nets.inception_v2 import inception_v2 28 | from models.google.nets.inception_v2 import inception_v2_arg_scope 29 | from models.google.nets.inception_v2 import inception_v2_base 30 | from models.google.nets.inception_v3 import inception_v3 31 | from models.google.nets.inception_v3 import inception_v3_arg_scope 32 | from models.google.nets.inception_v3 import inception_v3_base 33 | from models.google.nets.inception_v4 import inception_v4 34 | from models.google.nets.inception_v4 import inception_v4_arg_scope 35 | from models.google.nets.inception_v4 import inception_v4_base 36 | # pylint: enable=unused-import 37 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/inception_resnet_v2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for slim.inception_resnet_v2.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from models.google.nets import inception 23 | 24 | 25 | class InceptionTest(tf.test.TestCase): 26 | 27 | def testBuildLogits(self): 28 | batch_size = 5 29 | height, width = 299, 299 30 | num_classes = 1000 31 | with self.test_session(): 32 | inputs = tf.random_uniform((batch_size, height, width, 3)) 33 | logits, _ = inception.inception_resnet_v2(inputs, num_classes) 34 | self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) 35 | self.assertListEqual(logits.get_shape().as_list(), 36 | [batch_size, num_classes]) 37 | 38 | def testBuildEndPoints(self): 39 | batch_size = 5 40 | height, width = 299, 299 41 | num_classes = 1000 42 | with self.test_session(): 43 | inputs = tf.random_uniform((batch_size, height, width, 3)) 44 | _, end_points = inception.inception_resnet_v2(inputs, num_classes) 45 | self.assertTrue('Logits' in end_points) 46 | logits = end_points['Logits'] 47 | self.assertListEqual(logits.get_shape().as_list(), 48 | [batch_size, num_classes]) 49 | self.assertTrue('AuxLogits' in end_points) 50 | aux_logits = end_points['AuxLogits'] 51 | self.assertListEqual(aux_logits.get_shape().as_list(), 52 | [batch_size, num_classes]) 53 | pre_pool = end_points['PrePool'] 54 | self.assertListEqual(pre_pool.get_shape().as_list(), 55 | [batch_size, 8, 8, 1536]) 56 | 57 | def testVariablesSetDevice(self): 58 | batch_size = 5 59 | height, width = 299, 299 60 | num_classes = 1000 61 | with self.test_session(): 62 | inputs = tf.random_uniform((batch_size, height, width, 3)) 63 | # Force all Variables to reside on the device. 
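      # Added commentary: explicit variable placement is what multi-tower
      # training setups rely on (e.g. keeping shared variables on the CPU), so
      # the same network is built under two device-pinned scopes and each
      # variable is then checked against the device it was pinned to.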
64 | with tf.variable_scope('on_cpu'), tf.device('/cpu:0'): 65 | inception.inception_resnet_v2(inputs, num_classes) 66 | with tf.variable_scope('on_gpu'), tf.device('/gpu:0'): 67 | inception.inception_resnet_v2(inputs, num_classes) 68 | for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_cpu'): 69 | self.assertDeviceEqual(v.device, '/cpu:0') 70 | for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_gpu'): 71 | self.assertDeviceEqual(v.device, '/gpu:0') 72 | 73 | def testHalfSizeImages(self): 74 | batch_size = 5 75 | height, width = 150, 150 76 | num_classes = 1000 77 | with self.test_session(): 78 | inputs = tf.random_uniform((batch_size, height, width, 3)) 79 | logits, end_points = inception.inception_resnet_v2(inputs, num_classes) 80 | self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) 81 | self.assertListEqual(logits.get_shape().as_list(), 82 | [batch_size, num_classes]) 83 | pre_pool = end_points['PrePool'] 84 | self.assertListEqual(pre_pool.get_shape().as_list(), 85 | [batch_size, 3, 3, 1536]) 86 | 87 | def testUnknownBatchSize(self): 88 | batch_size = 1 89 | height, width = 299, 299 90 | num_classes = 1000 91 | with self.test_session() as sess: 92 | inputs = tf.placeholder(tf.float32, (None, height, width, 3)) 93 | logits, _ = inception.inception_resnet_v2(inputs, num_classes) 94 | self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) 95 | self.assertListEqual(logits.get_shape().as_list(), 96 | [None, num_classes]) 97 | images = tf.random_uniform((batch_size, height, width, 3)) 98 | sess.run(tf.initialize_all_variables()) 99 | output = sess.run(logits, {inputs: images.eval()}) 100 | self.assertEquals(output.shape, (batch_size, num_classes)) 101 | 102 | def testEvaluation(self): 103 | batch_size = 2 104 | height, width = 299, 299 105 | num_classes = 1000 106 | with self.test_session() as sess: 107 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 108 | logits, _ = inception.inception_resnet_v2(eval_inputs, 109 | num_classes, 110 | is_training=False) 111 | predictions = tf.argmax(logits, 1) 112 | sess.run(tf.initialize_all_variables()) 113 | output = sess.run(predictions) 114 | self.assertEquals(output.shape, (batch_size,)) 115 | 116 | def testTrainEvalWithReuse(self): 117 | train_batch_size = 5 118 | eval_batch_size = 2 119 | height, width = 150, 150 120 | num_classes = 1000 121 | with self.test_session() as sess: 122 | train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) 123 | inception.inception_resnet_v2(train_inputs, num_classes) 124 | eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) 125 | logits, _ = inception.inception_resnet_v2(eval_inputs, 126 | num_classes, 127 | is_training=False, 128 | reuse=True) 129 | predictions = tf.argmax(logits, 1) 130 | sess.run(tf.initialize_all_variables()) 131 | output = sess.run(predictions) 132 | self.assertEquals(output.shape, (eval_batch_size,)) 133 | 134 | 135 | if __name__ == '__main__': 136 | tf.test.main() 137 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001): 36 | """Defines the default arg scope for inception models. 37 | 38 | Args: 39 | weight_decay: The weight decay to use for regularizing the model. 40 | use_batch_norm: "If `True`, batch_norm is applied after each convolution. 41 | batch_norm_decay: Decay for batch norm moving average. 42 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 43 | in batch norm. 44 | 45 | Returns: 46 | An `arg_scope` to use for the inception models. 47 | """ 48 | batch_norm_params = { 49 | # Decay for the moving averages. 50 | 'decay': batch_norm_decay, 51 | # epsilon to prevent 0s in variance. 52 | 'epsilon': batch_norm_epsilon, 53 | # collection containing update_ops. 54 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 55 | } 56 | if use_batch_norm: 57 | normalizer_fn = slim.batch_norm 58 | normalizer_params = batch_norm_params 59 | else: 60 | normalizer_fn = None 61 | normalizer_params = {} 62 | # Set weight_decay for weights in Conv and FC layers. 63 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 64 | weights_regularizer=slim.l2_regularizer(weight_decay)): 65 | with slim.arg_scope( 66 | [slim.conv2d], 67 | weights_initializer=slim.variance_scaling_initializer(), 68 | activation_fn=tf.nn.relu, 69 | normalizer_fn=normalizer_fn, 70 | normalizer_params=normalizer_params) as sc: 71 | return sc 72 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/lenet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Contains a variant of the LeNet model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def lenet(images, num_classes=10, is_training=False, 27 | dropout_keep_prob=0.5, 28 | prediction_fn=slim.softmax, 29 | scope='LeNet'): 30 | """Creates a variant of the LeNet model. 31 | 32 | Note that since the output is a set of 'logits', the values fall in the 33 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 34 | probability distribution over the characters, one will need to convert them 35 | using the softmax function: 36 | 37 | logits = lenet.lenet(images, is_training=False) 38 | probabilities = tf.nn.softmax(logits) 39 | predictions = tf.argmax(logits, 1) 40 | 41 | Args: 42 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 43 | num_classes: the number of classes in the dataset. 44 | is_training: specifies whether or not we're currently training the model. 45 | This variable will determine the behaviour of the dropout layer. 46 | dropout_keep_prob: the percentage of activation values that are retained. 47 | prediction_fn: a function to get predictions out of logits. 48 | scope: Optional variable_scope. 49 | 50 | Returns: 51 | logits: the pre-softmax activations, a tensor of size 52 | [batch_size, `num_classes`] 53 | end_points: a dictionary from components of the network to the corresponding 54 | activation. 55 | """ 56 | end_points = {} 57 | 58 | with tf.variable_scope(scope, 'LeNet', [images, num_classes]): 59 | net = slim.conv2d(images, 32, [5, 5], scope='conv1') 60 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 61 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 62 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 63 | net = slim.flatten(net) 64 | end_points['Flatten'] = net 65 | 66 | net = slim.fully_connected(net, 1024, scope='fc3') 67 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 68 | scope='dropout3') 69 | logits = slim.fully_connected(net, num_classes, activation_fn=None, 70 | scope='fc4') 71 | 72 | end_points['Logits'] = logits 73 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 74 | 75 | return logits, end_points 76 | lenet.default_image_size = 28 77 | 78 | 79 | def lenet_arg_scope(weight_decay=0.0): 80 | """Defines the default lenet argument scope. 81 | 82 | Args: 83 | weight_decay: The weight decay to use for regularizing the model. 84 | 85 | Returns: 86 | An `arg_scope` to use for the inception v3 model. 87 | """ 88 | with slim.arg_scope( 89 | [slim.conv2d, slim.fully_connected], 90 | weights_regularizer=slim.l2_regularizer(weight_decay), 91 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 92 | activation_fn=tf.nn.relu) as sc: 93 | return sc 94 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/nets_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a factory for building various models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | import functools 21 | 22 | import tensorflow as tf 23 | 24 | from models.google.nets import alexnet 25 | from models.google.nets import cifarnet 26 | from models.google.nets import inception 27 | from models.google.nets import lenet 28 | from models.google.nets import overfeat 29 | from models.google.nets import resnet_v1 30 | from models.google.nets import resnet_v2 31 | from models.google.nets import vgg 32 | 33 | slim = tf.contrib.slim 34 | 35 | networks_map = { 36 | 'alexnet_v2': alexnet.alexnet_v2, 37 | 'cifarnet': cifarnet.cifarnet, 38 | 'overfeat': overfeat.overfeat, 39 | 'vgg_a': vgg.vgg_a, 40 | 'vgg_16': vgg.vgg_16, 41 | 'vgg_19': vgg.vgg_19, 42 | 'inception_v1': inception.inception_v1, 43 | 'inception_v2': inception.inception_v2, 44 | 'inception_v3': inception.inception_v3, 45 | 'inception_v4': inception.inception_v4, 46 | 'inception_resnet_v2': inception.inception_resnet_v2, 47 | 'lenet': lenet.lenet, 48 | 'resnet_v1_50': resnet_v1.resnet_v1_50, 49 | 'resnet_v1_101': resnet_v1.resnet_v1_101, 50 | 'resnet_v1_152': resnet_v1.resnet_v1_152, 51 | 'resnet_v1_200': resnet_v1.resnet_v1_200, 52 | 'resnet_v2_50': resnet_v2.resnet_v2_50, 53 | 'resnet_v2_101': resnet_v2.resnet_v2_101, 54 | 'resnet_v2_152': resnet_v2.resnet_v2_152, 55 | 'resnet_v2_200': resnet_v2.resnet_v2_200, 56 | } 57 | 58 | arg_scopes_map = { 59 | 'alexnet_v2': alexnet.alexnet_v2_arg_scope, 60 | 'cifarnet': cifarnet.cifarnet_arg_scope, 61 | 'overfeat': overfeat.overfeat_arg_scope, 62 | 'vgg_a': vgg.vgg_arg_scope, 63 | 'vgg_16': vgg.vgg_arg_scope, 64 | 'vgg_19': vgg.vgg_arg_scope, 65 | 'inception_v1': inception.inception_v3_arg_scope, 66 | 'inception_v2': inception.inception_v3_arg_scope, 67 | 'inception_v3': inception.inception_v3_arg_scope, 68 | 'inception_v4': inception.inception_v3_arg_scope, 69 | 'inception_resnet_v2': inception.inception_resnet_v2_arg_scope, 70 | 'lenet': lenet.lenet_arg_scope, 71 | 'resnet_v1_50': resnet_v1.resnet_arg_scope, 72 | 'resnet_v1_101': resnet_v1.resnet_arg_scope, 73 | 'resnet_v1_152': resnet_v1.resnet_arg_scope, 74 | 'resnet_v1_200': resnet_v1.resnet_arg_scope, 75 | 'resnet_v2_50': resnet_v2.resnet_arg_scope, 76 | 'resnet_v2_101': resnet_v2.resnet_arg_scope, 77 | 'resnet_v2_152': resnet_v2.resnet_arg_scope, 78 | 'resnet_v2_200': resnet_v2.resnet_arg_scope, 79 | } 80 | 81 | 82 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): 83 | """Returns a network_fn such as `logits, end_points = network_fn(images)`. 84 | 85 | Args: 86 | name: The name of the network. 87 | num_classes: The number of classes to use for classification. 88 | weight_decay: The l2 coefficient for the model weights. 89 | is_training: `True` if the model is being used for training and `False` 90 | otherwise. 
91 | 92 | Returns: 93 | network_fn: A function that applies the model to a batch of images. It has 94 | the following signature: 95 | logits, end_points = network_fn(images) 96 | Raises: 97 | ValueError: If network `name` is not recognized. 98 | """ 99 | if name not in networks_map: 100 | raise ValueError('Name of network unknown %s' % name) 101 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay) 102 | func = networks_map[name] 103 | @functools.wraps(func) 104 | def network_fn(images): 105 | with slim.arg_scope(arg_scope): 106 | return func(images, num_classes, is_training=is_training) 107 | if hasattr(func, 'default_image_size'): 108 | network_fn.default_image_size = func.default_image_size 109 | 110 | return network_fn 111 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for slim.inception.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from nets import nets_factory 26 | 27 | 28 | class NetworksTest(tf.test.TestCase): 29 | 30 | def testGetNetworkFn(self): 31 | batch_size = 5 32 | num_classes = 1000 33 | for net in nets_factory.networks_map: 34 | with self.test_session(): 35 | net_fn = nets_factory.get_network_fn(net, num_classes) 36 | # Most networks use 224 as their default_image_size 37 | image_size = getattr(net_fn, 'default_image_size', 224) 38 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 39 | logits, end_points = net_fn(inputs) 40 | self.assertTrue(isinstance(logits, tf.Tensor)) 41 | self.assertTrue(isinstance(end_points, dict)) 42 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 43 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 44 | 45 | if __name__ == '__main__': 46 | tf.test.main() 47 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/overfeat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the model definition for the OverFeat network. 16 | 17 | The definition for the network was obtained from: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | 24 | Usage: 25 | with slim.arg_scope(overfeat.overfeat_arg_scope()): 26 | outputs, end_points = overfeat.overfeat(inputs) 27 | 28 | @@overfeat 29 | """ 30 | from __future__ import absolute_import 31 | from __future__ import division 32 | from __future__ import print_function 33 | 34 | import tensorflow as tf 35 | 36 | slim = tf.contrib.slim 37 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 38 | 39 | 40 | def overfeat_arg_scope(weight_decay=0.0005): 41 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 42 | activation_fn=tf.nn.relu, 43 | weights_regularizer=slim.l2_regularizer(weight_decay), 44 | biases_initializer=tf.zeros_initializer): 45 | with slim.arg_scope([slim.conv2d], padding='SAME'): 46 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 47 | return arg_sc 48 | 49 | 50 | def overfeat(inputs, 51 | num_classes=1000, 52 | is_training=True, 53 | dropout_keep_prob=0.5, 54 | spatial_squeeze=True, 55 | scope='overfeat'): 56 | """Contains the model definition for the OverFeat network. 57 | 58 | The definition for the network was obtained from: 59 | OverFeat: Integrated Recognition, Localization and Detection using 60 | Convolutional Networks 61 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 62 | Yann LeCun, 2014 63 | http://arxiv.org/abs/1312.6229 64 | 65 | Note: All the fully_connected layers have been transformed to conv2d layers. 66 | To use in classification mode, resize input to 231x231. To use in fully 67 | convolutional mode, set spatial_squeeze to false. 68 | 69 | Args: 70 | inputs: a tensor of size [batch_size, height, width, channels]. 71 | num_classes: number of predicted classes. 72 | is_training: whether or not the model is being trained. 73 | dropout_keep_prob: the probability that activations are kept in the dropout 74 | layers during training. 75 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 76 | outputs. Useful to remove unnecessary dimensions for classification. 77 | scope: Optional scope for the variables. 78 | 79 | Returns: 80 | the last op containing the log predictions and end_points dict. 
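
    A fully convolutional usage sketch (illustrative only; the input must be
    sized so the spatial output is larger than 1x1, e.g. 281x281):
      logits, _ = overfeat(inputs, spatial_squeeze=False)
      probabilities = tf.nn.softmax(tf.reduce_mean(logits, [1, 2]))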
81 | 82 | """ 83 | with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: 84 | end_points_collection = sc.name + '_end_points' 85 | # Collect outputs for conv2d, fully_connected and max_pool2d 86 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 87 | outputs_collections=end_points_collection): 88 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 89 | scope='conv1') 90 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 91 | net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') 92 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 93 | net = slim.conv2d(net, 512, [3, 3], scope='conv3') 94 | net = slim.conv2d(net, 1024, [3, 3], scope='conv4') 95 | net = slim.conv2d(net, 1024, [3, 3], scope='conv5') 96 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 97 | with slim.arg_scope([slim.conv2d], 98 | weights_initializer=trunc_normal(0.005), 99 | biases_initializer=tf.constant_initializer(0.1)): 100 | # Use conv2d instead of fully_connected layers. 101 | net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') 102 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 103 | scope='dropout6') 104 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 105 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 106 | scope='dropout7') 107 | net = slim.conv2d(net, num_classes, [1, 1], 108 | activation_fn=None, 109 | normalizer_fn=None, 110 | biases_initializer=tf.zeros_initializer, 111 | scope='fc8') 112 | # Convert end_points_collection into a end_point dict. 113 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 114 | if spatial_squeeze: 115 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 116 | end_points[sc.name + '/fc8'] = net 117 | return net, end_points 118 | overfeat.default_image_size = 231 119 | -------------------------------------------------------------------------------- /litterbox/models/google/nets/overfeat_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for slim.nets.overfeat.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import overfeat 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class OverFeatTest(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 231, 231 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = overfeat.overfeat(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 281, 281 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 2, 2, num_classes]) 50 | 51 | def testEndPoints(self): 52 | batch_size = 5 53 | height, width = 231, 231 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | _, end_points = overfeat.overfeat(inputs, num_classes) 58 | expected_names = ['overfeat/conv1', 59 | 'overfeat/pool1', 60 | 'overfeat/conv2', 61 | 'overfeat/pool2', 62 | 'overfeat/conv3', 63 | 'overfeat/conv4', 64 | 'overfeat/conv5', 65 | 'overfeat/pool5', 66 | 'overfeat/fc6', 67 | 'overfeat/fc7', 68 | 'overfeat/fc8' 69 | ] 70 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 71 | 72 | def testModelVariables(self): 73 | batch_size = 5 74 | height, width = 231, 231 75 | num_classes = 1000 76 | with self.test_session(): 77 | inputs = tf.random_uniform((batch_size, height, width, 3)) 78 | overfeat.overfeat(inputs, num_classes) 79 | expected_names = ['overfeat/conv1/weights', 80 | 'overfeat/conv1/biases', 81 | 'overfeat/conv2/weights', 82 | 'overfeat/conv2/biases', 83 | 'overfeat/conv3/weights', 84 | 'overfeat/conv3/biases', 85 | 'overfeat/conv4/weights', 86 | 'overfeat/conv4/biases', 87 | 'overfeat/conv5/weights', 88 | 'overfeat/conv5/biases', 89 | 'overfeat/fc6/weights', 90 | 'overfeat/fc6/biases', 91 | 'overfeat/fc7/weights', 92 | 'overfeat/fc7/biases', 93 | 'overfeat/fc8/weights', 94 | 'overfeat/fc8/biases', 95 | ] 96 | model_variables = [v.op.name for v in slim.get_model_variables()] 97 | self.assertSetEqual(set(model_variables), set(expected_names)) 98 | 99 | def testEvaluation(self): 100 | batch_size = 2 101 | height, width = 231, 231 102 | num_classes = 1000 103 | with self.test_session(): 104 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 105 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False) 106 | self.assertListEqual(logits.get_shape().as_list(), 107 | [batch_size, num_classes]) 108 | predictions = tf.argmax(logits, 1) 109 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 110 | 111 | def testTrainEvalWithReuse(self): 112 | train_batch_size = 2 113 | eval_batch_size = 1 114 | train_height, train_width = 231, 231 115 | eval_height, eval_width = 281, 281 116 | num_classes = 1000 117 | with self.test_session(): 118 | train_inputs = tf.random_uniform( 119 | 
(train_batch_size, train_height, train_width, 3)) 120 | logits, _ = overfeat.overfeat(train_inputs) 121 | self.assertListEqual(logits.get_shape().as_list(), 122 | [train_batch_size, num_classes]) 123 | tf.get_variable_scope().reuse_variables() 124 | eval_inputs = tf.random_uniform( 125 | (eval_batch_size, eval_height, eval_width, 3)) 126 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False, 127 | spatial_squeeze=False) 128 | self.assertListEqual(logits.get_shape().as_list(), 129 | [eval_batch_size, 2, 2, num_classes]) 130 | logits = tf.reduce_mean(logits, [1, 2]) 131 | predictions = tf.argmax(logits, 1) 132 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 133 | 134 | def testForward(self): 135 | batch_size = 1 136 | height, width = 231, 231 137 | with self.test_session() as sess: 138 | inputs = tf.random_uniform((batch_size, height, width, 3)) 139 | logits, _ = overfeat.overfeat(inputs) 140 | sess.run(tf.initialize_all_variables()) 141 | output = sess.run(logits) 142 | self.assertTrue(output.any()) 143 | 144 | if __name__ == '__main__': 145 | tf.test.main() 146 | -------------------------------------------------------------------------------- /litterbox/models/my_slim/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rwightman/tensorflow-litterbox/ddeeb3a6c7de64e5391050ffbb5948feca65ad3c/litterbox/models/my_slim/__init__.py -------------------------------------------------------------------------------- /litterbox/models/my_slim/build_vgg.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import fabric 16 | import tensorflow as tf 17 | from tensorflow.contrib.framework import arg_scope 18 | from tensorflow.contrib import layers 19 | 20 | 21 | def _block_a(net, endpoints, d=64, scope='BlockA'): 22 | with tf.variable_scope(scope): 23 | net = endpoints[scope+'/Conv1'] = layers.conv2d(net, d, [3, 3], scope='Conv1_3x3') 24 | net = endpoints[scope+'/Conv2'] = layers.conv2d(net, d, [3, 3], scope='Conv2_3x3') 25 | net = endpoints[scope+'/Pool1'] = layers.max_pool2d(net, [2, 2], stride=2, scope='Pool1_2x2/2') 26 | return net 27 | 28 | 29 | def _block_b(net, endpoints, d=256, scope='BlockB'): 30 | with tf.variable_scope(scope): 31 | net = endpoints[scope+'/Conv1'] = layers.conv2d(net, d, [3, 3], scope='Conv1_3x3') 32 | net = endpoints[scope+'/Conv2'] = layers.conv2d(net, d, [3, 3], scope='Conv2_3x3') 33 | net = endpoints[scope+'/Conv3'] = layers.conv2d(net, d, [3, 3], scope='Conv3_3x3') 34 | net = endpoints[scope+'/Pool1'] = layers.max_pool2d(net, [2, 2], stride=2, scope='Pool1_2x2/2') 35 | return net 36 | 37 | 38 | def _block_c(net, endpoints, d=256, scope='BlockC'): 39 | with tf.variable_scope(scope): 40 | net = endpoints[scope+'/Conv1'] = layers.conv2d(net, d, [3, 3], scope='Conv1_3x3') 41 | net = endpoints[scope+'/Conv2'] = layers.conv2d(net, d, [3, 3], scope='Conv2_3x3') 42 | net = endpoints[scope+'/Conv3'] = layers.conv2d(net, d, [3, 3], scope='Conv3_3x3') 43 | net = endpoints[scope+'/Conv4'] = layers.conv2d(net, d, [3, 3], scope='Conv4_3x3') 44 | net = endpoints[scope+'/Pool1'] = layers.max_pool2d(net, [2, 2], stride=2, scope='Pool1_2x2/2') 45 | return net 46 | 47 | 48 | def _block_output(net, endpoints, num_classes, dropout_keep_prob=0.5): 49 | with tf.variable_scope('Output'): 50 | net = layers.flatten(net, scope='Flatten') 51 | 52 | # 7 x 7 x 512 53 | net = layers.fully_connected(net, 4096, scope='Fc1') 54 | net = endpoints['Output/Fc1'] = layers.dropout(net, dropout_keep_prob, scope='Dropout1') 55 | 56 | # 1 x 1 x 4096 57 | net = layers.fully_connected(net, 4096, scope='Fc2') 58 | net = endpoints['Output/Fc2'] = layers.dropout(net, dropout_keep_prob, scope='Dropout2') 59 | 60 | logits = layers.fully_connected(net, num_classes, activation_fn=None, scope='Logits') 61 | # 1 x 1 x num_classes 62 | endpoints['Logits'] = logits 63 | return logits 64 | 65 | 66 | def _build_vgg16( 67 | inputs, 68 | num_classes=1000, 69 | dropout_keep_prob=0.5, 70 | is_training=True, 71 | scope=''): 72 | """Blah""" 73 | 74 | endpoints = {} 75 | with tf.name_scope(scope, 'vgg16', [inputs]): 76 | with arg_scope( 77 | [layers.batch_norm, layers.dropout], is_training=is_training): 78 | with arg_scope( 79 | [layers.conv2d, layers.max_pool2d], 80 | stride=1, 81 | padding='SAME'): 82 | 83 | net = _block_a(inputs, endpoints, d=64, scope='Scale1') 84 | net = _block_a(net, endpoints, d=128, scope='Scale2') 85 | net = _block_b(net, endpoints, d=256, scope='Scale3') 86 | net = _block_b(net, endpoints, d=512, scope='Scale4') 87 | net = _block_b(net, endpoints, d=512, scope='Scale5') 88 | logits = _block_output(net, endpoints, num_classes, dropout_keep_prob) 89 | 90 | endpoints['Predictions'] = tf.nn.softmax(logits, name='Predictions') 91 | return logits, endpoints 92 | 93 | 94 | 
def _build_vgg19( 95 | inputs, 96 | num_classes=1000, 97 | dropout_keep_prob=0.5, 98 | is_training=True, 99 | scope=''): 100 | """Blah""" 101 | 102 | endpoints = {} 103 | with tf.name_scope(scope, 'vgg19', [inputs]): 104 | with arg_scope( 105 | [layers.batch_norm, layers.dropout], is_training=is_training): 106 | with arg_scope( 107 | [layers.conv2d, layers.max_pool2d], 108 | stride=1, 109 | padding='SAME'): 110 | 111 | net = _block_a(inputs, endpoints, d=64, scope='Scale1') 112 | net = _block_a(net, endpoints, d=128, scope='Scale2') 113 | net = _block_c(net, endpoints, d=256, scope='Scale3') 114 | net = _block_c(net, endpoints, d=512, scope='Scale4') 115 | net = _block_c(net, endpoints, d=512, scope='Scale5') 116 | logits = _block_output(net, endpoints, num_classes, dropout_keep_prob) 117 | 118 | endpoints['Predictions'] = tf.nn.softmax(logits, name='Predictions') 119 | return logits, endpoints 120 | 121 | 122 | def params_vgg( 123 | num_layers=16): 124 | params = { 125 | 'num_layers': num_layers, 126 | 'weight_decay': 0.0005, 127 | 'use_batch_norm': False, 128 | 'dropout_keep_prob': 0.5, 129 | 'output_scopes': ['Output'] 130 | } 131 | return params 132 | 133 | 134 | def vgg_arg_scope( 135 | weight_decay=0.0005, 136 | use_batch_norm=False): 137 | """""" 138 | batch_norm_params = { 139 | # Decay for the moving averages. 140 | 'decay': 0.9997, 141 | # epsilon to prevent 0s in variance. 142 | 'epsilon': 0.001, 143 | } 144 | normalizer_fn = layers.batch_norm if use_batch_norm else None 145 | normalizer_params = batch_norm_params if use_batch_norm else None 146 | l2_regularizer = layers.l2_regularizer(weight_decay) # 0.00004 147 | 148 | with arg_scope( 149 | [layers.fully_connected], 150 | biases_initializer=tf.constant_initializer(0.1), 151 | weights_initializer=layers.variance_scaling_initializer(factor=1.0), 152 | weights_regularizer=l2_regularizer, 153 | activation_fn=tf.nn.relu): 154 | with arg_scope( 155 | [layers.conv2d], 156 | normalizer_fn=normalizer_fn, 157 | normalizer_params=normalizer_params, 158 | weights_initializer=layers.variance_scaling_initializer(factor=1.0), 159 | weights_regularizer=l2_regularizer, 160 | activation_fn=tf.nn.relu) as arg_sc: 161 | return arg_sc 162 | 163 | 164 | def build_vgg( 165 | inputs, 166 | num_classes=1000, 167 | params=params_vgg(), 168 | is_training=True, 169 | scope=''): 170 | """""" 171 | params = fabric.model.merge_params(params_vgg(), params) 172 | num_layers = params['num_layers'] 173 | weight_decay = params['weight_decay'] 174 | use_batch_norm = params['use_batch_norm'] 175 | dropout_keep_prob = params['dropout_keep_prob'] 176 | 177 | with vgg_arg_scope( 178 | weight_decay=weight_decay, 179 | use_batch_norm=use_batch_norm, 180 | ): 181 | if num_layers == 19: 182 | logits, endpoints = _build_vgg19( 183 | inputs, 184 | num_classes=num_classes, 185 | dropout_keep_prob=dropout_keep_prob, 186 | is_training=is_training, 187 | scope=scope) 188 | else: 189 | assert num_layers == 16 190 | logits, endpoints = _build_vgg16( 191 | inputs, 192 | num_classes=num_classes, 193 | dropout_keep_prob=dropout_keep_prob, 194 | is_training=is_training, 195 | scope=scope) 196 | 197 | return logits, endpoints 198 | -------------------------------------------------------------------------------- /litterbox/models/my_slim/model_my_slim.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """Model wrapper for the custom 'my_slim' network builders (VGG, ResNet, and Inception family variants). 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | from fabric import model, loss 17 | from collections import OrderedDict 18 | from .nets_factory import get_network_fn 19 | 20 | my_default_params = { 21 | 'network': 'inception_resnet_v2', 22 | 'num_classes': 1000, 23 | } 24 | 25 | 26 | class ModelMySlim(model.Model): 27 | 28 | def __init__(self, params): 29 | super(ModelMySlim, self).__init__() 30 | params = model.merge_params(my_default_params, params) 31 | self.network = params['network'] 32 | self.num_classes = params['num_classes'] 33 | self._params = params # cache for build fns 34 | 35 | def build_tower(self, inputs, is_training=False, scope=None): 36 | 37 | network_fn = get_network_fn( 38 | self.network, 39 | num_classes=self.num_classes, 40 | params=self._params, 41 | is_training=is_training) 42 | logits, endpoints = network_fn(inputs) 43 | 44 | if 'AuxLogits' in endpoints: 45 | # Grab the logits associated with the auxiliary head if present. 46 | aux_logits = endpoints['AuxLogits'] 47 | else: 48 | aux_logits = None 49 | 50 | self.add_tower( 51 | scope, 52 | endpoints, 53 | logits, 54 | aux_logits 55 | ) 56 | 57 | # Add summaries for viewing model statistics on TensorBoard. 58 | self.activation_summaries() 59 | 60 | return logits 61 | 62 | def add_tower_loss(self, labels, scope=None): 63 | """Adds all losses for the model. 64 | 65 | The final loss is not returned; the list of losses is collected by slim.losses. 66 | The losses are accumulated in tower_loss() and summed to calculate the total loss. 67 | 68 | Args: 69 | labels: Labels from distorted_inputs or inputs(). 1-D tensor of shape [batch_size] 70 | scope: tower scope of losses to add, e.g. 'tower_0/', defaults to last added tower if None 71 | """ 72 | tower = self.tower(scope) 73 | aux_logits = None 74 | if 'AuxLogits' in tower.endpoints: 75 | aux_logits = tower.aux_outputs 76 | 77 | loss.loss_softmax_cross_entropy_with_aux(tower.outputs, labels, aux_logits) 78 | 79 | def output_scopes(self, prefix_scope=''): 80 | # all models currently have their num_class specific FC/output layers under the 'Output' scope 81 | scopes = ['Output'] 82 | prefix = prefix_scope + '/' if prefix_scope else '' 83 | return [prefix + x for x in scopes] 84 | 85 | def get_predictions(self, outputs, processor): 86 | if processor is not None: 87 | logits = processor.decode_output(outputs) 88 | else: 89 | logits = outputs 90 | return tf.nn.softmax(logits) 91 | 92 | @staticmethod 93 | def eval_ops(logits, labels, processor): 94 | """Generate a simple (non tower based) loss op for use in evaluation. 95 | 96 | Args: 97 | logits: List of logits from inference(). Shape [batch_size, num_classes], dtype float32/64 98 | labels: Labels from distorted_inputs or inputs(). batch_size vector with int32/64 values in [0, num_classes).
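      processor: output processor; not used by these eval ops, accepted for
        interface compatibility with other models' eval_ops.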
99 | """ 100 | top_1_op = tf.nn.in_top_k(logits, labels, 1) 101 | top_5_op = tf.nn.in_top_k(logits, labels, 5) 102 | loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels, name='xentropy_eval') 103 | return OrderedDict([('top 5', top_5_op), ('top 1', top_1_op), ('loss', loss_op)]) 104 | 105 | @staticmethod 106 | def default_optimizer_params(): 107 | opt_type = 'RMSProp' 108 | # Default params as in Google's inception v3 model 109 | opt_params = { 110 | 'decay': 0.9, 111 | 'momentum': 0.9, 112 | 'epsilon': 1.0 113 | } 114 | return opt_type, opt_params 115 | -------------------------------------------------------------------------------- /litterbox/models/my_slim/nets_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import functools 5 | 6 | import tensorflow as tf 7 | from fabric import model 8 | from .build_inception_v4 import * 9 | from .build_resnet import * 10 | from .build_vgg import * 11 | 12 | networks_map = { 13 | 'vgg_16': build_vgg, 14 | 'vgg_19': build_vgg, 15 | 'inception_v4': build_inception, 16 | 'inception_resnet_v1': build_inception, 17 | 'inception_resnet_v2': build_inception, 18 | 'resnet_v1_18': build_resnet, 19 | 'resnet_v1_34': build_resnet, 20 | 'resnet_v1_50': build_resnet, 21 | 'resnet_v1_101': build_resnet, 22 | 'resnet_v1_152': build_resnet, 23 | 'resnet_v1_200': build_resnet, 24 | 'resnet_v2_50': build_resnet, 25 | 'resnet_v2_101': build_resnet, 26 | 'resnet_v2_152': build_resnet, 27 | 'resnet_v2_200': build_resnet, 28 | } 29 | 30 | params_map = { 31 | 'vgg_16': params_vgg(num_layers=16), 32 | 'vgg_19': params_vgg(num_layers=19), 33 | 'inception_v4': params_inception(version=4, residual=False), 34 | 'inception_resnet_v1': params_inception(version=1, residual=True), 35 | 'inception_resnet_v2': params_inception(version=2, residual=True), 36 | 'resnet_v1_18': params_resnet(num_layers=18), 37 | 'resnet_v1_34': params_resnet(num_layers=34), 38 | 'resnet_v1_50': params_resnet(num_layers=50), 39 | 'resnet_v1_101': params_resnet(num_layers=101), 40 | 'resnet_v1_152': params_resnet(num_layers=152), 41 | 'resnet_v1_200': params_resnet(num_layers=200), 42 | 'resnet_v2_50': params_resnet(num_layers=50, pre_activation=True), 43 | 'resnet_v2_101': params_resnet(num_layers=101, pre_activation=True), 44 | 'resnet_v2_152': params_resnet(num_layers=152, pre_activation=True), 45 | 'resnet_v2_200': params_resnet(num_layers=200, pre_activation=True), 46 | } 47 | 48 | 49 | def get_network_fn(name, num_classes, params, is_training=False): 50 | """Returns a network_fn such as `logits, end_points = network_fn(images)`.
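
    Args:
        name: key into networks_map, e.g. 'vgg_16' or 'resnet_v1_50'.
        num_classes: number of output classes for the classifier head.
        params: dict of overrides merged onto the network's default params.
        is_training: whether the network is built in training mode (affects
            dropout and batch norm behaviour).

    Example (illustrative):
        network_fn = get_network_fn('vgg_16', num_classes=1000, params={})
        logits, end_points = network_fn(images)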
51 | """ 52 | if name not in networks_map: 53 | raise ValueError('Name of network unknown %s' % name) 54 | params = model.merge_params(params_map[name], params) 55 | func = networks_map[name] 56 | 57 | @functools.wraps(func) 58 | def network_fn(inputs): 59 | return func(inputs, num_classes, params, is_training=is_training) 60 | 61 | if hasattr(func, 'default_image_size'): 62 | network_fn.default_image_size = func.default_image_size 63 | 64 | return network_fn 65 | -------------------------------------------------------------------------------- /litterbox/models/sdc/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /litterbox/models/sdc/build_nvidia_sdc.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | 7 | slim = tf.contrib.slim 8 | 9 | 10 | def build_nvidia_sdc( 11 | inputs, 12 | output_cfg={'steer': 1}, 13 | version=1, 14 | is_training=True, 15 | bayesian=False, 16 | dropout_keep_prob=0.7, 17 | reuse=None, 18 | scope='NvidiaSdc'): 19 | 20 | endpoints = {} 21 | var_scope = tf.variable_scope(scope, 'NvidiaSdc', [inputs], reuse=reuse) 22 | arg_scope_train = slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training) 23 | arg_scope_conv = slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='VALID') 24 | with var_scope, arg_scope_train, arg_scope_conv: 25 | #160 x 120 3 x 26 | print(inputs.get_shape()) 27 | net = slim.conv2d(inputs, 24, 5, stride=2, scope='Conv1_5x5') 28 | endpoints['Conv1_5x5'] = net 29 | print(net.get_shape()) 30 | net = slim.conv2d(net, 36, 5, stride=2, scope='Conv2_5x5') 31 | endpoints['Conv2_5x5'] = net 32 | print(net.get_shape()) 33 | net = slim.conv2d(net, 48, 5, stride=2, scope='Conv3_5x5') 34 | endpoints['Conv3_5x5'] = net 35 | print(net.get_shape()) 36 | net = slim.conv2d(net, 64, 3, stride=2, scope='Conv4_3x3') 37 | endpoints['Conv4_3x3'] = net 38 | print(net.get_shape()) 39 | net = slim.conv2d(net, 64, 3, scope='Conv5_3x3') 40 | endpoints['Conv5_3x3'] = net 41 | print(net.get_shape()) 42 | 43 | with tf.variable_scope('Output'): 44 | if version == 1: 45 | net = slim.conv2d(net, 1152, net.get_shape()[1:3], scope='Fc1') 46 | net = slim.dropout(net, dropout_keep_prob, scope='Dropout1') 47 | print(net.get_shape()) 48 | net = slim.conv2d(net, 144, 1, scope='Fc2') 49 | net = slim.dropout(net, dropout_keep_prob, scope='Dropout2') 50 | print(net.get_shape()) 51 | net = slim.conv2d(net, 72, 1, scope='Fc3') 52 | print(net.get_shape()) 53 | net = tf.squeeze(net, squeeze_dims=[1, 2]) 54 | else: 55 | net = slim.conv2d(net, 1280, net.get_shape()[1:3], scope='Fc1') 56 | net = slim.dropout(net, dropout_keep_prob, scope='Dropout1') 57 | print(net.get_shape()) 58 | net = slim.conv2d(net, 100, 1, scope='Fc2') 59 | net = slim.dropout(net, dropout_keep_prob, scope='Dropout2') 60 | print(net.get_shape()) 61 | net = slim.conv2d(net, 50, 1, scope='Fc3') 62 | print(net.get_shape()) 63 | net = slim.conv2d(net, 10, 1, scope='Fc4') 64 | print(net.get_shape()) 65 | net = tf.squeeze(net, squeeze_dims=[1, 2]) 66 | 67 | assert 'steer' in output_cfg 68 | output = {'steer': slim.fully_connected( 69 | net, output_cfg['steer'], activation_fn=None, scope='OutputSteer') 70 | } 71 | endpoints['Output'] = output 72 | 73 | return output, 
endpoints 74 | 75 | 76 | def nvidia_style_arg_scope( 77 | weight_decay=0.0005, 78 | batch_norm_decay=0.9997, 79 | batch_norm_epsilon=0.001): 80 | """Yields the scope with the default parameters for the NVIDIA-style SDC network. 81 | 82 | Args: 83 | weight_decay: the weight decay for weights variables. 84 | batch_norm_decay: decay for the moving average of batch_norm moments. 85 | batch_norm_epsilon: small float added to variance to avoid dividing by zero. 86 | 87 | Returns: 88 | an arg_scope with the parameters needed for the NVIDIA-style SDC network. 89 | """ 90 | # Set weight_decay for weights in conv2d and fully_connected layers. 91 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 92 | weights_regularizer=slim.l2_regularizer(weight_decay), 93 | biases_regularizer=slim.l2_regularizer(weight_decay)): 94 | batch_norm_params = { 95 | 'decay': batch_norm_decay, 96 | 'epsilon': batch_norm_epsilon, 97 | } 98 | # Set activation_fn and parameters for batch_norm. 99 | with slim.arg_scope([slim.conv2d], 100 | activation_fn=tf.nn.elu, 101 | normalizer_fn=slim.batch_norm, 102 | normalizer_params=batch_norm_params) as scope: 103 | return scope 104 | 105 | -------------------------------------------------------------------------------- /litterbox/models/sdc/model_sdc.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """Model wrapper for SDC variants of Google's tensorflow/models/slim networks.
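
Wraps the SDC regression variants of these networks (Inception-ResNet-v2,
ResNet-v1 50/101/152, and a small NVIDIA-style net) behind the common fabric
Model interface. Illustrative usage sketch:

    model = ModelSdc(params={'network': 'resnet_v1_50', 'outputs': {'steer': 1}})
    output = model.build_tower(images, is_training=True)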
10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import fabric 16 | import tensorflow as tf 17 | from .build_inception_resnet_sdc import * 18 | from .build_resnet_sdc import * 19 | from .build_nvidia_sdc import * 20 | slim = tf.contrib.slim 21 | 22 | sdc_default_params = { 23 | 'outputs': {'steer': 1, 'xyz': 2}, 24 | 'network': 'inception_resnet_v2', # or one of other options in network_map 25 | 'regression_loss': 'mse', # or huber 26 | 'version': 2, 27 | 'bayesian': False, 28 | 'lock_root': False, 29 | } 30 | 31 | network_map = { 32 | 'inception_resnet_v2': build_inception_resnet_sdc_regression, 33 | 'resnet_v1_50': build_resnet_v1_50_sdc, 34 | 'resnet_v1_101': build_resnet_v1_101_sdc, 35 | 'resnet_v1_152': build_resnet_v1_152_sdc, 36 | 'nvidia_sdc': build_nvidia_sdc, 37 | } 38 | 39 | arg_scope_map = { 40 | 'inception_resnet_v2': inception_resnet_v2_arg_scope, 41 | 'resnet_v1_50': resnet_arg_scope, 42 | 'resnet_v1_101': resnet_arg_scope, 43 | 'resnet_v1_152': resnet_arg_scope, 44 | 'nvidia_sdc': nvidia_style_arg_scope, 45 | } 46 | 47 | 48 | class ModelSdc(fabric.model.Model): 49 | 50 | def __init__(self, params={}): 51 | super(ModelSdc, self).__init__() 52 | params = fabric.model.merge_params(sdc_default_params, params) 53 | print("ModelSdc params", params) 54 | 55 | self.output_cfg = params['outputs'] 56 | # model variable scope needs to match google net for pretrained weight compat 57 | if (params['network'] == 'resnet_v1_152' or 58 | params['network'] == 'resnet_v1_101' or 59 | params['network'] == 'resnet_v1_50'): 60 | self.network = params['network'] 61 | self.model_variable_scope = params['network'] 62 | elif params['network'] == 'inception_resnet_v2': 63 | self.network = 'inception_resnet_v2' 64 | self.model_variable_scope = "InceptionResnetV2" 65 | else: 66 | assert params['network'] == 'nvidia_sdc' 67 | self.network = 'nvidia_sdc' 68 | self.model_variable_scope = "NvidiaSdc" 69 | 70 | self.version = params['version'] 71 | self.bayesian = params['bayesian'] 72 | self.lock_root = params['lock_root'] 73 | 74 | if params['regression_loss'] == 'huber': 75 | self.regression_loss = fabric.loss.loss_huber_with_aux 76 | else: 77 | self.regression_loss = fabric.loss.loss_mse_with_aux 78 | self.disable_summaries = False 79 | 80 | def build_tower(self, inputs, is_training=False, summaries=True, scope=None): 81 | 82 | with slim.arg_scope(arg_scope_map[self.network]()): 83 | output, endpoints = network_map[self.network]( 84 | inputs, 85 | output_cfg=self.output_cfg, 86 | version=self.version, 87 | bayesian=self.bayesian, 88 | lock_root=self.lock_root, 89 | is_training=is_training) 90 | 91 | aux_output = None 92 | if 'AuxOutput' in endpoints: 93 | aux_output = endpoints['AuxOutput'] 94 | 95 | self.add_tower( 96 | scope, 97 | endpoints=endpoints, 98 | outputs=output, 99 | aux_outputs=aux_output, 100 | ) 101 | 102 | # Add summaries for viewing model statistics on TensorBoard. 
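# add_tower() above caches this tower's endpoints, outputs, and any auxiliary
# head under its scope so that add_tower_loss() can retrieve them later.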
103 | if summaries: 104 | self.activation_summaries() 105 | 106 | return output 107 | 108 | def add_tower_loss(self, targets, scope=None): 109 | tower = self.tower(scope) 110 | assert 'xyz' in self.output_cfg or 'steer' in self.output_cfg 111 | 112 | if 'xyz' in self.output_cfg: 113 | target_xyz = targets[1] 114 | aux_output_xyz = None 115 | if tower.aux_outputs: 116 | aux_output_xyz = tower.aux_outputs['xyz'] 117 | self.regression_loss( 118 | tower.outputs['xyz'], target_xyz, aux_predictions=aux_output_xyz) 119 | 120 | if 'steer' in self.output_cfg: 121 | target_steer = targets[0] 122 | aux_output_steer = None 123 | if tower.aux_outputs: 124 | aux_output_steer = tower.aux_outputs['steer'] 125 | if self.output_cfg['steer'] > 1: 126 | # steer is integer target, one hot output, use softmax 127 | fabric.loss_softmax_cross_entropy_with_aux( 128 | tower.outputs['steer'], target_steer, aux_logits=aux_output_steer) 129 | else: 130 | assert self.output_cfg['steer'] == 1 131 | # steer is float target/output, use regression /w huber loss 132 | self.regression_loss( 133 | tower.outputs['steer'], target_steer, aux_predictions=aux_output_steer) 134 | 135 | def get_predictions(self, outputs, processor=None): 136 | if processor: 137 | for k, v in outputs.items(): 138 | outputs[k] = processor.decode_output(v, key=k) 139 | return outputs 140 | 141 | def _remap_variable_names(self, variables, checkpoint_variable_set, prefix_scope): 142 | 143 | def _strip_name(prefix, name): 144 | name = name[len(prefix):] if name.startswith(prefix) else name 145 | return name 146 | 147 | if prefix_scope: 148 | # strip our network prefix scope and remap accordingly 149 | prefix_scope += '/' 150 | restore_variables = {_strip_name(prefix_scope, v.op.name): v for v in variables} 151 | return restore_variables 152 | else: 153 | return variables 154 | 155 | def output_scopes(self, prefix_scope=''): 156 | rel_scopes = ['logits', 'Logits', 'Output', 'Output/OutputXYZ', 'Output/OutputSteer', 'Output/Fc1', 157 | 'AuxLogits/OutputXYZ', 'AuxLogits/OutputSteer', 'AuxLogits/Fc1'] 158 | prefix = prefix_scope + '/' if prefix_scope else '' 159 | prefix += self.model_variable_scope + '/' 160 | abs_scopes = [prefix + x for x in rel_scopes] 161 | return abs_scopes 162 | 163 | @staticmethod 164 | def eval_ops(predictions, labels, processor=None): 165 | """Generate a simple (non tower based) loss op for use in evaluation. 
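
        Args:
            predictions: dict of model outputs keyed by 'steer' and/or 'xyz'.
            labels: sequence of target tensors, steering at index 0, xyz at index 1.
            processor: optional processor used to decode outputs and targets
                back to real-world units before the metrics are computed.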
166 | """ 167 | ops = {} 168 | if 'steer' in predictions: 169 | steer_label = labels[0] 170 | steer_prediction = predictions['steer'] 171 | 172 | if steer_prediction.get_shape()[-1].value > 1: 173 | # one hot steering loss (non reduced) 174 | steer_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 175 | steer_prediction, steer_label, name='steer_xentropy_eval') 176 | # decode non-linear mapping before mse 177 | steer_prediction = tf.cast(tf.argmax(steer_prediction, dimension=1), tf.int32) 178 | if processor: 179 | steer_prediction = processor.decode_output(steer_prediction, key='steer') 180 | steer_label = processor.decode_output(steer_label, key='steer') 181 | else: 182 | # linear regression steering loss 183 | assert steer_prediction.get_shape()[-1].value == 1 184 | steer_loss = fabric.loss.metric_huber(steer_prediction, steer_label) 185 | if processor: 186 | steer_prediction = processor.decode_output(steer_prediction, key='steer') 187 | steer_label = processor.decode_output(steer_label, key='steer') 188 | 189 | steer_mse = tf.squared_difference( 190 | steer_prediction, steer_label, name='steer_mse_eval') 191 | 192 | ops['steer_loss'] = steer_loss 193 | ops['steer_mse'] = steer_mse 194 | #ops['steer_prediction'] = steer_prediction 195 | #ops['steer_label'] = steer_label 196 | 197 | if 'xyz' in predictions: 198 | xyz_labels = labels[1] 199 | xyz_predictions = predictions['xyz'] 200 | if processor: 201 | xyz_labels = processor.decode_output(xyz_labels, key='xyz') 202 | xyz_predictions = processor.decode_output(xyz_predictions, key='xyz') 203 | xyz_loss = fabric.loss.metric_huber(xyz_predictions, xyz_labels) 204 | xyz_mse = tf.squared_difference(xyz_predictions, xyz_labels, name='xyz_mse_eval') 205 | ops['xyz_loss'] = xyz_loss 206 | ops['xyz_mse'] = xyz_mse 207 | ops['xyz_prediction'] = xyz_predictions 208 | ops['xyz_label'] = xyz_labels 209 | 210 | return ops 211 | -------------------------------------------------------------------------------- /litterbox/processors/__init__.py: -------------------------------------------------------------------------------- 1 | from .sdc.processor_sdc import ProcessorSdc 2 | from .imagenet.processor_imagenet import ProcessorImagenet -------------------------------------------------------------------------------- /litterbox/processors/imagenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rwightman/tensorflow-litterbox/ddeeb3a6c7de64e5391050ffbb5948feca65ad3c/litterbox/processors/imagenet/__init__.py -------------------------------------------------------------------------------- /litterbox/processors/imagenet/image_processing_imagenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | # Based on original Work Copyright 2016 Google Inc. All Rights Reserved. 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 
13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | # ============================================================================== 23 | """Read and preprocess image data. 24 | 25 | Image processing occurs on a single image at a time. Images are read and 26 | preprocessed in parallel across multiple threads. The resulting images 27 | are concatenated together to form a single batch for training or evaluation. 28 | 29 | -- Provide processed image data for a network: 30 | inputs: Construct batches of evaluation examples of images. 31 | distorted_inputs: Construct batches of training examples of images. 32 | batch_inputs: Construct batches of training or evaluation examples of images. 33 | 34 | -- Data processing: 35 | parse_example_proto: Parses an Example proto containing a training example 36 | of an image. 37 | 38 | -- Image decoding: 39 | decode_jpeg: Decode a JPEG encoded string into a 3-D float32 Tensor. 40 | 41 | -- Image preprocessing: 42 | image_preprocessing: Decode and preprocess one image for evaluation or training. 43 | distort_image: Distort one image for training a network. 44 | eval_image: Prepare one image for evaluation. 45 | distort_color: Distort the color in one image for training. 46 | """ 47 | from __future__ import absolute_import 48 | from __future__ import division 49 | from __future__ import print_function 50 | 51 | import tensorflow as tf 52 | import numpy as np 53 | from fabric.image_processing_common import * 54 | 55 | 56 | def image_preprocess_imagenet( 57 | image_buffer, 58 | height, width, bbox=None, 59 | normalize=None, fmt='jpg', train=False, thread_id=0): 60 | """Decode and preprocess one image for evaluation or training. 61 | 62 | Args: 63 | image_buffer: JPEG encoded string Tensor 64 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 65 | where each coordinate is [0, 1) and the coordinates are arranged as 66 | [ymin, xmin, ymax, xmax]. 67 | normalize: standardization/normalization method for image data 68 | train: boolean 69 | thread_id: integer indicating preprocessing thread 70 | 71 | Returns: 72 | 3-D float Tensor containing an appropriately scaled image 73 | 74 | Raises: 75 | ValueError: if target image height or width is not specified 76 | """ 77 | if not height or not width: 78 | raise ValueError('Please specify target image height & width.') 79 | 80 | image = decode_compressed_image(image_buffer, image_fmt=fmt) 81 | 82 | if train: 83 | if bbox is None: 84 | bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) 85 | image = process_for_train(image, height=height, width=width, bbox=bbox, thread_id=thread_id) 86 | else: 87 | image = process_for_eval(image, height, width) 88 | 89 | return image_normalize(image, method=normalize) 90 | -------------------------------------------------------------------------------- /litterbox/processors/imagenet/parse_proto_imagenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | # Based on original Work Copyright 2016 Google Inc. All Rights Reserved. 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | # ============================================================================== 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | 30 | def parse_proto_imagenet(example_serialized, label_offset=0): 31 | """Parses an Example proto containing a training example of an image. 32 | 33 | The output of the build_image_data.py image preprocessing script is a dataset 34 | containing serialized Example protocol buffers. Each Example proto contains 35 | the following fields: 36 | 37 | image/height: 462 38 | image/width: 581 39 | image/colorspace: 'RGB' 40 | image/channels: 3 41 | image/class/label: 615 42 | image/class/synset: 'n03623198' 43 | image/class/text: 'knee pad' 44 | image/object/bbox/xmin: 0.1 45 | image/object/bbox/xmax: 0.9 46 | image/object/bbox/ymin: 0.2 47 | image/object/bbox/ymax: 0.6 48 | image/object/bbox/label: 615 49 | image/format: 'JPEG' 50 | image/filename: 'ILSVRC2012_val_00041207.JPEG' 51 | image/encoded: 52 | 53 | Args: 54 | example_serialized: scalar Tensor tf.string containing a serialized 55 | Example protocol buffer. 56 | 57 | Returns: 58 | image_buffer: Tensor tf.string containing the contents of a JPEG file. 59 | label: Tensor tf.int32 containing the label. 60 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 61 | where each coordinate is [0, 1) and the coordinates are arranged as 62 | [ymin, xmin, ymax, xmax]. 63 | text: Tensor tf.string containing the human-readable label. 64 | """ 65 | # Dense features in Example proto. 66 | feature_map = { 67 | 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value=''), 68 | 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, default_value=-1), 69 | 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''), 70 | 'image/filename': tf.FixedLenFeature([], dtype=tf.string, default_value=''), 71 | } 72 | sparse_float32 = tf.VarLenFeature(dtype=tf.float32) 73 | # Sparse features in Example proto. 
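  # Each image can carry a variable number of ground-truth boxes, so the four
  # bbox coordinate lists are parsed as variable-length features and assembled
  # into a [1, num_boxes, 4] tensor below.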
74 | feature_map.update( 75 | {k: sparse_float32 for k in ['image/object/bbox/xmin', 76 | 'image/object/bbox/ymin', 77 | 'image/object/bbox/xmax', 78 | 'image/object/bbox/ymax']}) 79 | 80 | features = tf.parse_single_example(example_serialized, feature_map) 81 | label = tf.cast(features['image/class/label'], dtype=tf.int32) 82 | label = tf.sub(label, label_offset) 83 | 84 | xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) 85 | ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) 86 | xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) 87 | ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) 88 | 89 | # Note that we impose an ordering of (y, x) just to make life difficult. 90 | bbox = tf.concat(0, [ymin, xmin, ymax, xmax]) 91 | 92 | # Force the variable number of bounding boxes into the shape 93 | # [1, num_boxes, coords]. 94 | bbox = tf.expand_dims(bbox, 0) 95 | bbox = tf.transpose(bbox, [0, 2, 1]) 96 | 97 | return features['image/encoded'], features['image/filename'], bbox, features['image/class/text'], label 98 | -------------------------------------------------------------------------------- /litterbox/processors/imagenet/processor_imagenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | import fabric 17 | import math 18 | from .parse_proto_imagenet import parse_proto_imagenet 19 | from .image_processing_imagenet import image_preprocess_imagenet 20 | from fabric.image_processing_common import * # FIXME for annoying flags 21 | 22 | 23 | class ProcessorImagenet(fabric.Processor): 24 | 25 | def __init__(self): 26 | super(ProcessorImagenet, self).__init__() 27 | 28 | # For aspect based image size, short edge set to FLAGS.image_size 29 | if FLAGS.image_aspect == 0.0 or FLAGS.image_aspect == 1.0: 30 | self.width = FLAGS.image_size 31 | self.height = FLAGS.image_size 32 | elif FLAGS.image_aspect < 1.0: 33 | self.width = math.floor(FLAGS.image_size * FLAGS.image_aspect) 34 | self.height = FLAGS.image_size 35 | else: 36 | self.width = FLAGS.image_size 37 | self.height = math.floor(FLAGS.image_size / FLAGS.image_aspect) 38 | self.depth = 3 39 | self.normalize = FLAGS.image_norm if FLAGS.image_norm else 'default' 40 | self.label_offset = 0 # offset to subtract from label index in dataset 41 | self.output_offset = 0 # offset to subtract from output prediction from model 42 | 43 | def parse_example(self, serialized_example): 44 | parsed = parse_proto_imagenet(serialized_example, self.label_offset) 45 | # image_buffer, filename (example id), bbox, class name, class label 46 | return parsed 47 | 48 | def process_example(self, data, mode='eval', thread_id=0): 49 | train = (mode == 'train') 50 | image_buffer, name = data[:2] 51 | 52 | bbox = None 53 | label_index = tf.constant(0, dtype=tf.int32) 54 | if mode != 'pred': 55 | bbox, _, label_index = data[-3:] 56 | 57 | image_processed = image_preprocess_imagenet( 58 | image_buffer, 59 | 
height=self.height, width=self.width, bbox=bbox, 60 | normalize=self.normalize, train=train, thread_id=thread_id) 61 | 62 | return image_processed, name, label_index 63 | 64 | def reshape_batch(self, batch_data, batch_size, num_splits=0): 65 | images, names, labels = batch_data 66 | images = tf.cast(images, tf.float32) 67 | images = tf.reshape(images, shape=[batch_size, self.height, self.width, self.depth]) 68 | names = tf.reshape(names, [batch_size]) 69 | labels = tf.reshape(labels, [batch_size]) 70 | 71 | if num_splits > 0: 72 | images = tf.split(0, num_splits, images) 73 | names = tf.split(0, num_splits, names) 74 | labels = tf.split(0, num_splits, labels) 75 | 76 | return images, names, labels 77 | 78 | def decode_output(self, value, key=None): 79 | if self.output_offset > 0: 80 | outputs = tf.slice(value, [0, self.output_offset], [-1, -1]) 81 | elif self.output_offset < 0: 82 | outputs = tf.pad(value, [[0, 0], [-self.output_offset, 0]]) 83 | else: 84 | outputs = value 85 | return outputs 86 | -------------------------------------------------------------------------------- /litterbox/processors/sdc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rwightman/tensorflow-litterbox/ddeeb3a6c7de64e5391050ffbb5948feca65ad3c/litterbox/processors/sdc/__init__.py -------------------------------------------------------------------------------- /litterbox/processors/sdc/image_processing_sdc.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | # Based on original Work Copyright 2016 Google Inc. All Rights Reserved. 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | # ============================================================================== 23 | """Read and preprocess image data. 
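
Provides image_preprocess_sdc(), which decodes a camera frame (or accepts raw
pixels), applies SDC-specific training distortions with a camera-dependent
crop bias, optionally applies a random horizontal flip whose coefficient is
returned so steering targets can be mirrored, and normalizes using the dataset
mean/std constants defined below. Illustrative usage sketch:

    image, flip_coeff = image_preprocess_sdc(
        image_buffer, camera_id, height=480, width=640, train=True)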
24 | """ 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | import tensorflow as tf 30 | import numpy as np 31 | from copy import deepcopy 32 | from fabric.image_processing_common import * 33 | 34 | #SDC_MEAN = [0.2087998337, 0.240992306, 0.284853019] 35 | #SDC_STD = [0.2160449662, 0.2489588968, 0.2898496487] 36 | SDC_MEAN = [0.2956688423, 0.3152727451, 0.3687327858] 37 | SDC_STD = [0.2538597152, 0.2642534638, 0.277498978] 38 | SDC_MEAN_STD = [SDC_MEAN, SDC_STD] 39 | 40 | distort_params_sdc = { 41 | 'h_flip': True, 42 | 'v_flip': False, 43 | 'elastic_distortion': False, 44 | 'affine_distortion': False, 45 | 'aspect_ratio_range': [0.909, 1.1], 46 | 'area_range': [0.75, 1.0], 47 | 'min_object_covered': 0.85, 48 | 'hue_delta': 0.1, 49 | 'angle_range': 1.5, 50 | } 51 | 52 | 53 | def _random_hflip(image, uniform_random): 54 | """Randomly flip an image horizontally (left to right). 55 | """ 56 | image = tf.convert_to_tensor(image, name='image') 57 | mirror = tf.less(tf.pack([1.0, uniform_random, 1.0]), 0.5) 58 | return tf.reverse(image, mirror) 59 | 60 | 61 | def image_preprocess_sdc( 62 | image_buffer, camera_id, 63 | height, width, image_fmt='jpg', 64 | normalize='global', train=False, summary_suffix='', thread_id=0): 65 | """Decode and preprocess one image for evaluation or training. 66 | 67 | Args: 68 | image_buffer: encoded string Tensor 69 | camera_id: string identifier of source camera 70 | height: image target height 71 | width: image target width 72 | image_fmt: encoding format of the image 73 | normalize: standardization/normalization method; 'global' standardizes with the dataset mean/std instead of rescaling 74 | train: boolean 75 | thread_id: integer indicating preprocessing thread 76 | 77 | Returns: 78 | tuple of (image, flip_coeff): a 3-D float Tensor containing an appropriately scaled image, and a scalar float32 Tensor that is -1.0 if a horizontal flip was applied, else 1.0 79 | """ 80 | if not height or not width: 81 | raise ValueError('Please specify target image height & width.') 82 | 83 | flip_coeff = tf.constant(1.0, dtype=tf.float32) 84 | if image_buffer.dtype == tf.string: 85 | # compressed image bytes passed as string 86 | image = decode_compressed_image(image_buffer, image_fmt) 87 | else: 88 | # raw image pixels passed as 3D [H, W, C] tensor in RGB format 89 | image = image_buffer 90 | assert len(image.get_shape()) == 3 91 | 92 | if train: 93 | left_string = tf.constant('left_camera', tf.string) 94 | right_string = tf.constant('right_camera', tf.string) 95 | left_camera = tf.equal(camera_id, left_string) 96 | right_camera = tf.equal(camera_id, right_string) 97 | 98 | # bbox is a 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 99 | # where each coordinate is [0, 1) and the coordinates are arranged as [ymin, xmin, ymax, xmax].
100 | # for this code we want to bias the bbox for left camera to the right side, for the right camera 101 | # to the left side, and leave center as center 102 | 103 | bbox_left = tf.constant([0.05, 0.0, 0.95, 0.9], dtype=tf.float32, shape=[1, 1, 4]) 104 | bbox_right = tf.constant([0.05, 0.1, 0.95, 1.0], dtype=tf.float32, shape=[1, 1, 4]) 105 | bbox_center = tf.constant([0.05, 0.05, 0.95, 0.95], dtype=tf.float32, shape=[1, 1, 4]) 106 | case_pairs = [(left_camera, lambda: bbox_right), (right_camera, lambda: bbox_left)] 107 | bbox = tf.case(case_pairs, lambda: bbox_center, exclusive=False, name='case') 108 | 109 | distort_params = deepcopy(distort_params_default) 110 | distort_params.update(deepcopy(distort_params_sdc)) 111 | h_flip = distort_params['h_flip'] 112 | distort_params['h_flip'] = False # do not perform h-flip in common processing 113 | distort_params['aspect_ratio_range'][0] *= (width / height) 114 | distort_params['aspect_ratio_range'][1] *= (width / height) 115 | 116 | image = process_for_train( 117 | image, 118 | height=height, width=width, bbox=bbox, 119 | params=distort_params, summary_suffix=summary_suffix, thread_id=thread_id) 120 | 121 | if h_flip: 122 | uniform_random = tf.random_uniform([], 0, 1.0) 123 | image = _random_hflip(image, uniform_random) 124 | flip_coeff = tf.cond(uniform_random < 0.5, lambda: tf.mul(flip_coeff, -1.0), lambda: flip_coeff) 125 | else: 126 | image = process_for_eval(image, height, width) 127 | 128 | image = image_normalize(image, method=normalize, global_mean_std=SDC_MEAN_STD) 129 | 130 | return image, flip_coeff 131 | -------------------------------------------------------------------------------- /litterbox/processors/sdc/mu_law.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | 17 | 18 | def mu_law_enc(data, mu=255): 19 | with tf.name_scope('mu_enc'): 20 | mu = tf.cast(mu, tf.float32) 21 | data = tf.cast(data, tf.float32) 22 | companded = tf.sign(data) * tf.log(1. + mu * tf.abs(data)) / tf.log(1. + mu) 23 | return companded 24 | 25 | 26 | def mu_law_dec(data, mu=255): 27 | with tf.name_scope('mu_dec'): 28 | mu = tf.cast(mu, tf.float32) 29 | data = tf.cast(data, tf.float32) 30 | uncompanded = tf.sign(data) * (tf.pow(1. + mu, tf.abs(data)) - 1.) / mu 31 | return uncompanded 32 | 33 | 34 | def mu_law_steering_enc(angle_float, discrete=False): 35 | if discrete: 36 | # transform to discrete integers based on steering granularity of 37 | # source and then mu-law to one hot compatible discrete ints 38 | steering_angle_i64 = tf.cast(tf.round(angle_float / .00174533), tf.int64) 39 | input_range = tf.cast(9600, tf.float32) 40 | output_range = tf.cast(500, tf.float32) 41 | scaled = 2. 
* tf.cast(steering_angle_i64, tf.float32) / input_range 42 | encoded = mu_law_enc(scaled, mu=127) 43 | encoded = tf.cast(tf.floor((encoded + 1) * output_range / 2), tf.int32) 44 | else: 45 | # encode from steering float range to companded -1.0 to 1.0 range 46 | input_range = tf.cast(8.5, tf.float32) # half of the full range (max extreme minus min extreme) 47 | scaled = angle_float / input_range 48 | encoded = mu_law_enc(scaled, mu=255) 49 | return encoded 50 | 51 | 52 | def mu_law_steering_dec(angle_enc, discrete=False): 53 | if discrete: 54 | # transform back from one-hot compatible discrete ints to steering floats 55 | input_range = tf.cast(500, tf.float32) 56 | output_range = tf.cast(9600, tf.float32) 57 | scaled = (2. * angle_enc + 1) / input_range - 1 58 | decoded = mu_law_dec(scaled, mu=127) 59 | decoded = tf.cast(tf.ceil(decoded * output_range / 2.), tf.int32) 60 | decoded = tf.cast(decoded, tf.float32) * .00174533 61 | else: 62 | # decode from the companded -1.0 to 1.0 range back to the steering float range 63 | output_range = tf.cast(8.5, tf.float32) 64 | decoded = mu_law_dec(angle_enc, mu=255) 65 | decoded = decoded * output_range 66 | return decoded 67 | -------------------------------------------------------------------------------- /litterbox/processors/sdc/parse_proto_sdc.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | # Based on original Work Copyright 2016 Google Inc. All Rights Reserved. 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | # ============================================================================== 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | 30 | def parse_proto_sdc(example_serialized): 31 | """Parses an Example proto containing a training example of an image. 32 | 33 | The output of the build_image_data.py image preprocessing script is a dataset 34 | containing serialized Example protocol buffers. Each Example proto contains 35 | the following fields: 36 | 37 | image/height: 480 38 | image/width: 640 39 | image/colorspace: 'RGB' 40 | image/channels: 3 41 | image/format: 'JPEG' 42 | image/encoded: 43 | steering/angle: 615 44 | gps/lat: 37.999 45 | gps/long: 122.300 46 | 47 | Args: 48 | example_serialized: scalar Tensor tf.string containing a serialized 49 | Example protocol buffer. 50 | 51 | Returns: 52 | image_buffer: Tensor tf.string containing the contents of a JPEG file.
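image_timestamp: Tensor tf.int64 capture timestamp of the image.
camera_id: Tensor tf.string identifying the source camera.
steering_angle_f32: Tensor tf.float32 steering angle, interpolated to the image timestamp.
gps_f32: Tensor tf.float32 [lat, long] pair, interpolated to the image timestamp.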
53 | 54 | """ 55 | 56 | feature_map = { 57 | 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value=''), 58 | #'image/encoded': tf.FixedLenFeature([2], dtype=tf.string, default_value=['', '']), 59 | 'image/frame_id': tf.FixedLenFeature([], dtype=tf.string, default_value='center_camera'), 60 | 'image/timestamp': tf.FixedLenFeature([1], dtype=tf.int64, default_value=-1), 61 | 'steer/angle': tf.FixedLenFeature([2], dtype=tf.float32, default_value=[0.0, 0.0]), 62 | 'steer/speed': tf.FixedLenFeature([2], dtype=tf.float32, default_value=[0.0, 0.0]), 63 | 'steer/timestamp': tf.FixedLenFeature([2], dtype=tf.int64, default_value=[-1, -1]), 64 | 'gps/lat': tf.FixedLenFeature([2], dtype=tf.float32, default_value=[0.0, 0.0]), 65 | 'gps/long': tf.FixedLenFeature([2], dtype=tf.float32, default_value=[0.0, 0.0]), 66 | 'gps/timestamp': tf.FixedLenFeature([2], dtype=tf.int64, default_value=[-1, -1]), 67 | } 68 | 69 | features = tf.parse_single_example(example_serialized, feature_map) 70 | camera_id = tf.cast(features['image/frame_id'], tf.string) 71 | image_timestamp = tf.cast(features['image/timestamp'], tf.int64) 72 | 73 | # FIXME for some reason I decided to interpolate in tensorflow, re-thinking that decision... 74 | zero_const = tf.constant(0.0, dtype=tf.float64) 75 | 76 | steering_timestamp = features['steer/timestamp'] 77 | steering_angle = features['steer/angle'] 78 | if True: 79 | # interpolate 80 | steering_angle_delta = tf.cast(steering_angle[1] - steering_angle[0], tf.float64) 81 | steering_time_delta = tf.cast(steering_timestamp[1] - steering_timestamp[0], tf.float64) 82 | steering_image_time_delta = tf.cast(image_timestamp - steering_timestamp[0], tf.float64) 83 | steering_slope = tf.cond( 84 | tf.less(steering_time_delta, .001), 85 | lambda: zero_const, 86 | lambda: steering_angle_delta / steering_time_delta) 87 | steering_angle = tf.cast(steering_angle[0], tf.float64) + steering_slope * steering_image_time_delta 88 | steering_angle_f32 = tf.cast(steering_angle, tf.float32) 89 | else: 90 | # latest sample 91 | steering_angle_f32 = tf.cast(steering_angle[1], tf.float32) 92 | 93 | gps_timestamp = features['gps/timestamp'] 94 | gps_lat = features['gps/lat'] 95 | gps_long = features['gps/long'] 96 | if True: 97 | gps_lat_delta = tf.cast(gps_lat[1] - gps_lat[0], tf.float64) 98 | gps_long_delta = tf.cast(gps_long[1] - gps_long[0], tf.float64) 99 | gps_time_delta = tf.cast(gps_timestamp[1] - gps_timestamp[0], tf.float64) 100 | gps_lat_slope = tf.cond( 101 | tf.equal(gps_time_delta, zero_const), lambda: zero_const, lambda: gps_lat_delta / gps_time_delta) 102 | gps_long_slope = tf.cond( 103 | tf.equal(gps_time_delta, zero_const), lambda: zero_const, lambda: gps_long_delta / gps_time_delta) 104 | gps_image_time_delta = tf.cast(image_timestamp - gps_timestamp[0], tf.float64) 105 | gps_lat_interpolated = tf.cast(gps_lat[0], tf.float64) + gps_lat_slope * gps_image_time_delta 106 | gps_long_interpolated = tf.cast(gps_long[0], tf.float64) + gps_long_slope * gps_image_time_delta 107 | gps_f32 = tf.concat(0, [tf.cast(gps_lat_interpolated, tf.float32), tf.cast(gps_long_interpolated, tf.float32)]) 108 | else: 109 | gps_f32 = tf.pack([tf.cast(gps_lat[1], tf.float32), tf.cast(gps_long[1], tf.float32)]) 110 | return features['image/encoded'], image_timestamp, camera_id, steering_angle_f32, gps_f32 111 | 112 | -------------------------------------------------------------------------------- /litterbox/processors/sdc/processor_sdc.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | import fabric 17 | import math 18 | from processors.sdc.parse_proto_sdc import * 19 | from processors.sdc.image_processing_sdc import * 20 | from processors.sdc.mu_law import * 21 | from fabric.image_processing_common import * # FIXME for annoying flags 22 | 23 | 24 | STEERING_STD = 0.3 # rounded 25 | GPS_MEAN = [37.5, -122.3] # rounded 26 | GPS_STD = [0.2, 0.2] # approx 27 | 28 | 29 | class ProcessorSdc(fabric.Processor): 30 | 31 | def __init__(self, params={}): 32 | super(ProcessorSdc, self).__init__() 33 | 34 | image_aspect = params['image_aspect'] if 'image_aspect' in params else FLAGS.image_aspect 35 | image_size = params['image_size'] if 'image_size' in params else FLAGS.image_size 36 | image_fmt = params['image_fmt'] if 'image_fmt' in params else FLAGS.image_fmt 37 | image_norm = params['image_norm'] if 'image_norm' in params else FLAGS.image_norm 38 | 39 | # For aspect based image size, short edge set to FLAGS.image_size 40 | if image_aspect == 0.0 or image_aspect == 1.0: 41 | self.width = image_size 42 | self.height = image_size 43 | elif image_aspect < 1.0: 44 | self.width = math.floor(image_size * image_aspect) 45 | self.height = image_size 46 | else: 47 | self.width = image_size 48 | self.height = math.floor(image_size / image_aspect) 49 | self.image_fmt = image_fmt 50 | self.depth = 3 51 | self.standardize_input = image_norm 52 | self.standardize_labels = True 53 | self.mu_law_steering = False 54 | self.num_input_images = 1 55 | 56 | def get_input_shape(self, batch_size=0): 57 | shape = [self.height, self.width, self.depth] 58 | if self.num_input_images > 1: 59 | shape = [self.num_input_images] + shape 60 | if batch_size: 61 | shape = [batch_size] + shape 62 | return shape 63 | 64 | def parse_example(self, serialized_example): 65 | parsed = parse_proto_sdc(serialized_example) 66 | return parsed 67 | 68 | def process_example(self, tensors, mode='eval', thread_id=0): 69 | train = (mode == 'train') 70 | image, image_timestamp, camera_id = tensors[:3] 71 | 72 | #FIXME push single/multi image handling into image_process_sdc if we want to share random augmentations 73 | if self.num_input_images > 1: 74 | assert(len(image.get_shape()) > 0) 75 | print('Multi image', image.get_shape()) 76 | split_image = tf.unpack(image) 77 | split_processed = [] 78 | for i, x in enumerate(split_image): 79 | suffix = '%d' % i 80 | xp, _ = image_preprocess_sdc( 81 | x, camera_id, 82 | height=self.height, width=self.width, image_fmt=self.image_fmt, 83 | normalize=self.standardize_input, train=train, summary_suffix=suffix, thread_id=thread_id) 84 | split_processed.append(xp) 85 | processed_image = tf.pack(split_processed) 86 | #FIXME need to sort out flip across mult-images 87 | flip_coeff = tf.constant(1.0, dtype=tf.float32) 88 | else: 89 | print('Single image') 90 | processed_image, flip_coeff = image_preprocess_sdc( 91 | image, camera_id, 92 | 
height=self.height, width=self.width, image_fmt=self.image_fmt, 93 | normalize=self.standardize_input, train=train, thread_id=thread_id) 94 | 95 | if mode != 'pred': 96 | steering_angle, gps_coord = tensors[-2:] 97 | if steering_angle is not None: 98 | steering_angle = tf.mul(steering_angle, flip_coeff) 99 | if self.standardize_labels: 100 | steering_angle /= STEERING_STD 101 | elif self.mu_law_steering: 102 | print("Encode mu-law angles") 103 | steering_angle = mu_law_steering_enc(steering_angle) 104 | if gps_coord is not None and self.standardize_labels: 105 | gps_coord = (gps_coord - GPS_MEAN) / GPS_STD 106 | return processed_image, image_timestamp, steering_angle, gps_coord 107 | else: 108 | return processed_image, image_timestamp, tf.zeros((1,)), tf.zeros((2,)) 109 | 110 | def reshape_batch(self, batch_tensors, batch_size, num_splits=0): 111 | images, timestamps, steering_angles, gps_coords = batch_tensors 112 | 113 | images = tf.cast(images, tf.float32) 114 | if self.num_input_images > 1: 115 | images = tf.reshape(images, shape=[batch_size, self.num_input_images, self.height, self.width, self.depth]) 116 | else: 117 | images = tf.reshape(images, shape=[batch_size, self.height, self.width, self.depth]) 118 | timestamps = tf.reshape(timestamps, [batch_size]) 119 | steering_angles = tf.reshape(steering_angles, [batch_size, 1]) 120 | gps_coords = tf.reshape(gps_coords, [batch_size, 2]) 121 | 122 | if num_splits > 0: 123 | # Split tensors for multi-gpu training 124 | images = tf.split(0, num_splits, images) 125 | timestamps = tf.split(0, num_splits, timestamps) 126 | steering_angles = tf.split(0, num_splits, steering_angles) 127 | gps_coords = tf.split(0, num_splits, gps_coords) 128 | 129 | return images, timestamps, [steering_angles, gps_coords] 130 | 131 | # decode model 'output' values, ie predictions or target labels 132 | def decode_output(self, value, key=None): 133 | if key and key == 'steer': 134 | print('Decoding', key, value) 135 | if self.standardize_labels: 136 | return value * STEERING_STD 137 | elif self.mu_law_steering: 138 | return mu_law_steering_dec(value) 139 | else: 140 | return value 141 | elif key and key == 'xyz': 142 | print('Decoding', key, value) 143 | if self.standardize_labels: 144 | return value * GPS_STD + GPS_MEAN 145 | else: 146 | return value 147 | else: 148 | return value 149 | -------------------------------------------------------------------------------- /litterbox/sdc_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | from fabric import util 17 | from fabric import exec_eval 18 | from fabric import DatasetRecord 19 | from models import ModelSdc 20 | from processors import ProcessorSdc 21 | from feeds import FeedImagesWithLabels 22 | 23 | FLAGS = tf.app.flags.FLAGS 24 | 25 | tf.app.flags.DEFINE_string( 26 | 'subset', 'validation', 27 | """Either 'validation', 'train', 'test'""") 28 | 29 | tf.app.flags.DEFINE_string( 30 | 'root_network', 'resnet_v1_50', 31 | """Either resnet_v1_50, resnet_v1_101, resnet_v1_152, inception_resnet_v2, nvidia_sdc""") 32 | 33 | tf.app.flags.DEFINE_integer( 34 | 'top_version', 5, 35 | """Top level network version, specifies output layer variations. See model code.""") 36 | 37 | 38 | class SdcData(DatasetRecord): 39 | """Self-driving car dataset.""" 40 | 41 | def __init__(self): 42 | super(SdcData, self).__init__('sdc', FLAGS.subset) 43 | 44 | def num_classes(self): 45 | return 0 46 | 47 | def num_examples_per_epoch(self): 48 | """Returns the number of examples in the data subset.""" 49 | if self.subset == 'train': 50 | return 431627 #319814 #124200 #964809 51 | elif self.subset == 'validation': 52 | return 16709 #43134 #57557 #39000 53 | 54 | 55 | def main(_): 56 | util.check_tensorflow_version() 57 | 58 | processor = ProcessorSdc() 59 | #processor.mu_law_steering = True 60 | #processor.standardize_labels = False 61 | processor.standardize_input = 'frame' 62 | 63 | feed = FeedImagesWithLabels(dataset=SdcData(), processor=processor) 64 | 65 | model_params = { 66 | 'outputs': { 67 | 'steer': 1, 68 | # 'xyz': 2, 69 | }, 70 | 71 | #'network': 'resnet_v1_152', 72 | #'version': 1, 73 | 74 | #'network': 'nvidia_sdc', # 160x120 75 | #'version': 2, 76 | 77 | #'network': 'resnet_v1_101', # 192x128 78 | #'version': 3, 79 | 80 | #'network': 'resnet_v1_50', 81 | #'version': 5, 82 | 83 | 'network': FLAGS.root_network, 84 | 'version': FLAGS.top_version, 85 | } 86 | model = ModelSdc(params=model_params) 87 | 88 | exec_eval.evaluate(feed, model) 89 | 90 | if __name__ == '__main__': 91 | tf.app.run() 92 | -------------------------------------------------------------------------------- /litterbox/sdc_export_graph.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | import pandas as pd 17 | import os 18 | from copy import deepcopy 19 | from fabric import util 20 | from models import ModelSdc 21 | from processors import ProcessorSdc 22 | from collections import defaultdict 23 | 24 | 25 | FLAGS = tf.app.flags.FLAGS 26 | 27 | tf.app.flags.DEFINE_string( 28 | 'root_network', 'resnet_v1_50', 29 | """Either resnet_v1_50, resnet_v1_101, resnet_v1_152, inception_resnet_v2, nvidia_sdc""") 30 | 31 | tf.app.flags.DEFINE_integer( 32 | 'top_version', 5, 33 | """Top level network version, specifies output layer variations. See model code.""") 34 | 35 | tf.app.flags.DEFINE_boolean( 36 | 'bayesian', False, """Activate dropout layers for inference.""") 37 | 38 | tf.app.flags.DEFINE_integer( 39 | 'samples', 0, """Number of dropout samples per image; sets the export graph batch size.""") 40 | 41 | tf.app.flags.DEFINE_string( 42 | 'checkpoint_path', '', """Checkpoint file for model.""") 43 | 44 | tf.app.flags.DEFINE_string( 45 | 'ensemble_path', '', """CSV file with ensemble specification. Use as alternative to single model checkpoint.""") 46 | 47 | tf.app.flags.DEFINE_string( 48 | 'name', 'model', """Name prefix for outputs of exported artifacts.""") 49 | 50 | 51 | def _weighted_mean(outputs_list, weights_tensor): 52 | assert isinstance(outputs_list[0], tf.Tensor) 53 | print(outputs_list) 54 | outputs_tensor = tf.concat(1, outputs_list) 55 | print('outputs concat', outputs_tensor.get_shape()) 56 | if len(outputs_list) > 1: 57 | weighted_outputs = outputs_tensor * weights_tensor 58 | print('weighted outputs ', weighted_outputs.get_shape()) 59 | outputs_tensor = tf.reduce_mean(weighted_outputs) 60 | else: 61 | outputs_tensor = tf.squeeze(outputs_tensor) 62 | return outputs_tensor 63 | 64 | 65 | def _merge_outputs(outputs, weights): 66 | assert outputs 67 | 68 | merged = defaultdict(list) 69 | weights_tensor = tf.pack(weights) 70 | print('weights ', weights_tensor.get_shape()) 71 | 72 | # recombine multiple model outputs, keyed by dict key or list position, into an output-name based dict 73 | if isinstance(outputs[0], dict): 74 | for o in outputs: 75 | for name, tensor in o.items(): 76 | merged['output_%s' % name].append(tensor) 77 | elif isinstance(outputs[0], list): 78 | for o in outputs: 79 | for index, tensor in enumerate(o): 80 | merged['output_%d' % index].append(tensor) 81 | else: 82 | merged['output'] = outputs 83 | 84 | reduced = {name: _weighted_mean(value_list, weights_tensor) for name, value_list in merged.items()} 85 | for k, v in reduced.items(): 86 | print(k, v, v.get_shape()) 87 | 88 | return reduced 89 | 90 | 91 | def build_export_graph(models, batch_size=1, export_scope=''): 92 | assert models 93 | 94 | inputs = tf.placeholder(tf.uint8, [None, None, 3], name='input_placeholder') 95 | print("Graph Inputs: ") 96 | print(inputs.name, inputs.get_shape()) 97 | 98 | with tf.device('/gpu:0'): 99 | inputs = tf.cast(inputs, tf.float32) 100 | inputs = tf.div(inputs, 255) 101 | input_tensors = [inputs, tf.zeros(shape=()), tf.constant('', dtype=tf.string)] 102 | model_outputs_list = [] 103 | weights_list = [] 104 | for m in models: 105 | with tf.variable_scope(m['name'], values=input_tensors): 106 | model, processor = m['model'], m['processor'] 107
| processed_inputs = processor.process_example(input_tensors, mode='pred') 108 | if batch_size > 1: 109 | processed_inputs = [tf.gather(tf.expand_dims(x, 0), [0] * batch_size) for x in processed_inputs] 110 | processed_inputs = processor.reshape_batch(processed_inputs, batch_size=batch_size) 111 | model_outputs = model.build_tower( 112 | processed_inputs[0], is_training=False, summaries=False) 113 | model_outputs_list += [model.get_predictions(model_outputs, processor)] 114 | weights_list += [m['weight']] 115 | 116 | merged_outputs = _merge_outputs(model_outputs_list, weights_list) 117 | 118 | print("Graph Outputs: ") 119 | outputs = [] 120 | for name, output in merged_outputs.items(): 121 | outputs += [tf.identity(output, name)] 122 | [print(x.name, x.get_shape()) for x in outputs] 123 | 124 | return inputs, outputs 125 | 126 | 127 | def main(_): 128 | util.check_tensorflow_version() 129 | assert os.path.isfile(FLAGS.checkpoint_path) or os.path.isfile(FLAGS.ensemble_path) 130 | 131 | model_args_list = [] 132 | if FLAGS.checkpoint_path: 133 | model_args_list.append( 134 | { 135 | 'root_network': FLAGS.root_network, 136 | 'top_version': FLAGS.top_version, 137 | 'image_norm': FLAGS.image_norm, 138 | 'image_size': FLAGS.image_size, 139 | 'image_aspect': FLAGS.image_aspect, 140 | 'checkpoint_path': FLAGS.checkpoint_path, 141 | 'bayesian': FLAGS.bayesian, 142 | 'weight': 1.0, 143 | } 144 | ) 145 | else: 146 | ensemble_df = pd.DataFrame.from_csv(FLAGS.ensemble_path, index_col=None) 147 | model_args_list += ensemble_df.to_dict('records') 148 | 149 | model_params_common = { 150 | 'outputs': { 151 | 'steer': 1, 152 | # 'xyz': 2, 153 | }, 154 | } 155 | model_list = [] 156 | for i, args in enumerate(model_args_list): 157 | print(args) 158 | model_name = 'model_%d' % i 159 | model_params = deepcopy(model_params_common) 160 | model_params['network'] = args['root_network'] 161 | model_params['version'] = args['top_version'] 162 | model_params['bayesian'] = FLAGS.bayesian 163 | model = ModelSdc(params=model_params) 164 | 165 | processor_params = {} 166 | processor_params['image_norm'] = args['image_norm'] 167 | processor_params['image_size'] = args['image_size'] 168 | processor_params['image_aspect'] = args['image_aspect'] 169 | processor = ProcessorSdc(params=processor_params) 170 | 171 | model_list.append({ 172 | 'model': model, 173 | 'processor': processor, 174 | 'weight': args['weight'], 175 | 'name': model_name, 176 | 'checkpoint_path': args['checkpoint_path'] 177 | }) 178 | 179 | name_prefix = FLAGS.name 180 | with tf.Graph().as_default() as g: 181 | batch_size = 1 if not FLAGS.samples else FLAGS.samples 182 | build_export_graph(models=model_list, batch_size=batch_size) 183 | model_variables = tf.contrib.framework.get_model_variables() 184 | saver = tf.train.Saver(model_variables) 185 | 186 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: 187 | init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) 188 | sess.run(init_op) 189 | 190 | g_def = g.as_graph_def(add_shapes=True) 191 | tf.train.write_graph(g_def, './', name='%s-graph_def.pb.txt' % name_prefix) 192 | 193 | for m in model_list: 194 | checkpoint_variable_set = set() 195 | checkpoint_path, global_step = util.resolve_checkpoint_path(m['checkpoint_path']) 196 | if not checkpoint_path: 197 | print('No checkpoint file found at %s' % m['checkpoint_path']) 198 | return 199 | reader = tf.train.NewCheckpointReader(checkpoint_path) 200 | 
checkpoint_variable_set.update(reader.get_variable_to_shape_map().keys()) 201 | variables_to_restore = m['model'].variables_to_restore( 202 | restore_outputs=True, 203 | checkpoint_variable_set=checkpoint_variable_set, 204 | prefix_scope=m['name']) 205 | 206 | saver_local = tf.train.Saver(variables_to_restore) 207 | saver_local.restore(sess, checkpoint_path) 208 | print('Successfully loaded model from %s at step=%d.' % (checkpoint_path, global_step)) 209 | 210 | saver.export_meta_graph('./%s-meta_graph.pb.txt' % name_prefix, as_text=True) 211 | saver.save(sess, './%s-checkpoint' % name_prefix, write_meta_graph=True) 212 | 213 | if __name__ == '__main__': 214 | tf.app.run() 215 | 216 | -------------------------------------------------------------------------------- /litterbox/sdc_pred.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ Predict classes for test data 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | import numpy as np 17 | import pandas as pd 18 | import os 19 | from fabric import util, exec_predict 20 | from fabric.dataset_file import DatasetFile 21 | from feeds import FeedImagesWithLabels 22 | from models import ModelSdc 23 | from processors import ProcessorSdc 24 | 25 | FLAGS = tf.app.flags.FLAGS 26 | 27 | tf.app.flags.DEFINE_string( 28 | 'root_network', 'resnet_v1_50', 29 | """Either resnet_v1_50, resnet_v1_101, resnet_v1_152, inception_resnet_v2, nvidia_sdc""") 30 | 31 | tf.app.flags.DEFINE_integer( 32 | 'top_version', 5, 33 | """Top level network version, specifies output layer variations. 
See model code.""") 34 | 35 | 36 | class Challenge2Data(DatasetFile): 37 | # Example dataset for feeding folder of images into model 38 | 39 | def __init__(self, subset): 40 | super(Challenge2Data, self).__init__( 41 | 'Challenge2', subset, types=('.png', '.jpg')) 42 | 43 | 44 | def main(_): 45 | util.check_tensorflow_version() 46 | 47 | processor = ProcessorSdc() 48 | #processor.mu_law_steering = True 49 | #processor.standardize_labels = False 50 | 51 | feed = FeedImagesWithLabels( 52 | dataset=Challenge2Data(subset=''), 53 | processor=processor) 54 | 55 | model_params = { 56 | 'outputs': { 57 | 'steer': 1, 58 | # 'xyz': 2, 59 | }, 60 | 61 | 'network': FLAGS.root_network, 62 | 'version': FLAGS.top_version, 63 | 'bayesian': False, 64 | } 65 | model = ModelSdc(params=model_params) 66 | 67 | output, num_entries = exec_predict.predict(feed, model) 68 | 69 | filenames = [] 70 | steering_angles = [] 71 | coords = [] 72 | for o in output: 73 | filenames.extend([int(os.path.splitext(os.path.basename(f))[0]) for f in o[1]]) 74 | if 'steer' in o[0]: 75 | steering_angles.extend(np.squeeze(o[0]['steer'], axis=1)) 76 | if 'xyz' in o[0]: 77 | coords.extend(o[0]['xyz']) 78 | if coords: 79 | coords = np.vstack(coords) 80 | 81 | if steering_angles: 82 | columns_ang = ['frame_id', 'steering_angle'] 83 | df_ang = pd.DataFrame(data={columns_ang[0]: filenames, columns_ang[1]: steering_angles}, columns=columns_ang) 84 | df_ang = df_ang.head(num_entries).sort_values(by='frame_id') 85 | df_ang.to_csv('./output_angle.csv', index=False) 86 | 87 | if isinstance(coords, np.ndarray) and coords.shape[0]: 88 | columns_loc = ['frame_id', 'longitude', 'latitude'] 89 | df_loc = pd.DataFrame( 90 | data={columns_loc[0]: filenames, columns_loc[1]: coords[:, 0], columns_loc[2]: coords[:, 1]}, 91 | columns=columns_loc) 92 | df_loc = df_loc.head(num_entries) 93 | df_loc = df_loc.sort_values(by='frame_id') 94 | df_loc.to_csv('./output_coords.csv', index=False) 95 | df_loc.ix[:, -2:].to_csv('./output_coords_only.csv', index=False) 96 | 97 | if __name__ == '__main__': 98 | tf.app.run() -------------------------------------------------------------------------------- /litterbox/sdc_run_graph.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | import pandas as pd 17 | import numpy as np 18 | import threading 19 | import queue 20 | import time 21 | import os 22 | import cv2 23 | import argparse 24 | from datetime import datetime 25 | 26 | 27 | def get_image_files(folder, types=('.jpg', '.jpeg', '.png')): 28 | filenames = [] 29 | for root, subdirs, files in os.walk(folder, topdown=False): 30 | filenames += [os.path.join(root, f) for f in files if os.path.splitext(f)[1].lower() in types] 31 | filenames = list(sorted(filenames)) 32 | return filenames 33 | 34 | 35 | class RwightmanModel(object): 36 | 37 | def __init__(self, alpha=0.9, graph_path='', checkpoint_path='', metagraph_path=''): 38 | if graph_path: 39 | assert os.path.isfile(graph_path) 40 | else: 41 | assert os.path.isfile(checkpoint_path) and os.path.isfile(metagraph_path) 42 | self.graph = tf.Graph() 43 | with self.graph.as_default(): 44 | if graph_path: 45 | # load a graph with weights frozen as constants 46 | graph_def = tf.GraphDef() 47 | with open(graph_path, "rb") as f: 48 | graph_def.ParseFromString(f.read()) 49 | _ = tf.import_graph_def(graph_def, name="") 50 | self.session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 51 | else: 52 | # load a meta-graph and initialize variables from checkpoint 53 | saver = tf.train.import_meta_graph(metagraph_path) 54 | self.session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 55 | saver.restore(self.session, checkpoint_path) 56 | self.model_input = self.session.graph.get_tensor_by_name("input_placeholder:0") 57 | self.model_output = self.session.graph.get_tensor_by_name("output_steer:0") 58 | self.last_steering_angle = 0 # None 59 | self.alpha = alpha 60 | 61 | def predict(self, image): 62 | feed_dict = {self.model_input: image} 63 | steering_angle = self.session.run(self.model_output, feed_dict=feed_dict) 64 | if self.last_steering_angle is None: 65 | self.last_steering_angle = steering_angle 66 | steering_angle = self.alpha * steering_angle + (1 - self.alpha) * self.last_steering_angle 67 | self.last_steering_angle = steering_angle 68 | return steering_angle 69 | 70 | 71 | class ProcessorThread(threading.Thread): 72 | 73 | def __init__(self, name, q, model): 74 | super(ProcessorThread, self).__init__(name=name) 75 | self.q = q 76 | self.model = model 77 | self.outputs = [] 78 | 79 | def run(self): 80 | print('Entering processing loop...') 81 | while True: 82 | item = self.q.get() 83 | if item is None: 84 | print("Exiting processing loop...") 85 | break 86 | output = self.model.predict(item) 87 | self.outputs.append(output) 88 | self.q.task_done() 89 | return 90 | 91 | 92 | def main(): 93 | parser = argparse.ArgumentParser() 94 | parser.add_argument('--alpha', type=float, default=0.5, help='Exponential smoothing factor for predicted steering angles') 95 | parser.add_argument('--graph_path', type=str, help='Path to a frozen graph def file') 96 | parser.add_argument('--metagraph_path', type=str, help='Path to the metagraph file') 97 | parser.add_argument('--checkpoint_path', type=str, help='Path to the checkpoint file') 98 | parser.add_argument('--data_dir', type=str, required=True, help='Path to the images') 99 | parser.add_argument('--target_csv', type=str, help='Path to
target csv for optional RMSE calc.') 100 | args = parser.parse_args() 101 | print(args.alpha) 102 | print('%s: Initializing model.' % datetime.now()) 103 | model = RwightmanModel( 104 | alpha=args.alpha, 105 | graph_path=args.graph_path, 106 | metagraph_path=args.metagraph_path, 107 | checkpoint_path=args.checkpoint_path) 108 | # Push one empty image through to ensure Tensorflow is ready, random wait on first frame otherwise 109 | model.predict(np.zeros(shape=[480, 640, 3])) 110 | 111 | q = queue.Queue(20) 112 | processor = ProcessorThread('process', q, model) 113 | processor.start() 114 | image_files = get_image_files(args.data_dir) 115 | 116 | print('%s: starting execution on (%s).' % (datetime.now(), args.data_dir)) 117 | start_time = time.time() 118 | timestamps = [] 119 | for f in image_files: 120 | # Note, all cv2 based decode and split/merge color channel switch resulted in faster throughput, lower 121 | # CPU usage than PIL Image or cv2 + python array slice reversal. 122 | image = cv2.imread(f) 123 | b, g, r = cv2.split(image) # get BGR channels 124 | image = cv2.merge([r, g, b]) # merge as RGB 125 | q.put(image) 126 | timestamps.append(os.path.splitext(os.path.basename(f))[0]) 127 | 128 | q.put(None) 129 | processor.join() 130 | 131 | duration = time.time() - start_time 132 | images_per_sec = len(image_files) / duration 133 | print('%s: %d images processed in %s seconds, %.1f images/sec' 134 | % (datetime.now(), len(image_files), duration, images_per_sec)) 135 | 136 | columns_ang = ['frame_id', 'steering_angle'] 137 | df_ang = pd.DataFrame(data={columns_ang[0]: timestamps, columns_ang[1]: processor.outputs}, columns=columns_ang) 138 | df_ang.to_csv('./output_angle.csv', index=False) 139 | 140 | if args.target_csv: 141 | targets_df = pd.read_csv(args.target_csv, header=0, index_col=False) 142 | targets = np.squeeze(targets_df.as_matrix(columns=[columns_ang[1]])) 143 | predictions = np.asarray(processor.outputs) 144 | mse = ((predictions - targets) ** 2).mean() 145 | rmse = np.sqrt(mse) 146 | print("RMSE: %f, MSE: %f" % (rmse, mse)) 147 | 148 | if __name__ == '__main__': 149 | main() 150 | -------------------------------------------------------------------------------- /litterbox/sdc_train.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | """ 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import tensorflow as tf 16 | from fabric import util 17 | from fabric import exec_train 18 | from fabric import DatasetRecord 19 | from models import ModelSdc 20 | from processors import ProcessorSdc 21 | from feeds import FeedImagesWithLabels 22 | 23 | FLAGS = tf.app.flags.FLAGS 24 | 25 | tf.app.flags.DEFINE_string('subset', 'train', 26 | """Either 'validation', 'train', 'test'""") 27 | 28 | tf.app.flags.DEFINE_string('root_network', 'resnet_v1_50', 29 | """Either resnet_v1_50, resnet_v1_101, resnet_v1_152, inception_resnet_v2, nvidia_sdc""") 30 | 31 | tf.app.flags.DEFINE_integer('top_version', 5, 32 | """Top level network version, specifies output layer variations. 
See model code.""") 33 | 34 | tf.app.flags.DEFINE_boolean('lock_root', False, 'Lock root convnet parameters') 35 | 36 | 37 | class SdcData(DatasetRecord): 38 | """Self-driving car dataset.""" 39 | 40 | def __init__(self): 41 | super(SdcData, self).__init__('sdc', FLAGS.subset) 42 | 43 | def num_classes(self): 44 | return 0 45 | 46 | def num_examples_per_epoch(self): 47 | """Returns the number of examples in the data subset.""" 48 | if self.subset == 'train': 49 | return 92643 #431627 #319814 50 | elif self.subset == 'validation': 51 | return 16709 #43134 #57557 52 | 53 | 54 | def main(_): 55 | util.check_tensorflow_version() 56 | 57 | processor = ProcessorSdc() 58 | processor.standardize_input = 'frame' 59 | #processor.num_input_images = 2 60 | 61 | feed = FeedImagesWithLabels(dataset=SdcData(), processor=processor) 62 | 63 | model_params = { 64 | 'outputs': { 65 | 'steer': 1, 66 | # 'xyz': 2, 67 | }, 68 | 69 | #'network': 'resnet_v1_50', 70 | #'version': 5, 71 | 72 | 'network': FLAGS.root_network, 73 | 'version': FLAGS.top_version, 74 | 'bayesian': False, 75 | 'lock_root': FLAGS.lock_root, 76 | 'regression_loss': 'mse', 77 | } 78 | model = ModelSdc(params=model_params) 79 | 80 | exec_train.train(feed, model) 81 | 82 | if __name__ == '__main__': 83 | tf.app.run() 84 | -------------------------------------------------------------------------------- /utils/compare_csv.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import pandas as pd 4 | 5 | 6 | def calc_rmse(predictions, targets): 7 | mse = ((predictions - targets) ** 2).mean() 8 | rmse = np.sqrt(mse) 9 | return rmse, mse 10 | 11 | 12 | def main(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('files', metavar='filename', type=str, nargs='*') 15 | args = parser.parse_args() 16 | args = vars(args) 17 | 18 | files = args['files'] 19 | 20 | assert len(files) == 2 21 | 22 | targets_df = pd.read_csv(files[0], header=0, index_col=False) 23 | predict_df = pd.read_csv(files[1], header=0, index_col=False) 24 | 25 | column = targets_df.columns[1] 26 | 27 | targets = targets_df.as_matrix(columns=[column]) 28 | 29 | #predict_df[column] = pd.ewma(predict_df[column], com=1, adjust=False) 30 | predictions = predict_df.as_matrix(columns=[column]) 31 | 32 | rmse, mse = calc_rmse(predictions, targets) 33 | print("RMSE: %f, MSE: %f" % (rmse, mse)) 34 | 35 | if __name__ == "__main__": 36 | main() 37 | 38 | -------------------------------------------------------------------------------- /utils/ensemble_csv.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import pandas as pd 4 | import collections 5 | 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('-o', '--outfile', metavar='out_file', type=str, default='ensemble.csv') 10 | parser.add_argument('-w', '--weights', metavar='weight_file', type=str, default='') 11 | parser.add_argument('-m', '--mean', type=str, default='arith') 12 | parser.add_argument('files', metavar='filename', type=str, nargs='*') 13 | args = parser.parse_args() 14 | args = vars(args) 15 | 16 | files = args['files'] 17 | outfile = args['outfile'] 18 | weights_file = args['weights'] 19 | mean_type = args['mean'] 20 | 21 | if weights_file: 22 | wf = pd.read_csv(weights_file, names=['file', 'weight'], header=None, index_col=False) 23 | files, weights = wf.file.tolist(), wf.weight.tolist() 24 | else: 25 | counted = 
collections.Counter(files) 26 | files, weights = counted.keys(), counted.values() 27 | 28 | frames = [] 29 | for file, weight in zip(files, weights): 30 | df = pd.read_csv(file, header=0, index_col=False) 31 | if mean_type == 'geom': 32 | df.iloc[:, 1:] = df.iloc[:, 1:].pow(weight) 33 | else: 34 | df.iloc[:, 1:] = df.iloc[:, 1:] * weight 35 | frames.append(df) 36 | 37 | merged = pd.concat(frames) 38 | group_col = merged.columns[0] 39 | weights_sum = sum(weights) 40 | if mean_type == 'geom': 41 | result = merged.groupby(merged[group_col]).prod() 42 | result = result.pow(1/weights_sum) 43 | result = result.div(result.sum(axis=1), axis=0) 44 | else: 45 | result = merged.groupby(merged[group_col]).sum() 46 | result = result / weights_sum 47 | 48 | result.to_csv(outfile) 49 | 50 | if __name__ == "__main__": 51 | main() 52 | 53 | -------------------------------------------------------------------------------- /utils/torch.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016 Ross Wightman. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # ============================================================================== 9 | import torchfile 10 | import argparse 11 | 12 | 13 | def convert_spatial_batch_normalization(obj): 14 | return "" 15 | 16 | 17 | def convert_spatial_convolution(obj): 18 | weights = obj.weight 19 | #biases = obj.bias 20 | kernel_width = obj.kW 21 | kernel_height = obj.kH 22 | stride_width = obj.dW 23 | stride_height = obj.dH 24 | pad_width = obj.padW 25 | pad_height = obj.padH 26 | out = '[%d, %d], %d, %s' % (kernel_height, kernel_width, stride_height, str(weights.shape)) 27 | return out 28 | 29 | 30 | def convert_linear(obj): 31 | weights = obj.weights 32 | biases = obj.biases 33 | out = '%s, %s' % (str(weights.shape), str(biases.shape)) 34 | return out 35 | 36 | 37 | def convert_spatial_average_pooling(obj): 38 | return "" 39 | 40 | 41 | def convert_spatial_max_pooling(obj): 42 | return "" 43 | 44 | 45 | def convert_unknown(obj): 46 | return 'UnknownClass' 47 | 48 | 49 | torch_converters = {} 50 | 51 | 52 | def convert_obj(typename, obj): 53 | name_parts = typename.rsplit('.', 1) 54 | if not name_parts or not name_parts[-1]: 55 | return 56 | class_name = name_parts[-1] 57 | if class_name not in torch_converters: 58 | return convert_unknown(obj) 59 | else: 60 | return torch_converters[class_name](obj) 61 | 62 | 63 | def add_converter(typename, convert_fn): 64 | torch_converters[typename] = convert_fn 65 | for mod in [("SpatialAveragePooling", convert_spatial_average_pooling), 66 | ("SpatialBatchNormalization", convert_spatial_batch_normalization), 67 | ("SpatialConvolution", convert_spatial_convolution), 68 | ("SpatialMaxPooling", convert_spatial_max_pooling)]: 69 | add_converter(mod[0], mod[1]) 70 | 71 | 72 | def process_obj(obj, level=0): 73 | indent = ''.join(['\t' for s in range(level)]) 74 | if isinstance(obj, torchfile.TorchObject): 75 | #print(indent + obj.torch_typename()) 76 | print(indent + obj.torch_typename() + ': ' + convert_obj(obj.torch_typename(), obj)) 77 | if obj.modules: 78 | for x in obj.modules: 79 | process_obj(x, level+1) 80 | 81 | 82 | def main(): 83 | parser = argparse.ArgumentParser() 84 | parser.add_argument('torch_file') 85 | args = parser.parse_args() 86 | torch_file =
args.torch_file 87 | 88 | data = torchfile.load(torch_file, force_8bytes_long=True) 89 | 90 | if data.modules: 91 | process_obj(data) 92 | 93 | 94 | if __name__ == '__main__': 95 | main() --------------------------------------------------------------------------------
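A brief usage sketch for utils/torch.py (the model filename below is a placeholder, not a file shipped with this repo): it loads a Torch7 checkpoint via torchfile and recursively walks obj.modules, printing one indented line per module with the registered converter's summary (kernel size, stride, and weight shape for SpatialConvolution; 'UnknownClass' for unregistered module types).

python utils/torch.py model.t7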