├── .gitignore ├── LICENSE ├── README.md ├── dytb ├── __init__.py ├── evaluate.py ├── evaluators │ ├── AutoencoderEvaluator.py │ ├── ClassifierEvaluator.py │ ├── DetectorEvaluator.py │ ├── Evaluator.py │ ├── Metric.py │ ├── RegressorEvaluator.py │ ├── __init__.py │ └── metrics.py ├── inputs │ ├── __init__.py │ ├── images.py │ ├── interfaces.py │ ├── predefined │ │ ├── Cifar10.py │ │ ├── Cifar100.py │ │ ├── MNIST.py │ │ ├── ORLFaces.py │ │ ├── PASCALVOC2012Classification.py │ │ ├── PASCALVOC2012Localization.py │ │ └── __init__.py │ └── processing.py ├── models │ ├── __init__.py │ ├── collections.py │ ├── interfaces.py │ ├── layers.py │ ├── predefined │ │ ├── LeNet.py │ │ ├── LeNetBN.py │ │ ├── LeNetDirectDropout.py │ │ ├── LeNetDropout.py │ │ ├── SingleLayerCAE.py │ │ ├── StackedCAE.py │ │ ├── StackedDenoisingCAE.py │ │ ├── VGG.py │ │ ├── VGGBN.py │ │ ├── VGGDirectDropout.py │ │ ├── VGGDropout.py │ │ └── __init__.py │ ├── utils.py │ └── visualization.py ├── train.py ├── trainer │ ├── Trainer.py │ ├── __init__.py │ └── utils │ │ ├── __init__.py │ │ ├── builders.py │ │ └── flow.py └── utils │ ├── CLIArgs.py │ └── __init__.py ├── examples └── VGG-Cifar10-100-TransferLearning-FineTuning.ipynb ├── requirements.txt ├── scripts ├── dytb_evaluate ├── dytb_train ├── inputs │ └── __init__.py └── models │ └── __init__.py ├── setup.cfg ├── setup.py └── tests ├── extract_features.py └── images └── nocat.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | .hypothesis/ 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | 57 | # PyBuilder 58 | target/ 59 | 60 | #Ipython Notebook 61 | .ipynb_checkpoints 62 | 63 | # Datasets 64 | data/ 65 | 66 | # Logs 67 | log/ 68 | 69 | # Build 70 | build/* 71 | -------------------------------------------------------------------------------- /dytb/__init__.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Simplify the trainining and tuning of Tensorflow models""" 9 | 10 | from . import inputs 11 | from . import models 12 | 13 | __version__ = '0.7.4' 14 | __url__ = 'https://github.com/galeone/dynamic-training-bench' 15 | __author__ = 'Paolo Galeone' 16 | __email__ = 'nessuno@nerdz.eu' 17 | -------------------------------------------------------------------------------- /dytb/evaluate.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Evaluation method and utilities""" 9 | 10 | from .inputs.interfaces import InputType 11 | 12 | 13 | def evaluate(metric, 14 | checkpoint_path, 15 | model, 16 | dataset, 17 | input_type, 18 | batch_size, 19 | augmentation_fn=None): 20 | """Eval the model, restoring weight found in checkpoint_path, using the dataset. 21 | Args: 22 | metric: the metric to evaluate. The usual dictionary with the fn and its properties 23 | checkpoint_path: path of the trained model checkpoint directory 24 | model: implementation of the Model interface 25 | dataset: implementation of the Input interface 26 | input_type: InputType enum 27 | batch_size: evaluate in batch of size batch_size 28 | augmentation_fn: if present, applies the augmentation to the input data 29 | Returns: 30 | value: scalar value representing the evaluation of the model, 31 | on the dataset, fetching values of the specified input_type 32 | """ 33 | InputType.check(input_type) 34 | model.evaluator.dataset = dataset 35 | return model.evaluator.eval(metric, checkpoint_path, input_type, batch_size, 36 | augmentation_fn) 37 | 38 | 39 | def stats(checkpoint_path, model, dataset, batch_size, augmentation_fn=None): 40 | """Eval the model, restoring weight found in checkpoint_path, using the dataset. 41 | Args: 42 | checkpoint_path: path of the trained model checkpoint directory 43 | model: implementation of the Model interface 44 | dataset: implementation of the Input interface 45 | batch_size: evaluate in batch of size batch_size 46 | augmentation_fn: if present, applies the augmentation to the input data 47 | Returns: 48 | dict: a dictionary with the statistics measured 49 | """ 50 | model.evaluator.dataset = dataset 51 | return model.evaluator.stats(checkpoint_path, batch_size, augmentation_fn) 52 | -------------------------------------------------------------------------------- /dytb/evaluators/AutoencoderEvaluator.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """ Evaluate Autoencoding models """ 9 | 10 | from .Evaluator import Evaluator 11 | 12 | 13 | class AutoencoderEvaluator(Evaluator): 14 | """AutoencoderEvaluator is the evaluation object for a Autoencoder model""" 15 | 16 | @property 17 | def metrics(self): 18 | """Returns a list of dict with keys: 19 | { 20 | "fn": function 21 | "name": name 22 | "positive_trend_sign": sign that we like to see when things go well 23 | "model_selection": boolean, True if the metric has to be measured to select the model 24 | "average": boolean, true if the metric should be computed as average over the batches. 25 | If false the results over the batches are just added 26 | "tensorboard": boolean. True if the metric is a scalar and can be logged in tensoboard 27 | } 28 | """ 29 | return [{ 30 | "fn": self._model.loss, 31 | "name": "error", 32 | "positive_trend_sign": -1, 33 | "model_selection": True, 34 | "average": True, 35 | "tensorboard": True, 36 | }] 37 | -------------------------------------------------------------------------------- /dytb/evaluators/ClassifierEvaluator.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """ Evaluate Classification models """ 9 | 10 | from .Evaluator import Evaluator 11 | from .metrics import accuracy_op, confusion_matrix_op 12 | 13 | 14 | class ClassifierEvaluator(Evaluator): 15 | """ClassifierEvaluator is the evaluation object for a Classifier model""" 16 | 17 | @property 18 | def metrics(self): 19 | """Returns a list of dict with keys: 20 | { 21 | "fn": function 22 | "name": name 23 | "positive_trend_sign": sign that we like to see when things go well 24 | "model_selection": boolean, True if the metric has to be measured to select the model 25 | "average": boolean, true if the metric should be computed as average over the batches. 26 | If false the results over the batches are just added 27 | "tensorboard": boolean. True if the metric is a scalar and can be logged in tensoboard 28 | } 29 | """ 30 | return [{ 31 | "fn": accuracy_op, 32 | "name": "accuracy", 33 | "positive_trend_sign": +1, 34 | "model_selection": True, 35 | "average": True, 36 | "tensorboard": True, 37 | }, { 38 | "fn": 39 | lambda logits, labels: confusion_matrix_op(logits, labels, self.dataset.num_classes), 40 | "name": 41 | "confusion_matrix", 42 | "positive_trend_sign": 43 | 0, 44 | "model_selection": 45 | False, 46 | "average": 47 | False, 48 | "tensorboard": 49 | False, 50 | }] 51 | -------------------------------------------------------------------------------- /dytb/evaluators/DetectorEvaluator.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """ Evaluate Detection models """ 9 | 10 | from .Evaluator import Evaluator 11 | from .metrics import iou_op 12 | 13 | 14 | class DetectorEvaluator(Evaluator): 15 | """DetectorEvaluator is the evaluation object for a Detector model""" 16 | 17 | @property 18 | def metrics(self): 19 | """Returns a list of dict with keys: 20 | { 21 | "fn": function 22 | "name": name 23 | "positive_trend_sign": sign that we like to see when things go well 24 | "model_selection": boolean, True if the metric has to be measured to select the model 25 | "average": boolean, true if the metric should be computed as average over the batches. 26 | If false the results over the batches are just added 27 | "tensorboard": boolean. True if the metric is a scalar and can be logged in tensoboard 28 | } 29 | """ 30 | return [{ 31 | "fn": iou_op, 32 | "name": "IoU", 33 | "positive_trend_sign": +1, 34 | "model_selection": True, 35 | "average": True, 36 | "tensorboard": True, 37 | }] 38 | -------------------------------------------------------------------------------- /dytb/evaluators/Metric.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Utility functions for model training and evaluation""" 9 | 10 | # TODO: understand if metrics can be formalized using an abstract class 11 | # or something different is better 12 | 13 | from abc import ABCMeta, abstractproperty 14 | 15 | 16 | class Metric(object, metaclass=ABCMeta): 17 | """Metric is a metric to measure and defined by its properties""" 18 | 19 | @staticmethod 20 | @abstractproperty 21 | def func(outputs, targets): 22 | """Metric to measure between outputs and targets""" 23 | 24 | @staticmethod 25 | @abstractproperty 26 | def name(): 27 | """Name of the metric""" 28 | 29 | @staticmethod 30 | @abstractproperty 31 | def positive_trend_sign(): 32 | """+1 or -1 depending on the expected trend when the 33 | metric goes well""" 34 | 35 | @staticmethod 36 | @abstractproperty 37 | def model_selection(): 38 | """Boolean: true if the best model should be choosen looking 39 | at the trend of this metric""" 40 | 41 | @staticmethod 42 | @abstractproperty 43 | def average(): 44 | """Boolean: true if the metric should be averaged over different 45 | measures among the dataset. If false the values are added""" 46 | 47 | @staticmethod 48 | @abstractproperty 49 | def tensorboard(): 50 | """Boolean: True if the metric should be logged in Tensorboard. 51 | The metric should output a scalar to be logged""" 52 | -------------------------------------------------------------------------------- /dytb/evaluators/RegressorEvaluator.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """ Evaluate Regression models """ 9 | 10 | from .Evaluator import Evaluator 11 | 12 | 13 | class RegressorEvaluator(Evaluator): 14 | """RegressorEvaluator is the evaluation object for a Regressor model""" 15 | 16 | @property 17 | def metrics(self): 18 | """Returns a list of dict with keys: 19 | { 20 | "fn": function 21 | "name": name 22 | "positive_trend_sign": sign that we like to see when things go well 23 | "model_selection": boolean, True if the metric has to be measured to select the model 24 | "average": boolean, true if the metric should be computed as average over the batches. 25 | If false the results over the batches are just added 26 | "tensorboard": boolean. True if the metric is a scalar and can be logged in tensoboard 27 | } 28 | """ 29 | return [{ 30 | "fn": self._model.loss, 31 | "name": "error", 32 | "positive_trend_sign": -1, 33 | "model_selection": True, 34 | "average": True, 35 | "tensorboard": True, 36 | }] 37 | -------------------------------------------------------------------------------- /dytb/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | -------------------------------------------------------------------------------- /dytb/evaluators/metrics.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Utility functions for model training and evaluation""" 9 | 10 | import tensorflow as tf 11 | 12 | 13 | def accuracy_op(logits, labels): 14 | """Define the accuracy between predictions (logits) and labels. 15 | Args: 16 | logits: a [batch_size, 1,1, num_classes] tensor or 17 | a [batch_size, num_classes] tensor 18 | labels: a [batch_size] tensor 19 | Returns: 20 | accuracy: the accuracy op 21 | """ 22 | 23 | with tf.variable_scope('accuracy'): 24 | # handle fully convolutional classifiers 25 | logits_shape = logits.shape 26 | if len(logits_shape) == 4 and logits_shape[1:3] == [1, 1]: 27 | top_k_logits = tf.squeeze(logits, [1, 2]) 28 | else: 29 | top_k_logits = logits 30 | top_k_op = tf.nn.in_top_k(top_k_logits, labels, 1) 31 | accuracy = tf.reduce_mean(tf.cast(top_k_op, tf.float32)) 32 | 33 | return accuracy 34 | 35 | 36 | def confusion_matrix_op(logits, labels, num_classes): 37 | """Creates the operation to build the confusion matrix between the 38 | predictions and the labels. The number of classes are required to build 39 | the matrix correctly. 40 | Args: 41 | logits: a [batch_size, 1,1, num_classes] tensor or 42 | a [batch_size, num_classes] tensor 43 | labels: a [batch_size] tensor 44 | Returns: 45 | confusion_matrix_op: the confusion matrix tf op 46 | """ 47 | with tf.variable_scope('confusion_matrix'): 48 | # handle fully convolutional classifiers 49 | logits_shape = logits.shape 50 | if len(logits_shape) == 4 and logits_shape[1:3] == [1, 1]: 51 | top_k_logits = tf.squeeze(logits, [1, 2]) 52 | else: 53 | top_k_logits = logits 54 | 55 | # Extract the predicted label (top-1) 56 | _, top_predicted_label = tf.nn.top_k(top_k_logits, k=1, sorted=False) 57 | # (batch_size, k) -> k = 1 -> (batch_size) 58 | top_predicted_label = tf.squeeze(top_predicted_label, axis=1) 59 | 60 | return tf.confusion_matrix( 61 | labels, top_predicted_label, num_classes=num_classes) 62 | 63 | 64 | def iou_op(real_coordinates, coordinates): 65 | """Returns the average interserction over union operation between a batch of 66 | real_coordinates and a batch of coordinates. 67 | Args: 68 | real_coordinates: a tensor with shape [batch_size, 4] 69 | coordinates: a tensor with shape [batch_size, 4] 70 | Returns: 71 | iou: avewrage interserction over union in the batch 72 | """ 73 | 74 | with tf.variable_scope('iou'): 75 | ymin_orig = real_coordinates[:, 0] 76 | xmin_orig = real_coordinates[:, 1] 77 | ymax_orig = real_coordinates[:, 2] 78 | xmax_orig = real_coordinates[:, 3] 79 | area_orig = (ymax_orig - ymin_orig) * (xmax_orig - xmin_orig) 80 | 81 | ymin = coordinates[:, 0] 82 | xmin = coordinates[:, 1] 83 | ymax = coordinates[:, 2] 84 | xmax = coordinates[:, 3] 85 | area_pred = (ymax - ymin) * (xmax - xmin) 86 | 87 | intersection_ymin = tf.maximum(ymin, ymin_orig) 88 | intersection_xmin = tf.maximum(xmin, xmin_orig) 89 | intersection_ymax = tf.minimum(ymax, ymax_orig) 90 | intersection_xmax = tf.minimum(xmax, xmax_orig) 91 | 92 | intersection_area = tf.maximum( 93 | intersection_ymax - intersection_ymin, 94 | tf.zeros_like(intersection_ymax)) * tf.maximum( 95 | intersection_xmax - intersection_xmin, 96 | tf.zeros_like(intersection_ymax)) 97 | 98 | iou = tf.reduce_mean(intersection_area / 99 | (area_orig + area_pred - intersection_area)) 100 | return iou 101 | -------------------------------------------------------------------------------- /dytb/inputs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galeone/dynamic-training-bench/6534e18bc0744e6e32ed92eba4c95e5b592d4beb/dytb/inputs/__init__.py -------------------------------------------------------------------------------- /dytb/inputs/images.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Utils for image processing""" 9 | 10 | import tensorflow as tf 11 | 12 | 13 | # Adapted from 14 | # https://github.com/pavelgonchar/colornet/blob/master/train.py 15 | def rgb2yuv(rgb): 16 | """ 17 | Convert RGB image into YUV https://en.wikipedia.org/wiki/YUV 18 | """ 19 | rgb2yuv_filter = tf.constant([[[[0.299, -0.169, 20 | 0.499], [0.587, -0.331, -0.418], 21 | [0.114, 0.499, -0.0813]]]]) 22 | rgb2yuv_bias = tf.constant([0., 0.5, 0.5]) 23 | 24 | rgb = tf.expand_dims(rgb, 0) 25 | 26 | temp = tf.nn.conv2d(rgb, rgb2yuv_filter, [1, 1, 1, 1], 'SAME') 27 | temp = tf.nn.bias_add(temp, rgb2yuv_bias) 28 | temp = tf.squeeze(temp, [0]) 29 | 30 | return temp 31 | 32 | 33 | # Adapted from 34 | # https://github.com/pavelgonchar/colornet/blob/master/train.py 35 | def yuv2rgb(yuv): 36 | """ 37 | Convert YUV image into RGB https://en.wikipedia.org/wiki/YUV 38 | """ 39 | yuv = tf.multiply(yuv, 255) 40 | yuv2rgb_filter = tf.constant([[[[1., 1., 1.], [0., -0.34413999, 1.77199996], 41 | [1.40199995, -0.71414, 0.]]]]) 42 | yuv2rgb_bias = tf.constant([-179.45599365, 135.45983887, -226.81599426]) 43 | 44 | yuv = tf.expand_dims(yuv, 0) 45 | temp = tf.nn.conv2d(yuv, yuv2rgb_filter, [1, 1, 1, 1], 'SAME') 46 | temp = tf.nn.bias_add(temp, yuv2rgb_bias) 47 | temp = tf.maximum(temp, tf.zeros(temp.get_shape(), dtype=tf.float32)) 48 | temp = tf.minimum(temp, 49 | tf.multiply( 50 | tf.ones(temp.get_shape(), dtype=tf.float32), 255)) 51 | temp = tf.divide(temp, 255) 52 | temp = tf.squeeze(temp, [0]) 53 | return temp 54 | 55 | 56 | def scale_image(image): 57 | """Returns the image tensor with values in [-1, 1]. 58 | Args: 59 | image: [height, width, depth] tensor with values in [0,1] 60 | """ 61 | image = tf.subtract(image, 0.5) 62 | # now image has values with zero mean in range [-0.5, 0.5] 63 | image = tf.multiply(image, 2.0) 64 | # now image has values with zero mean in range [-1, 1] 65 | return image 66 | 67 | 68 | def read_image_jpg(image_path, depth=3, scale=True): 69 | """Reads the image from image_path (tf.string tensor) [jpg image]. 70 | Cast the result to float32 and if scale=True scale it in [-1,1] 71 | using scale_image. Otherwise the values are in [0,1] 72 | Reuturn: 73 | the decoded jpeg image, casted to float32 74 | """ 75 | 76 | image = tf.image.convert_image_dtype( 77 | tf.image.decode_jpeg(tf.read_file(image_path), channels=depth), 78 | dtype=tf.float32) 79 | if scale: 80 | image = scale_image(image) 81 | return image 82 | 83 | 84 | def read_image_png(image_path, depth=3, scale=True): 85 | """Reads the image from image_path (tf.string tensor) [jpg image]. 86 | Cast the result to float32 and if scale=True scale it in [-1,1] 87 | using scale_image. Otherwise the values are in [0,1] 88 | Reuturn: 89 | the decoded jpeg image, casted to float32 90 | """ 91 | image = tf.image.convert_image_dtype( 92 | tf.image.decode_png(tf.read_file(image_path), channels=depth), 93 | dtype=tf.float32) 94 | if scale: 95 | image = scale_image(image) 96 | return image 97 | 98 | 99 | def read_image(image_path, channel, image_type, scale=True): 100 | """Wrapper around read_image_{jpg,png}""" 101 | if image_type == "jpg": 102 | image = read_image_jpg(image_path, channel, scale) 103 | else: 104 | image = read_image_png(image_path, channel, scale) 105 | return image 106 | -------------------------------------------------------------------------------- /dytb/inputs/interfaces.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Define the interface to implement to define an input""" 9 | 10 | from abc import ABCMeta, abstractmethod, abstractproperty 11 | from enum import Enum, unique 12 | 13 | 14 | class Input(object, metaclass=ABCMeta): 15 | """Input is the interface that classifiers must implement""" 16 | 17 | @abstractmethod 18 | def inputs(self, input_type, batch_size, augmentation_fn=None): 19 | """Construct input for evaluation using the Reader ops. 20 | 21 | Args: 22 | input_type: InputType enum 23 | batch_size: Number of elements per batch. 24 | augmentation_fn: function that accepts an input value, 25 | perform augmentation and returns the value 26 | 27 | Returns: 28 | elements: tensor of with batch_size elements 29 | ground_truth: tensor with batch_size elements 30 | """ 31 | pass 32 | 33 | @abstractmethod 34 | def num_examples(self, input_type): 35 | """Returns the number of examples for the specified input_type 36 | 37 | Args: 38 | input_type: InputType enum 39 | """ 40 | pass 41 | 42 | @abstractproperty 43 | def num_classes(self): 44 | """Returns the number of classes""" 45 | pass 46 | 47 | @abstractproperty 48 | def name(self): 49 | """Returns the name of the input source""" 50 | pass 51 | 52 | 53 | @unique 54 | class InputType(Enum): 55 | """Enum to specify the data type requested""" 56 | validation = 'validation' 57 | train = 'train' 58 | test = 'test' 59 | 60 | def __str__(self): 61 | """Return the string representation of the enum""" 62 | return self.value 63 | 64 | @staticmethod 65 | def check(input_type): 66 | """Check if input_type is an element of this Enum""" 67 | if not isinstance(input_type, InputType): 68 | raise ValueError("Invalid input_type, required a valid InputType") 69 | -------------------------------------------------------------------------------- /dytb/inputs/predefined/Cifar10.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | # Adapted from: 4 | # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/image/cifar10/cifar10_input.py 5 | #This Source Code Form is subject to the terms of the Mozilla Public 6 | #License, v. 2.0. If a copy of the MPL was not distributed with this 7 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 8 | #Exhibit B is not attached; this software is compatible with the 9 | #licenses expressed under Section 1.12 of the MPL v2. 10 | """Routine for decoding the CIFAR-10 binary file format.""" 11 | 12 | import os 13 | import sys 14 | import tarfile 15 | 16 | from six.moves import urllib 17 | import tensorflow as tf 18 | from ..processing import build_batch 19 | from ..images import scale_image 20 | from ..interfaces import Input, InputType 21 | 22 | 23 | class Cifar10(Input): 24 | """Routine for decoding the CIFAR-10 binary file format.""" 25 | 26 | def __init__(self, add_input_to_label=False): 27 | # Global constants describing the CIFAR-10 data set. 28 | self._name = 'CIFAR-10' 29 | self._image_height = 32 30 | self._image_width = 32 31 | self._image_depth = 3 32 | 33 | self._num_classes = 10 34 | self._num_examples_per_epoch_for_train = 50000 35 | self._num_examples_per_epoch_for_eval = 10000 36 | self._num_examples_per_epoch_for_test = self._num_examples_per_epoch_for_eval 37 | 38 | self._data_dir = os.path.join( 39 | os.path.dirname(os.path.abspath(__file__)), 'data', 'Cifar10') 40 | self._data_url = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz' 41 | self._maybe_download_and_extract() 42 | self._add_input_to_label = add_input_to_label 43 | 44 | def num_examples(self, input_type): 45 | """Returns the number of examples per the specified input_type 46 | 47 | Args: 48 | input_type: InputType enum 49 | """ 50 | InputType.check(input_type) 51 | 52 | if input_type == InputType.train: 53 | return self._num_examples_per_epoch_for_train 54 | elif input_type == InputType.test: 55 | return self._num_examples_per_epoch_for_test 56 | return self._num_examples_per_epoch_for_eval 57 | 58 | @property 59 | def num_classes(self): 60 | """Returns the number of classes""" 61 | return self._num_classes 62 | 63 | @property 64 | def name(self): 65 | """Returns the name of the input source""" 66 | return self._name 67 | 68 | def _read(self, filename_queue): 69 | """Reads and parses examples from CIFAR10 data files. 70 | 71 | Recommendation: if you want N-way read parallelism, call this function 72 | N times. This will give you N independent Readers reading different 73 | files & positions within those files, which will give better mixing of 74 | examples. 75 | 76 | Args: 77 | filename_queue: A queue of strings with the filenames to read from. 78 | 79 | Returns: 80 | An object representing a single example, with the following fields: 81 | height: number of rows in the result (32) 82 | width: number of columns in the result (32) 83 | depth: number of color channels in the result (3) 84 | key: a scalar string Tensor describing the filename & record number 85 | for this example. 86 | label: an int32 Tensor with the label in the range 0..9. 87 | image: a [height, width, depth] uint8 Tensor with the image data 88 | """ 89 | 90 | # Dimensions of the images in the CIFAR-10 dataset. 91 | # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the 92 | # input format. 93 | result = { 94 | "height": self._image_height, 95 | "width": self._image_width, 96 | "depth": self._image_depth, 97 | "label": None, 98 | "image": None 99 | } 100 | 101 | image_bytes = result["height"] * result["width"] * result["depth"] 102 | # Every record consists of a label followed by the image, with a 103 | # fixed number of bytes for each. 104 | label_bytes = 1 # 2 for CIFAR-100 105 | record_bytes = label_bytes + image_bytes 106 | 107 | # Read a record, getting filenames from the filename_queue. No 108 | # header or footer in the CIFAR-10 format, so we leave header_bytes 109 | # and footer_bytes at their default of 0. 110 | reader = tf.FixedLengthRecordReader(record_bytes=record_bytes) 111 | _, value = reader.read(filename_queue) 112 | 113 | # Convert from a string to a vector of uint8 that is record_bytes long. 114 | record_bytes = tf.decode_raw(value, tf.uint8) 115 | 116 | # The first bytes represent the label, which we convert from uint8->int32. 117 | result["label"] = tf.squeeze( 118 | tf.cast(tf.slice(record_bytes, [0], [label_bytes]), tf.int32)) 119 | 120 | # The remaining bytes after the label represent the image, which we reshape 121 | # from [depth * height * width] to [depth, height, width]. 122 | depth_major = tf.reshape( 123 | tf.slice(record_bytes, [label_bytes], [image_bytes]), 124 | [result["depth"], result["height"], result["width"]]) 125 | 126 | # Convert from [depth, height, width] to [height, width, depth]. 127 | image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32) 128 | 129 | # Convert from [0, 255] -> [0, 1] 130 | image = tf.divide(image, 255.0) 131 | 132 | # Convert from [0, 1] -> [-1, 1] 133 | result["image"] = scale_image(image) 134 | 135 | return result 136 | 137 | def inputs(self, input_type, batch_size, augmentation_fn=None): 138 | """Construct input for CIFAR evaluation using the Reader ops. 139 | 140 | Args: 141 | input_type: InputType enum 142 | batch_size: Number of images per batch. 143 | 144 | Returns: 145 | images: Images. 4D tensor of [batch_size, self._image_height, self._image_width, self._image_depth] size. 146 | labels: Labels. 1D tensor of [batch_size] size. 147 | """ 148 | InputType.check(input_type) 149 | 150 | if input_type == InputType.train: 151 | filenames = [ 152 | os.path.join(self._data_dir, 153 | 'cifar-10-batches-bin/data_batch_%d.bin' % i) 154 | for i in range(1, 6) 155 | ] 156 | num_examples_per_epoch = self._num_examples_per_epoch_for_train 157 | else: 158 | filenames = [ 159 | os.path.join(self._data_dir, 160 | 'cifar-10-batches-bin/test_batch.bin') 161 | ] 162 | num_examples_per_epoch = self._num_examples_per_epoch_for_eval 163 | 164 | for name in filenames: 165 | if not tf.gfile.Exists(name): 166 | raise ValueError('Failed to find file: ' + name) 167 | 168 | with tf.variable_scope("{}_input".format(input_type)): 169 | # Create a queue that produces the filenames to read. 170 | filename_queue = tf.train.string_input_producer(filenames) 171 | 172 | # Read examples from files in the filename queue. 173 | read_input = self._read(filename_queue) 174 | if augmentation_fn: 175 | read_input["image"] = augmentation_fn(read_input["image"]) 176 | 177 | # Ensure that the random shuffling has good mixing properties. 178 | min_fraction_of_examples_in_queue = 0.4 179 | min_queue_examples = int( 180 | num_examples_per_epoch * min_fraction_of_examples_in_queue) 181 | 182 | # Generate a batch of images and labels by building up a queue of examples. 183 | return build_batch( 184 | read_input["image"], 185 | read_input["label"] if not self._add_input_to_label else 186 | [read_input["label"], read_input["image"]], 187 | min_queue_examples, 188 | batch_size, 189 | shuffle=input_type == InputType.train) 190 | 191 | def _maybe_download_and_extract(self): 192 | """Download and extract the tarball from Alex's website.""" 193 | dest_directory = self._data_dir 194 | if not os.path.exists(dest_directory): 195 | os.makedirs(dest_directory) 196 | filename = self._data_url.split('/')[-1] 197 | filepath = os.path.join(dest_directory, filename) 198 | if not os.path.exists(filepath): 199 | 200 | def _progress(count, block_size, total_size): 201 | sys.stdout.write( 202 | '\r>> Downloading %s %.1f%%' % 203 | (filename, 204 | float(count * block_size) / float(total_size) * 100.0)) 205 | sys.stdout.flush() 206 | 207 | filepath, _ = urllib.request.urlretrieve(self._data_url, filepath, 208 | _progress) 209 | print() 210 | statinfo = os.stat(filepath) 211 | print('Successfully downloaded', filename, statinfo.st_size, 212 | 'bytes.') 213 | tarfile.open(filepath, 'r:gz').extractall(dest_directory) 214 | -------------------------------------------------------------------------------- /dytb/inputs/predefined/Cifar100.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | # Conversion of cifar10_input: 4 | # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/image/cifar10/cifar10_input.py 5 | # For the cifar100 dataset. 6 | # 7 | #This Source Code Form is subject to the terms of the Mozilla Public 8 | #License, v. 2.0. If a copy of the MPL was not distributed with this 9 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 10 | #Exhibit B is not attached; this software is compatible with the 11 | #licenses expressed under Section 1.12 of the MPL v2. 12 | """Routine for decoding the CIFAR-100 binary file format.""" 13 | 14 | import os 15 | import sys 16 | import tarfile 17 | 18 | from six.moves import urllib 19 | import tensorflow as tf 20 | from ..processing import build_batch 21 | from ..images import scale_image 22 | from ..interfaces import Input, InputType 23 | 24 | 25 | class Cifar100(Input): 26 | """Routine for decoding the CIFAR-100 binary file format.""" 27 | 28 | def __init__(self, add_input_to_label=False): 29 | # Global constants describing the CIFAR-100 data set. 30 | self._name = 'CIFAR-100' 31 | self._image_height = 32 32 | self._image_width = 32 33 | self._image_depth = 3 34 | 35 | self._num_classes = 100 36 | self._num_examples_per_epoch_for_train = 50000 37 | self._num_examples_per_epoch_for_eval = 10000 38 | self._num_examples_per_epoch_for_test = self._num_examples_per_epoch_for_eval 39 | 40 | self._data_dir = os.path.join( 41 | os.path.dirname(os.path.abspath(__file__)), 'data', 'Cifar100') 42 | self._data_url = 'http://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz' 43 | self._maybe_download_and_extract() 44 | self._add_input_to_label = add_input_to_label 45 | 46 | def num_examples(self, input_type): 47 | """Returns the number of examples per the specified input_type 48 | 49 | Args: 50 | input_type: InputType enum 51 | """ 52 | InputType.check(input_type) 53 | 54 | if input_type == InputType.train: 55 | return self._num_examples_per_epoch_for_train 56 | elif input_type == InputType.test: 57 | return self._num_examples_per_epoch_for_test 58 | return self._num_examples_per_epoch_for_eval 59 | 60 | @property 61 | def num_classes(self): 62 | """Returns the number of classes""" 63 | return self._num_classes 64 | 65 | @property 66 | def name(self): 67 | """Returns the name of the input source""" 68 | return self._name 69 | 70 | def _read(self, filename_queue): 71 | """Reads and parses examples from CIFAR10 data files. 72 | 73 | Recommendation: if you want N-way read parallelism, call this function 74 | N times. This will give you N independent Readers reading different 75 | files & positions within those files, which will give better mixing of 76 | examples. 77 | 78 | Args: 79 | filename_queue: A queue of strings with the filenames to read from. 80 | 81 | Returns: 82 | An object representing a single example, with the following fields: 83 | height: number of rows in the result (32) 84 | width: number of columns in the result (32) 85 | depth: number of color channels in the result (3) 86 | key: a scalar string Tensor describing the filename & record number 87 | for this example. 88 | label: an int32 Tensor with the label in the range 0..9. 89 | image: a [height, width, depth] uint8 Tensor with the image data 90 | """ 91 | 92 | # Dimensions of the images in the CIFAR-10 dataset. 93 | # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the 94 | # input format. 95 | result = { 96 | "height": self._image_height, 97 | "width": self._image_width, 98 | "depth": self._image_depth, 99 | "label": None, 100 | "image": None 101 | } 102 | 103 | image_bytes = result["height"] * result["width"] * result["depth"] 104 | # Every record consists of a label followed by the image, with a 105 | # fixed number of bytes for each. 106 | label_bytes = 2 # 2 for CIFAR-100 107 | record_bytes = label_bytes + image_bytes 108 | 109 | # Read a record, getting filenames from the filename_queue. No 110 | # header or footer in the CIFAR-100 format, so we leave header_bytes 111 | # and footer_bytes at their default of 0. 112 | reader = tf.FixedLengthRecordReader(record_bytes=record_bytes) 113 | _, value = reader.read(filename_queue) 114 | 115 | # Convert from a string to a vector of uint8 that is record_bytes long. 116 | record_bytes = tf.decode_raw(value, tf.uint8) 117 | 118 | # The first byte represent the coarse-label. 119 | # Extract the second byte that's the fine-label and convert it from uint8->int32. 120 | result["label"] = tf.squeeze( 121 | tf.cast(tf.slice(record_bytes, [1], [label_bytes - 1]), tf.int32)) 122 | 123 | # The remaining bytes after the label represent the image, which we reshape 124 | # from [depth * height * width] to [depth, height, width]. 125 | depth_major = tf.reshape( 126 | tf.slice(record_bytes, [label_bytes], [image_bytes]), 127 | [result["depth"], result["height"], result["width"]]) 128 | 129 | # Convert from [depth, height, width] to [height, width, depth]. 130 | image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32) 131 | 132 | # Convert from [0, 255] -> [0, 1] 133 | image = tf.divide(image, 255.0) 134 | 135 | # Convert from [0, 1] -> [-1, 1] 136 | result["image"] = scale_image(image) 137 | 138 | return result 139 | 140 | def inputs(self, input_type, batch_size, augmentation_fn=None): 141 | """Construct input for CIFAR evaluation using the Reader ops. 142 | 143 | Args: 144 | input_type: InputType enum 145 | batch_size: Number of images per batch. 146 | 147 | Returns: 148 | images: Images. 4D tensor of [batch_size, self._image_height, self._image_width, self._image_depth] size. 149 | labels: Labels. 1D tensor of [batch_size] size. 150 | """ 151 | InputType.check(input_type) 152 | 153 | if input_type == InputType.train: 154 | filename = os.path.join(self._data_dir, 155 | 'cifar-100-binary/train.bin') 156 | num_examples_per_epoch = self._num_examples_per_epoch_for_train 157 | else: 158 | filename = os.path.join(self._data_dir, 'cifar-100-binary/test.bin') 159 | num_examples_per_epoch = self._num_examples_per_epoch_for_eval 160 | 161 | if not tf.gfile.Exists(filename): 162 | raise ValueError('Failed to find file: ' + filename) 163 | 164 | with tf.variable_scope("{}_input".format(input_type)): 165 | # Create a queue that produces the filenames to read. 166 | filename_queue = tf.train.string_input_producer([filename]) 167 | 168 | # Read examples from files in the filename queue. 169 | read_input = self._read(filename_queue) 170 | if augmentation_fn: 171 | read_input["image"] = augmentation_fn(read_input["image"]) 172 | 173 | # Ensure that the random shuffling has good mixing properties. 174 | min_fraction_of_examples_in_queue = 0.4 175 | min_queue_examples = int( 176 | num_examples_per_epoch * min_fraction_of_examples_in_queue) 177 | 178 | # Generate a batch of images and labels by building up a queue of examples. 179 | return build_batch( 180 | read_input["image"], 181 | read_input["label"] if not self._add_input_to_label else 182 | [read_input["label"], read_input["image"]], 183 | min_queue_examples, 184 | batch_size, 185 | shuffle=input_type == InputType.train) 186 | 187 | def _maybe_download_and_extract(self): 188 | """Download and extract the tarball from Alex's website.""" 189 | dest_directory = self._data_dir 190 | if not os.path.exists(dest_directory): 191 | os.makedirs(dest_directory) 192 | filename = self._data_url.split('/')[-1] 193 | filepath = os.path.join(dest_directory, filename) 194 | if not os.path.exists(filepath): 195 | 196 | def _progress(count, block_size, total_size): 197 | sys.stdout.write( 198 | '\r>> Downloading %s %.1f%%' % 199 | (filename, 200 | float(count * block_size) / float(total_size) * 100.0)) 201 | sys.stdout.flush() 202 | 203 | filepath, _ = urllib.request.urlretrieve(self._data_url, filepath, 204 | _progress) 205 | print() 206 | statinfo = os.stat(filepath) 207 | print('Successfully downloaded', filename, statinfo.st_size, 208 | 'bytes.') 209 | tarfile.open(filepath, 'r:gz').extractall(dest_directory) 210 | -------------------------------------------------------------------------------- /dytb/inputs/predefined/MNIST.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Routine for decoding the MNIST binary file format.""" 9 | 10 | import os 11 | 12 | import tensorflow as tf 13 | from tensorflow.contrib.learn.python.learn.datasets import mnist 14 | from ..processing import convert_to_tfrecords, build_batch 15 | from ..images import scale_image 16 | from ..interfaces import Input, InputType 17 | 18 | 19 | class MNIST(Input): 20 | """Routine for decoding the MNIST binary file format.""" 21 | 22 | def __init__(self, resize=(28, 28, 1), add_input_to_label=False): 23 | # Global constants describing the MNIST data set. 24 | self._name = 'MNIST' 25 | self._original_shape = (28, 28, 1) 26 | mnist.IMAGE_PIXELS = 28 * 28 27 | self._image_width = resize[0] 28 | self._image_height = resize[1] 29 | self._image_depth = resize[2] 30 | 31 | self._num_classes = 10 32 | self._num_examples_per_epoch_for_train = 55000 33 | self._num_examples_per_epoch_for_eval = 5000 34 | self._num_examples_per_epoch_for_test = 10000 35 | 36 | self._data_dir = os.path.join( 37 | os.path.dirname(os.path.abspath(__file__)), 'data', 'MNIST') 38 | self._maybe_download_and_extract() 39 | self._add_input_to_label = add_input_to_label 40 | 41 | def num_examples(self, input_type): 42 | """Returns the number of examples per the specified input_type 43 | 44 | Args: 45 | input_type: InputType enum 46 | """ 47 | InputType.check(input_type) 48 | 49 | if input_type == InputType.train: 50 | return self._num_examples_per_epoch_for_train 51 | elif input_type == InputType.test: 52 | return self._num_examples_per_epoch_for_test 53 | return self._num_examples_per_epoch_for_eval 54 | 55 | @property 56 | def num_classes(self): 57 | """Returns the number of classes""" 58 | return self._num_classes 59 | 60 | @property 61 | def name(self): 62 | """Returns the name of the input source""" 63 | return self._name 64 | 65 | # adapted from: 66 | # https://github.com/tensorflow/tensorflow/blob/r0.11/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py 67 | def _read(self, filename_queue): 68 | """Reads and parses examples from MNIST data files. 69 | Recommendation: if you want N-way read parallelism, call this function 70 | N times. This will give you N independent Readers reading different 71 | files & positions within those files, which will give better mixing of 72 | examples. 73 | 74 | Args: 75 | filename_queue: A queue of strings with the filenames to read from. 76 | 77 | Returns: 78 | An object representing a single example, with the following fields: 79 | label: an int32 Tensor with the label in the range 0..9. 80 | image: a [height, width, depth] uint8 Tensor with the image data 81 | """ 82 | 83 | result = {'image': None, 'label': None} 84 | 85 | reader = tf.TFRecordReader() 86 | _, value = reader.read(filename_queue) 87 | features = tf.parse_single_example( 88 | value, 89 | features={ 90 | 'image_raw': tf.FixedLenFeature([], tf.string), 91 | # int64 required 92 | 'label': tf.FixedLenFeature([], tf.int64) 93 | }) 94 | 95 | # Convert from a scalar string tensor (whose single string has 96 | # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape 97 | # [mnist.IMAGE_PIXELS]. 98 | image = tf.decode_raw(features['image_raw'], tf.uint8) 99 | image.set_shape([mnist.IMAGE_PIXELS]) 100 | 101 | # Reshape to a valid image 102 | image = tf.reshape(image, self._original_shape) 103 | # Resize to the selected shape 104 | image = tf.squeeze( 105 | tf.image.resize_bilinear( 106 | tf.expand_dims(image, axis=0), 107 | [self._image_height, self._image_width]), 108 | axis=[0]) 109 | 110 | # Convert from [0, 255] -> [0, 1] 111 | image = tf.divide(tf.cast(image, tf.float32), 255.0) 112 | # Convert from [0, 1] -> [-1, 1] 113 | result["image"] = scale_image(image) 114 | 115 | # Convert label from a scalar uint8 tensor to an int32 scalar. 116 | result["label"] = tf.cast(features['label'], tf.int32) 117 | return result 118 | 119 | def inputs(self, input_type, batch_size, augmentation_fn=None): 120 | """Construct input for MNIST evaluation using the Reader ops. 121 | 122 | Args: 123 | input_type: InputType enum. 124 | batch_size: Number of images per batch. 125 | 126 | Returns: 127 | images: Images. 4D tensor of [batch_size, resize[0], resize[1], resize[2]] size. 128 | labels: Labels. 1D tensor of [batch_size] size. 129 | """ 130 | InputType.check(input_type) 131 | 132 | if input_type == InputType.train: 133 | filename = os.path.join(self._data_dir, 'train.tfrecords') 134 | num_examples_per_epoch = self._num_examples_per_epoch_for_train 135 | elif input_type == InputType.validation: 136 | filename = os.path.join(self._data_dir, 'validation.tfrecords') 137 | num_examples_per_epoch = self._num_examples_per_epoch_for_eval 138 | elif input_type == InputType.test: 139 | filename = os.path.join(self._data_dir, 'test.tfrecords') 140 | num_examples_per_epoch = self._num_examples_per_epoch_for_test 141 | 142 | with tf.variable_scope("{}_input".format(input_type)): 143 | # Create a queue that produces the filenames to read. 144 | filename_queue = tf.train.string_input_producer([filename]) 145 | 146 | # Read examples from files in the filename queue. 147 | read_input = self._read(filename_queue) 148 | if augmentation_fn: 149 | read_input["image"] = augmentation_fn(read_input["image"]) 150 | 151 | # Ensure that the random shuffling has good mixing properties. 152 | min_fraction_of_examples_in_queue = 0.4 153 | min_queue_examples = int( 154 | num_examples_per_epoch * min_fraction_of_examples_in_queue) 155 | 156 | # Generate a batch of images and labels by building up a queue of examples. 157 | return build_batch( 158 | read_input["image"], 159 | read_input["label"] if not self._add_input_to_label else 160 | [read_input["label"], read_input["image"]], 161 | min_queue_examples, 162 | batch_size, 163 | shuffle=input_type == InputType.train) 164 | 165 | def _maybe_download_and_extract(self): 166 | """Download and extract the MNIST dataset""" 167 | data_sets = mnist.read_data_sets( 168 | self._data_dir, 169 | dtype=tf.uint8, 170 | reshape=False, 171 | validation_size=self._num_examples_per_epoch_for_eval) 172 | 173 | # Convert to Examples and write the result to TFRecords. 174 | if not tf.gfile.Exists(os.path.join(self._data_dir, 'train.tfrecords')): 175 | convert_to_tfrecords(data_sets.train, 'train', self._data_dir) 176 | 177 | if not tf.gfile.Exists( 178 | os.path.join(self._data_dir, 'validation.tfrecords')): 179 | convert_to_tfrecords(data_sets.validation, 'validation', 180 | self._data_dir) 181 | 182 | if not tf.gfile.Exists(os.path.join(self._data_dir, 'test.tfrecords')): 183 | convert_to_tfrecords(data_sets.test, 'test', self._data_dir) 184 | -------------------------------------------------------------------------------- /dytb/inputs/predefined/ORLFaces.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """ORL Faces database input""" 9 | 10 | import os 11 | import sys 12 | import zipfile 13 | import glob 14 | from PIL import Image 15 | 16 | from six.moves import urllib 17 | import tensorflow as tf 18 | import numpy as np 19 | from ..processing import convert_to_tfrecords, build_batch 20 | from ..images import scale_image 21 | from ..interfaces import Input, InputType 22 | 23 | 24 | class ORLFaces(Input): 25 | """ORL Faces database input""" 26 | 27 | def __init__(self, add_input_to_label=False): 28 | # Global constants describing the ORL Faces data set. 29 | self._name = 'ORL-Faces' 30 | self._image_width = 92 31 | self._image_height = 112 32 | self._image_depth = 1 33 | 34 | self._num_classes = 40 35 | self._num_examples_per_epoch_for_train = 400 36 | self._num_examples_per_epoch_for_eval = 0 37 | self._num_examples_per_epoch_for_test = 0 38 | 39 | self._data_dir = os.path.join( 40 | os.path.dirname(os.path.abspath(__file__)), 'data', 'ORLFaces') 41 | self._data_url = 'http://www.cl.cam.ac.uk/Research/DTG/attarchive/pub/data/att_faces.zip' 42 | self._maybe_download_and_extract() 43 | self._add_input_to_label = add_input_to_label 44 | 45 | def num_examples(self, input_type): 46 | """Returns the number of examples per the specified input_type 47 | 48 | Args: 49 | input_type: InputType enum 50 | """ 51 | InputType.check(input_type) 52 | 53 | if input_type == InputType.train: 54 | return self._num_examples_per_epoch_for_train 55 | elif input_type == InputType.test: 56 | return self._num_examples_per_epoch_for_test 57 | return self._num_examples_per_epoch_for_eval 58 | 59 | @property 60 | def num_classes(self): 61 | """Returns the number of classes""" 62 | return self._num_classes 63 | 64 | @property 65 | def name(self): 66 | """Returns the name of the input source""" 67 | return self._name 68 | 69 | # adapted from: 70 | # https://github.com/tensorflow/tensorflow/blob/r0.11/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py 71 | def _read(self, filename_queue): 72 | """Reads and parses examples from MNIST data files. 73 | Recommendation: if you want N-way read parallelism, call this function 74 | N times. This will give you N independent Readers reading different 75 | files & positions within those files, which will give better mixing of 76 | examples. 77 | 78 | Args: 79 | filename_queue: A queue of strings with the filenames to read from. 80 | 81 | Returns: 82 | An object representing a single example, with the following fields: 83 | label: an int32 Tensor with the label in the range 0..9. 84 | image: a [height, width, depth] uint8 Tensor with the image data 85 | """ 86 | 87 | result = {'image': None, 'label': None} 88 | 89 | reader = tf.TFRecordReader() 90 | _, value = reader.read(filename_queue) 91 | features = tf.parse_single_example( 92 | value, 93 | features={ 94 | 'image_raw': tf.FixedLenFeature([], tf.string), 95 | # int64 required 96 | 'label': tf.FixedLenFeature([], tf.int64) 97 | }) 98 | 99 | # Convert from a scalar string tensor (whose single string has 100 | # length IMAGE_WIDHT * self._image_height) to a uint8 tensor with 101 | # the same shape 102 | image = tf.decode_raw(features['image_raw'], tf.uint8) 103 | image.set_shape([self._image_width * self._image_height]) 104 | 105 | #`Reshape to a valid image 106 | image = tf.reshape( 107 | image, (self._image_height, self._image_width, self._image_depth)) 108 | 109 | # Convert from [0, 255] -> [0, 1] floats. 110 | image = tf.divide(tf.cast(image, tf.float32), 255.0) 111 | 112 | # Convert from [0, 1] -> [-1, 1] 113 | result["image"] = scale_image(image) 114 | 115 | # Convert label from a scalar uint8 tensor to an int32 scalar. 116 | result["label"] = tf.cast(features['label'], tf.int32) 117 | 118 | return result 119 | 120 | def inputs(self, input_type, batch_size, augmentation_fn=None): 121 | """Construct input for ORL Faces evaluation using the Reader ops. 122 | 123 | Args: 124 | input_type: InputType enum. 125 | batch_size: Number of images per batch. 126 | 127 | Returns: 128 | images: Images. 4D tensor of [batch_size, self._image_width, self._image_height, self._image_depth] size. 129 | labels: Labels. 1D tensor of [batch_size] size. 130 | """ 131 | InputType.check(input_type) 132 | 133 | with tf.variable_scope("{}_input".format(input_type)): 134 | filename = os.path.join(self._data_dir, 'faces.tfrecords') 135 | num_examples_per_epoch = self._num_examples_per_epoch_for_train 136 | 137 | # Create a queue that produces the filenames to read. 138 | filename_queue = tf.train.string_input_producer([filename]) 139 | 140 | # Read examples from files in the filename queue. 141 | read_input = self._read(filename_queue) 142 | if augmentation_fn: 143 | read_input["image"] = augmentation_fn(read_input["image"]) 144 | 145 | # Ensure that the random shuffling has good mixing properties. 146 | min_fraction_of_examples_in_queue = 0.4 147 | min_queue_examples = int( 148 | num_examples_per_epoch * min_fraction_of_examples_in_queue) 149 | 150 | # Generate a batch of images and labels by building up a queue of examples. 151 | return build_batch( 152 | read_input["image"], 153 | read_input["label"] if not self._add_input_to_label else 154 | [read_input["label"], read_input["image"]], 155 | min_queue_examples, 156 | batch_size, 157 | shuffle=input_type == InputType.train) 158 | 159 | def _maybe_download_and_extract(self): 160 | """Download and extract the ORL Faces dataset""" 161 | 162 | dest_directory = self._data_dir 163 | if not os.path.exists(dest_directory): 164 | os.makedirs(dest_directory) 165 | filename = self._data_url.split('/')[-1] 166 | filepath = os.path.join(dest_directory, filename) 167 | if not os.path.exists(filepath): 168 | 169 | def _progress(count, block_size, total_size): 170 | sys.stdout.write( 171 | '\r>> Downloading %s %.1f%%' % 172 | (filename, 173 | float(count * block_size) / float(total_size) * 100.0)) 174 | sys.stdout.flush() 175 | 176 | filepath, _ = urllib.request.urlretrieve(self._data_url, filepath, 177 | _progress) 178 | print() 179 | statinfo = os.stat(filepath) 180 | print('Successfully downloaded', filename, statinfo.st_size, 181 | 'bytes.') 182 | with zipfile.ZipFile(filepath) as zip_f: 183 | zip_f.extractall( 184 | os.path.join(dest_directory, 185 | filename.split('.')[-2])) 186 | 187 | # Convert to Examples and write the result to TFRecords. 188 | if not tf.gfile.Exists(os.path.join(self._data_dir, 'faces.tfrecords')): 189 | images = [] 190 | labels = [] 191 | 192 | for pgm in glob.glob("{}/*/*.pgm".format( 193 | os.path.join(dest_directory, 194 | filename.split('.')[-2]))): 195 | images.append( 196 | np.expand_dims(np.asarray(Image.open(pgm)), axis=2)) 197 | labels.append(int(pgm.split("/")[-2].strip("s"))) 198 | 199 | # Create dataset object 200 | dataset = lambda: None 201 | dataset.num_examples = self._num_examples_per_epoch_for_train 202 | dataset.images = np.array(images) 203 | dataset.labels = np.array(labels) 204 | convert_to_tfrecords(dataset, 'faces', self._data_dir) 205 | -------------------------------------------------------------------------------- /dytb/inputs/predefined/PASCALVOC2012Classification.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """PASCAL VOC 2012""" 9 | 10 | import os 11 | import sys 12 | import tarfile 13 | import xml.etree.ElementTree as etree 14 | import csv 15 | from collections import defaultdict 16 | 17 | from six.moves import urllib 18 | import tensorflow as tf 19 | from ..processing import build_batch 20 | from ..images import read_image_jpg 21 | from ..interfaces.Input import Input 22 | from ..interfaces.InputType import InputType 23 | 24 | 25 | class PASCALVOC2012Classification(Input): 26 | """Routine for decoding the PASCAL VOC 2012 binary file format.""" 27 | 28 | def __init__(self, add_input_to_label=False): 29 | # Global constants describing the PASCAL VOC 2012 data set. 30 | # resize image to a fixed size 31 | # the resize dimension is an hyperparameter 32 | self._name = 'PASCAL-VOC-2012-Classification' 33 | self._image_height = 150 34 | self._image_width = 150 35 | self._image_depth = 3 36 | 37 | # multiple boxes enable the return of a tensor 38 | # of boxes instead of a single box per image 39 | self._multiple_bboxes = False 40 | 41 | self.CLASSES = [ 42 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", 43 | "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", 44 | "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor" 45 | ] 46 | self._bboxes = {"train": defaultdict(list), "val": defaultdict(list)} 47 | self._tf_bboxes = {"train": None, "val": None} 48 | self._num_classes = 20 49 | self._num_examples_per_epoch_for_train = 13609 50 | self._num_examples_per_epoch_for_eval = 13841 51 | self._num_examples_per_epoch_for_test = self._num_examples_per_epoch_for_eval 52 | 53 | self._data_dir = os.path.join( 54 | os.path.dirname(os.path.abspath(__file__)), 'data', 'PASCALVOC2012') 55 | self._data_url = 'http://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar' 56 | self._maybe_download_and_extract() 57 | self._add_input_to_label = add_input_to_label 58 | 59 | @property 60 | def name(self): 61 | """Returns the name of the input source""" 62 | return self._name 63 | 64 | def num_examples(self, input_type): 65 | """Returns the number of examples per the specified input_type 66 | 67 | Args: 68 | input_type: InputType enum 69 | """ 70 | InputType.check(input_type) 71 | 72 | if input_type == InputType.train: 73 | return self._num_examples_per_epoch_for_train 74 | elif input_type == InputType.test: 75 | return self._num_examples_per_epoch_for_test 76 | return self._num_examples_per_epoch_for_eval 77 | 78 | @property 79 | def num_classes(self): 80 | """Returns the number of classes""" 81 | return self._num_classes 82 | 83 | def _read_image_and_box(self, bboxes_csv): 84 | """Extract the filename from the queue, read the image and 85 | produce a single box 86 | Returns: 87 | image, box 88 | """ 89 | 90 | reader = tf.TextLineReader(skip_header_lines=True) 91 | _, row = reader.read(bboxes_csv) 92 | # file ,y_min, x_min, y_max, x_max, label 93 | record_defaults = [[""], [0.], [0.], [0.], [0.], [0.]] 94 | # eg: 95 | # 2008_000033,0.1831831831831832,0.208,0.7717717717717718,0.952,0 96 | filename, y_min, x_min, y_max, x_max, label = tf.decode_csv( 97 | row, record_defaults) 98 | image_path = os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012', 99 | 'JPEGImages') + "/" + filename + ".jpg" 100 | 101 | # image is normalized in [-1,1], convert to #_image_depth depth 102 | image = read_image_jpg(image_path, depth=self._image_depth) 103 | return image, tf.stack([y_min, x_min, y_max, x_max, label]) 104 | 105 | def _read(self, filename_queue): 106 | image, bbox_and_label = self._read_image_and_box( 107 | filename_queue) #bbox is a single box 108 | 109 | bbox = bbox_and_label[:4] 110 | label = tf.cast(bbox_and_label[-1], tf.int32) 111 | 112 | image = tf.squeeze( 113 | tf.image.crop_and_resize( 114 | tf.expand_dims(image, axis=0), 115 | tf.expand_dims(bbox, axis=0), 116 | box_ind=[0], 117 | crop_size=[self._image_height, self._image_width]), 118 | axis=[0]) 119 | return image, label 120 | 121 | def inputs(self, input_type, batch_size, augmentation_fn=None): 122 | """Construct input for PASCALVOC2012Classification evaluation using the Reader ops. 123 | 124 | Args: 125 | input_type: InputType enum 126 | batch_size: Number of images per batch. 127 | Returns: 128 | images: Images. 4D tensor of [batch_size, self._image_height, self._image_width, self._image_depth] size. 129 | labels: tensor with batch_size labels 130 | """ 131 | InputType.check(input_type) 132 | 133 | if input_type == InputType.train: 134 | filenames = [os.path.join(self._data_dir, 'train.csv')] 135 | num_examples_per_epoch = self._num_examples_per_epoch_for_train 136 | else: 137 | filenames = [os.path.join(self._data_dir, 'val.csv')] 138 | num_examples_per_epoch = self._num_examples_per_epoch_for_eval 139 | 140 | for name in filenames: 141 | if not tf.gfile.Exists(name): 142 | raise ValueError('Failed to find file: ' + name) 143 | 144 | # Ensure that the random shuffling has good mixing properties. 145 | min_fraction_of_examples_in_queue = 0.4 146 | min_queue_examples = int( 147 | num_examples_per_epoch * min_fraction_of_examples_in_queue) 148 | 149 | with tf.variable_scope("{}_input".format(input_type)): 150 | # Create a queue that produces the filenames to read. 151 | filename_queue = tf.train.string_input_producer(filenames) 152 | 153 | image, label = self._read(filename_queue) 154 | if augmentation_fn: 155 | image = augmentation_fn(image) 156 | 157 | return build_batch( 158 | image, 159 | label if not self._add_input_to_label else [label, image], 160 | min_queue_examples, 161 | batch_size, 162 | shuffle=input_type == InputType.train) 163 | 164 | def _maybe_download_and_extract(self): 165 | """Download and extract the tarball""" 166 | dest_directory = self._data_dir 167 | if not os.path.exists(dest_directory): 168 | os.makedirs(dest_directory) 169 | filename = self._data_url.split('/')[-1] 170 | archivepath = os.path.join(dest_directory, filename) 171 | if not os.path.exists(archivepath): 172 | 173 | def _progress(count, block_size, total_size): 174 | sys.stdout.write( 175 | '\r>> Downloading %s %.1f%%' % 176 | (filename, 177 | float(count * block_size) / float(total_size) * 100.0)) 178 | sys.stdout.flush() 179 | 180 | archivepath, _ = urllib.request.urlretrieve(self._data_url, 181 | archivepath, _progress) 182 | print() 183 | statinfo = os.stat(archivepath) 184 | print('Successfully downloaded', filename, statinfo.st_size, 185 | 'bytes.') 186 | tarfile.open(archivepath, 'r').extractall(dest_directory) 187 | print('Sucessfully extracted.') 188 | 189 | # Now self._data dir contains VOCDevkit folder 190 | # Build train.csv and val.csv file in self._data_dir 191 | csv_header = ["filename", "y_min", "x_min", "y_max", "x_max", "label"] 192 | if os.path.exists(os.path.join( 193 | self._data_dir, 'train.csv')) and os.path.exists( 194 | os.path.join(self._data_dir, 'val.csv')): 195 | return 196 | 197 | base_dir = os.path.join( 198 | self._data_dir, 199 | 'VOCdevkit', 200 | 'VOC2012', 201 | ) 202 | 203 | for current_set in ['train', 'val']: 204 | csv_path = os.path.join(self._data_dir, 205 | '{}.csv'.format(current_set)) 206 | with open(csv_path, mode='w') as csv_file: 207 | # header 208 | writer = csv.DictWriter(csv_file, csv_header) 209 | writer.writeheader() 210 | for current_class in self.CLASSES: 211 | lines = open( 212 | os.path.join( 213 | base_dir, 'ImageSets', 'Main', '{}_{}.txt'.format( 214 | current_class, 215 | current_set))).read().strip().split("\n") 216 | for line in lines: 217 | splitted = line.split() 218 | if len(splitted) < 1: 219 | print(splitted, line, current_class) 220 | if splitted[1] == "-1": 221 | continue 222 | 223 | image_xml = os.path.join(base_dir, 'Annotations', 224 | '{}.xml'.format(splitted[0])) 225 | image_filename = splitted[0] 226 | 227 | # parse XML 228 | tree = etree.parse(image_xml) 229 | root = tree.getroot() 230 | size = root.find('size') 231 | width = float(size.find('width').text) 232 | height = float(size.find('height').text) 233 | 234 | for obj in root.iter('object'): 235 | # skip difficult & object.name not in current class 236 | label = obj.find('name').text 237 | if label != current_class: 238 | continue 239 | 240 | difficult = obj.find('difficult').text 241 | if int(difficult) == 1: 242 | continue 243 | 244 | bndbox = obj.find('bndbox') 245 | normalized_bbox = [ 246 | # y_min 247 | float(bndbox.find('ymin').text) / height, 248 | # x_min 249 | float(bndbox.find('xmin').text) / width, 250 | # y_max 251 | float(bndbox.find('ymax').text) / height, 252 | # x_max 253 | float(bndbox.find('xmax').text) / width 254 | ] 255 | 256 | label_id = self.CLASSES.index(current_class) 257 | writer.writerow({ 258 | "filename": image_filename, 259 | "y_min": normalized_bbox[0], 260 | "x_min": normalized_bbox[1], 261 | "y_max": normalized_bbox[2], 262 | "x_max": normalized_bbox[3], 263 | "label": label_id 264 | }) 265 | print('{}.csv created'.format(current_set)) 266 | -------------------------------------------------------------------------------- /dytb/inputs/predefined/PASCALVOC2012Localization.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """PASCAL VOC 2012""" 9 | 10 | import os 11 | import sys 12 | import tarfile 13 | import xml.etree.ElementTree as etree 14 | import csv 15 | from collections import defaultdict 16 | 17 | from six.moves import urllib 18 | import tensorflow as tf 19 | from ..processing import build_batch 20 | from ..images import read_image_jpg 21 | from ..interfaces import Input, InputType 22 | from ..PASCALVOC2012Classification import PASCALVOC2012Classification 23 | 24 | 25 | class PASCALVOC2012Localization(Input): 26 | """Routine for decoding the PASCAL VOC 2012 binary file format.""" 27 | 28 | def __init__(self): 29 | self._name = 'PASCAL-VOC-2012-Localization' 30 | # multiple boxes enable the return of a tensor 31 | # of boxes instead of a single box per image 32 | self._multiple_bboxes = False 33 | 34 | # Use Classification dataset 35 | # to extract shared features and download the dataset 36 | self._pascal = PASCALVOC2012Classification() 37 | 38 | def num_examples(self, input_type): 39 | """Returns the number of examples per the specified input_type 40 | 41 | Args: 42 | input_type: InputType enum 43 | """ 44 | return self._pascal.num_examples(input_type) 45 | 46 | @property 47 | def num_classes(self): 48 | """Returns the number of classes""" 49 | return self._pascal.num_classes 50 | 51 | @property 52 | def name(self): 53 | """Returns the name of the input source""" 54 | return self._name 55 | 56 | def _read_image_and_box(self, bboxes_csv): 57 | """Extract the filename from the queue, read the image and 58 | produce a single box 59 | Returns: 60 | image, [y_min, x_min, y_max, x_max, label] 61 | """ 62 | 63 | reader = tf.TextLineReader(skip_header_lines=True) 64 | _, row = reader.read(bboxes_csv) 65 | # file ,y_min, x_min, y_max, x_max, label 66 | record_defaults = [[""], [0.], [0.], [0.], [0.], [0.]] 67 | # eg: 68 | # 2008_000033,0.1831831831831832,0.208,0.7717717717717718,0.952,0 69 | filename, y_min, x_min, y_max, x_max, label = tf.decode_csv( 70 | row, record_defaults) 71 | image_path = os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012', 72 | 'JPEGImages') + "/" + filename + ".jpg" 73 | 74 | # image is normalized in [-1,1] 75 | image = read_image_jpg(image_path) 76 | return image, tf.stack([y_min, x_min, y_max, x_max, label]) 77 | 78 | def inputs(self, input_type, batch_size, augmentation_fn=None): 79 | """Construct input for PASCALVOC2012 evaluation using the Reader ops. 80 | 81 | Args: 82 | input_type: InputType enum 83 | batch_size: Number of images per batch. 84 | Returns: 85 | images: Images. 4D tensor of [batch_size, self._image_height, self._image_width, self._image_depth] size. 86 | labels: A tensor with shape [batch_size, num_bboxes_max, 5]. num_bboxes_max are the maximum bboxes found in the 87 | requested set (train/test/validation). Where the bbox is fake, a -1,-1,-1,-1,-1 value is present 88 | """ 89 | InputType.check(input_type) 90 | 91 | if input_type == InputType.train: 92 | filenames = [ 93 | os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012', 94 | 'ImageSets', 'Main', 'train.txt') 95 | ] 96 | num_examples_per_epoch = self._num_examples_per_epoch_for_train 97 | else: 98 | filenames = [ 99 | os.path.join(self._data_dir, 'VOCdevkit', 'VOC2012', 100 | 'ImageSets', 'Main', 'val.txt') 101 | ] 102 | num_examples_per_epoch = self._num_examples_per_epoch_for_eval 103 | 104 | for name in filenames: 105 | if not tf.gfile.Exists(name): 106 | raise ValueError('Failed to find file: ' + name) 107 | 108 | # Ensure that the random shuffling has good mixing properties. 109 | min_fraction_of_examples_in_queue = 0.4 110 | min_queue_examples = int( 111 | num_examples_per_epoch * min_fraction_of_examples_in_queue) 112 | 113 | with tf.variable_scope("{}_input".format(input_type)): 114 | # Create a queue that produces the filenames to read. 115 | filename_queue = tf.train.string_input_producer(filenames) 116 | 117 | image, bbox = self._read_image_and_box(filename_queue) 118 | 119 | if augmentation_fn: 120 | image = augmentation_fn(image) 121 | return build_batch( 122 | image, 123 | bbox, 124 | min_queue_examples, 125 | batch_size, 126 | shuffle=input_type == InputType.train) 127 | -------------------------------------------------------------------------------- /dytb/inputs/predefined/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galeone/dynamic-training-bench/6534e18bc0744e6e32ed92eba4c95e5b592d4beb/dytb/inputs/predefined/__init__.py -------------------------------------------------------------------------------- /dytb/inputs/processing.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Utils to dataset preprocessing""" 9 | 10 | import os 11 | import multiprocessing 12 | import tensorflow as tf 13 | 14 | 15 | def build_batch(image, label, min_queue_examples, batch_size, shuffle): 16 | """Construct a queued batch of images and labels. 17 | Args: 18 | image: 3-D Tensor of [height, width, 3] of type.float32. 19 | label: 1-D Tensor or a list of tensors like [label, attrA, ... ] 20 | min_queue_examples: int32, minimum number of samples to retain 21 | in the queue that provides of batches of examples. 22 | batch_size: Number of images per batch. 23 | shuffle: boolean indicating whether to use a shuffling queue. 24 | 25 | Returns: 26 | images: Images. 4D tensor of [batch_size, height, width, 3] size. 27 | labels: Labels. 1D tensor of [batch_size] size containing the elements of labels 28 | """ 29 | # Create a queue that shuffles the examples, and then 30 | # read 'batch_size' images + labels from the example queue. 31 | num_preprocess_threads = multiprocessing.cpu_count() 32 | if num_preprocess_threads > 2: 33 | num_preprocess_threads -= 2 34 | 35 | if isinstance(label, list): 36 | row = [image] + label 37 | else: 38 | row = [image, label] 39 | 40 | if shuffle: 41 | return tf.train.shuffle_batch( 42 | row, 43 | batch_size=batch_size, 44 | num_threads=num_preprocess_threads, 45 | capacity=min_queue_examples + 3 * batch_size, 46 | min_after_dequeue=min_queue_examples) 47 | 48 | return tf.train.batch( 49 | row, 50 | batch_size=batch_size, 51 | num_threads=num_preprocess_threads, 52 | capacity=min_queue_examples + 3 * batch_size) 53 | 54 | 55 | def convert_to_tfrecords(dataset, name, data_dir): 56 | """ Converts the dataset in a TFRecord file with name.tfrecords. 57 | Save it into data_dir.""" 58 | 59 | def _int64_feature(value): 60 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 61 | 62 | def _bytes_feature(value): 63 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 64 | 65 | if dataset.images.shape[0] != dataset.num_examples: 66 | raise ValueError('Images size {} does not match label size {}.'.format( 67 | dataset.images.shape[0], dataset.num_examples)) 68 | rows = dataset.images.shape[1] 69 | cols = dataset.images.shape[2] 70 | depth = dataset.images.shape[3] 71 | 72 | filename = os.path.join(data_dir, name + '.tfrecords') 73 | print('Writing', filename) 74 | writer = tf.python_io.TFRecordWriter(filename) 75 | for index in range(dataset.num_examples): 76 | image_raw = dataset.images[index].tostring() 77 | example = tf.train.Example( 78 | features=tf.train.Features( 79 | feature={ 80 | 'height': _int64_feature(rows), 81 | 'width': _int64_feature(cols), 82 | 'depth': _int64_feature(depth), 83 | 'label': _int64_feature(int(dataset.labels[index])), 84 | 'image_raw': _bytes_feature(image_raw) 85 | })) 86 | writer.write(example.SerializeToString()) 87 | writer.close() 88 | -------------------------------------------------------------------------------- /dytb/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galeone/dynamic-training-bench/6534e18bc0744e6e32ed92eba4c95e5b592d4beb/dytb/models/__init__.py -------------------------------------------------------------------------------- /dytb/models/collections.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Collections""" 9 | 10 | # name of the collection that holds non trainable 11 | # but required variables for the current model 12 | REQUIRED_NON_TRAINABLES = 'required_vars_collection' 13 | 14 | # name of the collection that holds the scalar summaries 15 | SCALAR_SUMMARIES = 'scalar_summaries' 16 | 17 | # name of the collection that holds the media summaries 18 | # media = not scalar 19 | MEDIA_SUMMARIES = 'media_summaries' 20 | 21 | # losses collection 22 | LOSSES = 'losses' 23 | -------------------------------------------------------------------------------- /dytb/models/interfaces.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Define the model interfaces""" 9 | 10 | from abc import ABCMeta, abstractmethod, abstractproperty 11 | # Evaluators 12 | from ..evaluators.AutoencoderEvaluator import AutoencoderEvaluator 13 | from ..evaluators.ClassifierEvaluator import ClassifierEvaluator 14 | from ..evaluators.DetectorEvaluator import DetectorEvaluator 15 | from ..evaluators.RegressorEvaluator import RegressorEvaluator 16 | 17 | 18 | class Autoencoder(object, metaclass=ABCMeta): 19 | """Autoencoder is the interface that classifiers must implement""" 20 | 21 | def __init__(self): 22 | self._info = {} 23 | self._seed = None 24 | self._evaluator = None 25 | 26 | @abstractmethod 27 | def get(self, inputs, num_classes, train_phase=False, l2_penalty=0.0): 28 | """ define the model with its inputs. 29 | Use this function to define the model in training and when exporting the model 30 | in the protobuf format. 31 | 32 | Args: 33 | inputs: model input 34 | num_classes: number of classes to predict. If the model doesn't use it, 35 | just pass any value. 36 | train_phase: set it to True when defining the model, during train 37 | l2_penalty: float value, weight decay (l2) penalty 38 | 39 | Returns: 40 | is_training_: tf.bool placeholder enable/disable training ops at run time 41 | predictions: the model output 42 | """ 43 | 44 | @abstractmethod 45 | def loss(self, predictions, real_values): 46 | """Return the loss operation between predictions and real_values 47 | Args: 48 | predictions: predicted values 49 | labels: real_values 50 | 51 | Returns: 52 | Loss tensor of type float. 53 | """ 54 | 55 | @property 56 | def name(self): 57 | """Returns the name of the model""" 58 | return self.__class__.__name__ 59 | 60 | @property 61 | def info(self): 62 | """Returns the inforation about the trained model""" 63 | return self._info 64 | 65 | @info.setter 66 | def info(self, info): 67 | """Save the training info 68 | Args: 69 | info: dict of training info 70 | """ 71 | self._info = info 72 | 73 | @property 74 | def seed(self): 75 | """Returns the seed used for weight initialization""" 76 | return self._seed 77 | 78 | @seed.setter 79 | def seed(self, seed): 80 | """Set the seed to use for weight initialization 81 | Args: 82 | seed 83 | """ 84 | self._seed = seed 85 | 86 | @property 87 | def evaluator(self): 88 | """Returns the evaluator associated to the model""" 89 | if self._evaluator is None: 90 | obj = AutoencoderEvaluator() 91 | obj.model = self 92 | self._evaluator = obj 93 | 94 | return self._evaluator 95 | 96 | 97 | class Classifier(object, metaclass=ABCMeta): 98 | """Classifier is the interface that classifiers must implement""" 99 | 100 | def __init__(self): 101 | self._info = {} 102 | self._seed = None 103 | self._evaluator = None 104 | 105 | @abstractmethod 106 | def get(self, inputs, num_classes, train_phase=False, l2_penalty=0.0): 107 | """Define the model with its inputs. 108 | Use this function to define the model in training and when exporting the model 109 | in the protobuf format. 110 | 111 | Args: 112 | inputs: model input 113 | num_classes: number of classes to predict 114 | train_phase: set it to True when defining the model, during train 115 | l2_penalty: float value, weight decay (l2) penalty 116 | 117 | Returns: 118 | is_training_: tf.bool placeholder enable/disable training ops at run time 119 | logits: the model output 120 | """ 121 | 122 | @abstractmethod 123 | def loss(self, logits, labels): 124 | """Return the loss operation between logits and labels 125 | Args: 126 | logits: Logits from get(). 127 | labels: Labels from train_inputs or inputs(). 1-D tensor 128 | of shape [batch_size] 129 | 130 | Returns: 131 | Loss tensor of type float. 132 | """ 133 | 134 | @property 135 | def name(self): 136 | """Returns the name of the model""" 137 | return self.__class__.__name__ 138 | 139 | @property 140 | def info(self): 141 | """Returns the inforation about the trained model""" 142 | return self._info 143 | 144 | @info.setter 145 | def info(self, info): 146 | """Save the training info 147 | Args: 148 | info: dict of training info 149 | """ 150 | self._info = info 151 | 152 | @property 153 | def seed(self): 154 | """Returns the seed used for weight initialization""" 155 | return self._seed 156 | 157 | @seed.setter 158 | def seed(self, seed): 159 | """Set the seed to use for weight initialization 160 | Args: 161 | seed 162 | """ 163 | self._seed = seed 164 | 165 | @property 166 | def evaluator(self): 167 | """Returns the evaluator associated to the model""" 168 | if self._evaluator is None: 169 | obj = ClassifierEvaluator() 170 | obj.model = self 171 | self._evaluator = obj 172 | return self._evaluator 173 | 174 | 175 | class Detector(object, metaclass=ABCMeta): 176 | """Detector is the interface that detectors must implement""" 177 | 178 | def __init__(self): 179 | self._info = {} 180 | self._seed = None 181 | self._evaluator = None 182 | 183 | @abstractmethod 184 | def get(self, inputs, num_classes, train_phase=False, l2_penalty=0.0): 185 | """ define the model with its inputs. 186 | Use this function to define the model in training and when exporting the model 187 | in the protobuf format. 188 | 189 | Args: 190 | inputs: model input, tensor with batch_size elements 191 | num_classes: number of classes to predict. If the model doesn't use it, 192 | just pass any value. 193 | train_phase: set it to True when defining the model, during train 194 | l2_penalty: float value, weight decay (l2) penalty 195 | 196 | Returns: 197 | is_training_: tf.bool placeholder enable/disable training ops at run time 198 | logits: the unscaled prediction for a class specific detector 199 | bboxes: the predicted coordinates for every detected object in the input image 200 | this must have the same number of rows of logits 201 | """ 202 | 203 | @abstractmethod 204 | def loss(self, label_relations, bboxes_relations): 205 | """Return the loss operation. 206 | Args: 207 | label_relations: a tuple with 2 elements, usually the pair 208 | (labels, logits), each one a tensor of batch_size elements 209 | bboxes_relations: a tuple with 2 elements, usually the pair 210 | (coordinates, bboxes) where coordinates are the 211 | ground truth coordinates ad bboxes the predicted one 212 | Returns: 213 | Loss tensor of type float. 214 | """ 215 | 216 | @property 217 | def name(self): 218 | """Returns the name of the model""" 219 | return self.__class__.__name__ 220 | 221 | @property 222 | def info(self): 223 | """Returns the inforation about the trained model""" 224 | return self._info 225 | 226 | @info.setter 227 | def info(self, info): 228 | """Save the training info 229 | Args: 230 | info: dict of training info 231 | """ 232 | self._info = info 233 | 234 | @property 235 | def seed(self): 236 | """Returns the seed used for weight initialization""" 237 | return self._seed 238 | 239 | @seed.setter 240 | def seed(self, seed): 241 | """Set the seed to use for weight initialization 242 | Args: 243 | seed 244 | """ 245 | self._seed = seed 246 | 247 | @property 248 | def evaluator(self): 249 | """Returns the evaluator associated to the model""" 250 | if self._evaluator is None: 251 | obj = DetectorEvaluator() 252 | obj.model = self 253 | self._evaluator = obj 254 | return self._evaluator 255 | 256 | 257 | class Regressor(object, metaclass=ABCMeta): 258 | """Regressor is the interface that regressors must implement""" 259 | 260 | def __init__(self): 261 | self._info = {} 262 | self._seed = None 263 | self._evaluator = None 264 | 265 | @abstractmethod 266 | def get(self, inputs, num_classes, train_phase=False, l2_penalty=0.0): 267 | """ define the model with its inputs. 268 | Use this function to define the model in training and when exporting the model 269 | in the protobuf format. 270 | 271 | Args: 272 | inputs: model input 273 | num_classes: number of classes to predict. If the model doesn't use it, 274 | just pass any value. 275 | train_phase: set it to True when defining the model, during train 276 | l2_penalty: float value, weight decay (l2) penalty 277 | 278 | Returns: 279 | is_training_: tf.bool placeholder enable/disable training ops at run time 280 | predictions: the model output 281 | """ 282 | 283 | @abstractmethod 284 | def loss(self, predictions, labels): 285 | """Return the loss operation between predictions and labels 286 | Args: 287 | predictions: Predictions from get(). 288 | labels: Labels from train_inputs or inputs(). 1-D tensor 289 | of shape [batch_size] 290 | 291 | Returns: 292 | Loss tensor of type float. 293 | """ 294 | 295 | @property 296 | def name(self): 297 | """Returns the name of the model""" 298 | return self.__class__.__name__ 299 | 300 | @property 301 | def info(self): 302 | """Returns the inforation about the trained model""" 303 | return self._info 304 | 305 | @info.setter 306 | def info(self, info): 307 | """Save the training info 308 | Args: 309 | info: dict of training info 310 | """ 311 | self._info = info 312 | 313 | @property 314 | def seed(self): 315 | """Returns the seed used for weight initialization""" 316 | return self._seed 317 | 318 | @seed.setter 319 | def seed(self, seed): 320 | """Set the seed to use for weight initialization 321 | Args: 322 | seed 323 | """ 324 | self._seed = seed 325 | 326 | @property 327 | def evaluator(self): 328 | """Returns the evaluator associated to the model""" 329 | if self._evaluator is None: 330 | obj = RegressorEvaluator() 331 | obj.model = self 332 | self._evaluator = obj 333 | return self._evaluator 334 | 335 | 336 | class Custom(object, metaclass=ABCMeta): 337 | """Custom is the interface that custom models must implement""" 338 | 339 | def __init__(self): 340 | self._info = {} 341 | self._seed = None 342 | self._evaluator = None 343 | 344 | @abstractmethod 345 | def get(self, inputs, num_classes, **kwargs): 346 | """ define the model with its inputs. 347 | Use this function to define the model in training and when exporting the model 348 | in the protobuf format. 349 | 350 | Args: 351 | inputs: model input 352 | num_classes: number of classes to predict. If the model doesn't use it, 353 | just pass any value. 354 | kwargs: 355 | train_phase: set it to True when defining the model, during train 356 | l2_penalty: float value, weight decay (l2) penalty 357 | 358 | Returns: 359 | is_training_: tf.bool placeholder enable/disable training ops at run time 360 | predictions: the model output 361 | """ 362 | 363 | @abstractmethod 364 | def loss(self, predictions, real_values): 365 | """Return the loss operation between predictions and real_values 366 | Args: 367 | predictions: a list of predicted values eg [predicted_labels_batch, ...] 368 | labels: a list of real_values, eg [ labels_batch, attributeA_batch, ...] 369 | 370 | Returns: 371 | Loss tensor of type float. 372 | """ 373 | 374 | @abstractproperty 375 | def evaluator(self): 376 | """Returns the evaluator associated to the model""" 377 | 378 | # Below implemented properties 379 | 380 | @property 381 | def name(self): 382 | """Returns the name of the model""" 383 | return self.__class__.__name__ 384 | 385 | @property 386 | def info(self): 387 | """Returns the inforation about the trained model""" 388 | return self._info 389 | 390 | @info.setter 391 | def info(self, info): 392 | """Save the training info 393 | Args: 394 | info: dict of training info 395 | """ 396 | self._info = info 397 | 398 | @property 399 | def seed(self): 400 | """Returns the seed used for weight initialization""" 401 | return self._seed 402 | 403 | @seed.setter 404 | def seed(self, seed): 405 | """Set the seed to use for weight initialization 406 | Args: 407 | seed 408 | """ 409 | self._seed = seed 410 | -------------------------------------------------------------------------------- /dytb/models/predefined/LeNet.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Build a LeNet-like network without additional layers""" 9 | 10 | import tensorflow as tf 11 | from ..collections import LOSSES 12 | from ..layers import conv, fc 13 | from ..interfaces import Classifier 14 | 15 | 16 | class LeNet(Classifier): 17 | """Build a LeNet-like network without additional layers""" 18 | 19 | def _inference(self, 20 | images, 21 | num_classes, 22 | is_training_, 23 | train_phase=False, 24 | l2_penalty=0.0): 25 | """Build the LeNet-like network. 26 | 27 | Args: 28 | images: Images returned from train_inputs() or inputs(). 29 | num_classes: Number of classes to predict 30 | is_training_: enable/disable training ops at run time 31 | train_phase: Boolean to enable/disable training ops at build time 32 | l2_penalty: float value, weight decay (l2) penalty 33 | 34 | Returns: 35 | Logits. 36 | """ 37 | 38 | # Initializer with seed 39 | initializer = tf.contrib.layers.variance_scaling_initializer( 40 | factor=2.0, 41 | mode='FAN_IN', 42 | uniform=False, 43 | seed=self.seed, 44 | dtype=tf.float32) 45 | 46 | with tf.variable_scope(self.__class__.__name__): 47 | with tf.variable_scope("conv1"): 48 | conv1 = conv( 49 | images, [5, 5, 1, 32], 50 | 1, 51 | 'SAME', 52 | train_phase, 53 | activation=tf.nn.relu, 54 | wd=l2_penalty, 55 | initializer=initializer) 56 | 57 | with tf.variable_scope("pool1"): 58 | pool1 = tf.nn.max_pool( 59 | conv1, 60 | ksize=[1, 2, 2, 1], 61 | strides=[1, 2, 2, 1], 62 | padding='VALID') 63 | 64 | with tf.variable_scope("conv2"): 65 | conv2 = conv( 66 | pool1, [5, 5, 32, 64], 67 | 1, 68 | 'SAME', 69 | train_phase, 70 | activation=tf.nn.relu, 71 | wd=l2_penalty, 72 | initializer=initializer) 73 | 74 | with tf.variable_scope("pool2"): 75 | pool2 = tf.nn.max_pool( 76 | conv2, 77 | ksize=[1, 2, 2, 1], 78 | strides=[1, 2, 2, 1], 79 | padding='VALID') 80 | pool2 = tf.reshape(pool2, [-1, 7 * 7 * 64]) 81 | 82 | with tf.variable_scope("fc1"): 83 | fc1 = fc( 84 | pool2, [7 * 7 * 64, 1024], 85 | train_phase, 86 | activation=tf.nn.relu, 87 | wd=l2_penalty, 88 | initializer=initializer) 89 | 90 | with tf.variable_scope("softmax_linear"): 91 | logits = fc(fc1, [1024, num_classes], train_phase) 92 | return logits 93 | 94 | def loss(self, logits, labels): 95 | """Add L2Loss to all the trainable variables. 96 | Args: 97 | logits: Logits from get(). 98 | labels: Labels from train_inputs or inputs(). 1-D tensor 99 | of shape [batch_size] 100 | 101 | Returns: 102 | Loss tensor of type float. 103 | """ 104 | with tf.variable_scope('loss'): 105 | # Calculate the average cross entropy loss across the batch. 106 | labels = tf.cast(labels, tf.int64) 107 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 108 | logits=logits, labels=labels, name='cross_entropy_per_example') 109 | cross_entropy_mean = tf.reduce_mean( 110 | cross_entropy, name='cross_entropy') 111 | tf.add_to_collection(LOSSES, cross_entropy_mean) 112 | 113 | # The total loss is defined as the cross entropy loss plus all of the weight 114 | # decay terms (L2 loss). 115 | error = tf.add_n(tf.get_collection(LOSSES), name='total_loss') 116 | return error 117 | 118 | def get(self, images, num_classes, train_phase=False, l2_penalty=0.0): 119 | """ define the model with its inputs. 120 | Use this function to define the model in training and when exporting the model 121 | in the protobuf format. 122 | 123 | Args: 124 | images: model input 125 | num_classes: number of classes to predict 126 | train_phase: set it to True when defining the model, during train 127 | l2_penalty: float value, weight decay (l2) penalty 128 | 129 | Returns: 130 | is_training_: tf.bool placeholder enable/disable training ops at run time 131 | logits: the model output 132 | """ 133 | is_training_ = tf.placeholder_with_default( 134 | False, shape=(), name="is_training_") 135 | # build a graph that computes the logits predictions from the images 136 | logits = self._inference(images, num_classes, is_training_, train_phase, 137 | l2_penalty) 138 | 139 | return is_training_, logits 140 | -------------------------------------------------------------------------------- /dytb/models/predefined/LeNetBN.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Build a LeNet-like network with BN layers""" 9 | 10 | import tensorflow as tf 11 | from ..collections import LOSSES 12 | from ..layers import conv, fc, batch_norm 13 | from ..interfaces import Classifier 14 | 15 | 16 | class LeNetBN(Classifier): 17 | """Build a LeNet-like network with BN layers""" 18 | 19 | def _inference(self, 20 | images, 21 | num_classes, 22 | is_training_, 23 | train_phase=False, 24 | l2_penalty=0.0): 25 | """Build the LeNet-like network. 26 | 27 | Args: 28 | images: Images returned from train_inputs() or inputs(). 29 | num_classes: Number of classes to predict 30 | is_training_: enable/disable training ops at run time 31 | train_phase: Boolean to enable/disable training ops at build time 32 | l2_penalty: float value, weight decay (l2) penalty 33 | 34 | Returns: 35 | Logits. 36 | """ 37 | 38 | # Initializer with seed 39 | initializer = tf.contrib.layers.variance_scaling_initializer( 40 | factor=2.0, 41 | mode='FAN_IN', 42 | uniform=False, 43 | seed=self.seed, 44 | dtype=tf.float32) 45 | 46 | with tf.variable_scope(self.__class__.__name__): 47 | with tf.variable_scope("conv1"): 48 | conv1 = tf.nn.relu( 49 | batch_norm( 50 | conv( 51 | images, [5, 5, 1, 32], 52 | 1, 53 | 'SAME', 54 | train_phase, 55 | bias_term=False, 56 | wd=l2_penalty, 57 | initializer=initializer), is_training_ 58 | if train_phase else False)) 59 | 60 | with tf.variable_scope("pool1"): 61 | pool1 = tf.nn.max_pool( 62 | conv1, 63 | ksize=[1, 2, 2, 1], 64 | strides=[1, 2, 2, 1], 65 | padding='VALID') 66 | 67 | with tf.variable_scope("conv2"): 68 | conv2 = tf.nn.relu( 69 | batch_norm( 70 | conv( 71 | pool1, [5, 5, 32, 64], 72 | 1, 73 | 'SAME', 74 | train_phase, 75 | bias_term=False, 76 | wd=l2_penalty, 77 | initializer=initializer), is_training_ 78 | if train_phase else False)) 79 | 80 | with tf.variable_scope("pool2"): 81 | pool2 = tf.nn.max_pool( 82 | conv2, 83 | ksize=[1, 2, 2, 1], 84 | strides=[1, 2, 2, 1], 85 | padding='VALID') 86 | pool2 = tf.reshape(pool2, [-1, 7 * 7 * 64]) 87 | 88 | with tf.variable_scope("fc1"): 89 | fc1 = tf.nn.relu( 90 | batch_norm( 91 | fc(pool2, [7 * 7 * 64, 1024], 92 | train_phase, 93 | bias_term=False, 94 | wd=l2_penalty, 95 | initializer=initializer), is_training_ 96 | if train_phase else False)) 97 | 98 | with tf.variable_scope("softmax_linear"): 99 | logits = fc( 100 | fc1, [1024, num_classes], 101 | train_phase, 102 | initializer=initializer) 103 | return logits 104 | 105 | def loss(self, logits, labels): 106 | """Add L2Loss to all the trainable variables. 107 | Args: 108 | logits: Logits from get(). 109 | labels: Labels from train_inputs or inputs(). 1-D tensor 110 | of shape [batch_size] 111 | 112 | Returns: 113 | Loss tensor of type float. 114 | """ 115 | with tf.variable_scope('loss'): 116 | # Calculate the average cross entropy loss across the batch. 117 | labels = tf.cast(labels, tf.int64) 118 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 119 | logits=logits, labels=labels, name='cross_entropy_per_example') 120 | cross_entropy_mean = tf.reduce_mean( 121 | cross_entropy, name='cross_entropy') 122 | tf.add_to_collection(LOSSES, cross_entropy_mean) 123 | 124 | # The total loss is defined as the cross entropy loss plus all of the weight 125 | # decay terms (L2 loss). 126 | error = tf.add_n(tf.get_collection(LOSSES), name='total_loss') 127 | 128 | return error 129 | 130 | def get(self, images, num_classes, train_phase=False, l2_penalty=0.0): 131 | """ define the model with its inputs. 132 | Use this function to define the model in training and when exporting the model 133 | in the protobuf format. 134 | 135 | Args: 136 | images: model input 137 | num_classes: number of classes to predict 138 | train_phase: set it to True when defining the model, during train 139 | l2_penalty: float value, weight decay (l2) penalty 140 | 141 | Returns: 142 | is_training_: tf.bool placeholder enable/disable training ops at run time 143 | logits: the model output 144 | """ 145 | is_training_ = tf.placeholder_with_default( 146 | False, shape=(), name="is_training_") 147 | # build a graph that computes the logits predictions from the images 148 | logits = self._inference(images, num_classes, is_training_, train_phase, 149 | l2_penalty) 150 | 151 | return is_training_, logits 152 | -------------------------------------------------------------------------------- /dytb/models/predefined/LeNetDirectDropout.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Build a LeNet-like network with direct dropout layers""" 9 | 10 | import tensorflow as tf 11 | from ..collections import LOSSES 12 | from ..layers import conv, fc, direct_dropout 13 | from ..interfaces import Classifier 14 | 15 | 16 | class LeNetDirectDropout(Classifier): 17 | """Build a LeNet-like network with direct dropout layers""" 18 | 19 | def _inference(self, 20 | images, 21 | num_classes, 22 | is_training_, 23 | train_phase=False, 24 | l2_penalty=0.0): 25 | """Build the LeNet-like network. 26 | 27 | Args: 28 | images: Images returned from train_inputs() or inputs(). 29 | num_classes: Number of classes to predict 30 | is_training_: enable/disable training ops at run time 31 | train_phase: Boolean to enable/disable training ops at build time 32 | l2_penalty: float value, weight decay (l2) penalty 33 | 34 | Returns: 35 | Logits. 36 | """ 37 | 38 | # Initializer with seed 39 | initializer = tf.contrib.layers.variance_scaling_initializer( 40 | factor=2.0, 41 | mode='FAN_IN', 42 | uniform=False, 43 | seed=self.seed, 44 | dtype=tf.float32) 45 | 46 | def direct_drop(layer, prob): 47 | """ Build a condition node if we are in train_phase. thus we can use the 48 | is_training_ placeholder to switch. 49 | Build a prob*layer node when we're not in train_phase. 50 | Returns the correct node""" 51 | 52 | if train_phase: 53 | layer = tf.cond( 54 | tf.equal(is_training_, True), 55 | lambda: direct_dropout(layer, prob), lambda: prob * layer) 56 | else: 57 | layer = prob * layer 58 | return layer 59 | 60 | with tf.variable_scope(self.__class__.__name__): 61 | with tf.variable_scope("conv1"): 62 | conv1 = conv( 63 | images, [5, 5, 1, 32], 64 | 1, 65 | 'SAME', 66 | train_phase, 67 | activation=tf.nn.relu, 68 | wd=l2_penalty, 69 | initializer=initializer) 70 | conv1 = direct_drop(conv1, 0.7) 71 | 72 | with tf.variable_scope("pool1"): 73 | pool1 = tf.nn.max_pool( 74 | conv1, 75 | ksize=[1, 2, 2, 1], 76 | strides=[1, 2, 2, 1], 77 | padding='VALID') 78 | 79 | with tf.variable_scope("conv2"): 80 | conv2 = conv( 81 | pool1, [5, 5, 32, 64], 82 | 1, 83 | 'SAME', 84 | train_phase, 85 | activation=tf.nn.relu, 86 | wd=l2_penalty, 87 | initializer=initializer) 88 | conv2 = direct_drop(conv2, 0.6) 89 | 90 | with tf.variable_scope("pool2"): 91 | pool2 = tf.nn.max_pool( 92 | conv2, 93 | ksize=[1, 2, 2, 1], 94 | strides=[1, 2, 2, 1], 95 | padding='VALID') 96 | pool2 = tf.reshape(pool2, [-1, 7 * 7 * 64]) 97 | 98 | with tf.variable_scope("fc1"): 99 | fc1 = fc( 100 | pool2, [7 * 7 * 64, 1024], 101 | train_phase, 102 | activation=tf.nn.relu, 103 | wd=l2_penalty, 104 | initializer=initializer) 105 | 106 | fc1 = direct_drop(fc1, 0.5) 107 | 108 | with tf.variable_scope("softmax_linear"): 109 | logits = fc( 110 | fc1, [1024, num_classes], 111 | train_phase, 112 | initializer=initializer) 113 | return logits 114 | 115 | def loss(self, logits, labels): 116 | """Add L2Loss to all the trainable variables. 117 | Args: 118 | logits: Logits from get(). 119 | labels: Labels from train_inputs or inputs(). 1-D tensor 120 | of shape [batch_size] 121 | 122 | Returns: 123 | Loss tensor of type float. 124 | """ 125 | with tf.variable_scope('loss'): 126 | # Calculate the average cross entropy loss across the batch. 127 | labels = tf.cast(labels, tf.int64) 128 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 129 | logits=logits, labels=labels, name='cross_entropy_per_example') 130 | cross_entropy_mean = tf.reduce_mean( 131 | cross_entropy, name='cross_entropy') 132 | tf.add_to_collection(LOSSES, cross_entropy_mean) 133 | 134 | # The total loss is defined as the cross entropy loss plus all of the weight 135 | # decay terms (L2 loss). 136 | error = tf.add_n(tf.get_collection(LOSSES), name='total_loss') 137 | 138 | return error 139 | 140 | def get(self, images, num_classes, train_phase=False, l2_penalty=0.0): 141 | """ define the model with its inputs. 142 | Use this function to define the model in training and when exporting the model 143 | in the protobuf format. 144 | 145 | Args: 146 | images: model input 147 | num_classes: number of classes to predict 148 | train_phase: set it to True when defining the model, during train 149 | l2_penalty: float value, weight decay (l2) penalty 150 | 151 | Returns: 152 | is_training_: tf.bool placeholder enable/disable training ops at run time 153 | logits: the model output 154 | """ 155 | is_training_ = tf.placeholder_with_default( 156 | False, shape=(), name="is_training_") 157 | # build a graph that computes the logits predictions from the images 158 | logits = self._inference(images, num_classes, is_training_, train_phase, 159 | l2_penalty) 160 | 161 | return is_training_, logits 162 | -------------------------------------------------------------------------------- /dytb/models/predefined/LeNetDropout.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Build the LeNet-like network with dropout layers""" 9 | 10 | import tensorflow as tf 11 | from ..collections import LOSSES 12 | from ..layers import conv, fc 13 | from ..interfaces import Classifier 14 | 15 | 16 | class LeNetDropout(Classifier): 17 | """Build the LeNet-like network with dropout layers""" 18 | 19 | def _inference(self, 20 | images, 21 | num_classes, 22 | is_training_, 23 | train_phase=False, 24 | l2_penalty=0.0): 25 | """Build the LeNet-like network. 26 | 27 | Args: 28 | images: Images returned from train_inputs() or inputs(). 29 | num_classes: Number of classes to predict 30 | is_training_: enable/disable training ops at run time 31 | train_phase: Boolean to enable/disable training ops at build time 32 | l2_penalty: float value, weight decay (l2) penalty 33 | 34 | Returns: 35 | Logits. 36 | """ 37 | 38 | # Initializer with seed 39 | initializer = tf.contrib.layers.variance_scaling_initializer( 40 | factor=2.0, 41 | mode='FAN_IN', 42 | uniform=False, 43 | seed=self.seed, 44 | dtype=tf.float32) 45 | 46 | with tf.variable_scope(self.__class__.__name__): 47 | with tf.variable_scope("conv1"): 48 | conv1 = conv( 49 | images, [5, 5, 1, 32], 50 | 1, 51 | 'SAME', 52 | train_phase, 53 | activation=tf.nn.relu, 54 | wd=l2_penalty, 55 | initializer=initializer) 56 | if train_phase: 57 | conv1 = tf.cond( 58 | tf.equal(is_training_, True), 59 | lambda: tf.nn.dropout(conv1, 0.7), lambda: conv1) 60 | 61 | with tf.variable_scope("pool1"): 62 | pool1 = tf.nn.max_pool( 63 | conv1, 64 | ksize=[1, 2, 2, 1], 65 | strides=[1, 2, 2, 1], 66 | padding='VALID') 67 | 68 | with tf.variable_scope("conv2"): 69 | conv2 = conv( 70 | pool1, [5, 5, 32, 64], 71 | 1, 72 | 'SAME', 73 | train_phase, 74 | activation=tf.nn.relu, 75 | wd=l2_penalty, 76 | initializer=initializer) 77 | if train_phase: 78 | conv2 = tf.cond( 79 | tf.equal(is_training_, True), 80 | lambda: tf.nn.dropout(conv2, 0.6), lambda: conv2) 81 | 82 | with tf.variable_scope("pool2"): 83 | pool2 = tf.nn.max_pool( 84 | conv2, 85 | ksize=[1, 2, 2, 1], 86 | strides=[1, 2, 2, 1], 87 | padding='VALID') 88 | pool2 = tf.reshape(pool2, [-1, 7 * 7 * 64]) 89 | 90 | with tf.variable_scope("fc1"): 91 | fc1 = fc( 92 | pool2, [7 * 7 * 64, 1024], 93 | train_phase, 94 | activation=tf.nn.relu, 95 | wd=l2_penalty, 96 | initializer=initializer) 97 | 98 | if train_phase: 99 | fc1 = tf.cond( 100 | tf.equal(is_training_, True), 101 | lambda: tf.nn.dropout(fc1, 0.5), lambda: fc1) 102 | 103 | with tf.variable_scope("softmax_linear"): 104 | logits = fc( 105 | fc1, [1024, num_classes], 106 | train_phase, 107 | initializer=initializer) 108 | return logits 109 | 110 | def loss(self, logits, labels): 111 | """Add L2Loss to all the trainable variables. 112 | Args: 113 | logits: Logits from get(). 114 | labels: Labels from train_inputs or inputs(). 1-D tensor 115 | of shape [batch_size] 116 | 117 | Returns: 118 | Loss tensor of type float. 119 | """ 120 | with tf.variable_scope('loss'): 121 | # Calculate the average cross entropy loss across the batch. 122 | labels = tf.cast(labels, tf.int64) 123 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 124 | logits=logits, labels=labels, name='cross_entropy_per_example') 125 | cross_entropy_mean = tf.reduce_mean( 126 | cross_entropy, name='cross_entropy') 127 | tf.add_to_collection(LOSSES, cross_entropy_mean) 128 | 129 | # The total loss is defined as the cross entropy loss plus all of the weight 130 | # decay terms (L2 loss). 131 | error = tf.add_n(tf.get_collection(LOSSES), name='total_loss') 132 | 133 | return error 134 | 135 | def get(self, images, num_classes, train_phase=False, l2_penalty=0.0): 136 | """ define the model with its inputs. 137 | Use this function to define the model in training and when exporting the model 138 | in the protobuf format. 139 | 140 | Args: 141 | images: model input 142 | num_classes: number of classes to predict 143 | train_phase: set it to True when defining the model, during train 144 | l2_penalty: float value, weight decay (l2) penalty 145 | 146 | Returns: 147 | is_training_: tf.bool placeholder enable/disable training ops at run time 148 | logits: the model output 149 | """ 150 | is_training_ = tf.placeholder_with_default( 151 | False, shape=(), name="is_training_") 152 | # build a graph that computes the logits predictions from the images 153 | logits = self._inference(images, num_classes, is_training_, train_phase, 154 | l2_penalty) 155 | 156 | return is_training_, logits 157 | -------------------------------------------------------------------------------- /dytb/models/predefined/SingleLayerCAE.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Build a single layer CAE""" 9 | 10 | import tensorflow as tf 11 | from ..collections import LOSSES 12 | from ..layers import conv 13 | from ..interfaces import Autoencoder 14 | 15 | 16 | class SingleLayerCAE(Autoencoder): 17 | """ Build a single layer CAE""" 18 | 19 | def _pad(self, input_x, filter_side): 20 | """ 21 | pads input_x with the right amount of zeros. 22 | Args: 23 | input_x: 4-D tensor, [batch_side, widht, height, depth] 24 | filter_side: used to dynamically determine the padding amount 25 | Returns: 26 | input_x padded 27 | """ 28 | # calculate the padding amount for each side 29 | amount = filter_side - 1 30 | # pad the input on top, bottom, left, right, with amount zeros 31 | return tf.pad(input_x, 32 | [[0, 0], [amount, amount], [amount, amount], [0, 0]]) 33 | 34 | def get(self, images, num_classes, train_phase=False, l2_penalty=0.0): 35 | """ define the model with its inputs. 36 | Use this function to define the model in training and when exporting the model 37 | in the protobuf format. 38 | Args: 39 | images: model input 40 | num_classes: number of classes to predict. If the model doesn't use it, 41 | just pass any value. 42 | train_phase: set it to True when defining the model, during train 43 | l2_penalty: float value, weight decay (l2) penalty 44 | Returns: 45 | is_training_: tf.bool placeholder enable/disable training ops at run time 46 | predictions: the model output 47 | """ 48 | 49 | # Initializer with seed 50 | initializer = tf.contrib.layers.variance_scaling_initializer( 51 | factor=2.0, 52 | mode='FAN_IN', 53 | uniform=False, 54 | seed=self.seed, 55 | dtype=tf.float32) 56 | 57 | filter_side = 3 58 | filters_number = 32 59 | with tf.variable_scope(self.__class__.__name__): 60 | input_x = self._pad(images, filter_side) 61 | 62 | with tf.variable_scope("encode"): 63 | # the encoding convolutions is a [3 x 3 x input_depth] x 32 convolution 64 | # the activation function chosen is the tanh 65 | # 32 is the number of feature extracted. It's completely arbitrary as is 66 | # the side of the convolutional filter and the activation function used 67 | encoding = conv( 68 | input_x, [ 69 | filter_side, filter_side, 70 | input_x.get_shape()[3].value, filters_number 71 | ], 72 | 1, 73 | 'VALID', 74 | train_phase, 75 | activation=tf.nn.tanh, 76 | wd=l2_penalty, 77 | initializer=initializer) 78 | 79 | with tf.variable_scope("decode"): 80 | # the decoding convolution is a [3 x 3 x 32] x input_depth convolution 81 | # the activation function chosen is the tanh 82 | # The dimenensions of the convolutional filter in the decoding convolution, 83 | # differently from the encoding, are constrained by the 84 | # choices made in the encoding layer 85 | # The only degree of freedom is the chose of the activation function. 86 | # We have to choose an activation function that constraints the outputs 87 | # to live in the same space of the input values. 88 | # Since the input values are between -1 and 1, we can use the tanh function 89 | # directly, or we could use the sigmoid and then scale the output 90 | output_x = conv( 91 | encoding, [ 92 | filter_side, filter_side, filters_number, 93 | input_x.get_shape()[3].value 94 | ], 95 | 1, 96 | 'VALID', 97 | train_phase, 98 | activation=tf.nn.tanh, 99 | initializer=initializer) 100 | 101 | # The is_training_ placeholder is not used, but we define and return it 102 | # in order to respect the expected output cardinality of the get method 103 | is_training_ = tf.placeholder_with_default( 104 | False, shape=(), name="is_training_") 105 | return is_training_, output_x 106 | 107 | def loss(self, predictions, real_values): 108 | """Return the loss operation between predictions and real_values. 109 | Add L2 weight decay term if any. 110 | Args: 111 | predictions: predicted values 112 | real_values: real values 113 | Returns: 114 | Loss tensor of type float. 115 | """ 116 | with tf.variable_scope('loss'): 117 | # 1/2n \sum^{n}_{i=i}{(x_i - x'_i)^2} 118 | mse = tf.divide( 119 | tf.reduce_mean( 120 | tf.square(tf.subtract(predictions, real_values))), 121 | 2., 122 | name="mse") 123 | tf.add_to_collection(LOSSES, mse) 124 | 125 | # mse + weight_decay per layer 126 | error = tf.add_n(tf.get_collection(LOSSES), name='total_loss') 127 | 128 | return error 129 | -------------------------------------------------------------------------------- /dytb/models/predefined/StackedCAE.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Build a stacked CAE""" 9 | 10 | import tensorflow as tf 11 | from ..collections import LOSSES 12 | from ..layers import conv 13 | from ..interfaces import Autoencoder 14 | 15 | 16 | class StackedCAE(Autoencoder): 17 | """Build a stacked CAE""" 18 | 19 | def _pad(self, input_x, filter_side): 20 | """ 21 | pads input_x with the right amount of zeros. 22 | Args: 23 | input_x: 4-D tensor, [batch_side, widht, height, depth] 24 | filter_side: used to dynamically determine the padding amount 25 | Returns: 26 | input_x padded 27 | """ 28 | # calculate the padding amount for each side 29 | amount = filter_side - 1 30 | # pad the input on top, bottom, left, right, with amount zeros 31 | return tf.pad(input_x, 32 | [[0, 0], [amount, amount], [amount, amount], [0, 0]]) 33 | 34 | def get(self, images, num_classes, train_phase=False, l2_penalty=0.0): 35 | """ define the model with its inputs. 36 | Use this function to define the model in training and when exporting the model 37 | in the protobuf format. 38 | Args: 39 | images: model input 40 | num_classes: number of classes to predict. If the model doesn't use it, 41 | just pass any value. 42 | train_phase: set it to True when defining the model, during train 43 | l2_penalty: float value, weight decay (l2) penalty 44 | Returns: 45 | is_training_: tf.bool placeholder enable/disable training ops at run time 46 | predictions: the model output 47 | """ 48 | 49 | # Initializer with seed 50 | initializer = tf.contrib.layers.variance_scaling_initializer( 51 | factor=2.0, 52 | mode='FAN_IN', 53 | uniform=False, 54 | seed=self.seed, 55 | dtype=tf.float32) 56 | 57 | num_layers = 9 58 | filter_side = 3 59 | filters_number = 9 60 | with tf.variable_scope(self.__class__.__name__): 61 | input_x = tf.identity(images) 62 | input_padded = self._pad(input_x, filter_side) 63 | for layer in range(num_layers): 64 | with tf.variable_scope("layer_" + str(layer)): 65 | with tf.variable_scope("encode"): 66 | encoding = conv( 67 | input_padded, [ 68 | filter_side, filter_side, 69 | input_padded.get_shape()[3].value, 70 | filters_number 71 | ], 72 | 1, 73 | 'VALID', 74 | train_phase, 75 | activation=tf.nn.tanh, 76 | wd=l2_penalty, 77 | initializer=initializer) 78 | if train_phase: 79 | encoding = tf.nn.dropout(encoding, 0.5) 80 | 81 | with tf.variable_scope("decode"): 82 | output_x = conv( 83 | encoding, [ 84 | filter_side, filter_side, filters_number, 85 | images.get_shape()[3].value 86 | ], 87 | 1, 88 | 'VALID', 89 | train_phase, 90 | activation=tf.nn.tanh, 91 | initializer=initializer) 92 | 93 | tf.add_to_collection(LOSSES, self._mse( 94 | input_x, output_x)) 95 | input_x = tf.stop_gradient(output_x) 96 | input_padded = self._pad(input_x, filter_side) 97 | 98 | # The is_training_ placeholder is not used, but we define and return it 99 | # in order to respect the expected output cardinality of the get method 100 | is_training_ = tf.placeholder_with_default( 101 | False, shape=(), name="is_training_") 102 | return is_training_, output_x 103 | 104 | def _mse(self, input_x, output_x): 105 | # 1/2n \sum^{n}_{i=i}{(x_i - x'_i)^2} 106 | return tf.divide( 107 | tf.reduce_mean(tf.square(tf.subtract(input_x, output_x))), 108 | 2., 109 | name="mse") 110 | 111 | def loss(self, predictions, real_values): 112 | """Return the loss operation between predictions and real_values. 113 | Add L2 weight decay term if any. 114 | Args: 115 | predictions: predicted values 116 | real_values: real values 117 | Returns: 118 | Loss tensor of type float. 119 | """ 120 | with tf.variable_scope('loss'): 121 | #tf.add_to_collection(LOSSES, self._mse(real_values, predictions)) 122 | # mse + weight_decay per layer 123 | error = tf.add_n(tf.get_collection(LOSSES), name='total_loss') 124 | 125 | return error 126 | -------------------------------------------------------------------------------- /dytb/models/predefined/StackedDenoisingCAE.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Build a stacked denoising CAE""" 9 | 10 | import tensorflow as tf 11 | from ..collections import LOSSES 12 | from ..layers import conv 13 | from ..interfaces import Autoencoder 14 | 15 | 16 | class StackedDenoisingCAE(Autoencoder): 17 | """Build a stacked denoising CAE""" 18 | 19 | def _pad(self, input_x, filter_side): 20 | """ 21 | pads input_x with the right amount of zeros. 22 | Args: 23 | input_x: 4-D tensor, [batch_side, widht, height, depth] 24 | filter_side: used to dynamically determine the padding amount 25 | Returns: 26 | input_x padded 27 | """ 28 | # calculate the padding amount for each side 29 | amount = filter_side - 1 30 | # pad the input on top, bottom, left, right, with amount zeros 31 | return tf.pad(input_x, 32 | [[0, 0], [amount, amount], [amount, amount], [0, 0]]) 33 | 34 | def get(self, images, num_classes, train_phase=False, l2_penalty=0.0): 35 | """ define the model with its inputs. 36 | Use this function to define the model in training and when exporting the model 37 | in the protobuf format. 38 | Args: 39 | images: model input 40 | num_classes: number of classes to predict. If the model doesn't use it, 41 | just pass any value. 42 | train_phase: set it to True when defining the model, during train 43 | l2_penalty: float value, weight decay (l2) penalty 44 | Returns: 45 | is_training_: tf.bool placeholder enable/disable training ops at run time 46 | predictions: the model output 47 | """ 48 | 49 | # Initializer with seed 50 | initializer = tf.contrib.layers.variance_scaling_initializer( 51 | factor=2.0, 52 | mode='FAN_IN', 53 | uniform=False, 54 | seed=self.seed, 55 | dtype=tf.float32) 56 | 57 | num_layers = 9 58 | filter_side = 3 59 | filters_number = 9 60 | with tf.variable_scope(self.__class__.__name__): 61 | input_x = tf.identity(images) 62 | if train_phase: 63 | input_x_noise = tf.clip_by_value(input_x + tf.random_uniform( 64 | input_x.get_shape(), 65 | minval=-0.5, 66 | maxval=0.5, 67 | dtype=input_x.dtype, 68 | seed=None), -1.0, 1.0) 69 | else: 70 | input_x_noise = input_x 71 | input_padded_noise = self._pad(input_x_noise, filter_side) 72 | 73 | for layer in range(num_layers): 74 | with tf.variable_scope("layer_" + str(layer)): 75 | with tf.variable_scope("encode"): 76 | encoding = conv( 77 | input_padded_noise, [ 78 | filter_side, filter_side, 79 | input_padded_noise.get_shape()[3].value, 80 | filters_number 81 | ], 82 | 1, 83 | 'VALID', 84 | train_phase, 85 | activation=tf.nn.relu, 86 | wd=l2_penalty, 87 | initializer=initializer) 88 | 89 | if train_phase: 90 | encoding = tf.nn.dropout(encoding, 0.5) 91 | 92 | with tf.variable_scope("decode"): 93 | output_x_noise = conv( 94 | encoding, [ 95 | filter_side, filter_side, filters_number, 96 | images.get_shape()[3].value 97 | ], 98 | 1, 99 | 'VALID', 100 | train_phase, 101 | activation=tf.nn.tanh, 102 | initializer=initializer) 103 | 104 | last = layer == num_layers - 1 105 | if train_phase and not last: 106 | output_x_noise = tf.nn.dropout(output_x_noise, 0.5) 107 | 108 | # loss between input without noise and output computed 109 | # on noisy values 110 | tf.add_to_collection(LOSSES, 111 | self._mse(output_x_noise, input_x)) 112 | input_x_noise = tf.stop_gradient(output_x_noise) 113 | input_padded_noise = self._pad(input_x_noise, 114 | filter_side) 115 | 116 | # The is_training_ placeholder is not used, but we define and return it 117 | # in order to respect the expected output cardinality of the get method 118 | is_training_ = tf.placeholder_with_default( 119 | False, shape=(), name="is_training_") 120 | return is_training_, output_x_noise 121 | 122 | def _mse(self, input_x, output_x): 123 | # 1/2n \sum^{n}_{i=i}{(x_i - x'_i)^2} 124 | return tf.divide( 125 | tf.reduce_mean(tf.square(tf.subtract(input_x, output_x))), 126 | 2., 127 | name="mse") 128 | 129 | def loss(self, predictions, real_values): 130 | """Return the loss operation between predictions and real_values. 131 | Add L2 weight decay term if any. 132 | Args: 133 | predictions: predicted values 134 | real_values: real values 135 | Returns: 136 | Loss tensor of type float. 137 | """ 138 | with tf.variable_scope('loss'): 139 | #tf.add_to_collection(LOSSES, self._mse(real_values, predictions)) 140 | # mse + weight_decay per layer 141 | error = tf.add_n(tf.get_collection(LOSSES), name='total_loss') 142 | 143 | return error 144 | -------------------------------------------------------------------------------- /dytb/models/predefined/VGG.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Build the VGG-like network without additional layers""" 9 | 10 | import tensorflow as tf 11 | from ..collections import LOSSES 12 | from ..layers import conv, fc 13 | from ..interfaces import Classifier 14 | 15 | 16 | class VGG(Classifier): 17 | """Build the VGG-like network without additional layers""" 18 | 19 | def _inference(self, 20 | images, 21 | num_classes, 22 | is_training_, 23 | train_phase=False, 24 | l2_penalty=0.0): 25 | """Build the VGG-like network without additional layers. 26 | 27 | Args: 28 | images: Images returned from train_inputs() or inputs(). 29 | num_classes: Number of classes to predict 30 | is_training_: enable/disable training ops at run time 31 | train_phase: Boolean to enable/disable training ops at build time 32 | l2_penalty: float value, weight decay (l2) penalty 33 | 34 | Returns: 35 | Logits. 36 | """ 37 | 38 | # Initializer with seed 39 | initializer = tf.contrib.layers.variance_scaling_initializer( 40 | factor=2.0, 41 | mode='FAN_IN', 42 | uniform=False, 43 | seed=self.seed, 44 | dtype=tf.float32) 45 | 46 | with tf.variable_scope(self.__class__.__name__): 47 | with tf.variable_scope('64'): 48 | with tf.variable_scope('conv1'): 49 | conv1 = conv( 50 | images, [3, 3, 3, 64], 51 | 1, 52 | 'SAME', 53 | train_phase, 54 | activation=tf.nn.relu, 55 | wd=l2_penalty, 56 | initializer=initializer) 57 | 58 | with tf.variable_scope('conv2'): 59 | conv2 = conv( 60 | conv1, [3, 3, 64, 64], 61 | 1, 62 | 'SAME', 63 | train_phase, 64 | activation=tf.nn.relu, 65 | wd=l2_penalty, 66 | initializer=initializer) 67 | 68 | with tf.variable_scope('pool1'): 69 | pool1 = tf.nn.max_pool( 70 | conv2, 71 | ksize=[1, 2, 2, 1], 72 | strides=[1, 2, 2, 1], 73 | padding='VALID') 74 | 75 | with tf.variable_scope('128'): 76 | with tf.variable_scope('conv3'): 77 | conv3 = conv( 78 | pool1, [3, 3, 64, 128], 79 | 1, 80 | 'SAME', 81 | train_phase, 82 | activation=tf.nn.relu, 83 | wd=l2_penalty, 84 | initializer=initializer) 85 | 86 | with tf.variable_scope('conv4'): 87 | conv4 = conv( 88 | conv3, [3, 3, 128, 128], 89 | 1, 90 | 'SAME', 91 | train_phase, 92 | activation=tf.nn.relu, 93 | wd=l2_penalty, 94 | initializer=initializer) 95 | 96 | with tf.variable_scope('pool2'): 97 | pool2 = tf.nn.max_pool( 98 | conv4, 99 | ksize=[1, 2, 2, 1], 100 | strides=[1, 2, 2, 1], 101 | padding='VALID') 102 | 103 | with tf.variable_scope('256'): 104 | with tf.variable_scope('conv5'): 105 | conv5 = conv( 106 | pool2, [3, 3, 128, 256], 107 | 1, 108 | 'SAME', 109 | train_phase, 110 | activation=tf.nn.relu, 111 | wd=l2_penalty, 112 | initializer=initializer) 113 | 114 | with tf.variable_scope('conv6'): 115 | conv6 = conv( 116 | conv5, [3, 3, 256, 256], 117 | 1, 118 | 'SAME', 119 | train_phase, 120 | activation=tf.nn.relu, 121 | wd=l2_penalty, 122 | initializer=initializer) 123 | 124 | with tf.variable_scope('conv7'): 125 | conv7 = conv( 126 | conv6, [3, 3, 256, 256], 127 | 1, 128 | 'SAME', 129 | train_phase, 130 | activation=tf.nn.relu, 131 | wd=l2_penalty, 132 | initializer=initializer) 133 | 134 | with tf.variable_scope('pool3'): 135 | pool3 = tf.nn.max_pool( 136 | conv7, 137 | ksize=[1, 2, 2, 1], 138 | strides=[1, 2, 2, 1], 139 | padding='VALID') 140 | 141 | with tf.variable_scope('512'): 142 | with tf.variable_scope('conv8'): 143 | conv8 = conv( 144 | pool3, [3, 3, 256, 512], 145 | 1, 146 | 'SAME', 147 | train_phase, 148 | activation=tf.nn.relu, 149 | wd=l2_penalty, 150 | initializer=initializer) 151 | 152 | with tf.variable_scope('conv9'): 153 | conv9 = conv( 154 | conv8, [3, 3, 512, 512], 155 | 1, 156 | 'SAME', 157 | train_phase, 158 | activation=tf.nn.relu, 159 | wd=l2_penalty, 160 | initializer=initializer) 161 | 162 | with tf.variable_scope('conv10'): 163 | conv10 = conv( 164 | conv9, [3, 3, 512, 512], 165 | 1, 166 | 'SAME', 167 | train_phase, 168 | activation=tf.nn.relu, 169 | wd=l2_penalty, 170 | initializer=initializer) 171 | 172 | with tf.variable_scope('pool4'): 173 | pool4 = tf.nn.max_pool( 174 | conv10, 175 | ksize=[1, 2, 2, 1], 176 | strides=[1, 2, 2, 1], 177 | padding='VALID') 178 | 179 | with tf.variable_scope('512b2'): 180 | with tf.variable_scope('conv11'): 181 | conv11 = conv( 182 | pool4, [3, 3, 512, 512], 183 | 1, 184 | 'SAME', 185 | train_phase, 186 | activation=tf.nn.relu, 187 | wd=l2_penalty, 188 | initializer=initializer) 189 | 190 | with tf.variable_scope('conv12'): 191 | conv12 = conv( 192 | conv11, [3, 3, 512, 512], 193 | 1, 194 | 'SAME', 195 | train_phase, 196 | activation=tf.nn.relu, 197 | wd=l2_penalty, 198 | initializer=initializer) 199 | 200 | with tf.variable_scope('conv13'): 201 | conv13 = conv( 202 | conv12, [3, 3, 512, 512], 203 | 1, 204 | 'SAME', 205 | train_phase, 206 | activation=tf.nn.relu, 207 | wd=l2_penalty, 208 | initializer=initializer) 209 | 210 | with tf.variable_scope('pool5'): 211 | pool5 = tf.nn.max_pool( 212 | conv13, 213 | ksize=[1, 2, 2, 1], 214 | strides=[1, 2, 2, 1], 215 | padding='VALID') 216 | pool5 = tf.reshape(pool5, [-1, 512]) 217 | 218 | with tf.variable_scope('fc'): 219 | fc1 = fc( 220 | pool5, [512, 512], 221 | train_phase, 222 | activation=tf.nn.relu, 223 | wd=l2_penalty, 224 | initializer=initializer) 225 | 226 | with tf.variable_scope('softmax_linear'): 227 | logits = fc( 228 | fc1, [512, num_classes], 229 | train_phase, 230 | initializer=initializer) 231 | return logits 232 | 233 | def loss(self, logits, labels): 234 | """Add L2Loss to all the trainable variables. 235 | Args: 236 | logits: Logits from get(). 237 | labels: Labels from train_inputs or inputs(). 1-D tensor 238 | of shape [batch_size] 239 | 240 | Returns: 241 | Loss tensor of type float. 242 | """ 243 | with tf.variable_scope('loss'): 244 | # Calculate the average cross entropy loss across the batch. 245 | labels = tf.cast(labels, tf.int64) 246 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 247 | logits=logits, labels=labels, name='cross_entropy_per_example') 248 | cross_entropy_mean = tf.reduce_mean( 249 | cross_entropy, name='cross_entropy') 250 | tf.add_to_collection(LOSSES, cross_entropy_mean) 251 | 252 | # The total loss is defined as the cross entropy loss plus all of the weight 253 | # decay terms (L2 loss). 254 | error = tf.add_n(tf.get_collection(LOSSES), name='total_loss') 255 | 256 | return error 257 | 258 | def get(self, images, num_classes, train_phase=False, l2_penalty=0.0): 259 | """ define the model with its inputs. 260 | Use this function to define the model in training and when exporting the model 261 | in the protobuf format. 262 | 263 | Args: 264 | images: model input 265 | num_classes: number of classes to predict 266 | train_phase: set it to True when defining the model, during train 267 | l2_penalty: float value, weight decay (l2) penalty 268 | 269 | Returns: 270 | is_training_: tf.bool placeholder enable/disable training ops at run time 271 | logits: the model output 272 | """ 273 | is_training_ = tf.placeholder_with_default( 274 | False, shape=(), name="is_training_") 275 | # build a graph that computes the logits predictions from the images 276 | logits = self._inference(images, num_classes, is_training_, train_phase, 277 | l2_penalty) 278 | 279 | return is_training_, logits 280 | -------------------------------------------------------------------------------- /dytb/models/predefined/VGGDirectDropout.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Builds the VGG-like network with direct dropout layers 9 | applyed after avery layer of neurons""" 10 | 11 | import tensorflow as tf 12 | from ..collections import LOSSES 13 | from ..layers import conv, fc, direct_dropout 14 | from ..interfaces import Classifier 15 | 16 | 17 | class VGGDirectDropout(Classifier): 18 | """Builds the VGG-like network with direct dropout layers 19 | applyed after avery layer of neurons""" 20 | 21 | def _inference(self, 22 | images, 23 | num_classes, 24 | is_training_, 25 | train_phase=False, 26 | l2_penalty=0.0): 27 | """Builds the VGG-like network with direct dropout layers 28 | applyed after avery layer of neurons. 29 | 30 | Args: 31 | images: Images returned from train_inputs() or inputs(). 32 | num_classes: Number of classes to predict 33 | is_training_: enable/disable training ops at run time 34 | train_phase: Boolean to enable/disable training ops at build time 35 | l2_penalty: float value, weight decay (l2) penalty 36 | 37 | Returns: 38 | Logits. 39 | """ 40 | 41 | # Initializer with seed 42 | initializer = tf.contrib.layers.variance_scaling_initializer( 43 | factor=2.0, 44 | mode='FAN_IN', 45 | uniform=False, 46 | seed=self.seed, 47 | dtype=tf.float32) 48 | 49 | def direct_drop(layer, prob): 50 | """ Build a condition node if we are in train_phase. thus we can use the 51 | is_training_ placeholder to switch. 52 | Build a prob*layer node when we're not in train_phase. 53 | Returns the correct node""" 54 | 55 | if train_phase: 56 | layer = tf.cond( 57 | tf.equal(is_training_, True), 58 | lambda: direct_dropout(layer, prob), lambda: prob * layer) 59 | else: 60 | layer = prob * layer 61 | return layer 62 | 63 | with tf.variable_scope(self.__class__.__name__): 64 | with tf.variable_scope('64'): 65 | with tf.variable_scope('conv1'): 66 | conv1 = conv( 67 | images, [3, 3, 3, 64], 68 | 1, 69 | 'SAME', 70 | train_phase, 71 | activation=tf.nn.relu, 72 | wd=l2_penalty, 73 | initializer=initializer) 74 | direct_drop(conv1, 0.7) 75 | 76 | with tf.variable_scope('conv2'): 77 | conv2 = conv( 78 | conv1, [3, 3, 64, 64], 79 | 1, 80 | 'SAME', 81 | train_phase, 82 | activation=tf.nn.relu, 83 | wd=l2_penalty, 84 | initializer=initializer) 85 | direct_drop(conv2, 0.6) 86 | 87 | with tf.variable_scope('pool1'): 88 | pool1 = tf.nn.max_pool( 89 | conv2, 90 | ksize=[1, 2, 2, 1], 91 | strides=[1, 2, 2, 1], 92 | padding='VALID') 93 | 94 | with tf.variable_scope('128'): 95 | with tf.variable_scope('conv3'): 96 | conv3 = conv( 97 | pool1, [3, 3, 64, 128], 98 | 1, 99 | 'SAME', 100 | train_phase, 101 | activation=tf.nn.relu, 102 | wd=l2_penalty, 103 | initializer=initializer) 104 | 105 | conv3 = direct_drop(conv3, 0.6) 106 | 107 | with tf.variable_scope('conv4'): 108 | conv4 = conv( 109 | conv3, [3, 3, 128, 128], 110 | 1, 111 | 'SAME', 112 | train_phase, 113 | activation=tf.nn.relu, 114 | wd=l2_penalty, 115 | initializer=initializer) 116 | 117 | conv4 = direct_drop(conv4, 0.6) 118 | 119 | with tf.variable_scope('pool2'): 120 | pool2 = tf.nn.max_pool( 121 | conv4, 122 | ksize=[1, 2, 2, 1], 123 | strides=[1, 2, 2, 1], 124 | padding='VALID') 125 | 126 | with tf.variable_scope('256'): 127 | with tf.variable_scope('conv5'): 128 | conv5 = conv( 129 | pool2, [3, 3, 128, 256], 130 | 1, 131 | 'SAME', 132 | train_phase, 133 | activation=tf.nn.relu, 134 | wd=l2_penalty, 135 | initializer=initializer) 136 | 137 | conv5 = direct_drop(conv5, 0.6) 138 | 139 | with tf.variable_scope('conv6'): 140 | conv6 = conv( 141 | conv5, [3, 3, 256, 256], 142 | 1, 143 | 'SAME', 144 | train_phase, 145 | activation=tf.nn.relu, 146 | wd=l2_penalty, 147 | initializer=initializer) 148 | 149 | conv6 = direct_drop(conv6, 0.6) 150 | 151 | with tf.variable_scope('conv7'): 152 | conv7 = conv( 153 | conv6, [3, 3, 256, 256], 154 | 1, 155 | 'SAME', 156 | train_phase, 157 | activation=tf.nn.relu, 158 | wd=l2_penalty, 159 | initializer=initializer) 160 | 161 | conv7 = direct_drop(conv7, 0.6) 162 | 163 | with tf.variable_scope('pool3'): 164 | pool3 = tf.nn.max_pool( 165 | conv7, 166 | ksize=[1, 2, 2, 1], 167 | strides=[1, 2, 2, 1], 168 | padding='VALID') 169 | 170 | with tf.variable_scope('512'): 171 | with tf.variable_scope('conv8'): 172 | conv8 = conv( 173 | pool3, [3, 3, 256, 512], 174 | 1, 175 | 'SAME', 176 | train_phase, 177 | activation=tf.nn.relu, 178 | wd=l2_penalty, 179 | initializer=initializer) 180 | 181 | conv8 = direct_drop(conv8, 0.6) 182 | 183 | with tf.variable_scope('conv9'): 184 | conv9 = conv( 185 | conv8, [3, 3, 512, 512], 186 | 1, 187 | 'SAME', 188 | train_phase, 189 | activation=tf.nn.relu, 190 | wd=l2_penalty, 191 | initializer=initializer) 192 | 193 | conv9 = direct_drop(conv9, 0.6) 194 | 195 | with tf.variable_scope('conv10'): 196 | conv10 = conv( 197 | conv9, [3, 3, 512, 512], 198 | 1, 199 | 'SAME', 200 | train_phase, 201 | activation=tf.nn.relu, 202 | wd=l2_penalty, 203 | initializer=initializer) 204 | 205 | conv10 = direct_drop(conv10, 0.6) 206 | 207 | with tf.variable_scope('pool4'): 208 | pool4 = tf.nn.max_pool( 209 | conv10, 210 | ksize=[1, 2, 2, 1], 211 | strides=[1, 2, 2, 1], 212 | padding='VALID') 213 | 214 | with tf.variable_scope('512b2'): 215 | with tf.variable_scope('conv11'): 216 | conv11 = conv( 217 | pool4, [3, 3, 512, 512], 218 | 1, 219 | 'SAME', 220 | train_phase, 221 | activation=tf.nn.relu, 222 | wd=l2_penalty, 223 | initializer=initializer) 224 | 225 | conv11 = direct_drop(conv11, 0.6) 226 | 227 | with tf.variable_scope('conv12'): 228 | conv12 = conv( 229 | conv11, [3, 3, 512, 512], 230 | 1, 231 | 'SAME', 232 | train_phase, 233 | activation=tf.nn.relu, 234 | wd=l2_penalty, 235 | initializer=initializer) 236 | 237 | conv12 = direct_drop(conv12, 0.6) 238 | 239 | with tf.variable_scope('conv13'): 240 | conv13 = conv( 241 | conv12, [3, 3, 512, 512], 242 | 1, 243 | 'SAME', 244 | train_phase, 245 | activation=tf.nn.relu, 246 | wd=l2_penalty, 247 | initializer=initializer) 248 | 249 | conv13 = direct_drop(conv13, 0.6) 250 | 251 | with tf.variable_scope('pool5'): 252 | pool5 = tf.nn.max_pool( 253 | conv13, 254 | ksize=[1, 2, 2, 1], 255 | strides=[1, 2, 2, 1], 256 | padding='VALID') 257 | 258 | pool5 = tf.reshape(pool5, [-1, 512]) 259 | 260 | with tf.variable_scope('fc'): 261 | fc1 = fc( 262 | pool5, [512, 512], 263 | train_phase, 264 | activation=tf.nn.relu, 265 | wd=l2_penalty, 266 | initializer=initializer) 267 | fc1 = direct_drop(fc1, 0.5) 268 | 269 | with tf.variable_scope('softmax_linear'): 270 | logits = fc( 271 | fc1, [512, num_classes], 272 | train_phase, 273 | initializer=initializer) 274 | return logits 275 | 276 | def loss(self, logits, labels): 277 | """Add L2Loss to all the trainable variables. 278 | Args: 279 | logits: Logits from get(). 280 | labels: Labels from train_inputs or inputs(). 1-D tensor 281 | of shape [batch_size] 282 | 283 | Returns: 284 | Loss tensor of type float. 285 | """ 286 | with tf.variable_scope('loss'): 287 | # Calculate the average cross entropy loss across the batch. 288 | labels = tf.cast(labels, tf.int64) 289 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 290 | logits=logits, labels=labels, name='cross_entropy_per_example') 291 | cross_entropy_mean = tf.reduce_mean( 292 | cross_entropy, name='cross_entropy') 293 | tf.add_to_collection(LOSSES, cross_entropy_mean) 294 | 295 | # The total loss is defined as the cross entropy loss plus all of the weight 296 | # decay terms (L2 loss). 297 | error = tf.add_n(tf.get_collection(LOSSES), name='total_loss') 298 | 299 | return error 300 | 301 | def get(self, images, num_classes, train_phase=False, l2_penalty=0.0): 302 | """ define the model with its inputs. 303 | Use this function to define the model in training and when exporting the model 304 | in the protobuf format. 305 | 306 | Args: 307 | images: model input 308 | num_classes: number of classes to predict 309 | train_phase: set it to True when defining the model, during train 310 | l2_penalty: float value, weight decay (l2) penalty 311 | 312 | Returns: 313 | is_training_: tf.bool placeholder to enable/disable training ops at run time 314 | logits: the model output 315 | """ 316 | is_training_ = tf.placeholder_with_default( 317 | False, shape=(), name="is_training_") 318 | # build a graph that computes the logits predictions from the images 319 | logits = self._inference(images, num_classes, is_training_, train_phase, 320 | l2_penalty) 321 | 322 | return is_training_, logits 323 | -------------------------------------------------------------------------------- /dytb/models/predefined/VGGDropout.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Builds the VGG-like network with dropout layers 9 | applyed after avery layer of neurons""" 10 | 11 | import tensorflow as tf 12 | from ..collections import LOSSES 13 | from ..layers import conv, fc 14 | from ..interfaces import Classifier 15 | 16 | 17 | class VGGDropout(Classifier): 18 | """Builds the VGG-like network with dropout layers applyed 19 | after avery layer of neurons""" 20 | 21 | def _inference(self, 22 | images, 23 | num_classes, 24 | is_training_, 25 | train_phase=False, 26 | l2_penalty=0.0): 27 | """Builds the VGG-like network with inverted dropout layers 28 | applyed after avery layer of neurons. 29 | 30 | Args: 31 | images: Images returned from train_inputs() or inputs(). 32 | num_classes: Number of classes to predict 33 | is_training_: enable/disable training ops at run time 34 | train_phase: Boolean to enable/disable training ops at build time 35 | l2_penalty: float value, weight decay (l2) penalty 36 | 37 | Returns: 38 | Logits. 39 | """ 40 | 41 | # Initializer with seed 42 | initializer = tf.contrib.layers.variance_scaling_initializer( 43 | factor=2.0, 44 | mode='FAN_IN', 45 | uniform=False, 46 | seed=self.seed, 47 | dtype=tf.float32) 48 | 49 | with tf.variable_scope(self.__class__.__name__): 50 | with tf.variable_scope('64'): 51 | with tf.variable_scope('conv1'): 52 | conv1 = conv( 53 | images, [3, 3, 3, 64], 54 | 1, 55 | 'SAME', 56 | train_phase, 57 | activation=tf.nn.relu, 58 | wd=l2_penalty, 59 | initializer=initializer) 60 | if train_phase: 61 | conv1 = tf.nn.dropout(conv1, 0.7) 62 | 63 | with tf.variable_scope('conv2'): 64 | conv2 = conv( 65 | conv1, [3, 3, 64, 64], 66 | 1, 67 | 'SAME', 68 | train_phase, 69 | activation=tf.nn.relu, 70 | wd=l2_penalty, 71 | initializer=initializer) 72 | 73 | if train_phase: 74 | conv2 = tf.nn.dropout(conv2, 0.6) 75 | 76 | with tf.variable_scope('pool1'): 77 | pool1 = tf.nn.max_pool( 78 | conv2, 79 | ksize=[1, 2, 2, 1], 80 | strides=[1, 2, 2, 1], 81 | padding='VALID') 82 | 83 | with tf.variable_scope('128'): 84 | with tf.variable_scope('conv3'): 85 | conv3 = conv( 86 | pool1, [3, 3, 64, 128], 87 | 1, 88 | 'SAME', 89 | train_phase, 90 | activation=tf.nn.relu, 91 | wd=l2_penalty, 92 | initializer=initializer) 93 | 94 | if train_phase: 95 | conv3 = tf.nn.dropout(conv3, 0.6) 96 | 97 | with tf.variable_scope('conv4'): 98 | conv4 = conv( 99 | conv3, [3, 3, 128, 128], 100 | 1, 101 | 'SAME', 102 | train_phase, 103 | activation=tf.nn.relu, 104 | wd=l2_penalty, 105 | initializer=initializer) 106 | 107 | if train_phase: 108 | conv4 = tf.nn.dropout(conv4, 0.6) 109 | 110 | with tf.variable_scope('pool2'): 111 | pool2 = tf.nn.max_pool( 112 | conv4, 113 | ksize=[1, 2, 2, 1], 114 | strides=[1, 2, 2, 1], 115 | padding='VALID') 116 | 117 | with tf.variable_scope('256'): 118 | with tf.variable_scope('conv5'): 119 | conv5 = conv( 120 | pool2, [3, 3, 128, 256], 121 | 1, 122 | 'SAME', 123 | train_phase, 124 | activation=tf.nn.relu, 125 | wd=l2_penalty, 126 | initializer=initializer) 127 | 128 | if train_phase: 129 | conv5 = tf.nn.dropout(conv5, 0.6) 130 | 131 | with tf.variable_scope('conv6'): 132 | conv6 = conv( 133 | conv5, [3, 3, 256, 256], 134 | 1, 135 | 'SAME', 136 | train_phase, 137 | activation=tf.nn.relu, 138 | wd=l2_penalty, 139 | initializer=initializer) 140 | 141 | if train_phase: 142 | conv6 = tf.nn.dropout(conv6, 0.6) 143 | 144 | with tf.variable_scope('conv7'): 145 | conv7 = conv( 146 | conv6, [3, 3, 256, 256], 147 | 1, 148 | 'SAME', 149 | train_phase, 150 | activation=tf.nn.relu, 151 | wd=l2_penalty, 152 | initializer=initializer) 153 | 154 | if train_phase: 155 | conv7 = tf.nn.dropout(conv7, 0.6) 156 | 157 | with tf.variable_scope('pool3'): 158 | pool3 = tf.nn.max_pool( 159 | conv7, 160 | ksize=[1, 2, 2, 1], 161 | strides=[1, 2, 2, 1], 162 | padding='VALID') 163 | 164 | with tf.variable_scope('512'): 165 | with tf.variable_scope('conv8'): 166 | conv8 = conv( 167 | pool3, [3, 3, 256, 512], 168 | 1, 169 | 'SAME', 170 | train_phase, 171 | activation=tf.nn.relu, 172 | wd=l2_penalty, 173 | initializer=initializer) 174 | 175 | if train_phase: 176 | conv8 = tf.nn.dropout(conv8, 0.6) 177 | 178 | with tf.variable_scope('conv9'): 179 | conv9 = conv( 180 | conv8, [3, 3, 512, 512], 181 | 1, 182 | 'SAME', 183 | train_phase, 184 | activation=tf.nn.relu, 185 | wd=l2_penalty, 186 | initializer=initializer) 187 | 188 | if train_phase: 189 | conv9 = tf.nn.dropout(conv9, 0.6) 190 | 191 | with tf.variable_scope('conv10'): 192 | conv10 = conv( 193 | conv9, [3, 3, 512, 512], 194 | 1, 195 | 'SAME', 196 | train_phase, 197 | activation=tf.nn.relu, 198 | wd=l2_penalty, 199 | initializer=initializer) 200 | 201 | if train_phase: 202 | conv10 = tf.nn.dropout(conv10, 0.6) 203 | 204 | with tf.variable_scope('pool4'): 205 | pool4 = tf.nn.max_pool( 206 | conv10, 207 | ksize=[1, 2, 2, 1], 208 | strides=[1, 2, 2, 1], 209 | padding='VALID') 210 | 211 | with tf.variable_scope('512b2'): 212 | with tf.variable_scope('conv11'): 213 | conv11 = conv( 214 | pool4, [3, 3, 512, 512], 215 | 1, 216 | 'SAME', 217 | train_phase, 218 | activation=tf.nn.relu, 219 | wd=l2_penalty, 220 | initializer=initializer) 221 | 222 | if train_phase: 223 | conv11 = tf.nn.dropout(conv11, 0.6) 224 | 225 | with tf.variable_scope('conv12'): 226 | conv12 = conv( 227 | conv11, [3, 3, 512, 512], 228 | 1, 229 | 'SAME', 230 | train_phase, 231 | activation=tf.nn.relu, 232 | wd=l2_penalty, 233 | initializer=initializer) 234 | 235 | if train_phase: 236 | conv12 = tf.nn.dropout(conv12, 0.6) 237 | 238 | with tf.variable_scope('conv13'): 239 | conv13 = conv( 240 | conv12, [3, 3, 512, 512], 241 | 1, 242 | 'SAME', 243 | train_phase, 244 | activation=tf.nn.relu, 245 | wd=l2_penalty, 246 | initializer=initializer) 247 | 248 | if train_phase: 249 | conv13 = tf.nn.dropout(conv13, 0.6) 250 | 251 | with tf.variable_scope('pool5'): 252 | pool5 = tf.nn.max_pool( 253 | conv13, 254 | ksize=[1, 2, 2, 1], 255 | strides=[1, 2, 2, 1], 256 | padding='VALID') 257 | 258 | pool5 = tf.reshape(pool5, [-1, 512]) 259 | 260 | with tf.variable_scope('fc'): 261 | fc1 = fc( 262 | pool5, [512, 512], 263 | train_phase, 264 | activation=tf.nn.relu, 265 | wd=l2_penalty, 266 | initializer=initializer) 267 | 268 | if train_phase: 269 | fc1 = tf.nn.dropout(fc1, 0.5) 270 | 271 | with tf.variable_scope('softmax_linear'): 272 | logits = fc( 273 | fc1, [512, num_classes], 274 | train_phase, 275 | initializer=initializer) 276 | return logits 277 | 278 | def loss(self, logits, labels): 279 | """Add L2Loss to all the trainable variables. 280 | Args: 281 | logits: Logits from get(). 282 | labels: Labels from train_inputs or inputs(). 1-D tensor 283 | of shape [batch_size] 284 | 285 | Returns: 286 | Loss tensor of type float. 287 | """ 288 | with tf.variable_scope('loss'): 289 | # Calculate the average cross entropy loss across the batch. 290 | labels = tf.cast(labels, tf.int64) 291 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 292 | logits=logits, labels=labels, name='cross_entropy_per_example') 293 | cross_entropy_mean = tf.reduce_mean( 294 | cross_entropy, name='cross_entropy') 295 | tf.add_to_collection(LOSSES, cross_entropy_mean) 296 | 297 | # The total loss is defined as the cross entropy loss plus all of the weight 298 | # decay terms (L2 loss). 299 | error = tf.add_n(tf.get_collection(LOSSES), name='total_loss') 300 | 301 | return error 302 | 303 | def get(self, images, num_classes, train_phase=False, l2_penalty=0.0): 304 | """ define the model with its inputs. 305 | Use this function to define the model in training and when exporting the model 306 | in the protobuf format. 307 | 308 | Args: 309 | images: model input 310 | num_classes: number of classes to predict 311 | train_phase: set it to True when defining the model, during train 312 | l2_penalty: float value, weight decay (l2) penalty 313 | 314 | Returns: 315 | is_training_: enable/disable training ops at run time 316 | logits: the model output 317 | """ 318 | is_training_ = tf.placeholder_with_default( 319 | False, shape=(), name="is_training_") 320 | # build a graph that computes the logits predictions from the images 321 | logits = self._inference(images, num_classes, is_training_, train_phase, 322 | l2_penalty) 323 | 324 | return is_training_, logits 325 | -------------------------------------------------------------------------------- /dytb/models/predefined/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galeone/dynamic-training-bench/6534e18bc0744e6e32ed92eba4c95e5b592d4beb/dytb/models/predefined/__init__.py -------------------------------------------------------------------------------- /dytb/models/utils.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Utils for models creation""" 9 | 10 | import re 11 | import tensorflow as tf 12 | from .collections import SCALAR_SUMMARIES, REQUIRED_NON_TRAINABLES 13 | 14 | 15 | def legalize_name(name): 16 | """Made name a legal name to be used in tensorflow summaries 17 | Args: 18 | name: string 19 | Returns: 20 | name_legal 21 | """ 22 | return re.sub(r"[^\w|/]", "_", name) 23 | 24 | 25 | def tf_log(summary, collection=SCALAR_SUMMARIES): 26 | """Add tf.summary object to collection named collection""" 27 | tf.add_to_collection(collection, summary) 28 | 29 | 30 | def training_process_variables(): 31 | """global variables - trainable variables: 32 | it contains the variable defined by the optimizer and the others 33 | defined durint the training process. 34 | Those variables are useful when restoring a training process. 35 | Those variables are not trainable. can be saved and restored. 36 | """ 37 | 38 | return [ 39 | variable for variable in tf.global_variables() 40 | if variable.name not in (var.name for var in tf.trainable_variables()) 41 | ] 42 | 43 | 44 | def variables_to_save(add_list=None): 45 | """Returns a list of variables to save. 46 | add_list variables are always added to the list 47 | Args: 48 | add_list: a list of variables 49 | Returns: 50 | list: list of tensors to save 51 | """ 52 | if add_list is None: 53 | add_list = [] 54 | return tf.trainable_variables() + tf.get_collection_ref( 55 | REQUIRED_NON_TRAINABLES) + add_list + training_process_variables() 56 | 57 | 58 | def variables_to_restore(add_list=None, exclude_scope_list=None): 59 | """Returns a list of variables to restore to made the model working 60 | properly. 61 | The list is made by the trainable variables + required non trainable variables 62 | such as statistics of batch norm layers. 63 | Remove from the list variables that are in the exclude_scope_list. 64 | Add variables in the add_list 65 | 66 | Args: 67 | add_list: a list of variables 68 | exclude_scope_list: a list of scopes to exclude 69 | Returns: 70 | list: list of tensors to restore 71 | """ 72 | 73 | if add_list is None: 74 | add_list = [] 75 | if exclude_scope_list is None: 76 | exclude_scope_list = [] 77 | 78 | variables = variables_to_save() 79 | if exclude_scope_list: 80 | variables[:] = [ 81 | variable for variable in variables if not variable.name.startswith( 82 | tuple(scope for scope in exclude_scope_list)) 83 | ] 84 | return variables + add_list 85 | 86 | 87 | def variables_to_train(scope_list=None): 88 | """Returns a list of variables to train, filtered by the scopes. 89 | Args: 90 | scope_list: a list of scope to train 91 | Returns: 92 | the list of variables to train by the optimizer 93 | """ 94 | if scope_list is None: 95 | return tf.trainable_variables() 96 | vars_to_train = [] 97 | for scope in scope_list: 98 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) 99 | vars_to_train.extend(variables) 100 | return vars_to_train 101 | 102 | 103 | def num_neurons_and_shape(layer): 104 | """Count the number of neurons in a single element of the layer, returns this 105 | number and the shape of the single layer. 106 | Args: 107 | layer: [batch_size, widht, height, depth] if the layer is convolutional 108 | [batch_size, num_neruons] if the layer is fully connected 109 | Returns: 110 | num_neurons, shape 111 | Where num_neurons is the number of neurons in a single elment of the input batch, 112 | shape is the shape of the single element""" 113 | # extract the number of neurons in x 114 | # and the number of neurons kept on 115 | input_shape = layer.get_shape() 116 | if len(input_shape) == 4: # conv layer 117 | num_neurons = input_shape[1].value * input_shape[2].value * input_shape[ 118 | 3].value 119 | shape = [ 120 | -1, input_shape[1].value, input_shape[2].value, input_shape[3].value 121 | ] 122 | else: #fc layer 123 | num_neurons = input_shape[1].value 124 | shape = [-1, input_shape[1].value] 125 | 126 | return num_neurons, shape 127 | 128 | 129 | def active_neurons(layer, off_value=0): 130 | """Count the number of active (> off_value) neurons in a single element of the layer. 131 | Args: 132 | layer: [batch_size, widht, height, depth] if the layer is convolutional 133 | [batch_size, num_neruons] if the layer is fully connected 134 | Returns: 135 | kept_on: [batch_size, 1] tf.int32, number of active neurons 136 | """ 137 | binary_tensor = tf.cast(tf.greater(layer, off_value), tf.int32) 138 | return tf.reduce_sum(binary_tensor, [1, 2, 3] 139 | if len(layer.get_shape()) == 4 else [1]) 140 | 141 | 142 | def count_trainable_parameters(print_model=False): 143 | """Count the number of trainable parameters is the current graph. 144 | Returns: 145 | count: the number of trainable parameters""" 146 | total_parameters = 0 147 | for variable in tf.trainable_variables(): 148 | # shape is an array of tf.Dimension 149 | shape = variable.get_shape() 150 | if print_model: 151 | print(variable) 152 | variable_parametes = 1 153 | for dim in shape: 154 | variable_parametes *= dim.value 155 | total_parameters += variable_parametes 156 | return total_parameters 157 | -------------------------------------------------------------------------------- /dytb/models/visualization.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """utility methods to create visualizations in tensorboard""" 9 | 10 | import math 11 | import tensorflow as tf 12 | from .utils import tf_log 13 | from .collections import MEDIA_SUMMARIES 14 | 15 | 16 | # Adapeted from 17 | # https://gist.github.com/kukuruza/03731dc494603ceab0c5#gistcomment-1879326 18 | def on_grid(kernel, grid_side, pad=1): 19 | """Visualize conv. features as an image (mostly for the 1st layer). 20 | Place kernel into a grid, with some paddings between adjacent filters. 21 | 22 | Args: 23 | kernel: tensor of shape [Y, X, NumChannels, NumKernels] 24 | grid_side: side of the grid. Require: NumKernels == grid_side**2 25 | pad: number of black pixels around each filter (between them) 26 | 27 | Returns: 28 | An image Tensor with shape [(Y+2*pad)*grid_side, (X+2*pad)*grid_side, NumChannels, 1]. 29 | """ 30 | 31 | x_min = tf.reduce_min(kernel) 32 | x_max = tf.reduce_max(kernel) 33 | 34 | kernel1 = (kernel - x_min) / (x_max - x_min) 35 | 36 | # pad X and Y 37 | x1 = tf.pad( 38 | kernel1, 39 | tf.constant([[pad, pad], [pad, pad], [0, 0], [0, 0]]), 40 | mode='CONSTANT') 41 | 42 | # X and Y dimensions, w.r.t. padding 43 | Y = kernel1.get_shape()[0] + 2 * pad 44 | X = kernel1.get_shape()[1] + 2 * pad 45 | 46 | channels = kernel1.get_shape()[2] 47 | 48 | # put NumKernels to the 1st dimension 49 | x2 = tf.transpose(x1, (3, 0, 1, 2)) 50 | # organize grid on Y axis 51 | x3 = tf.reshape(x2, 52 | tf.stack( 53 | values=[grid_side, Y * grid_side, X, channels], 54 | axis=0)) #3 55 | 56 | # switch X and Y axes 57 | x4 = tf.transpose(x3, (0, 2, 1, 3)) 58 | # organize grid on X axis 59 | x5 = tf.reshape(x4, 60 | tf.stack( 61 | values=[1, X * grid_side, Y * grid_side, channels], 62 | axis=0)) #3 63 | 64 | # back to normal order (not combining with the next step for clarity) 65 | x6 = tf.transpose(x5, (2, 1, 3, 0)) 66 | 67 | # to tf.image_summary order [batch_size, height, width, channels], 68 | # where in this case batch_size == 1 69 | x7 = tf.transpose(x6, (3, 0, 1, 2)) 70 | 71 | # scale to [0, 255] and convert to uint8 72 | return tf.image.convert_image_dtype(x7, dtype=tf.uint8) 73 | 74 | 75 | def log_images(name, inputs, outputs=None): 76 | """Log inputs and outputs batch of images. Display images in grids 77 | Args: 78 | name: name of the summary 79 | inputs: tensor with shape [batch_size, height, widht, depth] 80 | outputs: if present must have the same dimensions as inputs 81 | """ 82 | 83 | with tf.variable_scope('visualization'): 84 | batch_size = inputs.get_shape()[0].value 85 | grid_side = math.floor(math.sqrt(batch_size)) 86 | inputs = on_grid( 87 | tf.transpose(inputs, perm=(1, 2, 3, 0))[:, :, :, 0:grid_side**2], 88 | grid_side) 89 | 90 | if outputs is None: 91 | tf_log( 92 | tf.summary.image(name, inputs, max_outputs=1), 93 | collection=MEDIA_SUMMARIES) 94 | return 95 | 96 | inputs = tf.pad(inputs, [[0, 0], [0, 0], [0, 10], [0, 0]]) 97 | outputs = on_grid( 98 | tf.transpose(outputs, perm=(1, 2, 3, 0))[:, :, :, 0:grid_side**2], 99 | grid_side) 100 | tf_log( 101 | tf.summary.image( 102 | name, tf.concat([inputs, outputs], axis=2), max_outputs=1), 103 | collection=MEDIA_SUMMARIES) 104 | -------------------------------------------------------------------------------- /dytb/train.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Train method and utilities""" 9 | 10 | import os 11 | import tensorflow as tf 12 | from .inputs.interfaces import InputType 13 | from .trainer.Trainer import Trainer 14 | 15 | 16 | def _build_name(args, dataset): 17 | """Build method name parsing args. 18 | Args: 19 | args: the training parameter 20 | dataset: the dataset object 21 | Returns: 22 | name: the ID for the current training process""" 23 | optimizer = args["gd"]["optimizer"](**args["gd"]["args"]) 24 | name = "{}_{}_".format(dataset.name, optimizer.get_name()) 25 | 26 | if args["lr_decay"]["enabled"]: 27 | name += "lr_decay_" 28 | if args["regularizations"]["l2"]: 29 | name += "l2={}_".format(args["regularizations"]["l2"]) 30 | if args["regularizations"]["augmentation"]["name"].lower() != "identity": 31 | name += "{}_".format( 32 | args["regularizations"]["augmentation"]["name"].lower()) 33 | if args["comment"] != "": 34 | name += "{}_".format(args["comment"]) 35 | 36 | return name.rstrip("_") 37 | 38 | 39 | def _parse_hyperparameters(hyperparams=None): 40 | """Check if every parameter passed in hyperparams 41 | is a valid hyperparameter. 42 | Returns: 43 | hyperparams: the same dictionary with default values added if optionals 44 | Raises: 45 | ValueError if hyperparams is not valid 46 | """ 47 | 48 | if hyperparams is None: 49 | hyperparams = {} 50 | 51 | hp_available_keys = { 52 | "batch_size", "epochs", "gd", "lr_decay", "regularizations", "seed" 53 | } 54 | 55 | difference = hyperparams.keys() - hp_available_keys 56 | if difference: 57 | raise ValueError( 58 | "{} are not valid keys for {}. Valid keys are: {}".format( 59 | difference, "hyperparameters", hp_available_keys)) 60 | 61 | # Instantiate with default values if not specified 62 | args = { 63 | # The size of the trainign batch 64 | "batch_size": 65 | hyperparams.get("batch_size", 128), 66 | # The number of epochs to train 67 | # where an epoch is the training set cardinality * the augmentation factor 68 | "epochs": 69 | hyperparams.get("epochs", 150), 70 | # Gradient descent parameters 71 | "gd": 72 | hyperparams.get( 73 | "gd", 74 | { 75 | # The optimizer to use 76 | "optimizer": tf.train.MomentumOptimizer, 77 | # The arguments of the optimizer 78 | "args": { 79 | "learning_rate": 1e-3, 80 | "momentum": 0.9, 81 | "use_nesterov": False 82 | } 83 | }), 84 | # The learning rate decay 85 | "lr_decay": 86 | hyperparams.get("lr_decay", { 87 | "enabled": False, 88 | "epochs": 25, 89 | "factor": .1 90 | }), 91 | # The regularization to apply 92 | "regularizations": 93 | hyperparams.get( 94 | "regularizations", 95 | { 96 | # L2 on the model weights 97 | "l2": 0.0, 98 | # The augmentation on the input data: online augmentation 99 | "augmentation": { 100 | # The name of the augmentation: identity disables the augmentations 101 | "name": "identity", 102 | # The function of the augmentation: fn(x) where x is the orignnal sample 103 | "fn": lambda x: x, 104 | # The multiplicative factor of the training set: online data augmentation 105 | # can generate a potentially infinite number of training samples. 106 | # However, the generated samples starts to look "similar" after 107 | # being generated for a lot of times. 108 | # What we do applying augmentations is to pick samples from the input 109 | # distrubution. 110 | # If we have enough samples (in a single epoch), we have sampled the 111 | # distribution densely enough that the next epoch, altough the samples 112 | # are still online generated, will look similar to the previous one. 113 | 114 | # In short, this is a multiplicative factor that changes the effective 115 | # training set size: 116 | # 1 means no augmentation. 117 | # A rule of thumb is to set this value to a power of 10. 118 | "factor": 1, 119 | } 120 | }), 121 | # seed is the graph level and op level seed. 122 | # None means that random seed is used. 123 | # Otherwise the specified value is used. 124 | "seed": 125 | hyperparams.get("seed", None), 126 | } 127 | 128 | def _check_keys(dict_key, available_keys, sub_key=None): 129 | inserted_keys = args[dict_key] if not sub_key else args[dict_key][ 130 | sub_key] 131 | diff = inserted_keys.keys() - available_keys 132 | if diff: 133 | raise ValueError( 134 | "{} are not valid keys for {}. Valid keys are: {}".format( 135 | diff, dict_key, available_keys)) 136 | 137 | _check_keys("gd", {"optimizer", "args"}) 138 | _check_keys("lr_decay", {"enabled", "epochs", "factor"}) 139 | _check_keys("regularizations", {"l2", "augmentation"}) 140 | _check_keys("regularizations", {"name", "fn", "factor"}, "augmentation") 141 | 142 | # Check numeric fields 143 | if args["epochs"] <= 0: 144 | raise ValueError("epochs <= 0") 145 | if args["batch_size"] <= 0: 146 | raise ValueError("batch_size <= 0") 147 | # The other fields will be used at runtime. 148 | # If they're wrong, the training process can't start 149 | # and tensorflow will raise errors 150 | return args 151 | 152 | 153 | def _parse_surgery(surgery=None): 154 | """Check if every parameter passed in surgery is valid 155 | for network surgery purposes. 156 | 157 | Returns: 158 | surgery: the same dictionary with defautl values added if needed 159 | Raises: 160 | ValueError if surgery values are not valid 161 | """ 162 | if surgery is None: 163 | surgery = {} 164 | 165 | args = { 166 | "checkpoint_path": surgery.get("checkpoint_path", ""), 167 | "exclude_scopes": surgery.get("exclude_scopes", None), 168 | "trainable_scopes": surgery.get("trainable_scopes", None), 169 | } 170 | 171 | if args["checkpoint_path"] != "": 172 | if not tf.train.latest_checkpoint(args["checkpoint_path"]): 173 | raise ValueError("Invalid {}".format(args["checkpoint_path"])) 174 | # The other fields will be used at runtime. 175 | # If they're wrong, the training process can't start 176 | # and tensorflow will raise errors 177 | return args 178 | 179 | 180 | def train(model, 181 | dataset, 182 | hyperparameters=None, 183 | surgery=None, 184 | force_restart=False, 185 | comment=""): 186 | """Train the model using the provided dataset and the specifiied hyperparameters. 187 | Args: 188 | model: instance of a model interface 189 | dataset: instance of the Input interface 190 | hyperparameters: dictionary of the hyperparameter to use to train the model 191 | surgery: dictionary of options related to the network surgery, fine tuning and transfer 192 | learning 193 | force_restart: boolean, indicates if restart the train from 0 removing the old model 194 | or continue the training. 195 | comment: string to append at the log dir name 196 | Returns: 197 | info dict containing the information of the trained model 198 | """ 199 | hyperparameters = _parse_hyperparameters(hyperparameters) 200 | surgery = _parse_surgery(surgery) 201 | args = { 202 | **hyperparameters, 203 | **surgery, 204 | "force_restart": force_restart, 205 | "model": model, 206 | "dataset": dataset, 207 | "comment": comment} 208 | 209 | name = _build_name(args, dataset) 210 | 211 | #### Training constants #### 212 | float_steps_per_epoch = dataset.num_examples(InputType.train) * args[ 213 | "regularizations"]["augmentation"]["factor"] / args["batch_size"] 214 | steps_per_epoch = 1 if float_steps_per_epoch < 1. else round( 215 | float_steps_per_epoch) 216 | 217 | steps = { 218 | "epoch": steps_per_epoch, 219 | "log": 1 if steps_per_epoch < 10 else steps_per_epoch // 10, 220 | "max": int(float_steps_per_epoch * args["epochs"]), 221 | "decay": int(float_steps_per_epoch * args["lr_decay"]["epochs"]), 222 | } 223 | 224 | #### Model logs and checkpoint constants #### 225 | current_dir = os.getcwd() 226 | log_dir = os.path.join(current_dir, "log", args["model"].name, name) 227 | best_dir = os.path.join(log_dir, "best") 228 | paths = {"current": current_dir, "log": log_dir, "best": best_dir} 229 | 230 | if tf.gfile.Exists(log_dir) and force_restart: 231 | tf.gfile.DeleteRecursively(log_dir) 232 | tf.gfile.MakeDirs(log_dir) 233 | if not tf.gfile.Exists(best_dir): 234 | tf.gfile.MakeDirs(best_dir) 235 | 236 | if args["regularizations"]["augmentation"]["factor"] != 1: 237 | print("Original training set size {}. Augmented training set size: {}". 238 | format( 239 | dataset.num_examples(InputType.train), 240 | args["regularizations"]["augmentation"]["factor"] * 241 | dataset.num_examples(InputType.train))) 242 | return Trainer(model, dataset, args, steps, paths).train() 243 | -------------------------------------------------------------------------------- /dytb/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | -------------------------------------------------------------------------------- /dytb/trainer/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | -------------------------------------------------------------------------------- /dytb/trainer/utils/builders.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Utilities used by the trainers""" 9 | 10 | import os 11 | import tensorflow as tf 12 | 13 | from ...models.utils import variables_to_save, variables_to_restore, tf_log 14 | 15 | 16 | def build_optimizer(args, steps, global_step): 17 | """Build the specified optimizer, log the learning rate and enalble 18 | learning rate decay is specified. 19 | Args: 20 | args: the optimization argument dict 21 | global_step: integer tensor, the current training step 22 | Returns: 23 | optimizer: tf.Optimizer object initialized 24 | """ 25 | # Extract the initial learning rate 26 | initial_lr = float(args["gd"]["args"]['learning_rate']) 27 | 28 | if args["lr_decay"]["enabled"]: 29 | # Decay the learning rate exponentially based on the number of steps. 30 | learning_rate = tf.train.exponential_decay( 31 | initial_lr, 32 | global_step, 33 | steps["decay"], 34 | args["lr_decay"]["factor"], 35 | staircase=True) 36 | # Update the learning rate parameter of the optimizer 37 | args["gd"]["args"]['learning_rate'] = learning_rate 38 | # Log the learning rate 39 | tf_log(tf.summary.scalar('learning_rate', learning_rate)) 40 | else: 41 | learning_rate = tf.constant(initial_lr) 42 | 43 | # Instantiate the optimizer 44 | optimizer = args["gd"]["optimizer"](**args["gd"]["args"]) 45 | return optimizer 46 | 47 | 48 | def build_restore_saver(variables_to_add=None, scopes_to_remove=None): 49 | """Return a saver that restores every trainable variable that's not 50 | under a scope to remove. 51 | Args: 52 | variables_to_add: list of variables to add 53 | scopes_to_remove: list of scopes to remove 54 | """ 55 | if variables_to_add is None: 56 | variables_to_add = [] 57 | 58 | if scopes_to_remove is None: 59 | scopes_to_remove = [] 60 | 61 | restore_saver = tf.train.Saver( 62 | variables_to_restore(variables_to_add, scopes_to_remove)) 63 | return restore_saver 64 | 65 | 66 | def build_train_savers(variables_to_add=None): 67 | """Add variables_to_add to the collection of variables to save. 68 | Args: 69 | variables_to_add: list of variables to add 70 | Returns: 71 | train_saver: saver to use to log the training model 72 | best_saver: saver used to save the best model 73 | """ 74 | if variables_to_add is None: 75 | variables_to_add = [] 76 | variables = variables_to_save(variables_to_add) 77 | train_saver = tf.train.Saver(variables, max_to_keep=2) 78 | best_saver = tf.train.Saver(variables, max_to_keep=1) 79 | return train_saver, best_saver 80 | 81 | 82 | def build_loggers(graph, paths): 83 | """Build the FileWriter object used to log summaries. 84 | Args: 85 | graph: the graph which operations to log refers to 86 | paths: dict of paths 87 | Returns: 88 | train_log: tf.summary.FileWriter object to log train op 89 | validation_log: tf.summary.FileWriter object to log validation op 90 | """ 91 | train_log = tf.summary.FileWriter( 92 | os.path.join(paths["log"], 'train'), graph=graph) 93 | validation_log = tf.summary.FileWriter( 94 | os.path.join(paths["log"], 'validation'), graph=graph) 95 | return train_log, validation_log 96 | -------------------------------------------------------------------------------- /dytb/trainer/utils/flow.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Utilities to control to flow execution of the trainers""" 9 | 10 | import sys 11 | import tensorflow as tf 12 | 13 | from .builders import build_restore_saver 14 | 15 | 16 | def restore_or_restart(args, paths, sess): 17 | """Restore actual session or restart the training. 18 | If SESS.checkpoint_path is setted, start a new train 19 | loading the weight from the lastest checkpoint in that path 20 | Args: 21 | sess: session 22 | paths: dict of paths 23 | """ 24 | 25 | # first check if exists and checkpoint_path passed 26 | # from where to load the weights. 27 | # Return error if there's not 28 | pretrained_checkpoint = None 29 | if args["checkpoint_path"] != '': 30 | pretrained_checkpoint = tf.train.latest_checkpoint( 31 | args["checkpoint_path"]) 32 | if not pretrained_checkpoint: 33 | print("[E] {} not valid".format(args["checkpoint_path"])) 34 | sys.exit(-1) 35 | 36 | if not args["force_restart"]: 37 | # continue training checkpoint 38 | continue_checkpoint = tf.train.latest_checkpoint(paths["log"]) 39 | if continue_checkpoint: 40 | restore_saver = build_restore_saver( 41 | None, scopes_to_remove=args["exclude_scopes"]) 42 | restore_saver.restore(sess, continue_checkpoint) 43 | # else if the continue checkpoint does not exists 44 | # and the pretrained checkpoint has been specified 45 | # load the weights from the pretrained checkpoint 46 | elif pretrained_checkpoint: 47 | restore_saver = build_restore_saver( 48 | [], scopes_to_remove=args["exclude_scopes"]) 49 | restore_saver.restore(sess, pretrained_checkpoint) 50 | else: 51 | print('[!] No checkpoint file found') 52 | -------------------------------------------------------------------------------- /dytb/utils/CLIArgs.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Class that defines and parse CLI arguments""" 9 | 10 | import os 11 | import glob 12 | import argparse 13 | import json 14 | import importlib 15 | import pprint 16 | import sys 17 | import tensorflow as tf 18 | 19 | 20 | class CLIArgs(object): 21 | """Class that defines and parse CLI arguments""" 22 | 23 | def __init__(self, description="Train the model"): 24 | """Initialize variables: 25 | Args: 26 | description: The description to show when the help is displayed""" 27 | self._description = description 28 | self._args = None 29 | 30 | @staticmethod 31 | def get_dytb_models(): 32 | """Returns the avaiable dytb modules filename, without the .py ext""" 33 | dytbmodels_dir = os.path.join( 34 | os.path.dirname(os.path.abspath(__file__)), os.path.pardir, 35 | 'models', 'predefined') 36 | dytbmodels = [ 37 | model[len(dytbmodels_dir) + 1:-3] 38 | for model in glob.glob('{}/*.py'.format(dytbmodels_dir)) 39 | if "__init__.py" not in model 40 | ] 41 | return dytbmodels 42 | 43 | @staticmethod 44 | def get_dytb_datasets(): 45 | """Returns the avaiable dytb datasets filename, without the .py ext""" 46 | dytbdatasets_dir = os.path.join( 47 | os.path.dirname(os.path.abspath(__file__)), os.path.pardir, 48 | 'inputs', 'predefined') 49 | dytbdatasets = [ 50 | dataset[len(dytbdatasets_dir) + 1:-3] 51 | for dataset in glob.glob('{}/*.py'.format(dytbdatasets_dir)) 52 | if "__init__.py" not in dataset 53 | ] 54 | return dytbdatasets 55 | 56 | @staticmethod 57 | def get_local_models(): 58 | """Returns the avaiable modules filename, without the .py ext""" 59 | models_dir = os.path.join(os.getcwd(), 'models') 60 | return [ 61 | model[len(models_dir) + 1:-3] 62 | for model in glob.glob('{}/*.py'.format(models_dir)) 63 | if "__init__.py" not in model 64 | ] 65 | 66 | @staticmethod 67 | def get_local_datasets(): 68 | """Returns the avaiable datasets filename, without the .py ext""" 69 | datasets_dir = os.path.join(os.getcwd(), 'inputs') 70 | return [ 71 | dataset[len(datasets_dir) + 1:-3] 72 | for dataset in glob.glob('{}/*.py'.format(datasets_dir)) 73 | if "__init__.py" not in dataset 74 | ] 75 | 76 | @staticmethod 77 | def get_optimizers(): 78 | """Returns the avaiable Tensorflow optimizer""" 79 | return [ 80 | optimizer for optimizer in dir(tf.train) 81 | if optimizer.endswith("Optimizer") 82 | ] 83 | 84 | def _init_parser(self): 85 | """Parse CLI flags shared by train & eval proceudres. 86 | Returns: 87 | parser: parser object""" 88 | 89 | # CLI arguments 90 | parser = argparse.ArgumentParser(description=self._description) 91 | 92 | # Required arguments 93 | parser.add_argument( 94 | '--model', 95 | required=True, 96 | choices=self.get_dytb_models() + self.get_local_models()) 97 | parser.add_argument( 98 | '--dataset', 99 | required=True, 100 | choices=self.get_dytb_datasets() + self.get_local_datasets()) 101 | parser.add_argument('--batch_size', type=int, default=128) 102 | 103 | return parser 104 | 105 | def _get_model_dataset(self): 106 | """Return the model object and the dataset object. 107 | Returns: 108 | model: model object instantiated 109 | dataset: input object instantiated""" 110 | 111 | sys.path.append(os.getcwd()) 112 | 113 | # Instantiate the model object 114 | # Give the precedence to local models 115 | if self._args.model in self.get_local_models(): 116 | model = getattr( 117 | importlib.import_module('models.' + self._args.model), 118 | self._args.model)() 119 | else: 120 | model = getattr( 121 | importlib.import_module( 122 | 'dytb.models.predefined.' + self._args.model), 123 | self._args.model)() 124 | 125 | # Instantiate the input object 126 | # Give the precedente to local datasets 127 | if self._args.dataset in self.get_local_datasets(): 128 | dataset = getattr( 129 | importlib.import_module('inputs.' + self._args.dataset), 130 | self._args.dataset)() 131 | else: 132 | dataset = getattr( 133 | importlib.import_module( 134 | 'dytb.inputs.predefined.' + self._args.dataset), 135 | self._args.dataset)() 136 | 137 | return model, dataset 138 | 139 | def parse_eval(self): 140 | """Parser the CLI arguments for the evaluation procedure 141 | and return 142 | Returns: 143 | args: args object 144 | model: model object instantiated 145 | dataset: input object instantiated""" 146 | 147 | parser = self._init_parser() 148 | parser.add_argument( 149 | "--checkpoint_path", 150 | required=True, 151 | help='the path to a checkpoint from which load the model') 152 | parser.add_argument("--test", action="store_true", help='use test set') 153 | 154 | # Hardware 155 | parser.add_argument('--eval_device', default='/gpu:0') 156 | self._args = parser.parse_args() 157 | # Get model and dataset objects 158 | model, dataset = self._get_model_dataset() 159 | return self._args, model, dataset 160 | 161 | def parse_train(self): 162 | """Parser the CLI arguments for the training procedure 163 | and return 164 | Returns: 165 | args: args object 166 | model: model object instantiated 167 | dataset: input object instantiated 168 | """ 169 | 170 | parser = self._init_parser() 171 | 172 | # Restart train or continue 173 | parser.add_argument( 174 | '--restart', 175 | action='store_true', 176 | help='restart the training process DELETING the old checkpoint files' 177 | ) 178 | 179 | # Learning rate decay arguments 180 | parser.add_argument( 181 | '--lr_decay', 182 | action='store_true', 183 | help='enable the learning rate decay') 184 | parser.add_argument( 185 | '--lr_decay_epochs', 186 | type=int, 187 | default=25, 188 | help='decay the learning rate every lr_decay_epochs epochs') 189 | parser.add_argument( 190 | '--lr_decay_factor', 191 | type=float, 192 | default=0.1, 193 | help= 194 | 'decay of lr_decay_factor the initial learning rate after lr_decay_epochs epochs' 195 | ) 196 | 197 | # L2 regularization arguments 198 | parser.add_argument( 199 | '--l2_penalty', 200 | type=float, 201 | default=0.0, 202 | help='L2 penalty term to apply ad the trained parameters') 203 | 204 | # Optimization arguments 205 | parser.add_argument( 206 | '--optimizer', 207 | choices=self.get_optimizers(), 208 | default='MomentumOptimizer', 209 | help='the optimizer to use') 210 | parser.add_argument( 211 | '--optimizer_args', 212 | type=json.loads, 213 | default=''' 214 | { 215 | "learning_rate": 1e-2, 216 | "momentum": 0.9 217 | }''', 218 | help='the optimizer parameters') 219 | parser.add_argument( 220 | '--epochs', 221 | type=int, 222 | default=150, 223 | help='number of epochs to train the model') 224 | 225 | # Hardware 226 | parser.add_argument( 227 | '--train_device', 228 | default='/gpu:0', 229 | help= 230 | 'the device on which place the the model during the trining phase') 231 | 232 | # Optional comment 233 | parser.add_argument( 234 | '--comment', 235 | default='', 236 | help='comment string to preprend to the model name') 237 | 238 | # Fine tuning & graph manipulation 239 | parser.add_argument( 240 | '--exclude_scopes', 241 | help='comma separated list of scopes of variables to exclude from the checkpoint restoring.', 242 | default=None, 243 | type=lambda scope_list: [scope.strip() for scope in scope_list.split(',')]) 244 | 245 | parser.add_argument( 246 | '--trainable_scopes', 247 | help='comma separated list of scopes of variables to train. If empty every variable is trained', 248 | default=None, 249 | type=lambda scope_list: [scope.strip() for scope in scope_list.split(',')]) 250 | 251 | parser.add_argument( 252 | "--checkpoint_path", 253 | required=False, 254 | default='', 255 | help='the path to a checkpoint from which load the model') 256 | 257 | # Build the object 258 | self._args = parser.parse_args() 259 | 260 | # Get model and dataset objects 261 | model, dataset = self._get_model_dataset() 262 | 263 | print('Args: {}'.format(pprint.pformat(vars(self._args), indent=4))) 264 | 265 | return self._args, model, dataset 266 | -------------------------------------------------------------------------------- /dytb/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | setuptools==36.2.7 3 | six==1.10.0 4 | Pillow==9.0.1 5 | tensorflow==1.15.4 6 | -------------------------------------------------------------------------------- /scripts/dytb_evaluate: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | #Copyright (C) 2017 Paolo Galeone 4 | # 5 | #This Source Code Form is subject to the terms of the Mozilla Public 6 | #License, v. 2.0. If a copy of the MPL was not distributed with this 7 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 8 | #Exhibit B is not attached; this software is compatible with the 9 | #licenses expressed under Section 1.12 of the MPL v2. 10 | """ Evaluate the model """ 11 | 12 | import pprint 13 | import sys 14 | import tensorflow as tf 15 | 16 | from dytb.utils.CLIArgs import CLIArgs 17 | from dytb.evaluate import stats 18 | 19 | 20 | def main(): 21 | """Evaluates the model, on the specified dataset, 22 | fetching the requested input type""" 23 | with tf.device(ARGS.eval_device): 24 | pprint.pprint( 25 | stats(ARGS.checkpoint_path, MODEL, DATASET, ARGS.batch_size), 26 | indent=4) 27 | 28 | 29 | if __name__ == '__main__': 30 | ARGS, MODEL, DATASET = CLIArgs( 31 | description="Evaluate the model").parse_eval() 32 | sys.exit(main()) 33 | -------------------------------------------------------------------------------- /scripts/dytb_train: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | #Copyright (C) 2016 Paolo Galeone 4 | # 5 | #This Source Code Form is subject to the terms of the Mozilla Public 6 | #License, v. 2.0. If a copy of the MPL was not distributed with this 7 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 8 | #Exhibit B is not attached; this software is compatible with the 9 | #licenses expressed under Section 1.12 of the MPL v2. 10 | """Dynamically define the train bench via CLI""" 11 | 12 | import pprint 13 | import sys 14 | import time 15 | import tensorflow as tf 16 | 17 | from dytb.utils.CLIArgs import CLIArgs 18 | from dytb.train import train 19 | 20 | 21 | def main(): 22 | """Executes the training procedure and write the results 23 | to the results.csv file""" 24 | with tf.device(ARGS.train_device): 25 | info = train( 26 | model=MODEL, 27 | dataset=DATASET, 28 | hyperparameters={ 29 | "epochs": ARGS.epochs, 30 | "batch_size": ARGS.batch_size, 31 | "regularizations": { 32 | "l2": ARGS.l2_penalty, 33 | "augmentation": { 34 | "name": "identity", 35 | "fn": lambda x: x, 36 | "factor": 1 37 | } 38 | }, 39 | "gd": { 40 | "optimizer": getattr(tf.train, ARGS.optimizer), 41 | "args": ARGS.optimizer_args 42 | }, 43 | "lr_decay": { 44 | "enabled": ARGS.lr_decay, 45 | "epochs": ARGS.lr_decay_epochs, 46 | "factor": ARGS.lr_decay_factor 47 | }, 48 | "seed": None, 49 | }, 50 | force_restart=ARGS.restart, 51 | surgery={ 52 | "checkpoint_path": ARGS.checkpoint_path, 53 | "exclude_scopes": ARGS.exclude_scopes, 54 | "trainable_scopes": ARGS.trainable_scopes 55 | }, 56 | comment=ARGS.comment) 57 | 58 | # Add full path of the best model, used to test the performance. 59 | row = {**info["stats"], "path": info["paths"]["best"], "time": time.strftime("%Y-%m-%d %H:%M")} 60 | pprint.pprint(row, indent=4) 61 | return 0 62 | 63 | 64 | if __name__ == '__main__': 65 | ARGS, MODEL, DATASET = CLIArgs().parse_train() 66 | sys.exit(main()) 67 | -------------------------------------------------------------------------------- /scripts/inputs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galeone/dynamic-training-bench/6534e18bc0744e6e32ed92eba4c95e5b592d4beb/scripts/inputs/__init__.py -------------------------------------------------------------------------------- /scripts/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galeone/dynamic-training-bench/6534e18bc0744e6e32ed92eba4c95e5b592d4beb/scripts/models/__init__.py -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2017 Paolo Galeone 2 | # 3 | #This Source Code Form is subject to the terms of the Mozilla Public 4 | #License, v. 2.0. If a copy of the MPL was not distributed with this 5 | #file, you can obtain one at http://mozilla.org/MPL/2.0/. 6 | #Exhibit B is not attached; this software is compatible with the 7 | #licenses expressed under Section 1.12 of the MPL v2. 8 | """Setup file to make dytb installable via pip""" 9 | 10 | import io 11 | import re 12 | from setuptools import setup 13 | from setuptools import find_packages 14 | 15 | INIT_PY = io.open('dytb/__init__.py').read() 16 | METADATA = dict(re.findall("__([a-z]+)__ = '([^']+)'", INIT_PY)) 17 | METADATA['doc'] = re.findall('"""(.+)"""', INIT_PY)[0] 18 | 19 | setup( 20 | name='dytb', 21 | version=METADATA['version'], 22 | description=METADATA['doc'], 23 | author=METADATA['author'], 24 | author_email=METADATA['email'], 25 | url=METADATA['url'], 26 | download_url='/'.join((METADATA['url'].rstrip('/'), 'tarball', 27 | METADATA['version'])), 28 | license='MPL', 29 | scripts=['scripts/dytb_evaluate', 'scripts/dytb_train'], 30 | packages=find_packages()) 31 | -------------------------------------------------------------------------------- /tests/extract_features.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tensorflow as tf 3 | 4 | from dytb.models.predefined.VGG import VGG 5 | from dytb.inputs.images import read_image 6 | 7 | 8 | class TestFeatureExtractors(unittest.TestCase): 9 | 10 | def test_classifier(self): 11 | model = VGG() 12 | image = tf.image.resize_bilinear( 13 | tf.expand_dims( 14 | read_image("images/nocat.png", channel=3, image_type="png"), 15 | axis=0), (32, 32)) 16 | features = model.evaluator.extract_features( 17 | checkpoint_path="../log/VGG/CIFAR-10_Momentum/best/", 18 | inputs=image, 19 | layer_name="VGG/pool1/MaxPool:0", 20 | num_classes=10) 21 | self.assertEqual(features.shape, (1, 16, 16, 64)) 22 | 23 | 24 | if __name__ == '__main__': 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /tests/images/nocat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galeone/dynamic-training-bench/6534e18bc0744e6e32ed92eba4c95e5b592d4beb/tests/images/nocat.png --------------------------------------------------------------------------------