├── README.md ├── bash_files ├── adaptive_superpixel.sh └── warm_up.sh ├── deeplab ├── __pycache__ │ ├── common.cpython-36.pyc │ ├── input_preprocess.cpython-36.pyc │ ├── model.cpython-36.pyc │ └── my_metrics.cpython-36.pyc ├── build_data_active_sp.py ├── build_data_active_sp_am.py ├── common.py ├── core │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── dense_prediction_cell.cpython-36.pyc │ │ ├── feature_extractor.cpython-36.pyc │ │ ├── preprocess_utils.cpython-36.pyc │ │ ├── resnet_v1_beta.cpython-36.pyc │ │ ├── utils.cpython-36.pyc │ │ └── xception.cpython-36.pyc │ ├── dense_prediction_cell.py │ ├── dense_prediction_cell.pyc │ ├── dense_prediction_cell_branch5_top1_cityscapes.json │ ├── dense_prediction_cell_test.py │ ├── feature_extractor.py │ ├── feature_extractor.pyc │ ├── preprocess_utils.py │ ├── preprocess_utils.pyc │ ├── preprocess_utils_test.py │ ├── resnet_v1_beta.py │ ├── resnet_v1_beta.pyc │ ├── resnet_v1_beta_test.py │ ├── utils.py │ ├── utils.pyc │ ├── utils_test.py │ ├── xception.py │ ├── xception.pyc │ └── xception_test.py ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── build_data.cpython-36.pyc │ │ └── segmentation_dataset.cpython-36.pyc │ ├── build_cityscapes_data.py │ ├── build_data.py │ ├── build_voc2012_data.py │ ├── convert_cityscapes.sh │ ├── remove_gt_colormap.py │ └── segmentation_dataset.py ├── eval_once.py ├── export_model.py ├── input_preprocess.py ├── model.py ├── my_metrics.py ├── train.py └── utils │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── input_generator.cpython-36.pyc │ └── train_utils.cpython-36.pyc │ ├── get_dataset_colormap.py │ ├── get_dataset_colormap_test.py │ ├── input_generator.py │ ├── input_generator.pyc │ ├── save_annotation.py │ ├── train_utils.py │ └── train_utils.pyc ├── requirements.txt ├── scripts ├── extract_model_predictions.py ├── extract_model_predictions_cb.py ├── extract_superpixels.py ├── gen_adaptive_spx.py ├── gen_oracle_spx.py ├── region_selection_using_cb.py ├── region_selection_using_random.py └── utils.py └── slim ├── __init__.py ├── deployment ├── __init__.py ├── __init__.pyc ├── __pycache__ │ ├── __init__.cpython-36.pyc │ └── model_deploy.cpython-36.pyc ├── model_deploy.py ├── model_deploy.pyc └── model_deploy_test.py └── nets ├── __init__.py ├── __init__.pyc ├── __pycache__ └── __init__.cpython-36.pyc ├── alexnet.py ├── alexnet.pyc ├── alexnet_test.py ├── cifarnet.py ├── cifarnet.pyc ├── cyclegan.py ├── cyclegan_test.py ├── dcgan.py ├── dcgan_test.py ├── i3d.py ├── i3d.pyc ├── i3d_test.py ├── i3d_utils.py ├── i3d_utils.pyc ├── inception.py ├── inception.pyc ├── inception_resnet_v2.py ├── inception_resnet_v2.pyc ├── inception_resnet_v2_test.py ├── inception_utils.py ├── inception_utils.pyc ├── inception_v1.py ├── inception_v1.pyc ├── inception_v1_test.py ├── inception_v2.py ├── inception_v2.pyc ├── inception_v2_test.py ├── inception_v3.py ├── inception_v3.pyc ├── inception_v3_test.py ├── inception_v4.py ├── inception_v4.pyc ├── inception_v4_test.py ├── lenet.py ├── lenet.pyc ├── mobilenet ├── README.md ├── __init__.py ├── __init__.pyc ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── conv_blocks.cpython-36.pyc │ ├── mobilenet.cpython-36.pyc │ └── mobilenet_v2.cpython-36.pyc ├── conv_blocks.py ├── conv_blocks.pyc ├── madds_top1_accuracy.png ├── mnet_v1_vs_v2_pixel1_latency.png ├── mobilenet.py ├── mobilenet.pyc ├── mobilenet_example.ipynb ├── mobilenet_v2.py ├── mobilenet_v2.pyc └── 
mobilenet_v2_test.py ├── mobilenet_v1.md ├── mobilenet_v1.png ├── mobilenet_v1.py ├── mobilenet_v1.pyc ├── mobilenet_v1_eval.py ├── mobilenet_v1_test.py ├── mobilenet_v1_train.py ├── nets_factory.py ├── nets_factory.pyc ├── nets_factory_test.py ├── overfeat.py ├── overfeat.pyc ├── overfeat_test.py ├── pix2pix.py ├── pix2pix_test.py ├── resnet_utils.py ├── resnet_utils.pyc ├── resnet_v1.py ├── resnet_v1.pyc ├── resnet_v1_test.py ├── resnet_v2.py ├── resnet_v2.pyc ├── resnet_v2_test.py ├── s3dg.py ├── s3dg.pyc ├── s3dg_test.py ├── vgg.py ├── vgg.pyc └── vgg_test.py /README.md: -------------------------------------------------------------------------------- 1 | # Adaptive Superpixel for Active Learning in Semantic Segmentation 2 | This repository is the official implementation of ["Adaptive Superpixel for Active Learning in Semantic Segmentation"](https://arxiv.org/abs/2303.16817), accepted at ICCV 2023. 3 | 4 | ## Abstract 5 | Learning semantic segmentation requires pixel-wise annotations, which can be time-consuming and expensive. To reduce the annotation cost, we propose a superpixel-based active learning (AL) framework, which collects a dominant label per superpixel instead. To be specific, it consists of adaptive superpixel and sieving mechanisms, fully dedicated to AL. At each round of AL, we adaptively merge neighboring pixels of similar learned features into superpixels. We then query a selected subset of these superpixels using an acquisition function that assumes no uniform superpixel size. This approach is more efficient than existing methods, which rely only on innate features such as RGB color and assume uniform superpixel sizes. Obtaining a dominant label per superpixel drastically reduces annotators' burden as it requires fewer clicks. However, it inevitably introduces noisy annotations due to mismatches between superpixels and the ground-truth segmentation. To address this issue, we further devise a sieving mechanism that identifies and excludes potentially noisy annotations from learning. Our experiments on both the Cityscapes and PASCAL VOC datasets demonstrate the efficacy of the adaptive superpixel and sieving mechanisms. 6 | 7 | ## Usage 8 | Our code is based on ["Revisiting Superpixels for Active Learning in Semantic Segmentation With Realistic Annotation Costs"](https://github.com/cailile/Revisiting-Superpixels-for-Active-Learning). 9 | You first need to prepare the Cityscapes dataset and an Xception-65 model pretrained on ImageNet. For the Cityscapes dataset, refer to [DeepLab on Cityscapes](https://github.com/tensorflow/models/blob/master/research/deeplab/g3doc/cityscapes.md); for the Xception-65 model, refer to [DeepLab Model Zoo](https://github.com/tensorflow/models/blob/master/research/deeplab/g3doc/model_zoo.md). The final directory structure is depicted below: 10 | 11 | ``` 12 | + datasets 13 | + cityscapes 14 | + leftImg8bit 15 | + gtFine 16 | + gtFineRegion 17 | + image_list 18 | + tfrecord 19 | 20 | + models 21 | + xception_65 22 | ``` 23 | 24 | To obtain base and oracle superpixels, run `python ./scripts/extract_superpixels.py` and `python ./scripts/gen_oracle_spx.py`, respectively. For the warm-up round, run `bash ./bash_files/warm_up.sh`. After the warm-up, you can generate adaptive superpixels for the next round using `python ./scripts/gen_adaptive_spx.py` and the previously trained model. You then run `bash ./bash_files/adaptive_superpixel.sh` for subsequent rounds; the full sequence is sketched below.
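For reference, here is a minimal sketch of the full pipeline, collecting the commands above in order (per-script flags are omitted; the shell scripts set their configuration variables internally, as shown in `bash_files/` below):

```
# One-time preparation: base superpixels and their oracle counterparts.
python ./scripts/extract_superpixels.py
python ./scripts/gen_oracle_spx.py

# Warm-up round: initial region selection, annotation, and training.
bash ./bash_files/warm_up.sh

# Each subsequent round: regenerate superpixels from the previous model's
# learned features, then select, annotate, and retrain.
python ./scripts/gen_adaptive_spx.py
bash ./bash_files/adaptive_superpixel.sh
```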
25 | 26 | ## Cite 27 | Please cite our paper if you use the model or this code in your own work: 28 | ``` 29 | @inproceedings{kim2023adaptive, 30 | title={Adaptive Superpixel for Active Learning in Semantic Segmentation}, 31 | author={Hoyoung Kim and Minhyeon Oh and Sehyun Hwang and Suha Kwak and Jungseul Ok}, 32 | booktitle={ICCV}, 33 | year={2023}, 34 | url={https://arxiv.org/abs/2303.16817} 35 | } 36 | ``` 37 | -------------------------------------------------------------------------------- /bash_files/warm_up.sh: -------------------------------------------------------------------------------- 1 | project_dir=/home/khy/github/Adaptive-Superpixels 2 | export PYTHONPATH=${project_dir}/deeplab:${project_dir}/slim:${project_dir}/deeplab/datasets:$PYTHONPATH 3 | export CUDA_VISIBLE_DEVICES=0 4 | dataset_name=cityscapes 5 | model_name_seg=xception_65 6 | region_num_per_image=8292 7 | region_size=32 8 | train_itr=60000 9 | num_batch=1 10 | base_learning_rate=0.007 11 | train_split=train 12 | job_name=8192_50k_v0_1 13 | seed=1 14 | crop_size=2049 15 | k_array=(50000) 16 | region_idx_dir=./region_index/$job_name 17 | mkdir -p $region_idx_dir 18 | anno_cost_dir=None 19 | valid_idx_dir=./superpixels/$dataset_name/seeds_8192/train/label # change path for oracle 20 | devkit_path=./deeplab/datasets/cityscapes/ 21 | list_folder=./deeplab/datasets/cityscapes/image_list 22 | image_folder=./deeplab/datasets/cityscapes/leftImg8bit 23 | semantic_segmentation_folder=./deeplab/datasets/cityscapes/gtFine 24 | PATH_TO_INITIAL_CHECKPOINT=deeplab/models/$model_name_seg/model.ckpt 25 | eval_data_dir=deeplab/datasets/cityscapes/tfrecord 26 | mkdir -p ./accuracy_log 27 | accuracy_log=./accuracy_log/${job_name}.txt 28 | mkdir -p ./batch_log 29 | batch_log=./batch_log/${job_name}.txt 30 | mkdir -p ./logs 31 | if test -f $batch_log; then 32 | typeset -i start_batch=$(cat $batch_log) 33 | echo start batch log is $start_batch 34 | start_batch=$(( start_batch + 1 )) 35 | else 36 | start_batch=0 37 | fi 38 | echo start batch now is $start_batch 39 | for ((batch_id=start_batch;batch_id<num_batch;batch_id++)) 40 | do 41 | # NOTE: the region-selection command that originally opened this loop body (lines 41-57) was garbled during extraction, and the loop bound above is reconstructed as num_batch; judging by its surviving log redirection (2>&1 | tee ./logs/${job_name}_${batch_id}_random.log), the lost command presumably runs ./scripts/region_selection_using_random.py with the variables defined above. 58 | echo Generating batch training data... 59 | semantic_segmentation_folder_region=./deeplab/datasets/cityscapes/gtFineRegion/$job_name/batch_$batch_id 60 | mkdir -p $semantic_segmentation_folder_region 61 | tfrecord_dir=./deeplab/datasets/$dataset_name/tfrecord/$job_name/batch_$batch_id 62 | mkdir -p $tfrecord_dir 63 | python ./deeplab/build_data_active_sp.py \ 64 | --dataset_name=cityscapes \ 65 | --list_folder=$list_folder \ 66 | --tfrecord_dir=$tfrecord_dir \ 67 | --image_folder=$image_folder \ 68 | --semantic_segmentation_folder=$semantic_segmentation_folder \ 69 | --semantic_segmentation_folder_region=$semantic_segmentation_folder_region \ 70 | --region_idx_dir=$region_idx_dir \ 71 | --valid_idx_dir=$valid_idx_dir \ 72 | --batch_id=$batch_id \ 73 | --region_type=sp \ 74 | --train_split=$train_split \ 75 | --is_uniq=True \ 76 | 2>&1 | tee ./logs/${job_name}_${batch_id}_build_data.log 77 | train_dir=./outputs/$job_name/batch_$batch_id 78 | mkdir -p $train_dir 79 | echo Active training batch $batch_id ...
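# The remaining steps in this iteration (below): (1) train DeepLab from the
# ImageNet-pretrained checkpoint on the TFRecords just built, (2) export a
# frozen inference graph, and (3) evaluate on the val split. eval_once.py
# appends the evaluation accuracy to $accuracy_log and records the finished
# batch id in $batch_log, which the resume logic at the top reads back.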
80 | python ./deeplab/train.py \ 81 | --logtostderr \ 82 | --training_number_of_steps=$train_itr \ 83 | --base_learning_rate=$base_learning_rate \ 84 | --num_clones=1 \ 85 | --train_split=$train_split \ 86 | --model_variant=$model_name_seg \ 87 | --train_crop_size=769 \ 88 | --train_crop_size=769 \ 89 | --atrous_rates=6 \ 90 | --atrous_rates=12 \ 91 | --atrous_rates=18 \ 92 | --output_stride=16 \ 93 | --decoder_output_stride=4 \ 94 | --train_batch_size=4 \ 95 | --dataset=$dataset_name \ 96 | --tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \ 97 | --train_logdir=$train_dir \ 98 | --dataset_dir=$tfrecord_dir \ 99 | --fine_tune_batch_norm=True \ 100 | 2>&1 | tee ./logs/${job_name}_${batch_id}_train.log 101 | python ./deeplab/export_model.py \ 102 | --logtostderr \ 103 | --checkpoint_path=$train_dir/model.ckpt-$train_itr \ 104 | --export_path=$train_dir/frozen_inference_graph.pb \ 105 | --model_variant=$model_name_seg \ 106 | --atrous_rates=6 \ 107 | --atrous_rates=12 \ 108 | --atrous_rates=18 \ 109 | --output_stride=16 \ 110 | --decoder_output_stride=4 \ 111 | --num_classes=19 \ 112 | --crop_size=512 \ 113 | --crop_size=1024 \ 114 | --inference_scales=1.0 115 | python ./deeplab/eval_once.py \ 116 | --checkpoint_path=$train_dir/model.ckpt-$train_itr \ 117 | --dataset=cityscapes \ 118 | --eval_logdir=$train_dir \ 119 | --dataset_dir=$eval_data_dir \ 120 | --model_variant=$model_name_seg \ 121 | --eval_crop_size=1025 \ 122 | --eval_crop_size=2049 \ 123 | --atrous_rates=6 \ 124 | --atrous_rates=12 \ 125 | --atrous_rates=18 \ 126 | --output_stride=16 \ 127 | --decoder_output_stride=4 \ 128 | --accuracy_log=$accuracy_log \ 129 | --batch_log=$batch_log \ 130 | --batch_id=$batch_id 131 | if [ ! -f $accuracy_log ] 132 | then 133 | echo training $batch_id is not successful 134 | break 135 | else 136 | rm -r $tfrecord_dir 137 | fi 138 | done 139 | -------------------------------------------------------------------------------- /deeplab/__pycache__/common.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/__pycache__/common.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/__pycache__/input_preprocess.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/__pycache__/input_preprocess.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/__pycache__/my_metrics.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/__pycache__/my_metrics.cpython-36.pyc 
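A note on the repeated flags in `warm_up.sh` above (`--train_crop_size` and `--eval_crop_size` twice, `--atrous_rates` three times): these feed multi-value flags declared with `DEFINE_multi_integer`, where each occurrence on the command line appends one value. A minimal sketch of that behavior, assuming the TF 1.x `tf.app.flags` API used throughout this repository (`demo.py` is a hypothetical script name):

```
import tensorflow as tf

flags = tf.app.flags
# Declared the same way as in deeplab/export_model.py below.
flags.DEFINE_multi_integer('train_crop_size', [513, 513],
                           'Crop size [height, width].')
FLAGS = flags.FLAGS


def main(unused_argv):
  # `python demo.py --train_crop_size=769 --train_crop_size=769` prints
  # [769, 769], matching the two --train_crop_size lines in warm_up.sh.
  print(FLAGS.train_crop_size)


if __name__ == '__main__':
  tf.app.run()
```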
-------------------------------------------------------------------------------- /deeplab/common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides flags that are common to scripts. 16 | 17 | Common flags from train/eval/vis/export_model.py are collected in this script. 18 | """ 19 | import collections 20 | import copy 21 | import json 22 | 23 | import tensorflow as tf 24 | 25 | flags = tf.app.flags 26 | 27 | # Flags for input preprocessing. 28 | 29 | flags.DEFINE_integer('min_resize_value', None, 30 | 'Desired size of the smaller image side.') 31 | 32 | flags.DEFINE_integer('max_resize_value', None, 33 | 'Maximum allowed size of the larger image side.') 34 | 35 | flags.DEFINE_integer('resize_factor', None, 36 | 'Resized dimensions are multiple of factor plus one.') 37 | 38 | # Model dependent flags. 39 | 40 | flags.DEFINE_integer('logits_kernel_size', 1, 41 | 'The kernel size for the convolutional kernel that ' 42 | 'generates logits.') 43 | 44 | # When using 'mobilenet_v2', we set atrous_rates = decoder_output_stride = None. 45 | # When using 'xception_65' or 'resnet_v1' model variants, we set 46 | # atrous_rates = [6, 12, 18] (output stride 16) and decoder_output_stride = 4. 47 | # See core/feature_extractor.py for supported model variants. 48 | flags.DEFINE_string('model_variant', 'mobilenet_v2', 'DeepLab model variant.') 49 | 50 | flags.DEFINE_multi_float('image_pyramid', None, 51 | 'Input scales for multi-scale feature extraction.') 52 | 53 | flags.DEFINE_boolean('add_image_level_feature', True, 54 | 'Add image level feature.') 55 | 56 | flags.DEFINE_multi_integer( 57 | 'image_pooling_crop_size', None, 58 | 'Image pooling crop size [height, width] used in the ASPP module. When ' 59 | 'value is None, the model performs image pooling with "crop_size". This ' 60 | 'flag is useful when one wants to use different image pooling sizes.') 61 | 62 | flags.DEFINE_boolean('aspp_with_batch_norm', True, 63 | 'Use batch norm parameters for ASPP or not.') 64 | 65 | flags.DEFINE_boolean('aspp_with_separable_conv', True, 66 | 'Use separable convolution for ASPP or not.') 67 | 68 | # Defaults to None. Set multi_grid = [1, 2, 4] when using provided 69 | # 'resnet_v1_{50,101}_beta' checkpoints. 70 | flags.DEFINE_multi_integer('multi_grid', None, 71 | 'Employ a hierarchy of atrous rates for ResNet.') 72 | 73 | flags.DEFINE_float('depth_multiplier', 1.0, 74 | 'Multiplier for the depth (number of channels) for all ' 75 | 'convolution ops used in MobileNet.') 76 | 77 | # For `xception_65`, use decoder_output_stride = 4. For `mobilenet_v2`, use 78 | # decoder_output_stride = None.
79 | flags.DEFINE_integer('decoder_output_stride', 4, 80 | 'The ratio of input to output spatial resolution when ' 81 | 'employing decoder to refine segmentation results.') 82 | 83 | flags.DEFINE_boolean('decoder_use_separable_conv', True, 84 | 'Employ separable convolution for decoder or not.') 85 | 86 | flags.DEFINE_enum('merge_method', 'max', ['max', 'avg'], 87 | 'Scheme to merge multi scale features.') 88 | 89 | flags.DEFINE_string( 90 | 'dense_prediction_cell_json', 91 | '', 92 | 'A JSON file that specifies the dense prediction cell.') 93 | 94 | FLAGS = flags.FLAGS 95 | 96 | # Constants 97 | 98 | # Perform semantic segmentation predictions. 99 | OUTPUT_TYPE = 'semantic' 100 | 101 | # Semantic segmentation item names. 102 | LABELS_CLASS = 'labels_class' 103 | IMAGE = 'image' 104 | HEIGHT = 'height' 105 | WIDTH = 'width' 106 | IMAGE_NAME = 'image_name' 107 | LABEL = 'label' 108 | ORIGINAL_IMAGE = 'original_image' 109 | 110 | # Test set name. 111 | TEST_SET = 'test' 112 | 113 | 114 | class ModelOptions( 115 | collections.namedtuple('ModelOptions', [ 116 | 'outputs_to_num_classes', 117 | 'crop_size', 118 | 'atrous_rates', 119 | 'output_stride', 120 | 'merge_method', 121 | 'add_image_level_feature', 122 | 'image_pooling_crop_size', 123 | 'aspp_with_batch_norm', 124 | 'aspp_with_separable_conv', 125 | 'multi_grid', 126 | 'decoder_output_stride', 127 | 'decoder_use_separable_conv', 128 | 'logits_kernel_size', 129 | 'model_variant', 130 | 'depth_multiplier', 131 | 'dense_prediction_cell_config', 132 | ])): 133 | """Immutable class to hold model options.""" 134 | 135 | __slots__ = () 136 | 137 | def __new__(cls, 138 | outputs_to_num_classes, 139 | crop_size=None, 140 | atrous_rates=None, 141 | output_stride=8): 142 | """Constructor to set default values. 143 | 144 | Args: 145 | outputs_to_num_classes: A dictionary from output type to the number of 146 | classes. For example, for the task of semantic segmentation with 21 147 | semantic classes, we would have outputs_to_num_classes['semantic'] = 21. 148 | crop_size: A tuple [crop_height, crop_width]. 149 | atrous_rates: A list of atrous convolution rates for ASPP. 150 | output_stride: The ratio of input to output spatial resolution. 151 | 152 | Returns: 153 | A new ModelOptions instance. 
154 | """ 155 | dense_prediction_cell_config = None 156 | if FLAGS.dense_prediction_cell_json: 157 | with tf.gfile.Open(FLAGS.dense_prediction_cell_json, 'r') as f: 158 | dense_prediction_cell_config = json.load(f) 159 | 160 | return super(ModelOptions, cls).__new__( 161 | cls, outputs_to_num_classes, crop_size, atrous_rates, output_stride, 162 | FLAGS.merge_method, FLAGS.add_image_level_feature, 163 | FLAGS.image_pooling_crop_size, FLAGS.aspp_with_batch_norm, 164 | FLAGS.aspp_with_separable_conv, FLAGS.multi_grid, 165 | FLAGS.decoder_output_stride, FLAGS.decoder_use_separable_conv, 166 | FLAGS.logits_kernel_size, FLAGS.model_variant, FLAGS.depth_multiplier, 167 | dense_prediction_cell_config) 168 | 169 | def __deepcopy__(self, memo): 170 | return ModelOptions(copy.deepcopy(self.outputs_to_num_classes), 171 | self.crop_size, 172 | self.atrous_rates, 173 | self.output_stride) 174 | -------------------------------------------------------------------------------- /deeplab/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/__init__.py -------------------------------------------------------------------------------- /deeplab/core/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/__init__.pyc -------------------------------------------------------------------------------- /deeplab/core/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/core/__pycache__/dense_prediction_cell.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/__pycache__/dense_prediction_cell.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/core/__pycache__/feature_extractor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/__pycache__/feature_extractor.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/core/__pycache__/preprocess_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/__pycache__/preprocess_utils.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/core/__pycache__/resnet_v1_beta.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/__pycache__/resnet_v1_beta.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/core/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/core/__pycache__/xception.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/__pycache__/xception.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/core/dense_prediction_cell.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/dense_prediction_cell.pyc -------------------------------------------------------------------------------- /deeplab/core/dense_prediction_cell_branch5_top1_cityscapes.json: -------------------------------------------------------------------------------- 1 | [{"kernel": 3, "rate": [1, 6], "op": "conv", "input": -1}, {"kernel": 3, "rate": [18, 15], "op": "conv", "input": 0}, {"kernel": 3, "rate": [6, 3], "op": "conv", "input": 1}, {"kernel": 3, "rate": [1, 1], "op": "conv", "input": 0}, {"kernel": 3, "rate": [6, 21], "op": "conv", "input": 0}] -------------------------------------------------------------------------------- /deeplab/core/dense_prediction_cell_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for dense_prediction_cell.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | 24 | from deeplab.core import dense_prediction_cell 25 | 26 | 27 | class DensePredictionCellTest(tf.test.TestCase): 28 | 29 | def setUp(self): 30 | self.segmentation_layer = dense_prediction_cell.DensePredictionCell( 31 | config=[ 32 | { 33 | dense_prediction_cell._INPUT: -1, 34 | dense_prediction_cell._OP: dense_prediction_cell._CONV, 35 | dense_prediction_cell._KERNEL: 1, 36 | }, 37 | { 38 | dense_prediction_cell._INPUT: 0, 39 | dense_prediction_cell._OP: dense_prediction_cell._CONV, 40 | dense_prediction_cell._KERNEL: 3, 41 | dense_prediction_cell._RATE: [1, 3], 42 | }, 43 | { 44 | dense_prediction_cell._INPUT: 1, 45 | dense_prediction_cell._OP: ( 46 | dense_prediction_cell._PYRAMID_POOLING), 47 | dense_prediction_cell._GRID_SIZE: [1, 2], 48 | }, 49 | ], 50 | hparams={'conv_rate_multiplier': 2}) 51 | 52 | def testPyramidPoolingArguments(self): 53 | features_size, pooled_kernel = ( 54 | self.segmentation_layer._get_pyramid_pooling_arguments( 55 | crop_size=[513, 513], 56 | output_stride=16, 57 | image_grid=[4, 4])) 58 | self.assertListEqual(features_size, [33, 33]) 59 | self.assertListEqual(pooled_kernel, [9, 9]) 60 | 61 | def testPyramidPoolingArgumentsWithImageGrid1x1(self): 62 | features_size, pooled_kernel = ( 63 | self.segmentation_layer._get_pyramid_pooling_arguments( 64 | crop_size=[257, 257], 65 | output_stride=16, 66 | image_grid=[1, 1])) 67 | self.assertListEqual(features_size, [17, 17]) 68 | self.assertListEqual(pooled_kernel, [17, 17]) 69 | 70 | def testParseOperationStringWithConv1x1(self): 71 | operation = self.segmentation_layer._parse_operation( 72 | config={ 73 | dense_prediction_cell._OP: dense_prediction_cell._CONV, 74 | dense_prediction_cell._KERNEL: [1, 1], 75 | }, 76 | crop_size=[513, 513], output_stride=16) 77 | self.assertEqual(operation[dense_prediction_cell._OP], 78 | dense_prediction_cell._CONV) 79 | self.assertListEqual(operation[dense_prediction_cell._KERNEL], [1, 1]) 80 | 81 | def testParseOperationStringWithConv3x3(self): 82 | operation = self.segmentation_layer._parse_operation( 83 | config={ 84 | dense_prediction_cell._OP: dense_prediction_cell._CONV, 85 | dense_prediction_cell._KERNEL: [3, 3], 86 | dense_prediction_cell._RATE: [9, 6], 87 | }, 88 | crop_size=[513, 513], output_stride=16) 89 | self.assertEqual(operation[dense_prediction_cell._OP], 90 | dense_prediction_cell._CONV) 91 | self.assertListEqual(operation[dense_prediction_cell._KERNEL], [3, 3]) 92 | self.assertEqual(operation[dense_prediction_cell._RATE], [9, 6]) 93 | 94 | def testParseOperationStringWithPyramidPooling2x2(self): 95 | operation = self.segmentation_layer._parse_operation( 96 | config={ 97 | dense_prediction_cell._OP: dense_prediction_cell._PYRAMID_POOLING, 98 | dense_prediction_cell._GRID_SIZE: [2, 2], 99 | }, 100 | crop_size=[513, 513], 101 | output_stride=16) 102 | self.assertEqual(operation[dense_prediction_cell._OP], 103 | dense_prediction_cell._PYRAMID_POOLING) 104 | # The feature maps of size [33, 33] should be covered by 2x2 kernels with 105 | # size [17, 17]. 
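# (Worked arithmetic, inferred from scale_dimension in deeplab/core/utils.py:
# the feature size per side is (513 - 1) / 16 + 1 = 33 for crop 513 at output
# stride 16, and each cell of the 2x2 grid then pools a kernel of
# (33 - 1) / 2 + 1 = 17 per side.)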
106 | self.assertListEqual( 107 | operation[dense_prediction_cell._TARGET_SIZE], [33, 33]) 108 | self.assertListEqual(operation[dense_prediction_cell._KERNEL], [17, 17]) 109 | 110 | def testBuildCell(self): 111 | with self.test_session(graph=tf.Graph()) as sess: 112 | features = tf.random_normal([2, 33, 33, 5]) 113 | concat_logits = self.segmentation_layer.build_cell( 114 | features, 115 | output_stride=8, 116 | crop_size=[257, 257]) 117 | sess.run(tf.global_variables_initializer()) 118 | concat_logits = sess.run(concat_logits) 119 | self.assertTrue(concat_logits.any()) 120 | 121 | def testBuildCellWithImagePoolingCropSize(self): 122 | with self.test_session(graph=tf.Graph()) as sess: 123 | features = tf.random_normal([2, 33, 33, 5]) 124 | concat_logits = self.segmentation_layer.build_cell( 125 | features, 126 | output_stride=8, 127 | crop_size=[257, 257], 128 | image_pooling_crop_size=[129, 129]) 129 | sess.run(tf.global_variables_initializer()) 130 | concat_logits = sess.run(concat_logits) 131 | self.assertTrue(concat_logits.any()) 132 | 133 | 134 | if __name__ == '__main__': 135 | tf.test.main() -------------------------------------------------------------------------------- /deeplab/core/feature_extractor.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/feature_extractor.pyc -------------------------------------------------------------------------------- /deeplab/core/preprocess_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/preprocess_utils.pyc -------------------------------------------------------------------------------- /deeplab/core/resnet_v1_beta.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/resnet_v1_beta.pyc -------------------------------------------------------------------------------- /deeplab/core/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """This script contains utility functions.""" 17 | import tensorflow as tf 18 | 19 | slim = tf.contrib.slim 20 | 21 | 22 | def scale_dimension(dim, scale): 23 | """Scales the input dimension. 24 | 25 | Args: 26 | dim: Input dimension (a scalar or a scalar Tensor). 27 | scale: The amount of scaling applied to the input. 
28 | 29 | Returns: 30 | Scaled dimension. 31 | """ 32 | if isinstance(dim, tf.Tensor): 33 | return tf.cast((tf.to_float(dim) - 1.0) * scale + 1.0, dtype=tf.int32) 34 | else: 35 | return int((float(dim) - 1.0) * scale + 1.0) 36 | 37 | 38 | def split_separable_conv2d(inputs, 39 | filters, 40 | kernel_size=3, 41 | rate=1, 42 | weight_decay=0.00004, 43 | depthwise_weights_initializer_stddev=0.33, 44 | pointwise_weights_initializer_stddev=0.06, 45 | scope=None): 46 | """Splits a separable conv2d into depthwise and pointwise conv2d. 47 | 48 | This operation differs from `tf.layers.separable_conv2d` as this operation 49 | applies an activation function between depthwise and pointwise conv2d. 50 | 51 | Args: 52 | inputs: Input tensor with shape [batch, height, width, channels]. 53 | filters: Number of filters in the 1x1 pointwise convolution. 54 | kernel_size: A list of length 2: [kernel_height, kernel_width] of 55 | the filters. Can be an int if both values are the same. 56 | rate: Atrous convolution rate for the depthwise convolution. 57 | weight_decay: The weight decay to use for regularizing the model. 58 | depthwise_weights_initializer_stddev: The standard deviation of the 59 | truncated normal weight initializer for depthwise convolution. 60 | pointwise_weights_initializer_stddev: The standard deviation of the 61 | truncated normal weight initializer for pointwise convolution. 62 | scope: Optional scope for the operation. 63 | 64 | Returns: 65 | Computed features after split separable conv2d. 66 | """ 67 | outputs = slim.separable_conv2d( 68 | inputs, 69 | None, 70 | kernel_size=kernel_size, 71 | depth_multiplier=1, 72 | rate=rate, 73 | weights_initializer=tf.truncated_normal_initializer( 74 | stddev=depthwise_weights_initializer_stddev), 75 | weights_regularizer=None, 76 | scope=scope + '_depthwise') 77 | return slim.conv2d( 78 | outputs, 79 | filters, 80 | 1, 81 | weights_initializer=tf.truncated_normal_initializer( 82 | stddev=pointwise_weights_initializer_stddev), 83 | weights_regularizer=slim.l2_regularizer(weight_decay), 84 | scope=scope + '_pointwise') -------------------------------------------------------------------------------- /deeplab/core/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/utils.pyc -------------------------------------------------------------------------------- /deeplab/core/utils_test.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # ============================================================================== 16 | """Tests for utils.py.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab.core import utils 21 | 22 | 23 | class UtilsTest(tf.test.TestCase): 24 | 25 | def testScaleDimensionOutput(self): 26 | self.assertEqual(161, utils.scale_dimension(321, 0.5)) 27 | self.assertEqual(193, utils.scale_dimension(321, 0.6)) 28 | self.assertEqual(241, utils.scale_dimension(321, 0.75)) 29 | 30 | 31 | if __name__ == '__main__': 32 | tf.test.main() -------------------------------------------------------------------------------- /deeplab/core/xception.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/core/xception.pyc -------------------------------------------------------------------------------- /deeplab/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /deeplab/datasets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/datasets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/datasets/__pycache__/build_data.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/datasets/__pycache__/build_data.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/datasets/__pycache__/segmentation_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/datasets/__pycache__/segmentation_dataset.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/datasets/build_cityscapes_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Converts Cityscapes data to TFRecord file format with Example protos. 
17 | 18 | The Cityscapes dataset is expected to have the following directory structure: 19 | 20 | + cityscapes 21 | - build_cityscapes_data.py (current working directory). 22 | - build_data.py 23 | + cityscapesscripts 24 | + annotation 25 | + evaluation 26 | + helpers 27 | + preparation 28 | + viewer 29 | + gtFine 30 | + train 31 | + val 32 | + test 33 | + leftImg8bit 34 | + train 35 | + val 36 | + test 37 | + tfrecord 38 | 39 | This script converts data into sharded data files and saves them at the tfrecord folder. 40 | 41 | Note that before running this script, the users should (1) register at the 42 | Cityscapes dataset website, https://www.cityscapes-dataset.com, to 43 | download the dataset, and (2) run the script provided by Cityscapes 44 | `preparation/createTrainIdLabelImgs.py` to generate the training groundtruth. 45 | 46 | Also note that the TensorFlow model will be trained with `TrainId' instead 47 | of `EvalId' used on the evaluation server. Thus, the users need to convert 48 | the predicted labels to `EvalId` for evaluation on the server. See 49 | vis.py for more details. 50 | 51 | The Example proto contains the following fields: 52 | 53 | image/encoded: encoded image content. 54 | image/filename: image filename. 55 | image/format: image file format. 56 | image/height: image height. 57 | image/width: image width. 58 | image/channels: image channels. 59 | image/segmentation/class/encoded: encoded semantic segmentation content. 60 | image/segmentation/class/format: semantic segmentation file format. 61 | """ 62 | import glob 63 | import math 64 | import os.path 65 | import re 66 | import sys 67 | import build_data 68 | import tensorflow as tf 69 | 70 | FLAGS = tf.app.flags.FLAGS 71 | 72 | tf.app.flags.DEFINE_string('cityscapes_root', 73 | './cityscapes', 74 | 'Cityscapes dataset root folder.') 75 | 76 | tf.app.flags.DEFINE_string( 77 | 'output_dir', 78 | './tfrecord', 79 | 'Path to save converted SSTable of TensorFlow examples.') 80 | 81 | 82 | _NUM_SHARDS = 10 83 | 84 | # A map from data type to folder name that saves the data. 85 | _FOLDERS_MAP = { 86 | 'image': 'leftImg8bit', 87 | 'label': 'gtFine', 88 | } 89 | 90 | # A map from data type to filename postfix. 91 | _POSTFIX_MAP = { 92 | 'image': '_leftImg8bit', 93 | 'label': '_gtFine_labelTrainIds', 94 | } 95 | 96 | # A map from data type to data format. 97 | _DATA_FORMAT_MAP = { 98 | 'image': 'png', 99 | 'label': 'png', 100 | } 101 | 102 | # Image file pattern. 103 | _IMAGE_FILENAME_RE = re.compile('(.+)' + _POSTFIX_MAP['image']) 104 | 105 | 106 | def _get_files(data, dataset_split): 107 | """Gets files for the specified data type and dataset split. 108 | 109 | Args: 110 | data: String, desired data ('image' or 'label'). 111 | dataset_split: String, dataset split ('train', 'val', 'test'). 112 | 113 | Returns: 114 | A list of sorted file names or None when getting label for 115 | test set. 116 | """ 117 | if data == 'label' and dataset_split == 'test': 118 | return None 119 | pattern = '*%s.%s' % (_POSTFIX_MAP[data], _DATA_FORMAT_MAP[data]) 120 | search_files = os.path.join( 121 | FLAGS.cityscapes_root, _FOLDERS_MAP[data], dataset_split, '*', pattern) 122 | filenames = glob.glob(search_files) 123 | return sorted(filenames) 124 | 125 | 126 | def _convert_dataset(dataset_split): 127 | """Converts the specified dataset split to TFRecord format. 128 | 129 | Args: 130 | dataset_split: The dataset split (e.g., train, val).
131 | 132 | Raises: 133 | RuntimeError: If loaded image and label have different shape, or if the 134 | image file with specified postfix could not be found. 135 | """ 136 | image_files = _get_files('image', dataset_split) 137 | label_files = _get_files('label', dataset_split) 138 | 139 | num_images = len(image_files) 140 | num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS))) 141 | 142 | image_reader = build_data.ImageReader('png', channels=3) 143 | label_reader = build_data.ImageReader('png', channels=1) 144 | 145 | for shard_id in range(_NUM_SHARDS): 146 | shard_filename = '%s-%05d-of-%05d.tfrecord' % ( 147 | dataset_split, shard_id, _NUM_SHARDS) 148 | output_filename = os.path.join(FLAGS.output_dir, shard_filename) 149 | with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer: 150 | start_idx = shard_id * num_per_shard 151 | end_idx = min((shard_id + 1) * num_per_shard, num_images) 152 | for i in range(start_idx, end_idx): 153 | sys.stdout.write('\r>> Converting image %d/%d shard %d' % ( 154 | i + 1, num_images, shard_id)) 155 | sys.stdout.flush() 156 | # Read the image. 157 | image_data = tf.gfile.FastGFile(image_files[i], 'rb').read() 158 | height, width = image_reader.read_image_dims(image_data) 159 | # Read the semantic segmentation annotation. 160 | seg_data = tf.gfile.FastGFile(label_files[i], 'rb').read() 161 | seg_height, seg_width = label_reader.read_image_dims(seg_data) 162 | if height != seg_height or width != seg_width: 163 | raise RuntimeError('Shape mismatched between image and label.') 164 | # Convert to tf example. 165 | re_match = _IMAGE_FILENAME_RE.search(image_files[i]) 166 | if re_match is None: 167 | raise RuntimeError('Invalid image filename: ' + image_files[i]) 168 | filename = os.path.basename(re_match.group(1)) 169 | example = build_data.image_seg_to_tfexample( 170 | image_data, filename, height, width, seg_data) 171 | tfrecord_writer.write(example.SerializeToString()) 172 | sys.stdout.write('\n') 173 | sys.stdout.flush() 174 | 175 | 176 | def main(unused_argv): 177 | # Only support converting 'train' and 'val' sets for now. 178 | for dataset_split in ['train', 'val']: 179 | _convert_dataset(dataset_split) 180 | 181 | 182 | if __name__ == '__main__': 183 | tf.app.run() 184 | -------------------------------------------------------------------------------- /deeplab/datasets/build_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Contains common utility functions and classes for building dataset. 17 | 18 | This script contains utility functions and classes to converts dataset to 19 | TFRecord file format with Example protos. 20 | 21 | The Example proto contains the following fields: 22 | 23 | image/encoded: encoded image content. 
24 | image/filename: image filename. 25 | image/format: image file format. 26 | image/height: image height. 27 | image/width: image width. 28 | image/channels: image channels. 29 | image/segmentation/class/encoded: encoded semantic segmentation content. 30 | image/segmentation/class/format: semantic segmentation file format. 31 | """ 32 | import collections 33 | import six 34 | import tensorflow as tf 35 | 36 | FLAGS = tf.app.flags.FLAGS 37 | 38 | tf.app.flags.DEFINE_enum('image_format', 'png', ['jpg', 'jpeg', 'png'], 39 | 'Image format.') 40 | 41 | tf.app.flags.DEFINE_enum('label_format', 'png', ['png'], 42 | 'Segmentation label format.') 43 | 44 | # A map from image format to expected data format. 45 | _IMAGE_FORMAT_MAP = { 46 | 'jpg': 'jpeg', 47 | 'jpeg': 'jpeg', 48 | 'png': 'png', 49 | } 50 | 51 | 52 | class ImageReader(object): 53 | """Helper class that provides TensorFlow image coding utilities.""" 54 | 55 | def __init__(self, image_format='jpeg', channels=3): 56 | """Class constructor. 57 | 58 | Args: 59 | image_format: Image format. Only 'jpeg', 'jpg', or 'png' are supported. 60 | channels: Image channels. 61 | """ 62 | with tf.Graph().as_default(): 63 | self._decode_data = tf.placeholder(dtype=tf.string) 64 | self._image_format = image_format 65 | self._session = tf.Session() 66 | if self._image_format in ('jpeg', 'jpg'): 67 | self._decode = tf.image.decode_jpeg(self._decode_data, 68 | channels=channels) 69 | elif self._image_format == 'png': 70 | self._decode = tf.image.decode_png(self._decode_data, 71 | channels=channels) 72 | 73 | def read_image_dims(self, image_data): 74 | """Reads the image dimensions. 75 | 76 | Args: 77 | image_data: string of image data. 78 | 79 | Returns: 80 | image_height and image_width. 81 | """ 82 | image = self.decode_image(image_data) 83 | return image.shape[:2] 84 | 85 | def decode_image(self, image_data): 86 | """Decodes the image data string. 87 | 88 | Args: 89 | image_data: string of image data. 90 | 91 | Returns: 92 | Decoded image data. 93 | 94 | Raises: 95 | ValueError: Value of image channels not supported. 96 | """ 97 | image = self._session.run(self._decode, 98 | feed_dict={self._decode_data: image_data}) 99 | if len(image.shape) != 3 or image.shape[2] not in (1, 3): 100 | raise ValueError('The image channels not supported.') 101 | 102 | return image 103 | 104 | 105 | def _int64_list_feature(values): 106 | """Returns a TF-Feature of int64_list. 107 | 108 | Args: 109 | values: A scalar or list of values. 110 | 111 | Returns: 112 | A TF-Feature. 113 | """ 114 | if not isinstance(values, collections.Iterable): 115 | values = [values] 116 | 117 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 118 | 119 | 120 | def _bytes_list_feature(values): 121 | """Returns a TF-Feature of bytes. 122 | 123 | Args: 124 | values: A string. 125 | 126 | Returns: 127 | A TF-Feature. 128 | """ 129 | def norm2bytes(value): 130 | return value.encode() if isinstance(value, str) and six.PY3 else value 131 | 132 | return tf.train.Feature( 133 | bytes_list=tf.train.BytesList(value=[norm2bytes(values)])) 134 | 135 | 136 | def image_seg_to_tfexample(image_data, filename, height, width, seg_data): 137 | """Converts one image/segmentation pair to tf example. 138 | 139 | Args: 140 | image_data: string of image data. 141 | filename: image filename. 142 | height: image height. 143 | width: image width. 144 | seg_data: string of semantic segmentation data. 145 | 146 | Returns: 147 | tf example of one image/segmentation pair. 
148 | """ 149 | return tf.train.Example(features=tf.train.Features(feature={ 150 | 'image/encoded': _bytes_list_feature(image_data), 151 | 'image/filename': _bytes_list_feature(filename), 152 | 'image/format': _bytes_list_feature( 153 | _IMAGE_FORMAT_MAP[FLAGS.image_format]), 154 | 'image/height': _int64_list_feature(height), 155 | 'image/width': _int64_list_feature(width), 156 | 'image/channels': _int64_list_feature(3), 157 | 'image/segmentation/class/encoded': ( 158 | _bytes_list_feature(seg_data)), 159 | 'image/segmentation/class/format': _bytes_list_feature( 160 | FLAGS.label_format), 161 | })) 162 | -------------------------------------------------------------------------------- /deeplab/datasets/build_voc2012_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Converts PASCAL VOC 2012 data to TFRecord file format with Example protos. 17 | 18 | PASCAL VOC 2012 dataset is expected to have the following directory structure: 19 | 20 | + pascal_voc_seg 21 | - build_data.py 22 | - build_voc2012_data.py (current working directory). 23 | + VOCdevkit 24 | + VOC2012 25 | + JPEGImages 26 | + SegmentationClass 27 | + ImageSets 28 | + Segmentation 29 | + tfrecord 30 | 31 | Image folder: 32 | ./VOCdevkit/VOC2012/JPEGImages 33 | 34 | Semantic segmentation annotations: 35 | ./VOCdevkit/VOC2012/SegmentationClass 36 | 37 | list folder: 38 | ./VOCdevkit/VOC2012/ImageSets/Segmentation 39 | 40 | This script converts data into sharded data files and save at tfrecord folder. 41 | 42 | The Example proto contains the following fields: 43 | 44 | image/encoded: encoded image content. 45 | image/filename: image filename. 46 | image/format: image file format. 47 | image/height: image height. 48 | image/width: image width. 49 | image/channels: image channels. 50 | image/segmentation/class/encoded: encoded semantic segmentation content. 51 | image/segmentation/class/format: semantic segmentation file format. 
52 | """ 53 | import math 54 | import os.path 55 | import sys 56 | import build_data 57 | import tensorflow as tf 58 | 59 | FLAGS = tf.app.flags.FLAGS 60 | 61 | tf.app.flags.DEFINE_string('image_folder', 62 | './VOCdevkit/VOC2012/JPEGImages', 63 | 'Folder containing images.') 64 | 65 | tf.app.flags.DEFINE_string( 66 | 'semantic_segmentation_folder', 67 | './VOCdevkit/VOC2012/SegmentationClassRaw', 68 | 'Folder containing semantic segmentation annotations.') 69 | 70 | tf.app.flags.DEFINE_string( 71 | 'list_folder', 72 | './VOCdevkit/VOC2012/ImageSets/Segmentation', 73 | 'Folder containing lists for training and validation') 74 | 75 | tf.app.flags.DEFINE_string( 76 | 'output_dir', 77 | './tfrecord', 78 | 'Path to save converted SSTable of TensorFlow examples.') 79 | 80 | 81 | _NUM_SHARDS = 4 82 | 83 | 84 | def _convert_dataset(dataset_split): 85 | """Converts the specified dataset split to TFRecord format. 86 | 87 | Args: 88 | dataset_split: The dataset split (e.g., train, test). 89 | 90 | Raises: 91 | RuntimeError: If loaded image and label have different shape. 92 | """ 93 | dataset = os.path.basename(dataset_split)[:-4] 94 | sys.stdout.write('Processing ' + dataset) 95 | filenames = [x.strip('\n') for x in open(dataset_split, 'r')] 96 | num_images = len(filenames) 97 | num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS))) 98 | 99 | image_reader = build_data.ImageReader('jpeg', channels=3) 100 | label_reader = build_data.ImageReader('png', channels=1) 101 | 102 | for shard_id in range(_NUM_SHARDS): 103 | output_filename = os.path.join( 104 | FLAGS.output_dir, 105 | '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS)) 106 | with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer: 107 | start_idx = shard_id * num_per_shard 108 | end_idx = min((shard_id + 1) * num_per_shard, num_images) 109 | for i in range(start_idx, end_idx): 110 | sys.stdout.write('\r>> Converting image %d/%d shard %d' % ( 111 | i + 1, len(filenames), shard_id)) 112 | sys.stdout.flush() 113 | # Read the image. 114 | image_filename = os.path.join( 115 | FLAGS.image_folder, filenames[i] + '.' + FLAGS.image_format) 116 | image_data = tf.gfile.FastGFile(image_filename, 'rb').read() 117 | height, width = image_reader.read_image_dims(image_data) 118 | # Read the semantic segmentation annotation. 119 | seg_filename = os.path.join( 120 | FLAGS.semantic_segmentation_folder, 121 | filenames[i] + '.' + FLAGS.label_format) 122 | seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read() 123 | seg_height, seg_width = label_reader.read_image_dims(seg_data) 124 | if height != seg_height or width != seg_width: 125 | raise RuntimeError('Shape mismatched between image and label.') 126 | # Convert to tf example. 127 | example = build_data.image_seg_to_tfexample( 128 | image_data, filenames[i], height, width, seg_data) 129 | tfrecord_writer.write(example.SerializeToString()) 130 | sys.stdout.write('\n') 131 | sys.stdout.flush() 132 | 133 | 134 | def main(unused_argv): 135 | dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt')) 136 | for dataset_split in dataset_splits: 137 | _convert_dataset(dataset_split) 138 | 139 | 140 | if __name__ == '__main__': 141 | tf.app.run() 142 | -------------------------------------------------------------------------------- /deeplab/datasets/convert_cityscapes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | # 17 | # Script to preprocess the Cityscapes dataset. Note (1) the users should 18 | # register at the Cityscapes dataset website, 19 | # https://www.cityscapes-dataset.com/downloads/, to download the dataset, 20 | # and (2) the users should download the utility scripts provided by 21 | # Cityscapes at https://github.com/mcordts/cityscapesScripts. 22 | # 23 | # Usage: 24 | # bash ./convert_cityscapes.sh 25 | # 26 | # The folder structure is assumed to be: 27 | # + datasets 28 | # - build_cityscapes_data.py 29 | # - convert_cityscapes.sh 30 | # + cityscapes 31 | # + cityscapesscripts (downloaded scripts) 32 | # + gtFine 33 | # + leftImg8bit 34 | # 35 | 36 | # Exit immediately if a command exits with a non-zero status. 37 | set -e 38 | 39 | CURRENT_DIR=$(pwd) 40 | WORK_DIR="." 41 | 42 | # Root path for Cityscapes dataset. 43 | CITYSCAPES_ROOT="${WORK_DIR}/cityscapes" 44 | 45 | # Create training labels. 46 | python "${CITYSCAPES_ROOT}/cityscapesscripts/preparation/createTrainIdLabelImgs.py" 47 | 48 | # Build TFRecords of the dataset. 49 | # First, create output directory for storing TFRecords. 50 | OUTPUT_DIR="${CITYSCAPES_ROOT}/tfrecord" 51 | mkdir -p "${OUTPUT_DIR}" 52 | 53 | BUILD_SCRIPT="${CURRENT_DIR}/build_cityscapes_data.py" 54 | 55 | echo "Converting Cityscapes dataset..." 56 | python "${BUILD_SCRIPT}" \ 57 | --cityscapes_root="${CITYSCAPES_ROOT}" \ 58 | --output_dir="${OUTPUT_DIR}" \ 59 | -------------------------------------------------------------------------------- /deeplab/datasets/remove_gt_colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Removes the color map from segmentation annotations. 17 | 18 | Removes the color map from the ground truth segmentation annotations and saves 19 | the results to output_dir.
20 | """ 21 | import glob 22 | import os.path 23 | import numpy as np 24 | 25 | from PIL import Image 26 | 27 | import tensorflow as tf 28 | 29 | FLAGS = tf.app.flags.FLAGS 30 | 31 | tf.app.flags.DEFINE_string('original_gt_folder', 32 | './VOCdevkit/VOC2012/SegmentationClass', 33 | 'Original ground truth annotations.') 34 | 35 | tf.app.flags.DEFINE_string('segmentation_format', 'png', 'Segmentation format.') 36 | 37 | tf.app.flags.DEFINE_string('output_dir', 38 | './VOCdevkit/VOC2012/SegmentationClassRaw', 39 | 'folder to save modified ground truth annotations.') 40 | 41 | 42 | def _remove_colormap(filename): 43 | """Removes the color map from the annotation. 44 | 45 | Args: 46 | filename: Ground truth annotation filename. 47 | 48 | Returns: 49 | Annotation without color map. 50 | """ 51 | return np.array(Image.open(filename)) 52 | 53 | 54 | def _save_annotation(annotation, filename): 55 | """Saves the annotation as png file. 56 | 57 | Args: 58 | annotation: Segmentation annotation. 59 | filename: Output filename. 60 | """ 61 | pil_image = Image.fromarray(annotation.astype(dtype=np.uint8)) 62 | with tf.gfile.Open(filename, mode='w') as f: 63 | pil_image.save(f, 'PNG') 64 | 65 | 66 | def main(unused_argv): 67 | # Create the output directory if not exists. 68 | if not tf.gfile.IsDirectory(FLAGS.output_dir): 69 | tf.gfile.MakeDirs(FLAGS.output_dir) 70 | 71 | annotations = glob.glob(os.path.join(FLAGS.original_gt_folder, 72 | '*.' + FLAGS.segmentation_format)) 73 | for annotation in annotations: 74 | raw_annotation = _remove_colormap(annotation) 75 | filename = os.path.splitext(os.path.basename(annotation))[0] 76 | _save_annotation(raw_annotation, 77 | os.path.join( 78 | FLAGS.output_dir, 79 | filename + '.' + FLAGS.segmentation_format)) 80 | 81 | 82 | if __name__ == '__main__': 83 | tf.app.run() 84 | -------------------------------------------------------------------------------- /deeplab/export_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Exports trained model to TensorFlow frozen graph.""" 16 | 17 | import os 18 | import tensorflow as tf 19 | 20 | from tensorflow.python.tools import freeze_graph 21 | from deeplab import common 22 | from deeplab import input_preprocess 23 | from deeplab import model 24 | 25 | slim = tf.contrib.slim 26 | flags = tf.app.flags 27 | 28 | FLAGS = flags.FLAGS 29 | 30 | flags.DEFINE_string('checkpoint_path', None, 'Checkpoint path') 31 | 32 | flags.DEFINE_string('export_path', None, 33 | 'Path to output Tensorflow frozen graph.') 34 | 35 | flags.DEFINE_integer('num_classes', 21, 'Number of classes.') 36 | 37 | flags.DEFINE_multi_integer('crop_size', [513, 513], 38 | 'Crop size [height, width].') 39 | 40 | # For `xception_65`, use atrous_rates = [12, 24, 36] if output_stride = 8, or 41 | # rates = [6, 12, 18] if output_stride = 16. For `mobilenet_v2`, use None. Note 42 | # one could use different atrous_rates/output_stride during training/evaluation. 43 | flags.DEFINE_multi_integer('atrous_rates', None, 44 | 'Atrous rates for atrous spatial pyramid pooling.') 45 | 46 | flags.DEFINE_integer('output_stride', 8, 47 | 'The ratio of input to output spatial resolution.') 48 | 49 | # Change to [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] for multi-scale inference. 50 | flags.DEFINE_multi_float('inference_scales', [1.0], 51 | 'The scales to resize images for inference.') 52 | 53 | flags.DEFINE_bool('add_flipped_images', False, 54 | 'Add flipped images during inference or not.') 55 | 56 | # Input name of the exported model. 57 | _INPUT_NAME = 'ImageTensor' 58 | 59 | # Output name of the exported model. 60 | _OUTPUT_NAME = 'SemanticPredictions' 61 | 62 | 63 | def _create_input_tensors(): 64 | """Creates and prepares input tensors for DeepLab model. 65 | 66 | This method creates a 4-D uint8 image tensor 'ImageTensor' with shape 67 | [1, None, None, 3]. The actual input tensor name to use during inference is 68 | 'ImageTensor:0'. 69 | 70 | Returns: 71 | image: Preprocessed 4-D float32 tensor with shape [1, crop_height, 72 | crop_width, 3]. 73 | original_image_size: Original image shape tensor [height, width]. 74 | resized_image_size: Resized image shape tensor [height, width]. 75 | """ 76 | # input_preprocess takes 4-D image tensor as input. 77 | input_image = tf.placeholder(tf.uint8, [1, None, None, 3], name=_INPUT_NAME) 78 | original_image_size = tf.shape(input_image)[1:3] 79 | 80 | # Squeeze the dimension in axis=0 since `preprocess_image_and_label` assumes 81 | # image to be 3-D. 82 | image = tf.squeeze(input_image, axis=0) 83 | resized_image, image, _ = input_preprocess.preprocess_image_and_label( 84 | image, 85 | label=None, 86 | crop_height=FLAGS.crop_size[0], 87 | crop_width=FLAGS.crop_size[1], 88 | min_resize_value=FLAGS.min_resize_value, 89 | max_resize_value=FLAGS.max_resize_value, 90 | resize_factor=FLAGS.resize_factor, 91 | is_training=False, 92 | model_variant=FLAGS.model_variant) 93 | resized_image_size = tf.shape(resized_image)[:2] 94 | 95 | # Expand the dimension in axis=0, since the following operations assume the 96 | # image to be 4-D. 
97 | image = tf.expand_dims(image, 0) 98 | 99 | return image, original_image_size, resized_image_size 100 | 101 | 102 | def main(unused_argv): 103 | tf.logging.set_verbosity(tf.logging.INFO) 104 | tf.logging.info('Prepare to export model to: %s', FLAGS.export_path) 105 | 106 | with tf.Graph().as_default(): 107 | image, image_size, resized_image_size = _create_input_tensors() 108 | 109 | model_options = common.ModelOptions( 110 | outputs_to_num_classes={common.OUTPUT_TYPE: FLAGS.num_classes}, 111 | crop_size=FLAGS.crop_size, 112 | atrous_rates=FLAGS.atrous_rates, 113 | output_stride=FLAGS.output_stride) 114 | 115 | if tuple(FLAGS.inference_scales) == (1.0,): 116 | tf.logging.info('Exported model performs single-scale inference.') 117 | predictions = model.predict_labels( 118 | image, 119 | model_options=model_options, 120 | image_pyramid=FLAGS.image_pyramid) 121 | else: 122 | tf.logging.info('Exported model performs multi-scale inference.') 123 | predictions = model.predict_labels_multi_scale( 124 | image, 125 | model_options=model_options, 126 | eval_scales=FLAGS.inference_scales, 127 | add_flipped_images=FLAGS.add_flipped_images) 128 | 129 | predictions = tf.cast(predictions[common.OUTPUT_TYPE], tf.float32) 130 | # Crop the valid regions from the predictions. 131 | semantic_predictions = tf.slice( 132 | predictions, 133 | [0, 0, 0], 134 | [1, resized_image_size[0], resized_image_size[1]]) 135 | # Resize back the prediction to the original image size. 136 | def _resize_label(label, label_size): 137 | # Expand dimension of label to [1, height, width, 1] for resize operation. 138 | label = tf.expand_dims(label, 3) 139 | resized_label = tf.image.resize_images( 140 | label, 141 | label_size, 142 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, 143 | align_corners=True) 144 | return tf.cast(tf.squeeze(resized_label, 3), tf.int32) 145 | semantic_predictions = _resize_label(semantic_predictions, image_size) 146 | semantic_predictions = tf.identity(semantic_predictions, name=_OUTPUT_NAME) 147 | 148 | saver = tf.train.Saver(tf.model_variables()) 149 | 150 | tf.gfile.MakeDirs(os.path.dirname(FLAGS.export_path)) 151 | freeze_graph.freeze_graph_with_def_protos( 152 | tf.get_default_graph().as_graph_def(add_shapes=True), 153 | saver.as_saver_def(), 154 | FLAGS.checkpoint_path, 155 | _OUTPUT_NAME, 156 | restore_op_name=None, 157 | filename_tensor_name=None, 158 | output_graph=FLAGS.export_path, 159 | clear_devices=True, 160 | initializer_nodes=None) 161 | 162 | 163 | if __name__ == '__main__': 164 | flags.mark_flag_as_required('checkpoint_path') 165 | flags.mark_flag_as_required('export_path') 166 | tf.app.run() 167 | -------------------------------------------------------------------------------- /deeplab/input_preprocess.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
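# Example invocation of export_model.py above -- a sketch with placeholder
# checkpoint/output paths (--model_variant comes from deeplab/common.py,
# imported above); num_classes=19 matches Cityscapes, and the multi-integer
# crop_size flag is passed once per dimension:
#
#   python ./deeplab/export_model.py \
#     --checkpoint_path=./models/xception_65/model.ckpt \
#     --export_path=./models/xception_65/frozen_inference_graph.pb \
#     --model_variant=xception_65 \
#     --num_classes=19 \
#     --crop_size=1025 --crop_size=2049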
14 | # ============================================================================== 15 | 16 | """Prepares the data used for DeepLab training/evaluation.""" 17 | import tensorflow as tf 18 | from deeplab.core import feature_extractor 19 | from deeplab.core import preprocess_utils 20 | 21 | 22 | # The probability of flipping the images and labels 23 | # left-right during training 24 | _PROB_OF_FLIP = 0.5 25 | 26 | 27 | def preprocess_image_and_label(image, 28 | label, 29 | crop_height, 30 | crop_width, 31 | min_resize_value=None, 32 | max_resize_value=None, 33 | resize_factor=None, 34 | min_scale_factor=1., 35 | max_scale_factor=1., 36 | scale_factor_step_size=0, 37 | ignore_label=255, 38 | is_training=True, 39 | model_variant=None): 40 | """Preprocesses the image and label. 41 | 42 | Args: 43 | image: Input image. 44 | label: Ground truth annotation label. 45 | crop_height: The height value used to crop the image and label. 46 | crop_width: The width value used to crop the image and label. 47 | min_resize_value: Desired size of the smaller image side. 48 | max_resize_value: Maximum allowed size of the larger image side. 49 | resize_factor: Resized dimensions are multiple of factor plus one. 50 | min_scale_factor: Minimum scale factor value. 51 | max_scale_factor: Maximum scale factor value. 52 | scale_factor_step_size: The step size from min scale factor to max scale 53 | factor. The input is randomly scaled based on the value of 54 | (min_scale_factor, max_scale_factor, scale_factor_step_size). 55 | ignore_label: The label value which will be ignored for training and 56 | evaluation. 57 | is_training: If the preprocessing is used for training or not. 58 | model_variant: Model variant (string) for choosing how to mean-subtract the 59 | images. See feature_extractor.network_map for supported model variants. 60 | 61 | Returns: 62 | original_image: Original image (could be resized). 63 | processed_image: Preprocessed image. 64 | label: Preprocessed ground truth segmentation label. 65 | 66 | Raises: 67 | ValueError: Ground truth label not provided during training. 68 | """ 69 | if is_training and label is None: 70 | raise ValueError('During training, label must be provided.') 71 | if model_variant is None: 72 | tf.logging.warning('Default mean-subtraction is performed. Please specify ' 73 | 'a model_variant. See feature_extractor.network_map for ' 74 | 'supported model variants.') 75 | 76 | # Keep reference to original image. 77 | original_image = image 78 | 79 | processed_image = tf.cast(image, tf.float32) 80 | 81 | if label is not None: 82 | label = tf.cast(label, tf.int32) 83 | 84 | # Resize image and label to the desired range. 85 | if min_resize_value is not None or max_resize_value is not None: 86 | [processed_image, label] = ( 87 | preprocess_utils.resize_to_range( 88 | image=processed_image, 89 | label=label, 90 | min_size=min_resize_value, 91 | max_size=max_resize_value, 92 | factor=resize_factor, 93 | align_corners=True)) 94 | # The `original_image` becomes the resized image. 95 | original_image = tf.identity(processed_image) 96 | 97 | # Data augmentation by randomly scaling the inputs. 
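# (For example, with min_scale_factor=0.5, max_scale_factor=2.0 and
# scale_factor_step_size=0.25, the scale is drawn from the discrete set
# {0.5, 0.75, ..., 2.0}; with scale_factor_step_size=0 it is drawn
# continuously from [min_scale_factor, max_scale_factor].)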
98 | if is_training: 99 | scale = preprocess_utils.get_random_scale( 100 | min_scale_factor, max_scale_factor, scale_factor_step_size) 101 | processed_image, label = preprocess_utils.randomly_scale_image_and_label( 102 | processed_image, label, scale) 103 | processed_image.set_shape([None, None, 3]) 104 | 105 | # Pad image and label to have dimensions >= [crop_height, crop_width] 106 | image_shape = tf.shape(processed_image) 107 | image_height = image_shape[0] 108 | image_width = image_shape[1] 109 | 110 | target_height = image_height + tf.maximum(crop_height - image_height, 0) 111 | target_width = image_width + tf.maximum(crop_width - image_width, 0) 112 | 113 | # Pad image with mean pixel value. 114 | mean_pixel = tf.reshape( 115 | feature_extractor.mean_pixel(model_variant), [1, 1, 3]) 116 | processed_image = preprocess_utils.pad_to_bounding_box( 117 | processed_image, 0, 0, target_height, target_width, mean_pixel) 118 | 119 | if label is not None: 120 | label = preprocess_utils.pad_to_bounding_box( 121 | label, 0, 0, target_height, target_width, ignore_label) 122 | 123 | # Randomly crop the image and label. 124 | if is_training and label is not None: 125 | processed_image, label = preprocess_utils.random_crop( 126 | [processed_image, label], crop_height, crop_width) 127 | 128 | processed_image.set_shape([crop_height, crop_width, 3]) 129 | 130 | if label is not None: 131 | label.set_shape([crop_height, crop_width, 1]) 132 | 133 | if is_training: 134 | # Randomly left-right flip the image and label. 135 | processed_image, label, _ = preprocess_utils.flip_dim( 136 | [processed_image, label], _PROB_OF_FLIP, dim=1) 137 | 138 | return original_image, processed_image, label 139 | 140 | -------------------------------------------------------------------------------- /deeplab/my_metrics.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | from tensorflow.python.ops import variable_scope 3 | from tensorflow.python.ops import math_ops 4 | from tensorflow.python.ops import array_ops 5 | from tensorflow.python.ops import state_ops 6 | from tensorflow.python.framework import ops 7 | from tensorflow.python.eager import context 8 | from tensorflow.python.framework import dtypes 9 | from tensorflow.python.ops import confusion_matrix 10 | 11 | 12 | def metric_variable(shape, dtype, validate_shape=True, name=None): 13 | """Create variable in `GraphKeys.(LOCAL|METRIC_VARIABLES`) collections.""" 14 | return variable_scope.variable( 15 | lambda: array_ops.zeros(shape, dtype), 16 | trainable=False, 17 | collections=[ 18 | ops.GraphKeys.LOCAL_VARIABLES, ops.GraphKeys.METRIC_VARIABLES 19 | ], 20 | validate_shape=validate_shape, 21 | name=name) 22 | 23 | 24 | def _streaming_confusion_matrix(labels, predictions, num_classes, weights=None): 25 | """Calculate a streaming confusion matrix. 26 | Calculates a confusion matrix. For estimation over a stream of data, 27 | the function creates an `update_op` operation. 28 | Args: 29 | labels: A `Tensor` of ground truth labels with shape [batch size] and of 30 | type `int32` or `int64`. The tensor will be flattened if its rank > 1. 31 | predictions: A `Tensor` of prediction results for semantic labels, whose 32 | shape is [batch size] and type `int32` or `int64`. The tensor will be 33 | flattened if its rank > 1. 34 | num_classes: The possible number of labels the prediction task can 35 | have. 
This value must be provided, since a confusion matrix of 36 | dimension = [num_classes, num_classes] will be allocated. 37 | weights: Optional `Tensor` whose rank is either 0, or the same rank as 38 | `labels`, and must be broadcastable to `labels` (i.e., all dimensions must 39 | be either `1`, or the same as the corresponding `labels` dimension). 40 | Returns: 41 | total_cm: A `Tensor` representing the confusion matrix. 42 | update_op: An operation that increments the confusion matrix. 43 | """ 44 | # Local variable to accumulate the predictions in the confusion matrix. 45 | total_cm = metric_variable( 46 | [num_classes, num_classes], dtypes.float64, name='total_confusion_matrix') 47 | 48 | # Cast the type to int64 required by confusion_matrix_ops. 49 | predictions = math_ops.to_int64(predictions) 50 | labels = math_ops.to_int64(labels) 51 | num_classes = math_ops.to_int64(num_classes) 52 | 53 | # Flatten the input if its rank > 1. 54 | if predictions.get_shape().ndims > 1: 55 | predictions = array_ops.reshape(predictions, [-1]) 56 | 57 | if labels.get_shape().ndims > 1: 58 | labels = array_ops.reshape(labels, [-1]) 59 | 60 | if (weights is not None) and (weights.get_shape().ndims > 1): 61 | weights = array_ops.reshape(weights, [-1]) 62 | 63 | # Accumulate the prediction to current confusion matrix. 64 | current_cm = confusion_matrix.confusion_matrix( 65 | labels, predictions, num_classes, weights=weights, dtype=dtypes.float64) 66 | update_op = state_ops.assign_add(total_cm, current_cm) 67 | return total_cm, update_op 68 | 69 | 70 | def _safe_div(numerator, denominator, name): 71 | """Divides two tensors element-wise, returning 0 if the denominator is <= 0. 72 | Args: 73 | numerator: A real `Tensor`. 74 | denominator: A real `Tensor`, with dtype matching `numerator`. 75 | name: Name for the returned op. 76 | Returns: 77 | 0 if `denominator` <= 0, else `numerator` / `denominator` 78 | """ 79 | t = math_ops.truediv(numerator, denominator) 80 | zero = array_ops.zeros_like(t, dtype=denominator.dtype) 81 | condition = math_ops.greater(denominator, zero) 82 | zero = math_ops.cast(zero, t.dtype) 83 | return array_ops.where(condition, t, zero, name=name) 84 | 85 | 86 | 87 | def iou(labels, 88 | predictions, 89 | num_classes, 90 | weights=None, 91 | metrics_collections=None, 92 | updates_collections=None, 93 | name=None): 94 | if context.executing_eagerly(): 95 | raise RuntimeError('tf.metrics.mean_iou is not supported when ' 96 | 'eager execution is enabled.') 97 | 98 | with variable_scope.variable_scope(name, 'iou', 99 | (predictions, labels, weights)): 100 | # Check if shape is compatible. 101 | predictions.get_shape().assert_is_compatible_with(labels.get_shape()) 102 | 103 | total_cm, update_op = _streaming_confusion_matrix(labels, predictions, 104 | num_classes, weights) 105 | 106 | def compute_iou(name): 107 | """Compute the mean intersection-over-union via the confusion matrix.""" 108 | # Column sums (total predicted pixels per class). 109 | sum_over_row = math_ops.to_float(math_ops.reduce_sum(total_cm, 0)) 110 | # Row sums (total ground-truth pixels per class). 111 | sum_over_col = math_ops.to_float(math_ops.reduce_sum(total_cm, 1)) 112 | # Intersection: the diagonal of the confusion matrix. 113 | cm_diag = math_ops.to_float(array_ops.diag_part(total_cm)) 114 | # Union: column sums + row sums - diagonal. 115 | denominator = sum_over_row + sum_over_col - cm_diag 116 | # The mean is only computed over classes that appear in the 117 | # label or prediction tensor. If the denominator is 0, we need to 118 | # ignore the class.
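# Worked example with a hypothetical 2-class confusion matrix [[3, 1], [2, 4]]
# (rows = ground truth, columns = predictions): sum_over_row = [5, 5],
# sum_over_col = [4, 6], cm_diag = [3, 4], so denominator = [6, 7] and the
# per-class IoU computed below is [3/6, 4/7].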
119 | num_valid_entries = math_ops.reduce_sum( 120 | math_ops.cast( 121 | math_ops.not_equal(denominator, 0), dtype=dtypes.float32)) 122 | 123 | # If the value of the denominator is 0, set it to 1 to avoid 124 | # zero division. 125 | denominator = array_ops.where( 126 | math_ops.greater(denominator, 0), denominator, 127 | array_ops.ones_like(denominator)) 128 | # IoU: intersection over union. 129 | iou = math_ops.div(cm_diag, denominator) 130 | return iou 131 | 132 | iou_v = compute_iou('iou') 133 | 134 | if metrics_collections: 135 | ops.add_to_collections(metrics_collections, iou_v) 136 | 137 | if updates_collections: 138 | ops.add_to_collections(updates_collections, update_op) 139 | return iou_v, update_op 140 | -------------------------------------------------------------------------------- /deeplab/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/utils/__init__.py -------------------------------------------------------------------------------- /deeplab/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/utils/__init__.pyc -------------------------------------------------------------------------------- /deeplab/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/utils/__pycache__/input_generator.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/utils/__pycache__/input_generator.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/utils/__pycache__/train_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/utils/__pycache__/train_utils.cpython-36.pyc -------------------------------------------------------------------------------- /deeplab/utils/get_dataset_colormap_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
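# Example use of my_metrics.iou above -- a minimal sketch; unlike
# tf.metrics.mean_iou it returns the per-class IoU vector rather than the
# scalar mean, so the caller can mask out absent classes before averaging:
#
#   iou_v, update_op = my_metrics.iou(labels, predictions, num_classes=19)
#   # run update_op once per evaluation batch, then fetch iou_v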
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for get_dataset_colormap.py.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from deeplab.utils import get_dataset_colormap 22 | 23 | 24 | class VisualizationUtilTest(tf.test.TestCase): 25 | 26 | def testBitGet(self): 27 | """Test that the returned bit value is correct.""" 28 | self.assertEqual(1, get_dataset_colormap.bit_get(9, 0)) 29 | self.assertEqual(0, get_dataset_colormap.bit_get(9, 1)) 30 | self.assertEqual(0, get_dataset_colormap.bit_get(9, 2)) 31 | self.assertEqual(1, get_dataset_colormap.bit_get(9, 3)) 32 | 33 | def testPASCALLabelColorMapValue(self): 34 | """Test the retrieved color map value.""" 35 | colormap = get_dataset_colormap.create_pascal_label_colormap() 36 | 37 | # Only test a few sampled entries in the color map. 38 | self.assertTrue(np.array_equal([128., 0., 128.], colormap[5, :])) 39 | self.assertTrue(np.array_equal([128., 192., 128.], colormap[23, :])) 40 | self.assertTrue(np.array_equal([128., 0., 192.], colormap[37, :])) 41 | self.assertTrue(np.array_equal([224., 192., 192.], colormap[127, :])) 42 | self.assertTrue(np.array_equal([192., 160., 192.], colormap[175, :])) 43 | 44 | def testLabelToPASCALColorImage(self): 45 | """Test the value of the converted label.""" 46 | label = np.array([[0, 16, 16], [52, 7, 52]]) 47 | expected_result = np.array([ 48 | [[0, 0, 0], [0, 64, 0], [0, 64, 0]], 49 | [[0, 64, 192], [128, 128, 128], [0, 64, 192]] 50 | ]) 51 | colored_label = get_dataset_colormap.label_to_color_image( 52 | label, get_dataset_colormap.get_pascal_name()) 53 | self.assertTrue(np.array_equal(expected_result, colored_label)) 54 | 55 | def testUnExpectedLabelValueForLabelToPASCALColorImage(self): 56 | """Raise ValueError when input value exceeds range.""" 57 | label = np.array([[120], [300]]) 58 | with self.assertRaises(ValueError): 59 | get_dataset_colormap.label_to_color_image( 60 | label, get_dataset_colormap.get_pascal_name()) 61 | 62 | def testUnExpectedLabelDimensionForLabelToPASCALColorImage(self): 63 | """Raise ValueError if input dimension is not correct.""" 64 | label = np.array([120]) 65 | with self.assertRaises(ValueError): 66 | get_dataset_colormap.label_to_color_image( 67 | label, get_dataset_colormap.get_pascal_name()) 68 | 69 | def testGetColormapForUnsupportedDataset(self): 70 | with self.assertRaises(ValueError): 71 | get_dataset_colormap.create_label_colormap('unsupported_dataset') 72 | 73 | def testUnExpectedLabelDimensionForLabelToADE20KColorImage(self): 74 | label = np.array([250]) 75 | with self.assertRaises(ValueError): 76 | get_dataset_colormap.label_to_color_image( 77 | label, get_dataset_colormap.get_ade20k_name()) 78 | 79 | def testFirstColorInADE20KColorMap(self): 80 | label = np.array([[1, 3], [10, 20]]) 81 | expected_result = np.array([ 82 | [[120, 120, 120], [6, 230, 230]], 83 | [[4, 250, 7], [204, 70, 3]] 84 | ]) 85 | colored_label = get_dataset_colormap.label_to_color_image( 86 | label, get_dataset_colormap.get_ade20k_name()) 87 | self.assertTrue(np.array_equal(colored_label, expected_result)) 88 | 89 | def testMapillaryVistasColorMapValue(self): 90 | colormap = get_dataset_colormap.create_mapillary_vistas_label_colormap() 91 | self.assertTrue(np.array_equal([190, 153, 153], colormap[3, :])) 92 | self.assertTrue(np.array_equal([102, 102, 156], colormap[6, :])) 93 | 94 | 95 | if __name__
== '__main__': 96 | tf.test.main() 97 | -------------------------------------------------------------------------------- /deeplab/utils/input_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Wrapper for providing semantic segmentation data.""" 16 | 17 | import tensorflow as tf 18 | from deeplab import common 19 | from deeplab import input_preprocess 20 | 21 | slim = tf.contrib.slim 22 | 23 | dataset_data_provider = slim.dataset_data_provider 24 | 25 | 26 | def _get_data(data_provider, dataset_split): 27 | """Gets data from data provider. 28 | 29 | Args: 30 | data_provider: An object of slim.data_provider. 31 | dataset_split: Dataset split. 32 | 33 | Returns: 34 | image: Image Tensor. 35 | label: Label Tensor storing segmentation annotations. 36 | image_name: Image name. 37 | height: Image height. 38 | width: Image width. 39 | 40 | Raises: 41 | ValueError: Failed to find label. 42 | """ 43 | if common.LABELS_CLASS not in data_provider.list_items(): 44 | raise ValueError('Failed to find labels.') 45 | 46 | image, height, width = data_provider.get( 47 | [common.IMAGE, common.HEIGHT, common.WIDTH]) 48 | 49 | # Some datasets do not contain image_name. 50 | if common.IMAGE_NAME in data_provider.list_items(): 51 | image_name, = data_provider.get([common.IMAGE_NAME]) 52 | else: 53 | image_name = tf.constant('') 54 | 55 | label = None 56 | if dataset_split != common.TEST_SET: 57 | label, = data_provider.get([common.LABELS_CLASS]) 58 | 59 | return image, label, image_name, height, width 60 | 61 | 62 | def get(dataset, 63 | crop_size, 64 | batch_size, 65 | min_resize_value=None, 66 | max_resize_value=None, 67 | resize_factor=None, 68 | min_scale_factor=1., 69 | max_scale_factor=1., 70 | scale_factor_step_size=0, 71 | num_readers=1, 72 | num_threads=1, 73 | dataset_split=None, 74 | is_training=True, 75 | model_variant=None): 76 | """Gets the dataset split for semantic segmentation. 77 | 78 | This function gets the dataset split for semantic segmentation. In 79 | particular, it is a wrapper of (1) dataset_data_provider, which returns the raw 80 | dataset split, (2) input_preprocess, which preprocesses the raw data, and (3) the 81 | TensorFlow operation of batching the preprocessed data. The output can then 82 | be directly used for training, evaluation or visualization. 83 | 84 | Args: 85 | dataset: An instance of slim Dataset. 86 | crop_size: Image crop size [height, width]. 87 | batch_size: Batch size. 88 | min_resize_value: Desired size of the smaller image side. 89 | max_resize_value: Maximum allowed size of the larger image side. 90 | resize_factor: Resized dimensions are multiple of factor plus one. 91 | min_scale_factor: Minimum scale factor value. 92 | max_scale_factor: Maximum scale factor value.
93 | scale_factor_step_size: The step size from min scale factor to max scale 94 | factor. The input is randomly scaled based on the value of 95 | (min_scale_factor, max_scale_factor, scale_factor_step_size). 96 | num_readers: Number of readers for data provider. 97 | num_threads: Number of threads for batching data. 98 | dataset_split: Dataset split. 99 | is_training: Is training or not. 100 | model_variant: Model variant (string) for choosing how to mean-subtract the 101 | images. See feature_extractor.network_map for supported model variants. 102 | 103 | Returns: 104 | A dictionary of batched Tensors for semantic segmentation. 105 | 106 | Raises: 107 | ValueError: dataset_split is None, failed to find labels, or label shape 108 | is not valid. 109 | """ 110 | if dataset_split is None: 111 | raise ValueError('Unknown dataset split.') 112 | if model_variant is None: 113 | tf.logging.warning('Please specify a model_variant. See ' 114 | 'feature_extractor.network_map for supported model ' 115 | 'variants.') 116 | 117 | data_provider = dataset_data_provider.DatasetDataProvider( 118 | dataset, 119 | num_readers=num_readers, 120 | num_epochs=None if is_training else 1, 121 | shuffle=is_training) 122 | image, label, image_name, height, width = _get_data(data_provider, 123 | dataset_split) 124 | if label is not None: 125 | if label.shape.ndims == 2: 126 | label = tf.expand_dims(label, 2) 127 | elif label.shape.ndims == 3 and label.shape.dims[2] == 1: 128 | pass 129 | else: 130 | raise ValueError('Input label shape must be [height, width], or ' 131 | '[height, width, 1].') 132 | 133 | label.set_shape([None, None, 1]) 134 | original_image, image, label = input_preprocess.preprocess_image_and_label( 135 | image, 136 | label, 137 | crop_height=crop_size[0], 138 | crop_width=crop_size[1], 139 | min_resize_value=min_resize_value, 140 | max_resize_value=max_resize_value, 141 | resize_factor=resize_factor, 142 | min_scale_factor=min_scale_factor, 143 | max_scale_factor=max_scale_factor, 144 | scale_factor_step_size=scale_factor_step_size, 145 | ignore_label=dataset.ignore_label, 146 | is_training=is_training, 147 | model_variant=model_variant) 148 | sample = { 149 | common.IMAGE: image, 150 | common.IMAGE_NAME: image_name, 151 | common.HEIGHT: height, 152 | common.WIDTH: width 153 | } 154 | if label is not None: 155 | sample[common.LABEL] = label 156 | 157 | if not is_training: 158 | # Original image is only used during visualization. 159 | sample[common.ORIGINAL_IMAGE] = original_image 160 | num_threads = 1 161 | 162 | return tf.train.batch( 163 | sample, 164 | batch_size=batch_size, 165 | num_threads=num_threads, 166 | capacity=32 * batch_size, 167 | allow_smaller_final_batch=not is_training, 168 | dynamic_pad=True) 169 | -------------------------------------------------------------------------------- /deeplab/utils/input_generator.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/utils/input_generator.pyc -------------------------------------------------------------------------------- /deeplab/utils/save_annotation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
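# Example wiring of input_generator.get above -- an illustrative sketch (the
# dataset object would come from deeplab/datasets/segmentation_dataset.py;
# the numeric values are typical DeepLab training settings, not mandated here):
#
#   samples = get(dataset, crop_size=[769, 769], batch_size=8,
#                 min_scale_factor=0.5, max_scale_factor=2.0,
#                 scale_factor_step_size=0.25, dataset_split='train',
#                 is_training=True, model_variant='xception_65')
#   images, labels = samples[common.IMAGE], samples[common.LABEL]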
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Saves an annotation as one png image. 16 | 17 | This script saves an annotation as one png image, and has the option to add 18 | colormap to the png image for better visualization. 19 | """ 20 | 21 | import numpy as np 22 | import PIL.Image as img 23 | import tensorflow as tf 24 | 25 | from deeplab.utils import get_dataset_colormap 26 | 27 | 28 | def save_annotation(label, 29 | save_dir, 30 | filename, 31 | add_colormap=True, 32 | colormap_type=get_dataset_colormap.get_pascal_name()): 33 | """Saves the given label to image on disk. 34 | 35 | Args: 36 | label: The numpy array to be saved. The data will be converted 37 | to uint8 and saved as png image. 38 | save_dir: The directory to which the results will be saved. 39 | filename: The image filename. 40 | add_colormap: Add color map to the label or not. 41 | colormap_type: Colormap type for visualization. 42 | """ 43 | # Add colormap for visualizing the prediction. 44 | if add_colormap: 45 | colored_label = get_dataset_colormap.label_to_color_image( 46 | label, colormap_type) 47 | else: 48 | colored_label = label 49 | 50 | pil_image = img.fromarray(colored_label.astype(dtype=np.uint8)) 51 | with tf.gfile.Open('%s/%s.png' % (save_dir, filename), mode='w') as f: 52 | pil_image.save(f, 'PNG') 53 | -------------------------------------------------------------------------------- /deeplab/utils/train_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/deeplab/utils/train_utils.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.7.1 2 | asn1crypto==0.24.0 3 | astor==0.8.0 4 | cryptography==2.1.4 5 | cycler==0.10.0 6 | decorator==4.4.2 7 | gast==0.2.2 8 | grpcio==1.22.0 9 | h5py==2.9.0 10 | idna==2.6 11 | imageio==2.9.0 12 | Keras-Applications==1.0.8 13 | Keras-Preprocessing==1.1.0 14 | keyring==10.6.0 15 | keyrings.alt==3.0 16 | kiwisolver==1.3.1 17 | Markdown==3.1.1 18 | matplotlib==3.3.4 19 | mock==3.0.5 20 | networkx==2.5.1 21 | numpy==1.16.4 22 | opencv-contrib-python==4.5.3.56 23 | Pillow==8.3.2 24 | protobuf==3.9.0 25 | pycrypto==2.6.1 26 | pygobject==3.26.1 27 | pyparsing==2.4.7 28 | python-apt==1.6.4 29 | python-dateutil==2.8.2 30 | PyWavelets==1.1.1 31 | pyxdg==0.25 32 | scikit-image==0.17.2 33 | scipy==1.5.4 34 | SecretStorage==2.3.1 35 | six==1.11.0 36 | tensorboard==1.13.1 37 | tensorflow-estimator==1.13.0 38 | tensorflow-gpu==1.13.2 39 | termcolor==1.1.0 40 | tifffile==2020.9.3 41 | Werkzeug==0.15.4 42 | -------------------------------------------------------------------------------- /scripts/extract_superpixels.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import argparse 4 | import cv2 as cv 5 | 
import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | from PIL import Image 10 | from skimage.segmentation import slic 11 | from skimage.segmentation import mark_boundaries 12 | from skimage import exposure 13 | 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--dataset_name', type=str, default='cityscapes') 17 | parser.add_argument('--split', type=str, default='train') 18 | parser.add_argument('--sp_method', type=str, default='seeds') 19 | parser.add_argument('--num_superpixels', type=int, default=8192) 20 | parser.add_argument('--resize_factor', type=int, default=1) 21 | FLAGS = parser.parse_args() 22 | 23 | 24 | def extract_superpixel_slic(image_name_path): 25 | method = 'slic' 26 | 27 | num_superpixels = FLAGS.num_superpixels 28 | sigma = 0 29 | superpixel_label_dir = './superpixels/{}/{}_{}/{}/label'.format(FLAGS.dataset_name, method, num_superpixels, FLAGS.split) 30 | if not os.path.exists(superpixel_label_dir): 31 | os.makedirs(superpixel_label_dir) 32 | superpixel_result_dir = './superpixels/{}/{}_{}/{}/result'.format(FLAGS.dataset_name, method, num_superpixels, FLAGS.split) 33 | if not os.path.exists(superpixel_result_dir): 34 | os.makedirs(superpixel_result_dir) 35 | 36 | max_n = 0 37 | nr_sample = 0 38 | for image_name, image_path in image_name_path.items(): 39 | 40 | print(image_name) 41 | #if os.path.exists(os.path.join(superpixel_label_dir, image_name + '.pkl')): 42 | # continue 43 | img = plt.imread(image_path) 44 | img_eq = exposure.equalize_hist(img) 45 | img_eq = img  # NOTE: overrides the equalized image above, so histogram equalization is effectively disabled for SLIC 46 | 47 | labels = slic(img_eq, n_segments=num_superpixels, sigma=sigma) 48 | result = mark_boundaries(img, labels) 49 | 50 | output_dic = {} 51 | output_dic['labels'] = labels.astype(np.int16) 52 | num_sp = labels.max() + 1 53 | 54 | if num_sp > max_n: 55 | max_n = num_sp 56 | 57 | output_dic['valid_idxes'] = np.unique(labels) 58 | 59 | pickle.dump(output_dic, open(os.path.join(superpixel_label_dir, image_name + '.pkl'), 'wb')) 60 | plt.imsave(os.path.join(superpixel_result_dir, image_name + '.jpg'), result) 61 | nr_sample += 1 62 | print(max_n) 63 | 64 | 65 | def extract_superpixel_seeds(image_name_path): 66 | method = 'seeds' 67 | 68 | prior = 3 69 | num_levels = 5 70 | num_histogram_bins = 10 71 | num_superpixels = FLAGS.num_superpixels 72 | 73 | superpixel_label_dir = './superpixels/{}/{}_{}/{}/label'.format(FLAGS.dataset_name, method, num_superpixels, FLAGS.split) 74 | if not os.path.exists(superpixel_label_dir): 75 | os.makedirs(superpixel_label_dir) 76 | superpixel_result_dir = './superpixels/{}/{}_{}/{}/result'.format(FLAGS.dataset_name, method, num_superpixels, FLAGS.split) 77 | if not os.path.exists(superpixel_result_dir): 78 | os.makedirs(superpixel_result_dir) 79 | 80 | max_n = 0 81 | nr_sample = 0 82 | for image_name, image_path in image_name_path.items(): 83 | #if nr_sample >= 100: break 84 | print(image_name) 85 | 86 | img = Image.open(image_path) 87 | width, height = img.size 88 | resize_factor = FLAGS.resize_factor 89 | img = img.convert('RGB').resize((width//resize_factor, height//resize_factor)) 90 | 91 | img_eq = exposure.equalize_hist(np.asarray(img)) 92 | #img_eq = img 93 | 94 | converted_img = cv.cvtColor(img_eq.astype(np.float32), cv.COLOR_RGB2HSV) 95 | height, width, channels = converted_img.shape 96 | seeds = cv.ximgproc.createSuperpixelSEEDS(width, height, channels, num_superpixels, num_levels, prior, num_histogram_bins, True) 97 | seeds.iterate(converted_img, 10) 98 | 99 | labels = seeds.getLabels() 100 | result = mark_boundaries(img_eq, labels)
101 | 102 | output_dic = {} 103 | output_dic['labels'] = labels.astype(np.int16) 104 | num_sp = labels.max() + 1 105 | 106 | if num_sp > max_n: 107 | max_n = num_sp 108 | 109 | output_dic['valid_idxes'] = np.unique(labels) 110 | 111 | pickle.dump(output_dic, open(os.path.join(superpixel_label_dir, image_name + '.pkl'), 'wb')) 112 | plt.imsave(os.path.join(superpixel_result_dir, image_name + '.jpg'), result) 113 | nr_sample += 1 114 | print(max_n) 115 | 116 | if __name__ == '__main__': 117 | 118 | if FLAGS.dataset_name == 'pascal_voc_seg': 119 | devkit_path = './deeplab/datasets/pascal_voc_seg/VOCdevkit/' 120 | image_dir = devkit_path + 'VOC2012/JPEGImages' 121 | imageset_path = devkit_path + 'VOC2012/ImageSets/Segmentation/%s.txt'%FLAGS.split 122 | elif FLAGS.dataset_name == 'cityscapes': 123 | devkit_path = './deeplab/datasets/cityscapes/' 124 | image_dir = devkit_path + 'leftImg8bit/' 125 | imageset_path = devkit_path + 'image_list/%s.txt'%FLAGS.split 126 | 127 | with open(imageset_path, 'r') as f: 128 | lines = f.readlines() 129 | image_list = [x.strip() for x in lines] 130 | 131 | image_name_path = {} 132 | 133 | if FLAGS.dataset_name == 'pascal_voc_seg': 134 | for image_name in image_list: 135 | image_path = os.path.join(image_dir, image_name + '.jpg') 136 | image_name_path[image_name] = image_path 137 | elif FLAGS.dataset_name == 'cityscapes': 138 | for image_name in image_list: 139 | parts = image_name.split("_") 140 | image_path = os.path.join(image_dir, FLAGS.split, parts[0], image_name + '_leftImg8bit.png') 141 | image_name_path[image_name] = image_path 142 | 143 | if FLAGS.sp_method == 'seeds': 144 | extract_superpixel_seeds(image_name_path) 145 | elif FLAGS.sp_method == 'slic': 146 | extract_superpixel_slic(image_name_path) 147 | else: 148 | print('%s not implemented' % FLAGS.sp_method) 149 | raise RuntimeError('superpixel method not implemented: %s' % FLAGS.sp_method) 150 | 151 | -------------------------------------------------------------------------------- /scripts/gen_oracle_spx.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import ray 4 | import os 5 | import pickle 6 | import pathlib 7 | import io 8 | import re 9 | import collections 10 | import cv2 11 | from skimage import segmentation 12 | import scipy 13 | import heapq 14 | import math 15 | import shapely 16 | import shapely.ops 17 | 18 | from PIL import Image 19 | from PIL import ImageDraw 20 | from skimage import measure 21 | from matplotlib.colors import ListedColormap 22 | # from gen_spx_visualization import draw_map, draw_map_overlay 23 | 24 | _unknown_index, _known_index_start = 0, 1 25 | 26 | def find_contours_of_connected_components(boundaries_map): 27 | 28 | def find_contours(binary): 29 | # RETR_EXTERNAL: find external contours only. 30 | # CHAIN_APPROX_NONE: do not approximate. 31 | contours, _ = cv2.findContours( 32 | binary, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE) 33 | return contours 34 | 35 | # We want to assemble non-overlapping contours only.
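# (The add() helper below rejects any new contour that fully contains a
#  previously kept contour, so the per-value masks cannot register nested
#  duplicates of the same component.)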
36 | distinct_contours = [] 37 | def add(contour): 38 | for other in distinct_contours: 39 | if contour.contains(other): 40 | return False 41 | distinct_contours.append(contour); return True 42 | 43 | for value in np.unique(boundaries_map): 44 | # For each pixel `value`, there could be multiple contours. 45 | binary_contours_map = np.array( 46 | (boundaries_map == value), dtype=np.uint8) 47 | for contour in find_contours(binary_contours_map): 48 | # cv2.findContours() returns each contour 49 | # as a 3d numpy array with one redundant dimension 50 | num_coords, one, two = contour.shape 51 | if one != 1 or two != 2: 52 | raise AssertionError( 53 | "unexpected error: the cv2 API might have changed") 54 | # remove redundant (singleton) dimension 55 | contour = contour.squeeze(axis=1) 56 | if num_coords > 2: 57 | add(shapely.geometry.Polygon(contour)) 58 | else: 59 | print("strange contour: %s" 60 | % list(tuple(xy) for xy in contour)) 61 | 62 | return distinct_contours 63 | 64 | 65 | def gt_connected_components_map(boundaries_map): 66 | 67 | # We just need the size of the input image. 68 | height, width = boundaries_map.shape 69 | # 32-bit signed integer pixels 70 | mode = "I" 71 | 72 | # placeholder with 'zero'-valued pixels as default 73 | components_map = Image.new(mode=mode, size=(width, height), 74 | color=_unknown_index) 75 | draw = ImageDraw.Draw(components_map, mode=mode) 76 | 77 | # Find all distinct contours of connected components in 78 | # the boundaries map. 79 | find_components = find_contours_of_connected_components 80 | distinct_contours = find_components(boundaries_map) 81 | 82 | # draw 83 | for index, contour in enumerate(distinct_contours, 84 | start=_known_index_start): 85 | draw.polygon(contour.exterior.coords, fill=index) 86 | 87 | return np.array(components_map, dtype=int) 88 | 89 | # we want all pixels to be assigned their own colors 90 | # return segmentation.expand_labels( 91 | # np.array(components_map, dtype=int), distance=100) 92 | 93 | 94 | # ---------------------------------------------------------- 95 | 96 | 97 | import utils 98 | 99 | def save_region_map(dir, map, cmap, image_name, image_path): 100 | # draw_map_overlay( 101 | # Image.open(image_path), map, cmap, 0.5).save( 102 | # dir / ("result/%s.png" % image_name)) 103 | map = np.array(map, dtype=np.int16) 104 | with open(dir / ("label/%s.pkl" % image_name), "wb") as file: 105 | pickle.dump( 106 | {"labels": map, "valid_idxes": np.unique(map)}, file) 107 | 108 | def main(): 109 | 110 | from argparse import ArgumentParser 111 | from pathlib import Path 112 | 113 | parser = ArgumentParser() 114 | 115 | parser.add_argument("--dataset_name", required=True) 116 | parser.add_argument("--split", required=True) 117 | parser.add_argument("--outputs_dir", required=True, type=Path) 118 | 119 | conf = parser.parse_args() 120 | outputs_dir = conf.outputs_dir 121 | 122 | (outputs_dir / "label").mkdir(parents=True, exist_ok=True) 123 | (outputs_dir / "result").mkdir(parents=True, exist_ok=True) 124 | 125 | print("outputs saved to '%s'" % outputs_dir) 126 | 127 | # with open("distinct_rgb_colors_5000.pkl", "rb") as file: 128 | # cmap = ListedColormap(pickle.load(file), N=5000) 129 | 130 | def _save_region_map(dir, map, image_name, image_path): 131 | save_region_map(dir, map, None, image_name, image_path) 132 | 133 | print("color map loaded") 134 | 135 | @utils.map_and_reduce 136 | def cityscapes_gt_cc(hash, image_path, true_label_path): 137 | print(hash) 138 | 139 | boundaries_map = Image.open(true_label_path).convert("L")
140 | boundaries_map = np.array(boundaries_map) 141 | 142 | components_map = gt_connected_components_map(boundaries_map) 143 | _save_region_map(outputs_dir, components_map, hash, image_path) 144 | 145 | if conf.dataset_name == 'pascal_voc_seg': 146 | devkit_path = './deeplab/datasets/pascal_voc_seg/' 147 | image_dir = devkit_path + 'VOC2012/JPEGImages' 148 | imageset_path = devkit_path + 'VOC2012/ImageSets/Segmentation/%s.txt'%conf.split 149 | semantic_segmentation_folder = devkit_path + 'VOC2012/SegmentationClassRaw' 150 | num_class = 21 151 | elif conf.dataset_name == 'cityscapes': 152 | devkit_path = './deeplab/datasets/cityscapes/' 153 | image_dir = devkit_path + 'leftImg8bit/' 154 | imageset_path = devkit_path + 'image_list/%s.txt'%conf.split 155 | semantic_segmentation_folder = devkit_path + 'gtFine' 156 | num_class = 19 157 | 158 | with open(imageset_path, 'r') as f: 159 | image_list = [line.strip() for line in f] 160 | 161 | image_hash_path = collections.OrderedDict() 162 | if conf.dataset_name == 'pascal_voc_seg': 163 | for image_name in image_list: 164 | image_path = os.path.join(image_dir, image_name + '.jpg') 165 | true_label_path = Path(semantic_segmentation_folder) / (image_name + ".png") 166 | image_hash_path[image_name] = (image_path, true_label_path) 167 | elif conf.dataset_name == 'cityscapes': 168 | for image_name in image_list: 169 | parts = image_name.split("_") 170 | image_path = os.path.join(image_dir, conf.split, parts[0], image_name + '_leftImg8bit.png') 171 | true_label_path = Path(semantic_segmentation_folder) / conf.split / parts[0] / (image_name + "_gtFine_labelIds.png") 172 | image_hash_path[image_name] = (image_path, true_label_path) 173 | 174 | arg_tuples = [] 175 | for n, (hash, (path, true_label_path)) in enumerate(image_hash_path.items()): 176 | arg_tuples.append((hash, path, true_label_path)) 177 | for batch in utils.chunk(arg_tuples, 8): 178 | cityscapes_gt_cc(batch, 32) 179 | 180 | if __name__ == "__main__": 181 | main() 182 | 183 | -------------------------------------------------------------------------------- /scripts/region_selection_using_cb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Jul 30 16:03:39 2019 4 | 5 | @author: lile 6 | """ 7 | import os 8 | import pickle 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | FLAGS = tf.app.flags.FLAGS 13 | tf.app.flags.DEFINE_integer('region_num_per_image', 1024, 'Number of regions per image.') 14 | tf.app.flags.DEFINE_integer('batch_id', 0, 'Batch id.') 15 | tf.app.flags.DEFINE_integer('k', 1000, 'Annotation budget.') 16 | tf.app.flags.DEFINE_integer('seed', 1, 'Random seed.') 17 | tf.app.flags.DEFINE_integer('region_size', 32, 'Region size.') 18 | tf.app.flags.DEFINE_string('feat_version', 'v0', 'The feature used for comparing region similarity.') 19 | 20 | tf.app.flags.DEFINE_string( 21 | 'list_folder', 22 | '', 23 | 'Folder containing lists for training and validation.') 24 | 25 | tf.app.flags.DEFINE_string('region_uncert_dir', 26 | '/home/lile/Projects/active_seg/region_uncertainty/voc2012/xception_41/baseline_slide_window_k_1000_train_iter_30000_bn_False_random_False_nms_0.6/batch_1', 27 | 'Folder containing per-image region uncertainty scores.') 28 | 29 | tf.app.flags.DEFINE_string('region_idx_dir', 30 | '', 31 | 'Folder to save selected region indices.') 32 | tf.app.flags.DEFINE_string( 33 | 'train_split', 34 | 'train', 35 | 'Train split.') 36 | 37 | tf.app.flags.DEFINE_string('valid_idx_dir',
'/home/lile/Projects/active_seg/region_features/voc2012/resnet_v1_50', 39 | 'Folder containing valid region indices for each image.') 40 | 41 | 42 | tf.app.flags.DEFINE_string( 43 | 'anno_cost_dir', 44 | 'False', 45 | 'Folder containing annotation cost for each region.') 46 | tf.app.flags.DEFINE_string( 47 | 'cost_type', 48 | 'rc', 49 | 'cost type: rc (region count), cc (click count)') 50 | 51 | tf.app.flags.DEFINE_string('is_bal', 52 | 'False', 53 | 'Whether to use class-balanced weighting (True/False).') 54 | 55 | tf.app.flags.DEFINE_string('class_to_region_idx_path', 56 | '/home/lile/Projects/active_seg/region_features/voc2012/resnet_v1_50', 57 | 'Path to a pickle mapping each class to its region indices and sizes.') 58 | 59 | def main(unused_argv): 60 | imageset_path = FLAGS.list_folder + '/%s.txt' % (FLAGS.train_split) 61 | 62 | with open(imageset_path, 'r') as f: 63 | lines = f.readlines() 64 | image_list = [x.strip() for x in lines] 65 | 66 | region_num = FLAGS.region_num_per_image * len(image_list) 67 | print(region_num) 68 | 69 | def normalize(array, array_min, array_max): 70 | array = (array - array_min) / (array_max - array_min) 71 | return array 72 | 73 | selected_idx_prev = [] 74 | if FLAGS.cost_type == 'cc': 75 | all_region_anno_cost = [] 76 | for image_name in image_list: 77 | all_region_anno_cost.append(pickle.load(open(os.path.join(FLAGS.anno_cost_dir, image_name + '.pkl'), 'rb'))) 78 | all_region_anno_cost = np.hstack(all_region_anno_cost) 79 | 80 | # prepare uncertainty 81 | all_region_uncertainty_norm = np.zeros((region_num,1), dtype=np.float32) 82 | 83 | all_region_uncertainty = [] 84 | for image_name in image_list: 85 | all_region_uncertainty.append(pickle.load(open(os.path.join(FLAGS.region_uncert_dir, image_name + '.pkl'), 'rb'))) 86 | all_region_uncertainty = np.hstack(all_region_uncertainty) 87 | 88 | print('all_region_uncertainty max is {}, min is {}'.format(all_region_uncertainty.max(), all_region_uncertainty.min())) 89 | all_region_uncertainty_norm = normalize(all_region_uncertainty, all_region_uncertainty.min(), all_region_uncertainty.max()) 90 | all_region_uncertainty_norm = all_region_uncertainty_norm.reshape(-1, 1) 91 | all_region_uncertainty_norm[selected_idx_prev] = 0 92 | 93 | if FLAGS.is_bal == 'True': 94 | class_to_region_idx = pickle.load(open(FLAGS.class_to_region_idx_path, 'rb')) 95 | 96 | num_class = len(class_to_region_idx.keys()) 97 | pixel_num_per_class = np.zeros((num_class,), dtype=np.int) 98 | total_pixel_num = 1024 * 2048 * 2975 # check !! (Cityscapes train: 2975 images at 1024x2048) 99 | for i in range(num_class): 100 | region_idx_and_size = np.array(class_to_region_idx[i]) 101 | if region_idx_and_size.shape[0] == 0: 102 | pixel_num_per_class[i] = 0 103 | else: 104 | pixel_num_per_class[i] = np.sum(region_idx_and_size[:, 1]) 105 | 106 | p = np.zeros((num_class,), dtype=np.float32) 107 | w = np.zeros((num_class,), dtype=np.float32) 108 | for i in range(num_class): 109 | p[i] = pixel_num_per_class[i] / total_pixel_num 110 | w[i] = np.exp(-p[i]) 111 | 112 | region_to_w = np.ones((region_num,1), dtype=np.float32) 113 | for i in range(num_class): 114 | for reg, size in class_to_region_idx[i]: # check !!
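# (Class-balancing intuition: w = exp(-p) computed above maps a class covering
#  40% of all pixels to w = exp(-0.4) ~= 0.67 and a near-absent class to
#  w ~= 1.0, so the acquisition score below favors regions of rare classes.)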
115 | region_to_w[reg] = w[i] 116 | 117 | selected_idx = [] 118 | cost = 0 119 | 120 | f = all_region_uncertainty_norm 121 | if FLAGS.is_bal == 'True': 122 | f *= region_to_w 123 | 124 | sort_idx = np.argsort(f, axis=None)[::-1] 125 | 126 | if FLAGS.cost_type == 'rc': 127 | selected_idx = sort_idx[:FLAGS.k] 128 | print('selected last value is %f'%f[sort_idx[FLAGS.k-1]]) 129 | else: 130 | p = 0 131 | while cost < FLAGS.k: 132 | sel_ind = sort_idx[p] 133 | assert sel_ind not in selected_idx 134 | assert sel_ind not in selected_idx_prev 135 | selected_idx.append(sel_ind) 136 | p +=1 137 | cost += all_region_anno_cost[sel_ind] 138 | print('selected last value is %f'%f[sel_ind]) 139 | 140 | image_name_selected_regions = {} 141 | for image_name in image_list: 142 | image_name_selected_regions[image_name] = [] 143 | 144 | selected_idx = np.array(selected_idx) 145 | print('selected %d region in batch %d'%(selected_idx.size, FLAGS.batch_id)) 146 | for i in selected_idx: 147 | image_id = i // FLAGS.region_num_per_image 148 | region_id = i % FLAGS.region_num_per_image 149 | 150 | image_name = image_list[image_id] 151 | image_name_selected_regions[image_name].append(region_id) 152 | 153 | pickle.dump(image_name_selected_regions, open(os.path.join(FLAGS.region_idx_dir, 'batch_{}.pkl'.format(FLAGS.batch_id)), 'wb')) 154 | pickle.dump(selected_idx, open(os.path.join(FLAGS.region_idx_dir, 'batch_{}_selected_idx.pkl'.format(FLAGS.batch_id)), 'wb')) 155 | 156 | if __name__ == '__main__': 157 | tf.app.run() 158 | -------------------------------------------------------------------------------- /scripts/region_selection_using_random.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Aug 5 12:33:14 2019 4 | 5 | @author: lile 6 | """ 7 | import os 8 | import pickle 9 | import random 10 | import tensorflow as tf 11 | import numpy as np 12 | 13 | FLAGS = tf.app.flags.FLAGS 14 | 15 | tf.app.flags.DEFINE_string('region_idx_dir', 16 | '', 17 | 'Folder containing selected region index.') 18 | 19 | tf.app.flags.DEFINE_string( 20 | 'list_folder', 21 | '', 22 | 'Folder containing lists for training and validation') 23 | 24 | tf.app.flags.DEFINE_integer('region_num_per_image', 20, 'Number of regions per image.') 25 | tf.app.flags.DEFINE_integer('batch_id', 0, 'batch id') 26 | tf.app.flags.DEFINE_integer('seed', 10, 'seed for random generator') 27 | 28 | tf.app.flags.DEFINE_integer('k', 5000, 'selected region number for a batch') 29 | tf.app.flags.DEFINE_string( 30 | 'train_split', 31 | 'train', 32 | 'split for training images') 33 | 34 | tf.app.flags.DEFINE_string('valid_idx_dir', 35 | '', 36 | 'valid region index for each image') 37 | 38 | tf.app.flags.DEFINE_string( 39 | 'anno_cost_dir', 40 | 'False', 41 | 'Folder containing annotation cost for each region') 42 | 43 | tf.app.flags.DEFINE_string( 44 | 'cost_type', 45 | 'rc', 46 | 'cost type: rc (region count), cc (click count)') 47 | 48 | def random_select(): 49 | 50 | imageset_path = FLAGS.list_folder + '/%s.txt' % (FLAGS.train_split) 51 | 52 | with open(imageset_path, 'r') as f: 53 | lines = f.readlines() 54 | image_list = [x.strip() for x in lines] 55 | 56 | image_name_selected_regions = {} 57 | for image_name in image_list: 58 | image_name_selected_regions[image_name] = [] 59 | 60 | print('region_num_per_image is %d'%FLAGS.region_num_per_image) 61 | if FLAGS.batch_id == 0: 62 | region_idx = [] 63 | n = 0 64 | for image_name in image_list: 65 | idxes = 
np.array(pickle.load(open(os.path.join(FLAGS.valid_idx_dir, image_name + '.pkl'), 'rb'))['valid_idxes'])
66 |             idxes = idxes.astype(np.int32)
67 |             idxes += n * FLAGS.region_num_per_image  # offset into the global region index space
68 |             region_idx.append(idxes)
69 |             n += 1
70 | 
71 |         random_idx = np.hstack(region_idx)
72 |         random.seed(FLAGS.seed)
73 |         random.shuffle(random_idx)
74 |         print('total region number is %d' % random_idx.size)
75 |         pickle.dump(random_idx, open(os.path.join(FLAGS.region_idx_dir, 'random_idx.pkl'), 'wb'))
76 |     else:
77 |         random_idx = pickle.load(open(os.path.join(FLAGS.region_idx_dir, 'random_idx.pkl'), 'rb'))
78 | 
79 |     sel_k = FLAGS.k
80 |     if FLAGS.batch_id == 0:
81 |         start_idx = 0
82 |     else:
83 |         start_idx = pickle.load(open(os.path.join(FLAGS.region_idx_dir, 'batch_{}_start_idx.pkl'.format(FLAGS.batch_id)), 'rb'))
84 | 
85 |     if FLAGS.cost_type == 'rc':
86 | 
87 |         selected_idx = random_idx[start_idx: start_idx + sel_k]  # next sel_k regions in the shuffled order
88 |         pickle.dump(start_idx + sel_k, open(os.path.join(FLAGS.region_idx_dir, 'batch_{}_start_idx.pkl'.format(FLAGS.batch_id+1)), 'wb'))
89 | 
90 |     elif FLAGS.cost_type == 'cc':
91 |         all_region_anno_cost = []
92 |         for image_name in image_list:
93 |             all_region_anno_cost.append(pickle.load(open(os.path.join(FLAGS.anno_cost_dir, image_name + '.pkl'), 'rb')))
94 |         all_region_anno_cost = np.hstack(all_region_anno_cost)
95 | 
96 |         def get_selected_idx(start_idx, budget):
97 |             selected_idx = []
98 |             cost = 0
99 |             p = 0
100 |             while cost < budget:
101 |                 cost += all_region_anno_cost[random_idx[start_idx+p]]
102 |                 selected_idx.append(random_idx[start_idx+p])
103 |                 p += 1
104 |             return np.array(selected_idx), p
105 | 
106 |         selected_idx, p = get_selected_idx(start_idx, sel_k)
107 |         pickle.dump(start_idx + p, open(os.path.join(FLAGS.region_idx_dir, 'batch_{}_start_idx.pkl'.format(FLAGS.batch_id+1)), 'wb'))
108 | 
109 |     print('selected %d regions in batch %d' % (selected_idx.size, FLAGS.batch_id))
110 |     for i in selected_idx:
111 |         image_id = i // FLAGS.region_num_per_image
112 |         region_id = i % FLAGS.region_num_per_image
113 | 
114 |         assert image_id in range(len(image_list))
115 | 
116 |         image_name = image_list[image_id]
117 |         image_name_selected_regions[image_name].append(region_id)
118 | 
119 |     pickle.dump(image_name_selected_regions, open(os.path.join(FLAGS.region_idx_dir, 'batch_{}.pkl'.format(FLAGS.batch_id)), 'wb'))
120 |     pickle.dump(selected_idx, open(os.path.join(FLAGS.region_idx_dir, 'batch_{}_selected_idx.pkl'.format(FLAGS.batch_id)), 'wb'))
121 | 
122 | def main(unused_argv):
123 |     random_select()
124 | 
125 | if __name__ == '__main__':
126 |     tf.app.run()
127 | 
--------------------------------------------------------------------------------
/scripts/utils.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | import functools
4 | import ray
5 | import math
6 | import numpy as np
7 | 
8 | 
9 | class Argumentor:
10 | 
11 |     r"""Argument-binding context manager.
12 |     This allows a user to specify a set of arguments which will
13 |     be partially applied to each of the callables passed to this
14 |     context manager.
15 |     With this context, we can ensure that each callable uses the
16 |     same set of arguments and simplify the code::
17 |         add, sub = lambda x, y: x + y, lambda x, y: x - y
18 |         # 15, -5
19 |         with Argumentor([add, sub], y=10) as (add10, sub10):
20 |             print("%d, %d" % (add10(5), sub10(5)))
21 |     Arguments:
22 |         function_or_functions (callable or iterable of callables):
23 |             The target callable objects. These will be partially
24 |             applied with the passed *args and **kwargs.
25 |     """
26 | 
27 |     def __init__(self, function_or_functions, *args, **kwargs):
28 | 
29 |         apply = functools.partial
30 | 
31 |         try:
32 |             functions = iter(function_or_functions)
33 |         except TypeError:  # a single callable was passed
34 |             self.function_apps = [
35 |                 apply(function_or_functions, *args, **kwargs)]
36 |         else:
37 |             self.function_apps = [
38 |                 apply(function, *args, **kwargs)
39 |                 for function in functions]
40 | 
41 |     def __enter__(self):
42 |         try:
43 |             [function] = self.function_apps
44 |         except ValueError:  # zero or several callables: return self, which is iterable
45 |             return self
46 |         else:
47 |             return function
48 | 
49 |     def __iter__(self):
50 |         return iter(self.function_apps)
51 | 
52 |     def __exit__(self, exc_type, exc_value, traceback):
53 |         ...
54 | 
55 | 
56 | def chunk(l, n):
57 |     u"""Partition `l` into `n` approximately equal-sized batches.
58 |     """
59 |     d, r = divmod(len(l), n)
60 |     for i in range(n):
61 |         si = (d+1)*(i if i < r else r) + d*(0 if i < r else i - r)  # first r batches get d+1 items, the rest get d
62 |         yield l[si:si+(d+1 if i < r else d)]
63 | 
64 | 
65 | def map_and_reduce(func):
66 |     u"""Map and reduce for parallel execution.
67 | 
68 |     This is based on ray's multiprocessing framework, but
69 |     over-parallelization is avoided by splitting the work into batches.
70 | 
71 |     """
72 | 
73 |     def main(tuples, n):  # tuples: a list of argument tuples for func; n: number of batches
74 | 
75 |         ray.init()
76 | 
77 |         @ray.remote
78 |         def process_batch(batch):
79 |             batch_return = []
80 |             for args in batch:
81 |                 batch_return.append(func(*args))
82 |             return batch_return
83 | 
84 |         # map: one remote task per batch
85 |         futures = []
86 |         for batch in chunk(tuples, n):
87 |             futures.append(process_batch.remote(batch))
88 |         # reduce: gather per-batch results
89 |         results = ray.get(futures)
90 | 
91 |         ray.shutdown()
92 |         return results
93 | 
94 |     return main
--------------------------------------------------------------------------------
/slim/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/__init__.py
--------------------------------------------------------------------------------
/slim/deployment/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/slim/deployment/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/deployment/__init__.pyc
--------------------------------------------------------------------------------
/slim/deployment/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/deployment/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/slim/deployment/__pycache__/model_deploy.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/deployment/__pycache__/model_deploy.cpython-36.pyc
--------------------------------------------------------------------------------
/slim/deployment/model_deploy.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/deployment/model_deploy.pyc -------------------------------------------------------------------------------- /slim/nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /slim/nets/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/__init__.pyc -------------------------------------------------------------------------------- /slim/nets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /slim/nets/alexnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a model definition for AlexNet. 16 | 17 | This work was first described in: 18 | ImageNet Classification with Deep Convolutional Neural Networks 19 | Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton 20 | 21 | and later refined in: 22 | One weird trick for parallelizing convolutional neural networks 23 | Alex Krizhevsky, 2014 24 | 25 | Here we provide the implementation proposed in "One weird trick" and not 26 | "ImageNet Classification", as per the paper, the LRN layers have been removed. 
27 | 28 | Usage: 29 | with slim.arg_scope(alexnet.alexnet_v2_arg_scope()): 30 | outputs, end_points = alexnet.alexnet_v2(inputs) 31 | 32 | @@alexnet_v2 33 | """ 34 | 35 | from __future__ import absolute_import 36 | from __future__ import division 37 | from __future__ import print_function 38 | 39 | import tensorflow as tf 40 | 41 | slim = tf.contrib.slim 42 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 43 | 44 | 45 | def alexnet_v2_arg_scope(weight_decay=0.0005): 46 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 47 | activation_fn=tf.nn.relu, 48 | biases_initializer=tf.constant_initializer(0.1), 49 | weights_regularizer=slim.l2_regularizer(weight_decay)): 50 | with slim.arg_scope([slim.conv2d], padding='SAME'): 51 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 52 | return arg_sc 53 | 54 | 55 | def alexnet_v2(inputs, 56 | num_classes=1000, 57 | is_training=True, 58 | dropout_keep_prob=0.5, 59 | spatial_squeeze=True, 60 | scope='alexnet_v2', 61 | global_pool=False): 62 | """AlexNet version 2. 63 | 64 | Described in: http://arxiv.org/pdf/1404.5997v2.pdf 65 | Parameters from: 66 | github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ 67 | layers-imagenet-1gpu.cfg 68 | 69 | Note: All the fully_connected layers have been transformed to conv2d layers. 70 | To use in classification mode, resize input to 224x224 or set 71 | global_pool=True. To use in fully convolutional mode, set 72 | spatial_squeeze to false. 73 | The LRN layers have been removed and change the initializers from 74 | random_normal_initializer to xavier_initializer. 75 | 76 | Args: 77 | inputs: a tensor of size [batch_size, height, width, channels]. 78 | num_classes: the number of predicted classes. If 0 or None, the logits layer 79 | is omitted and the input features to the logits layer are returned instead. 80 | is_training: whether or not the model is being trained. 81 | dropout_keep_prob: the probability that activations are kept in the dropout 82 | layers during training. 83 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 84 | logits. Useful to remove unnecessary dimensions for classification. 85 | scope: Optional scope for the variables. 86 | global_pool: Optional boolean flag. If True, the input to the classification 87 | layer is avgpooled to size 1x1, for any input size. (This is not part 88 | of the original AlexNet.) 89 | 90 | Returns: 91 | net: the output of the logits layer (if num_classes is a non-zero integer), 92 | or the non-dropped-out input to the logits layer (if num_classes is 0 93 | or None). 94 | end_points: a dict of tensors with intermediate activations. 95 | """ 96 | with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: 97 | end_points_collection = sc.original_name_scope + '_end_points' 98 | # Collect outputs for conv2d, fully_connected and max_pool2d. 
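    # (How `end_points` works here, for readers new to slim: passing
    # `outputs_collections` below makes every conv2d/fully_connected/max_pool2d
    # layer register its output tensor in `end_points_collection`, and the
    # `convert_collection_to_dict` call near the end of this function turns
    # that collection into the `end_points` dict of intermediate activations.)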
99 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 100 | outputs_collections=[end_points_collection]): 101 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 102 | scope='conv1') 103 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool1') 104 | net = slim.conv2d(net, 192, [5, 5], scope='conv2') 105 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool2') 106 | net = slim.conv2d(net, 384, [3, 3], scope='conv3') 107 | net = slim.conv2d(net, 384, [3, 3], scope='conv4') 108 | net = slim.conv2d(net, 256, [3, 3], scope='conv5') 109 | net = slim.max_pool2d(net, [3, 3], 2, scope='pool5') 110 | 111 | # Use conv2d instead of fully_connected layers. 112 | with slim.arg_scope([slim.conv2d], 113 | weights_initializer=trunc_normal(0.005), 114 | biases_initializer=tf.constant_initializer(0.1)): 115 | net = slim.conv2d(net, 4096, [5, 5], padding='VALID', 116 | scope='fc6') 117 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 118 | scope='dropout6') 119 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 120 | # Convert end_points_collection into a end_point dict. 121 | end_points = slim.utils.convert_collection_to_dict( 122 | end_points_collection) 123 | if global_pool: 124 | net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool') 125 | end_points['global_pool'] = net 126 | if num_classes: 127 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 128 | scope='dropout7') 129 | net = slim.conv2d(net, num_classes, [1, 1], 130 | activation_fn=None, 131 | normalizer_fn=None, 132 | biases_initializer=tf.zeros_initializer(), 133 | scope='fc8') 134 | if spatial_squeeze: 135 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 136 | end_points[sc.name + '/fc8'] = net 137 | return net, end_points 138 | alexnet_v2.default_image_size = 224 139 | -------------------------------------------------------------------------------- /slim/nets/alexnet.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/alexnet.pyc -------------------------------------------------------------------------------- /slim/nets/cifarnet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Contains a variant of the CIFAR-10 model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev) 26 | 27 | 28 | def cifarnet(images, num_classes=10, is_training=False, 29 | dropout_keep_prob=0.5, 30 | prediction_fn=slim.softmax, 31 | scope='CifarNet'): 32 | """Creates a variant of the CifarNet model. 33 | 34 | Note that since the output is a set of 'logits', the values fall in the 35 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 36 | probability distribution over the characters, one will need to convert them 37 | using the softmax function: 38 | 39 | logits = cifarnet.cifarnet(images, is_training=False) 40 | probabilities = tf.nn.softmax(logits) 41 | predictions = tf.argmax(logits, 1) 42 | 43 | Args: 44 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 45 | num_classes: the number of classes in the dataset. If 0 or None, the logits 46 | layer is omitted and the input features to the logits layer are returned 47 | instead. 48 | is_training: specifies whether or not we're currently training the model. 49 | This variable will determine the behaviour of the dropout layer. 50 | dropout_keep_prob: the percentage of activation values that are retained. 51 | prediction_fn: a function to get predictions out of logits. 52 | scope: Optional variable_scope. 53 | 54 | Returns: 55 | net: a 2D Tensor with the logits (pre-softmax activations) if num_classes 56 | is a non-zero integer, or the input to the logits layer if num_classes 57 | is 0 or None. 58 | end_points: a dictionary from components of the network to the corresponding 59 | activation. 60 | """ 61 | end_points = {} 62 | 63 | with tf.variable_scope(scope, 'CifarNet', [images]): 64 | net = slim.conv2d(images, 64, [5, 5], scope='conv1') 65 | end_points['conv1'] = net 66 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 67 | end_points['pool1'] = net 68 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1') 69 | net = slim.conv2d(net, 64, [5, 5], scope='conv2') 70 | end_points['conv2'] = net 71 | net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2') 72 | net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 73 | end_points['pool2'] = net 74 | net = slim.flatten(net) 75 | end_points['Flatten'] = net 76 | net = slim.fully_connected(net, 384, scope='fc3') 77 | end_points['fc3'] = net 78 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 79 | scope='dropout3') 80 | net = slim.fully_connected(net, 192, scope='fc4') 81 | end_points['fc4'] = net 82 | if not num_classes: 83 | return net, end_points 84 | logits = slim.fully_connected(net, num_classes, 85 | biases_initializer=tf.zeros_initializer(), 86 | weights_initializer=trunc_normal(1/192.0), 87 | weights_regularizer=None, 88 | activation_fn=None, 89 | scope='logits') 90 | 91 | end_points['Logits'] = logits 92 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 93 | 94 | return logits, end_points 95 | cifarnet.default_image_size = 32 96 | 97 | 98 | def cifarnet_arg_scope(weight_decay=0.004): 99 | """Defines the default cifarnet argument scope. 
100 | 101 | Args: 102 | weight_decay: The weight decay to use for regularizing the model. 103 | 104 | Returns: 105 | An `arg_scope` to use for the inception v3 model. 106 | """ 107 | with slim.arg_scope( 108 | [slim.conv2d], 109 | weights_initializer=tf.truncated_normal_initializer(stddev=5e-2), 110 | activation_fn=tf.nn.relu): 111 | with slim.arg_scope( 112 | [slim.fully_connected], 113 | biases_initializer=tf.constant_initializer(0.1), 114 | weights_initializer=trunc_normal(0.04), 115 | weights_regularizer=slim.l2_regularizer(weight_decay), 116 | activation_fn=tf.nn.relu) as sc: 117 | return sc 118 | -------------------------------------------------------------------------------- /slim/nets/cifarnet.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/cifarnet.pyc -------------------------------------------------------------------------------- /slim/nets/cyclegan_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for tensorflow.contrib.slim.nets.cyclegan.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | from nets import cyclegan 24 | 25 | 26 | # TODO(joelshor): Add a test to check generator endpoints. 
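# A minimal sketch of such an endpoint check (illustrative only, not part of
# the original test suite; it only asserts that some intermediate activations
# are reported, since the endpoint names themselves are defined in cyclegan.py):
#
#   def test_generator_endpoints(self):
#     img_batch = tf.zeros([2, 32, 32, 3])
#     _, end_points = cyclegan.cyclegan_generator_resnet(img_batch)
#     self.assertTrue(end_points)  # expect at least one named endpoint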
27 | class CycleganTest(tf.test.TestCase): 28 | 29 | def test_generator_inference(self): 30 | """Check one inference step.""" 31 | img_batch = tf.zeros([2, 32, 32, 3]) 32 | model_output, _ = cyclegan.cyclegan_generator_resnet(img_batch) 33 | with self.test_session() as sess: 34 | sess.run(tf.global_variables_initializer()) 35 | sess.run(model_output) 36 | 37 | def _test_generator_graph_helper(self, shape): 38 | """Check that generator can take small and non-square inputs.""" 39 | output_imgs, _ = cyclegan.cyclegan_generator_resnet(tf.ones(shape)) 40 | self.assertAllEqual(shape, output_imgs.shape.as_list()) 41 | 42 | def test_generator_graph_small(self): 43 | self._test_generator_graph_helper([4, 32, 32, 3]) 44 | 45 | def test_generator_graph_medium(self): 46 | self._test_generator_graph_helper([3, 128, 128, 3]) 47 | 48 | def test_generator_graph_nonsquare(self): 49 | self._test_generator_graph_helper([2, 80, 400, 3]) 50 | 51 | def test_generator_unknown_batch_dim(self): 52 | """Check that generator can take unknown batch dimension inputs.""" 53 | img = tf.placeholder(tf.float32, shape=[None, 32, None, 3]) 54 | output_imgs, _ = cyclegan.cyclegan_generator_resnet(img) 55 | 56 | self.assertAllEqual([None, 32, None, 3], output_imgs.shape.as_list()) 57 | 58 | def _input_and_output_same_shape_helper(self, kernel_size): 59 | img_batch = tf.placeholder(tf.float32, shape=[None, 32, 32, 3]) 60 | output_img_batch, _ = cyclegan.cyclegan_generator_resnet( 61 | img_batch, kernel_size=kernel_size) 62 | 63 | self.assertAllEqual(img_batch.shape.as_list(), 64 | output_img_batch.shape.as_list()) 65 | 66 | def input_and_output_same_shape_kernel3(self): 67 | self._input_and_output_same_shape_helper(3) 68 | 69 | def input_and_output_same_shape_kernel4(self): 70 | self._input_and_output_same_shape_helper(4) 71 | 72 | def input_and_output_same_shape_kernel5(self): 73 | self._input_and_output_same_shape_helper(5) 74 | 75 | def input_and_output_same_shape_kernel6(self): 76 | self._input_and_output_same_shape_helper(6) 77 | 78 | def _error_if_height_not_multiple_of_four_helper(self, height): 79 | self.assertRaisesRegexp( 80 | ValueError, 81 | 'The input height must be a multiple of 4.', 82 | cyclegan.cyclegan_generator_resnet, 83 | tf.placeholder(tf.float32, shape=[None, height, 32, 3])) 84 | 85 | def test_error_if_height_not_multiple_of_four_height29(self): 86 | self._error_if_height_not_multiple_of_four_helper(29) 87 | 88 | def test_error_if_height_not_multiple_of_four_height30(self): 89 | self._error_if_height_not_multiple_of_four_helper(30) 90 | 91 | def test_error_if_height_not_multiple_of_four_height31(self): 92 | self._error_if_height_not_multiple_of_four_helper(31) 93 | 94 | def _error_if_width_not_multiple_of_four_helper(self, width): 95 | self.assertRaisesRegexp( 96 | ValueError, 97 | 'The input width must be a multiple of 4.', 98 | cyclegan.cyclegan_generator_resnet, 99 | tf.placeholder(tf.float32, shape=[None, 32, width, 3])) 100 | 101 | def test_error_if_width_not_multiple_of_four_width29(self): 102 | self._error_if_width_not_multiple_of_four_helper(29) 103 | 104 | def test_error_if_width_not_multiple_of_four_width30(self): 105 | self._error_if_width_not_multiple_of_four_helper(30) 106 | 107 | def test_error_if_width_not_multiple_of_four_width31(self): 108 | self._error_if_width_not_multiple_of_four_helper(31) 109 | 110 | 111 | if __name__ == '__main__': 112 | tf.test.main() 113 | -------------------------------------------------------------------------------- /slim/nets/dcgan_test.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for dcgan.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from six.moves import xrange # pylint: disable=redefined-builtin 22 | import tensorflow as tf 23 | 24 | from nets import dcgan 25 | 26 | 27 | class DCGANTest(tf.test.TestCase): 28 | 29 | def test_generator_run(self): 30 | tf.set_random_seed(1234) 31 | noise = tf.random_normal([100, 64]) 32 | image, _ = dcgan.generator(noise) 33 | with self.test_session() as sess: 34 | sess.run(tf.global_variables_initializer()) 35 | image.eval() 36 | 37 | def test_generator_graph(self): 38 | tf.set_random_seed(1234) 39 | # Check graph construction for a number of image size/depths and batch 40 | # sizes. 41 | for i, batch_size in zip(xrange(3, 7), xrange(3, 8)): 42 | tf.reset_default_graph() 43 | final_size = 2 ** i 44 | noise = tf.random_normal([batch_size, 64]) 45 | image, end_points = dcgan.generator( 46 | noise, 47 | depth=32, 48 | final_size=final_size) 49 | 50 | self.assertAllEqual([batch_size, final_size, final_size, 3], 51 | image.shape.as_list()) 52 | 53 | expected_names = ['deconv%i' % j for j in xrange(1, i)] + ['logits'] 54 | self.assertSetEqual(set(expected_names), set(end_points.keys())) 55 | 56 | # Check layer depths. 57 | for j in range(1, i): 58 | layer = end_points['deconv%i' % j] 59 | self.assertEqual(32 * 2**(i-j-1), layer.get_shape().as_list()[-1]) 60 | 61 | def test_generator_invalid_input(self): 62 | wrong_dim_input = tf.zeros([5, 32, 32]) 63 | with self.assertRaises(ValueError): 64 | dcgan.generator(wrong_dim_input) 65 | 66 | correct_input = tf.zeros([3, 2]) 67 | with self.assertRaisesRegexp(ValueError, 'must be a power of 2'): 68 | dcgan.generator(correct_input, final_size=30) 69 | 70 | with self.assertRaisesRegexp(ValueError, 'must be greater than 8'): 71 | dcgan.generator(correct_input, final_size=4) 72 | 73 | def test_discriminator_run(self): 74 | image = tf.random_uniform([5, 32, 32, 3], -1, 1) 75 | output, _ = dcgan.discriminator(image) 76 | with self.test_session() as sess: 77 | sess.run(tf.global_variables_initializer()) 78 | output.eval() 79 | 80 | def test_discriminator_graph(self): 81 | # Check graph construction for a number of image size/depths and batch 82 | # sizes. 
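    # (Concretely: square images of width 2**1 .. 2**5, paired with batch sizes 3 .. 7.)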
83 | for i, batch_size in zip(xrange(1, 6), xrange(3, 8)): 84 | tf.reset_default_graph() 85 | img_w = 2 ** i 86 | image = tf.random_uniform([batch_size, img_w, img_w, 3], -1, 1) 87 | output, end_points = dcgan.discriminator( 88 | image, 89 | depth=32) 90 | 91 | self.assertAllEqual([batch_size, 1], output.get_shape().as_list()) 92 | 93 | expected_names = ['conv%i' % j for j in xrange(1, i+1)] + ['logits'] 94 | self.assertSetEqual(set(expected_names), set(end_points.keys())) 95 | 96 | # Check layer depths. 97 | for j in range(1, i+1): 98 | layer = end_points['conv%i' % j] 99 | self.assertEqual(32 * 2**(j-1), layer.get_shape().as_list()[-1]) 100 | 101 | def test_discriminator_invalid_input(self): 102 | wrong_dim_img = tf.zeros([5, 32, 32]) 103 | with self.assertRaises(ValueError): 104 | dcgan.discriminator(wrong_dim_img) 105 | 106 | spatially_undefined_shape = tf.placeholder(tf.float32, [5, 32, None, 3]) 107 | with self.assertRaises(ValueError): 108 | dcgan.discriminator(spatially_undefined_shape) 109 | 110 | not_square = tf.zeros([5, 32, 16, 3]) 111 | with self.assertRaisesRegexp(ValueError, 'not have equal width and height'): 112 | dcgan.discriminator(not_square) 113 | 114 | not_power_2 = tf.zeros([5, 30, 30, 3]) 115 | with self.assertRaisesRegexp(ValueError, 'not a power of 2'): 116 | dcgan.discriminator(not_power_2) 117 | 118 | 119 | if __name__ == '__main__': 120 | tf.test.main() 121 | -------------------------------------------------------------------------------- /slim/nets/i3d.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the definition for Inflated 3D Inception V1 (I3D). 16 | 17 | The network architecture is proposed by: 18 | Joao Carreira and Andrew Zisserman, 19 | Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset. 20 | https://arxiv.org/abs/1705.07750 21 | """ 22 | 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | from nets import i3d_utils 30 | from nets import s3dg 31 | 32 | slim = tf.contrib.slim 33 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 34 | conv3d_spatiotemporal = i3d_utils.conv3d_spatiotemporal 35 | 36 | 37 | def i3d_arg_scope(weight_decay=1e-7, 38 | batch_norm_decay=0.999, 39 | batch_norm_epsilon=0.001, 40 | use_renorm=False, 41 | separable_conv3d=False): 42 | """Defines default arg_scope for I3D. 43 | 44 | Args: 45 | weight_decay: The weight decay to use for regularizing the model. 46 | batch_norm_decay: Decay for batch norm moving average. 47 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 48 | in batch norm. 49 | use_renorm: Whether to use batch renormalization or not. 
50 | separable_conv3d: Whether to use separable 3d Convs. 51 | 52 | Returns: 53 | sc: An arg_scope to use for the models. 54 | """ 55 | batch_norm_params = { 56 | # Decay for the moving averages. 57 | 'decay': batch_norm_decay, 58 | # epsilon to prevent 0s in variance. 59 | 'epsilon': batch_norm_epsilon, 60 | # Turns off fused batch norm. 61 | 'fused': False, 62 | 'renorm': use_renorm, 63 | # collection containing the moving mean and moving variance. 64 | 'variables_collections': { 65 | 'beta': None, 66 | 'gamma': None, 67 | 'moving_mean': ['moving_vars'], 68 | 'moving_variance': ['moving_vars'], 69 | } 70 | } 71 | 72 | with slim.arg_scope( 73 | [slim.conv3d, conv3d_spatiotemporal], 74 | weights_regularizer=slim.l2_regularizer(weight_decay), 75 | activation_fn=tf.nn.relu, 76 | normalizer_fn=slim.batch_norm, 77 | normalizer_params=batch_norm_params): 78 | with slim.arg_scope( 79 | [conv3d_spatiotemporal], separable=separable_conv3d) as sc: 80 | return sc 81 | 82 | 83 | def i3d_base(inputs, final_endpoint='Mixed_5c', 84 | scope='InceptionV1'): 85 | """Defines the I3D base architecture. 86 | 87 | Note that we use the names as defined in Inception V1 to facilitate checkpoint 88 | conversion from an image-trained Inception V1 checkpoint to I3D checkpoint. 89 | 90 | Args: 91 | inputs: A 5-D float tensor of size [batch_size, num_frames, height, width, 92 | channels]. 93 | final_endpoint: Specifies the endpoint to construct the network up to. It 94 | can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 95 | 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 96 | 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 97 | 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c'] 98 | scope: Optional variable_scope. 99 | 100 | Returns: 101 | A dictionary from components of the network to the corresponding activation. 102 | 103 | Raises: 104 | ValueError: if final_endpoint is not set to one of the predefined values. 105 | """ 106 | 107 | return s3dg.s3dg_base( 108 | inputs, 109 | first_temporal_kernel_size=7, 110 | temporal_conv_startat='Conv2d_2c_3x3', 111 | gating_startat=None, 112 | final_endpoint=final_endpoint, 113 | min_depth=16, 114 | depth_multiplier=1.0, 115 | data_format='NDHWC', 116 | scope=scope) 117 | 118 | 119 | def i3d(inputs, 120 | num_classes=1000, 121 | dropout_keep_prob=0.8, 122 | is_training=True, 123 | prediction_fn=slim.softmax, 124 | spatial_squeeze=True, 125 | reuse=None, 126 | scope='InceptionV1'): 127 | """Defines the I3D architecture. 128 | 129 | The default image size used to train this network is 224x224. 130 | 131 | Args: 132 | inputs: A 5-D float tensor of size [batch_size, num_frames, height, width, 133 | channels]. 134 | num_classes: number of predicted classes. 135 | dropout_keep_prob: the percentage of activation values that are retained. 136 | is_training: whether is training or not. 137 | prediction_fn: a function to get predictions out of logits. 138 | spatial_squeeze: if True, logits is of shape is [B, C], if false logits is 139 | of shape [B, 1, 1, C], where B is batch_size and C is number of classes. 140 | reuse: whether or not the network and its variables should be reused. To be 141 | able to reuse 'scope' must be given. 142 | scope: Optional variable_scope. 143 | 144 | Returns: 145 | logits: the pre-softmax activations, a tensor of size 146 | [batch_size, num_classes] 147 | end_points: a dictionary from components of the network to the corresponding 148 | activation. 
149 | """ 150 | # Final pooling and prediction 151 | with tf.variable_scope( 152 | scope, 'InceptionV1', [inputs, num_classes], reuse=reuse) as scope: 153 | with slim.arg_scope( 154 | [slim.batch_norm, slim.dropout], is_training=is_training): 155 | net, end_points = i3d_base(inputs, scope=scope) 156 | with tf.variable_scope('Logits'): 157 | kernel_size = i3d_utils.reduced_kernel_size_3d(net, [2, 7, 7]) 158 | net = slim.avg_pool3d( 159 | net, kernel_size, stride=1, scope='AvgPool_0a_7x7') 160 | net = slim.dropout(net, dropout_keep_prob, scope='Dropout_0b') 161 | logits = slim.conv3d( 162 | net, 163 | num_classes, [1, 1, 1], 164 | activation_fn=None, 165 | normalizer_fn=None, 166 | scope='Conv2d_0c_1x1') 167 | # Temporal average pooling. 168 | logits = tf.reduce_mean(logits, axis=1) 169 | if spatial_squeeze: 170 | logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') 171 | 172 | end_points['Logits'] = logits 173 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 174 | return logits, end_points 175 | 176 | 177 | i3d.default_image_size = 224 178 | -------------------------------------------------------------------------------- /slim/nets/i3d.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/i3d.pyc -------------------------------------------------------------------------------- /slim/nets/i3d_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for networks.i3d.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | from nets import i3d 24 | 25 | 26 | class I3DTest(tf.test.TestCase): 27 | 28 | def testBuildClassificationNetwork(self): 29 | batch_size = 5 30 | num_frames = 64 31 | height, width = 224, 224 32 | num_classes = 1000 33 | 34 | inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 35 | logits, end_points = i3d.i3d(inputs, num_classes) 36 | self.assertTrue(logits.op.name.startswith('InceptionV1/Logits')) 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | self.assertTrue('Predictions' in end_points) 40 | self.assertListEqual(end_points['Predictions'].get_shape().as_list(), 41 | [batch_size, num_classes]) 42 | 43 | def testBuildBaseNetwork(self): 44 | batch_size = 5 45 | num_frames = 64 46 | height, width = 224, 224 47 | 48 | inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 49 | mixed_6c, end_points = i3d.i3d_base(inputs) 50 | self.assertTrue(mixed_6c.op.name.startswith('InceptionV1/Mixed_5c')) 51 | self.assertListEqual(mixed_6c.get_shape().as_list(), 52 | [batch_size, 8, 7, 7, 1024]) 53 | expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 54 | 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 55 | 'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 56 | 'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 57 | 'Mixed_5b', 'Mixed_5c'] 58 | self.assertItemsEqual(end_points.keys(), expected_endpoints) 59 | 60 | def testBuildOnlyUptoFinalEndpoint(self): 61 | batch_size = 5 62 | num_frames = 64 63 | height, width = 224, 224 64 | endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 65 | 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 66 | 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 67 | 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 68 | 'Mixed_5c'] 69 | for index, endpoint in enumerate(endpoints): 70 | with tf.Graph().as_default(): 71 | inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 72 | out_tensor, end_points = i3d.i3d_base( 73 | inputs, final_endpoint=endpoint) 74 | self.assertTrue(out_tensor.op.name.startswith( 75 | 'InceptionV1/' + endpoint)) 76 | self.assertItemsEqual(endpoints[:index+1], end_points) 77 | 78 | def testBuildAndCheckAllEndPointsUptoMixed5c(self): 79 | batch_size = 5 80 | num_frames = 64 81 | height, width = 224, 224 82 | 83 | inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 84 | _, end_points = i3d.i3d_base(inputs, 85 | final_endpoint='Mixed_5c') 86 | endpoints_shapes = {'Conv2d_1a_7x7': [5, 32, 112, 112, 64], 87 | 'MaxPool_2a_3x3': [5, 32, 56, 56, 64], 88 | 'Conv2d_2b_1x1': [5, 32, 56, 56, 64], 89 | 'Conv2d_2c_3x3': [5, 32, 56, 56, 192], 90 | 'MaxPool_3a_3x3': [5, 32, 28, 28, 192], 91 | 'Mixed_3b': [5, 32, 28, 28, 256], 92 | 'Mixed_3c': [5, 32, 28, 28, 480], 93 | 'MaxPool_4a_3x3': [5, 16, 14, 14, 480], 94 | 'Mixed_4b': [5, 16, 14, 14, 512], 95 | 'Mixed_4c': [5, 16, 14, 14, 512], 96 | 'Mixed_4d': [5, 16, 14, 14, 512], 97 | 'Mixed_4e': [5, 16, 14, 14, 528], 98 | 'Mixed_4f': [5, 16, 14, 14, 832], 99 | 'MaxPool_5a_2x2': [5, 8, 7, 7, 832], 100 | 'Mixed_5b': [5, 8, 7, 7, 832], 101 | 'Mixed_5c': [5, 8, 7, 7, 1024]} 102 | 103 | self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) 104 | for 
endpoint_name, expected_shape in endpoints_shapes.items():
105 |       self.assertTrue(endpoint_name in end_points)
106 |       self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
107 |                            expected_shape)
108 | 
109 |   def testHalfSizeImages(self):
110 |     batch_size = 5
111 |     num_frames = 64
112 |     height, width = 112, 112
113 | 
114 |     inputs = tf.random_uniform((batch_size, num_frames, height, width, 3))
115 |     mixed_5c, _ = i3d.i3d_base(inputs)
116 |     self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
117 |     self.assertListEqual(mixed_5c.get_shape().as_list(),
118 |                          [batch_size, 8, 4, 4, 1024])
119 | 
120 |   def testTenFrames(self):
121 |     batch_size = 5
122 |     num_frames = 10
123 |     height, width = 224, 224
124 | 
125 |     inputs = tf.random_uniform((batch_size, num_frames, height, width, 3))
126 |     mixed_5c, _ = i3d.i3d_base(inputs)
127 |     self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
128 |     self.assertListEqual(mixed_5c.get_shape().as_list(),
129 |                          [batch_size, 2, 7, 7, 1024])
130 | 
131 |   def testEvaluation(self):
132 |     batch_size = 2
133 |     num_frames = 64
134 |     height, width = 224, 224
135 |     num_classes = 1000
136 | 
137 |     eval_inputs = tf.random_uniform((batch_size, num_frames, height, width, 3))
138 |     logits, _ = i3d.i3d(eval_inputs, num_classes,
139 |                         is_training=False)
140 |     predictions = tf.argmax(logits, 1)
141 | 
142 |     with self.test_session() as sess:
143 |       sess.run(tf.global_variables_initializer())
144 |       output = sess.run(predictions)
145 |       self.assertEquals(output.shape, (batch_size,))
146 | 
147 | 
148 | if __name__ == '__main__':
149 |   tf.test.main()
150 | 
--------------------------------------------------------------------------------
/slim/nets/i3d_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/i3d_utils.pyc
--------------------------------------------------------------------------------
/slim/nets/inception.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================== 15 | """Brings all inception models under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from nets.inception_resnet_v2 import inception_resnet_v2_base 25 | from nets.inception_v1 import inception_v1 26 | from nets.inception_v1 import inception_v1_arg_scope 27 | from nets.inception_v1 import inception_v1_base 28 | from nets.inception_v2 import inception_v2 29 | from nets.inception_v2 import inception_v2_arg_scope 30 | from nets.inception_v2 import inception_v2_base 31 | from nets.inception_v3 import inception_v3 32 | from nets.inception_v3 import inception_v3_arg_scope 33 | from nets.inception_v3 import inception_v3_base 34 | from nets.inception_v4 import inception_v4 35 | from nets.inception_v4 import inception_v4_arg_scope 36 | from nets.inception_v4 import inception_v4_base 37 | # pylint: enable=unused-import 38 | -------------------------------------------------------------------------------- /slim/nets/inception.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/inception.pyc -------------------------------------------------------------------------------- /slim/nets/inception_resnet_v2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/inception_resnet_v2.pyc -------------------------------------------------------------------------------- /slim/nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 
16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001, 36 | activation_fn=tf.nn.relu, 37 | batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS, 38 | batch_norm_scale=False): 39 | """Defines the default arg scope for inception models. 40 | 41 | Args: 42 | weight_decay: The weight decay to use for regularizing the model. 43 | use_batch_norm: "If `True`, batch_norm is applied after each convolution. 44 | batch_norm_decay: Decay for batch norm moving average. 45 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 46 | in batch norm. 47 | activation_fn: Activation function for conv2d. 48 | batch_norm_updates_collections: Collection for the update ops for 49 | batch norm. 50 | batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the 51 | activations in the batch normalization layer. 52 | 53 | Returns: 54 | An `arg_scope` to use for the inception models. 55 | """ 56 | batch_norm_params = { 57 | # Decay for the moving averages. 58 | 'decay': batch_norm_decay, 59 | # epsilon to prevent 0s in variance. 60 | 'epsilon': batch_norm_epsilon, 61 | # collection containing update_ops. 62 | 'updates_collections': batch_norm_updates_collections, 63 | # use fused batch norm if possible. 64 | 'fused': None, 65 | 'scale': batch_norm_scale, 66 | } 67 | if use_batch_norm: 68 | normalizer_fn = slim.batch_norm 69 | normalizer_params = batch_norm_params 70 | else: 71 | normalizer_fn = None 72 | normalizer_params = {} 73 | # Set weight_decay for weights in Conv and FC layers. 
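  # (Note the nesting below: the outer arg_scope applies the L2 weight
  # regularizer to both conv2d and fully_connected layers, while the inner
  # one additionally fixes the weight initializer, activation function and
  # normalizer for conv2d only; the innermost scope object is what gets
  # returned to callers.)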
74 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 75 | weights_regularizer=slim.l2_regularizer(weight_decay)): 76 | with slim.arg_scope( 77 | [slim.conv2d], 78 | weights_initializer=slim.variance_scaling_initializer(), 79 | activation_fn=activation_fn, 80 | normalizer_fn=normalizer_fn, 81 | normalizer_params=normalizer_params) as sc: 82 | return sc 83 | -------------------------------------------------------------------------------- /slim/nets/inception_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/inception_utils.pyc -------------------------------------------------------------------------------- /slim/nets/inception_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/inception_v1.pyc -------------------------------------------------------------------------------- /slim/nets/inception_v2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/inception_v2.pyc -------------------------------------------------------------------------------- /slim/nets/inception_v3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/inception_v3.pyc -------------------------------------------------------------------------------- /slim/nets/inception_v4.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/inception_v4.pyc -------------------------------------------------------------------------------- /slim/nets/lenet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Contains a variant of the LeNet model definition.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def lenet(images, num_classes=10, is_training=False, 27 | dropout_keep_prob=0.5, 28 | prediction_fn=slim.softmax, 29 | scope='LeNet'): 30 | """Creates a variant of the LeNet model. 31 | 32 | Note that since the output is a set of 'logits', the values fall in the 33 | interval of (-infinity, infinity). Consequently, to convert the outputs to a 34 | probability distribution over the characters, one will need to convert them 35 | using the softmax function: 36 | 37 | logits = lenet.lenet(images, is_training=False) 38 | probabilities = tf.nn.softmax(logits) 39 | predictions = tf.argmax(logits, 1) 40 | 41 | Args: 42 | images: A batch of `Tensors` of size [batch_size, height, width, channels]. 43 | num_classes: the number of classes in the dataset. If 0 or None, the logits 44 | layer is omitted and the input features to the logits layer are returned 45 | instead. 46 | is_training: specifies whether or not we're currently training the model. 47 | This variable will determine the behaviour of the dropout layer. 48 | dropout_keep_prob: the percentage of activation values that are retained. 49 | prediction_fn: a function to get predictions out of logits. 50 | scope: Optional variable_scope. 51 | 52 | Returns: 53 | net: a 2D Tensor with the logits (pre-softmax activations) if num_classes 54 | is a non-zero integer, or the inon-dropped-out nput to the logits layer 55 | if num_classes is 0 or None. 56 | end_points: a dictionary from components of the network to the corresponding 57 | activation. 58 | """ 59 | end_points = {} 60 | 61 | with tf.variable_scope(scope, 'LeNet', [images]): 62 | net = end_points['conv1'] = slim.conv2d(images, 32, [5, 5], scope='conv1') 63 | net = end_points['pool1'] = slim.max_pool2d(net, [2, 2], 2, scope='pool1') 64 | net = end_points['conv2'] = slim.conv2d(net, 64, [5, 5], scope='conv2') 65 | net = end_points['pool2'] = slim.max_pool2d(net, [2, 2], 2, scope='pool2') 66 | net = slim.flatten(net) 67 | end_points['Flatten'] = net 68 | 69 | net = end_points['fc3'] = slim.fully_connected(net, 1024, scope='fc3') 70 | if not num_classes: 71 | return net, end_points 72 | net = end_points['dropout3'] = slim.dropout( 73 | net, dropout_keep_prob, is_training=is_training, scope='dropout3') 74 | logits = end_points['Logits'] = slim.fully_connected( 75 | net, num_classes, activation_fn=None, scope='fc4') 76 | 77 | end_points['Predictions'] = prediction_fn(logits, scope='Predictions') 78 | 79 | return logits, end_points 80 | lenet.default_image_size = 28 81 | 82 | 83 | def lenet_arg_scope(weight_decay=0.0): 84 | """Defines the default lenet argument scope. 85 | 86 | Args: 87 | weight_decay: The weight decay to use for regularizing the model. 88 | 89 | Returns: 90 | An `arg_scope` to use for the inception v3 model. 
91 | """ 92 | with slim.arg_scope( 93 | [slim.conv2d, slim.fully_connected], 94 | weights_regularizer=slim.l2_regularizer(weight_decay), 95 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 96 | activation_fn=tf.nn.relu) as sc: 97 | return sc 98 | -------------------------------------------------------------------------------- /slim/nets/lenet.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/lenet.pyc -------------------------------------------------------------------------------- /slim/nets/mobilenet/README.md: -------------------------------------------------------------------------------- 1 | # MobileNetV2 2 | This folder contains building code for MobileNetV2, based on 3 | [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) 4 | 5 | # Performance 6 | ## Latency 7 | This is the timing of [MobileNetV1](../mobilenet_v1.md) vs MobileNetV2 using 8 | TF-Lite on the large core of Pixel 1 phone. 9 | 10 | ![mnet_v1_vs_v2_pixel1_latency.png](mnet_v1_vs_v2_pixel1_latency.png) 11 | 12 | ## MACs 13 | MACs, also sometimes known as MADDs - the number of multiply-accumulates needed 14 | to compute an inference on a single image is a common metric to measure the efficiency of the model. 15 | 16 | Below is the graph comparing V2 vs a few selected networks. The size 17 | of each blob represents the number of parameters. Note for [ShuffleNet](https://arxiv.org/abs/1707.01083) there 18 | are no published size numbers. We estimate it to be comparable to MobileNetV2 numbers. 19 | 20 | ![madds_top1_accuracy](madds_top1_accuracy.png) 21 | 22 | # Pretrained models 23 | ## Imagenet Checkpoints 24 | 25 | Classification Checkpoint | MACs (M)| Parameters (M)| Top 1 Accuracy| Top 5 Accuracy | Mobile CPU (ms) Pixel 1 26 | ---------------------------|---------|---------------|---------|----|------------- 27 | | [mobilenet_v2_1.4_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz) | 582 | 6.06 | 75.0 | 92.5 | 138.0 28 | | [mobilenet_v2_1.3_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.3_224.tgz) | 509 | 5.34 | 74.4 | 92.1 | 123.0 29 | | [mobilenet_v2_1.0_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) | 300 | 3.47 | 71.8 | 91.0 | 73.8 30 | | [mobilenet_v2_1.0_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_192.tgz) | 221 | 3.47 | 70.7 | 90.1 | 55.1 31 | | [mobilenet_v2_1.0_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_160.tgz) | 154 | 3.47 | 68.8 | 89.0 | 40.2 32 | | [mobilenet_v2_1.0_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_128.tgz) | 99 | 3.47 | 65.3 | 86.9 | 27.6 33 | | [mobilenet_v2_1.0_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_96.tgz) | 56 | 3.47 | 60.3 | 83.2 | 17.6 34 | | [mobilenet_v2_0.75_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_224.tgz) | 209 | 2.61 | 69.8 | 89.6 | 55.8 35 | | [mobilenet_v2_0.75_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_192.tgz) | 153 | 2.61 | 68.7 | 88.9 | 41.6 36 | | [mobilenet_v2_0.75_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_160.tgz) | 107 | 2.61 | 66.4 | 87.3 | 30.4 37 | | 
[mobilenet_v2_0.75_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_128.tgz) | 69 | 2.61 | 63.2 | 85.3 | 21.9 38 | | [mobilenet_v2_0.75_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_96.tgz) | 39 | 2.61 | 58.8 | 81.6 | 14.2 39 | | [mobilenet_v2_0.5_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_224.tgz) | 97 | 1.95 | 65.4 | 86.4 | 28.7 40 | | [mobilenet_v2_0.5_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_192.tgz) | 71 | 1.95 | 63.9 | 85.4 | 21.1 41 | | [mobilenet_v2_0.5_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_160.tgz) | 50 | 1.95 | 61.0 | 83.2 | 14.9 42 | | [mobilenet_v2_0.5_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_128.tgz) | 32 | 1.95 | 57.7 | 80.8 | 9.9 43 | | [mobilenet_v2_0.5_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_96.tgz) | 18 | 1.95 | 51.2 | 75.8 | 6.4 44 | | [mobilenet_v2_0.35_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_224.tgz) | 59 | 1.66 | 60.3 | 82.9 | 19.7 45 | | [mobilenet_v2_0.35_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_192.tgz) | 43 | 1.66 | 58.2 | 81.2 | 14.6 46 | | [mobilenet_v2_0.35_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_160.tgz) | 30 | 1.66 | 55.7 | 79.1 | 10.5 47 | | [mobilenet_v2_0.35_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_128.tgz) | 20 | 1.66 | 50.8 | 75.0 | 6.9 48 | | [mobilenet_v2_0.35_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_96.tgz) | 11 | 1.66 | 45.5 | 70.4 | 4.5 49 | 50 | # Training 51 | The numbers above can be reproduced using slim's `train_image_classifier`. 52 | Below is the set of parameters that achieves 72.0% for full size MobileNetV2, after about 700K when trained on 8 GPU. 53 | If trained on a single GPU the full convergence is after 5.5M steps. Also note that learning rate and 54 | num_epochs_per_decay both need to be adjusted depending on how many GPUs are being 55 | used due to slim's internal averaging. 56 | 57 | ```bash 58 | --model_name="mobilenet_v2" 59 | --learning_rate=0.045 * NUM_GPUS #slim internally averages clones so we compensate 60 | --preprocessing_name="inception_v2" 61 | --label_smoothing=0.1 62 | --moving_average_decay=0.9999 63 | --batch_size= 96 64 | --num_clones = NUM_GPUS # you can use any number here between 1 and 8 depending on your hardware setup. 65 | --learning_rate_decay_factor=0.98 66 | --num_epochs_per_decay = 2.5 / NUM_GPUS # train_image_classifier does per clone epochs 67 | ``` 68 | 69 | # Example 70 | 71 | 72 | See this [ipython notebook](mobilenet_example.ipynb) or open and run the network directly in [Colaboratory](https://colab.research.google.com/github/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_example.ipynb). 
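For a quick start without the notebook, the sketch below shows the typical slim-style inference flow. This is a minimal sketch rather than the notebook's exact code: the checkpoint filename is a placeholder for whichever checkpoint above you downloaded and extracted, and the all-zeros input stands in for a real preprocessed image batch.

```python
import numpy as np
import tensorflow as tf

from nets.mobilenet import mobilenet_v2

# Build MobileNetV2 in inference mode (disables batch-norm updates and dropout).
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    logits, end_points = mobilenet_v2.mobilenet(images)

# Restore weights from an extracted checkpoint (placeholder path) and run a batch.
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, 'mobilenet_v2_1.0_224.ckpt')
    probs = sess.run(end_points['Predictions'],
                     feed_dict={images: np.zeros((1, 224, 224, 3), np.float32)})
    print(probs.argmax(axis=1))  # index into the 1001-way ImageNet label map
```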
73 | 74 | -------------------------------------------------------------------------------- /slim/nets/mobilenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/__init__.py -------------------------------------------------------------------------------- /slim/nets/mobilenet/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/__init__.pyc -------------------------------------------------------------------------------- /slim/nets/mobilenet/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /slim/nets/mobilenet/__pycache__/conv_blocks.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/__pycache__/conv_blocks.cpython-36.pyc -------------------------------------------------------------------------------- /slim/nets/mobilenet/__pycache__/mobilenet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/__pycache__/mobilenet.cpython-36.pyc -------------------------------------------------------------------------------- /slim/nets/mobilenet/__pycache__/mobilenet_v2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/__pycache__/mobilenet_v2.cpython-36.pyc -------------------------------------------------------------------------------- /slim/nets/mobilenet/conv_blocks.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/conv_blocks.pyc -------------------------------------------------------------------------------- /slim/nets/mobilenet/madds_top1_accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/madds_top1_accuracy.png -------------------------------------------------------------------------------- /slim/nets/mobilenet/mnet_v1_vs_v2_pixel1_latency.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/mnet_v1_vs_v2_pixel1_latency.png -------------------------------------------------------------------------------- /slim/nets/mobilenet/mobilenet.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/mobilenet.pyc -------------------------------------------------------------------------------- /slim/nets/mobilenet/mobilenet_v2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet/mobilenet_v2.pyc -------------------------------------------------------------------------------- /slim/nets/mobilenet_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet_v1.png -------------------------------------------------------------------------------- /slim/nets/mobilenet_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/mobilenet_v1.pyc -------------------------------------------------------------------------------- /slim/nets/mobilenet_v1_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Validate mobilenet_v1 with options for quantization.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import math 22 | import tensorflow as tf 23 | 24 | from datasets import dataset_factory 25 | from nets import mobilenet_v1 26 | from preprocessing import preprocessing_factory 27 | 28 | slim = tf.contrib.slim 29 | 30 | flags = tf.app.flags 31 | 32 | flags.DEFINE_string('master', '', 'Session master') 33 | flags.DEFINE_integer('batch_size', 250, 'Batch size') 34 | flags.DEFINE_integer('num_classes', 1001, 'Number of classes to distinguish') 35 | flags.DEFINE_integer('num_examples', 50000, 'Number of examples to evaluate') 36 | flags.DEFINE_integer('image_size', 224, 'Input image resolution') 37 | flags.DEFINE_float('depth_multiplier', 1.0, 'Depth multiplier for mobilenet') 38 | flags.DEFINE_bool('quantize', False, 'Quantize training') 39 | flags.DEFINE_string('checkpoint_dir', '', 'The directory for checkpoints') 40 | flags.DEFINE_string('eval_dir', '', 'Directory for writing eval event logs') 41 | flags.DEFINE_string('dataset_dir', '', 'Location of dataset') 42 | 43 | FLAGS = flags.FLAGS 44 | 45 | 46 | def imagenet_input(is_training): 47 | """Data reader for imagenet. 48 | 49 | Reads in imagenet data and performs pre-processing on the images. 50 | 51 | Args: 52 | is_training: bool specifying if train or validation dataset is needed. 53 | Returns: 54 | A batch of images and labels. 55 | """ 56 | if is_training: 57 | dataset = dataset_factory.get_dataset('imagenet', 'train', 58 | FLAGS.dataset_dir) 59 | else: 60 | dataset = dataset_factory.get_dataset('imagenet', 'validation', 61 | FLAGS.dataset_dir) 62 | 63 | provider = slim.dataset_data_provider.DatasetDataProvider( 64 | dataset, 65 | shuffle=is_training, 66 | common_queue_capacity=2 * FLAGS.batch_size, 67 | common_queue_min=FLAGS.batch_size) 68 | [image, label] = provider.get(['image', 'label']) 69 | 70 | image_preprocessing_fn = preprocessing_factory.get_preprocessing( 71 | 'mobilenet_v1', is_training=is_training) 72 | 73 | image = image_preprocessing_fn(image, FLAGS.image_size, FLAGS.image_size) 74 | 75 | images, labels = tf.train.batch( 76 | tensors=[image, label], 77 | batch_size=FLAGS.batch_size, 78 | num_threads=4, 79 | capacity=5 * FLAGS.batch_size) 80 | return images, labels 81 | 82 | 83 | def metrics(logits, labels): 84 | """Specify the metrics for eval. 85 | 86 | Args: 87 | logits: Logits output from the graph. 88 | labels: Ground truth labels for inputs. 89 | 90 | Returns: 91 | Eval Op for the graph. 92 | """ 93 | labels = tf.squeeze(labels) 94 | names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ 95 | 'Accuracy': tf.metrics.accuracy(tf.argmax(logits, 1), labels), 96 | 'Recall_5': tf.metrics.recall_at_k(labels, logits, 5), 97 | }) 98 | for name, value in names_to_values.items(): 99 | slim.summaries.add_scalar_summary( 100 | value, name, prefix='eval', print_summary=True) 101 | return names_to_updates.values() 102 | 103 | 104 | def build_model(): 105 | """Build the mobilenet_v1 model for evaluation. 106 | 107 | Returns: 108 | g: graph with rewrites after insertion of quantization ops and batch norm 109 | folding. 110 | eval_ops: eval ops for inference. 111 | 
112 | """ 113 | g = tf.Graph() 114 | with g.as_default(): 115 | inputs, labels = imagenet_input(is_training=False) 116 | 117 | scope = mobilenet_v1.mobilenet_v1_arg_scope( 118 | is_training=False, weight_decay=0.0) 119 | with slim.arg_scope(scope): 120 | logits, _ = mobilenet_v1.mobilenet_v1( 121 | inputs, 122 | is_training=False, 123 | depth_multiplier=FLAGS.depth_multiplier, 124 | num_classes=FLAGS.num_classes) 125 | 126 | if FLAGS.quantize: 127 | tf.contrib.quantize.create_eval_graph() 128 | 129 | eval_ops = metrics(logits, labels) 130 | 131 | return g, eval_ops 132 | 133 | 134 | def eval_model(): 135 | """Evaluates mobilenet_v1.""" 136 | g, eval_ops = build_model() 137 | with g.as_default(): 138 | num_batches = math.ceil(FLAGS.num_examples / float(FLAGS.batch_size)) 139 | slim.evaluation.evaluate_once( 140 | FLAGS.master, 141 | FLAGS.checkpoint_dir, 142 | logdir=FLAGS.eval_dir, 143 | num_evals=num_batches, 144 | eval_op=eval_ops) 145 | 146 | 147 | def main(unused_arg): 148 | eval_model() 149 | 150 | 151 | if __name__ == '__main__': 152 | tf.app.run(main) 153 | -------------------------------------------------------------------------------- /slim/nets/nets_factory.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/nets_factory.pyc -------------------------------------------------------------------------------- /slim/nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for nets_factory.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from nets import nets_factory 26 | 27 | 28 | class NetworksTest(tf.test.TestCase): 29 | 30 | def testGetNetworkFnFirstHalf(self): 31 | batch_size = 5 32 | num_classes = 1000 33 | for net in list(nets_factory.networks_map.keys())[:10]: 34 | with tf.Graph().as_default() as g, self.test_session(g): 35 | net_fn = nets_factory.get_network_fn(net, num_classes=num_classes) 36 | # Most networks use 224 as their default_image_size 37 | image_size = getattr(net_fn, 'default_image_size', 224) 38 | if net not in ['i3d', 's3dg']: 39 | inputs = tf.random_uniform( 40 | (batch_size, image_size, image_size, 3)) 41 | logits, end_points = net_fn(inputs) 42 | self.assertTrue(isinstance(logits, tf.Tensor)) 43 | self.assertTrue(isinstance(end_points, dict)) 44 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 45 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 46 | 47 | def testGetNetworkFnSecondHalf(self): 48 | batch_size = 5 49 | num_classes = 1000 50 | for net in list(nets_factory.networks_map.keys())[10:]: 51 | with tf.Graph().as_default() as g, self.test_session(g): 52 | net_fn = nets_factory.get_network_fn(net, num_classes=num_classes) 53 | # Most networks use 224 as their default_image_size 54 | image_size = getattr(net_fn, 'default_image_size', 224) 55 | if net not in ['i3d', 's3dg']: 56 | inputs = tf.random_uniform( 57 | (batch_size, image_size, image_size, 3)) 58 | logits, end_points = net_fn(inputs) 59 | self.assertTrue(isinstance(logits, tf.Tensor)) 60 | self.assertTrue(isinstance(end_points, dict)) 61 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 62 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 63 | 64 | def testGetNetworkFnVideoModels(self): 65 | batch_size = 5 66 | num_classes = 400 67 | for net in ['i3d', 's3dg']: 68 | with tf.Graph().as_default() as g, self.test_session(g): 69 | net_fn = nets_factory.get_network_fn(net, num_classes=num_classes) 70 | # Most networks use 224 as their default_image_size 71 | image_size = getattr(net_fn, 'default_image_size', 224) // 2 72 | inputs = tf.random_uniform( 73 | (batch_size, 10, image_size, image_size, 3)) 74 | logits, end_points = net_fn(inputs) 75 | self.assertTrue(isinstance(logits, tf.Tensor)) 76 | self.assertTrue(isinstance(end_points, dict)) 77 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 78 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 79 | 80 | if __name__ == '__main__': 81 | tf.test.main() 82 | -------------------------------------------------------------------------------- /slim/nets/overfeat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the model definition for the OverFeat network. 16 | 17 | The definition for the network was obtained from: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | 24 | Usage: 25 | with slim.arg_scope(overfeat.overfeat_arg_scope()): 26 | outputs, end_points = overfeat.overfeat(inputs) 27 | 28 | @@overfeat 29 | """ 30 | from __future__ import absolute_import 31 | from __future__ import division 32 | from __future__ import print_function 33 | 34 | import tensorflow as tf 35 | 36 | slim = tf.contrib.slim 37 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 38 | 39 | 40 | def overfeat_arg_scope(weight_decay=0.0005): 41 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 42 | activation_fn=tf.nn.relu, 43 | weights_regularizer=slim.l2_regularizer(weight_decay), 44 | biases_initializer=tf.zeros_initializer()): 45 | with slim.arg_scope([slim.conv2d], padding='SAME'): 46 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 47 | return arg_sc 48 | 49 | 50 | def overfeat(inputs, 51 | num_classes=1000, 52 | is_training=True, 53 | dropout_keep_prob=0.5, 54 | spatial_squeeze=True, 55 | scope='overfeat', 56 | global_pool=False): 57 | """Contains the model definition for the OverFeat network. 58 | 59 | The definition for the network was obtained from: 60 | OverFeat: Integrated Recognition, Localization and Detection using 61 | Convolutional Networks 62 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 63 | Yann LeCun, 2014 64 | http://arxiv.org/abs/1312.6229 65 | 66 | Note: All the fully_connected layers have been transformed to conv2d layers. 67 | To use in classification mode, resize input to 231x231. To use in fully 68 | convolutional mode, set spatial_squeeze to false. 69 | 70 | Args: 71 | inputs: a tensor of size [batch_size, height, width, channels]. 72 | num_classes: number of predicted classes. If 0 or None, the logits layer is 73 | omitted and the input features to the logits layer are returned instead. 74 | is_training: whether or not the model is being trained. 75 | dropout_keep_prob: the probability that activations are kept in the dropout 76 | layers during training. 77 | spatial_squeeze: whether or not the spatial dimensions of the outputs should 78 | be squeezed. Useful to remove unnecessary dimensions for classification. 79 | scope: Optional scope for the variables. 80 | global_pool: Optional boolean flag. If True, the input to the classification 81 | layer is avgpooled to size 1x1, for any input size. (This is not part 82 | of the original OverFeat.) 83 | 84 | Returns: 85 | net: the output of the logits layer (if num_classes is a non-zero integer), 86 | or the non-dropped-out input to the logits layer (if num_classes is 0 or 87 | None). 88 | end_points: a dict of tensors with intermediate activations. 
89 | """ 90 | with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: 91 | end_points_collection = sc.original_name_scope + '_end_points' 92 | # Collect outputs for conv2d, fully_connected and max_pool2d 93 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 94 | outputs_collections=end_points_collection): 95 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 96 | scope='conv1') 97 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 98 | net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') 99 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 100 | net = slim.conv2d(net, 512, [3, 3], scope='conv3') 101 | net = slim.conv2d(net, 1024, [3, 3], scope='conv4') 102 | net = slim.conv2d(net, 1024, [3, 3], scope='conv5') 103 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 104 | 105 | # Use conv2d instead of fully_connected layers. 106 | with slim.arg_scope([slim.conv2d], 107 | weights_initializer=trunc_normal(0.005), 108 | biases_initializer=tf.constant_initializer(0.1)): 109 | net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') 110 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 111 | scope='dropout6') 112 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 113 | # Convert end_points_collection into a end_point dict. 114 | end_points = slim.utils.convert_collection_to_dict( 115 | end_points_collection) 116 | if global_pool: 117 | net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool') 118 | end_points['global_pool'] = net 119 | if num_classes: 120 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 121 | scope='dropout7') 122 | net = slim.conv2d(net, num_classes, [1, 1], 123 | activation_fn=None, 124 | normalizer_fn=None, 125 | biases_initializer=tf.zeros_initializer(), 126 | scope='fc8') 127 | if spatial_squeeze: 128 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 129 | end_points[sc.name + '/fc8'] = net 130 | return net, end_points 131 | overfeat.default_image_size = 231 132 | -------------------------------------------------------------------------------- /slim/nets/overfeat.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/overfeat.pyc -------------------------------------------------------------------------------- /slim/nets/overfeat_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for slim.nets.overfeat.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import overfeat 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class OverFeatTest(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 231, 231 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = overfeat.overfeat(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 281, 281 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 2, 2, num_classes]) 50 | 51 | def testGlobalPool(self): 52 | batch_size = 1 53 | height, width = 281, 281 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False, 58 | global_pool=True) 59 | self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') 60 | self.assertListEqual(logits.get_shape().as_list(), 61 | [batch_size, 1, 1, num_classes]) 62 | 63 | def testEndPoints(self): 64 | batch_size = 5 65 | height, width = 231, 231 66 | num_classes = 1000 67 | with self.test_session(): 68 | inputs = tf.random_uniform((batch_size, height, width, 3)) 69 | _, end_points = overfeat.overfeat(inputs, num_classes) 70 | expected_names = ['overfeat/conv1', 71 | 'overfeat/pool1', 72 | 'overfeat/conv2', 73 | 'overfeat/pool2', 74 | 'overfeat/conv3', 75 | 'overfeat/conv4', 76 | 'overfeat/conv5', 77 | 'overfeat/pool5', 78 | 'overfeat/fc6', 79 | 'overfeat/fc7', 80 | 'overfeat/fc8' 81 | ] 82 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 83 | 84 | def testNoClasses(self): 85 | batch_size = 5 86 | height, width = 231, 231 87 | num_classes = None 88 | with self.test_session(): 89 | inputs = tf.random_uniform((batch_size, height, width, 3)) 90 | net, end_points = overfeat.overfeat(inputs, num_classes) 91 | expected_names = ['overfeat/conv1', 92 | 'overfeat/pool1', 93 | 'overfeat/conv2', 94 | 'overfeat/pool2', 95 | 'overfeat/conv3', 96 | 'overfeat/conv4', 97 | 'overfeat/conv5', 98 | 'overfeat/pool5', 99 | 'overfeat/fc6', 100 | 'overfeat/fc7' 101 | ] 102 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 103 | self.assertTrue(net.op.name.startswith('overfeat/fc7')) 104 | 105 | def testModelVariables(self): 106 | batch_size = 5 107 | height, width = 231, 231 108 | num_classes = 1000 109 | with self.test_session(): 110 | inputs = tf.random_uniform((batch_size, height, width, 3)) 111 | overfeat.overfeat(inputs, num_classes) 112 | expected_names = ['overfeat/conv1/weights', 113 | 'overfeat/conv1/biases', 114 | 'overfeat/conv2/weights', 115 | 'overfeat/conv2/biases', 116 | 'overfeat/conv3/weights', 117 | 'overfeat/conv3/biases', 118 | 'overfeat/conv4/weights', 119 | 'overfeat/conv4/biases', 120 | 'overfeat/conv5/weights', 121 | 
'overfeat/conv5/biases', 122 | 'overfeat/fc6/weights', 123 | 'overfeat/fc6/biases', 124 | 'overfeat/fc7/weights', 125 | 'overfeat/fc7/biases', 126 | 'overfeat/fc8/weights', 127 | 'overfeat/fc8/biases', 128 | ] 129 | model_variables = [v.op.name for v in slim.get_model_variables()] 130 | self.assertSetEqual(set(model_variables), set(expected_names)) 131 | 132 | def testEvaluation(self): 133 | batch_size = 2 134 | height, width = 231, 231 135 | num_classes = 1000 136 | with self.test_session(): 137 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 138 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False) 139 | self.assertListEqual(logits.get_shape().as_list(), 140 | [batch_size, num_classes]) 141 | predictions = tf.argmax(logits, 1) 142 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 143 | 144 | def testTrainEvalWithReuse(self): 145 | train_batch_size = 2 146 | eval_batch_size = 1 147 | train_height, train_width = 231, 231 148 | eval_height, eval_width = 281, 281 149 | num_classes = 1000 150 | with self.test_session(): 151 | train_inputs = tf.random_uniform( 152 | (train_batch_size, train_height, train_width, 3)) 153 | logits, _ = overfeat.overfeat(train_inputs) 154 | self.assertListEqual(logits.get_shape().as_list(), 155 | [train_batch_size, num_classes]) 156 | tf.get_variable_scope().reuse_variables() 157 | eval_inputs = tf.random_uniform( 158 | (eval_batch_size, eval_height, eval_width, 3)) 159 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False, 160 | spatial_squeeze=False) 161 | self.assertListEqual(logits.get_shape().as_list(), 162 | [eval_batch_size, 2, 2, num_classes]) 163 | logits = tf.reduce_mean(logits, [1, 2]) 164 | predictions = tf.argmax(logits, 1) 165 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 166 | 167 | def testForward(self): 168 | batch_size = 1 169 | height, width = 231, 231 170 | with self.test_session() as sess: 171 | inputs = tf.random_uniform((batch_size, height, width, 3)) 172 | logits, _ = overfeat.overfeat(inputs) 173 | sess.run(tf.global_variables_initializer()) 174 | output = sess.run(logits) 175 | self.assertTrue(output.any()) 176 | 177 | if __name__ == '__main__': 178 | tf.test.main() 179 | -------------------------------------------------------------------------------- /slim/nets/pix2pix_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================= 15 | """Tests for pix2pix.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | from nets import pix2pix 23 | 24 | 25 | class GeneratorTest(tf.test.TestCase): 26 | 27 | def _reduced_default_blocks(self): 28 | """Returns the default blocks, scaled down to make test run faster.""" 29 | return [pix2pix.Block(b.num_filters // 32, b.decoder_keep_prob) 30 | for b in pix2pix._default_generator_blocks()] 31 | 32 | def test_output_size_nn_upsample_conv(self): 33 | batch_size = 2 34 | height, width = 256, 256 35 | num_outputs = 4 36 | 37 | images = tf.ones((batch_size, height, width, 3)) 38 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 39 | logits, _ = pix2pix.pix2pix_generator( 40 | images, num_outputs, blocks=self._reduced_default_blocks(), 41 | upsample_method='nn_upsample_conv') 42 | 43 | with self.test_session() as session: 44 | session.run(tf.global_variables_initializer()) 45 | np_outputs = session.run(logits) 46 | self.assertListEqual([batch_size, height, width, num_outputs], 47 | list(np_outputs.shape)) 48 | 49 | def test_output_size_conv2d_transpose(self): 50 | batch_size = 2 51 | height, width = 256, 256 52 | num_outputs = 4 53 | 54 | images = tf.ones((batch_size, height, width, 3)) 55 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 56 | logits, _ = pix2pix.pix2pix_generator( 57 | images, num_outputs, blocks=self._reduced_default_blocks(), 58 | upsample_method='conv2d_transpose') 59 | 60 | with self.test_session() as session: 61 | session.run(tf.global_variables_initializer()) 62 | np_outputs = session.run(logits) 63 | self.assertListEqual([batch_size, height, width, num_outputs], 64 | list(np_outputs.shape)) 65 | 66 | def test_block_number_dictates_number_of_layers(self): 67 | batch_size = 2 68 | height, width = 256, 256 69 | num_outputs = 4 70 | 71 | images = tf.ones((batch_size, height, width, 3)) 72 | blocks = [ 73 | pix2pix.Block(64, 0.5), 74 | pix2pix.Block(128, 0), 75 | ] 76 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 77 | _, end_points = pix2pix.pix2pix_generator( 78 | images, num_outputs, blocks) 79 | 80 | num_encoder_layers = 0 81 | num_decoder_layers = 0 82 | for end_point in end_points: 83 | if end_point.startswith('encoder'): 84 | num_encoder_layers += 1 85 | elif end_point.startswith('decoder'): 86 | num_decoder_layers += 1 87 | 88 | self.assertEqual(num_encoder_layers, len(blocks)) 89 | self.assertEqual(num_decoder_layers, len(blocks)) 90 | 91 | 92 | class DiscriminatorTest(tf.test.TestCase): 93 | 94 | def _layer_output_size(self, input_size, kernel_size=4, stride=2, pad=2): 95 | return (input_size + pad * 2 - kernel_size) // stride + 1 96 | 97 | def test_four_layers(self): 98 | batch_size = 2 99 | input_size = 256 100 | 101 | output_size = self._layer_output_size(input_size) 102 | output_size = self._layer_output_size(output_size) 103 | output_size = self._layer_output_size(output_size) 104 | output_size = self._layer_output_size(output_size, stride=1) 105 | output_size = self._layer_output_size(output_size, stride=1) 106 | 107 | images = tf.ones((batch_size, input_size, input_size, 3)) 108 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 109 | logits, end_points = pix2pix.pix2pix_discriminator( 110 | images, num_filters=[64, 128, 256, 512]) 111 | self.assertListEqual([batch_size, 
output_size, output_size, 1], 112 | logits.shape.as_list()) 113 | self.assertListEqual([batch_size, output_size, output_size, 1], 114 | end_points['predictions'].shape.as_list()) 115 | 116 | def test_four_layers_no_padding(self): 117 | batch_size = 2 118 | input_size = 256 119 | 120 | output_size = self._layer_output_size(input_size, pad=0) 121 | output_size = self._layer_output_size(output_size, pad=0) 122 | output_size = self._layer_output_size(output_size, pad=0) 123 | output_size = self._layer_output_size(output_size, stride=1, pad=0) 124 | output_size = self._layer_output_size(output_size, stride=1, pad=0) 125 | 126 | images = tf.ones((batch_size, input_size, input_size, 3)) 127 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 128 | logits, end_points = pix2pix.pix2pix_discriminator( 129 | images, num_filters=[64, 128, 256, 512], padding=0) 130 | self.assertListEqual([batch_size, output_size, output_size, 1], 131 | logits.shape.as_list()) 132 | self.assertListEqual([batch_size, output_size, output_size, 1], 133 | end_points['predictions'].shape.as_list()) 134 | 135 | def test_four_layers_wrong_padding(self): 136 | batch_size = 2 137 | input_size = 256 138 | 139 | images = tf.ones((batch_size, input_size, input_size, 3)) 140 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 141 | with self.assertRaises(TypeError): 142 | pix2pix.pix2pix_discriminator( 143 | images, num_filters=[64, 128, 256, 512], padding=1.5) 144 | 145 | def test_four_layers_negative_padding(self): 146 | batch_size = 2 147 | input_size = 256 148 | 149 | images = tf.ones((batch_size, input_size, input_size, 3)) 150 | with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()): 151 | with self.assertRaises(ValueError): 152 | pix2pix.pix2pix_discriminator( 153 | images, num_filters=[64, 128, 256, 512], padding=-1) 154 | 155 | if __name__ == '__main__': 156 | tf.test.main() 157 | -------------------------------------------------------------------------------- /slim/nets/resnet_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/resnet_utils.pyc -------------------------------------------------------------------------------- /slim/nets/resnet_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/resnet_v1.pyc -------------------------------------------------------------------------------- /slim/nets/resnet_v2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/resnet_v2.pyc -------------------------------------------------------------------------------- /slim/nets/s3dg.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/s3dg.pyc -------------------------------------------------------------------------------- /slim/nets/s3dg_test.py: -------------------------------------------------------------------------------- 
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for networks.s3dg.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | from nets import s3dg 24 | 25 | 26 | class S3DGTest(tf.test.TestCase): 27 | 28 | def testBuildClassificationNetwork(self): 29 | batch_size = 5 30 | num_frames = 64 31 | height, width = 224, 224 32 | num_classes = 1000 33 | 34 | inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 35 | logits, end_points = s3dg.s3dg(inputs, num_classes) 36 | self.assertTrue(logits.op.name.startswith('InceptionV1/Logits')) 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | self.assertTrue('Predictions' in end_points) 40 | self.assertListEqual(end_points['Predictions'].get_shape().as_list(), 41 | [batch_size, num_classes]) 42 | 43 | def testBuildBaseNetwork(self): 44 | batch_size = 5 45 | num_frames = 64 46 | height, width = 224, 224 47 | 48 | inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 49 | mixed_5c, end_points = s3dg.s3dg_base(inputs) 50 | self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c')) 51 | self.assertListEqual(mixed_5c.get_shape().as_list(), 52 | [batch_size, 8, 7, 7, 1024]) 53 | expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 54 | 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 55 | 'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 56 | 'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 57 | 'Mixed_5b', 'Mixed_5c'] 58 | self.assertItemsEqual(end_points.keys(), expected_endpoints) 59 | 60 | def testBuildOnlyUptoFinalEndpointNoGating(self): 61 | batch_size = 5 62 | num_frames = 64 63 | height, width = 224, 224 64 | endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 65 | 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 66 | 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 67 | 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 68 | 'Mixed_5c'] 69 | for index, endpoint in enumerate(endpoints): 70 | with tf.Graph().as_default(): 71 | inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 72 | out_tensor, end_points = s3dg.s3dg_base( 73 | inputs, final_endpoint=endpoint, gating_startat=None) 74 | print(endpoint, out_tensor.op.name) 75 | self.assertTrue(out_tensor.op.name.startswith( 76 | 'InceptionV1/' + endpoint)) 77 | self.assertItemsEqual(endpoints[:index+1], end_points) 78 | 79 | def testBuildAndCheckAllEndPointsUptoMixed5c(self): 80 | batch_size = 5 81 | num_frames = 64 82 | height, width = 224, 224 83 | 84 | inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 85 | _, end_points = s3dg.s3dg_base(inputs, 86 | final_endpoint='Mixed_5c') 87 | endpoints_shapes = 
{'Conv2d_1a_7x7': [5, 32, 112, 112, 64], 88 | 'MaxPool_2a_3x3': [5, 32, 56, 56, 64], 89 | 'Conv2d_2b_1x1': [5, 32, 56, 56, 64], 90 | 'Conv2d_2c_3x3': [5, 32, 56, 56, 192], 91 | 'MaxPool_3a_3x3': [5, 32, 28, 28, 192], 92 | 'Mixed_3b': [5, 32, 28, 28, 256], 93 | 'Mixed_3c': [5, 32, 28, 28, 480], 94 | 'MaxPool_4a_3x3': [5, 16, 14, 14, 480], 95 | 'Mixed_4b': [5, 16, 14, 14, 512], 96 | 'Mixed_4c': [5, 16, 14, 14, 512], 97 | 'Mixed_4d': [5, 16, 14, 14, 512], 98 | 'Mixed_4e': [5, 16, 14, 14, 528], 99 | 'Mixed_4f': [5, 16, 14, 14, 832], 100 | 'MaxPool_5a_2x2': [5, 8, 7, 7, 832], 101 | 'Mixed_5b': [5, 8, 7, 7, 832], 102 | 'Mixed_5c': [5, 8, 7, 7, 1024]} 103 | 104 | self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) 105 | for endpoint_name, expected_shape in endpoints_shapes.items(): 106 | self.assertTrue(endpoint_name in end_points) 107 | self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), 108 | expected_shape) 109 | 110 | def testHalfSizeImages(self): 111 | batch_size = 5 112 | num_frames = 64 113 | height, width = 112, 112 114 | 115 | inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 116 | mixed_5c, _ = s3dg.s3dg_base(inputs) 117 | self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c')) 118 | self.assertListEqual(mixed_5c.get_shape().as_list(), 119 | [batch_size, 8, 4, 4, 1024]) 120 | 121 | def testTenFrames(self): 122 | batch_size = 5 123 | num_frames = 10 124 | height, width = 224, 224 125 | 126 | inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 127 | mixed_5c, _ = s3dg.s3dg_base(inputs) 128 | self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c')) 129 | self.assertListEqual(mixed_5c.get_shape().as_list(), 130 | [batch_size, 2, 7, 7, 1024]) 131 | 132 | def testEvaluation(self): 133 | batch_size = 2 134 | num_frames = 64 135 | height, width = 224, 224 136 | num_classes = 1000 137 | 138 | eval_inputs = tf.random_uniform((batch_size, num_frames, height, width, 3)) 139 | logits, _ = s3dg.s3dg(eval_inputs, num_classes, 140 | is_training=False) 141 | predictions = tf.argmax(logits, 1) 142 | 143 | with self.test_session() as sess: 144 | sess.run(tf.global_variables_initializer()) 145 | output = sess.run(predictions) 146 | self.assertEquals(output.shape, (batch_size,)) 147 | 148 | 149 | if __name__ == '__main__': 150 | tf.test.main() 151 | -------------------------------------------------------------------------------- /slim/nets/vgg.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ml-postech/adaptive-superpixel-for-active-learning-in-semantic-segmentation/4a489b68a5e5870f47527261f001f814522057ae/slim/nets/vgg.pyc --------------------------------------------------------------------------------