├── .gitignore
├── LICENSE
├── LICENSE-original
├── README.md
├── create_pascal_tf_record.py
├── deeplab_model.py
├── evaluate.py
├── export_inference_graph.py
├── images
│   ├── tensorboard_images.png
│   └── tensorboard_miou.png
├── inference.py
├── picture_extraction.py
├── preprocess
│   ├── annotimg
│   │   ├── 2537534_1.jpg
│   │   ├── 2537534_10.jpg
│   │   ├── 2537534_11.jpg
│   │   ├── 2537534_12.jpg
│   │   ├── 2537534_13.jpg
│   │   ├── 2537534_14.jpg
│   │   ├── 2537534_15.jpg
│   │   ├── 2537534_16.jpg
│   │   ├── 2537534_17.jpg
│   │   ├── 2537534_18.jpg
│   │   ├── 2537534_19.jpg
│   │   ├── 2537534_2.jpg
│   │   ├── 2537534_20.jpg
│   │   ├── 2537534_21.jpg
│   │   ├── 2537534_22.jpg
│   │   ├── 2537534_23.jpg
│   │   ├── 2537534_24.jpg
│   │   ├── 2537534_25.jpg
│   │   ├── 2537534_26.jpg
│   │   ├── 2537534_27.jpg
│   │   ├── 2537534_3.jpg
│   │   ├── 2537534_4.jpg
│   │   ├── 2537534_5.jpg
│   │   ├── 2537534_6.jpg
│   │   ├── 2537534_7.jpg
│   │   ├── 2537534_8.jpg
│   │   └── 2537534_9.jpg
│   ├── annotxml
│   │   ├── 2537534_1.xml
│   │   ├── 2537534_10.xml
│   │   ├── 2537534_11.xml
│   │   ├── 2537534_12.xml
│   │   ├── 2537534_13.xml
│   │   ├── 2537534_14.xml
│   │   ├── 2537534_15.xml
│   │   ├── 2537534_16.xml
│   │   ├── 2537534_17.xml
│   │   ├── 2537534_18.xml
│   │   ├── 2537534_19.xml
│   │   ├── 2537534_2.xml
│   │   ├── 2537534_20.xml
│   │   ├── 2537534_21.xml
│   │   ├── 2537534_22.xml
│   │   ├── 2537534_23.xml
│   │   ├── 2537534_24.xml
│   │   ├── 2537534_25.xml
│   │   ├── 2537534_26.xml
│   │   ├── 2537534_27.xml
│   │   ├── 2537534_3.xml
│   │   ├── 2537534_4.xml
│   │   ├── 2537534_5.xml
│   │   ├── 2537534_6.xml
│   │   ├── 2537534_7.xml
│   │   ├── 2537534_8.xml
│   │   └── 2537534_9.xml
│   ├── img
│   │   ├── 2537534_1.jpg
│   │   ├── 2537534_10.jpg
│   │   ├── 2537534_11.jpg
│   │   ├── 2537534_12.jpg
│   │   ├── 2537534_13.jpg
│   │   ├── 2537534_14.jpg
│   │   ├── 2537534_15.jpg
│   │   ├── 2537534_16.jpg
│   │   ├── 2537534_17.jpg
│   │   ├── 2537534_18.jpg
│   │   ├── 2537534_19.jpg
│   │   ├── 2537534_2.jpg
│   │   ├── 2537534_20.jpg
│   │   ├── 2537534_21.jpg
│   │   ├── 2537534_22.jpg
│   │   ├── 2537534_23.jpg
│   │   ├── 2537534_24.jpg
│   │   ├── 2537534_25.jpg
│   │   ├── 2537534_26.jpg
│   │   ├── 2537534_27.jpg
│   │   ├── 2537534_3.jpg
│   │   ├── 2537534_4.jpg
│   │   ├── 2537534_5.jpg
│   │   ├── 2537534_6.jpg
│   │   ├── 2537534_7.jpg
│   │   ├── 2537534_8.jpg
│   │   └── 2537534_9.jpg
│   ├── makeannotimage.py
│   ├── train.txt
│   └── val.txt
├── train_3class_50.py
└── utils
    ├── __init__.py
    ├── dataset_util.py
    └── preprocessing.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # PyCharm
104 | .idea
105 |
106 | # dataset
107 | dataset/VOCdevkit
108 | ini_checkpoints
109 | model
110 | *.record
111 | models
112 | dataset/inference_output/
113 | .DS_Store
114 | dataset/export_output
115 |
--------------------------------------------------------------------------------
/LICENSE-original:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Riei Ishizeki
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Image Extraction (図表抽出)
2 |
3 |
4 | This program was created by modifying the following repository (MIT license):
5 |
6 | [rishizek's repo](https://github.com/rishizek/tensorflow-deeplab-v3-plus).
7 |
8 | ## Setup
9 | Use TensorFlow (r1.6) or later and Python 3.
10 |
11 | If you want to try training, download
12 | [resnet_v2_50_2017_04_14.tar.gz](http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz) from TensorFlow's [slim](https://github.com/tensorflow/models/tree/master/research/slim)
13 | and place it under
14 | ini_checkpoints/resnet_v2_50.
15 |
16 | If you only want to try inference, place the
17 | [trained weight files](http://lab.ndl.go.jp/dataset/trainedweights.zip) under the model50 directory.
18 |
19 |
20 | ## Inference
21 | ```bash
22 | python3 picture_extraction.py --input_dir INPUT_DIR --output_dir OUTPUT_DIR
23 | ```
24 |
25 |
26 | ## Training
27 | In PASCAL VOC XML format, annotate the rectangular figure/illustration regions with "4_illustration" and the region of the whole document with "1_overall".
28 | Put the resulting XML files in preprocess/annotxml and the images in preprocess/img, then run
29 | preprocess/makeannotimage.py
30 | to generate segmentation image files in preprocess/annotimg.
31 |
32 | Once the segmentation images have been generated in annotimg, run
33 |
34 | ```bash
35 | python3 create_pascal_tf_record.py
36 | python3 train_3class_50.py
37 | ```
38 | and training will start.
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/create_pascal_tf_record.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Converts PASCAL dataset to TFRecords file format."""
3 |
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 |
8 | import argparse
9 | import io
10 | import os
11 | import sys
12 |
13 | import PIL.Image
14 | import tensorflow as tf
15 |
16 | from utils import dataset_util
17 |
18 | parser = argparse.ArgumentParser()
19 |
20 | parser.add_argument('--data_dir', type=str, default='./preprocess',
21 |                     help='Path to the directory containing the PASCAL VOC data.')
22 |
23 | parser.add_argument('--output_path', type=str, default='./preprocess',
24 |                     help='Path to the directory to create TFRecords outputs.')
25 |
26 | parser.add_argument('--train_data_list', type=str, default='./preprocess/train.txt',
27 |                     help='Path to the file listing the training data.')
28 |
29 | parser.add_argument('--valid_data_list', type=str, default='./preprocess/val.txt',
30 |                     help='Path to the file listing the validation data.')
31 |
32 | parser.add_argument('--image_data_dir', type=str, default='img',
33 |                     help='The directory containing the image data.')
34 |
35 | parser.add_argument('--label_data_dir', type=str, default='annotimg',
36 |                     help='The directory containing the augmented label data.')
37 |
38 |
39 | def dict_to_tf_example(image_path,
40 |                        label_path):
41 |     """Convert image and label to tf.Example proto.
42 |
43 |     Args:
44 |         image_path: Path to a single PASCAL image.
45 |         label_path: Path to its corresponding label.
46 |
47 |     Returns:
48 |         example: The converted tf.Example.
49 |
50 |     Raises:
51 |         ValueError: if the image pointed to by image_path is not a valid JPEG or
52 |                     if the label pointed to by label_path is not a valid PNG or
53 |                     if the size of image does not match with that of label.
54 | """ 55 | with tf.gfile.GFile(image_path, 'rb') as fid: 56 | encoded_jpg = fid.read() 57 | encoded_jpg_io = io.BytesIO(encoded_jpg) 58 | image = PIL.Image.open(encoded_jpg_io) 59 | if image.format != 'JPEG': 60 | raise ValueError('Image format not JPEG') 61 | 62 | with tf.gfile.GFile(label_path, 'rb') as fid: 63 | encoded_label = fid.read() 64 | encoded_label_io = io.BytesIO(encoded_label) 65 | label = PIL.Image.open(encoded_label_io) 66 | if label.format != 'PNG': 67 | raise ValueError('Label format not PNG') 68 | 69 | if image.size != label.size: 70 | raise ValueError('The size of image does not match with that of label.') 71 | 72 | width, height = image.size 73 | 74 | example = tf.train.Example(features=tf.train.Features(feature={ 75 | 'image/height': dataset_util.int64_feature(height), 76 | 'image/width': dataset_util.int64_feature(width), 77 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 78 | 'image/format': dataset_util.bytes_feature('jpg'.encode('utf8')), 79 | 'label/encoded': dataset_util.bytes_feature(encoded_label), 80 | 'label/format': dataset_util.bytes_feature('png'.encode('utf8')), 81 | })) 82 | return example 83 | 84 | 85 | def create_tf_record(output_filename, 86 | image_dir, 87 | label_dir, 88 | examples): 89 | """Creates a TFRecord file from examples. 90 | 91 | Args: 92 | output_filename: Path to where output file is saved. 93 | image_dir: Directory where image files are stored. 94 | label_dir: Directory where label files are stored. 95 | examples: Examples to parse and save to tf record. 96 | """ 97 | writer = tf.python_io.TFRecordWriter(output_filename) 98 | for idx, example in enumerate(examples): 99 | if idx % 100 == 0: 100 | tf.logging.info('On image %d of %d', idx, len(examples)) 101 | image_path = os.path.join(image_dir, example) 102 | label_path = os.path.join(label_dir, example[:-4]+".png") 103 | 104 | if not os.path.exists(image_path): 105 | tf.logging.warning('Could not find %s, ignoring example.', image_path) 106 | continue 107 | elif not os.path.exists(label_path): 108 | tf.logging.warning('Could not find %s, ignoring example.', label_path) 109 | continue 110 | 111 | try: 112 | tf_example = dict_to_tf_example(image_path, label_path) 113 | writer.write(tf_example.SerializeToString()) 114 | except ValueError: 115 | tf.logging.warning('Invalid example: %s, ignoring.', example) 116 | 117 | writer.close() 118 | 119 | 120 | def main(unused_argv): 121 | if not os.path.exists(FLAGS.output_path): 122 | os.makedirs(FLAGS.output_path) 123 | 124 | tf.logging.info("Reading from VOC dataset") 125 | image_dir = os.path.join(FLAGS.data_dir, FLAGS.image_data_dir) 126 | label_dir = os.path.join(FLAGS.data_dir, FLAGS.label_data_dir) 127 | 128 | if not os.path.isdir(label_dir): 129 | raise ValueError("Missing Augmentation label directory. 
" 130 | "You may download the augmented labels from the link (Thanks to DrSleep): " 131 | "https://www.dropbox.com/s/oeu149j8qtbs1x0/SegmentationClassAug.zip") 132 | train_examples = dataset_util.read_examples_list(FLAGS.train_data_list) 133 | val_examples = dataset_util.read_examples_list(FLAGS.valid_data_list) 134 | 135 | train_output_path = os.path.join(FLAGS.output_path, 'voc_train.record') 136 | val_output_path = os.path.join(FLAGS.output_path, 'voc_val.record') 137 | 138 | create_tf_record(train_output_path, image_dir, label_dir, train_examples) 139 | create_tf_record(val_output_path, image_dir, label_dir, val_examples) 140 | 141 | 142 | if __name__ == '__main__': 143 | tf.logging.set_verbosity(tf.logging.INFO) 144 | FLAGS, unparsed = parser.parse_known_args() 145 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 146 | -------------------------------------------------------------------------------- /deeplab_model.py: -------------------------------------------------------------------------------- 1 | """DeepLab v3 models based on slim library.""" 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | 9 | from tensorflow.contrib.slim.nets import resnet_v2 10 | from tensorflow.contrib import layers as layers_lib 11 | from tensorflow.contrib.framework.python.ops import arg_scope 12 | from tensorflow.contrib.layers.python.layers import layers 13 | 14 | from utils import preprocessing 15 | 16 | _BATCH_NORM_DECAY = 0.9997 17 | _WEIGHT_DECAY = 5e-4 18 | 19 | 20 | def atrous_spatial_pyramid_pooling(inputs, output_stride, batch_norm_decay, is_training, depth=256): 21 | """Atrous Spatial Pyramid Pooling. 22 | 23 | Args: 24 | inputs: A tensor of size [batch, height, width, channels]. 25 | output_stride: The ResNet unit's stride. Determines the rates for atrous convolution. 26 | the rates are (6, 12, 18) when the stride is 16, and doubled when 8. 27 | batch_norm_decay: The moving average decay when estimating layer activation 28 | statistics in batch normalization. 29 | is_training: A boolean denoting whether the input is for training. 30 | depth: The depth of the ResNet unit output. 31 | 32 | Returns: 33 | The atrous spatial pyramid pooling output. 34 | """ 35 | with tf.variable_scope("aspp"): 36 | if output_stride not in [8, 16]: 37 | raise ValueError('output_stride must be either 8 or 16.') 38 | 39 | atrous_rates = [6, 12, 18] 40 | if output_stride == 8: 41 | atrous_rates = [2*rate for rate in atrous_rates] 42 | 43 | with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)): 44 | with arg_scope([layers.batch_norm], is_training=is_training): 45 | inputs_size = tf.shape(inputs)[1:3] 46 | # (a) one 1×1 convolution and three 3×3 convolutions with rates = (6, 12, 18) when output stride = 16. 47 | # the rates are doubled when output stride = 8. 
48 |                 conv_1x1 = layers_lib.conv2d(inputs, depth, [1, 1], stride=1, scope="conv_1x1")
49 |                 conv_3x3_1 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1, rate=atrous_rates[0], scope='conv_3x3_1')
50 |                 conv_3x3_2 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1, rate=atrous_rates[1], scope='conv_3x3_2')
51 |                 conv_3x3_3 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1, rate=atrous_rates[2], scope='conv_3x3_3')
52 |
53 |                 # (b) the image-level features
54 |                 with tf.variable_scope("image_level_features"):
55 |                     # global average pooling
56 |                     image_level_features = tf.reduce_mean(inputs, [1, 2], name='global_average_pooling', keep_dims=True)
57 |                     # 1x1 convolution with 256 filters (and batch normalization)
58 |                     image_level_features = layers_lib.conv2d(image_level_features, depth, [1, 1], stride=1, scope='conv_1x1')
59 |                     # bilinearly upsample features
60 |                     image_level_features = tf.image.resize_bilinear(image_level_features, inputs_size, name='upsample')
61 |
62 |                 net = tf.concat([conv_1x1, conv_3x3_1, conv_3x3_2, conv_3x3_3, image_level_features], axis=3, name='concat')
63 |                 net = layers_lib.conv2d(net, depth, [1, 1], stride=1, scope='conv_1x1_concat')
64 |
65 |                 return net
66 |
67 |
68 | def deeplab_v3_plus_generator(num_classes,
69 |                               output_stride,
70 |                               base_architecture,
71 |                               pre_trained_model,
72 |                               batch_norm_decay,
73 |                               data_format='channels_last'):
74 |     """Generator for DeepLab v3 plus models.
75 |
76 |     Args:
77 |         num_classes: The number of possible classes for image classification.
78 |         output_stride: The ResNet unit's stride. Determines the rates for atrous convolution.
79 |             The rates are (6, 12, 18) when the stride is 16, and doubled when 8.
80 |         base_architecture: The architecture of base Resnet building block.
81 |         pre_trained_model: The path to the directory that contains pre-trained models.
82 |         batch_norm_decay: The moving average decay when estimating layer activation
83 |             statistics in batch normalization.
84 |         data_format: The input format ('channels_last', 'channels_first', or None).
85 |             If set to None, the format is dependent on whether a GPU is available.
86 |             Only 'channels_last' is supported currently.
87 |
88 |     Returns:
89 |         The model function that takes in `inputs` and `is_training` and
90 |         returns the output tensor of the DeepLab v3 model.
91 |     """
92 |     if data_format is None:
93 |         # data_format = (
94 |         #     'channels_first' if tf.test.is_built_with_cuda() else 'channels_last')
95 |         pass
96 |
97 |     if batch_norm_decay is None:
98 |         batch_norm_decay = _BATCH_NORM_DECAY
99 |
100 |     if base_architecture not in ['resnet_v2_50', 'resnet_v2_101']:
101 |         raise ValueError("'base_architecture' must be either 'resnet_v2_50' or 'resnet_v2_101'.")
102 |
103 |     if base_architecture == 'resnet_v2_50':
104 |         base_model = resnet_v2.resnet_v2_50
105 |     else:
106 |         base_model = resnet_v2.resnet_v2_101
107 |
108 |     def model(inputs, is_training):
109 |         """Constructs the ResNet model given the inputs."""
110 |         if data_format == 'channels_first':
111 |             # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
112 |             # This provides a large performance boost on GPU.
See 113 | # https://www.tensorflow.org/performance/performance_guide#data_formats 114 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 115 | 116 | # tf.logging.info('net shape: {}'.format(inputs.shape)) 117 | # encoder 118 | with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)): 119 | logits, end_points = base_model(inputs, 120 | num_classes=None, 121 | is_training=is_training, 122 | global_pool=False, 123 | output_stride=output_stride) 124 | 125 | if is_training: 126 | exclude = [base_architecture + '/logits', 'global_step'] 127 | variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude) 128 | tf.train.init_from_checkpoint(pre_trained_model, 129 | {v.name.split(':')[0]: v for v in variables_to_restore}) 130 | 131 | inputs_size = tf.shape(inputs)[1:3] 132 | net = end_points[base_architecture + '/block4'] 133 | encoder_output = atrous_spatial_pyramid_pooling(net, output_stride, batch_norm_decay, is_training) 134 | 135 | with tf.variable_scope("decoder"): 136 | with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)): 137 | with arg_scope([layers.batch_norm], is_training=is_training): 138 | with tf.variable_scope("low_level_features"): 139 | low_level_features = end_points[base_architecture + '/block1/unit_3/bottleneck_v2/conv1'] 140 | low_level_features = layers_lib.conv2d(low_level_features, 48, 141 | [1, 1], stride=1, scope='conv_1x1') 142 | low_level_features_size = tf.shape(low_level_features)[1:3] 143 | 144 | with tf.variable_scope("upsampling_logits"): 145 | net = tf.image.resize_bilinear(encoder_output, low_level_features_size, name='upsample_1') 146 | net = tf.concat([net, low_level_features], axis=3, name='concat') 147 | net = layers_lib.conv2d(net, 256, [3, 3], stride=1, scope='conv_3x3_1') 148 | net = layers_lib.conv2d(net, 256, [3, 3], stride=1, scope='conv_3x3_2') 149 | net = layers_lib.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='conv_1x1') 150 | logits = tf.image.resize_bilinear(net, inputs_size, name='upsample_2') 151 | 152 | return logits 153 | 154 | return model 155 | 156 | 157 | def deeplabv3_plus_model_fn(features, labels, mode, params): 158 | """Model function for PASCAL VOC.""" 159 | if isinstance(features, dict): 160 | features = features['feature'] 161 | 162 | images = tf.cast( 163 | tf.map_fn(preprocessing.mean_image_addition, features), 164 | tf.uint8) 165 | 166 | network = deeplab_v3_plus_generator(params['num_classes'], 167 | params['output_stride'], 168 | params['base_architecture'], 169 | params['pre_trained_model'], 170 | params['batch_norm_decay']) 171 | 172 | logits = network(features, mode == tf.estimator.ModeKeys.TRAIN) 173 | 174 | pred_classes = tf.expand_dims(tf.argmax(logits, axis=3, output_type=tf.int32), axis=3) 175 | 176 | pred_decoded_labels = tf.py_func(preprocessing.decode_labels, 177 | [pred_classes, params['batch_size'], params['num_classes']], 178 | tf.uint8) 179 | 180 | predictions = { 181 | 'classes': pred_classes, 182 | 'probabilities': tf.nn.softmax(logits, name='softmax_tensor'), 183 | 'decoded_labels': pred_decoded_labels 184 | } 185 | 186 | if mode == tf.estimator.ModeKeys.PREDICT: 187 | # Delete 'decoded_labels' from predictions because custom functions produce error when used with saved_model 188 | predictions_without_decoded_labels = predictions.copy() 189 | del predictions_without_decoded_labels['decoded_labels'] 190 | 191 | return tf.estimator.EstimatorSpec( 192 | mode=mode, 193 | 
predictions=predictions, 194 | export_outputs={ 195 | 'preds': tf.estimator.export.PredictOutput( 196 | predictions_without_decoded_labels) 197 | }) 198 | 199 | gt_decoded_labels = tf.py_func(preprocessing.decode_labels, 200 | [labels, params['batch_size'], params['num_classes']], tf.uint8) 201 | 202 | labels = tf.squeeze(labels, axis=3) # reduce the channel dimension. 203 | 204 | logits_by_num_classes = tf.reshape(logits, [-1, params['num_classes']]) 205 | labels_flat = tf.reshape(labels, [-1, ]) 206 | 207 | valid_indices = tf.to_int32(labels_flat <= params['num_classes'] - 1) 208 | valid_logits = tf.dynamic_partition(logits_by_num_classes, valid_indices, num_partitions=2)[1] 209 | valid_labels = tf.dynamic_partition(labels_flat, valid_indices, num_partitions=2)[1] 210 | 211 | preds_flat = tf.reshape(pred_classes, [-1, ]) 212 | valid_preds = tf.dynamic_partition(preds_flat, valid_indices, num_partitions=2)[1] 213 | confusion_matrix = tf.confusion_matrix(valid_labels, valid_preds, num_classes=params['num_classes']) 214 | 215 | predictions['valid_preds'] = valid_preds 216 | predictions['valid_labels'] = valid_labels 217 | predictions['confusion_matrix'] = confusion_matrix 218 | 219 | cross_entropy = tf.losses.sparse_softmax_cross_entropy( 220 | logits=valid_logits, labels=valid_labels) 221 | 222 | # Create a tensor named cross_entropy for logging purposes. 223 | tf.identity(cross_entropy, name='cross_entropy') 224 | tf.summary.scalar('cross_entropy', cross_entropy) 225 | 226 | if not params['freeze_batch_norm']: 227 | train_var_list = [v for v in tf.trainable_variables()] 228 | else: 229 | train_var_list = [v for v in tf.trainable_variables() 230 | if 'beta' not in v.name and 'gamma' not in v.name] 231 | 232 | # Add weight decay to the loss. 233 | with tf.variable_scope("total_loss"): 234 | loss = cross_entropy + params.get('weight_decay', _WEIGHT_DECAY) * tf.add_n( 235 | [tf.nn.l2_loss(v) for v in train_var_list]) 236 | # loss = tf.losses.get_total_loss() # obtain the regularization losses as well 237 | 238 | if mode == tf.estimator.ModeKeys.TRAIN: 239 | tf.summary.image('images', 240 | tf.concat(axis=2, values=[images, gt_decoded_labels, pred_decoded_labels]), 241 | max_outputs=params['tensorboard_images_max_outputs']) # Concatenate row-wise. 242 | 243 | global_step = tf.train.get_or_create_global_step() 244 | 245 | if params['learning_rate_policy'] == 'piecewise': 246 | # Scale the learning rate linearly with the batch size. When the batch size 247 | # is 128, the learning rate should be 0.1. 248 | initial_learning_rate = 0.1 * params['batch_size'] / 128 249 | batches_per_epoch = params['num_train'] / params['batch_size'] 250 | # Multiply the learning rate by 0.1 at 100, 150, and 200 epochs. 
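# Worked example with assumed values (added, not from the original source): with
# batch_size=16 and num_train=10582, initial_learning_rate = 0.1 * 16 / 128 =
# 0.0125 and batches_per_epoch = 10582 / 16 ≈ 661, so the boundaries below fall
# near steps 66k, 99k and 132k, where the rate drops to 0.00125, 0.000125 and
# 0.0000125 in turn.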
251 | boundaries = [int(batches_per_epoch * epoch) for epoch in [100, 150, 200]] 252 | values = [initial_learning_rate * decay for decay in [1, 0.1, 0.01, 0.001]] 253 | learning_rate = tf.train.piecewise_constant( 254 | tf.cast(global_step, tf.int32), boundaries, values) 255 | elif params['learning_rate_policy'] == 'poly': 256 | learning_rate = tf.train.polynomial_decay( 257 | params['initial_learning_rate'], 258 | tf.cast(global_step, tf.int32) - params['initial_global_step'], 259 | params['max_iter'], params['end_learning_rate'], power=params['power']) 260 | else: 261 | raise ValueError('Learning rate policy must be "piecewise" or "poly"') 262 | 263 | # Create a tensor named learning_rate for logging purposes 264 | tf.identity(learning_rate, name='learning_rate') 265 | tf.summary.scalar('learning_rate', learning_rate) 266 | 267 | optimizer = tf.train.MomentumOptimizer( 268 | learning_rate=learning_rate, 269 | momentum=params['momentum']) 270 | 271 | # Batch norm requires update ops to be added as a dependency to the train_op 272 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 273 | with tf.control_dependencies(update_ops): 274 | train_op = optimizer.minimize(loss, global_step, var_list=train_var_list) 275 | else: 276 | train_op = None 277 | 278 | accuracy = tf.metrics.accuracy( 279 | valid_labels, valid_preds) 280 | mean_iou = tf.metrics.mean_iou(valid_labels, valid_preds, params['num_classes']) 281 | metrics = {'px_accuracy': accuracy, 'mean_iou': mean_iou} 282 | 283 | # Create a tensor named train_accuracy for logging purposes 284 | tf.identity(accuracy[1], name='train_px_accuracy') 285 | tf.summary.scalar('train_px_accuracy', accuracy[1]) 286 | 287 | def compute_mean_iou(total_cm, name='mean_iou'): 288 | """Compute the mean intersection-over-union via the confusion matrix.""" 289 | sum_over_row = tf.to_float(tf.reduce_sum(total_cm, 0)) 290 | sum_over_col = tf.to_float(tf.reduce_sum(total_cm, 1)) 291 | cm_diag = tf.to_float(tf.diag_part(total_cm)) 292 | denominator = sum_over_row + sum_over_col - cm_diag 293 | 294 | # The mean is only computed over classes that appear in the 295 | # label or prediction tensor. If the denominator is 0, we need to 296 | # ignore the class. 297 | num_valid_entries = tf.reduce_sum(tf.cast( 298 | tf.not_equal(denominator, 0), dtype=tf.float32)) 299 | 300 | # If the value of the denominator is 0, set it to 1 to avoid 301 | # zero division. 302 | denominator = tf.where( 303 | tf.greater(denominator, 0), 304 | denominator, 305 | tf.ones_like(denominator)) 306 | iou = tf.div(cm_diag, denominator) 307 | 308 | for i in range(params['num_classes']): 309 | tf.identity(iou[i], name='train_iou_class{}'.format(i)) 310 | tf.summary.scalar('train_iou_class{}'.format(i), iou[i]) 311 | 312 | # If the number of valid entries is 0 (no classes) we return 0. 
313 | result = tf.where( 314 | tf.greater(num_valid_entries, 0), 315 | tf.reduce_sum(iou, name=name) / num_valid_entries, 316 | 0) 317 | return result 318 | 319 | train_mean_iou = compute_mean_iou(mean_iou[1]) 320 | 321 | tf.identity(train_mean_iou, name='train_mean_iou') 322 | tf.summary.scalar('train_mean_iou', train_mean_iou) 323 | 324 | return tf.estimator.EstimatorSpec( 325 | mode=mode, 326 | predictions=predictions, 327 | loss=loss, 328 | train_op=train_op, 329 | eval_metric_ops=metrics) 330 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | """Evaluate a DeepLab v3 model.""" 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import argparse 8 | import os 9 | import sys 10 | 11 | import tensorflow as tf 12 | 13 | import deeplab_model 14 | from utils import preprocessing 15 | from utils import dataset_util 16 | 17 | import numpy as np 18 | import timeit 19 | 20 | parser = argparse.ArgumentParser() 21 | 22 | parser.add_argument('--image_data_dir', type=str, default='dataset/VOCdevkit/VOC2012/JPEGImages', 23 | help='The directory containing the image data.') 24 | 25 | parser.add_argument('--label_data_dir', type=str, default='dataset/VOCdevkit/VOC2012/SegmentationClassAug', 26 | help='The directory containing the ground truth label data.') 27 | 28 | parser.add_argument('--evaluation_data_list', type=str, default='./dataset/val.txt', 29 | help='Path to the file listing the evaluation images.') 30 | 31 | parser.add_argument('--model_dir', type=str, default='./model', 32 | help="Base directory for the model. " 33 | "Make sure 'model_checkpoint_path' given in 'checkpoint' file matches " 34 | "with checkpoint name.") 35 | 36 | parser.add_argument('--base_architecture', type=str, default='resnet_v2_101', 37 | choices=['resnet_v2_50', 'resnet_v2_101'], 38 | help='The architecture of base Resnet building block.') 39 | 40 | parser.add_argument('--output_stride', type=int, default=16, 41 | choices=[8, 16], 42 | help='Output stride for DeepLab v3. Currently 8 or 16 is supported.') 43 | 44 | _NUM_CLASSES = 21 45 | 46 | 47 | def main(unused_argv): 48 | # Using the Winograd non-fused algorithms provides a small performance boost. 
49 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 50 | 51 | examples = dataset_util.read_examples_list(FLAGS.evaluation_data_list) 52 | image_files = [os.path.join(FLAGS.image_data_dir, filename) + '.jpg' for filename in examples] 53 | label_files = [os.path.join(FLAGS.label_data_dir, filename) + '.png' for filename in examples] 54 | 55 | features, labels = preprocessing.eval_input_fn(image_files, label_files) 56 | 57 | predictions = deeplab_model.deeplabv3_plus_model_fn( 58 | features, 59 | labels, 60 | tf.estimator.ModeKeys.EVAL, 61 | params={ 62 | 'output_stride': FLAGS.output_stride, 63 | 'batch_size': 1, # Batch size must be 1 because the images' size may differ 64 | 'base_architecture': FLAGS.base_architecture, 65 | 'pre_trained_model': None, 66 | 'batch_norm_decay': None, 67 | 'num_classes': _NUM_CLASSES, 68 | 'freeze_batch_norm': True 69 | }).predictions 70 | 71 | # Manually load the latest checkpoint 72 | saver = tf.train.Saver() 73 | with tf.Session() as sess: 74 | ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir) 75 | saver.restore(sess, ckpt.model_checkpoint_path) 76 | 77 | # Loop through the batches and store predictions and labels 78 | step = 1 79 | sum_cm = np.zeros((_NUM_CLASSES, _NUM_CLASSES), dtype=np.int32) 80 | start = timeit.default_timer() 81 | while True: 82 | try: 83 | preds = sess.run(predictions) 84 | sum_cm += preds['confusion_matrix'] 85 | if not step % 100: 86 | stop = timeit.default_timer() 87 | tf.logging.info("current step = {} ({:.3f} sec)".format(step, stop-start)) 88 | start = timeit.default_timer() 89 | step += 1 90 | except tf.errors.OutOfRangeError: 91 | break 92 | 93 | def compute_mean_iou(total_cm): 94 | """Compute the mean intersection-over-union via the confusion matrix.""" 95 | sum_over_row = np.sum(total_cm, axis=0).astype(float) 96 | sum_over_col = np.sum(total_cm, axis=1).astype(float) 97 | cm_diag = np.diagonal(total_cm).astype(float) 98 | denominator = sum_over_row + sum_over_col - cm_diag 99 | 100 | # The mean is only computed over classes that appear in the 101 | # label or prediction tensor. If the denominator is 0, we need to 102 | # ignore the class. 103 | num_valid_entries = np.sum((denominator != 0).astype(float)) 104 | 105 | # If the value of the denominator is 0, set it to 1 to avoid 106 | # zero division. 107 | denominator = np.where( 108 | denominator > 0, 109 | denominator, 110 | np.ones_like(denominator)) 111 | 112 | ious = cm_diag / denominator 113 | 114 | print('Intersection over Union for each class:') 115 | for i, iou in enumerate(ious): 116 | print(' class {}: {:.4f}'.format(i, iou)) 117 | 118 | # If the number of valid entries is 0 (no classes) we return 0. 119 | m_iou = np.where( 120 | num_valid_entries > 0, 121 | np.sum(ious) / num_valid_entries, 122 | 0) 123 | m_iou = float(m_iou) 124 | print('mean Intersection over Union: {:.4f}'.format(float(m_iou))) 125 | 126 | def compute_accuracy(total_cm): 127 | """Compute the accuracy via the confusion matrix.""" 128 | denominator = total_cm.sum().astype(float) 129 | cm_diag_sum = np.diagonal(total_cm).sum().astype(float) 130 | 131 | # If the number of valid entries is 0 (no classes) we return 0. 
132 | accuracy = np.where( 133 | denominator > 0, 134 | cm_diag_sum / denominator, 135 | 0) 136 | accuracy = float(accuracy) 137 | print('Pixel Accuracy: {:.4f}'.format(float(accuracy))) 138 | 139 | compute_mean_iou(sum_cm) 140 | compute_accuracy(sum_cm) 141 | 142 | 143 | if __name__ == '__main__': 144 | tf.logging.set_verbosity(tf.logging.INFO) 145 | FLAGS, unparsed = parser.parse_known_args() 146 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 147 | -------------------------------------------------------------------------------- /export_inference_graph.py: -------------------------------------------------------------------------------- 1 | """Export inference graph.""" 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import argparse 8 | import os 9 | import sys 10 | 11 | import tensorflow as tf 12 | 13 | import deeplab_model 14 | from utils import preprocessing 15 | 16 | 17 | parser = argparse.ArgumentParser() 18 | 19 | parser.add_argument('--model_dir', type=str, default='./model', 20 | help="Base directory for the model. " 21 | "Make sure 'model_checkpoint_path' given in 'checkpoint' file matches " 22 | "with checkpoint name.") 23 | 24 | parser.add_argument('--export_dir', type=str, default='dataset/export_output', 25 | help='The directory where the exported SavedModel will be stored.') 26 | 27 | parser.add_argument('--base_architecture', type=str, default='resnet_v2_101', 28 | choices=['resnet_v2_50', 'resnet_v2_101'], 29 | help='The architecture of base Resnet building block.') 30 | 31 | parser.add_argument('--output_stride', type=int, default=16, 32 | choices=[8, 16], 33 | help='Output stride for DeepLab v3. Currently 8 or 16 is supported.') 34 | 35 | 36 | _NUM_CLASSES = 21 37 | 38 | 39 | def main(unused_argv): 40 | # Using the Winograd non-fused algorithms provides a small performance boost. 
41 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 42 | 43 | model = tf.estimator.Estimator( 44 | model_fn=deeplab_model.deeplabv3_plus_model_fn, 45 | model_dir=FLAGS.model_dir, 46 | params={ 47 | 'output_stride': FLAGS.output_stride, 48 | 'batch_size': 1, # Batch size must be 1 because the images' size may differ 49 | 'base_architecture': FLAGS.base_architecture, 50 | 'pre_trained_model': None, 51 | 'batch_norm_decay': None, 52 | 'num_classes': _NUM_CLASSES, 53 | }) 54 | 55 | # Export the model 56 | def serving_input_receiver_fn(): 57 | image = tf.placeholder(tf.float32, [None, None, None, 3], name='image_tensor') 58 | receiver_tensors = {'image': image} 59 | features = tf.map_fn(preprocessing.mean_image_subtraction, image) 60 | return tf.estimator.export.ServingInputReceiver( 61 | features=features, 62 | receiver_tensors=receiver_tensors) 63 | 64 | model.export_savedmodel(FLAGS.export_dir, serving_input_receiver_fn) 65 | 66 | 67 | if __name__ == '__main__': 68 | tf.logging.set_verbosity(tf.logging.INFO) 69 | FLAGS, unparsed = parser.parse_known_args() 70 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 71 | -------------------------------------------------------------------------------- /images/tensorboard_images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/images/tensorboard_images.png -------------------------------------------------------------------------------- /images/tensorboard_miou.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/images/tensorboard_miou.png -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | """Run inference a DeepLab v3 model using tf.estimator API.""" 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import argparse 8 | import os 9 | import sys 10 | 11 | import tensorflow as tf 12 | 13 | import deeplab_model 14 | from utils import preprocessing 15 | from utils import dataset_util 16 | 17 | from PIL import Image 18 | import matplotlib.pyplot as plt 19 | 20 | from tensorflow.python import debug as tf_debug 21 | 22 | parser = argparse.ArgumentParser() 23 | 24 | parser.add_argument('--data_dir', type=str, default='dataset/VOCdevkit/VOC2012/JPEGImages', 25 | help='The directory containing the image data.') 26 | 27 | parser.add_argument('--output_dir', type=str, default='./dataset/inference_output', 28 | help='Path to the directory to generate the inference results') 29 | 30 | parser.add_argument('--infer_data_list', type=str, default='./dataset/sample_images_list.txt', 31 | help='Path to the file listing the inferring images.') 32 | 33 | parser.add_argument('--model_dir', type=str, default='./model', 34 | help="Base directory for the model. 
" 35 | "Make sure 'model_checkpoint_path' given in 'checkpoint' file matches " 36 | "with checkpoint name.") 37 | 38 | parser.add_argument('--base_architecture', type=str, default='resnet_v2_101', 39 | choices=['resnet_v2_50', 'resnet_v2_101'], 40 | help='The architecture of base Resnet building block.') 41 | 42 | parser.add_argument('--output_stride', type=int, default=16, 43 | choices=[8, 16], 44 | help='Output stride for DeepLab v3. Currently 8 or 16 is supported.') 45 | 46 | parser.add_argument('--debug', action='store_true', 47 | help='Whether to use debugger to track down bad values during training.') 48 | 49 | _NUM_CLASSES = 3 50 | 51 | 52 | def main(unused_argv): 53 | # Using the Winograd non-fused algorithms provides a small performance boost. 54 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 55 | 56 | pred_hooks = None 57 | if FLAGS.debug: 58 | debug_hook = tf_debug.LocalCLIDebugHook() 59 | pred_hooks = [debug_hook] 60 | 61 | model = tf.estimator.Estimator( 62 | model_fn=deeplab_model.deeplabv3_plus_model_fn, 63 | model_dir=FLAGS.model_dir, 64 | params={ 65 | 'output_stride': FLAGS.output_stride, 66 | 'batch_size': 1, # Batch size must be 1 because the images' size may differ 67 | 'base_architecture': FLAGS.base_architecture, 68 | 'pre_trained_model': None, 69 | 'batch_norm_decay': None, 70 | 'num_classes': _NUM_CLASSES, 71 | }) 72 | 73 | examples = dataset_util.read_examples_list(FLAGS.infer_data_list) 74 | image_files = [os.path.join(FLAGS.data_dir, filename) for filename in examples] 75 | 76 | predictions = model.predict( 77 | input_fn=lambda: preprocessing.eval_input_fn(image_files), 78 | hooks=pred_hooks) 79 | 80 | output_dir = FLAGS.output_dir 81 | if not os.path.exists(output_dir): 82 | os.makedirs(output_dir) 83 | 84 | for pred_dict, image_path in zip(predictions, image_files): 85 | image_basename = os.path.splitext(os.path.basename(image_path))[0] 86 | output_filename = image_basename + '_mask.png' 87 | path_to_output = os.path.join(output_dir, output_filename) 88 | 89 | print("generating:", path_to_output) 90 | mask = pred_dict['decoded_labels'] 91 | mask = Image.fromarray(mask) 92 | plt.axis('off') 93 | plt.imshow(mask) 94 | plt.savefig(path_to_output, bbox_inches='tight') 95 | 96 | 97 | if __name__ == '__main__': 98 | tf.logging.set_verbosity(tf.logging.INFO) 99 | FLAGS, unparsed = parser.parse_known_args() 100 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 101 | -------------------------------------------------------------------------------- /picture_extraction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Run inference a DeepLab v3 model using tf.estimator API.""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import cv2 9 | import argparse 10 | import os 11 | import sys 12 | import glob 13 | import tensorflow as tf 14 | 15 | import deeplab_model 16 | from utils import preprocessing 17 | from utils import dataset_util 18 | 19 | from PIL import Image 20 | 21 | import matplotlib 22 | matplotlib.use('Agg') 23 | import matplotlib.pyplot as plt 24 | import xml.etree.ElementTree as ET 25 | from tensorflow.python import debug as tf_debug 26 | 27 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 28 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = "1" 29 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 30 | 31 | parser = argparse.ArgumentParser() 32 | 33 | parser.add_argument('--input_dir', type=str, 34 | help='The directory 
containing the image data.') 35 | 36 | parser.add_argument('--output_dir', type=str, 37 | help='Path to the directory to generate the inference results') 38 | 39 | parser.add_argument('--model_dir', type=str, default='./model50', 40 | help="Base directory for the model. " 41 | "Make sure 'model_checkpoint_path' given in 'checkpoint' file matches " 42 | "with checkpoint name.") 43 | parser.add_argument('--base_architecture', type=str, default='resnet_v2_50', 44 | choices=['resnet_v2_50', 'resnet_v2_101'], 45 | help='The architecture of base Resnet building block.') 46 | 47 | parser.add_argument('--output_stride', type=int, default=16, 48 | choices=[8, 16], 49 | help='Output stride for DeepLab v3. Currently 8 or 16 is supported.') 50 | 51 | parser.add_argument('--debug', action='store_true', 52 | help='Whether to use debugger to track down bad values during training.') 53 | 54 | _NUM_CLASSES = 3 55 | 56 | OFFSET=10 57 | 58 | 59 | def make_xml(filepath,width,height,cordinates_lst): 60 | filename=os.path.basename(filepath) 61 | root_xml = ET.Element('annotation') 62 | ET.SubElement(root_xml, 'folder').text='annot' 63 | 64 | ET.SubElement(root_xml, 'filename').text=filename 65 | 66 | ET.SubElement(root_xml, 'path').text=filename 67 | 68 | xml_source=ET.SubElement(root_xml, 'source') 69 | ET.SubElement(xml_source, 'database').text="Unknown" 70 | 71 | xml_size=ET.SubElement(root_xml, 'size') 72 | ET.SubElement(xml_size, 'width').text=str(width) 73 | ET.SubElement(xml_size, 'height').text=str(height) 74 | ET.SubElement(xml_size, 'depth').text=str(3) 75 | 76 | ET.SubElement(root_xml, 'segmented').text=str(0) 77 | 78 | for index,cordinates in enumerate(cordinates_lst): 79 | #print(cordinates) 80 | xml_obj=ET.SubElement(root_xml, 'object') 81 | ET.SubElement(xml_obj, 'name').text=cordinates[0] 82 | ET.SubElement(xml_obj, 'pose').text=str(index) 83 | ET.SubElement(xml_obj, 'truncated').text=str(0) 84 | ET.SubElement(xml_obj, 'difficult').text=str(0) 85 | 86 | xml_bndbox=ET.SubElement(xml_obj, 'bndbox') 87 | ET.SubElement(xml_bndbox, 'xmin').text=str(cordinates[1]) 88 | ET.SubElement(xml_bndbox, 'ymin').text=str(cordinates[2]) 89 | ET.SubElement(xml_bndbox, 'xmax').text=str(cordinates[3]) 90 | ET.SubElement(xml_bndbox, 'ymax').text=str(cordinates[4]) 91 | 92 | tree=ET.ElementTree(root_xml) 93 | #print(tree) 94 | tree.write(os.path.join(FLAGS.output_dir,filename)+".xml",xml_declaration=False) 95 | 96 | def main(unused_argv): 97 | # Using the Winograd non-fused algorithms provides a small performance boost. 
98 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 99 | 100 | pred_hooks = None 101 | if FLAGS.debug: 102 | debug_hook = tf_debug.LocalCLIDebugHook() 103 | pred_hooks = [debug_hook] 104 | 105 | model = tf.estimator.Estimator( 106 | model_fn=deeplab_model.deeplabv3_plus_model_fn, 107 | model_dir=FLAGS.model_dir, 108 | params={ 109 | 'output_stride': FLAGS.output_stride, 110 | 'batch_size': 1, # Batch size must be 1 because the images' size may differ 111 | 'base_architecture': FLAGS.base_architecture, 112 | 'pre_trained_model': None, 113 | 'batch_norm_decay': None, 114 | 'num_classes': _NUM_CLASSES, 115 | }) 116 | 117 | #examples = dataset_util.read_examples_list(FLAGS.infer_data_list) 118 | image_files = glob.glob(os.path.join(FLAGS.input_dir,'*')) 119 | 120 | predictions = model.predict( 121 | input_fn=lambda: preprocessing.eval_input_fn(image_files,1600,1600), 122 | hooks=pred_hooks) 123 | 124 | output_dir = FLAGS.output_dir 125 | if not os.path.exists(output_dir): 126 | os.makedirs(output_dir) 127 | 128 | for pred_dict, image_path in zip(predictions, image_files): 129 | img_raw = cv2.imread(image_path,1) 130 | height_r,width_r=img_raw.shape[:2] 131 | img=cv2.resize(img_raw,(1600,1600)) 132 | image_basename = os.path.splitext(os.path.basename(image_path))[0] 133 | mask = pred_dict['decoded_labels'] 134 | mask_g = mask[:,:,0] 135 | ret,mask_g = cv2.threshold(mask_g,120,255,0) 136 | _, contours, hierarchy = cv2.findContours(mask_g, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) 137 | contours.sort(key=cv2.contourArea, reverse=True) 138 | height,width=img.shape[:2] 139 | cord_lst=[] 140 | for index,c in enumerate(contours): 141 | if cv2.contourArea(c) 2 | アノテーション作業用 3 | 2537534_1.jpg 4 | C:\Users\t-aoike\Documents\アノテーション作業用\2537534_1.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 84 21 | 52 22 | 787 23 | 996 24 | 25 | 26 | 27 | 3_typography 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 437 33 | 88 34 | 471 35 | 234 36 | 37 | 38 | 39 | 3_typography 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 647 45 | 97 46 | 713 47 | 244 48 | 49 | 50 | 51 | 5_stamp 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 634 57 | 274 58 | 712 59 | 364 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 382 69 | 91 70 | 423 71 | 236 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 391 81 | 252 82 | 468 83 | 571 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_10.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_10.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_10.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 77 21 | 57 22 | 1524 23 | 1005 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 901 33 | 195 34 | 1341 35 | 875 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 212 45 | 304 46 | 677 47 | 924 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 1352 57 | 205 58 | 1391 59 | 409 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 649 69 | 236 70 | 687 71 | 335 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 419 81 | 141 82 | 525 83 | 164 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 1082 93 | 139 94 | 1187 95 | 164 96 | 
97 | 98 | 99 | 5_stamp 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 645 105 | 194 106 | 689 107 | 231 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_11.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_11.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_11.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 70 21 | 61 22 | 1518 23 | 1008 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 876 33 | 177 34 | 1093 35 | 454 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 250 45 | 698 46 | 371 47 | 852 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 230 57 | 175 58 | 262 59 | 326 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 367 69 | 347 70 | 398 71 | 468 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 482 81 | 192 82 | 514 83 | 264 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 701 93 | 408 94 | 743 95 | 531 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 701 105 | 593 106 | 739 107 | 712 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 400 117 | 741 118 | 435 119 | 843 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 707 129 | 177 130 | 753 131 | 332 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 206 141 | 699 142 | 240 143 | 833 144 | 145 | 146 | 147 | 4_illustration 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 1174 153 | 175 154 | 1418 155 | 462 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 1294 165 | 177 166 | 1326 167 | 202 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 414 177 | 121 178 | 523 179 | 145 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 1077 189 | 138 190 | 1180 191 | 161 192 | 193 | 194 | 195 | 4_illustration 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 289 201 | 162 202 | 428 203 | 288 204 | 205 | 206 | 207 | 4_illustration 208 | Unspecified 209 | 0 210 | 0 211 | 212 | 534 213 | 150 214 | 678 215 | 314 216 | 217 | 218 | 219 | 4_illustration 220 | Unspecified 221 | 0 222 | 0 223 | 224 | 194 225 | 429 226 | 360 227 | 571 228 | 229 | 230 | 231 | 4_illustration 232 | Unspecified 233 | 0 234 | 0 235 | 236 | 410 237 | 364 238 | 689 239 | 650 240 | 241 | 242 | 243 | 4_illustration 244 | Unspecified 245 | 0 246 | 0 247 | 248 | 444 249 | 748 250 | 575 251 | 882 252 | 253 | 254 | 255 | 4_illustration 256 | Unspecified 257 | 0 258 | 0 259 | 260 | 603 261 | 655 262 | 716 263 | 796 264 | 265 | 266 | 267 | 4_illustration 268 | Unspecified 269 | 0 270 | 0 271 | 272 | 987 273 | 473 274 | 1251 275 | 927 276 | 277 | 278 | 279 | 4_illustration 280 | Unspecified 281 | 0 282 | 0 283 | 284 | 1251 285 | 602 286 | 1439 287 | 879 288 | 289 | 290 | 291 | 4_illustration 292 | Unspecified 293 | 0 294 | 0 295 | 296 | 823 297 | 595 298 | 984 299 | 877 300 | 301 | 302 | 303 | 3_typography 304 | Unspecified 305 | 0 306 | 0 307 | 308 | 857 309 | 598 310 | 885 311 | 616 312 | 313 | 314 | 315 | 3_typography 316 | Unspecified 317 | 0 318 | 0 319 | 320 | 1376 321 | 621 322 | 1407 323 | 646 324 | 325 | 326 | 327 | 5_stamp 328 | Unspecified 329 | 0 330 | 0 331 | 332 | 359 333 | 
302 334 | 405 335 | 342 336 | 337 | 338 | 339 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_12.xml: -------------------------------------------------------------------------------- 1 | 2 | アノテーション作業用 3 | 2537534_12.jpg 4 | C:\Users\t-aoike\Documents\アノテーション作業用\2537534_12.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 74 21 | 57 22 | 1522 23 | 1004 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 871 33 | 186 34 | 1007 35 | 398 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 296 45 | 575 46 | 621 47 | 812 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 1015 57 | 189 58 | 1058 59 | 301 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 1013 69 | 612 70 | 1048 71 | 740 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 1323 81 | 196 82 | 1359 83 | 305 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 548 93 | 398 94 | 580 95 | 512 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 1063 105 | 383 106 | 1093 107 | 507 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 1380 117 | 172 118 | 1419 119 | 288 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 640 129 | 204 130 | 668 131 | 253 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 639 141 | 568 142 | 673 143 | 729 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 1372 153 | 569 154 | 1398 155 | 711 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 723 165 | 202 166 | 759 167 | 293 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 356 177 | 227 178 | 389 179 | 279 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 1333 189 | 372 190 | 1373 191 | 505 192 | 193 | 194 | 195 | 4_illustration 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 277 201 | 266 202 | 343 203 | 391 204 | 205 | 206 | 207 | 4_illustration 208 | Unspecified 209 | 0 210 | 0 211 | 212 | 550 213 | 227 214 | 646 215 | 346 216 | 217 | 218 | 219 | 4_illustration 220 | Unspecified 221 | 0 222 | 0 223 | 224 | 385 225 | 366 226 | 525 227 | 557 228 | 229 | 230 | 231 | 4_illustration 232 | Unspecified 233 | 0 234 | 0 235 | 236 | 1079 237 | 209 238 | 1321 239 | 384 240 | 241 | 242 | 243 | 3_typography 244 | Unspecified 245 | 0 246 | 0 247 | 248 | 1012 249 | 395 250 | 1037 251 | 423 252 | 253 | 254 | 255 | 3_typography 256 | Unspecified 257 | 0 258 | 0 259 | 260 | 1287 261 | 409 262 | 1310 263 | 432 264 | 265 | 266 | 267 | 4_illustration 268 | Unspecified 269 | 0 270 | 0 271 | 272 | 864 273 | 427 274 | 1057 275 | 564 276 | 277 | 278 | 279 | 4_illustration 280 | Unspecified 281 | 0 282 | 0 283 | 284 | 1133 285 | 425 286 | 1319 287 | 582 288 | 289 | 290 | 291 | 4_illustration 292 | Unspecified 293 | 0 294 | 0 295 | 296 | 1129 297 | 605 298 | 1348 299 | 764 300 | 301 | 302 | 303 | 4_illustration 304 | Unspecified 305 | 0 306 | 0 307 | 308 | 846 309 | 662 310 | 1193 311 | 911 312 | 313 | 314 | 315 | 3_typography 316 | Unspecified 317 | 0 318 | 0 319 | 320 | 418 321 | 130 322 | 519 323 | 152 324 | 325 | 326 | 327 | 3_typography 328 | Unspecified 329 | 0 330 | 0 331 | 332 | 1071 333 | 132 334 | 1184 335 | 162 336 | 337 | 338 | 339 | 
-------------------------------------------------------------------------------- /preprocess/annotxml/2537534_13.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_13.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_13.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 76 21 | 56 22 | 1524 23 | 1005 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 873 33 | 626 34 | 1364 35 | 946 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 910 45 | 284 46 | 1361 47 | 517 48 | 49 | 50 | 51 | 4_illustration 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 221 57 | 185 58 | 407 59 | 392 60 | 61 | 62 | 63 | 4_illustration 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 452 69 | 491 70 | 659 71 | 800 72 | 73 | 74 | 75 | 4_illustration 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 204 81 | 494 82 | 401 83 | 802 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 635 93 | 186 94 | 669 95 | 296 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 400 105 | 452 106 | 437 107 | 622 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 1359 117 | 192 118 | 1399 119 | 309 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 1296 129 | 670 130 | 1330 131 | 798 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 644 141 | 466 142 | 678 143 | 557 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 418 153 | 217 154 | 459 155 | 325 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 862 165 | 268 166 | 889 167 | 454 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 719 177 | 187 178 | 759 179 | 317 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 421 189 | 134 190 | 516 191 | 157 192 | 193 | 194 | 195 | 3_typography 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 1080 201 | 132 202 | 1185 203 | 154 204 | 205 | 206 | 207 | 5_stamp 208 | Unspecified 209 | 0 210 | 0 211 | 212 | 419 213 | 166 214 | 465 215 | 212 216 | 217 | 218 | 219 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_14.xml: -------------------------------------------------------------------------------- 1 | 2 | アノテーション作業用 3 | 2537534_14.jpg 4 | C:\Users\t-aoike\Documents\アノテーション作業用\2537534_14.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 79 21 | 51 22 | 1531 23 | 1002 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 866 33 | 239 34 | 982 35 | 430 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 205 45 | 511 46 | 466 47 | 786 48 | 49 | 50 | 51 | 4_illustration 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 209 57 | 211 58 | 627 59 | 396 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 1077 69 | 310 70 | 1103 71 | 357 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 624 81 | 207 82 | 660 83 | 341 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 1326 93 | 435 94 | 1364 95 | 495 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 178 105 | 393 106 | 216 107 | 575 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 
114 | 0 115 | 116 | 911 117 | 192 118 | 939 119 | 243 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 832 129 | 524 130 | 868 131 | 754 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 381 141 | 204 142 | 424 143 | 350 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 720 153 | 187 154 | 759 155 | 291 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 705 165 | 492 166 | 736 167 | 550 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 463 177 | 693 178 | 488 179 | 770 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 1380 189 | 188 190 | 1414 191 | 302 192 | 193 | 194 | 195 | 4_illustration 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 1032 201 | 364 202 | 1141 203 | 504 204 | 205 | 206 | 207 | 3_typography 208 | Unspecified 209 | 0 210 | 0 211 | 212 | 1289 213 | 212 214 | 1314 215 | 327 216 | 217 | 218 | 219 | 4_illustration 220 | Unspecified 221 | 0 222 | 0 223 | 224 | 1180 225 | 237 226 | 1266 227 | 429 228 | 229 | 230 | 231 | 4_illustration 232 | Unspecified 233 | 0 234 | 0 235 | 236 | 496 237 | 750 238 | 609 239 | 839 240 | 241 | 242 | 243 | 4_illustration 244 | Unspecified 245 | 0 246 | 0 247 | 248 | 528 249 | 552 250 | 700 251 | 709 252 | 253 | 254 | 255 | 4_illustration 256 | Unspecified 257 | 0 258 | 0 259 | 260 | 844 261 | 534 262 | 1114 263 | 862 264 | 265 | 266 | 267 | 4_illustration 268 | Unspecified 269 | 0 270 | 0 271 | 272 | 1142 273 | 434 274 | 1398 275 | 796 276 | 277 | 278 | 279 | 3_typography 280 | Unspecified 281 | 0 282 | 0 283 | 284 | 423 285 | 129 286 | 528 287 | 155 288 | 289 | 290 | 291 | 3_typography 292 | Unspecified 293 | 0 294 | 0 295 | 296 | 1077 297 | 132 298 | 1178 299 | 159 300 | 301 | 302 | 303 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_15.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_15.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_15.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 85 21 | 59 22 | 1528 23 | 1002 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 296 33 | 189 34 | 626 35 | 912 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 973 45 | 290 46 | 1240 47 | 859 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 696 57 | 230 58 | 749 59 | 467 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 1385 69 | 181 70 | 1414 71 | 250 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 1350 81 | 181 82 | 1379 83 | 250 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 1369 93 | 259 94 | 1403 95 | 329 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 416 105 | 136 106 | 525 107 | 157 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 1078 117 | 130 118 | 1187 119 | 154 120 | 121 | 122 | 123 | 5_stamp 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 699 129 | 181 130 | 750 131 | 225 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_16.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 
2537534_16.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_16.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 88 21 | 55 22 | 1530 23 | 1000 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 971 33 | 193 34 | 1310 35 | 914 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 273 45 | 346 46 | 671 47 | 895 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 1348 57 | 251 58 | 1404 59 | 480 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 708 69 | 180 70 | 760 71 | 347 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 423 81 | 130 82 | 534 83 | 154 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 1082 93 | 130 94 | 1182 95 | 157 96 | 97 | 98 | 99 | 5_stamp 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 1350 105 | 199 106 | 1410 107 | 246 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_17.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_17.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_17.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 87 21 | 59 22 | 1531 23 | 1003 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 189 33 | 357 34 | 559 35 | 856 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 1014 45 | 311 46 | 1289 47 | 784 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 473 57 | 194 58 | 518 59 | 313 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 249 69 | 271 70 | 282 71 | 393 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 1303 81 | 202 82 | 1356 83 | 385 84 | 85 | 86 | 87 | 4_illustration 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 585 93 | 254 94 | 766 95 | 682 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 425 105 | 134 106 | 526 107 | 159 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 1084 117 | 134 118 | 1185 119 | 154 120 | 121 | 122 | 123 | 5_stamp 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 246 129 | 234 130 | 283 131 | 268 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 750 141 | 298 142 | 772 143 | 387 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_18.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_18.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_18.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 86 21 | 59 22 | 1530 23 | 1002 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 314 33 | 416 34 | 601 35 | 930 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 1212 45 | 252 46 | 1431 47 | 686 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 934 57 | 266 58 | 967 59 | 362 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 1400 69 | 244 70 | 1429 71 | 345 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 534 81 | 
204 82 | 570 83 | 256 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 196 93 | 415 94 | 230 95 | 691 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 728 105 | 170 106 | 762 107 | 441 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 650 117 | 545 118 | 675 119 | 609 120 | 121 | 122 | 123 | 4_illustration 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 198 129 | 296 130 | 316 131 | 443 132 | 133 | 134 | 135 | 4_illustration 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 384 141 | 186 142 | 705 143 | 387 144 | 145 | 146 | 147 | 4_illustration 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 851 153 | 361 154 | 1191 155 | 870 156 | 157 | 158 | 159 | 4_illustration 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 576 165 | 427 166 | 725 167 | 539 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 432 177 | 127 178 | 539 179 | 152 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 1085 189 | 136 190 | 1187 191 | 159 192 | 193 | 194 | 195 | 5_stamp 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 933 201 | 226 202 | 975 203 | 260 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_19.xml: -------------------------------------------------------------------------------- 1 | 2 | アノテーション作業用 3 | 2537534_19.jpg 4 | C:\Users\t-aoike\Documents\アノテーション作業用\2537534_19.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 86 21 | 58 22 | 1530 23 | 1002 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 969 33 | 296 34 | 1259 35 | 875 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 269 45 | 512 46 | 664 47 | 820 48 | 49 | 50 | 51 | 4_illustration 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 191 57 | 159 58 | 584 59 | 357 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 607 69 | 198 70 | 650 71 | 229 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 1358 81 | 219 82 | 1395 83 | 534 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 713 93 | 184 94 | 756 95 | 363 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 853 105 | 197 106 | 889 107 | 384 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 641 117 | 566 118 | 669 119 | 602 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 630 129 | 237 130 | 644 131 | 423 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 607 141 | 237 142 | 625 143 | 412 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 648 153 | 607 154 | 671 155 | 645 156 | 157 | 158 | 159 | 4_illustration 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 909 165 | 225 166 | 1010 167 | 336 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 427 177 | 136 178 | 532 179 | 159 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 1073 189 | 134 190 | 1182 191 | 157 192 | 193 | 194 | 195 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_2.xml: -------------------------------------------------------------------------------- 1 | 2 | アノテーション作業用 3 | 2537534_2.jpg 4 | C:\Users\t-aoike\Documents\アノテーション作業用\2537534_2.jpg 5 | 6 | 
Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 77 21 | 57 22 | 1509 23 | 1008 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 168 33 | 175 34 | 764 35 | 914 36 | 37 | 38 | 39 | 2_handwritten 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 1308 45 | 305 46 | 1436 47 | 978 48 | 49 | 50 | 51 | 5_stamp 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 1048 57 | 800 58 | 1180 59 | 931 60 | 61 | 62 | 63 | 5_stamp 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 362 69 | 82 70 | 555 71 | 257 72 | 73 | 74 | 75 | 5_stamp 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 432 81 | 884 82 | 532 83 | 975 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 325 93 | 184 94 | 639 95 | 243 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 216 105 | 286 106 | 259 107 | 686 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 693 117 | 296 118 | 734 119 | 691 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 168 129 | 650 130 | 189 131 | 727 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 384 141 | 921 142 | 550 143 | 939 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_20.xml: -------------------------------------------------------------------------------- 1 | 2 | アノテーション作業用 3 | 2537534_20.jpg 4 | C:\Users\t-aoike\Documents\アノテーション作業用\2537534_20.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 85 21 | 57 22 | 1529 23 | 1002 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 222 33 | 543 34 | 369 35 | 852 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 957 45 | 219 46 | 1284 47 | 437 48 | 49 | 50 | 51 | 4_illustration 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 955 57 | 635 58 | 1271 59 | 846 60 | 61 | 62 | 63 | 4_illustration 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 980 69 | 468 70 | 1218 71 | 611 72 | 73 | 74 | 75 | 4_illustration 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 227 81 | 220 82 | 445 83 | 410 84 | 85 | 86 | 87 | 4_illustration 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 482 93 | 233 94 | 727 95 | 403 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 444 105 | 181 106 | 487 107 | 421 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 1301 117 | 196 118 | 1340 119 | 273 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 637 129 | 210 130 | 674 131 | 291 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 581 141 | 701 142 | 610 143 | 785 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 462 153 | 640 154 | 482 155 | 804 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 901 165 | 640 166 | 950 167 | 845 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 187 177 | 179 178 | 223 179 | 424 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 727 189 | 184 190 | 770 191 | 381 192 | 193 | 194 | 195 | 3_typography 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 725 201 | 468 202 | 762 203 | 646 204 | 205 | 206 | 207 | 3_typography 208 | Unspecified 209 | 0 210 | 0 211 | 212 | 1388 213 | 190 214 | 1428 215 | 356 216 | 217 | 218 | 219 | 3_typography 
220 | Unspecified 221 | 0 222 | 0 223 | 224 | 1263 225 | 456 226 | 1301 227 | 586 228 | 229 | 230 | 231 | 3_typography 232 | Unspecified 233 | 0 234 | 0 235 | 236 | 385 237 | 623 238 | 407 239 | 760 240 | 241 | 242 | 243 | 3_typography 244 | Unspecified 245 | 0 246 | 0 247 | 248 | 489 249 | 639 250 | 512 251 | 809 252 | 253 | 254 | 255 | 4_illustration 256 | Unspecified 257 | 0 258 | 0 259 | 260 | 406 261 | 425 262 | 559 263 | 614 264 | 265 | 266 | 267 | 4_illustration 268 | Unspecified 269 | 0 270 | 0 271 | 272 | 617 273 | 463 274 | 723 275 | 709 276 | 277 | 278 | 279 | 4_illustration 280 | Unspecified 281 | 0 282 | 0 283 | 284 | 414 285 | 750 286 | 718 287 | 896 288 | 289 | 290 | 291 | 3_typography 292 | Unspecified 293 | 0 294 | 0 295 | 296 | 434 297 | 130 298 | 534 299 | 157 300 | 301 | 302 | 303 | 3_typography 304 | Unspecified 305 | 0 306 | 0 307 | 308 | 1084 309 | 136 310 | 1189 311 | 162 312 | 313 | 314 | 315 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_21.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_21.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_21.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 89 21 | 56 22 | 1533 23 | 1001 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 1060 33 | 375 34 | 1343 35 | 873 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 159 45 | 513 46 | 276 47 | 832 48 | 49 | 50 | 51 | 4_illustration 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 369 57 | 225 58 | 616 59 | 387 60 | 61 | 62 | 63 | 4_illustration 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 653 69 | 515 70 | 791 71 | 866 72 | 73 | 74 | 75 | 4_illustration 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 389 81 | 715 82 | 566 83 | 859 84 | 85 | 86 | 87 | 4_illustration 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 904 93 | 696 94 | 1008 95 | 778 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 1355 105 | 219 106 | 1396 107 | 318 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 730 117 | 178 118 | 764 119 | 455 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 639 129 | 195 130 | 681 131 | 352 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 283 141 | 494 142 | 324 143 | 611 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 1032 153 | 604 154 | 1057 155 | 889 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 853 165 | 212 166 | 883 167 | 538 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 1353 177 | 551 178 | 1405 179 | 682 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 579 189 | 678 190 | 613 191 | 897 192 | 193 | 194 | 195 | 3_typography 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 1378 201 | 695 202 | 1403 203 | 870 204 | 205 | 206 | 207 | 3_typography 208 | Unspecified 209 | 0 210 | 0 211 | 212 | 606 213 | 490 214 | 648 215 | 600 216 | 217 | 218 | 219 | 3_typography 220 | Unspecified 221 | 0 222 | 0 223 | 224 | 1369 225 | 332 226 | 1400 227 | 414 228 | 229 | 230 | 231 | 3_typography 232 | Unspecified 233 | 0 234 | 0 235 | 236 | 511 237 | 458 238 | 535 239 | 537 240 | 241 | 242 | 243 | 3_typography 244 | Unspecified 245 | 0 246 | 0 247 | 248 | 262 249 | 202 250 | 287 251 
| 295 252 | 253 | 254 | 255 | 4_illustration 256 | Unspecified 257 | 0 258 | 0 259 | 260 | 419 261 | 491 262 | 493 263 | 618 264 | 265 | 266 | 267 | 3_typography 268 | Unspecified 269 | 0 270 | 0 271 | 272 | 1348 273 | 696 274 | 1371 275 | 873 276 | 277 | 278 | 279 | 4_illustration 280 | Unspecified 281 | 0 282 | 0 283 | 284 | 221 285 | 315 286 | 334 287 | 425 288 | 289 | 290 | 291 | 4_illustration 292 | Unspecified 293 | 0 294 | 0 295 | 296 | 898 297 | 339 298 | 998 299 | 437 300 | 301 | 302 | 303 | 4_illustration 304 | Unspecified 305 | 0 306 | 0 307 | 308 | 1116 309 | 188 310 | 1289 311 | 337 312 | 313 | 314 | 315 | 3_typography 316 | Unspecified 317 | 0 318 | 0 319 | 320 | 430 321 | 136 322 | 530 323 | 159 324 | 325 | 326 | 327 | 3_typography 328 | Unspecified 329 | 0 330 | 0 331 | 332 | 1091 333 | 139 334 | 1196 335 | 161 336 | 337 | 338 | 339 | 5_stamp 340 | Unspecified 341 | 0 342 | 0 343 | 344 | 852 345 | 171 346 | 889 347 | 205 348 | 349 | 350 | 351 | 5_stamp 352 | Unspecified 353 | 0 354 | 0 355 | 356 | 1360 357 | 512 358 | 1403 359 | 547 360 | 361 | 362 | 363 | 5_stamp 364 | Unspecified 365 | 0 366 | 0 367 | 368 | 1358 369 | 180 370 | 1401 371 | 215 372 | 373 | 374 | 375 | 5_stamp 376 | Unspecified 377 | 0 378 | 0 379 | 380 | 1020 381 | 569 382 | 1063 383 | 604 384 | 385 | 386 | 387 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_22.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_22.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_22.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 87 21 | 53 22 | 1533 23 | 1001 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 860 33 | 191 34 | 1094 35 | 521 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 208 45 | 175 46 | 564 47 | 514 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 846 57 | 178 58 | 869 59 | 330 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 1108 69 | 658 70 | 1139 71 | 900 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 736 81 | 191 82 | 769 83 | 344 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 397 93 | 727 94 | 430 95 | 855 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 181 105 | 171 106 | 203 107 | 372 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 651 117 | 285 118 | 676 119 | 380 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 873 129 | 174 130 | 896 131 | 326 132 | 133 | 134 | 135 | 4_illustration 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 1102 141 | 223 142 | 1441 143 | 745 144 | 145 | 146 | 147 | 4_illustration 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 343 153 | 477 154 | 471 155 | 641 156 | 157 | 158 | 159 | 4_illustration 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 207 165 | 643 166 | 384 167 | 755 168 | 169 | 170 | 171 | 4_illustration 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 525 177 | 375 178 | 662 179 | 886 180 | 181 | 182 | 183 | 4_illustration 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 659 189 | 643 190 | 751 191 | 686 192 | 193 | 194 | 195 | 4_illustration 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 685 201 | 498 202 | 744 203 | 571 204 | 205 | 206 | 207 | 4_illustration 208 | Unspecified 209 | 0 210 | 0 211 | 212 
| 226 213 | 793 214 | 368 215 | 907 216 | 217 | 218 | 219 | 4_illustration 220 | Unspecified 221 | 0 222 | 0 223 | 224 | 825 225 | 620 226 | 1123 227 | 855 228 | 229 | 230 | 231 | 4_illustration 232 | Unspecified 233 | 0 234 | 0 235 | 236 | 1150 237 | 773 238 | 1398 239 | 900 240 | 241 | 242 | 243 | 3_typography 244 | Unspecified 245 | 0 246 | 0 247 | 248 | 405 249 | 420 250 | 437 251 | 454 252 | 253 | 254 | 255 | 3_typography 256 | Unspecified 257 | 0 258 | 0 259 | 260 | 271 261 | 586 262 | 300 263 | 618 264 | 265 | 266 | 267 | 3_typography 268 | Unspecified 269 | 0 270 | 0 271 | 272 | 753 273 | 457 274 | 775 275 | 511 276 | 277 | 278 | 279 | 3_typography 280 | Unspecified 281 | 0 282 | 0 283 | 284 | 609 285 | 343 286 | 643 287 | 366 288 | 289 | 290 | 291 | 3_typography 292 | Unspecified 293 | 0 294 | 0 295 | 296 | 705 297 | 455 298 | 734 299 | 487 300 | 301 | 302 | 303 | 3_typography 304 | Unspecified 305 | 0 306 | 0 307 | 308 | 694 309 | 609 310 | 719 311 | 634 312 | 313 | 314 | 315 | 3_typography 316 | Unspecified 317 | 0 318 | 0 319 | 320 | 1412 321 | 346 322 | 1443 323 | 489 324 | 325 | 326 | 327 | 5_stamp 328 | Unspecified 329 | 0 330 | 0 331 | 332 | 1109 333 | 184 334 | 1150 335 | 216 336 | 337 | 338 | 339 | 3_typography 340 | Unspecified 341 | 0 342 | 0 343 | 344 | 1355 345 | 695 346 | 1378 347 | 777 348 | 349 | 350 | 351 | 3_typography 352 | Unspecified 353 | 0 354 | 0 355 | 356 | 1385 357 | 700 358 | 1403 359 | 812 360 | 361 | 362 | 363 | 3_typography 364 | Unspecified 365 | 0 366 | 0 367 | 368 | 428 369 | 129 370 | 535 371 | 157 372 | 373 | 374 | 375 | 3_typography 376 | Unspecified 377 | 0 378 | 0 379 | 380 | 1089 381 | 134 382 | 1201 383 | 159 384 | 385 | 386 | 387 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_23.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_23.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_23.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 89 21 | 54 22 | 1534 23 | 1001 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 828 33 | 171 34 | 1441 35 | 918 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 580 45 | 318 46 | 793 47 | 868 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 736 57 | 169 58 | 773 59 | 286 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 427 69 | 125 70 | 535 71 | 150 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 1087 81 | 132 82 | 1198 83 | 159 84 | 85 | 86 | 87 | 4_illustration 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 273 93 | 220 94 | 478 95 | 421 96 | 97 | 98 | 99 | 4_illustration 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 227 105 | 573 106 | 369 107 | 854 108 | 109 | 110 | 111 | 4_illustration 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 387 117 | 500 118 | 462 119 | 596 120 | 121 | 122 | 123 | 4_illustration 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 433 129 | 703 130 | 564 131 | 866 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 1012 141 | 221 142 | 1054 143 | 336 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 748 153 | 343 154 | 775 155 | 366 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 280 165 | 563 166 | 305 167 | 591 168 | 169 | 170 | 171 | 3_typography 172 | 
Unspecified 173 | 0 174 | 0 175 | 176 | 468 177 | 502 178 | 491 179 | 525 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 569 189 | 702 190 | 594 191 | 721 192 | 193 | 194 | 195 | 5_stamp 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 1009 201 | 183 202 | 1054 203 | 218 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_24.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_24.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_24.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 89 21 | 51 22 | 1535 23 | 1000 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 216 33 | 162 34 | 725 35 | 957 36 | 37 | 38 | 39 | 3_typography 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 576 45 | 186 46 | 703 47 | 232 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 318 57 | 187 58 | 414 59 | 227 60 | 61 | 62 | 63 | 5_stamp 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 739 69 | 168 70 | 789 71 | 212 72 | 73 | 74 | 75 | 4_illustration 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 832 81 | 325 82 | 1018 83 | 870 84 | 85 | 86 | 87 | 4_illustration 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 1098 93 | 648 94 | 1293 95 | 870 96 | 97 | 98 | 99 | 4_illustration 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 1191 105 | 345 106 | 1287 107 | 539 108 | 109 | 110 | 111 | 4_illustration 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 1014 117 | 239 118 | 1126 119 | 336 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 432 129 | 125 130 | 535 131 | 152 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 1091 141 | 130 142 | 1196 143 | 154 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 946 153 | 327 154 | 978 155 | 346 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 1103 165 | 236 166 | 1123 167 | 255 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 1287 177 | 334 178 | 1309 179 | 350 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 1294 189 | 643 190 | 1321 191 | 671 192 | 193 | 194 | 195 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_25.xml: -------------------------------------------------------------------------------- 1 | 2 | アノテーション作業用 3 | 2537534_25.jpg 4 | C:\Users\t-aoike\Documents\アノテーション作業用\2537534_25.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 90 21 | 52 22 | 1535 23 | 999 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 862 33 | 166 34 | 1369 35 | 932 36 | 37 | 38 | 39 | 3_typography 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 269 45 | 222 46 | 310 47 | 916 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 326 57 | 222 58 | 379 59 | 911 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 396 69 | 224 70 | 452 71 | 916 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 527 81 | 223 82 | 583 83 | 916 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 719 93 | 226 94 | 768 95 | 914 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 463 
105 | 224 106 | 515 107 | 914 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 593 117 | 223 118 | 645 119 | 911 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 662 129 | 224 130 | 707 131 | 915 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 999 141 | 219 142 | 1086 143 | 259 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 1247 153 | 220 154 | 1380 155 | 266 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 1087 165 | 138 166 | 1194 167 | 168 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 1378 177 | 180 178 | 1425 179 | 218 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 202 189 | 229 190 | 244 191 | 902 192 | 193 | 194 | 195 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_26.xml: -------------------------------------------------------------------------------- 1 | 2 | アノテーション作業用 3 | 2537534_26.jpg 4 | C:\Users\t-aoike\Documents\アノテーション作業用\2537534_26.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 91 21 | 56 22 | 1533 23 | 1001 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_27.xml: -------------------------------------------------------------------------------- 1 | 2 | アノテーション作業用 3 | 2537534_27.jpg 4 | C:\Users\t-aoike\Documents\アノテーション作業用\2537534_27.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 814 21 | 62 22 | 1523 23 | 995 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_3.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_3.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_3.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 69 21 | 61 22 | 1511 23 | 1006 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 964 33 | 304 34 | 1268 35 | 824 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 359 45 | 336 46 | 601 47 | 870 48 | 49 | 50 | 51 | 2_handwritten 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 1350 57 | 195 58 | 1403 59 | 439 60 | 61 | 62 | 63 | 2_handwritten 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 1244 69 | 268 70 | 1280 71 | 342 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 587 81 | 264 82 | 624 83 | 333 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 414 93 | 136 94 | 520 95 | 156 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 1073 105 | 141 106 | 1178 107 | 157 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_5.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_5.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_5.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 68 21 | 63 22 | 1509 23 | 1007 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 
| 0 30 | 0 31 | 32 | 246 33 | 170 34 | 625 35 | 928 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 1097 45 | 561 46 | 1359 47 | 779 48 | 49 | 50 | 51 | 4_illustration 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 836 57 | 733 58 | 1075 59 | 848 60 | 61 | 62 | 63 | 4_illustration 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 905 69 | 405 70 | 1005 71 | 482 72 | 73 | 74 | 75 | 4_illustration 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 899 81 | 512 82 | 994 83 | 578 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 998 93 | 186 94 | 1022 95 | 230 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 571 105 | 177 106 | 602 107 | 290 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 1064 117 | 530 118 | 1096 119 | 631 120 | 121 | 122 | 123 | 4_illustration 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 919 129 | 303 130 | 992 131 | 351 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 1373 141 | 194 142 | 1409 143 | 384 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 1338 153 | 517 154 | 1370 155 | 598 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 831 165 | 670 166 | 863 167 | 767 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 171 177 | 224 178 | 210 179 | 365 180 | 181 | 182 | 183 | 3_typography 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 719 189 | 534 190 | 744 191 | 593 192 | 193 | 194 | 195 | 4_illustration 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 557 201 | 570 202 | 702 203 | 742 204 | 205 | 206 | 207 | 4_illustration 208 | Unspecified 209 | 0 210 | 0 211 | 212 | 1089 213 | 224 214 | 1366 215 | 468 216 | 217 | 218 | 219 | 3_typography 220 | Unspecified 221 | 0 222 | 0 223 | 224 | 1311 225 | 174 226 | 1336 227 | 233 228 | 229 | 230 | 231 | 4_illustration 232 | Unspecified 233 | 0 234 | 0 235 | 236 | 1004 237 | 592 238 | 1057 239 | 654 240 | 241 | 242 | 243 | 3_typography 244 | Unspecified 245 | 0 246 | 0 247 | 248 | 930 249 | 491 250 | 961 251 | 545 252 | 253 | 254 | 255 | 4_illustration 256 | Unspecified 257 | 0 258 | 0 259 | 260 | 919 261 | 224 262 | 993 263 | 266 264 | 265 | 266 | 267 | 3_typography 268 | Unspecified 269 | 0 270 | 0 271 | 272 | 402 273 | 145 274 | 509 275 | 167 276 | 277 | 278 | 279 | 3_typography 280 | Unspecified 281 | 0 282 | 0 283 | 284 | 1061 285 | 145 286 | 1172 287 | 165 288 | 289 | 290 | 291 | 5_stamp 292 | Unspecified 293 | 0 294 | 0 295 | 296 | 166 297 | 186 298 | 208 299 | 220 300 | 301 | 302 | 303 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_6.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_6.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_6.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 67 21 | 61 22 | 1511 23 | 1006 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 836 33 | 164 34 | 1385 35 | 975 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 166 45 | 217 46 | 357 47 | 865 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 1347 57 | 226 58 | 1397 59 | 349 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 1209 69 | 185 70 | 1258 71 | 294 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 
0 78 | 0 79 | 80 | 334 81 | 199 82 | 372 83 | 298 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 698 93 | 196 94 | 733 95 | 270 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 692 105 | 492 106 | 719 107 | 598 108 | 109 | 110 | 111 | 4_illustration 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 449 117 | 194 118 | 697 119 | 520 120 | 121 | 122 | 123 | 4_illustration 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 444 129 | 559 130 | 703 131 | 865 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 409 141 | 140 142 | 520 143 | 160 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 1066 153 | 147 154 | 1172 155 | 168 156 | 157 | 158 | 159 | 5_stamp 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 1347 165 | 179 166 | 1391 167 | 218 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_7.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_7.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_7.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 79 21 | 56 22 | 1522 23 | 999 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 158 33 | 274 34 | 825 35 | 849 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 838 45 | 316 46 | 1015 47 | 594 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 1382 57 | 191 58 | 1428 59 | 319 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 1379 69 | 465 70 | 1429 71 | 598 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 838 81 | 224 82 | 870 83 | 317 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 465 93 | 198 94 | 497 95 | 301 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 966 105 | 252 106 | 994 107 | 349 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 1098 117 | 518 118 | 1130 119 | 576 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 410 129 | 144 130 | 517 131 | 166 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 900 141 | 237 142 | 936 143 | 267 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 1133 153 | 162 154 | 1169 155 | 192 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 943 165 | 658 166 | 975 167 | 716 168 | 169 | 170 | 171 | 4_illustration 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 981 177 | 191 178 | 1160 179 | 457 180 | 181 | 182 | 183 | 4_illustration 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 1034 189 | 578 190 | 1097 191 | 645 192 | 193 | 194 | 195 | 4_illustration 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 952 201 | 726 202 | 1122 203 | 878 204 | 205 | 206 | 207 | 4_illustration 208 | Unspecified 209 | 0 210 | 0 211 | 212 | 1166 213 | 789 214 | 1388 215 | 929 216 | 217 | 218 | 219 | 4_illustration 220 | Unspecified 221 | 0 222 | 0 223 | 224 | 1177 225 | 189 226 | 1366 227 | 265 228 | 229 | 230 | 231 | 4_illustration 232 | Unspecified 233 | 0 234 | 0 235 | 236 | 1167 237 | 288 238 | 1373 239 | 363 240 | 241 | 242 | 243 | 4_illustration 244 | Unspecified 245 | 0 246 | 0 247 | 248 | 1194 249 | 386 250 | 1344 251 | 483 252 | 253 | 254 | 255 | 
4_illustration 256 | Unspecified 257 | 0 258 | 0 259 | 260 | 1172 261 | 493 262 | 1351 263 | 605 264 | 265 | 266 | 267 | 4_illustration 268 | Unspecified 269 | 0 270 | 0 271 | 272 | 1150 273 | 613 274 | 1341 275 | 777 276 | 277 | 278 | 279 | 5_stamp 280 | Unspecified 281 | 0 282 | 0 283 | 284 | 1381 285 | 416 286 | 1435 287 | 454 288 | 289 | 290 | 291 | 3_typography 292 | Unspecified 293 | 0 294 | 0 295 | 296 | 1087 297 | 138 298 | 1193 299 | 157 300 | 301 | 302 | 303 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_8.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_8.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_8.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 81 21 | 58 22 | 1526 23 | 1005 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 962 33 | 187 34 | 1271 35 | 613 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 292 45 | 287 46 | 666 47 | 650 48 | 49 | 50 | 51 | 3_typography 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 726 57 | 191 58 | 762 59 | 403 60 | 61 | 62 | 63 | 3_typography 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 668 69 | 605 70 | 700 71 | 754 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 601 81 | 237 82 | 636 83 | 349 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 1204 93 | 563 94 | 1234 95 | 648 96 | 97 | 98 | 99 | 3_typography 100 | Unspecified 101 | 0 102 | 0 103 | 104 | 1229 105 | 203 106 | 1274 107 | 249 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 1357 117 | 502 118 | 1398 119 | 539 120 | 121 | 122 | 123 | 4_illustration 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 292 129 | 679 130 | 639 131 | 752 132 | 133 | 134 | 135 | 4_illustration 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 870 141 | 639 142 | 1347 143 | 802 144 | 145 | 146 | 147 | 4_illustration 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 1307 153 | 545 154 | 1359 155 | 583 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 426 165 | 141 166 | 529 167 | 162 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 1076 177 | 140 178 | 1186 179 | 162 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /preprocess/annotxml/2537534_9.xml: -------------------------------------------------------------------------------- 1 | 2 | raw_all 3 | 2537534_9.jpg 4 | D:\digicorebynagasaki\dataset_kotenseki\raw_all\2537534_9.jpg 5 | 6 | Unknown 7 | 8 | 9 | 1600 10 | 1200 11 | 3 12 | 13 | 0 14 | 15 | 1_overall 16 | Unspecified 17 | 0 18 | 0 19 | 20 | 77 21 | 59 22 | 1524 23 | 1005 24 | 25 | 26 | 27 | 4_illustration 28 | Unspecified 29 | 0 30 | 0 31 | 32 | 1001 33 | 253 34 | 1278 35 | 503 36 | 37 | 38 | 39 | 4_illustration 40 | Unspecified 41 | 0 42 | 0 43 | 44 | 219 45 | 196 46 | 397 47 | 375 48 | 49 | 50 | 51 | 4_illustration 52 | Unspecified 53 | 0 54 | 0 55 | 56 | 182 57 | 618 58 | 377 59 | 808 60 | 61 | 62 | 63 | 4_illustration 64 | Unspecified 65 | 0 66 | 0 67 | 68 | 667 69 | 436 70 | 783 71 | 914 72 | 73 | 74 | 75 | 3_typography 76 | Unspecified 77 | 0 78 | 0 79 | 80 | 628 81 | 172 82 | 660 83 | 305 84 | 85 | 86 | 87 | 3_typography 88 | Unspecified 89 | 0 90 | 0 91 | 92 | 270 93 | 510 94 | 302 95 | 583 96 | 97 | 98 | 99 | 3_typography 100 | 
Unspecified 101 | 0 102 | 0 103 | 104 | 524 105 | 521 106 | 554 107 | 594 108 | 109 | 110 | 111 | 3_typography 112 | Unspecified 113 | 0 114 | 0 115 | 116 | 706 117 | 180 118 | 750 119 | 384 120 | 121 | 122 | 123 | 3_typography 124 | Unspecified 125 | 0 126 | 0 127 | 128 | 178 129 | 189 130 | 219 131 | 381 132 | 133 | 134 | 135 | 3_typography 136 | Unspecified 137 | 0 138 | 0 139 | 140 | 650 141 | 526 142 | 682 143 | 703 144 | 145 | 146 | 147 | 3_typography 148 | Unspecified 149 | 0 150 | 0 151 | 152 | 1384 153 | 212 154 | 1425 155 | 329 156 | 157 | 158 | 159 | 3_typography 160 | Unspecified 161 | 0 162 | 0 163 | 164 | 1285 165 | 197 166 | 1318 167 | 324 168 | 169 | 170 | 171 | 3_typography 172 | Unspecified 173 | 0 174 | 0 175 | 176 | 1287 177 | 514 178 | 1317 179 | 599 180 | 181 | 182 | 183 | 4_illustration 184 | Unspecified 185 | 0 186 | 0 187 | 188 | 410 189 | 604 190 | 625 191 | 805 192 | 193 | 194 | 195 | 3_typography 196 | Unspecified 197 | 0 198 | 0 199 | 200 | 407 201 | 300 202 | 438 203 | 409 204 | 205 | 206 | 207 | 4_illustration 208 | Unspecified 209 | 0 210 | 0 211 | 212 | 438 213 | 218 214 | 626 215 | 389 216 | 217 | 218 | 219 | 4_illustration 220 | Unspecified 221 | 0 222 | 0 223 | 224 | 294 225 | 408 226 | 525 227 | 540 228 | 229 | 230 | 231 | 3_typography 232 | Unspecified 233 | 0 234 | 0 235 | 236 | 422 237 | 136 238 | 529 239 | 156 240 | 241 | 242 | 243 | 3_typography 244 | Unspecified 245 | 0 246 | 0 247 | 248 | 1082 249 | 139 250 | 1187 251 | 158 252 | 253 | 254 | 255 | 4_illustration 256 | Unspecified 257 | 0 258 | 0 259 | 260 | 980 261 | 552 262 | 1229 263 | 831 264 | 265 | 266 | 267 | -------------------------------------------------------------------------------- /preprocess/img/2537534_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_1.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_10.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_11.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_12.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_13.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_14.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_14.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_15.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_16.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_17.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_18.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_19.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_2.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_20.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_21.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_22.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_23.jpg 
-------------------------------------------------------------------------------- /preprocess/img/2537534_24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_24.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_25.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_26.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_27.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_3.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_4.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_5.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_6.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_7.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_8.jpg -------------------------------------------------------------------------------- /preprocess/img/2537534_9.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/preprocess/img/2537534_9.jpg
--------------------------------------------------------------------------------
/preprocess/makeannotimage.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import glob
4 | import cv2
5 | import random
6 | from xml.etree import ElementTree as ET
7 | 
8 | random.seed(777)
9 | 
10 | 
11 | class XML_preprocessor(object):
12 |     """Converts the VOC-style XML in annotxml/ into label images.
13 | 
14 |     Output pixel values: 0 = background, 1 = illustration (4_illustration),
15 |     2 = every other annotated class except the page region (1_overall).
16 |     Also writes a random ~90/10 train/val split to train.txt / val.txt.
17 |     """
18 | 
19 |     def __init__(self, data_path):
20 |         self.path_prefix = data_path
21 |         self.num_classes = 3
22 |         self._preprocess_XML()
23 | 
24 |     def _preprocess_XML(self):
25 |         filenames = glob.glob(self.path_prefix + "/*")
26 |         random.shuffle(filenames)
27 |         ft = open('train.txt', 'w')
28 |         fv = open('val.txt', 'w')
29 |         for filename in filenames:
30 |             imgfilename = os.path.basename(filename[:-4]) + ".jpg"
31 |             tree = ET.parse(filename)
32 |             root = tree.getroot()
33 |             size_tree = root.find('size')
34 |             width = int(size_tree.find('width').text)
35 |             height = int(size_tree.find('height').text)
36 |             print(width, height)
37 |             annotimg = np.zeros((height, width, 1), np.uint8)
38 |             for object_tree in root.findall('object'):
39 |                 class_name = object_tree.find('name').text
40 |                 bndbox = object_tree.find('bndbox')
41 |                 xmin = int(bndbox.find('xmin').text)
42 |                 ymin = int(bndbox.find('ymin').text)
43 |                 xmax = int(bndbox.find('xmax').text)
44 |                 ymax = int(bndbox.find('ymax').text)
45 |                 if class_name == "4_illustration":
46 |                     # Label the box as illustration (1) without overwriting
47 |                     # pixels already labeled 2 by another object.
48 |                     annotimg[ymin:ymax, xmin:xmax] = np.maximum(
49 |                         annotimg[ymin:ymax, xmin:xmax], 1)
50 |                 elif class_name != "1_overall":
51 |                     # Typography, handwriting, stamps, etc.
52 |                     annotimg[ymin:ymax, xmin:xmax] = 2
53 |                 else:
54 |                     # Bounding box of the whole page; currently unused.
55 |                     o_xmin, o_xmax, o_ymin, o_ymax = xmin, xmax, ymin, ymax
56 |             if random.random() < 0.1:
57 |                 fv.write(imgfilename + "\n")
58 |             else:
59 |                 ft.write(imgfilename + "\n")
60 |             cv2.imwrite(os.path.join("annotimg", imgfilename), annotimg)
61 |         ft.close()
62 |         fv.close()
63 | 
64 | 
65 | # Example of how to use it:
66 | XML_preprocessor('annotxml')
--------------------------------------------------------------------------------
/preprocess/train.txt:
--------------------------------------------------------------------------------
1 | 2537534_9.jpg
2 | 2537534_12.jpg
3 | 2537534_4.jpg
4 | 2537534_11.jpg
5 | 2537534_13.jpg
6 | 2537534_23.jpg
7 | 2537534_15.jpg
8 | 2537534_3.jpg
9 | 2537534_14.jpg
10 | 2537534_6.jpg
11 | 2537534_21.jpg
12 | 2537534_5.jpg
13 | 2537534_27.jpg
14 | 2537534_18.jpg
15 | 2537534_20.jpg
16 | 2537534_7.jpg
17 | 2537534_10.jpg
18 | 2537534_25.jpg
19 | 2537534_17.jpg
20 | 2537534_26.jpg
21 | 2537534_2.jpg
22 | 2537534_8.jpg
23 | 2537534_22.jpg
24 | 2537534_16.jpg
--------------------------------------------------------------------------------
/preprocess/val.txt:
--------------------------------------------------------------------------------
1 | 2537534_1.jpg
2 | 2537534_24.jpg
3 | 2537534_19.jpg
--------------------------------------------------------------------------------
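The train.txt / val.txt lists above are the split emitted by makeannotimage.py. A quick way to sanity-check the generated label images is the sketch below (not a file in this repository; it assumes makeannotimage.py has been run and that annotimg/2537534_12.jpg exists). Because the labels are saved as JPEG, lossy compression can leave a few stray values besides 0/1/2 near region boundaries:

    import cv2
    import numpy as np

    # Load one generated label image as a single-channel array.
    label = cv2.imread('annotimg/2537534_12.jpg', cv2.IMREAD_GRAYSCALE)

    # Count how many pixels carry each class value.
    values, counts = np.unique(label, return_counts=True)
    print(dict(zip(values.tolist(), counts.tolist())))
    # Expected classes: 0 = background, 1 = illustration, 2 = other annotations.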
/train_3class_50.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Train a DeepLab v3 plus model using tf.estimator API."""
3 | 
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 | 
8 | import argparse
9 | import os
10 | import sys
11 | 
12 | import tensorflow as tf
13 | import deeplab_model
14 | from utils import preprocessing
15 | from tensorflow.python import debug as tf_debug
16 | 
17 | import shutil
18 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
19 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = "1"
20 | os.environ["CUDA_VISIBLE_DEVICES"] = "1"
21 | 
22 | parser = argparse.ArgumentParser()
23 | 
24 | parser.add_argument('--model_dir', type=str, default='./model50',
25 |                     help='Base directory for the model.')
26 | 
27 | parser.add_argument('--clean_model_dir', action='store_true',
28 |                     help='Whether to clean up the model directory if present.')
29 | 
30 | parser.add_argument('--train_epochs', type=int, default=100,
31 |                     help='Number of training epochs: '
32 |                          'For 30K iteration with batch size 6, train_epoch = 17.01 (= 30K * 6 / 10,582). '
33 |                          'For 30K iteration with batch size 8, train_epoch = 22.68 (= 30K * 8 / 10,582). '
34 |                          'For 30K iteration with batch size 10, train_epoch = 28.35 (= 30K * 10 / 10,582). '
35 |                          'For 30K iteration with batch size 11, train_epoch = 31.19 (= 30K * 11 / 10,582). '
36 |                          'For 30K iteration with batch size 15, train_epoch = 42.53 (= 30K * 15 / 10,582). '
37 |                          'For 30K iteration with batch size 16, train_epoch = 45.36 (= 30K * 16 / 10,582).')
38 | 
39 | parser.add_argument('--epochs_per_eval', type=int, default=1,
40 |                     help='The number of training epochs to run between evaluations.')
41 | 
42 | parser.add_argument('--tensorboard_images_max_outputs', type=int, default=6,
43 |                     help='Max number of batch elements to generate for Tensorboard.')
44 | 
45 | parser.add_argument('--batch_size', type=int, default=5,
46 |                     help='Number of examples per batch.')
47 | 
48 | parser.add_argument('--learning_rate_policy', type=str, default='poly',
49 |                     choices=['poly', 'piecewise'],
50 |                     help='Learning rate policy to optimize loss.')
51 | 
52 | parser.add_argument('--max_iter', type=int, default=30000,
53 |                     help='Maximum number of iterations used for the "poly" learning rate policy.')
54 | 
55 | parser.add_argument('--data_dir', type=str, default='./preprocess/',
56 |                     help='Path to the directory containing the PASCAL VOC data tf record.')
57 | 
58 | parser.add_argument('--base_architecture', type=str, default='resnet_v2_50',
59 |                     choices=['resnet_v2_50', 'resnet_v2_101'],
60 |                     help='The architecture of the base ResNet building block.')
61 | 
62 | parser.add_argument('--pre_trained_model', type=str, default='./ini_checkpoints/resnet_v2_50/resnet_v2_50.ckpt',
63 |                     help='Path to the pre-trained model checkpoint.')
64 | 
65 | parser.add_argument('--output_stride', type=int, default=16,
66 |                     choices=[8, 16],
67 |                     help='Output stride for DeepLab v3. Currently 8 or 16 is supported.')


def get_filenames(is_training, data_dir):
  """Return a list of filenames.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: path to the directory containing the input data.

  Returns:
    A list of file names.
  """
  if is_training:
    return [os.path.join(data_dir, 'voc_train.record')]
  else:
    return [os.path.join(data_dir, 'voc_val.record')]


def parse_record(raw_record):
  """Parse PASCAL image and label from a tf record."""
  keys_to_features = {
      'image/height':
          tf.FixedLenFeature((), tf.int64),
      'image/width':
          tf.FixedLenFeature((), tf.int64),
      'image/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format':
          tf.FixedLenFeature((), tf.string, default_value='jpeg'),
      'label/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'label/format':
          tf.FixedLenFeature((), tf.string, default_value='png'),
  }

  parsed = tf.parse_single_example(raw_record, keys_to_features)

  # height = tf.cast(parsed['image/height'], tf.int32)
  # width = tf.cast(parsed['image/width'], tf.int32)

  image = tf.image.decode_image(
      tf.reshape(parsed['image/encoded'], shape=[]), _DEPTH)
  image = tf.to_float(tf.image.convert_image_dtype(image, dtype=tf.uint8))
  image.set_shape([None, None, 3])

  label = tf.image.decode_image(
      tf.reshape(parsed['label/encoded'], shape=[]), 1)
  label = tf.to_int32(tf.image.convert_image_dtype(label, dtype=tf.uint8))
  label.set_shape([None, None, 1])

  return image, label
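
# Editor's note: an illustrative way to sanity-check the records parsed above,
# outside the input pipeline (TF 1.x API; the path follows get_filenames and
# the default --data_dir):
#
#   for raw_record in tf.python_io.tf_record_iterator('./preprocess/voc_train.record'):
#       example = tf.train.Example.FromString(raw_record)
#       print(sorted(example.features.feature.keys()))  # the image/* and label/* keys
#       break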


def preprocess_image(image, label, is_training):
  """Preprocess a single image of layout [height, width, depth]."""
  if is_training:
    # Randomly scale the image and label.
    image, label = preprocessing.random_rescale_image_and_label(
        image, label, _MIN_SCALE, _MAX_SCALE)

    # Randomly crop or pad a [_HEIGHT, _WIDTH] section of the image and label.
    image, label = preprocessing.random_crop_or_pad_image_and_label(
        image, label, _HEIGHT, _WIDTH, _IGNORE_LABEL)

    # Randomly flip the image and label horizontally.
    image, label = preprocessing.random_flip_left_right_image_and_label(
        image, label)

    image.set_shape([_HEIGHT, _WIDTH, 3])
    label.set_shape([_HEIGHT, _WIDTH, 1])

  image = preprocessing.mean_image_subtraction(image)

  return image, label


def input_fn(is_training, data_dir, batch_size, num_epochs=1):
  """Input_fn using the tf.data input pipeline for the segmentation dataset.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.

  Returns:
    A tuple of images and labels.
  """
  dataset = tf.data.Dataset.from_tensor_slices(get_filenames(is_training, data_dir))
  dataset = dataset.flat_map(tf.data.TFRecordDataset)

  if is_training:
    # When choosing shuffle buffer sizes, larger sizes result in better
    # randomness, while smaller sizes have better performance. Because this
    # is a relatively small dataset, we choose to shuffle the full epoch.
    dataset = dataset.shuffle(buffer_size=_NUM_IMAGES['train'])

  dataset = dataset.map(parse_record)
  dataset = dataset.map(
      lambda image, label: preprocess_image(image, label, is_training))
  dataset = dataset.prefetch(batch_size)

  # We call repeat after shuffling, rather than before, to prevent separate
  # epochs from blending together.
  dataset = dataset.repeat(num_epochs)
  dataset = dataset.batch(batch_size)

  iterator = dataset.make_one_shot_iterator()
  images, labels = iterator.get_next()

  return images, labels
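
# Editor's note: input_fn is handed to the Estimator as a zero-argument
# callable, e.g. model.train(input_fn=lambda: input_fn(True, FLAGS.data_dir,
# FLAGS.batch_size, FLAGS.epochs_per_eval)) as in main() below. Note the
# ordering: shuffling the full epoch before repeat() keeps epochs from
# blending together, while batching after repeat() lets batches span epoch
# boundaries instead of producing a short remainder batch every epoch.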


def main(unused_argv):
  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  if FLAGS.clean_model_dir:
    shutil.rmtree(FLAGS.model_dir, ignore_errors=True)

  # Set up a RunConfig to only save checkpoints once per training cycle.
  run_config = tf.estimator.RunConfig().replace(save_checkpoints_secs=1e9)
  model = tf.estimator.Estimator(
      model_fn=deeplab_model.deeplabv3_plus_model_fn,
      model_dir=FLAGS.model_dir,
      config=run_config,
      params={
          'output_stride': FLAGS.output_stride,
          'batch_size': FLAGS.batch_size,
          'base_architecture': FLAGS.base_architecture,
          'pre_trained_model': FLAGS.pre_trained_model,
          'batch_norm_decay': _BATCH_NORM_DECAY,
          'num_classes': _NUM_CLASSES,
          'tensorboard_images_max_outputs': FLAGS.tensorboard_images_max_outputs,
          'weight_decay': FLAGS.weight_decay,
          'learning_rate_policy': FLAGS.learning_rate_policy,
          'num_train': _NUM_IMAGES['train'],
          'initial_learning_rate': FLAGS.initial_learning_rate,
          'max_iter': FLAGS.max_iter,
          'end_learning_rate': FLAGS.end_learning_rate,
          'power': _POWER,
          'momentum': _MOMENTUM,
          'freeze_batch_norm': FLAGS.freeze_batch_norm,
          'initial_global_step': FLAGS.initial_global_step
      })

  for _ in range(FLAGS.train_epochs // FLAGS.epochs_per_eval):
    tensors_to_log = {
        'learning_rate': 'learning_rate',
        'cross_entropy': 'cross_entropy',
        'train_px_accuracy': 'train_px_accuracy',
        'train_mean_iou': 'train_mean_iou',
    }

    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=10)
    train_hooks = [logging_hook]
    eval_hooks = None

    if FLAGS.debug:
      debug_hook = tf_debug.LocalCLIDebugHook()
      train_hooks.append(debug_hook)
      eval_hooks = [debug_hook]

    tf.logging.info("Start training.")
    model.train(
        input_fn=lambda: input_fn(True, FLAGS.data_dir, FLAGS.batch_size, FLAGS.epochs_per_eval),
        hooks=train_hooks,
        # steps=1  # For debug
    )

    tf.logging.info("Start evaluation.")
    # Evaluate the model and print results.
    eval_results = model.evaluate(
        # Batch size must be 1 for testing because the images' sizes differ.
        input_fn=lambda: input_fn(False, FLAGS.data_dir, 1),
        hooks=eval_hooks,
        # steps=1  # For debug
    )
    print(eval_results)


if __name__ == '__main__':
  tf.logging.set_verbosity(tf.logging.INFO)
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
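
With the TFRecords in `./preprocess/` and the ResNet-50 checkpoint in `./ini_checkpoints/resnet_v2_50/`, training can then be launched with the parser defaults, e.g. `python3 train_3class_50.py`, or with explicit flags such as `python3 train_3class_50.py --model_dir ./model50 --batch_size 5 --train_epochs 100`; every flag shown corresponds to a `parser.add_argument` call in the script.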
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ndl-lab/tensorflow-deeplab-v3-plus/fa8f01781941fdf04d71d92bdf2d712438adc7c0/utils/__init__.py
--------------------------------------------------------------------------------
/utils/dataset_util.py:
--------------------------------------------------------------------------------
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Utility functions for creating TFRecord data sets.

source: https://github.com/tensorflow/models/blob/master/research/object_detection/utils/dataset_util.py
"""

import tensorflow as tf


def int64_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def int64_list_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def bytes_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def bytes_list_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))


def float_list_feature(value):
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))
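
# Editor's note: an illustrative sketch of how these helpers combine into one
# tf.train.Example matching the keys that parse_record in train_3class_50.py
# expects (the variable names here are hypothetical):
#
#   example = tf.train.Example(features=tf.train.Features(feature={
#       'image/height': int64_feature(height),
#       'image/width': int64_feature(width),
#       'image/encoded': bytes_feature(encoded_jpg),
#       'image/format': bytes_feature('jpeg'.encode('utf8')),
#       'label/encoded': bytes_feature(encoded_png_label),
#       'label/format': bytes_feature('png'.encode('utf8')),
#   }))
#   writer.write(example.SerializeToString())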


def read_examples_list(path):
  """Read list of training or validation examples.

  The file is assumed to contain a single example per line where the first
  token in the line is an identifier that allows us to find the image and
  annotation xml for that example.

  For example, the line:
  xyz 3
  would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored).

  Args:
    path: absolute path to examples list file.

  Returns:
    list of example identifiers (strings).
  """
  with tf.gfile.GFile(path) as fid:
    lines = fid.readlines()
  return [line.strip().split(' ')[0] for line in lines]


def recursive_parse_xml_to_dict(xml):
  """Recursively parses XML contents to python dict.

  We assume that `object` tags are the only ones that can appear
  multiple times at the same level of a tree.

  Args:
    xml: xml tree obtained by parsing XML file contents using lxml.etree

  Returns:
    Python dictionary holding XML contents.
  """
  if not xml:
    return {xml.tag: xml.text}
  result = {}
  for child in xml:
    child_result = recursive_parse_xml_to_dict(child)
    if child.tag != 'object':
      result[child.tag] = child_result[child.tag]
    else:
      if child.tag not in result:
        result[child.tag] = []
      result[child.tag].append(child_result[child.tag])
  return {xml.tag: result}


def make_initializable_iterator(dataset):
  """Creates an iterator, and initializes tables.

  This is useful in cases where make_one_shot_iterator wouldn't work because
  the graph contains a hash table that needs to be initialized.

  Args:
    dataset: A `tf.data.Dataset` object.

  Returns:
    A `tf.data.Iterator`.
  """
  iterator = dataset.make_initializable_iterator()
  tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
  return iterator


def read_dataset(
    file_read_func, decode_func, input_files, config, num_workers=1,
    worker_index=0):
  """Reads a dataset, and handles repetition and shuffling.

  Args:
    file_read_func: Function to use in tf.data.Dataset.interleave, to read
      every individual file into a tf.data.Dataset.
    decode_func: Function to apply to all records.
    input_files: A list of file paths to read.
    config: An input_reader_builder.InputReader object.
    num_workers: Number of workers / shards.
    worker_index: Id for the current worker.

  Returns:
    A tf.data.Dataset based on config.
  """
  # Shard, shuffle, and read files.
  filenames = tf.concat([tf.matching_files(pattern) for pattern in input_files],
                        0)
  dataset = tf.data.Dataset.from_tensor_slices(filenames)
  dataset = dataset.shard(num_workers, worker_index)
  dataset = dataset.repeat(config.num_epochs or None)
  if config.shuffle:
    dataset = dataset.shuffle(config.filenames_shuffle_buffer_size,
                              reshuffle_each_iteration=True)

  # Read file records and shuffle them.
  # If cycle_length is larger than the number of files, more than one reader
  # will be assigned to the same file, leading to repetition.
  cycle_length = tf.cast(
      tf.minimum(config.num_readers, tf.size(filenames)), tf.int64)
  # TODO: find the optimal block_length.
  dataset = dataset.interleave(
      file_read_func, cycle_length=cycle_length, block_length=1)

  if config.shuffle:
    dataset = dataset.shuffle(config.shuffle_buffer_size,
                              reshuffle_each_iteration=True)

  dataset = dataset.map(decode_func, num_parallel_calls=config.num_readers)
  return dataset.prefetch(config.prefetch_buffer_size)
--------------------------------------------------------------------------------
/utils/preprocessing.py:
--------------------------------------------------------------------------------
"""Utility functions for preprocessing data sets."""

from PIL import Image
import numpy as np
import tensorflow as tf

_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

# colour map
label_colours = [(0, 0, 0),  # 0=background
                 # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
                 (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
                 # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
                 (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
                 # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
                 (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
                 # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
                 (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]


def decode_labels(mask, num_images=1, num_classes=21):
  """Decode batch of segmentation masks.

  Args:
    mask: result of inference after taking argmax.
    num_images: number of images to decode from the batch.
    num_classes: number of classes to predict (including background).

  Returns:
    A batch with num_images RGB images of the same size as the input.
  """
  n, h, w, c = mask.shape
  assert (n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' \
                            % (n, num_images)
  outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8)
  for i in range(num_images):
    img = Image.new('RGB', (len(mask[i, 0]), len(mask[i])))
    pixels = img.load()
    for j_, j in enumerate(mask[i, :, :, 0]):
      for k_, k in enumerate(j):
        if k < num_classes:
          pixels[k_, j_] = label_colours[k]
    outputs[i] = np.array(img)
  return outputs
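
# Editor's note: decode_labels expects a 4-D argmax mask with an explicit
# channel axis. An illustrative call for this repository's three classes:
#
#   mask = preds[..., np.newaxis]                      # [N, H, W] -> [N, H, W, 1]
#   coloured = decode_labels(mask, num_images=1, num_classes=3)
#   Image.fromarray(coloured[0]).save('pred_0.png')
#
# Only the first three entries of label_colours are used here: black
# background, dark red (class 1), and green (class 2).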


def mean_image_addition(image, means=(_R_MEAN, _G_MEAN, _B_MEAN)):
  """Adds the given means to each image channel.

  For example:
    means = [123.68, 116.779, 103.939]
    image = mean_image_addition(image, means)

  Note that the rank of `image` must be known.

  Args:
    image: a tensor of size [height, width, C].
    means: a C-vector of values to add to each channel.

  Returns:
    the image with the channel means added back.

  Raises:
    ValueError: If the rank of `image` is unknown, if `image` has a rank other
      than three or if the number of channels in `image` doesn't match the
      number of values in `means`.
  """
  if image.get_shape().ndims != 3:
    raise ValueError('Input must be of size [height, width, C>0]')
  num_channels = image.get_shape().as_list()[-1]
  if len(means) != num_channels:
    raise ValueError('len(means) must match the number of channels')

  channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
  for i in range(num_channels):
    channels[i] += means[i]
  return tf.concat(axis=2, values=channels)


def mean_image_subtraction(image, means=(_R_MEAN, _G_MEAN, _B_MEAN)):
  """Subtracts the given means from each image channel.

  For example:
    means = [123.68, 116.779, 103.939]
    image = mean_image_subtraction(image, means)

  Note that the rank of `image` must be known.

  Args:
    image: a tensor of size [height, width, C].
    means: a C-vector of values to subtract from each channel.

  Returns:
    the centered image.

  Raises:
    ValueError: If the rank of `image` is unknown, if `image` has a rank other
      than three or if the number of channels in `image` doesn't match the
      number of values in `means`.
  """
  if image.get_shape().ndims != 3:
    raise ValueError('Input must be of size [height, width, C>0]')
  num_channels = image.get_shape().as_list()[-1]
  if len(means) != num_channels:
    raise ValueError('len(means) must match the number of channels')

  channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
  for i in range(num_channels):
    channels[i] -= means[i]
  return tf.concat(axis=2, values=channels)
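
# Editor's note: mean_image_addition above is the exact inverse of
# mean_image_subtraction, which is handy when turning a preprocessed tensor
# back into a viewable image, e.g. (illustrative):
#
#   centered = mean_image_subtraction(image)   # fed to the network
#   restored = mean_image_addition(centered)   # back to the 0-255 range
#
# Both require a float image with a statically known 3-channel shape.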


def random_rescale_image_and_label(image, label, min_scale, max_scale):
  """Randomly rescales an image and label within a target scale range.

  Rescales an image and label by a single random factor drawn uniformly
  from [min_scale, max_scale].

  Args:
    image: 3-D Tensor of shape `[height, width, channels]`.
    label: 3-D Tensor of shape `[height, width, 1]`.
    min_scale: Min target scale.
    max_scale: Max target scale.

  Returns:
    Rescaled image and label.
    If `image` was 3-D, a 3-D float Tensor of shape
      `[new_height, new_width, channels]`.
    If `label` was 3-D, a 3-D float Tensor of shape
      `[new_height, new_width, 1]`.
  """
  if min_scale <= 0:
    raise ValueError('\'min_scale\' must be greater than 0.')
  elif max_scale <= 0:
    raise ValueError('\'max_scale\' must be greater than 0.')
  elif min_scale >= max_scale:
    raise ValueError('\'max_scale\' must be greater than \'min_scale\'.')

  shape = tf.shape(image)
  height = tf.to_float(shape[0])
  width = tf.to_float(shape[1])
  scale = tf.random_uniform(
      [], minval=min_scale, maxval=max_scale, dtype=tf.float32)
  new_height = tf.to_int32(height * scale)
  new_width = tf.to_int32(width * scale)
  image = tf.image.resize_images(image, [new_height, new_width],
                                 method=tf.image.ResizeMethod.BILINEAR)
  # Since label classes are integers, nearest neighbor needs to be used.
  label = tf.image.resize_images(label, [new_height, new_width],
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

  return image, label


def random_crop_or_pad_image_and_label(image, label, crop_height, crop_width, ignore_label):
  """Crops and/or pads an image to a target width and height.

  Resizes an image to a target width and height by randomly
  cropping the image or padding it evenly with zeros.

  Args:
    image: 3-D Tensor of shape `[height, width, channels]`.
    label: 3-D Tensor of shape `[height, width, 1]`.
    crop_height: The new height.
    crop_width: The new width.
    ignore_label: Label class to be ignored.

  Returns:
    Cropped and/or padded image and label.
    If `image` was 3-D, a 3-D float Tensor of shape
      `[new_height, new_width, channels]`.
  """
  label = label - ignore_label  # Subtract due to 0 padding.
  label = tf.to_float(label)
  image_height = tf.shape(image)[0]
  image_width = tf.shape(image)[1]
  image_and_label = tf.concat([image, label], axis=2)
  image_and_label_pad = tf.image.pad_to_bounding_box(
      image_and_label, 0, 0,
      tf.maximum(crop_height, image_height),
      tf.maximum(crop_width, image_width))
  image_and_label_crop = tf.random_crop(
      image_and_label_pad, [crop_height, crop_width, 4])

  image_crop = image_and_label_crop[:, :, :3]
  label_crop = image_and_label_crop[:, :, 3:]
  label_crop += ignore_label
  label_crop = tf.to_int32(label_crop)

  return image_crop, label_crop
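
# Editor's note: the subtract/pad/add sequence above is what marks padded
# pixels as "ignore". With ignore_label = 255, a background pixel (0) becomes
# -255 before padding; pad_to_bounding_box fills new pixels with 0; adding 255
# back yields 255 (= ignore) for padded pixels and restores 0 for real
# background, so the loss can mask the padding out.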
224 | """ 225 | # Reads an image from a file, decodes it into a dense tensor 226 | def _parse_function(filename, is_label): 227 | if not is_label: 228 | image_filename, label_filename = filename, None 229 | else: 230 | image_filename, label_filename = filename 231 | 232 | image_string = tf.read_file(image_filename) 233 | image = tf.image.decode_image(image_string,channels=3) 234 | image = tf.to_float(tf.image.convert_image_dtype(image, dtype=tf.uint8)) 235 | image.set_shape([None, None, 3]) 236 | 237 | image = mean_image_subtraction(image) 238 | 239 | if not is_label: 240 | if new_width is not None and new_height is not None: 241 | image=tf.image.resize_images(image,[new_height,new_width]) 242 | return image 243 | else: 244 | label_string = tf.read_file(label_filename) 245 | label = tf.image.decode_image(label_string) 246 | label = tf.to_int32(tf.image.convert_image_dtype(label, dtype=tf.uint8)) 247 | label.set_shape([None, None, 1]) 248 | 249 | return image, label 250 | 251 | if label_filenames is None: 252 | input_filenames = image_filenames 253 | else: 254 | input_filenames = (image_filenames, label_filenames) 255 | 256 | dataset = tf.data.Dataset.from_tensor_slices(input_filenames) 257 | if label_filenames is None: 258 | dataset = dataset.map(lambda x: _parse_function(x, False)) 259 | else: 260 | dataset = dataset.map(lambda x, y: _parse_function((x, y), True)) 261 | dataset = dataset.prefetch(batch_size) 262 | dataset = dataset.batch(batch_size) 263 | iterator = dataset.make_one_shot_iterator() 264 | 265 | if label_filenames is None: 266 | images = iterator.get_next() 267 | labels = None 268 | else: 269 | images, labels = iterator.get_next() 270 | 271 | return images, labels 272 | --------------------------------------------------------------------------------