├── trainer
│   ├── requirements.txt
│   ├── generate_label_map.py
│   ├── download_checkpoint.py
│   ├── override_pipeline.py
│   ├── start.sh
│   ├── prepare_training.py
│   ├── generate_tf_record.py
│   └── faster_rcnn_resnet101_coco.config
├── requirements.txt
├── export_instructions.md
├── LICENSE
├── download_model.py
├── .gitignore
└── README.md

/trainer/requirements.txt:
--------------------------------------------------------------------------------
1 | pycocotools==2.0.0
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | botocore==1.12.183
2 | ibm-cos-sdk==2.5.1
3 | ibm-cos-sdk-core==2.5.1
4 | ibm-cos-sdk-s3transfer==2.5.1
--------------------------------------------------------------------------------
/trainer/generate_label_map.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | 
4 | def generate_label_map(labels, output):
5 |     # Create a file named label_map.pbtxt
6 |     with open(output, 'w') as file:
7 |         # Loop through all of the labels and write each label to the file with an id.
8 |         for idx, label in enumerate(labels):
9 |             file.write('item {\n')
10 |             file.write('\tname: \'{}\'\n'.format(label))
11 |             file.write('\tid: {}\n'.format(idx + 1)) # indexes must start at 1.
12 |             file.write('}\n')
--------------------------------------------------------------------------------
/export_instructions.md:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
2 | 
3 | BASE_PATH=/Users/niko/Desktop/custom-training
4 | INPUT_TYPE=image_tensor
5 | PIPELINE_CONFIG_PATH=${BASE_PATH}/output/pipeline.config
6 | TRAINED_CKPT_PREFIX=${BASE_PATH}/output/checkpoint/model.ckpt-3000
7 | EXPORT_DIR=object_detection/exported_model
8 | python object_detection/export_inference_graph.py \
9 |     --input_type=${INPUT_TYPE} \
10 |     --pipeline_config_path=${PIPELINE_CONFIG_PATH} \
11 |     --trained_checkpoint_prefix=${TRAINED_CKPT_PREFIX} \
12 |     --output_directory=${EXPORT_DIR}
--------------------------------------------------------------------------------
/trainer/download_checkpoint.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tarfile
3 | 
4 | import six.moves.urllib as urllib
5 | 
6 | def download_checkpoint(model, output):
7 |     download_base = 'http://download.tensorflow.org/models/object_detection/'
8 | 
9 |     # Download the checkpoint
10 |     opener = urllib.request.URLopener()
11 |     opener.retrieve(download_base + model, model)
12 | 
13 |     # Extract all the `model.ckpt` files.
14 | with tarfile.open(model) as tar: 15 | for member in tar.getmembers(): 16 | member.name = os.path.basename(member.name) 17 | if 'model.ckpt' in member.name: 18 | tar.extract(member, path=output) -------------------------------------------------------------------------------- /trainer/override_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from google.protobuf import text_format 4 | 5 | from object_detection.utils import config_util 6 | 7 | def override_pipeline(pipeline, override_dict, num_classes=0): 8 | configs = config_util.get_configs_from_pipeline_file(pipeline) 9 | 10 | meta_arch = configs["model"].WhichOneof("model") 11 | override_dict['model.{}.num_classes'.format(meta_arch)] = num_classes 12 | 13 | configs = config_util.merge_external_params_with_configs(configs, kwargs_dict=override_dict) 14 | pipeline_config = config_util.create_pipeline_proto_from_configs(configs) 15 | config_util.save_pipeline_config(pipeline_config, os.environ['RESULT_DIR']) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Cloud Annotations 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /trainer/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If any of the commands fail, echoing this message will inform cacli. 4 | trap 'echo CACLI-TRAINING-FAILED; exit' ERR 5 | 6 | # Install any additional requirements. We must install on the user level for it 7 | # to work with WML. Installing with `--no-deps` ensures we don't override the 8 | # default packages provided by WML. 9 | pip install --user --no-deps -r requirements.txt 10 | 11 | # Unpack the object_detection and slim packages. 12 | tar -xvzf object_detection-0.1.tar.gz 13 | tar -xvzf slim-0.1.tar.gz 14 | 15 | # Move the object_detection and slim packages. 16 | cp -rf object_detection-0.1/object_detection . 17 | cp -rf slim-0.1 slim/ 18 | 19 | # Cleanup. (not really necessary) 20 | rm -rf object_detection-0.1.tar.gz 21 | rm -rf object_detection-0.1 22 | rm -rf slim-0.1.tar.gz 23 | rm -rf slim-0.1 24 | 25 | # Add slim to our python path. 26 | export PYTHONPATH=${PWD}/slim 27 | 28 | # Run our prep scripts. 
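# (prepare_training.py generates label_map.pbtxt, converts _annotations.json
# into train.record/val.record, downloads the pretrained model checkpoint and
# writes the patched pipeline.config into ${RESULT_DIR}.)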
29 | python prepare_training.py 30 | 31 | # Start training. ($1 is the integer of training steps provided by cacli) 32 | python -m object_detection.model_main \ 33 | --pipeline_config_path="${RESULT_DIR}/pipeline.config" \ 34 | --model_dir="${RESULT_DIR}/checkpoint" \ 35 | --num_train_steps=$1 \ 36 | --alsologtostderr 37 | 38 | # Tell cacli we successfully finished training. 39 | echo 'CACLI-TRAINING-SUCCESS' -------------------------------------------------------------------------------- /download_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import ibm_boto3 4 | from botocore.client import Config 5 | 6 | # Things to change: 7 | BUCKET_NAME = '' 8 | OUTPUT_LOCATION = '' 9 | STEPS = 3000 10 | INSTANCE_ID = '' 11 | ENDPOINT = 'https://s3.us.cloud-object-storage.appdomain.cloud' 12 | API_KEY = '' 13 | 14 | 15 | credentials = { 16 | 'ibm_auth_endpoint': 'https://iam.ng.bluemix.net/oidc/token', 17 | 'ibm_service_instance_id': INSTANCE_ID, 18 | 'endpoint_url': ENDPOINT, 19 | 'ibm_api_key_id': API_KEY, 20 | 'config': Config(signature_version='oauth') 21 | } 22 | 23 | bucket = ibm_boto3.resource('s3', **credentials).Bucket(BUCKET_NAME) 24 | 25 | print('downloading checkpoints...') 26 | if os.path.exists('output') and os.path.isdir('output'): 27 | shutil.rmtree('output') 28 | os.makedirs('output') 29 | os.makedirs('output/checkpoint') 30 | 31 | data_path = 'checkpoint/model.ckpt-{}.data-00000-of-00001'.format(STEPS) 32 | index_path = 'checkpoint/model.ckpt-{}.index'.format(STEPS) 33 | meta_path = 'checkpoint/model.ckpt-{}.meta'.format(STEPS) 34 | bucket.download_file(os.path.join(OUTPUT_LOCATION, data_path), os.path.join('output', data_path)) 35 | bucket.download_file(os.path.join(OUTPUT_LOCATION, index_path), os.path.join('output', index_path)) 36 | bucket.download_file(os.path.join(OUTPUT_LOCATION, meta_path), os.path.join('output', meta_path)) 37 | bucket.download_file(os.path.join(OUTPUT_LOCATION, 'pipeline.config'), 'output/pipeline.config') 38 | -------------------------------------------------------------------------------- /trainer/prepare_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from generate_label_map import generate_label_map 5 | from generate_tf_record import generate_tf_record 6 | from download_checkpoint import download_checkpoint 7 | from override_pipeline import override_pipeline 8 | 9 | MODEL_CHECKPOINT = 'faster_rcnn_resnet101_coco_2018_01_28.tar.gz' 10 | MODEL_CONFIG = 'faster_rcnn_resnet101_coco.config' 11 | 12 | label_map_path = os.path.join(os.environ['RESULT_DIR'], 'label_map.pbtxt') 13 | train_record_path = 'train.record' 14 | val_record_path = 'val.record' 15 | checkpoint_path = 'checkpoint' 16 | 17 | # Open _annotations.json, os.environ['DATA_DIR'] is the directory where all of 18 | # our bucket data is stored. 19 | with open(os.path.join(os.environ['DATA_DIR'], '_annotations.json')) as f: 20 | annotations = json.load(f)['annotations'] 21 | 22 | # Loop through each image and through each image's annotations and collect all 23 | # the labels into a set. We could also just use labels array, but this could 24 | # include labels that aren't used in the dataset. 
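# For example, with the sample _annotations.json shown in the README this
# yields something like ['Cat', 'Dog'] (the ordering is arbitrary because it
# comes from a set).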
25 | labels = list({a['label'] for image in annotations.values() for a in image}) 26 | 27 | override_dict = { 28 | 'train_input_path': train_record_path, 29 | 'eval_input_path': val_record_path, 30 | 'train_config.fine_tune_checkpoint': os.path.join(checkpoint_path, 'model.ckpt'), 31 | 'label_map_path': label_map_path 32 | } 33 | 34 | generate_label_map(labels, label_map_path) 35 | generate_tf_record(annotations, label_map_path, train_record_path, val_record_path) 36 | download_checkpoint(MODEL_CHECKPOINT, checkpoint_path) 37 | override_pipeline(MODEL_CONFIG, override_dict, num_classes=len(labels)) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.tar.gz 2 | *.zip 3 | config.yaml 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | -------------------------------------------------------------------------------- /trainer/generate_tf_record.py: -------------------------------------------------------------------------------- 1 | import os 2 | import io 3 | import json 4 | import random 5 | 6 | import PIL.Image 7 | import tensorflow as tf 8 | 9 | from object_detection.utils import dataset_util 10 | from object_detection.utils import label_map_util 11 | 12 | def generate_tf_record(annotations, label_map_path, train_record, val_record): 13 | # Get a list of all images in our dataset. 14 | image_names = [image for image in annotations.keys()] 15 | # Load the label map we created. 
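    # get_label_map_dict returns a dict mapping each label name to its integer
    # id, e.g. {'Cat': 1, 'Dog': 2} for the label map generated above.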
16 | label_map = label_map_util.get_label_map_dict(label_map_path) 17 | 18 | random.seed(42) 19 | random.shuffle(image_names) 20 | num_train = int(0.7 * len(image_names)) 21 | train_examples = image_names[:num_train] 22 | val_examples = image_names[num_train:] 23 | 24 | create_tf_record(annotations, train_examples, label_map, train_record) 25 | create_tf_record(annotations, val_examples, label_map, val_record) 26 | 27 | def create_tf_record(annotations, image_names, label_map, output): 28 | # Create a train.record TFRecord file. 29 | with tf.python_io.TFRecordWriter(output) as writer: 30 | # Loop through all the training examples. 31 | for image_name in image_names: 32 | # Make sure the image is actually a file 33 | img_path = os.path.join(os.environ['DATA_DIR'], image_name) 34 | if not os.path.isfile(img_path): 35 | continue 36 | 37 | # Read in the image. 38 | with tf.gfile.GFile(img_path, 'rb') as fid: 39 | encoded_jpg = fid.read() 40 | 41 | # Open the image with PIL so we can check that it's a jpeg and get the image 42 | # dimensions. 43 | encoded_jpg_io = io.BytesIO(encoded_jpg) 44 | image = PIL.Image.open(encoded_jpg_io) 45 | if image.format != 'JPEG': 46 | raise ValueError('Image format not JPEG') 47 | 48 | width, height = image.size 49 | 50 | # Initialize all the arrays. 51 | xmins = [] 52 | xmaxs = [] 53 | ymins = [] 54 | ymaxs = [] 55 | classes_text = [] 56 | classes = [] 57 | 58 | # The class text is the label name and the class is the id. If there are 3 59 | # cats in the image and 1 dog, it may look something like this: 60 | # classes_text = ['Cat', 'Cat', 'Dog', 'Cat'] 61 | # classes = [ 1 , 1 , 2 , 1 ] 62 | 63 | # For each image, loop through all the annotations and append their values. 64 | for annotation in annotations[image_name]: 65 | xmins.append(annotation['x']) 66 | xmaxs.append(annotation['x2']) 67 | ymins.append(annotation['y']) 68 | ymaxs.append(annotation['y2']) 69 | label = annotation['label'] 70 | classes_text.append(label.encode('utf8')) 71 | classes.append(label_map[label]) 72 | 73 | # Create the TFExample. 74 | try: 75 | tf_example = tf.train.Example(features=tf.train.Features(feature={ 76 | 'image/height': dataset_util.int64_feature(height), 77 | 'image/width': dataset_util.int64_feature(width), 78 | 'image/filename': dataset_util.bytes_feature(image_name.encode('utf8')), 79 | 'image/source_id': dataset_util.bytes_feature(image_name.encode('utf8')), 80 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 81 | 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 82 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 83 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 84 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 85 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 86 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 87 | 'image/object/class/label': dataset_util.int64_list_feature(classes), 88 | })) 89 | if tf_example: 90 | # Write the TFExample to the TFRecord. 91 | writer.write(tf_example.SerializeToString()) 92 | except ValueError: 93 | print('Invalid example, ignoring.') -------------------------------------------------------------------------------- /trainer/faster_rcnn_resnet101_coco.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1), configuration for MSCOCO Dataset. 
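# Note: in this repo, prepare_training.py / override_pipeline.py fill in
# num_classes, fine_tune_checkpoint, label_map_path and the input paths
# automatically at training time, so the PATH_TO_BE_CONFIGURED placeholders
# below can be left untouched.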
2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 90 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | momentum_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0003 91 | schedule { 92 | step: 900000 93 | learning_rate: .00003 94 | } 95 | schedule { 96 | step: 1200000 97 | learning_rate: .000003 98 | } 99 | } 100 | } 101 | momentum_optimizer_value: 0.9 102 | } 103 | use_moving_average: false 104 | } 105 | gradient_clipping_by_norm: 10.0 106 | fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" 107 | from_detection_checkpoint: true 108 | data_augmentation_options { 109 | random_horizontal_flip { 110 | } 111 | } 112 | } 113 | 114 | train_input_reader: { 115 | tf_record_input_reader { 116 | input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100" 117 | } 118 | label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" 119 | } 120 | 121 | eval_config: { 122 | num_examples: 8000 123 | # Note: The below line limits the evaluation process to 10 evaluations. 124 | # Remove the below line to evaluate indefinitely. 
125 |   max_evals: 10
126 | }
127 | 
128 | eval_input_reader: {
129 |   tf_record_input_reader {
130 |     input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
131 |   }
132 |   label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
133 |   shuffle: false
134 |   num_readers: 1
135 | }
136 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Using Cloud Annotations to train models from TensorFlow's Object Detection model zoo
2 | 
3 | Clone the following repos:
4 | ```
5 | git clone https://github.com/cloud-annotations/custom-training.git
6 | ```
7 | ```
8 | git clone https://github.com/tensorflow/models.git
9 | ```
10 | 
11 | There are 3 things we need to set up:
12 | - The TensorFlow Object Detection package
13 | - The pipeline
14 | - The training script, which will handle:
15 |   - The training data
16 |   - The pretrained model
17 | 
18 | ## Setting up the TensorFlow Object Detection package
19 | Move into the research directory:
20 | ```
21 | cd models/research/
22 | ```
23 | 
24 | Compile the protobufs:
25 | ```
26 | protoc object_detection/protos/*.proto --python_out=.
27 | ```
28 | 
29 | > **Note:** You will need to have `protoc` installed
30 | > **macOS + Homebrew** If you have Homebrew installed, run: `brew install protobuf`
31 | > **Windows / Linux / macOS** The simplest way to install the protocol compiler is to download a pre-built binary from the [protobuf release page](https://github.com/protocolbuffers/protobuf/releases)
32 | > You can find pre-built binaries in zip packages: `protoc-{version}-{platform}.zip`
33 | 
34 | Set up the packages:
35 | ```
36 | python setup.py sdist
37 | (cd slim && python setup.py sdist)
38 | ```
39 | This will create two Python packages, `dist/object_detection-0.1.tar.gz` and `slim/dist/slim-0.1.tar.gz`. Copy them to the `custom-training/trainer` directory.
40 | 
41 | The trainer folder should look like this:
42 | ```
43 | + trainer/
44 |   - download_checkpoint.py
45 |   - faster_rcnn_resnet101_coco.config
46 |   - generate_label_map.py
47 |   - generate_tf_record.py
48 |   - object_detection-0.1.tar.gz
49 |   - override_pipeline.py
50 |   - prepare_training.py
51 |   - requirements.txt
52 |   - slim-0.1.tar.gz
53 |   - start.sh
54 | ```
55 | 
56 | ## Choosing a model type
57 | Before moving forward we need to decide on a model type. You can find all available model types in the [model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md).
58 | 
59 | There are 2 main base model types:
60 | - `ssd`, used by default by the Cloud Annotations tool, is great for devices that don't have a lot of power. You can detect objects very fast on devices like phones and Raspberry Pis.
61 | - `faster rcnn` is good for high accuracy predictions, but runs much slower.
62 | 
63 | In the model zoo you can find a chart comparing the speed and accuracy of each model type.
64 | The time is in milliseconds, but it's important to note that it is the speed of the model on an `Nvidia GeForce GTX TITAN X` GPU.
65 | The accuracy is measured in `mAP`; the higher, the better.
66 | 
67 | The default model that cacli trains is the `ssd mobilenet v1` model, which has the following metrics:
68 | 
69 | | Speed (ms) | mAP |
70 | | :--------: | :-: |
71 | | 30 | 21 |
72 | 
73 | > **Note:** As a frame of reference, I get about 15fps on my MacBook Pro, ~66ms.
74 | 
75 | In this walkthrough I'll be using the `faster r-cnn resnet101` model, which has the following metrics:
76 | 
77 | | Speed (ms) | mAP |
78 | | :--------: | :-: |
79 | | 106 | 32 |
80 | 
81 | > **Note:** I'm guessing ~233ms on a Mac or 4fps.
82 | 
83 | Feel free to use any other model type, but make sure that the output is `Boxes` **NOT** `Masks`.
84 | 
85 | ## Setting up the pipeline
86 | Once we have decided on a model structure, we can find one of the pipeline configs provided by TensorFlow that corresponds to our model type from [here](https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs).
87 | Since we are using `faster r-cnn resnet101` we can download [`faster_rcnn_resnet101_coco.config`](https://github.com/tensorflow/models/raw/master/research/object_detection/samples/configs/faster_rcnn_resnet101_coco.config).
88 | The pipeline config tells the TensorFlow Object Detection API what type of model we want to train, how to train it, and with what data. We should be fine with the majority of the defaults, but feel free to tinker with any of the model/training params. You can get more info on the format of the config files [here](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/configuring_jobs.md).
89 | 
90 | > **Note:** There are a few things that will be dynamically changed with a script we write so that the config will always work with our data. These include: `num_classes`, `fine_tune_checkpoint`, `label_map_path` and `input_path`.
91 | 
92 | ## Setting up the training script
93 | When we start a training run, our object storage bucket gets mounted to the training service. This gives us access to all of our images and an `_annotations.json` file with all of our bounding box annotations. We can access this data via an environment variable named `DATA_DIR`.
94 | When the training run begins, it looks for and runs a file named `start.sh`. This is where we can prepare our data and then run the training command.
95 | 
96 | ### Preparing the training data
97 | The TensorFlow Object Detection API expects our data to be in the TFRecord format, so we will need to write and run a conversion script.
98 | 
99 | The format of the `_annotations.json` looks something like this:
100 | ```
101 | {
102 |   "version": "1.0",
103 |   "type": "localization",
104 |   "labels": ["Cat", "Dog"],
105 |   "annotations": {
106 |     "image1.jpg": [
107 |       {
108 |         "x": 0.7255949630314233,
109 |         "x2": 0.9695875693160814,
110 |         "y": 0.5820120073891626,
111 |         "y2": 1,
112 |         "label": "Cat"
113 |       },
114 |       {
115 |         "x": 0.8845598428835489,
116 |         "x2": 1,
117 |         "y": 0.1829972290640394,
118 |         "y2": 0.966248460591133,
119 |         "label": "Dog"
120 |       }
121 |     ]
122 |   }
123 | }
124 | ```
125 | 
126 | Along with the TFRecord we also need a label map protobuf. The label map is what maps an integer id to a text label name. The ids are 1-indexed, meaning the first label will have an id of 1, not 0.
127 | This is an example of what a label map for our `_annotations.json` example would look like:
128 | ```
129 | item {
130 |   id: 1
131 |   name: 'Cat'
132 | }
133 | 
134 | item {
135 |   id: 2
136 |   name: 'Dog'
137 | }
138 | ```
139 | 
140 | The TFRecord format is a collection of serialized feature dicts, each looking something like this:
141 | ```
142 | {
143 |   'image/height': 1800,
144 |   'image/width': 2400,
145 |   'image/filename': 'image1.jpg',
146 |   'image/source_id': 'image1.jpg',
147 |   'image/encoded': ACTUAL_ENCODED_IMAGE_DATA_AS_BYTES,
148 |   'image/format': 'jpeg',
149 |   'image/object/bbox/xmin': [0.7255949630314233, 0.8845598428835489],
150 |   'image/object/bbox/xmax': [0.9695875693160814, 1.0000000000000000],
151 |   'image/object/bbox/ymin': [0.5820120073891626, 0.1829972290640394],
152 |   'image/object/bbox/ymax': [1.0000000000000000, 0.9662484605911330],
153 |   'image/object/class/text': (['Cat', 'Dog']),
154 |   'image/object/class/label': ([1, 2])
155 | }
156 | ```
157 | 
158 | We can access our annotations with the following code:
159 | ```python
160 | # Open _annotations.json, os.environ['DATA_DIR'] is the directory where all of
161 | # our bucket data is stored.
162 | with open(os.path.join(os.environ['DATA_DIR'], '_annotations.json')) as f:
163 |     annotations = json.load(f)['annotations']
164 | 
165 | # Loop through each image and through each image's annotations and collect all
166 | # the labels into a set. We could also just use labels array, but this could
167 | # include labels that aren't used in the dataset.
168 | labels = list({a['label'] for image in annotations.values() for a in image})
169 | ```
170 | > You can find this code in `prepare_training.py`.
171 | 
172 | Once we have our annotations, we can generate a label map!
173 | ```python
174 | # Create a file named label_map.pbtxt
175 | with open('label_map.pbtxt', 'w') as file:
176 |     # Loop through all of the labels and write each label to the file with an id.
177 |     for idx, label in enumerate(labels):
178 |         file.write('item {\n')
179 |         file.write('\tname: \'{}\'\n'.format(label))
180 |         file.write('\tid: {}\n'.format(idx + 1)) # indexes must start at 1.
181 |         file.write('}\n')
182 | ```
183 | > You can find this code in `generate_label_map.py`.
184 | 
185 | Now that we have our label map, we can build our TFRecord.
186 | ```python
187 | # Create a train.record TFRecord file.
188 | with tf.python_io.TFRecordWriter('train.record') as writer:
189 |     # Load the label map we created.
190 |     label_map_dict = label_map_util.get_label_map_dict('label_map.pbtxt')
191 |     # Get a list of all images in our dataset.
192 |     image_names = [image for image in annotations.keys()]
193 | 
194 |     # Loop through all the training examples.
195 |     for idx, image_name in enumerate(image_names):
196 |         # Make sure the image is actually a file
197 |         img_path = os.path.join(os.environ['DATA_DIR'], image_name)
198 |         if not os.path.isfile(img_path):
199 |             continue
200 | 
201 |         # Read in the image.
202 |         with tf.gfile.GFile(img_path, 'rb') as fid:
203 |             encoded_jpg = fid.read()
204 | 
205 |         # Open the image with PIL so we can check that it's a jpeg and get the image
206 |         # dimensions.
207 |         encoded_jpg_io = io.BytesIO(encoded_jpg)
208 |         image = PIL.Image.open(encoded_jpg_io)
209 |         if image.format != 'JPEG':
210 |             raise ValueError('Image format not JPEG')
211 | 
212 |         width, height = image.size
213 | 
214 |         # Initialize all the arrays.
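        # (The x/y values stored in _annotations.json are already normalized to
        # the 0-1 range, which is what the Object Detection API expects for the
        # bounding box features below.)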
215 |         xmins = []
216 |         xmaxs = []
217 |         ymins = []
218 |         ymaxs = []
219 |         classes_text = []
220 |         classes = []
221 | 
222 |         # The class text is the label name and the class is the id. If there are 3
223 |         # cats in the image and 1 dog, it may look something like this:
224 |         #   classes_text = ['Cat', 'Cat', 'Dog', 'Cat']
225 |         #   classes      = [ 1   ,  1   ,  2   ,  1   ]
226 | 
227 |         # For each image, loop through all the annotations and append their values.
228 |         for annotation in annotations[image_name]:
229 |             xmins.append(annotation['x'])
230 |             xmaxs.append(annotation['x2'])
231 |             ymins.append(annotation['y'])
232 |             ymaxs.append(annotation['y2'])
233 |             label = annotation['label']
234 |             classes_text.append(label.encode('utf8'))
235 |             classes.append(label_map_dict[label])
236 | 
237 |         # Create the TFExample.
238 |         try:
239 |             tf_example = tf.train.Example(features=tf.train.Features(feature={
240 |                 'image/height': dataset_util.int64_feature(height),
241 |                 'image/width': dataset_util.int64_feature(width),
242 |                 'image/filename': dataset_util.bytes_feature(image_name.encode('utf8')),
243 |                 'image/source_id': dataset_util.bytes_feature(image_name.encode('utf8')),
244 |                 'image/encoded': dataset_util.bytes_feature(encoded_jpg),
245 |                 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
246 |                 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
247 |                 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
248 |                 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
249 |                 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
250 |                 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
251 |                 'image/object/class/label': dataset_util.int64_list_feature(classes),
252 |             }))
253 |             if tf_example:
254 |                 # Write the TFExample to the TFRecord.
255 |                 writer.write(tf_example.SerializeToString())
256 |         except ValueError:
257 |             print('Invalid example, ignoring.')
258 | ```
259 | > You can find this code in `generate_tf_record.py`.
260 | 
261 | > **Note:** There are a few extra things that we can do here, like shuffling the data and splitting it into training and validation sets.
262 | > We can also shard the TFRecord if we have a few thousand images. To learn more check out the docs [here](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md).
263 | 
264 | ### Downloading a pretrained model checkpoint
265 | Training a model from scratch can take days and requires tons of data. We can mitigate this by using a pretrained model checkpoint.
266 | Instead of starting from nothing, we can add to what was already learned with our own data.
267 | 
268 | We can download a checkpoint from the [model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md).
269 | 
270 | We can download the checkpoint to our training run with the following code:
271 | ```python
272 | download_base = 'http://download.tensorflow.org/models/object_detection/'
273 | model_file = 'faster_rcnn_resnet101_coco_2018_01_28.tar.gz'
274 | 
275 | # Download the checkpoint
276 | opener = urllib.request.URLopener()
277 | opener.retrieve(download_base + model_file, model_file)
278 | 
279 | # Extract all the `model.ckpt` files.
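# The archive nests its contents inside a versioned folder (e.g.
# faster_rcnn_resnet101_coco_2018_01_28/), so we strip the directories with
# os.path.basename and keep only the model.ckpt.* files, leaving them at
# checkpoint/model.ckpt.* where the pipeline config expects to find them.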
280 | with tarfile.open(model_file) as tar:
281 |     for member in tar.getmembers():
282 |         member.name = os.path.basename(member.name)
283 |         if 'model.ckpt' in member.name:
284 |             tar.extract(member, path='checkpoint')
285 | ```
286 | > You can find this code in `download_checkpoint.py`.
287 | 
288 | > **Note:** This script is downloading the `faster r-cnn resnet101` model; make sure you download the model type you are training.
289 | 
290 | ### Injecting the pipeline with proper values
291 | The final thing we need to do is inject our pipeline with the number of labels we have and where to find the label map, TFRecord and model checkpoint.
292 | ```python
293 | pipeline = 'faster_rcnn_resnet101_coco.config'
294 | 
295 | override_dict = {
296 |     'train_input_path': 'train.record',
297 |     'train_config.fine_tune_checkpoint': 'checkpoint/model.ckpt',
298 |     'label_map_path': 'label_map.pbtxt'
299 | }
300 | 
301 | configs = config_util.get_configs_from_pipeline_file(pipeline)
302 | meta_arch = configs["model"].WhichOneof("model")
303 | override_dict['model.{}.num_classes'.format(meta_arch)] = len(labels)
304 | configs = config_util.merge_external_params_with_configs(configs, kwargs_dict=override_dict)
305 | pipeline_config = config_util.create_pipeline_proto_from_configs(configs)
306 | config_util.save_pipeline_config(pipeline_config, '')
307 | ```
308 | > You can find this code in `override_pipeline.py`.
309 | 
310 | ## Final checklist
311 | All the code in the trainer should work as-is.
312 | 
313 | The only things you **MUST** do:
314 | - add the `object_detection-0.1.tar.gz` file to `trainer`
315 | - add the `slim-0.1.tar.gz` file to `trainer`
316 | 
317 | (Optional) To choose a different model:
318 | - download an alternative pipeline config
319 | - modify `MODEL_CHECKPOINT` in `prepare_training.py`
320 | - modify `MODEL_CONFIG` in `prepare_training.py`
321 | 
322 | ## Training the model
323 | When you're ready to train, all you need to do is zip the `trainer` directory and run:
324 | ```
325 | cacli train --script=trainer.zip
326 | ```
327 | 
--------------------------------------------------------------------------------
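## Downloading and exporting the trained model
Once training finishes, the checkpoint files and the generated `pipeline.config` end up in the training run's output location in your object storage bucket. As a rough sketch (assuming you have filled in `BUCKET_NAME`, `OUTPUT_LOCATION`, `INSTANCE_ID` and `API_KEY` in `download_model.py`, and set `STEPS` to the number of training steps you trained for), the `download_model.py` script at the root of this repo should pull everything down into a local `output/` directory:
```
pip install -r requirements.txt
python download_model.py
```
From there, `export_instructions.md` shows how to run `object_detection/export_inference_graph.py` from the `models/research` directory to export an inference graph from the downloaded checkpoint.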