├── trainer
│   ├── requirements.txt
│   ├── generate_label_map.py
│   ├── download_checkpoint.py
│   ├── override_pipeline.py
│   ├── start.sh
│   ├── prepare_training.py
│   ├── generate_tf_record.py
│   └── faster_rcnn_resnet101_coco.config
├── requirements.txt
├── export_instructions.md
├── LICENSE
├── download_model.py
├── .gitignore
└── README.md

/trainer/requirements.txt:
--------------------------------------------------------------------------------
1 | pycocotools==2.0.0
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | botocore==1.12.183
2 | ibm-cos-sdk==2.5.1
3 | ibm-cos-sdk-core==2.5.1
4 | ibm-cos-sdk-s3transfer==2.5.1
--------------------------------------------------------------------------------
/trainer/generate_label_map.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | 
4 | def generate_label_map(labels, output):
5 |     # Create a file named label_map.pbtxt
6 |     with open(output, 'w') as file:
7 |         # Loop through all of the labels and write each label to the file with an id.
8 |         for idx, label in enumerate(labels):
9 |             file.write('item {\n')
10 |             file.write('\tname: \'{}\'\n'.format(label))
11 |             file.write('\tid: {}\n'.format(idx + 1)) # indexes must start at 1.
12 |             file.write('}\n')
--------------------------------------------------------------------------------
/export_instructions.md:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
2 | 
3 | BASE_PATH=/Users/niko/Desktop/custom-training
4 | INPUT_TYPE=image_tensor
5 | PIPELINE_CONFIG_PATH=${BASE_PATH}/output/pipeline.config
6 | TRAINED_CKPT_PREFIX=${BASE_PATH}/output/checkpoint/model.ckpt-3000
7 | EXPORT_DIR=object_detection/exported_model
8 | python object_detection/export_inference_graph.py \
9 |     --input_type=${INPUT_TYPE} \
10 |     --pipeline_config_path=${PIPELINE_CONFIG_PATH} \
11 |     --trained_checkpoint_prefix=${TRAINED_CKPT_PREFIX} \
12 |     --output_directory=${EXPORT_DIR}
--------------------------------------------------------------------------------
/trainer/download_checkpoint.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tarfile
3 | 
4 | import six.moves.urllib as urllib
5 | 
6 | def download_checkpoint(model, output):
7 |     download_base = 'http://download.tensorflow.org/models/object_detection/'
8 | 
9 |     # Download the checkpoint
10 |     opener = urllib.request.URLopener()
11 |     opener.retrieve(download_base + model, model)
12 | 
13 |     # Extract all the `model.ckpt` files.
14 | with tarfile.open(model) as tar: 15 | for member in tar.getmembers(): 16 | member.name = os.path.basename(member.name) 17 | if 'model.ckpt' in member.name: 18 | tar.extract(member, path=output) -------------------------------------------------------------------------------- /trainer/override_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from google.protobuf import text_format 4 | 5 | from object_detection.utils import config_util 6 | 7 | def override_pipeline(pipeline, override_dict, num_classes=0): 8 | configs = config_util.get_configs_from_pipeline_file(pipeline) 9 | 10 | meta_arch = configs["model"].WhichOneof("model") 11 | override_dict['model.{}.num_classes'.format(meta_arch)] = num_classes 12 | 13 | configs = config_util.merge_external_params_with_configs(configs, kwargs_dict=override_dict) 14 | pipeline_config = config_util.create_pipeline_proto_from_configs(configs) 15 | config_util.save_pipeline_config(pipeline_config, os.environ['RESULT_DIR']) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Cloud Annotations 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /trainer/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If any of the commands fail, echoing this message will inform cacli. 4 | trap 'echo CACLI-TRAINING-FAILED; exit' ERR 5 | 6 | # Install any additional requirements. We must install on the user level for it 7 | # to work with WML. Installing with `--no-deps` ensures we don't override the 8 | # default packages provided by WML. 9 | pip install --user --no-deps -r requirements.txt 10 | 11 | # Unpack the object_detection and slim packages. 12 | tar -xvzf object_detection-0.1.tar.gz 13 | tar -xvzf slim-0.1.tar.gz 14 | 15 | # Move the object_detection and slim packages. 16 | cp -rf object_detection-0.1/object_detection . 17 | cp -rf slim-0.1 slim/ 18 | 19 | # Cleanup. (not really necessary) 20 | rm -rf object_detection-0.1.tar.gz 21 | rm -rf object_detection-0.1 22 | rm -rf slim-0.1.tar.gz 23 | rm -rf slim-0.1 24 | 25 | # Add slim to our python path. 26 | export PYTHONPATH=${PWD}/slim 27 | 28 | # Run our prep scripts. 
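# (prepare_training.py generates label_map.pbtxt, converts _annotations.json
# into train.record/val.record, downloads the pretrained model checkpoint and
# writes the patched pipeline.config into ${RESULT_DIR}.)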
29 | python prepare_training.py 30 | 31 | # Start training. ($1 is the integer of training steps provided by cacli) 32 | python -m object_detection.model_main \ 33 | --pipeline_config_path="${RESULT_DIR}/pipeline.config" \ 34 | --model_dir="${RESULT_DIR}/checkpoint" \ 35 | --num_train_steps=$1 \ 36 | --alsologtostderr 37 | 38 | # Tell cacli we successfully finished training. 39 | echo 'CACLI-TRAINING-SUCCESS' -------------------------------------------------------------------------------- /download_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import ibm_boto3 4 | from botocore.client import Config 5 | 6 | # Things to change: 7 | BUCKET_NAME = '' 8 | OUTPUT_LOCATION = '' 9 | STEPS = 3000 10 | INSTANCE_ID = '' 11 | ENDPOINT = 'https://s3.us.cloud-object-storage.appdomain.cloud' 12 | API_KEY = '' 13 | 14 | 15 | credentials = { 16 | 'ibm_auth_endpoint': 'https://iam.ng.bluemix.net/oidc/token', 17 | 'ibm_service_instance_id': INSTANCE_ID, 18 | 'endpoint_url': ENDPOINT, 19 | 'ibm_api_key_id': API_KEY, 20 | 'config': Config(signature_version='oauth') 21 | } 22 | 23 | bucket = ibm_boto3.resource('s3', **credentials).Bucket(BUCKET_NAME) 24 | 25 | print('downloading checkpoints...') 26 | if os.path.exists('output') and os.path.isdir('output'): 27 | shutil.rmtree('output') 28 | os.makedirs('output') 29 | os.makedirs('output/checkpoint') 30 | 31 | data_path = 'checkpoint/model.ckpt-{}.data-00000-of-00001'.format(STEPS) 32 | index_path = 'checkpoint/model.ckpt-{}.index'.format(STEPS) 33 | meta_path = 'checkpoint/model.ckpt-{}.meta'.format(STEPS) 34 | bucket.download_file(os.path.join(OUTPUT_LOCATION, data_path), os.path.join('output', data_path)) 35 | bucket.download_file(os.path.join(OUTPUT_LOCATION, index_path), os.path.join('output', index_path)) 36 | bucket.download_file(os.path.join(OUTPUT_LOCATION, meta_path), os.path.join('output', meta_path)) 37 | bucket.download_file(os.path.join(OUTPUT_LOCATION, 'pipeline.config'), 'output/pipeline.config') 38 | -------------------------------------------------------------------------------- /trainer/prepare_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from generate_label_map import generate_label_map 5 | from generate_tf_record import generate_tf_record 6 | from download_checkpoint import download_checkpoint 7 | from override_pipeline import override_pipeline 8 | 9 | MODEL_CHECKPOINT = 'faster_rcnn_resnet101_coco_2018_01_28.tar.gz' 10 | MODEL_CONFIG = 'faster_rcnn_resnet101_coco.config' 11 | 12 | label_map_path = os.path.join(os.environ['RESULT_DIR'], 'label_map.pbtxt') 13 | train_record_path = 'train.record' 14 | val_record_path = 'val.record' 15 | checkpoint_path = 'checkpoint' 16 | 17 | # Open _annotations.json, os.environ['DATA_DIR'] is the directory where all of 18 | # our bucket data is stored. 19 | with open(os.path.join(os.environ['DATA_DIR'], '_annotations.json')) as f: 20 | annotations = json.load(f)['annotations'] 21 | 22 | # Loop through each image and through each image's annotations and collect all 23 | # the labels into a set. We could also just use labels array, but this could 24 | # include labels that aren't used in the dataset. 
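# For example, with the sample _annotations.json shown in the README this
# yields something like ['Cat', 'Dog'] (the ordering is arbitrary because it
# comes from a set).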
25 | labels = list({a['label'] for image in annotations.values() for a in image}) 26 | 27 | override_dict = { 28 | 'train_input_path': train_record_path, 29 | 'eval_input_path': val_record_path, 30 | 'train_config.fine_tune_checkpoint': os.path.join(checkpoint_path, 'model.ckpt'), 31 | 'label_map_path': label_map_path 32 | } 33 | 34 | generate_label_map(labels, label_map_path) 35 | generate_tf_record(annotations, label_map_path, train_record_path, val_record_path) 36 | download_checkpoint(MODEL_CHECKPOINT, checkpoint_path) 37 | override_pipeline(MODEL_CONFIG, override_dict, num_classes=len(labels)) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.tar.gz 2 | *.zip 3 | config.yaml 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | -------------------------------------------------------------------------------- /trainer/generate_tf_record.py: -------------------------------------------------------------------------------- 1 | import os 2 | import io 3 | import json 4 | import random 5 | 6 | import PIL.Image 7 | import tensorflow as tf 8 | 9 | from object_detection.utils import dataset_util 10 | from object_detection.utils import label_map_util 11 | 12 | def generate_tf_record(annotations, label_map_path, train_record, val_record): 13 | # Get a list of all images in our dataset. 14 | image_names = [image for image in annotations.keys()] 15 | # Load the label map we created. 
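    # get_label_map_dict returns a dict mapping each label name to its integer
    # id, e.g. {'Cat': 1, 'Dog': 2} for the label map generated above.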
16 | label_map = label_map_util.get_label_map_dict(label_map_path) 17 | 18 | random.seed(42) 19 | random.shuffle(image_names) 20 | num_train = int(0.7 * len(image_names)) 21 | train_examples = image_names[:num_train] 22 | val_examples = image_names[num_train:] 23 | 24 | create_tf_record(annotations, train_examples, label_map, train_record) 25 | create_tf_record(annotations, val_examples, label_map, val_record) 26 | 27 | def create_tf_record(annotations, image_names, label_map, output): 28 | # Create a train.record TFRecord file. 29 | with tf.python_io.TFRecordWriter(output) as writer: 30 | # Loop through all the training examples. 31 | for image_name in image_names: 32 | # Make sure the image is actually a file 33 | img_path = os.path.join(os.environ['DATA_DIR'], image_name) 34 | if not os.path.isfile(img_path): 35 | continue 36 | 37 | # Read in the image. 38 | with tf.gfile.GFile(img_path, 'rb') as fid: 39 | encoded_jpg = fid.read() 40 | 41 | # Open the image with PIL so we can check that it's a jpeg and get the image 42 | # dimensions. 43 | encoded_jpg_io = io.BytesIO(encoded_jpg) 44 | image = PIL.Image.open(encoded_jpg_io) 45 | if image.format != 'JPEG': 46 | raise ValueError('Image format not JPEG') 47 | 48 | width, height = image.size 49 | 50 | # Initialize all the arrays. 51 | xmins = [] 52 | xmaxs = [] 53 | ymins = [] 54 | ymaxs = [] 55 | classes_text = [] 56 | classes = [] 57 | 58 | # The class text is the label name and the class is the id. If there are 3 59 | # cats in the image and 1 dog, it may look something like this: 60 | # classes_text = ['Cat', 'Cat', 'Dog', 'Cat'] 61 | # classes = [ 1 , 1 , 2 , 1 ] 62 | 63 | # For each image, loop through all the annotations and append their values. 64 | for annotation in annotations[image_name]: 65 | xmins.append(annotation['x']) 66 | xmaxs.append(annotation['x2']) 67 | ymins.append(annotation['y']) 68 | ymaxs.append(annotation['y2']) 69 | label = annotation['label'] 70 | classes_text.append(label.encode('utf8')) 71 | classes.append(label_map[label]) 72 | 73 | # Create the TFExample. 74 | try: 75 | tf_example = tf.train.Example(features=tf.train.Features(feature={ 76 | 'image/height': dataset_util.int64_feature(height), 77 | 'image/width': dataset_util.int64_feature(width), 78 | 'image/filename': dataset_util.bytes_feature(image_name.encode('utf8')), 79 | 'image/source_id': dataset_util.bytes_feature(image_name.encode('utf8')), 80 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 81 | 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 82 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 83 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 84 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 85 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 86 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 87 | 'image/object/class/label': dataset_util.int64_list_feature(classes), 88 | })) 89 | if tf_example: 90 | # Write the TFExample to the TFRecord. 91 | writer.write(tf_example.SerializeToString()) 92 | except ValueError: 93 | print('Invalid example, ignoring.') -------------------------------------------------------------------------------- /trainer/faster_rcnn_resnet101_coco.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1), configuration for MSCOCO Dataset. 
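# Note: in this repo, prepare_training.py / override_pipeline.py fill in
# num_classes, fine_tune_checkpoint, label_map_path and the input paths
# automatically at training time, so the PATH_TO_BE_CONFIGURED placeholders
# below can be left untouched.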
2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 90 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | momentum_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0003 91 | schedule { 92 | step: 900000 93 | learning_rate: .00003 94 | } 95 | schedule { 96 | step: 1200000 97 | learning_rate: .000003 98 | } 99 | } 100 | } 101 | momentum_optimizer_value: 0.9 102 | } 103 | use_moving_average: false 104 | } 105 | gradient_clipping_by_norm: 10.0 106 | fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt" 107 | from_detection_checkpoint: true 108 | data_augmentation_options { 109 | random_horizontal_flip { 110 | } 111 | } 112 | } 113 | 114 | train_input_reader: { 115 | tf_record_input_reader { 116 | input_path: "PATH_TO_BE_CONFIGURED/mscoco_train.record-?????-of-00100" 117 | } 118 | label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt" 119 | } 120 | 121 | eval_config: { 122 | num_examples: 8000 123 | # Note: The below line limits the evaluation process to 10 evaluations. 124 | # Remove the below line to evaluate indefinitely. 
125 |   max_evals: 10
126 | }
127 | 
128 | eval_input_reader: {
129 |   tf_record_input_reader {
130 |     input_path: "PATH_TO_BE_CONFIGURED/mscoco_val.record-?????-of-00010"
131 |   }
132 |   label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
133 |   shuffle: false
134 |   num_readers: 1
135 | }
136 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Using Cloud Annotations to train models from TensorFlow's Object Detection model zoo
2 | 
3 | Clone the following repos:
4 | ```
5 | git clone https://github.com/cloud-annotations/custom-training.git
6 | ```
7 | ```
8 | git clone https://github.com/tensorflow/models.git
9 | ```
10 | 
11 | There are 3 things we need to set up:
12 | - The TensorFlow Object Detection package
13 | - The pipeline
14 | - The training script, which will handle:
15 |   - The training data
16 |   - The pretrained model
17 | 
18 | ## Setting up the TensorFlow Object Detection package
19 | Move into the research directory:
20 | ```
21 | cd models/research/
22 | ```
23 | 
24 | Compile the protobufs:
25 | ```
26 | protoc object_detection/protos/*.proto --python_out=.
27 | ```
28 | 
29 | > **Note:** You will need to have `protoc` installed
30 | > **macOS + Homebrew** If you have Homebrew installed, run: `brew install protobuf`
31 | > **Windows / Linux / macOS** The simplest way to install the protocol compiler is to download a pre-built binary from the [protobuf release page](https://github.com/protocolbuffers/protobuf/releases)
32 | > You can find pre-built binaries in zip packages: `protoc-{version}-{platform}.zip`
33 | 
34 | Set up the packages:
35 | ```
36 | python setup.py sdist
37 | (cd slim && python setup.py sdist)
38 | ```
39 | This will create two Python packages, `dist/object_detection-0.1.tar.gz` and `slim/dist/slim-0.1.tar.gz`. Copy them to the `custom-training/trainer` directory.
40 | 
41 | The trainer folder should look like this:
42 | ```
43 | + trainer/
44 |   - download_checkpoint.py
45 |   - faster_rcnn_resnet101_coco.config
46 |   - generate_label_map.py
47 |   - generate_tf_record.py
48 |   - object_detection-0.1.tar.gz
49 |   - override_pipeline.py
50 |   - prepare_training.py
51 |   - requirements.txt
52 |   - slim-0.1.tar.gz
53 |   - start.sh
54 | ```
55 | 
56 | ## Choosing a model type
57 | Before moving forward we need to decide on a model type. You can find all available model types in the [model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md).
58 | 
59 | There are 2 main base model types:
60 | - `ssd`, used by default by the Cloud Annotations tool, is great for devices that don't have a lot of power. You can detect objects very fast on devices like phones and Raspberry Pis.
61 | - `faster rcnn` is good for high accuracy predictions, but runs much slower.
62 | 
63 | In the model zoo you can find a chart comparing the speed and accuracy of each model type.
64 | The time is in milliseconds, but it's important to note that it is the speed of the model on an `Nvidia GeForce GTX TITAN X` GPU.
65 | The accuracy is measured in `mAP`; the higher, the better.
66 | 
67 | The default model that cacli trains is the `ssd mobilenet v1` model, which has the following metrics:
68 | 
69 | | Speed (ms) | mAP |
70 | | :--------: | :-: |
71 | | 30 | 21 |
72 | 
73 | > **Note:** As a frame of reference, I get about 15fps on my MacBook Pro, ~66ms.
74 | 
75 | In this walkthrough I'll be using the `faster r-cnn resnet101` model, which has the following metrics:
76 | 
77 | | Speed (ms) | mAP |
78 | | :--------: | :-: |
79 | | 106 | 32 |
80 | 
81 | > **Note:** I'm guessing ~233ms on a Mac or 4fps.
82 | 
83 | Feel free to use any other model type, but make sure that the output is `Boxes` **NOT** `Masks`.
84 | 
85 | ## Setting up the pipeline
86 | Once we have decided on a model structure, we can find one of the pipeline configs provided by TensorFlow that corresponds to our model type from [here](https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs).
87 | Since we are using `faster r-cnn resnet101` we can download [`faster_rcnn_resnet101_coco.config`](https://github.com/tensorflow/models/raw/master/research/object_detection/samples/configs/faster_rcnn_resnet101_coco.config).
88 | The pipeline config tells the TensorFlow Object Detection API what type of model we want to train, how to train it, and with what data. We should be fine with the majority of the defaults, but feel free to tinker with any of the model/training params. You can get more info on the format of the config files [here](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/configuring_jobs.md).
89 | 
90 | > **Note:** There are a few things that will be dynamically changed with a script we write so that the config will always work with our data. These include: `num_classes`, `fine_tune_checkpoint`, `label_map_path` and `input_path`.
91 | 
92 | ## Setting up the training script
93 | When we start a training run, our object storage bucket gets mounted to the training service. This gives us access to all of our images and an `_annotations.json` file with all of our bounding box annotations. We can access this data via an environment variable named `DATA_DIR`.
94 | When the training run begins, it looks for and runs a file named `start.sh`. This is where we can prepare our data and then run the training command.
95 | 
96 | ### Preparing the training data
97 | The TensorFlow Object Detection API expects our data to be in the TFRecord format, so we will need to write and run a conversion script.
98 | 
99 | The format of the `_annotations.json` looks something like this:
100 | ```
101 | {
102 |   "version": "1.0",
103 |   "type": "localization",
104 |   "labels": ["Cat", "Dog"],
105 |   "annotations": {
106 |     "image1.jpg": [
107 |       {
108 |         "x": 0.7255949630314233,
109 |         "x2": 0.9695875693160814,
110 |         "y": 0.5820120073891626,
111 |         "y2": 1,
112 |         "label": "Cat"
113 |       },
114 |       {
115 |         "x": 0.8845598428835489,
116 |         "x2": 1,
117 |         "y": 0.1829972290640394,
118 |         "y2": 0.966248460591133,
119 |         "label": "Dog"
120 |       }
121 |     ]
122 |   }
123 | }
124 | ```
125 | 
126 | Along with the TFRecord we also need a label map protobuf. The label map is what maps an integer id to a text label name. The ids are 1-indexed, meaning the first label will have an id of 1, not 0.
127 | This is an example of what a label map for our `_annotations.json` example would look like:
128 | ```
129 | item {
130 |   id: 1
131 |   name: 'Cat'
132 | }
133 | 
134 | item {
135 |   id: 2
136 |   name: 'Dog'
137 | }
138 | ```
139 | 
140 | The TFRecord format is a collection of serialized feature dicts, each looking something like this:
141 | ```
142 | {
143 |   'image/height': 1800,
144 |   'image/width': 2400,
145 |   'image/filename': 'image1.jpg',
146 |   'image/source_id': 'image1.jpg',
147 |   'image/encoded': ACTUAL_ENCODED_IMAGE_DATA_AS_BYTES,
148 |   'image/format': 'jpeg',
149 |   'image/object/bbox/xmin': [0.7255949630314233, 0.8845598428835489],
150 |   'image/object/bbox/xmax': [0.9695875693160814, 1.0000000000000000],
151 |   'image/object/bbox/ymin': [0.5820120073891626, 0.1829972290640394],
152 |   'image/object/bbox/ymax': [1.0000000000000000, 0.9662484605911330],
153 |   'image/object/class/text': (['Cat', 'Dog']),
154 |   'image/object/class/label': ([1, 2])
155 | }
156 | ```
157 | 
158 | We can access our annotations with the following code:
159 | ```python
160 | # Open _annotations.json, os.environ['DATA_DIR'] is the directory where all of
161 | # our bucket data is stored.
162 | with open(os.path.join(os.environ['DATA_DIR'], '_annotations.json')) as f:
163 |     annotations = json.load(f)['annotations']
164 | 
165 | # Loop through each image and through each image's annotations and collect all
166 | # the labels into a set. We could also just use labels array, but this could
167 | # include labels that aren't used in the dataset.
168 | labels = list({a['label'] for image in annotations.values() for a in image})
169 | ```
170 | > You can find this code in `prepare_training.py`.
171 | 
172 | Once we have our annotations, we can generate a label map!
173 | ```python
174 | # Create a file named label_map.pbtxt
175 | with open('label_map.pbtxt', 'w') as file:
176 |     # Loop through all of the labels and write each label to the file with an id.
177 |     for idx, label in enumerate(labels):
178 |         file.write('item {\n')
179 |         file.write('\tname: \'{}\'\n'.format(label))
180 |         file.write('\tid: {}\n'.format(idx + 1)) # indexes must start at 1.
181 |         file.write('}\n')
182 | ```
183 | > You can find this code in `generate_label_map.py`.
184 | 
185 | Now that we have our label map, we can build our TFRecord.
186 | ```python
187 | # Create a train.record TFRecord file.
188 | with tf.python_io.TFRecordWriter('train.record') as writer:
189 |     # Load the label map we created.
190 |     label_map_dict = label_map_util.get_label_map_dict('label_map.pbtxt')
191 |     # Get a list of all images in our dataset.
192 |     image_names = [image for image in annotations.keys()]
193 | 
194 |     # Loop through all the training examples.
195 |     for idx, image_name in enumerate(image_names):
196 |         # Make sure the image is actually a file
197 |         img_path = os.path.join(os.environ['DATA_DIR'], image_name)
198 |         if not os.path.isfile(img_path):
199 |             continue
200 | 
201 |         # Read in the image.
202 |         with tf.gfile.GFile(img_path, 'rb') as fid:
203 |             encoded_jpg = fid.read()
204 | 
205 |         # Open the image with PIL so we can check that it's a jpeg and get the image
206 |         # dimensions.
207 |         encoded_jpg_io = io.BytesIO(encoded_jpg)
208 |         image = PIL.Image.open(encoded_jpg_io)
209 |         if image.format != 'JPEG':
210 |             raise ValueError('Image format not JPEG')
211 | 
212 |         width, height = image.size
213 | 
214 |         # Initialize all the arrays.
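        # (The x/y values stored in _annotations.json are already normalized to
        # the 0-1 range, which is what the Object Detection API expects for the
        # bounding box features below.)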
215 |         xmins = []
216 |         xmaxs = []
217 |         ymins = []
218 |         ymaxs = []
219 |         classes_text = []
220 |         classes = []
221 | 
222 |         # The class text is the label name and the class is the id. If there are 3
223 |         # cats in the image and 1 dog, it may look something like this:
224 |         #   classes_text = ['Cat', 'Cat', 'Dog', 'Cat']
225 |         #   classes      = [ 1   ,  1   ,  2   ,  1   ]
226 | 
227 |         # For each image, loop through all the annotations and append their values.
228 |         for annotation in annotations[image_name]:
229 |             xmins.append(annotation['x'])
230 |             xmaxs.append(annotation['x2'])
231 |             ymins.append(annotation['y'])
232 |             ymaxs.append(annotation['y2'])
233 |             label = annotation['label']
234 |             classes_text.append(label.encode('utf8'))
235 |             classes.append(label_map_dict[label])
236 | 
237 |         # Create the TFExample.
238 |         try:
239 |             tf_example = tf.train.Example(features=tf.train.Features(feature={
240 |                 'image/height': dataset_util.int64_feature(height),
241 |                 'image/width': dataset_util.int64_feature(width),
242 |                 'image/filename': dataset_util.bytes_feature(image_name.encode('utf8')),
243 |                 'image/source_id': dataset_util.bytes_feature(image_name.encode('utf8')),
244 |                 'image/encoded': dataset_util.bytes_feature(encoded_jpg),
245 |                 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
246 |                 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
247 |                 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
248 |                 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
249 |                 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
250 |                 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
251 |                 'image/object/class/label': dataset_util.int64_list_feature(classes),
252 |             }))
253 |             if tf_example:
254 |                 # Write the TFExample to the TFRecord.
255 |                 writer.write(tf_example.SerializeToString())
256 |         except ValueError:
257 |             print('Invalid example, ignoring.')
258 | ```
259 | > You can find this code in `generate_tf_record.py`.
260 | 
261 | > **Note:** There are a few extra things that we can do here, like shuffling the data and splitting it into training and validation sets.
262 | > We can also shard the TFRecord if we have a few thousand images. To learn more check out the docs [here](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md).
263 | 
264 | ### Downloading a pretrained model checkpoint
265 | Training a model from scratch can take days and requires tons of data. We can mitigate this by using a pretrained model checkpoint.
266 | Instead of starting from nothing, we can add to what was already learned with our own data.
267 | 
268 | We can download a checkpoint from the [model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md).
269 | 
270 | We can download the checkpoint to our training run with the following code:
271 | ```python
272 | download_base = 'http://download.tensorflow.org/models/object_detection/'
273 | model_file = 'faster_rcnn_resnet101_coco_2018_01_28.tar.gz'
274 | 
275 | # Download the checkpoint
276 | opener = urllib.request.URLopener()
277 | opener.retrieve(download_base + model_file, model_file)
278 | 
279 | # Extract all the `model.ckpt` files.
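# The archive nests its contents inside a versioned folder (e.g.
# faster_rcnn_resnet101_coco_2018_01_28/), so we strip the directories with
# os.path.basename and keep only the model.ckpt.* files, leaving them at
# checkpoint/model.ckpt.* where the pipeline config expects to find them.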
280 | with tarfile.open(model_file) as tar:
281 |     for member in tar.getmembers():
282 |         member.name = os.path.basename(member.name)
283 |         if 'model.ckpt' in member.name:
284 |             tar.extract(member, path='checkpoint')
285 | ```
286 | > You can find this code in `download_checkpoint.py`.
287 | 
288 | > **Note:** This script is downloading the `faster r-cnn resnet101` model; make sure you download the model type you are training.
289 | 
290 | ### Injecting the pipeline with proper values
291 | The final thing we need to do is inject our pipeline with the number of labels we have and where to find the label map, TFRecord and model checkpoint.
292 | ```python
293 | pipeline = 'faster_rcnn_resnet101_coco.config'
294 | 
295 | override_dict = {
296 |     'train_input_path': 'train.record',
297 |     'train_config.fine_tune_checkpoint': 'checkpoint/model.ckpt',
298 |     'label_map_path': 'label_map.pbtxt'
299 | }
300 | 
301 | configs = config_util.get_configs_from_pipeline_file(pipeline)
302 | meta_arch = configs["model"].WhichOneof("model")
303 | override_dict['model.{}.num_classes'.format(meta_arch)] = len(labels)
304 | configs = config_util.merge_external_params_with_configs(configs, kwargs_dict=override_dict)
305 | pipeline_config = config_util.create_pipeline_proto_from_configs(configs)
306 | config_util.save_pipeline_config(pipeline_config, '')
307 | ```
308 | > You can find this code in `override_pipeline.py`.
309 | 
310 | ## Final checklist
311 | All the code in the trainer should work as-is.
312 | 
313 | The only things you **MUST** do:
314 | - add the `object_detection-0.1.tar.gz` file to `trainer`
315 | - add the `slim-0.1.tar.gz` file to `trainer`
316 | 
317 | (Optional) To choose a different model:
318 | - download an alternative pipeline config
319 | - modify `MODEL_CHECKPOINT` in `prepare_training.py`
320 | - modify `MODEL_CONFIG` in `prepare_training.py`
321 | 
322 | ## Training the model
323 | When you're ready to train, all you need to do is zip the `trainer` directory and run:
324 | ```
325 | cacli train --script=trainer.zip
326 | ```
327 | 
--------------------------------------------------------------------------------
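## Downloading and exporting the trained model
Once training finishes, the checkpoint files and the generated `pipeline.config` end up in the training run's output location in your object storage bucket. As a rough sketch (assuming you have filled in `BUCKET_NAME`, `OUTPUT_LOCATION`, `INSTANCE_ID` and `API_KEY` in `download_model.py`, and set `STEPS` to the number of training steps you trained for), the `download_model.py` script at the root of this repo should pull everything down into a local `output/` directory:
```
pip install -r requirements.txt
python download_model.py
```
From there, `export_instructions.md` shows how to run `object_detection/export_inference_graph.py` from the `models/research` directory to export an inference graph from the downloaded checkpoint.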