├── image_segmentation ├── utils │ ├── __init__.py │ ├── tfrecord2idx │ ├── deeplab_model.py │ ├── compare_models.py │ ├── tfrecord_helpers.py │ └── model_helpers.py ├── image_segmentation │ ├── __init__.py │ ├── dali_config.py │ ├── utils.py │ ├── build_data.py │ ├── dali_pipeline.py │ ├── data_generator.py │ ├── train.py │ └── icnet.py ├── config.yaml ├── .gitattributes ├── examples │ ├── living_room.jpg │ ├── example_image_and_mask.png │ ├── example_pixel_probabilities.png │ ├── icnet_768x768_living_room.h5 │ ├── icnet_768x768_living_room.zip │ └── icnet_768x768_living_room.mlmodel ├── nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl ├── requirements.txt ├── LICENSE ├── convert_to_coreml.py ├── setup.py ├── Makefile ├── coco_object_info.txt ├── objectInfo150.txt ├── README.md └── create_tfrecord_dataset.py ├── style_transfer ├── style_transfer │ ├── __init__.py │ ├── dataset_builder.py │ ├── utils.py │ ├── layers.py │ ├── layer_converters.py │ ├── train.py │ ├── models.py │ └── fritz_coreml_converter.py ├── example │ ├── dog.jpg │ ├── starry_night.jpg │ ├── stylized_dog.jpg │ ├── starry_night_results.jpg │ ├── starry_night.h5 │ ├── starry_night_256x256_025.h5 │ ├── starry_night_640x480_025.mlmodel │ ├── starry_night_256x256_small_a03.h5 │ ├── starry_night_640x480_025_optimized.pb │ └── starry_night_640x480_small_a03_q8.mlmodel ├── setup.py ├── stylize_image.py ├── requirements.txt ├── convert_to_coreml.py ├── convert_to_tfmobile.py ├── create_training_dataset.py └── README.md ├── resources ├── README.md └── AI_Landscape.md ├── create_ml_playgrounds ├── pneumonia_detector │ ├── Pneumonia.playground │ │ ├── Contents.swift │ │ └── contents.xcplayground │ ├── Pneumonia.mlmodel │ └── README.md ├── subreddit_suggester │ ├── data.json │ ├── SubredditSuggester.mlmodel │ └── SubredditSuggester.playground │ │ ├── contents.xcplayground │ │ └── Contents.swift └── README.md ├── .gitattributes ├── LICENSE ├── README.md └── .gitignore /image_segmentation/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /image_segmentation/config.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | scaleTier: BASIC_GPU 3 | -------------------------------------------------------------------------------- /style_transfer/example/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/style_transfer/example/dog.jpg -------------------------------------------------------------------------------- /image_segmentation/.gitattributes: -------------------------------------------------------------------------------- 1 | nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/todo/fritz-models/master/style_transfer/example/starry_night.jpg -------------------------------------------------------------------------------- /style_transfer/example/stylized_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/style_transfer/example/stylized_dog.jpg -------------------------------------------------------------------------------- /image_segmentation/examples/living_room.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/image_segmentation/examples/living_room.jpg -------------------------------------------------------------------------------- /style_transfer/example/starry_night_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/style_transfer/example/starry_night_results.jpg -------------------------------------------------------------------------------- /resources/README.md: -------------------------------------------------------------------------------- 1 | # Resources 2 | 3 | Additional, non-code resources for machine learning / AI. 4 | 5 | * [AI Startup Landscape](./AI_Landscape.md) 6 | -------------------------------------------------------------------------------- /image_segmentation/examples/example_image_and_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/image_segmentation/examples/example_image_and_mask.png -------------------------------------------------------------------------------- /image_segmentation/examples/example_pixel_probabilities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/image_segmentation/examples/example_pixel_probabilities.png -------------------------------------------------------------------------------- /create_ml_playgrounds/pneumonia_detector/Pneumonia.playground/Contents.swift: -------------------------------------------------------------------------------- 1 | import CreateMLUI 2 | 3 | 4 | let builder = MLImageClassifierBuilder() 5 | builder.showInLiveView() 6 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9c522b30d709051fe24c06dd4ac27f0bd58101ce68e06fc79e1454d0424678cb 3 | size 569464 4 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night_256x256_025.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eb03d6faa1e226da19c82eb6d250d84db12a166d06a8332cfe0a7989b36bcce8 3 | size 569496 4 | -------------------------------------------------------------------------------- /create_ml_playgrounds/subreddit_suggester/data.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:be520cc19d10060552788eb6462640f631be0a845eb8f88048b14e47658d82eb 3 | size 3345068 4 | -------------------------------------------------------------------------------- 
/style_transfer/example/starry_night_640x480_025.mlmodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4b6abac67c45d5385fc17b46f32e1cc5ed1f9107c053344ed9a9757c47aba738 3 | size 438131 4 | -------------------------------------------------------------------------------- /create_ml_playgrounds/pneumonia_detector/Pneumonia.mlmodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:98fecaa9cd499fec169718e2b1156c048393f8b436891df4f9e305f5846c7238 3 | size 16980 4 | -------------------------------------------------------------------------------- /image_segmentation/examples/icnet_768x768_living_room.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a4d544785680f713f0091b83b938c22a56e1c8f981d1321adf4355097f9fac4c 3 | size 81300248 4 | -------------------------------------------------------------------------------- /image_segmentation/examples/icnet_768x768_living_room.zip: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:11a3dee73201e2dbeaae9112b8adb2281485d5d79ec1611dd87f3c0b74ed1eae 3 | size 98526975 4 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night_256x256_small_a03.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d3c21d53dda54dd6df0abc6d3b0c7637bc64c7fb9997b3fa29c97fbdc1bd61e1 3 | size 153272 4 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night_640x480_025_optimized.pb: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6262e219e71a9ebb201a13400d817b37625bd7231aedd20e7c22abc45c5d7506 3 | size 478672 4 | -------------------------------------------------------------------------------- /image_segmentation/examples/icnet_768x768_living_room.mlmodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3f3009bb2bb3b056b707527b4f54aaa8f1df28a46da7e03e1922379f540bf15e 3 | size 26938492 4 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night_640x480_small_a03_q8.mlmodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bc44cfa2aa8c056ff5fac3c83dd80c57731840ed5e68b6272cda34b6490fbfa4 3 | size 16876 4 | -------------------------------------------------------------------------------- /create_ml_playgrounds/subreddit_suggester/SubredditSuggester.mlmodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:7d409517ad9098ba80a33601df3d542a84edc9bb9f1d0494ff8862a96387cb6c 3 | size 1100617 4 | -------------------------------------------------------------------------------- /image_segmentation/nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 
2 | oid sha256:5d9a9865809b1a6f91a1c6033d6ba881a23d29e424f1bdb2b19e3b01177977d4 3 | size 17489870 4 | -------------------------------------------------------------------------------- /create_ml_playgrounds/pneumonia_detector/README.md: -------------------------------------------------------------------------------- 1 | # Detecting Pneumonia in an iOS App with Create ML 2 | 3 | Swift playground which is used to train image classifier for this [blog post](https://heartbeat.fritz.ai/detecting-pneumonia-in-an-ios-app-with-create-ml-5cff2a60a3d). 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.h5 filter=lfs diff=lfs merge=lfs -text 2 | *.mlmodel filter=lfs diff=lfs merge=lfs -text 3 | *.pb filter=lfs diff=lfs merge=lfs -text 4 | *.zip filter=lfs diff=lfs merge=lfs -text 5 | *.whl filter=lfs diff=lfs merge=lfs -text 6 | *.json filter=lfs diff=lfs merge=lfs -text 7 | -------------------------------------------------------------------------------- /create_ml_playgrounds/pneumonia_detector/Pneumonia.playground/contents.xcplayground: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /create_ml_playgrounds/subreddit_suggester/SubredditSuggester.playground/contents.xcplayground: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /image_segmentation/requirements.txt: -------------------------------------------------------------------------------- 1 | Keras==2.2.4 2 | h5py==2.7.1 3 | numpy==1.14.3 4 | matplotlib==2.2.2 5 | scikit-image==0.13.1 6 | Pillow==5.1.0 7 | six==1.10.0 8 | # Forked coremltools which includes fix for bilinear upsampling. 9 | # Update this after it has been merged into master. 10 | -e git+git@github.com:ghop02/coremltools.git@289-add-keras-bilinear-upsampling#egg=coremltools 11 | -------------------------------------------------------------------------------- /style_transfer/setup.py: -------------------------------------------------------------------------------- 1 | """Setup script for style_transfer.""" 2 | 3 | from setuptools import find_packages 4 | from setuptools import setup 5 | 6 | 7 | REQUIRED_PACKAGES = ['h5py', 'keras==2.1.2', 'Pillow'] 8 | 9 | setup( 10 | name='style_transfer', 11 | version='1.0', 12 | install_requires=REQUIRED_PACKAGES, 13 | include_package_data=True, 14 | packages=[p for p in find_packages() if p.startswith('style_transfer')], 15 | description='Fritz Style Transfer Library', 16 | ) 17 | -------------------------------------------------------------------------------- /create_ml_playgrounds/README.md: -------------------------------------------------------------------------------- 1 | # Create ML Playgrounds 2 | A collection of Swift playgrounds using Create ML to train Core ML models. 3 | 4 | ## Playgrounds 5 | 6 | * [Subreddit Suggester](https://github.com/fritzlabs/fritz-models/tree/master/create_ml_playgrounds): Reduce the number of clicks required for submitting posts to Reddit by automatically suggesting a subreddit based on the post's title. 7 | * [Pneumonia Detector](https://github.com/fritzlabs/fritz-models/tree/master/create_ml_playgrounds): Classify X-ray images to detect pediatric pneumonia. 
8 | -------------------------------------------------------------------------------- /image_segmentation/utils/tfrecord2idx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import struct 4 | 5 | if len(sys.argv) < 3: 6 | print("Usage: tfrecord2idx ") 7 | exit() 8 | 9 | f = open(sys.argv[1], 'rb') 10 | idx = open(sys.argv[2], 'w') 11 | 12 | while True: 13 | current = f.tell() 14 | try: 15 | # length 16 | byte_len = f.read(8) 17 | if byte_len == '': 18 | break 19 | # crc 20 | f.read(4) 21 | proto_len = struct.unpack('q', byte_len)[0] 22 | # proto 23 | f.read(proto_len) 24 | # crc 25 | f.read(4) 26 | idx.write(str(current) + ' ' + str(f.tell() - current) + '\n') 27 | except: 28 | print("Not a valid TFRecord file") 29 | break 30 | 31 | f.close() 32 | idx.close() 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Fritz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /image_segmentation/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Fritz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/dali_config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger(__name__) 4 | 5 | 6 | def build_config(**updates): 7 | defaults = { 8 | 'hue_min': -30, 9 | 'hue_max': 30, 10 | 'zoom_scale': 1.3, 11 | 'rotate_angle_min': -45, 12 | 'rotate_angle_max': 45, 13 | 'crop_x_max': 0.2, 14 | 'crop_y_max': 0.2, 15 | 'contrast_min': 0.45, 16 | 'contrast_max': 1.5, 17 | 'saturation_min': 0.4, 18 | 'saturation_max': 2.0, 19 | 'brightness_min': 0.35, 20 | 'brightness_max': 1.5, 21 | } 22 | for key in updates: 23 | if key not in defaults: 24 | raise Exception("Augmentation Config %s not found." % key) 25 | 26 | defaults.update(**updates) 27 | 28 | return defaults 29 | 30 | 31 | class DaliConfig(object): 32 | """Wrapper for Dali augmentation yaml config parameters. """ 33 | def __init__(self, **updates): 34 | 35 | self.__dict__ = build_config(**updates) 36 | 37 | def summarize(self): 38 | logger.info('Dali Image Augmentation Parameters') 39 | logger.info('==================================') 40 | for key, value in self.__dict__.items(): 41 | logger.info(' %s: %s', key, value) 42 | -------------------------------------------------------------------------------- /image_segmentation/convert_to_coreml.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import coremltools 5 | import keras 6 | 7 | from image_segmentation.icnet import ICNetModelFactory 8 | 9 | 10 | def convert(argv): 11 | parser = argparse.ArgumentParser( 12 | description='Convert a Keras ICNet model to Core ML' 13 | ) 14 | parser.add_argument( 15 | 'keras_checkpoint', nargs='?', type=str, 16 | help='a Keras model checkpoint to load and convert.' 17 | ) 18 | parser.add_argument( 19 | '--alpha', type=float, required=True, 20 | help='The width paramter of the network.') 21 | parser.add_argument( 22 | 'mlmodel_output', nargs='?', type=str, 23 | help='a .mlmodel output file.' 
24 | ) 25 | 26 | args = parser.parse_args(argv) 27 | 28 | original_keras_model = keras.models.load_model(args.keras_checkpoint) 29 | img_size = original_keras_model.input_shape[1] 30 | num_classes = original_keras_model.output_shape[0][-1] 31 | 32 | keras_model = ICNetModelFactory.build( 33 | img_size, 34 | num_classes, 35 | alpha=args.alpha, 36 | weights_path=args.keras_checkpoint, 37 | train=False 38 | ) 39 | 40 | mlmodel = coremltools.converters.keras.convert( 41 | keras_model, 42 | input_names='image', 43 | image_input_names='image', 44 | image_scale=2.0 / 255.0, 45 | red_bias=-1.0, 46 | green_bias=-1.0, 47 | blue_bias=-1.0, 48 | output_names='output' 49 | ) 50 | 51 | mlmodel.save(args.mlmodel_output) 52 | 53 | 54 | if __name__ == '__main__': 55 | convert(sys.argv[1:]) 56 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/dataset_builder.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import tensorflow as tf 4 | 5 | logger = logging.getLogger('trainer') 6 | 7 | 8 | class DatasetBuilder(object): 9 | """Build a TFRecord dataset for training.""" 10 | 11 | @staticmethod 12 | def _resize_fn(images, image_size): 13 | return tf.image.resize_images( 14 | images, 15 | image_size, 16 | method=tf.image.ResizeMethod.BICUBIC 17 | ) 18 | 19 | @staticmethod 20 | def _decode_example(example_proto): 21 | features = { 22 | "image/encoded": tf.FixedLenFeature( 23 | (), tf.string, default_value="" 24 | ) 25 | } 26 | parsed_features = tf.parse_single_example(example_proto, features) 27 | image = tf.image.decode_jpeg( 28 | parsed_features["image/encoded"], 29 | channels=3) 30 | return image 31 | 32 | @classmethod 33 | def build(cls, filename, batch_size, image_size): 34 | """Build a TensorFlow dataset from images. 35 | 36 | Args: 37 | filename (str) - a filename of tfrecords to load 38 | batch_size (int) - the batch size for the iterator 39 | image_size ((int, int)) - resize all images to a single size 40 | 41 | Returns 42 | dataset - a tfrecord dataset 43 | """ 44 | logger.info('Creating dataset from: %s' % filename) 45 | dataset = tf.data.TFRecordDataset(filename) 46 | dataset = dataset.map(cls._decode_example) 47 | dataset = dataset.map(lambda x: cls._resize_fn(x, image_size)) 48 | dataset = dataset.batch(batch_size) 49 | dataset = dataset.repeat() # Repeat forever 50 | return dataset 51 | -------------------------------------------------------------------------------- /image_segmentation/setup.py: -------------------------------------------------------------------------------- 1 | """Setup script for image_segmentation.""" 2 | 3 | import logging 4 | import subprocess 5 | from setuptools import find_packages 6 | from setuptools import setup 7 | from setuptools.command.install import install 8 | 9 | 10 | REQUIRED_PACKAGES = [ 11 | 'h5py', 12 | 'keras==2.2.4', 13 | 'Pillow', 14 | 'matplotlib', 15 | 'google-cloud-storage', 16 | ] 17 | 18 | 19 | class CustomCommands(install): 20 | """A setuptools Command class able to run arbitrary commands.""" 21 | 22 | def run_custom_command(self, command_list): 23 | p = subprocess.Popen( 24 | command_list, 25 | stdin=subprocess.PIPE, 26 | stdout=subprocess.PIPE, 27 | stderr=subprocess.STDOUT) 28 | # Can use communicate(input='y\n'.encode()) if the command run requires 29 | # some confirmation. 
30 | stdout_data, _ = p.communicate() 31 | logging.info('Log command output: %s', stdout_data) 32 | if p.returncode != 0: 33 | raise RuntimeError('Command %s failed: exit code: %s' % 34 | (command_list, p.returncode)) 35 | 36 | def run(self): 37 | self.run_custom_command(['apt-get', 'update']) 38 | self.run_custom_command([ 39 | 'apt-get', 'install', '-y', 'python-tk' 40 | ]) 41 | install.run(self) 42 | 43 | 44 | setup( 45 | name='image_segmentation', 46 | version='1.0', 47 | install_requires=REQUIRED_PACKAGES, 48 | include_package_data=True, 49 | packages=[ 50 | p for p in find_packages() 51 | if p.startswith('image_segmentation') or p.startswith('utils') 52 | ], 53 | description='Fritz Style Image Segmentation Library', 54 | cmdclass={ 55 | 'install': CustomCommands, 56 | } 57 | ) 58 | -------------------------------------------------------------------------------- /create_ml_playgrounds/subreddit_suggester/SubredditSuggester.playground/Contents.swift: -------------------------------------------------------------------------------- 1 | import CreateMLUI 2 | import CreateML 3 | import Foundation 4 | 5 | let dataFilename = "/Users/jltoole/fritz/fritz-createml-examples/subreddit_title_classifier/popular_data_top_year.json" 6 | let data = try MLDataTable(contentsOf: URL(fileURLWithPath: dataFilename)) 7 | print(data.description) 8 | 9 | let (trainingData, testingData) = data.randomSplit(by: 0.8, seed: 5) 10 | 11 | let subredditClassifier = try MLTextClassifier(trainingData: trainingData, 12 | textColumn: "text", 13 | labelColumn: "label") 14 | 15 | // Training accuracy as a percentage 16 | let trainingAccuracy = (1.0 - subredditClassifier.trainingMetrics.classificationError) * 100 17 | // Validation accuracy as a percentage 18 | let validationAccuracy = (1.0 - subredditClassifier.validationMetrics.classificationError) * 100 19 | print("Training Accuracy: \(trainingAccuracy), Validation Accuracy: \(validationAccuracy)") 20 | 21 | let evaluationMetrics = subredditClassifier.evaluation(on: testingData) 22 | 23 | // Evaluation accuracy as a percentage 24 | let evaluationAccuracy = (1.0 - evaluationMetrics.classificationError) * 100 25 | print("Evaluation Accuracy: \(evaluationAccuracy)") 26 | 27 | let title = "Saw this good boy at the park today with TensorFlow." 28 | let predictedSubreddit = try subredditClassifier.prediction(from: title) 29 | print(predictedSubreddit) 30 | 31 | let metadata = MLModelMetadata(author: "Jameson Toole", 32 | shortDescription: "Predict which subreddit a post should go in based on a title.", 33 | version: "1.0") 34 | 35 | try subredditClassifier.write(to: URL(fileURLWithPath: "/Users/jltoole/fritz/fritz-createml-examples/subreddit_title_classifier/subredditClassifier.mlmodel"), 36 | metadata: metadata) 37 | 38 | testingData. 39 | -------------------------------------------------------------------------------- /style_transfer/stylize_image.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import keras 3 | import logging 4 | import numpy 5 | import PIL.Image 6 | 7 | import keras_contrib 8 | 9 | from style_transfer import layers 10 | from style_transfer import utils 11 | 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger('stylize_image') 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser( 18 | description='Stylize an image using a trained model.' 
19 | ) 20 | 21 | parser.add_argument( 22 | '--input-image', type=str, required=True, 23 | help='An image to stylize.' 24 | ) 25 | parser.add_argument( 26 | '--output-image', type=str, required=True, 27 | help='An output file for the stylized image.' 28 | ) 29 | parser.add_argument( 30 | '--model-checkpoint', type=str, required=True, 31 | help='Checkpoint from a trained Style Transfer Network.' 32 | ) 33 | 34 | args = parser.parse_args() 35 | 36 | logger.info('Loading model from %s' % args.model_checkpoint) 37 | custom_objects = { 38 | 'InstanceNormalization': 39 | keras_contrib.layers.normalization.InstanceNormalization, 40 | 'DeprocessStylizedImage': layers.DeprocessStylizedImage 41 | } 42 | transfer_net = keras.models.load_model( 43 | args.model_checkpoint, 44 | custom_objects=custom_objects 45 | ) 46 | 47 | image_size = transfer_net.input_shape[1:3] 48 | 49 | inputs = [transfer_net.input, keras.backend.learning_phase()] 50 | outputs = [transfer_net.output] 51 | 52 | transfer_style = keras.backend.function(inputs, outputs) 53 | 54 | input_image = utils.load_image( 55 | args.input_image, 56 | image_size[0], 57 | image_size[1], 58 | expand_dims=True 59 | ) 60 | output_image = transfer_style([input_image, 1])[0] 61 | output_image = PIL.Image.fromarray(numpy.uint8(output_image[0])) 62 | output_image.save(args.output_image) 63 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/utils.py: -------------------------------------------------------------------------------- 1 | """Summary. 2 | 3 | Attributes: 4 | logger (TYPE): Description 5 | """ 6 | import io 7 | import logging 8 | import os 9 | 10 | import PIL.Image 11 | import numpy 12 | from tensorflow.python.lib.io import file_io 13 | 14 | 15 | logger = logging.getLogger('utils') 16 | 17 | 18 | def load_image( 19 | filename, 20 | height, 21 | width, 22 | expand_dims=False): 23 | """Load an image and transform it to a specific size. 24 | 25 | Optionally, preprocess the image through the VGG preprocessor. 26 | 27 | Args: 28 | filename (TYPE): Description 29 | height (TYPE): Description 30 | width (TYPE): Description 31 | expand_dims (bool, optional): Description 32 | filename - an image file to load 33 | height - the height of the transformed image 34 | width - the width of the transformed image 35 | vgg_preprocess - if True, preprocess the image for a VGG network. 36 | expand_dims - Add an addition dimension (B, H, W, C), useful for 37 | feeding models. 38 | 39 | Returns: 40 | img - a numpy array representing the image. 41 | """ 42 | img = file_io.read_file_to_string(filename, binary_mode=True) 43 | img = PIL.Image.open(io.BytesIO(img)) 44 | img = img.resize((width, height), resample=PIL.Image.BILINEAR) 45 | img = numpy.array(img)[:, :, :3] 46 | 47 | if expand_dims: 48 | img = numpy.expand_dims(img, axis=0) 49 | 50 | return img 51 | 52 | 53 | def copy_file_from_gcs(file_path): 54 | """Copy a file from gcs to local machine. 
55 | 56 | Args: 57 | file_path (str): a GCS url to download 58 | 59 | Returns: 60 | str: a local path to the file 61 | """ 62 | logger.info('Downloading %s' % file_path) 63 | with file_io.FileIO(file_path, mode='rb') as input_f: 64 | basename = os.path.basename(file_path) 65 | with file_io.FileIO(basename, mode='w+') as output_f: 66 | output_f.write(input_f.read()) 67 | return basename 68 | -------------------------------------------------------------------------------- /style_transfer/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.2.0 2 | appnope==0.1.0 3 | astor==0.6.2 4 | awscli==1.14.64 5 | backcall==0.1.0 6 | bleach==1.5.0 7 | botocore==1.9.17 8 | certifi==2018.4.16 9 | chardet==3.0.4 10 | colorama==0.3.7 11 | coremltools==0.8 12 | cycler==0.10.0 13 | Cython==0.28.2 14 | decorator==4.3.0 15 | docutils==0.14 16 | easydict==1.7 17 | entrypoints==0.2.3 18 | gast==0.2.0 19 | graphviz==0.8.3 20 | grpcio==1.11.0 21 | h5py==2.7.1 22 | html5lib==0.9999999 23 | idna==2.6 24 | imgaug==0.2.5 25 | ipykernel==4.8.2 26 | ipython==6.3.1 27 | ipython-genutils==0.2.0 28 | ipywidgets==7.2.1 29 | jedi==0.12.0 30 | Jinja2==2.10 31 | jmespath==0.9.3 32 | jsonschema==2.6.0 33 | jupyter==1.0.0 34 | jupyter-client==5.2.3 35 | jupyter-console==5.2.0 36 | jupyter-core==4.4.0 37 | Keras==2.1.6 38 | Keras-Applications==1.0.2 39 | keras-contrib==2.0.8 40 | Keras-Preprocessing==1.0.1 41 | kiwisolver==1.0.1 42 | lxml==4.2.3 43 | Markdown==2.6.11 44 | MarkupSafe==1.0 45 | matplotlib==2.2.2 46 | mistune==0.8.3 47 | mxnet==1.1.0.post0 48 | nbconvert==5.3.1 49 | nbformat==4.4.0 50 | networkx==2.1 51 | notebook>=5.7.2 52 | numpy==1.14.3 53 | opencv-contrib-python==3.4.0.12 54 | pandas==0.22.0 55 | pandocfilters==1.4.2 56 | parso==0.2.0 57 | pexpect==4.5.0 58 | pickleshare==0.7.4 59 | Pillow==5.1.0 60 | prettytable==0.7.2 61 | prometheus-client==0.3.0 62 | prompt-toolkit==1.0.15 63 | protobuf==3.5.2.post1 64 | ptyprocess==0.5.2 65 | pyasn1==0.4.2 66 | pycocotools==2.0.0 67 | pydot==1.2.4 68 | Pygments==2.2.0 69 | pyparsing==2.2.0 70 | python-dateutil==2.6.1 71 | pytz==2018.4 72 | PyWavelets==0.5.2 73 | PyYAML>=4.2b1 74 | pyzmq==17.0.0 75 | qtconsole==4.3.1 76 | requests==2.20.0 77 | rsa==3.4.2 78 | s3transfer==0.1.13 79 | scikit-image==0.13.1 80 | scipy==1.1.0 81 | seaborn==0.8.1 82 | Send2Trash==1.5.0 83 | simplegeneric==0.8.1 84 | six==1.10.0 85 | tensorboard==1.9.0 86 | tensorflow==1.9.0 87 | termcolor==1.1.0 88 | terminado==0.8.1 89 | testpath==0.3.1 90 | tfcoreml==0.2.0 91 | tornado==5.0.2 92 | traitlets==4.3.2 93 | turicreate==4.3.2 94 | urllib3>=1.23 95 | wcwidth==0.1.7 96 | webencodings==0.5.1 97 | Werkzeug==0.14.1 98 | widgetsnbextension==3.2.1 99 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/layers.py: -------------------------------------------------------------------------------- 1 | import keras 2 | 3 | 4 | class VGGNormalize(keras.layers.Layer): 5 | """A custom layer to normalize an image for input into a VGG model. 6 | 7 | This consists of swapping channel order and centering pixel values. 8 | 9 | Centering values come from: 10 | https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py # NOQA 11 | """ 12 | 13 | def __init__(self, **kwargs): 14 | """Initialize the layer. 15 | 16 | Args: 17 | **kwargs - arguments passed to the Keras layer base. 
18 | """ 19 | super(VGGNormalize, self).__init__(**kwargs) 20 | # work around for a bug introduced in diffences between the tf.keras and keras APIs 21 | self.outbound_nodes = self._outbound_nodes 22 | 23 | def build(self, input_shape): 24 | """Build the layer.""" 25 | pass 26 | 27 | def call(self, x, reverse_channels=True): 28 | """Apply the layer. 29 | 30 | Args: 31 | x - an input tensor. 32 | reverse_channels - if True, reverse the channel order 33 | """ 34 | # Swap channel order: 'RGB'->'BGR' 35 | if reverse_channels: 36 | x = x[:, :, :, ::-1] 37 | 38 | # Center pixel values. Technically each channel should have its 39 | # own center value, but the tensor computation is annoying so we'll 40 | # just center them all with the same value. 41 | x -= 120.0 42 | 43 | return x 44 | 45 | 46 | class DeprocessStylizedImage(keras.layers.Layer): 47 | """A layer to deprocess style transfer layer output. 48 | 49 | The style transfer network outputs an image where pixel values are 50 | between -1 and 1 due to a tanh activation. This layer converts that back 51 | to normal values between 0 and 255. 52 | """ 53 | 54 | def __init__(self, **kwargs): 55 | """Initialize the layer. 56 | 57 | Args: 58 | **kwargs - arguments passed to the Keras layer base. 59 | """ 60 | super(DeprocessStylizedImage, self).__init__(**kwargs) 61 | 62 | def build(self, input_shape): 63 | """Build the layer.""" 64 | pass 65 | 66 | def call(self, x): 67 | """Apply the layer.""" 68 | return (x + 1.0) * 127.5 69 | -------------------------------------------------------------------------------- /image_segmentation/Makefile: -------------------------------------------------------------------------------- 1 | 2 | download: 3 | ./download_and_convert_ade20k.sh 4 | 5 | create-training-data: 6 | mkdir -p data/${LABEL_SET} 7 | python create_tfrecord_dataset.py \ 8 | -i data/ADEChallengeData2016/images/training/ \ 9 | -a data/ADEChallengeData2016/annotations/training/ \ 10 | -o data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \ 11 | -l data/ADEChallengeData2016/objectInfo150.txt \ 12 | -w "person, individual, someone, somebody, mortal, soul|house:building, edifice:house:skyscraper|sky|car, auto, automobile, machine, motorcar:bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle:truck, motortruck:van|bicycle, bike, wheel, cycle:minibike, motorbike" \ 13 | -t 0.20 14 | 15 | upload-data: 16 | gsutil cp data/${LABEL_SET}/* gs://${GCS_BUCKET}/data/${LABEL_SET}/ 17 | 18 | 19 | train-local-refine: 20 | python -m image_segmentation.train \ 21 | -d data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \ 22 | -l data/${LABEL_SET}/labels.txt \ 23 | -n 10000 \ 24 | -s 768 \ 25 | -a 1 \ 26 | --steps-per-epoch 100 \ 27 | --batch-size 5 \ 28 | --lr 0.0001 \ 29 | --fine-tune-checkpoint data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_1_rf.h5 \ 30 | -o data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_1_rf.h5 \ 31 | --refine 32 | 33 | train-local: 34 | python -m image_segmentation.train \ 35 | --data data/combined2.tfrecord \ 36 | --use-dali \ 37 | -l data/${LABEL_SET}/labels.txt \ 38 | -n 500000 \ 39 | -s 768 \ 40 | -a 1 \ 41 | --batch-size 12 \ 42 | --steps-per-epoch 2500 \ 43 | --parallel-calls 4 \ 44 | --lr 0.0001 \ 45 | --fine-tune-checkpoint data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_1_fine.h5 \ 46 | --add-noise \ 47 | --model-name people_with_noise 48 | 49 | 50 | train-cloud: 51 | python setup.py sdist 52 | gcloud ml-engine jobs submit training `whoami`_image_segmentation_`date +%s` \ 53 | --runtime-version 1.9 \ 54 | 
--job-dir=gs://${GCS_BUCKET} \ 55 | --packages dist/image_segmentation-1.0.tar.gz,nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl \ 56 | --module-name image_segmentation.train \ 57 | --region us-central1 \ 58 | --config config.yaml \ 59 | -- \ 60 | -d gs://fritz-data-sandbox/ADEChallengeData2016/people/people_data.tfrecord \ 61 | -l gs://fritz-data-sandbox/ADEChallengeData2016/people/labels.txt \ 62 | --use-dali \ 63 | -n 5000 \ 64 | -s 768 \ 65 | -a 1 \ 66 | --batch-size 12 \ 67 | --steps-per-epoch 250 \ 68 | --parallel-calls 4 \ 69 | --lr 0.001 \ 70 | --add-noise \ 71 | --model-name ${MODEL_NAME} \ 72 | --gcs-bucket gs://${GCS_BUCKET}/train 73 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/layer_converters.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | """Functions to convert custom Keras layers to equalivent Core ML Layers. 4 | 5 | Each of these functions must conform to the spec set by apple here: 6 | https://github.com/apple/coremltools/blob/master/coremltools/converters/keras/_layers2.py 7 | """ 8 | 9 | 10 | def convert_instancenormalization( 11 | builder, 12 | layer, 13 | input_names, 14 | output_names, 15 | keras_layer): 16 | """ 17 | Convert InstanceNormalization layer from to coreml. 18 | 19 | This conforms to the Core ML layer spec. 20 | 21 | Parameters 22 | ---------- 23 | keras_layer: layer 24 | A keras layer object. 25 | 26 | builder: NeuralNetworkBuilder 27 | A neural network builder object. 28 | """ 29 | input_name, output_name = (input_names[0], output_names[0]) 30 | nb_channels = keras_layer.get_weights()[0].shape[0] 31 | 32 | # Set parameters 33 | # Parameter arrangement in Keras: gamma, beta, mean, variance 34 | idx = 0 35 | gamma, beta = None, None 36 | if keras_layer.scale: 37 | gamma = keras_layer.get_weights()[idx] 38 | idx += 1 39 | if keras_layer.center: 40 | beta = keras_layer.get_weights()[idx] 41 | idx += 1 42 | 43 | epsilon = keras_layer.epsilon or 1e-5 44 | 45 | builder.add_batchnorm( 46 | name=layer, 47 | channels=nb_channels, 48 | gamma=gamma, 49 | beta=beta, 50 | compute_mean_var=True, 51 | instance_normalization=True, 52 | input_name=input_name, 53 | output_name=output_name, 54 | epsilon=epsilon 55 | ) 56 | 57 | 58 | def convert_deprocessstylizedimage( 59 | builder, 60 | layer, 61 | input_names, 62 | output_names, 63 | keras_layes): 64 | """Convert the DeprocessStylizedImage layer type to Core ML. 65 | 66 | This simply takes the output of the tanh activation layer and scales 67 | values to conform to typical image RGB values. 
68 | """ 69 | input_name, output_name = (input_names[0], output_names[0]) 70 | 71 | # Apple's scale layer performs the following math 72 | # y = w * x + b 73 | # So to match the keras model's deprocessing layer y = (x + 1) * 127.5 74 | # We can set the following matrices 75 | scale = 127.5 76 | w = numpy.array([scale, scale, scale]) 77 | b = numpy.array([scale, scale, scale]) 78 | 79 | builder.add_scale( 80 | name=input_name, 81 | W=w, 82 | b=b, 83 | has_bias=True, 84 | shape_scale=w.shape, 85 | shape_bias=b.shape, 86 | input_name=input_name, 87 | output_name=output_name 88 | ) 89 | -------------------------------------------------------------------------------- /style_transfer/convert_to_coreml.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import keras_contrib 3 | import logging 4 | import sys 5 | 6 | from style_transfer import layer_converters 7 | from style_transfer import layers 8 | from style_transfer import models 9 | from style_transfer.fritz_coreml_converter import FritzCoremlConverter 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | logger = logging.getLogger('convert_to_coreml') 13 | 14 | 15 | def main(argv): 16 | 17 | parser = argparse.ArgumentParser( 18 | description='Stylize an image using a trained model.' 19 | ) 20 | parser.add_argument( 21 | '--keras-checkpoint', type=str, required=True, 22 | help='Weights from a trained Style Transfer Network.' 23 | ) 24 | parser.add_argument( 25 | '--alpha', type=float, required=True, 26 | help='The width multiplier of the network.' 27 | ) 28 | parser.add_argument( 29 | '--coreml-model', type=str, required=True, 30 | help='A CoreML output file to save to' 31 | ) 32 | parser.add_argument( 33 | '--image-size', type=str, default='640,480', 34 | help='The size of input and output of the final Core ML model: H,W' 35 | ) 36 | parser.add_argument( 37 | '--use-small-network', action='store_true', 38 | help=('Use a very small network architecture that works in real time ' 39 | 'on some mobile devices using only CPU') 40 | ) 41 | 42 | args = parser.parse_args(argv) 43 | 44 | image_size = [int(dim) for dim in args.image_size.split(',')] 45 | # Map custom layers to their custom coreml converters 46 | custom_layers = { 47 | keras_contrib.layers.normalization.InstanceNormalization: layer_converters.convert_instancenormalization, # NOQA 48 | layers.DeprocessStylizedImage: layer_converters.convert_deprocessstylizedimage # NOQA 49 | } 50 | 51 | logger.info('Loading model weights from %s' % args.keras_checkpoint) 52 | 53 | if args.use_small_network: 54 | model = models.SmallStyleTransferNetwork.build( 55 | image_size, 56 | alpha=args.alpha, 57 | checkpoint_file=args.keras_checkpoint 58 | ) 59 | else: 60 | model = models.StyleTransferNetwork.build( 61 | image_size, 62 | alpha=args.alpha, 63 | checkpoint_file=args.keras_checkpoint 64 | ) 65 | 66 | fritz_converter = FritzCoremlConverter() 67 | mlmodel = fritz_converter.convert_keras( 68 | model, 69 | input_names=['image'], 70 | image_input_names=['image'], 71 | output_names=['stylizedImage'], 72 | image_output_names=['stylizedImage'], 73 | custom_layers=custom_layers 74 | ) 75 | logger.info('Saving .mlmodel to %s' % args.coreml_model) 76 | mlmodel.save(args.coreml_model) 77 | 78 | 79 | if __name__ == '__main__': 80 | main(sys.argv[1:]) 81 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/utils.py: 
-------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as pyplot 2 | import numpy 3 | import skimage.transform 4 | 5 | 6 | def plot_image_and_mask(img, mask, alpha=0.6, deprocess_func=None, 7 | reference_mask=None, 8 | show_original_image=True, 9 | small=False): 10 | """Plot an image and overlays a transparent segmentation mask. 11 | 12 | Args: 13 | img (arr): the image data to plot 14 | mask (arr): the segmentation mask 15 | alpha (float, optional): the alpha value of the segmentation mask. 16 | small: If true, output small figure 17 | 18 | Returns: 19 | pyplot.plot: a plot 20 | """ 21 | max_mask = numpy.argmax(mask, axis=-1) 22 | 23 | rows, columns = 1, 1 24 | if show_original_image: 25 | columns += 1 26 | if reference_mask is not None: 27 | columns += 1 28 | 29 | fig = pyplot.figure() 30 | 31 | if deprocess_func: 32 | img = deprocess_func(img) 33 | 34 | # Add Results plot 35 | column_index = 1 36 | fig.add_subplot(rows, columns, column_index) 37 | 38 | pyplot.imshow(img.astype(int)) 39 | pyplot.imshow( 40 | skimage.transform.resize( 41 | max_mask, 42 | img.shape[:2], 43 | order=0), 44 | alpha=alpha) 45 | 46 | if reference_mask is not None: 47 | column_index += 1 48 | fig.add_subplot(rows, columns, column_index) 49 | pyplot.imshow(img.astype(int)) 50 | pyplot.imshow( 51 | skimage.transform.resize( 52 | reference_mask[:, :, 0], 53 | img.shape[:2], 54 | order=0), 55 | alpha=alpha) 56 | 57 | if show_original_image: 58 | column_index += 1 59 | fig.add_subplot(rows, columns, column_index) 60 | pyplot.imshow(img.astype('uint8')) 61 | 62 | if small: 63 | fig.set_size_inches(columns * 5, 5) 64 | else: 65 | fig.set_size_inches(columns * 10, 10) 66 | 67 | return fig 68 | 69 | 70 | def plot_pixel_probabilities(probabilities, class_labels, subplot=None): 71 | """Plot probabilities that each pixel belows to a given class. 72 | 73 | This creates a subplot for each class and plots a heatmap of 74 | probabilities that each pixel belongs to each class. 75 | 76 | Args: 77 | probabilities (arr): an array of class probabilities for each pixel 78 | class_labels (List[str]): the labels for each class 79 | 80 | Returns: 81 | TYPE: Description 82 | """ 83 | num_classes = probabilities.shape[-1] 84 | total_items = num_classes + (1 if subplot else 0) 85 | columns = 4 86 | rows = numpy.ceil(total_items / 4) 87 | fig = pyplot.figure(figsize=(12, rows * 4)) 88 | 89 | if subplot: 90 | fig.add_subplot(subplot) 91 | 92 | for cidx in range(num_classes): 93 | ax = fig.add_subplot(rows, columns, cidx + 1) 94 | ax.imshow(probabilities[:, :, cidx], vmin=0, vmax=1.0) 95 | ax.set_title(class_labels[cidx]) 96 | fig.tight_layout() 97 | return fig 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fritz Models 2 | A collection of machine and deep learning models designed to run on mobile devices. 3 | 4 | Models in this repository contain code and utility for training models as well as converting them to mobile-friendly formats like Core ML, TensorFlow Mobile, and TensorFlow Lite. 5 | 6 | ## Update: 12/26/2018 7 | For convenience, we've consolodated a few open source projects into a single repository. `fritz-style-transfer` has been renamed `fritz-models`. Have no fear, all of the code for style transfer lives in the `style_transfer` sub-directory. 
8 | 9 | ## Models 10 | 11 | * [Style Transfer](https://github.com/fritzlabs/fritz-models/tree/master/style_transfer): Transform images into works of art by transferring the style of one image onto the content of another. 12 | * [Image Segmentation](https://github.com/fritzlabs/fritz-models/tree/master/image_segmentation): Semantic segmentation of images. Assign a value to each pixel of an image corresponding to the type of object it belongs to. 13 | * [Create ML Playgrounds](https://github.com/fritzlabs/fritz-models/tree/master/create_ml_playgrounds): A series of playgrounds for training models with Apple's Create ML tool. 14 | 15 | Don't see the model you're looking for? Open an issue and let us know! 16 | 17 | ## Add to your app 18 | To see live demonstrations of these models running on-device, the Heartbeat App is available in both the [App Store](https://itunes.apple.com/us/app/heartbeat-by-fritz/id1325206416?mt=8) ([source code](https://github.com/fritzlabs/heartbeat-ios)) and [Play Store](https://play.google.com/store/apps/details?id=ai.fritz.heartbeat) ([source code](https://github.com/fritzlabs/heartbeat-android)). 19 | 20 | If you'd like to incorporate any of these models or versions you've trained into your own app, head over to [Fritz](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models). SDKs are available for both iOS and Android. 21 | 22 | ## Additional resources 23 | 24 | Additional, [non-code resources](resources/README.md) for machine learning and AI. 25 | 26 | * [AI and ML Landscape](resources/AI_Landscape.md): our curated list of helpful products and services for AI and machine learning. 27 | 28 | ## Join the community 29 | [Heartbeat](https://heartbeat.fritz.ai/?utm_source=github&utm_campaign=fritz-models) is a community of developers interested in the intersection of mobile and machine learning. [Chat with us in Slack](https://join.slack.com/t/heartbeat-by-fritz/shared_invite/enQtMzY5OTM1MzgyODIzLTZhNTFjYmRiODU0NjZjNjJlOGRjYzI2OTIwY2M4YTBiNjM1ODU1ZmU3Y2Q2MmMzMmI2ZTIzZjQ1ZWI3NzBkZGU) and stay up to date on the latest mobile ML news with our [Newsletter](https://mobileml.us16.list-manage.com/subscribe?u=de53bead690affb8e9a21de8f&id=68acb5c0fd). 30 | 31 | ## A note about large files 32 | Large files like model checkpoints, data, and archives of compiled code are managed via `git lfs`. You need to have Git LFS installed in order to download these files. Installation instructions are available [here](https://github.com/git-lfs/git-lfs#getting-started). 33 | 34 | If you have Git LFS installed, large files will download automatically by default. This can take a while and require a good connection. To clone this repository without downloading the model checkpoints, you can run: 35 | 36 | ``` 37 | GIT_LFS_SKIP_SMUDGE=1 git clone ...
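# If you skip the large files at clone time, you can fetch them afterwards with Git LFS;
# a minimal follow-up, assuming Git LFS is installed as described above:
git lfs pull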
38 | ``` 39 | -------------------------------------------------------------------------------- /image_segmentation/coco_object_info.txt: -------------------------------------------------------------------------------- 1 | Idx Ratio Train Val Name 2 | 1 0.0 0 0 person 3 | 2 0.0 0 0 bicycle 4 | 3 0.0 0 0 car 5 | 4 0.0 0 0 motorcycle 6 | 5 0.0 0 0 airplane 7 | 6 0.0 0 0 bus 8 | 7 0.0 0 0 train 9 | 8 0.0 0 0 truck 10 | 9 0.0 0 0 boat 11 | 10 0.0 0 0 traffic light 12 | 11 0.0 0 0 fire hydrant 13 | 13 0.0 0 0 stop sign 14 | 14 0.0 0 0 parking meter 15 | 15 0.0 0 0 bench 16 | 16 0.0 0 0 bird 17 | 17 0.0 0 0 cat 18 | 18 0.0 0 0 dog 19 | 19 0.0 0 0 horse 20 | 20 0.0 0 0 sheep 21 | 21 0.0 0 0 cow 22 | 22 0.0 0 0 elephant 23 | 23 0.0 0 0 bear 24 | 24 0.0 0 0 zebra 25 | 25 0.0 0 0 giraffe 26 | 27 0.0 0 0 backpack 27 | 28 0.0 0 0 umbrella 28 | 31 0.0 0 0 handbag 29 | 32 0.0 0 0 tie 30 | 33 0.0 0 0 suitcase 31 | 34 0.0 0 0 frisbee 32 | 35 0.0 0 0 skis 33 | 36 0.0 0 0 snowboard 34 | 37 0.0 0 0 sports ball 35 | 38 0.0 0 0 kite 36 | 39 0.0 0 0 baseball bat 37 | 40 0.0 0 0 baseball glove 38 | 41 0.0 0 0 skateboard 39 | 42 0.0 0 0 surfboard 40 | 43 0.0 0 0 tennis racket 41 | 44 0.0 0 0 bottle 42 | 46 0.0 0 0 wine glass 43 | 47 0.0 0 0 cup 44 | 48 0.0 0 0 fork 45 | 49 0.0 0 0 knife 46 | 50 0.0 0 0 spoon 47 | 51 0.0 0 0 bowl 48 | 52 0.0 0 0 banana 49 | 53 0.0 0 0 apple 50 | 54 0.0 0 0 sandwich 51 | 55 0.0 0 0 orange 52 | 56 0.0 0 0 broccoli 53 | 57 0.0 0 0 carrot 54 | 58 0.0 0 0 hot dog 55 | 59 0.0 0 0 pizza 56 | 60 0.0 0 0 donut 57 | 61 0.0 0 0 cake 58 | 62 0.0 0 0 chair 59 | 63 0.0 0 0 couch 60 | 64 0.0 0 0 potted plant 61 | 65 0.0 0 0 bed 62 | 67 0.0 0 0 dining table 63 | 70 0.0 0 0 toilet 64 | 72 0.0 0 0 tv 65 | 73 0.0 0 0 laptop 66 | 74 0.0 0 0 mouse 67 | 75 0.0 0 0 remote 68 | 76 0.0 0 0 keyboard 69 | 77 0.0 0 0 cell phone 70 | 78 0.0 0 0 microwave 71 | 79 0.0 0 0 oven 72 | 80 0.0 0 0 toaster 73 | 81 0.0 0 0 sink 74 | 82 0.0 0 0 refrigerator 75 | 84 0.0 0 0 book 76 | 85 0.0 0 0 clock 77 | 86 0.0 0 0 vase 78 | 87 0.0 0 0 scissors 79 | 88 0.0 0 0 teddy bear 80 | 89 0.0 0 0 hair drier 81 | 90 0.0 0 0 toothbrush 82 | 92 0.0 0 0 banner 83 | 93 0.0 0 0 blanket 84 | 95 0.0 0 0 bridge 85 | 100 0.0 0 0 cardboard 86 | 107 0.0 0 0 counter 87 | 109 0.0 0 0 curtain 88 | 112 0.0 0 0 door-stuff 89 | 118 0.0 0 0 floor-wood 90 | 119 0.0 0 0 flower 91 | 122 0.0 0 0 fruit 92 | 125 0.0 0 0 gravel 93 | 128 0.0 0 0 house 94 | 130 0.0 0 0 light 95 | 133 0.0 0 0 mirror-stuff 96 | 138 0.0 0 0 net 97 | 141 0.0 0 0 pillow 98 | 144 0.0 0 0 platform 99 | 145 0.0 0 0 playingfield 100 | 147 0.0 0 0 railroad 101 | 148 0.0 0 0 river 102 | 149 0.0 0 0 road 103 | 151 0.0 0 0 roof 104 | 154 0.0 0 0 sand 105 | 155 0.0 0 0 sea 106 | 156 0.0 0 0 shelf 107 | 159 0.0 0 0 snow 108 | 161 0.0 0 0 stairs 109 | 166 0.0 0 0 tent 110 | 168 0.0 0 0 towel 111 | 171 0.0 0 0 wall-brick 112 | 175 0.0 0 0 wall-stone 113 | 176 0.0 0 0 wall-tile 114 | 177 0.0 0 0 wall-wood 115 | 178 0.0 0 0 water-other 116 | 180 0.0 0 0 window-blind 117 | 181 0.0 0 0 window-other 118 | 184 0.0 0 0 tree-merged 119 | 185 0.0 0 0 fence-merged 120 | 186 0.0 0 0 ceiling-merged 121 | 187 0.0 0 0 sky-other-merged 122 | 188 0.0 0 0 cabinet-merged 123 | 189 0.0 0 0 table-merged 124 | 190 0.0 0 0 floor-other-merged 125 | 191 0.0 0 0 pavement-merged 126 | 192 0.0 0 0 mountain-merged 127 | 193 0.0 0 0 grass-merged 128 | 194 0.0 0 0 dirt-merged 129 | 195 0.0 0 0 paper-merged 130 | 196 0.0 0 0 food-other-merged 131 | 197 0.0 0 0 building-other-merged 132 | 198 0.0 0 0 rock-merged 133 | 199 
0.0 0 0 wall-other-merged 134 | 200 0.0 0 0 rug-merged 135 | -------------------------------------------------------------------------------- /image_segmentation/utils/deeplab_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tarfile 3 | 4 | import numpy as np 5 | from PIL import Image 6 | from six.moves import urllib 7 | import tempfile 8 | import tensorflow as tf 9 | 10 | 11 | MODEL_NAME = 'mobilenetv2_coco_voctrainaug' 12 | 13 | _DOWNLOAD_URL_PREFIX = 'http://download.tensorflow.org/models/' 14 | _MODEL_URLS = { 15 | 'mobilenetv2_coco_voctrainaug': 16 | 'deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz', 17 | 'mobilenetv2_coco_voctrainval': 18 | 'deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz', 19 | 'xception_coco_voctrainaug': 20 | 'deeplabv3_pascal_train_aug_2018_01_04.tar.gz', 21 | 'xception_coco_voctrainval': 22 | 'deeplabv3_pascal_trainval_2018_01_04.tar.gz', 23 | } 24 | _TARBALL_NAME = 'deeplab_model.tar.gz' 25 | 26 | 27 | class DeepLabModel(object): 28 | """Class to load deeplab model and run inference.""" 29 | 30 | INPUT_TENSOR_NAME = 'ImageTensor:0' 31 | OUTPUT_TENSOR_NAME = 'SemanticPredictions:0' 32 | INPUT_SIZE = 513 33 | FROZEN_GRAPH_NAME = 'frozen_inference_graph' 34 | 35 | def __init__(self, tarball_path): 36 | """Creates and loads pretrained deeplab model.""" 37 | self.graph = tf.Graph() 38 | 39 | graph_def = None 40 | # Extract frozen graph from tar archive. 41 | tar_file = tarfile.open(tarball_path) 42 | for tar_info in tar_file.getmembers(): 43 | if self.FROZEN_GRAPH_NAME in os.path.basename(tar_info.name): 44 | file_handle = tar_file.extractfile(tar_info) 45 | graph_def = tf.GraphDef.FromString(file_handle.read()) 46 | break 47 | 48 | tar_file.close() 49 | 50 | if graph_def is None: 51 | raise RuntimeError('Cannot find inference graph in tar archive.') 52 | 53 | with self.graph.as_default(): 54 | tf.import_graph_def(graph_def, name='') 55 | 56 | self.sess = tf.Session(graph=self.graph) 57 | 58 | def run(self, image): 59 | """Runs inference on a single image. 60 | 61 | Args: 62 | image: A PIL.Image object, raw input image. 63 | 64 | Returns: 65 | resized_image: RGB image resized from original input image. 66 | seg_map: Segmentation map of `resized_image`. 67 | """ 68 | width, height = image.size 69 | resize_ratio = 1.0 * self.INPUT_SIZE / max(width, height) 70 | target_size = (int(resize_ratio * width), int(resize_ratio * height)) 71 | resized_image = image.convert('RGB').resize(target_size, Image.ANTIALIAS) 72 | batch_seg_map = self.sess.run( 73 | self.OUTPUT_TENSOR_NAME, 74 | feed_dict={self.INPUT_TENSOR_NAME: [np.asarray(resized_image)]}) 75 | seg_map = batch_seg_map[0] 76 | return resized_image, seg_map 77 | 78 | 79 | def download_deeplab_model(model_name): 80 | model_dir = tempfile.mkdtemp() 81 | tf.gfile.MakeDirs(model_dir) 82 | 83 | download_path = os.path.join(model_dir, _TARBALL_NAME) 84 | print(download_path) 85 | print('downloading model, this might take a while...') 86 | 87 | urllib.request.urlretrieve( 88 | _DOWNLOAD_URL_PREFIX + _MODEL_URLS[MODEL_NAME], 89 | download_path 90 | ) 91 | print('download completed! 
loading DeepLab model...') 92 | 93 | model = DeepLabModel(download_path) 94 | print('model loaded successfully!') 95 | return model 96 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Any data should go in a data/ dir untracked by git 2 | data/ 3 | 4 | # Local development 5 | .DS_Store 6 | .vscode/ 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # dotenv 90 | .env 91 | 92 | # virtualenv 93 | .venv 94 | venv/ 95 | ENV/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | 110 | # Swift things for Create ML 111 | # Xcode 112 | # 113 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 114 | 115 | ## Build generated 116 | build/ 117 | DerivedData/ 118 | 119 | ## Various settings 120 | *.pbxuser 121 | !default.pbxuser 122 | *.mode1v3 123 | !default.mode1v3 124 | *.mode2v3 125 | !default.mode2v3 126 | *.perspectivev3 127 | !default.perspectivev3 128 | xcuserdata/ 129 | 130 | ## Other 131 | *.moved-aside 132 | *.xccheckout 133 | *.xcscmblueprint 134 | 135 | ## Obj-C/Swift specific 136 | *.hmap 137 | *.ipa 138 | *.dSYM.zip 139 | *.dSYM 140 | 141 | ## Playgrounds 142 | timeline.xctimeline 143 | playground.xcworkspace 144 | 145 | # Swift Package Manager 146 | # 147 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 148 | # Packages/ 149 | # Package.pins 150 | # Package.resolved 151 | .build/ 152 | 153 | # CocoaPods 154 | # 155 | # We recommend against adding the Pods directory to your .gitignore. However 156 | # you should judge for yourself, the pros and cons are mentioned at: 157 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 158 | # 159 | # Pods/ 160 | 161 | # Carthage 162 | # 163 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 
164 | # Carthage/Checkouts 165 | 166 | Carthage/Build 167 | 168 | # fastlane 169 | # 170 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the 171 | # screenshots whenever they are needed. 172 | # For more information about the recommended setup visit: 173 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 174 | 175 | fastlane/report.xml 176 | fastlane/Preview.html 177 | fastlane/screenshots/**/*.png 178 | fastlane/test_output -------------------------------------------------------------------------------- /image_segmentation/utils/compare_models.py: -------------------------------------------------------------------------------- 1 | from matplotlib import gridspec 2 | from matplotlib import pyplot 3 | import skimage.transform 4 | import numpy 5 | 6 | 7 | def create_pascal_label_colormap(): 8 | """Creates a label colormap used in PASCAL VOC segmentation benchmark. 9 | 10 | Returns: 11 | A Colormap for visualizing segmentation results. 12 | """ 13 | colormap = numpy.zeros((256, 3), dtype=int) 14 | ind = numpy.arange(256, dtype=int) 15 | 16 | for shift in reversed(range(8)): 17 | for channel in range(3): 18 | colormap[:, channel] |= ((ind >> channel) & 1) << shift 19 | ind >>= 3 20 | 21 | return colormap 22 | 23 | 24 | def label_to_color_image(label): 25 | """Adds color defined by the dataset colormap to the label. 26 | 27 | Args: 28 | label: A 2D array with integer type, storing the segmentation label. 29 | 30 | Returns: 31 | result: A 2D array with floating type. The element of the array 32 | is the color indexed by the corresponding element in the inumpyut label 33 | to the PASCAL color map. 34 | 35 | Raises: 36 | ValueError: If label is not of rank 2 or its value is larger than color 37 | map maximum entry. 
38 | """ 39 | if label.ndim != 2: 40 | raise ValueError('Expect 2-D inumpyut label') 41 | 42 | colormap = create_pascal_label_colormap() 43 | 44 | if numpy.max(label) >= len(colormap): 45 | raise ValueError('label value too large.') 46 | 47 | return colormap[label] 48 | 49 | 50 | LABEL_NAMES = numpy.asarray([ 51 | 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 52 | 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 53 | 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv' 54 | ]) 55 | 56 | FULL_LABEL_MAP = numpy.arange(len(LABEL_NAMES)).reshape(len(LABEL_NAMES), 1) 57 | FULL_COLOR_MAP = label_to_color_image(FULL_LABEL_MAP) 58 | 59 | 60 | def vis_segmentation(image, deeplab_seg_map, icnet_seg_map): 61 | """Visualizes inumpyut image, segmentation map and overlay view.""" 62 | pyplot.figure(figsize=(15, 5)) 63 | grid_spec = gridspec.GridSpec(1, 4, width_ratios=[4, 4, 4, 4]) 64 | 65 | pyplot.subplot(grid_spec[0]) 66 | pyplot.imshow(image) 67 | pyplot.axis('off') 68 | pyplot.title('Input Image') 69 | 70 | pyplot.subplot(grid_spec[1]) 71 | seg_image = label_to_color_image(deeplab_seg_map).astype(numpy.uint8) 72 | pyplot.imshow(seg_image) 73 | pyplot.axis('off') 74 | pyplot.title('Deeplab v3 Segmentation') 75 | 76 | pyplot.subplot(grid_spec[2]) 77 | # resize icnet mask 78 | icnet_seg_map = skimage.transform.resize( 79 | icnet_seg_map[0, :, :], 80 | deeplab_seg_map.shape, 81 | preserve_range=True, 82 | anti_aliasing=False, 83 | order=0).astype('int') 84 | seg_image = label_to_color_image(icnet_seg_map).astype(numpy.uint8) 85 | pyplot.imshow(seg_image) 86 | pyplot.axis('off') 87 | pyplot.title('Fritz Segmentation') 88 | 89 | pyplot.subplot(grid_spec[3]) 90 | pyplot.imshow(image) 91 | pyplot.imshow(seg_image, alpha=0.7) 92 | pyplot.axis('off') 93 | pyplot.title('Fritz Segmentation Overlay') 94 | 95 | pyplot.grid('off') 96 | pyplot.show() 97 | 98 | 99 | def multiple_vis(results): 100 | 101 | fig = pyplot.figure(figsize=(15, 3 * len(results))) 102 | grid_spec = gridspec.GridSpec(len(results), 4, width_ratios=[4, 4, 4, 4]) 103 | 104 | i = 0 105 | for image, deeplab_seg_map, icnet_seg_map in results: 106 | pyplot.subplot(grid_spec[i]) 107 | pyplot.imshow(image) 108 | # pyplot.axis('off') 109 | i += 1 110 | 111 | pyplot.subplot(grid_spec[i]) 112 | seg_image = label_to_color_image(deeplab_seg_map).astype(numpy.uint8) 113 | pyplot.imshow(seg_image) 114 | pyplot.axis('off') 115 | pyplot.title('Deeplab v3 Segmentation') 116 | 117 | i += 1 118 | pyplot.subplot(grid_spec[i]) 119 | # resize icnet mask 120 | icnet_seg_map = skimage.transform.resize( 121 | icnet_seg_map[0, :, :], 122 | deeplab_seg_map.shape, 123 | preserve_range=True, 124 | anti_aliasing=False, 125 | order=0).astype('int') 126 | seg_image = label_to_color_image(icnet_seg_map).astype(numpy.uint8) 127 | pyplot.imshow(seg_image) 128 | pyplot.axis('off') 129 | pyplot.title('Fritz Segmentation') 130 | i += 1 131 | 132 | pyplot.subplot(grid_spec[i]) 133 | pyplot.imshow(image) 134 | pyplot.imshow(seg_image, alpha=0.7) 135 | pyplot.axis('off') 136 | pyplot.title('Fritz Segmentation Overlay') 137 | i += 1 138 | 139 | pyplot.grid('off') 140 | 141 | return fig 142 | -------------------------------------------------------------------------------- /style_transfer/convert_to_tfmobile.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import sys 5 | 6 | import keras 7 | import tensorflow as tf 8 | from 
tensorflow.python.platform import gfile 9 | from tensorflow.python.tools import freeze_graph 10 | from tensorflow.python.tools import optimize_for_inference_lib 11 | from tensorflow.python.framework import dtypes 12 | 13 | from style_transfer import models 14 | 15 | logging.basicConfig(level=logging.INFO) 16 | logger = logging.getLogger('stylize_image') 17 | 18 | 19 | def _freeze_graph(model, basename, output_dir): 20 | name, _ = os.path.splitext(basename) 21 | 22 | saver = tf.train.Saver() 23 | 24 | with keras.backend.get_session() as sess: 25 | checkpoint_filename = os.path.join(output_dir, '%s.ckpt' % name) 26 | output_graph_filename = os.path.join(output_dir, '%s_frozen.pb' % name) 27 | saver.save(sess, checkpoint_filename) 28 | tf.train.write_graph( 29 | sess.graph_def, output_dir, '%s_graph_def.pbtext' % name 30 | ) 31 | 32 | freeze_graph.freeze_graph( 33 | input_graph=os.path.join(output_dir, '%s_graph_def.pbtext' % name), 34 | input_saver='', 35 | input_binary=False, 36 | input_checkpoint=checkpoint_filename, 37 | output_graph=output_graph_filename, 38 | output_node_names='deprocess_stylized_image_1/mul', 39 | restore_op_name="save/restore_all", 40 | filename_tensor_name="save/Const:0", 41 | clear_devices=True, 42 | initializer_nodes=None 43 | ) 44 | logger.info('Saved frozen graph to: %s' % output_graph_filename) 45 | 46 | 47 | def load_graph_def(filename): 48 | input_graph_def = tf.GraphDef() 49 | with gfile.FastGFile(filename, 'rb') as file: 50 | data = file.read() 51 | input_graph_def.ParseFromString(data) 52 | return input_graph_def 53 | 54 | 55 | def _optimize_graph(basename, output_dir): 56 | name, _ = os.path.splitext(basename) 57 | frozen_graph_filename = os.path.join(output_dir, '%s_frozen.pb' % name) 58 | graph_def = load_graph_def(frozen_graph_filename) 59 | 60 | optimized_graph = optimize_for_inference_lib.optimize_for_inference( 61 | input_graph_def=graph_def, 62 | input_node_names=['input_1'], 63 | placeholder_type_enum=dtypes.float32.as_datatype_enum, 64 | output_node_names=['deprocess_stylized_image_1/mul'], 65 | toco_compatible=True 66 | ) 67 | 68 | optimized_graph_filename = os.path.basename( 69 | frozen_graph_filename).replace('frozen', 'optimized') 70 | optimized_graph_filename = optimized_graph_filename 71 | tf.train.write_graph( 72 | optimized_graph, output_dir, optimized_graph_filename, as_text=False 73 | ) 74 | logger.info('Saved optimized graph to: %s' % 75 | os.path.join(output_dir, optimized_graph_filename)) 76 | 77 | 78 | def main(argv): 79 | 80 | parser = argparse.ArgumentParser( 81 | description='Stylize an image using a trained model.' 82 | ) 83 | parser.add_argument( 84 | '--keras-checkpoint', type=str, required=True, 85 | help='Weights from a trained Style Transfer Network.' 86 | ) 87 | parser.add_argument( 88 | '--alpha', type=float, required=True, 89 | help='The width multiplier of the network.' 
90 | ) 91 | parser.add_argument( 92 | '--output-dir', type=str, required=True, 93 | help='A directory to save various tensorflow graphs to' 94 | ) 95 | parser.add_argument( 96 | '--image-size', type=str, default='640,480', 97 | help='The size of input and output of the final Core ML model: H,W' 98 | ) 99 | parser.add_argument( 100 | '--use-small-network', action='store_true', 101 | help=('Use a very small network architecture that works in real time ' 102 | 'on some mobile devices using only CPU') 103 | ) 104 | 105 | args = parser.parse_args(argv) 106 | 107 | image_size = [int(dim) for dim in args.image_size.split(',')] 108 | 109 | logger.info('Loading model weights from %s' % args.keras_checkpoint) 110 | 111 | # Set some keras params before loading the model 112 | keras.backend.clear_session() 113 | keras.backend.set_learning_phase(0) 114 | if args.use_small_network: 115 | model = models.SmallStyleTransferNetwork.build( 116 | image_size, 117 | alpha=args.alpha, 118 | checkpoint_file=args.keras_checkpoint 119 | ) 120 | else: 121 | model = models.StyleTransferNetwork.build( 122 | image_size, 123 | alpha=args.alpha, 124 | checkpoint_file=args.keras_checkpoint 125 | ) 126 | 127 | basename = os.path.basename(args.keras_checkpoint) 128 | # Freeze Graph 129 | _freeze_graph(model, basename, args.output_dir) 130 | # Optimize Graph 131 | _optimize_graph(basename, args.output_dir) 132 | 133 | 134 | if __name__ == '__main__': 135 | main(sys.argv[1:]) 136 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from style_transfer import trainer 5 | 6 | logging.basicConfig(level=logging.INFO) 7 | logger = logging.getLogger('train_network') 8 | 9 | # The default layers are those suggested by Johnson et al. 10 | # The names map to those used in the VGG16 application included 11 | # with Keras. 12 | _DEFAULT_STYLE_LAYERS = [ 13 | 'block1_conv2', 'block2_conv2', 14 | 'block3_conv3', 'block4_conv3' 15 | ] 16 | _DEFAULT_CONTENT_LAYERS = ['block3_conv3'] 17 | 18 | 19 | if __name__ == '__main__': 20 | parser = argparse.ArgumentParser( 21 | description='Train a Style Transfer Network.' 22 | ) 23 | 24 | parser.add_argument( 25 | '--training-image-dset', type=str, required=True, 26 | help=('An h5 file containing images to trian with. The dset must ' 27 | 'contain a key `images` with the arrays.') 28 | ) 29 | parser.add_argument( 30 | '--style-images', type=str, required=True, 31 | help='A comma separated list of images to take styles from.' 32 | ) 33 | parser.add_argument( 34 | '--model-checkpoint', type=str, required=True, 35 | help='An file to save the trained network.' 
36 | ) 37 | parser.add_argument( 38 | '--image-size', default='256,256', type=str, 39 | help='The size of the image H,W' 40 | ) 41 | parser.add_argument( 42 | '--content-layers', type=str, 43 | help=('A comma separated list of VGG layers to use for ' 44 | 'computing content loss') 45 | ) 46 | parser.add_argument( 47 | '--style-layers', type=str, 48 | help=('A comma separated list of VGG layers to use for ' 49 | 'computing style loss') 50 | ) 51 | parser.add_argument( 52 | '--content-weight', type=float, default=1.0, 53 | help='Content loss weight' 54 | ) 55 | parser.add_argument( 56 | '--style-weight', type=float, default=1e-4, 57 | help='Style loss weight' 58 | ) 59 | parser.add_argument( 60 | '--total-variation-weight', type=float, default=0, 61 | help='Total variation loss weight' 62 | ) 63 | parser.add_argument( 64 | '--num-iterations', type=int, default=40000, 65 | help='Number of iterations to train for.' 66 | ) 67 | parser.add_argument( 68 | '--batch-size', type=int, default=4, 69 | help='The batch size to train with.' 70 | ) 71 | parser.add_argument( 72 | '--learning-rate', type=float, default=0.001, 73 | help='The learning rate.' 74 | ) 75 | parser.add_argument( 76 | '--log-interval', type=int, default=10, 77 | help='the interval at which log statements are printed.' 78 | ) 79 | parser.add_argument( 80 | '--checkpoint-interval', type=int, default=10, 81 | help='the interval at which model checkpoints are saved.' 82 | ) 83 | parser.add_argument( 84 | '--fine-tune-checkpoint', type=str, 85 | help='A checkpoint file to finetune from.' 86 | ) 87 | parser.add_argument( 88 | '--alpha', type=float, default=1.0, 89 | help='the width parameter controlling the number of filters' 90 | ) 91 | parser.add_argument( 92 | '--norm-by-channels', action='store_true', 93 | help='if present, normalize gram matrix by channel' 94 | ) 95 | parser.add_argument( 96 | '--gcs-bucket', type=str, 97 | help='a gcs bucket to save results to.' 98 | ) 99 | parser.add_argument( 100 | '--use-small-network', action='store_true', 101 | help=('Use a very small network architecture that works in real time ' 102 | 'on some mobile devices using only CPU') 103 | ) 104 | 105 | args, unknown = parser.parse_known_args() 106 | 107 | # Set the content and style loss layers. 
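# Content loss compares VGG16 feature activations of the stylized output and
# the original photo at the content layers; style loss compares Gram matrices
# of activations at the style layers. The defaults above follow Johnson et al.,
# and any comma-separated layer names passed on the command line replace them.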
108 | content_layers = _DEFAULT_CONTENT_LAYERS 109 | if args.content_layers: 110 | content_layers = args.content_layers.split(',') 111 | 112 | style_layers = _DEFAULT_STYLE_LAYERS 113 | if args.style_layers: 114 | style_layers = args.style_layers.split(',') 115 | 116 | style_image_files = args.style_images.split(',') 117 | image_size = [int(el) for el in args.image_size.split(',')] 118 | norm_by_channels = args.norm_by_channels or False 119 | 120 | trainer.train( 121 | args.training_image_dset, 122 | style_image_files, 123 | args.model_checkpoint, 124 | content_layers, 125 | style_layers, 126 | content_weight=args.content_weight, 127 | style_weight=args.style_weight, 128 | total_variation_weight=args.total_variation_weight, 129 | image_size=image_size, 130 | alpha=args.alpha, 131 | batch_size=args.batch_size, 132 | num_iterations=args.num_iterations, 133 | learning_rate=args.learning_rate, 134 | log_interval=args.log_interval, 135 | checkpoint_interval=args.checkpoint_interval, 136 | fine_tune_checkpoint=args.fine_tune_checkpoint, 137 | norm_by_channels=norm_by_channels, 138 | gcs_bucket=args.gcs_bucket, 139 | use_small_network=args.use_small_network, 140 | ) 141 | logger.info('Done.') 142 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/build_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================= 15 | 16 | """Contains common utility functions and classes for building dataset. 17 | 18 | This script contains utility functions and classes to converts dataset to 19 | TFRecord file format with Example protos. 20 | The Example proto contains the following fields: 21 | image/encoded: encoded image content. 22 | image/filename: image filename. 23 | image/format: image file format. 24 | image/height: image height. 25 | image/width: image width. 26 | image/channels: image channels. 27 | image/segmentation/class/encoded: encoded semantic segmentation content. 28 | image/segmentation/class/format: semantic segmentation file format. 29 | """ 30 | import collections 31 | import six 32 | import tensorflow as tf 33 | 34 | IMAGE_FORMAT = 'jpeg' 35 | LABEL_FORMAT = 'png' 36 | 37 | 38 | class ImageReader(object): 39 | """Helper class that provides TensorFlow image coding utilities.""" 40 | 41 | def __init__(self, image_format='jpeg', channels=3): 42 | """Class constructor. 43 | 44 | Args: 45 | image_format: Image format. Only 'jpeg', 'jpg', or 'png' 46 | are supported. 47 | channels: Image channels. 
48 | """ 49 | with tf.Graph().as_default(): 50 | self._decode_data = tf.placeholder(dtype=tf.string) 51 | self._image_format = image_format 52 | self._session = tf.Session() 53 | if self._image_format in ('jpeg', 'jpg'): 54 | self._decode = tf.image.decode_jpeg( 55 | self._decode_data, channels=channels) 56 | elif self._image_format == 'png': 57 | self._decode = tf.image.decode_png( 58 | self._decode_data, channels=channels) 59 | 60 | def read_image_dims(self, image_data): 61 | """Read the image dimensions. 62 | 63 | Args: 64 | image_data: string of image data. 65 | Returns: 66 | image_height and image_width. 67 | """ 68 | image = self.decode_image(image_data) 69 | return image.shape[:2] 70 | 71 | def decode_image(self, image_data): 72 | """Decode the image data string. 73 | 74 | Args: 75 | image_data: string of image data. 76 | Returns: 77 | Decoded image data. 78 | Raises: 79 | ValueError: Value of image channels not supported. 80 | """ 81 | image = self._session.run( 82 | self._decode, feed_dict={self._decode_data: image_data}) 83 | if len(image.shape) != 3 or image.shape[2] not in (1, 3): 84 | raise ValueError('The image channels not supported.') 85 | 86 | return image 87 | 88 | 89 | def _int64_list_feature(values): 90 | """Return a TF-Feature of int64_list. 91 | 92 | Args: 93 | values: A scalar or list of values. 94 | Returns: 95 | A TF-Feature. 96 | """ 97 | if not isinstance(values, collections.Iterable): 98 | values = [values] 99 | 100 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 101 | 102 | 103 | def _bytes_list_feature(values): 104 | """Return a TF-Feature of bytes. 105 | 106 | Args: 107 | values: A string. 108 | Returns: 109 | A TF-Feature. 110 | """ 111 | def norm2bytes(value): 112 | return value.encode() if isinstance(value, str) and six.PY3 else value 113 | 114 | return tf.train.Feature( 115 | bytes_list=tf.train.BytesList(value=[norm2bytes(values)])) 116 | 117 | 118 | def image_seg_to_tfexample(image_data, filename, height, width, seg_data): 119 | """Convert one image/segmentation pair to tf example. 120 | 121 | Args: 122 | image_data: string of image data. 123 | filename: image filename. 124 | height: image height. 125 | width: image width. 126 | seg_data: string of semantic segmentation data. 127 | Returns: 128 | tf example of one image/segmentation pair. 129 | """ 130 | return tf.train.Example( 131 | features=tf.train.Features( 132 | feature={ 133 | 'image/encoded': _bytes_list_feature(image_data), 134 | 'image/filename': _bytes_list_feature(filename), 135 | 'image/format': _bytes_list_feature(IMAGE_FORMAT), 136 | 'image/height': _int64_list_feature(height), 137 | 'image/width': _int64_list_feature(width), 138 | 'image/channels': _int64_list_feature(3), 139 | 'image/segmentation/class/encoded': ( 140 | _bytes_list_feature(seg_data)), 141 | 'image/segmentation/class/format': _bytes_list_feature( 142 | LABEL_FORMAT), 143 | } 144 | ) 145 | ) 146 | -------------------------------------------------------------------------------- /image_segmentation/utils/tfrecord_helpers.py: -------------------------------------------------------------------------------- 1 | import io 2 | import sys 3 | import struct 4 | import random 5 | import six 6 | import tensorflow as tf 7 | import numpy 8 | import PIL 9 | 10 | 11 | def iterate_tfrecord(filename, decode=False): 12 | """Iterate through a tfrecord file. 13 | 14 | Args: 15 | filename (str): Filename to iterate. 16 | decode (bool): Optionally pass all records to example decoder function. 17 | False by default. 
18 | 19 | Returns: Iterator of tfrecords. 20 | """ 21 | for record in tf.python_io.tf_record_iterator(filename): 22 | example = tf.train.Example() 23 | example.ParseFromString(record) 24 | if decode: 25 | yield decode_image_tensor(example) 26 | else: 27 | yield example 28 | 29 | 30 | def save_tfrecords(records, output_filename): 31 | """Save all tfrecord examples to file. 32 | 33 | Args: 34 | records (Iterator[tf.train.Example]): Iterator of records to save. 35 | output_filename (str): Output file to save to. 36 | """ 37 | with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer: 38 | for record in records: 39 | tfrecord_writer.write(record.SerializeToString()) 40 | 41 | 42 | def decode_image_tensor(example): 43 | """Takes a tfrecord example and decodes image and mask data. 44 | 45 | Args: 46 | example (tf.train.Example): TF example to decode. 47 | 48 | Returns: dict of decoded mask and image data. 49 | """ 50 | feature_dict = example.features.feature 51 | image_value = feature_dict['image/encoded'].bytes_list.value[0] 52 | encoded_mask = feature_dict['image/segmentation/class/encoded'] 53 | filename = feature_dict['image/filename'].bytes_list.value[0] 54 | mask_value = encoded_mask.bytes_list.value[0] 55 | mask = numpy.array(PIL.Image.open(io.BytesIO(mask_value))) 56 | height = feature_dict['image/height'].int64_list.value[0] 57 | width = feature_dict['image/width'].int64_list.value[0] 58 | mask_format = ( 59 | feature_dict['image/segmentation/class/format'].bytes_list.value[0] 60 | ) 61 | 62 | return { 63 | 'image': PIL.Image.open(io.BytesIO(image_value)), 64 | 'mask': mask, 65 | 'height': height, 66 | 'width': width, 67 | 'filename': filename, 68 | 'format': feature_dict['image/format'].bytes_list.value[0], 69 | 'mask_format': mask_format, 70 | } 71 | 72 | 73 | def get_png_string(mask_array): 74 | """Builds PNG string from mask array. 75 | 76 | Args: 77 | mask_array (HxW): Mask array to generate PNG string from. 78 | 79 | Returns: String of mask encoded as a PNG. 80 | """ 81 | # Convert the new mask back to an image. 82 | image = PIL.Image.fromarray(mask_array.astype('uint8')).convert('RGB') 83 | # Save the new image to a PNG byte string. 84 | byte_buffer = io.BytesIO() 85 | image.save(byte_buffer, format='png') 86 | byte_buffer.seek(0) 87 | return byte_buffer.read() 88 | 89 | 90 | def update_mask(record, mask_array): 91 | """Update mask in tensorflow example. 92 | 93 | Args: 94 | record (tf.train.Example): Record to update 95 | mask_array (numpy.Array): HxW array of class values. 96 | 97 | Returns: Updated tf.train.Example. 98 | """ 99 | def norm2bytes(value): 100 | return value.encode() if isinstance(value, str) and six.PY3 else value 101 | 102 | mask_data = get_png_string(mask_array) 103 | feature = record.features.feature['image/segmentation/class/encoded'] 104 | feature.bytes_list.value.pop() 105 | feature.bytes_list.value.append(norm2bytes(mask_data)) 106 | return record 107 | 108 | 109 | def get_mask_ratio(example): 110 | total_people_pixels = example['mask'][:, :, 0].sum(axis=None) 111 | return total_people_pixels / (example['height'] * example['width']) 112 | 113 | 114 | def iter_interleave(kaggle, ade20k, coco): 115 | """ 116 | A generator that interleaves the output from a one or more iterators 117 | until they are *all* exhausted. 
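Each source is drawn at a different rate on every pass: a Kaggle record is
yielded with probability 2/3, every ADE20K record is yielded, and COCO
records are yielded four at a time. The number of records drawn from each
iterator is printed once all three are exhausted.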
118 | 119 | """ 120 | kaggle_finished = False 121 | ade20k_finished = False 122 | coco_finished = False 123 | a, b, c = 0, 0, 0 124 | 125 | while (not kaggle_finished) or (not ade20k_finished) or (not coco_finished): 126 | if not kaggle_finished: 127 | try: 128 | item = kaggle.next() 129 | a += 1 130 | if random.choice([False, True, True]): 131 | yield item 132 | except StopIteration: 133 | print("kaggle finished") 134 | kaggle_finished = True 135 | if not ade20k_finished: 136 | try: 137 | item = ade20k.next() 138 | b += 1 139 | yield item 140 | except StopIteration: 141 | print("ade20k finished") 142 | ade20k_finished = True 143 | 144 | if not coco_finished: 145 | try: 146 | for _ in range(4): 147 | item = coco.next() 148 | c += 1 149 | yield item 150 | except StopIteration: 151 | print("coco finished") 152 | coco_finished = True 153 | 154 | print(a, b, c) 155 | 156 | 157 | def chunk_records(filename, n, start=0): 158 | records = iterate_tfrecord(filename) 159 | while True: 160 | for i in range(start): 161 | continue 162 | 163 | try: 164 | yield [records.next() for _ in range(n)] 165 | except StopIteration: 166 | return 167 | -------------------------------------------------------------------------------- /style_transfer/create_training_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import io 4 | import os 5 | import sys 6 | import urllib 7 | import zipfile 8 | 9 | import PIL.Image 10 | import tensorflow as tf 11 | 12 | logger = logging.getLogger('create_training_dataset') 13 | 14 | _COCO_ZIP_URL = 'http://images.cocodataset.org/zips/train2014.zip' 15 | 16 | try: 17 | raw_input # Python 3 18 | except NameError: 19 | raw_input = input # Python 3 20 | 21 | 22 | class DatasetCreator(object): 23 | """A class to preprocess images from the COCO training data. 24 | 25 | This does not apply any sort of normalization to images. It simply 26 | transforms and scales image sizes before packing them into an H5 dataset 27 | and saving them to disk. 28 | """ 29 | 30 | allowed_formats = {'.jpg', '.jpeg', '.JPG', '.JPEG', '.png', '.PNG'} 31 | max_resize = 16 32 | 33 | @classmethod 34 | def _get_image_filenames(cls, input_dir, num_images): 35 | """Get a list of image filenames from a directory.""" 36 | img_list = [] 37 | for filename in os.listdir(input_dir): 38 | _, ext = os.path.splitext(filename) 39 | if ext in cls.allowed_formats: 40 | img_list.append(os.path.join(input_dir, filename)) 41 | if num_images and len(img_list) > num_images: 42 | break 43 | return img_list 44 | 45 | @staticmethod 46 | def _bytes_feature(value): 47 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 48 | 49 | @classmethod 50 | def process_images( 51 | cls, 52 | input_dir, 53 | output_filename, 54 | num_images=None, 55 | num_threads=1): 56 | """Process all images in a directory and create an H5 data set. 57 | 58 | Args: 59 | input_dir - a directory containing images 60 | output_filename - the name of the h5 file to write to 61 | num_images - the number of images to process. 'None' processes all 62 | num_threads - the number of threads to use. Default 1. 
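(The current implementation processes images serially, so this value is
not yet used.)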
63 | """ 64 | img_list = cls._get_image_filenames(input_dir, num_images) 65 | num_images = len(img_list) 66 | # Remove the h5 file if it exists 67 | try: 68 | os.remove(output_filename) 69 | except OSError: 70 | pass 71 | 72 | record_writer = tf.python_io.TFRecordWriter(output_filename) 73 | for idx, filename in enumerate(img_list): 74 | img = PIL.Image.open(filename) 75 | encoded_jpeg = io.BytesIO() 76 | img.save(encoded_jpeg, format='jpeg') 77 | encoded_jpeg.seek(0) 78 | 79 | example = tf.train.Example(features=tf.train.Features( 80 | feature={ 81 | 'image/encoded': cls._bytes_feature(encoded_jpeg.read()), 82 | })) 83 | record_writer.write(example.SerializeToString()) 84 | record_writer.close() 85 | 86 | 87 | def download_coco_data(directory): 88 | """Download and extract the COCO image training data set. 89 | 90 | This file is very large (~13GB) so we check with the user to make 91 | sure that is ok. 92 | 93 | Args: 94 | dir - a directory to save the dataset to 95 | """ 96 | # This is a really big file so ask the user if they are sure they want 97 | # to start the download. 98 | if not os.path.isdir(directory): 99 | logger.info('Creating directory: %s' % directory) 100 | os.makedirs(directory) 101 | 102 | answer = None 103 | while answer not in {'Y', 'n'}: 104 | answer = raw_input( 105 | 'Are you sure you want to download the COCO dataset? [Y/n] ' 106 | ) 107 | 108 | if answer == 'n': 109 | sys.exit() 110 | 111 | logger.info('Downloading COCO image data set. This may take a while...') 112 | zip_save_path = os.path.join(directory, 'train2014.zip') 113 | urllib.urlretrieve(_COCO_ZIP_URL, zip_save_path) 114 | 115 | # Files are even bigger to unzip so ask again if they are fine to proceed. 116 | answer = None 117 | while answer not in {'Y', 'n'}: 118 | answer = raw_input( 119 | 'Are you sure you want to unzip things? [Y/n] ' 120 | ) 121 | 122 | if answer == 'n': 123 | sys.exit() 124 | 125 | logger.info('Unzipping COCO image data set. This may take a while...') 126 | unzip = zipfile.ZipFile(zip_save_path, 'r') 127 | unzip.extractall(directory) 128 | unzip.close() 129 | # Delete the original zipfile 130 | os.remove(zip_save_path) 131 | 132 | 133 | if __name__ == '__main__': 134 | parser = argparse.ArgumentParser( 135 | description=('Create a dataset to use when training the Fritz' 136 | ' Style Transfer model.')) 137 | parser.add_argument( 138 | '--output', type=str, required=True, 139 | help='The name of the resulting dataset.') 140 | parser.add_argument( 141 | '--image-dir', type=str, required=True, 142 | help=('A directory containing images to turn into tfrecords') 143 | ) 144 | parser.add_argument( 145 | '--download', action='store_true', 146 | help=('When present, download and extract the COCO image dataset.' 147 | 'Note this is a huge download (~13GB).') 148 | ) 149 | parser.add_argument( 150 | '--num-images', type=int, help='The number of images to process.' 
151 | ) 152 | 153 | args = parser.parse_args() 154 | image_directory = args.image_dir 155 | if args.download: 156 | download_coco_data(image_directory) 157 | image_directory = os.path.join(image_directory, 'train2014') 158 | 159 | image_directory = os.path.join(args.image_dir) 160 | DatasetCreator.process_images( 161 | image_directory, 162 | args.output, 163 | num_images=args.num_images 164 | ) 165 | -------------------------------------------------------------------------------- /image_segmentation/objectInfo150.txt: -------------------------------------------------------------------------------- 1 | Idx Ratio Train Val Name 2 | 1 0.1576 11664 1172 wall 3 | 2 0.1072 6046 612 building, edifice 4 | 3 0.0878 8265 796 sky 5 | 4 0.0621 9336 917 floor, flooring 6 | 5 0.0480 6678 641 tree 7 | 6 0.0450 6604 643 ceiling 8 | 7 0.0398 4023 408 road, route 9 | 8 0.0231 1906 199 bed 10 | 9 0.0198 4688 460 windowpane, window 11 | 10 0.0183 2423 225 grass 12 | 11 0.0181 2874 294 cabinet 13 | 12 0.0166 3068 310 sidewalk, pavement 14 | 13 0.0160 5075 526 person, individual, someone, somebody, mortal, soul 15 | 14 0.0151 1804 190 earth, ground 16 | 15 0.0118 6666 796 door, double door 17 | 16 0.0110 4269 411 table 18 | 17 0.0109 1691 160 mountain, mount 19 | 18 0.0104 3999 441 plant, flora, plant life 20 | 19 0.0104 2149 217 curtain, drape, drapery, mantle, pall 21 | 20 0.0103 3261 318 chair 22 | 21 0.0098 3164 306 car, auto, automobile, machine, motorcar 23 | 22 0.0074 709 75 water 24 | 23 0.0067 3296 315 painting, picture 25 | 24 0.0065 1191 106 sofa, couch, lounge 26 | 25 0.0061 1516 162 shelf 27 | 26 0.0060 667 69 house 28 | 27 0.0053 651 57 sea 29 | 28 0.0052 1847 224 mirror 30 | 29 0.0046 1158 128 rug, carpet, carpeting 31 | 30 0.0044 480 44 field 32 | 31 0.0044 1172 98 armchair 33 | 32 0.0044 1292 184 seat 34 | 33 0.0033 1386 138 fence, fencing 35 | 34 0.0031 698 61 desk 36 | 35 0.0030 781 73 rock, stone 37 | 36 0.0027 380 43 wardrobe, closet, press 38 | 37 0.0026 3089 302 lamp 39 | 38 0.0024 404 37 bathtub, bathing tub, bath, tub 40 | 39 0.0024 804 99 railing, rail 41 | 40 0.0023 1453 153 cushion 42 | 41 0.0023 411 37 base, pedestal, stand 43 | 42 0.0022 1440 162 box 44 | 43 0.0022 800 77 column, pillar 45 | 44 0.0020 2650 298 signboard, sign 46 | 45 0.0019 549 46 chest of drawers, chest, bureau, dresser 47 | 46 0.0019 367 36 counter 48 | 47 0.0018 311 30 sand 49 | 48 0.0018 1181 122 sink 50 | 49 0.0018 287 23 skyscraper 51 | 50 0.0018 468 38 fireplace, hearth, open fireplace 52 | 51 0.0018 402 43 refrigerator, icebox 53 | 52 0.0018 130 12 grandstand, covered stand 54 | 53 0.0018 561 64 path 55 | 54 0.0017 880 102 stairs, steps 56 | 55 0.0017 86 12 runway 57 | 56 0.0017 172 11 case, display case, showcase, vitrine 58 | 57 0.0017 198 18 pool table, billiard table, snooker table 59 | 58 0.0017 930 109 pillow 60 | 59 0.0015 139 18 screen door, screen 61 | 60 0.0015 564 52 stairway, staircase 62 | 61 0.0015 320 26 river 63 | 62 0.0015 261 29 bridge, span 64 | 63 0.0014 275 22 bookcase 65 | 64 0.0014 335 60 blind, screen 66 | 65 0.0014 792 75 coffee table, cocktail table 67 | 66 0.0014 395 49 toilet, can, commode, crapper, pot, potty, stool, throne 68 | 67 0.0014 1309 138 flower 69 | 68 0.0013 1112 113 book 70 | 69 0.0013 266 27 hill 71 | 70 0.0013 659 66 bench 72 | 71 0.0012 331 31 countertop 73 | 72 0.0012 531 56 stove, kitchen stove, range, kitchen range, cooking stove 74 | 73 0.0012 369 36 palm, palm tree 75 | 74 0.0012 144 9 kitchen island 76 | 75 0.0011 265 29 computer, computing machine, 
computing device, data processor, electronic computer, information processing system 77 | 76 0.0010 324 33 swivel chair 78 | 77 0.0009 304 27 boat 79 | 78 0.0009 170 20 bar 80 | 79 0.0009 68 6 arcade machine 81 | 80 0.0009 65 8 hovel, hut, hutch, shack, shanty 82 | 81 0.0009 248 25 bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle 83 | 82 0.0008 492 49 towel 84 | 83 0.0008 2510 269 light, light source 85 | 84 0.0008 440 39 truck, motortruck 86 | 85 0.0008 147 18 tower 87 | 86 0.0008 583 56 chandelier, pendant, pendent 88 | 87 0.0007 533 61 awning, sunshade, sunblind 89 | 88 0.0007 1989 239 streetlight, street lamp 90 | 89 0.0007 71 5 booth, cubicle, stall, kiosk 91 | 90 0.0007 618 53 television receiver, television, television set, tv, tv set, idiot box, boob tube, telly, goggle box 92 | 91 0.0007 135 12 airplane, aeroplane, plane 93 | 92 0.0007 83 5 dirt track 94 | 93 0.0007 178 17 apparel, wearing apparel, dress, clothes 95 | 94 0.0006 1003 104 pole 96 | 95 0.0006 182 12 land, ground, soil 97 | 96 0.0006 452 50 bannister, banister, balustrade, balusters, handrail 98 | 97 0.0006 42 6 escalator, moving staircase, moving stairway 99 | 98 0.0006 307 31 ottoman, pouf, pouffe, puff, hassock 100 | 99 0.0006 965 114 bottle 101 | 100 0.0006 117 13 buffet, counter, sideboard 102 | 101 0.0006 354 35 poster, posting, placard, notice, bill, card 103 | 102 0.0006 108 9 stage 104 | 103 0.0006 557 55 van 105 | 104 0.0006 52 4 ship 106 | 105 0.0005 99 5 fountain 107 | 106 0.0005 57 4 conveyer belt, conveyor belt, conveyer, conveyor, transporter 108 | 107 0.0005 292 31 canopy 109 | 108 0.0005 77 9 washer, automatic washer, washing machine 110 | 109 0.0005 340 38 plaything, toy 111 | 110 0.0005 66 3 swimming pool, swimming bath, natatorium 112 | 111 0.0005 465 49 stool 113 | 112 0.0005 50 4 barrel, cask 114 | 113 0.0005 622 75 basket, handbasket 115 | 114 0.0005 80 9 waterfall, falls 116 | 115 0.0005 59 3 tent, collapsible shelter 117 | 116 0.0005 531 72 bag 118 | 117 0.0005 282 30 minibike, motorbike 119 | 118 0.0005 73 7 cradle 120 | 119 0.0005 435 44 oven 121 | 120 0.0005 136 25 ball 122 | 121 0.0005 116 24 food, solid food 123 | 122 0.0004 266 31 step, stair 124 | 123 0.0004 58 12 tank, storage tank 125 | 124 0.0004 418 83 trade name, brand name, brand, marque 126 | 125 0.0004 319 43 microwave, microwave oven 127 | 126 0.0004 1193 139 pot, flowerpot 128 | 127 0.0004 97 23 animal, animate being, beast, brute, creature, fauna 129 | 128 0.0004 347 36 bicycle, bike, wheel, cycle 130 | 129 0.0004 52 5 lake 131 | 130 0.0004 246 22 dishwasher, dish washer, dishwashing machine 132 | 131 0.0004 108 13 screen, silver screen, projection screen 133 | 132 0.0004 201 30 blanket, cover 134 | 133 0.0004 285 21 sculpture 135 | 134 0.0004 268 27 hood, exhaust hood 136 | 135 0.0003 1020 108 sconce 137 | 136 0.0003 1282 122 vase 138 | 137 0.0003 528 65 traffic light, traffic signal, stoplight 139 | 138 0.0003 453 57 tray 140 | 139 0.0003 671 100 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin 141 | 140 0.0003 397 44 fan 142 | 141 0.0003 92 8 pier, wharf, wharfage, dock 143 | 142 0.0003 228 18 crt screen 144 | 143 0.0003 570 59 plate 145 | 144 0.0003 217 22 monitor, monitoring device 146 | 145 0.0003 206 19 bulletin board, notice board 147 | 146 0.0003 130 14 shower 148 | 147 0.0003 178 28 radiator 149 | 148 0.0002 504 57 glass, drinking glass 150 | 149 0.0002 775 96 clock 151 | 150 0.0002 421 56 flag 152 | 
-------------------------------------------------------------------------------- /image_segmentation/image_segmentation/dali_pipeline.py: -------------------------------------------------------------------------------- 1 | from nvidia import dali 2 | import nvidia.dali.tfrecord as tfrec 3 | from nvidia.dali import ops 4 | from nvidia.dali import types 5 | 6 | 7 | class CommonPipeline(dali.pipeline.Pipeline): 8 | 9 | def _input(self, tfrecord_path, index_path, shard_id=0): 10 | return ops.TFRecordReader( 11 | path=tfrecord_path, 12 | index_path=index_path, 13 | random_shuffle=True, 14 | features={ 15 | 'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""), 16 | 'image/filename': tfrec.FixedLenFeature((), tfrec.string, ""), 17 | 'image/format': tfrec.FixedLenFeature((), tfrec.string, ""), 18 | 'image/height': tfrec.FixedLenFeature([1], tfrec.int64, -1), 19 | 'image/width': tfrec.FixedLenFeature([1], tfrec.int64, -1), 20 | 'image/channels': tfrec.FixedLenFeature([1], tfrec.int64, -1), 21 | 'image/segmentation/class/encoded': ( 22 | tfrec.FixedLenFeature((), tfrec.string, "") 23 | ), 24 | 'image/segmentation/class/format': ( 25 | tfrec.FixedLenFeature((), tfrec.string, "") 26 | ) 27 | } 28 | ) 29 | 30 | def __init__(self, 31 | batch_size, 32 | num_threads, 33 | device_id, 34 | image_size, 35 | tfrecord_path, 36 | index_path, 37 | config, 38 | shard_id=0): 39 | 40 | super(CommonPipeline, self).__init__(batch_size, 41 | num_threads, 42 | device_id) 43 | 44 | self.image_size = image_size 45 | self.input = self._input(tfrecord_path, index_path, shard_id=shard_id) 46 | # The nvjpeg decoder throws an error for some unsupported jpegs. 47 | # until this is fixed, we'll use the host decoder, which runs on the 48 | # CPU. 49 | # self.decode = ops.nvJPEGDecoder(device="mixed", 50 | # output_type=types.RGB) 51 | self.decode = ops.HostDecoder(device="cpu", 52 | output_type=types.RGB) 53 | self.resize = ops.Resize(device="gpu", 54 | image_type=types.RGB, 55 | interp_type=types.INTERP_LINEAR, 56 | resize_x=image_size, 57 | resize_y=image_size) 58 | 59 | self.resize_large = ops.Resize(device="gpu", 60 | image_type=types.RGB, 61 | interp_type=types.INTERP_LINEAR, 62 | resize_x=image_size * config.zoom_scale, 63 | resize_y=image_size * config.zoom_scale) 64 | 65 | self.color_twist = ops.ColorTwist( 66 | device="gpu", 67 | ) 68 | self.crop_mirror_normalize = ops.CropMirrorNormalize( 69 | device="gpu", 70 | crop=image_size, 71 | output_dtype=types.FLOAT, 72 | image_type=types.RGB, 73 | output_layout=types.DALITensorLayout.NHWC, 74 | mean=122.5, 75 | std=255.0 76 | ) 77 | 78 | self.crop = ops.Crop( 79 | device="gpu", 80 | crop=image_size, 81 | ) 82 | 83 | self.cast = ops.Cast( 84 | device="gpu", 85 | dtype=types.DALIDataType.INT64 86 | ) 87 | self.rotate = ops.Rotate( 88 | device="gpu", 89 | fill_value=0 90 | ) 91 | self.flip = ops.Flip(device="gpu") 92 | 93 | self.coin = ops.CoinFlip(probability=0.5) 94 | self.rotate_rng = ops.Uniform(range=(config.rotate_angle_min, 95 | config.rotate_angle_max)) 96 | self.crop_x_rng = ops.Uniform(range=(0.0, config.crop_x_max)) 97 | self.crop_y_rng = ops.Uniform(range=(0.0, config.crop_y_max)) 98 | self.hue_rng = ops.Uniform(range=(config.hue_min, 99 | config.hue_max)) 100 | self.contrast_rng = ops.Uniform(range=(config.contrast_min, 101 | config.contrast_max)) 102 | self.saturation_rng = ops.Uniform(range=(config.saturation_min, 103 | config.saturation_max)) 104 | self.brightness_rng = ops.Uniform(range=(config.brightness_min, 105 | config.brightness_max)) 106 | 107 | 
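# The Uniform and CoinFlip ops above draw one random value per sample.
# define_graph feeds the same angle, crop offsets, and flip decision to both
# the image and its mask so the pair stays aligned; color jitter is applied
# to the image only.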
self.iter = 0
108 | 
109 | def define_graph(self):
110 | inputs = self.input()
111 | angle = self.rotate_rng()
112 | coin = self.coin()
113 | hue = self.hue_rng()
114 | contrast = self.contrast_rng()
115 | saturation = self.saturation_rng()
116 | brightness = self.brightness_rng()
117 | crop_x = self.crop_x_rng()
118 | crop_y = self.crop_y_rng()
119 | 
120 | images = self.decode(inputs["image/encoded"])
121 | images = images.gpu()
122 | images = self.resize_large(images)
123 | images = self.rotate(images, angle=angle)
124 | images = self.crop(images, crop_pos_x=crop_x, crop_pos_y=crop_y)
125 | images = self.resize(images)
126 | images = self.color_twist(images,
127 | brightness=brightness,
128 | hue=hue,
129 | saturation=saturation,
130 | contrast=contrast)
131 | images = self.flip(images, horizontal=coin)
132 | 
133 | masks = self.decode(inputs["image/segmentation/class/encoded"])
134 | masks = masks.gpu()
135 | masks = self.resize_large(masks)
136 | masks = self.rotate(masks, angle=angle)
137 | masks = self.crop(masks, crop_pos_x=crop_x, crop_pos_y=crop_y)
138 | masks = self.resize(masks)
139 | masks = self.flip(masks, horizontal=coin)
140 | 
141 | images = self.crop_mirror_normalize(images)
142 | masks = self.cast(masks)
143 | return (images, masks)
144 | 
145 | def iter_setup(self):
146 | pass
147 | 
-------------------------------------------------------------------------------- /image_segmentation/README.md: --------------------------------------------------------------------------------
1 | # Fritz Image Segmentation
2 | A Core ML compatible implementation of semantic segmentation with ICNet in Keras.
3 | 
4 | ## Installation
5 | 
6 | From this directory, run:
7 | 
8 | ```
9 | export PYTHONPATH=$PYTHONPATH:`pwd`
10 | ```
11 | 
12 | ## Download Data
13 | The model is trained on the [ADE20K dataset](http://groups.csail.mit.edu/vision/datasets/ADE20K/) provided by MIT. You can download and prepare this data for training using this [handy script](https://github.com/tensorflow/models/blob/master/research/deeplab/datasets/download_and_convert_ade20k.sh) provided in the `TensorFlow/models/research/deeplab` repo on GitHub.
14 | 
15 | The dataset contains >20,000 images and corresponding segmentation masks. Masks assign one of 150 categories to each individual pixel of the image. A list of object classes is included in this repo: [objectInfo150.txt](./objectInfo150.txt)
16 | 
17 | ## Create TFRecord Dataset
18 | 
19 | Training requires data to be read from TFRecords, so we'll need to convert the images before we can use them. It's also recommended that you choose fewer than 20 image labels to train on, as performance degrades beyond that point; the full set of 150 class labels is too many. A whitelist of class labels can be passed via the command line as a pipe-separated string. Note that class labels must match those in `objectInfo150.txt` exactly. Examples of valid whitelists are:
20 | 
21 | ```
22 | "person|wall|floor, flooring"
23 | "chair|wall|coffee table, cocktail table|ceiling|floor, flooring|bed|lamp|sofa, couch, lounge|windowpane, window|pillow"
24 | ```
25 | 
26 | You can also set the `whitelist-threshold` argument to specify the fraction of whitelisted labels that must appear in an image for it to be used in training. For example, if 10 labels are whitelisted and the threshold is set to 0.6, at least 6 of the 10 whitelisted labels must appear in the image for it to be included.
27 | 
28 | Let's create a training data set for images with objects you might find in a living room or bedroom.
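Before running the command, here is a rough illustration of the whitelist/threshold rule described above: the check boils down to counting how many whitelisted class labels actually appear in an image's mask. This is only a sketch; the helper name and the mask representation are assumptions rather than code taken from `create_tfrecord_dataset.py`:

```
import numpy

def passes_whitelist(mask, whitelist_ids, threshold=0.6):
    # mask is an HxW array of ADE20K class indices; whitelist_ids holds the
    # indices of the whitelisted labels.
    present = set(numpy.unique(mask))
    found = sum(1 for class_id in whitelist_ids if class_id in present)
    return float(found) / len(whitelist_ids) >= threshold
```

With 10 whitelisted labels and a threshold of 0.6, an image is kept only if at least 6 of those labels appear in its mask. The command itself: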
29 | 30 | ``` 31 | export LABEL_SET=living_room 32 | mkdir data/${LABEL_SET} 33 | python create_tfrecord_dataset.py \ 34 | -i data/ADEChallengeData2016/images/training/ \ 35 | -a data/ADEChallengeData2016/annotations/training/ \ 36 | -o data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \ 37 | -l data/objectInfo150.txt \ 38 | -w "chair|wall|coffee table, cocktail table|ceiling|floor, flooring|bed|lamp|sofa, couch, lounge|windowpane, window|pillow" \ 39 | -t 0.6 40 | ``` 41 | 42 | This script also automatically outputs a new set of labels and indices in a file named `labels.txt` found in the same directory as the `.tfrecord` output. 43 | 44 | ## Training 45 | The model can be trained using the `train.py` script. 46 | 47 | Before you start, make sure the `image_segmentation` model is on your $PYTHONPATH. From the `fritz-models/image_segmentation` directory. 48 | 49 | ``` 50 | export PYTHONPATH=$PYTHONPATH:`pwd` 51 | ``` 52 | 53 | ### Train Locally 54 | Train the model for 10 steps by running: 55 | 56 | ``` 57 | export LABEL_SET=living_room 58 | python image_segmentation/train.py \ 59 | -d data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \ 60 | -l data/${LABEL_SET}/labels.txt \ 61 | -n 10 \ 62 | -s 768 \ 63 | -a 0.25 \ 64 | -o data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_025.h5 65 | ``` 66 | 67 | By default, a model weights checkpoint is saved every epoch. Note that only weights are saved, not the full model. This is to make it easier to build models for training vs inference. 68 | 69 | ### Training on Google Cloud ML 70 | Zip up all of the local files to send up to Google Cloud. 71 | 72 | ``` 73 | # from fritz-models/image_segmentation/ 74 | python setup.py sdist 75 | ``` 76 | Run the training job. 77 | 78 | ``` 79 | export LABEL_SET=living_room 80 | export YOUR_GCS_BUCKET= 81 | gcloud ml-engine jobs submit training `whoami`_image_segmentation_`date +%s` \ 82 | --runtime-version 1.9 \ 83 | --job-dir=gs://${YOUR_GCS_BUCKET} \ 84 | --packages dist/image_segmentation-1.0.tar.gz \ 85 | --module-name image_segmentation.train \ 86 | --region us-east1 \ 87 | --scale-tier basic_gpu \ 88 | -- \ 89 | -d gs://${YOUR_GCS_BUCKET}/data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \ 90 | -l gs://${YOUR_GCS_BUCKET}/data/${LABEL_SET}/labels.txt \ 91 | -o ${LABEL_SET}_768x768_025.h5 \ 92 | --image-size 768 \ 93 | --alpha 0.25 \ 94 | --num-steps 5000 \ 95 | --batch-size 24 \ 96 | --model-name ${LABEL_SET} \ 97 | --gcs-bucket gs://${YOUR_GCS_BUCKET}/train 98 | ``` 99 | 100 | ## Converting to Core ML 101 | The resulting Keras model can be converted using the script provided. It uses the standard `coremltools` package, but removes the additional model output nodes used for training. 102 | 103 | ``` 104 | python convert_to_coreml.py --alpha 0.25 ${LABEL_SET}_768x768_025.h5 ${LABEL_SET}_768x768_025.mlmodel 105 | ``` 106 | 107 | Once you've got your Core ML model, you can use [Fritz](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models&utm_content=image-segmentation) to integrate, deploy, and manage it in your app. For more tutorials on mobile machine learning, check out [Heartbeat](https://heartbeat.fritz.ai?utm_source=github&utm_campaign=fritz-models&utm_content=image-segmentation). 108 | 109 | ## Benchmarks 110 | On a Google Cloud Compute GPU instance with a single K80, a single epoch containing roughly 1600 768x768 images takes 20 minutes. Average cross-categorical accuracy reached >80% after 12 hours. An additional 3 hours of training with a learning rate of 0.00001 increased accuracy to ~87%. 
Inferences with a 768x768 model can be made at 8-9fps on an iPhone X. 111 | 112 | ## Example - Living Room Objects 113 | 114 | 115 | 116 | 117 | 118 | Download the [mlmodel](https://github.com/fritzlabs/fritz-models/blob/master/image_segmentation/examples/icnet_768x768_living_room.mlmodel). 119 | 120 | ## Additional resources 121 | 122 | * [Original ICNet Implementation](https://github.com/hszhao/ICNet) 123 | * [Keras-ICNet](https://github.com/aitorzip/Keras-ICNet) 124 | * [ICNet-tensorflow](https://github.com/hellochick/ICNet-tensorflow) 125 | -------------------------------------------------------------------------------- /resources/AI_Landscape.md: -------------------------------------------------------------------------------- 1 | # AI and Machine Learning Landscape 2 | 3 | ## Contribute to this list 4 | 5 | The AI and Machine Learning landscape is rapidly changing. We welcome additions and changes to this list! 6 | 7 | ## Data Labeling 8 | 9 | * [Labelbox](https://www.labelbox.com/) - Platform for creating and managing training data 10 | 11 | * [Alegion](https://alegion.com/) - Training platform to build datasets, manage, and more 12 | 13 | * [Clickworker](https://www.clickworker.com/) - Micro tasking marketplace, catering data management and web research services as well as AI algorithm training 14 | 15 | * [Figure Eight](https://www.figure-eight.com/) - Training platform that helps turn data into trainable sets 16 | 17 | * [Gengo AI](https://gengo.ai/) - Training platform for language-based ML tasks 18 | 19 | * [Mighty AI](https://mighty.ai/) - Training data management for Computer Vision tasks 20 | 21 | * [Scale](https://scale.ai/) - Training data API largely focused on Computer Vision tasks 22 | 23 | * [CloudSight](https://cloudsight.ai/) - Image recognition API for digital media 24 | 25 | * [Hive](https://thehive.ai/) - Data labeling, Computer Vision models, and media platform 26 | 27 | * [Microwork](https://microwork.io/) - Image and video annotation services for AI 28 | 29 | ## Synthetic Data 30 | 31 | * [AI.Reverie](https://aireverie.com/) - Simulation platform that generates synthetic data to train and improve ML models 32 | 33 | * [Neuromation](https://www.neuromation.io/) - Distributed computing platform for deep learning applications and synthetic data generation 34 | 35 | ## Feature Engineering 36 | 37 | * [Feature Labs](https://www.featurelabs.com/) - Automated feature engineering platform for enterprise 38 | 39 | * [Featuretools](https://www.featuretools.com/) - Open source Python framework for automated feature engineering - a product of Feature Labs 40 | 41 | * [Source{d}](https://sourced.tech/) - Machine learning for large scale code analysis 42 | 43 | ## Training 44 | 45 | * [ClusterOne](https://clusterone.com/) - Deep learning platform that allows you to train your models on distributed GPUs and CPUs without setup or maintenance 46 | 47 | * [DataBricks](https://databricks.com/) - Unified Analytics Platform that accelerates innovation by unifying data science, engineering and business 48 | 49 | * [DAWNBench](https://dawn.cs.stanford.edu/benchmark/index.html) - Benchmark suite for end-to-end deep learning training and inference out of Stanford 50 | 51 | * [Hyperopt](https://hyperopt.github.io/hyperopt/) - Distributed asynchronous hyperparameter optimization in Python 52 | 53 | * [Lambda Labs](https://lambdalabs.com/) - Workstations, Servers, Laptops, and GPU cloud built for Deep Learning 54 | 55 | * [PaddlePaddle](http://www.paddlepaddle.org/) - An open-source deep 
learning platform with a simple API 56 | 57 | * [Paperspace](https://www.paperspace.com/) - GPU cloud platform (w/ API), AI/ML infrastructure product 58 | 59 | * [RiseML](https://riseml.com/) - Machine Learning Platform for Kubernetes (:cry: - Sunsetting March 31, 2019) 60 | 61 | * [Spell](https://www.spell.run/) - Infrastructure for AI and deep learning experiments and collaboration 62 | 63 | * [Trifacta](https://www.trifacta.com/start-wrangling/) - Data preparation and cleaning platform 64 | 65 | * [Yellowfin](https://www.yellowfinbi.com/) - Integrated data analytics platform 66 | 67 | ## Model Serving / Deployment 68 | 69 | * [5 Analytics](https://www.5analytics.com/index.html) - Enterprise AI platform to integrate, deploy, and monitor ML models 70 | 71 | * [Algorithmia](https://algorithmia.com/) - Machine learning model management platform 72 | 73 | * [Numericcal](https://www.numericcal.com/) - Platform to automate model optimization and management on mobile and IoT 74 | 75 | * [Seldon](https://www.seldon.io/) - Machine learning deployment platform for Enterprise 76 | 77 | * [Vertex AI](http://vertex.ai/) - PalidML is an open source tensor compiler 78 | 79 | * [Alteryx](https://www.alteryx.com/platform) - Integrated analytics platform built to foster partnerships between IT, analytics teams, and businesses 80 | 81 | * [Datatron](https://www.datatron.com/) - Management platform for ML, AI ,and Data Science models 82 | 83 | ## Model Management 84 | 85 | * [Datmo](https://datmo.com/) - Workflow tools to help you experiment, deploy, and scale AI solutions 86 | 87 | * [Iterative AI](https://iterative.ai/) - CI workflow for machine learning projects 88 | 89 | * [MLFlow](https://mlflow.org/) - Open source ML lifecycle platform 90 | 91 | * [MLPerf](https://mlperf.org/) - A broad ML benchmark suite for measuring performance of ML software frameworks, ML hardware accelerators, and ML cloud platforms 92 | 93 | * [Neptune](https://neptune.ml/) - Platform to build ML models, manage infrastructure and dev environments, and team collaboration tools 94 | 95 | * [ParallelM](https://www.parallelm.com/) - Platform to deploy and optimize ML models at scale 96 | 97 | ## End-to-End ML Platforms 98 | 99 | * [Allegro](https://allegro.ai/) - Computer Vision Deep Learning platform 100 | 101 | * [Cnvrg.io](https://cnvrg.io/) - Full stack data science and ML platform 102 | 103 | * [Determined AI](https://determined.ai/) - Platform designed to streamline deep learning workflows 104 | 105 | * [FloydHub](https://www.floydhub.com/) - Deep learning model pipeline 106 | 107 | * [SherlockML](https://sherlockml.com/) - Data science developer environment 108 | 109 | * [BigML](https://bigml.com/) - Comprehensive ML workflow platform 110 | 111 | * [Dataiku](https://www.dataiku.com/) - End-to-end collaborative data science platform 112 | 113 | * [Metis Machine](https://metismachine.com/) - Skafos Machine Learning Platform to help develop and deploy ML pipelines at scale 114 | 115 | * [Valohai](https://valohai.com/) - Deep learning management platform 116 | 117 | * [Dataspine](https://dataspine.io/) - Automation platform for ML workflows 118 | 119 | * [PipelineAI](https://pipeline.ai/) - Flexible end-to-end ML pipeline platform 120 | 121 | * [Deep Cognition](https://deepcognition.ai/) - Deep learning management platform 122 | 123 | * [Polyaxon](https://polyaxon.com/) - Open source platform for reproducible machine learning at scale 124 | 125 | * [Clarifai](https://www.clarifai.com/) - ML platform built for Computer Vision 
problems 126 | 127 | * [Comet.ml](https://www.comet.ml/) - ML platform to track datasets, training, and more 128 | 129 | * [DeepSense.ai](https://deepsense.ai/) - AI AI solution management platform 130 | 131 | * [H20 AI](https://h2o.ai) - Open source ML platform 132 | 133 | * [DataRobot](https://www.datarobot.com/) - Automated ML platform for predictive modeling 134 | 135 | * [Fritz](https://fritz.ai/) - End-to-end platform designed to help mobile developers and ML engineers train and deploy models in mobile apps 136 | 137 | ## Other 138 | 139 | * [Element AI](https://www.elementai.com/) - Use case-based AI software 140 | 141 | * [Ever AI](https://ever.ai/) - Facial recognition and attribute identification SDK and API 142 | 143 | * [Deepomatic](http://www.deepomatic.com/) - Image-based, computer vision AI solutions platform 144 | 145 | * [Leverege](https://www.leverege.com/) - Complete IoT development and deployment platform 146 | 147 | * [Nuance](https://www.nuance.com/omni-channel-customer-engagement/technologies/artificial-intelligence.html) - Conversational AI for smart customer engagement 148 | 149 | * [Cortical.io](https:/www.cortical.io) - Intelligent text processing solution 150 | -------------------------------------------------------------------------------- /image_segmentation/utils/model_helpers.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import os 3 | 4 | import coremltools 5 | import tensorflow as tf 6 | import PIL.Image 7 | import skimage.transform 8 | import skimage.filters 9 | import numpy 10 | from tensorflow.python.platform import gfile 11 | from image_segmentation import data_generator 12 | import image_segmentation 13 | import requests 14 | from io import BytesIO 15 | 16 | 17 | class ModelParameters(object): 18 | 19 | def __init__(self, **params): 20 | self.label_set = params['label_set'] 21 | self.batch_size = params['batch_size'] 22 | self.resolution = params['resolution'] 23 | self.alpha = params['alpha'] 24 | self.labels = params['labels'] 25 | self.num_classes = len(self.labels) 26 | self.gcs_bucket = params.get('gcs_bucket') 27 | self._training_data_path = params.get('training_data_path') 28 | self._model_path = params.get('model_path') 29 | 30 | self.file_base = params.get( 31 | 'file_base', 32 | f'{self.label_set}_{self.resolution}x{self.resolution}_1' 33 | ) 34 | 35 | @property 36 | def training_data_path(self): 37 | if self._training_data_path: 38 | return self._training_data_path 39 | 40 | return ( 41 | '../fritz-image-segmentation/data/' 42 | '{label_set}/{label_set}.tfrecord' 43 | ).format(label_set=self.label_set) 44 | 45 | @property 46 | def model_path(self): 47 | if self._model_path: 48 | return self._model_path 49 | 50 | return ( 51 | f'gs://{self.gcs_bucket}/train/{self.file_base}.h5' 52 | ) 53 | 54 | 55 | class TrainedModel(object): 56 | 57 | def __init__(self, model_parameters): 58 | self._params = model_parameters 59 | resolution = model_parameters.resolution 60 | 61 | self.dataset = data_generator.ADE20KDatasetBuilder.build( 62 | self._params.training_data_path, 63 | self._params.batch_size, 64 | (resolution, resolution), 65 | self._params.num_classes, 66 | augment_images=False, 67 | repeat=False 68 | ) 69 | 70 | self._model = None 71 | 72 | def download_and_build_model(self): 73 | temp_h5 = tempfile.NamedTemporaryFile(suffix='.h5') 74 | print("Loading model") 75 | # with gfile.Open(self._params.model_path, 'rb') as fid: 76 | # temp_h5.file.write(fid.read()) 77 | # temp_h5.seek(0) 78 
| 79 | return image_segmentation.icnet.ICNetModelFactory.build( 80 | self._params.resolution, 81 | self._params.num_classes, 82 | alpha=self._params.alpha, 83 | weights_path=self._params.model_path, 84 | train=False 85 | ) 86 | 87 | @property 88 | def model(self): 89 | if self._model is None: 90 | self._model = self.download_and_build_model() 91 | 92 | return self._model 93 | 94 | def iterate_images(self): 95 | iterator = self.dataset.make_one_shot_iterator() 96 | el = iterator.get_next() 97 | 98 | try: 99 | with tf.Session() as sess: 100 | while True: 101 | out = sess.run([el]) 102 | for i in range(out[0]['image'].shape[0]): 103 | image = out[0]['image'][i] 104 | mask = out[0]['mask'][i] 105 | yield (image, mask) 106 | except tf.errors.OutOfRangeError: 107 | return 108 | 109 | def training_images(self, num_images=10, start_index=0): 110 | results = [] 111 | for i, (image, mask) in enumerate(self.iterate_images()): 112 | if i < start_index: 113 | continue 114 | 115 | if len(results) >= num_images: 116 | break 117 | results.append((image, mask)) 118 | 119 | return results 120 | 121 | def run_prediction(self, img_path=None, img_data=None, img_url=None, 122 | img=None): 123 | if img_url: 124 | response = requests.get(img_url) 125 | img = PIL.Image.open(BytesIO(response.content)) 126 | elif img_path: 127 | img = PIL.Image.open(img_path) 128 | 129 | if img_data is None: 130 | img = img.resize((self._params.resolution, 131 | self._params.resolution)) 132 | img_data = numpy.array(img) 133 | img_data = img_data * 1. / 255. - 0.5 134 | img_data = skimage.filters.gaussian(img_data, sigma=0.0) 135 | elif img_data is None: 136 | raise Exception("Must either pass image data or a path to image") 137 | 138 | return self.model.predict(img_data[None, :, :, :]) 139 | 140 | def predict_and_plot(self, img_path=None, img_data=None, img_url=None, 141 | mask=None, probabilities=True): 142 | if img_url: 143 | response = requests.get(img_url) 144 | img = PIL.Image.open(BytesIO(response.content)) 145 | img = img.resize((self._params.resolution, 146 | self._params.resolution)) 147 | if img_path: 148 | img = PIL.Image.open(img_path) 149 | img = img.resize((self._params.resolution, 150 | self._params.resolution)) 151 | elif img_data is not None: 152 | img = ((img_data + 0.5) * 255).astype('uint8') 153 | 154 | output = self.run_prediction(img_path=img_path, img_data=img_data, 155 | img_url=img_url) 156 | 157 | figure = image_segmentation.utils.plot_image_and_mask( 158 | numpy.array(img), 159 | output[0], 160 | reference_mask=mask, 161 | alpha=0.9, 162 | small=True) 163 | generated_figures = [figure] 164 | 165 | if probabilities: 166 | generated_figures.append( 167 | image_segmentation.utils.plot_pixel_probabilities( 168 | output[0], 169 | self._params.labels 170 | ) 171 | ) 172 | 173 | return output, generated_figures 174 | 175 | def calculate_error(self, results, mask): 176 | resized_mask = numpy.resize(mask[:, :, 0], ( 177 | results.shape[0], results.shape[1] 178 | )) 179 | resized_mask = mask[:, :, 0] 180 | 181 | resized_results = skimage.transform.resize( 182 | numpy.argmax(results, axis=-1), 183 | mask.shape[:2], 184 | preserve_range=True, 185 | anti_aliasing=False, 186 | order=0) 187 | 188 | diff = resized_mask - resized_results 189 | 190 | success_rate = [] 191 | for i, label in enumerate(self._params.labels): 192 | total_class_values = numpy.sum(resized_mask == i) 193 | if not total_class_values: 194 | continue 195 | 196 | incorrect = float(numpy.count_nonzero(diff[resized_mask == i])) 197 | true_positive = 
float(numpy.sum(diff[resized_mask == i] == 0)) 198 | iou = true_positive / (true_positive + incorrect) 199 | success_rate.append((i, total_class_values, true_positive, iou)) 200 | print(f"{label} - total: {total_class_values}, IoU: {iou}") 201 | 202 | mean_iou = ( 203 | sum([iou for _, _, _, iou in success_rate]) / len(success_rate) 204 | ) 205 | print(f"mIoU: {mean_iou}") 206 | return success_rate 207 | 208 | def convert_to_coreml(self, mlmodel_path='./'): 209 | mlmodel = coremltools.converters.keras.convert( 210 | self.model, 211 | input_names='image', 212 | image_input_names='image', 213 | image_scale=1.0 / 255.0, 214 | red_bias=-0.5, 215 | green_bias=-0.5, 216 | blue_bias=-0.5, 217 | output_names='output' 218 | ) 219 | mlmodel_file_path = ( 220 | os.path.join(mlmodel_path, self._params.file_base + '.mlmodel') 221 | ) 222 | mlmodel.save(mlmodel_file_path) 223 | print(f"successfully saved {mlmodel_file_path}") 224 | -------------------------------------------------------------------------------- /style_transfer/README.md: -------------------------------------------------------------------------------- 1 | # Fritz Style Transfer 2 | Code for training artistic style transfer models with Keras and converting them to Core ML. 3 | 4 | 5 | 6 | Left: Original image. Middle: Image stylized with a 17kb small model. Right: Image stylized by the default large model. 7 | 8 | # Add style transfer to your app in minutes with Fritz 9 | 10 | If you're looking to add style transfer to your app quickly, check out [Fritz](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models&utm_content=style-transfer). The Fritz SDK provides 11 pre-trained style transfer models along with all the code you need to apply them to images or live video. If you want to train your own model, keep reading. 11 | 12 | # 11-21-2018: Train your own custom style model in 20 minutes 13 | 14 | You can now train your own personal style transfer model in about 20 minutes using Fritz Style Transfer and Google Colab. Just create your own playground from [this notebook](https://colab.research.google.com/drive/1nDkxLKBgZGFscGoF0tfyPMGqW03xITl0#scrollTo=L9aTwLIqtFTE) to get started. You can read more about how it works [here](https://heartbeat.fritz.ai/20-minute-masterpiece-4b6043fdfff5?utm_source=github&utm_campaign=fritz-models&utm_content=style-transfer). 15 | 16 | # Installation 17 | 18 | If you're not installing using a package manager like `pip`, make sure the root directory is on your `PYTHONPATH`: 19 | 20 | ``` 21 | export PYTHONPATH=$PYTHONPATH:`pwd` 22 | ``` 23 | 24 | # Preprocessing Training Data 25 | The training data comes from the [COCO Training data set](http://cocodataset.org/). It consists of ~80,000 images and labels, although the labels aren't used here. 26 | 27 | The `create_training_dataset.py` script will download and unzip this data, then process the images to create a TFRecord dataset used by the style transfer network trainer. You can run this with the command below. Note that the first time you run this you will need to download and unzip 13GB worth of data, which can take a while. The command only processes the first 10 images to make sure things are working, but you can modify `--num-images` to process more.
28 | 29 | ``` 30 | python create_training_dataset.py \ 31 | --output example/training_images.tfrecord \ 32 | --image-dir path/to/coco/ \ 33 | --num-images 10 34 | ``` 35 | 36 | Note that if you have already downloaded and extracted a set of images to use for training, that directory needs to be called `train2014/` and you need to point `--coco-image-dir` to the parent directory that contains that folder. Otherwise you can use the `--download` flag. 37 | 38 | # Training a Style Transfer Model 39 | 40 | To train the model from scratch for 10 iterations: 41 | 42 | ``` 43 | python style_transfer/train.py \ 44 | --training-image-dset example/training_images.tfrecord \ 45 | --style-images example/starry_night.jpg \ 46 | --model-checkpoint example/starry_night.h5 \ 47 | --image-size 256,256 \ 48 | --alpha 0.25 \ 49 | --log-interval 1 \ 50 | --num-iterations 10 51 | ``` 52 | 53 | If everything looks good, we can pick up where we left off and keep training the same model. 54 | 55 | ``` 56 | python style_transfer/train.py \ 57 | --training-image-dset example/training_images.tfrecord \ 58 | --style-images example/starry_night.jpg \ 59 | --model-checkpoint example/starry_night.h5 \ 60 | --image-size 256,256 \ 61 | --alpha 0.25 \ 62 | --num-iterations 1000 \ 63 | --fine-tune-checkpoint example/starry_night.h5 64 | ``` 65 | 66 | If you're using the full COCO dataset, you'll need around 20,000 iterations to train a model from scratch with a batch size of 24. If you're starting from a pre-trained model checkpoint, 5,000 steps should work. A model pre-trained on Starry Night is provided in the `example/` folder. 67 | 68 | For styles that are abstract with strong geometric patterns, try higher values for `--content-weight` like `3` or `10`. For styles that are more photo-realistic with smaller details, boost the `--style-weight` to `0.001` or more. 69 | 70 | Finally, note that for training, we resize images to 256x256px. This is for training only. Final models can be set to take images of any size. 71 | 72 | ## Training models for mobile 73 | 74 | By default, the style transfer networks produced here are roughly 7mb in size and contain 7 million parameters. They can create a stylized image in ~500ms on high end mobile phones, and 5s on lower end phones. To make the models faster, we've included a width-multiplier parameter similar to the one introduced by Google in their MobileNet architecture. The value `alpha` can be set between 0 and 1 and controls how many filters are included in each layer. Lower `alpha` means fewer filters, fewer parameters, and faster models, with slightly worse style transfer abilities. In testing, `alpha=0.25` produced models that ran at 17fps on an iPhone X while still transferring styles well. 75 | 76 | Finally, for models that are intended to be used in real-time on a CPU only, you can use the `--use-small-network` flag to train a model architecture that has been heavily pruned. The style transfer itself isn't quite as good, but the results are usable and the models are incredibly small. 77 | 78 | # Stylizing Images 79 | To stylize an image with a trained model you can run: 80 | 81 | ``` 82 | python stylize_image.py \ 83 | --input-image example/dog.jpg \ 84 | --output-image example/stylized_dog.jpg \ 85 | --model-checkpoint example/starry_night_256x256_025.h5 86 | ``` 87 | 88 | # Convert to Mobile 89 | Style transfer models can be converted to both Core ML and TensorFlow Mobile formats.
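Before running either converter, it can help to confirm that your checkpoint loads cleanly at the resolution you plan to convert to. Here is a minimal Python sketch (not one of the repo scripts); it assumes the package is on your `PYTHONPATH` and reuses the example checkpoint referenced above:

```
from style_transfer import models

# Build the network at the target conversion size. Weights trained at
# 256x256 load fine because the convolutional layers are size-agnostic,
# but alpha must match the value used during training.
model = models.StyleTransferNetwork.build(
    image_size=(640, 480),
    alpha=0.25,
    checkpoint_file='example/starry_night_256x256_025.h5',
)
model.summary()
```

If this runs without a shape mismatch, the same checkpoint, alpha, and image size can be passed to the converter scripts below.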
90 | 91 | ## Convert to Core ML 92 | Use the converter script to convert to Core ML. 93 | 94 | This converter is a slight modification of Apple's Keras converter that allows 95 | the user to define custom conversions between Keras layers and Core ML layers. This allows us to convert the Instance Normalization and Deprocessing layers. 96 | 97 | ``` 98 | python convert_to_coreml.py \ 99 | --keras-checkpoint example/starry_night_256x256_025.h5 \ 100 | --alpha 0.25 \ 101 | --image-size 640,480 \ 102 | --coreml-model example/starry_night_640x480_025.mlmodel 103 | ``` 104 | 105 | ## Convert to TensorFlow Mobile 106 | Models cannot be converted to TFLite because some operations are not supported, but TensorFlow Mobile works fine. To convert your model to an optimized frozen graph, run: 107 | 108 | ``` 109 | python convert_to_tfmobile.py \ 110 | --keras-checkpoint example/starry_night_256x256_025.h5 \ 111 | --alpha 0.25 \ 112 | --image-size 640,480 \ 113 | --output-dir example/ 114 | ``` 115 | 116 | This produces a number of TensorFlow graph formats. The `*_optimized.pb` graph file is the one you want to use with your app. Note that the input node name is `input_1` and the output node name is `deprocess_stylized_image_1/mul`. 117 | 118 | # Train on Google Cloud ML 119 | 120 | This library is designed to work with certain configurations on Google Cloud ML so you can train styles in parallel and take advantage of GPUs. Assuming you have Google Cloud ML and Google Cloud Storage set up, the following commands will get you training new models in just a few hours. 121 | 122 | ## Set up your Google Cloud Storage bucket. 123 | 124 | This repo assumes the following structure on Google Cloud Storage: 125 | 126 | ``` 127 | gs://${YOUR_GCS_BUCKET}/ 128 | |-- data/ 129 | |-- training_images.tfrecord 130 | |-- starry_night_256x256_025.h5 131 | |-- style_images/ 132 | |-- style_1.jpg 133 | |-- style_2.jpg 134 | |-- dist/ 135 | |-- fritz_style_transfer.zip 136 | |-- train/ 137 | |-- pretrained_model.h5 138 | |-- output_model.h5 139 | ``` 140 | 141 | To make things easier, start by setting some environment variables. 142 | 143 | ``` 144 | export YOUR_GCS_BUCKET=your_gcs_bucket 145 | export FRITZ_STYLE_TRANSFER_PATH=/path/to/fritz-models/style_transfer/ 146 | export KERAS_CONTRIB_PATH=/path/to/keras-contrib 147 | export STYLE_NAME=style_name 148 | ``` 149 | 150 | Note that `STYLE_NAME` should be the filename of the style image (without the extension). 151 | 152 | Create the GCS bucket if you haven't already: 153 | 154 | ``` 155 | gsutil mb gs://${YOUR_GCS_BUCKET} 156 | ``` 157 | 158 | Copy the training data, pre-trained checkpoint, and style image to GCS: 159 | ``` 160 | gsutil cp example/training_images.tfrecord gs://${YOUR_GCS_BUCKET}/data 161 | gsutil cp example/${STYLE_NAME}.jpg gs://${YOUR_GCS_BUCKET}/data/style_images/ 162 | gsutil cp example/starry_night_256x256_025.h5 gs://${YOUR_GCS_BUCKET}/data/ 163 | ``` 164 | 165 | ## Package up libraries. 166 | 167 | Zip up all of the local files to send up to Google Cloud. 168 | ``` 169 | python setup.py sdist 170 | ``` 171 | 172 | Zip up keras_contrib so it's available to the library as well. 173 | ``` 174 | pushd ${KERAS_CONTRIB_PATH} 175 | python setup.py sdist 176 | cp dist/* ${FRITZ_STYLE_TRANSFER_PATH}/dist/ 177 | popd 178 | ``` 179 | 180 | ## Start the training job 181 | 182 | The following command will start training a new style transfer model from a pre-trained checkpoint.
This configuration trains on 256x256 images and has `--alpha=0.25` making it suitable for real-time use in mobile apps. 183 | 184 | ``` 185 | gcloud ml-engine jobs submit training `whoami`_style_transfer`date +%s` \ 186 | --runtime-version 1.8 \ 187 | --job-dir=gs://${YOUR_GCS_BUCKET} \ 188 | --packages dist/style_transfer-1.0.tar.gz,dist/keras_contrib-2.0.8.tar.gz \ 189 | --module-name style_transfer.train \ 190 | --region us-east1 \ 191 | --scale-tier basic_gpu \ 192 | -- \ 193 | --training-image-dset gs://${YOUR_GCS_BUCKET}/data/test_training_images.tfrecord \ 194 | --style-images gs://${YOUR_GCS_BUCKET}/data/style_images/${STYLE_NAME}.jpg \ 195 | --model-checkpoint ${STYLE_NAME}_256x256_025.h5 \ 196 | --image-size 256,256 \ 197 | --alpha 0.25 \ 198 | --num-iterations 5000 \ 199 | --batch-size 24 \ 200 | --content-weight 1 \ 201 | --style-weight .0001 \ 202 | --gcs-bucket gs://${YOUR_GCS_BUCKET}/train \ 203 | --fine-tune-checkpoint gs://${YOUR_GCS_BUCKET}/data/starry_night_256x256_025.h5 204 | ``` 205 | 206 | Distributed training and TPUs are not yet supported. 207 | 208 | # Add the model to your app with Fritz 209 | 210 | Now that you have a style transfer model that works for both iOS and Android, head over to [https://fritz.ai](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models&utm_content=style-transfer) for tools to help you integrate it into your app and manage it over time. 211 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/models.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import keras_contrib 3 | import logging 4 | 5 | from style_transfer import layers 6 | from style_transfer import utils 7 | 8 | logger = logging.getLogger('models') 9 | 10 | 11 | class StyleTransferNetwork(object): 12 | """A class that builds a Keras model to perform style transfer. 13 | 14 | The architecture for this model comes from Johnson et al: 15 | https://arxiv.org/abs/1603.08155 16 | https://cs.stanford.edu/people/jcjohns/papers/fast-style/fast-style-supp.pdf 17 | 18 | It differs slightly from Johnson's model by swapping reflective 19 | padding with Zero Padding and Batch Normalization for 20 | Instance Normalization as recommended in Ulyanov et al: 21 | https://arxiv.org/abs/1607.08022 22 | """ 23 | 24 | @classmethod 25 | def build( 26 | cls, 27 | image_size, 28 | alpha=1.0, 29 | input_tensor=None, 30 | checkpoint_file=None): 31 | """Build a Transfer Network Model using keras' functional API. 32 | 33 | Args: 34 | image_size - the size of the input and output image (H, W) 35 | alpha - a width parameter to scale the number of channels by 36 | 37 | Returns: 38 | model: a keras model object 39 | """ 40 | x = keras.layers.Input( 41 | shape=(image_size[0], image_size[1], 3), tensor=input_tensor) 42 | out = cls._convolution(x, int(alpha * 32), 9, strides=1) 43 | out = cls._convolution(out, int(alpha * 64), 3, strides=2) 44 | out = cls._convolution(out, int(alpha * 128), 3, strides=2) 45 | out = cls._residual_block(out, int(alpha * 128)) 46 | out = cls._residual_block(out, int(alpha * 128)) 47 | out = cls._residual_block(out, int(alpha * 128)) 48 | out = cls._residual_block(out, int(alpha * 128)) 49 | out = cls._residual_block(out, int(alpha * 128)) 50 | out = cls._upsample(out, int(alpha * 64), 3) 51 | out = cls._upsample(out, int(alpha * 32), 3) 52 | # Add a layer of padding to keep sizes consistent. 
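# (The explicit ZeroPadding2D below is left commented out because the final
#  convolution uses padding='same', which already preserves the spatial size.)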
53 | # out = keras.layers.ZeroPadding2D(padding=(1, 1))(out) 54 | out = cls._convolution(out, 3, 9, relu=False, padding='same') 55 | # Restrict outputs of pixel values to -1 and 1. 56 | out = keras.layers.Activation('tanh')(out) 57 | # Deprocess the image into valid image data. Note we'll need to define 58 | # a custom layer for this in Core ML as well. 59 | out = layers.DeprocessStylizedImage()(out) 60 | model = keras.models.Model(inputs=x, outputs=out) 61 | 62 | # Optionally load weights from a checkpoint 63 | if checkpoint_file: 64 | logger.info( 65 | 'Loading weights from checkpoint: %s' % checkpoint_file 66 | ) 67 | if checkpoint_file.startswith('gs://'): 68 | checkpoint_file = utils.copy_file_from_gcs(checkpoint_file) 69 | model.load_weights(checkpoint_file, by_name=True) 70 | return model 71 | 72 | @classmethod 73 | def _convolution( 74 | cls, x, n_filters, kernel_size, strides=1, 75 | padding='same', relu=True, use_bias=False): 76 | """Create a convolution block. 77 | 78 | This block consists of a convolution layer, normalization, and an 79 | optional RELU activation. 80 | 81 | Args: 82 | x - a keras layer as input 83 | n_filters - the number of output dimensions 84 | kernel_size - an integer or tuple specifying the (width, height) of 85 | the 2D convolution window 86 | strides - An integer or tuple/list of 2 integers, specifying the 87 | strides of the convolution along the width and height. 88 | Default 1. 89 | padding: one of "valid" or "same" (case-insensitive). 90 | relu - a bool specifying whether or not a RELU activation is 91 | applied. Default True. 92 | use_bias = a bool specifying whether or not to use a bias term 93 | """ 94 | out = keras.layers.convolutional.Conv2D( 95 | n_filters, 96 | kernel_size, 97 | strides=strides, 98 | padding=padding, 99 | use_bias=use_bias 100 | )(x) 101 | 102 | # We are using the keras-contrib library from @farizrahman4u for 103 | # an implementation of Instance Normalization. Note here that we are 104 | # specifying the normalization axis to be -1, or the channel axis. 105 | # By default this is None and simple Batch Normalization is applied. 106 | out = keras_contrib.layers.normalization.InstanceNormalization( 107 | axis=-1)(out) 108 | if relu: 109 | out = keras.layers.Activation('relu')(out) 110 | return out 111 | 112 | @classmethod 113 | def _residual_block(cls, x, n_filters, kernel_size=3): 114 | """Construct a residual block. 115 | 116 | Args: 117 | x - a keras layer as input 118 | n_filters - the number of output dimensions 119 | kernel_size - an integer or tuple specifying the (width, height) of 120 | the 2D convolution window. Default 3. 121 | Returns: 122 | out - a keras layer as output 123 | """ 124 | # Make sure the layer has the proper size and store a copy of the 125 | # original, cropped input layer. 126 | # identity = keras.layers.Cropping2D(cropping=((2, 2), (2, 2)))(x) 127 | 128 | out = cls._convolution(x, n_filters, kernel_size, padding='same') 129 | out = cls._convolution( 130 | out, n_filters, kernel_size, padding='same', relu=False 131 | ) 132 | out = keras.layers.Add()([out, x]) 133 | return out 134 | 135 | @classmethod 136 | def _upsample(cls, x, n_filters, kernel_size, size=2): 137 | """Construct an upsample block. 138 | 139 | Args: 140 | x - a keras layer as input 141 | n_filters - the number of output dimensions 142 | kernel_size - an integer or tuple specifying the (width, height) of 143 | the 2D convolution window. Default 3. 
144 | Returns: 145 | out - a keras layer as output 146 | """ 147 | out = keras.layers.UpSampling2D(size=size)(x) 148 | # out = keras.layers.ZeroPadding2D(padding=(2, 2))(out) 149 | out = cls._convolution(out, n_filters, kernel_size, padding='same') 150 | return out 151 | 152 | 153 | class SmallStyleTransferNetwork(StyleTransferNetwork): 154 | 155 | @classmethod 156 | def build(cls, image_size, alpha=1.0, input_tensor=None, checkpoint_file=None): 157 | """Build a Smaller Transfer Network Model using keras' functional API. 158 | 159 | This architecture removes some blocks of layers and reduces the size 160 | of convolutions to save on computation. 161 | 162 | Args: 163 | image_size - the size of the input and output image (H, W) 164 | alpha - a width parameter to scale the number of channels by 165 | 166 | Returns: 167 | model: a keras model object 168 | """ 169 | x = keras.layers.Input( 170 | shape=(image_size[0], image_size[1], 3), tensor=input_tensor) 171 | out = cls._convolution(x, int(alpha * 32), 9, strides=1) 172 | out = cls._convolution(out, int(alpha * 32), 3, strides=2) 173 | out = cls._convolution(out, int(alpha * 32), 3, strides=2) 174 | out = cls._residual_block(out, int(alpha * 32)) 175 | out = cls._residual_block(out, int(alpha * 32)) 176 | out = cls._residual_block(out, int(alpha * 32)) 177 | out = cls._upsample(out, int(alpha * 32), 3) 178 | out = cls._upsample(out, int(alpha * 32), 3) 179 | out = cls._convolution(out, 3, 9, relu=False, padding='same') 180 | # Restrict outputs of pixel values to -1 and 1. 181 | out = keras.layers.Activation('tanh')(out) 182 | # Deprocess the image into valid image data. Note we'll need to define 183 | # a custom layer for this in Core ML as well. 184 | out = layers.DeprocessStylizedImage()(out) 185 | model = keras.models.Model(inputs=x, outputs=out) 186 | 187 | # Optionally load weights from a checkpoint 188 | if checkpoint_file: 189 | logger.info( 190 | 'Loading weights from checkpoint: %s' % checkpoint_file 191 | ) 192 | if checkpoint_file.startswith('gs://'): 193 | checkpoint_file = utils.copy_file_from_gcs(checkpoint_file) 194 | model.load_weights(checkpoint_file, by_name=True) 195 | return model 196 | 197 | 198 | class IntermediateVGG(object): 199 | """A VGG network class that allows easy access to intermediate layers. 200 | 201 | This class takes the default VGG16 application packaged with Keras and 202 | constructs a dictionary mapping layer names to layout puts so that 203 | we can easily extract the network's features at any level. These outputs 204 | are used to compute losses in artistic style transfer. 205 | 206 | """ 207 | 208 | def __init__(self, prev_layer=None, input_tensor=None): 209 | """Initialize the model. 210 | 211 | Args: 212 | prev_layer - a keras layer to use as an input layer to the 213 | VGG model. This allows us to stitch other models 214 | together with the VGG. 215 | input_tensor - a tensor that will be used as input for the 216 | VGG. 217 | """ 218 | # Create the Keras VGG Model 219 | self.model = keras.applications.vgg16.VGG16( 220 | weights='imagenet', 221 | include_top=False, 222 | input_tensor=input_tensor 223 | ) 224 | 225 | # Make sure none of the VGG layers are trainable 226 | for layer in self.model.layers: 227 | layer.trainable = False 228 | 229 | # if a previous layer is specified, stitch that layer to the 230 | # input of the VGG model and rewire the entire model. 
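# Calling each VGG layer again on the incoming tensor puts the VGG outputs
# in the same graph as the style network; the resulting tensors are cached
# by layer name so losses can be computed from any intermediate feature map.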
231 | self.layers = {} 232 | if prev_layer is not None: 233 | # We need to apply all layers to the output of the style net 234 | in_layer = prev_layer 235 | for layer in self.model.layers[1:]: # Ignore the input layer 236 | in_layer = layer(in_layer) 237 | self.layers[layer.name] = in_layer 238 | else: 239 | self.layers = dict( 240 | [(layer.name, layer.output) for layer in self.model.layers] 241 | ) 242 | -------------------------------------------------------------------------------- /image_segmentation/create_tfrecord_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import argparse 17 | from functools import partial 18 | import logging 19 | import os 20 | import io 21 | import numpy 22 | import sys 23 | 24 | import PIL.Image 25 | import tensorflow as tf 26 | from tensorflow.python.lib.io import file_io 27 | 28 | from image_segmentation import build_data 29 | 30 | logging.basicConfig(level=logging.INFO) 31 | logger = logging.getLogger('create_tfrecord_dataset') 32 | 33 | 34 | def main(argv): 35 | parser = argparse.ArgumentParser( 36 | description='Convert the ADE20K Challenge dataset to tfrecords' 37 | ) 38 | 39 | parser.add_argument( 40 | '-i', '--image-dir', type=str, required=True, 41 | help='Folder containing trainng images' 42 | ) 43 | parser.add_argument( 44 | '-a', '--annotation-dir', type=str, required=True, 45 | help='Folder containing annotations for training images' 46 | ) 47 | parser.add_argument( 48 | '-o', '--output', type=str, required=True, 49 | help='Path to save converted tfrecord of Tensorflow example' 50 | ) 51 | parser.add_argument( 52 | '-l', '--label-filename', type=str, required=True, 53 | help='A file containing a single label per line.' 54 | ) 55 | parser.add_argument( 56 | '-w', '--whitelist-labels', type=str, 57 | help=('A pipe | separated list of object labels to whitelist. ' 58 | 'categories can be merged by seperating them by : ' 59 | 'e.g. "person|car:truck:van|pavement". To see a' 60 | ' full list of allowed labels run with --list-labels.') 61 | ) 62 | parser.add_argument( 63 | '-t', '--whitelist-threshold', type=float, default=0.7, 64 | help=('The fraction of whitelisted labels an image must contain to be ' 65 | 'used for training.') 66 | ) 67 | parser.add_argument( 68 | '--list-labels', action='store_true', 69 | help='If true, print a full list of object labels.' 
70 | ) 71 | 72 | args = parser.parse_args(argv) 73 | 74 | # Load the class labels 75 | class_labels = _load_class_labels(args.label_filename) 76 | n_classes = len(class_labels) 77 | if args.list_labels: 78 | logger.info('Labels:') 79 | labels = '' 80 | for label in class_labels: 81 | labels += '%d, %s\n' % label 82 | logger.info(labels) 83 | sys.exit() 84 | 85 | # If a whitelist is provided, get a list of mask indices that correspond 86 | # to allowed labels 87 | whitelist_labels = None 88 | whitelist_indices = None 89 | if args.whitelist_labels: 90 | whitelist_labels = _parse_whitelist_labels(args.whitelist_labels) 91 | 92 | # add a 'none' class with a label of 0 93 | whitelist_labels.insert(0, ['none']) 94 | whitelist_indices = _find_whitelist_indices( 95 | class_labels, whitelist_labels) 96 | 97 | whitelist_filename = os.path.join( 98 | os.path.dirname(args.output), 'labels.txt') 99 | _save_whitelist_labels(whitelist_filename, whitelist_labels) 100 | n_classes = len(whitelist_labels) 101 | 102 | _create_tfrecord_dataset( 103 | args.image_dir, 104 | args.annotation_dir, 105 | args.output, 106 | n_classes, 107 | whitelist_indices=whitelist_indices, 108 | whitelist_threshold=args.whitelist_threshold 109 | ) 110 | 111 | 112 | def _parse_whitelist_labels(whitelist): 113 | parsed = whitelist.split('|') 114 | parsed = [category.split(':') for category in parsed] 115 | return parsed 116 | 117 | 118 | def _save_whitelist_labels(whitelist_filename, labels): 119 | with open(whitelist_filename, 'w') as wfid: 120 | header = 'idx\tlabel\n' 121 | wfid.write(header) 122 | for idx, label_set in enumerate(labels): 123 | label = label_set[0].split(',')[0] 124 | wfid.write('%d\t%s\n' % (idx, label)) 125 | print("Saved") 126 | 127 | 128 | def _load_class_labels(label_filename): 129 | """Load class labels. 130 | 131 | Assumes the data directory is left unchanged from the original zip. 132 | 133 | Args: 134 | root_directory (str): the dataset's root directory 135 | 136 | Returns: 137 | List[(int, str)]: a list of class ids and labels 138 | """ 139 | class_labels = [] 140 | header = True 141 | with file_io.FileIO(label_filename, mode='r') as file: 142 | for line in file.readlines(): 143 | if header: 144 | class_labels.append((0, 'none')) 145 | header = False 146 | continue 147 | line = line.rstrip() 148 | line = line.split('\t') 149 | label = line[-1] 150 | label_id = int(line[0]) 151 | class_labels.append((label_id, label)) 152 | return class_labels 153 | 154 | 155 | def _find_whitelist_indices(class_labels, whitelist_labels): 156 | """Map whitelist labels to indices. 
157 | 158 | Args: 159 | whitelist (List[str]): a list of whitelisted labels 160 | 161 | Returns: 162 | List[Set]: a list of sets containing index labels 163 | """ 164 | index = [] 165 | for label_set in whitelist_labels: 166 | index_set = [] 167 | for label in label_set: 168 | for class_id, class_label in class_labels: 169 | if label == class_label: 170 | index_set.append(class_id) 171 | index.append(index_set) 172 | return index 173 | 174 | 175 | def _filter_whitelabel_classes( 176 | filenames, 177 | whitelist, 178 | whitelist_threshold, 179 | whitelist_size=None): 180 | w_size = whitelist_size or len(whitelist) 181 | mask = numpy.array(PIL.Image.open(filenames[-1])) 182 | unique_classes = numpy.unique(mask) 183 | num_found = numpy.intersect1d(unique_classes, whitelist).size 184 | if float(num_found) / w_size >= whitelist_threshold: 185 | return True 186 | return False 187 | 188 | 189 | def _relabel_mask(seg_data, whitelist_indices): 190 | # Read the data into a numpy array. 191 | mask = numpy.array(PIL.Image.open(io.BytesIO(seg_data))) 192 | # Relabel each pixel 193 | new_mask = numpy.zeros(mask.shape) 194 | for new_label, old_label_set in enumerate(whitelist_indices): 195 | idx = numpy.where(numpy.isin(mask, old_label_set)) 196 | new_mask[idx] = new_label 197 | # Convert the new mask back to an image. 198 | seg_img = PIL.Image.fromarray(new_mask.astype('uint8')).convert('RGB') 199 | # Save the new image to a PNG byte string. 200 | byte_buffer = io.BytesIO() 201 | seg_img.save(byte_buffer, format='png') 202 | byte_buffer.seek(0) 203 | return byte_buffer.read() 204 | 205 | 206 | def _create_tfrecord_dataset( 207 | image_dir, 208 | segmentation_mask_dir, 209 | output_filename, 210 | n_classes, 211 | whitelist_indices=None, 212 | whitelist_threshold=0.5): 213 | """Convert the ADE20k dataset into into tfrecord format. 214 | 215 | Args: 216 | dataset_split: Dataset split (e.g., train, val). 217 | dataset_dir: Dir in which the dataset locates. 218 | dataset_label_dir: Dir in which the annotations locates. 219 | Raises: 220 | RuntimeError: If loaded image and label have different shape. 221 | """ 222 | # Get all of the image and segmentation mask file names 223 | img_names = tf.gfile.Glob(os.path.join(image_dir, '*.jpg')) 224 | seg_names = [] 225 | for f in img_names: 226 | # get the filename without the extension 227 | basename = os.path.basename(f).split('.')[0] 228 | # cover its corresponding *_seg.png 229 | seg = os.path.join(segmentation_mask_dir, basename + '.png') 230 | seg_names.append(seg) 231 | 232 | # If a whitelist has been provided, loop over all of the segmentation 233 | # masks and find only the images that contain enough classes. 234 | kept_files = zip(img_names, seg_names) 235 | if whitelist_indices is not None: 236 | # Flatten the whitelist because some categories have been merged 237 | # but make sure to use the orginal list size when 238 | # computing the threshold. 239 | flat_whitelist = numpy.array( 240 | [idx for idx_set in whitelist_indices for idx in idx_set] 241 | ).astype('uint8') 242 | merged_whitelist_size = len(whitelist_indices) 243 | filter_fn = partial( 244 | _filter_whitelabel_classes, 245 | whitelist=flat_whitelist, 246 | whitelist_threshold=whitelist_threshold, 247 | whitelist_size=merged_whitelist_size 248 | ) 249 | kept_files = list(filter(filter_fn, kept_files)) 250 | logger.info( 251 | 'Found %d images after whitelist filtereing.' 
% len(kept_files)) 252 | num_images = len(kept_files) 253 | image_reader = build_data.ImageReader('jpeg', channels=3) 254 | label_reader = build_data.ImageReader('png', channels=1) 255 | 256 | with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer: 257 | for idx, (image_filename, seg_filename) in enumerate(kept_files): 258 | if idx % 100 == 0: 259 | logger.info('Converting image %d of %d.' % (idx, num_images)) 260 | # Read the image. 261 | image_data = tf.gfile.FastGFile(image_filename, 'rb').read() 262 | height, width = image_reader.read_image_dims(image_data) 263 | # Read the semantic segmentation annotation. 264 | seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read() 265 | # If there is a whitelist, we need to relabel all of the 266 | # mask classes so that only the whitelisted labels are present. 267 | if whitelist_indices is not None: 268 | seg_data = _relabel_mask(seg_data, whitelist_indices) 269 | seg_height, seg_width = label_reader.read_image_dims(seg_data) 270 | if height != seg_height or width != seg_width: 271 | raise RuntimeError( 272 | 'Shape mismatched between image and label.') 273 | # Convert to tf example. 274 | example = build_data.image_seg_to_tfexample( 275 | image_data, image_filename, height, width, seg_data) 276 | tfrecord_writer.write(example.SerializeToString()) 277 | 278 | 279 | if __name__ == '__main__': 280 | main(sys.argv[1:]) 281 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/data_generator.py: -------------------------------------------------------------------------------- 1 | """Summary. 2 | 3 | Attributes: 4 | logger (TYPE): Description 5 | """ 6 | import logging 7 | 8 | import numpy 9 | import tensorflow as tf 10 | from tensorflow.python.lib.io import file_io 11 | 12 | logger = logging.getLogger('data_generator') 13 | 14 | 15 | def _gaussian_kernel_3d(sigma, channels=3, size=4.0): 16 | radius = sigma * size / 2.0 + 0.5 17 | gauss = tf.distributions.Normal(0., sigma) 18 | kernel_1d = gauss.prob( 19 | tf.range(-radius[0], radius[0] + 1.0, dtype=tf.float32) 20 | ) 21 | kernel_2d = tf.sqrt(tf.einsum('i,j->ij', kernel_1d, kernel_1d)) 22 | kernel_2d = kernel_2d / tf.reduce_sum(kernel_2d) 23 | kernel = tf.expand_dims(kernel_2d, -1) 24 | kernel = tf.expand_dims(kernel, -1) 25 | kernel = tf.tile(kernel, [1, 1, channels, 1]) 26 | return kernel 27 | 28 | 29 | class ADE20KDatasetBuilder(object): 30 | """Create a TFRecord dataset from the ADE20K data.""" 31 | 32 | # Scale and bias parameters to pre-process images so pixel values are 33 | # between -0.5 and 0.5 34 | _PREPROCESS_IMAGE_SCALE = 1.0 / 255.0 35 | _PREPROCESS_CHANNEL_BIAS = -0.5 36 | 37 | @staticmethod 38 | def load_class_labels(label_filename): 39 | """Load class labels. 40 | 41 | Assumes the data directory is left unchanged from the original zip. 42 | 43 | Args: 44 | root_directory (str): the dataset's root directory 45 | 46 | Returns: 47 | arr: an array of class labels 48 | """ 49 | class_labels = [] 50 | header = True 51 | with file_io.FileIO(label_filename, mode='r') as file: 52 | for line in file.readlines(): 53 | if header: 54 | header = False 55 | continue 56 | line = line.rstrip() 57 | label = line.split('\t')[-1] 58 | class_labels.append(label) 59 | return numpy.array(class_labels) 60 | 61 | @staticmethod 62 | def _resize_fn(images, image_size): 63 | """Resize an input images.. 
64 | 65 | Args: 66 | images (tf.tensor): a tensor of input images 67 | image_size ((int, int)): a size (H,W) to resize to 68 | 69 | Returns: 70 | tf.tensor: a resized image tensor 71 | """ 72 | return tf.image.resize_images( 73 | images, 74 | image_size, 75 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR 76 | ) 77 | 78 | @classmethod 79 | def _preprocess_example(cls, example): 80 | """Preprocess an image. 81 | 82 | Args: 83 | example (dict): a single example from the dataset 84 | 85 | Return: 86 | (dict) processed example from the dataset 87 | """ 88 | example['image'] = (tf.cast(example['image'], tf.float32) * 89 | cls._PREPROCESS_IMAGE_SCALE + 90 | cls._PREPROCESS_CHANNEL_BIAS) 91 | return example 92 | 93 | @classmethod 94 | def _resize_example(cls, example, image_size): 95 | """Resize an image and mask from. 96 | 97 | Args: 98 | example (dict): a single example from the dataset. 99 | image_size ((int, int)): the desired size of image and mask 100 | 101 | Returns: 102 | (dict) a single example resized 103 | """ 104 | return {'image': cls._resize_fn(example['image'], image_size), 105 | 'mask': cls._resize_fn(example['mask'], image_size)} 106 | 107 | @staticmethod 108 | def _crop_and_resize(image, zoom, image_size): 109 | """Crop and resize an image. 110 | 111 | Uses center cropping. 112 | 113 | Args: 114 | image (tensor): an input image tensor 115 | zoom (float): a zoom factor 116 | image_size ((int, int)): a desired output image size 117 | 118 | Returns: 119 | tensor: an outpu timage tensor 120 | """ 121 | x1 = y1 = 0.5 - 0.5 * zoom # scale centrally 122 | x2 = y2 = 0.5 + 0.5 * zoom 123 | boxes = tf.stack([y1, x1, y2, x2], axis=1) 124 | box_ind = [0] 125 | return tf.cast(tf.squeeze( 126 | tf.image.crop_and_resize( 127 | tf.expand_dims(image, 0), 128 | boxes, 129 | box_ind, 130 | image_size, 131 | method='nearest' 132 | ) 133 | ), tf.uint8) 134 | 135 | @staticmethod 136 | def _blur(image, sigma): 137 | kernel = _gaussian_kernel_3d(sigma) 138 | # all preprocessing should run on the CPU 139 | with tf.device('/cpu:0'): 140 | blurred_image = tf.nn.depthwise_conv2d( 141 | tf.cast(tf.expand_dims(image, 0), tf.float32), 142 | kernel, 143 | [1, 1, 1, 1], 144 | padding='SAME', 145 | data_format="NHWC" 146 | ) 147 | return blurred_image[0] 148 | 149 | @classmethod 150 | def _augment_example(cls, example): 151 | """Augment an example from the dataset. 152 | 153 | All augmentation functions are also be applied to the segmentation 154 | mask. 155 | 156 | Args: 157 | example (dict): a single example from the dataset. 
158 | 159 | Returns: 160 | dict: an augmented example 161 | """ 162 | image = example['image'] 163 | mask = example['mask'] 164 | 165 | image_size = image.shape.as_list()[0:2] 166 | 167 | # Add padding so we don't get black borders 168 | paddings = numpy.array( 169 | [[image_size[0] / 2, image_size[0] / 2], 170 | [image_size[1] / 2, image_size[1] / 2], 171 | [0, 0]], dtype=numpy.uint32) 172 | aug_image = tf.pad(image, paddings, mode='REFLECT') 173 | aug_mask = tf.pad(mask, paddings, mode='REFLECT') 174 | padded_image_size = [dim * 2 for dim in image_size] 175 | 176 | # Rotate 177 | angle = tf.random_uniform([1], -numpy.pi / 6, numpy.pi / 6) 178 | aug_image = tf.contrib.image.rotate(aug_image, angle) 179 | aug_mask = tf.contrib.image.rotate(aug_mask, angle) 180 | 181 | # Zoom 182 | zoom = tf.random_uniform([1], 0.85, 1.75) 183 | aug_image = cls._crop_and_resize(aug_image, zoom, padded_image_size) 184 | aug_mask = cls._crop_and_resize(aug_mask, zoom, padded_image_size) 185 | 186 | # Crop things back to original size 187 | aug_image = tf.image.central_crop(aug_image, central_fraction=0.5) 188 | aug_mask = tf.image.central_crop(aug_mask, central_fraction=0.5) 189 | 190 | # blur 191 | # Not used at the moment because it makes training hard 192 | # sigma = tf.random_uniform([1], 0.0, 1.0) 193 | # aug_image = cls._blur(aug_image, sigma) 194 | 195 | # Flip left right 196 | do_flip = tf.greater(tf.random_uniform([1], 0.0, 1.0)[0], 0.5) 197 | aug_image = tf.cond( 198 | do_flip, 199 | true_fn=lambda: tf.image.flip_left_right(aug_image), 200 | false_fn=lambda: aug_image, 201 | ) 202 | aug_mask = tf.cond( 203 | do_flip, 204 | true_fn=lambda: tf.image.flip_left_right(aug_mask), 205 | false_fn=lambda: aug_mask, 206 | ) 207 | 208 | # Flip up down 209 | do_flip = tf.greater(tf.random_uniform([1], 0.0, 1.0)[0], 0.5) 210 | aug_image = tf.cond( 211 | do_flip, 212 | true_fn=lambda: tf.image.flip_up_down(aug_image), 213 | false_fn=lambda: aug_image, 214 | ) 215 | aug_mask = tf.cond( 216 | do_flip, 217 | true_fn=lambda: tf.image.flip_up_down(aug_mask), 218 | false_fn=lambda: aug_mask, 219 | ) 220 | 221 | return {'image': aug_image, 'mask': aug_mask} 222 | 223 | @staticmethod 224 | def _decode_example(example_proto): 225 | """Decode an example from a TFRecord. 226 | 227 | Args: 228 | example_proto (tfrecord): a serialized tf record 229 | 230 | Returns: 231 | dict: an example from the dataset containing image and mask. 232 | """ 233 | features = { 234 | "image/encoded": tf.FixedLenFeature( 235 | (), tf.string, default_value="" 236 | ), 237 | "image/segmentation/class/encoded": tf.FixedLenFeature( 238 | (), tf.string, default_value="" 239 | ) 240 | } 241 | parsed_features = tf.parse_single_example(example_proto, features) 242 | image = tf.image.decode_jpeg( 243 | parsed_features["image/encoded"], channels=3) 244 | mask = tf.image.decode_png( 245 | parsed_features["image/segmentation/class/encoded"], channels=3) 246 | return {'image': image, 'mask': mask} 247 | 248 | @classmethod 249 | def _generate_multiscale_masks(cls, example, n_classes): 250 | """Generate masks at mulitple scales for training. 251 | 252 | The loss function compares masks at 4, 8, and 16x increases in scale. 253 | 254 | Args: 255 | example (dict): a single example from the dataset 256 | n_classes (int): the number of classes in the mask 257 | 258 | Returns 259 | (dict): the same example, but with additional mask data for each 260 | new resolution. 
261 | """ 262 | original_mask = example['mask'] 263 | # Add the image to the placeholder 264 | image_size = example['image'].shape.as_list()[0:2] 265 | 266 | for scale in [4, 8, 16]: 267 | example['mask_%d' % scale] = tf.one_hot( 268 | cls._resize_fn( 269 | original_mask, 270 | list(map(lambda x: x // scale, image_size)) 271 | )[:, :, 0], # only need one channel 272 | depth=n_classes, 273 | dtype=tf.float32 274 | ) 275 | return example 276 | 277 | @classmethod 278 | def scale_mask(cls, mask, scale, image_size, n_classes): 279 | return tf.one_hot( 280 | cls._resize_fn( 281 | mask, 282 | image_size, 283 | )[:, :, :, 0], # only need one channel 284 | depth=n_classes, 285 | dtype=tf.float32 286 | ) 287 | 288 | @classmethod 289 | def build( 290 | cls, 291 | filename, 292 | batch_size, 293 | image_size, 294 | n_classes, 295 | augment_images=True, 296 | repeat=True, 297 | prefetch=False, 298 | parallel_calls=1): 299 | """Build a TFRecord dataset. 300 | 301 | Args: 302 | filename (str): a .tfrecord file to read 303 | batch_size (int): batch size 304 | image_size (int): the desired image size of examples 305 | n_classes (int): the number of classes 306 | whitelist_threshold (float): the minimum fraction of whitelisted 307 | classes an example must contain to be used for training. 308 | 309 | Returns: 310 | dataset: a TFRecordDataset 311 | """ 312 | logger.info('Creating dataset from: %s' % filename) 313 | dataset = tf.data.TFRecordDataset(filename) 314 | dataset = dataset.map(cls._decode_example, 315 | num_parallel_calls=parallel_calls) 316 | dataset = dataset.map(lambda x: cls._resize_example(x, image_size), 317 | num_parallel_calls=parallel_calls) 318 | if augment_images: 319 | dataset = dataset.map(cls._augment_example, 320 | num_parallel_calls=parallel_calls) 321 | dataset = dataset.map(cls._preprocess_example, 322 | num_parallel_calls=parallel_calls) 323 | dataset = dataset.map( 324 | lambda x: cls._generate_multiscale_masks(x, n_classes), 325 | num_parallel_calls=parallel_calls 326 | ) 327 | if repeat: 328 | dataset = dataset.repeat() 329 | 330 | dataset = dataset.batch(batch_size) 331 | if prefetch: 332 | dataset = dataset.prefetch(buffer_size=batch_size) 333 | return dataset 334 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/train.py: -------------------------------------------------------------------------------- 1 | """Train an ICNet Model on ADE20K Data.""" 2 | 3 | import argparse 4 | import keras 5 | import logging 6 | import time 7 | import sys 8 | import struct 9 | import os 10 | from tensorflow.python.lib.io import file_io 11 | import tensorflow as tf 12 | from image_segmentation.icnet import ICNetModelFactory 13 | from image_segmentation.data_generator import ADE20KDatasetBuilder 14 | from image_segmentation import dali_config 15 | from google.cloud import storage 16 | 17 | logging.basicConfig(level=logging.INFO) 18 | logger = logging.getLogger('train') 19 | 20 | 21 | def _summarize_arguments(args): 22 | """Summarize input arguments to ICNet model training. 23 | 24 | Args: 25 | args: 26 | """ 27 | 28 | logger.info('ICNet Model training Parameters') 29 | logger.info('-------------------------------') 30 | for key, value in vars(args).items(): 31 | logger.info(' {key}={value}'.format(key=key, value=value)) 32 | 33 | 34 | def _build_parser(argv): 35 | parser = argparse.ArgumentParser( 36 | description='Train an ICNet model.' 
37 | ) 38 | # Data options 39 | parser.add_argument( 40 | '-d', '--data', nargs='+', required=True, 41 | help='A TFRecord file containing images and segmentation masks.' 42 | ) 43 | parser.add_argument( 44 | '--tfindex-files', nargs='+', 45 | help='TFIndex file for dali pipeline. If not included, will be built' 46 | ) 47 | parser.add_argument( 48 | '-l', '--label-filename', type=str, required=True, 49 | help='A file containing a single label per line.' 50 | ) 51 | parser.add_argument( 52 | '-s', '--image-size', type=int, default=768, 53 | help=('The pixel dimension of model input and output. Images ' 54 | 'will be square.') 55 | ) 56 | parser.add_argument( 57 | '-a', '--alpha', type=float, default=1.0, 58 | help='The width multiplier for the network' 59 | ) 60 | parser.add_argument( 61 | '--augment-images', type=bool, default=True, 62 | help='turn on image augmentation.' 63 | ) 64 | parser.add_argument( 65 | '--add-noise', action='store_true', 66 | help='Add gaussian noise to training.' 67 | ) 68 | parser.add_argument( 69 | '--use-dali', action='store_true', 70 | help='turn on image augmentation.' 71 | ) 72 | parser.add_argument( 73 | '--list-labels', action='store_true', 74 | help='If true, print a full list of object labels.' 75 | ) 76 | # Training options 77 | parser.add_argument( 78 | '-b', '--batch-size', type=int, default=8, 79 | help='The training batch_size.' 80 | ) 81 | parser.add_argument( 82 | '--lr', type=float, default=0.001, help='The learning rate.' 83 | ) 84 | parser.add_argument( 85 | '-n', '--num-steps', type=int, default=1000, 86 | help='Number of training steps to perform' 87 | ) 88 | parser.add_argument( 89 | '--steps-per-epoch', type=int, default=100, 90 | help='Number of training steps to perform between model checkpoints' 91 | ) 92 | parser.add_argument( 93 | '-o', '--output', 94 | help='An output file to save the trained model.') 95 | parser.add_argument( 96 | '--gpu-cores', type=int, default=1, 97 | help='Number of GPU cores to run on.') 98 | parser.add_argument( 99 | '--fine-tune-checkpoint', type=str, 100 | help='A Keras model checkpoint to load and continue training.' 101 | ) 102 | parser.add_argument( 103 | '--gcs-bucket', type=str, 104 | help='A GCS Bucket to save models too.' 105 | ) 106 | parser.add_argument( 107 | '--parallel-calls', type=int, default=1, 108 | help='Number of parallel calss to preprocessing to perform.' 109 | ) 110 | parser.add_argument( 111 | '--model-name', type=str, required=True, 112 | help='Short name separated by underscores' 113 | ) 114 | 115 | return parser.parse_known_args() 116 | 117 | 118 | def _prepare_dataset(args, n_classes): 119 | dataset = ADE20KDatasetBuilder.build( 120 | args.data, 121 | n_classes=n_classes, 122 | batch_size=args.batch_size, 123 | image_size=(args.image_size, args.image_size), 124 | augment_images=False, 125 | parallel_calls=args.parallel_calls, 126 | prefetch=True, 127 | ) 128 | 129 | iterator = dataset.make_one_shot_iterator() 130 | example = iterator.get_next() 131 | 132 | return { 133 | 'input': example['image'], 134 | 'mask_4': example['mask_4'], 135 | 'mask_8': example['mask_8'], 136 | 'mask_16': example['mask_16'], 137 | } 138 | 139 | 140 | def build_tfindex_file(tfrecord_file, tfindex_file): 141 | """Builds a tfindex file used by DALI from a tfrecord file. 142 | 143 | Args: 144 | tfrecord_file: Path to TFRecord file. 145 | tfindex_file: output file to write to. 
146 | """ 147 | tfrecord_fp = open(tfrecord_file, 'rb') 148 | idx_fp = open(tfindex_file, 'w') 149 | 150 | while True: 151 | current = tfrecord_fp.tell() 152 | try: 153 | # length 154 | byte_len = tfrecord_fp.read(8) 155 | if byte_len == '': 156 | break 157 | # crc 158 | tfrecord_fp.read(4) 159 | proto_len = struct.unpack('q', byte_len)[0] 160 | # proto 161 | tfrecord_fp.read(proto_len) 162 | # crc 163 | tfrecord_fp.read(4) 164 | idx_fp.write(str(current) + ' ' + 165 | str(tfrecord_fp.tell() - current) + '\n') 166 | except Exception: 167 | print("Not a valid TFRecord file") 168 | break 169 | 170 | tfrecord_fp.close() 171 | idx_fp.close() 172 | 173 | 174 | def _prepare_dali(args, n_classes): 175 | if args.gpu_cores > 1: 176 | logger.error( 177 | 'Have not built in support for more than one GPU at the moment.' 178 | ) 179 | sys.exit(1) 180 | 181 | # non NVIDIA cloud environments will not have dali, so we 182 | # have to do the import here. 183 | from image_segmentation.dali_pipeline import CommonPipeline 184 | import nvidia.dali.plugin.tf as dali_tf 185 | 186 | batch_size = args.batch_size 187 | image_size = args.image_size 188 | device_id = 0 189 | storage_client = storage.Client() 190 | filenames = [] 191 | 192 | for filename in args.data: 193 | if filename.startswith('gs://'): 194 | parts = filename[5:].split('/') 195 | bucket_name, blob_name = parts[0], '/'.join(parts[1:]) 196 | bucket = storage_client.get_bucket(bucket_name) 197 | blob = bucket.blob(blob_name) 198 | download_filename = os.path.basename(blob_name) 199 | blob.download_to_filename(download_filename) 200 | filenames.append(download_filename) 201 | else: 202 | filenames.append(filename) 203 | 204 | tfindex_files = args.tfindex_files or [] 205 | if not tfindex_files: 206 | for path in filenames: 207 | tfindex_file = path.split('.')[0] + '.tfindex' 208 | build_tfindex_file(path, tfindex_file) 209 | logger.info('Created tfindex file: {input} -> {output}'.format( 210 | input=path, 211 | output=tfindex_file 212 | )) 213 | tfindex_files.append(tfindex_file) 214 | 215 | config = dali_config.DaliConfig() 216 | config.summarize() 217 | 218 | pipe = CommonPipeline( 219 | args.batch_size, 220 | args.parallel_calls, 221 | device_id, 222 | args.image_size, 223 | filenames, 224 | tfindex_files, 225 | config 226 | ) 227 | pipe.build() 228 | 229 | daliop = dali_tf.DALIIterator() 230 | with tf.device('/gpu:0'): 231 | results = daliop( 232 | serialized_pipeline=pipe.serialize(), 233 | shape=[args.batch_size, args.image_size, args.image_size, 3], 234 | label_type=tf.int64, 235 | ) 236 | 237 | input_tensor = results.batch 238 | 239 | results.label.set_shape([batch_size, image_size, image_size, 3]) 240 | mask = results.label 241 | new_shape = [image_size / 4, image_size / 4] 242 | mask_4 = ADE20KDatasetBuilder.scale_mask(mask, 4, new_shape, n_classes) 243 | new_shape = [image_size / 8, image_size / 8] 244 | mask_8 = ADE20KDatasetBuilder.scale_mask(mask, 8, new_shape, n_classes) 245 | new_shape = [image_size / 16, image_size / 16] 246 | mask_16 = ADE20KDatasetBuilder.scale_mask(mask, 16, new_shape, n_classes) 247 | 248 | return { 249 | 'input': input_tensor, 250 | 'mask_4': mask_4, 251 | 'mask_8': mask_8, 252 | 'mask_16': mask_16, 253 | } 254 | 255 | 256 | def train(argv): 257 | """Train an ICNet model.""" 258 | 259 | args, unknown = _build_parser(argv) 260 | _summarize_arguments(args) 261 | 262 | class_labels = ADE20KDatasetBuilder.load_class_labels( 263 | args.label_filename) 264 | if args.list_labels: 265 | logger.info('Labels:') 266 | labels 
= '' 267 | for label in class_labels: 268 | labels += '%s\n' % label 269 | logger.info(labels) 270 | sys.exit() 271 | 272 | n_classes = len(class_labels) 273 | 274 | if args.use_dali: 275 | data = _prepare_dali(args, n_classes) 276 | else: 277 | data = _prepare_dataset(args, n_classes) 278 | 279 | if args.add_noise: 280 | logger.info('Adding gaussian noise to input tensor.') 281 | noise = tf.random_normal(shape=tf.shape(data['input']), 282 | mean=0.0, 283 | stddev=0.07, 284 | dtype=tf.float32) 285 | data['input'] = data['input'] + noise 286 | 287 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8) 288 | config = tf.ConfigProto(gpu_options=gpu_options) 289 | sess = tf.Session(config=config) 290 | keras.backend.set_session(sess) 291 | 292 | if args.gpu_cores > 1: 293 | with tf.device('/CPU:0'): 294 | icnet = ICNetModelFactory.build( 295 | args.image_size, 296 | n_classes, 297 | weights_path=args.fine_tune_checkpoint, 298 | train=True, 299 | input_tensor=data['input'], 300 | alpha=args.alpha, 301 | ) 302 | 303 | gpu_icnet = keras.utils.multi_gpu_model(icnet, gpus=args.cores) 304 | gpu_icnet.__setattr__('callback_model', icnet) 305 | model = gpu_icnet 306 | else: 307 | with tf.device('/GPU:0'): 308 | model = ICNetModelFactory.build( 309 | args.image_size, 310 | n_classes, 311 | weights_path=args.fine_tune_checkpoint, 312 | train=True, 313 | input_tensor=data['input'], 314 | alpha=args.alpha, 315 | ) 316 | 317 | optimizer = keras.optimizers.Adam(lr=args.lr) 318 | model.compile( 319 | optimizer, 320 | loss=keras.losses.categorical_crossentropy, 321 | loss_weights=[1.0, 0.4, 0.16], 322 | metrics=['categorical_accuracy'], 323 | target_tensors=[ 324 | data['mask_4'], data['mask_8'], data['mask_16'] 325 | ] 326 | ) 327 | 328 | if not args.output: 329 | output_filename_fmt = '{model_name}_{size}x{size}_{alpha}_{time}.h5' 330 | filename = output_filename_fmt.format( 331 | model_name=args.model_name, 332 | size=args.image_size, 333 | alpha=str(args.alpha).replace('0', '').replace('.', ''), 334 | time=int(time.time()) 335 | ) 336 | else: 337 | filename = args.output 338 | 339 | print("=======================") 340 | print("Output file name: {name}".format(name=filename)) 341 | print("=======================") 342 | 343 | callbacks = [ 344 | keras.callbacks.ModelCheckpoint( 345 | filename, 346 | verbose=0, 347 | mode='auto', 348 | period=1 349 | ), 350 | ] 351 | 352 | if args.gcs_bucket: 353 | callbacks.append(SaveCheckpointToGCS(filename, args.gcs_bucket)) 354 | 355 | model.fit( 356 | steps_per_epoch=args.steps_per_epoch, 357 | epochs=int(args.num_steps / args.steps_per_epoch) + 1, 358 | callbacks=callbacks, 359 | ) 360 | 361 | 362 | class SaveCheckpointToGCS(keras.callbacks.Callback): 363 | """A callback to save local model checkpoints to GCS.""" 364 | 365 | def __init__(self, local_filename, gcs_filename): 366 | """Save a checkpoint to GCS. 
367 | 368 | Args: 369 | local_filename (str): the path of the local checkpoint 370 | gcs_filename (str): the GCS bucket to save the model to 371 | """ 372 | self.gcs_filename = gcs_filename 373 | self.local_filename = local_filename 374 | 375 | @staticmethod 376 | def _copy_file_to_gcs(job_dir, file_path): 377 | gcs_url = os.path.join(job_dir, file_path) 378 | logger.info('Saving models to GCS: %s' % gcs_url) 379 | with file_io.FileIO(file_path, mode='rb') as input_f: 380 | with file_io.FileIO(gcs_url, mode='w+') as output_f: 381 | output_f.write(input_f.read()) 382 | 383 | def on_epoch_end(self, epoch, logs={}): 384 | """Save model to GCS on epoch end. 385 | 386 | Args: 387 | epoch (int): the epoch number 388 | logs (dict, optional): logs dict 389 | """ 390 | basename = os.path.basename(self.local_filename) 391 | self._copy_file_to_gcs(self.gcs_filename, basename) 392 | 393 | 394 | if __name__ == '__main__': 395 | train(sys.argv[1:]) 396 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/fritz_coreml_converter.py: -------------------------------------------------------------------------------- 1 | import coremltools 2 | from coremltools.converters.keras._keras2_converter import * 3 | from coremltools.converters.keras._keras2_converter import _KERAS_LAYER_REGISTRY 4 | from coremltools.converters.keras import _topology2 5 | from coremltools.converters.keras._topology2 import _KERAS_SKIP_LAYERS 6 | from coremltools.models.neural_network import NeuralNetworkBuilder as _NeuralNetworkBuilder 7 | from coremltools.proto import FeatureTypes_pb2 as _FeatureTypes_pb2 8 | from collections import OrderedDict as _OrderedDict 9 | from coremltools.models import datatypes 10 | from coremltools.models import MLModel as _MLModel 11 | from coremltools.models.utils import save_spec as _save_spec 12 | import keras as _keras 13 | from coremltools._deps import HAS_KERAS2_TF as _HAS_KERAS2_TF 14 | import PIL.Image 15 | from six import string_types 16 | from coremltools.proto import FeatureTypes_pb2 as ft 17 | 18 | _IMAGE_SUFFIX = '_image' 19 | 20 | 21 | class FritzCoremlConverter(object): 22 | """A class to convert keras models to coreml. 23 | 24 | This is converter is a modified version of the one that comes packaged with 25 | coremltools, but it allows the user to define custom layer mappings from 26 | keras to coreml. 27 | """ 28 | 29 | @classmethod 30 | def _check_unsupported_layers(cls, model, supported_layers): 31 | """Check for any unsupported layers in the keras model. 32 | 33 | Args: 34 | model - a keras model 35 | supported_layers - a dictionary of supported layers. Keys are keras 36 | layer classes and values are corresponding 37 | coreml layer classes. 38 | """ 39 | for i, layer in enumerate(model.layers): 40 | if (isinstance(layer, _keras.models.Sequential) or 41 | isinstance(layer, _keras.models.Model)): 42 | cls._check_unsupported_layers(layer) 43 | else: 44 | if type(layer) not in supported_layers: 45 | print(supported_layers) 46 | raise ValueError( 47 | "Keras layer '%s' not supported. " % str(type(layer)) 48 | ) 49 | if isinstance(layer, _keras.layers.wrappers.TimeDistributed): 50 | if type(layer.layer) not in supported_layers: 51 | raise ValueError( 52 | "Keras layer '%s' not supported. 
" % 53 | str(type(layer.layer)) 54 | ) 55 | if isinstance(layer, _keras.layers.wrappers.Bidirectional): 56 | if not isinstance(layer.layer, 57 | _keras.layers.recurrent.LSTM): 58 | raise ValueError( 59 | 'Keras bi-directional wrapper conversion supports ' 60 | 'only LSTM layer at this time. ') 61 | 62 | @staticmethod 63 | def _get_layer_converter_fn(layer, supported_layers): 64 | """Get the right converter function for Keras. 65 | 66 | Args: 67 | layer - a keras layer 68 | supported_layers - a dictionary of supported layers. Keys are keras 69 | layer classes and values are corresponding 70 | coreml layer classes. 71 | Returns: 72 | layer - a coreml layer 73 | """ 74 | layer_type = type(layer) 75 | if layer_type in supported_layers: 76 | return supported_layers[layer_type] 77 | else: 78 | raise TypeError( 79 | "Keras layer of type %s is not supported." % type(layer) 80 | ) 81 | 82 | @staticmethod 83 | def _convert_multiarray_output_to_image(spec, feature_name, is_bgr=False): 84 | """Convert Core ML multiarray output to an image output. 85 | 86 | This modifies the core ml spec in place. 87 | 88 | spec - a Core ML spec protobuf object. 89 | feature_name - the name of the output feature to convert 90 | is_bgr - if true, assume image data is already in BGR mode. 91 | Default False 92 | """ 93 | for output in spec.description.output: 94 | if output.name != feature_name: 95 | continue 96 | if output.type.WhichOneof('Type') != 'multiArrayType': 97 | raise ValueError( 98 | "{} is not a multiarray type".format(output.name,) 99 | ) 100 | array_shape = tuple(output.type.multiArrayType.shape) 101 | if len(array_shape) == 2: 102 | height, width = array_shape 103 | output.type.imageType.colorSpace = \ 104 | ft.ImageFeatureType.ColorSpace.Value('GRAYSCALE') 105 | else: 106 | channels, height, width = array_shape 107 | 108 | if channels == 1: 109 | output.type.imageType.colorSpace = \ 110 | ft.ImageFeatureType.ColorSpace.Value('GRAYSCALE') 111 | elif channels == 3: 112 | if is_bgr: 113 | output.type.imageType.colorSpace = \ 114 | ft.ImageFeatureType.ColorSpace.Value('BGR') 115 | else: 116 | output.type.imageType.colorSpace = \ 117 | ft.ImageFeatureType.ColorSpace.Value('RGB') 118 | else: 119 | raise ValueError( 120 | "Channel Value {} not supported for image inputs" 121 | .format(channels,) 122 | ) 123 | 124 | output.type.imageType.width = width 125 | output.type.imageType.height = height 126 | 127 | @classmethod 128 | def convert_keras( 129 | cls, 130 | model, 131 | input_names=None, 132 | output_names=None, 133 | image_input_names=[], 134 | image_output_names=[], 135 | deprocessing_args={}, 136 | is_bgr=False, 137 | is_grayscale=False, 138 | red_bias=0.0, 139 | green_bias=0.0, 140 | blue_bias=0.0, 141 | gray_bias=0.0, 142 | image_scale=1.0, 143 | class_labels=None, 144 | predicted_feature_name=None, 145 | custom_layers=None): 146 | """ 147 | Convert a Keras model to a Core ML Model. 148 | 149 | model - a Keras model to convert 150 | input_names - names of input layers. Default None 151 | output_names - names of output layers. Default None 152 | image_input_names - a list of input names that are image datatypes 153 | image_output_names - a list of output names that are image datatypes 154 | preprocessing_args - a dictionary of arguments for input preprocessing 155 | class_labels - Class labels for outputs, 156 | predicted_feature_name - name for predicted features, 157 | custom_layers - a dictionary of custom layer conversions. 
Keys are 158 | Keras layer classes, values are coreml layer functions 159 | 160 | Returns: 161 | mlmodel - a coreml model object. 162 | """ 163 | if isinstance(model, string_types): 164 | model = _keras.models.load_model(model) 165 | elif isinstance(model, tuple): 166 | model = _load_keras_model(model[0], model[1]) 167 | 168 | # Merge the custom layers with the Keras layer registry 169 | supported_layers = {} 170 | supported_layers.update(_KERAS_LAYER_REGISTRY) 171 | if custom_layers: 172 | supported_layers.update(custom_layers) 173 | 174 | # Check valid versions 175 | cls._check_unsupported_layers(model, supported_layers) 176 | 177 | # Build network graph to represent Keras model 178 | graph = _topology2.NetGraph(model) 179 | graph.build() 180 | graph.remove_skip_layers(_KERAS_SKIP_LAYERS) 181 | graph.insert_1d_permute_layers() 182 | graph.insert_permute_for_spatial_bn() 183 | graph.defuse_activation() 184 | graph.remove_internal_input_layers() 185 | graph.make_output_layers() 186 | 187 | # The graph should be finalized before executing this 188 | graph.generate_blob_names() 189 | graph.add_recurrent_optionals() 190 | 191 | inputs = graph.get_input_layers() 192 | outputs = graph.get_output_layers() 193 | 194 | # check input / output names validity 195 | if input_names is not None: 196 | if isinstance(input_names, string_types): 197 | input_names = [input_names] 198 | else: 199 | input_names = ['input' + str(i + 1) for i in range(len(inputs))] 200 | if output_names is not None: 201 | if isinstance(output_names, string_types): 202 | output_names = [output_names] 203 | else: 204 | output_names = ['output' + str(i + 1) for i in range(len(outputs))] 205 | 206 | if (image_input_names is not None and 207 | isinstance(image_input_names, string_types)): 208 | image_input_names = [image_input_names] 209 | 210 | graph.reset_model_input_names(input_names) 211 | graph.reset_model_output_names(output_names) 212 | 213 | # Keras -> Core ML input dimension dictionary 214 | # (None, None) -> [1, 1, 1, 1, 1] 215 | # (None, D) -> [D] or [D, 1, 1, 1, 1] 216 | # (None, Seq, D) -> [Seq, 1, D, 1, 1] 217 | # (None, H, W, C) -> [C, H, W] 218 | # (D) -> [D] 219 | # (Seq, D) -> [Seq, 1, 1, D, 1] 220 | # (Batch, Sequence, D) -> [D] 221 | 222 | # Retrieve input shapes from model 223 | if type(model.input_shape) is list: 224 | input_dims = [filter(None, x) for x in model.input_shape] 225 | unfiltered_shapes = model.input_shape 226 | else: 227 | input_dims = [filter(None, model.input_shape)] 228 | unfiltered_shapes = [model.input_shape] 229 | 230 | for idx, dim in enumerate(input_dims): 231 | unfiltered_shape = unfiltered_shapes[idx] 232 | dim = list(dim) 233 | if len(dim) == 0: 234 | # Used to be [None, None] before filtering; indicating 235 | # unknown sequence length 236 | input_dims[idx] = tuple([1]) 237 | elif len(dim) == 1: 238 | s = graph.get_successors(inputs[idx])[0] 239 | if isinstance(graph.get_keras_layer(s), 240 | _keras.layers.embeddings.Embedding): 241 | # Embedding layer's special input (None, D) where D is 242 | # actually sequence length 243 | input_dims[idx] = (1,) 244 | else: 245 | input_dims[idx] = dim # dim is just a number 246 | elif len(dim) == 2: # [Seq, D] 247 | input_dims[idx] = (dim[1],) 248 | elif len(dim) == 3: # H,W,C 249 | if (len(unfiltered_shape) > 3): 250 | # keras uses the reverse notation from us 251 | input_dims[idx] = (dim[2], dim[0], dim[1]) 252 | else: 253 | # keras provided fixed batch and sequence length, so 254 | # the input was (batch, sequence, channel) 255 | input_dims[idx] 
= (dim[2],) 256 | else: 257 | raise ValueError( 258 | 'Input' + input_names[idx] + 'has input shape of length' + 259 | str(len(dim))) 260 | 261 | # Retrieve output shapes from model 262 | if type(model.output_shape) is list: 263 | output_dims = [filter(None, x) for x in model.output_shape] 264 | else: 265 | output_dims = [filter(None, model.output_shape[1:])] 266 | 267 | for idx, dim in enumerate(output_dims): 268 | dim = list(dim) 269 | if len(dim) == 1: 270 | output_dims[idx] = dim 271 | elif len(dim) == 2: # [Seq, D] 272 | output_dims[idx] = (dim[1],) 273 | elif len(dim) == 3: 274 | output_dims[idx] = (dim[2], dim[0], dim[1]) 275 | 276 | input_types = [datatypes.Array(*dim) for dim in input_dims] 277 | output_types = [datatypes.Array(*dim) for dim in output_dims] 278 | 279 | # Some of the feature handling is sensitive about string vs unicode 280 | input_names = map(str, input_names) 281 | output_names = map(str, output_names) 282 | is_classifier = class_labels is not None 283 | if is_classifier: 284 | mode = 'classifier' 285 | else: 286 | mode = None 287 | 288 | # assuming these match 289 | input_features = list(zip(input_names, input_types)) 290 | output_features = list(zip(output_names, output_types)) 291 | 292 | builder = _NeuralNetworkBuilder( 293 | input_features, output_features, mode=mode 294 | ) 295 | 296 | for iter, layer in enumerate(graph.layer_list): 297 | keras_layer = graph.keras_layer_map[layer] 298 | print("%d : %s, %s" % (iter, layer, keras_layer)) 299 | if isinstance(keras_layer, _keras.layers.wrappers.TimeDistributed): 300 | keras_layer = keras_layer.layer 301 | 302 | converter_func = cls._get_layer_converter_fn( 303 | keras_layer, supported_layers 304 | ) 305 | input_names, output_names = graph.get_layer_blobs(layer) 306 | converter_func( 307 | builder, 308 | layer, 309 | input_names, 310 | output_names, 311 | keras_layer 312 | ) 313 | 314 | # Set the right inputs and outputs on the model description (interface) 315 | builder.set_input(input_names, input_dims) 316 | builder.set_output(output_names, output_dims) 317 | 318 | # Since we aren't mangling anything the user gave us, we only need to 319 | # update the model interface here 320 | builder.add_optionals(graph.optional_inputs, graph.optional_outputs) 321 | 322 | # Add classifier classes (if applicable) 323 | if is_classifier: 324 | classes_in = class_labels 325 | if isinstance(classes_in, string_types): 326 | import os 327 | if not os.path.isfile(classes_in): 328 | raise ValueError( 329 | "Path to class labels (%s) does not exist." 
% 330 | classes_in 331 | ) 332 | with open(classes_in, 'r') as f: 333 | classes = f.read() 334 | classes = classes.splitlines() 335 | elif type(classes_in) is list: # list[int or str] 336 | classes = classes_in 337 | else: 338 | raise ValueError( 339 | 'Class labels must be a list of integers / ' 340 | 'strings, or a file path' 341 | ) 342 | 343 | if predicted_feature_name is not None: 344 | builder.set_class_labels( 345 | classes, 346 | predicted_feature_name=predicted_feature_name 347 | ) 348 | else: 349 | builder.set_class_labels(classes) 350 | 351 | # Set pre-processing paramsters 352 | builder.set_pre_processing_parameters( 353 | image_input_names=image_input_names, 354 | is_bgr=is_bgr, 355 | red_bias=red_bias, 356 | green_bias=green_bias, 357 | blue_bias=blue_bias, 358 | gray_bias=gray_bias, 359 | image_scale=image_scale) 360 | 361 | # Convert the image outputs to actual image datatypes 362 | for output_name in output_names: 363 | if output_name in image_output_names: 364 | cls._convert_multiarray_output_to_image( 365 | builder.spec, output_name, is_bgr=is_bgr 366 | ) 367 | 368 | # Return the protobuf spec 369 | spec = builder.spec 370 | return _MLModel(spec) 371 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/icnet.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from functools import partial 3 | import os 4 | 5 | from keras.layers import Activation 6 | from keras.layers import Conv2D 7 | from keras.layers import Add 8 | from keras.layers import MaxPooling2D 9 | from keras.layers import AveragePooling2D 10 | from keras.layers import ZeroPadding2D 11 | from keras.layers import Input 12 | from keras.layers import BatchNormalization 13 | from keras.layers import UpSampling2D 14 | from keras.models import Model 15 | 16 | from tensorflow.python.lib.io import file_io 17 | logger = logging.getLogger('icnet') 18 | 19 | 20 | class ICNetModelFactory(object): 21 | """Generates ICNet Keras Models.""" 22 | 23 | @staticmethod 24 | def _light_cnn_block( 25 | out, 26 | filter_scale, 27 | block_name, 28 | strides=[1, 1, 1], 29 | include_projection=False): 30 | """Construct a light convolution block. 31 | 32 | Light convolution blocks are used to extract features at the start 33 | of a branch for a given scale in the pyramid network. 34 | 35 | Args: 36 | out - The output from a previous Keras layer 37 | filter_scale (int) - the base number of filters for the block 38 | block_name (str) - the name prefix for the block 39 | strides (optional, List[Int]) - a list of strides for each layer 40 | in the block. 
If a projection convolution is included, the 41 | stride is set to be the same as the first convolution 42 | include_projection (optional, bool) - if true, include a projection 43 | convolution 44 | Returns 45 | out - a keras layer output 46 | """ 47 | conv_fn = partial( 48 | Conv2D, 49 | kernel_size=3, 50 | padding='same', 51 | use_bias=False, 52 | activation='relu' 53 | ) 54 | 55 | out = conv_fn( 56 | filters=filter_scale, 57 | strides=strides[0], 58 | name='%s_1_3x3' % block_name)(out) 59 | out = BatchNormalization(name='%s_1_3x3_bn' % block_name)(out) 60 | out = conv_fn( 61 | filters=filter_scale, 62 | strides=strides[1], 63 | name='%s_2_3x3' % block_name)(out) 64 | out = BatchNormalization(name='%s_2_3x3_bn' % block_name)(out) 65 | out = conv_fn( 66 | filters=filter_scale * 2, 67 | strides=strides[2], 68 | name='%s_3_3x3' % block_name)(out) 69 | out = BatchNormalization(name='%s_3_3x3_bn' % block_name)(out) 70 | 71 | if include_projection: 72 | out = Conv2D( 73 | filters=filter_scale * 4, 74 | kernel_size=1, 75 | name='%s_proj' % block_name 76 | )(out) 77 | out = BatchNormalization(name='%s_proj_bn' % block_name)(out) 78 | 79 | return out 80 | 81 | @staticmethod 82 | def _inner_conv_block( 83 | out, 84 | filter_scale, 85 | block_name, 86 | strides=[1, 1, 1], 87 | dilation_rate=1): 88 | """Construct an inner convolution block. 89 | 90 | Inner convolution blocks are found repeatedly in the ICNet structure. 91 | 92 | Args: 93 | out - The output from a previous Keras layer 94 | filter_scale (int) - the base number of filters for the block 95 | block_name (str) - the name prefix for the block 96 | strides (optional, List[Int]) - a list of strides for each layer 97 | in the block. If a projection convolution is included, the 98 | stride is set to be the same as the first convolution 99 | dilation_rate (optional, Int) - a dilation rate to include atrous 100 | convolutions for certain blocks 101 | 102 | Returns 103 | out - a keras layer output 104 | """ 105 | conv_fn = partial( 106 | Conv2D, 107 | activation='relu', 108 | use_bias=False, 109 | ) 110 | out = conv_fn( 111 | filters=filter_scale, 112 | kernel_size=1, 113 | strides=strides[0], 114 | name='%s_1x1_reduce' % block_name)(out) 115 | out = BatchNormalization(name='%s_1x1_reduce_bn' % block_name)(out) 116 | out = ZeroPadding2D( 117 | padding=dilation_rate, 118 | name='%s_padding' % block_name)(out) 119 | out = conv_fn( 120 | filters=filter_scale, 121 | kernel_size=3, 122 | strides=strides[1], 123 | dilation_rate=dilation_rate, 124 | name='%s_3x3' % block_name)(out) 125 | out = BatchNormalization(name='%s_3x3_bn' % block_name)(out) 126 | out = conv_fn( 127 | filters=filter_scale * 4, 128 | kernel_size=1, 129 | activation=None, 130 | strides=strides[2], 131 | name='%s_1x1_increase' % block_name)(out) 132 | out = BatchNormalization(name='%s_1x1_increase_bn' % block_name)(out) 133 | return out 134 | 135 | @classmethod 136 | def _conv_block( 137 | cls, 138 | out, 139 | filter_scale, 140 | block_name, 141 | include_projection=False, 142 | strides=[1, 1, 1], 143 | dilation_rate=1): 144 | """Construct an convolution block. 145 | 146 | Convolution blocks are found repeatedly in the ICNet structure. 147 | The block is structured similarly to a residual block with multiple 148 | branches. 
149 | 150 | Args: 151 | out - The output from a previous Keras layer 152 | filter_scale (int) - the base number of filters for the block 153 | block_name (str) - the name prefix for the block 154 | include_projection (optional, bool) - if true, include a projection 155 | convolution 156 | strides (optional, List[Int]) - a list of strides for each layer 157 | in the block. If a projection convolution is included, the 158 | stride is set to be the same as the first convolution 159 | dilation_rate (optional, Int) - a dilation rate to include atrous 160 | convolutions for certain blocks 161 | 162 | Returns 163 | out - a keras layer output 164 | """ 165 | # Branch A 166 | if include_projection: 167 | out_a = Conv2D( 168 | filters=filter_scale * 4, 169 | kernel_size=1, 170 | use_bias=False, 171 | strides=strides[0], 172 | name='%s_1x1_proj' % block_name 173 | )(out) 174 | out_a = BatchNormalization( 175 | name='%s_1x1_proj_bn' % block_name 176 | )(out_a) 177 | else: 178 | out_a = out 179 | 180 | # Branch B 181 | out_b = cls._inner_conv_block( 182 | out, 183 | filter_scale, 184 | block_name, 185 | strides=strides, 186 | dilation_rate=dilation_rate 187 | ) 188 | 189 | # Combine 190 | out = Add(name='%s_add' % block_name)([out_a, out_b]) 191 | out = Activation('relu', name='%s_relu' % block_name)(out) 192 | return out 193 | 194 | @staticmethod 195 | def _cff_block( 196 | out_a, 197 | out_b, 198 | filter_scale, 199 | block_name, 200 | include_projection=False): 201 | """Construct an cascading feature fusion (CFF) block. 202 | 203 | CFF blocks are used to fuse features extracted from multiple scales. 204 | 205 | Args: 206 | out_a - The output layer from lower resoltuon branch 207 | out_b - The output layer from the higher resolution branch to be 208 | merged. 209 | filter_scale (int) - the base number of filters for the block 210 | block_name (str) - the name prefix for the block 211 | include_projection (optional, bool) - if true, include a projection 212 | convolution 213 | Returns 214 | out - a keras layer output 215 | """ 216 | aux_1 = UpSampling2D(size=(2, 2), name='%s_interp' % block_name, 217 | interpolation='bilinear')(out_a) 218 | out_a = ZeroPadding2D(padding=2, name='%s_padding' % block_name)(aux_1) 219 | out_a = Conv2D( 220 | filters=filter_scale, 221 | kernel_size=3, 222 | dilation_rate=2, 223 | use_bias=False, 224 | name='%s_conv_3x3' % block_name 225 | )(out_a) 226 | out_a = BatchNormalization(name='%s_conv_bn' % block_name)(out_a) 227 | 228 | if include_projection: 229 | out_b = Conv2D( 230 | filters=filter_scale, 231 | kernel_size=1, 232 | use_bias=False, 233 | name='%s_proj' % block_name)(out_b) 234 | out_b = BatchNormalization(name='%s_proj_bn' % block_name)(out_b) 235 | 236 | out_a = Add(name='%s_sum' % block_name)([out_a, out_b]) 237 | out_a = Activation('relu', name='%s_sum_relu' % block_name)(out_a) 238 | 239 | return out_a, aux_1 240 | 241 | @classmethod 242 | def build( 243 | cls, 244 | img_size, 245 | n_classes, 246 | alpha=1.0, 247 | weights_path=None, 248 | train=False, 249 | input_tensor=None): 250 | """Build an ICNet Model. 251 | 252 | Args: 253 | image_size (int): the size of each image. only square images are 254 | supported. 255 | n_classes (int): the number of output labels to predict. 256 | weights_path (str): (optional) a path to a Keras model file to 257 | load after the network is constructed. Useful for re-training. 258 | train (bool): (optional) if true, add additional output nodes to 259 | the network for training. 
260 | 261 | Returns: 262 | model (keras.models.Model): A Keras model 263 | """ 264 | if img_size % 384 != 0: 265 | raise Exception('`img_size` must be a multiple of 384.') 266 | logger.info('Building ICNet model.') 267 | inpt = Input(shape=(img_size, img_size, 3), tensor=input_tensor) 268 | 269 | # The full scale branch 270 | out_1 = cls._light_cnn_block( 271 | inpt, 272 | filter_scale=int(alpha * 32), 273 | strides=[2, 2, 2], 274 | include_projection=True, 275 | block_name='sub1_conv' 276 | ) 277 | 278 | # The 1/2 scale branch 279 | out_2 = AveragePooling2D(pool_size=(2, 2), name='sub2_data')(inpt) 280 | out_2 = cls._light_cnn_block( 281 | out_2, 282 | filter_scale=int(alpha * 32), 283 | strides=[2, 1, 1], 284 | block_name='sub2_conv' 285 | ) 286 | out_2 = MaxPooling2D( 287 | pool_size=3, strides=2, name='sub2_pool1_3x3' 288 | )(out_2) 289 | 290 | for layer_index in range(1, 4): 291 | out_2 = cls._conv_block( 292 | out_2, 293 | filter_scale=int(alpha * 32), 294 | include_projection=(layer_index == 1), 295 | block_name='sub2_conv%d_%d' % (2, layer_index) 296 | ) 297 | 298 | # The third large conv block gets split off into another branch. 299 | out_2 = cls._conv_block( 300 | out_2, 301 | filter_scale=int(alpha * 64), 302 | include_projection=True, 303 | strides=[2, 1, 1], 304 | block_name='sub2_conv%d_%d' % (3, 1) 305 | ) 306 | 307 | # The 1/4 scale branch 308 | out_4 = AveragePooling2D(pool_size=(2, 2), name='sub4_conv3_1')(out_2) 309 | 310 | for layer_index in range(2, 5): 311 | out_4 = cls._conv_block( 312 | out_4, 313 | filter_scale=int(alpha * 64), 314 | block_name='sub4_conv%d_%d' % (3, layer_index) 315 | ) 316 | 317 | for layer_index in range(1, 7): 318 | out_4 = cls._conv_block( 319 | out_4, 320 | filter_scale=int(alpha * 128), 321 | dilation_rate=2, 322 | include_projection=(layer_index == 1), 323 | block_name='sub4_conv%d_%d' % (4, layer_index) 324 | ) 325 | 326 | for sub_index in range(1, 4): 327 | out_4 = cls._conv_block( 328 | out_4, 329 | filter_scale=int(alpha * 256), 330 | dilation_rate=4, 331 | include_projection=(sub_index == 1), 332 | block_name='sub4_conv%d_%d' % (5, sub_index) 333 | ) 334 | # In this version we've fixed the input dimensions to be square 335 | # We also are restricting dimsensions to be multiples of 384 which 336 | # will allow us to use standard upsampling layers for resizing. 
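# Note on the pooling arithmetic below: the sub4 branch reaches this pyramid
# pooling stage at 1/32 of the input resolution, so pool_height works out to
# 12 * pool_scale (e.g. 24 for a 768x768 input). The four average-pooling
# branches shrink that map to roughly 1x1, 2x2, 3x3, and 4x4 and are then
# upsampled by 12, 6, 4, and 3 times pool_scale respectively, which brings
# every branch back to pool_height x pool_height so it can be summed with
# out_4.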
337 | pool_height, _ = out_4.shape[1:3].as_list() 338 | pool_scale = int(img_size / 384) 339 | pool1 = AveragePooling2D(pool_size=pool_height, 340 | strides=pool_height, 341 | name='sub4_conv5_3_pool1')(out_4) 342 | pool1 = UpSampling2D(size=12 * pool_scale, 343 | name='sub4_conv5_3_pool1_interp', 344 | interpolation='bilinear')(pool1) 345 | pool2 = AveragePooling2D(pool_size=pool_height // 2, 346 | strides=pool_height // 2, 347 | name='sub4_conv5_3_pool2')(out_4) 348 | pool2 = UpSampling2D(size=6 * pool_scale, 349 | name='sub4_conv5_3_pool2_interp', 350 | interpolation='bilinear')(pool2) 351 | pool3 = AveragePooling2D(pool_size=pool_height // 3, 352 | strides=pool_height // 3, 353 | name='sub4_conv5_3_pool3')(out_4) 354 | pool3 = UpSampling2D(size=4 * pool_scale, 355 | name='sub4_conv5_3_pool3_interp', 356 | interpolation='bilinear')(pool3) 357 | pool4 = AveragePooling2D(pool_size=pool_height // 4, 358 | strides=pool_height // 4, 359 | name='sub4_conv5_3_pool4')(out_4) 360 | pool4 = UpSampling2D(size=3 * pool_scale, 361 | name='sub4_conv5_3_pool6_interp', 362 | interpolation='bilinear')(pool4) 363 | 364 | out_4 = Add( 365 | name='sub4_conv5_3_sum' 366 | )([out_4, pool1, pool2, pool3, pool4]) 367 | out_4 = Conv2D( 368 | filters=int(alpha * 256), 369 | kernel_size=1, 370 | activation='relu', 371 | use_bias=False, 372 | name='sub4_conv5_4_k1')(out_4) 373 | out_4 = BatchNormalization(name='sub4_conv5_4_k1_bn')(out_4) 374 | 375 | out_2, aux_1 = cls._cff_block( 376 | out_4, 377 | out_2, 378 | int(alpha * 128), 379 | block_name='sub24_cff', 380 | include_projection=True 381 | ) 382 | 383 | out_1, aux_2 = cls._cff_block( 384 | out_2, 385 | out_1, 386 | int(alpha * 128), 387 | block_name='sub12_cff' 388 | ) 389 | out_1 = UpSampling2D(size=(2, 2), name='sub12_sum_interp', 390 | interpolation='bilinear')(out_1) 391 | 392 | out_1 = Conv2D(n_classes, 1, activation='softmax', 393 | name='conv6_cls')(out_1) 394 | 395 | out = UpSampling2D(size=(4, 4), name='conv6_interp', 396 | interpolation='bilinear')(out_1) 397 | 398 | if train: 399 | aux_1 = Conv2D(n_classes, 1, activation='softmax', 400 | name='sub4_out')(aux_1) 401 | aux_2 = Conv2D(n_classes, 1, activation='softmax', 402 | name='sub24_out')(aux_2) 403 | # The loss during training is generated from these three outputs. 404 | # The final output layer is not needed. 405 | model = Model(inputs=inpt, outputs=[out_1, aux_2, aux_1]) 406 | else: 407 | model = Model(inputs=inpt, outputs=out) 408 | 409 | if weights_path is not None: 410 | if weights_path.startswith('gs://'): 411 | weights_path = _copy_file_from_gcs(weights_path) 412 | logger.info('Loading weights from %s.' % weights_path) 413 | model.load_weights(weights_path, by_name=True) 414 | logger.info('Done building model.') 415 | 416 | return model 417 | 418 | 419 | def _copy_file_from_gcs(file_path): 420 | """Copy a file from gcs to local machine. 421 | 422 | Args: 423 | file_path (str): a GCS url to download 424 | Returns: 425 | str: a local path to the file 426 | """ 427 | logger.info('Downloading %s' % file_path) 428 | with file_io.FileIO(file_path, mode='rb') as input_f: 429 | basename = os.path.basename(file_path) 430 | local_path = os.path.join('/tmp', basename) 431 | with file_io.FileIO(local_path, mode='w+') as output_f: 432 | output_f.write(input_f.read()) 433 | return local_path 434 | --------------------------------------------------------------------------------
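As a usage illustration (not part of the repository), the following is a minimal sketch of how ICNetModelFactory.build might be called for inference. The checkpoint path and class count are assumptions; the import path follows the image_segmentation package layout shown above.

import numpy as np

from image_segmentation.icnet import ICNetModelFactory

# `img_size` must be a multiple of 384. With train=False the factory returns
# a model with a single softmax output upsampled back to the input resolution.
model = ICNetModelFactory.build(
    img_size=768,
    n_classes=151,                       # illustrative label count
    alpha=1.0,
    weights_path='trained_weights.h5',   # hypothetical checkpoint path
    train=False,
)

# Per-pixel class probabilities; output shape is (1, 768, 768, n_classes).
dummy_batch = np.zeros((1, 768, 768, 3), dtype='float32')
probabilities = model.predict(dummy_batch)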