├── image_segmentation
│   ├── utils
│   │   ├── __init__.py
│   │   ├── tfrecord2idx
│   │   ├── deeplab_model.py
│   │   ├── compare_models.py
│   │   ├── tfrecord_helpers.py
│   │   └── model_helpers.py
│   ├── image_segmentation
│   │   ├── __init__.py
│   │   ├── dali_config.py
│   │   ├── utils.py
│   │   ├── build_data.py
│   │   ├── dali_pipeline.py
│   │   ├── data_generator.py
│   │   ├── train.py
│   │   └── icnet.py
│   ├── config.yaml
│   ├── .gitattributes
│   ├── examples
│   │   ├── living_room.jpg
│   │   ├── example_image_and_mask.png
│   │   ├── example_pixel_probabilities.png
│   │   ├── icnet_768x768_living_room.h5
│   │   ├── icnet_768x768_living_room.zip
│   │   └── icnet_768x768_living_room.mlmodel
│   ├── nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl
│   ├── requirements.txt
│   ├── LICENSE
│   ├── convert_to_coreml.py
│   ├── setup.py
│   ├── Makefile
│   ├── coco_object_info.txt
│   ├── objectInfo150.txt
│   ├── README.md
│   └── create_tfrecord_dataset.py
├── style_transfer
│   ├── style_transfer
│   │   ├── __init__.py
│   │   ├── dataset_builder.py
│   │   ├── utils.py
│   │   ├── layers.py
│   │   ├── layer_converters.py
│   │   ├── train.py
│   │   ├── models.py
│   │   └── fritz_coreml_converter.py
│   ├── example
│   │   ├── dog.jpg
│   │   ├── starry_night.jpg
│   │   ├── stylized_dog.jpg
│   │   ├── starry_night_results.jpg
│   │   ├── starry_night.h5
│   │   ├── starry_night_256x256_025.h5
│   │   ├── starry_night_640x480_025.mlmodel
│   │   ├── starry_night_256x256_small_a03.h5
│   │   ├── starry_night_640x480_025_optimized.pb
│   │   └── starry_night_640x480_small_a03_q8.mlmodel
│   ├── setup.py
│   ├── stylize_image.py
│   ├── requirements.txt
│   ├── convert_to_coreml.py
│   ├── convert_to_tfmobile.py
│   ├── create_training_dataset.py
│   └── README.md
├── resources
│   ├── README.md
│   └── AI_Landscape.md
├── create_ml_playgrounds
│   ├── pneumonia_detector
│   │   ├── Pneumonia.playground
│   │   │   ├── Contents.swift
│   │   │   └── contents.xcplayground
│   │   ├── Pneumonia.mlmodel
│   │   └── README.md
│   ├── subreddit_suggester
│   │   ├── data.json
│   │   ├── SubredditSuggester.mlmodel
│   │   └── SubredditSuggester.playground
│   │       ├── contents.xcplayground
│   │       └── Contents.swift
│   └── README.md
├── .gitattributes
├── LICENSE
├── README.md
└── .gitignore
/image_segmentation/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/style_transfer/style_transfer/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/image_segmentation/image_segmentation/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/image_segmentation/config.yaml:
--------------------------------------------------------------------------------
1 | trainingInput:
2 | scaleTier: BASIC_GPU
3 |
--------------------------------------------------------------------------------
/style_transfer/example/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fritzlabs/fritz-models/master/style_transfer/example/dog.jpg
--------------------------------------------------------------------------------
/image_segmentation/.gitattributes:
--------------------------------------------------------------------------------
1 | nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/style_transfer/example/starry_night.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fritzlabs/fritz-models/master/style_transfer/example/starry_night.jpg
--------------------------------------------------------------------------------
/style_transfer/example/stylized_dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fritzlabs/fritz-models/master/style_transfer/example/stylized_dog.jpg
--------------------------------------------------------------------------------
/image_segmentation/examples/living_room.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fritzlabs/fritz-models/master/image_segmentation/examples/living_room.jpg
--------------------------------------------------------------------------------
/style_transfer/example/starry_night_results.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fritzlabs/fritz-models/master/style_transfer/example/starry_night_results.jpg
--------------------------------------------------------------------------------
/resources/README.md:
--------------------------------------------------------------------------------
1 | # Resources
2 |
3 | Additional, non-code resources for machine learning / AI.
4 |
5 | * [AI Startup Landscape](./AI_Landscape.md)
6 |
--------------------------------------------------------------------------------
/image_segmentation/examples/example_image_and_mask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fritzlabs/fritz-models/master/image_segmentation/examples/example_image_and_mask.png
--------------------------------------------------------------------------------
/image_segmentation/examples/example_pixel_probabilities.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fritzlabs/fritz-models/master/image_segmentation/examples/example_pixel_probabilities.png
--------------------------------------------------------------------------------
/create_ml_playgrounds/pneumonia_detector/Pneumonia.playground/Contents.swift:
--------------------------------------------------------------------------------
1 | import CreateMLUI
2 |
3 |
4 | let builder = MLImageClassifierBuilder()
5 | builder.showInLiveView()
6 |
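7 | // showInLiveView() opens Create ML's interactive image classifier UI in the
8 | // playground's live view; dropping a folder of labeled chest X-ray images there
9 | // trains the classifier, which can then be exported as an .mlmodel.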
--------------------------------------------------------------------------------
/style_transfer/example/starry_night.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:9c522b30d709051fe24c06dd4ac27f0bd58101ce68e06fc79e1454d0424678cb
3 | size 569464
4 |
--------------------------------------------------------------------------------
/style_transfer/example/starry_night_256x256_025.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:eb03d6faa1e226da19c82eb6d250d84db12a166d06a8332cfe0a7989b36bcce8
3 | size 569496
4 |
--------------------------------------------------------------------------------
/create_ml_playgrounds/subreddit_suggester/data.json:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:be520cc19d10060552788eb6462640f631be0a845eb8f88048b14e47658d82eb
3 | size 3345068
4 |
--------------------------------------------------------------------------------
/style_transfer/example/starry_night_640x480_025.mlmodel:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:4b6abac67c45d5385fc17b46f32e1cc5ed1f9107c053344ed9a9757c47aba738
3 | size 438131
4 |
--------------------------------------------------------------------------------
/create_ml_playgrounds/pneumonia_detector/Pneumonia.mlmodel:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:98fecaa9cd499fec169718e2b1156c048393f8b436891df4f9e305f5846c7238
3 | size 16980
4 |
--------------------------------------------------------------------------------
/image_segmentation/examples/icnet_768x768_living_room.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a4d544785680f713f0091b83b938c22a56e1c8f981d1321adf4355097f9fac4c
3 | size 81300248
4 |
--------------------------------------------------------------------------------
/image_segmentation/examples/icnet_768x768_living_room.zip:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:11a3dee73201e2dbeaae9112b8adb2281485d5d79ec1611dd87f3c0b74ed1eae
3 | size 98526975
4 |
--------------------------------------------------------------------------------
/style_transfer/example/starry_night_256x256_small_a03.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:d3c21d53dda54dd6df0abc6d3b0c7637bc64c7fb9997b3fa29c97fbdc1bd61e1
3 | size 153272
4 |
--------------------------------------------------------------------------------
/style_transfer/example/starry_night_640x480_025_optimized.pb:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:6262e219e71a9ebb201a13400d817b37625bd7231aedd20e7c22abc45c5d7506
3 | size 478672
4 |
--------------------------------------------------------------------------------
/image_segmentation/examples/icnet_768x768_living_room.mlmodel:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:3f3009bb2bb3b056b707527b4f54aaa8f1df28a46da7e03e1922379f540bf15e
3 | size 26938492
4 |
--------------------------------------------------------------------------------
/style_transfer/example/starry_night_640x480_small_a03_q8.mlmodel:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:bc44cfa2aa8c056ff5fac3c83dd80c57731840ed5e68b6272cda34b6490fbfa4
3 | size 16876
4 |
--------------------------------------------------------------------------------
/create_ml_playgrounds/subreddit_suggester/SubredditSuggester.mlmodel:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:7d409517ad9098ba80a33601df3d542a84edc9bb9f1d0494ff8862a96387cb6c
3 | size 1100617
4 |
--------------------------------------------------------------------------------
/image_segmentation/nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:5d9a9865809b1a6f91a1c6033d6ba881a23d29e424f1bdb2b19e3b01177977d4
3 | size 17489870
4 |
--------------------------------------------------------------------------------
/create_ml_playgrounds/pneumonia_detector/README.md:
--------------------------------------------------------------------------------
1 | # Detecting Pneumonia in an iOS App with Create ML
2 |
3 | A Swift playground used to train an image classifier for this [blog post](https://heartbeat.fritz.ai/detecting-pneumonia-in-an-ios-app-with-create-ml-5cff2a60a3d).
4 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.h5 filter=lfs diff=lfs merge=lfs -text
2 | *.mlmodel filter=lfs diff=lfs merge=lfs -text
3 | *.pb filter=lfs diff=lfs merge=lfs -text
4 | *.zip filter=lfs diff=lfs merge=lfs -text
5 | *.whl filter=lfs diff=lfs merge=lfs -text
6 | *.json filter=lfs diff=lfs merge=lfs -text
7 |
--------------------------------------------------------------------------------
/create_ml_playgrounds/pneumonia_detector/Pneumonia.playground/contents.xcplayground:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/create_ml_playgrounds/subreddit_suggester/SubredditSuggester.playground/contents.xcplayground:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/image_segmentation/requirements.txt:
--------------------------------------------------------------------------------
1 | Keras==2.2.4
2 | h5py==2.7.1
3 | numpy==1.14.3
4 | matplotlib==2.2.2
5 | scikit-image==0.13.1
6 | Pillow==5.1.0
7 | six==1.10.0
8 | # Forked coremltools which includes fix for bilinear upsampling.
9 | # Update this after it has been merged into master.
10 | -e git+git@github.com:ghop02/coremltools.git@289-add-keras-bilinear-upsampling#egg=coremltools
11 |
--------------------------------------------------------------------------------
/style_transfer/setup.py:
--------------------------------------------------------------------------------
1 | """Setup script for style_transfer."""
2 |
3 | from setuptools import find_packages
4 | from setuptools import setup
5 |
6 |
7 | REQUIRED_PACKAGES = ['h5py', 'keras==2.1.2', 'Pillow']
8 |
9 | setup(
10 | name='style_transfer',
11 | version='1.0',
12 | install_requires=REQUIRED_PACKAGES,
13 | include_package_data=True,
14 | packages=[p for p in find_packages() if p.startswith('style_transfer')],
15 | description='Fritz Style Transfer Library',
16 | )
17 |
--------------------------------------------------------------------------------
/create_ml_playgrounds/README.md:
--------------------------------------------------------------------------------
1 | # Create ML Playgrounds
2 | A collection of Swift playgrounds using Create ML to train Core ML models.
3 |
4 | ## Playgrounds
5 |
6 | * [Subreddit Suggester](https://github.com/fritzlabs/fritz-models/tree/master/create_ml_playgrounds/subreddit_suggester): Reduce the number of clicks required for submitting posts to Reddit by automatically suggesting a subreddit based on the post's title.
7 | * [Pneumonia Detector](https://github.com/fritzlabs/fritz-models/tree/master/create_ml_playgrounds/pneumonia_detector): Classify X-ray images to detect pediatric pneumonia.
8 |
--------------------------------------------------------------------------------
/image_segmentation/utils/tfrecord2idx:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import sys
3 | import struct
4 |
5 | if len(sys.argv) < 3:
6 | print("Usage: tfrecord2idx ")
7 | exit()
8 |
9 | f = open(sys.argv[1], 'rb')
10 | idx = open(sys.argv[2], 'w')
11 |
12 | while True:
13 | current = f.tell()
14 | try:
15 | # length
16 | byte_len = f.read(8)
17 | if byte_len == '':
18 | break
19 | # crc
20 | f.read(4)
21 | proto_len = struct.unpack('q', byte_len)[0]
22 | # proto
23 | f.read(proto_len)
24 | # crc
25 | f.read(4)
26 | idx.write(str(current) + ' ' + str(f.tell() - current) + '\n')
27 | except:
28 | print("Not a valid TFRecord file")
29 | break
30 |
31 | f.close()
32 | idx.close()
33 |
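34 | # Each TFRecord entry is framed as an 8-byte length, a 4-byte CRC of that
35 | # length, the serialized example bytes, and a 4-byte CRC of the payload; the
36 | # index file written above stores "<byte offset> <record size>" per entry.
37 | # Example invocation (hypothetical file names):
38 | #   ./tfrecord2idx train.tfrecord train.idx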
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Fritz
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/image_segmentation/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Fritz
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/image_segmentation/image_segmentation/dali_config.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | logger = logging.getLogger(__name__)
4 |
5 |
6 | def build_config(**updates):
7 | defaults = {
8 | 'hue_min': -30,
9 | 'hue_max': 30,
10 | 'zoom_scale': 1.3,
11 | 'rotate_angle_min': -45,
12 | 'rotate_angle_max': 45,
13 | 'crop_x_max': 0.2,
14 | 'crop_y_max': 0.2,
15 | 'contrast_min': 0.45,
16 | 'contrast_max': 1.5,
17 | 'saturation_min': 0.4,
18 | 'saturation_max': 2.0,
19 | 'brightness_min': 0.35,
20 | 'brightness_max': 1.5,
21 | }
22 | for key in updates:
23 | if key not in defaults:
24 | raise Exception("Augmentation Config %s not found." % key)
25 |
26 | defaults.update(**updates)
27 |
28 | return defaults
29 |
30 |
31 | class DaliConfig(object):
32 | """Wrapper for Dali augmentation yaml config parameters. """
33 | def __init__(self, **updates):
34 |
35 | self.__dict__ = build_config(**updates)
36 |
37 | def summarize(self):
38 | logger.info('Dali Image Augmentation Parameters')
39 | logger.info('==================================')
40 | for key, value in self.__dict__.items():
41 | logger.info(' %s: %s', key, value)
42 |
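43 | 
44 | # Minimal usage sketch: any default above can be overridden by keyword, and
45 | # unknown keys raise an exception.
46 | #   config = DaliConfig(zoom_scale=1.2, rotate_angle_max=30)
47 | #   config.summarize()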
--------------------------------------------------------------------------------
/image_segmentation/convert_to_coreml.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 |
4 | import coremltools
5 | import keras
6 |
7 | from image_segmentation.icnet import ICNetModelFactory
8 |
9 |
10 | def convert(argv):
11 | parser = argparse.ArgumentParser(
12 | description='Convert a Keras ICNet model to Core ML'
13 | )
14 | parser.add_argument(
15 | 'keras_checkpoint', nargs='?', type=str,
16 | help='a Keras model checkpoint to load and convert.'
17 | )
18 | parser.add_argument(
19 | '--alpha', type=float, required=True,
20 | help='The width parameter of the network.')
21 | parser.add_argument(
22 | 'mlmodel_output', nargs='?', type=str,
23 | help='a .mlmodel output file.'
24 | )
25 |
26 | args = parser.parse_args(argv)
27 |
28 | original_keras_model = keras.models.load_model(args.keras_checkpoint)
29 | img_size = original_keras_model.input_shape[1]
30 | num_classes = original_keras_model.output_shape[0][-1]
31 |
32 | keras_model = ICNetModelFactory.build(
33 | img_size,
34 | num_classes,
35 | alpha=args.alpha,
36 | weights_path=args.keras_checkpoint,
37 | train=False
38 | )
39 |
40 | mlmodel = coremltools.converters.keras.convert(
41 | keras_model,
42 | input_names='image',
43 | image_input_names='image',
44 | image_scale=2.0 / 255.0,
45 | red_bias=-1.0,
46 | green_bias=-1.0,
47 | blue_bias=-1.0,
48 | output_names='output'
49 | )
50 |
51 | mlmodel.save(args.mlmodel_output)
52 |
53 |
54 | if __name__ == '__main__':
55 | convert(sys.argv[1:])
56 |
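57 | 
58 | # Example invocation (hypothetical file names):
59 | #   python convert_to_coreml.py checkpoint.h5 icnet.mlmodel --alpha 1.0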
--------------------------------------------------------------------------------
/style_transfer/style_transfer/dataset_builder.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import tensorflow as tf
4 |
5 | logger = logging.getLogger('trainer')
6 |
7 |
8 | class DatasetBuilder(object):
9 | """Build a TFRecord dataset for training."""
10 |
11 | @staticmethod
12 | def _resize_fn(images, image_size):
13 | return tf.image.resize_images(
14 | images,
15 | image_size,
16 | method=tf.image.ResizeMethod.BICUBIC
17 | )
18 |
19 | @staticmethod
20 | def _decode_example(example_proto):
21 | features = {
22 | "image/encoded": tf.FixedLenFeature(
23 | (), tf.string, default_value=""
24 | )
25 | }
26 | parsed_features = tf.parse_single_example(example_proto, features)
27 | image = tf.image.decode_jpeg(
28 | parsed_features["image/encoded"],
29 | channels=3)
30 | return image
31 |
32 | @classmethod
33 | def build(cls, filename, batch_size, image_size):
34 | """Build a TensorFlow dataset from images.
35 |
36 | Args:
37 | filename (str) - a filename of tfrecords to load
38 | batch_size (int) - the batch size for the iterator
39 | image_size ((int, int)) - resize all images to a single size
40 |
41 | Returns:
42 | dataset - a tfrecord dataset
43 | """
44 | logger.info('Creating dataset from: %s' % filename)
45 | dataset = tf.data.TFRecordDataset(filename)
46 | dataset = dataset.map(cls._decode_example)
47 | dataset = dataset.map(lambda x: cls._resize_fn(x, image_size))
48 | dataset = dataset.batch(batch_size)
49 | dataset = dataset.repeat() # Repeat forever
50 | return dataset
51 |
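52 | 
53 | # Minimal usage sketch (TF 1.x session API, hypothetical tfrecord path):
54 | #   dataset = DatasetBuilder.build('train.tfrecord', batch_size=4,
55 | #                                  image_size=(256, 256))
56 | #   images = dataset.make_one_shot_iterator().get_next()
57 | #   with tf.Session() as sess:
58 | #       image_batch = sess.run(images)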
--------------------------------------------------------------------------------
/image_segmentation/setup.py:
--------------------------------------------------------------------------------
1 | """Setup script for image_segmentation."""
2 |
3 | import logging
4 | import subprocess
5 | from setuptools import find_packages
6 | from setuptools import setup
7 | from setuptools.command.install import install
8 |
9 |
10 | REQUIRED_PACKAGES = [
11 | 'h5py',
12 | 'keras==2.2.4',
13 | 'Pillow',
14 | 'matplotlib',
15 | 'google-cloud-storage',
16 | ]
17 |
18 |
19 | class CustomCommands(install):
20 | """A setuptools Command class able to run arbitrary commands."""
21 |
22 | def run_custom_command(self, command_list):
23 | p = subprocess.Popen(
24 | command_list,
25 | stdin=subprocess.PIPE,
26 | stdout=subprocess.PIPE,
27 | stderr=subprocess.STDOUT)
28 | # Can use communicate(input='y\n'.encode()) if the command run requires
29 | # some confirmation.
30 | stdout_data, _ = p.communicate()
31 | logging.info('Log command output: %s', stdout_data)
32 | if p.returncode != 0:
33 | raise RuntimeError('Command %s failed: exit code: %s' %
34 | (command_list, p.returncode))
35 |
36 | def run(self):
37 | self.run_custom_command(['apt-get', 'update'])
38 | self.run_custom_command([
39 | 'apt-get', 'install', '-y', 'python-tk'
40 | ])
41 | install.run(self)
42 |
43 |
44 | setup(
45 | name='image_segmentation',
46 | version='1.0',
47 | install_requires=REQUIRED_PACKAGES,
48 | include_package_data=True,
49 | packages=[
50 | p for p in find_packages()
51 | if p.startswith('image_segmentation') or p.startswith('utils')
52 | ],
53 | description='Fritz Style Image Segmentation Library',
54 | cmdclass={
55 | 'install': CustomCommands,
56 | }
57 | )
58 |
--------------------------------------------------------------------------------
/create_ml_playgrounds/subreddit_suggester/SubredditSuggester.playground/Contents.swift:
--------------------------------------------------------------------------------
1 | import CreateMLUI
2 | import CreateML
3 | import Foundation
4 |
5 | let dataFilename = "/Users/jltoole/fritz/fritz-createml-examples/subreddit_title_classifier/popular_data_top_year.json"
6 | let data = try MLDataTable(contentsOf: URL(fileURLWithPath: dataFilename))
7 | print(data.description)
8 |
9 | let (trainingData, testingData) = data.randomSplit(by: 0.8, seed: 5)
10 |
11 | let subredditClassifier = try MLTextClassifier(trainingData: trainingData,
12 | textColumn: "text",
13 | labelColumn: "label")
14 |
15 | // Training accuracy as a percentage
16 | let trainingAccuracy = (1.0 - subredditClassifier.trainingMetrics.classificationError) * 100
17 | // Validation accuracy as a percentage
18 | let validationAccuracy = (1.0 - subredditClassifier.validationMetrics.classificationError) * 100
19 | print("Training Accuracy: \(trainingAccuracy), Validation Accuracy: \(validationAccuracy)")
20 |
21 | let evaluationMetrics = subredditClassifier.evaluation(on: testingData)
22 |
23 | // Evaluation accuracy as a percentage
24 | let evaluationAccuracy = (1.0 - evaluationMetrics.classificationError) * 100
25 | print("Evaluation Accuracy: \(evaluationAccuracy)")
26 |
27 | let title = "Saw this good boy at the park today with TensorFlow."
28 | let predictedSubreddit = try subredditClassifier.prediction(from: title)
29 | print(predictedSubreddit)
30 |
31 | let metadata = MLModelMetadata(author: "Jameson Toole",
32 | shortDescription: "Predict which subreddit a post should go in based on a title.",
33 | version: "1.0")
34 |
35 | try subredditClassifier.write(to: URL(fileURLWithPath: "/Users/jltoole/fritz/fritz-createml-examples/subreddit_title_classifier/subredditClassifier.mlmodel"),
36 | metadata: metadata)
37 |
39 |
--------------------------------------------------------------------------------
/style_transfer/stylize_image.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import keras
3 | import logging
4 | import numpy
5 | import PIL.Image
6 |
7 | import keras_contrib
8 |
9 | from style_transfer import layers
10 | from style_transfer import utils
11 |
12 | logging.basicConfig(level=logging.INFO)
13 | logger = logging.getLogger('stylize_image')
14 |
15 |
16 | if __name__ == '__main__':
17 | parser = argparse.ArgumentParser(
18 | description='Stylize an image using a trained model.'
19 | )
20 |
21 | parser.add_argument(
22 | '--input-image', type=str, required=True,
23 | help='An image to stylize.'
24 | )
25 | parser.add_argument(
26 | '--output-image', type=str, required=True,
27 | help='An output file for the stylized image.'
28 | )
29 | parser.add_argument(
30 | '--model-checkpoint', type=str, required=True,
31 | help='Checkpoint from a trained Style Transfer Network.'
32 | )
33 |
34 | args = parser.parse_args()
35 |
36 | logger.info('Loading model from %s' % args.model_checkpoint)
37 | custom_objects = {
38 | 'InstanceNormalization':
39 | keras_contrib.layers.normalization.InstanceNormalization,
40 | 'DeprocessStylizedImage': layers.DeprocessStylizedImage
41 | }
42 | transfer_net = keras.models.load_model(
43 | args.model_checkpoint,
44 | custom_objects=custom_objects
45 | )
46 |
47 | image_size = transfer_net.input_shape[1:3]
48 |
49 | inputs = [transfer_net.input, keras.backend.learning_phase()]
50 | outputs = [transfer_net.output]
51 |
52 | transfer_style = keras.backend.function(inputs, outputs)
53 |
54 | input_image = utils.load_image(
55 | args.input_image,
56 | image_size[0],
57 | image_size[1],
58 | expand_dims=True
59 | )
60 | output_image = transfer_style([input_image, 1])[0]
61 | output_image = PIL.Image.fromarray(numpy.uint8(output_image[0]))
62 | output_image.save(args.output_image)
63 |
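64 | 
65 | # Example invocation (bundled example files; output name is arbitrary):
66 | #   python stylize_image.py --input-image example/dog.jpg \
67 | #       --output-image stylized_dog.jpg \
68 | #       --model-checkpoint example/starry_night.h5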
--------------------------------------------------------------------------------
/style_transfer/style_transfer/utils.py:
--------------------------------------------------------------------------------
1 | """Summary.
2 |
3 | Attributes:
4 | logger (TYPE): Description
5 | """
6 | import io
7 | import logging
8 | import os
9 |
10 | import PIL.Image
11 | import numpy
12 | from tensorflow.python.lib.io import file_io
13 |
14 |
15 | logger = logging.getLogger('utils')
16 |
17 |
18 | def load_image(
19 | filename,
20 | height,
21 | width,
22 | expand_dims=False):
23 | """Load an image and transform it to a specific size.
24 |
25 | Optionally, preprocess the image through the VGG preprocessor.
26 |
27 | Args:
28 | filename (TYPE): Description
29 | height (TYPE): Description
30 | width (TYPE): Description
31 | expand_dims (bool, optional): Description
32 | filename - an image file to load
33 | height - the height of the transformed image
34 | width - the width of the transformed image
35 | vgg_preprocess - if True, preprocess the image for a VGG network.
36 | expand_dims - Add an addition dimension (B, H, W, C), useful for
37 | feeding models.
38 |
39 | Returns:
40 | img - a numpy array representing the image.
41 | """
42 | img = file_io.read_file_to_string(filename, binary_mode=True)
43 | img = PIL.Image.open(io.BytesIO(img))
44 | img = img.resize((width, height), resample=PIL.Image.BILINEAR)
45 | img = numpy.array(img)[:, :, :3]
46 |
47 | if expand_dims:
48 | img = numpy.expand_dims(img, axis=0)
49 |
50 | return img
51 |
52 |
53 | def copy_file_from_gcs(file_path):
54 | """Copy a file from gcs to local machine.
55 |
56 | Args:
57 | file_path (str): a GCS url to download
58 |
59 | Returns:
60 | str: a local path to the file
61 | """
62 | logger.info('Downloading %s' % file_path)
63 | with file_io.FileIO(file_path, mode='rb') as input_f:
64 | basename = os.path.basename(file_path)
65 | with file_io.FileIO(basename, mode='w+') as output_f:
66 | output_f.write(input_f.read())
67 | return basename
68 |
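69 | 
70 | # Minimal usage sketch with the bundled example image:
71 | #   img = load_image('example/dog.jpg', height=480, width=640,
72 | #                    expand_dims=True)  # shape (1, 480, 640, 3)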
--------------------------------------------------------------------------------
/style_transfer/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.2.0
2 | appnope==0.1.0
3 | astor==0.6.2
4 | awscli==1.14.64
5 | backcall==0.1.0
6 | bleach==1.5.0
7 | botocore==1.9.17
8 | certifi==2018.4.16
9 | chardet==3.0.4
10 | colorama==0.3.7
11 | coremltools==0.8
12 | cycler==0.10.0
13 | Cython==0.28.2
14 | decorator==4.3.0
15 | docutils==0.14
16 | easydict==1.7
17 | entrypoints==0.2.3
18 | gast==0.2.0
19 | graphviz==0.8.3
20 | grpcio==1.11.0
21 | h5py==2.7.1
22 | html5lib==0.9999999
23 | idna==2.6
24 | imgaug==0.2.5
25 | ipykernel==4.8.2
26 | ipython==6.3.1
27 | ipython-genutils==0.2.0
28 | ipywidgets==7.2.1
29 | jedi==0.12.0
30 | Jinja2==2.10
31 | jmespath==0.9.3
32 | jsonschema==2.6.0
33 | jupyter==1.0.0
34 | jupyter-client==5.2.3
35 | jupyter-console==5.2.0
36 | jupyter-core==4.4.0
37 | Keras==2.1.6
38 | Keras-Applications==1.0.2
39 | keras-contrib==2.0.8
40 | Keras-Preprocessing==1.0.1
41 | kiwisolver==1.0.1
42 | lxml==4.2.3
43 | Markdown==2.6.11
44 | MarkupSafe==1.0
45 | matplotlib==2.2.2
46 | mistune==0.8.3
47 | mxnet==1.1.0.post0
48 | nbconvert==5.3.1
49 | nbformat==4.4.0
50 | networkx==2.1
51 | notebook>=5.7.2
52 | numpy==1.14.3
53 | opencv-contrib-python==3.4.0.12
54 | pandas==0.22.0
55 | pandocfilters==1.4.2
56 | parso==0.2.0
57 | pexpect==4.5.0
58 | pickleshare==0.7.4
59 | Pillow==5.1.0
60 | prettytable==0.7.2
61 | prometheus-client==0.3.0
62 | prompt-toolkit==1.0.15
63 | protobuf==3.5.2.post1
64 | ptyprocess==0.5.2
65 | pyasn1==0.4.2
66 | pycocotools==2.0.0
67 | pydot==1.2.4
68 | Pygments==2.2.0
69 | pyparsing==2.2.0
70 | python-dateutil==2.6.1
71 | pytz==2018.4
72 | PyWavelets==0.5.2
73 | PyYAML>=4.2b1
74 | pyzmq==17.0.0
75 | qtconsole==4.3.1
76 | requests==2.20.0
77 | rsa==3.4.2
78 | s3transfer==0.1.13
79 | scikit-image==0.13.1
80 | scipy==1.1.0
81 | seaborn==0.8.1
82 | Send2Trash==1.5.0
83 | simplegeneric==0.8.1
84 | six==1.10.0
85 | tensorboard==1.9.0
86 | tensorflow==1.9.0
87 | termcolor==1.1.0
88 | terminado==0.8.1
89 | testpath==0.3.1
90 | tfcoreml==0.2.0
91 | tornado==5.0.2
92 | traitlets==4.3.2
93 | turicreate==4.3.2
94 | urllib3>=1.23
95 | wcwidth==0.1.7
96 | webencodings==0.5.1
97 | Werkzeug==0.14.1
98 | widgetsnbextension==3.2.1
99 |
--------------------------------------------------------------------------------
/style_transfer/style_transfer/layers.py:
--------------------------------------------------------------------------------
1 | import keras
2 |
3 |
4 | class VGGNormalize(keras.layers.Layer):
5 | """A custom layer to normalize an image for input into a VGG model.
6 |
7 | This consists of swapping channel order and centering pixel values.
8 |
9 | Centering values come from:
10 | https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py # NOQA
11 | """
12 |
13 | def __init__(self, **kwargs):
14 | """Initialize the layer.
15 |
16 | Args:
17 | **kwargs - arguments passed to the Keras layer base.
18 | """
19 | super(VGGNormalize, self).__init__(**kwargs)
20 | # Work around a bug introduced by differences between the tf.keras and keras APIs.
21 | self.outbound_nodes = self._outbound_nodes
22 |
23 | def build(self, input_shape):
24 | """Build the layer."""
25 | pass
26 |
27 | def call(self, x, reverse_channels=True):
28 | """Apply the layer.
29 |
30 | Args:
31 | x - an input tensor.
32 | reverse_channels - if True, reverse the channel order
33 | """
34 | # Swap channel order: 'RGB'->'BGR'
35 | if reverse_channels:
36 | x = x[:, :, :, ::-1]
37 |
38 | # Center pixel values. Technically each channel should have its
39 | # own center value, but the tensor computation is annoying so we'll
40 | # just center them all with the same value.
41 | x -= 120.0
42 |
43 | return x
44 |
45 |
46 | class DeprocessStylizedImage(keras.layers.Layer):
47 | """A layer to deprocess style transfer layer output.
48 |
49 | The style transfer network outputs an image where pixel values are
50 | between -1 and 1 due to a tanh activation. This layer converts that back
51 | to normal values between 0 and 255.
52 | """
53 |
54 | def __init__(self, **kwargs):
55 | """Initialize the layer.
56 |
57 | Args:
58 | **kwargs - arguments passed to the Keras layer base.
59 | """
60 | super(DeprocessStylizedImage, self).__init__(**kwargs)
61 |
62 | def build(self, input_shape):
63 | """Build the layer."""
64 | pass
65 |
66 | def call(self, x):
67 | """Apply the layer."""
68 | return (x + 1.0) * 127.5
69 |
--------------------------------------------------------------------------------
/image_segmentation/Makefile:
--------------------------------------------------------------------------------
1 |
2 | download:
3 | ./download_and_convert_ade20k.sh
4 |
5 | create-training-data:
6 | mkdir -p data/${LABEL_SET}
7 | python create_tfrecord_dataset.py \
8 | -i data/ADEChallengeData2016/images/training/ \
9 | -a data/ADEChallengeData2016/annotations/training/ \
10 | -o data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \
11 | -l data/ADEChallengeData2016/objectInfo150.txt \
12 | -w "person, individual, someone, somebody, mortal, soul|house:building, edifice:house:skyscraper|sky|car, auto, automobile, machine, motorcar:bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle:truck, motortruck:van|bicycle, bike, wheel, cycle:minibike, motorbike" \
13 | -t 0.20
14 |
15 | upload-data:
16 | gsutil cp data/${LABEL_SET}/* gs://${GCS_BUCKET}/data/${LABEL_SET}/
17 |
18 |
19 | train-local-refine:
20 | python -m image_segmentation.train \
21 | -d data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \
22 | -l data/${LABEL_SET}/labels.txt \
23 | -n 10000 \
24 | -s 768 \
25 | -a 1 \
26 | --steps-per-epoch 100 \
27 | --batch-size 5 \
28 | --lr 0.0001 \
29 | --fine-tune-checkpoint data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_1_rf.h5 \
30 | -o data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_1_rf.h5 \
31 | --refine
32 |
33 | train-local:
34 | python -m image_segmentation.train \
35 | --data data/combined2.tfrecord \
36 | --use-dali \
37 | -l data/${LABEL_SET}/labels.txt \
38 | -n 500000 \
39 | -s 768 \
40 | -a 1 \
41 | --batch-size 12 \
42 | --steps-per-epoch 2500 \
43 | --parallel-calls 4 \
44 | --lr 0.0001 \
45 | --fine-tune-checkpoint data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_1_fine.h5 \
46 | --add-noise \
47 | --model-name people_with_noise
48 |
49 |
50 | train-cloud:
51 | python setup.py sdist
52 | gcloud ml-engine jobs submit training `whoami`_image_segmentation_`date +%s` \
53 | --runtime-version 1.9 \
54 | --job-dir=gs://${GCS_BUCKET} \
55 | --packages dist/image_segmentation-1.0.tar.gz,nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl \
56 | --module-name image_segmentation.train \
57 | --region us-central1 \
58 | --config config.yaml \
59 | -- \
60 | -d gs://fritz-data-sandbox/ADEChallengeData2016/people/people_data.tfrecord \
61 | -l gs://fritz-data-sandbox/ADEChallengeData2016/people/labels.txt \
62 | --use-dali \
63 | -n 5000 \
64 | -s 768 \
65 | -a 1 \
66 | --batch-size 12 \
67 | --steps-per-epoch 250 \
68 | --parallel-calls 4 \
69 | --lr 0.001 \
70 | --add-noise \
71 | --model-name ${MODEL_NAME} \
72 | --gcs-bucket gs://${GCS_BUCKET}/train
73 |
--------------------------------------------------------------------------------
/style_transfer/style_transfer/layer_converters.py:
--------------------------------------------------------------------------------
1 | """Functions to convert custom Keras layers to equivalent Core ML layers.
2 | 
3 | Each of these functions must conform to the spec set by Apple here:
4 | https://github.com/apple/coremltools/blob/master/coremltools/converters/keras/_layers2.py
5 | """
6 | 
7 | import numpy
8 |
9 |
10 | def convert_instancenormalization(
11 | builder,
12 | layer,
13 | input_names,
14 | output_names,
15 | keras_layer):
16 | """
17 | Convert a Keras InstanceNormalization layer to Core ML.
18 |
19 | This conforms to the Core ML layer spec.
20 |
21 | Parameters
22 | ----------
23 | keras_layer: layer
24 | A keras layer object.
25 |
26 | builder: NeuralNetworkBuilder
27 | A neural network builder object.
28 | """
29 | input_name, output_name = (input_names[0], output_names[0])
30 | nb_channels = keras_layer.get_weights()[0].shape[0]
31 |
32 | # Set parameters
33 | # Parameter arrangement in Keras: gamma, beta, mean, variance
34 | idx = 0
35 | gamma, beta = None, None
36 | if keras_layer.scale:
37 | gamma = keras_layer.get_weights()[idx]
38 | idx += 1
39 | if keras_layer.center:
40 | beta = keras_layer.get_weights()[idx]
41 | idx += 1
42 |
43 | epsilon = keras_layer.epsilon or 1e-5
44 |
45 | builder.add_batchnorm(
46 | name=layer,
47 | channels=nb_channels,
48 | gamma=gamma,
49 | beta=beta,
50 | compute_mean_var=True,
51 | instance_normalization=True,
52 | input_name=input_name,
53 | output_name=output_name,
54 | epsilon=epsilon
55 | )
56 |
57 |
58 | def convert_deprocessstylizedimage(
59 | builder,
60 | layer,
61 | input_names,
62 | output_names,
63 | keras_layer):
64 | """Convert the DeprocessStylizedImage layer type to Core ML.
65 |
66 | This simply takes the output of the tanh activation layer and scales
67 | values to conform to typical image RGB values.
68 | """
69 | input_name, output_name = (input_names[0], output_names[0])
70 |
71 | # Apple's scale layer performs the following math
72 | # y = w * x + b
73 | # So to match the keras model's deprocessing layer y = (x + 1) * 127.5
74 | # We can set the following matrices
75 | scale = 127.5
76 | w = numpy.array([scale, scale, scale])
77 | b = numpy.array([scale, scale, scale])
78 |
79 | builder.add_scale(
80 | name=input_name,
81 | W=w,
82 | b=b,
83 | has_bias=True,
84 | shape_scale=w.shape,
85 | shape_bias=b.shape,
86 | input_name=input_name,
87 | output_name=output_name
88 | )
89 |
--------------------------------------------------------------------------------
/style_transfer/convert_to_coreml.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import keras_contrib
3 | import logging
4 | import sys
5 |
6 | from style_transfer import layer_converters
7 | from style_transfer import layers
8 | from style_transfer import models
9 | from style_transfer.fritz_coreml_converter import FritzCoremlConverter
10 |
11 | logging.basicConfig(level=logging.INFO)
12 | logger = logging.getLogger('convert_to_coreml')
13 |
14 |
15 | def main(argv):
16 |
17 | parser = argparse.ArgumentParser(
18 | description='Convert a trained Style Transfer Network to Core ML.'
19 | )
20 | parser.add_argument(
21 | '--keras-checkpoint', type=str, required=True,
22 | help='Weights from a trained Style Transfer Network.'
23 | )
24 | parser.add_argument(
25 | '--alpha', type=float, required=True,
26 | help='The width multiplier of the network.'
27 | )
28 | parser.add_argument(
29 | '--coreml-model', type=str, required=True,
30 | help='A CoreML output file to save to'
31 | )
32 | parser.add_argument(
33 | '--image-size', type=str, default='640,480',
34 | help='The size of input and output of the final Core ML model: H,W'
35 | )
36 | parser.add_argument(
37 | '--use-small-network', action='store_true',
38 | help=('Use a very small network architecture that works in real time '
39 | 'on some mobile devices using only CPU')
40 | )
41 |
42 | args = parser.parse_args(argv)
43 |
44 | image_size = [int(dim) for dim in args.image_size.split(',')]
45 | # Map custom layers to their custom coreml converters
46 | custom_layers = {
47 | keras_contrib.layers.normalization.InstanceNormalization: layer_converters.convert_instancenormalization, # NOQA
48 | layers.DeprocessStylizedImage: layer_converters.convert_deprocessstylizedimage # NOQA
49 | }
50 |
51 | logger.info('Loading model weights from %s' % args.keras_checkpoint)
52 |
53 | if args.use_small_network:
54 | model = models.SmallStyleTransferNetwork.build(
55 | image_size,
56 | alpha=args.alpha,
57 | checkpoint_file=args.keras_checkpoint
58 | )
59 | else:
60 | model = models.StyleTransferNetwork.build(
61 | image_size,
62 | alpha=args.alpha,
63 | checkpoint_file=args.keras_checkpoint
64 | )
65 |
66 | fritz_converter = FritzCoremlConverter()
67 | mlmodel = fritz_converter.convert_keras(
68 | model,
69 | input_names=['image'],
70 | image_input_names=['image'],
71 | output_names=['stylizedImage'],
72 | image_output_names=['stylizedImage'],
73 | custom_layers=custom_layers
74 | )
75 | logger.info('Saving .mlmodel to %s' % args.coreml_model)
76 | mlmodel.save(args.coreml_model)
77 |
78 |
79 | if __name__ == '__main__':
80 | main(sys.argv[1:])
81 |
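82 | 
83 | # Example invocation (hypothetical file names and alpha):
84 | #   python convert_to_coreml.py --keras-checkpoint checkpoint.h5 --alpha 0.25 \
85 | #       --image-size 640,480 --coreml-model stylized.mlmodel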
--------------------------------------------------------------------------------
/image_segmentation/image_segmentation/utils.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as pyplot
2 | import numpy
3 | import skimage.transform
4 |
5 |
6 | def plot_image_and_mask(img, mask, alpha=0.6, deprocess_func=None,
7 | reference_mask=None,
8 | show_original_image=True,
9 | small=False):
10 | """Plot an image and overlays a transparent segmentation mask.
11 |
12 | Args:
13 | img (arr): the image data to plot
14 | mask (arr): the segmentation mask
15 | alpha (float, optional): the alpha value of the segmentation mask.
16 | small: If true, output small figure
17 |
18 | Returns:
19 | pyplot.plot: a plot
20 | """
21 | max_mask = numpy.argmax(mask, axis=-1)
22 |
23 | rows, columns = 1, 1
24 | if show_original_image:
25 | columns += 1
26 | if reference_mask is not None:
27 | columns += 1
28 |
29 | fig = pyplot.figure()
30 |
31 | if deprocess_func:
32 | img = deprocess_func(img)
33 |
34 | # Add Results plot
35 | column_index = 1
36 | fig.add_subplot(rows, columns, column_index)
37 |
38 | pyplot.imshow(img.astype(int))
39 | pyplot.imshow(
40 | skimage.transform.resize(
41 | max_mask,
42 | img.shape[:2],
43 | order=0),
44 | alpha=alpha)
45 |
46 | if reference_mask is not None:
47 | column_index += 1
48 | fig.add_subplot(rows, columns, column_index)
49 | pyplot.imshow(img.astype(int))
50 | pyplot.imshow(
51 | skimage.transform.resize(
52 | reference_mask[:, :, 0],
53 | img.shape[:2],
54 | order=0),
55 | alpha=alpha)
56 |
57 | if show_original_image:
58 | column_index += 1
59 | fig.add_subplot(rows, columns, column_index)
60 | pyplot.imshow(img.astype('uint8'))
61 |
62 | if small:
63 | fig.set_size_inches(columns * 5, 5)
64 | else:
65 | fig.set_size_inches(columns * 10, 10)
66 |
67 | return fig
68 |
69 |
70 | def plot_pixel_probabilities(probabilities, class_labels, subplot=None):
71 | """Plot probabilities that each pixel belows to a given class.
72 |
73 | This creates a subplot for each class and plots a heatmap of
74 | probabilities that each pixel belongs to each class.
75 |
76 | Args:
77 | probabilities (arr): an array of class probabilities for each pixel
78 | class_labels (List[str]): the labels for each class
79 |
80 | Returns:
81 |     pyplot.figure: a figure with one probability heatmap per class.
82 | """
83 | num_classes = probabilities.shape[-1]
84 | total_items = num_classes + (1 if subplot else 0)
85 | columns = 4
86 | rows = int(numpy.ceil(total_items / float(columns)))
87 | fig = pyplot.figure(figsize=(12, rows * 4))
88 |
89 | if subplot:
90 | fig.add_subplot(subplot)
91 |
92 | for cidx in range(num_classes):
93 | ax = fig.add_subplot(rows, columns, cidx + 1)
94 | ax.imshow(probabilities[:, :, cidx], vmin=0, vmax=1.0)
95 | ax.set_title(class_labels[cidx])
96 | fig.tight_layout()
97 | return fig
98 |
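99 | 
100 | # Minimal usage sketch, for an image array `img` and predicted mask `mask`:
101 | #   fig = plot_image_and_mask(img, mask, alpha=0.6)
102 | #   fig.savefig('mask_overlay.png')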
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Fritz Models
2 | A collection of machine and deep learning models designed to run on mobile devices.
3 |
4 | Models in this repository contain code and utilities for training as well as for converting trained models to mobile-friendly formats like Core ML, TensorFlow Mobile, and TensorFlow Lite.
5 |
6 | ## Update: 12/26/2018
7 | For convenience, we've consolidated a few open source projects into a single repository. `fritz-style-transfer` has been renamed `fritz-models`. Have no fear, all of the code for style transfer lives in the `style_transfer` sub-directory.
8 |
9 | ## Models
10 |
11 | * [Style Transfer](https://github.com/fritzlabs/fritz-models/tree/master/style_transfer): Transform images into works of art by transferring the style of one image onto the content of another.
12 | * [Image Segmentation](https://github.com/fritzlabs/fritz-models/tree/master/image_segmentation): Semantic segmentation of images. Assign a value to each pixel of an image corresponding to the type of object it belongs to.
13 | * [Create ML Playgrounds](https://github.com/fritzlabs/fritz-models/tree/master/create_ml_playgrounds): A series of playgrounds for training models with Apple's Create ML tool.
14 |
15 | Don't see the model you're looking for? Open an issue and let us know!
16 |
17 | ## Add to your app
18 | To see live demonstrations of these models running on-device, the Heartbeat App is available in both the [App Store](https://itunes.apple.com/us/app/heartbeat-by-fritz/id1325206416?mt=8) ([source code](https://github.com/fritzlabs/heartbeat-ios)) and [Play Store](https://play.google.com/store/apps/details?id=ai.fritz.heartbeat) ([source code](https://github.com/fritzlabs/heartbeat-android)).
19 |
20 | If you'd like to incorporate any of these models or versions you've trained into your own app, head over to [Fritz](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models). SDKs are available for both iOS and Android.
21 |
22 | ## Additional resources
23 |
24 | Additional, [non-code resources](resources/README.md) for machine learning and AI.
25 |
26 | * [AI and ML Landscape](resources/AI_Landscape.md): our curated list of helpful products and services for AI and machine learning.
27 |
28 | ## Join the community
29 | [Heartbeat](https://heartbeat.fritz.ai/?utm_source=github&utm_campaign=fritz-models) is a community of developers interested in the intersection of mobile and machine learning. [Chat with us in Slack](https://join.slack.com/t/heartbeat-by-fritz/shared_invite/enQtMzY5OTM1MzgyODIzLTZhNTFjYmRiODU0NjZjNjJlOGRjYzI2OTIwY2M4YTBiNjM1ODU1ZmU3Y2Q2MmMzMmI2ZTIzZjQ1ZWI3NzBkZGU) and stay up to date on the latest mobile ML news with our [Newsletter](https://mobileml.us16.list-manage.com/subscribe?u=de53bead690affb8e9a21de8f&id=68acb5c0fd).
30 |
31 | ## A note about large files
32 | Large files like model checkpoints, data, and archives of compiled code are managed via `git lfs`. You need to have Git LFS installed in order to download these files. Installation instructions are available [here](https://github.com/git-lfs/git-lfs#getting-started).
33 |
34 | If you have Git LFS installed, large files will download automatically by default. This can take a while and require a good connection. To clone this repository without downloading the model checkpoints, you can run:
35 |
36 | ```
37 | GIT_LFS_SKIP_SMUDGE=1 git clone ...
38 | ```
39 |
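40 | If you skipped the large file download when cloning, you can fetch individual files later with Git LFS, for example:
41 | 
42 | ```
43 | git lfs pull --include="style_transfer/example/*"
44 | ```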
--------------------------------------------------------------------------------
/image_segmentation/coco_object_info.txt:
--------------------------------------------------------------------------------
1 | Idx Ratio Train Val Name
2 | 1 0.0 0 0 person
3 | 2 0.0 0 0 bicycle
4 | 3 0.0 0 0 car
5 | 4 0.0 0 0 motorcycle
6 | 5 0.0 0 0 airplane
7 | 6 0.0 0 0 bus
8 | 7 0.0 0 0 train
9 | 8 0.0 0 0 truck
10 | 9 0.0 0 0 boat
11 | 10 0.0 0 0 traffic light
12 | 11 0.0 0 0 fire hydrant
13 | 13 0.0 0 0 stop sign
14 | 14 0.0 0 0 parking meter
15 | 15 0.0 0 0 bench
16 | 16 0.0 0 0 bird
17 | 17 0.0 0 0 cat
18 | 18 0.0 0 0 dog
19 | 19 0.0 0 0 horse
20 | 20 0.0 0 0 sheep
21 | 21 0.0 0 0 cow
22 | 22 0.0 0 0 elephant
23 | 23 0.0 0 0 bear
24 | 24 0.0 0 0 zebra
25 | 25 0.0 0 0 giraffe
26 | 27 0.0 0 0 backpack
27 | 28 0.0 0 0 umbrella
28 | 31 0.0 0 0 handbag
29 | 32 0.0 0 0 tie
30 | 33 0.0 0 0 suitcase
31 | 34 0.0 0 0 frisbee
32 | 35 0.0 0 0 skis
33 | 36 0.0 0 0 snowboard
34 | 37 0.0 0 0 sports ball
35 | 38 0.0 0 0 kite
36 | 39 0.0 0 0 baseball bat
37 | 40 0.0 0 0 baseball glove
38 | 41 0.0 0 0 skateboard
39 | 42 0.0 0 0 surfboard
40 | 43 0.0 0 0 tennis racket
41 | 44 0.0 0 0 bottle
42 | 46 0.0 0 0 wine glass
43 | 47 0.0 0 0 cup
44 | 48 0.0 0 0 fork
45 | 49 0.0 0 0 knife
46 | 50 0.0 0 0 spoon
47 | 51 0.0 0 0 bowl
48 | 52 0.0 0 0 banana
49 | 53 0.0 0 0 apple
50 | 54 0.0 0 0 sandwich
51 | 55 0.0 0 0 orange
52 | 56 0.0 0 0 broccoli
53 | 57 0.0 0 0 carrot
54 | 58 0.0 0 0 hot dog
55 | 59 0.0 0 0 pizza
56 | 60 0.0 0 0 donut
57 | 61 0.0 0 0 cake
58 | 62 0.0 0 0 chair
59 | 63 0.0 0 0 couch
60 | 64 0.0 0 0 potted plant
61 | 65 0.0 0 0 bed
62 | 67 0.0 0 0 dining table
63 | 70 0.0 0 0 toilet
64 | 72 0.0 0 0 tv
65 | 73 0.0 0 0 laptop
66 | 74 0.0 0 0 mouse
67 | 75 0.0 0 0 remote
68 | 76 0.0 0 0 keyboard
69 | 77 0.0 0 0 cell phone
70 | 78 0.0 0 0 microwave
71 | 79 0.0 0 0 oven
72 | 80 0.0 0 0 toaster
73 | 81 0.0 0 0 sink
74 | 82 0.0 0 0 refrigerator
75 | 84 0.0 0 0 book
76 | 85 0.0 0 0 clock
77 | 86 0.0 0 0 vase
78 | 87 0.0 0 0 scissors
79 | 88 0.0 0 0 teddy bear
80 | 89 0.0 0 0 hair drier
81 | 90 0.0 0 0 toothbrush
82 | 92 0.0 0 0 banner
83 | 93 0.0 0 0 blanket
84 | 95 0.0 0 0 bridge
85 | 100 0.0 0 0 cardboard
86 | 107 0.0 0 0 counter
87 | 109 0.0 0 0 curtain
88 | 112 0.0 0 0 door-stuff
89 | 118 0.0 0 0 floor-wood
90 | 119 0.0 0 0 flower
91 | 122 0.0 0 0 fruit
92 | 125 0.0 0 0 gravel
93 | 128 0.0 0 0 house
94 | 130 0.0 0 0 light
95 | 133 0.0 0 0 mirror-stuff
96 | 138 0.0 0 0 net
97 | 141 0.0 0 0 pillow
98 | 144 0.0 0 0 platform
99 | 145 0.0 0 0 playingfield
100 | 147 0.0 0 0 railroad
101 | 148 0.0 0 0 river
102 | 149 0.0 0 0 road
103 | 151 0.0 0 0 roof
104 | 154 0.0 0 0 sand
105 | 155 0.0 0 0 sea
106 | 156 0.0 0 0 shelf
107 | 159 0.0 0 0 snow
108 | 161 0.0 0 0 stairs
109 | 166 0.0 0 0 tent
110 | 168 0.0 0 0 towel
111 | 171 0.0 0 0 wall-brick
112 | 175 0.0 0 0 wall-stone
113 | 176 0.0 0 0 wall-tile
114 | 177 0.0 0 0 wall-wood
115 | 178 0.0 0 0 water-other
116 | 180 0.0 0 0 window-blind
117 | 181 0.0 0 0 window-other
118 | 184 0.0 0 0 tree-merged
119 | 185 0.0 0 0 fence-merged
120 | 186 0.0 0 0 ceiling-merged
121 | 187 0.0 0 0 sky-other-merged
122 | 188 0.0 0 0 cabinet-merged
123 | 189 0.0 0 0 table-merged
124 | 190 0.0 0 0 floor-other-merged
125 | 191 0.0 0 0 pavement-merged
126 | 192 0.0 0 0 mountain-merged
127 | 193 0.0 0 0 grass-merged
128 | 194 0.0 0 0 dirt-merged
129 | 195 0.0 0 0 paper-merged
130 | 196 0.0 0 0 food-other-merged
131 | 197 0.0 0 0 building-other-merged
132 | 198 0.0 0 0 rock-merged
133 | 199 0.0 0 0 wall-other-merged
134 | 200 0.0 0 0 rug-merged
135 |
--------------------------------------------------------------------------------
/image_segmentation/utils/deeplab_model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tarfile
3 |
4 | import numpy as np
5 | from PIL import Image
6 | from six.moves import urllib
7 | import tempfile
8 | import tensorflow as tf
9 |
10 |
11 | MODEL_NAME = 'mobilenetv2_coco_voctrainaug'
12 |
13 | _DOWNLOAD_URL_PREFIX = 'http://download.tensorflow.org/models/'
14 | _MODEL_URLS = {
15 | 'mobilenetv2_coco_voctrainaug':
16 | 'deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz',
17 | 'mobilenetv2_coco_voctrainval':
18 | 'deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz',
19 | 'xception_coco_voctrainaug':
20 | 'deeplabv3_pascal_train_aug_2018_01_04.tar.gz',
21 | 'xception_coco_voctrainval':
22 | 'deeplabv3_pascal_trainval_2018_01_04.tar.gz',
23 | }
24 | _TARBALL_NAME = 'deeplab_model.tar.gz'
25 |
26 |
27 | class DeepLabModel(object):
28 | """Class to load deeplab model and run inference."""
29 |
30 | INPUT_TENSOR_NAME = 'ImageTensor:0'
31 | OUTPUT_TENSOR_NAME = 'SemanticPredictions:0'
32 | INPUT_SIZE = 513
33 | FROZEN_GRAPH_NAME = 'frozen_inference_graph'
34 |
35 | def __init__(self, tarball_path):
36 | """Creates and loads pretrained deeplab model."""
37 | self.graph = tf.Graph()
38 |
39 | graph_def = None
40 | # Extract frozen graph from tar archive.
41 | tar_file = tarfile.open(tarball_path)
42 | for tar_info in tar_file.getmembers():
43 | if self.FROZEN_GRAPH_NAME in os.path.basename(tar_info.name):
44 | file_handle = tar_file.extractfile(tar_info)
45 | graph_def = tf.GraphDef.FromString(file_handle.read())
46 | break
47 |
48 | tar_file.close()
49 |
50 | if graph_def is None:
51 | raise RuntimeError('Cannot find inference graph in tar archive.')
52 |
53 | with self.graph.as_default():
54 | tf.import_graph_def(graph_def, name='')
55 |
56 | self.sess = tf.Session(graph=self.graph)
57 |
58 | def run(self, image):
59 | """Runs inference on a single image.
60 |
61 | Args:
62 | image: A PIL.Image object, raw input image.
63 |
64 | Returns:
65 | resized_image: RGB image resized from original input image.
66 | seg_map: Segmentation map of `resized_image`.
67 | """
68 | width, height = image.size
69 | resize_ratio = 1.0 * self.INPUT_SIZE / max(width, height)
70 | target_size = (int(resize_ratio * width), int(resize_ratio * height))
71 | resized_image = image.convert('RGB').resize(target_size, Image.ANTIALIAS)
72 | batch_seg_map = self.sess.run(
73 | self.OUTPUT_TENSOR_NAME,
74 | feed_dict={self.INPUT_TENSOR_NAME: [np.asarray(resized_image)]})
75 | seg_map = batch_seg_map[0]
76 | return resized_image, seg_map
77 |
78 |
79 | def download_deeplab_model(model_name):
80 | model_dir = tempfile.mkdtemp()
81 | tf.gfile.MakeDirs(model_dir)
82 |
83 | download_path = os.path.join(model_dir, _TARBALL_NAME)
84 | print(download_path)
85 | print('downloading model, this might take a while...')
86 |
87 | urllib.request.urlretrieve(
88 | _DOWNLOAD_URL_PREFIX + _MODEL_URLS[model_name],
89 | download_path
90 | )
91 | print('download completed! loading DeepLab model...')
92 |
93 | model = DeepLabModel(download_path)
94 | print('model loaded successfully!')
95 | return model
96 |
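97 | 
98 | # Minimal usage sketch (downloads the pretrained checkpoint to a temp dir):
99 | #   model = download_deeplab_model(MODEL_NAME)
100 | #   resized_image, seg_map = model.run(Image.open('photo.jpg'))  # hypothetical image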
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Any data should go in a data/ dir untracked by git
2 | data/
3 |
4 | # Local development
5 | .DS_Store
6 | .vscode/
7 |
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 | *$py.class
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | env/
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | .hypothesis/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # pyenv
81 | .python-version
82 |
83 | # celery beat schedule file
84 | celerybeat-schedule
85 |
86 | # SageMath parsed files
87 | *.sage.py
88 |
89 | # dotenv
90 | .env
91 |
92 | # virtualenv
93 | .venv
94 | venv/
95 | ENV/
96 |
97 | # Spyder project settings
98 | .spyderproject
99 | .spyproject
100 |
101 | # Rope project settings
102 | .ropeproject
103 |
104 | # mkdocs documentation
105 | /site
106 |
107 | # mypy
108 | .mypy_cache/
109 |
110 | # Swift things for Create ML
111 | # Xcode
112 | #
113 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
114 |
115 | ## Build generated
116 | build/
117 | DerivedData/
118 |
119 | ## Various settings
120 | *.pbxuser
121 | !default.pbxuser
122 | *.mode1v3
123 | !default.mode1v3
124 | *.mode2v3
125 | !default.mode2v3
126 | *.perspectivev3
127 | !default.perspectivev3
128 | xcuserdata/
129 |
130 | ## Other
131 | *.moved-aside
132 | *.xccheckout
133 | *.xcscmblueprint
134 |
135 | ## Obj-C/Swift specific
136 | *.hmap
137 | *.ipa
138 | *.dSYM.zip
139 | *.dSYM
140 |
141 | ## Playgrounds
142 | timeline.xctimeline
143 | playground.xcworkspace
144 |
145 | # Swift Package Manager
146 | #
147 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
148 | # Packages/
149 | # Package.pins
150 | # Package.resolved
151 | .build/
152 |
153 | # CocoaPods
154 | #
155 | # We recommend against adding the Pods directory to your .gitignore. However
156 | # you should judge for yourself, the pros and cons are mentioned at:
157 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
158 | #
159 | # Pods/
160 |
161 | # Carthage
162 | #
163 | # Add this line if you want to avoid checking in source code from Carthage dependencies.
164 | # Carthage/Checkouts
165 |
166 | Carthage/Build
167 |
168 | # fastlane
169 | #
170 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the
171 | # screenshots whenever they are needed.
172 | # For more information about the recommended setup visit:
173 | # https://docs.fastlane.tools/best-practices/source-control/#source-control
174 |
175 | fastlane/report.xml
176 | fastlane/Preview.html
177 | fastlane/screenshots/**/*.png
178 | fastlane/test_output
--------------------------------------------------------------------------------
/image_segmentation/utils/compare_models.py:
--------------------------------------------------------------------------------
1 | from matplotlib import gridspec
2 | from matplotlib import pyplot
3 | import skimage.transform
4 | import numpy
5 |
6 |
7 | def create_pascal_label_colormap():
8 | """Creates a label colormap used in PASCAL VOC segmentation benchmark.
9 |
10 | Returns:
11 | A Colormap for visualizing segmentation results.
12 | """
13 | colormap = numpy.zeros((256, 3), dtype=int)
14 | ind = numpy.arange(256, dtype=int)
15 |
16 | for shift in reversed(range(8)):
17 | for channel in range(3):
18 | colormap[:, channel] |= ((ind >> channel) & 1) << shift
19 | ind >>= 3
20 |
21 | return colormap
22 |
23 |
24 | def label_to_color_image(label):
25 | """Adds color defined by the dataset colormap to the label.
26 |
27 | Args:
28 | label: A 2D array with integer type, storing the segmentation label.
29 |
30 | Returns:
31 | result: A 2D array with floating type. The element of the array
32 |     is the color indexed by the corresponding element in the input label
33 | to the PASCAL color map.
34 |
35 | Raises:
36 | ValueError: If label is not of rank 2 or its value is larger than color
37 | map maximum entry.
38 | """
39 | if label.ndim != 2:
40 |         raise ValueError('Expect 2-D input label')
41 |
42 | colormap = create_pascal_label_colormap()
43 |
44 | if numpy.max(label) >= len(colormap):
45 | raise ValueError('label value too large.')
46 |
47 | return colormap[label]
48 |
49 |
50 | LABEL_NAMES = numpy.asarray([
51 | 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
52 | 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
53 | 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv'
54 | ])
55 |
56 | FULL_LABEL_MAP = numpy.arange(len(LABEL_NAMES)).reshape(len(LABEL_NAMES), 1)
57 | FULL_COLOR_MAP = label_to_color_image(FULL_LABEL_MAP)
58 |
59 |
60 | def vis_segmentation(image, deeplab_seg_map, icnet_seg_map):
61 |     """Visualizes input image, segmentation map and overlay view."""
62 | pyplot.figure(figsize=(15, 5))
63 | grid_spec = gridspec.GridSpec(1, 4, width_ratios=[4, 4, 4, 4])
64 |
65 | pyplot.subplot(grid_spec[0])
66 | pyplot.imshow(image)
67 | pyplot.axis('off')
68 | pyplot.title('Input Image')
69 |
70 | pyplot.subplot(grid_spec[1])
71 | seg_image = label_to_color_image(deeplab_seg_map).astype(numpy.uint8)
72 | pyplot.imshow(seg_image)
73 | pyplot.axis('off')
74 | pyplot.title('Deeplab v3 Segmentation')
75 |
76 | pyplot.subplot(grid_spec[2])
77 | # resize icnet mask
78 | icnet_seg_map = skimage.transform.resize(
79 | icnet_seg_map[0, :, :],
80 | deeplab_seg_map.shape,
81 | preserve_range=True,
82 | anti_aliasing=False,
83 | order=0).astype('int')
84 | seg_image = label_to_color_image(icnet_seg_map).astype(numpy.uint8)
85 | pyplot.imshow(seg_image)
86 | pyplot.axis('off')
87 | pyplot.title('Fritz Segmentation')
88 |
89 | pyplot.subplot(grid_spec[3])
90 | pyplot.imshow(image)
91 | pyplot.imshow(seg_image, alpha=0.7)
92 | pyplot.axis('off')
93 | pyplot.title('Fritz Segmentation Overlay')
94 |
95 | pyplot.grid('off')
96 | pyplot.show()
97 |
98 |
99 | def multiple_vis(results):
100 |     """Visualize a list of (image, deeplab_seg_map, icnet_seg_map) results."""
101 | fig = pyplot.figure(figsize=(15, 3 * len(results)))
102 | grid_spec = gridspec.GridSpec(len(results), 4, width_ratios=[4, 4, 4, 4])
103 |
104 | i = 0
105 | for image, deeplab_seg_map, icnet_seg_map in results:
106 | pyplot.subplot(grid_spec[i])
107 | pyplot.imshow(image)
108 | # pyplot.axis('off')
109 | i += 1
110 |
111 | pyplot.subplot(grid_spec[i])
112 | seg_image = label_to_color_image(deeplab_seg_map).astype(numpy.uint8)
113 | pyplot.imshow(seg_image)
114 | pyplot.axis('off')
115 | pyplot.title('Deeplab v3 Segmentation')
116 |
117 | i += 1
118 | pyplot.subplot(grid_spec[i])
119 | # resize icnet mask
120 | icnet_seg_map = skimage.transform.resize(
121 | icnet_seg_map[0, :, :],
122 | deeplab_seg_map.shape,
123 | preserve_range=True,
124 | anti_aliasing=False,
125 | order=0).astype('int')
126 | seg_image = label_to_color_image(icnet_seg_map).astype(numpy.uint8)
127 | pyplot.imshow(seg_image)
128 | pyplot.axis('off')
129 | pyplot.title('Fritz Segmentation')
130 | i += 1
131 |
132 | pyplot.subplot(grid_spec[i])
133 | pyplot.imshow(image)
134 | pyplot.imshow(seg_image, alpha=0.7)
135 | pyplot.axis('off')
136 | pyplot.title('Fritz Segmentation Overlay')
137 | i += 1
138 |
139 | pyplot.grid('off')
140 |
141 | return fig
142 |
--------------------------------------------------------------------------------
/style_transfer/convert_to_tfmobile.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 | import sys
5 |
6 | import keras
7 | import tensorflow as tf
8 | from tensorflow.python.platform import gfile
9 | from tensorflow.python.tools import freeze_graph
10 | from tensorflow.python.tools import optimize_for_inference_lib
11 | from tensorflow.python.framework import dtypes
12 |
13 | from style_transfer import models
14 |
15 | logging.basicConfig(level=logging.INFO)
16 | logger = logging.getLogger('convert_to_tfmobile')
17 |
18 |
19 | def _freeze_graph(model, basename, output_dir):
20 | name, _ = os.path.splitext(basename)
21 |
22 | saver = tf.train.Saver()
23 |
24 | with keras.backend.get_session() as sess:
25 | checkpoint_filename = os.path.join(output_dir, '%s.ckpt' % name)
26 | output_graph_filename = os.path.join(output_dir, '%s_frozen.pb' % name)
27 | saver.save(sess, checkpoint_filename)
28 | tf.train.write_graph(
29 | sess.graph_def, output_dir, '%s_graph_def.pbtext' % name
30 | )
31 |
32 | freeze_graph.freeze_graph(
33 | input_graph=os.path.join(output_dir, '%s_graph_def.pbtext' % name),
34 | input_saver='',
35 | input_binary=False,
36 | input_checkpoint=checkpoint_filename,
37 | output_graph=output_graph_filename,
38 | output_node_names='deprocess_stylized_image_1/mul',
39 | restore_op_name="save/restore_all",
40 | filename_tensor_name="save/Const:0",
41 | clear_devices=True,
42 | initializer_nodes=None
43 | )
44 | logger.info('Saved frozen graph to: %s' % output_graph_filename)
45 |
46 |
47 | def load_graph_def(filename):
48 | input_graph_def = tf.GraphDef()
49 | with gfile.FastGFile(filename, 'rb') as file:
50 | data = file.read()
51 | input_graph_def.ParseFromString(data)
52 | return input_graph_def
53 |
54 |
55 | def _optimize_graph(basename, output_dir):
56 | name, _ = os.path.splitext(basename)
57 | frozen_graph_filename = os.path.join(output_dir, '%s_frozen.pb' % name)
58 | graph_def = load_graph_def(frozen_graph_filename)
59 |
60 | optimized_graph = optimize_for_inference_lib.optimize_for_inference(
61 | input_graph_def=graph_def,
62 | input_node_names=['input_1'],
63 | placeholder_type_enum=dtypes.float32.as_datatype_enum,
64 | output_node_names=['deprocess_stylized_image_1/mul'],
65 | toco_compatible=True
66 | )
67 |
68 | optimized_graph_filename = os.path.basename(
69 | frozen_graph_filename).replace('frozen', 'optimized')
71 | tf.train.write_graph(
72 | optimized_graph, output_dir, optimized_graph_filename, as_text=False
73 | )
74 | logger.info('Saved optimized graph to: %s' %
75 | os.path.join(output_dir, optimized_graph_filename))
76 |
77 |
78 | def main(argv):
79 |
80 | parser = argparse.ArgumentParser(
81 |         description='Convert a trained Style Transfer Network for TensorFlow Mobile.'
82 | )
83 | parser.add_argument(
84 | '--keras-checkpoint', type=str, required=True,
85 | help='Weights from a trained Style Transfer Network.'
86 | )
87 | parser.add_argument(
88 | '--alpha', type=float, required=True,
89 | help='The width multiplier of the network.'
90 | )
91 | parser.add_argument(
92 | '--output-dir', type=str, required=True,
93 | help='A directory to save various tensorflow graphs to'
94 | )
95 | parser.add_argument(
96 | '--image-size', type=str, default='640,480',
97 | help='The size of input and output of the final Core ML model: H,W'
98 | )
99 | parser.add_argument(
100 | '--use-small-network', action='store_true',
101 | help=('Use a very small network architecture that works in real time '
102 | 'on some mobile devices using only CPU')
103 | )
104 |
105 | args = parser.parse_args(argv)
106 |
107 | image_size = [int(dim) for dim in args.image_size.split(',')]
108 |
109 | logger.info('Loading model weights from %s' % args.keras_checkpoint)
110 |
111 | # Set some keras params before loading the model
112 | keras.backend.clear_session()
113 | keras.backend.set_learning_phase(0)
114 | if args.use_small_network:
115 | model = models.SmallStyleTransferNetwork.build(
116 | image_size,
117 | alpha=args.alpha,
118 | checkpoint_file=args.keras_checkpoint
119 | )
120 | else:
121 | model = models.StyleTransferNetwork.build(
122 | image_size,
123 | alpha=args.alpha,
124 | checkpoint_file=args.keras_checkpoint
125 | )
126 |
127 | basename = os.path.basename(args.keras_checkpoint)
128 | # Freeze Graph
129 | _freeze_graph(model, basename, args.output_dir)
130 | # Optimize Graph
131 | _optimize_graph(basename, args.output_dir)
132 |
133 |
134 | if __name__ == '__main__':
135 | main(sys.argv[1:])
136 |
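137 | # Example invocation (a sketch; the checkpoint and output paths are
138 | # illustrative and assume a 0.25-width model):
139 | #
140 | #   python convert_to_tfmobile.py \
141 | #       --keras-checkpoint example/starry_night_256x256_025.h5 \
142 | #       --alpha 0.25 \
143 | #       --image-size 640,480 \
144 | #       --output-dir example/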
--------------------------------------------------------------------------------
/style_transfer/style_transfer/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 |
4 | from style_transfer import trainer
5 |
6 | logging.basicConfig(level=logging.INFO)
7 | logger = logging.getLogger('train_network')
8 |
9 | # The default layers are those suggested by Johnson et al.
10 | # The names map to those used in the VGG16 application included
11 | # with Keras.
12 | _DEFAULT_STYLE_LAYERS = [
13 | 'block1_conv2', 'block2_conv2',
14 | 'block3_conv3', 'block4_conv3'
15 | ]
16 | _DEFAULT_CONTENT_LAYERS = ['block3_conv3']
17 |
18 |
19 | if __name__ == '__main__':
20 | parser = argparse.ArgumentParser(
21 | description='Train a Style Transfer Network.'
22 | )
23 |
24 | parser.add_argument(
25 | '--training-image-dset', type=str, required=True,
26 |         help=('An h5 file containing images to train with. The dset must '
27 | 'contain a key `images` with the arrays.')
28 | )
29 | parser.add_argument(
30 | '--style-images', type=str, required=True,
31 | help='A comma separated list of images to take styles from.'
32 | )
33 | parser.add_argument(
34 | '--model-checkpoint', type=str, required=True,
35 |         help='A file to save the trained network to.'
36 | )
37 | parser.add_argument(
38 | '--image-size', default='256,256', type=str,
39 | help='The size of the image H,W'
40 | )
41 | parser.add_argument(
42 | '--content-layers', type=str,
43 | help=('A comma separated list of VGG layers to use for '
44 | 'computing content loss')
45 | )
46 | parser.add_argument(
47 | '--style-layers', type=str,
48 | help=('A comma separated list of VGG layers to use for '
49 | 'computing style loss')
50 | )
51 | parser.add_argument(
52 | '--content-weight', type=float, default=1.0,
53 | help='Content loss weight'
54 | )
55 | parser.add_argument(
56 | '--style-weight', type=float, default=1e-4,
57 | help='Style loss weight'
58 | )
59 | parser.add_argument(
60 | '--total-variation-weight', type=float, default=0,
61 | help='Total variation loss weight'
62 | )
63 | parser.add_argument(
64 | '--num-iterations', type=int, default=40000,
65 | help='Number of iterations to train for.'
66 | )
67 | parser.add_argument(
68 | '--batch-size', type=int, default=4,
69 | help='The batch size to train with.'
70 | )
71 | parser.add_argument(
72 | '--learning-rate', type=float, default=0.001,
73 | help='The learning rate.'
74 | )
75 | parser.add_argument(
76 | '--log-interval', type=int, default=10,
77 | help='the interval at which log statements are printed.'
78 | )
79 | parser.add_argument(
80 | '--checkpoint-interval', type=int, default=10,
81 | help='the interval at which model checkpoints are saved.'
82 | )
83 | parser.add_argument(
84 | '--fine-tune-checkpoint', type=str,
85 | help='A checkpoint file to finetune from.'
86 | )
87 | parser.add_argument(
88 | '--alpha', type=float, default=1.0,
89 | help='the width parameter controlling the number of filters'
90 | )
91 | parser.add_argument(
92 | '--norm-by-channels', action='store_true',
93 | help='if present, normalize gram matrix by channel'
94 | )
95 | parser.add_argument(
96 | '--gcs-bucket', type=str,
97 | help='a gcs bucket to save results to.'
98 | )
99 | parser.add_argument(
100 | '--use-small-network', action='store_true',
101 | help=('Use a very small network architecture that works in real time '
102 | 'on some mobile devices using only CPU')
103 | )
104 |
105 | args, unknown = parser.parse_known_args()
106 |
107 | # Set the content and style loss layers.
108 | content_layers = _DEFAULT_CONTENT_LAYERS
109 | if args.content_layers:
110 | content_layers = args.content_layers.split(',')
111 |
112 | style_layers = _DEFAULT_STYLE_LAYERS
113 | if args.style_layers:
114 | style_layers = args.style_layers.split(',')
115 |
116 | style_image_files = args.style_images.split(',')
117 | image_size = [int(el) for el in args.image_size.split(',')]
118 | norm_by_channels = args.norm_by_channels or False
119 |
120 | trainer.train(
121 | args.training_image_dset,
122 | style_image_files,
123 | args.model_checkpoint,
124 | content_layers,
125 | style_layers,
126 | content_weight=args.content_weight,
127 | style_weight=args.style_weight,
128 | total_variation_weight=args.total_variation_weight,
129 | image_size=image_size,
130 | alpha=args.alpha,
131 | batch_size=args.batch_size,
132 | num_iterations=args.num_iterations,
133 | learning_rate=args.learning_rate,
134 | log_interval=args.log_interval,
135 | checkpoint_interval=args.checkpoint_interval,
136 | fine_tune_checkpoint=args.fine_tune_checkpoint,
137 | norm_by_channels=norm_by_channels,
138 | gcs_bucket=args.gcs_bucket,
139 | use_small_network=args.use_small_network,
140 | )
141 | logger.info('Done.')
142 |
--------------------------------------------------------------------------------
/image_segmentation/image_segmentation/build_data.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # =============================================================================
15 |
16 | """Contains common utility functions and classes for building dataset.
17 |
18 | This script contains utility functions and classes to converts dataset to
19 | TFRecord file format with Example protos.
20 | The Example proto contains the following fields:
21 | image/encoded: encoded image content.
22 | image/filename: image filename.
23 | image/format: image file format.
24 | image/height: image height.
25 | image/width: image width.
26 | image/channels: image channels.
27 | image/segmentation/class/encoded: encoded semantic segmentation content.
28 | image/segmentation/class/format: semantic segmentation file format.
29 | """
30 | import collections
31 | import six
32 | import tensorflow as tf
33 |
34 | IMAGE_FORMAT = 'jpeg'
35 | LABEL_FORMAT = 'png'
36 |
37 |
38 | class ImageReader(object):
39 | """Helper class that provides TensorFlow image coding utilities."""
40 |
41 | def __init__(self, image_format='jpeg', channels=3):
42 | """Class constructor.
43 |
44 | Args:
45 | image_format: Image format. Only 'jpeg', 'jpg', or 'png'
46 | are supported.
47 | channels: Image channels.
48 | """
49 | with tf.Graph().as_default():
50 | self._decode_data = tf.placeholder(dtype=tf.string)
51 | self._image_format = image_format
52 | self._session = tf.Session()
53 | if self._image_format in ('jpeg', 'jpg'):
54 | self._decode = tf.image.decode_jpeg(
55 | self._decode_data, channels=channels)
56 | elif self._image_format == 'png':
57 | self._decode = tf.image.decode_png(
58 | self._decode_data, channels=channels)
59 |
60 | def read_image_dims(self, image_data):
61 | """Read the image dimensions.
62 |
63 | Args:
64 | image_data: string of image data.
65 | Returns:
66 | image_height and image_width.
67 | """
68 | image = self.decode_image(image_data)
69 | return image.shape[:2]
70 |
71 | def decode_image(self, image_data):
72 | """Decode the image data string.
73 |
74 | Args:
75 | image_data: string of image data.
76 | Returns:
77 | Decoded image data.
78 | Raises:
79 | ValueError: Value of image channels not supported.
80 | """
81 | image = self._session.run(
82 | self._decode, feed_dict={self._decode_data: image_data})
83 | if len(image.shape) != 3 or image.shape[2] not in (1, 3):
84 |             raise ValueError('The number of image channels is not supported.')
85 |
86 | return image
87 |
88 |
89 | def _int64_list_feature(values):
90 | """Return a TF-Feature of int64_list.
91 |
92 | Args:
93 | values: A scalar or list of values.
94 | Returns:
95 | A TF-Feature.
96 | """
97 | if not isinstance(values, collections.Iterable):
98 | values = [values]
99 |
100 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
101 |
102 |
103 | def _bytes_list_feature(values):
104 | """Return a TF-Feature of bytes.
105 |
106 | Args:
107 | values: A string.
108 | Returns:
109 | A TF-Feature.
110 | """
111 | def norm2bytes(value):
112 | return value.encode() if isinstance(value, str) and six.PY3 else value
113 |
114 | return tf.train.Feature(
115 | bytes_list=tf.train.BytesList(value=[norm2bytes(values)]))
116 |
117 |
118 | def image_seg_to_tfexample(image_data, filename, height, width, seg_data):
119 | """Convert one image/segmentation pair to tf example.
120 |
121 | Args:
122 | image_data: string of image data.
123 | filename: image filename.
124 | height: image height.
125 | width: image width.
126 | seg_data: string of semantic segmentation data.
127 | Returns:
128 | tf example of one image/segmentation pair.
129 | """
130 | return tf.train.Example(
131 | features=tf.train.Features(
132 | feature={
133 | 'image/encoded': _bytes_list_feature(image_data),
134 | 'image/filename': _bytes_list_feature(filename),
135 | 'image/format': _bytes_list_feature(IMAGE_FORMAT),
136 | 'image/height': _int64_list_feature(height),
137 | 'image/width': _int64_list_feature(width),
138 | 'image/channels': _int64_list_feature(3),
139 | 'image/segmentation/class/encoded': (
140 | _bytes_list_feature(seg_data)),
141 | 'image/segmentation/class/format': _bytes_list_feature(
142 | LABEL_FORMAT),
143 | }
144 | )
145 | )
146 |
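147 | # Example usage (a sketch; file paths are illustrative): read an image/mask
148 | # pair from disk and serialize it as a TFRecord Example.
149 | #
150 | #   reader = ImageReader('jpeg', channels=3)
151 | #   image_data = tf.gfile.FastGFile('image.jpg', 'rb').read()
152 | #   seg_data = tf.gfile.FastGFile('mask.png', 'rb').read()
153 | #   height, width = reader.read_image_dims(image_data)
154 | #   example = image_seg_to_tfexample(
155 | #       image_data, 'image.jpg', height, width, seg_data)
156 | #   writer = tf.python_io.TFRecordWriter('output.tfrecord')
157 | #   writer.write(example.SerializeToString())
158 | #   writer.close()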
--------------------------------------------------------------------------------
/image_segmentation/utils/tfrecord_helpers.py:
--------------------------------------------------------------------------------
1 | import io
2 | import sys
3 | import struct
4 | import random
5 | import six
6 | import tensorflow as tf
7 | import numpy
8 | import PIL
9 |
10 |
11 | def iterate_tfrecord(filename, decode=False):
12 | """Iterate through a tfrecord file.
13 |
14 | Args:
15 | filename (str): Filename to iterate.
16 | decode (bool): Optionally pass all records to example decoder function.
17 | False by default.
18 |
19 | Returns: Iterator of tfrecords.
20 | """
21 | for record in tf.python_io.tf_record_iterator(filename):
22 | example = tf.train.Example()
23 | example.ParseFromString(record)
24 | if decode:
25 | yield decode_image_tensor(example)
26 | else:
27 | yield example
28 |
29 |
30 | def save_tfrecords(records, output_filename):
31 | """Save all tfrecord examples to file.
32 |
33 | Args:
34 | records (Iterator[tf.train.Example]): Iterator of records to save.
35 | output_filename (str): Output file to save to.
36 | """
37 | with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
38 | for record in records:
39 | tfrecord_writer.write(record.SerializeToString())
40 |
41 |
42 | def decode_image_tensor(example):
43 | """Takes a tfrecord example and decodes image and mask data.
44 |
45 | Args:
46 | example (tf.train.Example): TF example to decode.
47 |
48 | Returns: dict of decoded mask and image data.
49 | """
50 | feature_dict = example.features.feature
51 | image_value = feature_dict['image/encoded'].bytes_list.value[0]
52 | encoded_mask = feature_dict['image/segmentation/class/encoded']
53 | filename = feature_dict['image/filename'].bytes_list.value[0]
54 | mask_value = encoded_mask.bytes_list.value[0]
55 | mask = numpy.array(PIL.Image.open(io.BytesIO(mask_value)))
56 | height = feature_dict['image/height'].int64_list.value[0]
57 | width = feature_dict['image/width'].int64_list.value[0]
58 | mask_format = (
59 | feature_dict['image/segmentation/class/format'].bytes_list.value[0]
60 | )
61 |
62 | return {
63 | 'image': PIL.Image.open(io.BytesIO(image_value)),
64 | 'mask': mask,
65 | 'height': height,
66 | 'width': width,
67 | 'filename': filename,
68 | 'format': feature_dict['image/format'].bytes_list.value[0],
69 | 'mask_format': mask_format,
70 | }
71 |
72 |
73 | def get_png_string(mask_array):
74 | """Builds PNG string from mask array.
75 |
76 | Args:
77 | mask_array (HxW): Mask array to generate PNG string from.
78 |
79 | Returns: String of mask encoded as a PNG.
80 | """
81 | # Convert the new mask back to an image.
82 | image = PIL.Image.fromarray(mask_array.astype('uint8')).convert('RGB')
83 | # Save the new image to a PNG byte string.
84 | byte_buffer = io.BytesIO()
85 | image.save(byte_buffer, format='png')
86 | byte_buffer.seek(0)
87 | return byte_buffer.read()
88 |
89 |
90 | def update_mask(record, mask_array):
91 | """Update mask in tensorflow example.
92 |
93 | Args:
94 | record (tf.train.Example): Record to update
95 | mask_array (numpy.Array): HxW array of class values.
96 |
97 | Returns: Updated tf.train.Example.
98 | """
99 | def norm2bytes(value):
100 | return value.encode() if isinstance(value, str) and six.PY3 else value
101 |
102 | mask_data = get_png_string(mask_array)
103 | feature = record.features.feature['image/segmentation/class/encoded']
104 | feature.bytes_list.value.pop()
105 | feature.bytes_list.value.append(norm2bytes(mask_data))
106 | return record
107 |
108 |
109 | def get_mask_ratio(example):
110 | total_people_pixels = example['mask'][:, :, 0].sum(axis=None)
111 | return total_people_pixels / (example['height'] * example['width'])
112 |
113 |
114 | def iter_interleave(kaggle, ade20k, coco):
115 | """
116 |     A generator that interleaves the output from one or more iterators
117 | until they are *all* exhausted.
118 |
119 | """
120 | kaggle_finished = False
121 | ade20k_finished = False
122 | coco_finished = False
123 | a, b, c = 0, 0, 0
124 |
125 | while (not kaggle_finished) or (not ade20k_finished) or (not coco_finished):
126 | if not kaggle_finished:
127 | try:
128 | item = kaggle.next()
129 | a += 1
130 | if random.choice([False, True, True]):
131 | yield item
132 | except StopIteration:
133 | print("kaggle finished")
134 | kaggle_finished = True
135 | if not ade20k_finished:
136 | try:
137 | item = ade20k.next()
138 | b += 1
139 | yield item
140 | except StopIteration:
141 | print("ade20k finished")
142 | ade20k_finished = True
143 |
144 | if not coco_finished:
145 | try:
146 | for _ in range(4):
147 | item = coco.next()
148 | c += 1
149 | yield item
150 | except StopIteration:
151 | print("coco finished")
152 | coco_finished = True
153 |
154 | print(a, b, c)
155 |
156 |
157 | def chunk_records(filename, n, start=0):
158 | records = iterate_tfrecord(filename)
159 |     # Skip the first `start` records before chunking.
160 |     for _ in range(start):
161 |         next(records)
162 |     while True:
163 |         try:
164 |             yield [next(records) for _ in range(n)]
165 |         except StopIteration:
166 |             return
167 |
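168 | # Example usage (a sketch; file names are illustrative): zero out the mask of
169 | # every record in a TFRecord file and write the results to a new file.
170 | #
171 | #   def zeroed(filename):
172 | #       for record in iterate_tfrecord(filename):
173 | #           decoded = decode_image_tensor(record)
174 | #           new_mask = numpy.zeros_like(decoded['mask'])
175 | #           yield update_mask(record, new_mask)
176 | #
177 | #   save_tfrecords(zeroed('input.tfrecord'), 'output.tfrecord')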
--------------------------------------------------------------------------------
/style_transfer/create_training_dataset.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import io
4 | import os
5 | import sys
6 | import urllib
7 | import zipfile
8 |
9 | import PIL.Image
10 | import tensorflow as tf
11 |
12 | logger = logging.getLogger('create_training_dataset')
13 |
14 | _COCO_ZIP_URL = 'http://images.cocodataset.org/zips/train2014.zip'
15 |
16 | try:
17 |     raw_input  # Python 2
18 | except NameError:
19 | raw_input = input # Python 3
20 |
21 |
22 | class DatasetCreator(object):
23 | """A class to preprocess images from the COCO training data.
24 |
25 | This does not apply any sort of normalization to images. It simply
26 | transforms and scales image sizes before packing them into an H5 dataset
27 | and saving them to disk.
28 | """
29 |
30 | allowed_formats = {'.jpg', '.jpeg', '.JPG', '.JPEG', '.png', '.PNG'}
31 | max_resize = 16
32 |
33 | @classmethod
34 | def _get_image_filenames(cls, input_dir, num_images):
35 | """Get a list of image filenames from a directory."""
36 | img_list = []
37 | for filename in os.listdir(input_dir):
38 | _, ext = os.path.splitext(filename)
39 | if ext in cls.allowed_formats:
40 | img_list.append(os.path.join(input_dir, filename))
41 | if num_images and len(img_list) > num_images:
42 | break
43 | return img_list
44 |
45 | @staticmethod
46 | def _bytes_feature(value):
47 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
48 |
49 | @classmethod
50 | def process_images(
51 | cls,
52 | input_dir,
53 | output_filename,
54 | num_images=None,
55 | num_threads=1):
56 |         """Process all images in a directory and create a TFRecord data set.
57 |
58 | Args:
59 | input_dir - a directory containing images
60 |             output_filename - the name of the TFRecord file to write to
61 | num_images - the number of images to process. 'None' processes all
62 | num_threads - the number of threads to use. Default 1.
63 | """
64 | img_list = cls._get_image_filenames(input_dir, num_images)
65 | num_images = len(img_list)
66 | # Remove the h5 file if it exists
67 | try:
68 | os.remove(output_filename)
69 | except OSError:
70 | pass
71 |
72 | record_writer = tf.python_io.TFRecordWriter(output_filename)
73 | for idx, filename in enumerate(img_list):
74 | img = PIL.Image.open(filename)
75 | encoded_jpeg = io.BytesIO()
76 | img.save(encoded_jpeg, format='jpeg')
77 | encoded_jpeg.seek(0)
78 |
79 | example = tf.train.Example(features=tf.train.Features(
80 | feature={
81 | 'image/encoded': cls._bytes_feature(encoded_jpeg.read()),
82 | }))
83 | record_writer.write(example.SerializeToString())
84 |         record_writer.close()
85 |
86 |
87 | def download_coco_data(directory):
88 | """Download and extract the COCO image training data set.
89 |
90 | This file is very large (~13GB) so we check with the user to make
91 | sure that is ok.
92 |
93 | Args:
94 | dir - a directory to save the dataset to
95 | """
96 | # This is a really big file so ask the user if they are sure they want
97 | # to start the download.
98 | if not os.path.isdir(directory):
99 | logger.info('Creating directory: %s' % directory)
100 | os.makedirs(directory)
101 |
102 | answer = None
103 | while answer not in {'Y', 'n'}:
104 | answer = raw_input(
105 | 'Are you sure you want to download the COCO dataset? [Y/n] '
106 | )
107 |
108 | if answer == 'n':
109 | sys.exit()
110 |
111 | logger.info('Downloading COCO image data set. This may take a while...')
112 | zip_save_path = os.path.join(directory, 'train2014.zip')
113 | urllib.urlretrieve(_COCO_ZIP_URL, zip_save_path)
114 |
115 | # Files are even bigger to unzip so ask again if they are fine to proceed.
116 | answer = None
117 | while answer not in {'Y', 'n'}:
118 | answer = raw_input(
119 | 'Are you sure you want to unzip things? [Y/n] '
120 | )
121 |
122 | if answer == 'n':
123 | sys.exit()
124 |
125 | logger.info('Unzipping COCO image data set. This may take a while...')
126 | unzip = zipfile.ZipFile(zip_save_path, 'r')
127 | unzip.extractall(directory)
128 | unzip.close()
129 | # Delete the original zipfile
130 | os.remove(zip_save_path)
131 |
132 |
133 | if __name__ == '__main__':
134 | parser = argparse.ArgumentParser(
135 | description=('Create a dataset to use when training the Fritz'
136 | ' Style Transfer model.'))
137 | parser.add_argument(
138 | '--output', type=str, required=True,
139 | help='The name of the resulting dataset.')
140 | parser.add_argument(
141 | '--image-dir', type=str, required=True,
142 | help=('A directory containing images to turn into tfrecords')
143 | )
144 | parser.add_argument(
145 | '--download', action='store_true',
146 | help=('When present, download and extract the COCO image dataset.'
147 | 'Note this is a huge download (~13GB).')
148 | )
149 | parser.add_argument(
150 | '--num-images', type=int, help='The number of images to process.'
151 | )
152 |
153 | args = parser.parse_args()
154 | image_directory = args.image_dir
155 | if args.download:
156 | download_coco_data(image_directory)
157 | image_directory = os.path.join(image_directory, 'train2014')
158 |
160 | DatasetCreator.process_images(
161 | image_directory,
162 | args.output,
163 | num_images=args.num_images
164 | )
165 |
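166 | # Example invocation (a sketch; paths are illustrative). Omit --download if
167 | # the images are already on disk:
168 | #
169 | #   python create_training_dataset.py \
170 | #       --image-dir data/coco \
171 | #       --output data/training_images.tfrecord \
172 | #       --download \
173 | #       --num-images 40000
174 | #
175 | # DatasetCreator.process_images can also be called directly, e.g.
176 | #   DatasetCreator.process_images('data/coco/train2014', 'out.tfrecord')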
--------------------------------------------------------------------------------
/image_segmentation/objectInfo150.txt:
--------------------------------------------------------------------------------
1 | Idx Ratio Train Val Name
2 | 1 0.1576 11664 1172 wall
3 | 2 0.1072 6046 612 building, edifice
4 | 3 0.0878 8265 796 sky
5 | 4 0.0621 9336 917 floor, flooring
6 | 5 0.0480 6678 641 tree
7 | 6 0.0450 6604 643 ceiling
8 | 7 0.0398 4023 408 road, route
9 | 8 0.0231 1906 199 bed
10 | 9 0.0198 4688 460 windowpane, window
11 | 10 0.0183 2423 225 grass
12 | 11 0.0181 2874 294 cabinet
13 | 12 0.0166 3068 310 sidewalk, pavement
14 | 13 0.0160 5075 526 person, individual, someone, somebody, mortal, soul
15 | 14 0.0151 1804 190 earth, ground
16 | 15 0.0118 6666 796 door, double door
17 | 16 0.0110 4269 411 table
18 | 17 0.0109 1691 160 mountain, mount
19 | 18 0.0104 3999 441 plant, flora, plant life
20 | 19 0.0104 2149 217 curtain, drape, drapery, mantle, pall
21 | 20 0.0103 3261 318 chair
22 | 21 0.0098 3164 306 car, auto, automobile, machine, motorcar
23 | 22 0.0074 709 75 water
24 | 23 0.0067 3296 315 painting, picture
25 | 24 0.0065 1191 106 sofa, couch, lounge
26 | 25 0.0061 1516 162 shelf
27 | 26 0.0060 667 69 house
28 | 27 0.0053 651 57 sea
29 | 28 0.0052 1847 224 mirror
30 | 29 0.0046 1158 128 rug, carpet, carpeting
31 | 30 0.0044 480 44 field
32 | 31 0.0044 1172 98 armchair
33 | 32 0.0044 1292 184 seat
34 | 33 0.0033 1386 138 fence, fencing
35 | 34 0.0031 698 61 desk
36 | 35 0.0030 781 73 rock, stone
37 | 36 0.0027 380 43 wardrobe, closet, press
38 | 37 0.0026 3089 302 lamp
39 | 38 0.0024 404 37 bathtub, bathing tub, bath, tub
40 | 39 0.0024 804 99 railing, rail
41 | 40 0.0023 1453 153 cushion
42 | 41 0.0023 411 37 base, pedestal, stand
43 | 42 0.0022 1440 162 box
44 | 43 0.0022 800 77 column, pillar
45 | 44 0.0020 2650 298 signboard, sign
46 | 45 0.0019 549 46 chest of drawers, chest, bureau, dresser
47 | 46 0.0019 367 36 counter
48 | 47 0.0018 311 30 sand
49 | 48 0.0018 1181 122 sink
50 | 49 0.0018 287 23 skyscraper
51 | 50 0.0018 468 38 fireplace, hearth, open fireplace
52 | 51 0.0018 402 43 refrigerator, icebox
53 | 52 0.0018 130 12 grandstand, covered stand
54 | 53 0.0018 561 64 path
55 | 54 0.0017 880 102 stairs, steps
56 | 55 0.0017 86 12 runway
57 | 56 0.0017 172 11 case, display case, showcase, vitrine
58 | 57 0.0017 198 18 pool table, billiard table, snooker table
59 | 58 0.0017 930 109 pillow
60 | 59 0.0015 139 18 screen door, screen
61 | 60 0.0015 564 52 stairway, staircase
62 | 61 0.0015 320 26 river
63 | 62 0.0015 261 29 bridge, span
64 | 63 0.0014 275 22 bookcase
65 | 64 0.0014 335 60 blind, screen
66 | 65 0.0014 792 75 coffee table, cocktail table
67 | 66 0.0014 395 49 toilet, can, commode, crapper, pot, potty, stool, throne
68 | 67 0.0014 1309 138 flower
69 | 68 0.0013 1112 113 book
70 | 69 0.0013 266 27 hill
71 | 70 0.0013 659 66 bench
72 | 71 0.0012 331 31 countertop
73 | 72 0.0012 531 56 stove, kitchen stove, range, kitchen range, cooking stove
74 | 73 0.0012 369 36 palm, palm tree
75 | 74 0.0012 144 9 kitchen island
76 | 75 0.0011 265 29 computer, computing machine, computing device, data processor, electronic computer, information processing system
77 | 76 0.0010 324 33 swivel chair
78 | 77 0.0009 304 27 boat
79 | 78 0.0009 170 20 bar
80 | 79 0.0009 68 6 arcade machine
81 | 80 0.0009 65 8 hovel, hut, hutch, shack, shanty
82 | 81 0.0009 248 25 bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle
83 | 82 0.0008 492 49 towel
84 | 83 0.0008 2510 269 light, light source
85 | 84 0.0008 440 39 truck, motortruck
86 | 85 0.0008 147 18 tower
87 | 86 0.0008 583 56 chandelier, pendant, pendent
88 | 87 0.0007 533 61 awning, sunshade, sunblind
89 | 88 0.0007 1989 239 streetlight, street lamp
90 | 89 0.0007 71 5 booth, cubicle, stall, kiosk
91 | 90 0.0007 618 53 television receiver, television, television set, tv, tv set, idiot box, boob tube, telly, goggle box
92 | 91 0.0007 135 12 airplane, aeroplane, plane
93 | 92 0.0007 83 5 dirt track
94 | 93 0.0007 178 17 apparel, wearing apparel, dress, clothes
95 | 94 0.0006 1003 104 pole
96 | 95 0.0006 182 12 land, ground, soil
97 | 96 0.0006 452 50 bannister, banister, balustrade, balusters, handrail
98 | 97 0.0006 42 6 escalator, moving staircase, moving stairway
99 | 98 0.0006 307 31 ottoman, pouf, pouffe, puff, hassock
100 | 99 0.0006 965 114 bottle
101 | 100 0.0006 117 13 buffet, counter, sideboard
102 | 101 0.0006 354 35 poster, posting, placard, notice, bill, card
103 | 102 0.0006 108 9 stage
104 | 103 0.0006 557 55 van
105 | 104 0.0006 52 4 ship
106 | 105 0.0005 99 5 fountain
107 | 106 0.0005 57 4 conveyer belt, conveyor belt, conveyer, conveyor, transporter
108 | 107 0.0005 292 31 canopy
109 | 108 0.0005 77 9 washer, automatic washer, washing machine
110 | 109 0.0005 340 38 plaything, toy
111 | 110 0.0005 66 3 swimming pool, swimming bath, natatorium
112 | 111 0.0005 465 49 stool
113 | 112 0.0005 50 4 barrel, cask
114 | 113 0.0005 622 75 basket, handbasket
115 | 114 0.0005 80 9 waterfall, falls
116 | 115 0.0005 59 3 tent, collapsible shelter
117 | 116 0.0005 531 72 bag
118 | 117 0.0005 282 30 minibike, motorbike
119 | 118 0.0005 73 7 cradle
120 | 119 0.0005 435 44 oven
121 | 120 0.0005 136 25 ball
122 | 121 0.0005 116 24 food, solid food
123 | 122 0.0004 266 31 step, stair
124 | 123 0.0004 58 12 tank, storage tank
125 | 124 0.0004 418 83 trade name, brand name, brand, marque
126 | 125 0.0004 319 43 microwave, microwave oven
127 | 126 0.0004 1193 139 pot, flowerpot
128 | 127 0.0004 97 23 animal, animate being, beast, brute, creature, fauna
129 | 128 0.0004 347 36 bicycle, bike, wheel, cycle
130 | 129 0.0004 52 5 lake
131 | 130 0.0004 246 22 dishwasher, dish washer, dishwashing machine
132 | 131 0.0004 108 13 screen, silver screen, projection screen
133 | 132 0.0004 201 30 blanket, cover
134 | 133 0.0004 285 21 sculpture
135 | 134 0.0004 268 27 hood, exhaust hood
136 | 135 0.0003 1020 108 sconce
137 | 136 0.0003 1282 122 vase
138 | 137 0.0003 528 65 traffic light, traffic signal, stoplight
139 | 138 0.0003 453 57 tray
140 | 139 0.0003 671 100 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin
141 | 140 0.0003 397 44 fan
142 | 141 0.0003 92 8 pier, wharf, wharfage, dock
143 | 142 0.0003 228 18 crt screen
144 | 143 0.0003 570 59 plate
145 | 144 0.0003 217 22 monitor, monitoring device
146 | 145 0.0003 206 19 bulletin board, notice board
147 | 146 0.0003 130 14 shower
148 | 147 0.0003 178 28 radiator
149 | 148 0.0002 504 57 glass, drinking glass
150 | 149 0.0002 775 96 clock
151 | 150 0.0002 421 56 flag
152 |
--------------------------------------------------------------------------------
/image_segmentation/image_segmentation/dali_pipeline.py:
--------------------------------------------------------------------------------
1 | from nvidia import dali
2 | import nvidia.dali.tfrecord as tfrec
3 | from nvidia.dali import ops
4 | from nvidia.dali import types
5 |
6 |
7 | class CommonPipeline(dali.pipeline.Pipeline):
8 |
9 | def _input(self, tfrecord_path, index_path, shard_id=0):
10 | return ops.TFRecordReader(
11 | path=tfrecord_path,
12 | index_path=index_path,
13 | random_shuffle=True,
14 | features={
15 | 'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
16 | 'image/filename': tfrec.FixedLenFeature((), tfrec.string, ""),
17 | 'image/format': tfrec.FixedLenFeature((), tfrec.string, ""),
18 | 'image/height': tfrec.FixedLenFeature([1], tfrec.int64, -1),
19 | 'image/width': tfrec.FixedLenFeature([1], tfrec.int64, -1),
20 | 'image/channels': tfrec.FixedLenFeature([1], tfrec.int64, -1),
21 | 'image/segmentation/class/encoded': (
22 | tfrec.FixedLenFeature((), tfrec.string, "")
23 | ),
24 | 'image/segmentation/class/format': (
25 | tfrec.FixedLenFeature((), tfrec.string, "")
26 | )
27 | }
28 | )
29 |
30 | def __init__(self,
31 | batch_size,
32 | num_threads,
33 | device_id,
34 | image_size,
35 | tfrecord_path,
36 | index_path,
37 | config,
38 | shard_id=0):
39 |
40 | super(CommonPipeline, self).__init__(batch_size,
41 | num_threads,
42 | device_id)
43 |
44 | self.image_size = image_size
45 | self.input = self._input(tfrecord_path, index_path, shard_id=shard_id)
46 | # The nvjpeg decoder throws an error for some unsupported jpegs.
47 | # until this is fixed, we'll use the host decoder, which runs on the
48 | # CPU.
49 | # self.decode = ops.nvJPEGDecoder(device="mixed",
50 | # output_type=types.RGB)
51 | self.decode = ops.HostDecoder(device="cpu",
52 | output_type=types.RGB)
53 | self.resize = ops.Resize(device="gpu",
54 | image_type=types.RGB,
55 | interp_type=types.INTERP_LINEAR,
56 | resize_x=image_size,
57 | resize_y=image_size)
58 |
59 | self.resize_large = ops.Resize(device="gpu",
60 | image_type=types.RGB,
61 | interp_type=types.INTERP_LINEAR,
62 | resize_x=image_size * config.zoom_scale,
63 | resize_y=image_size * config.zoom_scale)
64 |
65 | self.color_twist = ops.ColorTwist(
66 | device="gpu",
67 | )
68 | self.crop_mirror_normalize = ops.CropMirrorNormalize(
69 | device="gpu",
70 | crop=image_size,
71 | output_dtype=types.FLOAT,
72 | image_type=types.RGB,
73 | output_layout=types.DALITensorLayout.NHWC,
74 | mean=122.5,
75 | std=255.0
76 | )
77 |
78 | self.crop = ops.Crop(
79 | device="gpu",
80 | crop=image_size,
81 | )
82 |
83 | self.cast = ops.Cast(
84 | device="gpu",
85 | dtype=types.DALIDataType.INT64
86 | )
87 | self.rotate = ops.Rotate(
88 | device="gpu",
89 | fill_value=0
90 | )
91 | self.flip = ops.Flip(device="gpu")
92 |
93 | self.coin = ops.CoinFlip(probability=0.5)
94 | self.rotate_rng = ops.Uniform(range=(config.rotate_angle_min,
95 | config.rotate_angle_max))
96 | self.crop_x_rng = ops.Uniform(range=(0.0, config.crop_x_max))
97 | self.crop_y_rng = ops.Uniform(range=(0.0, config.crop_y_max))
98 | self.hue_rng = ops.Uniform(range=(config.hue_min,
99 | config.hue_max))
100 | self.contrast_rng = ops.Uniform(range=(config.contrast_min,
101 | config.contrast_max))
102 | self.saturation_rng = ops.Uniform(range=(config.saturation_min,
103 | config.saturation_max))
104 | self.brightness_rng = ops.Uniform(range=(config.brightness_min,
105 | config.brightness_max))
106 |
107 | self.iter = 0
108 |
109 | def define_graph(self):
110 | inputs = self.input()
111 | angle = self.rotate_rng()
112 | coin = self.coin()
113 | hue = self.hue_rng()
114 | contrast = self.contrast_rng()
115 | saturation = self.saturation_rng()
116 | brightness = self.brightness_rng()
117 | crop_x = self.crop_x_rng()
118 | crop_y = self.crop_y_rng()
119 |
120 | images = self.decode(inputs["image/encoded"])
121 | images = images.gpu()
122 | images = self.resize_large(images)
123 | images = self.rotate(images, angle=angle)
124 | images = self.crop(images, crop_pos_x=crop_x, crop_pos_y=crop_y)
125 | images = self.resize(images)
126 | images = self.color_twist(images,
127 | brightness=brightness,
128 | hue=hue,
129 | saturation=saturation,
130 | contrast=contrast)
131 | images = self.flip(images, horizontal=coin)
132 |
133 | masks = self.decode(inputs["image/segmentation/class/encoded"])
134 | masks = masks.gpu()
135 | masks = self.resize_large(masks)
136 | masks = self.rotate(masks, angle=angle)
137 | masks = self.crop(masks, crop_pos_x=crop_x, crop_pos_y=crop_y)
138 | masks = self.resize(masks)
139 | masks = self.flip(masks, horizontal=coin)
140 |
141 | images = self.crop_mirror_normalize(images)
142 | masks = self.cast(masks)
143 | return (images, masks)
144 |
145 | def iter_setup(self):
146 | pass
147 |
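148 | # Example usage (a sketch): build a pipeline from a TFRecord file and its
149 | # matching DALI index file. `config` is assumed to come from dali_config and
150 | # to expose the augmentation ranges referenced above (zoom_scale,
151 | # rotate_angle_min/max, and the crop/hue/contrast/saturation/brightness
152 | # bounds). File paths are illustrative.
153 | #
154 | #   pipeline = CommonPipeline(
155 | #       batch_size=24,
156 | #       num_threads=2,
157 | #       device_id=0,
158 | #       image_size=768,
159 | #       tfrecord_path='data/living_room/living_room_data.tfrecord',
160 | #       index_path='data/living_room/living_room_data.idx',
161 | #       config=config,
162 | #   )
163 | #   pipeline.build()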
--------------------------------------------------------------------------------
/image_segmentation/README.md:
--------------------------------------------------------------------------------
1 | # Fritz Image Segmentation
2 | A Core ML compatible implementation of semantic segmentation with ICNet in Keras.
3 |
4 | ## Installation
5 |
6 | From this directory, run:
7 |
8 | ```
9 | export PYTHONPATH=$PYTHONPATH:`pwd`
10 | ```
11 |
12 | ## Download Data
13 | The model is trained on the [ADE20K dataset](http://groups.csail.mit.edu/vision/datasets/ADE20K/) provided by MIT. You can download and prepare this data for training using this [handy script](https://github.com/tensorflow/models/blob/master/research/deeplab/datasets/download_and_convert_ade20k.sh) provided in the `TensorFlow/models/research/deeplab` repo on GitHub.
14 |
15 | The dataset contains >20,000 images and corresponding segmentation masks. Masks assign one of 150 categories to each individual pixel of the image. A list of object classes is included in this repo: [objectInfo150.txt]()
16 |
17 | ## Create TFRecord Dataset
18 |
19 | Training requires data to be read from TFRecords, so we'll need to convert the images before we can use them. It's also recommended that you choose fewer than 20 image labels to train on, as performance degrades beyond that point; the full set of 150 class labels is too many. A whitelist of class labels can be passed via the command line as a pipe-separated string. Note that class labels must match those in `objectInfo150.txt` exactly. Examples of valid whitelists are:
20 |
21 | ```
22 | "person|wall|floor, flooring"
23 | "chair|wall|coffee table, cocktail table|ceiling|floor, flooring|bed|lamp|sofa, couch, lounge|windowpane, window|pillow"
24 | ```
25 |
26 | You can also set the `whitelist-threshold` argument to specify the fraction of whitelisted labels that must appear in an image for it to be used in training. For example, if 10 labels are whitelisted and the threshold is set to 0.6, at least 6 of the 10 whitelisted labels must appear in the image for it to be included.
27 |
28 | Let's create a training data set for images with objects you might find in a living room or bedroom.
29 |
30 | ```
31 | export LABEL_SET=living_room
32 | mkdir data/${LABEL_SET}
33 | python create_tfrecord_dataset.py \
34 | -i data/ADEChallengeData2016/images/training/ \
35 | -a data/ADEChallengeData2016/annotations/training/ \
36 | -o data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \
37 | -l data/objectInfo150.txt \
38 | -w "chair|wall|coffee table, cocktail table|ceiling|floor, flooring|bed|lamp|sofa, couch, lounge|windowpane, window|pillow" \
39 | -t 0.6
40 | ```
41 |
42 | This script also automatically outputs a new set of labels and indices in a file named `labels.txt` found in the same directory as the `.tfrecord` output.
43 |
44 | ## Training
45 | The model can be trained using the `train.py` script.
46 |
47 | Before you start, make sure the `image_segmentation` package is on your `$PYTHONPATH`. From the `fritz-models/image_segmentation` directory, run:
48 |
49 | ```
50 | export PYTHONPATH=$PYTHONPATH:`pwd`
51 | ```
52 |
53 | ### Train Locally
54 | Train the model for 10 steps by running:
55 |
56 | ```
57 | export LABEL_SET=living_room
58 | python image_segmentation/train.py \
59 | -d data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \
60 | -l data/${LABEL_SET}/labels.txt \
61 | -n 10 \
62 | -s 768 \
63 | -a 0.25 \
64 | -o data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_025.h5
65 | ```
66 |
67 | By default, a model weights checkpoint is saved every epoch. Note that only weights are saved, not the full model. This is to make it easier to build models for training vs inference.
68 |
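To rebuild the network for inference from a saved weights checkpoint, a minimal sketch (assuming the checkpoint produced by the command above and that `labels.txt` lists one class per line) looks like:

```
from image_segmentation import icnet

# Assumes labels.txt contains one class label per line.
with open('data/living_room/labels.txt') as label_file:
    num_classes = len([line for line in label_file if line.strip()])

# Rebuild the inference graph (train=False) and load the saved weights.
model = icnet.ICNetModelFactory.build(
    768,                # image size used during training
    num_classes,
    alpha=0.25,         # must match the alpha used during training
    weights_path='data/living_room/living_room_icnet_768x768_025.h5',
    train=False,
)
```
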
69 | ### Training on Google Cloud ML
70 | Zip up all of the local files to send up to Google Cloud.
71 |
72 | ```
73 | # from fritz-models/image_segmentation/
74 | python setup.py sdist
75 | ```
76 | Run the training job.
77 |
78 | ```
79 | export LABEL_SET=living_room
80 | export YOUR_GCS_BUCKET=
81 | gcloud ml-engine jobs submit training `whoami`_image_segmentation_`date +%s` \
82 | --runtime-version 1.9 \
83 | --job-dir=gs://${YOUR_GCS_BUCKET} \
84 | --packages dist/image_segmentation-1.0.tar.gz \
85 | --module-name image_segmentation.train \
86 | --region us-east1 \
87 | --scale-tier basic_gpu \
88 | -- \
89 | -d gs://${YOUR_GCS_BUCKET}/data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \
90 | -l gs://${YOUR_GCS_BUCKET}/data/${LABEL_SET}/labels.txt \
91 | -o ${LABEL_SET}_768x768_025.h5 \
92 | --image-size 768 \
93 | --alpha 0.25 \
94 | --num-steps 5000 \
95 | --batch-size 24 \
96 | --model-name ${LABEL_SET} \
97 | --gcs-bucket gs://${YOUR_GCS_BUCKET}/train
98 | ```
99 |
100 | ## Converting to Core ML
101 | The resulting Keras model can be converted using the script provided. It uses the standard `coremltools` package, but removes the additional model output nodes used for training.
102 |
103 | ```
104 | python convert_to_coreml.py --alpha 0.25 ${LABEL_SET}_768x768_025.h5 ${LABEL_SET}_768x768_025.mlmodel
105 | ```
106 |
107 | Once you've got your Core ML model, you can use [Fritz](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models&utm_content=image-segmentation) to integrate, deploy, and manage it in your app. For more tutorials on mobile machine learning, check out [Heartbeat](https://heartbeat.fritz.ai?utm_source=github&utm_campaign=fritz-models&utm_content=image-segmentation).
108 |
109 | ## Benchmarks
110 | On a Google Cloud Compute GPU instance with a single K80, a single epoch containing roughly 1600 768x768 images takes 20 minutes. Average cross-categorical accuracy reached >80% after 12 hours. An additional 3 hours of training with a learning rate of 0.00001 increased accuracy to ~87%. Inferences with a 768x768 model can be made at 8-9fps on an iPhone X.
111 |
112 | ## Example - Living Room Objects
113 |
114 |
115 |
116 |
117 |
118 | Download the [mlmodel](https://github.com/fritzlabs/fritz-models/blob/master/image_segmentation/examples/icnet_768x768_living_room.mlmodel).
119 |
120 | ## Additional resources
121 |
122 | * [Original ICNet Implementation](https://github.com/hszhao/ICNet)
123 | * [Keras-ICNet](https://github.com/aitorzip/Keras-ICNet)
124 | * [ICNet-tensorflow](https://github.com/hellochick/ICNet-tensorflow)
125 |
--------------------------------------------------------------------------------
/resources/AI_Landscape.md:
--------------------------------------------------------------------------------
1 | # AI and Machine Learning Landscape
2 |
3 | ## Contribute to this list
4 |
5 | The AI and Machine Learning landscape is rapidly changing. We welcome additions and changes to this list!
6 |
7 | ## Data Labeling
8 |
9 | * [Labelbox](https://www.labelbox.com/) - Platform for creating and managing training data
10 |
11 | * [Alegion](https://alegion.com/) - Training platform to build datasets, manage, and more
12 |
13 | * [Clickworker](https://www.clickworker.com/) - Micro tasking marketplace, catering data management and web research services as well as AI algorithm training
14 |
15 | * [Figure Eight](https://www.figure-eight.com/) - Training platform that helps turn data into trainable sets
16 |
17 | * [Gengo AI](https://gengo.ai/) - Training platform for language-based ML tasks
18 |
19 | * [Mighty AI](https://mighty.ai/) - Training data management for Computer Vision tasks
20 |
21 | * [Scale](https://scale.ai/) - Training data API largely focused on Computer Vision tasks
22 |
23 | * [CloudSight](https://cloudsight.ai/) - Image recognition API for digital media
24 |
25 | * [Hive](https://thehive.ai/) - Data labeling, Computer Vision models, and media platform
26 |
27 | * [Microwork](https://microwork.io/) - Image and video annotation services for AI
28 |
29 | ## Synthetic Data
30 |
31 | * [AI.Reverie](https://aireverie.com/) - Simulation platform that generates synthetic data to train and improve ML models
32 |
33 | * [Neuromation](https://www.neuromation.io/) - Distributed computing platform for deep learning applications and synthetic data generation
34 |
35 | ## Feature Engineering
36 |
37 | * [Feature Labs](https://www.featurelabs.com/) - Automated feature engineering platform for enterprise
38 |
39 | * [Featuretools](https://www.featuretools.com/) - Open source Python framework for automated feature engineering - a product of Feature Labs
40 |
41 | * [Source{d}](https://sourced.tech/) - Machine learning for large scale code analysis
42 |
43 | ## Training
44 |
45 | * [ClusterOne](https://clusterone.com/) - Deep learning platform that allows you to train your models on distributed GPUs and CPUs without setup or maintenance
46 |
47 | * [DataBricks](https://databricks.com/) - Unified Analytics Platform that accelerates innovation by unifying data science, engineering and business
48 |
49 | * [DAWNBench](https://dawn.cs.stanford.edu/benchmark/index.html) - Benchmark suite for end-to-end deep learning training and inference out of Stanford
50 |
51 | * [Hyperopt](https://hyperopt.github.io/hyperopt/) - Distributed asynchronous hyperparameter optimization in Python
52 |
53 | * [Lambda Labs](https://lambdalabs.com/) - Workstations, Servers, Laptops, and GPU cloud built for Deep Learning
54 |
55 | * [PaddlePaddle](http://www.paddlepaddle.org/) - An open-source deep learning platform with a simple API
56 |
57 | * [Paperspace](https://www.paperspace.com/) - GPU cloud platform (w/ API), AI/ML infrastructure product
58 |
59 | * [RiseML](https://riseml.com/) - Machine Learning Platform for Kubernetes (:cry: - Sunsetting March 31, 2019)
60 |
61 | * [Spell](https://www.spell.run/) - Infrastructure for AI and deep learning experiments and collaboration
62 |
63 | * [Trifacta](https://www.trifacta.com/start-wrangling/) - Data preparation and cleaning platform
64 |
65 | * [Yellowfin](https://www.yellowfinbi.com/) - Integrated data analytics platform
66 |
67 | ## Model Serving / Deployment
68 |
69 | * [5 Analytics](https://www.5analytics.com/index.html) - Enterprise AI platform to integrate, deploy, and monitor ML models
70 |
71 | * [Algorithmia](https://algorithmia.com/) - Machine learning model management platform
72 |
73 | * [Numericcal](https://www.numericcal.com/) - Platform to automate model optimization and management on mobile and IoT
74 |
75 | * [Seldon](https://www.seldon.io/) - Machine learning deployment platform for Enterprise
76 |
77 | * [Vertex AI](http://vertex.ai/) - PlaidML is an open source tensor compiler
78 |
79 | * [Alteryx](https://www.alteryx.com/platform) - Integrated analytics platform built to foster partnerships between IT, analytics teams, and businesses
80 |
81 | * [Datatron](https://www.datatron.com/) - Management platform for ML, AI, and Data Science models
82 |
83 | ## Model Management
84 |
85 | * [Datmo](https://datmo.com/) - Workflow tools to help you experiment, deploy, and scale AI solutions
86 |
87 | * [Iterative AI](https://iterative.ai/) - CI workflow for machine learning projects
88 |
89 | * [MLFlow](https://mlflow.org/) - Open source ML lifecycle platform
90 |
91 | * [MLPerf](https://mlperf.org/) - A broad ML benchmark suite for measuring performance of ML software frameworks, ML hardware accelerators, and ML cloud platforms
92 |
93 | * [Neptune](https://neptune.ml/) - Platform to build ML models, manage infrastructure and dev environments, and team collaboration tools
94 |
95 | * [ParallelM](https://www.parallelm.com/) - Platform to deploy and optimize ML models at scale
96 |
97 | ## End-to-End ML Platforms
98 |
99 | * [Allegro](https://allegro.ai/) - Computer Vision Deep Learning platform
100 |
101 | * [Cnvrg.io](https://cnvrg.io/) - Full stack data science and ML platform
102 |
103 | * [Determined AI](https://determined.ai/) - Platform designed to streamline deep learning workflows
104 |
105 | * [FloydHub](https://www.floydhub.com/) - Deep learning model pipeline
106 |
107 | * [SherlockML](https://sherlockml.com/) - Data science developer environment
108 |
109 | * [BigML](https://bigml.com/) - Comprehensive ML workflow platform
110 |
111 | * [Dataiku](https://www.dataiku.com/) - End-to-end collaborative data science platform
112 |
113 | * [Metis Machine](https://metismachine.com/) - Skafos Machine Learning Platform to help develop and deploy ML pipelines at scale
114 |
115 | * [Valohai](https://valohai.com/) - Deep learning management platform
116 |
117 | * [Dataspine](https://dataspine.io/) - Automation platform for ML workflows
118 |
119 | * [PipelineAI](https://pipeline.ai/) - Flexible end-to-end ML pipeline platform
120 |
121 | * [Deep Cognition](https://deepcognition.ai/) - Deep learning management platform
122 |
123 | * [Polyaxon](https://polyaxon.com/) - Open source platform for reproducible machine learning at scale
124 |
125 | * [Clarifai](https://www.clarifai.com/) - ML platform built for Computer Vision problems
126 |
127 | * [Comet.ml](https://www.comet.ml/) - ML platform to track datasets, training, and more
128 |
129 | * [DeepSense.ai](https://deepsense.ai/) - AI solution management platform
130 |
131 | * [H2O AI](https://h2o.ai) - Open source ML platform
132 |
133 | * [DataRobot](https://www.datarobot.com/) - Automated ML platform for predictive modeling
134 |
135 | * [Fritz](https://fritz.ai/) - End-to-end platform designed to help mobile developers and ML engineers train and deploy models in mobile apps
136 |
137 | ## Other
138 |
139 | * [Element AI](https://www.elementai.com/) - Use case-based AI software
140 |
141 | * [Ever AI](https://ever.ai/) - Facial recognition and attribute identification SDK and API
142 |
143 | * [Deepomatic](http://www.deepomatic.com/) - Image-based, computer vision AI solutions platform
144 |
145 | * [Leverege](https://www.leverege.com/) - Complete IoT development and deployment platform
146 |
147 | * [Nuance](https://www.nuance.com/omni-channel-customer-engagement/technologies/artificial-intelligence.html) - Conversational AI for smart customer engagement
148 |
149 | * [Cortical.io](https://www.cortical.io) - Intelligent text processing solution
150 |
--------------------------------------------------------------------------------
/image_segmentation/utils/model_helpers.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | import os
3 |
4 | import coremltools
5 | import tensorflow as tf
6 | import PIL.Image
7 | import skimage.transform
8 | import skimage.filters
9 | import numpy
10 | from tensorflow.python.platform import gfile
11 | from image_segmentation import data_generator
12 | import image_segmentation
13 | import requests
14 | from io import BytesIO
15 |
16 |
17 | class ModelParameters(object):
18 |
19 | def __init__(self, **params):
20 | self.label_set = params['label_set']
21 | self.batch_size = params['batch_size']
22 | self.resolution = params['resolution']
23 | self.alpha = params['alpha']
24 | self.labels = params['labels']
25 | self.num_classes = len(self.labels)
26 | self.gcs_bucket = params.get('gcs_bucket')
27 | self._training_data_path = params.get('training_data_path')
28 | self._model_path = params.get('model_path')
29 |
30 | self.file_base = params.get(
31 | 'file_base',
32 | f'{self.label_set}_{self.resolution}x{self.resolution}_1'
33 | )
34 |
35 | @property
36 | def training_data_path(self):
37 | if self._training_data_path:
38 | return self._training_data_path
39 |
40 | return (
41 | '../fritz-image-segmentation/data/'
42 | '{label_set}/{label_set}.tfrecord'
43 | ).format(label_set=self.label_set)
44 |
45 | @property
46 | def model_path(self):
47 | if self._model_path:
48 | return self._model_path
49 |
50 | return (
51 | f'gs://{self.gcs_bucket}/train/{self.file_base}.h5'
52 | )
53 |
54 |
55 | class TrainedModel(object):
56 |
57 | def __init__(self, model_parameters):
58 | self._params = model_parameters
59 | resolution = model_parameters.resolution
60 |
61 | self.dataset = data_generator.ADE20KDatasetBuilder.build(
62 | self._params.training_data_path,
63 | self._params.batch_size,
64 | (resolution, resolution),
65 | self._params.num_classes,
66 | augment_images=False,
67 | repeat=False
68 | )
69 |
70 | self._model = None
71 |
72 | def download_and_build_model(self):
73 | temp_h5 = tempfile.NamedTemporaryFile(suffix='.h5')
74 | print("Loading model")
75 | # with gfile.Open(self._params.model_path, 'rb') as fid:
76 | # temp_h5.file.write(fid.read())
77 | # temp_h5.seek(0)
78 |
79 | return image_segmentation.icnet.ICNetModelFactory.build(
80 | self._params.resolution,
81 | self._params.num_classes,
82 | alpha=self._params.alpha,
83 | weights_path=self._params.model_path,
84 | train=False
85 | )
86 |
87 | @property
88 | def model(self):
89 | if self._model is None:
90 | self._model = self.download_and_build_model()
91 |
92 | return self._model
93 |
94 | def iterate_images(self):
95 | iterator = self.dataset.make_one_shot_iterator()
96 | el = iterator.get_next()
97 |
98 | try:
99 | with tf.Session() as sess:
100 | while True:
101 | out = sess.run([el])
102 | for i in range(out[0]['image'].shape[0]):
103 | image = out[0]['image'][i]
104 | mask = out[0]['mask'][i]
105 | yield (image, mask)
106 | except tf.errors.OutOfRangeError:
107 | return
108 |
109 | def training_images(self, num_images=10, start_index=0):
110 | results = []
111 | for i, (image, mask) in enumerate(self.iterate_images()):
112 | if i < start_index:
113 | continue
114 |
115 | if len(results) >= num_images:
116 | break
117 | results.append((image, mask))
118 |
119 | return results
120 |
121 | def run_prediction(self, img_path=None, img_data=None, img_url=None,
122 | img=None):
123 | if img_url:
124 | response = requests.get(img_url)
125 | img = PIL.Image.open(BytesIO(response.content))
126 | elif img_path:
127 | img = PIL.Image.open(img_path)
128 |
129 |         if img is not None and img_data is None:
130 | img = img.resize((self._params.resolution,
131 | self._params.resolution))
132 | img_data = numpy.array(img)
133 | img_data = img_data * 1. / 255. - 0.5
134 | img_data = skimage.filters.gaussian(img_data, sigma=0.0)
135 | elif img_data is None:
136 | raise Exception("Must either pass image data or a path to image")
137 |
138 | return self.model.predict(img_data[None, :, :, :])
139 |
140 | def predict_and_plot(self, img_path=None, img_data=None, img_url=None,
141 | mask=None, probabilities=True):
142 | if img_url:
143 | response = requests.get(img_url)
144 | img = PIL.Image.open(BytesIO(response.content))
145 | img = img.resize((self._params.resolution,
146 | self._params.resolution))
147 | if img_path:
148 | img = PIL.Image.open(img_path)
149 | img = img.resize((self._params.resolution,
150 | self._params.resolution))
151 | elif img_data is not None:
152 | img = ((img_data + 0.5) * 255).astype('uint8')
153 |
154 | output = self.run_prediction(img_path=img_path, img_data=img_data,
155 | img_url=img_url)
156 |
157 | figure = image_segmentation.utils.plot_image_and_mask(
158 | numpy.array(img),
159 | output[0],
160 | reference_mask=mask,
161 | alpha=0.9,
162 | small=True)
163 | generated_figures = [figure]
164 |
165 | if probabilities:
166 | generated_figures.append(
167 | image_segmentation.utils.plot_pixel_probabilities(
168 | output[0],
169 | self._params.labels
170 | )
171 | )
172 |
173 | return output, generated_figures
174 |
175 | def calculate_error(self, results, mask):
176 |         # Keep the mask at its native resolution; the predicted results
177 |         # are resized to match it below.
178 |         resized_mask = mask[:, :, 0]
179 |
180 |
181 | resized_results = skimage.transform.resize(
182 | numpy.argmax(results, axis=-1),
183 | mask.shape[:2],
184 | preserve_range=True,
185 | anti_aliasing=False,
186 | order=0)
187 |
188 | diff = resized_mask - resized_results
189 |
190 | success_rate = []
191 | for i, label in enumerate(self._params.labels):
192 | total_class_values = numpy.sum(resized_mask == i)
193 | if not total_class_values:
194 | continue
195 |
196 | incorrect = float(numpy.count_nonzero(diff[resized_mask == i]))
197 | true_positive = float(numpy.sum(diff[resized_mask == i] == 0))
198 | iou = true_positive / (true_positive + incorrect)
199 | success_rate.append((i, total_class_values, true_positive, iou))
200 | print(f"{label} - total: {total_class_values}, IoU: {iou}")
201 |
202 | mean_iou = (
203 | sum([iou for _, _, _, iou in success_rate]) / len(success_rate)
204 | )
205 | print(f"mIoU: {mean_iou}")
206 | return success_rate
207 |
208 | def convert_to_coreml(self, mlmodel_path='./'):
209 | mlmodel = coremltools.converters.keras.convert(
210 | self.model,
211 | input_names='image',
212 | image_input_names='image',
213 | image_scale=1.0 / 255.0,
214 | red_bias=-0.5,
215 | green_bias=-0.5,
216 | blue_bias=-0.5,
217 | output_names='output'
218 | )
219 | mlmodel_file_path = (
220 | os.path.join(mlmodel_path, self._params.file_base + '.mlmodel')
221 | )
222 | mlmodel.save(mlmodel_file_path)
223 | print(f"successfully saved {mlmodel_file_path}")
224 |
--------------------------------------------------------------------------------
/style_transfer/README.md:
--------------------------------------------------------------------------------
1 | # Fritz Style Transfer
2 | Code for training artistic style transfer models with Keras and converting them to Core ML.
3 |
4 |
5 |
6 | Left: Original image. Middle: Image stylized with a 17KB small model. Right: Image stylized by the default large model.
7 |
8 | # Add style transfer to your app in minutes with Fritz
9 |
10 | If you're looking to add style transfer to your app quickly, check out [Fritz](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models&utm_content=style-transfer). The Fritz SDK provides 11 pre-trained style transfer models along with all the code you need to apply them to images or live video. If you want to train your own model, keep reading.
11 |
12 | # 11-21-2018: Train your own custom style model in 20 minutes
13 |
14 | You can now train your own personal style transfer model in about 20 minutes using Fritz Style Transfer and Google Colab. Just create your own playground from [this notebook](https://colab.research.google.com/drive/1nDkxLKBgZGFscGoF0tfyPMGqW03xITl0#scrollTo=L9aTwLIqtFTE) to get started. You can read more about how it works [here](https://heartbeat.fritz.ai/20-minute-masterpiece-4b6043fdfff5?utm_source=github&utm_campaign=fritz-models&utm_content=style-transfer).
15 |
16 | # Installation
17 |
18 | If you're not installing using a package manager like `pip`, make sure the root directory is on your `PYTHONPATH`:
19 |
20 | ```
21 | export PYTHONPATH=$PYTHONPATH:`pwd`
22 | ```
23 |
24 | # Preprocessing Training Data
25 | The training data comes from the [COCO Training data set](http://cocodataset.org/). It consists of ~80,000 images and labels, although the labels aren't used here.
26 |
27 | The `create_training_dataset.py` script will download and unzip this data, then process the images to create a TFRecord dataset used by the style transfer network trainer. You can run it with the command below. Note that the first time you run this, you will need to download and unzip 13GB worth of data, which can take a while. The command only processes the first 10 images to make sure things are working, but you can modify `--num-images` to process more.
28 |
29 | ```
30 | python create_training_dataset.py \
31 | --output example/training_images.tfrecord \
32 | --image-dir path/to/coco/ \
33 | --num-images 10
34 | ```
35 |
36 | Note that if you have already downloaded and extracted a set of images to use for training, that directory needs to be called `train2014/` and you need to point `--coco-image-dir` to the parent directory that contains that folder. Otherwise you can use the `--download` flag.
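For reference, the expected directory layout looks something like this (the image file names are just illustrative):

```
path/to/coco/
|-- train2014/
|   |-- COCO_train2014_000000000009.jpg
|   |-- COCO_train2014_000000000025.jpg
|   |-- ...
```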
37 |
38 | # Training a Style Transfer Model
39 |
40 | To train the model from scratch for 10 iterations as a quick check that everything works:
41 |
42 | ```
43 | python style_transfer/train.py \
44 | --training-image-dset example/training_images.tfrecord \
45 | --style-images example/starry_night.jpg \
46 | --model-checkpoint example/starry_night.h5 \
47 | --image-size 256,256 \
48 | --alpha 0.25 \
49 | --log-interval 1 \
50 | --num-iterations 10
51 | ```
52 |
53 | If everything looks good, we can pick up where we left off and keep training the same model.
54 |
55 | ```
56 | python style_transfer/train.py \
57 | --training-image-dset example/training_images.tfrecord \
58 | --style-images example/starry_night.jpg \
59 | --model-checkpoint example/starry_night.h5 \
60 | --image-size 256,256 \
61 | --alpha 0.25 \
62 | --num-iterations 1000 \
63 | --fine-tune-checkpoint example/starry_night.h5
64 | ```
65 |
66 | If you're using the full COCO dataset, you'll need around 20,000 iterations to train a model from scratch with a batch size of 24. If you're starting from a pre-trained model checkpoint, 5,000 steps should work. A model pre-trained on Starry Night is provided in the `example/` folder.
67 |
68 | For styles that are abstract with strong geometric patterns, try higher values for `--content-weight` like `3` or `10`. For styles that are more photo-realistic images with smaller details, boost the `--style-weight` to `0.001` or more.
69 |
70 | Finally, note that images are resized to 256x256px during training only. Final models can be set to take images of any size.
71 |
72 | ## Training models for mobile
73 |
74 | By default, the style transfer networks produced here are roughly 7MB in size and contain 7 million parameters. They can create a stylized image in ~500ms on high-end mobile phones, and around 5s on lower-end phones. To make the models faster, we've included a width-multiplier parameter similar to the one introduced by Google in their MobileNet architecture. The value `alpha` can be set between 0 and 1 and controls how many filters are included in each layer. Lower `alpha` means fewer filters, fewer parameters, and faster models, with slightly worse style transfer quality. In testing, `alpha=0.25` produced models that ran at 17fps on an iPhone X while still transferring styles well.
75 |
76 | Finally, for models that are intended to run in real time on a CPU only, you can use the `--use-small-network` flag to train a model architecture that has been heavily pruned. The style transfer itself isn't quite as good, but the results are usable and the models are incredibly small; see the sketch below for how these options map to the network definitions.
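To get a feel for how these options change the network, here is a minimal sketch (not part of the training scripts) that builds the architectures defined in `style_transfer/models.py` at a few widths and prints their parameter counts. The exact counts depend on your Keras version, and the mapping of `--use-small-network` to `SmallStyleTransferNetwork` is our assumption:

```
from style_transfer.models import StyleTransferNetwork, SmallStyleTransferNetwork

# Default architecture at full width and at alpha=0.25, plus the pruned
# architecture (assumed to correspond to --use-small-network).
full_model = StyleTransferNetwork.build((256, 256), alpha=1.0)
slim_model = StyleTransferNetwork.build((256, 256), alpha=0.25)
small_model = SmallStyleTransferNetwork.build((256, 256), alpha=0.3)

# Lower alpha means fewer filters per layer and far fewer parameters.
for name, model in [('alpha=1.0', full_model),
                    ('alpha=0.25', slim_model),
                    ('small, alpha=0.3', small_model)]:
    print(name, model.count_params())
```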
77 |
78 | # Stylizing Images
79 | To stylize an image with a trained model you can run:
80 |
81 | ```
82 | python stylize_image.py \
83 | --input-image example/dog.jpg \
84 | --output-image example/stylized_dog.jpg \
85 | --model-checkpoint example/starry_night_256x256_025.h5
86 | ```
87 |
88 | # Convert to Mobile
89 | Style transfer models can be converted to both Core ML and TensorFlow Mobile formats.
90 |
91 | ## Convert to Core ML
92 | Use the converter script to convert to Core ML.
93 |
94 | This converter is a slight modification of Apple's keras converter that allows
95 | the user to define custom conversions between Keras layers and Core ML layers. This allows us to convert the Instance Normalization and Deprocessing layers.
96 |
97 | ```
98 | python convert_to_coreml.py \
99 | --keras-checkpoint example/starry_night_256x256_025.h5 \
100 | --alpha 0.25 \
101 | --image-size 640,480 \
102 | --coreml-model example/starry_night_640x480_025.mlmodel
103 | ```
104 |
105 | ## Convert to TensorFlow Mobile
106 | Models cannot be converted to TFLite because some operations are not supported, but TensorFlow Mobile works fine. To convert your model to an optimized frozen graph, run:
107 |
108 | ```
109 | python convert_to_tfmobile.py \
110 | --keras-checkpoint example/starry_night_256x256_025.h5 \
111 | --alpha 0.25 \
112 | --image-size 640,480 \
113 | --output-dir example/
114 | ```
115 |
116 | This produces a number of TensorFlow graph formats. The `*_optimized.pb` graph file is the one you want to use with your app. Note that the input node name is `input_1` and the output node name is `deprocess_stylized_image_1/mul`.
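If you want to sanity-check the frozen graph before integrating it into an app, the sketch below runs it with plain TensorFlow using the node names above. The file paths, the 640x480 resolution, and the assumption that the graph takes and returns raw 0-255 pixel values are illustrative and may need adjusting for your model:

```
import numpy as np
import PIL.Image
import tensorflow as tf

# Load the optimized frozen graph produced by convert_to_tfmobile.py.
graph_def = tf.GraphDef()
with tf.gfile.GFile('example/starry_night_640x480_025_optimized.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')
    input_tensor = graph.get_tensor_by_name('input_1:0')
    output_tensor = graph.get_tensor_by_name('deprocess_stylized_image_1/mul:0')

    # Resize to the resolution the graph was exported with (H=640, W=480 here).
    img = PIL.Image.open('example/dog.jpg').resize((480, 640))
    batch = np.expand_dims(np.array(img, dtype=np.float32), 0)

    with tf.Session(graph=graph) as sess:
        stylized = sess.run(output_tensor, feed_dict={input_tensor: batch})

PIL.Image.fromarray(stylized[0].astype('uint8')).save('example/stylized_dog_tf.jpg')
```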
117 |
118 | # Train on Google Cloud ML
119 |
120 | This library is designed to work with certain configurations on Google Cloud ML so you can train styles in parallel and take advantage of GPUs. Assuming you have Google Cloud ML and Google Cloud Storage set up, the following commands will get you training new models in just a few hours.
121 |
122 | ## Set up your Google Cloud Storage bucket.
123 |
124 | This repo assumes the following structure on Google Cloud Storage:
125 |
126 | ```
127 | gs://${YOUR_GCS_BUCKET}/
128 | |-- data/
129 | |-- training_images.tfrecord
130 | |-- starry_night_256x256_025.h5
131 | |-- style_images/
132 | |-- style_1.jpg
133 | |-- style_2.jpg
134 | |-- dist/
135 | |-- fritz_style_transfer.zip
136 | |-- train/
137 | |-- pretrained_model.h5
138 | |-- output_model.h5
139 | ```
140 |
141 | To make things easier, start by setting some environmental variables.
142 |
143 | ```
144 | export YOUR_GCS_BUCKET=your_gcs_bucket
145 | export FRITZ_STYLE_TRANSFER_PATH=/path/to/fritz-models/style_transfer/
146 | export KERAS_CONTRIB_PATH=/path/to/keras-contrib
147 | export STYLE_NAME=style_name
148 | ```
149 |
150 | Note that `STYLE_NAME` should be the filename of the style image (without the extension).
151 |
152 | Create the GCS bucket if you haven't already:
153 |
154 | ```
155 | gsutil mb gs://${YOUR_GCS_BUCKET}
156 | ```
157 |
158 | Copy the training data, pre-trained checkpoint, and style image to GCS:
159 | ```
160 | gsutil cp example/training_images.tfrecord gs://${YOUR_GCS_BUCKET}/data
161 | gsutil cp example/${STYLE_NAME}.jpg gs://${YOUR_GCS_BUCKET}/data/style_images/
162 | gsutil cp example/starry_night_256x256_025.h5 gs://${YOUR_GCS_BUCKET}/data/
163 | ```
164 |
165 | ## Package up libraries.
166 |
167 | Zip up all of the local files to send up to Google Cloud.
168 | ```
169 | python setup.py sdist
170 | ```
171 |
172 | Zip up keras_contrib so it's available to the library as well.
173 | ```
174 | pushd ${KERAS_CONTRIB_PATH}
175 | python setup.py sdist
176 | cp dist/* ${FRITZ_STYLE_TRANSFER_PATH}/dist/
177 | popd
178 | ```
179 |
180 | ## Start the training job
181 |
182 | The following command will start training a new style transfer model from a pre-trained checkpoint. This configuration trains on 256x256 images and has `--alpha=0.25`, making it suitable for real-time use in mobile apps.
183 |
184 | ```
185 | gcloud ml-engine jobs submit training `whoami`_style_transfer`date +%s` \
186 | --runtime-version 1.8 \
187 | --job-dir=gs://${YOUR_GCS_BUCKET} \
188 | --packages dist/style_transfer-1.0.tar.gz,dist/keras_contrib-2.0.8.tar.gz \
189 | --module-name style_transfer.train \
190 | --region us-east1 \
191 | --scale-tier basic_gpu \
192 | -- \
193 | --training-image-dset gs://${YOUR_GCS_BUCKET}/data/training_images.tfrecord \
194 | --style-images gs://${YOUR_GCS_BUCKET}/data/style_images/${STYLE_NAME}.jpg \
195 | --model-checkpoint ${STYLE_NAME}_256x256_025.h5 \
196 | --image-size 256,256 \
197 | --alpha 0.25 \
198 | --num-iterations 5000 \
199 | --batch-size 24 \
200 | --content-weight 1 \
201 | --style-weight .0001 \
202 | --gcs-bucket gs://${YOUR_GCS_BUCKET}/train \
203 | --fine-tune-checkpoint gs://${YOUR_GCS_BUCKET}/data/starry_night_256x256_025.h5
204 | ```
205 |
206 | Distributed training and TPUs are not yet supported.
207 |
208 | # Add the model to your app with Fritz
209 |
210 | Now that you have a style transfer model that works for both iOS and Android, head over to [https://fritz.ai](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models&utm_content=style-transfer) for tools to help you integrate it into your app and manage it over time.
211 |
--------------------------------------------------------------------------------
/style_transfer/style_transfer/models.py:
--------------------------------------------------------------------------------
1 | import keras
2 | import keras_contrib
3 | import logging
4 |
5 | from style_transfer import layers
6 | from style_transfer import utils
7 |
8 | logger = logging.getLogger('models')
9 |
10 |
11 | class StyleTransferNetwork(object):
12 | """A class that builds a Keras model to perform style transfer.
13 |
14 | The architecture for this model comes from Johnson et al:
15 | https://arxiv.org/abs/1603.08155
16 | https://cs.stanford.edu/people/jcjohns/papers/fast-style/fast-style-supp.pdf
17 |
18 | It differs slightly from Johnson's model by swapping reflective
19 | padding with Zero Padding and Batch Normalization for
20 | Instance Normalization as recommended in Ulyanov et al:
21 | https://arxiv.org/abs/1607.08022
22 | """
23 |
24 | @classmethod
25 | def build(
26 | cls,
27 | image_size,
28 | alpha=1.0,
29 | input_tensor=None,
30 | checkpoint_file=None):
31 | """Build a Transfer Network Model using keras' functional API.
32 |
33 | Args:
34 | image_size - the size of the input and output image (H, W)
35 | alpha - a width parameter to scale the number of channels by
36 |
37 | Returns:
38 | model: a keras model object
39 | """
40 | x = keras.layers.Input(
41 | shape=(image_size[0], image_size[1], 3), tensor=input_tensor)
42 | out = cls._convolution(x, int(alpha * 32), 9, strides=1)
43 | out = cls._convolution(out, int(alpha * 64), 3, strides=2)
44 | out = cls._convolution(out, int(alpha * 128), 3, strides=2)
45 | out = cls._residual_block(out, int(alpha * 128))
46 | out = cls._residual_block(out, int(alpha * 128))
47 | out = cls._residual_block(out, int(alpha * 128))
48 | out = cls._residual_block(out, int(alpha * 128))
49 | out = cls._residual_block(out, int(alpha * 128))
50 | out = cls._upsample(out, int(alpha * 64), 3)
51 | out = cls._upsample(out, int(alpha * 32), 3)
52 | # Add a layer of padding to keep sizes consistent.
53 | # out = keras.layers.ZeroPadding2D(padding=(1, 1))(out)
54 | out = cls._convolution(out, 3, 9, relu=False, padding='same')
55 | # Restrict outputs of pixel values to -1 and 1.
56 | out = keras.layers.Activation('tanh')(out)
57 | # Deprocess the image into valid image data. Note we'll need to define
58 | # a custom layer for this in Core ML as well.
59 | out = layers.DeprocessStylizedImage()(out)
60 | model = keras.models.Model(inputs=x, outputs=out)
61 |
62 | # Optionally load weights from a checkpoint
63 | if checkpoint_file:
64 | logger.info(
65 | 'Loading weights from checkpoint: %s' % checkpoint_file
66 | )
67 | if checkpoint_file.startswith('gs://'):
68 | checkpoint_file = utils.copy_file_from_gcs(checkpoint_file)
69 | model.load_weights(checkpoint_file, by_name=True)
70 | return model
71 |
72 | @classmethod
73 | def _convolution(
74 | cls, x, n_filters, kernel_size, strides=1,
75 | padding='same', relu=True, use_bias=False):
76 | """Create a convolution block.
77 |
78 | This block consists of a convolution layer, normalization, and an
79 | optional RELU activation.
80 |
81 | Args:
82 | x - a keras layer as input
83 | n_filters - the number of output dimensions
84 | kernel_size - an integer or tuple specifying the (width, height) of
85 | the 2D convolution window
86 | strides - An integer or tuple/list of 2 integers, specifying the
87 | strides of the convolution along the width and height.
88 | Default 1.
89 | padding: one of "valid" or "same" (case-insensitive).
90 | relu - a bool specifying whether or not a RELU activation is
91 | applied. Default True.
92 |             use_bias - a bool specifying whether or not to use a bias term
93 | """
94 | out = keras.layers.convolutional.Conv2D(
95 | n_filters,
96 | kernel_size,
97 | strides=strides,
98 | padding=padding,
99 | use_bias=use_bias
100 | )(x)
101 |
102 | # We are using the keras-contrib library from @farizrahman4u for
103 | # an implementation of Instance Normalization. Note here that we are
104 | # specifying the normalization axis to be -1, or the channel axis.
105 | # By default this is None and simple Batch Normalization is applied.
106 | out = keras_contrib.layers.normalization.InstanceNormalization(
107 | axis=-1)(out)
108 | if relu:
109 | out = keras.layers.Activation('relu')(out)
110 | return out
111 |
112 | @classmethod
113 | def _residual_block(cls, x, n_filters, kernel_size=3):
114 | """Construct a residual block.
115 |
116 | Args:
117 | x - a keras layer as input
118 | n_filters - the number of output dimensions
119 | kernel_size - an integer or tuple specifying the (width, height) of
120 | the 2D convolution window. Default 3.
121 | Returns:
122 | out - a keras layer as output
123 | """
124 | # Make sure the layer has the proper size and store a copy of the
125 | # original, cropped input layer.
126 | # identity = keras.layers.Cropping2D(cropping=((2, 2), (2, 2)))(x)
127 |
128 | out = cls._convolution(x, n_filters, kernel_size, padding='same')
129 | out = cls._convolution(
130 | out, n_filters, kernel_size, padding='same', relu=False
131 | )
132 | out = keras.layers.Add()([out, x])
133 | return out
134 |
135 | @classmethod
136 | def _upsample(cls, x, n_filters, kernel_size, size=2):
137 | """Construct an upsample block.
138 |
139 | Args:
140 | x - a keras layer as input
141 | n_filters - the number of output dimensions
142 | kernel_size - an integer or tuple specifying the (width, height) of
143 | the 2D convolution window. Default 3.
144 | Returns:
145 | out - a keras layer as output
146 | """
147 | out = keras.layers.UpSampling2D(size=size)(x)
148 | # out = keras.layers.ZeroPadding2D(padding=(2, 2))(out)
149 | out = cls._convolution(out, n_filters, kernel_size, padding='same')
150 | return out
151 |
152 |
153 | class SmallStyleTransferNetwork(StyleTransferNetwork):
154 |
155 | @classmethod
156 | def build(cls, image_size, alpha=1.0, input_tensor=None, checkpoint_file=None):
157 | """Build a Smaller Transfer Network Model using keras' functional API.
158 |
159 | This architecture removes some blocks of layers and reduces the size
160 | of convolutions to save on computation.
161 |
162 | Args:
163 | image_size - the size of the input and output image (H, W)
164 | alpha - a width parameter to scale the number of channels by
165 |
166 | Returns:
167 | model: a keras model object
168 | """
169 | x = keras.layers.Input(
170 | shape=(image_size[0], image_size[1], 3), tensor=input_tensor)
171 | out = cls._convolution(x, int(alpha * 32), 9, strides=1)
172 | out = cls._convolution(out, int(alpha * 32), 3, strides=2)
173 | out = cls._convolution(out, int(alpha * 32), 3, strides=2)
174 | out = cls._residual_block(out, int(alpha * 32))
175 | out = cls._residual_block(out, int(alpha * 32))
176 | out = cls._residual_block(out, int(alpha * 32))
177 | out = cls._upsample(out, int(alpha * 32), 3)
178 | out = cls._upsample(out, int(alpha * 32), 3)
179 | out = cls._convolution(out, 3, 9, relu=False, padding='same')
180 | # Restrict outputs of pixel values to -1 and 1.
181 | out = keras.layers.Activation('tanh')(out)
182 | # Deprocess the image into valid image data. Note we'll need to define
183 | # a custom layer for this in Core ML as well.
184 | out = layers.DeprocessStylizedImage()(out)
185 | model = keras.models.Model(inputs=x, outputs=out)
186 |
187 | # Optionally load weights from a checkpoint
188 | if checkpoint_file:
189 | logger.info(
190 | 'Loading weights from checkpoint: %s' % checkpoint_file
191 | )
192 | if checkpoint_file.startswith('gs://'):
193 | checkpoint_file = utils.copy_file_from_gcs(checkpoint_file)
194 | model.load_weights(checkpoint_file, by_name=True)
195 | return model
196 |
197 |
198 | class IntermediateVGG(object):
199 | """A VGG network class that allows easy access to intermediate layers.
200 |
201 | This class takes the default VGG16 application packaged with Keras and
202 |     constructs a dictionary mapping layer names to layer outputs so that
203 | we can easily extract the network's features at any level. These outputs
204 | are used to compute losses in artistic style transfer.
205 |
206 | """
207 |
208 | def __init__(self, prev_layer=None, input_tensor=None):
209 | """Initialize the model.
210 |
211 | Args:
212 | prev_layer - a keras layer to use as an input layer to the
213 | VGG model. This allows us to stitch other models
214 | together with the VGG.
215 | input_tensor - a tensor that will be used as input for the
216 | VGG.
217 | """
218 | # Create the Keras VGG Model
219 | self.model = keras.applications.vgg16.VGG16(
220 | weights='imagenet',
221 | include_top=False,
222 | input_tensor=input_tensor
223 | )
224 |
225 | # Make sure none of the VGG layers are trainable
226 | for layer in self.model.layers:
227 | layer.trainable = False
228 |
229 | # if a previous layer is specified, stitch that layer to the
230 | # input of the VGG model and rewire the entire model.
231 | self.layers = {}
232 | if prev_layer is not None:
233 | # We need to apply all layers to the output of the style net
234 | in_layer = prev_layer
235 | for layer in self.model.layers[1:]: # Ignore the input layer
236 | in_layer = layer(in_layer)
237 | self.layers[layer.name] = in_layer
238 | else:
239 | self.layers = dict(
240 | [(layer.name, layer.output) for layer in self.model.layers]
241 | )
242 |
--------------------------------------------------------------------------------
/image_segmentation/create_tfrecord_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | import argparse
17 | from functools import partial
18 | import logging
19 | import os
20 | import io
21 | import numpy
22 | import sys
23 |
24 | import PIL.Image
25 | import tensorflow as tf
26 | from tensorflow.python.lib.io import file_io
27 |
28 | from image_segmentation import build_data
29 |
30 | logging.basicConfig(level=logging.INFO)
31 | logger = logging.getLogger('create_tfrecord_dataset')
32 |
33 |
34 | def main(argv):
35 | parser = argparse.ArgumentParser(
36 | description='Convert the ADE20K Challenge dataset to tfrecords'
37 | )
38 |
39 | parser.add_argument(
40 | '-i', '--image-dir', type=str, required=True,
41 |         help='Folder containing training images'
42 | )
43 | parser.add_argument(
44 | '-a', '--annotation-dir', type=str, required=True,
45 | help='Folder containing annotations for training images'
46 | )
47 | parser.add_argument(
48 | '-o', '--output', type=str, required=True,
49 | help='Path to save converted tfrecord of Tensorflow example'
50 | )
51 | parser.add_argument(
52 | '-l', '--label-filename', type=str, required=True,
53 | help='A file containing a single label per line.'
54 | )
55 | parser.add_argument(
56 | '-w', '--whitelist-labels', type=str,
57 | help=('A pipe | separated list of object labels to whitelist. '
58 |               'categories can be merged by separating them by : '
59 | 'e.g. "person|car:truck:van|pavement". To see a'
60 | ' full list of allowed labels run with --list-labels.')
61 | )
62 | parser.add_argument(
63 | '-t', '--whitelist-threshold', type=float, default=0.7,
64 | help=('The fraction of whitelisted labels an image must contain to be '
65 | 'used for training.')
66 | )
67 | parser.add_argument(
68 | '--list-labels', action='store_true',
69 | help='If true, print a full list of object labels.'
70 | )
71 |
72 | args = parser.parse_args(argv)
73 |
74 | # Load the class labels
75 | class_labels = _load_class_labels(args.label_filename)
76 | n_classes = len(class_labels)
77 | if args.list_labels:
78 | logger.info('Labels:')
79 | labels = ''
80 | for label in class_labels:
81 | labels += '%d, %s\n' % label
82 | logger.info(labels)
83 | sys.exit()
84 |
85 | # If a whitelist is provided, get a list of mask indices that correspond
86 | # to allowed labels
87 | whitelist_labels = None
88 | whitelist_indices = None
89 | if args.whitelist_labels:
90 | whitelist_labels = _parse_whitelist_labels(args.whitelist_labels)
91 |
92 | # add a 'none' class with a label of 0
93 | whitelist_labels.insert(0, ['none'])
94 | whitelist_indices = _find_whitelist_indices(
95 | class_labels, whitelist_labels)
96 |
97 | whitelist_filename = os.path.join(
98 | os.path.dirname(args.output), 'labels.txt')
99 | _save_whitelist_labels(whitelist_filename, whitelist_labels)
100 | n_classes = len(whitelist_labels)
101 |
102 | _create_tfrecord_dataset(
103 | args.image_dir,
104 | args.annotation_dir,
105 | args.output,
106 | n_classes,
107 | whitelist_indices=whitelist_indices,
108 | whitelist_threshold=args.whitelist_threshold
109 | )
110 |
111 |
112 | def _parse_whitelist_labels(whitelist):
113 | parsed = whitelist.split('|')
114 | parsed = [category.split(':') for category in parsed]
115 | return parsed
116 |
117 |
118 | def _save_whitelist_labels(whitelist_filename, labels):
119 | with open(whitelist_filename, 'w') as wfid:
120 | header = 'idx\tlabel\n'
121 | wfid.write(header)
122 | for idx, label_set in enumerate(labels):
123 | label = label_set[0].split(',')[0]
124 | wfid.write('%d\t%s\n' % (idx, label))
125 | print("Saved")
126 |
127 |
128 | def _load_class_labels(label_filename):
129 | """Load class labels.
130 |
131 | Assumes the data directory is left unchanged from the original zip.
132 |
133 | Args:
134 |         label_filename (str): path to a file with one label per line
135 |
136 | Returns:
137 | List[(int, str)]: a list of class ids and labels
138 | """
139 | class_labels = []
140 | header = True
141 | with file_io.FileIO(label_filename, mode='r') as file:
142 | for line in file.readlines():
143 | if header:
144 | class_labels.append((0, 'none'))
145 | header = False
146 | continue
147 | line = line.rstrip()
148 | line = line.split('\t')
149 | label = line[-1]
150 | label_id = int(line[0])
151 | class_labels.append((label_id, label))
152 | return class_labels
153 |
154 |
155 | def _find_whitelist_indices(class_labels, whitelist_labels):
156 | """Map whitelist labels to indices.
157 |
158 | Args:
159 | whitelist (List[str]): a list of whitelisted labels
160 |
161 | Returns:
162 | List[Set]: a list of sets containing index labels
163 | """
164 | index = []
165 | for label_set in whitelist_labels:
166 | index_set = []
167 | for label in label_set:
168 | for class_id, class_label in class_labels:
169 | if label == class_label:
170 | index_set.append(class_id)
171 | index.append(index_set)
172 | return index
173 |
174 |
175 | def _filter_whitelabel_classes(
176 | filenames,
177 | whitelist,
178 | whitelist_threshold,
179 | whitelist_size=None):
180 | w_size = whitelist_size or len(whitelist)
181 | mask = numpy.array(PIL.Image.open(filenames[-1]))
182 | unique_classes = numpy.unique(mask)
183 | num_found = numpy.intersect1d(unique_classes, whitelist).size
184 | if float(num_found) / w_size >= whitelist_threshold:
185 | return True
186 | return False
187 |
188 |
189 | def _relabel_mask(seg_data, whitelist_indices):
190 | # Read the data into a numpy array.
191 | mask = numpy.array(PIL.Image.open(io.BytesIO(seg_data)))
192 | # Relabel each pixel
193 | new_mask = numpy.zeros(mask.shape)
194 | for new_label, old_label_set in enumerate(whitelist_indices):
195 | idx = numpy.where(numpy.isin(mask, old_label_set))
196 | new_mask[idx] = new_label
197 | # Convert the new mask back to an image.
198 | seg_img = PIL.Image.fromarray(new_mask.astype('uint8')).convert('RGB')
199 | # Save the new image to a PNG byte string.
200 | byte_buffer = io.BytesIO()
201 | seg_img.save(byte_buffer, format='png')
202 | byte_buffer.seek(0)
203 | return byte_buffer.read()
204 |
205 |
206 | def _create_tfrecord_dataset(
207 | image_dir,
208 | segmentation_mask_dir,
209 | output_filename,
210 | n_classes,
211 | whitelist_indices=None,
212 | whitelist_threshold=0.5):
213 |     """Convert the ADE20K dataset into tfrecord format.
214 |
215 | Args:
216 |         image_dir: Directory containing the input images.
217 |         segmentation_mask_dir: Directory containing the segmentation masks.
218 |         output_filename: Path of the output tfrecord file.
219 | Raises:
220 | RuntimeError: If loaded image and label have different shape.
221 | """
222 | # Get all of the image and segmentation mask file names
223 | img_names = tf.gfile.Glob(os.path.join(image_dir, '*.jpg'))
224 | seg_names = []
225 | for f in img_names:
226 | # get the filename without the extension
227 | basename = os.path.basename(f).split('.')[0]
228 |         # find its corresponding segmentation mask (basename + '.png')
229 | seg = os.path.join(segmentation_mask_dir, basename + '.png')
230 | seg_names.append(seg)
231 |
232 | # If a whitelist has been provided, loop over all of the segmentation
233 | # masks and find only the images that contain enough classes.
234 | kept_files = zip(img_names, seg_names)
235 | if whitelist_indices is not None:
236 | # Flatten the whitelist because some categories have been merged
237 |         # but make sure to use the original list size when
238 | # computing the threshold.
239 | flat_whitelist = numpy.array(
240 | [idx for idx_set in whitelist_indices for idx in idx_set]
241 | ).astype('uint8')
242 | merged_whitelist_size = len(whitelist_indices)
243 | filter_fn = partial(
244 | _filter_whitelabel_classes,
245 | whitelist=flat_whitelist,
246 | whitelist_threshold=whitelist_threshold,
247 | whitelist_size=merged_whitelist_size
248 | )
249 | kept_files = list(filter(filter_fn, kept_files))
250 | logger.info(
251 |             'Found %d images after whitelist filtering.' % len(kept_files))
252 | num_images = len(kept_files)
253 | image_reader = build_data.ImageReader('jpeg', channels=3)
254 | label_reader = build_data.ImageReader('png', channels=1)
255 |
256 | with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
257 | for idx, (image_filename, seg_filename) in enumerate(kept_files):
258 | if idx % 100 == 0:
259 | logger.info('Converting image %d of %d.' % (idx, num_images))
260 | # Read the image.
261 | image_data = tf.gfile.FastGFile(image_filename, 'rb').read()
262 | height, width = image_reader.read_image_dims(image_data)
263 | # Read the semantic segmentation annotation.
264 | seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read()
265 | # If there is a whitelist, we need to relabel all of the
266 | # mask classes so that only the whitelisted labels are present.
267 | if whitelist_indices is not None:
268 | seg_data = _relabel_mask(seg_data, whitelist_indices)
269 | seg_height, seg_width = label_reader.read_image_dims(seg_data)
270 | if height != seg_height or width != seg_width:
271 | raise RuntimeError(
272 | 'Shape mismatched between image and label.')
273 | # Convert to tf example.
274 | example = build_data.image_seg_to_tfexample(
275 | image_data, image_filename, height, width, seg_data)
276 | tfrecord_writer.write(example.SerializeToString())
277 |
278 |
279 | if __name__ == '__main__':
280 | main(sys.argv[1:])
281 |
--------------------------------------------------------------------------------
/image_segmentation/image_segmentation/data_generator.py:
--------------------------------------------------------------------------------
1 | """Summary.
2 |
3 | Attributes:
4 | logger (TYPE): Description
5 | """
6 | import logging
7 |
8 | import numpy
9 | import tensorflow as tf
10 | from tensorflow.python.lib.io import file_io
11 |
12 | logger = logging.getLogger('data_generator')
13 |
14 |
15 | def _gaussian_kernel_3d(sigma, channels=3, size=4.0):
16 | radius = sigma * size / 2.0 + 0.5
17 | gauss = tf.distributions.Normal(0., sigma)
18 | kernel_1d = gauss.prob(
19 | tf.range(-radius[0], radius[0] + 1.0, dtype=tf.float32)
20 | )
21 | kernel_2d = tf.sqrt(tf.einsum('i,j->ij', kernel_1d, kernel_1d))
22 | kernel_2d = kernel_2d / tf.reduce_sum(kernel_2d)
23 | kernel = tf.expand_dims(kernel_2d, -1)
24 | kernel = tf.expand_dims(kernel, -1)
25 | kernel = tf.tile(kernel, [1, 1, channels, 1])
26 | return kernel
27 |
28 |
29 | class ADE20KDatasetBuilder(object):
30 | """Create a TFRecord dataset from the ADE20K data."""
31 |
32 | # Scale and bias parameters to pre-process images so pixel values are
33 | # between -0.5 and 0.5
34 | _PREPROCESS_IMAGE_SCALE = 1.0 / 255.0
35 | _PREPROCESS_CHANNEL_BIAS = -0.5
36 |
37 | @staticmethod
38 | def load_class_labels(label_filename):
39 | """Load class labels.
40 |
41 | Assumes the data directory is left unchanged from the original zip.
42 |
43 | Args:
44 |             label_filename (str): path to a file with one label per line
45 |
46 | Returns:
47 | arr: an array of class labels
48 | """
49 | class_labels = []
50 | header = True
51 | with file_io.FileIO(label_filename, mode='r') as file:
52 | for line in file.readlines():
53 | if header:
54 | header = False
55 | continue
56 | line = line.rstrip()
57 | label = line.split('\t')[-1]
58 | class_labels.append(label)
59 | return numpy.array(class_labels)
60 |
61 | @staticmethod
62 | def _resize_fn(images, image_size):
63 |         """Resize input images.
64 |
65 | Args:
66 | images (tf.tensor): a tensor of input images
67 | image_size ((int, int)): a size (H,W) to resize to
68 |
69 | Returns:
70 | tf.tensor: a resized image tensor
71 | """
72 | return tf.image.resize_images(
73 | images,
74 | image_size,
75 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR
76 | )
77 |
78 | @classmethod
79 | def _preprocess_example(cls, example):
80 | """Preprocess an image.
81 |
82 | Args:
83 | example (dict): a single example from the dataset
84 |
85 | Return:
86 | (dict) processed example from the dataset
87 | """
88 | example['image'] = (tf.cast(example['image'], tf.float32) *
89 | cls._PREPROCESS_IMAGE_SCALE +
90 | cls._PREPROCESS_CHANNEL_BIAS)
91 | return example
92 |
93 | @classmethod
94 | def _resize_example(cls, example, image_size):
95 |         """Resize an image and mask from an example.
96 |
97 | Args:
98 | example (dict): a single example from the dataset.
99 | image_size ((int, int)): the desired size of image and mask
100 |
101 | Returns:
102 | (dict) a single example resized
103 | """
104 | return {'image': cls._resize_fn(example['image'], image_size),
105 | 'mask': cls._resize_fn(example['mask'], image_size)}
106 |
107 | @staticmethod
108 | def _crop_and_resize(image, zoom, image_size):
109 | """Crop and resize an image.
110 |
111 | Uses center cropping.
112 |
113 | Args:
114 | image (tensor): an input image tensor
115 | zoom (float): a zoom factor
116 | image_size ((int, int)): a desired output image size
117 |
118 | Returns:
119 |             tensor: an output image tensor
120 | """
121 | x1 = y1 = 0.5 - 0.5 * zoom # scale centrally
122 | x2 = y2 = 0.5 + 0.5 * zoom
123 | boxes = tf.stack([y1, x1, y2, x2], axis=1)
124 | box_ind = [0]
125 | return tf.cast(tf.squeeze(
126 | tf.image.crop_and_resize(
127 | tf.expand_dims(image, 0),
128 | boxes,
129 | box_ind,
130 | image_size,
131 | method='nearest'
132 | )
133 | ), tf.uint8)
134 |
135 | @staticmethod
136 | def _blur(image, sigma):
137 | kernel = _gaussian_kernel_3d(sigma)
138 | # all preprocessing should run on the CPU
139 | with tf.device('/cpu:0'):
140 | blurred_image = tf.nn.depthwise_conv2d(
141 | tf.cast(tf.expand_dims(image, 0), tf.float32),
142 | kernel,
143 | [1, 1, 1, 1],
144 | padding='SAME',
145 | data_format="NHWC"
146 | )
147 | return blurred_image[0]
148 |
149 | @classmethod
150 | def _augment_example(cls, example):
151 | """Augment an example from the dataset.
152 |
153 |         All augmentation functions are also applied to the segmentation
154 | mask.
155 |
156 | Args:
157 | example (dict): a single example from the dataset.
158 |
159 | Returns:
160 | dict: an augmented example
161 | """
162 | image = example['image']
163 | mask = example['mask']
164 |
165 | image_size = image.shape.as_list()[0:2]
166 |
167 | # Add padding so we don't get black borders
168 | paddings = numpy.array(
169 | [[image_size[0] / 2, image_size[0] / 2],
170 | [image_size[1] / 2, image_size[1] / 2],
171 | [0, 0]], dtype=numpy.uint32)
172 | aug_image = tf.pad(image, paddings, mode='REFLECT')
173 | aug_mask = tf.pad(mask, paddings, mode='REFLECT')
174 | padded_image_size = [dim * 2 for dim in image_size]
175 |
176 | # Rotate
177 | angle = tf.random_uniform([1], -numpy.pi / 6, numpy.pi / 6)
178 | aug_image = tf.contrib.image.rotate(aug_image, angle)
179 | aug_mask = tf.contrib.image.rotate(aug_mask, angle)
180 |
181 | # Zoom
182 | zoom = tf.random_uniform([1], 0.85, 1.75)
183 | aug_image = cls._crop_and_resize(aug_image, zoom, padded_image_size)
184 | aug_mask = cls._crop_and_resize(aug_mask, zoom, padded_image_size)
185 |
186 | # Crop things back to original size
187 | aug_image = tf.image.central_crop(aug_image, central_fraction=0.5)
188 | aug_mask = tf.image.central_crop(aug_mask, central_fraction=0.5)
189 |
190 | # blur
191 | # Not used at the moment because it makes training hard
192 | # sigma = tf.random_uniform([1], 0.0, 1.0)
193 | # aug_image = cls._blur(aug_image, sigma)
194 |
195 | # Flip left right
196 | do_flip = tf.greater(tf.random_uniform([1], 0.0, 1.0)[0], 0.5)
197 | aug_image = tf.cond(
198 | do_flip,
199 | true_fn=lambda: tf.image.flip_left_right(aug_image),
200 | false_fn=lambda: aug_image,
201 | )
202 | aug_mask = tf.cond(
203 | do_flip,
204 | true_fn=lambda: tf.image.flip_left_right(aug_mask),
205 | false_fn=lambda: aug_mask,
206 | )
207 |
208 | # Flip up down
209 | do_flip = tf.greater(tf.random_uniform([1], 0.0, 1.0)[0], 0.5)
210 | aug_image = tf.cond(
211 | do_flip,
212 | true_fn=lambda: tf.image.flip_up_down(aug_image),
213 | false_fn=lambda: aug_image,
214 | )
215 | aug_mask = tf.cond(
216 | do_flip,
217 | true_fn=lambda: tf.image.flip_up_down(aug_mask),
218 | false_fn=lambda: aug_mask,
219 | )
220 |
221 | return {'image': aug_image, 'mask': aug_mask}
222 |
223 | @staticmethod
224 | def _decode_example(example_proto):
225 | """Decode an example from a TFRecord.
226 |
227 | Args:
228 | example_proto (tfrecord): a serialized tf record
229 |
230 | Returns:
231 | dict: an example from the dataset containing image and mask.
232 | """
233 | features = {
234 | "image/encoded": tf.FixedLenFeature(
235 | (), tf.string, default_value=""
236 | ),
237 | "image/segmentation/class/encoded": tf.FixedLenFeature(
238 | (), tf.string, default_value=""
239 | )
240 | }
241 | parsed_features = tf.parse_single_example(example_proto, features)
242 | image = tf.image.decode_jpeg(
243 | parsed_features["image/encoded"], channels=3)
244 | mask = tf.image.decode_png(
245 | parsed_features["image/segmentation/class/encoded"], channels=3)
246 | return {'image': image, 'mask': mask}
247 |
248 | @classmethod
249 | def _generate_multiscale_masks(cls, example, n_classes):
250 |         """Generate masks at multiple scales for training.
251 |
252 | The loss function compares masks at 4, 8, and 16x increases in scale.
253 |
254 | Args:
255 | example (dict): a single example from the dataset
256 | n_classes (int): the number of classes in the mask
257 |
258 | Returns
259 | (dict): the same example, but with additional mask data for each
260 | new resolution.
261 | """
262 | original_mask = example['mask']
263 | # Add the image to the placeholder
264 | image_size = example['image'].shape.as_list()[0:2]
265 |
266 | for scale in [4, 8, 16]:
267 | example['mask_%d' % scale] = tf.one_hot(
268 | cls._resize_fn(
269 | original_mask,
270 | list(map(lambda x: x // scale, image_size))
271 | )[:, :, 0], # only need one channel
272 | depth=n_classes,
273 | dtype=tf.float32
274 | )
275 | return example
276 |
277 | @classmethod
278 | def scale_mask(cls, mask, scale, image_size, n_classes):
279 | return tf.one_hot(
280 | cls._resize_fn(
281 | mask,
282 | image_size,
283 | )[:, :, :, 0], # only need one channel
284 | depth=n_classes,
285 | dtype=tf.float32
286 | )
287 |
288 | @classmethod
289 | def build(
290 | cls,
291 | filename,
292 | batch_size,
293 | image_size,
294 | n_classes,
295 | augment_images=True,
296 | repeat=True,
297 | prefetch=False,
298 | parallel_calls=1):
299 | """Build a TFRecord dataset.
300 |
301 | Args:
302 | filename (str): a .tfrecord file to read
303 | batch_size (int): batch size
304 | image_size (int): the desired image size of examples
305 | n_classes (int): the number of classes
306 |             augment_images (bool): whether to apply data augmentation
307 |             parallel_calls (int): the number of parallel preprocessing calls
308 |
309 | Returns:
310 | dataset: a TFRecordDataset
311 | """
312 | logger.info('Creating dataset from: %s' % filename)
313 | dataset = tf.data.TFRecordDataset(filename)
314 | dataset = dataset.map(cls._decode_example,
315 | num_parallel_calls=parallel_calls)
316 | dataset = dataset.map(lambda x: cls._resize_example(x, image_size),
317 | num_parallel_calls=parallel_calls)
318 | if augment_images:
319 | dataset = dataset.map(cls._augment_example,
320 | num_parallel_calls=parallel_calls)
321 | dataset = dataset.map(cls._preprocess_example,
322 | num_parallel_calls=parallel_calls)
323 | dataset = dataset.map(
324 | lambda x: cls._generate_multiscale_masks(x, n_classes),
325 | num_parallel_calls=parallel_calls
326 | )
327 | if repeat:
328 | dataset = dataset.repeat()
329 |
330 | dataset = dataset.batch(batch_size)
331 | if prefetch:
332 | dataset = dataset.prefetch(buffer_size=batch_size)
333 | return dataset
334 |
--------------------------------------------------------------------------------
/image_segmentation/image_segmentation/train.py:
--------------------------------------------------------------------------------
1 | """Train an ICNet Model on ADE20K Data."""
2 |
3 | import argparse
4 | import keras
5 | import logging
6 | import time
7 | import sys
8 | import struct
9 | import os
10 | from tensorflow.python.lib.io import file_io
11 | import tensorflow as tf
12 | from image_segmentation.icnet import ICNetModelFactory
13 | from image_segmentation.data_generator import ADE20KDatasetBuilder
14 | from image_segmentation import dali_config
15 | from google.cloud import storage
16 |
17 | logging.basicConfig(level=logging.INFO)
18 | logger = logging.getLogger('train')
19 |
20 |
21 | def _summarize_arguments(args):
22 | """Summarize input arguments to ICNet model training.
23 |
24 | Args:
25 |         args: the parsed argparse namespace of training options.
26 | """
27 |
28 | logger.info('ICNet Model training Parameters')
29 | logger.info('-------------------------------')
30 | for key, value in vars(args).items():
31 | logger.info(' {key}={value}'.format(key=key, value=value))
32 |
33 |
34 | def _build_parser(argv):
35 | parser = argparse.ArgumentParser(
36 | description='Train an ICNet model.'
37 | )
38 | # Data options
39 | parser.add_argument(
40 | '-d', '--data', nargs='+', required=True,
41 | help='A TFRecord file containing images and segmentation masks.'
42 | )
43 | parser.add_argument(
44 | '--tfindex-files', nargs='+',
45 | help='TFIndex file for dali pipeline. If not included, will be built'
46 | )
47 | parser.add_argument(
48 | '-l', '--label-filename', type=str, required=True,
49 | help='A file containing a single label per line.'
50 | )
51 | parser.add_argument(
52 | '-s', '--image-size', type=int, default=768,
53 | help=('The pixel dimension of model input and output. Images '
54 | 'will be square.')
55 | )
56 | parser.add_argument(
57 | '-a', '--alpha', type=float, default=1.0,
58 | help='The width multiplier for the network'
59 | )
60 | parser.add_argument(
61 | '--augment-images', type=bool, default=True,
62 | help='turn on image augmentation.'
63 | )
64 | parser.add_argument(
65 | '--add-noise', action='store_true',
66 | help='Add gaussian noise to training.'
67 | )
68 | parser.add_argument(
69 | '--use-dali', action='store_true',
70 |         help='Use the NVIDIA DALI pipeline for data loading and augmentation.'
71 | )
72 | parser.add_argument(
73 | '--list-labels', action='store_true',
74 | help='If true, print a full list of object labels.'
75 | )
76 | # Training options
77 | parser.add_argument(
78 | '-b', '--batch-size', type=int, default=8,
79 | help='The training batch_size.'
80 | )
81 | parser.add_argument(
82 | '--lr', type=float, default=0.001, help='The learning rate.'
83 | )
84 | parser.add_argument(
85 | '-n', '--num-steps', type=int, default=1000,
86 | help='Number of training steps to perform'
87 | )
88 | parser.add_argument(
89 | '--steps-per-epoch', type=int, default=100,
90 | help='Number of training steps to perform between model checkpoints'
91 | )
92 | parser.add_argument(
93 | '-o', '--output',
94 | help='An output file to save the trained model.')
95 | parser.add_argument(
96 | '--gpu-cores', type=int, default=1,
97 | help='Number of GPU cores to run on.')
98 | parser.add_argument(
99 | '--fine-tune-checkpoint', type=str,
100 | help='A Keras model checkpoint to load and continue training.'
101 | )
102 | parser.add_argument(
103 | '--gcs-bucket', type=str,
104 |         help='A GCS Bucket to save models to.'
105 | )
106 | parser.add_argument(
107 | '--parallel-calls', type=int, default=1,
108 |         help='Number of parallel calls to use for preprocessing.'
109 | )
110 | parser.add_argument(
111 | '--model-name', type=str, required=True,
112 | help='Short name separated by underscores'
113 | )
114 |
115 | return parser.parse_known_args()
116 |
117 |
118 | def _prepare_dataset(args, n_classes):
119 | dataset = ADE20KDatasetBuilder.build(
120 | args.data,
121 | n_classes=n_classes,
122 | batch_size=args.batch_size,
123 | image_size=(args.image_size, args.image_size),
124 | augment_images=False,
125 | parallel_calls=args.parallel_calls,
126 | prefetch=True,
127 | )
128 |
129 | iterator = dataset.make_one_shot_iterator()
130 | example = iterator.get_next()
131 |
132 | return {
133 | 'input': example['image'],
134 | 'mask_4': example['mask_4'],
135 | 'mask_8': example['mask_8'],
136 | 'mask_16': example['mask_16'],
137 | }
138 |
139 |
140 | def build_tfindex_file(tfrecord_file, tfindex_file):
141 | """Builds a tfindex file used by DALI from a tfrecord file.
142 |
143 | Args:
144 | tfrecord_file: Path to TFRecord file.
145 | tfindex_file: output file to write to.
146 | """
147 | tfrecord_fp = open(tfrecord_file, 'rb')
148 | idx_fp = open(tfindex_file, 'w')
149 |
150 | while True:
151 | current = tfrecord_fp.tell()
152 | try:
153 | # length
154 | byte_len = tfrecord_fp.read(8)
155 |             if not byte_len:
156 | break
157 | # crc
158 | tfrecord_fp.read(4)
159 | proto_len = struct.unpack('q', byte_len)[0]
160 | # proto
161 | tfrecord_fp.read(proto_len)
162 | # crc
163 | tfrecord_fp.read(4)
164 | idx_fp.write(str(current) + ' ' +
165 | str(tfrecord_fp.tell() - current) + '\n')
166 | except Exception:
167 | print("Not a valid TFRecord file")
168 | break
169 |
170 | tfrecord_fp.close()
171 | idx_fp.close()
172 |
173 |
174 | def _prepare_dali(args, n_classes):
175 | if args.gpu_cores > 1:
176 | logger.error(
177 | 'Have not built in support for more than one GPU at the moment.'
178 | )
179 | sys.exit(1)
180 |
181 | # non NVIDIA cloud environments will not have dali, so we
182 | # have to do the import here.
183 | from image_segmentation.dali_pipeline import CommonPipeline
184 | import nvidia.dali.plugin.tf as dali_tf
185 |
186 | batch_size = args.batch_size
187 | image_size = args.image_size
188 | device_id = 0
189 | storage_client = storage.Client()
190 | filenames = []
191 |
192 | for filename in args.data:
193 | if filename.startswith('gs://'):
194 | parts = filename[5:].split('/')
195 | bucket_name, blob_name = parts[0], '/'.join(parts[1:])
196 | bucket = storage_client.get_bucket(bucket_name)
197 | blob = bucket.blob(blob_name)
198 | download_filename = os.path.basename(blob_name)
199 | blob.download_to_filename(download_filename)
200 | filenames.append(download_filename)
201 | else:
202 | filenames.append(filename)
203 |
204 | tfindex_files = args.tfindex_files or []
205 | if not tfindex_files:
206 | for path in filenames:
207 | tfindex_file = path.split('.')[0] + '.tfindex'
208 | build_tfindex_file(path, tfindex_file)
209 | logger.info('Created tfindex file: {input} -> {output}'.format(
210 | input=path,
211 | output=tfindex_file
212 | ))
213 | tfindex_files.append(tfindex_file)
214 |
215 | config = dali_config.DaliConfig()
216 | config.summarize()
217 |
218 | pipe = CommonPipeline(
219 | args.batch_size,
220 | args.parallel_calls,
221 | device_id,
222 | args.image_size,
223 | filenames,
224 | tfindex_files,
225 | config
226 | )
227 | pipe.build()
228 |
229 | daliop = dali_tf.DALIIterator()
230 | with tf.device('/gpu:0'):
231 | results = daliop(
232 | serialized_pipeline=pipe.serialize(),
233 | shape=[args.batch_size, args.image_size, args.image_size, 3],
234 | label_type=tf.int64,
235 | )
236 |
237 | input_tensor = results.batch
238 |
239 | results.label.set_shape([batch_size, image_size, image_size, 3])
240 | mask = results.label
241 |     new_shape = [image_size // 4, image_size // 4]
242 |     mask_4 = ADE20KDatasetBuilder.scale_mask(mask, 4, new_shape, n_classes)
243 |     new_shape = [image_size // 8, image_size // 8]
244 |     mask_8 = ADE20KDatasetBuilder.scale_mask(mask, 8, new_shape, n_classes)
245 |     new_shape = [image_size // 16, image_size // 16]
246 |     mask_16 = ADE20KDatasetBuilder.scale_mask(mask, 16, new_shape, n_classes)
247 |
248 | return {
249 | 'input': input_tensor,
250 | 'mask_4': mask_4,
251 | 'mask_8': mask_8,
252 | 'mask_16': mask_16,
253 | }
254 |
255 |
256 | def train(argv):
257 | """Train an ICNet model."""
258 |
259 | args, unknown = _build_parser(argv)
260 | _summarize_arguments(args)
261 |
262 | class_labels = ADE20KDatasetBuilder.load_class_labels(
263 | args.label_filename)
264 | if args.list_labels:
265 | logger.info('Labels:')
266 | labels = ''
267 | for label in class_labels:
268 | labels += '%s\n' % label
269 | logger.info(labels)
270 | sys.exit()
271 |
272 | n_classes = len(class_labels)
273 |
274 | if args.use_dali:
275 | data = _prepare_dali(args, n_classes)
276 | else:
277 | data = _prepare_dataset(args, n_classes)
278 |
279 | if args.add_noise:
280 | logger.info('Adding gaussian noise to input tensor.')
281 | noise = tf.random_normal(shape=tf.shape(data['input']),
282 | mean=0.0,
283 | stddev=0.07,
284 | dtype=tf.float32)
285 | data['input'] = data['input'] + noise
286 |
287 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
288 | config = tf.ConfigProto(gpu_options=gpu_options)
289 | sess = tf.Session(config=config)
290 | keras.backend.set_session(sess)
291 |
292 | if args.gpu_cores > 1:
293 | with tf.device('/CPU:0'):
294 | icnet = ICNetModelFactory.build(
295 | args.image_size,
296 | n_classes,
297 | weights_path=args.fine_tune_checkpoint,
298 | train=True,
299 | input_tensor=data['input'],
300 | alpha=args.alpha,
301 | )
302 |
303 |             gpu_icnet = keras.utils.multi_gpu_model(icnet, gpus=args.gpu_cores)
304 | gpu_icnet.__setattr__('callback_model', icnet)
305 | model = gpu_icnet
306 | else:
307 | with tf.device('/GPU:0'):
308 | model = ICNetModelFactory.build(
309 | args.image_size,
310 | n_classes,
311 | weights_path=args.fine_tune_checkpoint,
312 | train=True,
313 | input_tensor=data['input'],
314 | alpha=args.alpha,
315 | )
316 |
317 | optimizer = keras.optimizers.Adam(lr=args.lr)
318 | model.compile(
319 | optimizer,
320 | loss=keras.losses.categorical_crossentropy,
321 | loss_weights=[1.0, 0.4, 0.16],
322 | metrics=['categorical_accuracy'],
323 | target_tensors=[
324 | data['mask_4'], data['mask_8'], data['mask_16']
325 | ]
326 | )
327 |
328 | if not args.output:
329 | output_filename_fmt = '{model_name}_{size}x{size}_{alpha}_{time}.h5'
330 | filename = output_filename_fmt.format(
331 | model_name=args.model_name,
332 | size=args.image_size,
333 | alpha=str(args.alpha).replace('0', '').replace('.', ''),
334 | time=int(time.time())
335 | )
336 | else:
337 | filename = args.output
338 |
339 | print("=======================")
340 | print("Output file name: {name}".format(name=filename))
341 | print("=======================")
342 |
343 | callbacks = [
344 | keras.callbacks.ModelCheckpoint(
345 | filename,
346 | verbose=0,
347 | mode='auto',
348 | period=1
349 | ),
350 | ]
351 |
352 | if args.gcs_bucket:
353 | callbacks.append(SaveCheckpointToGCS(filename, args.gcs_bucket))
354 |
355 | model.fit(
356 | steps_per_epoch=args.steps_per_epoch,
357 | epochs=int(args.num_steps / args.steps_per_epoch) + 1,
358 | callbacks=callbacks,
359 | )
360 |
361 |
362 | class SaveCheckpointToGCS(keras.callbacks.Callback):
363 | """A callback to save local model checkpoints to GCS."""
364 |
365 | def __init__(self, local_filename, gcs_filename):
366 |         """Create a callback that uploads saved checkpoints to GCS.
367 |
368 | Args:
369 | local_filename (str): the path of the local checkpoint
370 | gcs_filename (str): the GCS bucket to save the model to
371 | """
372 | self.gcs_filename = gcs_filename
373 | self.local_filename = local_filename
374 |
375 | @staticmethod
376 | def _copy_file_to_gcs(job_dir, file_path):
377 | gcs_url = os.path.join(job_dir, file_path)
378 | logger.info('Saving models to GCS: %s' % gcs_url)
379 | with file_io.FileIO(file_path, mode='rb') as input_f:
380 | with file_io.FileIO(gcs_url, mode='w+') as output_f:
381 | output_f.write(input_f.read())
382 |
383 |     def on_epoch_end(self, epoch, logs=None):
384 | """Save model to GCS on epoch end.
385 |
386 | Args:
387 | epoch (int): the epoch number
388 | logs (dict, optional): logs dict
389 | """
390 | basename = os.path.basename(self.local_filename)
391 | self._copy_file_to_gcs(self.gcs_filename, basename)
392 |
393 |
394 | if __name__ == '__main__':
395 | train(sys.argv[1:])
396 |
--------------------------------------------------------------------------------
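
Editor's annotation (not part of the repository source): the training script above wires the
`SaveCheckpointToGCS` callback into `model.fit` next to a standard `ModelCheckpoint`. Below is a
minimal sketch of that pairing with a model built by `ICNetModelFactory`; the class count,
checkpoint filename, and bucket URL are hypothetical placeholders, not values taken from the repo.

    import keras

    from image_segmentation.icnet import ICNetModelFactory

    N_CLASSES = 151                      # hypothetical: 150 ADE20K labels + background
    CHECKPOINT = 'icnet_768x768_10.h5'   # hypothetical local checkpoint filename
    GCS_BUCKET = 'gs://my-bucket/checkpoints'  # hypothetical bucket

    model = ICNetModelFactory.build(768, N_CLASSES, alpha=1.0, train=False)
    model.compile(
        keras.optimizers.Adam(lr=0.001),
        loss=keras.losses.categorical_crossentropy,
        metrics=['categorical_accuracy'],
    )
    callbacks = [
        keras.callbacks.ModelCheckpoint(CHECKPOINT, verbose=0, mode='auto', period=1),
        SaveCheckpointToGCS(CHECKPOINT, GCS_BUCKET),
    ]
    # model.fit(..., callbacks=callbacks) then mirrors the call made in train().
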
/style_transfer/style_transfer/fritz_coreml_converter.py:
--------------------------------------------------------------------------------
1 | import coremltools
2 | from coremltools.converters.keras._keras2_converter import *
3 | from coremltools.converters.keras._keras2_converter import _KERAS_LAYER_REGISTRY
4 | from coremltools.converters.keras import _topology2
5 | from coremltools.converters.keras._topology2 import _KERAS_SKIP_LAYERS
6 | from coremltools.models.neural_network import NeuralNetworkBuilder as _NeuralNetworkBuilder
7 | from coremltools.proto import FeatureTypes_pb2 as _FeatureTypes_pb2
8 | from collections import OrderedDict as _OrderedDict
9 | from coremltools.models import datatypes
10 | from coremltools.models import MLModel as _MLModel
11 | from coremltools.models.utils import save_spec as _save_spec
12 | import keras as _keras
13 | from coremltools._deps import HAS_KERAS2_TF as _HAS_KERAS2_TF
14 | import PIL.Image
15 | from six import string_types
16 | from coremltools.proto import FeatureTypes_pb2 as ft
17 |
18 | _IMAGE_SUFFIX = '_image'
19 |
20 |
21 | class FritzCoremlConverter(object):
22 | """A class to convert keras models to coreml.
23 |
24 |     This converter is a modified version of the one that comes packaged with
25 | coremltools, but it allows the user to define custom layer mappings from
26 | keras to coreml.
27 | """
28 |
29 | @classmethod
30 | def _check_unsupported_layers(cls, model, supported_layers):
31 | """Check for any unsupported layers in the keras model.
32 |
33 | Args:
34 | model - a keras model
35 | supported_layers - a dictionary of supported layers. Keys are keras
36 | layer classes and values are corresponding
37 | coreml layer classes.
38 | """
39 | for i, layer in enumerate(model.layers):
40 | if (isinstance(layer, _keras.models.Sequential) or
41 | isinstance(layer, _keras.models.Model)):
42 |                 cls._check_unsupported_layers(layer, supported_layers)
43 | else:
44 | if type(layer) not in supported_layers:
45 | print(supported_layers)
46 | raise ValueError(
47 | "Keras layer '%s' not supported. " % str(type(layer))
48 | )
49 | if isinstance(layer, _keras.layers.wrappers.TimeDistributed):
50 | if type(layer.layer) not in supported_layers:
51 | raise ValueError(
52 | "Keras layer '%s' not supported. " %
53 | str(type(layer.layer))
54 | )
55 | if isinstance(layer, _keras.layers.wrappers.Bidirectional):
56 | if not isinstance(layer.layer,
57 | _keras.layers.recurrent.LSTM):
58 | raise ValueError(
59 | 'Keras bi-directional wrapper conversion supports '
60 | 'only LSTM layer at this time. ')
61 |
62 | @staticmethod
63 | def _get_layer_converter_fn(layer, supported_layers):
64 | """Get the right converter function for Keras.
65 |
66 | Args:
67 | layer - a keras layer
68 | supported_layers - a dictionary of supported layers. Keys are keras
69 | layer classes and values are corresponding
70 | coreml layer classes.
71 | Returns:
72 | layer - a coreml layer
73 | """
74 | layer_type = type(layer)
75 | if layer_type in supported_layers:
76 | return supported_layers[layer_type]
77 | else:
78 | raise TypeError(
79 | "Keras layer of type %s is not supported." % type(layer)
80 | )
81 |
82 | @staticmethod
83 | def _convert_multiarray_output_to_image(spec, feature_name, is_bgr=False):
84 | """Convert Core ML multiarray output to an image output.
85 |
86 | This modifies the core ml spec in place.
87 |
88 | spec - a Core ML spec protobuf object.
89 | feature_name - the name of the output feature to convert
90 | is_bgr - if true, assume image data is already in BGR mode.
91 | Default False
92 | """
93 | for output in spec.description.output:
94 | if output.name != feature_name:
95 | continue
96 | if output.type.WhichOneof('Type') != 'multiArrayType':
97 | raise ValueError(
98 | "{} is not a multiarray type".format(output.name,)
99 | )
100 | array_shape = tuple(output.type.multiArrayType.shape)
101 | if len(array_shape) == 2:
102 | height, width = array_shape
103 | output.type.imageType.colorSpace = \
104 | ft.ImageFeatureType.ColorSpace.Value('GRAYSCALE')
105 | else:
106 | channels, height, width = array_shape
107 |
108 | if channels == 1:
109 | output.type.imageType.colorSpace = \
110 | ft.ImageFeatureType.ColorSpace.Value('GRAYSCALE')
111 | elif channels == 3:
112 | if is_bgr:
113 | output.type.imageType.colorSpace = \
114 | ft.ImageFeatureType.ColorSpace.Value('BGR')
115 | else:
116 | output.type.imageType.colorSpace = \
117 | ft.ImageFeatureType.ColorSpace.Value('RGB')
118 | else:
119 | raise ValueError(
120 | "Channel Value {} not supported for image inputs"
121 | .format(channels,)
122 | )
123 |
124 | output.type.imageType.width = width
125 | output.type.imageType.height = height
126 |
127 | @classmethod
128 | def convert_keras(
129 | cls,
130 | model,
131 | input_names=None,
132 | output_names=None,
133 | image_input_names=[],
134 | image_output_names=[],
135 | deprocessing_args={},
136 | is_bgr=False,
137 | is_grayscale=False,
138 | red_bias=0.0,
139 | green_bias=0.0,
140 | blue_bias=0.0,
141 | gray_bias=0.0,
142 | image_scale=1.0,
143 | class_labels=None,
144 | predicted_feature_name=None,
145 | custom_layers=None):
146 | """
147 | Convert a Keras model to a Core ML Model.
148 |
149 | model - a Keras model to convert
150 | input_names - names of input layers. Default None
151 | output_names - names of output layers. Default None
152 | image_input_names - a list of input names that are image datatypes
153 | image_output_names - a list of output names that are image datatypes
154 |         deprocessing_args - a dictionary of arguments for output deprocessing
155 | class_labels - Class labels for outputs,
156 | predicted_feature_name - name for predicted features,
157 | custom_layers - a dictionary of custom layer conversions. Keys are
158 | Keras layer classes, values are coreml layer functions
159 |
160 | Returns:
161 | mlmodel - a coreml model object.
162 | """
163 | if isinstance(model, string_types):
164 | model = _keras.models.load_model(model)
165 | elif isinstance(model, tuple):
166 | model = _load_keras_model(model[0], model[1])
167 |
168 | # Merge the custom layers with the Keras layer registry
169 | supported_layers = {}
170 | supported_layers.update(_KERAS_LAYER_REGISTRY)
171 | if custom_layers:
172 | supported_layers.update(custom_layers)
173 |
174 | # Check valid versions
175 | cls._check_unsupported_layers(model, supported_layers)
176 |
177 | # Build network graph to represent Keras model
178 | graph = _topology2.NetGraph(model)
179 | graph.build()
180 | graph.remove_skip_layers(_KERAS_SKIP_LAYERS)
181 | graph.insert_1d_permute_layers()
182 | graph.insert_permute_for_spatial_bn()
183 | graph.defuse_activation()
184 | graph.remove_internal_input_layers()
185 | graph.make_output_layers()
186 |
187 | # The graph should be finalized before executing this
188 | graph.generate_blob_names()
189 | graph.add_recurrent_optionals()
190 |
191 | inputs = graph.get_input_layers()
192 | outputs = graph.get_output_layers()
193 |
194 | # check input / output names validity
195 | if input_names is not None:
196 | if isinstance(input_names, string_types):
197 | input_names = [input_names]
198 | else:
199 | input_names = ['input' + str(i + 1) for i in range(len(inputs))]
200 | if output_names is not None:
201 | if isinstance(output_names, string_types):
202 | output_names = [output_names]
203 | else:
204 | output_names = ['output' + str(i + 1) for i in range(len(outputs))]
205 |
206 | if (image_input_names is not None and
207 | isinstance(image_input_names, string_types)):
208 | image_input_names = [image_input_names]
209 |
210 | graph.reset_model_input_names(input_names)
211 | graph.reset_model_output_names(output_names)
212 |
213 | # Keras -> Core ML input dimension dictionary
214 | # (None, None) -> [1, 1, 1, 1, 1]
215 | # (None, D) -> [D] or [D, 1, 1, 1, 1]
216 | # (None, Seq, D) -> [Seq, 1, D, 1, 1]
217 | # (None, H, W, C) -> [C, H, W]
218 | # (D) -> [D]
219 | # (Seq, D) -> [Seq, 1, 1, D, 1]
220 | # (Batch, Sequence, D) -> [D]
221 |
222 | # Retrieve input shapes from model
223 | if type(model.input_shape) is list:
224 | input_dims = [filter(None, x) for x in model.input_shape]
225 | unfiltered_shapes = model.input_shape
226 | else:
227 | input_dims = [filter(None, model.input_shape)]
228 | unfiltered_shapes = [model.input_shape]
229 |
230 | for idx, dim in enumerate(input_dims):
231 | unfiltered_shape = unfiltered_shapes[idx]
232 | dim = list(dim)
233 | if len(dim) == 0:
234 | # Used to be [None, None] before filtering; indicating
235 | # unknown sequence length
236 | input_dims[idx] = tuple([1])
237 | elif len(dim) == 1:
238 | s = graph.get_successors(inputs[idx])[0]
239 | if isinstance(graph.get_keras_layer(s),
240 | _keras.layers.embeddings.Embedding):
241 | # Embedding layer's special input (None, D) where D is
242 | # actually sequence length
243 | input_dims[idx] = (1,)
244 | else:
245 | input_dims[idx] = dim # dim is just a number
246 | elif len(dim) == 2: # [Seq, D]
247 | input_dims[idx] = (dim[1],)
248 | elif len(dim) == 3: # H,W,C
249 | if (len(unfiltered_shape) > 3):
250 | # keras uses the reverse notation from us
251 | input_dims[idx] = (dim[2], dim[0], dim[1])
252 | else:
253 | # keras provided fixed batch and sequence length, so
254 | # the input was (batch, sequence, channel)
255 | input_dims[idx] = (dim[2],)
256 | else:
257 | raise ValueError(
258 |                     'Input ' + input_names[idx] + ' has input shape of length ' +
259 | str(len(dim)))
260 |
261 | # Retrieve output shapes from model
262 | if type(model.output_shape) is list:
263 | output_dims = [filter(None, x) for x in model.output_shape]
264 | else:
265 | output_dims = [filter(None, model.output_shape[1:])]
266 |
267 | for idx, dim in enumerate(output_dims):
268 | dim = list(dim)
269 | if len(dim) == 1:
270 | output_dims[idx] = dim
271 | elif len(dim) == 2: # [Seq, D]
272 | output_dims[idx] = (dim[1],)
273 | elif len(dim) == 3:
274 | output_dims[idx] = (dim[2], dim[0], dim[1])
275 |
276 | input_types = [datatypes.Array(*dim) for dim in input_dims]
277 | output_types = [datatypes.Array(*dim) for dim in output_dims]
278 |
279 | # Some of the feature handling is sensitive about string vs unicode
280 | input_names = map(str, input_names)
281 | output_names = map(str, output_names)
282 | is_classifier = class_labels is not None
283 | if is_classifier:
284 | mode = 'classifier'
285 | else:
286 | mode = None
287 |
288 | # assuming these match
289 | input_features = list(zip(input_names, input_types))
290 | output_features = list(zip(output_names, output_types))
291 |
292 | builder = _NeuralNetworkBuilder(
293 | input_features, output_features, mode=mode
294 | )
295 |
296 | for iter, layer in enumerate(graph.layer_list):
297 | keras_layer = graph.keras_layer_map[layer]
298 | print("%d : %s, %s" % (iter, layer, keras_layer))
299 | if isinstance(keras_layer, _keras.layers.wrappers.TimeDistributed):
300 | keras_layer = keras_layer.layer
301 |
302 | converter_func = cls._get_layer_converter_fn(
303 | keras_layer, supported_layers
304 | )
305 | input_names, output_names = graph.get_layer_blobs(layer)
306 | converter_func(
307 | builder,
308 | layer,
309 | input_names,
310 | output_names,
311 | keras_layer
312 | )
313 |
314 | # Set the right inputs and outputs on the model description (interface)
315 | builder.set_input(input_names, input_dims)
316 | builder.set_output(output_names, output_dims)
317 |
318 | # Since we aren't mangling anything the user gave us, we only need to
319 | # update the model interface here
320 | builder.add_optionals(graph.optional_inputs, graph.optional_outputs)
321 |
322 | # Add classifier classes (if applicable)
323 | if is_classifier:
324 | classes_in = class_labels
325 | if isinstance(classes_in, string_types):
326 | import os
327 | if not os.path.isfile(classes_in):
328 | raise ValueError(
329 | "Path to class labels (%s) does not exist." %
330 | classes_in
331 | )
332 | with open(classes_in, 'r') as f:
333 | classes = f.read()
334 | classes = classes.splitlines()
335 | elif type(classes_in) is list: # list[int or str]
336 | classes = classes_in
337 | else:
338 | raise ValueError(
339 | 'Class labels must be a list of integers / '
340 | 'strings, or a file path'
341 | )
342 |
343 | if predicted_feature_name is not None:
344 | builder.set_class_labels(
345 | classes,
346 | predicted_feature_name=predicted_feature_name
347 | )
348 | else:
349 | builder.set_class_labels(classes)
350 |
351 |         # Set pre-processing parameters
352 | builder.set_pre_processing_parameters(
353 | image_input_names=image_input_names,
354 | is_bgr=is_bgr,
355 | red_bias=red_bias,
356 | green_bias=green_bias,
357 | blue_bias=blue_bias,
358 | gray_bias=gray_bias,
359 | image_scale=image_scale)
360 |
361 | # Convert the image outputs to actual image datatypes
362 | for output_name in output_names:
363 | if output_name in image_output_names:
364 | cls._convert_multiarray_output_to_image(
365 | builder.spec, output_name, is_bgr=is_bgr
366 | )
367 |
368 | # Return the protobuf spec
369 | spec = builder.spec
370 | return _MLModel(spec)
371 |
--------------------------------------------------------------------------------
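
Editor's annotation (not part of the repository source): `FritzCoremlConverter.convert_keras`
above mirrors the stock coremltools Keras converter but accepts a `custom_layers` mapping and can
re-type selected outputs as images. A hedged usage sketch follows; `keras_model`, `MyCustomLayer`,
`convert_my_custom_layer`, and the feature names are placeholders, not identifiers defined in this
repository.

    # All names below are hypothetical; substitute the project's own custom
    # Keras layer class and its matching Core ML converter function.
    mlmodel = FritzCoremlConverter.convert_keras(
        keras_model,
        input_names=['image'],
        image_input_names=['image'],
        output_names=['stylizedImage'],
        image_output_names=['stylizedImage'],
        image_scale=1.0 / 255.0,
        custom_layers={MyCustomLayer: convert_my_custom_layer},
    )
    mlmodel.save('stylized.mlmodel')
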
/image_segmentation/image_segmentation/icnet.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from functools import partial
3 | import os
4 |
5 | from keras.layers import Activation
6 | from keras.layers import Conv2D
7 | from keras.layers import Add
8 | from keras.layers import MaxPooling2D
9 | from keras.layers import AveragePooling2D
10 | from keras.layers import ZeroPadding2D
11 | from keras.layers import Input
12 | from keras.layers import BatchNormalization
13 | from keras.layers import UpSampling2D
14 | from keras.models import Model
15 |
16 | from tensorflow.python.lib.io import file_io
17 | logger = logging.getLogger('icnet')
18 |
19 |
20 | class ICNetModelFactory(object):
21 | """Generates ICNet Keras Models."""
22 |
23 | @staticmethod
24 | def _light_cnn_block(
25 | out,
26 | filter_scale,
27 | block_name,
28 | strides=[1, 1, 1],
29 | include_projection=False):
30 | """Construct a light convolution block.
31 |
32 | Light convolution blocks are used to extract features at the start
33 | of a branch for a given scale in the pyramid network.
34 |
35 | Args:
36 | out - The output from a previous Keras layer
37 | filter_scale (int) - the base number of filters for the block
38 | block_name (str) - the name prefix for the block
39 | strides (optional, List[Int]) - a list of strides for each layer
40 | in the block. If a projection convolution is included, the
41 | stride is set to be the same as the first convolution
42 | include_projection (optional, bool) - if true, include a projection
43 | convolution
44 | Returns
45 | out - a keras layer output
46 | """
47 | conv_fn = partial(
48 | Conv2D,
49 | kernel_size=3,
50 | padding='same',
51 | use_bias=False,
52 | activation='relu'
53 | )
54 |
55 | out = conv_fn(
56 | filters=filter_scale,
57 | strides=strides[0],
58 | name='%s_1_3x3' % block_name)(out)
59 | out = BatchNormalization(name='%s_1_3x3_bn' % block_name)(out)
60 | out = conv_fn(
61 | filters=filter_scale,
62 | strides=strides[1],
63 | name='%s_2_3x3' % block_name)(out)
64 | out = BatchNormalization(name='%s_2_3x3_bn' % block_name)(out)
65 | out = conv_fn(
66 | filters=filter_scale * 2,
67 | strides=strides[2],
68 | name='%s_3_3x3' % block_name)(out)
69 | out = BatchNormalization(name='%s_3_3x3_bn' % block_name)(out)
70 |
71 | if include_projection:
72 | out = Conv2D(
73 | filters=filter_scale * 4,
74 | kernel_size=1,
75 | name='%s_proj' % block_name
76 | )(out)
77 | out = BatchNormalization(name='%s_proj_bn' % block_name)(out)
78 |
79 | return out
80 |
81 | @staticmethod
82 | def _inner_conv_block(
83 | out,
84 | filter_scale,
85 | block_name,
86 | strides=[1, 1, 1],
87 | dilation_rate=1):
88 | """Construct an inner convolution block.
89 |
90 | Inner convolution blocks are found repeatedly in the ICNet structure.
91 |
92 | Args:
93 | out - The output from a previous Keras layer
94 | filter_scale (int) - the base number of filters for the block
95 | block_name (str) - the name prefix for the block
96 | strides (optional, List[Int]) - a list of strides for each layer
97 | in the block. If a projection convolution is included, the
98 | stride is set to be the same as the first convolution
99 | dilation_rate (optional, Int) - a dilation rate to include atrous
100 | convolutions for certain blocks
101 |
102 | Returns
103 | out - a keras layer output
104 | """
105 | conv_fn = partial(
106 | Conv2D,
107 | activation='relu',
108 | use_bias=False,
109 | )
110 | out = conv_fn(
111 | filters=filter_scale,
112 | kernel_size=1,
113 | strides=strides[0],
114 | name='%s_1x1_reduce' % block_name)(out)
115 | out = BatchNormalization(name='%s_1x1_reduce_bn' % block_name)(out)
116 | out = ZeroPadding2D(
117 | padding=dilation_rate,
118 | name='%s_padding' % block_name)(out)
119 | out = conv_fn(
120 | filters=filter_scale,
121 | kernel_size=3,
122 | strides=strides[1],
123 | dilation_rate=dilation_rate,
124 | name='%s_3x3' % block_name)(out)
125 | out = BatchNormalization(name='%s_3x3_bn' % block_name)(out)
126 | out = conv_fn(
127 | filters=filter_scale * 4,
128 | kernel_size=1,
129 | activation=None,
130 | strides=strides[2],
131 | name='%s_1x1_increase' % block_name)(out)
132 | out = BatchNormalization(name='%s_1x1_increase_bn' % block_name)(out)
133 | return out
134 |
135 | @classmethod
136 | def _conv_block(
137 | cls,
138 | out,
139 | filter_scale,
140 | block_name,
141 | include_projection=False,
142 | strides=[1, 1, 1],
143 | dilation_rate=1):
144 |         """Construct a convolution block.
145 |
146 | Convolution blocks are found repeatedly in the ICNet structure.
147 | The block is structured similarly to a residual block with multiple
148 | branches.
149 |
150 | Args:
151 | out - The output from a previous Keras layer
152 | filter_scale (int) - the base number of filters for the block
153 | block_name (str) - the name prefix for the block
154 | include_projection (optional, bool) - if true, include a projection
155 | convolution
156 | strides (optional, List[Int]) - a list of strides for each layer
157 | in the block. If a projection convolution is included, the
158 | stride is set to be the same as the first convolution
159 | dilation_rate (optional, Int) - a dilation rate to include atrous
160 | convolutions for certain blocks
161 |
162 | Returns
163 | out - a keras layer output
164 | """
165 | # Branch A
166 | if include_projection:
167 | out_a = Conv2D(
168 | filters=filter_scale * 4,
169 | kernel_size=1,
170 | use_bias=False,
171 | strides=strides[0],
172 | name='%s_1x1_proj' % block_name
173 | )(out)
174 | out_a = BatchNormalization(
175 | name='%s_1x1_proj_bn' % block_name
176 | )(out_a)
177 | else:
178 | out_a = out
179 |
180 | # Branch B
181 | out_b = cls._inner_conv_block(
182 | out,
183 | filter_scale,
184 | block_name,
185 | strides=strides,
186 | dilation_rate=dilation_rate
187 | )
188 |
189 | # Combine
190 | out = Add(name='%s_add' % block_name)([out_a, out_b])
191 | out = Activation('relu', name='%s_relu' % block_name)(out)
192 | return out
193 |
194 | @staticmethod
195 | def _cff_block(
196 | out_a,
197 | out_b,
198 | filter_scale,
199 | block_name,
200 | include_projection=False):
201 |         """Construct a cascading feature fusion (CFF) block.
202 |
203 | CFF blocks are used to fuse features extracted from multiple scales.
204 |
205 | Args:
206 |             out_a - The output layer from the lower resolution branch
207 | out_b - The output layer from the higher resolution branch to be
208 | merged.
209 | filter_scale (int) - the base number of filters for the block
210 | block_name (str) - the name prefix for the block
211 | include_projection (optional, bool) - if true, include a projection
212 | convolution
213 | Returns
214 | out - a keras layer output
215 | """
216 | aux_1 = UpSampling2D(size=(2, 2), name='%s_interp' % block_name,
217 | interpolation='bilinear')(out_a)
218 | out_a = ZeroPadding2D(padding=2, name='%s_padding' % block_name)(aux_1)
219 | out_a = Conv2D(
220 | filters=filter_scale,
221 | kernel_size=3,
222 | dilation_rate=2,
223 | use_bias=False,
224 | name='%s_conv_3x3' % block_name
225 | )(out_a)
226 | out_a = BatchNormalization(name='%s_conv_bn' % block_name)(out_a)
227 |
228 | if include_projection:
229 | out_b = Conv2D(
230 | filters=filter_scale,
231 | kernel_size=1,
232 | use_bias=False,
233 | name='%s_proj' % block_name)(out_b)
234 | out_b = BatchNormalization(name='%s_proj_bn' % block_name)(out_b)
235 |
236 | out_a = Add(name='%s_sum' % block_name)([out_a, out_b])
237 | out_a = Activation('relu', name='%s_sum_relu' % block_name)(out_a)
238 |
239 | return out_a, aux_1
240 |
241 | @classmethod
242 | def build(
243 | cls,
244 | img_size,
245 | n_classes,
246 | alpha=1.0,
247 | weights_path=None,
248 | train=False,
249 | input_tensor=None):
250 | """Build an ICNet Model.
251 |
252 | Args:
253 |             img_size (int): the size of each square input image; must be
254 |                 a multiple of 384. Only square images are supported.
255 | n_classes (int): the number of output labels to predict.
256 | weights_path (str): (optional) a path to a Keras model file to
257 | load after the network is constructed. Useful for re-training.
258 | train (bool): (optional) if true, add additional output nodes to
259 | the network for training.
260 |
261 | Returns:
262 | model (keras.models.Model): A Keras model
263 | """
264 | if img_size % 384 != 0:
265 |             raise ValueError('`img_size` must be a multiple of 384.')
266 | logger.info('Building ICNet model.')
267 | inpt = Input(shape=(img_size, img_size, 3), tensor=input_tensor)
268 |
269 | # The full scale branch
270 | out_1 = cls._light_cnn_block(
271 | inpt,
272 | filter_scale=int(alpha * 32),
273 | strides=[2, 2, 2],
274 | include_projection=True,
275 | block_name='sub1_conv'
276 | )
277 |
278 | # The 1/2 scale branch
279 | out_2 = AveragePooling2D(pool_size=(2, 2), name='sub2_data')(inpt)
280 | out_2 = cls._light_cnn_block(
281 | out_2,
282 | filter_scale=int(alpha * 32),
283 | strides=[2, 1, 1],
284 | block_name='sub2_conv'
285 | )
286 | out_2 = MaxPooling2D(
287 | pool_size=3, strides=2, name='sub2_pool1_3x3'
288 | )(out_2)
289 |
290 | for layer_index in range(1, 4):
291 | out_2 = cls._conv_block(
292 | out_2,
293 | filter_scale=int(alpha * 32),
294 | include_projection=(layer_index == 1),
295 | block_name='sub2_conv%d_%d' % (2, layer_index)
296 | )
297 |
298 | # The third large conv block gets split off into another branch.
299 | out_2 = cls._conv_block(
300 | out_2,
301 | filter_scale=int(alpha * 64),
302 | include_projection=True,
303 | strides=[2, 1, 1],
304 | block_name='sub2_conv%d_%d' % (3, 1)
305 | )
306 |
307 | # The 1/4 scale branch
308 | out_4 = AveragePooling2D(pool_size=(2, 2), name='sub4_conv3_1')(out_2)
309 |
310 | for layer_index in range(2, 5):
311 | out_4 = cls._conv_block(
312 | out_4,
313 | filter_scale=int(alpha * 64),
314 | block_name='sub4_conv%d_%d' % (3, layer_index)
315 | )
316 |
317 | for layer_index in range(1, 7):
318 | out_4 = cls._conv_block(
319 | out_4,
320 | filter_scale=int(alpha * 128),
321 | dilation_rate=2,
322 | include_projection=(layer_index == 1),
323 | block_name='sub4_conv%d_%d' % (4, layer_index)
324 | )
325 |
326 | for sub_index in range(1, 4):
327 | out_4 = cls._conv_block(
328 | out_4,
329 | filter_scale=int(alpha * 256),
330 | dilation_rate=4,
331 | include_projection=(sub_index == 1),
332 | block_name='sub4_conv%d_%d' % (5, sub_index)
333 | )
334 |         # In this version we've fixed the input dimensions to be square.
335 |         # We are also restricting dimensions to be multiples of 384, which
336 |         # allows us to use standard upsampling layers for resizing.
337 | pool_height, _ = out_4.shape[1:3].as_list()
338 | pool_scale = int(img_size / 384)
339 | pool1 = AveragePooling2D(pool_size=pool_height,
340 | strides=pool_height,
341 | name='sub4_conv5_3_pool1')(out_4)
342 | pool1 = UpSampling2D(size=12 * pool_scale,
343 | name='sub4_conv5_3_pool1_interp',
344 | interpolation='bilinear')(pool1)
345 | pool2 = AveragePooling2D(pool_size=pool_height // 2,
346 | strides=pool_height // 2,
347 | name='sub4_conv5_3_pool2')(out_4)
348 | pool2 = UpSampling2D(size=6 * pool_scale,
349 | name='sub4_conv5_3_pool2_interp',
350 | interpolation='bilinear')(pool2)
351 | pool3 = AveragePooling2D(pool_size=pool_height // 3,
352 | strides=pool_height // 3,
353 | name='sub4_conv5_3_pool3')(out_4)
354 | pool3 = UpSampling2D(size=4 * pool_scale,
355 | name='sub4_conv5_3_pool3_interp',
356 | interpolation='bilinear')(pool3)
357 | pool4 = AveragePooling2D(pool_size=pool_height // 4,
358 | strides=pool_height // 4,
359 | name='sub4_conv5_3_pool4')(out_4)
360 | pool4 = UpSampling2D(size=3 * pool_scale,
361 | name='sub4_conv5_3_pool6_interp',
362 | interpolation='bilinear')(pool4)
363 |
364 | out_4 = Add(
365 | name='sub4_conv5_3_sum'
366 | )([out_4, pool1, pool2, pool3, pool4])
367 | out_4 = Conv2D(
368 | filters=int(alpha * 256),
369 | kernel_size=1,
370 | activation='relu',
371 | use_bias=False,
372 | name='sub4_conv5_4_k1')(out_4)
373 | out_4 = BatchNormalization(name='sub4_conv5_4_k1_bn')(out_4)
374 |
375 | out_2, aux_1 = cls._cff_block(
376 | out_4,
377 | out_2,
378 | int(alpha * 128),
379 | block_name='sub24_cff',
380 | include_projection=True
381 | )
382 |
383 | out_1, aux_2 = cls._cff_block(
384 | out_2,
385 | out_1,
386 | int(alpha * 128),
387 | block_name='sub12_cff'
388 | )
389 | out_1 = UpSampling2D(size=(2, 2), name='sub12_sum_interp',
390 | interpolation='bilinear')(out_1)
391 |
392 | out_1 = Conv2D(n_classes, 1, activation='softmax',
393 | name='conv6_cls')(out_1)
394 |
395 | out = UpSampling2D(size=(4, 4), name='conv6_interp',
396 | interpolation='bilinear')(out_1)
397 |
398 | if train:
399 | aux_1 = Conv2D(n_classes, 1, activation='softmax',
400 | name='sub4_out')(aux_1)
401 | aux_2 = Conv2D(n_classes, 1, activation='softmax',
402 | name='sub24_out')(aux_2)
403 | # The loss during training is generated from these three outputs.
404 | # The final output layer is not needed.
405 | model = Model(inputs=inpt, outputs=[out_1, aux_2, aux_1])
406 | else:
407 | model = Model(inputs=inpt, outputs=out)
408 |
409 | if weights_path is not None:
410 | if weights_path.startswith('gs://'):
411 | weights_path = _copy_file_from_gcs(weights_path)
412 | logger.info('Loading weights from %s.' % weights_path)
413 | model.load_weights(weights_path, by_name=True)
414 | logger.info('Done building model.')
415 |
416 | return model
417 |
418 |
419 | def _copy_file_from_gcs(file_path):
420 | """Copy a file from gcs to local machine.
421 |
422 | Args:
423 | file_path (str): a GCS url to download
424 | Returns:
425 | str: a local path to the file
426 | """
427 | logger.info('Downloading %s' % file_path)
428 | with file_io.FileIO(file_path, mode='rb') as input_f:
429 | basename = os.path.basename(file_path)
430 | local_path = os.path.join('/tmp', basename)
431 | with file_io.FileIO(local_path, mode='w+') as output_f:
432 | output_f.write(input_f.read())
433 | return local_path
434 |
--------------------------------------------------------------------------------
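
Editor's annotation (not part of the repository source): a minimal sketch of building an
inference-mode ICNet with `ICNetModelFactory.build`, which also exercises `_copy_file_from_gcs`
when the weights path is a `gs://` URL. The image size, class count, alpha, and weights path are
assumptions for illustration only.

    from image_segmentation.icnet import ICNetModelFactory

    model = ICNetModelFactory.build(
        768,                 # img_size must be a multiple of 384
        n_classes=151,       # hypothetical: 150 ADE20K labels + background
        alpha=1.0,           # width multiplier for the convolution filters
        weights_path='gs://my-bucket/icnet_768x768_1.h5',  # hypothetical checkpoint
    )
    model.summary()
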