├── image_segmentation ├── utils │ ├── __init__.py │ ├── tfrecord2idx │ ├── deeplab_model.py │ ├── compare_models.py │ ├── tfrecord_helpers.py │ └── model_helpers.py ├── image_segmentation │ ├── __init__.py │ ├── dali_config.py │ ├── utils.py │ ├── build_data.py │ ├── dali_pipeline.py │ ├── data_generator.py │ ├── train.py │ └── icnet.py ├── config.yaml ├── .gitattributes ├── examples │ ├── living_room.jpg │ ├── example_image_and_mask.png │ ├── example_pixel_probabilities.png │ ├── icnet_768x768_living_room.h5 │ ├── icnet_768x768_living_room.zip │ └── icnet_768x768_living_room.mlmodel ├── nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl ├── requirements.txt ├── LICENSE ├── convert_to_coreml.py ├── setup.py ├── Makefile ├── coco_object_info.txt ├── objectInfo150.txt ├── README.md └── create_tfrecord_dataset.py ├── style_transfer ├── style_transfer │ ├── __init__.py │ ├── dataset_builder.py │ ├── utils.py │ ├── layers.py │ ├── layer_converters.py │ ├── train.py │ ├── models.py │ └── fritz_coreml_converter.py ├── example │ ├── dog.jpg │ ├── starry_night.jpg │ ├── stylized_dog.jpg │ ├── starry_night_results.jpg │ ├── starry_night.h5 │ ├── starry_night_256x256_025.h5 │ ├── starry_night_640x480_025.mlmodel │ ├── starry_night_256x256_small_a03.h5 │ ├── starry_night_640x480_025_optimized.pb │ └── starry_night_640x480_small_a03_q8.mlmodel ├── setup.py ├── stylize_image.py ├── requirements.txt ├── convert_to_coreml.py ├── convert_to_tfmobile.py ├── create_training_dataset.py └── README.md ├── resources ├── README.md └── AI_Landscape.md ├── create_ml_playgrounds ├── pneumonia_detector │ ├── Pneumonia.playground │ │ ├── Contents.swift │ │ └── contents.xcplayground │ ├── Pneumonia.mlmodel │ └── README.md ├── subreddit_suggester │ ├── data.json │ ├── SubredditSuggester.mlmodel │ └── SubredditSuggester.playground │ │ ├── contents.xcplayground │ │ └── Contents.swift └── README.md ├── .gitattributes ├── LICENSE ├── README.md └── .gitignore /image_segmentation/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /image_segmentation/config.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | scaleTier: BASIC_GPU 3 | -------------------------------------------------------------------------------- /style_transfer/example/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/style_transfer/example/dog.jpg -------------------------------------------------------------------------------- /image_segmentation/.gitattributes: -------------------------------------------------------------------------------- 1 | nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/todo/fritz-models/master/style_transfer/example/starry_night.jpg -------------------------------------------------------------------------------- /style_transfer/example/stylized_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/style_transfer/example/stylized_dog.jpg -------------------------------------------------------------------------------- /image_segmentation/examples/living_room.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/image_segmentation/examples/living_room.jpg -------------------------------------------------------------------------------- /style_transfer/example/starry_night_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/style_transfer/example/starry_night_results.jpg -------------------------------------------------------------------------------- /resources/README.md: -------------------------------------------------------------------------------- 1 | # Resources 2 | 3 | Additional, non-code resources for machine learning / AI. 4 | 5 | * [AI Startup Landscape](./AI_Landscape.md) 6 | -------------------------------------------------------------------------------- /image_segmentation/examples/example_image_and_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/image_segmentation/examples/example_image_and_mask.png -------------------------------------------------------------------------------- /image_segmentation/examples/example_pixel_probabilities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/todo/fritz-models/master/image_segmentation/examples/example_pixel_probabilities.png -------------------------------------------------------------------------------- /create_ml_playgrounds/pneumonia_detector/Pneumonia.playground/Contents.swift: -------------------------------------------------------------------------------- 1 | import CreateMLUI 2 | 3 | 4 | let builder = MLImageClassifierBuilder() 5 | builder.showInLiveView() 6 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9c522b30d709051fe24c06dd4ac27f0bd58101ce68e06fc79e1454d0424678cb 3 | size 569464 4 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night_256x256_025.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eb03d6faa1e226da19c82eb6d250d84db12a166d06a8332cfe0a7989b36bcce8 3 | size 569496 4 | -------------------------------------------------------------------------------- /create_ml_playgrounds/subreddit_suggester/data.json: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:be520cc19d10060552788eb6462640f631be0a845eb8f88048b14e47658d82eb 3 | size 3345068 4 | -------------------------------------------------------------------------------- 
/style_transfer/example/starry_night_640x480_025.mlmodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4b6abac67c45d5385fc17b46f32e1cc5ed1f9107c053344ed9a9757c47aba738 3 | size 438131 4 | -------------------------------------------------------------------------------- /create_ml_playgrounds/pneumonia_detector/Pneumonia.mlmodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:98fecaa9cd499fec169718e2b1156c048393f8b436891df4f9e305f5846c7238 3 | size 16980 4 | -------------------------------------------------------------------------------- /image_segmentation/examples/icnet_768x768_living_room.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a4d544785680f713f0091b83b938c22a56e1c8f981d1321adf4355097f9fac4c 3 | size 81300248 4 | -------------------------------------------------------------------------------- /image_segmentation/examples/icnet_768x768_living_room.zip: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:11a3dee73201e2dbeaae9112b8adb2281485d5d79ec1611dd87f3c0b74ed1eae 3 | size 98526975 4 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night_256x256_small_a03.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d3c21d53dda54dd6df0abc6d3b0c7637bc64c7fb9997b3fa29c97fbdc1bd61e1 3 | size 153272 4 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night_640x480_025_optimized.pb: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6262e219e71a9ebb201a13400d817b37625bd7231aedd20e7c22abc45c5d7506 3 | size 478672 4 | -------------------------------------------------------------------------------- /image_segmentation/examples/icnet_768x768_living_room.mlmodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3f3009bb2bb3b056b707527b4f54aaa8f1df28a46da7e03e1922379f540bf15e 3 | size 26938492 4 | -------------------------------------------------------------------------------- /style_transfer/example/starry_night_640x480_small_a03_q8.mlmodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bc44cfa2aa8c056ff5fac3c83dd80c57731840ed5e68b6272cda34b6490fbfa4 3 | size 16876 4 | -------------------------------------------------------------------------------- /create_ml_playgrounds/subreddit_suggester/SubredditSuggester.mlmodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:7d409517ad9098ba80a33601df3d542a84edc9bb9f1d0494ff8862a96387cb6c 3 | size 1100617 4 | -------------------------------------------------------------------------------- /image_segmentation/nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 
2 | oid sha256:5d9a9865809b1a6f91a1c6033d6ba881a23d29e424f1bdb2b19e3b01177977d4 3 | size 17489870 4 | -------------------------------------------------------------------------------- /create_ml_playgrounds/pneumonia_detector/README.md: -------------------------------------------------------------------------------- 1 | # Detecting Pneumonia in an iOS App with Create ML 2 | 3 | Swift playground which is used to train image classifier for this [blog post](https://heartbeat.fritz.ai/detecting-pneumonia-in-an-ios-app-with-create-ml-5cff2a60a3d). 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.h5 filter=lfs diff=lfs merge=lfs -text 2 | *.mlmodel filter=lfs diff=lfs merge=lfs -text 3 | *.pb filter=lfs diff=lfs merge=lfs -text 4 | *.zip filter=lfs diff=lfs merge=lfs -text 5 | *.whl filter=lfs diff=lfs merge=lfs -text 6 | *.json filter=lfs diff=lfs merge=lfs -text 7 | -------------------------------------------------------------------------------- /create_ml_playgrounds/pneumonia_detector/Pneumonia.playground/contents.xcplayground: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /create_ml_playgrounds/subreddit_suggester/SubredditSuggester.playground/contents.xcplayground: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /image_segmentation/requirements.txt: -------------------------------------------------------------------------------- 1 | Keras==2.2.4 2 | h5py==2.7.1 3 | numpy==1.14.3 4 | matplotlib==2.2.2 5 | scikit-image==0.13.1 6 | Pillow==5.1.0 7 | six==1.10.0 8 | # Forked coremltools which includes fix for bilinear upsampling. 9 | # Update this after it has been merged into master. 10 | -e git+git@github.com:ghop02/coremltools.git@289-add-keras-bilinear-upsampling#egg=coremltools 11 | -------------------------------------------------------------------------------- /style_transfer/setup.py: -------------------------------------------------------------------------------- 1 | """Setup script for style_transfer.""" 2 | 3 | from setuptools import find_packages 4 | from setuptools import setup 5 | 6 | 7 | REQUIRED_PACKAGES = ['h5py', 'keras==2.1.2', 'Pillow'] 8 | 9 | setup( 10 | name='style_transfer', 11 | version='1.0', 12 | install_requires=REQUIRED_PACKAGES, 13 | include_package_data=True, 14 | packages=[p for p in find_packages() if p.startswith('style_transfer')], 15 | description='Fritz Style Transfer Library', 16 | ) 17 | -------------------------------------------------------------------------------- /create_ml_playgrounds/README.md: -------------------------------------------------------------------------------- 1 | # Create ML Playgrounds 2 | A collection of Swift playgrounds using Create ML to train Core ML models. 3 | 4 | ## Playgrounds 5 | 6 | * [Subreddit Suggester](https://github.com/fritzlabs/fritz-models/tree/master/create_ml_playgrounds): Reduce the number of clicks required for submitting posts to Reddit by automatically suggesting a subreddit based on the post's title. 7 | * [Pneumonia Detector](https://github.com/fritzlabs/fritz-models/tree/master/create_ml_playgrounds): Classify X-ray images to detect pediatric pneumonia. 
8 | -------------------------------------------------------------------------------- /image_segmentation/utils/tfrecord2idx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import struct 4 | 5 | if len(sys.argv) < 3: 6 | print("Usage: tfrecord2idx ") 7 | exit() 8 | 9 | f = open(sys.argv[1], 'rb') 10 | idx = open(sys.argv[2], 'w') 11 | 12 | while True: 13 | current = f.tell() 14 | try: 15 | # length 16 | byte_len = f.read(8) 17 | if byte_len == '': 18 | break 19 | # crc 20 | f.read(4) 21 | proto_len = struct.unpack('q', byte_len)[0] 22 | # proto 23 | f.read(proto_len) 24 | # crc 25 | f.read(4) 26 | idx.write(str(current) + ' ' + str(f.tell() - current) + '\n') 27 | except: 28 | print("Not a valid TFRecord file") 29 | break 30 | 31 | f.close() 32 | idx.close() 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Fritz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /image_segmentation/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Fritz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/dali_config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger(__name__) 4 | 5 | 6 | def build_config(**updates): 7 | defaults = { 8 | 'hue_min': -30, 9 | 'hue_max': 30, 10 | 'zoom_scale': 1.3, 11 | 'rotate_angle_min': -45, 12 | 'rotate_angle_max': 45, 13 | 'crop_x_max': 0.2, 14 | 'crop_y_max': 0.2, 15 | 'contrast_min': 0.45, 16 | 'contrast_max': 1.5, 17 | 'saturation_min': 0.4, 18 | 'saturation_max': 2.0, 19 | 'brightness_min': 0.35, 20 | 'brightness_max': 1.5, 21 | } 22 | for key in updates: 23 | if key not in defaults: 24 | raise Exception("Augmentation Config %s not found." % key) 25 | 26 | defaults.update(**updates) 27 | 28 | return defaults 29 | 30 | 31 | class DaliConfig(object): 32 | """Wrapper for Dali augmentation yaml config parameters. """ 33 | def __init__(self, **updates): 34 | 35 | self.__dict__ = build_config(**updates) 36 | 37 | def summarize(self): 38 | logger.info('Dali Image Augmentation Parameters') 39 | logger.info('==================================') 40 | for key, value in self.__dict__.items(): 41 | logger.info(' %s: %s', key, value) 42 | -------------------------------------------------------------------------------- /image_segmentation/convert_to_coreml.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import coremltools 5 | import keras 6 | 7 | from image_segmentation.icnet import ICNetModelFactory 8 | 9 | 10 | def convert(argv): 11 | parser = argparse.ArgumentParser( 12 | description='Convert a Keras ICNet model to Core ML' 13 | ) 14 | parser.add_argument( 15 | 'keras_checkpoint', nargs='?', type=str, 16 | help='a Keras model checkpoint to load and convert.' 17 | ) 18 | parser.add_argument( 19 | '--alpha', type=float, required=True, 20 | help='The width paramter of the network.') 21 | parser.add_argument( 22 | 'mlmodel_output', nargs='?', type=str, 23 | help='a .mlmodel output file.' 
24 | ) 25 | 26 | args = parser.parse_args(argv) 27 | 28 | original_keras_model = keras.models.load_model(args.keras_checkpoint) 29 | img_size = original_keras_model.input_shape[1] 30 | num_classes = original_keras_model.output_shape[0][-1] 31 | 32 | keras_model = ICNetModelFactory.build( 33 | img_size, 34 | num_classes, 35 | alpha=args.alpha, 36 | weights_path=args.keras_checkpoint, 37 | train=False 38 | ) 39 | 40 | mlmodel = coremltools.converters.keras.convert( 41 | keras_model, 42 | input_names='image', 43 | image_input_names='image', 44 | image_scale=2.0 / 255.0, 45 | red_bias=-1.0, 46 | green_bias=-1.0, 47 | blue_bias=-1.0, 48 | output_names='output' 49 | ) 50 | 51 | mlmodel.save(args.mlmodel_output) 52 | 53 | 54 | if __name__ == '__main__': 55 | convert(sys.argv[1:]) 56 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/dataset_builder.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import tensorflow as tf 4 | 5 | logger = logging.getLogger('trainer') 6 | 7 | 8 | class DatasetBuilder(object): 9 | """Build a TFRecord dataset for training.""" 10 | 11 | @staticmethod 12 | def _resize_fn(images, image_size): 13 | return tf.image.resize_images( 14 | images, 15 | image_size, 16 | method=tf.image.ResizeMethod.BICUBIC 17 | ) 18 | 19 | @staticmethod 20 | def _decode_example(example_proto): 21 | features = { 22 | "image/encoded": tf.FixedLenFeature( 23 | (), tf.string, default_value="" 24 | ) 25 | } 26 | parsed_features = tf.parse_single_example(example_proto, features) 27 | image = tf.image.decode_jpeg( 28 | parsed_features["image/encoded"], 29 | channels=3) 30 | return image 31 | 32 | @classmethod 33 | def build(cls, filename, batch_size, image_size): 34 | """Build a TensorFlow dataset from images. 35 | 36 | Args: 37 | filename (str) - a filename of tfrecords to load 38 | batch_size (int) - the batch size for the iterator 39 | image_size ((int, int)) - resize all images to a single size 40 | 41 | Returns 42 | dataset - a tfrecord dataset 43 | """ 44 | logger.info('Creating dataset from: %s' % filename) 45 | dataset = tf.data.TFRecordDataset(filename) 46 | dataset = dataset.map(cls._decode_example) 47 | dataset = dataset.map(lambda x: cls._resize_fn(x, image_size)) 48 | dataset = dataset.batch(batch_size) 49 | dataset = dataset.repeat() # Repeat forever 50 | return dataset 51 | -------------------------------------------------------------------------------- /image_segmentation/setup.py: -------------------------------------------------------------------------------- 1 | """Setup script for image_segmentation.""" 2 | 3 | import logging 4 | import subprocess 5 | from setuptools import find_packages 6 | from setuptools import setup 7 | from setuptools.command.install import install 8 | 9 | 10 | REQUIRED_PACKAGES = [ 11 | 'h5py', 12 | 'keras==2.2.4', 13 | 'Pillow', 14 | 'matplotlib', 15 | 'google-cloud-storage', 16 | ] 17 | 18 | 19 | class CustomCommands(install): 20 | """A setuptools Command class able to run arbitrary commands.""" 21 | 22 | def run_custom_command(self, command_list): 23 | p = subprocess.Popen( 24 | command_list, 25 | stdin=subprocess.PIPE, 26 | stdout=subprocess.PIPE, 27 | stderr=subprocess.STDOUT) 28 | # Can use communicate(input='y\n'.encode()) if the command run requires 29 | # some confirmation. 
30 | stdout_data, _ = p.communicate() 31 | logging.info('Log command output: %s', stdout_data) 32 | if p.returncode != 0: 33 | raise RuntimeError('Command %s failed: exit code: %s' % 34 | (command_list, p.returncode)) 35 | 36 | def run(self): 37 | self.run_custom_command(['apt-get', 'update']) 38 | self.run_custom_command([ 39 | 'apt-get', 'install', '-y', 'python-tk' 40 | ]) 41 | install.run(self) 42 | 43 | 44 | setup( 45 | name='image_segmentation', 46 | version='1.0', 47 | install_requires=REQUIRED_PACKAGES, 48 | include_package_data=True, 49 | packages=[ 50 | p for p in find_packages() 51 | if p.startswith('image_segmentation') or p.startswith('utils') 52 | ], 53 | description='Fritz Style Image Segmentation Library', 54 | cmdclass={ 55 | 'install': CustomCommands, 56 | } 57 | ) 58 | -------------------------------------------------------------------------------- /create_ml_playgrounds/subreddit_suggester/SubredditSuggester.playground/Contents.swift: -------------------------------------------------------------------------------- 1 | import CreateMLUI 2 | import CreateML 3 | import Foundation 4 | 5 | let dataFilename = "/Users/jltoole/fritz/fritz-createml-examples/subreddit_title_classifier/popular_data_top_year.json" 6 | let data = try MLDataTable(contentsOf: URL(fileURLWithPath: dataFilename)) 7 | print(data.description) 8 | 9 | let (trainingData, testingData) = data.randomSplit(by: 0.8, seed: 5) 10 | 11 | let subredditClassifier = try MLTextClassifier(trainingData: trainingData, 12 | textColumn: "text", 13 | labelColumn: "label") 14 | 15 | // Training accuracy as a percentage 16 | let trainingAccuracy = (1.0 - subredditClassifier.trainingMetrics.classificationError) * 100 17 | // Validation accuracy as a percentage 18 | let validationAccuracy = (1.0 - subredditClassifier.validationMetrics.classificationError) * 100 19 | print("Training Accuracy: \(trainingAccuracy), Validation Accuracy: \(validationAccuracy)") 20 | 21 | let evaluationMetrics = subredditClassifier.evaluation(on: testingData) 22 | 23 | // Evaluation accuracy as a percentage 24 | let evaluationAccuracy = (1.0 - evaluationMetrics.classificationError) * 100 25 | print("Evaluation Accuracy: \(evaluationAccuracy)") 26 | 27 | let title = "Saw this good boy at the park today with TensorFlow." 28 | let predictedSubreddit = try subredditClassifier.prediction(from: title) 29 | print(predictedSubreddit) 30 | 31 | let metadata = MLModelMetadata(author: "Jameson Toole", 32 | shortDescription: "Predict which subreddit a post should go in based on a title.", 33 | version: "1.0") 34 | 35 | try subredditClassifier.write(to: URL(fileURLWithPath: "/Users/jltoole/fritz/fritz-createml-examples/subreddit_title_classifier/subredditClassifier.mlmodel"), 36 | metadata: metadata) 37 | 38 | testingData. 39 | -------------------------------------------------------------------------------- /style_transfer/stylize_image.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import keras 3 | import logging 4 | import numpy 5 | import PIL.Image 6 | 7 | import keras_contrib 8 | 9 | from style_transfer import layers 10 | from style_transfer import utils 11 | 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger('stylize_image') 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser( 18 | description='Stylize an image using a trained model.' 
19 | ) 20 | 21 | parser.add_argument( 22 | '--input-image', type=str, required=True, 23 | help='An image to stylize.' 24 | ) 25 | parser.add_argument( 26 | '--output-image', type=str, required=True, 27 | help='An output file for the stylized image.' 28 | ) 29 | parser.add_argument( 30 | '--model-checkpoint', type=str, required=True, 31 | help='Checkpoint from a trained Style Transfer Network.' 32 | ) 33 | 34 | args = parser.parse_args() 35 | 36 | logger.info('Loading model from %s' % args.model_checkpoint) 37 | custom_objects = { 38 | 'InstanceNormalization': 39 | keras_contrib.layers.normalization.InstanceNormalization, 40 | 'DeprocessStylizedImage': layers.DeprocessStylizedImage 41 | } 42 | transfer_net = keras.models.load_model( 43 | args.model_checkpoint, 44 | custom_objects=custom_objects 45 | ) 46 | 47 | image_size = transfer_net.input_shape[1:3] 48 | 49 | inputs = [transfer_net.input, keras.backend.learning_phase()] 50 | outputs = [transfer_net.output] 51 | 52 | transfer_style = keras.backend.function(inputs, outputs) 53 | 54 | input_image = utils.load_image( 55 | args.input_image, 56 | image_size[0], 57 | image_size[1], 58 | expand_dims=True 59 | ) 60 | output_image = transfer_style([input_image, 1])[0] 61 | output_image = PIL.Image.fromarray(numpy.uint8(output_image[0])) 62 | output_image.save(args.output_image) 63 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/utils.py: -------------------------------------------------------------------------------- 1 | """Summary. 2 | 3 | Attributes: 4 | logger (TYPE): Description 5 | """ 6 | import io 7 | import logging 8 | import os 9 | 10 | import PIL.Image 11 | import numpy 12 | from tensorflow.python.lib.io import file_io 13 | 14 | 15 | logger = logging.getLogger('utils') 16 | 17 | 18 | def load_image( 19 | filename, 20 | height, 21 | width, 22 | expand_dims=False): 23 | """Load an image and transform it to a specific size. 24 | 25 | Optionally, preprocess the image through the VGG preprocessor. 26 | 27 | Args: 28 | filename (TYPE): Description 29 | height (TYPE): Description 30 | width (TYPE): Description 31 | expand_dims (bool, optional): Description 32 | filename - an image file to load 33 | height - the height of the transformed image 34 | width - the width of the transformed image 35 | vgg_preprocess - if True, preprocess the image for a VGG network. 36 | expand_dims - Add an addition dimension (B, H, W, C), useful for 37 | feeding models. 38 | 39 | Returns: 40 | img - a numpy array representing the image. 41 | """ 42 | img = file_io.read_file_to_string(filename, binary_mode=True) 43 | img = PIL.Image.open(io.BytesIO(img)) 44 | img = img.resize((width, height), resample=PIL.Image.BILINEAR) 45 | img = numpy.array(img)[:, :, :3] 46 | 47 | if expand_dims: 48 | img = numpy.expand_dims(img, axis=0) 49 | 50 | return img 51 | 52 | 53 | def copy_file_from_gcs(file_path): 54 | """Copy a file from gcs to local machine. 
55 | 56 | Args: 57 | file_path (str): a GCS url to download 58 | 59 | Returns: 60 | str: a local path to the file 61 | """ 62 | logger.info('Downloading %s' % file_path) 63 | with file_io.FileIO(file_path, mode='rb') as input_f: 64 | basename = os.path.basename(file_path) 65 | with file_io.FileIO(basename, mode='w+') as output_f: 66 | output_f.write(input_f.read()) 67 | return basename 68 | -------------------------------------------------------------------------------- /style_transfer/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.2.0 2 | appnope==0.1.0 3 | astor==0.6.2 4 | awscli==1.14.64 5 | backcall==0.1.0 6 | bleach==1.5.0 7 | botocore==1.9.17 8 | certifi==2018.4.16 9 | chardet==3.0.4 10 | colorama==0.3.7 11 | coremltools==0.8 12 | cycler==0.10.0 13 | Cython==0.28.2 14 | decorator==4.3.0 15 | docutils==0.14 16 | easydict==1.7 17 | entrypoints==0.2.3 18 | gast==0.2.0 19 | graphviz==0.8.3 20 | grpcio==1.11.0 21 | h5py==2.7.1 22 | html5lib==0.9999999 23 | idna==2.6 24 | imgaug==0.2.5 25 | ipykernel==4.8.2 26 | ipython==6.3.1 27 | ipython-genutils==0.2.0 28 | ipywidgets==7.2.1 29 | jedi==0.12.0 30 | Jinja2==2.10 31 | jmespath==0.9.3 32 | jsonschema==2.6.0 33 | jupyter==1.0.0 34 | jupyter-client==5.2.3 35 | jupyter-console==5.2.0 36 | jupyter-core==4.4.0 37 | Keras==2.1.6 38 | Keras-Applications==1.0.2 39 | keras-contrib==2.0.8 40 | Keras-Preprocessing==1.0.1 41 | kiwisolver==1.0.1 42 | lxml==4.2.3 43 | Markdown==2.6.11 44 | MarkupSafe==1.0 45 | matplotlib==2.2.2 46 | mistune==0.8.3 47 | mxnet==1.1.0.post0 48 | nbconvert==5.3.1 49 | nbformat==4.4.0 50 | networkx==2.1 51 | notebook>=5.7.2 52 | numpy==1.14.3 53 | opencv-contrib-python==3.4.0.12 54 | pandas==0.22.0 55 | pandocfilters==1.4.2 56 | parso==0.2.0 57 | pexpect==4.5.0 58 | pickleshare==0.7.4 59 | Pillow==5.1.0 60 | prettytable==0.7.2 61 | prometheus-client==0.3.0 62 | prompt-toolkit==1.0.15 63 | protobuf==3.5.2.post1 64 | ptyprocess==0.5.2 65 | pyasn1==0.4.2 66 | pycocotools==2.0.0 67 | pydot==1.2.4 68 | Pygments==2.2.0 69 | pyparsing==2.2.0 70 | python-dateutil==2.6.1 71 | pytz==2018.4 72 | PyWavelets==0.5.2 73 | PyYAML>=4.2b1 74 | pyzmq==17.0.0 75 | qtconsole==4.3.1 76 | requests==2.20.0 77 | rsa==3.4.2 78 | s3transfer==0.1.13 79 | scikit-image==0.13.1 80 | scipy==1.1.0 81 | seaborn==0.8.1 82 | Send2Trash==1.5.0 83 | simplegeneric==0.8.1 84 | six==1.10.0 85 | tensorboard==1.9.0 86 | tensorflow==1.9.0 87 | termcolor==1.1.0 88 | terminado==0.8.1 89 | testpath==0.3.1 90 | tfcoreml==0.2.0 91 | tornado==5.0.2 92 | traitlets==4.3.2 93 | turicreate==4.3.2 94 | urllib3>=1.23 95 | wcwidth==0.1.7 96 | webencodings==0.5.1 97 | Werkzeug==0.14.1 98 | widgetsnbextension==3.2.1 99 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/layers.py: -------------------------------------------------------------------------------- 1 | import keras 2 | 3 | 4 | class VGGNormalize(keras.layers.Layer): 5 | """A custom layer to normalize an image for input into a VGG model. 6 | 7 | This consists of swapping channel order and centering pixel values. 8 | 9 | Centering values come from: 10 | https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py # NOQA 11 | """ 12 | 13 | def __init__(self, **kwargs): 14 | """Initialize the layer. 15 | 16 | Args: 17 | **kwargs - arguments passed to the Keras layer base. 
18 | """ 19 | super(VGGNormalize, self).__init__(**kwargs) 20 | # work around for a bug introduced in diffences between the tf.keras and keras APIs 21 | self.outbound_nodes = self._outbound_nodes 22 | 23 | def build(self, input_shape): 24 | """Build the layer.""" 25 | pass 26 | 27 | def call(self, x, reverse_channels=True): 28 | """Apply the layer. 29 | 30 | Args: 31 | x - an input tensor. 32 | reverse_channels - if True, reverse the channel order 33 | """ 34 | # Swap channel order: 'RGB'->'BGR' 35 | if reverse_channels: 36 | x = x[:, :, :, ::-1] 37 | 38 | # Center pixel values. Technically each channel should have its 39 | # own center value, but the tensor computation is annoying so we'll 40 | # just center them all with the same value. 41 | x -= 120.0 42 | 43 | return x 44 | 45 | 46 | class DeprocessStylizedImage(keras.layers.Layer): 47 | """A layer to deprocess style transfer layer output. 48 | 49 | The style transfer network outputs an image where pixel values are 50 | between -1 and 1 due to a tanh activation. This layer converts that back 51 | to normal values between 0 and 255. 52 | """ 53 | 54 | def __init__(self, **kwargs): 55 | """Initialize the layer. 56 | 57 | Args: 58 | **kwargs - arguments passed to the Keras layer base. 59 | """ 60 | super(DeprocessStylizedImage, self).__init__(**kwargs) 61 | 62 | def build(self, input_shape): 63 | """Build the layer.""" 64 | pass 65 | 66 | def call(self, x): 67 | """Apply the layer.""" 68 | return (x + 1.0) * 127.5 69 | -------------------------------------------------------------------------------- /image_segmentation/Makefile: -------------------------------------------------------------------------------- 1 | 2 | download: 3 | ./download_and_convert_ade20k.sh 4 | 5 | create-training-data: 6 | mkdir -p data/${LABEL_SET} 7 | python create_tfrecord_dataset.py \ 8 | -i data/ADEChallengeData2016/images/training/ \ 9 | -a data/ADEChallengeData2016/annotations/training/ \ 10 | -o data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \ 11 | -l data/ADEChallengeData2016/objectInfo150.txt \ 12 | -w "person, individual, someone, somebody, mortal, soul|house:building, edifice:house:skyscraper|sky|car, auto, automobile, machine, motorcar:bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle:truck, motortruck:van|bicycle, bike, wheel, cycle:minibike, motorbike" \ 13 | -t 0.20 14 | 15 | upload-data: 16 | gsutil cp data/${LABEL_SET}/* gs://${GCS_BUCKET}/data/${LABEL_SET}/ 17 | 18 | 19 | train-local-refine: 20 | python -m image_segmentation.train \ 21 | -d data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \ 22 | -l data/${LABEL_SET}/labels.txt \ 23 | -n 10000 \ 24 | -s 768 \ 25 | -a 1 \ 26 | --steps-per-epoch 100 \ 27 | --batch-size 5 \ 28 | --lr 0.0001 \ 29 | --fine-tune-checkpoint data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_1_rf.h5 \ 30 | -o data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_1_rf.h5 \ 31 | --refine 32 | 33 | train-local: 34 | python -m image_segmentation.train \ 35 | --data data/combined2.tfrecord \ 36 | --use-dali \ 37 | -l data/${LABEL_SET}/labels.txt \ 38 | -n 500000 \ 39 | -s 768 \ 40 | -a 1 \ 41 | --batch-size 12 \ 42 | --steps-per-epoch 2500 \ 43 | --parallel-calls 4 \ 44 | --lr 0.0001 \ 45 | --fine-tune-checkpoint data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_1_fine.h5 \ 46 | --add-noise \ 47 | --model-name people_with_noise 48 | 49 | 50 | train-cloud: 51 | python setup.py sdist 52 | gcloud ml-engine jobs submit training `whoami`_image_segmentation_`date +%s` \ 53 | --runtime-version 1.9 \ 54 | 
--job-dir=gs://${GCS_BUCKET} \ 55 | --packages dist/image_segmentation-1.0.tar.gz,nvidia_dali-0.4.1-38228-cp27-cp27mu-manylinux1_x86_64.whl \ 56 | --module-name image_segmentation.train \ 57 | --region us-central1 \ 58 | --config config.yaml \ 59 | -- \ 60 | -d gs://fritz-data-sandbox/ADEChallengeData2016/people/people_data.tfrecord \ 61 | -l gs://fritz-data-sandbox/ADEChallengeData2016/people/labels.txt \ 62 | --use-dali \ 63 | -n 5000 \ 64 | -s 768 \ 65 | -a 1 \ 66 | --batch-size 12 \ 67 | --steps-per-epoch 250 \ 68 | --parallel-calls 4 \ 69 | --lr 0.001 \ 70 | --add-noise \ 71 | --model-name ${MODEL_NAME} \ 72 | --gcs-bucket gs://${GCS_BUCKET}/train 73 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/layer_converters.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | """Functions to convert custom Keras layers to equalivent Core ML Layers. 4 | 5 | Each of these functions must conform to the spec set by apple here: 6 | https://github.com/apple/coremltools/blob/master/coremltools/converters/keras/_layers2.py 7 | """ 8 | 9 | 10 | def convert_instancenormalization( 11 | builder, 12 | layer, 13 | input_names, 14 | output_names, 15 | keras_layer): 16 | """ 17 | Convert InstanceNormalization layer from to coreml. 18 | 19 | This conforms to the Core ML layer spec. 20 | 21 | Parameters 22 | ---------- 23 | keras_layer: layer 24 | A keras layer object. 25 | 26 | builder: NeuralNetworkBuilder 27 | A neural network builder object. 28 | """ 29 | input_name, output_name = (input_names[0], output_names[0]) 30 | nb_channels = keras_layer.get_weights()[0].shape[0] 31 | 32 | # Set parameters 33 | # Parameter arrangement in Keras: gamma, beta, mean, variance 34 | idx = 0 35 | gamma, beta = None, None 36 | if keras_layer.scale: 37 | gamma = keras_layer.get_weights()[idx] 38 | idx += 1 39 | if keras_layer.center: 40 | beta = keras_layer.get_weights()[idx] 41 | idx += 1 42 | 43 | epsilon = keras_layer.epsilon or 1e-5 44 | 45 | builder.add_batchnorm( 46 | name=layer, 47 | channels=nb_channels, 48 | gamma=gamma, 49 | beta=beta, 50 | compute_mean_var=True, 51 | instance_normalization=True, 52 | input_name=input_name, 53 | output_name=output_name, 54 | epsilon=epsilon 55 | ) 56 | 57 | 58 | def convert_deprocessstylizedimage( 59 | builder, 60 | layer, 61 | input_names, 62 | output_names, 63 | keras_layes): 64 | """Convert the DeprocessStylizedImage layer type to Core ML. 65 | 66 | This simply takes the output of the tanh activation layer and scales 67 | values to conform to typical image RGB values. 
68 | """ 69 | input_name, output_name = (input_names[0], output_names[0]) 70 | 71 | # Apple's scale layer performs the following math 72 | # y = w * x + b 73 | # So to match the keras model's deprocessing layer y = (x + 1) * 127.5 74 | # We can set the following matrices 75 | scale = 127.5 76 | w = numpy.array([scale, scale, scale]) 77 | b = numpy.array([scale, scale, scale]) 78 | 79 | builder.add_scale( 80 | name=input_name, 81 | W=w, 82 | b=b, 83 | has_bias=True, 84 | shape_scale=w.shape, 85 | shape_bias=b.shape, 86 | input_name=input_name, 87 | output_name=output_name 88 | ) 89 | -------------------------------------------------------------------------------- /style_transfer/convert_to_coreml.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import keras_contrib 3 | import logging 4 | import sys 5 | 6 | from style_transfer import layer_converters 7 | from style_transfer import layers 8 | from style_transfer import models 9 | from style_transfer.fritz_coreml_converter import FritzCoremlConverter 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | logger = logging.getLogger('convert_to_coreml') 13 | 14 | 15 | def main(argv): 16 | 17 | parser = argparse.ArgumentParser( 18 | description='Stylize an image using a trained model.' 19 | ) 20 | parser.add_argument( 21 | '--keras-checkpoint', type=str, required=True, 22 | help='Weights from a trained Style Transfer Network.' 23 | ) 24 | parser.add_argument( 25 | '--alpha', type=float, required=True, 26 | help='The width multiplier of the network.' 27 | ) 28 | parser.add_argument( 29 | '--coreml-model', type=str, required=True, 30 | help='A CoreML output file to save to' 31 | ) 32 | parser.add_argument( 33 | '--image-size', type=str, default='640,480', 34 | help='The size of input and output of the final Core ML model: H,W' 35 | ) 36 | parser.add_argument( 37 | '--use-small-network', action='store_true', 38 | help=('Use a very small network architecture that works in real time ' 39 | 'on some mobile devices using only CPU') 40 | ) 41 | 42 | args = parser.parse_args(argv) 43 | 44 | image_size = [int(dim) for dim in args.image_size.split(',')] 45 | # Map custom layers to their custom coreml converters 46 | custom_layers = { 47 | keras_contrib.layers.normalization.InstanceNormalization: layer_converters.convert_instancenormalization, # NOQA 48 | layers.DeprocessStylizedImage: layer_converters.convert_deprocessstylizedimage # NOQA 49 | } 50 | 51 | logger.info('Loading model weights from %s' % args.keras_checkpoint) 52 | 53 | if args.use_small_network: 54 | model = models.SmallStyleTransferNetwork.build( 55 | image_size, 56 | alpha=args.alpha, 57 | checkpoint_file=args.keras_checkpoint 58 | ) 59 | else: 60 | model = models.StyleTransferNetwork.build( 61 | image_size, 62 | alpha=args.alpha, 63 | checkpoint_file=args.keras_checkpoint 64 | ) 65 | 66 | fritz_converter = FritzCoremlConverter() 67 | mlmodel = fritz_converter.convert_keras( 68 | model, 69 | input_names=['image'], 70 | image_input_names=['image'], 71 | output_names=['stylizedImage'], 72 | image_output_names=['stylizedImage'], 73 | custom_layers=custom_layers 74 | ) 75 | logger.info('Saving .mlmodel to %s' % args.coreml_model) 76 | mlmodel.save(args.coreml_model) 77 | 78 | 79 | if __name__ == '__main__': 80 | main(sys.argv[1:]) 81 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/utils.py: 
-------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as pyplot 2 | import numpy 3 | import skimage.transform 4 | 5 | 6 | def plot_image_and_mask(img, mask, alpha=0.6, deprocess_func=None, 7 | reference_mask=None, 8 | show_original_image=True, 9 | small=False): 10 | """Plot an image and overlays a transparent segmentation mask. 11 | 12 | Args: 13 | img (arr): the image data to plot 14 | mask (arr): the segmentation mask 15 | alpha (float, optional): the alpha value of the segmentation mask. 16 | small: If true, output small figure 17 | 18 | Returns: 19 | pyplot.plot: a plot 20 | """ 21 | max_mask = numpy.argmax(mask, axis=-1) 22 | 23 | rows, columns = 1, 1 24 | if show_original_image: 25 | columns += 1 26 | if reference_mask is not None: 27 | columns += 1 28 | 29 | fig = pyplot.figure() 30 | 31 | if deprocess_func: 32 | img = deprocess_func(img) 33 | 34 | # Add Results plot 35 | column_index = 1 36 | fig.add_subplot(rows, columns, column_index) 37 | 38 | pyplot.imshow(img.astype(int)) 39 | pyplot.imshow( 40 | skimage.transform.resize( 41 | max_mask, 42 | img.shape[:2], 43 | order=0), 44 | alpha=alpha) 45 | 46 | if reference_mask is not None: 47 | column_index += 1 48 | fig.add_subplot(rows, columns, column_index) 49 | pyplot.imshow(img.astype(int)) 50 | pyplot.imshow( 51 | skimage.transform.resize( 52 | reference_mask[:, :, 0], 53 | img.shape[:2], 54 | order=0), 55 | alpha=alpha) 56 | 57 | if show_original_image: 58 | column_index += 1 59 | fig.add_subplot(rows, columns, column_index) 60 | pyplot.imshow(img.astype('uint8')) 61 | 62 | if small: 63 | fig.set_size_inches(columns * 5, 5) 64 | else: 65 | fig.set_size_inches(columns * 10, 10) 66 | 67 | return fig 68 | 69 | 70 | def plot_pixel_probabilities(probabilities, class_labels, subplot=None): 71 | """Plot probabilities that each pixel belows to a given class. 72 | 73 | This creates a subplot for each class and plots a heatmap of 74 | probabilities that each pixel belongs to each class. 75 | 76 | Args: 77 | probabilities (arr): an array of class probabilities for each pixel 78 | class_labels (List[str]): the labels for each class 79 | 80 | Returns: 81 | TYPE: Description 82 | """ 83 | num_classes = probabilities.shape[-1] 84 | total_items = num_classes + (1 if subplot else 0) 85 | columns = 4 86 | rows = numpy.ceil(total_items / 4) 87 | fig = pyplot.figure(figsize=(12, rows * 4)) 88 | 89 | if subplot: 90 | fig.add_subplot(subplot) 91 | 92 | for cidx in range(num_classes): 93 | ax = fig.add_subplot(rows, columns, cidx + 1) 94 | ax.imshow(probabilities[:, :, cidx], vmin=0, vmax=1.0) 95 | ax.set_title(class_labels[cidx]) 96 | fig.tight_layout() 97 | return fig 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fritz Models 2 | A collection of machine and deep learning models designed to run on mobile devices. 3 | 4 | Models in this repository contain code and utility for training models as well as converting them to mobile-friendly formats like Core ML, TensorFlow Mobile, and TensorFlow Lite. 5 | 6 | ## Update: 12/26/2018 7 | For convenience, we've consolodated a few open source projects into a single repository. `fritz-style-transfer` has been renamed `fritz-models`. Have no fear, all of the code for style transfer lives in the `style_transfer` sub-directory. 
8 | 9 | ## Models 10 | 11 | * [Style Transfer](https://github.com/fritzlabs/fritz-models/tree/master/style_transfer): Transform images into works of art by transferring the style of one image onto the content of another. 12 | * [Image Segmentation](https://github.com/fritzlabs/fritz-models/tree/master/image_segmentation): Semantic segmentation of images. Assign a value to each pixel of an image corresponding to the type of object it belongs to. 13 | * [Create ML Playgrounds](https://github.com/fritzlabs/fritz-models/tree/master/create_ml_playgrounds): A series of playgrounds for training models with Apple's Create ML tool. 14 | 15 | Don't see the model you're looking for? Open an issue and let us know! 16 | 17 | ## Add to your app 18 | To see live demonstrations of these models running on-device, the Heartbeat App is available in both the [App Store](https://itunes.apple.com/us/app/heartbeat-by-fritz/id1325206416?mt=8) ([source code](https://github.com/fritzlabs/heartbeat-ios)) and [Play Store](https://play.google.com/store/apps/details?id=ai.fritz.heartbeat) ([source code](https://github.com/fritzlabs/heartbeat-android)). 19 | 20 | If you'd like to incorporate any of these models or versions you've trained into your own app, head over to [Fritz](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models). SDKs are available for both iOS and Android. 21 | 22 | ## Additional resources 23 | 24 | Additional, [non-code resources](resources/README.md) for machine learning and AI. 25 | 26 | * [AI and ML Landscape](resources/AI_Landscape.md): our curated list of helpful products and services for AI and machine learning. 27 | 28 | ## Join the community 29 | [Heartbeat](https://heartbeat.fritz.ai/?utm_source=github&utm_campaign=fritz-models) is a community of developers interested in the intersection of mobile and machine learning. [Chat with us in Slack](https://join.slack.com/t/heartbeat-by-fritz/shared_invite/enQtMzY5OTM1MzgyODIzLTZhNTFjYmRiODU0NjZjNjJlOGRjYzI2OTIwY2M4YTBiNjM1ODU1ZmU3Y2Q2MmMzMmI2ZTIzZjQ1ZWI3NzBkZGU) and stay up to date on the latest mobile ML news with our [Newsletter](https://mobileml.us16.list-manage.com/subscribe?u=de53bead690affb8e9a21de8f&id=68acb5c0fd). 30 | 31 | ## A note about large files 32 | Large files like model checkpoints, data, and archives of compiled code are managed via `git lfs`. You need to have Git LFS installed in order to download these files. Installation instructions are available [here](https://github.com/git-lfs/git-lfs#getting-started). 33 | 34 | If you have Git LFS installed, large files will download automatically by default. This can take a while and require a good connection. To clone this repository without downloading the model checkpoints, you can run: 35 | 36 | ``` 37 | GIT_LFS_SKIP_SMUDGE=1 git clone ...
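# If you skip the large files at clone time, you can fetch them afterwards with Git LFS;
# a minimal follow-up, assuming Git LFS is installed as described above:
git lfs pull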
38 | ``` 39 | -------------------------------------------------------------------------------- /image_segmentation/coco_object_info.txt: -------------------------------------------------------------------------------- 1 | Idx Ratio Train Val Name 2 | 1 0.0 0 0 person 3 | 2 0.0 0 0 bicycle 4 | 3 0.0 0 0 car 5 | 4 0.0 0 0 motorcycle 6 | 5 0.0 0 0 airplane 7 | 6 0.0 0 0 bus 8 | 7 0.0 0 0 train 9 | 8 0.0 0 0 truck 10 | 9 0.0 0 0 boat 11 | 10 0.0 0 0 traffic light 12 | 11 0.0 0 0 fire hydrant 13 | 13 0.0 0 0 stop sign 14 | 14 0.0 0 0 parking meter 15 | 15 0.0 0 0 bench 16 | 16 0.0 0 0 bird 17 | 17 0.0 0 0 cat 18 | 18 0.0 0 0 dog 19 | 19 0.0 0 0 horse 20 | 20 0.0 0 0 sheep 21 | 21 0.0 0 0 cow 22 | 22 0.0 0 0 elephant 23 | 23 0.0 0 0 bear 24 | 24 0.0 0 0 zebra 25 | 25 0.0 0 0 giraffe 26 | 27 0.0 0 0 backpack 27 | 28 0.0 0 0 umbrella 28 | 31 0.0 0 0 handbag 29 | 32 0.0 0 0 tie 30 | 33 0.0 0 0 suitcase 31 | 34 0.0 0 0 frisbee 32 | 35 0.0 0 0 skis 33 | 36 0.0 0 0 snowboard 34 | 37 0.0 0 0 sports ball 35 | 38 0.0 0 0 kite 36 | 39 0.0 0 0 baseball bat 37 | 40 0.0 0 0 baseball glove 38 | 41 0.0 0 0 skateboard 39 | 42 0.0 0 0 surfboard 40 | 43 0.0 0 0 tennis racket 41 | 44 0.0 0 0 bottle 42 | 46 0.0 0 0 wine glass 43 | 47 0.0 0 0 cup 44 | 48 0.0 0 0 fork 45 | 49 0.0 0 0 knife 46 | 50 0.0 0 0 spoon 47 | 51 0.0 0 0 bowl 48 | 52 0.0 0 0 banana 49 | 53 0.0 0 0 apple 50 | 54 0.0 0 0 sandwich 51 | 55 0.0 0 0 orange 52 | 56 0.0 0 0 broccoli 53 | 57 0.0 0 0 carrot 54 | 58 0.0 0 0 hot dog 55 | 59 0.0 0 0 pizza 56 | 60 0.0 0 0 donut 57 | 61 0.0 0 0 cake 58 | 62 0.0 0 0 chair 59 | 63 0.0 0 0 couch 60 | 64 0.0 0 0 potted plant 61 | 65 0.0 0 0 bed 62 | 67 0.0 0 0 dining table 63 | 70 0.0 0 0 toilet 64 | 72 0.0 0 0 tv 65 | 73 0.0 0 0 laptop 66 | 74 0.0 0 0 mouse 67 | 75 0.0 0 0 remote 68 | 76 0.0 0 0 keyboard 69 | 77 0.0 0 0 cell phone 70 | 78 0.0 0 0 microwave 71 | 79 0.0 0 0 oven 72 | 80 0.0 0 0 toaster 73 | 81 0.0 0 0 sink 74 | 82 0.0 0 0 refrigerator 75 | 84 0.0 0 0 book 76 | 85 0.0 0 0 clock 77 | 86 0.0 0 0 vase 78 | 87 0.0 0 0 scissors 79 | 88 0.0 0 0 teddy bear 80 | 89 0.0 0 0 hair drier 81 | 90 0.0 0 0 toothbrush 82 | 92 0.0 0 0 banner 83 | 93 0.0 0 0 blanket 84 | 95 0.0 0 0 bridge 85 | 100 0.0 0 0 cardboard 86 | 107 0.0 0 0 counter 87 | 109 0.0 0 0 curtain 88 | 112 0.0 0 0 door-stuff 89 | 118 0.0 0 0 floor-wood 90 | 119 0.0 0 0 flower 91 | 122 0.0 0 0 fruit 92 | 125 0.0 0 0 gravel 93 | 128 0.0 0 0 house 94 | 130 0.0 0 0 light 95 | 133 0.0 0 0 mirror-stuff 96 | 138 0.0 0 0 net 97 | 141 0.0 0 0 pillow 98 | 144 0.0 0 0 platform 99 | 145 0.0 0 0 playingfield 100 | 147 0.0 0 0 railroad 101 | 148 0.0 0 0 river 102 | 149 0.0 0 0 road 103 | 151 0.0 0 0 roof 104 | 154 0.0 0 0 sand 105 | 155 0.0 0 0 sea 106 | 156 0.0 0 0 shelf 107 | 159 0.0 0 0 snow 108 | 161 0.0 0 0 stairs 109 | 166 0.0 0 0 tent 110 | 168 0.0 0 0 towel 111 | 171 0.0 0 0 wall-brick 112 | 175 0.0 0 0 wall-stone 113 | 176 0.0 0 0 wall-tile 114 | 177 0.0 0 0 wall-wood 115 | 178 0.0 0 0 water-other 116 | 180 0.0 0 0 window-blind 117 | 181 0.0 0 0 window-other 118 | 184 0.0 0 0 tree-merged 119 | 185 0.0 0 0 fence-merged 120 | 186 0.0 0 0 ceiling-merged 121 | 187 0.0 0 0 sky-other-merged 122 | 188 0.0 0 0 cabinet-merged 123 | 189 0.0 0 0 table-merged 124 | 190 0.0 0 0 floor-other-merged 125 | 191 0.0 0 0 pavement-merged 126 | 192 0.0 0 0 mountain-merged 127 | 193 0.0 0 0 grass-merged 128 | 194 0.0 0 0 dirt-merged 129 | 195 0.0 0 0 paper-merged 130 | 196 0.0 0 0 food-other-merged 131 | 197 0.0 0 0 building-other-merged 132 | 198 0.0 0 0 rock-merged 133 | 199 
0.0 0 0 wall-other-merged 134 | 200 0.0 0 0 rug-merged 135 | -------------------------------------------------------------------------------- /image_segmentation/utils/deeplab_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tarfile 3 | 4 | import numpy as np 5 | from PIL import Image 6 | from six.moves import urllib 7 | import tempfile 8 | import tensorflow as tf 9 | 10 | 11 | MODEL_NAME = 'mobilenetv2_coco_voctrainaug' 12 | 13 | _DOWNLOAD_URL_PREFIX = 'http://download.tensorflow.org/models/' 14 | _MODEL_URLS = { 15 | 'mobilenetv2_coco_voctrainaug': 16 | 'deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz', 17 | 'mobilenetv2_coco_voctrainval': 18 | 'deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz', 19 | 'xception_coco_voctrainaug': 20 | 'deeplabv3_pascal_train_aug_2018_01_04.tar.gz', 21 | 'xception_coco_voctrainval': 22 | 'deeplabv3_pascal_trainval_2018_01_04.tar.gz', 23 | } 24 | _TARBALL_NAME = 'deeplab_model.tar.gz' 25 | 26 | 27 | class DeepLabModel(object): 28 | """Class to load deeplab model and run inference.""" 29 | 30 | INPUT_TENSOR_NAME = 'ImageTensor:0' 31 | OUTPUT_TENSOR_NAME = 'SemanticPredictions:0' 32 | INPUT_SIZE = 513 33 | FROZEN_GRAPH_NAME = 'frozen_inference_graph' 34 | 35 | def __init__(self, tarball_path): 36 | """Creates and loads pretrained deeplab model.""" 37 | self.graph = tf.Graph() 38 | 39 | graph_def = None 40 | # Extract frozen graph from tar archive. 41 | tar_file = tarfile.open(tarball_path) 42 | for tar_info in tar_file.getmembers(): 43 | if self.FROZEN_GRAPH_NAME in os.path.basename(tar_info.name): 44 | file_handle = tar_file.extractfile(tar_info) 45 | graph_def = tf.GraphDef.FromString(file_handle.read()) 46 | break 47 | 48 | tar_file.close() 49 | 50 | if graph_def is None: 51 | raise RuntimeError('Cannot find inference graph in tar archive.') 52 | 53 | with self.graph.as_default(): 54 | tf.import_graph_def(graph_def, name='') 55 | 56 | self.sess = tf.Session(graph=self.graph) 57 | 58 | def run(self, image): 59 | """Runs inference on a single image. 60 | 61 | Args: 62 | image: A PIL.Image object, raw input image. 63 | 64 | Returns: 65 | resized_image: RGB image resized from original input image. 66 | seg_map: Segmentation map of `resized_image`. 67 | """ 68 | width, height = image.size 69 | resize_ratio = 1.0 * self.INPUT_SIZE / max(width, height) 70 | target_size = (int(resize_ratio * width), int(resize_ratio * height)) 71 | resized_image = image.convert('RGB').resize(target_size, Image.ANTIALIAS) 72 | batch_seg_map = self.sess.run( 73 | self.OUTPUT_TENSOR_NAME, 74 | feed_dict={self.INPUT_TENSOR_NAME: [np.asarray(resized_image)]}) 75 | seg_map = batch_seg_map[0] 76 | return resized_image, seg_map 77 | 78 | 79 | def download_deeplab_model(model_name): 80 | model_dir = tempfile.mkdtemp() 81 | tf.gfile.MakeDirs(model_dir) 82 | 83 | download_path = os.path.join(model_dir, _TARBALL_NAME) 84 | print(download_path) 85 | print('downloading model, this might take a while...') 86 | 87 | urllib.request.urlretrieve( 88 | _DOWNLOAD_URL_PREFIX + _MODEL_URLS[MODEL_NAME], 89 | download_path 90 | ) 91 | print('download completed! 
loading DeepLab model...') 92 | 93 | model = DeepLabModel(download_path) 94 | print('model loaded successfully!') 95 | return model 96 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Any data should go in a data/ dir untracked by git 2 | data/ 3 | 4 | # Local development 5 | .DS_Store 6 | .vscode/ 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # dotenv 90 | .env 91 | 92 | # virtualenv 93 | .venv 94 | venv/ 95 | ENV/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | 110 | # Swift things for Create ML 111 | # Xcode 112 | # 113 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 114 | 115 | ## Build generated 116 | build/ 117 | DerivedData/ 118 | 119 | ## Various settings 120 | *.pbxuser 121 | !default.pbxuser 122 | *.mode1v3 123 | !default.mode1v3 124 | *.mode2v3 125 | !default.mode2v3 126 | *.perspectivev3 127 | !default.perspectivev3 128 | xcuserdata/ 129 | 130 | ## Other 131 | *.moved-aside 132 | *.xccheckout 133 | *.xcscmblueprint 134 | 135 | ## Obj-C/Swift specific 136 | *.hmap 137 | *.ipa 138 | *.dSYM.zip 139 | *.dSYM 140 | 141 | ## Playgrounds 142 | timeline.xctimeline 143 | playground.xcworkspace 144 | 145 | # Swift Package Manager 146 | # 147 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 148 | # Packages/ 149 | # Package.pins 150 | # Package.resolved 151 | .build/ 152 | 153 | # CocoaPods 154 | # 155 | # We recommend against adding the Pods directory to your .gitignore. However 156 | # you should judge for yourself, the pros and cons are mentioned at: 157 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 158 | # 159 | # Pods/ 160 | 161 | # Carthage 162 | # 163 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 
164 | # Carthage/Checkouts 165 | 166 | Carthage/Build 167 | 168 | # fastlane 169 | # 170 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the 171 | # screenshots whenever they are needed. 172 | # For more information about the recommended setup visit: 173 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 174 | 175 | fastlane/report.xml 176 | fastlane/Preview.html 177 | fastlane/screenshots/**/*.png 178 | fastlane/test_output -------------------------------------------------------------------------------- /image_segmentation/utils/compare_models.py: -------------------------------------------------------------------------------- 1 | from matplotlib import gridspec 2 | from matplotlib import pyplot 3 | import skimage.transform 4 | import numpy 5 | 6 | 7 | def create_pascal_label_colormap(): 8 | """Creates a label colormap used in PASCAL VOC segmentation benchmark. 9 | 10 | Returns: 11 | A Colormap for visualizing segmentation results. 12 | """ 13 | colormap = numpy.zeros((256, 3), dtype=int) 14 | ind = numpy.arange(256, dtype=int) 15 | 16 | for shift in reversed(range(8)): 17 | for channel in range(3): 18 | colormap[:, channel] |= ((ind >> channel) & 1) << shift 19 | ind >>= 3 20 | 21 | return colormap 22 | 23 | 24 | def label_to_color_image(label): 25 | """Adds color defined by the dataset colormap to the label. 26 | 27 | Args: 28 | label: A 2D array with integer type, storing the segmentation label. 29 | 30 | Returns: 31 | result: A 2D array with floating type. The element of the array 32 | is the color indexed by the corresponding element in the inumpyut label 33 | to the PASCAL color map. 34 | 35 | Raises: 36 | ValueError: If label is not of rank 2 or its value is larger than color 37 | map maximum entry. 
38 | """ 39 | if label.ndim != 2: 40 | raise ValueError('Expect 2-D inumpyut label') 41 | 42 | colormap = create_pascal_label_colormap() 43 | 44 | if numpy.max(label) >= len(colormap): 45 | raise ValueError('label value too large.') 46 | 47 | return colormap[label] 48 | 49 | 50 | LABEL_NAMES = numpy.asarray([ 51 | 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 52 | 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 53 | 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv' 54 | ]) 55 | 56 | FULL_LABEL_MAP = numpy.arange(len(LABEL_NAMES)).reshape(len(LABEL_NAMES), 1) 57 | FULL_COLOR_MAP = label_to_color_image(FULL_LABEL_MAP) 58 | 59 | 60 | def vis_segmentation(image, deeplab_seg_map, icnet_seg_map): 61 | """Visualizes inumpyut image, segmentation map and overlay view.""" 62 | pyplot.figure(figsize=(15, 5)) 63 | grid_spec = gridspec.GridSpec(1, 4, width_ratios=[4, 4, 4, 4]) 64 | 65 | pyplot.subplot(grid_spec[0]) 66 | pyplot.imshow(image) 67 | pyplot.axis('off') 68 | pyplot.title('Input Image') 69 | 70 | pyplot.subplot(grid_spec[1]) 71 | seg_image = label_to_color_image(deeplab_seg_map).astype(numpy.uint8) 72 | pyplot.imshow(seg_image) 73 | pyplot.axis('off') 74 | pyplot.title('Deeplab v3 Segmentation') 75 | 76 | pyplot.subplot(grid_spec[2]) 77 | # resize icnet mask 78 | icnet_seg_map = skimage.transform.resize( 79 | icnet_seg_map[0, :, :], 80 | deeplab_seg_map.shape, 81 | preserve_range=True, 82 | anti_aliasing=False, 83 | order=0).astype('int') 84 | seg_image = label_to_color_image(icnet_seg_map).astype(numpy.uint8) 85 | pyplot.imshow(seg_image) 86 | pyplot.axis('off') 87 | pyplot.title('Fritz Segmentation') 88 | 89 | pyplot.subplot(grid_spec[3]) 90 | pyplot.imshow(image) 91 | pyplot.imshow(seg_image, alpha=0.7) 92 | pyplot.axis('off') 93 | pyplot.title('Fritz Segmentation Overlay') 94 | 95 | pyplot.grid('off') 96 | pyplot.show() 97 | 98 | 99 | def multiple_vis(results): 100 | 101 | fig = pyplot.figure(figsize=(15, 3 * len(results))) 102 | grid_spec = gridspec.GridSpec(len(results), 4, width_ratios=[4, 4, 4, 4]) 103 | 104 | i = 0 105 | for image, deeplab_seg_map, icnet_seg_map in results: 106 | pyplot.subplot(grid_spec[i]) 107 | pyplot.imshow(image) 108 | # pyplot.axis('off') 109 | i += 1 110 | 111 | pyplot.subplot(grid_spec[i]) 112 | seg_image = label_to_color_image(deeplab_seg_map).astype(numpy.uint8) 113 | pyplot.imshow(seg_image) 114 | pyplot.axis('off') 115 | pyplot.title('Deeplab v3 Segmentation') 116 | 117 | i += 1 118 | pyplot.subplot(grid_spec[i]) 119 | # resize icnet mask 120 | icnet_seg_map = skimage.transform.resize( 121 | icnet_seg_map[0, :, :], 122 | deeplab_seg_map.shape, 123 | preserve_range=True, 124 | anti_aliasing=False, 125 | order=0).astype('int') 126 | seg_image = label_to_color_image(icnet_seg_map).astype(numpy.uint8) 127 | pyplot.imshow(seg_image) 128 | pyplot.axis('off') 129 | pyplot.title('Fritz Segmentation') 130 | i += 1 131 | 132 | pyplot.subplot(grid_spec[i]) 133 | pyplot.imshow(image) 134 | pyplot.imshow(seg_image, alpha=0.7) 135 | pyplot.axis('off') 136 | pyplot.title('Fritz Segmentation Overlay') 137 | i += 1 138 | 139 | pyplot.grid('off') 140 | 141 | return fig 142 | -------------------------------------------------------------------------------- /style_transfer/convert_to_tfmobile.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import sys 5 | 6 | import keras 7 | import tensorflow as tf 8 | from 
tensorflow.python.platform import gfile 9 | from tensorflow.python.tools import freeze_graph 10 | from tensorflow.python.tools import optimize_for_inference_lib 11 | from tensorflow.python.framework import dtypes 12 | 13 | from style_transfer import models 14 | 15 | logging.basicConfig(level=logging.INFO) 16 | logger = logging.getLogger('stylize_image') 17 | 18 | 19 | def _freeze_graph(model, basename, output_dir): 20 | name, _ = os.path.splitext(basename) 21 | 22 | saver = tf.train.Saver() 23 | 24 | with keras.backend.get_session() as sess: 25 | checkpoint_filename = os.path.join(output_dir, '%s.ckpt' % name) 26 | output_graph_filename = os.path.join(output_dir, '%s_frozen.pb' % name) 27 | saver.save(sess, checkpoint_filename) 28 | tf.train.write_graph( 29 | sess.graph_def, output_dir, '%s_graph_def.pbtext' % name 30 | ) 31 | 32 | freeze_graph.freeze_graph( 33 | input_graph=os.path.join(output_dir, '%s_graph_def.pbtext' % name), 34 | input_saver='', 35 | input_binary=False, 36 | input_checkpoint=checkpoint_filename, 37 | output_graph=output_graph_filename, 38 | output_node_names='deprocess_stylized_image_1/mul', 39 | restore_op_name="save/restore_all", 40 | filename_tensor_name="save/Const:0", 41 | clear_devices=True, 42 | initializer_nodes=None 43 | ) 44 | logger.info('Saved frozen graph to: %s' % output_graph_filename) 45 | 46 | 47 | def load_graph_def(filename): 48 | input_graph_def = tf.GraphDef() 49 | with gfile.FastGFile(filename, 'rb') as file: 50 | data = file.read() 51 | input_graph_def.ParseFromString(data) 52 | return input_graph_def 53 | 54 | 55 | def _optimize_graph(basename, output_dir): 56 | name, _ = os.path.splitext(basename) 57 | frozen_graph_filename = os.path.join(output_dir, '%s_frozen.pb' % name) 58 | graph_def = load_graph_def(frozen_graph_filename) 59 | 60 | optimized_graph = optimize_for_inference_lib.optimize_for_inference( 61 | input_graph_def=graph_def, 62 | input_node_names=['input_1'], 63 | placeholder_type_enum=dtypes.float32.as_datatype_enum, 64 | output_node_names=['deprocess_stylized_image_1/mul'], 65 | toco_compatible=True 66 | ) 67 | 68 | optimized_graph_filename = os.path.basename( 69 | frozen_graph_filename).replace('frozen', 'optimized') 70 | optimized_graph_filename = optimized_graph_filename 71 | tf.train.write_graph( 72 | optimized_graph, output_dir, optimized_graph_filename, as_text=False 73 | ) 74 | logger.info('Saved optimized graph to: %s' % 75 | os.path.join(output_dir, optimized_graph_filename)) 76 | 77 | 78 | def main(argv): 79 | 80 | parser = argparse.ArgumentParser( 81 | description='Stylize an image using a trained model.' 82 | ) 83 | parser.add_argument( 84 | '--keras-checkpoint', type=str, required=True, 85 | help='Weights from a trained Style Transfer Network.' 86 | ) 87 | parser.add_argument( 88 | '--alpha', type=float, required=True, 89 | help='The width multiplier of the network.' 
90 | ) 91 | parser.add_argument( 92 | '--output-dir', type=str, required=True, 93 | help='A directory to save various tensorflow graphs to' 94 | ) 95 | parser.add_argument( 96 | '--image-size', type=str, default='640,480', 97 | help='The size of input and output of the final Core ML model: H,W' 98 | ) 99 | parser.add_argument( 100 | '--use-small-network', action='store_true', 101 | help=('Use a very small network architecture that works in real time ' 102 | 'on some mobile devices using only CPU') 103 | ) 104 | 105 | args = parser.parse_args(argv) 106 | 107 | image_size = [int(dim) for dim in args.image_size.split(',')] 108 | 109 | logger.info('Loading model weights from %s' % args.keras_checkpoint) 110 | 111 | # Set some keras params before loading the model 112 | keras.backend.clear_session() 113 | keras.backend.set_learning_phase(0) 114 | if args.use_small_network: 115 | model = models.SmallStyleTransferNetwork.build( 116 | image_size, 117 | alpha=args.alpha, 118 | checkpoint_file=args.keras_checkpoint 119 | ) 120 | else: 121 | model = models.StyleTransferNetwork.build( 122 | image_size, 123 | alpha=args.alpha, 124 | checkpoint_file=args.keras_checkpoint 125 | ) 126 | 127 | basename = os.path.basename(args.keras_checkpoint) 128 | # Freeze Graph 129 | _freeze_graph(model, basename, args.output_dir) 130 | # Optimize Graph 131 | _optimize_graph(basename, args.output_dir) 132 | 133 | 134 | if __name__ == '__main__': 135 | main(sys.argv[1:]) 136 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from style_transfer import trainer 5 | 6 | logging.basicConfig(level=logging.INFO) 7 | logger = logging.getLogger('train_network') 8 | 9 | # The default layers are those suggested by Johnson et al. 10 | # The names map to those used in the VGG16 application included 11 | # with Keras. 12 | _DEFAULT_STYLE_LAYERS = [ 13 | 'block1_conv2', 'block2_conv2', 14 | 'block3_conv3', 'block4_conv3' 15 | ] 16 | _DEFAULT_CONTENT_LAYERS = ['block3_conv3'] 17 | 18 | 19 | if __name__ == '__main__': 20 | parser = argparse.ArgumentParser( 21 | description='Train a Style Transfer Network.' 22 | ) 23 | 24 | parser.add_argument( 25 | '--training-image-dset', type=str, required=True, 26 | help=('An h5 file containing images to trian with. The dset must ' 27 | 'contain a key `images` with the arrays.') 28 | ) 29 | parser.add_argument( 30 | '--style-images', type=str, required=True, 31 | help='A comma separated list of images to take styles from.' 32 | ) 33 | parser.add_argument( 34 | '--model-checkpoint', type=str, required=True, 35 | help='An file to save the trained network.' 
36 | ) 37 | parser.add_argument( 38 | '--image-size', default='256,256', type=str, 39 | help='The size of the image H,W' 40 | ) 41 | parser.add_argument( 42 | '--content-layers', type=str, 43 | help=('A comma separated list of VGG layers to use for ' 44 | 'computing content loss') 45 | ) 46 | parser.add_argument( 47 | '--style-layers', type=str, 48 | help=('A comma separated list of VGG layers to use for ' 49 | 'computing style loss') 50 | ) 51 | parser.add_argument( 52 | '--content-weight', type=float, default=1.0, 53 | help='Content loss weight' 54 | ) 55 | parser.add_argument( 56 | '--style-weight', type=float, default=1e-4, 57 | help='Style loss weight' 58 | ) 59 | parser.add_argument( 60 | '--total-variation-weight', type=float, default=0, 61 | help='Total variation loss weight' 62 | ) 63 | parser.add_argument( 64 | '--num-iterations', type=int, default=40000, 65 | help='Number of iterations to train for.' 66 | ) 67 | parser.add_argument( 68 | '--batch-size', type=int, default=4, 69 | help='The batch size to train with.' 70 | ) 71 | parser.add_argument( 72 | '--learning-rate', type=float, default=0.001, 73 | help='The learning rate.' 74 | ) 75 | parser.add_argument( 76 | '--log-interval', type=int, default=10, 77 | help='the interval at which log statements are printed.' 78 | ) 79 | parser.add_argument( 80 | '--checkpoint-interval', type=int, default=10, 81 | help='the interval at which model checkpoints are saved.' 82 | ) 83 | parser.add_argument( 84 | '--fine-tune-checkpoint', type=str, 85 | help='A checkpoint file to finetune from.' 86 | ) 87 | parser.add_argument( 88 | '--alpha', type=float, default=1.0, 89 | help='the width parameter controlling the number of filters' 90 | ) 91 | parser.add_argument( 92 | '--norm-by-channels', action='store_true', 93 | help='if present, normalize gram matrix by channel' 94 | ) 95 | parser.add_argument( 96 | '--gcs-bucket', type=str, 97 | help='a gcs bucket to save results to.' 98 | ) 99 | parser.add_argument( 100 | '--use-small-network', action='store_true', 101 | help=('Use a very small network architecture that works in real time ' 102 | 'on some mobile devices using only CPU') 103 | ) 104 | 105 | args, unknown = parser.parse_known_args() 106 | 107 | # Set the content and style loss layers. 
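# Content loss compares VGG16 feature activations of the stylized output and
# the original photo at the content layers; style loss compares Gram matrices
# of activations at the style layers. The defaults above follow Johnson et al.,
# and any comma-separated layer names passed on the command line replace them.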
108 | content_layers = _DEFAULT_CONTENT_LAYERS 109 | if args.content_layers: 110 | content_layers = args.content_layers.split(',') 111 | 112 | style_layers = _DEFAULT_STYLE_LAYERS 113 | if args.style_layers: 114 | style_layers = args.style_layers.split(',') 115 | 116 | style_image_files = args.style_images.split(',') 117 | image_size = [int(el) for el in args.image_size.split(',')] 118 | norm_by_channels = args.norm_by_channels or False 119 | 120 | trainer.train( 121 | args.training_image_dset, 122 | style_image_files, 123 | args.model_checkpoint, 124 | content_layers, 125 | style_layers, 126 | content_weight=args.content_weight, 127 | style_weight=args.style_weight, 128 | total_variation_weight=args.total_variation_weight, 129 | image_size=image_size, 130 | alpha=args.alpha, 131 | batch_size=args.batch_size, 132 | num_iterations=args.num_iterations, 133 | learning_rate=args.learning_rate, 134 | log_interval=args.log_interval, 135 | checkpoint_interval=args.checkpoint_interval, 136 | fine_tune_checkpoint=args.fine_tune_checkpoint, 137 | norm_by_channels=norm_by_channels, 138 | gcs_bucket=args.gcs_bucket, 139 | use_small_network=args.use_small_network, 140 | ) 141 | logger.info('Done.') 142 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/build_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================= 15 | 16 | """Contains common utility functions and classes for building dataset. 17 | 18 | This script contains utility functions and classes to converts dataset to 19 | TFRecord file format with Example protos. 20 | The Example proto contains the following fields: 21 | image/encoded: encoded image content. 22 | image/filename: image filename. 23 | image/format: image file format. 24 | image/height: image height. 25 | image/width: image width. 26 | image/channels: image channels. 27 | image/segmentation/class/encoded: encoded semantic segmentation content. 28 | image/segmentation/class/format: semantic segmentation file format. 29 | """ 30 | import collections 31 | import six 32 | import tensorflow as tf 33 | 34 | IMAGE_FORMAT = 'jpeg' 35 | LABEL_FORMAT = 'png' 36 | 37 | 38 | class ImageReader(object): 39 | """Helper class that provides TensorFlow image coding utilities.""" 40 | 41 | def __init__(self, image_format='jpeg', channels=3): 42 | """Class constructor. 43 | 44 | Args: 45 | image_format: Image format. Only 'jpeg', 'jpg', or 'png' 46 | are supported. 47 | channels: Image channels. 
48 | """ 49 | with tf.Graph().as_default(): 50 | self._decode_data = tf.placeholder(dtype=tf.string) 51 | self._image_format = image_format 52 | self._session = tf.Session() 53 | if self._image_format in ('jpeg', 'jpg'): 54 | self._decode = tf.image.decode_jpeg( 55 | self._decode_data, channels=channels) 56 | elif self._image_format == 'png': 57 | self._decode = tf.image.decode_png( 58 | self._decode_data, channels=channels) 59 | 60 | def read_image_dims(self, image_data): 61 | """Read the image dimensions. 62 | 63 | Args: 64 | image_data: string of image data. 65 | Returns: 66 | image_height and image_width. 67 | """ 68 | image = self.decode_image(image_data) 69 | return image.shape[:2] 70 | 71 | def decode_image(self, image_data): 72 | """Decode the image data string. 73 | 74 | Args: 75 | image_data: string of image data. 76 | Returns: 77 | Decoded image data. 78 | Raises: 79 | ValueError: Value of image channels not supported. 80 | """ 81 | image = self._session.run( 82 | self._decode, feed_dict={self._decode_data: image_data}) 83 | if len(image.shape) != 3 or image.shape[2] not in (1, 3): 84 | raise ValueError('The image channels not supported.') 85 | 86 | return image 87 | 88 | 89 | def _int64_list_feature(values): 90 | """Return a TF-Feature of int64_list. 91 | 92 | Args: 93 | values: A scalar or list of values. 94 | Returns: 95 | A TF-Feature. 96 | """ 97 | if not isinstance(values, collections.Iterable): 98 | values = [values] 99 | 100 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 101 | 102 | 103 | def _bytes_list_feature(values): 104 | """Return a TF-Feature of bytes. 105 | 106 | Args: 107 | values: A string. 108 | Returns: 109 | A TF-Feature. 110 | """ 111 | def norm2bytes(value): 112 | return value.encode() if isinstance(value, str) and six.PY3 else value 113 | 114 | return tf.train.Feature( 115 | bytes_list=tf.train.BytesList(value=[norm2bytes(values)])) 116 | 117 | 118 | def image_seg_to_tfexample(image_data, filename, height, width, seg_data): 119 | """Convert one image/segmentation pair to tf example. 120 | 121 | Args: 122 | image_data: string of image data. 123 | filename: image filename. 124 | height: image height. 125 | width: image width. 126 | seg_data: string of semantic segmentation data. 127 | Returns: 128 | tf example of one image/segmentation pair. 129 | """ 130 | return tf.train.Example( 131 | features=tf.train.Features( 132 | feature={ 133 | 'image/encoded': _bytes_list_feature(image_data), 134 | 'image/filename': _bytes_list_feature(filename), 135 | 'image/format': _bytes_list_feature(IMAGE_FORMAT), 136 | 'image/height': _int64_list_feature(height), 137 | 'image/width': _int64_list_feature(width), 138 | 'image/channels': _int64_list_feature(3), 139 | 'image/segmentation/class/encoded': ( 140 | _bytes_list_feature(seg_data)), 141 | 'image/segmentation/class/format': _bytes_list_feature( 142 | LABEL_FORMAT), 143 | } 144 | ) 145 | ) 146 | -------------------------------------------------------------------------------- /image_segmentation/utils/tfrecord_helpers.py: -------------------------------------------------------------------------------- 1 | import io 2 | import sys 3 | import struct 4 | import random 5 | import six 6 | import tensorflow as tf 7 | import numpy 8 | import PIL 9 | 10 | 11 | def iterate_tfrecord(filename, decode=False): 12 | """Iterate through a tfrecord file. 13 | 14 | Args: 15 | filename (str): Filename to iterate. 16 | decode (bool): Optionally pass all records to example decoder function. 17 | False by default. 
18 | 19 | Returns: Iterator of tfrecords. 20 | """ 21 | for record in tf.python_io.tf_record_iterator(filename): 22 | example = tf.train.Example() 23 | example.ParseFromString(record) 24 | if decode: 25 | yield decode_image_tensor(example) 26 | else: 27 | yield example 28 | 29 | 30 | def save_tfrecords(records, output_filename): 31 | """Save all tfrecord examples to file. 32 | 33 | Args: 34 | records (Iterator[tf.train.Example]): Iterator of records to save. 35 | output_filename (str): Output file to save to. 36 | """ 37 | with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer: 38 | for record in records: 39 | tfrecord_writer.write(record.SerializeToString()) 40 | 41 | 42 | def decode_image_tensor(example): 43 | """Takes a tfrecord example and decodes image and mask data. 44 | 45 | Args: 46 | example (tf.train.Example): TF example to decode. 47 | 48 | Returns: dict of decoded mask and image data. 49 | """ 50 | feature_dict = example.features.feature 51 | image_value = feature_dict['image/encoded'].bytes_list.value[0] 52 | encoded_mask = feature_dict['image/segmentation/class/encoded'] 53 | filename = feature_dict['image/filename'].bytes_list.value[0] 54 | mask_value = encoded_mask.bytes_list.value[0] 55 | mask = numpy.array(PIL.Image.open(io.BytesIO(mask_value))) 56 | height = feature_dict['image/height'].int64_list.value[0] 57 | width = feature_dict['image/width'].int64_list.value[0] 58 | mask_format = ( 59 | feature_dict['image/segmentation/class/format'].bytes_list.value[0] 60 | ) 61 | 62 | return { 63 | 'image': PIL.Image.open(io.BytesIO(image_value)), 64 | 'mask': mask, 65 | 'height': height, 66 | 'width': width, 67 | 'filename': filename, 68 | 'format': feature_dict['image/format'].bytes_list.value[0], 69 | 'mask_format': mask_format, 70 | } 71 | 72 | 73 | def get_png_string(mask_array): 74 | """Builds PNG string from mask array. 75 | 76 | Args: 77 | mask_array (HxW): Mask array to generate PNG string from. 78 | 79 | Returns: String of mask encoded as a PNG. 80 | """ 81 | # Convert the new mask back to an image. 82 | image = PIL.Image.fromarray(mask_array.astype('uint8')).convert('RGB') 83 | # Save the new image to a PNG byte string. 84 | byte_buffer = io.BytesIO() 85 | image.save(byte_buffer, format='png') 86 | byte_buffer.seek(0) 87 | return byte_buffer.read() 88 | 89 | 90 | def update_mask(record, mask_array): 91 | """Update mask in tensorflow example. 92 | 93 | Args: 94 | record (tf.train.Example): Record to update 95 | mask_array (numpy.Array): HxW array of class values. 96 | 97 | Returns: Updated tf.train.Example. 98 | """ 99 | def norm2bytes(value): 100 | return value.encode() if isinstance(value, str) and six.PY3 else value 101 | 102 | mask_data = get_png_string(mask_array) 103 | feature = record.features.feature['image/segmentation/class/encoded'] 104 | feature.bytes_list.value.pop() 105 | feature.bytes_list.value.append(norm2bytes(mask_data)) 106 | return record 107 | 108 | 109 | def get_mask_ratio(example): 110 | total_people_pixels = example['mask'][:, :, 0].sum(axis=None) 111 | return total_people_pixels / (example['height'] * example['width']) 112 | 113 | 114 | def iter_interleave(kaggle, ade20k, coco): 115 | """ 116 | A generator that interleaves the output from a one or more iterators 117 | until they are *all* exhausted. 
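Each source is drawn at a different rate on every pass: a Kaggle record is
yielded with probability 2/3, every ADE20K record is yielded, and COCO
records are yielded four at a time. The number of records drawn from each
iterator is printed once all three are exhausted.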
118 | 119 | """ 120 | kaggle_finished = False 121 | ade20k_finished = False 122 | coco_finished = False 123 | a, b, c = 0, 0, 0 124 | 125 | while (not kaggle_finished) or (not ade20k_finished) or (not coco_finished): 126 | if not kaggle_finished: 127 | try: 128 | item = kaggle.next() 129 | a += 1 130 | if random.choice([False, True, True]): 131 | yield item 132 | except StopIteration: 133 | print("kaggle finished") 134 | kaggle_finished = True 135 | if not ade20k_finished: 136 | try: 137 | item = ade20k.next() 138 | b += 1 139 | yield item 140 | except StopIteration: 141 | print("ade20k finished") 142 | ade20k_finished = True 143 | 144 | if not coco_finished: 145 | try: 146 | for _ in range(4): 147 | item = coco.next() 148 | c += 1 149 | yield item 150 | except StopIteration: 151 | print("coco finished") 152 | coco_finished = True 153 | 154 | print(a, b, c) 155 | 156 | 157 | def chunk_records(filename, n, start=0): 158 | records = iterate_tfrecord(filename) 159 | while True: 160 | for i in range(start): 161 | continue 162 | 163 | try: 164 | yield [records.next() for _ in range(n)] 165 | except StopIteration: 166 | return 167 | -------------------------------------------------------------------------------- /style_transfer/create_training_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import io 4 | import os 5 | import sys 6 | import urllib 7 | import zipfile 8 | 9 | import PIL.Image 10 | import tensorflow as tf 11 | 12 | logger = logging.getLogger('create_training_dataset') 13 | 14 | _COCO_ZIP_URL = 'http://images.cocodataset.org/zips/train2014.zip' 15 | 16 | try: 17 | raw_input # Python 3 18 | except NameError: 19 | raw_input = input # Python 3 20 | 21 | 22 | class DatasetCreator(object): 23 | """A class to preprocess images from the COCO training data. 24 | 25 | This does not apply any sort of normalization to images. It simply 26 | transforms and scales image sizes before packing them into an H5 dataset 27 | and saving them to disk. 28 | """ 29 | 30 | allowed_formats = {'.jpg', '.jpeg', '.JPG', '.JPEG', '.png', '.PNG'} 31 | max_resize = 16 32 | 33 | @classmethod 34 | def _get_image_filenames(cls, input_dir, num_images): 35 | """Get a list of image filenames from a directory.""" 36 | img_list = [] 37 | for filename in os.listdir(input_dir): 38 | _, ext = os.path.splitext(filename) 39 | if ext in cls.allowed_formats: 40 | img_list.append(os.path.join(input_dir, filename)) 41 | if num_images and len(img_list) > num_images: 42 | break 43 | return img_list 44 | 45 | @staticmethod 46 | def _bytes_feature(value): 47 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 48 | 49 | @classmethod 50 | def process_images( 51 | cls, 52 | input_dir, 53 | output_filename, 54 | num_images=None, 55 | num_threads=1): 56 | """Process all images in a directory and create an H5 data set. 57 | 58 | Args: 59 | input_dir - a directory containing images 60 | output_filename - the name of the h5 file to write to 61 | num_images - the number of images to process. 'None' processes all 62 | num_threads - the number of threads to use. Default 1. 
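(The current implementation processes images serially, so this value is
not yet used.)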
63 | """ 64 | img_list = cls._get_image_filenames(input_dir, num_images) 65 | num_images = len(img_list) 66 | # Remove the h5 file if it exists 67 | try: 68 | os.remove(output_filename) 69 | except OSError: 70 | pass 71 | 72 | record_writer = tf.python_io.TFRecordWriter(output_filename) 73 | for idx, filename in enumerate(img_list): 74 | img = PIL.Image.open(filename) 75 | encoded_jpeg = io.BytesIO() 76 | img.save(encoded_jpeg, format='jpeg') 77 | encoded_jpeg.seek(0) 78 | 79 | example = tf.train.Example(features=tf.train.Features( 80 | feature={ 81 | 'image/encoded': cls._bytes_feature(encoded_jpeg.read()), 82 | })) 83 | record_writer.write(example.SerializeToString()) 84 | record_writer.close() 85 | 86 | 87 | def download_coco_data(directory): 88 | """Download and extract the COCO image training data set. 89 | 90 | This file is very large (~13GB) so we check with the user to make 91 | sure that is ok. 92 | 93 | Args: 94 | dir - a directory to save the dataset to 95 | """ 96 | # This is a really big file so ask the user if they are sure they want 97 | # to start the download. 98 | if not os.path.isdir(directory): 99 | logger.info('Creating directory: %s' % directory) 100 | os.makedirs(directory) 101 | 102 | answer = None 103 | while answer not in {'Y', 'n'}: 104 | answer = raw_input( 105 | 'Are you sure you want to download the COCO dataset? [Y/n] ' 106 | ) 107 | 108 | if answer == 'n': 109 | sys.exit() 110 | 111 | logger.info('Downloading COCO image data set. This may take a while...') 112 | zip_save_path = os.path.join(directory, 'train2014.zip') 113 | urllib.urlretrieve(_COCO_ZIP_URL, zip_save_path) 114 | 115 | # Files are even bigger to unzip so ask again if they are fine to proceed. 116 | answer = None 117 | while answer not in {'Y', 'n'}: 118 | answer = raw_input( 119 | 'Are you sure you want to unzip things? [Y/n] ' 120 | ) 121 | 122 | if answer == 'n': 123 | sys.exit() 124 | 125 | logger.info('Unzipping COCO image data set. This may take a while...') 126 | unzip = zipfile.ZipFile(zip_save_path, 'r') 127 | unzip.extractall(directory) 128 | unzip.close() 129 | # Delete the original zipfile 130 | os.remove(zip_save_path) 131 | 132 | 133 | if __name__ == '__main__': 134 | parser = argparse.ArgumentParser( 135 | description=('Create a dataset to use when training the Fritz' 136 | ' Style Transfer model.')) 137 | parser.add_argument( 138 | '--output', type=str, required=True, 139 | help='The name of the resulting dataset.') 140 | parser.add_argument( 141 | '--image-dir', type=str, required=True, 142 | help=('A directory containing images to turn into tfrecords') 143 | ) 144 | parser.add_argument( 145 | '--download', action='store_true', 146 | help=('When present, download and extract the COCO image dataset.' 147 | 'Note this is a huge download (~13GB).') 148 | ) 149 | parser.add_argument( 150 | '--num-images', type=int, help='The number of images to process.' 
151 | ) 152 | 153 | args = parser.parse_args() 154 | image_directory = args.image_dir 155 | if args.download: 156 | download_coco_data(image_directory) 157 | image_directory = os.path.join(image_directory, 'train2014') 158 | 159 | image_directory = os.path.join(args.image_dir) 160 | DatasetCreator.process_images( 161 | image_directory, 162 | args.output, 163 | num_images=args.num_images 164 | ) 165 | -------------------------------------------------------------------------------- /image_segmentation/objectInfo150.txt: -------------------------------------------------------------------------------- 1 | Idx Ratio Train Val Name 2 | 1 0.1576 11664 1172 wall 3 | 2 0.1072 6046 612 building, edifice 4 | 3 0.0878 8265 796 sky 5 | 4 0.0621 9336 917 floor, flooring 6 | 5 0.0480 6678 641 tree 7 | 6 0.0450 6604 643 ceiling 8 | 7 0.0398 4023 408 road, route 9 | 8 0.0231 1906 199 bed 10 | 9 0.0198 4688 460 windowpane, window 11 | 10 0.0183 2423 225 grass 12 | 11 0.0181 2874 294 cabinet 13 | 12 0.0166 3068 310 sidewalk, pavement 14 | 13 0.0160 5075 526 person, individual, someone, somebody, mortal, soul 15 | 14 0.0151 1804 190 earth, ground 16 | 15 0.0118 6666 796 door, double door 17 | 16 0.0110 4269 411 table 18 | 17 0.0109 1691 160 mountain, mount 19 | 18 0.0104 3999 441 plant, flora, plant life 20 | 19 0.0104 2149 217 curtain, drape, drapery, mantle, pall 21 | 20 0.0103 3261 318 chair 22 | 21 0.0098 3164 306 car, auto, automobile, machine, motorcar 23 | 22 0.0074 709 75 water 24 | 23 0.0067 3296 315 painting, picture 25 | 24 0.0065 1191 106 sofa, couch, lounge 26 | 25 0.0061 1516 162 shelf 27 | 26 0.0060 667 69 house 28 | 27 0.0053 651 57 sea 29 | 28 0.0052 1847 224 mirror 30 | 29 0.0046 1158 128 rug, carpet, carpeting 31 | 30 0.0044 480 44 field 32 | 31 0.0044 1172 98 armchair 33 | 32 0.0044 1292 184 seat 34 | 33 0.0033 1386 138 fence, fencing 35 | 34 0.0031 698 61 desk 36 | 35 0.0030 781 73 rock, stone 37 | 36 0.0027 380 43 wardrobe, closet, press 38 | 37 0.0026 3089 302 lamp 39 | 38 0.0024 404 37 bathtub, bathing tub, bath, tub 40 | 39 0.0024 804 99 railing, rail 41 | 40 0.0023 1453 153 cushion 42 | 41 0.0023 411 37 base, pedestal, stand 43 | 42 0.0022 1440 162 box 44 | 43 0.0022 800 77 column, pillar 45 | 44 0.0020 2650 298 signboard, sign 46 | 45 0.0019 549 46 chest of drawers, chest, bureau, dresser 47 | 46 0.0019 367 36 counter 48 | 47 0.0018 311 30 sand 49 | 48 0.0018 1181 122 sink 50 | 49 0.0018 287 23 skyscraper 51 | 50 0.0018 468 38 fireplace, hearth, open fireplace 52 | 51 0.0018 402 43 refrigerator, icebox 53 | 52 0.0018 130 12 grandstand, covered stand 54 | 53 0.0018 561 64 path 55 | 54 0.0017 880 102 stairs, steps 56 | 55 0.0017 86 12 runway 57 | 56 0.0017 172 11 case, display case, showcase, vitrine 58 | 57 0.0017 198 18 pool table, billiard table, snooker table 59 | 58 0.0017 930 109 pillow 60 | 59 0.0015 139 18 screen door, screen 61 | 60 0.0015 564 52 stairway, staircase 62 | 61 0.0015 320 26 river 63 | 62 0.0015 261 29 bridge, span 64 | 63 0.0014 275 22 bookcase 65 | 64 0.0014 335 60 blind, screen 66 | 65 0.0014 792 75 coffee table, cocktail table 67 | 66 0.0014 395 49 toilet, can, commode, crapper, pot, potty, stool, throne 68 | 67 0.0014 1309 138 flower 69 | 68 0.0013 1112 113 book 70 | 69 0.0013 266 27 hill 71 | 70 0.0013 659 66 bench 72 | 71 0.0012 331 31 countertop 73 | 72 0.0012 531 56 stove, kitchen stove, range, kitchen range, cooking stove 74 | 73 0.0012 369 36 palm, palm tree 75 | 74 0.0012 144 9 kitchen island 76 | 75 0.0011 265 29 computer, computing machine, 
computing device, data processor, electronic computer, information processing system 77 | 76 0.0010 324 33 swivel chair 78 | 77 0.0009 304 27 boat 79 | 78 0.0009 170 20 bar 80 | 79 0.0009 68 6 arcade machine 81 | 80 0.0009 65 8 hovel, hut, hutch, shack, shanty 82 | 81 0.0009 248 25 bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle 83 | 82 0.0008 492 49 towel 84 | 83 0.0008 2510 269 light, light source 85 | 84 0.0008 440 39 truck, motortruck 86 | 85 0.0008 147 18 tower 87 | 86 0.0008 583 56 chandelier, pendant, pendent 88 | 87 0.0007 533 61 awning, sunshade, sunblind 89 | 88 0.0007 1989 239 streetlight, street lamp 90 | 89 0.0007 71 5 booth, cubicle, stall, kiosk 91 | 90 0.0007 618 53 television receiver, television, television set, tv, tv set, idiot box, boob tube, telly, goggle box 92 | 91 0.0007 135 12 airplane, aeroplane, plane 93 | 92 0.0007 83 5 dirt track 94 | 93 0.0007 178 17 apparel, wearing apparel, dress, clothes 95 | 94 0.0006 1003 104 pole 96 | 95 0.0006 182 12 land, ground, soil 97 | 96 0.0006 452 50 bannister, banister, balustrade, balusters, handrail 98 | 97 0.0006 42 6 escalator, moving staircase, moving stairway 99 | 98 0.0006 307 31 ottoman, pouf, pouffe, puff, hassock 100 | 99 0.0006 965 114 bottle 101 | 100 0.0006 117 13 buffet, counter, sideboard 102 | 101 0.0006 354 35 poster, posting, placard, notice, bill, card 103 | 102 0.0006 108 9 stage 104 | 103 0.0006 557 55 van 105 | 104 0.0006 52 4 ship 106 | 105 0.0005 99 5 fountain 107 | 106 0.0005 57 4 conveyer belt, conveyor belt, conveyer, conveyor, transporter 108 | 107 0.0005 292 31 canopy 109 | 108 0.0005 77 9 washer, automatic washer, washing machine 110 | 109 0.0005 340 38 plaything, toy 111 | 110 0.0005 66 3 swimming pool, swimming bath, natatorium 112 | 111 0.0005 465 49 stool 113 | 112 0.0005 50 4 barrel, cask 114 | 113 0.0005 622 75 basket, handbasket 115 | 114 0.0005 80 9 waterfall, falls 116 | 115 0.0005 59 3 tent, collapsible shelter 117 | 116 0.0005 531 72 bag 118 | 117 0.0005 282 30 minibike, motorbike 119 | 118 0.0005 73 7 cradle 120 | 119 0.0005 435 44 oven 121 | 120 0.0005 136 25 ball 122 | 121 0.0005 116 24 food, solid food 123 | 122 0.0004 266 31 step, stair 124 | 123 0.0004 58 12 tank, storage tank 125 | 124 0.0004 418 83 trade name, brand name, brand, marque 126 | 125 0.0004 319 43 microwave, microwave oven 127 | 126 0.0004 1193 139 pot, flowerpot 128 | 127 0.0004 97 23 animal, animate being, beast, brute, creature, fauna 129 | 128 0.0004 347 36 bicycle, bike, wheel, cycle 130 | 129 0.0004 52 5 lake 131 | 130 0.0004 246 22 dishwasher, dish washer, dishwashing machine 132 | 131 0.0004 108 13 screen, silver screen, projection screen 133 | 132 0.0004 201 30 blanket, cover 134 | 133 0.0004 285 21 sculpture 135 | 134 0.0004 268 27 hood, exhaust hood 136 | 135 0.0003 1020 108 sconce 137 | 136 0.0003 1282 122 vase 138 | 137 0.0003 528 65 traffic light, traffic signal, stoplight 139 | 138 0.0003 453 57 tray 140 | 139 0.0003 671 100 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin 141 | 140 0.0003 397 44 fan 142 | 141 0.0003 92 8 pier, wharf, wharfage, dock 143 | 142 0.0003 228 18 crt screen 144 | 143 0.0003 570 59 plate 145 | 144 0.0003 217 22 monitor, monitoring device 146 | 145 0.0003 206 19 bulletin board, notice board 147 | 146 0.0003 130 14 shower 148 | 147 0.0003 178 28 radiator 149 | 148 0.0002 504 57 glass, drinking glass 150 | 149 0.0002 775 96 clock 151 | 150 0.0002 421 56 flag 152 | 
-------------------------------------------------------------------------------- /image_segmentation/image_segmentation/dali_pipeline.py: -------------------------------------------------------------------------------- 1 | from nvidia import dali 2 | import nvidia.dali.tfrecord as tfrec 3 | from nvidia.dali import ops 4 | from nvidia.dali import types 5 | 6 | 7 | class CommonPipeline(dali.pipeline.Pipeline): 8 | 9 | def _input(self, tfrecord_path, index_path, shard_id=0): 10 | return ops.TFRecordReader( 11 | path=tfrecord_path, 12 | index_path=index_path, 13 | random_shuffle=True, 14 | features={ 15 | 'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""), 16 | 'image/filename': tfrec.FixedLenFeature((), tfrec.string, ""), 17 | 'image/format': tfrec.FixedLenFeature((), tfrec.string, ""), 18 | 'image/height': tfrec.FixedLenFeature([1], tfrec.int64, -1), 19 | 'image/width': tfrec.FixedLenFeature([1], tfrec.int64, -1), 20 | 'image/channels': tfrec.FixedLenFeature([1], tfrec.int64, -1), 21 | 'image/segmentation/class/encoded': ( 22 | tfrec.FixedLenFeature((), tfrec.string, "") 23 | ), 24 | 'image/segmentation/class/format': ( 25 | tfrec.FixedLenFeature((), tfrec.string, "") 26 | ) 27 | } 28 | ) 29 | 30 | def __init__(self, 31 | batch_size, 32 | num_threads, 33 | device_id, 34 | image_size, 35 | tfrecord_path, 36 | index_path, 37 | config, 38 | shard_id=0): 39 | 40 | super(CommonPipeline, self).__init__(batch_size, 41 | num_threads, 42 | device_id) 43 | 44 | self.image_size = image_size 45 | self.input = self._input(tfrecord_path, index_path, shard_id=shard_id) 46 | # The nvjpeg decoder throws an error for some unsupported jpegs. 47 | # until this is fixed, we'll use the host decoder, which runs on the 48 | # CPU. 49 | # self.decode = ops.nvJPEGDecoder(device="mixed", 50 | # output_type=types.RGB) 51 | self.decode = ops.HostDecoder(device="cpu", 52 | output_type=types.RGB) 53 | self.resize = ops.Resize(device="gpu", 54 | image_type=types.RGB, 55 | interp_type=types.INTERP_LINEAR, 56 | resize_x=image_size, 57 | resize_y=image_size) 58 | 59 | self.resize_large = ops.Resize(device="gpu", 60 | image_type=types.RGB, 61 | interp_type=types.INTERP_LINEAR, 62 | resize_x=image_size * config.zoom_scale, 63 | resize_y=image_size * config.zoom_scale) 64 | 65 | self.color_twist = ops.ColorTwist( 66 | device="gpu", 67 | ) 68 | self.crop_mirror_normalize = ops.CropMirrorNormalize( 69 | device="gpu", 70 | crop=image_size, 71 | output_dtype=types.FLOAT, 72 | image_type=types.RGB, 73 | output_layout=types.DALITensorLayout.NHWC, 74 | mean=122.5, 75 | std=255.0 76 | ) 77 | 78 | self.crop = ops.Crop( 79 | device="gpu", 80 | crop=image_size, 81 | ) 82 | 83 | self.cast = ops.Cast( 84 | device="gpu", 85 | dtype=types.DALIDataType.INT64 86 | ) 87 | self.rotate = ops.Rotate( 88 | device="gpu", 89 | fill_value=0 90 | ) 91 | self.flip = ops.Flip(device="gpu") 92 | 93 | self.coin = ops.CoinFlip(probability=0.5) 94 | self.rotate_rng = ops.Uniform(range=(config.rotate_angle_min, 95 | config.rotate_angle_max)) 96 | self.crop_x_rng = ops.Uniform(range=(0.0, config.crop_x_max)) 97 | self.crop_y_rng = ops.Uniform(range=(0.0, config.crop_y_max)) 98 | self.hue_rng = ops.Uniform(range=(config.hue_min, 99 | config.hue_max)) 100 | self.contrast_rng = ops.Uniform(range=(config.contrast_min, 101 | config.contrast_max)) 102 | self.saturation_rng = ops.Uniform(range=(config.saturation_min, 103 | config.saturation_max)) 104 | self.brightness_rng = ops.Uniform(range=(config.brightness_min, 105 | config.brightness_max)) 106 | 107 | 
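# The Uniform and CoinFlip ops above draw one random value per sample.
# define_graph feeds the same angle, crop offsets, and flip decision to both
# the image and its mask so the pair stays aligned; color jitter is applied
# to the image only.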
self.iter = 0
108 | 
109 | def define_graph(self):
110 | inputs = self.input()
111 | angle = self.rotate_rng()
112 | coin = self.coin()
113 | hue = self.hue_rng()
114 | contrast = self.contrast_rng()
115 | saturation = self.saturation_rng()
116 | brightness = self.brightness_rng()
117 | crop_x = self.crop_x_rng()
118 | crop_y = self.crop_y_rng()
119 | 
120 | images = self.decode(inputs["image/encoded"])
121 | images = images.gpu()
122 | images = self.resize_large(images)
123 | images = self.rotate(images, angle=angle)
124 | images = self.crop(images, crop_pos_x=crop_x, crop_pos_y=crop_y)
125 | images = self.resize(images)
126 | images = self.color_twist(images,
127 | brightness=brightness,
128 | hue=hue,
129 | saturation=saturation,
130 | contrast=contrast)
131 | images = self.flip(images, horizontal=coin)
132 | 
133 | masks = self.decode(inputs["image/segmentation/class/encoded"])
134 | masks = masks.gpu()
135 | masks = self.resize_large(masks)
136 | masks = self.rotate(masks, angle=angle)
137 | masks = self.crop(masks, crop_pos_x=crop_x, crop_pos_y=crop_y)
138 | masks = self.resize(masks)
139 | masks = self.flip(masks, horizontal=coin)
140 | 
141 | images = self.crop_mirror_normalize(images)
142 | masks = self.cast(masks)
143 | return (images, masks)
144 | 
145 | def iter_setup(self):
146 | pass
147 | 
-------------------------------------------------------------------------------- /image_segmentation/README.md: --------------------------------------------------------------------------------
1 | # Fritz Image Segmentation
2 | A Core ML compatible implementation of semantic segmentation with ICNet in Keras.
3 | 
4 | ## Installation
5 | 
6 | From this directory, run:
7 | 
8 | ```
9 | export PYTHONPATH=$PYTHONPATH:`pwd`
10 | ```
11 | 
12 | ## Download Data
13 | The model is trained on the [ADE20K dataset](http://groups.csail.mit.edu/vision/datasets/ADE20K/) provided by MIT. You can download and prepare this data for training using this [handy script](https://github.com/tensorflow/models/blob/master/research/deeplab/datasets/download_and_convert_ade20k.sh) provided in the `TensorFlow/models/research/deeplab` repo on GitHub.
14 | 
15 | The dataset contains >20,000 images and corresponding segmentation masks. Masks assign one of 150 categories to each individual pixel of the image. A list of object classes is included in this repo: [objectInfo150.txt](./objectInfo150.txt)
16 | 
17 | ## Create TFRecord Dataset
18 | 
19 | Training requires data to be read from TFRecords, so we'll need to convert the images before we can use them. It's also recommended that you choose fewer than 20 image labels to train on, as performance degrades beyond that point; the full set of 150 class labels is too many. A whitelist of class labels can be passed via the command line as a pipe-separated string. Note that class labels must match those in `objectInfo150.txt` exactly. Examples of valid whitelists are:
20 | 
21 | ```
22 | "person|wall|floor, flooring"
23 | "chair|wall|coffee table, cocktail table|ceiling|floor, flooring|bed|lamp|sofa, couch, lounge|windowpane, window|pillow"
24 | ```
25 | 
26 | You can also set the `whitelist-threshold` argument to specify the fraction of whitelisted labels that must appear in an image for it to be used in training. For example, if 10 labels are whitelisted and the threshold is set to 0.6, at least 6 of the 10 whitelisted labels must appear in the image for it to be included.
27 | 
28 | Let's create a training data set for images with objects you might find in a living room or bedroom.
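Before running the command, here is a rough illustration of the whitelist/threshold rule described above: the check boils down to counting how many whitelisted class labels actually appear in an image's mask. This is only a sketch; the helper name and the mask representation are assumptions rather than code taken from `create_tfrecord_dataset.py`:

```
import numpy

def passes_whitelist(mask, whitelist_ids, threshold=0.6):
    # mask is an HxW array of ADE20K class indices; whitelist_ids holds the
    # indices of the whitelisted labels.
    present = set(numpy.unique(mask))
    found = sum(1 for class_id in whitelist_ids if class_id in present)
    return float(found) / len(whitelist_ids) >= threshold
```

With 10 whitelisted labels and a threshold of 0.6, an image is kept only if at least 6 of those labels appear in its mask. The command itself: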
29 | 30 | ``` 31 | export LABEL_SET=living_room 32 | mkdir data/${LABEL_SET} 33 | python create_tfrecord_dataset.py \ 34 | -i data/ADEChallengeData2016/images/training/ \ 35 | -a data/ADEChallengeData2016/annotations/training/ \ 36 | -o data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \ 37 | -l data/objectInfo150.txt \ 38 | -w "chair|wall|coffee table, cocktail table|ceiling|floor, flooring|bed|lamp|sofa, couch, lounge|windowpane, window|pillow" \ 39 | -t 0.6 40 | ``` 41 | 42 | This script also automatically outputs a new set of labels and indices in a file named `labels.txt` found in the same directory as the `.tfrecord` output. 43 | 44 | ## Training 45 | The model can be trained using the `train.py` script. 46 | 47 | Before you start, make sure the `image_segmentation` model is on your $PYTHONPATH. From the `fritz-models/image_segmentation` directory. 48 | 49 | ``` 50 | export PYTHONPATH=$PYTHONPATH:`pwd` 51 | ``` 52 | 53 | ### Train Locally 54 | Train the model for 10 steps by running: 55 | 56 | ``` 57 | export LABEL_SET=living_room 58 | python image_segmentation/train.py \ 59 | -d data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \ 60 | -l data/${LABEL_SET}/labels.txt \ 61 | -n 10 \ 62 | -s 768 \ 63 | -a 0.25 \ 64 | -o data/${LABEL_SET}/${LABEL_SET}_icnet_768x768_025.h5 65 | ``` 66 | 67 | By default, a model weights checkpoint is saved every epoch. Note that only weights are saved, not the full model. This is to make it easier to build models for training vs inference. 68 | 69 | ### Training on Google Cloud ML 70 | Zip up all of the local files to send up to Google Cloud. 71 | 72 | ``` 73 | # from fritz-models/image_segmentation/ 74 | python setup.py sdist 75 | ``` 76 | Run the training job. 77 | 78 | ``` 79 | export LABEL_SET=living_room 80 | export YOUR_GCS_BUCKET= 81 | gcloud ml-engine jobs submit training `whoami`_image_segmentation_`date +%s` \ 82 | --runtime-version 1.9 \ 83 | --job-dir=gs://${YOUR_GCS_BUCKET} \ 84 | --packages dist/image_segmentation-1.0.tar.gz \ 85 | --module-name image_segmentation.train \ 86 | --region us-east1 \ 87 | --scale-tier basic_gpu \ 88 | -- \ 89 | -d gs://${YOUR_GCS_BUCKET}/data/${LABEL_SET}/${LABEL_SET}_data.tfrecord \ 90 | -l gs://${YOUR_GCS_BUCKET}/data/${LABEL_SET}/labels.txt \ 91 | -o ${LABEL_SET}_768x768_025.h5 \ 92 | --image-size 768 \ 93 | --alpha 0.25 \ 94 | --num-steps 5000 \ 95 | --batch-size 24 \ 96 | --model-name ${LABEL_SET} \ 97 | --gcs-bucket gs://${YOUR_GCS_BUCKET}/train 98 | ``` 99 | 100 | ## Converting to Core ML 101 | The resulting Keras model can be converted using the script provided. It uses the standard `coremltools` package, but removes the additional model output nodes used for training. 102 | 103 | ``` 104 | python convert_to_coreml.py --alpha 0.25 ${LABEL_SET}_768x768_025.h5 ${LABEL_SET}_768x768_025.mlmodel 105 | ``` 106 | 107 | Once you've got your Core ML model, you can use [Fritz](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models&utm_content=image-segmentation) to integrate, deploy, and manage it in your app. For more tutorials on mobile machine learning, check out [Heartbeat](https://heartbeat.fritz.ai?utm_source=github&utm_campaign=fritz-models&utm_content=image-segmentation). 108 | 109 | ## Benchmarks 110 | On a Google Cloud Compute GPU instance with a single K80, a single epoch containing roughly 1600 768x768 images takes 20 minutes. Average cross-categorical accuracy reached >80% after 12 hours. An additional 3 hours of training with a learning rate of 0.00001 increased accuracy to ~87%. 
Inferences with a 768x768 model can be made at 8-9fps on an iPhone X. 111 | 112 | ## Example - Living Room Objects 113 | 114 | 115 | 116 | 117 | 118 | Download the [mlmodel](https://github.com/fritzlabs/fritz-models/blob/master/image_segmentation/examples/icnet_768x768_living_room.mlmodel). 119 | 120 | ## Additional resources 121 | 122 | * [Original ICNet Implementation](https://github.com/hszhao/ICNet) 123 | * [Keras-ICNet](https://github.com/aitorzip/Keras-ICNet) 124 | * [ICNet-tensorflow](https://github.com/hellochick/ICNet-tensorflow) 125 | -------------------------------------------------------------------------------- /resources/AI_Landscape.md: -------------------------------------------------------------------------------- 1 | # AI and Machine Learning Landscape 2 | 3 | ## Contribute to this list 4 | 5 | The AI and Machine Learning landscape is rapidly changing. We welcome additions and changes to this list! 6 | 7 | ## Data Labeling 8 | 9 | * [Labelbox](https://www.labelbox.com/) - Platform for creating and managing training data 10 | 11 | * [Alegion](https://alegion.com/) - Training platform to build datasets, manage, and more 12 | 13 | * [Clickworker](https://www.clickworker.com/) - Micro tasking marketplace, catering data management and web research services as well as AI algorithm training 14 | 15 | * [Figure Eight](https://www.figure-eight.com/) - Training platform that helps turn data into trainable sets 16 | 17 | * [Gengo AI](https://gengo.ai/) - Training platform for language-based ML tasks 18 | 19 | * [Mighty AI](https://mighty.ai/) - Training data management for Computer Vision tasks 20 | 21 | * [Scale](https://scale.ai/) - Training data API largely focused on Computer Vision tasks 22 | 23 | * [CloudSight](https://cloudsight.ai/) - Image recognition API for digital media 24 | 25 | * [Hive](https://thehive.ai/) - Data labeling, Computer Vision models, and media platform 26 | 27 | * [Microwork](https://microwork.io/) - Image and video annotation services for AI 28 | 29 | ## Synthetic Data 30 | 31 | * [AI.Reverie](https://aireverie.com/) - Simulation platform that generates synthetic data to train and improve ML models 32 | 33 | * [Neuromation](https://www.neuromation.io/) - Distributed computing platform for deep learning applications and synthetic data generation 34 | 35 | ## Feature Engineering 36 | 37 | * [Feature Labs](https://www.featurelabs.com/) - Automated feature engineering platform for enterprise 38 | 39 | * [Featuretools](https://www.featuretools.com/) - Open source Python framework for automated feature engineering - a product of Feature Labs 40 | 41 | * [Source{d}](https://sourced.tech/) - Machine learning for large scale code analysis 42 | 43 | ## Training 44 | 45 | * [ClusterOne](https://clusterone.com/) - Deep learning platform that allows you to train your models on distributed GPUs and CPUs without setup or maintenance 46 | 47 | * [DataBricks](https://databricks.com/) - Unified Analytics Platform that accelerates innovation by unifying data science, engineering and business 48 | 49 | * [DAWNBench](https://dawn.cs.stanford.edu/benchmark/index.html) - Benchmark suite for end-to-end deep learning training and inference out of Stanford 50 | 51 | * [Hyperopt](https://hyperopt.github.io/hyperopt/) - Distributed asynchronous hyperparameter optimization in Python 52 | 53 | * [Lambda Labs](https://lambdalabs.com/) - Workstations, Servers, Laptops, and GPU cloud built for Deep Learning 54 | 55 | * [PaddlePaddle](http://www.paddlepaddle.org/) - An open-source deep 
learning platform with a simple API 56 | 57 | * [Paperspace](https://www.paperspace.com/) - GPU cloud platform (w/ API), AI/ML infrastructure product 58 | 59 | * [RiseML](https://riseml.com/) - Machine Learning Platform for Kubernetes (:cry: - Sunsetting March 31, 2019) 60 | 61 | * [Spell](https://www.spell.run/) - Infrastructure for AI and deep learning experiments and collaboration 62 | 63 | * [Trifacta](https://www.trifacta.com/start-wrangling/) - Data preparation and cleaning platform 64 | 65 | * [Yellowfin](https://www.yellowfinbi.com/) - Integrated data analytics platform 66 | 67 | ## Model Serving / Deployment 68 | 69 | * [5 Analytics](https://www.5analytics.com/index.html) - Enterprise AI platform to integrate, deploy, and monitor ML models 70 | 71 | * [Algorithmia](https://algorithmia.com/) - Machine learning model management platform 72 | 73 | * [Numericcal](https://www.numericcal.com/) - Platform to automate model optimization and management on mobile and IoT 74 | 75 | * [Seldon](https://www.seldon.io/) - Machine learning deployment platform for Enterprise 76 | 77 | * [Vertex AI](http://vertex.ai/) - PalidML is an open source tensor compiler 78 | 79 | * [Alteryx](https://www.alteryx.com/platform) - Integrated analytics platform built to foster partnerships between IT, analytics teams, and businesses 80 | 81 | * [Datatron](https://www.datatron.com/) - Management platform for ML, AI ,and Data Science models 82 | 83 | ## Model Management 84 | 85 | * [Datmo](https://datmo.com/) - Workflow tools to help you experiment, deploy, and scale AI solutions 86 | 87 | * [Iterative AI](https://iterative.ai/) - CI workflow for machine learning projects 88 | 89 | * [MLFlow](https://mlflow.org/) - Open source ML lifecycle platform 90 | 91 | * [MLPerf](https://mlperf.org/) - A broad ML benchmark suite for measuring performance of ML software frameworks, ML hardware accelerators, and ML cloud platforms 92 | 93 | * [Neptune](https://neptune.ml/) - Platform to build ML models, manage infrastructure and dev environments, and team collaboration tools 94 | 95 | * [ParallelM](https://www.parallelm.com/) - Platform to deploy and optimize ML models at scale 96 | 97 | ## End-to-End ML Platforms 98 | 99 | * [Allegro](https://allegro.ai/) - Computer Vision Deep Learning platform 100 | 101 | * [Cnvrg.io](https://cnvrg.io/) - Full stack data science and ML platform 102 | 103 | * [Determined AI](https://determined.ai/) - Platform designed to streamline deep learning workflows 104 | 105 | * [FloydHub](https://www.floydhub.com/) - Deep learning model pipeline 106 | 107 | * [SherlockML](https://sherlockml.com/) - Data science developer environment 108 | 109 | * [BigML](https://bigml.com/) - Comprehensive ML workflow platform 110 | 111 | * [Dataiku](https://www.dataiku.com/) - End-to-end collaborative data science platform 112 | 113 | * [Metis Machine](https://metismachine.com/) - Skafos Machine Learning Platform to help develop and deploy ML pipelines at scale 114 | 115 | * [Valohai](https://valohai.com/) - Deep learning management platform 116 | 117 | * [Dataspine](https://dataspine.io/) - Automation platform for ML workflows 118 | 119 | * [PipelineAI](https://pipeline.ai/) - Flexible end-to-end ML pipeline platform 120 | 121 | * [Deep Cognition](https://deepcognition.ai/) - Deep learning management platform 122 | 123 | * [Polyaxon](https://polyaxon.com/) - Open source platform for reproducible machine learning at scale 124 | 125 | * [Clarifai](https://www.clarifai.com/) - ML platform built for Computer Vision 
problems 126 | 127 | * [Comet.ml](https://www.comet.ml/) - ML platform to track datasets, training, and more 128 | 129 | * [DeepSense.ai](https://deepsense.ai/) - AI AI solution management platform 130 | 131 | * [H20 AI](https://h2o.ai) - Open source ML platform 132 | 133 | * [DataRobot](https://www.datarobot.com/) - Automated ML platform for predictive modeling 134 | 135 | * [Fritz](https://fritz.ai/) - End-to-end platform designed to help mobile developers and ML engineers train and deploy models in mobile apps 136 | 137 | ## Other 138 | 139 | * [Element AI](https://www.elementai.com/) - Use case-based AI software 140 | 141 | * [Ever AI](https://ever.ai/) - Facial recognition and attribute identification SDK and API 142 | 143 | * [Deepomatic](http://www.deepomatic.com/) - Image-based, computer vision AI solutions platform 144 | 145 | * [Leverege](https://www.leverege.com/) - Complete IoT development and deployment platform 146 | 147 | * [Nuance](https://www.nuance.com/omni-channel-customer-engagement/technologies/artificial-intelligence.html) - Conversational AI for smart customer engagement 148 | 149 | * [Cortical.io](https:/www.cortical.io) - Intelligent text processing solution 150 | -------------------------------------------------------------------------------- /image_segmentation/utils/model_helpers.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import os 3 | 4 | import coremltools 5 | import tensorflow as tf 6 | import PIL.Image 7 | import skimage.transform 8 | import skimage.filters 9 | import numpy 10 | from tensorflow.python.platform import gfile 11 | from image_segmentation import data_generator 12 | import image_segmentation 13 | import requests 14 | from io import BytesIO 15 | 16 | 17 | class ModelParameters(object): 18 | 19 | def __init__(self, **params): 20 | self.label_set = params['label_set'] 21 | self.batch_size = params['batch_size'] 22 | self.resolution = params['resolution'] 23 | self.alpha = params['alpha'] 24 | self.labels = params['labels'] 25 | self.num_classes = len(self.labels) 26 | self.gcs_bucket = params.get('gcs_bucket') 27 | self._training_data_path = params.get('training_data_path') 28 | self._model_path = params.get('model_path') 29 | 30 | self.file_base = params.get( 31 | 'file_base', 32 | f'{self.label_set}_{self.resolution}x{self.resolution}_1' 33 | ) 34 | 35 | @property 36 | def training_data_path(self): 37 | if self._training_data_path: 38 | return self._training_data_path 39 | 40 | return ( 41 | '../fritz-image-segmentation/data/' 42 | '{label_set}/{label_set}.tfrecord' 43 | ).format(label_set=self.label_set) 44 | 45 | @property 46 | def model_path(self): 47 | if self._model_path: 48 | return self._model_path 49 | 50 | return ( 51 | f'gs://{self.gcs_bucket}/train/{self.file_base}.h5' 52 | ) 53 | 54 | 55 | class TrainedModel(object): 56 | 57 | def __init__(self, model_parameters): 58 | self._params = model_parameters 59 | resolution = model_parameters.resolution 60 | 61 | self.dataset = data_generator.ADE20KDatasetBuilder.build( 62 | self._params.training_data_path, 63 | self._params.batch_size, 64 | (resolution, resolution), 65 | self._params.num_classes, 66 | augment_images=False, 67 | repeat=False 68 | ) 69 | 70 | self._model = None 71 | 72 | def download_and_build_model(self): 73 | temp_h5 = tempfile.NamedTemporaryFile(suffix='.h5') 74 | print("Loading model") 75 | # with gfile.Open(self._params.model_path, 'rb') as fid: 76 | # temp_h5.file.write(fid.read()) 77 | # temp_h5.seek(0) 78 
| 79 | return image_segmentation.icnet.ICNetModelFactory.build( 80 | self._params.resolution, 81 | self._params.num_classes, 82 | alpha=self._params.alpha, 83 | weights_path=self._params.model_path, 84 | train=False 85 | ) 86 | 87 | @property 88 | def model(self): 89 | if self._model is None: 90 | self._model = self.download_and_build_model() 91 | 92 | return self._model 93 | 94 | def iterate_images(self): 95 | iterator = self.dataset.make_one_shot_iterator() 96 | el = iterator.get_next() 97 | 98 | try: 99 | with tf.Session() as sess: 100 | while True: 101 | out = sess.run([el]) 102 | for i in range(out[0]['image'].shape[0]): 103 | image = out[0]['image'][i] 104 | mask = out[0]['mask'][i] 105 | yield (image, mask) 106 | except tf.errors.OutOfRangeError: 107 | return 108 | 109 | def training_images(self, num_images=10, start_index=0): 110 | results = [] 111 | for i, (image, mask) in enumerate(self.iterate_images()): 112 | if i < start_index: 113 | continue 114 | 115 | if len(results) >= num_images: 116 | break 117 | results.append((image, mask)) 118 | 119 | return results 120 | 121 | def run_prediction(self, img_path=None, img_data=None, img_url=None, 122 | img=None): 123 | if img_url: 124 | response = requests.get(img_url) 125 | img = PIL.Image.open(BytesIO(response.content)) 126 | elif img_path: 127 | img = PIL.Image.open(img_path) 128 | 129 | if img_data is None: 130 | img = img.resize((self._params.resolution, 131 | self._params.resolution)) 132 | img_data = numpy.array(img) 133 | img_data = img_data * 1. / 255. - 0.5 134 | img_data = skimage.filters.gaussian(img_data, sigma=0.0) 135 | elif img_data is None: 136 | raise Exception("Must either pass image data or a path to image") 137 | 138 | return self.model.predict(img_data[None, :, :, :]) 139 | 140 | def predict_and_plot(self, img_path=None, img_data=None, img_url=None, 141 | mask=None, probabilities=True): 142 | if img_url: 143 | response = requests.get(img_url) 144 | img = PIL.Image.open(BytesIO(response.content)) 145 | img = img.resize((self._params.resolution, 146 | self._params.resolution)) 147 | if img_path: 148 | img = PIL.Image.open(img_path) 149 | img = img.resize((self._params.resolution, 150 | self._params.resolution)) 151 | elif img_data is not None: 152 | img = ((img_data + 0.5) * 255).astype('uint8') 153 | 154 | output = self.run_prediction(img_path=img_path, img_data=img_data, 155 | img_url=img_url) 156 | 157 | figure = image_segmentation.utils.plot_image_and_mask( 158 | numpy.array(img), 159 | output[0], 160 | reference_mask=mask, 161 | alpha=0.9, 162 | small=True) 163 | generated_figures = [figure] 164 | 165 | if probabilities: 166 | generated_figures.append( 167 | image_segmentation.utils.plot_pixel_probabilities( 168 | output[0], 169 | self._params.labels 170 | ) 171 | ) 172 | 173 | return output, generated_figures 174 | 175 | def calculate_error(self, results, mask): 176 | resized_mask = numpy.resize(mask[:, :, 0], ( 177 | results.shape[0], results.shape[1] 178 | )) 179 | resized_mask = mask[:, :, 0] 180 | 181 | resized_results = skimage.transform.resize( 182 | numpy.argmax(results, axis=-1), 183 | mask.shape[:2], 184 | preserve_range=True, 185 | anti_aliasing=False, 186 | order=0) 187 | 188 | diff = resized_mask - resized_results 189 | 190 | success_rate = [] 191 | for i, label in enumerate(self._params.labels): 192 | total_class_values = numpy.sum(resized_mask == i) 193 | if not total_class_values: 194 | continue 195 | 196 | incorrect = float(numpy.count_nonzero(diff[resized_mask == i])) 197 | true_positive = 
float(numpy.sum(diff[resized_mask == i] == 0)) 198 | iou = true_positive / (true_positive + incorrect) 199 | success_rate.append((i, total_class_values, true_positive, iou)) 200 | print(f"{label} - total: {total_class_values}, IoU: {iou}") 201 | 202 | mean_iou = ( 203 | sum([iou for _, _, _, iou in success_rate]) / len(success_rate) 204 | ) 205 | print(f"mIoU: {mean_iou}") 206 | return success_rate 207 | 208 | def convert_to_coreml(self, mlmodel_path='./'): 209 | mlmodel = coremltools.converters.keras.convert( 210 | self.model, 211 | input_names='image', 212 | image_input_names='image', 213 | image_scale=1.0 / 255.0, 214 | red_bias=-0.5, 215 | green_bias=-0.5, 216 | blue_bias=-0.5, 217 | output_names='output' 218 | ) 219 | mlmodel_file_path = ( 220 | os.path.join(mlmodel_path, self._params.file_base + '.mlmodel') 221 | ) 222 | mlmodel.save(mlmodel_file_path) 223 | print(f"successfully saved {mlmodel_file_path}") 224 | -------------------------------------------------------------------------------- /style_transfer/README.md: -------------------------------------------------------------------------------- 1 | # Fritz Style Transfer 2 | Code for training artistic style transfer models with Keras and converting them to Core ML. 3 | 4 | 5 | 6 | Left: Original image. Middle: Image stylized with a 17kb small model. Right: Image stylized by the default large model. 7 | 8 | # Add style transfer to your app in minutes with Fritz 9 | 10 | If you're looking to add style transfer to your app quickly, check out [Fritz](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models&utm_content=style-transfer). The Fritz SDK provides 11 pre-trained style transfer models along with all the code you need to apply them to images or live video. If you want to train your own model, keep reading. 11 | 12 | # 11-21-2018: Train your own custom style model in 20 minutes 13 | 14 | You can now train your own personal style transfer model in about 20 minutes using Fritz Style Transfer and Google Colab. Just create your own playground from [this notebook](https://colab.research.google.com/drive/1nDkxLKBgZGFscGoF0tfyPMGqW03xITl0#scrollTo=L9aTwLIqtFTE) to get started. You can read more about how it works [here](https://heartbeat.fritz.ai/20-minute-masterpiece-4b6043fdfff5?utm_source=github&utm_campaign=fritz-models&utm_content=style-transfer). 15 | 16 | # Installation 17 | 18 | If you're not installing using a package manager like `pip`, make sure the root directory is on your `PYTHONPATH`: 19 | 20 | ``` 21 | export PYTHONPATH=$PYTHONPATH:`pwd` 22 | ``` 23 | 24 | # Preprocessing Training Data 25 | The training data comes from the [COCO Training data set](http://cocodataset.org/). It consists of ~80,000 images and labels, although the labels aren't used here. 26 | 27 | The `create_training_dataset.py` script will download and unzip this data, then process the images to create a TFRecord dataset used by the style transfer network trainer. You can run this with the command below. Note that the first time you run this you will need to download and unzip 13GB worth of data, which can take a while. The command only processes the first 10 images to make sure things are working, but you can modify `--num-images` to process more.
28 | 29 | ``` 30 | python create_training_dataset.py \ 31 | --output example/training_images.tfrecord \ 32 | --image-dir path/to/coco/ \ 33 | --num-images 10 34 | ``` 35 | 36 | Note that if you have already downloaded and extracted a set of images to use for training, that directory needs to be called `train2014/` and you need to point `--coco-image-dir` to the parent directory that contains that folder. Otherwise you can use the `--download` flag. 37 | 38 | # Training a Style Transfer Model 39 | 40 | To train the model from scratch for 10 iterations: 41 | 42 | ``` 43 | python style_transfer/train.py \ 44 | --training-image-dset example/training_images.tfrecord \ 45 | --style-images example/starry_night.jpg \ 46 | --model-checkpoint example/starry_night.h5 \ 47 | --image-size 256,256 \ 48 | --alpha 0.25 \ 49 | --log-interval 1 \ 50 | --num-iterations 10 51 | ``` 52 | 53 | If everything looks good, we can pick up where we left off and keep training the same model. 54 | 55 | ``` 56 | python style_transfer/train.py \ 57 | --training-image-dset example/training_images.tfrecord \ 58 | --style-images example/starry_night.jpg \ 59 | --model-checkpoint example/starry_night.h5 \ 60 | --image-size 256,256 \ 61 | --alpha 0.25 \ 62 | --num-iterations 1000 \ 63 | --fine-tune-checkpoint example/starry_night.h5 64 | ``` 65 | 66 | If you're using the full COCO dataset, you'll need around 20,000 iterations to train a model from scratch with a batch size of 24. If you're starting from a pre-trained model checkpoint, 5,000 steps should work. A model pre-trained on Starry Night is provided in the `example/` folder. 67 | 68 | For styles that are abstract with strong geometric patterns, try higher values for `--content-weight` like `3` or `10`. For styles that are more photo-realistic with smaller details, boost the `--style-weight` to `0.001` or more. 69 | 70 | Finally, note that for training, we resize images to 256x256px. This is for training only. Final models can be set to take images of any size. 71 | 72 | ## Training models for mobile 73 | 74 | By default, the style transfer networks produced here are roughly 7mb in size and contain 7 million parameters. They can create a stylized image in ~500ms on high end mobile phones, and 5s on lower end phones. To make the models faster, we've included a width-multiplier parameter similar to the one introduced by Google in their MobileNet architecture. The value `alpha` can be set between 0 and 1 and controls how many filters are included in each layer. Lower `alpha` means fewer filters, fewer parameters, and faster models, with slightly worse style transfer abilities. In testing, `alpha=0.25` produced models that ran at 17fps on an iPhone X while still transferring styles well. 75 | 76 | Finally, for models that are intended to be used in real-time on a CPU only, you can use the `--use-small-network` flag to train a model architecture that has been heavily pruned. The style transfer itself isn't quite as good, but the results are usable and the models are incredibly small. 77 | 78 | # Stylizing Images 79 | To stylize an image with a trained model you can run: 80 | 81 | ``` 82 | python stylize_image.py \ 83 | --input-image example/dog.jpg \ 84 | --output-image example/stylized_dog.jpg \ 85 | --model-checkpoint example/starry_night_256x256_025.h5 86 | ``` 87 | 88 | # Convert to Mobile 89 | Style transfer models can be converted to both Core ML and TensorFlow Mobile formats.
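Before running either converter, it can help to confirm that your checkpoint loads cleanly at the resolution you plan to convert to. Here is a minimal Python sketch (not one of the repo scripts); it assumes the package is on your `PYTHONPATH` and reuses the example checkpoint referenced above:

```
from style_transfer import models

# Build the network at the target conversion size. Weights trained at
# 256x256 load fine because the convolutional layers are size-agnostic,
# but alpha must match the value used during training.
model = models.StyleTransferNetwork.build(
    image_size=(640, 480),
    alpha=0.25,
    checkpoint_file='example/starry_night_256x256_025.h5',
)
model.summary()
```

If this runs without a shape mismatch, the same checkpoint, alpha, and image size can be passed to the converter scripts below.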
90 | 91 | ## Convert to Core ML 92 | Use the converter script to convert to Core ML. 93 | 94 | This converter is a slight modification of Apple's Keras converter that allows 95 | the user to define custom conversions between Keras layers and Core ML layers. This allows us to convert the Instance Normalization and Deprocessing layers. 96 | 97 | ``` 98 | python convert_to_coreml.py \ 99 | --keras-checkpoint example/starry_night_256x256_025.h5 \ 100 | --alpha 0.25 \ 101 | --image-size 640,480 \ 102 | --coreml-model example/starry_night_640x480_025.mlmodel 103 | ``` 104 | 105 | ## Convert to TensorFlow Mobile 106 | Models cannot be converted to TFLite because some operations are not supported, but TensorFlow Mobile works fine. To convert your model to an optimized frozen graph, run: 107 | 108 | ``` 109 | python convert_to_tfmobile.py \ 110 | --keras-checkpoint example/starry_night_256x256_025.h5 \ 111 | --alpha 0.25 \ 112 | --image-size 640,480 \ 113 | --output-dir example/ 114 | ``` 115 | 116 | This produces a number of TensorFlow graph formats. The `*_optimized.pb` graph file is the one you want to use with your app. Note that the input node name is `input_1` and the output node name is `deprocess_stylized_image_1/mul`. 117 | 118 | # Train on Google Cloud ML 119 | 120 | This library is designed to work with certain configurations on Google Cloud ML so you can train styles in parallel and take advantage of GPUs. Assuming you have Google Cloud ML and Google Cloud Storage set up, the following commands will get you training new models in just a few hours. 121 | 122 | ## Set up your Google Cloud Storage bucket. 123 | 124 | This repo assumes the following structure on Google Cloud Storage: 125 | 126 | ``` 127 | gs://${YOUR_GCS_BUCKET}/ 128 | |-- data/ 129 | |-- training_images.tfrecord 130 | |-- starry_night_256x256_025.h5 131 | |-- style_images/ 132 | |-- style_1.jpg 133 | |-- style_2.jpg 134 | |-- dist/ 135 | |-- fritz_style_transfer.zip 136 | |-- train/ 137 | |-- pretrained_model.h5 138 | |-- output_model.h5 139 | ``` 140 | 141 | To make things easier, start by setting some environment variables. 142 | 143 | ``` 144 | export YOUR_GCS_BUCKET=your_gcs_bucket 145 | export FRITZ_STYLE_TRANSFER_PATH=/path/to/fritz-models/style_transfer/ 146 | export KERAS_CONTRIB_PATH=/path/to/keras-contrib 147 | export STYLE_NAME=style_name 148 | ``` 149 | 150 | Note that `STYLE_NAME` should be the filename of the style image (without the extension). 151 | 152 | Create the GCS bucket if you haven't already: 153 | 154 | ``` 155 | gsutil mb gs://${YOUR_GCS_BUCKET} 156 | ``` 157 | 158 | Copy the training data, pre-trained checkpoint, and style image to GCS: 159 | ``` 160 | gsutil cp example/training_images.tfrecord gs://${YOUR_GCS_BUCKET}/data 161 | gsutil cp example/${STYLE_NAME}.jpg gs://${YOUR_GCS_BUCKET}/data/style_images/ 162 | gsutil cp example/starry_night_256x256_025.h5 gs://${YOUR_GCS_BUCKET}/data/ 163 | ``` 164 | 165 | ## Package up libraries. 166 | 167 | Zip up all of the local files to send up to Google Cloud. 168 | ``` 169 | python setup.py sdist 170 | ``` 171 | 172 | Zip up keras_contrib so it's available to the library as well. 173 | ``` 174 | pushd ${KERAS_CONTRIB_PATH} 175 | python setup.py sdist 176 | cp dist/* ${FRITZ_STYLE_TRANSFER_PATH}/dist/ 177 | popd 178 | ``` 179 | 180 | ## Start the training job 181 | 182 | The following command will start training a new style transfer model from a pre-trained checkpoint.
This configuration trains on 256x256 images and has `--alpha=0.25` making it suitable for real-time use in mobile apps. 183 | 184 | ``` 185 | gcloud ml-engine jobs submit training `whoami`_style_transfer`date +%s` \ 186 | --runtime-version 1.8 \ 187 | --job-dir=gs://${YOUR_GCS_BUCKET} \ 188 | --packages dist/style_transfer-1.0.tar.gz,dist/keras_contrib-2.0.8.tar.gz \ 189 | --module-name style_transfer.train \ 190 | --region us-east1 \ 191 | --scale-tier basic_gpu \ 192 | -- \ 193 | --training-image-dset gs://${YOUR_GCS_BUCKET}/data/test_training_images.tfrecord \ 194 | --style-images gs://${YOUR_GCS_BUCKET}/data/style_images/${STYLE_NAME}.jpg \ 195 | --model-checkpoint ${STYLE_NAME}_256x256_025.h5 \ 196 | --image-size 256,256 \ 197 | --alpha 0.25 \ 198 | --num-iterations 5000 \ 199 | --batch-size 24 \ 200 | --content-weight 1 \ 201 | --style-weight .0001 \ 202 | --gcs-bucket gs://${YOUR_GCS_BUCKET}/train \ 203 | --fine-tune-checkpoint gs://${YOUR_GCS_BUCKET}/data/starry_night_256x256_025.h5 204 | ``` 205 | 206 | Distributed training and TPUs are not yet supported. 207 | 208 | # Add the model to your app with Fritz 209 | 210 | Now that you have a style transfer model that works for both iOS and Android, head over to [https://fritz.ai](https://fritz.ai/?utm_source=github&utm_campaign=fritz-models&utm_content=style-transfer) for tools to help you integrate it into your app and manage it over time. 211 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/models.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import keras_contrib 3 | import logging 4 | 5 | from style_transfer import layers 6 | from style_transfer import utils 7 | 8 | logger = logging.getLogger('models') 9 | 10 | 11 | class StyleTransferNetwork(object): 12 | """A class that builds a Keras model to perform style transfer. 13 | 14 | The architecture for this model comes from Johnson et al: 15 | https://arxiv.org/abs/1603.08155 16 | https://cs.stanford.edu/people/jcjohns/papers/fast-style/fast-style-supp.pdf 17 | 18 | It differs slightly from Johnson's model by swapping reflective 19 | padding with Zero Padding and Batch Normalization for 20 | Instance Normalization as recommended in Ulyanov et al: 21 | https://arxiv.org/abs/1607.08022 22 | """ 23 | 24 | @classmethod 25 | def build( 26 | cls, 27 | image_size, 28 | alpha=1.0, 29 | input_tensor=None, 30 | checkpoint_file=None): 31 | """Build a Transfer Network Model using keras' functional API. 32 | 33 | Args: 34 | image_size - the size of the input and output image (H, W) 35 | alpha - a width parameter to scale the number of channels by 36 | 37 | Returns: 38 | model: a keras model object 39 | """ 40 | x = keras.layers.Input( 41 | shape=(image_size[0], image_size[1], 3), tensor=input_tensor) 42 | out = cls._convolution(x, int(alpha * 32), 9, strides=1) 43 | out = cls._convolution(out, int(alpha * 64), 3, strides=2) 44 | out = cls._convolution(out, int(alpha * 128), 3, strides=2) 45 | out = cls._residual_block(out, int(alpha * 128)) 46 | out = cls._residual_block(out, int(alpha * 128)) 47 | out = cls._residual_block(out, int(alpha * 128)) 48 | out = cls._residual_block(out, int(alpha * 128)) 49 | out = cls._residual_block(out, int(alpha * 128)) 50 | out = cls._upsample(out, int(alpha * 64), 3) 51 | out = cls._upsample(out, int(alpha * 32), 3) 52 | # Add a layer of padding to keep sizes consistent. 
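# (The explicit ZeroPadding2D below is left commented out because the final
#  convolution uses padding='same', which already preserves the spatial size.)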
53 | # out = keras.layers.ZeroPadding2D(padding=(1, 1))(out) 54 | out = cls._convolution(out, 3, 9, relu=False, padding='same') 55 | # Restrict outputs of pixel values to -1 and 1. 56 | out = keras.layers.Activation('tanh')(out) 57 | # Deprocess the image into valid image data. Note we'll need to define 58 | # a custom layer for this in Core ML as well. 59 | out = layers.DeprocessStylizedImage()(out) 60 | model = keras.models.Model(inputs=x, outputs=out) 61 | 62 | # Optionally load weights from a checkpoint 63 | if checkpoint_file: 64 | logger.info( 65 | 'Loading weights from checkpoint: %s' % checkpoint_file 66 | ) 67 | if checkpoint_file.startswith('gs://'): 68 | checkpoint_file = utils.copy_file_from_gcs(checkpoint_file) 69 | model.load_weights(checkpoint_file, by_name=True) 70 | return model 71 | 72 | @classmethod 73 | def _convolution( 74 | cls, x, n_filters, kernel_size, strides=1, 75 | padding='same', relu=True, use_bias=False): 76 | """Create a convolution block. 77 | 78 | This block consists of a convolution layer, normalization, and an 79 | optional RELU activation. 80 | 81 | Args: 82 | x - a keras layer as input 83 | n_filters - the number of output dimensions 84 | kernel_size - an integer or tuple specifying the (width, height) of 85 | the 2D convolution window 86 | strides - An integer or tuple/list of 2 integers, specifying the 87 | strides of the convolution along the width and height. 88 | Default 1. 89 | padding: one of "valid" or "same" (case-insensitive). 90 | relu - a bool specifying whether or not a RELU activation is 91 | applied. Default True. 92 | use_bias = a bool specifying whether or not to use a bias term 93 | """ 94 | out = keras.layers.convolutional.Conv2D( 95 | n_filters, 96 | kernel_size, 97 | strides=strides, 98 | padding=padding, 99 | use_bias=use_bias 100 | )(x) 101 | 102 | # We are using the keras-contrib library from @farizrahman4u for 103 | # an implementation of Instance Normalization. Note here that we are 104 | # specifying the normalization axis to be -1, or the channel axis. 105 | # By default this is None and simple Batch Normalization is applied. 106 | out = keras_contrib.layers.normalization.InstanceNormalization( 107 | axis=-1)(out) 108 | if relu: 109 | out = keras.layers.Activation('relu')(out) 110 | return out 111 | 112 | @classmethod 113 | def _residual_block(cls, x, n_filters, kernel_size=3): 114 | """Construct a residual block. 115 | 116 | Args: 117 | x - a keras layer as input 118 | n_filters - the number of output dimensions 119 | kernel_size - an integer or tuple specifying the (width, height) of 120 | the 2D convolution window. Default 3. 121 | Returns: 122 | out - a keras layer as output 123 | """ 124 | # Make sure the layer has the proper size and store a copy of the 125 | # original, cropped input layer. 126 | # identity = keras.layers.Cropping2D(cropping=((2, 2), (2, 2)))(x) 127 | 128 | out = cls._convolution(x, n_filters, kernel_size, padding='same') 129 | out = cls._convolution( 130 | out, n_filters, kernel_size, padding='same', relu=False 131 | ) 132 | out = keras.layers.Add()([out, x]) 133 | return out 134 | 135 | @classmethod 136 | def _upsample(cls, x, n_filters, kernel_size, size=2): 137 | """Construct an upsample block. 138 | 139 | Args: 140 | x - a keras layer as input 141 | n_filters - the number of output dimensions 142 | kernel_size - an integer or tuple specifying the (width, height) of 143 | the 2D convolution window. Default 3. 
144 | Returns: 145 | out - a keras layer as output 146 | """ 147 | out = keras.layers.UpSampling2D(size=size)(x) 148 | # out = keras.layers.ZeroPadding2D(padding=(2, 2))(out) 149 | out = cls._convolution(out, n_filters, kernel_size, padding='same') 150 | return out 151 | 152 | 153 | class SmallStyleTransferNetwork(StyleTransferNetwork): 154 | 155 | @classmethod 156 | def build(cls, image_size, alpha=1.0, input_tensor=None, checkpoint_file=None): 157 | """Build a Smaller Transfer Network Model using keras' functional API. 158 | 159 | This architecture removes some blocks of layers and reduces the size 160 | of convolutions to save on computation. 161 | 162 | Args: 163 | image_size - the size of the input and output image (H, W) 164 | alpha - a width parameter to scale the number of channels by 165 | 166 | Returns: 167 | model: a keras model object 168 | """ 169 | x = keras.layers.Input( 170 | shape=(image_size[0], image_size[1], 3), tensor=input_tensor) 171 | out = cls._convolution(x, int(alpha * 32), 9, strides=1) 172 | out = cls._convolution(out, int(alpha * 32), 3, strides=2) 173 | out = cls._convolution(out, int(alpha * 32), 3, strides=2) 174 | out = cls._residual_block(out, int(alpha * 32)) 175 | out = cls._residual_block(out, int(alpha * 32)) 176 | out = cls._residual_block(out, int(alpha * 32)) 177 | out = cls._upsample(out, int(alpha * 32), 3) 178 | out = cls._upsample(out, int(alpha * 32), 3) 179 | out = cls._convolution(out, 3, 9, relu=False, padding='same') 180 | # Restrict outputs of pixel values to -1 and 1. 181 | out = keras.layers.Activation('tanh')(out) 182 | # Deprocess the image into valid image data. Note we'll need to define 183 | # a custom layer for this in Core ML as well. 184 | out = layers.DeprocessStylizedImage()(out) 185 | model = keras.models.Model(inputs=x, outputs=out) 186 | 187 | # Optionally load weights from a checkpoint 188 | if checkpoint_file: 189 | logger.info( 190 | 'Loading weights from checkpoint: %s' % checkpoint_file 191 | ) 192 | if checkpoint_file.startswith('gs://'): 193 | checkpoint_file = utils.copy_file_from_gcs(checkpoint_file) 194 | model.load_weights(checkpoint_file, by_name=True) 195 | return model 196 | 197 | 198 | class IntermediateVGG(object): 199 | """A VGG network class that allows easy access to intermediate layers. 200 | 201 | This class takes the default VGG16 application packaged with Keras and 202 | constructs a dictionary mapping layer names to layout puts so that 203 | we can easily extract the network's features at any level. These outputs 204 | are used to compute losses in artistic style transfer. 205 | 206 | """ 207 | 208 | def __init__(self, prev_layer=None, input_tensor=None): 209 | """Initialize the model. 210 | 211 | Args: 212 | prev_layer - a keras layer to use as an input layer to the 213 | VGG model. This allows us to stitch other models 214 | together with the VGG. 215 | input_tensor - a tensor that will be used as input for the 216 | VGG. 217 | """ 218 | # Create the Keras VGG Model 219 | self.model = keras.applications.vgg16.VGG16( 220 | weights='imagenet', 221 | include_top=False, 222 | input_tensor=input_tensor 223 | ) 224 | 225 | # Make sure none of the VGG layers are trainable 226 | for layer in self.model.layers: 227 | layer.trainable = False 228 | 229 | # if a previous layer is specified, stitch that layer to the 230 | # input of the VGG model and rewire the entire model. 
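# Calling each VGG layer again on the incoming tensor puts the VGG outputs
# in the same graph as the style network; the resulting tensors are cached
# by layer name so losses can be computed from any intermediate feature map.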
231 | self.layers = {} 232 | if prev_layer is not None: 233 | # We need to apply all layers to the output of the style net 234 | in_layer = prev_layer 235 | for layer in self.model.layers[1:]: # Ignore the input layer 236 | in_layer = layer(in_layer) 237 | self.layers[layer.name] = in_layer 238 | else: 239 | self.layers = dict( 240 | [(layer.name, layer.output) for layer in self.model.layers] 241 | ) 242 | -------------------------------------------------------------------------------- /image_segmentation/create_tfrecord_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import argparse 17 | from functools import partial 18 | import logging 19 | import os 20 | import io 21 | import numpy 22 | import sys 23 | 24 | import PIL.Image 25 | import tensorflow as tf 26 | from tensorflow.python.lib.io import file_io 27 | 28 | from image_segmentation import build_data 29 | 30 | logging.basicConfig(level=logging.INFO) 31 | logger = logging.getLogger('create_tfrecord_dataset') 32 | 33 | 34 | def main(argv): 35 | parser = argparse.ArgumentParser( 36 | description='Convert the ADE20K Challenge dataset to tfrecords' 37 | ) 38 | 39 | parser.add_argument( 40 | '-i', '--image-dir', type=str, required=True, 41 | help='Folder containing trainng images' 42 | ) 43 | parser.add_argument( 44 | '-a', '--annotation-dir', type=str, required=True, 45 | help='Folder containing annotations for training images' 46 | ) 47 | parser.add_argument( 48 | '-o', '--output', type=str, required=True, 49 | help='Path to save converted tfrecord of Tensorflow example' 50 | ) 51 | parser.add_argument( 52 | '-l', '--label-filename', type=str, required=True, 53 | help='A file containing a single label per line.' 54 | ) 55 | parser.add_argument( 56 | '-w', '--whitelist-labels', type=str, 57 | help=('A pipe | separated list of object labels to whitelist. ' 58 | 'categories can be merged by seperating them by : ' 59 | 'e.g. "person|car:truck:van|pavement". To see a' 60 | ' full list of allowed labels run with --list-labels.') 61 | ) 62 | parser.add_argument( 63 | '-t', '--whitelist-threshold', type=float, default=0.7, 64 | help=('The fraction of whitelisted labels an image must contain to be ' 65 | 'used for training.') 66 | ) 67 | parser.add_argument( 68 | '--list-labels', action='store_true', 69 | help='If true, print a full list of object labels.' 
70 | ) 71 | 72 | args = parser.parse_args(argv) 73 | 74 | # Load the class labels 75 | class_labels = _load_class_labels(args.label_filename) 76 | n_classes = len(class_labels) 77 | if args.list_labels: 78 | logger.info('Labels:') 79 | labels = '' 80 | for label in class_labels: 81 | labels += '%d, %s\n' % label 82 | logger.info(labels) 83 | sys.exit() 84 | 85 | # If a whitelist is provided, get a list of mask indices that correspond 86 | # to allowed labels 87 | whitelist_labels = None 88 | whitelist_indices = None 89 | if args.whitelist_labels: 90 | whitelist_labels = _parse_whitelist_labels(args.whitelist_labels) 91 | 92 | # add a 'none' class with a label of 0 93 | whitelist_labels.insert(0, ['none']) 94 | whitelist_indices = _find_whitelist_indices( 95 | class_labels, whitelist_labels) 96 | 97 | whitelist_filename = os.path.join( 98 | os.path.dirname(args.output), 'labels.txt') 99 | _save_whitelist_labels(whitelist_filename, whitelist_labels) 100 | n_classes = len(whitelist_labels) 101 | 102 | _create_tfrecord_dataset( 103 | args.image_dir, 104 | args.annotation_dir, 105 | args.output, 106 | n_classes, 107 | whitelist_indices=whitelist_indices, 108 | whitelist_threshold=args.whitelist_threshold 109 | ) 110 | 111 | 112 | def _parse_whitelist_labels(whitelist): 113 | parsed = whitelist.split('|') 114 | parsed = [category.split(':') for category in parsed] 115 | return parsed 116 | 117 | 118 | def _save_whitelist_labels(whitelist_filename, labels): 119 | with open(whitelist_filename, 'w') as wfid: 120 | header = 'idx\tlabel\n' 121 | wfid.write(header) 122 | for idx, label_set in enumerate(labels): 123 | label = label_set[0].split(',')[0] 124 | wfid.write('%d\t%s\n' % (idx, label)) 125 | print("Saved") 126 | 127 | 128 | def _load_class_labels(label_filename): 129 | """Load class labels. 130 | 131 | Assumes the data directory is left unchanged from the original zip. 132 | 133 | Args: 134 | root_directory (str): the dataset's root directory 135 | 136 | Returns: 137 | List[(int, str)]: a list of class ids and labels 138 | """ 139 | class_labels = [] 140 | header = True 141 | with file_io.FileIO(label_filename, mode='r') as file: 142 | for line in file.readlines(): 143 | if header: 144 | class_labels.append((0, 'none')) 145 | header = False 146 | continue 147 | line = line.rstrip() 148 | line = line.split('\t') 149 | label = line[-1] 150 | label_id = int(line[0]) 151 | class_labels.append((label_id, label)) 152 | return class_labels 153 | 154 | 155 | def _find_whitelist_indices(class_labels, whitelist_labels): 156 | """Map whitelist labels to indices. 
157 | 158 | Args: 159 | whitelist (List[str]): a list of whitelisted labels 160 | 161 | Returns: 162 | List[Set]: a list of sets containing index labels 163 | """ 164 | index = [] 165 | for label_set in whitelist_labels: 166 | index_set = [] 167 | for label in label_set: 168 | for class_id, class_label in class_labels: 169 | if label == class_label: 170 | index_set.append(class_id) 171 | index.append(index_set) 172 | return index 173 | 174 | 175 | def _filter_whitelabel_classes( 176 | filenames, 177 | whitelist, 178 | whitelist_threshold, 179 | whitelist_size=None): 180 | w_size = whitelist_size or len(whitelist) 181 | mask = numpy.array(PIL.Image.open(filenames[-1])) 182 | unique_classes = numpy.unique(mask) 183 | num_found = numpy.intersect1d(unique_classes, whitelist).size 184 | if float(num_found) / w_size >= whitelist_threshold: 185 | return True 186 | return False 187 | 188 | 189 | def _relabel_mask(seg_data, whitelist_indices): 190 | # Read the data into a numpy array. 191 | mask = numpy.array(PIL.Image.open(io.BytesIO(seg_data))) 192 | # Relabel each pixel 193 | new_mask = numpy.zeros(mask.shape) 194 | for new_label, old_label_set in enumerate(whitelist_indices): 195 | idx = numpy.where(numpy.isin(mask, old_label_set)) 196 | new_mask[idx] = new_label 197 | # Convert the new mask back to an image. 198 | seg_img = PIL.Image.fromarray(new_mask.astype('uint8')).convert('RGB') 199 | # Save the new image to a PNG byte string. 200 | byte_buffer = io.BytesIO() 201 | seg_img.save(byte_buffer, format='png') 202 | byte_buffer.seek(0) 203 | return byte_buffer.read() 204 | 205 | 206 | def _create_tfrecord_dataset( 207 | image_dir, 208 | segmentation_mask_dir, 209 | output_filename, 210 | n_classes, 211 | whitelist_indices=None, 212 | whitelist_threshold=0.5): 213 | """Convert the ADE20k dataset into into tfrecord format. 214 | 215 | Args: 216 | dataset_split: Dataset split (e.g., train, val). 217 | dataset_dir: Dir in which the dataset locates. 218 | dataset_label_dir: Dir in which the annotations locates. 219 | Raises: 220 | RuntimeError: If loaded image and label have different shape. 221 | """ 222 | # Get all of the image and segmentation mask file names 223 | img_names = tf.gfile.Glob(os.path.join(image_dir, '*.jpg')) 224 | seg_names = [] 225 | for f in img_names: 226 | # get the filename without the extension 227 | basename = os.path.basename(f).split('.')[0] 228 | # cover its corresponding *_seg.png 229 | seg = os.path.join(segmentation_mask_dir, basename + '.png') 230 | seg_names.append(seg) 231 | 232 | # If a whitelist has been provided, loop over all of the segmentation 233 | # masks and find only the images that contain enough classes. 234 | kept_files = zip(img_names, seg_names) 235 | if whitelist_indices is not None: 236 | # Flatten the whitelist because some categories have been merged 237 | # but make sure to use the orginal list size when 238 | # computing the threshold. 239 | flat_whitelist = numpy.array( 240 | [idx for idx_set in whitelist_indices for idx in idx_set] 241 | ).astype('uint8') 242 | merged_whitelist_size = len(whitelist_indices) 243 | filter_fn = partial( 244 | _filter_whitelabel_classes, 245 | whitelist=flat_whitelist, 246 | whitelist_threshold=whitelist_threshold, 247 | whitelist_size=merged_whitelist_size 248 | ) 249 | kept_files = list(filter(filter_fn, kept_files)) 250 | logger.info( 251 | 'Found %d images after whitelist filtereing.' 
% len(kept_files)) 252 | num_images = len(kept_files) 253 | image_reader = build_data.ImageReader('jpeg', channels=3) 254 | label_reader = build_data.ImageReader('png', channels=1) 255 | 256 | with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer: 257 | for idx, (image_filename, seg_filename) in enumerate(kept_files): 258 | if idx % 100 == 0: 259 | logger.info('Converting image %d of %d.' % (idx, num_images)) 260 | # Read the image. 261 | image_data = tf.gfile.FastGFile(image_filename, 'rb').read() 262 | height, width = image_reader.read_image_dims(image_data) 263 | # Read the semantic segmentation annotation. 264 | seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read() 265 | # If there is a whitelist, we need to relabel all of the 266 | # mask classes so that only the whitelisted labels are present. 267 | if whitelist_indices is not None: 268 | seg_data = _relabel_mask(seg_data, whitelist_indices) 269 | seg_height, seg_width = label_reader.read_image_dims(seg_data) 270 | if height != seg_height or width != seg_width: 271 | raise RuntimeError( 272 | 'Shape mismatched between image and label.') 273 | # Convert to tf example. 274 | example = build_data.image_seg_to_tfexample( 275 | image_data, image_filename, height, width, seg_data) 276 | tfrecord_writer.write(example.SerializeToString()) 277 | 278 | 279 | if __name__ == '__main__': 280 | main(sys.argv[1:]) 281 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/data_generator.py: -------------------------------------------------------------------------------- 1 | """Summary. 2 | 3 | Attributes: 4 | logger (TYPE): Description 5 | """ 6 | import logging 7 | 8 | import numpy 9 | import tensorflow as tf 10 | from tensorflow.python.lib.io import file_io 11 | 12 | logger = logging.getLogger('data_generator') 13 | 14 | 15 | def _gaussian_kernel_3d(sigma, channels=3, size=4.0): 16 | radius = sigma * size / 2.0 + 0.5 17 | gauss = tf.distributions.Normal(0., sigma) 18 | kernel_1d = gauss.prob( 19 | tf.range(-radius[0], radius[0] + 1.0, dtype=tf.float32) 20 | ) 21 | kernel_2d = tf.sqrt(tf.einsum('i,j->ij', kernel_1d, kernel_1d)) 22 | kernel_2d = kernel_2d / tf.reduce_sum(kernel_2d) 23 | kernel = tf.expand_dims(kernel_2d, -1) 24 | kernel = tf.expand_dims(kernel, -1) 25 | kernel = tf.tile(kernel, [1, 1, channels, 1]) 26 | return kernel 27 | 28 | 29 | class ADE20KDatasetBuilder(object): 30 | """Create a TFRecord dataset from the ADE20K data.""" 31 | 32 | # Scale and bias parameters to pre-process images so pixel values are 33 | # between -0.5 and 0.5 34 | _PREPROCESS_IMAGE_SCALE = 1.0 / 255.0 35 | _PREPROCESS_CHANNEL_BIAS = -0.5 36 | 37 | @staticmethod 38 | def load_class_labels(label_filename): 39 | """Load class labels. 40 | 41 | Assumes the data directory is left unchanged from the original zip. 42 | 43 | Args: 44 | root_directory (str): the dataset's root directory 45 | 46 | Returns: 47 | arr: an array of class labels 48 | """ 49 | class_labels = [] 50 | header = True 51 | with file_io.FileIO(label_filename, mode='r') as file: 52 | for line in file.readlines(): 53 | if header: 54 | header = False 55 | continue 56 | line = line.rstrip() 57 | label = line.split('\t')[-1] 58 | class_labels.append(label) 59 | return numpy.array(class_labels) 60 | 61 | @staticmethod 62 | def _resize_fn(images, image_size): 63 | """Resize an input images.. 
64 | 65 | Args: 66 | images (tf.tensor): a tensor of input images 67 | image_size ((int, int)): a size (H,W) to resize to 68 | 69 | Returns: 70 | tf.tensor: a resized image tensor 71 | """ 72 | return tf.image.resize_images( 73 | images, 74 | image_size, 75 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR 76 | ) 77 | 78 | @classmethod 79 | def _preprocess_example(cls, example): 80 | """Preprocess an image. 81 | 82 | Args: 83 | example (dict): a single example from the dataset 84 | 85 | Return: 86 | (dict) processed example from the dataset 87 | """ 88 | example['image'] = (tf.cast(example['image'], tf.float32) * 89 | cls._PREPROCESS_IMAGE_SCALE + 90 | cls._PREPROCESS_CHANNEL_BIAS) 91 | return example 92 | 93 | @classmethod 94 | def _resize_example(cls, example, image_size): 95 | """Resize an image and mask from. 96 | 97 | Args: 98 | example (dict): a single example from the dataset. 99 | image_size ((int, int)): the desired size of image and mask 100 | 101 | Returns: 102 | (dict) a single example resized 103 | """ 104 | return {'image': cls._resize_fn(example['image'], image_size), 105 | 'mask': cls._resize_fn(example['mask'], image_size)} 106 | 107 | @staticmethod 108 | def _crop_and_resize(image, zoom, image_size): 109 | """Crop and resize an image. 110 | 111 | Uses center cropping. 112 | 113 | Args: 114 | image (tensor): an input image tensor 115 | zoom (float): a zoom factor 116 | image_size ((int, int)): a desired output image size 117 | 118 | Returns: 119 | tensor: an outpu timage tensor 120 | """ 121 | x1 = y1 = 0.5 - 0.5 * zoom # scale centrally 122 | x2 = y2 = 0.5 + 0.5 * zoom 123 | boxes = tf.stack([y1, x1, y2, x2], axis=1) 124 | box_ind = [0] 125 | return tf.cast(tf.squeeze( 126 | tf.image.crop_and_resize( 127 | tf.expand_dims(image, 0), 128 | boxes, 129 | box_ind, 130 | image_size, 131 | method='nearest' 132 | ) 133 | ), tf.uint8) 134 | 135 | @staticmethod 136 | def _blur(image, sigma): 137 | kernel = _gaussian_kernel_3d(sigma) 138 | # all preprocessing should run on the CPU 139 | with tf.device('/cpu:0'): 140 | blurred_image = tf.nn.depthwise_conv2d( 141 | tf.cast(tf.expand_dims(image, 0), tf.float32), 142 | kernel, 143 | [1, 1, 1, 1], 144 | padding='SAME', 145 | data_format="NHWC" 146 | ) 147 | return blurred_image[0] 148 | 149 | @classmethod 150 | def _augment_example(cls, example): 151 | """Augment an example from the dataset. 152 | 153 | All augmentation functions are also be applied to the segmentation 154 | mask. 155 | 156 | Args: 157 | example (dict): a single example from the dataset. 
158 | 159 | Returns: 160 | dict: an augmented example 161 | """ 162 | image = example['image'] 163 | mask = example['mask'] 164 | 165 | image_size = image.shape.as_list()[0:2] 166 | 167 | # Add padding so we don't get black borders 168 | paddings = numpy.array( 169 | [[image_size[0] / 2, image_size[0] / 2], 170 | [image_size[1] / 2, image_size[1] / 2], 171 | [0, 0]], dtype=numpy.uint32) 172 | aug_image = tf.pad(image, paddings, mode='REFLECT') 173 | aug_mask = tf.pad(mask, paddings, mode='REFLECT') 174 | padded_image_size = [dim * 2 for dim in image_size] 175 | 176 | # Rotate 177 | angle = tf.random_uniform([1], -numpy.pi / 6, numpy.pi / 6) 178 | aug_image = tf.contrib.image.rotate(aug_image, angle) 179 | aug_mask = tf.contrib.image.rotate(aug_mask, angle) 180 | 181 | # Zoom 182 | zoom = tf.random_uniform([1], 0.85, 1.75) 183 | aug_image = cls._crop_and_resize(aug_image, zoom, padded_image_size) 184 | aug_mask = cls._crop_and_resize(aug_mask, zoom, padded_image_size) 185 | 186 | # Crop things back to original size 187 | aug_image = tf.image.central_crop(aug_image, central_fraction=0.5) 188 | aug_mask = tf.image.central_crop(aug_mask, central_fraction=0.5) 189 | 190 | # blur 191 | # Not used at the moment because it makes training hard 192 | # sigma = tf.random_uniform([1], 0.0, 1.0) 193 | # aug_image = cls._blur(aug_image, sigma) 194 | 195 | # Flip left right 196 | do_flip = tf.greater(tf.random_uniform([1], 0.0, 1.0)[0], 0.5) 197 | aug_image = tf.cond( 198 | do_flip, 199 | true_fn=lambda: tf.image.flip_left_right(aug_image), 200 | false_fn=lambda: aug_image, 201 | ) 202 | aug_mask = tf.cond( 203 | do_flip, 204 | true_fn=lambda: tf.image.flip_left_right(aug_mask), 205 | false_fn=lambda: aug_mask, 206 | ) 207 | 208 | # Flip up down 209 | do_flip = tf.greater(tf.random_uniform([1], 0.0, 1.0)[0], 0.5) 210 | aug_image = tf.cond( 211 | do_flip, 212 | true_fn=lambda: tf.image.flip_up_down(aug_image), 213 | false_fn=lambda: aug_image, 214 | ) 215 | aug_mask = tf.cond( 216 | do_flip, 217 | true_fn=lambda: tf.image.flip_up_down(aug_mask), 218 | false_fn=lambda: aug_mask, 219 | ) 220 | 221 | return {'image': aug_image, 'mask': aug_mask} 222 | 223 | @staticmethod 224 | def _decode_example(example_proto): 225 | """Decode an example from a TFRecord. 226 | 227 | Args: 228 | example_proto (tfrecord): a serialized tf record 229 | 230 | Returns: 231 | dict: an example from the dataset containing image and mask. 232 | """ 233 | features = { 234 | "image/encoded": tf.FixedLenFeature( 235 | (), tf.string, default_value="" 236 | ), 237 | "image/segmentation/class/encoded": tf.FixedLenFeature( 238 | (), tf.string, default_value="" 239 | ) 240 | } 241 | parsed_features = tf.parse_single_example(example_proto, features) 242 | image = tf.image.decode_jpeg( 243 | parsed_features["image/encoded"], channels=3) 244 | mask = tf.image.decode_png( 245 | parsed_features["image/segmentation/class/encoded"], channels=3) 246 | return {'image': image, 'mask': mask} 247 | 248 | @classmethod 249 | def _generate_multiscale_masks(cls, example, n_classes): 250 | """Generate masks at mulitple scales for training. 251 | 252 | The loss function compares masks at 4, 8, and 16x increases in scale. 253 | 254 | Args: 255 | example (dict): a single example from the dataset 256 | n_classes (int): the number of classes in the mask 257 | 258 | Returns 259 | (dict): the same example, but with additional mask data for each 260 | new resolution. 
261 | """ 262 | original_mask = example['mask'] 263 | # Add the image to the placeholder 264 | image_size = example['image'].shape.as_list()[0:2] 265 | 266 | for scale in [4, 8, 16]: 267 | example['mask_%d' % scale] = tf.one_hot( 268 | cls._resize_fn( 269 | original_mask, 270 | list(map(lambda x: x // scale, image_size)) 271 | )[:, :, 0], # only need one channel 272 | depth=n_classes, 273 | dtype=tf.float32 274 | ) 275 | return example 276 | 277 | @classmethod 278 | def scale_mask(cls, mask, scale, image_size, n_classes): 279 | return tf.one_hot( 280 | cls._resize_fn( 281 | mask, 282 | image_size, 283 | )[:, :, :, 0], # only need one channel 284 | depth=n_classes, 285 | dtype=tf.float32 286 | ) 287 | 288 | @classmethod 289 | def build( 290 | cls, 291 | filename, 292 | batch_size, 293 | image_size, 294 | n_classes, 295 | augment_images=True, 296 | repeat=True, 297 | prefetch=False, 298 | parallel_calls=1): 299 | """Build a TFRecord dataset. 300 | 301 | Args: 302 | filename (str): a .tfrecord file to read 303 | batch_size (int): batch size 304 | image_size (int): the desired image size of examples 305 | n_classes (int): the number of classes 306 | whitelist_threshold (float): the minimum fraction of whitelisted 307 | classes an example must contain to be used for training. 308 | 309 | Returns: 310 | dataset: a TFRecordDataset 311 | """ 312 | logger.info('Creating dataset from: %s' % filename) 313 | dataset = tf.data.TFRecordDataset(filename) 314 | dataset = dataset.map(cls._decode_example, 315 | num_parallel_calls=parallel_calls) 316 | dataset = dataset.map(lambda x: cls._resize_example(x, image_size), 317 | num_parallel_calls=parallel_calls) 318 | if augment_images: 319 | dataset = dataset.map(cls._augment_example, 320 | num_parallel_calls=parallel_calls) 321 | dataset = dataset.map(cls._preprocess_example, 322 | num_parallel_calls=parallel_calls) 323 | dataset = dataset.map( 324 | lambda x: cls._generate_multiscale_masks(x, n_classes), 325 | num_parallel_calls=parallel_calls 326 | ) 327 | if repeat: 328 | dataset = dataset.repeat() 329 | 330 | dataset = dataset.batch(batch_size) 331 | if prefetch: 332 | dataset = dataset.prefetch(buffer_size=batch_size) 333 | return dataset 334 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/train.py: -------------------------------------------------------------------------------- 1 | """Train an ICNet Model on ADE20K Data.""" 2 | 3 | import argparse 4 | import keras 5 | import logging 6 | import time 7 | import sys 8 | import struct 9 | import os 10 | from tensorflow.python.lib.io import file_io 11 | import tensorflow as tf 12 | from image_segmentation.icnet import ICNetModelFactory 13 | from image_segmentation.data_generator import ADE20KDatasetBuilder 14 | from image_segmentation import dali_config 15 | from google.cloud import storage 16 | 17 | logging.basicConfig(level=logging.INFO) 18 | logger = logging.getLogger('train') 19 | 20 | 21 | def _summarize_arguments(args): 22 | """Summarize input arguments to ICNet model training. 23 | 24 | Args: 25 | args: 26 | """ 27 | 28 | logger.info('ICNet Model training Parameters') 29 | logger.info('-------------------------------') 30 | for key, value in vars(args).items(): 31 | logger.info(' {key}={value}'.format(key=key, value=value)) 32 | 33 | 34 | def _build_parser(argv): 35 | parser = argparse.ArgumentParser( 36 | description='Train an ICNet model.' 
37 | ) 38 | # Data options 39 | parser.add_argument( 40 | '-d', '--data', nargs='+', required=True, 41 | help='A TFRecord file containing images and segmentation masks.' 42 | ) 43 | parser.add_argument( 44 | '--tfindex-files', nargs='+', 45 | help='TFIndex file for dali pipeline. If not included, will be built' 46 | ) 47 | parser.add_argument( 48 | '-l', '--label-filename', type=str, required=True, 49 | help='A file containing a single label per line.' 50 | ) 51 | parser.add_argument( 52 | '-s', '--image-size', type=int, default=768, 53 | help=('The pixel dimension of model input and output. Images ' 54 | 'will be square.') 55 | ) 56 | parser.add_argument( 57 | '-a', '--alpha', type=float, default=1.0, 58 | help='The width multiplier for the network' 59 | ) 60 | parser.add_argument( 61 | '--augment-images', type=bool, default=True, 62 | help='turn on image augmentation.' 63 | ) 64 | parser.add_argument( 65 | '--add-noise', action='store_true', 66 | help='Add gaussian noise to training.' 67 | ) 68 | parser.add_argument( 69 | '--use-dali', action='store_true', 70 | help='turn on image augmentation.' 71 | ) 72 | parser.add_argument( 73 | '--list-labels', action='store_true', 74 | help='If true, print a full list of object labels.' 75 | ) 76 | # Training options 77 | parser.add_argument( 78 | '-b', '--batch-size', type=int, default=8, 79 | help='The training batch_size.' 80 | ) 81 | parser.add_argument( 82 | '--lr', type=float, default=0.001, help='The learning rate.' 83 | ) 84 | parser.add_argument( 85 | '-n', '--num-steps', type=int, default=1000, 86 | help='Number of training steps to perform' 87 | ) 88 | parser.add_argument( 89 | '--steps-per-epoch', type=int, default=100, 90 | help='Number of training steps to perform between model checkpoints' 91 | ) 92 | parser.add_argument( 93 | '-o', '--output', 94 | help='An output file to save the trained model.') 95 | parser.add_argument( 96 | '--gpu-cores', type=int, default=1, 97 | help='Number of GPU cores to run on.') 98 | parser.add_argument( 99 | '--fine-tune-checkpoint', type=str, 100 | help='A Keras model checkpoint to load and continue training.' 101 | ) 102 | parser.add_argument( 103 | '--gcs-bucket', type=str, 104 | help='A GCS Bucket to save models too.' 105 | ) 106 | parser.add_argument( 107 | '--parallel-calls', type=int, default=1, 108 | help='Number of parallel calss to preprocessing to perform.' 109 | ) 110 | parser.add_argument( 111 | '--model-name', type=str, required=True, 112 | help='Short name separated by underscores' 113 | ) 114 | 115 | return parser.parse_known_args() 116 | 117 | 118 | def _prepare_dataset(args, n_classes): 119 | dataset = ADE20KDatasetBuilder.build( 120 | args.data, 121 | n_classes=n_classes, 122 | batch_size=args.batch_size, 123 | image_size=(args.image_size, args.image_size), 124 | augment_images=False, 125 | parallel_calls=args.parallel_calls, 126 | prefetch=True, 127 | ) 128 | 129 | iterator = dataset.make_one_shot_iterator() 130 | example = iterator.get_next() 131 | 132 | return { 133 | 'input': example['image'], 134 | 'mask_4': example['mask_4'], 135 | 'mask_8': example['mask_8'], 136 | 'mask_16': example['mask_16'], 137 | } 138 | 139 | 140 | def build_tfindex_file(tfrecord_file, tfindex_file): 141 | """Builds a tfindex file used by DALI from a tfrecord file. 142 | 143 | Args: 144 | tfrecord_file: Path to TFRecord file. 145 | tfindex_file: output file to write to. 
146 | """ 147 | tfrecord_fp = open(tfrecord_file, 'rb') 148 | idx_fp = open(tfindex_file, 'w') 149 | 150 | while True: 151 | current = tfrecord_fp.tell() 152 | try: 153 | # length 154 | byte_len = tfrecord_fp.read(8) 155 | if byte_len == '': 156 | break 157 | # crc 158 | tfrecord_fp.read(4) 159 | proto_len = struct.unpack('q', byte_len)[0] 160 | # proto 161 | tfrecord_fp.read(proto_len) 162 | # crc 163 | tfrecord_fp.read(4) 164 | idx_fp.write(str(current) + ' ' + 165 | str(tfrecord_fp.tell() - current) + '\n') 166 | except Exception: 167 | print("Not a valid TFRecord file") 168 | break 169 | 170 | tfrecord_fp.close() 171 | idx_fp.close() 172 | 173 | 174 | def _prepare_dali(args, n_classes): 175 | if args.gpu_cores > 1: 176 | logger.error( 177 | 'Have not built in support for more than one GPU at the moment.' 178 | ) 179 | sys.exit(1) 180 | 181 | # non NVIDIA cloud environments will not have dali, so we 182 | # have to do the import here. 183 | from image_segmentation.dali_pipeline import CommonPipeline 184 | import nvidia.dali.plugin.tf as dali_tf 185 | 186 | batch_size = args.batch_size 187 | image_size = args.image_size 188 | device_id = 0 189 | storage_client = storage.Client() 190 | filenames = [] 191 | 192 | for filename in args.data: 193 | if filename.startswith('gs://'): 194 | parts = filename[5:].split('/') 195 | bucket_name, blob_name = parts[0], '/'.join(parts[1:]) 196 | bucket = storage_client.get_bucket(bucket_name) 197 | blob = bucket.blob(blob_name) 198 | download_filename = os.path.basename(blob_name) 199 | blob.download_to_filename(download_filename) 200 | filenames.append(download_filename) 201 | else: 202 | filenames.append(filename) 203 | 204 | tfindex_files = args.tfindex_files or [] 205 | if not tfindex_files: 206 | for path in filenames: 207 | tfindex_file = path.split('.')[0] + '.tfindex' 208 | build_tfindex_file(path, tfindex_file) 209 | logger.info('Created tfindex file: {input} -> {output}'.format( 210 | input=path, 211 | output=tfindex_file 212 | )) 213 | tfindex_files.append(tfindex_file) 214 | 215 | config = dali_config.DaliConfig() 216 | config.summarize() 217 | 218 | pipe = CommonPipeline( 219 | args.batch_size, 220 | args.parallel_calls, 221 | device_id, 222 | args.image_size, 223 | filenames, 224 | tfindex_files, 225 | config 226 | ) 227 | pipe.build() 228 | 229 | daliop = dali_tf.DALIIterator() 230 | with tf.device('/gpu:0'): 231 | results = daliop( 232 | serialized_pipeline=pipe.serialize(), 233 | shape=[args.batch_size, args.image_size, args.image_size, 3], 234 | label_type=tf.int64, 235 | ) 236 | 237 | input_tensor = results.batch 238 | 239 | results.label.set_shape([batch_size, image_size, image_size, 3]) 240 | mask = results.label 241 | new_shape = [image_size / 4, image_size / 4] 242 | mask_4 = ADE20KDatasetBuilder.scale_mask(mask, 4, new_shape, n_classes) 243 | new_shape = [image_size / 8, image_size / 8] 244 | mask_8 = ADE20KDatasetBuilder.scale_mask(mask, 8, new_shape, n_classes) 245 | new_shape = [image_size / 16, image_size / 16] 246 | mask_16 = ADE20KDatasetBuilder.scale_mask(mask, 16, new_shape, n_classes) 247 | 248 | return { 249 | 'input': input_tensor, 250 | 'mask_4': mask_4, 251 | 'mask_8': mask_8, 252 | 'mask_16': mask_16, 253 | } 254 | 255 | 256 | def train(argv): 257 | """Train an ICNet model.""" 258 | 259 | args, unknown = _build_parser(argv) 260 | _summarize_arguments(args) 261 | 262 | class_labels = ADE20KDatasetBuilder.load_class_labels( 263 | args.label_filename) 264 | if args.list_labels: 265 | logger.info('Labels:') 266 | labels 
= '' 267 | for label in class_labels: 268 | labels += '%s\n' % label 269 | logger.info(labels) 270 | sys.exit() 271 | 272 | n_classes = len(class_labels) 273 | 274 | if args.use_dali: 275 | data = _prepare_dali(args, n_classes) 276 | else: 277 | data = _prepare_dataset(args, n_classes) 278 | 279 | if args.add_noise: 280 | logger.info('Adding gaussian noise to input tensor.') 281 | noise = tf.random_normal(shape=tf.shape(data['input']), 282 | mean=0.0, 283 | stddev=0.07, 284 | dtype=tf.float32) 285 | data['input'] = data['input'] + noise 286 | 287 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8) 288 | config = tf.ConfigProto(gpu_options=gpu_options) 289 | sess = tf.Session(config=config) 290 | keras.backend.set_session(sess) 291 | 292 | if args.gpu_cores > 1: 293 | with tf.device('/CPU:0'): 294 | icnet = ICNetModelFactory.build( 295 | args.image_size, 296 | n_classes, 297 | weights_path=args.fine_tune_checkpoint, 298 | train=True, 299 | input_tensor=data['input'], 300 | alpha=args.alpha, 301 | ) 302 | 303 | gpu_icnet = keras.utils.multi_gpu_model(icnet, gpus=args.cores) 304 | gpu_icnet.__setattr__('callback_model', icnet) 305 | model = gpu_icnet 306 | else: 307 | with tf.device('/GPU:0'): 308 | model = ICNetModelFactory.build( 309 | args.image_size, 310 | n_classes, 311 | weights_path=args.fine_tune_checkpoint, 312 | train=True, 313 | input_tensor=data['input'], 314 | alpha=args.alpha, 315 | ) 316 | 317 | optimizer = keras.optimizers.Adam(lr=args.lr) 318 | model.compile( 319 | optimizer, 320 | loss=keras.losses.categorical_crossentropy, 321 | loss_weights=[1.0, 0.4, 0.16], 322 | metrics=['categorical_accuracy'], 323 | target_tensors=[ 324 | data['mask_4'], data['mask_8'], data['mask_16'] 325 | ] 326 | ) 327 | 328 | if not args.output: 329 | output_filename_fmt = '{model_name}_{size}x{size}_{alpha}_{time}.h5' 330 | filename = output_filename_fmt.format( 331 | model_name=args.model_name, 332 | size=args.image_size, 333 | alpha=str(args.alpha).replace('0', '').replace('.', ''), 334 | time=int(time.time()) 335 | ) 336 | else: 337 | filename = args.output 338 | 339 | print("=======================") 340 | print("Output file name: {name}".format(name=filename)) 341 | print("=======================") 342 | 343 | callbacks = [ 344 | keras.callbacks.ModelCheckpoint( 345 | filename, 346 | verbose=0, 347 | mode='auto', 348 | period=1 349 | ), 350 | ] 351 | 352 | if args.gcs_bucket: 353 | callbacks.append(SaveCheckpointToGCS(filename, args.gcs_bucket)) 354 | 355 | model.fit( 356 | steps_per_epoch=args.steps_per_epoch, 357 | epochs=int(args.num_steps / args.steps_per_epoch) + 1, 358 | callbacks=callbacks, 359 | ) 360 | 361 | 362 | class SaveCheckpointToGCS(keras.callbacks.Callback): 363 | """A callback to save local model checkpoints to GCS.""" 364 | 365 | def __init__(self, local_filename, gcs_filename): 366 | """Save a checkpoint to GCS. 
367 | 368 | Args: 369 | local_filename (str): the path of the local checkpoint 370 | gcs_filename (str): the GCS bucket to save the model to 371 | """ 372 | self.gcs_filename = gcs_filename 373 | self.local_filename = local_filename 374 | 375 | @staticmethod 376 | def _copy_file_to_gcs(job_dir, file_path): 377 | gcs_url = os.path.join(job_dir, file_path) 378 | logger.info('Saving models to GCS: %s' % gcs_url) 379 | with file_io.FileIO(file_path, mode='rb') as input_f: 380 | with file_io.FileIO(gcs_url, mode='w+') as output_f: 381 | output_f.write(input_f.read()) 382 | 383 | def on_epoch_end(self, epoch, logs={}): 384 | """Save model to GCS on epoch end. 385 | 386 | Args: 387 | epoch (int): the epoch number 388 | logs (dict, optional): logs dict 389 | """ 390 | basename = os.path.basename(self.local_filename) 391 | self._copy_file_to_gcs(self.gcs_filename, basename) 392 | 393 | 394 | if __name__ == '__main__': 395 | train(sys.argv[1:]) 396 | -------------------------------------------------------------------------------- /style_transfer/style_transfer/fritz_coreml_converter.py: -------------------------------------------------------------------------------- 1 | import coremltools 2 | from coremltools.converters.keras._keras2_converter import * 3 | from coremltools.converters.keras._keras2_converter import _KERAS_LAYER_REGISTRY 4 | from coremltools.converters.keras import _topology2 5 | from coremltools.converters.keras._topology2 import _KERAS_SKIP_LAYERS 6 | from coremltools.models.neural_network import NeuralNetworkBuilder as _NeuralNetworkBuilder 7 | from coremltools.proto import FeatureTypes_pb2 as _FeatureTypes_pb2 8 | from collections import OrderedDict as _OrderedDict 9 | from coremltools.models import datatypes 10 | from coremltools.models import MLModel as _MLModel 11 | from coremltools.models.utils import save_spec as _save_spec 12 | import keras as _keras 13 | from coremltools._deps import HAS_KERAS2_TF as _HAS_KERAS2_TF 14 | import PIL.Image 15 | from six import string_types 16 | from coremltools.proto import FeatureTypes_pb2 as ft 17 | 18 | _IMAGE_SUFFIX = '_image' 19 | 20 | 21 | class FritzCoremlConverter(object): 22 | """A class to convert keras models to coreml. 23 | 24 | This is converter is a modified version of the one that comes packaged with 25 | coremltools, but it allows the user to define custom layer mappings from 26 | keras to coreml. 27 | """ 28 | 29 | @classmethod 30 | def _check_unsupported_layers(cls, model, supported_layers): 31 | """Check for any unsupported layers in the keras model. 32 | 33 | Args: 34 | model - a keras model 35 | supported_layers - a dictionary of supported layers. Keys are keras 36 | layer classes and values are corresponding 37 | coreml layer classes. 38 | """ 39 | for i, layer in enumerate(model.layers): 40 | if (isinstance(layer, _keras.models.Sequential) or 41 | isinstance(layer, _keras.models.Model)): 42 | cls._check_unsupported_layers(layer) 43 | else: 44 | if type(layer) not in supported_layers: 45 | print(supported_layers) 46 | raise ValueError( 47 | "Keras layer '%s' not supported. " % str(type(layer)) 48 | ) 49 | if isinstance(layer, _keras.layers.wrappers.TimeDistributed): 50 | if type(layer.layer) not in supported_layers: 51 | raise ValueError( 52 | "Keras layer '%s' not supported. 
" % 53 | str(type(layer.layer)) 54 | ) 55 | if isinstance(layer, _keras.layers.wrappers.Bidirectional): 56 | if not isinstance(layer.layer, 57 | _keras.layers.recurrent.LSTM): 58 | raise ValueError( 59 | 'Keras bi-directional wrapper conversion supports ' 60 | 'only LSTM layer at this time. ') 61 | 62 | @staticmethod 63 | def _get_layer_converter_fn(layer, supported_layers): 64 | """Get the right converter function for Keras. 65 | 66 | Args: 67 | layer - a keras layer 68 | supported_layers - a dictionary of supported layers. Keys are keras 69 | layer classes and values are corresponding 70 | coreml layer classes. 71 | Returns: 72 | layer - a coreml layer 73 | """ 74 | layer_type = type(layer) 75 | if layer_type in supported_layers: 76 | return supported_layers[layer_type] 77 | else: 78 | raise TypeError( 79 | "Keras layer of type %s is not supported." % type(layer) 80 | ) 81 | 82 | @staticmethod 83 | def _convert_multiarray_output_to_image(spec, feature_name, is_bgr=False): 84 | """Convert Core ML multiarray output to an image output. 85 | 86 | This modifies the core ml spec in place. 87 | 88 | spec - a Core ML spec protobuf object. 89 | feature_name - the name of the output feature to convert 90 | is_bgr - if true, assume image data is already in BGR mode. 91 | Default False 92 | """ 93 | for output in spec.description.output: 94 | if output.name != feature_name: 95 | continue 96 | if output.type.WhichOneof('Type') != 'multiArrayType': 97 | raise ValueError( 98 | "{} is not a multiarray type".format(output.name,) 99 | ) 100 | array_shape = tuple(output.type.multiArrayType.shape) 101 | if len(array_shape) == 2: 102 | height, width = array_shape 103 | output.type.imageType.colorSpace = \ 104 | ft.ImageFeatureType.ColorSpace.Value('GRAYSCALE') 105 | else: 106 | channels, height, width = array_shape 107 | 108 | if channels == 1: 109 | output.type.imageType.colorSpace = \ 110 | ft.ImageFeatureType.ColorSpace.Value('GRAYSCALE') 111 | elif channels == 3: 112 | if is_bgr: 113 | output.type.imageType.colorSpace = \ 114 | ft.ImageFeatureType.ColorSpace.Value('BGR') 115 | else: 116 | output.type.imageType.colorSpace = \ 117 | ft.ImageFeatureType.ColorSpace.Value('RGB') 118 | else: 119 | raise ValueError( 120 | "Channel Value {} not supported for image inputs" 121 | .format(channels,) 122 | ) 123 | 124 | output.type.imageType.width = width 125 | output.type.imageType.height = height 126 | 127 | @classmethod 128 | def convert_keras( 129 | cls, 130 | model, 131 | input_names=None, 132 | output_names=None, 133 | image_input_names=[], 134 | image_output_names=[], 135 | deprocessing_args={}, 136 | is_bgr=False, 137 | is_grayscale=False, 138 | red_bias=0.0, 139 | green_bias=0.0, 140 | blue_bias=0.0, 141 | gray_bias=0.0, 142 | image_scale=1.0, 143 | class_labels=None, 144 | predicted_feature_name=None, 145 | custom_layers=None): 146 | """ 147 | Convert a Keras model to a Core ML Model. 148 | 149 | model - a Keras model to convert 150 | input_names - names of input layers. Default None 151 | output_names - names of output layers. Default None 152 | image_input_names - a list of input names that are image datatypes 153 | image_output_names - a list of output names that are image datatypes 154 | preprocessing_args - a dictionary of arguments for input preprocessing 155 | class_labels - Class labels for outputs, 156 | predicted_feature_name - name for predicted features, 157 | custom_layers - a dictionary of custom layer conversions. 
Keys are 158 | Keras layer classes, values are coreml layer functions 159 | 160 | Returns: 161 | mlmodel - a coreml model object. 162 | """ 163 | if isinstance(model, string_types): 164 | model = _keras.models.load_model(model) 165 | elif isinstance(model, tuple): 166 | model = _load_keras_model(model[0], model[1]) 167 | 168 | # Merge the custom layers with the Keras layer registry 169 | supported_layers = {} 170 | supported_layers.update(_KERAS_LAYER_REGISTRY) 171 | if custom_layers: 172 | supported_layers.update(custom_layers) 173 | 174 | # Check valid versions 175 | cls._check_unsupported_layers(model, supported_layers) 176 | 177 | # Build network graph to represent Keras model 178 | graph = _topology2.NetGraph(model) 179 | graph.build() 180 | graph.remove_skip_layers(_KERAS_SKIP_LAYERS) 181 | graph.insert_1d_permute_layers() 182 | graph.insert_permute_for_spatial_bn() 183 | graph.defuse_activation() 184 | graph.remove_internal_input_layers() 185 | graph.make_output_layers() 186 | 187 | # The graph should be finalized before executing this 188 | graph.generate_blob_names() 189 | graph.add_recurrent_optionals() 190 | 191 | inputs = graph.get_input_layers() 192 | outputs = graph.get_output_layers() 193 | 194 | # check input / output names validity 195 | if input_names is not None: 196 | if isinstance(input_names, string_types): 197 | input_names = [input_names] 198 | else: 199 | input_names = ['input' + str(i + 1) for i in range(len(inputs))] 200 | if output_names is not None: 201 | if isinstance(output_names, string_types): 202 | output_names = [output_names] 203 | else: 204 | output_names = ['output' + str(i + 1) for i in range(len(outputs))] 205 | 206 | if (image_input_names is not None and 207 | isinstance(image_input_names, string_types)): 208 | image_input_names = [image_input_names] 209 | 210 | graph.reset_model_input_names(input_names) 211 | graph.reset_model_output_names(output_names) 212 | 213 | # Keras -> Core ML input dimension dictionary 214 | # (None, None) -> [1, 1, 1, 1, 1] 215 | # (None, D) -> [D] or [D, 1, 1, 1, 1] 216 | # (None, Seq, D) -> [Seq, 1, D, 1, 1] 217 | # (None, H, W, C) -> [C, H, W] 218 | # (D) -> [D] 219 | # (Seq, D) -> [Seq, 1, 1, D, 1] 220 | # (Batch, Sequence, D) -> [D] 221 | 222 | # Retrieve input shapes from model 223 | if type(model.input_shape) is list: 224 | input_dims = [filter(None, x) for x in model.input_shape] 225 | unfiltered_shapes = model.input_shape 226 | else: 227 | input_dims = [filter(None, model.input_shape)] 228 | unfiltered_shapes = [model.input_shape] 229 | 230 | for idx, dim in enumerate(input_dims): 231 | unfiltered_shape = unfiltered_shapes[idx] 232 | dim = list(dim) 233 | if len(dim) == 0: 234 | # Used to be [None, None] before filtering; indicating 235 | # unknown sequence length 236 | input_dims[idx] = tuple([1]) 237 | elif len(dim) == 1: 238 | s = graph.get_successors(inputs[idx])[0] 239 | if isinstance(graph.get_keras_layer(s), 240 | _keras.layers.embeddings.Embedding): 241 | # Embedding layer's special input (None, D) where D is 242 | # actually sequence length 243 | input_dims[idx] = (1,) 244 | else: 245 | input_dims[idx] = dim # dim is just a number 246 | elif len(dim) == 2: # [Seq, D] 247 | input_dims[idx] = (dim[1],) 248 | elif len(dim) == 3: # H,W,C 249 | if (len(unfiltered_shape) > 3): 250 | # keras uses the reverse notation from us 251 | input_dims[idx] = (dim[2], dim[0], dim[1]) 252 | else: 253 | # keras provided fixed batch and sequence length, so 254 | # the input was (batch, sequence, channel) 255 | input_dims[idx] 
= (dim[2],) 256 | else: 257 | raise ValueError( 258 | 'Input' + input_names[idx] + 'has input shape of length' + 259 | str(len(dim))) 260 | 261 | # Retrieve output shapes from model 262 | if type(model.output_shape) is list: 263 | output_dims = [filter(None, x) for x in model.output_shape] 264 | else: 265 | output_dims = [filter(None, model.output_shape[1:])] 266 | 267 | for idx, dim in enumerate(output_dims): 268 | dim = list(dim) 269 | if len(dim) == 1: 270 | output_dims[idx] = dim 271 | elif len(dim) == 2: # [Seq, D] 272 | output_dims[idx] = (dim[1],) 273 | elif len(dim) == 3: 274 | output_dims[idx] = (dim[2], dim[0], dim[1]) 275 | 276 | input_types = [datatypes.Array(*dim) for dim in input_dims] 277 | output_types = [datatypes.Array(*dim) for dim in output_dims] 278 | 279 | # Some of the feature handling is sensitive about string vs unicode 280 | input_names = map(str, input_names) 281 | output_names = map(str, output_names) 282 | is_classifier = class_labels is not None 283 | if is_classifier: 284 | mode = 'classifier' 285 | else: 286 | mode = None 287 | 288 | # assuming these match 289 | input_features = list(zip(input_names, input_types)) 290 | output_features = list(zip(output_names, output_types)) 291 | 292 | builder = _NeuralNetworkBuilder( 293 | input_features, output_features, mode=mode 294 | ) 295 | 296 | for iter, layer in enumerate(graph.layer_list): 297 | keras_layer = graph.keras_layer_map[layer] 298 | print("%d : %s, %s" % (iter, layer, keras_layer)) 299 | if isinstance(keras_layer, _keras.layers.wrappers.TimeDistributed): 300 | keras_layer = keras_layer.layer 301 | 302 | converter_func = cls._get_layer_converter_fn( 303 | keras_layer, supported_layers 304 | ) 305 | input_names, output_names = graph.get_layer_blobs(layer) 306 | converter_func( 307 | builder, 308 | layer, 309 | input_names, 310 | output_names, 311 | keras_layer 312 | ) 313 | 314 | # Set the right inputs and outputs on the model description (interface) 315 | builder.set_input(input_names, input_dims) 316 | builder.set_output(output_names, output_dims) 317 | 318 | # Since we aren't mangling anything the user gave us, we only need to 319 | # update the model interface here 320 | builder.add_optionals(graph.optional_inputs, graph.optional_outputs) 321 | 322 | # Add classifier classes (if applicable) 323 | if is_classifier: 324 | classes_in = class_labels 325 | if isinstance(classes_in, string_types): 326 | import os 327 | if not os.path.isfile(classes_in): 328 | raise ValueError( 329 | "Path to class labels (%s) does not exist." 
% 330 | classes_in 331 | ) 332 | with open(classes_in, 'r') as f: 333 | classes = f.read() 334 | classes = classes.splitlines() 335 | elif type(classes_in) is list: # list[int or str] 336 | classes = classes_in 337 | else: 338 | raise ValueError( 339 | 'Class labels must be a list of integers / ' 340 | 'strings, or a file path' 341 | ) 342 | 343 | if predicted_feature_name is not None: 344 | builder.set_class_labels( 345 | classes, 346 | predicted_feature_name=predicted_feature_name 347 | ) 348 | else: 349 | builder.set_class_labels(classes) 350 | 351 | # Set pre-processing paramsters 352 | builder.set_pre_processing_parameters( 353 | image_input_names=image_input_names, 354 | is_bgr=is_bgr, 355 | red_bias=red_bias, 356 | green_bias=green_bias, 357 | blue_bias=blue_bias, 358 | gray_bias=gray_bias, 359 | image_scale=image_scale) 360 | 361 | # Convert the image outputs to actual image datatypes 362 | for output_name in output_names: 363 | if output_name in image_output_names: 364 | cls._convert_multiarray_output_to_image( 365 | builder.spec, output_name, is_bgr=is_bgr 366 | ) 367 | 368 | # Return the protobuf spec 369 | spec = builder.spec 370 | return _MLModel(spec) 371 | -------------------------------------------------------------------------------- /image_segmentation/image_segmentation/icnet.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from functools import partial 3 | import os 4 | 5 | from keras.layers import Activation 6 | from keras.layers import Conv2D 7 | from keras.layers import Add 8 | from keras.layers import MaxPooling2D 9 | from keras.layers import AveragePooling2D 10 | from keras.layers import ZeroPadding2D 11 | from keras.layers import Input 12 | from keras.layers import BatchNormalization 13 | from keras.layers import UpSampling2D 14 | from keras.models import Model 15 | 16 | from tensorflow.python.lib.io import file_io 17 | logger = logging.getLogger('icnet') 18 | 19 | 20 | class ICNetModelFactory(object): 21 | """Generates ICNet Keras Models.""" 22 | 23 | @staticmethod 24 | def _light_cnn_block( 25 | out, 26 | filter_scale, 27 | block_name, 28 | strides=[1, 1, 1], 29 | include_projection=False): 30 | """Construct a light convolution block. 31 | 32 | Light convolution blocks are used to extract features at the start 33 | of a branch for a given scale in the pyramid network. 34 | 35 | Args: 36 | out - The output from a previous Keras layer 37 | filter_scale (int) - the base number of filters for the block 38 | block_name (str) - the name prefix for the block 39 | strides (optional, List[Int]) - a list of strides for each layer 40 | in the block. 
If a projection convolution is included, the 41 | stride is set to be the same as the first convolution 42 | include_projection (optional, bool) - if true, include a projection 43 | convolution 44 | Returns 45 | out - a keras layer output 46 | """ 47 | conv_fn = partial( 48 | Conv2D, 49 | kernel_size=3, 50 | padding='same', 51 | use_bias=False, 52 | activation='relu' 53 | ) 54 | 55 | out = conv_fn( 56 | filters=filter_scale, 57 | strides=strides[0], 58 | name='%s_1_3x3' % block_name)(out) 59 | out = BatchNormalization(name='%s_1_3x3_bn' % block_name)(out) 60 | out = conv_fn( 61 | filters=filter_scale, 62 | strides=strides[1], 63 | name='%s_2_3x3' % block_name)(out) 64 | out = BatchNormalization(name='%s_2_3x3_bn' % block_name)(out) 65 | out = conv_fn( 66 | filters=filter_scale * 2, 67 | strides=strides[2], 68 | name='%s_3_3x3' % block_name)(out) 69 | out = BatchNormalization(name='%s_3_3x3_bn' % block_name)(out) 70 | 71 | if include_projection: 72 | out = Conv2D( 73 | filters=filter_scale * 4, 74 | kernel_size=1, 75 | name='%s_proj' % block_name 76 | )(out) 77 | out = BatchNormalization(name='%s_proj_bn' % block_name)(out) 78 | 79 | return out 80 | 81 | @staticmethod 82 | def _inner_conv_block( 83 | out, 84 | filter_scale, 85 | block_name, 86 | strides=[1, 1, 1], 87 | dilation_rate=1): 88 | """Construct an inner convolution block. 89 | 90 | Inner convolution blocks are found repeatedly in the ICNet structure. 91 | 92 | Args: 93 | out - The output from a previous Keras layer 94 | filter_scale (int) - the base number of filters for the block 95 | block_name (str) - the name prefix for the block 96 | strides (optional, List[Int]) - a list of strides for each layer 97 | in the block. If a projection convolution is included, the 98 | stride is set to be the same as the first convolution 99 | dilation_rate (optional, Int) - a dilation rate to include atrous 100 | convolutions for certain blocks 101 | 102 | Returns 103 | out - a keras layer output 104 | """ 105 | conv_fn = partial( 106 | Conv2D, 107 | activation='relu', 108 | use_bias=False, 109 | ) 110 | out = conv_fn( 111 | filters=filter_scale, 112 | kernel_size=1, 113 | strides=strides[0], 114 | name='%s_1x1_reduce' % block_name)(out) 115 | out = BatchNormalization(name='%s_1x1_reduce_bn' % block_name)(out) 116 | out = ZeroPadding2D( 117 | padding=dilation_rate, 118 | name='%s_padding' % block_name)(out) 119 | out = conv_fn( 120 | filters=filter_scale, 121 | kernel_size=3, 122 | strides=strides[1], 123 | dilation_rate=dilation_rate, 124 | name='%s_3x3' % block_name)(out) 125 | out = BatchNormalization(name='%s_3x3_bn' % block_name)(out) 126 | out = conv_fn( 127 | filters=filter_scale * 4, 128 | kernel_size=1, 129 | activation=None, 130 | strides=strides[2], 131 | name='%s_1x1_increase' % block_name)(out) 132 | out = BatchNormalization(name='%s_1x1_increase_bn' % block_name)(out) 133 | return out 134 | 135 | @classmethod 136 | def _conv_block( 137 | cls, 138 | out, 139 | filter_scale, 140 | block_name, 141 | include_projection=False, 142 | strides=[1, 1, 1], 143 | dilation_rate=1): 144 | """Construct an convolution block. 145 | 146 | Convolution blocks are found repeatedly in the ICNet structure. 147 | The block is structured similarly to a residual block with multiple 148 | branches. 
149 | 150 | Args: 151 | out - The output from a previous Keras layer 152 | filter_scale (int) - the base number of filters for the block 153 | block_name (str) - the name prefix for the block 154 | include_projection (optional, bool) - if true, include a projection 155 | convolution 156 | strides (optional, List[Int]) - a list of strides for each layer 157 | in the block. If a projection convolution is included, the 158 | stride is set to be the same as the first convolution 159 | dilation_rate (optional, Int) - a dilation rate to include atrous 160 | convolutions for certain blocks 161 | 162 | Returns 163 | out - a keras layer output 164 | """ 165 | # Branch A 166 | if include_projection: 167 | out_a = Conv2D( 168 | filters=filter_scale * 4, 169 | kernel_size=1, 170 | use_bias=False, 171 | strides=strides[0], 172 | name='%s_1x1_proj' % block_name 173 | )(out) 174 | out_a = BatchNormalization( 175 | name='%s_1x1_proj_bn' % block_name 176 | )(out_a) 177 | else: 178 | out_a = out 179 | 180 | # Branch B 181 | out_b = cls._inner_conv_block( 182 | out, 183 | filter_scale, 184 | block_name, 185 | strides=strides, 186 | dilation_rate=dilation_rate 187 | ) 188 | 189 | # Combine 190 | out = Add(name='%s_add' % block_name)([out_a, out_b]) 191 | out = Activation('relu', name='%s_relu' % block_name)(out) 192 | return out 193 | 194 | @staticmethod 195 | def _cff_block( 196 | out_a, 197 | out_b, 198 | filter_scale, 199 | block_name, 200 | include_projection=False): 201 | """Construct an cascading feature fusion (CFF) block. 202 | 203 | CFF blocks are used to fuse features extracted from multiple scales. 204 | 205 | Args: 206 | out_a - The output layer from lower resoltuon branch 207 | out_b - The output layer from the higher resolution branch to be 208 | merged. 209 | filter_scale (int) - the base number of filters for the block 210 | block_name (str) - the name prefix for the block 211 | include_projection (optional, bool) - if true, include a projection 212 | convolution 213 | Returns 214 | out - a keras layer output 215 | """ 216 | aux_1 = UpSampling2D(size=(2, 2), name='%s_interp' % block_name, 217 | interpolation='bilinear')(out_a) 218 | out_a = ZeroPadding2D(padding=2, name='%s_padding' % block_name)(aux_1) 219 | out_a = Conv2D( 220 | filters=filter_scale, 221 | kernel_size=3, 222 | dilation_rate=2, 223 | use_bias=False, 224 | name='%s_conv_3x3' % block_name 225 | )(out_a) 226 | out_a = BatchNormalization(name='%s_conv_bn' % block_name)(out_a) 227 | 228 | if include_projection: 229 | out_b = Conv2D( 230 | filters=filter_scale, 231 | kernel_size=1, 232 | use_bias=False, 233 | name='%s_proj' % block_name)(out_b) 234 | out_b = BatchNormalization(name='%s_proj_bn' % block_name)(out_b) 235 | 236 | out_a = Add(name='%s_sum' % block_name)([out_a, out_b]) 237 | out_a = Activation('relu', name='%s_sum_relu' % block_name)(out_a) 238 | 239 | return out_a, aux_1 240 | 241 | @classmethod 242 | def build( 243 | cls, 244 | img_size, 245 | n_classes, 246 | alpha=1.0, 247 | weights_path=None, 248 | train=False, 249 | input_tensor=None): 250 | """Build an ICNet Model. 251 | 252 | Args: 253 | image_size (int): the size of each image. only square images are 254 | supported. 255 | n_classes (int): the number of output labels to predict. 256 | weights_path (str): (optional) a path to a Keras model file to 257 | load after the network is constructed. Useful for re-training. 258 | train (bool): (optional) if true, add additional output nodes to 259 | the network for training. 
260 | 261 | Returns: 262 | model (keras.models.Model): A Keras model 263 | """ 264 | if img_size % 384 != 0: 265 | raise Exception('`img_size` must be a multiple of 384.') 266 | logger.info('Building ICNet model.') 267 | inpt = Input(shape=(img_size, img_size, 3), tensor=input_tensor) 268 | 269 | # The full scale branch 270 | out_1 = cls._light_cnn_block( 271 | inpt, 272 | filter_scale=int(alpha * 32), 273 | strides=[2, 2, 2], 274 | include_projection=True, 275 | block_name='sub1_conv' 276 | ) 277 | 278 | # The 1/2 scale branch 279 | out_2 = AveragePooling2D(pool_size=(2, 2), name='sub2_data')(inpt) 280 | out_2 = cls._light_cnn_block( 281 | out_2, 282 | filter_scale=int(alpha * 32), 283 | strides=[2, 1, 1], 284 | block_name='sub2_conv' 285 | ) 286 | out_2 = MaxPooling2D( 287 | pool_size=3, strides=2, name='sub2_pool1_3x3' 288 | )(out_2) 289 | 290 | for layer_index in range(1, 4): 291 | out_2 = cls._conv_block( 292 | out_2, 293 | filter_scale=int(alpha * 32), 294 | include_projection=(layer_index == 1), 295 | block_name='sub2_conv%d_%d' % (2, layer_index) 296 | ) 297 | 298 | # The third large conv block gets split off into another branch. 299 | out_2 = cls._conv_block( 300 | out_2, 301 | filter_scale=int(alpha * 64), 302 | include_projection=True, 303 | strides=[2, 1, 1], 304 | block_name='sub2_conv%d_%d' % (3, 1) 305 | ) 306 | 307 | # The 1/4 scale branch 308 | out_4 = AveragePooling2D(pool_size=(2, 2), name='sub4_conv3_1')(out_2) 309 | 310 | for layer_index in range(2, 5): 311 | out_4 = cls._conv_block( 312 | out_4, 313 | filter_scale=int(alpha * 64), 314 | block_name='sub4_conv%d_%d' % (3, layer_index) 315 | ) 316 | 317 | for layer_index in range(1, 7): 318 | out_4 = cls._conv_block( 319 | out_4, 320 | filter_scale=int(alpha * 128), 321 | dilation_rate=2, 322 | include_projection=(layer_index == 1), 323 | block_name='sub4_conv%d_%d' % (4, layer_index) 324 | ) 325 | 326 | for sub_index in range(1, 4): 327 | out_4 = cls._conv_block( 328 | out_4, 329 | filter_scale=int(alpha * 256), 330 | dilation_rate=4, 331 | include_projection=(sub_index == 1), 332 | block_name='sub4_conv%d_%d' % (5, sub_index) 333 | ) 334 | # In this version we've fixed the input dimensions to be square 335 | # We also are restricting dimsensions to be multiples of 384 which 336 | # will allow us to use standard upsampling layers for resizing. 
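# Note on the pooling arithmetic below: the sub4 branch reaches this pyramid
# pooling stage at 1/32 of the input resolution, so pool_height works out to
# 12 * pool_scale (e.g. 24 for a 768x768 input). The four average-pooling
# branches shrink that map to roughly 1x1, 2x2, 3x3, and 4x4 and are then
# upsampled by 12, 6, 4, and 3 times pool_scale respectively, which brings
# every branch back to pool_height x pool_height so it can be summed with
# out_4.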
337 | pool_height, _ = out_4.shape[1:3].as_list() 338 | pool_scale = int(img_size / 384) 339 | pool1 = AveragePooling2D(pool_size=pool_height, 340 | strides=pool_height, 341 | name='sub4_conv5_3_pool1')(out_4) 342 | pool1 = UpSampling2D(size=12 * pool_scale, 343 | name='sub4_conv5_3_pool1_interp', 344 | interpolation='bilinear')(pool1) 345 | pool2 = AveragePooling2D(pool_size=pool_height // 2, 346 | strides=pool_height // 2, 347 | name='sub4_conv5_3_pool2')(out_4) 348 | pool2 = UpSampling2D(size=6 * pool_scale, 349 | name='sub4_conv5_3_pool2_interp', 350 | interpolation='bilinear')(pool2) 351 | pool3 = AveragePooling2D(pool_size=pool_height // 3, 352 | strides=pool_height // 3, 353 | name='sub4_conv5_3_pool3')(out_4) 354 | pool3 = UpSampling2D(size=4 * pool_scale, 355 | name='sub4_conv5_3_pool3_interp', 356 | interpolation='bilinear')(pool3) 357 | pool4 = AveragePooling2D(pool_size=pool_height // 4, 358 | strides=pool_height // 4, 359 | name='sub4_conv5_3_pool4')(out_4) 360 | pool4 = UpSampling2D(size=3 * pool_scale, 361 | name='sub4_conv5_3_pool6_interp', 362 | interpolation='bilinear')(pool4) 363 | 364 | out_4 = Add( 365 | name='sub4_conv5_3_sum' 366 | )([out_4, pool1, pool2, pool3, pool4]) 367 | out_4 = Conv2D( 368 | filters=int(alpha * 256), 369 | kernel_size=1, 370 | activation='relu', 371 | use_bias=False, 372 | name='sub4_conv5_4_k1')(out_4) 373 | out_4 = BatchNormalization(name='sub4_conv5_4_k1_bn')(out_4) 374 | 375 | out_2, aux_1 = cls._cff_block( 376 | out_4, 377 | out_2, 378 | int(alpha * 128), 379 | block_name='sub24_cff', 380 | include_projection=True 381 | ) 382 | 383 | out_1, aux_2 = cls._cff_block( 384 | out_2, 385 | out_1, 386 | int(alpha * 128), 387 | block_name='sub12_cff' 388 | ) 389 | out_1 = UpSampling2D(size=(2, 2), name='sub12_sum_interp', 390 | interpolation='bilinear')(out_1) 391 | 392 | out_1 = Conv2D(n_classes, 1, activation='softmax', 393 | name='conv6_cls')(out_1) 394 | 395 | out = UpSampling2D(size=(4, 4), name='conv6_interp', 396 | interpolation='bilinear')(out_1) 397 | 398 | if train: 399 | aux_1 = Conv2D(n_classes, 1, activation='softmax', 400 | name='sub4_out')(aux_1) 401 | aux_2 = Conv2D(n_classes, 1, activation='softmax', 402 | name='sub24_out')(aux_2) 403 | # The loss during training is generated from these three outputs. 404 | # The final output layer is not needed. 405 | model = Model(inputs=inpt, outputs=[out_1, aux_2, aux_1]) 406 | else: 407 | model = Model(inputs=inpt, outputs=out) 408 | 409 | if weights_path is not None: 410 | if weights_path.startswith('gs://'): 411 | weights_path = _copy_file_from_gcs(weights_path) 412 | logger.info('Loading weights from %s.' % weights_path) 413 | model.load_weights(weights_path, by_name=True) 414 | logger.info('Done building model.') 415 | 416 | return model 417 | 418 | 419 | def _copy_file_from_gcs(file_path): 420 | """Copy a file from gcs to local machine. 421 | 422 | Args: 423 | file_path (str): a GCS url to download 424 | Returns: 425 | str: a local path to the file 426 | """ 427 | logger.info('Downloading %s' % file_path) 428 | with file_io.FileIO(file_path, mode='rb') as input_f: 429 | basename = os.path.basename(file_path) 430 | local_path = os.path.join('/tmp', basename) 431 | with file_io.FileIO(local_path, mode='w+') as output_f: 432 | output_f.write(input_f.read()) 433 | return local_path 434 | --------------------------------------------------------------------------------
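As a usage illustration (not part of the repository), the following is a minimal sketch of how ICNetModelFactory.build might be called for inference. The checkpoint path and class count are assumptions; the import path follows the image_segmentation package layout shown above.

import numpy as np

from image_segmentation.icnet import ICNetModelFactory

# `img_size` must be a multiple of 384. With train=False the factory returns
# a model with a single softmax output upsampled back to the input resolution.
model = ICNetModelFactory.build(
    img_size=768,
    n_classes=151,                       # illustrative label count
    alpha=1.0,
    weights_path='trained_weights.h5',   # hypothetical checkpoint path
    train=False,
)

# Per-pixel class probabilities; output shape is (1, 768, 768, n_classes).
dummy_batch = np.zeros((1, 768, 768, 3), dtype='float32')
probabilities = model.predict(dummy_batch)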