├── .gitignore ├── README.md ├── __init__.py ├── __main__.py ├── _deps └── __init__.py ├── _scripts ├── __init__.py └── converter.py ├── converters ├── __init__.py ├── caffe │ ├── __init__.py │ └── _caffe_converter.py ├── keras │ ├── __init__.py │ ├── _keras2_converter.py │ ├── _keras_converter.py │ ├── _layers.py │ ├── _layers2.py │ ├── _topology.py │ ├── _topology2.py │ └── _utils.py ├── libsvm │ ├── __init__.py │ ├── _libsvm_converter.py │ └── _libsvm_util.py ├── sklearn │ ├── _LinearSVC.py │ ├── _LinearSVR.py │ ├── _NuSVC.py │ ├── _NuSVR.py │ ├── _SVC.py │ ├── _SVR.py │ ├── __init__.py │ ├── _converter.py │ ├── _converter_internal.py │ ├── _decision_tree_classifier.py │ ├── _decision_tree_regressor.py │ ├── _dict_vectorizer.py │ ├── _gradient_boosting_classifier.py │ ├── _gradient_boosting_regressor.py │ ├── _imputer.py │ ├── _linear_regression.py │ ├── _logistic_regression.py │ ├── _normalizer.py │ ├── _one_hot_encoder.py │ ├── _random_forest_classifier.py │ ├── _random_forest_regressor.py │ ├── _sklearn_util.py │ ├── _standard_scaler.py │ ├── _svm_common.py │ └── _tree_ensemble.py └── xgboost │ ├── __init__.py │ ├── _tree.py │ └── _tree_ensemble.py ├── models ├── __init__.py ├── _feature_management.py ├── _interface_management.py ├── array_feature_extractor.py ├── datatypes.py ├── feature_vectorizer.py ├── model.py ├── neural_network.py ├── pipeline.py ├── tree_ensemble.py └── utils.py └── proto ├── ArrayFeatureExtractor_pb2.py ├── CategoricalMapping_pb2.py ├── DataStructures_pb2.py ├── DictVectorizer_pb2.py ├── FeatureTypes_pb2.py ├── FeatureVectorizer_pb2.py ├── GLMClassifier_pb2.py ├── GLMRegressor_pb2.py ├── Identity_pb2.py ├── Imputer_pb2.py ├── Model_pb2.py ├── NeuralNetwork_pb2.py ├── Normalizer_pb2.py ├── OneHotEncoder_pb2.py ├── SVM_pb2.py ├── Scaler_pb2.py ├── TreeEnsemble_pb2.py └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # Jupyter Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # SageMath parsed files 79 | *.sage.py 80 | 81 | # Environments 82 | .env 83 | .venv 84 | env/ 85 | venv/ 86 | ENV/ 87 | 88 | # Spyder project settings 89 | .spyderproject 90 | .spyproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | 95 | # mkdocs documentation 96 | /site 97 | 98 | # mypy 99 | .mypy_cache/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # coremltools 2 | 3 | A simple copy of https://pypi.python.org/pypi/coremltools to be hosted on GitHub 4 | 5 | See also the Apple docs: https://developer.apple.com/documentation/coreml/converting_trained_models_to_core_ml -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | """ 7 | Core ML is an Apple framework which allows developers to simply and easily integrate machine 8 | learning (ML) models into apps running on Apple devices (including iOS, watchOS, macOS, and 9 | tvOS). Core ML introduces a public file format (.mlmodel) for a broad set of ML methods 10 | including deep neural networks (both convolutional and recurrent), tree ensembles with boosting, 11 | and generalized linear models. Models in this format can be directly integrated into apps 12 | through Xcode. 13 | 14 | Core ML Tools is a Python package for creating, examining, and testing models in the .mlmodel 15 | format. In particular, it can be used to: 16 | 17 | * Convert existing models to .mlmodel format from popular machine learning tools including: 18 | Keras, Caffe, scikit-learn, libsvm, and XGBoost. 19 | * Express models in .mlmodel format through a simple API. 20 | * Make predictions with an .mlmodel (on select platforms for testing purposes). 21 | 22 | For more information: http://developer.apple.com/documentation/coreml 23 | """ 24 | 25 | # File format versions 26 | SPECIFICATION_VERSION = 1 27 | 28 | # expose sub packages as directories 29 | import converters 30 | import proto 31 | import models 32 | from models import utils 33 | 34 | from _scripts.converter import _main 35 | -------------------------------------------------------------------------------- /__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved.
2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from _scripts.converter import _main 7 | _main() 8 | -------------------------------------------------------------------------------- /_deps/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | """ 7 | List of all external dependencies for this package. Imported as 8 | optional includes 9 | """ 10 | from distutils.version import StrictVersion as _StrictVersion 11 | import logging as _logging 12 | import re as _re 13 | 14 | def __get_version(version): 15 | # matching 1.6.1, and 1.6.1rc, 1.6.1.dev 16 | version_regex = '^\d+\.\d+\.\d+' 17 | version = _re.search(version_regex, str(version)).group(0) 18 | return _StrictVersion(version) 19 | 20 | # --------------------------------------------------------------------------------------- 21 | HAS_SKLEARN = True 22 | SKLEARN_MIN_VERSION = '0.15' 23 | def __get_sklearn_version(version): 24 | # matching 0.15b, 0.16bf, etc 25 | version_regex = '^\d+\.\d+' 26 | version = _re.search(version_regex, str(version)).group(0) 27 | return _StrictVersion(version) 28 | 29 | try: 30 | import sklearn 31 | if __get_sklearn_version(sklearn.__version__) < _StrictVersion(SKLEARN_MIN_VERSION): 32 | HAS_SKLEARN = False 33 | _logging.warn(('scikit-learn version %s is not supported. Minimum required version: %s. ' 34 | 'Disabling scikit-learn conversion API.') 35 | % (sklearn.__version__, SKLEARN_MIN_VERSION) ) 36 | except: 37 | HAS_SKLEARN = False 38 | 39 | # --------------------------------------------------------------------------------------- 40 | HAS_LIBSVM = True 41 | try: 42 | import svm 43 | except: 44 | HAS_LIBSVM = False 45 | 46 | # --------------------------------------------------------------------------------------- 47 | HAS_XGBOOST = True 48 | try: 49 | import xgboost 50 | except: 51 | HAS_XGBOOST = False 52 | 53 | # --------------------------------------------------------------------------------------- 54 | HAS_KERAS_TF = True 55 | HAS_KERAS2_TF = True 56 | KERAS_MIN_VERSION = '1.2.2' 57 | KERAS_MAX_VERSION = '2.0.4' 58 | TF_MIN_VERSION = '1.0.0' 59 | TF_MAX_VERSION = '1.1.1' 60 | 61 | try: 62 | # Prevent keras from printing things that are not errors to standard error. 63 | import sys 64 | import StringIO 65 | stderr = sys.stderr 66 | try: 67 | temp = StringIO.StringIO() 68 | sys.stderr = temp 69 | import keras 70 | except: 71 | # Print out any actual error message and re-raise. 72 | sys.stderr = stderr 73 | sys.stderr.write(temp.getvalue()) 74 | raise 75 | finally: 76 | sys.stderr = stderr 77 | import tensorflow 78 | 79 | tf_ver = __get_version(tensorflow.__version__) 80 | k_ver = __get_version(keras.__version__) 81 | 82 | # keras 1 version too old 83 | if k_ver < _StrictVersion(KERAS_MIN_VERSION): 84 | HAS_KERAS_TF = False 85 | HAS_KERAS2_TF = False 86 | _logging.warn(('Keras version %s is not supported. Minimum required version: %s .' 87 | 'Keras conversion will be disabled.') 88 | % (keras.__version__, KERAS_MIN_VERSION)) 89 | # keras version too new 90 | if k_ver > _StrictVersion(KERAS_MAX_VERSION): 91 | HAS_KERAS_TF = False 92 | _logging.warn(('Keras version %s detected. 
Last version known to be fully compatible of Keras is %s .') 93 | % (keras.__version__, KERAS_MAX_VERSION)) 94 | # Using Keras 2 rather than 1 95 | if k_ver >= _StrictVersion('2.0.0'): 96 | HAS_KERAS_TF = False 97 | HAS_KERAS2_TF = True 98 | # Using Keras 1 rather than 2 99 | else: 100 | HAS_KERAS_TF = True 101 | HAS_KERAS2_TF = False 102 | # TensorFlow too old 103 | if tf_ver < _StrictVersion(TF_MIN_VERSION): 104 | HAS_KERAS_TF = False 105 | HAS_KERAS2_TF = False 106 | _logging.warn(('TensorFlow version %s is not supported. Minimum required version: %s .' 107 | 'Keras conversion will be disabled.') 108 | % (tensorflow.__version__, TF_MIN_VERSION)) 109 | if tf_ver > _StrictVersion(TF_MAX_VERSION): 110 | _logging.warn(('TensorFlow version %s detected. Last version known to be fully compatible is %s .') 111 | % (tensorflow.__version__, TF_MAX_VERSION)) 112 | if keras.backend.backend() != 'tensorflow': 113 | HAS_KERAS_TF = False 114 | HAS_KERAS2_TF = False 115 | _logging.warn(('Unsupported Keras backend (only Tensorflow is currently supported). ' 116 | 'Keras conversion will be disabled.')) 117 | 118 | except: 119 | HAS_KERAS_TF = False 120 | HAS_KERAS2_TF = False 121 | -------------------------------------------------------------------------------- /_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | 7 | -------------------------------------------------------------------------------- /_scripts/converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import logging as _logging 7 | 8 | # expose files as imports 9 | from ..models import utils 10 | 11 | from ..models import neural_network 12 | from ..models import MLModel 13 | from .. import converters 14 | from .. import proto 15 | 16 | import sys as _sys 17 | 18 | def _convert(args): 19 | if args.srcModelFormat == 'auto': 20 | if args.srcModelPath.endswith('.caffemodel') or args.caffeProtoTxtPath != '': 21 | args.srcModelFormat = 'caffe' 22 | elif args.srcModelPath.endswith('.h5') or args.kerasJsonPath != '': 23 | args.srcModelFormat = 'keras' 24 | else: 25 | print("error: coremlconverter: Unable to auto-detect model format. 
" 26 | "Please specify the model format using the 'srcModelFormat' argument.") 27 | _sys.exit(1) 28 | 29 | if args.srcModelFormat == 'caffe': 30 | if args.caffeProtoTxtPath: 31 | if args.meanImageProtoPath: 32 | model = (args.srcModelPath, args.caffeProtoTxtPath, args.meanImageProtoPath) 33 | else: 34 | model = (args.srcModelPath, args.caffeProtoTxtPath) 35 | else: 36 | model = args.srcModelPath 37 | try: 38 | model = converters.caffe.convert(model, 39 | image_input_names = set(args.imageInputNames), 40 | is_bgr = args.isBGR, 41 | red_bias = args.redBias, 42 | blue_bias = args.blueBias, 43 | green_bias = args.greenBias, 44 | gray_bias = args.grayBias, 45 | image_scale = args.scale, 46 | class_labels = args.classInputPath, 47 | predicted_feature_name = args.predictedFeatureName) 48 | model.save(args.dstModelPath) 49 | except Exception as e: 50 | print('error: coremlconverter: %s.' % str(e)) 51 | return 1 # error 52 | return 0 53 | 54 | elif args.srcModelFormat == 'keras': 55 | try: 56 | if not args.inputNames: 57 | raise TypeError("Neural network 'inputNames' are required for converting Keras models.") 58 | if not args.outputNames: 59 | raise TypeError("Neural network 'outputNames' are required for converting Keras models.") 60 | 61 | if args.kerasJsonPath: 62 | model = (args.kerasJsonPath, args.srcModelPath) 63 | else: 64 | model = args.srcModelPath 65 | model = converters.keras.convert(model, 66 | args.inputNames, 67 | args.outputNames, 68 | image_input_names = set(args.imageInputNames) if args.imageInputNames else None, 69 | is_bgr = args.isBGR, 70 | red_bias = args.redBias, 71 | blue_bias = args.blueBias, 72 | green_bias = args.greenBias, 73 | gray_bias = args.grayBias, 74 | image_scale = args.scale, 75 | class_labels = args.classInputPath if args.classInputPath else None, 76 | predicted_feature_name = args.predictedFeatureName) 77 | model.save(args.dstModelPath) 78 | except Exception as e: 79 | print('error: coremlconverter: %s.' % str(e)) 80 | return 1 # error 81 | return 0 82 | else: 83 | print('error: coremlconverter: Invalid srcModelFormat specified.') 84 | return 1 85 | 86 | def _main(): 87 | import argparse 88 | 89 | parser = argparse.ArgumentParser(description='Convert other model file formats to MLKit format (.mlmodel).') 90 | parser.add_argument('--srcModelFormat', type=unicode, choices=['auto', 'caffe', 'keras'], default='auto', help='Format of model at srcModelPath (default is to auto-detect).') 91 | parser.add_argument('--srcModelPath', type=unicode, required=True, help='Path to the model file of the external tool (e.g caffe weights proto binary, keras h5 binary') 92 | parser.add_argument('--dstModelPath', type=unicode, required=True, help='Path to save the model in format .mlmodel') 93 | parser.add_argument('--caffeProtoTxtPath', type=unicode, default='', help='Path to the .prototxt file if network differs from the source file (optional)') 94 | parser.add_argument('--meanImageProtoPath', type=unicode, default='', help='Path to the .binaryproto file containing the mean image if required by the network (optional). 
This requires a prototxt file to be specified.') 95 | parser.add_argument('--kerasJsonPath', type=unicode, default=None, help='Path to the .json file for keras if the network differs from the weights file (optional)') 96 | parser.add_argument('--inputNames', type=unicode, nargs='*', help='Names of the feature (input) columns, in order (required for keras models).') 97 | parser.add_argument('--outputNames', type=unicode, nargs='*', help='Names of the target (output) columns, in order (required for keras models).') 98 | parser.add_argument('--imageInputNames', type=unicode, default=[], action='append', help='Label the named input as an image. Can be specified more than once for multiple image inputs.') 99 | parser.add_argument('--isBGR', action='store_true', default=False, help='True if the image data in BGR order (RGB default)') 100 | parser.add_argument('--redBias', type=float, default=0.0, help='Bias value to be added to the red channel (optional, default 0.0)') 101 | parser.add_argument('--blueBias', type=float, default=0.0, help='Bias value to be added to the blue channel (optional, default 0.0)') 102 | parser.add_argument('--greenBias', type=float, default=0.0, help='Bias value to be added to the green channel (optional, default 0.0)') 103 | parser.add_argument('--grayBias', type=float, default=0.0, help='Bias value to be added to the gray channel for Grayscale images (optional, default 0.0)') 104 | parser.add_argument('--scale', type=float, default=1.0, help='Value by which the image data must be scaled (optional, default 1.0)') 105 | parser.add_argument('--classInputPath', type=unicode, default='', help='Path to class labels (ordered new line separated) for treating the neural network as a classifier') 106 | parser.add_argument('--predictedFeatureName', type=unicode, default='class_output', help='Name of the output feature that captures the class name (for classifiers models).') 107 | 108 | args = parser.parse_args() 109 | ret = _convert(args) 110 | _sys.exit(int(ret)) # cast to int or else the exit code is always 1 111 | 112 | -------------------------------------------------------------------------------- /converters/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | # expose directories as imports 7 | import libsvm 8 | import sklearn 9 | import xgboost 10 | import keras 11 | import caffe 12 | -------------------------------------------------------------------------------- /converters/caffe/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from _caffe_converter import convert 7 | -------------------------------------------------------------------------------- /converters/caffe/_caffe_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 
2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import sys as _sys 7 | 8 | def convert(model, image_input_names=[], is_bgr=False, 9 | red_bias=0.0, blue_bias=0.0, green_bias=0.0, gray_bias=0.0, 10 | image_scale=1.0, class_labels=None, predicted_feature_name=None): 11 | """ 12 | Convert a Caffe model to Core ML format. 13 | 14 | Parameters 15 | ---------- 16 | model: str | (str, str) | (str, str, str) 17 | 18 | A trained Caffe neural network model which can be represented as: 19 | 20 | - Path on disk to a trained Caffe model (.caffemodel) 21 | - A tuple of two paths, where the first path is the path to the .caffemodel 22 | file while the second is the path to the deploy.prototxt. 23 | - A tuple of three paths, where the first path is the path to the 24 | trained .caffemodel file, the second is the path to the 25 | deploy.prototxt while the third is a path to the mean image binary. 26 | 27 | image_input_names: [str] | str 28 | The name(s) of the input blob(s) in the Caffe model that can be treated 29 | as images by Core ML. All other inputs are treated as MultiArrays (N-D 30 | Arrays) by Core ML. 31 | 32 | is_bgr: bool 33 | Flag to determine if the input images are in pixel order (RGB or BGR) 34 | 35 | red_bias: float 36 | Bias value to be added to the red channel of the input image. 37 | Defaults to 0.0 38 | 39 | blue_bias: float 40 | Bias value to be added to the the blue channel of the input image. 41 | Defaults to 0.0 42 | 43 | green_bias: float 44 | Bias value to be added to the green channel of the input image. 45 | Defaults to 0.0 46 | 47 | gray_bias: float 48 | Bias value to be added to the input image (in grayscale). Defaults to 0.0 49 | 50 | image_scale: float 51 | Value by which the input images will be scaled before bias is added and 52 | Core ML model makes a prediction. Defaults to 1.0. 53 | 54 | class_labels: str 55 | Filepath where classes are parsed as a list of newline separated 56 | strings. Class labels map the index of the output of a neural network to labels in a classifier. 57 | Provide this argument to get a model of type classifier. 58 | 59 | predicted_feature_name: str 60 | Name of the output feature for the class labels exposed in the Core ML 61 | model (applies to classifiers only). Defaults to 'classLabel' 62 | 63 | Returns 64 | ------- 65 | model: MLModel 66 | Model in Core ML format. 67 | 68 | Examples 69 | -------- 70 | .. sourcecode:: python 71 | 72 | # Convert it with default input and output names 73 | >>> import coremltools 74 | >>> coreml_model = coremltools.converters.caffe.convert('my_caffe_model.caffemodel') 75 | 76 | # Saving the Core ML model to a file. 77 | >>> coreml_model.save('my_model.mlmodel') 78 | 79 | Sometimes, critical information in the Caffe converter is missing from the 80 | .caffemodel file. This information is present in the deploy.prototxt file. 81 | You can provide us with both files in the conversion process. 82 | 83 | .. sourcecode:: python 84 | 85 | >>> coreml_model = coremltools.converters.caffe.convert(('my_caffe_model.caffemodel', 'my_deploy.prototxt')) 86 | 87 | Some models (like Resnet-50) also require a mean image file which is 88 | subtracted from the input image before passing through the network. This 89 | file can also be provided during covnersion: 90 | 91 | .. sourcecode:: python 92 | 93 | >>> coreml_model = coremltools.converters.caffe.convert(('my_caffe_model.caffemodel', 94 | ... 
'my_deploy.prototxt', 'mean_image.binaryproto')) 95 | 96 | 97 | 98 | Input and output names used in the interface of the converted Core ML model are inferred from the .prototxt file, 99 | which contains a description of the network architecture. 100 | Input names are read from the input layer definition in the .prototxt. By default, they are of type MultiArray. 101 | Argument "image_input_names" can be used to assign image type to specific inputs. 102 | All the blobs that are "dangling", i.e. 103 | which do not feed as input to any other layer are taken as outputs. The .prototxt file can be modified to specify 104 | custom input and output names. 105 | 106 | The converted Core ML model is of type classifier when the argument "class_labels" is specified. 107 | 108 | Advanced usage with custom classifiers, and images: 109 | 110 | .. sourcecode:: python 111 | 112 | # Mark some inputs as Images 113 | >>> coreml_model = coremltools.converters.caffe.convert('my_caffe_model.caffemodel', 114 | ... image_input_names = 'my_image_input') 115 | 116 | # Export as a classifier with classes from a file 117 | >>> coreml_model = coremltools.converters.caffe.convert('my_caffe_model.caffemodel', 118 | ... image_input_names = 'my_image_input', class_labels = 'labels.txt') 119 | 120 | 121 | Sometimes the converter might return a message about not able to infer input data dimensions. 122 | This happens when the input size information is absent from the deploy.prototxt file. This can be easily provided by editing 123 | the .prototxt in a text editor. Simply add a snippet in the beginning, similar to the following, for each of the inputs to the model: 124 | 125 | .. code-block:: bash 126 | 127 | input: "my_image_input" 128 | input_dim: 1 129 | input_dim: 3 130 | input_dim: 227 131 | input_dim: 227 132 | 133 | Here we have specified an input with dimensions (1,3,227,227), using Caffe's convention, in the order (batch, channel, height, width). 134 | Input name string ("my_image_input") must also match the name of the input (or "bottom", as inputs are known in Caffe) of the first layer in the .prototxt. 135 | 136 | """ 137 | from ...models import MLModel 138 | import tempfile 139 | model_path = tempfile.mktemp() 140 | spec = _export(model_path, model, image_input_names, is_bgr, red_bias, blue_bias, 141 | green_bias, gray_bias, image_scale, class_labels, 142 | predicted_feature_name) 143 | return MLModel(model_path) 144 | 145 | 146 | def _export(filename, model, image_input_names=[], is_bgr=False, 147 | red_bias=0.0, blue_bias=0.0, green_bias=0.0, gray_bias=0.0, 148 | image_scale=1.0, 149 | class_labels=None, predicted_feature_name=None): 150 | try: 151 | from ... 
import libcaffeconverter 152 | except: 153 | if _sys.platform != 'darwin': 154 | raise RuntimeError('Caffe conversion is only supported on macOS.') 155 | else: 156 | raise RuntimeError('Unable to load Caffe converter library.') 157 | if isinstance(model, basestring): 158 | src_model_path = model 159 | prototxt_path = u'' 160 | binaryproto_path = u'' 161 | elif isinstance(model, tuple): 162 | if len(model) == 3: 163 | src_model_path, prototxt_path, binaryproto_path = model 164 | else: 165 | src_model_path, prototxt_path = model 166 | binaryproto_path = u'' 167 | 168 | 169 | if isinstance(image_input_names, basestring): 170 | image_input_names = [image_input_names] 171 | if predicted_feature_name is None: 172 | predicted_feature_name = u'classLabel' 173 | if class_labels is None: 174 | class_labels = u'' 175 | libcaffeconverter._convert_to_file(src_model_path, 176 | filename, 177 | set(image_input_names), 178 | is_bgr, 179 | red_bias, 180 | blue_bias, 181 | green_bias, 182 | gray_bias, 183 | image_scale, 184 | prototxt_path, 185 | binaryproto_path, 186 | class_labels, 187 | predicted_feature_name 188 | ) 189 | -------------------------------------------------------------------------------- /converters/keras/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from ..._deps import HAS_KERAS_TF as _HAS_KERAS_TF 7 | from ..._deps import HAS_KERAS2_TF as _HAS_KERAS2_TF 8 | 9 | if _HAS_KERAS_TF or _HAS_KERAS2_TF: 10 | import keras as _keras 11 | import logging as _logging 12 | from ._keras_converter import convert 13 | if _keras.backend.backend() != 'tensorflow': 14 | _HAS_KERAS_TF = False 15 | _HAS_KERAS2_TF = False 16 | _logging.warn('Currently, only Keras models with TensorFlow backend can be converted to CoreML.') 17 | 18 | # if _HAS_KERAS_TF: 19 | # from ._keras_converter import convert 20 | # 21 | # if _HAS_KERAS2_TF: 22 | # from ._keras2_converter import convert 23 | -------------------------------------------------------------------------------- /converters/keras/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | 7 | def raise_error_unsupported_categorical_option(option_name, option_value, layer_type, layer_name): 8 | """ 9 | Raise an error if an option is not supported. 10 | """ 11 | raise RuntimeError("Unsupported option %s=%s in layer %s(%s)" % (option_name, option_value, 12 | layer_type, layer_name)) 13 | 14 | def raise_error_unsupported_option(option, layer_type, layer_name): 15 | """ 16 | Raise an error if an option is not supported. 17 | """ 18 | raise RuntimeError("Unsupported option =%s in layer %s(%s)" % (option, 19 | layer_type, layer_name)) 20 | 21 | def raise_error_unsupported_scenario(message, layer_type, layer_name): 22 | """ 23 | Raise an error if an scenario is not supported. 
24 | """ 25 | raise RuntimeError("Unsupported scenario '%s' in layer %s(%s)" % (message, 26 | layer_type, layer_name)) 27 | -------------------------------------------------------------------------------- /converters/libsvm/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import _libsvm_converter 7 | import _libsvm_util 8 | 9 | from ..._deps import HAS_LIBSVM as _HAS_LIBSVM 10 | 11 | if _HAS_LIBSVM: 12 | import svm as _libsvm 13 | 14 | 15 | def convert(model, input_names='input', target_name='target', 16 | probability='classProbability', input_length='auto'): 17 | """ 18 | Convert a LIBSVM model to Core ML format. 19 | 20 | Parameters 21 | ---------- 22 | 23 | model: a libsvm model (C-SVC, nu-SVC, epsilon-SVR, or nu-SVR) 24 | or string path to a saved model. 25 | 26 | input_names: str | [str] 27 | Name of the input column(s). 28 | If a single string is used (the default) the input will be an array. The 29 | length of the array will be inferred from the model; this can be overridden 30 | using the 'input_length' parameter. 31 | 32 | target_name: str 33 | Name of the output column. 34 | 35 | probability: str 36 | Name of the output class probability column. 37 | Only used for C-SVC and nu-SVC that have been trained with probability 38 | estimates enabled. 39 | 40 | input_length: int 41 | Set the length of the input array. 42 | This parameter should only be used when the input is an array (i.e. when 43 | 'input_names' is a string). 44 | 45 | Returns 46 | ------- 47 | model: MLModel 48 | Model in Core ML format. 49 | 50 | Examples 51 | -------- 52 | .. sourcecode:: python 53 | 54 | # Make a LIBSVM model 55 | >>> import svmutil 56 | >>> problem = svmutil.svm_problem([0,0,1,1], [[0,1], [1,1], [8,9], [7,7]]) 57 | >>> libsvm_model = svmutil.svm_train(problem, svmutil.svm_parameter()) 58 | 59 | # Convert using default input and output names 60 | >>> import coremltools 61 | >>> coreml_model = coremltools.converters.libsvm.convert(libsvm_model) 62 | 63 | # Save the CoreML model to a file. 64 | >>> coreml_model.save('./my_model.mlmodel') 65 | 66 | # Convert using user specified input names 67 | >>> coreml_model = coremltools.converters.libsvm.convert(libsvm_model, input_names=['x', 'y']) 68 | """ 69 | if not(_HAS_LIBSVM): 70 | raise RuntimeError('libsvm not found. 
libsvm conversion API is disabled.') 71 | 72 | if isinstance(model, (str, unicode)): 73 | libsvm_model = _libsvm_util.load_model(model) 74 | else: 75 | libsvm_model = model 76 | if not isinstance(libsvm_model, _libsvm.svm_model): 77 | raise TypeError("Expected 'model' of type '%s' (got %s)" % (_libsvm.svm_model, type(libsvm_model))) 78 | 79 | if not isinstance(target_name, (str, unicode)): 80 | raise TypeError("Expected 'target_name' of type str (got %s)" % type(target_name)) 81 | 82 | if input_length != 'auto' and not isinstance(input_length, int): 83 | raise TypeError("Expected 'input_length' of type int, got %s" % type(input_length)) 84 | 85 | if input_length != 'auto' and not isinstance(input_names, (str, unicode)): 86 | raise ValueError("'input_length' should not be used unless the input will be only one array.") 87 | 88 | if not isinstance(probability, (str, unicode)): 89 | raise TypeError("Expected 'probability' of type str (got %s)" % type(probability)) 90 | 91 | return _libsvm_converter.convert(libsvm_model, input_names, target_name, input_length, probability) 92 | -------------------------------------------------------------------------------- /converters/libsvm/_libsvm_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from ... import SPECIFICATION_VERSION 7 | from ..._deps import HAS_LIBSVM 8 | 9 | 10 | def _infer_min_num_features(model): 11 | # find the largest index of all the support vectors 12 | max_index = 0 13 | for i in range(model.l): 14 | j = 0 15 | while model.SV[i][j].index != -1: 16 | cur_last_index = model.SV[i][j].index 17 | j += 1 18 | if cur_last_index > max_index: 19 | max_index = cur_last_index 20 | return max_index 21 | 22 | 23 | def convert(libsvm_model, feature_names, target, input_length, probability): 24 | """Convert an SVM model to the protobuf spec. 25 | 26 | This currently supports: 27 | * C-SVC 28 | * nu-SVC 29 | * Epsilon-SVR 30 | * nu-SVR 31 | 32 | Parameters 33 | ---------- 34 | libsvm_model: svm_model 35 | Libsvm representation of the model. 36 | 37 | feature_names : [str] | str 38 | Names of each of the features. 39 | 40 | target: str 41 | Name of the predicted class column. 42 | 43 | probability: str 44 | Name of the class probability column. Only used for C-SVC and nu-SVC. 45 | 46 | Returns 47 | ------- 48 | model_spec: An object of type Model_pb. 49 | Protobuf representation of the model 50 | """ 51 | if not(HAS_LIBSVM): 52 | raise RuntimeError('libsvm not found. 
libsvm conversion API is disabled.') 53 | 54 | import svm as libsvm 55 | from ...proto import SVM_pb2 56 | from ...proto import Model_pb2 57 | from ...proto import FeatureTypes_pb2 58 | from ...models import MLModel 59 | 60 | svm_type_enum = libsvm_model.param.svm_type 61 | 62 | # Create the spec 63 | export_spec = Model_pb2.Model() 64 | export_spec.specificationVersion = SPECIFICATION_VERSION 65 | 66 | if(svm_type_enum == libsvm.EPSILON_SVR or svm_type_enum == libsvm.NU_SVR): 67 | svm = export_spec.supportVectorRegressor 68 | else: 69 | svm = export_spec.supportVectorClassifier 70 | 71 | # Set the features names 72 | inferred_length = _infer_min_num_features(libsvm_model) 73 | if isinstance(feature_names, str): 74 | # input will be a single array 75 | if input_length == 'auto': 76 | print("[WARNING] Infering an input lenght of %d. If this is not correct," 77 | " use the 'input_length' parameter." % inferred_length) 78 | input_length = inferred_length 79 | elif inferred_length > input_length: 80 | raise ValueError("An input length of %d was given, but the model requires an" 81 | " input of at least %d." % (input_length, inferred_length)) 82 | 83 | input = export_spec.description.input.add() 84 | input.name = feature_names 85 | input.type.multiArrayType.shape.append(input_length) 86 | input.type.multiArrayType.dataType = Model_pb2.ArrayFeatureType.DOUBLE 87 | 88 | else: 89 | # input will be a series of doubles 90 | if inferred_length > len(feature_names): 91 | raise ValueError("%d feature names were given, but the model requires at" 92 | " least %d features." % (len(feature_names), inferred_length)) 93 | for cur_input_name in feature_names: 94 | input = export_spec.description.input.add() 95 | input.name = cur_input_name 96 | input.type.doubleType.MergeFromString('') 97 | 98 | # Set target 99 | output = export_spec.description.output.add() 100 | output.name = target 101 | 102 | # Set the interface types 103 | if(svm_type_enum == libsvm.EPSILON_SVR or svm_type_enum == libsvm.NU_SVR): 104 | export_spec.description.predictedFeatureName = target 105 | output.type.doubleType.MergeFromString('') 106 | nr_class = 2 107 | 108 | elif(svm_type_enum == libsvm.C_SVC or svm_type_enum == libsvm.NU_SVC): 109 | export_spec.description.predictedFeatureName = target 110 | output.type.int64Type.MergeFromString('') 111 | 112 | nr_class = len(libsvm_model.get_labels()) 113 | 114 | for i in range(nr_class): 115 | svm.numberOfSupportVectorsPerClass.append(libsvm_model.nSV[i]) 116 | svm.int64ClassLabels.vector.append(libsvm_model.label[i]) 117 | 118 | if probability and bool(libsvm_model.probA): 119 | output = export_spec.description.output.add() 120 | output.name = probability 121 | output.type.dictionaryType.MergeFromString('') 122 | export_spec.description.predictedProbabilitiesName = probability 123 | 124 | else: 125 | raise ValueError('Only the following SVM types are supported: C_SVC, NU_SVC, EPSILON_SVR, NU_SVR') 126 | 127 | if(libsvm_model.param.kernel_type == libsvm.LINEAR): 128 | svm.kernel.linearKernel.MergeFromString('') # Hack to set kernel to an empty type 129 | elif(libsvm_model.param.kernel_type == libsvm.RBF): 130 | svm.kernel.rbfKernel.gamma = libsvm_model.param.gamma 131 | elif(libsvm_model.param.kernel_type == libsvm.POLY): 132 | svm.kernel.polyKernel.degree = libsvm_model.param.degree 133 | svm.kernel.polyKernel.c = libsvm_model.param.coef0 134 | svm.kernel.polyKernel.gamma = libsvm_model.param.gamma 135 | elif(libsvm_model.param.kernel_type == libsvm.SIGMOID): 136 | svm.kernel.sigmoidKernel.c 
= libsvm_model.param.coef0 137 | svm.kernel.sigmoidKernel.gamma = libsvm_model.param.gamma 138 | else: 139 | raise ValueError('Unsupported kernel. The following kernels are supported: linear, RBF, polynomial and sigmoid.') 140 | 141 | # set rho 142 | # also set probA/ProbB only for SVC 143 | if(svm_type_enum == libsvm.C_SVC or svm_type_enum == libsvm.NU_SVC): 144 | num_class_pairs = nr_class * (nr_class-1)//2 145 | for i in range(num_class_pairs): 146 | svm.rho.append(libsvm_model.rho[i]) 147 | if(bool(libsvm_model.probA) and bool(libsvm_model.probB)): 148 | for i in range(num_class_pairs): 149 | svm.probA.append(libsvm_model.probA[i]) 150 | svm.probB.append(libsvm_model.probB[i]) 151 | else: 152 | svm.rho = libsvm_model.rho[0] 153 | 154 | # set coefficients 155 | if(svm_type_enum == libsvm.C_SVC or svm_type_enum == libsvm.NU_SVC): 156 | for _ in range(nr_class - 1): 157 | svm.coefficients.add() 158 | for i in range(libsvm_model.l): 159 | for j in range(nr_class - 1): 160 | svm.coefficients[j].alpha.append(libsvm_model.sv_coef[j][i]) 161 | else: 162 | for i in range(libsvm_model.l): 163 | svm.coefficients.alpha.append(libsvm_model.sv_coef[0][i]) 164 | 165 | # set support vectors 166 | for i in range(libsvm_model.l): 167 | j = 0 168 | cur_support_vector = svm.sparseSupportVectors.vectors.add() 169 | while libsvm_model.SV[i][j].index != -1: 170 | cur_node = cur_support_vector.nodes.add() 171 | cur_node.index = libsvm_model.SV[i][j].index 172 | cur_node.value = libsvm_model.SV[i][j].value 173 | j += 1 174 | 175 | return MLModel(export_spec) 176 | -------------------------------------------------------------------------------- /converters/libsvm/_libsvm_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from ..._deps import HAS_LIBSVM 7 | 8 | def load_model(model_path): 9 | """Load a libsvm model from a path on disk. 10 | 11 | This currently supports: 12 | * C-SVC 13 | * NU-SVC 14 | * Epsilon-SVR 15 | * NU-SVR 16 | 17 | Parameters 18 | ---------- 19 | model_path: str 20 | Path on disk where the libsvm model representation is. 21 | 22 | Returns 23 | ------- 24 | model: libsvm_model 25 | A model of the libsvm format. 26 | """ 27 | if not(HAS_LIBSVM): 28 | raise RuntimeError('libsvm not found. libsvm conversion API is disabled.') 29 | 30 | from svmutil import svm_load_model # From libsvm 31 | import os 32 | if (not os.path.exists(model_path)): 33 | raise IOError("Expected a valid file path. %s does not exist" % model_path) 34 | return svm_load_model(model_path) 35 | -------------------------------------------------------------------------------- /converters/sklearn/_LinearSVC.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 7 | from ...models import MLModel as _MLModel 8 | 9 | if _HAS_SKLEARN: 10 | from sklearn.svm import LinearSVC as _LinearSVC 11 | sklearn_class = _LinearSVC 12 | from . import _sklearn_util 13 | 14 | from . 
import _logistic_regression 15 | model_type = 'classifier' 16 | 17 | def convert(model, feature_names, target): 18 | """Convert a LinearSVC model to the protobuf spec. 19 | Parameters 20 | ---------- 21 | model: LinearSVC 22 | A trained LinearSVC model. 23 | 24 | feature_names: [str] 25 | Name of the input columns. 26 | 27 | target: str 28 | Name of the output column. 29 | 30 | Returns 31 | ------- 32 | model_spec: An object of type Model_pb. 33 | Protobuf representation of the model 34 | """ 35 | if not(_HAS_SKLEARN): 36 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 37 | 38 | _sklearn_util.check_expected_type(model, _LinearSVC) 39 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) 40 | 41 | return _MLModel(_logistic_regression._convert(model, feature_names, target)) 42 | 43 | def supports_output_scores(model): 44 | return True 45 | 46 | def get_output_classes(model): 47 | return _logistic_regression.get_output_classes(model) 48 | 49 | def get_input_dimension(model): 50 | return _logistic_regression.get_input_dimension(model) 51 | -------------------------------------------------------------------------------- /converters/sklearn/_LinearSVR.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 7 | from ...models import MLModel as _MLModel 8 | 9 | if _HAS_SKLEARN: 10 | from sklearn.svm import LinearSVR as _LinearSVR 11 | import sklearn 12 | import _sklearn_util 13 | sklearn_class = sklearn.svm.LinearSVR 14 | 15 | from . import _linear_regression 16 | 17 | model_type = 'regressor' 18 | 19 | def convert(model, features, target): 20 | """Convert a LinearSVR model to the protobuf spec. 21 | Parameters 22 | ---------- 23 | model: LinearSVR 24 | A trained LinearSVR model. 25 | 26 | feature_names: [str] 27 | Name of the input columns. 28 | 29 | target: str 30 | Name of the output column. 31 | 32 | Returns 33 | ------- 34 | model_spec: An object of type Model_pb. 35 | Protobuf representation of the model 36 | """ 37 | if not(_HAS_SKLEARN): 38 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 39 | 40 | # Check the scikit learn model 41 | _sklearn_util.check_expected_type(model, _LinearSVR) 42 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) 43 | 44 | return _MLModel(_linear_regression._convert(model, features, target)) 45 | 46 | 47 | def get_input_dimension(model): 48 | return _linear_regression.get_input_dimension(model) 49 | -------------------------------------------------------------------------------- /converters/sklearn/_NuSVC.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 
2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import _SVC as _SVC 7 | 8 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 9 | 10 | if _HAS_SKLEARN: 11 | from ._sklearn_util import check_fitted 12 | import _sklearn_util 13 | from sklearn.svm import NuSVC as _NuSVC 14 | sklearn_class = _NuSVC 15 | 16 | model_type = "classifier" 17 | 18 | def convert(model, feature_names, target): 19 | """Convert a Nu-Support Vector Classification (NuSVC) model to the protobuf spec. 20 | Parameters 21 | ---------- 22 | model: NuSVC 23 | A trained NuSVC encoder model. 24 | 25 | feature_names: [str], optional (default=None) 26 | Name of the input columns. 27 | 28 | target: str, optional (default=None) 29 | Name of the output column. 30 | 31 | Returns 32 | ------- 33 | model_spec: An object of type Model_pb. 34 | Protobuf representation of the model 35 | """ 36 | 37 | if not(_HAS_SKLEARN): 38 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 39 | 40 | _sklearn_util.check_expected_type(model, _NuSVC) 41 | return _SVC.convert(model, feature_names, target) 42 | 43 | def supports_output_scores(model): 44 | return _SVC.supports_output_scores(model) 45 | 46 | def get_output_classes(model): 47 | if not(_HAS_SKLEARN): 48 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 49 | check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) 50 | return _SVC.get_output_classes(model) 51 | 52 | def get_input_dimension(model): 53 | if not(_HAS_SKLEARN): 54 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 55 | 56 | check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) 57 | return _SVC.get_input_dimension(model) 58 | 59 | -------------------------------------------------------------------------------- /converters/sklearn/_NuSVR.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import _SVR as _SVR 7 | 8 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 9 | from ...models import MLModel as _MLModel 10 | 11 | if _HAS_SKLEARN: 12 | from ._sklearn_util import check_fitted 13 | from sklearn.svm import NuSVR as _NuSVR 14 | import _sklearn_util 15 | sklearn_class = _NuSVR 16 | 17 | model_type = 'regressor' 18 | 19 | 20 | def convert(model, feature_names, target): 21 | """Convert a Nu Support Vector Regression (NuSVR) model to the protobuf spec. 22 | Parameters 23 | ---------- 24 | model: NuSVR 25 | A trained NuSVR encoder model. 26 | 27 | feature_names: [str] 28 | Name of the input columns. 29 | 30 | target: str 31 | Name of the output column. 32 | 33 | Returns 34 | ------- 35 | model_spec: An object of type Model_pb. 36 | Protobuf representation of the model 37 | """ 38 | if not(_HAS_SKLEARN): 39 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 40 | 41 | _sklearn_util.check_expected_type(model, _NuSVR) 42 | return _SVR.convert(model, feature_names, target) 43 | 44 | def get_input_dimension(model): 45 | if not(_HAS_SKLEARN): 46 | raise RuntimeError('scikit-learn not found. 
scikit-learn conversion API is disabled.') 47 | 48 | check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) 49 | return _SVR.get_input_dimension(model) 50 | 51 | -------------------------------------------------------------------------------- /converters/sklearn/_SVC.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from ...proto import Model_pb2 as _Model_pb2 7 | from ...proto import SVM_pb2 as _SVM_pb2 8 | from ... import SPECIFICATION_VERSION 9 | from ...models._interface_management import set_classifier_interface_params 10 | 11 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 12 | from ...models import MLModel as _MLModel 13 | 14 | if _HAS_SKLEARN: 15 | from ._sklearn_util import check_fitted 16 | from sklearn.svm import SVC as _SVC 17 | sklearn_class = _SVC 18 | 19 | model_type = 'classifier' 20 | 21 | 22 | from _svm_common import _set_kernel 23 | 24 | def _generate_base_svm_classifier_spec(model): 25 | """ 26 | Takes an SVM classifier produces a starting spec using the parts. that are 27 | shared between all SVMs. 28 | """ 29 | if not(_HAS_SKLEARN): 30 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 31 | 32 | check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) 33 | 34 | spec = _Model_pb2.Model() 35 | spec.specificationVersion = SPECIFICATION_VERSION 36 | svm = spec.supportVectorClassifier 37 | 38 | _set_kernel(model, svm) 39 | 40 | for cur_rho in model.intercept_: 41 | if(len(model.classes_) == 2): 42 | # For some reason Scikit Learn doesn't negate for binary classification 43 | svm.rho.append(cur_rho) 44 | else: 45 | svm.rho.append(-cur_rho) 46 | 47 | for i in range(len(model._dual_coef_)): 48 | svm.coefficients.add() 49 | for cur_alpha in model._dual_coef_[i]: 50 | svm.coefficients[i].alpha.append(cur_alpha) 51 | 52 | for cur_src_vector in model.support_vectors_: 53 | cur_dest_vector = svm.denseSupportVectors.vectors.add() 54 | for i in cur_src_vector: 55 | cur_dest_vector.values.append(i) 56 | return spec 57 | 58 | def convert(model, feature_names, target): 59 | """Convert a Support Vector Classtion (SVC) model to the protobuf spec. 60 | Parameters 61 | ---------- 62 | model: SVC 63 | A trained SVC encoder model. 64 | 65 | feature_names: [str], optional (default=None) 66 | Name of the input columns. 67 | 68 | target: str, optional (default=None) 69 | Name of the output column. 70 | 71 | Returns 72 | ------- 73 | model_spec: An object of type Model_pb. 74 | Protobuf representation of the model 75 | """ 76 | if not(_HAS_SKLEARN): 77 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 78 | spec = _generate_base_svm_classifier_spec(model) 79 | spec = set_classifier_interface_params(spec, feature_names, model.classes_, 'supportVectorClassifier', output_features = target) 80 | 81 | svm = spec.supportVectorClassifier 82 | for i in model.n_support_: 83 | svm.numberOfSupportVectorsPerClass.append(int(i)) 84 | 85 | if len(model.probA_) != 0 and len(model.classes_) == 2: 86 | print("[WARNING] Scikit Learn uses a technique to normalize pairwise probabilities even for binary classification. 
" 87 | "This can cause differences in predicted probabilities, usually less than 0.5%.") 88 | 89 | # If this is an empty list, then model.probA_ will be an empty list. 90 | if len(model.probA_) != 0: 91 | for i in model.probA_: 92 | svm.probA.append(i) 93 | 94 | for i in model.probB_: 95 | svm.probB.append(i) 96 | 97 | return _MLModel(spec) 98 | 99 | def supports_output_scores(model): 100 | return (len(model.probA_) != 0) 101 | 102 | def get_output_classes(model): 103 | if not(_HAS_SKLEARN): 104 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 105 | check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) 106 | return list(model.classes_) 107 | 108 | def get_input_dimension(model): 109 | if not(_HAS_SKLEARN): 110 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 111 | check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) 112 | return len(model.support_vectors_[0]) 113 | -------------------------------------------------------------------------------- /converters/sklearn/_SVR.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from ...proto import Model_pb2 as _Model_pb2 7 | from ...models._interface_management import set_regressor_interface_params 8 | from ... import SPECIFICATION_VERSION 9 | 10 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 11 | from ...models import MLModel as _MLModel 12 | 13 | if _HAS_SKLEARN: 14 | from ._sklearn_util import check_fitted 15 | from sklearn.svm import SVR as _SVR 16 | import _sklearn_util 17 | sklearn_class = _SVR 18 | 19 | model_type = 'regressor' 20 | 21 | from _svm_common import _set_kernel 22 | 23 | def _generate_base_svm_regression_spec(model): 24 | """ 25 | Takes an SVM regression model produces a starting spec using the parts. 26 | that are shared between all SVMs. 27 | """ 28 | if not(_HAS_SKLEARN): 29 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 30 | 31 | spec = _Model_pb2.Model() 32 | spec.specificationVersion = SPECIFICATION_VERSION 33 | svm = spec.supportVectorRegressor 34 | 35 | _set_kernel(model, svm) 36 | 37 | svm.rho = -model.intercept_[0] 38 | for i in range(len(model._dual_coef_)): 39 | for cur_alpha in model._dual_coef_[i]: 40 | svm.coefficients.alpha.append(cur_alpha) 41 | 42 | for cur_src_vector in model.support_vectors_: 43 | cur_dest_vector = svm.denseSupportVectors.vectors.add() 44 | for i in cur_src_vector: 45 | cur_dest_vector.values.append(i) 46 | return spec 47 | 48 | def convert(model, features, target): 49 | """Convert a Support Vector Regressor (SVR) model to the protobuf spec. 50 | Parameters 51 | ---------- 52 | model: SVR 53 | A trained SVR encoder model. 54 | 55 | feature_names: [str] 56 | Name of the input columns. 57 | 58 | target: str 59 | Name of the output column. 60 | 61 | Returns 62 | ------- 63 | model_spec: An object of type Model_pb. 64 | Protobuf representation of the model 65 | """ 66 | spec = _generate_base_svm_regression_spec(model) 67 | spec = set_regressor_interface_params(spec, features, target) 68 | return _MLModel(spec) 69 | 70 | def get_input_dimension(model): 71 | if not(_HAS_SKLEARN): 72 | raise RuntimeError('scikit-learn not found. 
scikit-learn conversion API is disabled.') 73 | check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) 74 | return len(model.support_vectors_[0]) 75 | -------------------------------------------------------------------------------- /converters/sklearn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | # A single function to manage the importing. 7 | 8 | from _converter import convert 9 | 10 | -------------------------------------------------------------------------------- /converters/sklearn/_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | """ 7 | Defines the primary function for converting scikit-learn models. 8 | """ 9 | 10 | def convert(sk_obj, input_features = None, 11 | output_feature_names = None): 12 | """ 13 | Convert scikit-learn pipeline, classifier, or regressor to Core ML format. 14 | 15 | Parameters 16 | ---------- 17 | sk_obj: model | [model] of scikit-learn format. 18 | Scikit learn model(s) to convert to a Core ML format. 19 | 20 | The input model may be a single scikit learn model, a scikit learn 21 | pipeline model, or a list of scikit learn models. 22 | 23 | Currently supported scikit learn models are: 24 | 25 | - Linear and Logistic Regression 26 | - LinearSVC and LinearSVR 27 | - SVC and SVR 28 | - NuSVC and NuSVR 29 | - Gradient Boosting Classifier and Regressor 30 | - Decision Tree Classifier and Regressor 31 | - Random Forest Classifier and Regressor 32 | - Normalizer 33 | - Imputer 34 | - Standard Scaler 35 | - DictVectorizer 36 | - One Hot Encoder 37 | 38 | The input model, or the last model in a pipeline or list of models, 39 | determines whether this is exposed as a Transformer, Regressor, 40 | or Classifier. 41 | 42 | Note that there may not be a one-to-one correspondence between scikit 43 | learn models and which Core ML models are used to represent them. For 44 | example, many scikit learn models are embedded in a pipeline to handle 45 | processing of input features. 46 | 47 | 48 | input_features: str | dict | list 49 | 50 | Optional name(s) that can be given to the inputs of the scikit-learn 51 | model. Defaults to 'input'. 52 | 53 | Input features can be specified in a number of forms. 54 | 55 | - Single string: In this case, the input is assumed to be a single 56 | array, with the number of dimensions set using num_dimensions. 57 | 58 | - List of strings: In this case, the overall input dimentions to the 59 | scikit-learn model is assumed to be the length of the list. If 60 | neighboring names are identical, they are assumed to be an input 61 | array of that length. For example: 62 | 63 | ["a", "b", "c"] 64 | 65 | resolves to 66 | 67 | [("a", Double), ("b", Double), ("c", Double)]. 68 | 69 | And: 70 | 71 | ["a", "a", "b"] 72 | 73 | resolves to 74 | 75 | [("a", Array(2)), ("b", Double)]. 76 | 77 | - Dictionary: Where the keys are the names and the indices or ranges of 78 | feature indices. 
79 | 80 | In this case, it's presented as a mapping from keys to indices or 81 | ranges of contiguous indices. For example, 82 | 83 | {"a" : 0, "b" : [2,3], "c" : 1} 84 | 85 | Resolves to 86 | 87 | [("a", Double), ("c", Double), ("b", Array(2))]. 88 | 89 | Note that the ordering is determined by the indices. 90 | 91 | - List of tuples of the form `(name, datatype)`. Here, `name` is the 92 | name of the exposed feature, and `datatype` is an instance of 93 | `String`, `Double`, `Int64`, `Array`, or `Dictionary`. 94 | 95 | output_feature_names: string or list of strings 96 | Optional name(s) that can be given to the inputs of the scikit-learn 97 | model. 98 | 99 | The output_feature_names is interpreted according to the model type: 100 | 101 | - If the scikit-learn model is a transformer, it is the name of the 102 | array feature output by the final sequence of the transformer 103 | (defaults to "output"). 104 | - If it is a classifier, it should be a 2-tuple of names giving the top 105 | class prediction and the array of scores for each class (defaults to 106 | "classLabel" and "classScores"). 107 | - If it is a regressor, it should give the name of the prediction value 108 | (defaults to "prediction"). 109 | 110 | Returns 111 | ------- 112 | model:MLModel 113 | Returns an MLModel instance representing a Core ML model. 114 | 115 | Examples 116 | -------- 117 | .. sourcecode:: python 118 | 119 | >>> from sklearn.linear_model import LinearRegression 120 | >>> import pandas as pd 121 | 122 | # Load data 123 | >>> data = pd.read_csv('houses.csv') 124 | 125 | # Train a model 126 | >>> model = LinearRegression() 127 | >>> model.fit(data[["bedroom", "bath", "size"]], data["price"]) 128 | 129 | # Convert and save the scikit-learn model 130 | >>> import coremltools 131 | >>> coreml_model = coremltools.converters.sklearn.convert(model, 132 | ["bedroom", "bath", "size"], 133 | "price") 134 | >>> coreml_model.save('HousePricer.mlmodel') 135 | """ 136 | 137 | # This function is just a thin wrapper around the internal converter so 138 | # that sklearn isn't actually imported unless this function is called 139 | from ...models import MLModel 140 | 141 | # NOTE: Providing user-defined class labels will be enabled when 142 | # several issues with the ordering of the classes are worked out. For now, 143 | # to use custom class labels, directly import the internal function below. 144 | from _converter_internal import _convert_sklearn_model 145 | spec = _convert_sklearn_model( 146 | sk_obj, input_features, output_feature_names, class_labels = None) 147 | 148 | return MLModel(spec) 149 | 150 | -------------------------------------------------------------------------------- /converters/sklearn/_decision_tree_classifier.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from _tree_ensemble import convert_tree_ensemble 7 | 8 | from ..._deps import HAS_SKLEARN 9 | from ...models import MLModel as _MLModel 10 | 11 | if HAS_SKLEARN: 12 | import sklearn.tree as _tree 13 | import _sklearn_util 14 | 15 | model_type = 'classifier' 16 | sklearn_class = _tree.DecisionTreeClassifier 17 | 18 | def convert(model, input_name, output_features): 19 | """Convert a decision tree model to protobuf format. 
20 | 21 | Parameters 22 | ---------- 23 | decision_tree : DecisionTreeClassifier 24 | A trained scikit-learn tree model. 25 | 26 | input_name: str 27 | Name of the input columns. 28 | 29 | output_name: str 30 | Name of the output columns. 31 | 32 | Returns 33 | ------- 34 | model_spec: An object of type Model_pb. 35 | Protobuf representation of the model 36 | """ 37 | if not(HAS_SKLEARN): 38 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 39 | 40 | _sklearn_util.check_expected_type(model, _tree.DecisionTreeClassifier) 41 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'tree_') and model.tree_ is not None) 42 | 43 | return _MLModel(convert_tree_ensemble(model, input_name, output_features, 44 | mode = 'classifier', 45 | class_labels = model.classes_)) 46 | 47 | def supports_output_scores(model): 48 | return True 49 | 50 | def get_output_classes(model): 51 | return list(model.classes_) 52 | 53 | def get_input_dimension(model): 54 | return model.n_features_ 55 | 56 | 57 | -------------------------------------------------------------------------------- /converters/sklearn/_decision_tree_regressor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from _tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble 7 | 8 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 9 | from ...models import MLModel as _MLModel 10 | 11 | if _HAS_SKLEARN: 12 | import sklearn.tree as _tree 13 | import _sklearn_util 14 | 15 | model_type = 'regressor' 16 | sklearn_class = _tree.DecisionTreeRegressor 17 | 18 | def convert(model, feature_names, target): 19 | """Convert a decision tree model to protobuf format. 20 | 21 | Parameters 22 | ---------- 23 | decision_tree : DecisionTreeRegressor 24 | A trained scikit-learn tree model. 25 | 26 | feature_names: [str] 27 | Name of the input columns. 28 | 29 | target: str 30 | Name of the output column. 31 | 32 | Returns 33 | ------- 34 | model_spec: An object of type Model_pb. 35 | Protobuf representation of the model 36 | """ 37 | if not(_HAS_SKLEARN): 38 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 39 | 40 | _sklearn_util.check_expected_type(model, _tree.DecisionTreeRegressor) 41 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'tree_') and model.tree_ is not None) 42 | return _MLModel(_convert_tree_ensemble(model, feature_names, target)) 43 | 44 | def get_input_dimension(model): 45 | return model.n_features_ 46 | 47 | 48 | -------------------------------------------------------------------------------- /converters/sklearn/_dict_vectorizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import _sklearn_util 7 | 8 | from ... 
import SPECIFICATION_VERSION 9 | from ...models._interface_management import set_transform_interface_params 10 | from ...proto import Model_pb2 as _Model_pb2 11 | from ...proto import FeatureTypes_pb2 as _FeatureTypes_pb2 12 | from ...models._feature_management import process_or_validate_features 13 | from ...models.feature_vectorizer import create_feature_vectorizer 14 | from ...models import MLModel as _MLModel 15 | 16 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 17 | if _HAS_SKLEARN: 18 | from sklearn.feature_extraction import DictVectorizer 19 | sklearn_class = DictVectorizer 20 | 21 | from ...models import datatypes 22 | from ...models.pipeline import Pipeline 23 | 24 | model_type = 'transformer' 25 | 26 | 27 | def convert(model, input_features, output_features): 28 | """Convert a DictVectorizer model to the protobuf spec. 29 | 30 | Parameters 31 | ---------- 32 | model: DictVectorizer 33 | A fitted DictVectorizer model. 34 | 35 | input_features: str 36 | Name of the input column. 37 | 38 | output_features: str 39 | Name of the output column. 40 | 41 | Returns 42 | ------- 43 | model_spec: An object of type Model_pb. 44 | Protobuf representation of the model 45 | """ 46 | 47 | _INTERMEDIATE_FEATURE_NAME = "__sparse_vector_features__" 48 | 49 | n_dimensions = len(model.feature_names_) 50 | input_features = process_or_validate_features(input_features) 51 | 52 | # Ensure that the output_features are also solid. 53 | output_features = process_or_validate_features(output_features, n_dimensions) 54 | 55 | # The DictVectorizer in the framework outputs a sparse dictionary 56 | # of index to value due to other considerations, but we are expecting 57 | # the output of this to be a dense feature vector. To make that happen, 58 | # put a feature_vectorizer immediately after the dict vectorizer. 59 | pline = Pipeline(input_features, output_features) 60 | 61 | # Set the basic model parameters of the dict vectorizer component. 62 | dv_spec = _Model_pb2.Model() 63 | dv_spec.specificationVersion = SPECIFICATION_VERSION 64 | 65 | # Set up the dict vectorizer parameters 66 | tr_spec = dv_spec.dictVectorizer 67 | is_str = None 68 | for feature_name in model.feature_names_: 69 | if isinstance(feature_name, (str, unicode)): 70 | if is_str == False: 71 | raise ValueError("Mapping of DictVectorizer mixes int and str types.") 72 | 73 | tr_spec.stringToIndex.vector.append(feature_name) 74 | is_str = True 75 | 76 | if isinstance(feature_name, (int, long)): 77 | if is_str == True: 78 | raise ValueError("Mapping of DictVectorizer mixes int and str types.") 79 | 80 | tr_spec.int64ToIndex.vector.append(feature_name) 81 | is_str = False 82 | 83 | intermediate_features = [(_INTERMEDIATE_FEATURE_NAME, 84 | datatypes.Dictionary(key_type = int))] 85 | 86 | # Set the interface for the dict vectorizer with the input and the 87 | # intermediate output 88 | set_transform_interface_params( 89 | dv_spec, input_features, intermediate_features) 90 | 91 | pline.add_model(dv_spec) 92 | 93 | # Follow the dict vectorizer by a feature_vectorizer to change the sparse 94 | # output layer into a dense vector as expected. 
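    # Illustrative sketch (hypothetical feature names): for a DictVectorizer fitted so that
    # model.feature_names_ == ['a', 'b', 'c'], the dictVectorizer stage above maps an input
    # {'b': 1.0} to the sparse dictionary {1: 1.0}; the feature vectorizer added below then
    # densifies that into the 3-dimensional vector [0.0, 1.0, 0.0].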
95 | fvec, _num_out_dim = create_feature_vectorizer(intermediate_features, 96 | output_features[0][0], {"__sparse_vector_features__" : n_dimensions}) 97 | 98 | pline.add_model(fvec) 99 | 100 | return _MLModel(pline.spec) 101 | 102 | def update_dimension(m, current_num_dimensions): 103 | return len(m.feature_names_) 104 | 105 | def get_input_dimension(m): 106 | return None 107 | 108 | def get_input_feature_names(m): 109 | return m.feature_names_ 110 | -------------------------------------------------------------------------------- /converters/sklearn/_gradient_boosting_classifier.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from _tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble 7 | from _tree_ensemble import get_input_dimension 8 | 9 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 10 | from ...models import MLModel as _MLModel 11 | 12 | if _HAS_SKLEARN: 13 | import sklearn.ensemble as _ensemble 14 | import _sklearn_util 15 | sklearn_class = _ensemble.GradientBoostingClassifier 16 | 17 | model_type = 'classifier' 18 | 19 | def convert(model, feature_names, target): 20 | """Convert a boosted tree model to protobuf format. 21 | 22 | Parameters 23 | ---------- 24 | decision_tree : GradientBoostingClassifier 25 | A trained scikit-learn tree model. 26 | 27 | feature_names: [str] 28 | Name of the input columns. 29 | 30 | target: str 31 | Name of the output column. 32 | 33 | Returns 34 | ------- 35 | model_spec: An object of type Model_pb. 36 | Protobuf representation of the model 37 | """ 38 | if not(_HAS_SKLEARN): 39 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 40 | 41 | _sklearn_util.check_expected_type(model, _ensemble.GradientBoostingClassifier) 42 | def is_gbr_model(m): 43 | if len(m.estimators_) == 0: 44 | return False 45 | if hasattr(m, 'estimators_') and m.estimators_ is not None: 46 | for t in m.estimators_.flatten(): 47 | if not hasattr(t, 'tree_') or t.tree_ is None: 48 | return False 49 | return True 50 | else: 51 | return False 52 | _sklearn_util.check_fitted(model, is_gbr_model) 53 | post_evaluation_transform = None 54 | if model.n_classes_ == 2: 55 | base_prediction = [model.init_.prior] 56 | post_evaluation_transform = 'Regression_Logistic' 57 | else: 58 | base_prediction = list(model.init_.priors) 59 | post_evaluation_transform = 'Classification_SoftMax' 60 | return _MLModel(_convert_tree_ensemble(model, feature_names, target, mode = 'classifier', 61 | base_prediction = base_prediction, class_labels = model.classes_, 62 | post_evaluation_transform = post_evaluation_transform)) 63 | 64 | def supports_output_scores(model): 65 | return True 66 | 67 | def get_output_classes(model): 68 | return list(model.classes_) 69 | 70 | 71 | -------------------------------------------------------------------------------- /converters/sklearn/_gradient_boosting_regressor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 
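# A minimal usage sketch for the gradient boosting classifier converter above; `clf`, `X`, `y`,
# and the feature names are hypothetical, and the call goes through the documented top-level
# API (coremltools.converters.sklearn.convert) with the default output names:
#
#     >>> from sklearn.ensemble import GradientBoostingClassifier
#     >>> clf = GradientBoostingClassifier().fit(X, y)   # X: 2-D feature array, y: class labels
#     >>> import coremltools
#     >>> coreml_model = coremltools.converters.sklearn.convert(clf, ['f0', 'f1', 'f2'])
#     >>> coreml_model.save('GBClassifier.mlmodel')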
2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from _tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble 7 | from _tree_ensemble import get_input_dimension 8 | 9 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 10 | from ...models import MLModel as _MLModel 11 | 12 | if _HAS_SKLEARN: 13 | import sklearn.ensemble as _ensemble 14 | import _sklearn_util 15 | sklearn_class = _ensemble.GradientBoostingRegressor 16 | 17 | model_type = 'regressor' 18 | 19 | def convert(model, input_features, output_features): 20 | """Convert a boosted tree model to protobuf format. 21 | 22 | Parameters 23 | ---------- 24 | decision_tree : GradientBoostingRegressor 25 | A trained scikit-learn tree model. 26 | 27 | input_feature: [str] 28 | Name of the input columns. 29 | 30 | output_features: str 31 | Name of the output column. 32 | 33 | Returns 34 | ------- 35 | model_spec: An object of type Model_pb. 36 | Protobuf representation of the model 37 | """ 38 | if not(_HAS_SKLEARN): 39 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 40 | 41 | _sklearn_util.check_expected_type(model, _ensemble.GradientBoostingRegressor) 42 | def is_gbr_model(m): 43 | if len(m.estimators_) == 0: 44 | return False 45 | if hasattr(m, 'estimators_') and m.estimators_ is not None: 46 | for t in m.estimators_.flatten(): 47 | if not hasattr(t, 'tree_') or t.tree_ is None: 48 | return False 49 | return True 50 | else: 51 | return False 52 | 53 | _sklearn_util.check_fitted(model, is_gbr_model) 54 | 55 | base_prediction = model.init_.mean 56 | 57 | return _MLModel(_convert_tree_ensemble(model, input_features, output_features, 58 | base_prediction = base_prediction)) 59 | 60 | -------------------------------------------------------------------------------- /converters/sklearn/_imputer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import _sklearn_util 7 | from ... import SPECIFICATION_VERSION 8 | from ...models._interface_management import set_transform_interface_params 9 | from ...proto import Model_pb2 as _Model_pb2 10 | from ...models import datatypes 11 | from ...models import MLModel as _MLModel 12 | 13 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 14 | 15 | if _HAS_SKLEARN: 16 | import sklearn 17 | from sklearn.preprocessing import Imputer 18 | model_type = 'transformer' 19 | sklearn_class = sklearn.preprocessing.Imputer 20 | 21 | def convert(model, input_features, output_features): 22 | """Convert a DictVectorizer model to the protobuf spec. 23 | 24 | Parameters 25 | ---------- 26 | model: DictVectorizer 27 | A fitted DictVectorizer model. 28 | 29 | input_features: str 30 | Name of the input column. 31 | 32 | output_features: str 33 | Name of the output column. 34 | 35 | Returns 36 | ------- 37 | model_spec: An object of type Model_pb. 38 | Protobuf representation of the model 39 | """ 40 | if not(_HAS_SKLEARN): 41 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 42 | 43 | # Set the interface params. 
44 | spec = _Model_pb2.Model() 45 | spec.specificationVersion = SPECIFICATION_VERSION 46 | 47 | assert len(input_features) == 1 48 | assert isinstance(input_features[0][1], datatypes.Array) 49 | 50 | # feature name in and out are the same here 51 | spec = set_transform_interface_params(spec, input_features, output_features) 52 | 53 | # Test the scikit-learn model 54 | _sklearn_util.check_expected_type(model, Imputer) 55 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'statistics_')) 56 | 57 | if model.axis != 0: 58 | raise ValueError("Imputation is only supported along axis = 0.") 59 | 60 | 61 | # The imputer in our framework only works on single columns, so 62 | # we need to translate that over. The easiest way to do that is to 63 | # put it in a nested pipeline with a feature extractor and a 64 | 65 | tr_spec = spec.imputer 66 | 67 | for v in model.statistics_: 68 | tr_spec.imputedDoubleArray.vector.append(v) 69 | 70 | try: 71 | tr_spec.replaceDoubleValue = float(model.missing_values) 72 | except ValueError: 73 | raise ValueError("Only scalar values or NAN as missing_values " 74 | "in _imputer are supported.") 75 | 76 | return _MLModel(spec) 77 | 78 | 79 | def update_dimension(model, input_dimension): 80 | """ 81 | Given a model that takes an array of dimension input_dimension, returns 82 | the output dimension. 83 | """ 84 | 85 | # This doesn't expand anything. 86 | return input_dimension 87 | 88 | def get_input_dimension(model): 89 | if not(_HAS_SKLEARN): 90 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 91 | 92 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'statistics_')) 93 | return len(model.statistics_) 94 | 95 | 96 | -------------------------------------------------------------------------------- /converters/sklearn/_linear_regression.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from ... import SPECIFICATION_VERSION 7 | from ...models._interface_management import set_regressor_interface_params 8 | from ...proto import Model_pb2 as _Model_pb2 9 | from ...proto import FeatureTypes_pb2 as _FeatureTypes_pb2 10 | 11 | import numpy as _np 12 | 13 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 14 | from ...models import MLModel as _MLModel 15 | 16 | if _HAS_SKLEARN: 17 | import _sklearn_util 18 | import sklearn 19 | from sklearn.linear_model import LinearRegression 20 | model_type = 'regressor' 21 | sklearn_class = sklearn.linear_model.LinearRegression 22 | 23 | def convert(model, features, target): 24 | 25 | """Convert a linear regression model to the protobuf spec. 26 | Parameters 27 | ---------- 28 | model: LinearRegression 29 | A trained linear regression encoder model. 30 | 31 | feature_names: [str] 32 | Name of the input columns. 33 | 34 | target: str 35 | Name of the output column. 36 | 37 | Returns 38 | ------- 39 | model_spec: An object of type Model_pb. 40 | Protobuf representation of the model 41 | """ 42 | if not(_HAS_SKLEARN): 43 | raise RuntimeError('scikit-learn not found. 
scikit-learn conversion API is disabled.') 44 | 45 | # Check the scikit learn model 46 | _sklearn_util.check_expected_type(model, LinearRegression) 47 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) 48 | 49 | return _MLModel(_convert(model, features, target)) 50 | 51 | 52 | def _convert(model, features, target): 53 | # Set the model class (regressor) 54 | spec = _Model_pb2.Model() 55 | spec.specificationVersion = SPECIFICATION_VERSION 56 | spec = set_regressor_interface_params(spec, features, target) 57 | 58 | # Add parameters for the linear regression. 59 | lr = spec.glmRegressor 60 | 61 | if(isinstance(model.intercept_, _np.ndarray)): 62 | assert(len(model.intercept_) == 1) 63 | lr.offset.append(model.intercept_[0]) 64 | else: 65 | lr.offset.append(model.intercept_) 66 | 67 | weights = lr.weights.add() 68 | for i in model.coef_: 69 | weights.value.append(i) 70 | return spec 71 | 72 | def get_input_dimension(model): 73 | if not(_HAS_SKLEARN): 74 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 75 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) 76 | return model.coef_.size 77 | -------------------------------------------------------------------------------- /converters/sklearn/_logistic_regression.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from collections import Iterable 7 | 8 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 9 | from ...models import MLModel as _MLModel 10 | if _HAS_SKLEARN: 11 | from sklearn.linear_model import LogisticRegression 12 | from . import _sklearn_util 13 | sklearn_class = LogisticRegression 14 | 15 | from ... import SPECIFICATION_VERSION 16 | from ...models._interface_management import set_classifier_interface_params 17 | from ...proto import Model_pb2 as _Model_pb2 18 | 19 | model_type = 'classifier' 20 | 21 | def convert(model, feature_names, target): 22 | """Convert a Logistic Regression model to the protobuf spec. 23 | Parameters 24 | ---------- 25 | model: LogisticRegression 26 | A trained LogisticRegression model. 27 | 28 | feature_names: [str], optional (default=None) 29 | Name of the input columns. 30 | 31 | target: str, optional (default=None) 32 | Name of the output column. 33 | 34 | Returns 35 | ------- 36 | model_spec: An object of type Model_pb. 37 | Protobuf representation of the model 38 | """ 39 | if not(_HAS_SKLEARN): 40 | raise RuntimeError('scikit-learn not found. 
scikit-learn conversion API is disabled.') 41 | 42 | _sklearn_util.check_expected_type(model, LogisticRegression) 43 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) 44 | 45 | return _MLModel(_convert(model, feature_names, target)) 46 | 47 | 48 | def _convert(model, feature_names, target): 49 | spec = _Model_pb2.Model() 50 | spec.specificationVersion = SPECIFICATION_VERSION 51 | 52 | set_classifier_interface_params(spec, feature_names, model.classes_, 'glmClassifier', output_features=target) 53 | 54 | glmClassifier = spec.glmClassifier 55 | 56 | if model.multi_class == "ovr": 57 | glmClassifier.classEncoding = glmClassifier.OneVsRest 58 | else: 59 | print('[ERROR] Currently "One Vs Rest" is the only supported multiclass option.') 60 | return None 61 | 62 | glmClassifier.postEvaluationTransform = glmClassifier.Logit 63 | 64 | if isinstance(model.intercept_, Iterable): 65 | for val in model.intercept_: 66 | glmClassifier.offset.append(val) 67 | else: 68 | for _ in model.coef_: 69 | glmClassifier.offset.append(model.intercept_) 70 | 71 | for cur_in_row in model.coef_: 72 | cur_out_row = glmClassifier.weights.add() 73 | for val in cur_in_row: 74 | cur_out_row.value.append(val) 75 | 76 | return spec 77 | 78 | def supports_output_scores(model): 79 | return True 80 | 81 | def get_output_classes(model): 82 | if not(_HAS_SKLEARN): 83 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 84 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) 85 | return list(model.classes_) 86 | 87 | def get_input_dimension(model): 88 | if not(_HAS_SKLEARN): 89 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 90 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) 91 | return len(model.coef_[0]) 92 | -------------------------------------------------------------------------------- /converters/sklearn/_normalizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | 7 | from ... import SPECIFICATION_VERSION 8 | from ...models._interface_management import set_transform_interface_params as \ 9 | _set_transform_interface_params 10 | from ...proto import Model_pb2 as _Model_pb2 11 | from ...proto.Normalizer_pb2 import Normalizer as _proto__normalizer 12 | 13 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 14 | from ...models import MLModel as _MLModel 15 | 16 | if _HAS_SKLEARN: 17 | import _sklearn_util 18 | from sklearn.preprocessing import Normalizer 19 | sklearn_class = Normalizer 20 | 21 | model_type = 'transformer' 22 | 23 | 24 | def convert(model, input_features, output_features): 25 | """Convert a normalizer model to the protobuf spec. 26 | 27 | Parameters 28 | ---------- 29 | model: Normalizer 30 | A Normalizer. 31 | 32 | input_features: str 33 | Name of the input column. 34 | 35 | output_features: str 36 | Name of the output column. 37 | 38 | Returns 39 | ------- 40 | model_spec: An object of type Model_pb. 41 | Protobuf representation of the model 42 | """ 43 | 44 | if not(_HAS_SKLEARN): 45 | raise RuntimeError('scikit-learn not found. 
scikit-learn conversion API is disabled.') 46 | 47 | # Test the scikit-learn model 48 | _sklearn_util.check_expected_type(model, Normalizer) 49 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'norm')) 50 | 51 | # Set the interface params. 52 | spec = _Model_pb2.Model() 53 | spec.specificationVersion = SPECIFICATION_VERSION 54 | spec = _set_transform_interface_params(spec, input_features, output_features) 55 | 56 | # Set the one hot encoder parameters 57 | _normalizer_spec = spec.normalizer 58 | if model.norm == 'l1': 59 | _normalizer_spec.normType = _proto__normalizer.L1 60 | elif model.norm == 'l2': 61 | _normalizer_spec.normType = _proto__normalizer.L2 62 | elif model.norm == 'max': 63 | _normalizer_spec.normType = _proto__normalizer.LMax 64 | return _MLModel(spec) 65 | 66 | def update_dimension(model, input_dimension): 67 | """ 68 | Given a model that takes an array of dimension input_dimension, returns 69 | the output dimension. 70 | """ 71 | 72 | # No change 73 | return input_dimension 74 | 75 | def get_input_dimension(model): 76 | # Cannot determine this now. 77 | return None 78 | 79 | -------------------------------------------------------------------------------- /converters/sklearn/_one_hot_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import _sklearn_util 7 | 8 | from ... import SPECIFICATION_VERSION 9 | from ...models._interface_management import set_transform_interface_params 10 | 11 | from ...proto import Model_pb2 as _Model_pb2 12 | from ...proto import FeatureTypes_pb2 as _FeatureTypes_pb2 13 | from ...proto import OneHotEncoder_pb2 as _OHE_pb2 14 | from ...models import datatypes 15 | from ...models import MLModel as _MLModel 16 | from ...models.feature_vectorizer import create_feature_vectorizer 17 | from ...models.array_feature_extractor import create_array_feature_extractor 18 | 19 | 20 | from ...models.pipeline import Pipeline 21 | 22 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 23 | 24 | if _HAS_SKLEARN: 25 | import sklearn 26 | from sklearn.preprocessing import OneHotEncoder 27 | sklearn_class = OneHotEncoder 28 | 29 | 30 | # model type determines the behavior of this module. 31 | model_type = "transformer" 32 | 33 | def convert(model, input_features, output_features): 34 | """Convert a one-hot-encoder model to the protobuf spec. 35 | 36 | Parameters 37 | ---------- 38 | model: OneHotEncoder 39 | A trained one-hot encoder model. 40 | 41 | input_features: str, optional 42 | Name of the input column. 43 | 44 | output_features: str, optional 45 | Name of the output column. 46 | 47 | Returns 48 | ------- 49 | model_spec: An object of type Model_pb. 50 | Protobuf representation of the model 51 | """ 52 | if not(_HAS_SKLEARN): 53 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 54 | 55 | # Make sure the model is fitted. 56 | _sklearn_util.check_expected_type(model, OneHotEncoder) 57 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'active_features_')) 58 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'n_values_')) 59 | 60 | input_dimension = get_input_dimension(model) 61 | 62 | if input_dimension is not None: 63 | # Make sure that our starting dimensions are correctly managed. 
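        # (The converter expects exactly one input feature of Array type whose length matches
        # the encoder's expected number of input columns; the assertions below enforce this.)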
64 | assert len(input_features) == 1 65 | assert input_features[0][1] == datatypes.Array(input_dimension) 66 | 67 | input_dimension = input_features[0][1].num_elements 68 | 69 | expected_output_dimension = update_dimension(model, input_dimension) 70 | assert output_features[0][1] == datatypes.Array(expected_output_dimension) 71 | 72 | # Create a pipeline that can do all of the subsequent feature extraction. 73 | feature_vectorizer_input_features = [] 74 | feature_vectorizer_size_map = {} 75 | 76 | if model.categorical_features == 'all': 77 | _categorical_features = set(xrange(input_dimension)) 78 | _cat_feature_idx_mapping = dict( (i, i) for i in xrange(input_dimension)) 79 | else: 80 | _categorical_features = set(model.categorical_features) 81 | _cat_feature_idx_mapping = dict( (_idx, i) for i, _idx in enumerate(sorted(model.categorical_features))) 82 | 83 | pline = Pipeline(input_features, output_features) 84 | 85 | # Track the overall packing index, which determins the output ordering. 86 | pack_idx = 0 87 | 88 | # First, go through all the columns that are encoded. The sklearn OHE puts 89 | # all of these first, regardless of their original ordering. 90 | for idx in xrange(input_dimension): 91 | f_name = "__OHE_%d__" % pack_idx 92 | 93 | if idx in _categorical_features: 94 | 95 | # This input column is one hot encoded 96 | feature_extractor_spec = create_array_feature_extractor( 97 | input_features, f_name, idx, output_type = 'Int64') 98 | 99 | pline.add_model(feature_extractor_spec) 100 | 101 | _cat_feature_idx = _cat_feature_idx_mapping[idx] 102 | 103 | ohe_input_features = [(f_name, datatypes.Int64())] 104 | ohe_output_features = [(f_name, datatypes.Dictionary('Int64'))] 105 | 106 | # Create a one hot encoder per column 107 | o_spec = _Model_pb2.Model() 108 | o_spec.specificationVersion = SPECIFICATION_VERSION 109 | o_spec = set_transform_interface_params(o_spec, ohe_input_features, ohe_output_features) 110 | 111 | ohe_spec = o_spec.oneHotEncoder 112 | ohe_spec.outputSparse = True 113 | 114 | if model.handle_unknown == 'error': 115 | ohe_spec.handleUnknown = _OHE_pb2.OneHotEncoder.HandleUnknown.Value('ErrorOnUnknown') 116 | else: 117 | ohe_spec.handleUnknown = _OHE_pb2.OneHotEncoder.HandleUnknown.Value('IgnoreUnknown') 118 | 119 | # Need to do a quick search to find the part of the active_features_ mask 120 | # that represents the categorical variables in our part. Could do this 121 | # with binary search, but we probably don't need speed so much here. 122 | def bs_find(a, i): 123 | lb, k = 0, len(a) 124 | while k > 0: 125 | _idx = lb + (k // 2) 126 | if a[_idx] < i: 127 | lb = _idx + 1 128 | k -= 1 129 | k = (k // 2) 130 | 131 | return lb 132 | 133 | # Here are the indices we are looking fo 134 | f_idx_bottom = model.feature_indices_[_cat_feature_idx] 135 | f_idx_top = model.feature_indices_[_cat_feature_idx + 1] 136 | 137 | # Now find where in the active features list we should look. 138 | cat_feat_idx_bottom = bs_find(model.active_features_, f_idx_bottom) 139 | cat_feat_idx_top = bs_find(model.active_features_, f_idx_top) 140 | n_cat_values = cat_feat_idx_top - cat_feat_idx_bottom 141 | 142 | for i in range(cat_feat_idx_bottom, cat_feat_idx_top): 143 | # The actual categorical value is stored as an offset in the active_features list. 144 | cat_idx = model.active_features_[i] - f_idx_bottom 145 | ohe_spec.int64Categories.vector.append(cat_idx) 146 | 147 | # Add the ohe to the pipeline 148 | pline.add_model(o_spec) 149 | 150 | # Add the result to the feature_vectorizer at the end. 
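            # Each encoded column therefore contributes an n_cat_values-dimensional block
            # (one slot per observed category) to the final dense output vector.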
151 | feature_vectorizer_input_features.append( (f_name, datatypes.Dictionary('Int64')) ) 152 | feature_vectorizer_size_map[f_name] = n_cat_values 153 | 154 | pack_idx += 1 155 | 156 | # Now go through all the columns that are not encoded as the sklearn OHE puts 157 | # these after the encoded ones. For speed, we can put these all in a single 158 | # ArrayFeatureExtractor 159 | # 160 | pass_through_features = [idx for idx in xrange(input_dimension) 161 | if idx not in _categorical_features] 162 | 163 | if pass_through_features: 164 | 165 | f_name = "__OHE_pass_through__" 166 | 167 | 168 | # This input column is not one hot encoded 169 | feature_extractor_spec = create_array_feature_extractor( 170 | input_features, f_name, pass_through_features) 171 | 172 | pline.add_model(feature_extractor_spec) 173 | feature_vectorizer_input_features.append( 174 | (f_name, datatypes.Array(len(pass_through_features))) ) 175 | 176 | 177 | # Finally, add the feature vectorizer to the pipeline. 178 | output_feature_name = output_features[0][0] 179 | output_feature_dimension = output_features[0][1].num_elements 180 | 181 | fvec, _num_out_dim = create_feature_vectorizer(feature_vectorizer_input_features, 182 | output_features[0][0], feature_vectorizer_size_map) 183 | 184 | # Make sure that the feature vectorizer input actually matches up with the 185 | assert _num_out_dim == output_features[0][1].num_elements 186 | 187 | pline.add_model(fvec) 188 | 189 | return _MLModel(pline.spec) 190 | 191 | 192 | def update_dimension(model, input_dimension): 193 | """ 194 | Given a model that takes an array of dimension input_dimension, returns 195 | the output dimension. 196 | """ 197 | if not(_HAS_SKLEARN): 198 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 199 | 200 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'active_features_')) 201 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'n_values_')) 202 | 203 | if model.categorical_features == 'all': 204 | return len(model.active_features_) 205 | else: 206 | out_dimension = (len(model.active_features_) 207 | + (input_dimension - len(model.n_values_))) 208 | 209 | return out_dimension 210 | 211 | 212 | def get_input_dimension(model): 213 | if not(_HAS_SKLEARN): 214 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 215 | 216 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'active_features_')) 217 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'n_values_')) 218 | 219 | if model.categorical_features == 'all': 220 | return len(model.feature_indices_) - 1 221 | else: 222 | # This can't actually be determined from the model as indices after the 223 | # rest of the categorical values don't seem to be tracked 224 | return None 225 | 226 | -------------------------------------------------------------------------------- /converters/sklearn/_random_forest_classifier.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 
2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from _tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble 7 | from _tree_ensemble import get_input_dimension 8 | 9 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 10 | from ...models import MLModel as _MLModel 11 | 12 | if _HAS_SKLEARN: 13 | import sklearn.ensemble as _ensemble 14 | import _sklearn_util 15 | sklearn_class = _ensemble.RandomForestClassifier 16 | 17 | model_type = 'classifier' 18 | 19 | def convert(model, feature_names, target): 20 | """Convert a boosted tree model to protobuf format. 21 | 22 | Parameters 23 | ---------- 24 | decision_tree : RandomForestClassifier 25 | A trained scikit-learn tree model. 26 | 27 | feature_names: [str] 28 | Name of the input columns. 29 | 30 | target: str 31 | Name of the output column. 32 | 33 | Returns 34 | ------- 35 | model_spec: An object of type Model_pb. 36 | Protobuf representation of the model 37 | """ 38 | if not(_HAS_SKLEARN): 39 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 40 | 41 | _sklearn_util.check_expected_type(model, _ensemble.RandomForestClassifier) 42 | def is_rf_model(m): 43 | if len(m.estimators_) == 0: 44 | return False 45 | if hasattr(m, 'estimators_') and m.estimators_ is not None: 46 | for t in m.estimators_: 47 | if not hasattr(t, 'tree_') or t.tree_ is None: 48 | return False 49 | return True 50 | else: 51 | return False 52 | _sklearn_util.check_fitted(model, is_rf_model) 53 | return _MLModel(_convert_tree_ensemble(model, feature_names, target, mode = 'classifier', 54 | class_labels = model.classes_)) 55 | 56 | def supports_output_scores(model): 57 | return True 58 | 59 | def get_output_classes(model): 60 | return list(model.classes_) 61 | 62 | 63 | -------------------------------------------------------------------------------- /converters/sklearn/_random_forest_regressor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from _tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble 7 | from _tree_ensemble import get_input_dimension 8 | 9 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 10 | from ...models import MLModel as _MLModel 11 | 12 | if _HAS_SKLEARN: 13 | import sklearn.ensemble as _ensemble 14 | import _sklearn_util 15 | sklearn_class = _ensemble.RandomForestRegressor 16 | 17 | model_type = 'regressor' 18 | 19 | def convert(model, feature_names, target): 20 | """Convert a boosted tree model to protobuf format. 21 | 22 | Parameters 23 | ---------- 24 | decision_tree : RandomForestRegressor 25 | A trained scikit-learn tree model. 26 | 27 | feature_names: [str] 28 | Name of the input columns. 29 | 30 | target: str 31 | Name of the output column. 32 | 33 | Returns 34 | ------- 35 | model_spec: An object of type Model_pb. 36 | Protobuf representation of the model 37 | """ 38 | if not(_HAS_SKLEARN): 39 | raise RuntimeError('scikit-learn not found. 
scikit-learn conversion API is disabled.') 40 | 41 | _sklearn_util.check_expected_type(model, _ensemble.RandomForestRegressor) 42 | def is_rf_model(m): 43 | if len(m.estimators_) == 0: 44 | return False 45 | if hasattr(m, 'estimators_') and m.estimators_ is not None: 46 | for t in m.estimators_: 47 | if not hasattr(t, 'tree_') or t.tree_ is None: 48 | return False 49 | return True 50 | else: 51 | return False 52 | _sklearn_util.check_fitted(model, is_rf_model) 53 | return _MLModel(_convert_tree_ensemble(model, feature_names, target)) 54 | 55 | 56 | -------------------------------------------------------------------------------- /converters/sklearn/_sklearn_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | def check_fitted(model, func): 7 | """Check if a model is fitted. Raise error if not. 8 | 9 | Parameters 10 | ---------- 11 | model: model 12 | Any scikit-learn model 13 | 14 | func: model 15 | Function to check if a model is not trained. 16 | """ 17 | if not func(model): 18 | raise TypeError("Expected a 'fitted' model for conversion") 19 | 20 | def check_expected_type(model, expected_type): 21 | """Check if a model is of the right type. Raise error if not. 22 | 23 | Parameters 24 | ---------- 25 | model: model 26 | Any scikit-learn model 27 | 28 | expected_type: Type 29 | Expected type of the scikit-learn. 30 | """ 31 | if (model.__class__.__name__ != expected_type.__name__): 32 | raise TypeError("Expected model of type '%s' (got %s)" % \ 33 | (expected_type.__name__, model.__class__.__name__)) 34 | 35 | -------------------------------------------------------------------------------- /converters/sklearn/_standard_scaler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | 7 | from ... import SPECIFICATION_VERSION 8 | from ...models._interface_management import set_transform_interface_params as \ 9 | _set_transform_interface_params 10 | from ...proto import Model_pb2 as _Model_pb2 11 | from ...proto import FeatureTypes_pb2 as _FeatureTypes_pb2 12 | 13 | from ..._deps import HAS_SKLEARN as _HAS_SKLEARN 14 | from ...models import MLModel as _MLModel 15 | 16 | if _HAS_SKLEARN: 17 | import _sklearn_util 18 | import sklearn 19 | from sklearn.preprocessing import StandardScaler 20 | sklearn_class = StandardScaler 21 | 22 | model_type = 'transformer' 23 | 24 | def convert(model, input_features, output_features): 25 | """Convert a _imputer model to the protobuf spec. 26 | 27 | Parameters 28 | ---------- 29 | model: Imputer 30 | A trained Imputer model. 31 | 32 | input_features: str 33 | Name of the input column. 34 | 35 | output_features: str 36 | Name of the output column. 37 | 38 | Returns 39 | ------- 40 | model_spec: An object of type Model_pb. 41 | Protobuf representation of the model 42 | """ 43 | if not(_HAS_SKLEARN): 44 | raise RuntimeError('scikit-learn not found. 
scikit-learn conversion API is disabled.') 45 | 46 | # Test the scikit-learn model 47 | _sklearn_util.check_expected_type(model, StandardScaler) 48 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'mean_')) 49 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'scale_')) 50 | 51 | # Set the interface params. 52 | spec = _Model_pb2.Model() 53 | spec.specificationVersion = SPECIFICATION_VERSION 54 | spec = _set_transform_interface_params(spec, input_features, output_features) 55 | 56 | # Set the parameters 57 | tr_spec = spec.scaler 58 | for x in model.mean_: 59 | tr_spec.shiftValue.append(-x) 60 | 61 | for x in model.scale_: 62 | tr_spec.scaleValue.append(1.0 / x) 63 | 64 | return _MLModel(spec) 65 | 66 | def update_dimension(model, input_dimension): 67 | if not(_HAS_SKLEARN): 68 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 69 | 70 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'mean_')) 71 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'scale_')) 72 | # Nothing to do for this model 73 | return input_dimension 74 | 75 | def get_input_dimension(model): 76 | if not(_HAS_SKLEARN): 77 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 78 | 79 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'mean_')) 80 | _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'scale_')) 81 | return len(model.mean_) 82 | 83 | -------------------------------------------------------------------------------- /converters/sklearn/_svm_common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | """ 7 | Common stuff for SVMs 8 | """ 9 | 10 | 11 | def _set_kernel(model, spec): 12 | """ 13 | Takes the sklearn SVM model and returns the spec with the protobuf kernel for that model. 14 | """ 15 | def gamma_value(model): 16 | if(model.gamma == 'auto'): 17 | # auto gamma value is 1/num_features 18 | return 1/float(len(model.support_vectors_[0])) 19 | else: 20 | return model.gamma 21 | 22 | 23 | result = None 24 | if(model.kernel == 'linear'): 25 | spec.kernel.linearKernel.MergeFromString('') # hack to set kernel to an empty type 26 | elif(model.kernel == 'rbf'): 27 | spec.kernel.rbfKernel.gamma = gamma_value(model) 28 | elif(model.kernel == 'poly'): 29 | spec.kernel.polyKernel.gamma = gamma_value(model) 30 | spec.kernel.polyKernel.c = model.coef0 31 | spec.kernel.polyKernel.degree = model.degree 32 | elif(model.kernel == 'sigmoid'): 33 | spec.kernel.sigmoidKernel.gamma = gamma_value(model) 34 | spec.kernel.sigmoidKernel.c = model.coef0 35 | else: 36 | raise ValueError('Unsupported kernel. The following kernel are supported: linear, RBF, polynomial and sigmoid.') 37 | return result 38 | 39 | -------------------------------------------------------------------------------- /converters/sklearn/_tree_ensemble.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 
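# A small illustration of _svm_common._set_kernel (defined above), assuming an SVC fitted with
# kernel='rbf', gamma='auto', and 3 input features: the resulting spec carries
# spec.kernel.rbfKernel.gamma == 1.0 / 3, while kernel='linear' simply sets an empty linearKernel.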
2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from ...models.tree_ensemble import TreeEnsembleRegressor, TreeEnsembleClassifier 7 | from ...models._feature_management import process_or_validate_features 8 | 9 | from ..._deps import HAS_SKLEARN 10 | 11 | if HAS_SKLEARN: 12 | from sklearn.tree import _tree 13 | 14 | import numpy as _np 15 | 16 | def _get_value(scikit_value, mode = 'regressor', scaling = 1.0, n_classes = 2, tree_index = 0): 17 | """ Get the right value from the scikit-tree 18 | """ 19 | # Regression 20 | if mode == 'regressor': 21 | return scikit_value[0] * scaling 22 | 23 | # Binary classification 24 | if n_classes == 2: 25 | # Decision tree 26 | if len(scikit_value[0]) != 1: 27 | value = scikit_value[0][1] * scaling / scikit_value[0].sum() 28 | # boosted tree 29 | else: 30 | value = scikit_value[0][0] * scaling 31 | if value == 0.5: 32 | value = value - 1e-7 33 | 34 | # Multiclass classification 35 | else: 36 | # Decision tree 37 | if len(scikit_value[0]) != 1: 38 | value = scikit_value[0] / scikit_value[0].sum() 39 | # boosted tree 40 | else: 41 | value = {tree_index: scikit_value[0] * scaling} 42 | return value 43 | 44 | def _recurse(coreml_tree, scikit_tree, tree_id, node_id, scaling = 1.0, mode = 'regressor', 45 | n_classes = 2, tree_index = 0): 46 | """Traverse through the tree and append to the tree spec. 47 | """ 48 | if not(HAS_SKLEARN): 49 | raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') 50 | 51 | ## Recursion should not be called on the leaf node. 52 | if node_id == _tree.TREE_LEAF: 53 | raise ValueError("Invalid node_id %s" % _tree.TREE_LEAF) 54 | 55 | # Add a branch node to the tree 56 | if scikit_tree.children_left[node_id] != _tree.TREE_LEAF: 57 | branch_mode = 'BranchOnValueLessThanEqual' 58 | feature_index = scikit_tree.feature[node_id] 59 | feature_value = scikit_tree.threshold[node_id] 60 | left_child_id = scikit_tree.children_left[node_id] 61 | right_child_id = scikit_tree.children_right[node_id] 62 | 63 | # Add a branch node 64 | coreml_tree.add_branch_node(tree_id, node_id, feature_index, 65 | feature_value, branch_mode, left_child_id, right_child_id) 66 | 67 | # Now recurse 68 | _recurse(coreml_tree, scikit_tree, tree_id, left_child_id, scaling, mode, n_classes, tree_index) 69 | _recurse(coreml_tree, scikit_tree, tree_id, right_child_id, scaling, mode, n_classes, tree_index) 70 | 71 | # Add a leaf node to the tree 72 | else: 73 | # Get the scikit-learn value 74 | if scikit_tree.n_outputs != 1: 75 | raise ValueError('Expected only 1 output in the scikit-learn tree.') 76 | value = _get_value(scikit_tree.value[node_id], mode, scaling, n_classes, tree_index) 77 | coreml_tree.add_leaf_node(tree_id, node_id, value) 78 | 79 | 80 | def get_input_dimension(model): 81 | 82 | if hasattr(model, 'n_features_'): 83 | return model.n_features_ 84 | 85 | elif hasattr(model, 'n_estimators'): 86 | if model.n_estimators == 0: 87 | raise ValueError("model not trained.") 88 | 89 | try: 90 | return model.estimators_[0,0].n_features_ 91 | except IndexError: 92 | raise ValueError("Model not trained or invalid model.") 93 | else: 94 | raise ValueError("Unable to obtain input dimension from model.") 95 | 96 | 97 | def convert_tree_ensemble(model, input_features, 98 | output_features = ('predicted_class', float), 99 | mode = 'regressor', 100 | base_prediction = None, 101 | class_labels = None, 102 | 
post_evaluation_transform = None): 103 | """ 104 | Convert a generic tree regressor model to the protobuf spec. 105 | 106 | This currently supports: 107 | * Decision tree regression 108 | * Gradient bosted tree regression 109 | * Random forest regression 110 | * Decision tree classifier. 111 | * Gradient boosted tree classifier. 112 | * Random forest classifier. 113 | 114 | ---------- 115 | Parameters 116 | model: [DecisionTreeRegressor | GradientBoostingRegression | RandomForestRegressor] 117 | A scikit learn tree model. 118 | 119 | feature_names : list of strings, optional (default=None) 120 | Names of each of the features. 121 | 122 | target: str 123 | Name of the output column. 124 | 125 | base_prediction: double 126 | Base prediction value. 127 | 128 | mode: str in ['regressor', 'classifier'] 129 | Mode of the tree model. 130 | 131 | class_labels: list[int] 132 | List of classes 133 | 134 | post_evaluation_transform: list[int] 135 | Post evaluation transform 136 | 137 | Returns 138 | ------- 139 | model_spec: An object of type Model_pb. 140 | Protobuf representation of the model 141 | """ 142 | 143 | num_dimensions = get_input_dimension(model) 144 | features = process_or_validate_features(input_features, num_dimensions) 145 | 146 | n_classes = None 147 | if mode == 'classifier': 148 | n_classes = model.n_classes_ 149 | if class_labels is None: 150 | class_labels = range(n_classes) 151 | else: 152 | if len(class_labels) != n_classes: 153 | raise ValueError("Number of classes in model (%d) does not match " 154 | "length of supplied class list (%d)." 155 | % (n_classes, len(class_labels))) 156 | 157 | coreml_tree = TreeEnsembleClassifier(input_features, class_labels, output_features) 158 | if post_evaluation_transform is not None: 159 | coreml_tree.set_post_evaluation_transform(post_evaluation_transform) 160 | 161 | # Base prediction not provided 162 | if base_prediction is None: 163 | if n_classes == 2: 164 | base_prediction = [0.0] 165 | else: 166 | base_prediction = [0.0 for c in range(n_classes)] 167 | coreml_tree.set_default_prediction_value(base_prediction) 168 | else: 169 | if base_prediction is None: 170 | base_prediction = 0.0 171 | coreml_tree = TreeEnsembleRegressor(input_features, output_features) 172 | coreml_tree.set_default_prediction_value(base_prediction) 173 | 174 | # Single tree 175 | if hasattr(model, 'tree_'): 176 | _recurse(coreml_tree, model.tree_, tree_id = 0, node_id = 0, 177 | mode = mode, n_classes = n_classes) 178 | 179 | # Multiple trees 180 | elif hasattr(model, 'estimators_'): 181 | is_ensembling_in_separate_trees = False 182 | if type(model.estimators_) != list: 183 | is_ensembling_in_separate_trees = len(model.estimators_.shape) > 0 and model.estimators_.shape[1] > 1 184 | estimators = model.estimators_.flatten() 185 | else: 186 | estimators = model.estimators_ 187 | 188 | scaling = model.learning_rate if hasattr(model, 'learning_rate') else 1.0 / len(estimators) 189 | for tree_id, base_model in enumerate(estimators): 190 | if is_ensembling_in_separate_trees: 191 | tree_index = tree_id % n_classes 192 | else: 193 | tree_index = 0 194 | _recurse(coreml_tree, base_model.tree_, tree_id, node_id = 0, 195 | scaling = scaling, mode = mode, n_classes = n_classes, tree_index = tree_index) 196 | else: 197 | raise TypeError('Unknown scikit-learn tree model type.') 198 | 199 | return coreml_tree.spec 200 | 201 | -------------------------------------------------------------------------------- /converters/xgboost/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from _tree import convert 7 | -------------------------------------------------------------------------------- /converters/xgboost/_tree.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from _tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble 7 | from ...models import MLModel as _MLModel 8 | 9 | def convert(model, feature_names = None, target = 'target'): 10 | """ 11 | Convert a trained XGBoost model to Core ML format. 12 | 13 | Parameters 14 | ---------- 15 | model: Booster 16 | A trained XGBoost tree model. 17 | 18 | feature_names: [str] | str 19 | Names of input features that will be exposed in the Core ML model 20 | interface. 21 | 22 | Can be set to one of the following: 23 | 24 | - None for using the feature names from the model. 25 | - List of names of the input features that should be exposed in the 26 | interface to the Core ML model. These input features are in the same 27 | order as the XGBoost model. 28 | 29 | target: str 30 | Name of the output feature exposed to the Core ML model. 31 | 32 | Returns 33 | ------- 34 | model: MLModel 35 | Returns an MLModel instance representing a Core ML model. 36 | 37 | Examples 38 | -------- 39 | .. sourcecode:: python 40 | 41 | # Convert it with default input and output names 42 | >>> import coremltools 43 | >>> coreml_model = coremltools.converters.xgboost.convert(model) 44 | 45 | # Saving the Core ML model to a file. 46 | >>> coreml_model.save('my_model.mlmodel') 47 | """ 48 | return _MLModel(_convert_tree_ensemble(model, feature_names, target)) 49 | -------------------------------------------------------------------------------- /converters/xgboost/_tree_ensemble.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from ...models.tree_ensemble import TreeEnsembleRegressor as _TreeEnsembleRegressor 7 | 8 | from ..._deps import HAS_XGBOOST as _HAS_XGBOOST 9 | if _HAS_XGBOOST: 10 | import xgboost as _xgboost 11 | 12 | def recurse_json(mlkit_tree, xgb_tree_json, tree_id, node_id, feature_map = None): 13 | """Traverse through the tree and append to the tree spec. 
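    A brief summary of the arguments, inferred from the usage below: `xgb_tree_json` is the JSON
    dictionary for a single node as produced by Booster.get_dump(dump_format='json'), `node_id`
    is that node's 'nodeid' value, and `feature_map`, when provided, maps the feature names
    appearing in 'split' fields to column indices.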
14 | """ 15 | relative_hit_rate = None 16 | 17 | try: 18 | relative_hit_rate = xgb_tree_json['cover'] 19 | except KeyError: 20 | pass 21 | 22 | 23 | # Fill node attributes 24 | if 'leaf' not in xgb_tree_json: 25 | branch_mode = 'BranchOnValueLessThan' 26 | split_name = xgb_tree_json['split'] 27 | feature_index = split_name if not feature_map else feature_map[split_name] 28 | feature_value = xgb_tree_json['split_condition'] 29 | true_child_id = xgb_tree_json['yes'] 30 | false_child_id = xgb_tree_json['no'] 31 | 32 | # Get the missing value behavior correct 33 | missing_value_tracks_true_child = False 34 | 35 | try: 36 | if xgb_tree_json['missing'] == true_child_id: 37 | missing_value_tracks_true_child = True 38 | except KeyError: 39 | pass 40 | 41 | mlkit_tree.add_branch_node(tree_id, node_id, feature_index, 42 | feature_value, branch_mode, true_child_id, false_child_id, 43 | relative_hit_rate = relative_hit_rate, 44 | missing_value_tracks_true_child = missing_value_tracks_true_child) 45 | 46 | else: 47 | value = xgb_tree_json["leaf"] 48 | mlkit_tree.add_leaf_node(tree_id, node_id, value, 49 | relative_hit_rate = relative_hit_rate) 50 | 51 | # Now recurse 52 | if "children" in xgb_tree_json: 53 | for child in xgb_tree_json["children"]: 54 | recurse_json(mlkit_tree, child, tree_id, child['nodeid'], feature_map) 55 | 56 | def convert_tree_ensemble(model, feature_names, target, class_names=None): 57 | """Convert a generic tree model to the protobuf spec. 58 | 59 | This currently supports: 60 | * Decision tree regression 61 | 62 | Parameters 63 | ---------- 64 | model: str | Booster 65 | Path on disk where the XGboost JSON representation of the model is or 66 | a handle to the XGboost model. 67 | 68 | feature_names : list of strings or None 69 | Names of each of the features. When set to None, the feature names are 70 | extracted from the model. 71 | 72 | target: str, 73 | Name of the output column. 74 | 75 | class_names: list of strings, optional (default=None) 76 | Names of the class variables (for classification). 77 | 78 | Returns 79 | ------- 80 | model_spec: An object of type Model_pb. 81 | Protobuf representation of the model 82 | """ 83 | if not(_HAS_XGBOOST): 84 | raise RuntimeError('xgboost not found. xgboost conversion API is disabled.') 85 | 86 | import json 87 | import os 88 | feature_map = None 89 | if isinstance(model, (_xgboost.core.Booster, _xgboost.XGBRegressor)): 90 | 91 | # Testing a few corner cases that we don't support 92 | if isinstance(model, _xgboost.XGBRegressor): 93 | try: 94 | objective = model.get_xgb_params()["objective"] 95 | except: 96 | objective = None 97 | if objective in ["reg:gamma", "reg:tweedie"]: 98 | raise ValueError("Regression objective '%s' not supported for export." % objective) 99 | 100 | # Now use the booster API. 101 | if isinstance(model, _xgboost.XGBRegressor): 102 | model = model.booster() 103 | 104 | # Xgboost sometimes has feature names in there. Sometimes does not. 105 | if (feature_names is None) and (model.feature_names is None): 106 | raise ValueError("Feature names not present in the model. Must be provided during conversion.") 107 | feature_names = model.feature_names 108 | if feature_names is None: 109 | feature_names = model.feature_names 110 | 111 | xgb_model_str = model.get_dump(with_stats=True, dump_format = 'json') 112 | 113 | if model.feature_names: 114 | feature_map = {f:i for i,f in enumerate(model.feature_names)} 115 | 116 | # Path on the file system where the XGboost model exists. 
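    # (Assumed file format: a JSON list of per-tree JSON strings, i.e. what
    # Booster.get_dump(dump_format='json') returns; it is parsed with json.load below.)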
117 | elif isinstance(model, str): 118 | if not os.path.exists(model): 119 | raise TypeError("Invalid path %s." % model) 120 | with open(model) as f: 121 | xgb_model_str = json.load(f) 122 | feature_map = {f:i for i,f in enumerate(feature_names)} 123 | else: 124 | raise TypeError("Unexpected type. Expecting XGBoost model.") 125 | 126 | mlkit_tree = _TreeEnsembleRegressor(feature_names, target) 127 | mlkit_tree.set_default_prediction_value(0.5) 128 | for xgb_tree_id, xgb_tree_str in enumerate(xgb_model_str): 129 | xgb_tree_json = json.loads(xgb_tree_str) 130 | recurse_json(mlkit_tree, xgb_tree_json, xgb_tree_id, node_id = 0, 131 | feature_map = feature_map) 132 | 133 | return mlkit_tree.spec 134 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import datatypes 7 | 8 | import _feature_management 9 | 10 | import pipeline 11 | import tree_ensemble 12 | import neural_network 13 | 14 | import _interface_management 15 | 16 | from model import MLModel 17 | -------------------------------------------------------------------------------- /models/_feature_management.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from collections import defaultdict 7 | from copy import copy 8 | import datatypes 9 | from functools import reduce 10 | import operator as op 11 | import numpy as _numpy 12 | from .. import SPECIFICATION_VERSION 13 | from ..proto import Model_pb2 as _Model_pb2 14 | from ..proto import FeatureTypes_pb2 as _FeatureTypes_pb2 15 | import numpy as _np 16 | 17 | def process_or_validate_classifier_output_features( 18 | output_features, class_labels, supports_class_scores = True): 19 | 20 | """ 21 | Given a list of class labels and a list of output_features, validate the 22 | list and return a valid version of output_features with all the correct 23 | data type information included. 24 | """ 25 | 26 | def raise_error(msg): 27 | 28 | raise ValueError("Classifier error: %s" % msg) 29 | 30 | class_labels = list(class_labels) 31 | 32 | # First, we need to determine the type of the classes. 
33 | _int_types = (bool, int, long, _np.bool_, _np.int32, _np.int64) 34 | 35 | _str_types = (str, unicode) 36 | 37 | if all(isinstance(cl, _int_types) for cl in class_labels): 38 | output_class_type = datatypes.Int64() 39 | 40 | elif all(isinstance(cl, _str_types) for cl in class_labels): 41 | output_class_type = datatypes.String() 42 | 43 | else: 44 | raise ValueError('Class labels must be all of type int or all of type string.') 45 | 46 | if output_features is None: 47 | 48 | out = [("classLabel", output_class_type)] 49 | 50 | if supports_class_scores: 51 | out += [("classProbability", datatypes.Dictionary(output_class_type))] 52 | 53 | elif isinstance(output_features, (str, unicode)): 54 | 55 | out = [(output_features, output_class_type)] 56 | 57 | if supports_class_scores: 58 | out += [("classProbability", datatypes.Dictionary(output_class_type))] 59 | 60 | elif (isinstance(output_features, (list, tuple)) 61 | and all(isinstance(fn, (str, unicode)) for fn in output_features) 62 | and len(output_features) == 2): 63 | 64 | if supports_class_scores: 65 | out = [(output_features[0], output_class_types), 66 | (output_features[1], datatypes.Dictionary(output_class_type))] 67 | else: 68 | raise ValueError("Classifier model (as trained) does not support output scores for classes.") 69 | 70 | elif is_valid_feature_list(output_features): 71 | 72 | output_features = [(k, datatypes._normalize_datatype(dt)) for k, dt in output_features] 73 | 74 | if len(output_features) == 1 or not supports_class_scores: 75 | if not output_features[0][1] == output_class_type: 76 | raise ValueError("Type of output class feature does not match type of class labels.") 77 | 78 | else: 79 | # Make sure the first two output features specified give the output 80 | # class field and the output class scores dictionary field 81 | if (isinstance(output_features[0][1], datatypes.Dictionary) 82 | and isinstance(output_features[1][1], output_class_type)): 83 | 84 | output_features[0], output_features[1] = output_features[1], output_features[0] 85 | 86 | if not isinstance(output_features[1][1], datatypes.Dictionary): 87 | raise_error("Output features class scores should be dictionary type.") 88 | 89 | if output_features[1][1].key_type != output_class_type: 90 | raise_error("Class scores dictionary key type does not match type of class labels.") 91 | 92 | if output_features[0][1] != output_class_type: 93 | raise_error("Specified type of output class does not match type of class labels.") 94 | 95 | # NOTE: We are intentionally allowing the case where additional fields are allowed 96 | # beyond the original two features. 
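            # Illustrative summary (the feature names below are hypothetical) of the
            # output_features forms accepted by this function:
            #   None                              -> [("classLabel", <label type>)] plus a score dictionary
            #   "predictedLabel"                  -> [("predictedLabel", <label type>)] plus a score dictionary
            #   ("predictedLabel", "classScores") -> class label output plus a score dictionary output
            #   [("predictedLabel", datatypes.String()),
            #    ("classScores", datatypes.Dictionary(datatypes.String()))]
            #                                     -> used as given, after validation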
97 | 98 | out = output_features 99 | 100 | else: 101 | raise_error("Form of output features not recognized") 102 | 103 | return out 104 | 105 | def is_valid_feature_list(features): 106 | 107 | # Just test all the ways this could be 108 | return (type(features) is list 109 | and len(features) >= 1 110 | and all(type(t) is tuple and len(t) == 2 for t in features) 111 | and all(isinstance(n, str) for n, td in features) 112 | and all(datatypes._is_valid_datatype(td) for n, td in features)) 113 | 114 | 115 | def dimension_of_array_features(features): 116 | if not is_valid_feature_list(features): 117 | raise ValueError("Expected feature list in valid form.") 118 | 119 | dim = 0 120 | for n, td in features: 121 | if isinstance(td, (datatypes.Int64, datatypes.Double)): 122 | dim += 1 123 | elif isinstance(td, datatypes.Array): 124 | dim += reduce(op.mul, td.dimensions, 1) 125 | else: 126 | raise ValueError("Unable to determine number of dimensions from feature list.") 127 | 128 | return dim 129 | 130 | def process_or_validate_features(features, num_dimensions = None, feature_type_map = {}): 131 | """ 132 | Puts features into a standard form from a number of different possible forms. 133 | 134 | The standard form is a list of 2-tuples of (name, datatype) pairs. The name 135 | is a string and the datatype is an object as defined in the _datatype module. 136 | 137 | The possible input forms are as follows: 138 | 139 | * A list of strings. in this case, the overall dimension is assumed to be 140 | the length of the list. If neighboring names are identical, they are 141 | assumed to be an input array of that length. For example: 142 | 143 | ["a", "b", "c"] 144 | 145 | resolves to 146 | 147 | [("a", Double), ("b", Double), ("c", Double)]. 148 | 149 | And: 150 | 151 | ["a", "a", "b"] 152 | 153 | resolves to 154 | 155 | [("a", Array(2)), ("b", Double)]. 156 | 157 | * A dictionary of keys to indices or ranges of feature indices. 158 | 159 | In this case, it's presented as a mapping from keys to indices or 160 | ranges of contiguous indices. For example, 161 | 162 | {"a" : 0, "b" : [2,3], "c" : 1} 163 | 164 | Resolves to 165 | 166 | [("a", Double), ("c", Double), ("b", Array(2))]. 167 | 168 | Note that the ordering is determined by the indices. 169 | 170 | * A single string. In this case, the input is assumed to be a single array, 171 | with the number of dimensions set using num_dimensions. 172 | 173 | 174 | Notes: 175 | 176 | If the features variable is in the standard form, it is simply checked and 177 | returned. 178 | 179 | If num_dimensions is given, it is used to check against the existing features, 180 | or fill in missing information in the case when features is a single string. 181 | """ 182 | 183 | original_features = copy(features) 184 | 185 | if num_dimensions is not None and not isinstance(num_dimensions, int): 186 | raise TypeError("num_dimensions must be None or an integer, not '%s'" 187 | % str(type(num_dimensions))) 188 | 189 | 190 | def raise_type_error(additional_msg): 191 | raise TypeError("Error processing feature list: %s\nfeatures = %s" 192 | % (additional_msg, str(original_features))) 193 | 194 | if type(features) is dict and is_valid_feature_list(features.items()): 195 | features = features.items() 196 | 197 | # First, see if the features are already in the correct form. 
If they are, 198 | # then we 199 | if is_valid_feature_list(features): 200 | if num_dimensions is not None: 201 | try: 202 | feature_dims = dimension_of_array_features(features) 203 | except ValueError: 204 | feature_dims = None 205 | 206 | if feature_dims is not None and feature_dims != num_dimensions: 207 | raise_type_error("Dimension mismatch.") 208 | 209 | # We may need to translate some parts of this back to the actual 210 | # datatype class -- e.g. translate str to datatypes.String(). 211 | return [(k, datatypes._normalize_datatype(dt)) for k, dt in features] 212 | 213 | if isinstance(features, (str, unicode)): 214 | if num_dimensions is None: 215 | raise_type_error("If a single feature name is given, then " 216 | "num_dimensions must be provided.") 217 | features = {features : range(num_dimensions)} 218 | 219 | if isinstance(features, (list, tuple, _np.ndarray)): 220 | # Change this into a dictionary 221 | 222 | mapping = defaultdict(lambda: []) 223 | 224 | for i, k in enumerate(features): 225 | if not isinstance(k, (str, unicode)): 226 | raise_type_error("List of feature names must be list of strings.") 227 | 228 | if num_dimensions is not None and len(features) != num_dimensions: 229 | raise_type_error(("List of feature names has wrong length; " 230 | "%d required, %d provided.") 231 | % (num_dimensions, len(features))) 232 | 233 | for i, k in enumerate(features): 234 | mapping[k].append(i) 235 | 236 | # Replace the features 237 | features = mapping 238 | 239 | if not isinstance(features, dict): 240 | raise_type_error("features must be either a list of feature names " 241 | "or a dictionary of feature names to ranges.") 242 | 243 | # We'll be invasive here so make a copy. 244 | features = copy(features) 245 | 246 | for k, v in list(features.iteritems()): 247 | 248 | if not isinstance(k, str): 249 | raise_type_error("Feature names must be strings.") 250 | 251 | def test_index(val): 252 | error = False 253 | try: 254 | if val != int(val): 255 | error = True 256 | except: 257 | error = True 258 | 259 | if error: 260 | raise_type_error("Specified indices for feature %s must be integers." % k) 261 | 262 | if val < 0 or (num_dimensions is not None and val >= num_dimensions): 263 | raise_type_error("Index in feature %s out of range." % k) 264 | 265 | if isinstance(v, (tuple, list, set, xrange)): 266 | for idx in v: 267 | test_index(idx) 268 | 269 | # Replace and update 270 | features[k] = v = list(sorted(v)) 271 | 272 | elif isinstance(v, (int, long)): 273 | test_index(v) 274 | features[k] = v = [v] 275 | else: 276 | raise_type_error(("Value type for feature %s not recognized; " 277 | "values must be either integers, lists or range objects.") % k) 278 | 279 | # check to make sure things are contiguous 280 | if v != range(v[0], v[-1] + 1): 281 | raise_type_error("Index list for feature %s must consist of " 282 | "a contiguous range of indices." % k) 283 | 284 | if len(set(v)) != len(v): 285 | raise_type_error("Index list for feature %s contains duplicates." 
% k) 286 | 287 | # Now, set num dimensions from the list if it's actually None 288 | if num_dimensions is None: 289 | from itertools import chain 290 | num_dimensions = 1 + max(chain(*[il for k, il in features.iteritems()])) 291 | 292 | if (set().union(*features.values()) != set(range(num_dimensions)) 293 | or sum(len(v) for v in features.itervalues()) != num_dimensions): 294 | 295 | raise_type_error("Supplied indices must cover entire range of 0, ..., num_dimensions-1.") 296 | 297 | 298 | # Define the output feature types 299 | output_features = [None]*len(features) 300 | 301 | # Finally, go through and map all these things out as types. 302 | # Sort by first value of the index range. 303 | for i, (k, v) in enumerate(sorted(features.iteritems(), key = lambda t: t[1][0])): 304 | if k in feature_type_map: 305 | output_features[i] = (k, feature_type_map[k]) 306 | 307 | elif len(v) == 1: 308 | output_features[i] = (k, datatypes.Double()) 309 | else: 310 | output_features[i] = (k, datatypes.Array(len(v))) 311 | 312 | return output_features 313 | 314 | -------------------------------------------------------------------------------- /models/_interface_management.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import numpy as _np 7 | 8 | import datatypes 9 | from _feature_management import process_or_validate_features 10 | from _feature_management import is_valid_feature_list 11 | import _feature_management as _fm 12 | 13 | def set_classifier_interface_params(spec, features, class_labels, 14 | model_accessor_for_class_labels, output_features = None): 15 | """ 16 | Common utilities to set the regression interface params. 17 | """ 18 | # Normalize the features list. 19 | features = _fm.process_or_validate_features(features) 20 | 21 | if class_labels is None: 22 | raise ValueError("List of class labels must be provided.") 23 | 24 | n_classes = len(class_labels) 25 | 26 | output_features = _fm.process_or_validate_classifier_output_features(output_features, class_labels) 27 | 28 | if len(output_features) == 1: 29 | predicted_class_output, pred_cl_type = output_features[0] 30 | score_output = None 31 | elif len(output_features) == 2: 32 | predicted_class_output, pred_cl_type = output_features[0] 33 | score_output, score_output_type = output_features[1] 34 | else: 35 | raise ValueError("Provided output classes for a classifier must be " 36 | "a list of features, predicted class and (optionally) class_score.") 37 | 38 | spec.description.predictedFeatureName = predicted_class_output 39 | 40 | # Are they out of order? 41 | if not (pred_cl_type == datatypes.Int64() or pred_cl_type == datatypes.String()): 42 | raise ValueError("Provided predicted class output type not Int64 or String (%s)." 43 | % repr(pred_cl_type)) 44 | 45 | if score_output is not None: 46 | if not isinstance(score_output_type, datatypes.Dictionary): 47 | raise ValueError("Provided class score output type not a Dictionary (%s)." 
48 | % repr(score_output_type)) 49 | 50 | if score_output_type.key_type != pred_cl_type: 51 | raise ValueError(("Provided class score output (%s) key_type (%s) does not " 52 | "match type of class prediction (%s).") 53 | % (score_output, repr(score_output_type.key_type), repr(pred_cl_type))) 54 | 55 | spec.description.predictedProbabilitiesName = score_output 56 | 57 | # add input 58 | for index, (cur_input_name, input_type) in enumerate(features): 59 | input_ = spec.description.input.add() 60 | input_.name = cur_input_name 61 | datatypes._set_datatype(input_.type, input_type) 62 | 63 | # add output 64 | for index, (cur_output_name, output_type) in enumerate(output_features): 65 | output_ = spec.description.output.add() 66 | output_.name = cur_output_name 67 | datatypes._set_datatype(output_.type, output_type) 68 | 69 | # Worry about the class labels 70 | if pred_cl_type == datatypes.String(): 71 | for c in class_labels: 72 | getattr(spec, model_accessor_for_class_labels).stringClassLabels.vector.append(str(c)) 73 | else: 74 | for c in class_labels: 75 | conv_error = False 76 | try: 77 | if not (int(c) == c): 78 | conv_error = True 79 | except: 80 | conv_error = True 81 | 82 | if conv_error: 83 | raise TypeError(("Cannot cast '%s' class to an int type " % str(c)) 84 | + "(class type determined by type of first class).") 85 | 86 | getattr(spec, model_accessor_for_class_labels).int64ClassLabels.vector.append(int(c)) 87 | 88 | # And we are done! 89 | return spec 90 | 91 | def set_regressor_interface_params(spec, features, output_features): 92 | """ Common utilities to set the regresson interface params. 93 | """ 94 | if output_features is None: 95 | output_features = [("predicted_class", datatypes.Double())] 96 | else: 97 | output_features = _fm.process_or_validate_features(output_features, 1) 98 | 99 | if len(output_features) != 1: 100 | raise ValueError("Provided output features for a regressor must be " 101 | "one Double feature.") 102 | 103 | if output_features[0][1] != datatypes.Double(): 104 | raise ValueError("Output type of a regressor must be a Double.") 105 | 106 | prediction_name = output_features[0][0] 107 | spec.description.predictedFeatureName = prediction_name 108 | 109 | # Normalize the features list. 110 | features = _fm.process_or_validate_features(features) 111 | 112 | # add input and output features 113 | for cur_input_name, feature_type in features: 114 | input_ = spec.description.input.add() 115 | input_.name = cur_input_name 116 | datatypes._set_datatype(input_.type, feature_type) 117 | 118 | output_ = spec.description.output.add() 119 | output_.name = prediction_name 120 | datatypes._set_datatype(output_.type, 'Double') 121 | return spec 122 | 123 | def set_transform_interface_params(spec, input_features, output_features, are_optional = False): 124 | """ Common utilities to set transform interface params. 
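    Example (illustrative; assumes a freshly created protobuf spec and the
    datatypes module from this package; the feature names are hypothetical):

        >>> spec = Model_pb2.Model()
        >>> set_transform_interface_params(spec,
        ...     [('data', datatypes.Array(3))],
        ...     [('score', datatypes.Double())])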
125 | """ 126 | input_features = _fm.process_or_validate_features(input_features) 127 | output_features = _fm.process_or_validate_features(output_features) 128 | 129 | # Add input and output features 130 | for (fname, ftype) in input_features: 131 | input_ = spec.description.input.add() 132 | input_.name = fname 133 | datatypes._set_datatype(input_.type, ftype) 134 | if are_optional: 135 | input_.type.isOptional = are_optional 136 | 137 | for (fname, ftype) in output_features: 138 | output_ = spec.description.output.add() 139 | output_.name = fname 140 | datatypes._set_datatype(output_.type, ftype) 141 | 142 | return spec 143 | -------------------------------------------------------------------------------- /models/array_feature_extractor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import datatypes 7 | from .. import SPECIFICATION_VERSION 8 | from ..proto import Model_pb2 as _Model_pb2 9 | from ..proto import FeatureTypes_pb2 as _FeatureTypes_pb2 10 | from _interface_management import set_transform_interface_params 11 | 12 | 13 | 14 | def create_array_feature_extractor(input_features, output_name, extract_indices, 15 | output_type = None): 16 | """ 17 | Creates a feature extractor from an input array feature, return 18 | 19 | input_features is a list of one (name, array) tuple. 20 | 21 | extract_indices is either an integer or a list. If it's an integer, 22 | the output type is by default a double (but may also be an integer). 23 | If a list, the output type is an array. 24 | """ 25 | 26 | # Make sure that our starting stuff is in the proper form. 27 | assert len(input_features) == 1 28 | assert isinstance(input_features[0][1], datatypes.Array) 29 | 30 | # Create the model. 31 | spec = _Model_pb2.Model() 32 | spec.specificationVersion = SPECIFICATION_VERSION 33 | 34 | if isinstance(extract_indices, (int, long)): 35 | extract_indices = [extract_indices] 36 | if output_type is None: 37 | output_type = datatypes.Double() 38 | 39 | elif isinstance(extract_indices, (list, tuple)): 40 | if not all(isinstance(x, (int, long)) for x in extract_indices): 41 | raise TypeError("extract_indices must be an integer or a list of integers.") 42 | 43 | if output_type is None: 44 | output_type = datatypes.Array(len(extract_indices)) 45 | 46 | else: 47 | raise TypeError("extract_indices must be an integer or a list of integers.") 48 | 49 | output_features = [(output_name, output_type)] 50 | 51 | for idx in extract_indices: 52 | assert idx < input_features[0][1].num_elements 53 | spec.arrayFeatureExtractor.extractIndex.append(idx) 54 | 55 | set_transform_interface_params(spec, input_features, output_features) 56 | 57 | return spec 58 | 59 | 60 | -------------------------------------------------------------------------------- /models/datatypes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | """ 7 | Basic Data Types. 
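The types defined here are Int64, Double, String, Array, and Dictionary. For
example (illustrative):

    >>> from coremltools.models import datatypes
    >>> arr_type = datatypes.Array(3)                        # three-element array
    >>> dict_type = datatypes.Dictionary(datatypes.String()) # string-keyed dictionary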
8 | """ 9 | 10 | from ..proto import FeatureTypes_pb2 as _FeatureTypes_pb2 11 | 12 | class _DatatypeBase(object): 13 | def __init__(self, type_tag, full_tag, num_elements): 14 | self.type_tag, self.full_tag = type_tag, full_tag 15 | self.num_elements = num_elements 16 | def __eq__(self, other): 17 | return hasattr(other, "full_tag") and self.full_tag == other.full_tag 18 | def __ne__(self, other): 19 | return not self.__eq__(other) 20 | def __hash__(self): 21 | return hash(self.full_tag) 22 | def __repr__(self): 23 | return self.full_tag 24 | 25 | class Int64(_DatatypeBase): 26 | """ 27 | Int64 Data Type 28 | """ 29 | def __init__(self): 30 | _DatatypeBase.__init__(self, "Int64", "Int64", 1) 31 | 32 | class Double(_DatatypeBase): 33 | """ 34 | Double Data Type 35 | """ 36 | def __init__(self): 37 | _DatatypeBase.__init__(self, "Double", "Double", 1) 38 | 39 | class String(_DatatypeBase): 40 | """ 41 | String Data Type 42 | """ 43 | def __init__(self): 44 | _DatatypeBase.__init__(self, "String", "String", 1) 45 | 46 | class Array(_DatatypeBase): 47 | """ 48 | Array Data Type 49 | """ 50 | def __init__(self, *dimensions): 51 | """ 52 | Constructs a Array, given its dimensions 53 | 54 | Parameters 55 | ---------- 56 | dimensions: ints | longs 57 | 58 | Examples 59 | -------- 60 | # Create a single dimensions array of length five 61 | >>> arr = coremltools.models.datatypes.Array(5) 62 | 63 | # Create a multi dimension array five by two by ten. 64 | >>> multi_arr = coremltools.models.datatypes.Array(5, 2, 10) 65 | """ 66 | assert len(dimensions) >= 1 67 | assert all(isinstance(d, (int, long)) for d in dimensions),\ 68 | "Dimensions must be ints, not %s" % (str(dimensions)) 69 | self.dimensions = dimensions 70 | 71 | num_elements = 1 72 | for d in self.dimensions: 73 | num_elements *= d 74 | 75 | _DatatypeBase.__init__(self, "Array", 76 | "Array({%s})" % (",".join("%d" % d for d in self.dimensions)), 77 | num_elements) 78 | 79 | 80 | class Dictionary(_DatatypeBase): 81 | """ 82 | Dictionary Data Type 83 | """ 84 | def __init__(self, key_type = None): 85 | """ 86 | Constructs a Dictionary, given its key type 87 | 88 | Parameters 89 | ---------- 90 | key_type: Int64 | String 91 | 92 | Examples 93 | -------- 94 | >>> from coremltools.models.datatypes import Dictionary, Int64, String 95 | 96 | # Create a dictionary with string keys 97 | >>> str_key_dict = Dictionary(key_type=String) 98 | 99 | # Create a dictionary with int keys 100 | >>> int_key_dict = Dictionary(Int64) 101 | """ 102 | # Resolve it to a class if it's 103 | global _simple_type_remap 104 | if key_type in _simple_type_remap: 105 | key_type = _simple_type_remap[key_type] 106 | 107 | if not isinstance(key_type, (Int64, String)): 108 | raise TypeError("Key type for dictionary must be either string or integer.") 109 | 110 | self.key_type = key_type 111 | 112 | _DatatypeBase.__init__(self, "Dictionary", 113 | "Dictionary(%s)" % repr(self.key_type), None) 114 | 115 | 116 | _simple_type_remap = {int : Int64(), 117 | str : String(), 118 | float : Double(), 119 | Double : Double(), 120 | Int64 : Int64(), 121 | String : String(), 122 | 'Double' : Double(), 123 | 'Int64' : Int64(), 124 | 'String' : String()} 125 | 126 | 127 | def _is_valid_datatype(datatype_instance): 128 | """ 129 | Returns true if datatype_instance is a valid datatype object and false otherwise. 
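    For example (illustrative): _is_valid_datatype(Int64()), _is_valid_datatype(str),
    and _is_valid_datatype(Array(2, 2)) return True, while _is_valid_datatype(dict)
    and _is_valid_datatype(None) return False.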
130 | """ 131 | 132 | # Remap so we can still use the python types for the simple cases 133 | global _simple_type_remap 134 | if datatype_instance in _simple_type_remap: 135 | return True 136 | 137 | # Now set the protobuf from this interface. 138 | if isinstance(datatype_instance, (Int64, Double, String, Array)): 139 | return True 140 | 141 | elif isinstance(datatype_instance, Dictionary): 142 | kt = datatype_instance.key_type 143 | 144 | if isinstance(kt, (Int64, String)): 145 | return True 146 | 147 | return False 148 | 149 | def _normalize_datatype(datatype_instance): 150 | """ 151 | Translates a user specified datatype to an instance of the ones defined above. 152 | 153 | Valid data types are passed through, and the following type specifications 154 | are translated to the proper instances: 155 | 156 | str, "String" -> String() 157 | int, "Int64" -> Int64() 158 | float, "Double" -> Double() 159 | 160 | If a data type is not recognized, then an error is raised. 161 | """ 162 | global _simple_type_remap 163 | if datatype_instance in _simple_type_remap: 164 | return _simple_type_remap[datatype_instance] 165 | 166 | # Now set the protobuf from this interface. 167 | if isinstance(datatype_instance, (Int64, Double, String, Array)): 168 | return datatype_instance 169 | 170 | elif isinstance(datatype_instance, Dictionary): 171 | kt = datatype_instance.key_type 172 | 173 | if isinstance(kt, (Int64, String)): 174 | return datatype_instance 175 | 176 | raise ValueError("Datatype instance not recognized.") 177 | 178 | 179 | 180 | def _set_datatype(proto_type_obj, datatype_instance): 181 | 182 | # Remap so we can still use the python types for the simple cases 183 | global _simple_type_remap 184 | if datatype_instance in _simple_type_remap: 185 | datatype_instance = _simple_type_remap[datatype_instance] 186 | 187 | # Now set the protobuf from this interface. 188 | if isinstance(datatype_instance, Int64): 189 | proto_type_obj.int64Type.MergeFromString('') 190 | 191 | elif isinstance(datatype_instance, Double): 192 | proto_type_obj.doubleType.MergeFromString('') 193 | 194 | elif isinstance(datatype_instance, String): 195 | proto_type_obj.stringType.MergeFromString('') 196 | 197 | elif isinstance(datatype_instance, Array): 198 | proto_type_obj.multiArrayType.MergeFromString('') 199 | 200 | for n in datatype_instance.dimensions: 201 | proto_type_obj.multiArrayType.shape.append(n) 202 | 203 | elif isinstance(datatype_instance, Dictionary): 204 | proto_type_obj.dictionaryType.MergeFromString('') 205 | 206 | kt = datatype_instance.key_type 207 | 208 | if isinstance(kt, Int64): 209 | proto_type_obj.dictionaryType.int64KeyType.MergeFromString('') 210 | elif isinstance(kt, String): 211 | proto_type_obj.dictionaryType.stringKeyType.MergeFromString('') 212 | else: 213 | raise ValueError("Dictionary key type must be either string or int.") 214 | 215 | else: 216 | raise TypeError("Datatype parameter not recognized; must be an instance " 217 | "of datatypes.{Double, Int64, String, Dictionary, Array}, or " 218 | "python int, float, or str types.") 219 | 220 | -------------------------------------------------------------------------------- /models/feature_vectorizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | import datatypes 7 | from .. 
import SPECIFICATION_VERSION 8 | from ..proto import Model_pb2 as _Model_pb2 9 | from ..proto import FeatureTypes_pb2 as _FeatureTypes_pb2 10 | from _interface_management import set_transform_interface_params 11 | from _feature_management import process_or_validate_features 12 | 13 | def create_feature_vectorizer(input_features, output_feature_name, 14 | known_size_map = {}): 15 | """ 16 | Creates a feature vectorizer from input features, return the spec for 17 | a feature vectorizer that puts everything into a single array of length 18 | equal to the total size of all the input features. Returns a 2-tuple 19 | `(spec, num_dimension)` 20 | 21 | Parameters 22 | ---------- 23 | input_features: [list of 2-tuples] 24 | Name(s) of the input features, given as a list of `('name', datatype)` 25 | tuples. The datatypes entry is one of the data types defined in the 26 | :ref:`datatypes` module. Allowed datatypes are :ref:`datatype.Int64`, 27 | :ref:`datatype.Double`, :ref:`datatypes.Dictionary`, 28 | or :ref:`datatype.Array`. 29 | 30 | If the feature is a dictionary type, then the dictionary must have integer 31 | keys, and the number of dimensions to expand it into must be given by 32 | `known_size_map`. 33 | 34 | Feature indices in the final array are counted sequentually from the 35 | from 0 through the total number of features. 36 | 37 | 38 | output_feature_name: str 39 | The name of the output feature. The type is an Array 40 | List of output feature of the network. 41 | 42 | known_size_map: 43 | A dictionary mapping the feature name to the expanded size in the final 44 | array. This is most useful for specifying the size of sparse vectors 45 | given as dictionaries of index to value. 46 | 47 | """ 48 | 49 | spec = _Model_pb2.Model() 50 | spec.specificationVersion = SPECIFICATION_VERSION 51 | 52 | input_features = process_or_validate_features(input_features) 53 | 54 | feature_vectorizer = spec.featureVectorizer 55 | 56 | num_output_dimensions = 0 57 | 58 | for n, ft in input_features: 59 | if n in known_size_map: 60 | dim = known_size_map[n] 61 | 62 | if ft.num_elements is not None: 63 | if dim != ft.num_elements: 64 | raise ValueError(("In feature %s, override size (%d) not " 65 | "compatible with inherent value size (%d).") 66 | % (n, dim, ft.num_elements)) 67 | else: 68 | if ft.num_elements is None: 69 | raise ValueError("In feature %s, inherent size unknown so must be manually supplied.") 70 | dim = ft.num_elements 71 | 72 | num_output_dimensions += dim 73 | 74 | new_feature = feature_vectorizer.inputList.add() 75 | new_feature.inputColumn = n 76 | new_feature.inputDimensions = dim 77 | 78 | if not isinstance(output_feature_name, (str, unicode)): 79 | if (is_valid_feature_list(output_feature_name) 80 | and len(output_feature_name) == 1 81 | and output_feature_name[0][1] == datatypes.Array(num_output_dimensions)): 82 | 83 | output_feature_name = output_feature_name[0][0] 84 | 85 | else: 86 | raise TypeError("Output feature must be specified as a " 87 | "feature name or correct output feature list.") 88 | 89 | output_features = [(output_feature_name, datatypes.Array(num_output_dimensions))] 90 | set_transform_interface_params(spec, input_features, output_features) 91 | 92 | return spec, num_output_dimensions 93 | 94 | 95 | -------------------------------------------------------------------------------- /models/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 
2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | from copy import deepcopy as _deepcopy 7 | import platform as _platform 8 | import sys as _sys 9 | import tempfile as _tempfile 10 | 11 | from utils import save_spec as _save_spec 12 | from ..proto import Model_pb2 as _Model_pb2 13 | 14 | 15 | class _FeatureDescription(object): 16 | def __init__(self, fd_spec): 17 | self._fd_spec = fd_spec 18 | 19 | def __repr__(self): 20 | return "Features(%s)" % ','.join(map(lambda x: x.name, self._fd_spec)) 21 | 22 | def __len__(self): 23 | return len(self._fd_spec) 24 | 25 | def __getitem__(self, key): 26 | for f in self._fd_spec: 27 | if key == f.name: 28 | return f.shortDescription 29 | raise KeyError("No feature with name %s." % key) 30 | 31 | def __contains__(self, key): 32 | for f in self._fd_spec: 33 | if key == f.name: 34 | return True 35 | return False 36 | 37 | def __setitem__(self, key, value): 38 | for f in self._fd_spec: 39 | if key == f.name: 40 | f.shortDescription = value 41 | return 42 | raise AttributeError("No feature with name %s." % key) 43 | 44 | def __iter__(self): 45 | for f in self._fd_spec: 46 | yield f.name 47 | 48 | def _get_proxy_from_spec(filename): 49 | try: 50 | from ..libcoremlpython import _MLModelProxy 51 | except: 52 | _MLModelProxy = None 53 | 54 | if _MLModelProxy: 55 | return _MLModelProxy.fromSpec(filename) 56 | else: 57 | return None 58 | 59 | class MLModel(object): 60 | """ 61 | This class defines the minimal interface to a CoreML object in Python. 62 | 63 | At a high level, the protobuf specification consists of: 64 | 65 | - Model description: Encodes names and type information of the inputs and outputs to the model. 66 | - Model parameters: The set of parameters required to represent a specific instance of the model. 67 | - Metadata: Information about the origin, license, and author of the model. 68 | 69 | With this class, you can inspect a CoreML model, modifiy metadata, and make 70 | predictions for the purposes of testing (on select platforms). 71 | 72 | Examples 73 | -------- 74 | .. sourcecode:: python 75 | 76 | # Load the model 77 | >>> model = MLModel('HousePricer.mlmodel') 78 | 79 | # Set the model metadata 80 | >>> model.author = 'Author' 81 | >>> model.license = 'BSD' 82 | >>> model.short_description = 'Predicts the price of a house in the Seattle area.' 83 | 84 | # Get the interface to the model 85 | >>> model.input_descriptions 86 | >>> model.output_description 87 | 88 | # Set feature descriptions manually 89 | >>> model.input_description['bedroom'] = 'Number of bedrooms' 90 | >>> model.input_description['bathrooms'] = 'Number of bathrooms' 91 | >>> model.input_description['size'] = 'Size (in square feet)' 92 | 93 | # Set 94 | >>> model.output_description['price'] = 'Price of the house' 95 | 96 | # Make predictions 97 | >>> predictions = model.predict({'bedroom': 1.0, 'bath': 1.0, 'size': 1240}) 98 | 99 | # Get the spec of the model 100 | >>> model.spec 101 | 102 | # Save the model 103 | >>> model.save('HousePricer.mlmodel') 104 | 105 | See Also 106 | -------- 107 | predict 108 | """ 109 | def __init__(self, model): 110 | """ 111 | Construct an MLModel from a .mlmodel 112 | 113 | Parameters 114 | ---------- 115 | model: str | Model_pb2 116 | If a string is given it should be the location of the .mlmodel to load. 
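            If a Model_pb2.Model protobuf object is given instead, it is used
            directly as the model specification; a temporary .mlmodel file is
            written so that the native prediction proxy can be loaded where
            available. Illustrative: MLModel(my_spec), where my_spec is an
            existing Model_pb2 object.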
117 | 118 | Examples 119 | -------- 120 | >>> loaded_model = MLModel('my_model_file.mlmodel') 121 | """ 122 | from utils import load_spec as _load_spec 123 | 124 | if isinstance(model, str): 125 | self._spec = _load_spec(model) 126 | self.__proxy__ = _get_proxy_from_spec(model) 127 | elif isinstance(model, _Model_pb2.Model): 128 | self._spec = model 129 | filename = _tempfile.mktemp(suffix = '.mlmodel') 130 | _save_spec(model, filename) 131 | self.__proxy__ = _get_proxy_from_spec(filename) 132 | else: 133 | raise TypeError("Expected model to be a .mlmodel file or a Model_pb2 object") 134 | 135 | self._input_description = _FeatureDescription(self._spec.description.input) 136 | self._output_description = _FeatureDescription(self._spec.description.output) 137 | 138 | @property 139 | def short_description(self): 140 | return self._spec.description.metadata.shortDescription 141 | 142 | @short_description.setter 143 | def short_description(self, short_description): 144 | self._spec.description.metadata.shortDescription = short_description 145 | 146 | @property 147 | def input_description(self): 148 | return self._input_description 149 | 150 | @property 151 | def output_description(self): 152 | return self._output_description 153 | 154 | @property 155 | def user_defined_metadata(self): 156 | return self._spec.description.metadata.userDefined 157 | 158 | @property 159 | def author(self): 160 | return self._spec.description.metadata.author 161 | 162 | @author.setter 163 | def author(self, author): 164 | self._spec.description.metadata.author = author 165 | 166 | @property 167 | def license(self): 168 | return self._spec.description.metadata.license 169 | 170 | @license.setter 171 | def license(self, license): 172 | self._spec.description.metadata.license = license 173 | 174 | def __repr__(self): 175 | return self._spec.description.__repr__() 176 | 177 | def __str__(self): 178 | return self.__repr__() 179 | 180 | def save(self, filename): 181 | """ 182 | Save the model to a .mlmodel format. 183 | 184 | Parameters 185 | ---------- 186 | location : str 187 | Target filename for the model. 188 | 189 | See Also 190 | -------- 191 | coremltools.utils.load_model 192 | 193 | Examples 194 | -------- 195 | >>> model.save('my_model_file.mlmodel') 196 | >>> loaded_model = MLModel('my_model_file.mlmodel') 197 | """ 198 | _save_spec(self._spec, filename) 199 | 200 | def get_spec(self): 201 | """ 202 | Get a deep copy of the protobuf specification of the model. 203 | 204 | Returns 205 | ------- 206 | model: Model_pb2 207 | Protobuf specification of the model. 208 | 209 | Examples 210 | ---------- 211 | >>> spec = model.get_spec() 212 | """ 213 | return _deepcopy(self._spec) 214 | 215 | def predict(self, data, **kwargs): 216 | """ 217 | Return predictions for the model. The kwargs gets passed into the 218 | model as a dictionary. 219 | 220 | Parameters 221 | ---------- 222 | data : dict[str, value] 223 | Dictionary of data to make predictions from where the keys are 224 | the names of the input features. 225 | 226 | Returns 227 | ------- 228 | out : dict[str, value] 229 | Predictions as a dictionary where each key is the output feature 230 | name. 
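        Note: in the implementation below, any extra keyword arguments are
        currently ignored rather than forwarded to the prediction proxy.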
231 | 232 | Examples 233 | -------- 234 | >>> data = {'bedroom': 1.0, 'bath': 1.0, 'size': 1240} 235 | >>> predictions = model.predict(data) 236 | """ 237 | if self.__proxy__: 238 | return self.__proxy__.predict(data) 239 | else: 240 | if _sys.platform != 'darwin' or float('.'.join(_platform.mac_ver()[0].split('.')[:2])) < 10.13: 241 | raise Exception('Model prediction is only supported on macOS version 10.13.') 242 | else: 243 | raise Exception('Unable to load CoreML.framework. Cannot make predictions.') 244 | -------------------------------------------------------------------------------- /models/pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Apple Inc. All rights reserved. 2 | # 3 | # Use of this source code is governed by a BSD-3-clause license that can be 4 | # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause 5 | 6 | """ 7 | Pipeline utils for this package. 8 | """ 9 | from .. import SPECIFICATION_VERSION 10 | from ..proto import Model_pb2 as _Model_pb2 11 | import _feature_management 12 | import model as _model 13 | 14 | from ._interface_management import set_regressor_interface_params 15 | from ._interface_management import set_classifier_interface_params 16 | from ._interface_management import set_transform_interface_params 17 | 18 | class Pipeline(object): 19 | """ 20 | A pipeline model that exposes a sequence of models as a single model, 21 | It requires a set of inputs, a sequence of other models and a set of outputs. 22 | 23 | This class is the base class for :py:class:`PipelineClassifier` and 24 | :py:class:`PipelineRegressor`, which contain a sequence ending in a classifier 25 | or regressor and themselves behave like a classifier or regressor. This class 26 | may be used directly for a sequence of feature transformer objects. 27 | 28 | """ 29 | 30 | def __init__(self, input_features, output_features): 31 | """ 32 | Create a pipleine of models to be executed sequentially. 33 | 34 | Parameters 35 | ---------- 36 | 37 | input_features: [list of 2-tuples] 38 | Name(s) of the input features, given as a list of `('name', datatype)` 39 | tuples. The datatypes entry can be any of the data types defined in the 40 | :py:mod:`models.datatypes` module. 41 | 42 | output_features: [list of features] 43 | Name(s) of the output features, given as a list of 44 | `('name',datatype)` tuples. The datatypes entry can be any of the 45 | data types defined in the :py:mod:`models.datatypes` module. All features 46 | must be either defined in the inputs or be produced by one of the 47 | contained models. 48 | 49 | """ 50 | spec = _Model_pb2.Model() 51 | spec.specificationVersion = SPECIFICATION_VERSION 52 | 53 | # Access this to declare it as a pipeline 54 | spec.pipeline 55 | 56 | spec = set_transform_interface_params(spec, input_features, output_features) 57 | 58 | # Save the spec as a member variable. 59 | self.spec = spec 60 | 61 | def add_model(self, spec): 62 | """ 63 | Add a protobuf spec or :py:class:`models.MLModel` instance to the pipeline. 64 | 65 | All input features of this model must either match the input_features 66 | of the pipeline, or match the outputs of a previous model. 67 | 68 | Parameters 69 | ---------- 70 | spec: [MLModel, Model_pb2] 71 | A protobuf spec or MLModel instance containing a model. 
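        Example (illustrative; scaler_spec and regressor_model stand for specs
        or MLModel instances created elsewhere):

        >>> pipeline = Pipeline(input_features, output_features)
        >>> pipeline.add_model(scaler_spec)
        >>> pipeline.add_model(regressor_model)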
72 | """ 73 | 74 | if isinstance(spec, _model.MLModel): 75 | spec = spec._spec 76 | 77 | pipeline = self.spec.pipeline 78 | step_spec = pipeline.models.add() 79 | step_spec.CopyFrom(spec) 80 | 81 | class PipelineRegressor(Pipeline): 82 | """ 83 | A pipeline model that exposes a sequence of models as a single model, 84 | It requires a set of inputs, a sequence of other models and a set of outputs. 85 | In this case, the sequence of models must end in a regression model, and the 86 | pipeline itself behaves as a regression model. 87 | """ 88 | 89 | 90 | def __init__(self, input_features, output_features): 91 | """ 92 | Create a set of pipleine models given a set of model specs. The final 93 | output model must be a regression model. 94 | 95 | Parameters 96 | ---------- 97 | 98 | input_features: [list of 2-tuples] 99 | Name(s) of the input features, given as a list of `('name', datatype)` 100 | tuples. The datatypes entry can be any of the data types defined in the 101 | :py:mod:`models.datatypes` module. 102 | 103 | output_features: [list of features] 104 | Name(s) of the output features, given as a list of 105 | `('name',datatype)` tuples. The datatypes entry can be any of the 106 | data types defined in the :py:mod:`models.datatypes` module. All features 107 | must be either defined in the inputs or be produced by one of the 108 | contained models. 109 | 110 | """ 111 | spec = _Model_pb2.Model() 112 | spec.specificationVersion = SPECIFICATION_VERSION 113 | 114 | # Access this to declare it as a pipeline 115 | spec.pipelineRegressor 116 | spec = set_regressor_interface_params(spec, input_features, output_features) 117 | 118 | # Save as a member variable 119 | self.spec = spec 120 | 121 | def add_model(self, spec): 122 | """ 123 | Add a protobuf spec or :py:class:`models.MLModel` instance to the pipeline. 124 | 125 | All input features of this model must either match the input_features 126 | of the pipeline, or match the outputs of a previous model. 127 | 128 | Parameters 129 | ---------- 130 | spec: [MLModel, Model_pb2] 131 | A protobuf spec or MLModel instance containing a model. 132 | """ 133 | 134 | if isinstance(spec, _model.MLModel): 135 | spec = spec._spec 136 | 137 | pipeline = self.spec.pipelineRegressor.pipeline 138 | step_spec = pipeline.models.add() 139 | step_spec.CopyFrom(spec) 140 | 141 | class PipelineClassifier(Pipeline): 142 | """ 143 | A pipeline model that exposes a sequence of models as a single model, 144 | It requires a set of inputs, a sequence of other models and a set of outputs. 145 | In this case, the sequence of models must end in a regression model, and the 146 | pipeline itself behaves as a regression model. 147 | """ 148 | 149 | def __init__(self, input_features, class_labels, output_features=None): 150 | """ 151 | Create a set of pipleine models given a set of model specs. The last 152 | model in this list must be a classifier model. 153 | 154 | Parameters 155 | ---------- 156 | input_features: [list of 2-tuples] 157 | Name(s) of the input features, given as a list of `('name', datatype)` 158 | tuples. The datatypes entry can be any of the data types defined in the 159 | :py:mod:`models.datatypes` module. 160 | 161 | class_labels: [list] 162 | A list of string or integer class labels to use in making predictions. 163 | This list must match the class labels in the final classifier model. 
164 | 165 | output_features: [list] 166 | A string or a list of two strings specifying the names of the two 167 | output features, the first being a class label corresponding 168 | to the class with the highest predicted score, and the second being 169 | a dictionary mapping each class to its score. If `output_features` 170 | is a string, it specifies the predicted class label and the class 171 | scores is set to the default value of `"classProbability."` 172 | 173 | """ 174 | 175 | output_features = _feature_management.process_or_validate_classifier_output_features( 176 | output_features, class_labels) 177 | 178 | spec = _Model_pb2.Model() 179 | spec.specificationVersion = SPECIFICATION_VERSION 180 | spec = set_classifier_interface_params(spec, input_features, 181 | class_labels, 'pipelineClassifier', output_features) 182 | 183 | # Access this to declare it as a pipeline 184 | spec.pipelineClassifier 185 | 186 | # Save as a member variable 187 | self.spec = spec 188 | 189 | def add_model(self, spec): 190 | """ 191 | Add a protobuf spec or :py:class:`models.MLModel` instance to the pipeline. 192 | 193 | All input features of this model must either match the input_features 194 | of the pipeline, or match the outputs of a previous model. 195 | 196 | Parameters 197 | ---------- 198 | spec: [MLModel, Model_pb2] 199 | A protobuf spec or MLModel instance containing a model. 200 | """ 201 | if isinstance(spec, _model.MLModel): 202 | spec = spec._spec 203 | pipeline = self.spec.pipelineClassifier.pipeline 204 | step_spec = pipeline.models.add() 205 | step_spec.CopyFrom(spec) 206 | -------------------------------------------------------------------------------- /proto/ArrayFeatureExtractor_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: ArrayFeatureExtractor.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='ArrayFeatureExtractor.proto', 20 | package='CoreML.Specification', 21 | syntax='proto3', 22 | serialized_pb=_b('\n\x1b\x41rrayFeatureExtractor.proto\x12\x14\x43oreML.Specification\"-\n\x15\x41rrayFeatureExtractor\x12\x14\n\x0c\x65xtractIndex\x18\x01 \x03(\x04\x42\x02H\x03\x62\x06proto3') 23 | ) 24 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 25 | 26 | 27 | 28 | 29 | _ARRAYFEATUREEXTRACTOR = _descriptor.Descriptor( 30 | name='ArrayFeatureExtractor', 31 | full_name='CoreML.Specification.ArrayFeatureExtractor', 32 | filename=None, 33 | file=DESCRIPTOR, 34 | containing_type=None, 35 | fields=[ 36 | _descriptor.FieldDescriptor( 37 | name='extractIndex', full_name='CoreML.Specification.ArrayFeatureExtractor.extractIndex', index=0, 38 | number=1, type=4, cpp_type=4, label=3, 39 | has_default_value=False, default_value=[], 40 | message_type=None, enum_type=None, containing_type=None, 41 | is_extension=False, extension_scope=None, 42 | options=None), 43 | ], 44 | extensions=[ 45 | ], 46 | nested_types=[], 47 | enum_types=[ 48 | ], 49 | options=None, 50 | is_extendable=False, 51 | syntax='proto3', 52 | extension_ranges=[], 53 | oneofs=[ 54 | ], 55 | serialized_start=53, 56 | serialized_end=98, 57 | ) 58 | 59 | DESCRIPTOR.message_types_by_name['ArrayFeatureExtractor'] = _ARRAYFEATUREEXTRACTOR 60 | 61 | ArrayFeatureExtractor = _reflection.GeneratedProtocolMessageType('ArrayFeatureExtractor', (_message.Message,), dict( 62 | DESCRIPTOR = _ARRAYFEATUREEXTRACTOR, 63 | __module__ = 'ArrayFeatureExtractor_pb2' 64 | # @@protoc_insertion_point(class_scope:CoreML.Specification.ArrayFeatureExtractor) 65 | )) 66 | _sym_db.RegisterMessage(ArrayFeatureExtractor) 67 | 68 | 69 | DESCRIPTOR.has_options = True 70 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 71 | # @@protoc_insertion_point(module_scope) 72 | -------------------------------------------------------------------------------- /proto/CategoricalMapping_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: CategoricalMapping.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | import DataStructures_pb2 as DataStructures__pb2 17 | FeatureTypes__pb2 = DataStructures__pb2.FeatureTypes__pb2 18 | 19 | from DataStructures_pb2 import * 20 | 21 | DESCRIPTOR = _descriptor.FileDescriptor( 22 | name='CategoricalMapping.proto', 23 | package='CoreML.Specification', 24 | syntax='proto3', 25 | serialized_pb=_b('\n\x18\x43\x61tegoricalMapping.proto\x12\x14\x43oreML.Specification\x1a\x14\x44\x61taStructures.proto\"\xe7\x01\n\x12\x43\x61tegoricalMapping\x12\x42\n\x10stringToInt64Map\x18\x01 \x01(\x0b\x32&.CoreML.Specification.StringToInt64MapH\x00\x12\x42\n\x10int64ToStringMap\x18\x02 \x01(\x0b\x32&.CoreML.Specification.Int64ToStringMapH\x00\x12\x12\n\x08strValue\x18\x65 \x01(\tH\x01\x12\x14\n\nint64Value\x18\x66 \x01(\x03H\x01\x42\r\n\x0bMappingTypeB\x10\n\x0eValueOnUnknownB\x02H\x03P\x00\x62\x06proto3') 26 | , 27 | dependencies=[DataStructures__pb2.DESCRIPTOR,], 28 | public_dependencies=[DataStructures__pb2.DESCRIPTOR,]) 29 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 30 | 31 | 32 | 33 | 34 | _CATEGORICALMAPPING = _descriptor.Descriptor( 35 | name='CategoricalMapping', 36 | full_name='CoreML.Specification.CategoricalMapping', 37 | filename=None, 38 | file=DESCRIPTOR, 39 | containing_type=None, 40 | fields=[ 41 | _descriptor.FieldDescriptor( 42 | name='stringToInt64Map', full_name='CoreML.Specification.CategoricalMapping.stringToInt64Map', index=0, 43 | number=1, type=11, cpp_type=10, label=1, 44 | has_default_value=False, default_value=None, 45 | message_type=None, enum_type=None, containing_type=None, 46 | is_extension=False, extension_scope=None, 47 | options=None), 48 | _descriptor.FieldDescriptor( 49 | name='int64ToStringMap', full_name='CoreML.Specification.CategoricalMapping.int64ToStringMap', index=1, 50 | number=2, type=11, cpp_type=10, label=1, 51 | has_default_value=False, default_value=None, 52 | message_type=None, enum_type=None, containing_type=None, 53 | is_extension=False, extension_scope=None, 54 | options=None), 55 | _descriptor.FieldDescriptor( 56 | name='strValue', full_name='CoreML.Specification.CategoricalMapping.strValue', index=2, 57 | number=101, type=9, cpp_type=9, label=1, 58 | has_default_value=False, default_value=_b("").decode('utf-8'), 59 | message_type=None, enum_type=None, containing_type=None, 60 | is_extension=False, extension_scope=None, 61 | options=None), 62 | _descriptor.FieldDescriptor( 63 | name='int64Value', full_name='CoreML.Specification.CategoricalMapping.int64Value', index=3, 64 | number=102, type=3, cpp_type=2, label=1, 65 | has_default_value=False, default_value=0, 66 | message_type=None, enum_type=None, containing_type=None, 67 | is_extension=False, extension_scope=None, 68 | options=None), 69 | ], 70 | extensions=[ 71 | ], 72 | nested_types=[], 73 | enum_types=[ 74 | ], 75 | options=None, 76 | is_extendable=False, 77 | syntax='proto3', 78 | extension_ranges=[], 79 | oneofs=[ 80 | _descriptor.OneofDescriptor( 81 | name='MappingType', full_name='CoreML.Specification.CategoricalMapping.MappingType', 82 | index=0, 
containing_type=None, fields=[]), 83 | _descriptor.OneofDescriptor( 84 | name='ValueOnUnknown', full_name='CoreML.Specification.CategoricalMapping.ValueOnUnknown', 85 | index=1, containing_type=None, fields=[]), 86 | ], 87 | serialized_start=73, 88 | serialized_end=304, 89 | ) 90 | 91 | _CATEGORICALMAPPING.fields_by_name['stringToInt64Map'].message_type = DataStructures__pb2._STRINGTOINT64MAP 92 | _CATEGORICALMAPPING.fields_by_name['int64ToStringMap'].message_type = DataStructures__pb2._INT64TOSTRINGMAP 93 | _CATEGORICALMAPPING.oneofs_by_name['MappingType'].fields.append( 94 | _CATEGORICALMAPPING.fields_by_name['stringToInt64Map']) 95 | _CATEGORICALMAPPING.fields_by_name['stringToInt64Map'].containing_oneof = _CATEGORICALMAPPING.oneofs_by_name['MappingType'] 96 | _CATEGORICALMAPPING.oneofs_by_name['MappingType'].fields.append( 97 | _CATEGORICALMAPPING.fields_by_name['int64ToStringMap']) 98 | _CATEGORICALMAPPING.fields_by_name['int64ToStringMap'].containing_oneof = _CATEGORICALMAPPING.oneofs_by_name['MappingType'] 99 | _CATEGORICALMAPPING.oneofs_by_name['ValueOnUnknown'].fields.append( 100 | _CATEGORICALMAPPING.fields_by_name['strValue']) 101 | _CATEGORICALMAPPING.fields_by_name['strValue'].containing_oneof = _CATEGORICALMAPPING.oneofs_by_name['ValueOnUnknown'] 102 | _CATEGORICALMAPPING.oneofs_by_name['ValueOnUnknown'].fields.append( 103 | _CATEGORICALMAPPING.fields_by_name['int64Value']) 104 | _CATEGORICALMAPPING.fields_by_name['int64Value'].containing_oneof = _CATEGORICALMAPPING.oneofs_by_name['ValueOnUnknown'] 105 | DESCRIPTOR.message_types_by_name['CategoricalMapping'] = _CATEGORICALMAPPING 106 | 107 | CategoricalMapping = _reflection.GeneratedProtocolMessageType('CategoricalMapping', (_message.Message,), dict( 108 | DESCRIPTOR = _CATEGORICALMAPPING, 109 | __module__ = 'CategoricalMapping_pb2' 110 | # @@protoc_insertion_point(class_scope:CoreML.Specification.CategoricalMapping) 111 | )) 112 | _sym_db.RegisterMessage(CategoricalMapping) 113 | 114 | 115 | DESCRIPTOR.has_options = True 116 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 117 | # @@protoc_insertion_point(module_scope) 118 | -------------------------------------------------------------------------------- /proto/DictVectorizer_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: DictVectorizer.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | import DataStructures_pb2 as DataStructures__pb2 17 | FeatureTypes__pb2 = DataStructures__pb2.FeatureTypes__pb2 18 | 19 | from DataStructures_pb2 import * 20 | 21 | DESCRIPTOR = _descriptor.FileDescriptor( 22 | name='DictVectorizer.proto', 23 | package='CoreML.Specification', 24 | syntax='proto3', 25 | serialized_pb=_b('\n\x14\x44ictVectorizer.proto\x12\x14\x43oreML.Specification\x1a\x14\x44\x61taStructures.proto\"\x8f\x01\n\x0e\x44ictVectorizer\x12;\n\rstringToIndex\x18\x01 \x01(\x0b\x32\".CoreML.Specification.StringVectorH\x00\x12\x39\n\x0cint64ToIndex\x18\x02 \x01(\x0b\x32!.CoreML.Specification.Int64VectorH\x00\x42\x05\n\x03MapB\x02H\x03P\x00\x62\x06proto3') 26 | , 27 | dependencies=[DataStructures__pb2.DESCRIPTOR,], 28 | public_dependencies=[DataStructures__pb2.DESCRIPTOR,]) 29 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 30 | 31 | 32 | 33 | 34 | _DICTVECTORIZER = _descriptor.Descriptor( 35 | name='DictVectorizer', 36 | full_name='CoreML.Specification.DictVectorizer', 37 | filename=None, 38 | file=DESCRIPTOR, 39 | containing_type=None, 40 | fields=[ 41 | _descriptor.FieldDescriptor( 42 | name='stringToIndex', full_name='CoreML.Specification.DictVectorizer.stringToIndex', index=0, 43 | number=1, type=11, cpp_type=10, label=1, 44 | has_default_value=False, default_value=None, 45 | message_type=None, enum_type=None, containing_type=None, 46 | is_extension=False, extension_scope=None, 47 | options=None), 48 | _descriptor.FieldDescriptor( 49 | name='int64ToIndex', full_name='CoreML.Specification.DictVectorizer.int64ToIndex', index=1, 50 | number=2, type=11, cpp_type=10, label=1, 51 | has_default_value=False, default_value=None, 52 | message_type=None, enum_type=None, containing_type=None, 53 | is_extension=False, extension_scope=None, 54 | options=None), 55 | ], 56 | extensions=[ 57 | ], 58 | nested_types=[], 59 | enum_types=[ 60 | ], 61 | options=None, 62 | is_extendable=False, 63 | syntax='proto3', 64 | extension_ranges=[], 65 | oneofs=[ 66 | _descriptor.OneofDescriptor( 67 | name='Map', full_name='CoreML.Specification.DictVectorizer.Map', 68 | index=0, containing_type=None, fields=[]), 69 | ], 70 | serialized_start=69, 71 | serialized_end=212, 72 | ) 73 | 74 | _DICTVECTORIZER.fields_by_name['stringToIndex'].message_type = DataStructures__pb2._STRINGVECTOR 75 | _DICTVECTORIZER.fields_by_name['int64ToIndex'].message_type = DataStructures__pb2._INT64VECTOR 76 | _DICTVECTORIZER.oneofs_by_name['Map'].fields.append( 77 | _DICTVECTORIZER.fields_by_name['stringToIndex']) 78 | _DICTVECTORIZER.fields_by_name['stringToIndex'].containing_oneof = _DICTVECTORIZER.oneofs_by_name['Map'] 79 | _DICTVECTORIZER.oneofs_by_name['Map'].fields.append( 80 | _DICTVECTORIZER.fields_by_name['int64ToIndex']) 81 | _DICTVECTORIZER.fields_by_name['int64ToIndex'].containing_oneof = _DICTVECTORIZER.oneofs_by_name['Map'] 82 | DESCRIPTOR.message_types_by_name['DictVectorizer'] = _DICTVECTORIZER 83 | 84 | DictVectorizer = _reflection.GeneratedProtocolMessageType('DictVectorizer', (_message.Message,), dict( 85 | 
DESCRIPTOR = _DICTVECTORIZER, 86 | __module__ = 'DictVectorizer_pb2' 87 | # @@protoc_insertion_point(class_scope:CoreML.Specification.DictVectorizer) 88 | )) 89 | _sym_db.RegisterMessage(DictVectorizer) 90 | 91 | 92 | DESCRIPTOR.has_options = True 93 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 94 | # @@protoc_insertion_point(module_scope) 95 | -------------------------------------------------------------------------------- /proto/FeatureVectorizer_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 2 | # source: FeatureVectorizer.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='FeatureVectorizer.proto', 20 | package='CoreML.Specification', 21 | syntax='proto3', 22 | serialized_pb=_b('\n\x17\x46\x65\x61tureVectorizer.proto\x12\x14\x43oreML.Specification\"\x98\x01\n\x11\x46\x65\x61tureVectorizer\x12\x46\n\tinputList\x18\x01 \x03(\x0b\x32\x33.CoreML.Specification.FeatureVectorizer.InputColumn\x1a;\n\x0bInputColumn\x12\x13\n\x0binputColumn\x18\x01 \x01(\t\x12\x17\n\x0finputDimensions\x18\x02 \x01(\x04\x42\x02H\x03\x62\x06proto3') 23 | ) 24 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 25 | 26 | 27 | 28 | 29 | _FEATUREVECTORIZER_INPUTCOLUMN = _descriptor.Descriptor( 30 | name='InputColumn', 31 | full_name='CoreML.Specification.FeatureVectorizer.InputColumn', 32 | filename=None, 33 | file=DESCRIPTOR, 34 | containing_type=None, 35 | fields=[ 36 | _descriptor.FieldDescriptor( 37 | name='inputColumn', full_name='CoreML.Specification.FeatureVectorizer.InputColumn.inputColumn', index=0, 38 | number=1, type=9, cpp_type=9, label=1, 39 | has_default_value=False, default_value=_b("").decode('utf-8'), 40 | message_type=None, enum_type=None, containing_type=None, 41 | is_extension=False, extension_scope=None, 42 | options=None), 43 | _descriptor.FieldDescriptor( 44 | name='inputDimensions', full_name='CoreML.Specification.FeatureVectorizer.InputColumn.inputDimensions', index=1, 45 | number=2, type=4, cpp_type=4, label=1, 46 | has_default_value=False, default_value=0, 47 | message_type=None, enum_type=None, containing_type=None, 48 | is_extension=False, extension_scope=None, 49 | options=None), 50 | ], 51 | extensions=[ 52 | ], 53 | nested_types=[], 54 | enum_types=[ 55 | ], 56 | options=None, 57 | is_extendable=False, 58 | syntax='proto3', 59 | extension_ranges=[], 60 | oneofs=[ 61 | ], 62 | serialized_start=143, 63 | serialized_end=202, 64 | ) 65 | 66 | _FEATUREVECTORIZER = _descriptor.Descriptor( 67 | name='FeatureVectorizer', 68 | full_name='CoreML.Specification.FeatureVectorizer', 69 | filename=None, 70 | file=DESCRIPTOR, 71 | containing_type=None, 72 | fields=[ 73 | _descriptor.FieldDescriptor( 74 | name='inputList', full_name='CoreML.Specification.FeatureVectorizer.inputList', index=0, 75 | number=1, type=11, cpp_type=10, label=3, 76 | has_default_value=False, default_value=[], 77 | message_type=None, enum_type=None, containing_type=None, 78 | 
is_extension=False, extension_scope=None, 79 | options=None), 80 | ], 81 | extensions=[ 82 | ], 83 | nested_types=[_FEATUREVECTORIZER_INPUTCOLUMN, ], 84 | enum_types=[ 85 | ], 86 | options=None, 87 | is_extendable=False, 88 | syntax='proto3', 89 | extension_ranges=[], 90 | oneofs=[ 91 | ], 92 | serialized_start=50, 93 | serialized_end=202, 94 | ) 95 | 96 | _FEATUREVECTORIZER_INPUTCOLUMN.containing_type = _FEATUREVECTORIZER 97 | _FEATUREVECTORIZER.fields_by_name['inputList'].message_type = _FEATUREVECTORIZER_INPUTCOLUMN 98 | DESCRIPTOR.message_types_by_name['FeatureVectorizer'] = _FEATUREVECTORIZER 99 | 100 | FeatureVectorizer = _reflection.GeneratedProtocolMessageType('FeatureVectorizer', (_message.Message,), dict( 101 | 102 | InputColumn = _reflection.GeneratedProtocolMessageType('InputColumn', (_message.Message,), dict( 103 | DESCRIPTOR = _FEATUREVECTORIZER_INPUTCOLUMN, 104 | __module__ = 'FeatureVectorizer_pb2' 105 | # @@protoc_insertion_point(class_scope:CoreML.Specification.FeatureVectorizer.InputColumn) 106 | )) 107 | , 108 | DESCRIPTOR = _FEATUREVECTORIZER, 109 | __module__ = 'FeatureVectorizer_pb2' 110 | # @@protoc_insertion_point(class_scope:CoreML.Specification.FeatureVectorizer) 111 | )) 112 | _sym_db.RegisterMessage(FeatureVectorizer) 113 | _sym_db.RegisterMessage(FeatureVectorizer.InputColumn) 114 | 115 | 116 | DESCRIPTOR.has_options = True 117 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 118 | # @@protoc_insertion_point(module_scope) 119 | -------------------------------------------------------------------------------- /proto/GLMClassifier_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: GLMClassifier.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | import DataStructures_pb2 as DataStructures__pb2 17 | FeatureTypes__pb2 = DataStructures__pb2.FeatureTypes__pb2 18 | 19 | from DataStructures_pb2 import * 20 | 21 | DESCRIPTOR = _descriptor.FileDescriptor( 22 | name='GLMClassifier.proto', 23 | package='CoreML.Specification', 24 | syntax='proto3', 25 | serialized_pb=_b('\n\x13GLMClassifier.proto\x12\x14\x43oreML.Specification\x1a\x14\x44\x61taStructures.proto\"\x9c\x04\n\rGLMClassifier\x12@\n\x07weights\x18\x01 \x03(\x0b\x32/.CoreML.Specification.GLMClassifier.DoubleArray\x12\x0e\n\x06offset\x18\x02 \x03(\x01\x12\\\n\x17postEvaluationTransform\x18\x03 \x01(\x0e\x32;.CoreML.Specification.GLMClassifier.PostEvaluationTransform\x12H\n\rclassEncoding\x18\x04 \x01(\x0e\x32\x31.CoreML.Specification.GLMClassifier.ClassEncoding\x12?\n\x11stringClassLabels\x18\x64 \x01(\x0b\x32\".CoreML.Specification.StringVectorH\x00\x12=\n\x10int64ClassLabels\x18\x65 \x01(\x0b\x32!.CoreML.Specification.Int64VectorH\x00\x1a\x1c\n\x0b\x44oubleArray\x12\r\n\x05value\x18\x01 \x03(\x01\"0\n\x17PostEvaluationTransform\x12\t\n\x05Logit\x10\x00\x12\n\n\x06Probit\x10\x01\"2\n\rClassEncoding\x12\x12\n\x0eReferenceClass\x10\x00\x12\r\n\tOneVsRest\x10\x01\x42\r\n\x0b\x43lassLabelsB\x02H\x03P\x00\x62\x06proto3') 26 | , 27 | dependencies=[DataStructures__pb2.DESCRIPTOR,], 28 | public_dependencies=[DataStructures__pb2.DESCRIPTOR,]) 29 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 30 | 31 | 32 | 33 | _GLMCLASSIFIER_POSTEVALUATIONTRANSFORM = _descriptor.EnumDescriptor( 34 | name='PostEvaluationTransform', 35 | full_name='CoreML.Specification.GLMClassifier.PostEvaluationTransform', 36 | filename=None, 37 | file=DESCRIPTOR, 38 | values=[ 39 | _descriptor.EnumValueDescriptor( 40 | name='Logit', index=0, number=0, 41 | options=None, 42 | type=None), 43 | _descriptor.EnumValueDescriptor( 44 | name='Probit', index=1, number=1, 45 | options=None, 46 | type=None), 47 | ], 48 | containing_type=None, 49 | options=None, 50 | serialized_start=493, 51 | serialized_end=541, 52 | ) 53 | _sym_db.RegisterEnumDescriptor(_GLMCLASSIFIER_POSTEVALUATIONTRANSFORM) 54 | 55 | _GLMCLASSIFIER_CLASSENCODING = _descriptor.EnumDescriptor( 56 | name='ClassEncoding', 57 | full_name='CoreML.Specification.GLMClassifier.ClassEncoding', 58 | filename=None, 59 | file=DESCRIPTOR, 60 | values=[ 61 | _descriptor.EnumValueDescriptor( 62 | name='ReferenceClass', index=0, number=0, 63 | options=None, 64 | type=None), 65 | _descriptor.EnumValueDescriptor( 66 | name='OneVsRest', index=1, number=1, 67 | options=None, 68 | type=None), 69 | ], 70 | containing_type=None, 71 | options=None, 72 | serialized_start=543, 73 | serialized_end=593, 74 | ) 75 | _sym_db.RegisterEnumDescriptor(_GLMCLASSIFIER_CLASSENCODING) 76 | 77 | 78 | _GLMCLASSIFIER_DOUBLEARRAY = _descriptor.Descriptor( 79 | name='DoubleArray', 80 | full_name='CoreML.Specification.GLMClassifier.DoubleArray', 81 | filename=None, 82 | file=DESCRIPTOR, 83 | containing_type=None, 84 | fields=[ 85 | _descriptor.FieldDescriptor( 86 | name='value', 
full_name='CoreML.Specification.GLMClassifier.DoubleArray.value', index=0, 87 | number=1, type=1, cpp_type=5, label=3, 88 | has_default_value=False, default_value=[], 89 | message_type=None, enum_type=None, containing_type=None, 90 | is_extension=False, extension_scope=None, 91 | options=None), 92 | ], 93 | extensions=[ 94 | ], 95 | nested_types=[], 96 | enum_types=[ 97 | ], 98 | options=None, 99 | is_extendable=False, 100 | syntax='proto3', 101 | extension_ranges=[], 102 | oneofs=[ 103 | ], 104 | serialized_start=463, 105 | serialized_end=491, 106 | ) 107 | 108 | _GLMCLASSIFIER = _descriptor.Descriptor( 109 | name='GLMClassifier', 110 | full_name='CoreML.Specification.GLMClassifier', 111 | filename=None, 112 | file=DESCRIPTOR, 113 | containing_type=None, 114 | fields=[ 115 | _descriptor.FieldDescriptor( 116 | name='weights', full_name='CoreML.Specification.GLMClassifier.weights', index=0, 117 | number=1, type=11, cpp_type=10, label=3, 118 | has_default_value=False, default_value=[], 119 | message_type=None, enum_type=None, containing_type=None, 120 | is_extension=False, extension_scope=None, 121 | options=None), 122 | _descriptor.FieldDescriptor( 123 | name='offset', full_name='CoreML.Specification.GLMClassifier.offset', index=1, 124 | number=2, type=1, cpp_type=5, label=3, 125 | has_default_value=False, default_value=[], 126 | message_type=None, enum_type=None, containing_type=None, 127 | is_extension=False, extension_scope=None, 128 | options=None), 129 | _descriptor.FieldDescriptor( 130 | name='postEvaluationTransform', full_name='CoreML.Specification.GLMClassifier.postEvaluationTransform', index=2, 131 | number=3, type=14, cpp_type=8, label=1, 132 | has_default_value=False, default_value=0, 133 | message_type=None, enum_type=None, containing_type=None, 134 | is_extension=False, extension_scope=None, 135 | options=None), 136 | _descriptor.FieldDescriptor( 137 | name='classEncoding', full_name='CoreML.Specification.GLMClassifier.classEncoding', index=3, 138 | number=4, type=14, cpp_type=8, label=1, 139 | has_default_value=False, default_value=0, 140 | message_type=None, enum_type=None, containing_type=None, 141 | is_extension=False, extension_scope=None, 142 | options=None), 143 | _descriptor.FieldDescriptor( 144 | name='stringClassLabels', full_name='CoreML.Specification.GLMClassifier.stringClassLabels', index=4, 145 | number=100, type=11, cpp_type=10, label=1, 146 | has_default_value=False, default_value=None, 147 | message_type=None, enum_type=None, containing_type=None, 148 | is_extension=False, extension_scope=None, 149 | options=None), 150 | _descriptor.FieldDescriptor( 151 | name='int64ClassLabels', full_name='CoreML.Specification.GLMClassifier.int64ClassLabels', index=5, 152 | number=101, type=11, cpp_type=10, label=1, 153 | has_default_value=False, default_value=None, 154 | message_type=None, enum_type=None, containing_type=None, 155 | is_extension=False, extension_scope=None, 156 | options=None), 157 | ], 158 | extensions=[ 159 | ], 160 | nested_types=[_GLMCLASSIFIER_DOUBLEARRAY, ], 161 | enum_types=[ 162 | _GLMCLASSIFIER_POSTEVALUATIONTRANSFORM, 163 | _GLMCLASSIFIER_CLASSENCODING, 164 | ], 165 | options=None, 166 | is_extendable=False, 167 | syntax='proto3', 168 | extension_ranges=[], 169 | oneofs=[ 170 | _descriptor.OneofDescriptor( 171 | name='ClassLabels', full_name='CoreML.Specification.GLMClassifier.ClassLabels', 172 | index=0, containing_type=None, fields=[]), 173 | ], 174 | serialized_start=68, 175 | serialized_end=608, 176 | ) 177 | 178 | 
_GLMCLASSIFIER_DOUBLEARRAY.containing_type = _GLMCLASSIFIER 179 | _GLMCLASSIFIER.fields_by_name['weights'].message_type = _GLMCLASSIFIER_DOUBLEARRAY 180 | _GLMCLASSIFIER.fields_by_name['postEvaluationTransform'].enum_type = _GLMCLASSIFIER_POSTEVALUATIONTRANSFORM 181 | _GLMCLASSIFIER.fields_by_name['classEncoding'].enum_type = _GLMCLASSIFIER_CLASSENCODING 182 | _GLMCLASSIFIER.fields_by_name['stringClassLabels'].message_type = DataStructures__pb2._STRINGVECTOR 183 | _GLMCLASSIFIER.fields_by_name['int64ClassLabels'].message_type = DataStructures__pb2._INT64VECTOR 184 | _GLMCLASSIFIER_POSTEVALUATIONTRANSFORM.containing_type = _GLMCLASSIFIER 185 | _GLMCLASSIFIER_CLASSENCODING.containing_type = _GLMCLASSIFIER 186 | _GLMCLASSIFIER.oneofs_by_name['ClassLabels'].fields.append( 187 | _GLMCLASSIFIER.fields_by_name['stringClassLabels']) 188 | _GLMCLASSIFIER.fields_by_name['stringClassLabels'].containing_oneof = _GLMCLASSIFIER.oneofs_by_name['ClassLabels'] 189 | _GLMCLASSIFIER.oneofs_by_name['ClassLabels'].fields.append( 190 | _GLMCLASSIFIER.fields_by_name['int64ClassLabels']) 191 | _GLMCLASSIFIER.fields_by_name['int64ClassLabels'].containing_oneof = _GLMCLASSIFIER.oneofs_by_name['ClassLabels'] 192 | DESCRIPTOR.message_types_by_name['GLMClassifier'] = _GLMCLASSIFIER 193 | 194 | GLMClassifier = _reflection.GeneratedProtocolMessageType('GLMClassifier', (_message.Message,), dict( 195 | 196 | DoubleArray = _reflection.GeneratedProtocolMessageType('DoubleArray', (_message.Message,), dict( 197 | DESCRIPTOR = _GLMCLASSIFIER_DOUBLEARRAY, 198 | __module__ = 'GLMClassifier_pb2' 199 | # @@protoc_insertion_point(class_scope:CoreML.Specification.GLMClassifier.DoubleArray) 200 | )) 201 | , 202 | DESCRIPTOR = _GLMCLASSIFIER, 203 | __module__ = 'GLMClassifier_pb2' 204 | # @@protoc_insertion_point(class_scope:CoreML.Specification.GLMClassifier) 205 | )) 206 | _sym_db.RegisterMessage(GLMClassifier) 207 | _sym_db.RegisterMessage(GLMClassifier.DoubleArray) 208 | 209 | 210 | DESCRIPTOR.has_options = True 211 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 212 | # @@protoc_insertion_point(module_scope) 213 | -------------------------------------------------------------------------------- /proto/GLMRegressor_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: GLMRegressor.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='GLMRegressor.proto', 20 | package='CoreML.Specification', 21 | syntax='proto3', 22 | serialized_pb=_b('\n\x12GLMRegressor.proto\x12\x14\x43oreML.Specification\"\x9d\x02\n\x0cGLMRegressor\x12?\n\x07weights\x18\x01 \x03(\x0b\x32..CoreML.Specification.GLMRegressor.DoubleArray\x12\x0e\n\x06offset\x18\x02 \x03(\x01\x12[\n\x17postEvaluationTransform\x18\x03 \x01(\x0e\x32:.CoreML.Specification.GLMRegressor.PostEvaluationTransform\x1a\x1c\n\x0b\x44oubleArray\x12\r\n\x05value\x18\x01 \x03(\x01\"A\n\x17PostEvaluationTransform\x12\x0f\n\x0bNoTransform\x10\x00\x12\t\n\x05Logit\x10\x01\x12\n\n\x06Probit\x10\x02\x42\x02H\x03\x62\x06proto3') 23 | ) 24 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 25 | 26 | 27 | 28 | _GLMREGRESSOR_POSTEVALUATIONTRANSFORM = _descriptor.EnumDescriptor( 29 | name='PostEvaluationTransform', 30 | full_name='CoreML.Specification.GLMRegressor.PostEvaluationTransform', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | values=[ 34 | _descriptor.EnumValueDescriptor( 35 | name='NoTransform', index=0, number=0, 36 | options=None, 37 | type=None), 38 | _descriptor.EnumValueDescriptor( 39 | name='Logit', index=1, number=1, 40 | options=None, 41 | type=None), 42 | _descriptor.EnumValueDescriptor( 43 | name='Probit', index=2, number=2, 44 | options=None, 45 | type=None), 46 | ], 47 | containing_type=None, 48 | options=None, 49 | serialized_start=265, 50 | serialized_end=330, 51 | ) 52 | _sym_db.RegisterEnumDescriptor(_GLMREGRESSOR_POSTEVALUATIONTRANSFORM) 53 | 54 | 55 | _GLMREGRESSOR_DOUBLEARRAY = _descriptor.Descriptor( 56 | name='DoubleArray', 57 | full_name='CoreML.Specification.GLMRegressor.DoubleArray', 58 | filename=None, 59 | file=DESCRIPTOR, 60 | containing_type=None, 61 | fields=[ 62 | _descriptor.FieldDescriptor( 63 | name='value', full_name='CoreML.Specification.GLMRegressor.DoubleArray.value', index=0, 64 | number=1, type=1, cpp_type=5, label=3, 65 | has_default_value=False, default_value=[], 66 | message_type=None, enum_type=None, containing_type=None, 67 | is_extension=False, extension_scope=None, 68 | options=None), 69 | ], 70 | extensions=[ 71 | ], 72 | nested_types=[], 73 | enum_types=[ 74 | ], 75 | options=None, 76 | is_extendable=False, 77 | syntax='proto3', 78 | extension_ranges=[], 79 | oneofs=[ 80 | ], 81 | serialized_start=235, 82 | serialized_end=263, 83 | ) 84 | 85 | _GLMREGRESSOR = _descriptor.Descriptor( 86 | name='GLMRegressor', 87 | full_name='CoreML.Specification.GLMRegressor', 88 | filename=None, 89 | file=DESCRIPTOR, 90 | containing_type=None, 91 | fields=[ 92 | _descriptor.FieldDescriptor( 93 | name='weights', full_name='CoreML.Specification.GLMRegressor.weights', index=0, 94 | number=1, type=11, cpp_type=10, label=3, 95 | has_default_value=False, default_value=[], 96 | message_type=None, enum_type=None, containing_type=None, 97 | is_extension=False, extension_scope=None, 98 | options=None), 99 | _descriptor.FieldDescriptor( 100 | name='offset', 
full_name='CoreML.Specification.GLMRegressor.offset', index=1, 101 | number=2, type=1, cpp_type=5, label=3, 102 | has_default_value=False, default_value=[], 103 | message_type=None, enum_type=None, containing_type=None, 104 | is_extension=False, extension_scope=None, 105 | options=None), 106 | _descriptor.FieldDescriptor( 107 | name='postEvaluationTransform', full_name='CoreML.Specification.GLMRegressor.postEvaluationTransform', index=2, 108 | number=3, type=14, cpp_type=8, label=1, 109 | has_default_value=False, default_value=0, 110 | message_type=None, enum_type=None, containing_type=None, 111 | is_extension=False, extension_scope=None, 112 | options=None), 113 | ], 114 | extensions=[ 115 | ], 116 | nested_types=[_GLMREGRESSOR_DOUBLEARRAY, ], 117 | enum_types=[ 118 | _GLMREGRESSOR_POSTEVALUATIONTRANSFORM, 119 | ], 120 | options=None, 121 | is_extendable=False, 122 | syntax='proto3', 123 | extension_ranges=[], 124 | oneofs=[ 125 | ], 126 | serialized_start=45, 127 | serialized_end=330, 128 | ) 129 | 130 | _GLMREGRESSOR_DOUBLEARRAY.containing_type = _GLMREGRESSOR 131 | _GLMREGRESSOR.fields_by_name['weights'].message_type = _GLMREGRESSOR_DOUBLEARRAY 132 | _GLMREGRESSOR.fields_by_name['postEvaluationTransform'].enum_type = _GLMREGRESSOR_POSTEVALUATIONTRANSFORM 133 | _GLMREGRESSOR_POSTEVALUATIONTRANSFORM.containing_type = _GLMREGRESSOR 134 | DESCRIPTOR.message_types_by_name['GLMRegressor'] = _GLMREGRESSOR 135 | 136 | GLMRegressor = _reflection.GeneratedProtocolMessageType('GLMRegressor', (_message.Message,), dict( 137 | 138 | DoubleArray = _reflection.GeneratedProtocolMessageType('DoubleArray', (_message.Message,), dict( 139 | DESCRIPTOR = _GLMREGRESSOR_DOUBLEARRAY, 140 | __module__ = 'GLMRegressor_pb2' 141 | # @@protoc_insertion_point(class_scope:CoreML.Specification.GLMRegressor.DoubleArray) 142 | )) 143 | , 144 | DESCRIPTOR = _GLMREGRESSOR, 145 | __module__ = 'GLMRegressor_pb2' 146 | # @@protoc_insertion_point(class_scope:CoreML.Specification.GLMRegressor) 147 | )) 148 | _sym_db.RegisterMessage(GLMRegressor) 149 | _sym_db.RegisterMessage(GLMRegressor.DoubleArray) 150 | 151 | 152 | DESCRIPTOR.has_options = True 153 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 154 | # @@protoc_insertion_point(module_scope) 155 | -------------------------------------------------------------------------------- /proto/Identity_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: Identity.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='Identity.proto', 20 | package='CoreML.Specification', 21 | syntax='proto3', 22 | serialized_pb=_b('\n\x0eIdentity.proto\x12\x14\x43oreML.Specification\"\n\n\x08IdentityB\x02H\x03\x62\x06proto3') 23 | ) 24 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 25 | 26 | 27 | 28 | 29 | _IDENTITY = _descriptor.Descriptor( 30 | name='Identity', 31 | full_name='CoreML.Specification.Identity', 32 | filename=None, 33 | file=DESCRIPTOR, 34 | containing_type=None, 35 | fields=[ 36 | ], 37 | extensions=[ 38 | ], 39 | nested_types=[], 40 | enum_types=[ 41 | ], 42 | options=None, 43 | is_extendable=False, 44 | syntax='proto3', 45 | extension_ranges=[], 46 | oneofs=[ 47 | ], 48 | serialized_start=40, 49 | serialized_end=50, 50 | ) 51 | 52 | DESCRIPTOR.message_types_by_name['Identity'] = _IDENTITY 53 | 54 | Identity = _reflection.GeneratedProtocolMessageType('Identity', (_message.Message,), dict( 55 | DESCRIPTOR = _IDENTITY, 56 | __module__ = 'Identity_pb2' 57 | # @@protoc_insertion_point(class_scope:CoreML.Specification.Identity) 58 | )) 59 | _sym_db.RegisterMessage(Identity) 60 | 61 | 62 | DESCRIPTOR.has_options = True 63 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 64 | # @@protoc_insertion_point(module_scope) 65 | -------------------------------------------------------------------------------- /proto/Imputer_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: Imputer.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | import DataStructures_pb2 as DataStructures__pb2 17 | FeatureTypes__pb2 = DataStructures__pb2.FeatureTypes__pb2 18 | 19 | from DataStructures_pb2 import * 20 | 21 | DESCRIPTOR = _descriptor.FileDescriptor( 22 | name='Imputer.proto', 23 | package='CoreML.Specification', 24 | syntax='proto3', 25 | serialized_pb=_b('\n\rImputer.proto\x12\x14\x43oreML.Specification\x1a\x14\x44\x61taStructures.proto\"\xf3\x03\n\x07Imputer\x12\x1c\n\x12imputedDoubleValue\x18\x01 \x01(\x01H\x00\x12\x1b\n\x11imputedInt64Value\x18\x02 \x01(\x03H\x00\x12\x1c\n\x12imputedStringValue\x18\x03 \x01(\tH\x00\x12@\n\x12imputedDoubleArray\x18\x04 \x01(\x0b\x32\".CoreML.Specification.DoubleVectorH\x00\x12>\n\x11imputedInt64Array\x18\x05 \x01(\x0b\x32!.CoreML.Specification.Int64VectorH\x00\x12J\n\x17imputedStringDictionary\x18\x06 \x01(\x0b\x32\'.CoreML.Specification.StringToDoubleMapH\x00\x12H\n\x16imputedInt64Dictionary\x18\x07 \x01(\x0b\x32&.CoreML.Specification.Int64ToDoubleMapH\x00\x12\x1c\n\x12replaceDoubleValue\x18\x0b \x01(\x01H\x01\x12\x1b\n\x11replaceInt64Value\x18\x0c \x01(\x03H\x01\x12\x1c\n\x12replaceStringValue\x18\r \x01(\tH\x01\x42\x0e\n\x0cImputedValueB\x0e\n\x0cReplaceValueB\x02H\x03P\x00\x62\x06proto3') 26 | , 27 | dependencies=[DataStructures__pb2.DESCRIPTOR,], 28 | public_dependencies=[DataStructures__pb2.DESCRIPTOR,]) 29 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 30 | 31 | 32 | 33 | 34 | _IMPUTER = _descriptor.Descriptor( 35 | name='Imputer', 36 | full_name='CoreML.Specification.Imputer', 37 | filename=None, 38 | file=DESCRIPTOR, 39 | containing_type=None, 40 | fields=[ 41 | _descriptor.FieldDescriptor( 42 | name='imputedDoubleValue', full_name='CoreML.Specification.Imputer.imputedDoubleValue', index=0, 43 | number=1, type=1, cpp_type=5, label=1, 44 | has_default_value=False, default_value=float(0), 45 | message_type=None, enum_type=None, containing_type=None, 46 | is_extension=False, extension_scope=None, 47 | options=None), 48 | _descriptor.FieldDescriptor( 49 | name='imputedInt64Value', full_name='CoreML.Specification.Imputer.imputedInt64Value', index=1, 50 | number=2, type=3, cpp_type=2, label=1, 51 | has_default_value=False, default_value=0, 52 | message_type=None, enum_type=None, containing_type=None, 53 | is_extension=False, extension_scope=None, 54 | options=None), 55 | _descriptor.FieldDescriptor( 56 | name='imputedStringValue', full_name='CoreML.Specification.Imputer.imputedStringValue', index=2, 57 | number=3, type=9, cpp_type=9, label=1, 58 | has_default_value=False, default_value=_b("").decode('utf-8'), 59 | message_type=None, enum_type=None, containing_type=None, 60 | is_extension=False, extension_scope=None, 61 | options=None), 62 | _descriptor.FieldDescriptor( 63 | name='imputedDoubleArray', full_name='CoreML.Specification.Imputer.imputedDoubleArray', index=3, 64 | number=4, type=11, cpp_type=10, label=1, 65 | has_default_value=False, default_value=None, 66 | message_type=None, enum_type=None, containing_type=None, 67 | is_extension=False, extension_scope=None, 68 | options=None), 69 
| _descriptor.FieldDescriptor( 70 | name='imputedInt64Array', full_name='CoreML.Specification.Imputer.imputedInt64Array', index=4, 71 | number=5, type=11, cpp_type=10, label=1, 72 | has_default_value=False, default_value=None, 73 | message_type=None, enum_type=None, containing_type=None, 74 | is_extension=False, extension_scope=None, 75 | options=None), 76 | _descriptor.FieldDescriptor( 77 | name='imputedStringDictionary', full_name='CoreML.Specification.Imputer.imputedStringDictionary', index=5, 78 | number=6, type=11, cpp_type=10, label=1, 79 | has_default_value=False, default_value=None, 80 | message_type=None, enum_type=None, containing_type=None, 81 | is_extension=False, extension_scope=None, 82 | options=None), 83 | _descriptor.FieldDescriptor( 84 | name='imputedInt64Dictionary', full_name='CoreML.Specification.Imputer.imputedInt64Dictionary', index=6, 85 | number=7, type=11, cpp_type=10, label=1, 86 | has_default_value=False, default_value=None, 87 | message_type=None, enum_type=None, containing_type=None, 88 | is_extension=False, extension_scope=None, 89 | options=None), 90 | _descriptor.FieldDescriptor( 91 | name='replaceDoubleValue', full_name='CoreML.Specification.Imputer.replaceDoubleValue', index=7, 92 | number=11, type=1, cpp_type=5, label=1, 93 | has_default_value=False, default_value=float(0), 94 | message_type=None, enum_type=None, containing_type=None, 95 | is_extension=False, extension_scope=None, 96 | options=None), 97 | _descriptor.FieldDescriptor( 98 | name='replaceInt64Value', full_name='CoreML.Specification.Imputer.replaceInt64Value', index=8, 99 | number=12, type=3, cpp_type=2, label=1, 100 | has_default_value=False, default_value=0, 101 | message_type=None, enum_type=None, containing_type=None, 102 | is_extension=False, extension_scope=None, 103 | options=None), 104 | _descriptor.FieldDescriptor( 105 | name='replaceStringValue', full_name='CoreML.Specification.Imputer.replaceStringValue', index=9, 106 | number=13, type=9, cpp_type=9, label=1, 107 | has_default_value=False, default_value=_b("").decode('utf-8'), 108 | message_type=None, enum_type=None, containing_type=None, 109 | is_extension=False, extension_scope=None, 110 | options=None), 111 | ], 112 | extensions=[ 113 | ], 114 | nested_types=[], 115 | enum_types=[ 116 | ], 117 | options=None, 118 | is_extendable=False, 119 | syntax='proto3', 120 | extension_ranges=[], 121 | oneofs=[ 122 | _descriptor.OneofDescriptor( 123 | name='ImputedValue', full_name='CoreML.Specification.Imputer.ImputedValue', 124 | index=0, containing_type=None, fields=[]), 125 | _descriptor.OneofDescriptor( 126 | name='ReplaceValue', full_name='CoreML.Specification.Imputer.ReplaceValue', 127 | index=1, containing_type=None, fields=[]), 128 | ], 129 | serialized_start=62, 130 | serialized_end=561, 131 | ) 132 | 133 | _IMPUTER.fields_by_name['imputedDoubleArray'].message_type = DataStructures__pb2._DOUBLEVECTOR 134 | _IMPUTER.fields_by_name['imputedInt64Array'].message_type = DataStructures__pb2._INT64VECTOR 135 | _IMPUTER.fields_by_name['imputedStringDictionary'].message_type = DataStructures__pb2._STRINGTODOUBLEMAP 136 | _IMPUTER.fields_by_name['imputedInt64Dictionary'].message_type = DataStructures__pb2._INT64TODOUBLEMAP 137 | _IMPUTER.oneofs_by_name['ImputedValue'].fields.append( 138 | _IMPUTER.fields_by_name['imputedDoubleValue']) 139 | _IMPUTER.fields_by_name['imputedDoubleValue'].containing_oneof = _IMPUTER.oneofs_by_name['ImputedValue'] 140 | _IMPUTER.oneofs_by_name['ImputedValue'].fields.append( 141 | 
_IMPUTER.fields_by_name['imputedInt64Value']) 142 | _IMPUTER.fields_by_name['imputedInt64Value'].containing_oneof = _IMPUTER.oneofs_by_name['ImputedValue'] 143 | _IMPUTER.oneofs_by_name['ImputedValue'].fields.append( 144 | _IMPUTER.fields_by_name['imputedStringValue']) 145 | _IMPUTER.fields_by_name['imputedStringValue'].containing_oneof = _IMPUTER.oneofs_by_name['ImputedValue'] 146 | _IMPUTER.oneofs_by_name['ImputedValue'].fields.append( 147 | _IMPUTER.fields_by_name['imputedDoubleArray']) 148 | _IMPUTER.fields_by_name['imputedDoubleArray'].containing_oneof = _IMPUTER.oneofs_by_name['ImputedValue'] 149 | _IMPUTER.oneofs_by_name['ImputedValue'].fields.append( 150 | _IMPUTER.fields_by_name['imputedInt64Array']) 151 | _IMPUTER.fields_by_name['imputedInt64Array'].containing_oneof = _IMPUTER.oneofs_by_name['ImputedValue'] 152 | _IMPUTER.oneofs_by_name['ImputedValue'].fields.append( 153 | _IMPUTER.fields_by_name['imputedStringDictionary']) 154 | _IMPUTER.fields_by_name['imputedStringDictionary'].containing_oneof = _IMPUTER.oneofs_by_name['ImputedValue'] 155 | _IMPUTER.oneofs_by_name['ImputedValue'].fields.append( 156 | _IMPUTER.fields_by_name['imputedInt64Dictionary']) 157 | _IMPUTER.fields_by_name['imputedInt64Dictionary'].containing_oneof = _IMPUTER.oneofs_by_name['ImputedValue'] 158 | _IMPUTER.oneofs_by_name['ReplaceValue'].fields.append( 159 | _IMPUTER.fields_by_name['replaceDoubleValue']) 160 | _IMPUTER.fields_by_name['replaceDoubleValue'].containing_oneof = _IMPUTER.oneofs_by_name['ReplaceValue'] 161 | _IMPUTER.oneofs_by_name['ReplaceValue'].fields.append( 162 | _IMPUTER.fields_by_name['replaceInt64Value']) 163 | _IMPUTER.fields_by_name['replaceInt64Value'].containing_oneof = _IMPUTER.oneofs_by_name['ReplaceValue'] 164 | _IMPUTER.oneofs_by_name['ReplaceValue'].fields.append( 165 | _IMPUTER.fields_by_name['replaceStringValue']) 166 | _IMPUTER.fields_by_name['replaceStringValue'].containing_oneof = _IMPUTER.oneofs_by_name['ReplaceValue'] 167 | DESCRIPTOR.message_types_by_name['Imputer'] = _IMPUTER 168 | 169 | Imputer = _reflection.GeneratedProtocolMessageType('Imputer', (_message.Message,), dict( 170 | DESCRIPTOR = _IMPUTER, 171 | __module__ = 'Imputer_pb2' 172 | # @@protoc_insertion_point(class_scope:CoreML.Specification.Imputer) 173 | )) 174 | _sym_db.RegisterMessage(Imputer) 175 | 176 | 177 | DESCRIPTOR.has_options = True 178 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 179 | # @@protoc_insertion_point(module_scope) 180 | -------------------------------------------------------------------------------- /proto/Normalizer_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: Normalizer.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='Normalizer.proto', 20 | package='CoreML.Specification', 21 | syntax='proto3', 22 | serialized_pb=_b('\n\x10Normalizer.proto\x12\x14\x43oreML.Specification\"o\n\nNormalizer\x12;\n\x08normType\x18\x01 \x01(\x0e\x32).CoreML.Specification.Normalizer.NormType\"$\n\x08NormType\x12\x08\n\x04LMax\x10\x00\x12\x06\n\x02L1\x10\x01\x12\x06\n\x02L2\x10\x02\x42\x02H\x03\x62\x06proto3') 23 | ) 24 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 25 | 26 | 27 | 28 | _NORMALIZER_NORMTYPE = _descriptor.EnumDescriptor( 29 | name='NormType', 30 | full_name='CoreML.Specification.Normalizer.NormType', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | values=[ 34 | _descriptor.EnumValueDescriptor( 35 | name='LMax', index=0, number=0, 36 | options=None, 37 | type=None), 38 | _descriptor.EnumValueDescriptor( 39 | name='L1', index=1, number=1, 40 | options=None, 41 | type=None), 42 | _descriptor.EnumValueDescriptor( 43 | name='L2', index=2, number=2, 44 | options=None, 45 | type=None), 46 | ], 47 | containing_type=None, 48 | options=None, 49 | serialized_start=117, 50 | serialized_end=153, 51 | ) 52 | _sym_db.RegisterEnumDescriptor(_NORMALIZER_NORMTYPE) 53 | 54 | 55 | _NORMALIZER = _descriptor.Descriptor( 56 | name='Normalizer', 57 | full_name='CoreML.Specification.Normalizer', 58 | filename=None, 59 | file=DESCRIPTOR, 60 | containing_type=None, 61 | fields=[ 62 | _descriptor.FieldDescriptor( 63 | name='normType', full_name='CoreML.Specification.Normalizer.normType', index=0, 64 | number=1, type=14, cpp_type=8, label=1, 65 | has_default_value=False, default_value=0, 66 | message_type=None, enum_type=None, containing_type=None, 67 | is_extension=False, extension_scope=None, 68 | options=None), 69 | ], 70 | extensions=[ 71 | ], 72 | nested_types=[], 73 | enum_types=[ 74 | _NORMALIZER_NORMTYPE, 75 | ], 76 | options=None, 77 | is_extendable=False, 78 | syntax='proto3', 79 | extension_ranges=[], 80 | oneofs=[ 81 | ], 82 | serialized_start=42, 83 | serialized_end=153, 84 | ) 85 | 86 | _NORMALIZER.fields_by_name['normType'].enum_type = _NORMALIZER_NORMTYPE 87 | _NORMALIZER_NORMTYPE.containing_type = _NORMALIZER 88 | DESCRIPTOR.message_types_by_name['Normalizer'] = _NORMALIZER 89 | 90 | Normalizer = _reflection.GeneratedProtocolMessageType('Normalizer', (_message.Message,), dict( 91 | DESCRIPTOR = _NORMALIZER, 92 | __module__ = 'Normalizer_pb2' 93 | # @@protoc_insertion_point(class_scope:CoreML.Specification.Normalizer) 94 | )) 95 | _sym_db.RegisterMessage(Normalizer) 96 | 97 | 98 | DESCRIPTOR.has_options = True 99 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 100 | # @@protoc_insertion_point(module_scope) 101 | -------------------------------------------------------------------------------- /proto/OneHotEncoder_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: OneHotEncoder.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | import DataStructures_pb2 as DataStructures__pb2 17 | FeatureTypes__pb2 = DataStructures__pb2.FeatureTypes__pb2 18 | 19 | from DataStructures_pb2 import * 20 | 21 | DESCRIPTOR = _descriptor.FileDescriptor( 22 | name='OneHotEncoder.proto', 23 | package='CoreML.Specification', 24 | syntax='proto3', 25 | serialized_pb=_b('\n\x13OneHotEncoder.proto\x12\x14\x43oreML.Specification\x1a\x14\x44\x61taStructures.proto\"\xb5\x02\n\rOneHotEncoder\x12>\n\x10stringCategories\x18\x01 \x01(\x0b\x32\".CoreML.Specification.StringVectorH\x00\x12<\n\x0fint64Categories\x18\x02 \x01(\x0b\x32!.CoreML.Specification.Int64VectorH\x00\x12\x14\n\x0coutputSparse\x18\n \x01(\x08\x12H\n\rhandleUnknown\x18\x0b \x01(\x0e\x32\x31.CoreML.Specification.OneHotEncoder.HandleUnknown\"6\n\rHandleUnknown\x12\x12\n\x0e\x45rrorOnUnknown\x10\x00\x12\x11\n\rIgnoreUnknown\x10\x01\x42\x0e\n\x0c\x43\x61tegoryTypeB\x02H\x03P\x00\x62\x06proto3') 26 | , 27 | dependencies=[DataStructures__pb2.DESCRIPTOR,], 28 | public_dependencies=[DataStructures__pb2.DESCRIPTOR,]) 29 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 30 | 31 | 32 | 33 | _ONEHOTENCODER_HANDLEUNKNOWN = _descriptor.EnumDescriptor( 34 | name='HandleUnknown', 35 | full_name='CoreML.Specification.OneHotEncoder.HandleUnknown', 36 | filename=None, 37 | file=DESCRIPTOR, 38 | values=[ 39 | _descriptor.EnumValueDescriptor( 40 | name='ErrorOnUnknown', index=0, number=0, 41 | options=None, 42 | type=None), 43 | _descriptor.EnumValueDescriptor( 44 | name='IgnoreUnknown', index=1, number=1, 45 | options=None, 46 | type=None), 47 | ], 48 | containing_type=None, 49 | options=None, 50 | serialized_start=307, 51 | serialized_end=361, 52 | ) 53 | _sym_db.RegisterEnumDescriptor(_ONEHOTENCODER_HANDLEUNKNOWN) 54 | 55 | 56 | _ONEHOTENCODER = _descriptor.Descriptor( 57 | name='OneHotEncoder', 58 | full_name='CoreML.Specification.OneHotEncoder', 59 | filename=None, 60 | file=DESCRIPTOR, 61 | containing_type=None, 62 | fields=[ 63 | _descriptor.FieldDescriptor( 64 | name='stringCategories', full_name='CoreML.Specification.OneHotEncoder.stringCategories', index=0, 65 | number=1, type=11, cpp_type=10, label=1, 66 | has_default_value=False, default_value=None, 67 | message_type=None, enum_type=None, containing_type=None, 68 | is_extension=False, extension_scope=None, 69 | options=None), 70 | _descriptor.FieldDescriptor( 71 | name='int64Categories', full_name='CoreML.Specification.OneHotEncoder.int64Categories', index=1, 72 | number=2, type=11, cpp_type=10, label=1, 73 | has_default_value=False, default_value=None, 74 | message_type=None, enum_type=None, containing_type=None, 75 | is_extension=False, extension_scope=None, 76 | options=None), 77 | _descriptor.FieldDescriptor( 78 | name='outputSparse', full_name='CoreML.Specification.OneHotEncoder.outputSparse', index=2, 79 | number=10, type=8, cpp_type=7, label=1, 80 | has_default_value=False, default_value=False, 81 | message_type=None, enum_type=None, containing_type=None, 82 | is_extension=False, extension_scope=None, 83 | options=None), 84 | 
_descriptor.FieldDescriptor( 85 | name='handleUnknown', full_name='CoreML.Specification.OneHotEncoder.handleUnknown', index=3, 86 | number=11, type=14, cpp_type=8, label=1, 87 | has_default_value=False, default_value=0, 88 | message_type=None, enum_type=None, containing_type=None, 89 | is_extension=False, extension_scope=None, 90 | options=None), 91 | ], 92 | extensions=[ 93 | ], 94 | nested_types=[], 95 | enum_types=[ 96 | _ONEHOTENCODER_HANDLEUNKNOWN, 97 | ], 98 | options=None, 99 | is_extendable=False, 100 | syntax='proto3', 101 | extension_ranges=[], 102 | oneofs=[ 103 | _descriptor.OneofDescriptor( 104 | name='CategoryType', full_name='CoreML.Specification.OneHotEncoder.CategoryType', 105 | index=0, containing_type=None, fields=[]), 106 | ], 107 | serialized_start=68, 108 | serialized_end=377, 109 | ) 110 | 111 | _ONEHOTENCODER.fields_by_name['stringCategories'].message_type = DataStructures__pb2._STRINGVECTOR 112 | _ONEHOTENCODER.fields_by_name['int64Categories'].message_type = DataStructures__pb2._INT64VECTOR 113 | _ONEHOTENCODER.fields_by_name['handleUnknown'].enum_type = _ONEHOTENCODER_HANDLEUNKNOWN 114 | _ONEHOTENCODER_HANDLEUNKNOWN.containing_type = _ONEHOTENCODER 115 | _ONEHOTENCODER.oneofs_by_name['CategoryType'].fields.append( 116 | _ONEHOTENCODER.fields_by_name['stringCategories']) 117 | _ONEHOTENCODER.fields_by_name['stringCategories'].containing_oneof = _ONEHOTENCODER.oneofs_by_name['CategoryType'] 118 | _ONEHOTENCODER.oneofs_by_name['CategoryType'].fields.append( 119 | _ONEHOTENCODER.fields_by_name['int64Categories']) 120 | _ONEHOTENCODER.fields_by_name['int64Categories'].containing_oneof = _ONEHOTENCODER.oneofs_by_name['CategoryType'] 121 | DESCRIPTOR.message_types_by_name['OneHotEncoder'] = _ONEHOTENCODER 122 | 123 | OneHotEncoder = _reflection.GeneratedProtocolMessageType('OneHotEncoder', (_message.Message,), dict( 124 | DESCRIPTOR = _ONEHOTENCODER, 125 | __module__ = 'OneHotEncoder_pb2' 126 | # @@protoc_insertion_point(class_scope:CoreML.Specification.OneHotEncoder) 127 | )) 128 | _sym_db.RegisterMessage(OneHotEncoder) 129 | 130 | 131 | DESCRIPTOR.has_options = True 132 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 133 | # @@protoc_insertion_point(module_scope) 134 | -------------------------------------------------------------------------------- /proto/Scaler_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: Scaler.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='Scaler.proto', 20 | package='CoreML.Specification', 21 | syntax='proto3', 22 | serialized_pb=_b('\n\x0cScaler.proto\x12\x14\x43oreML.Specification\"0\n\x06Scaler\x12\x12\n\nshiftValue\x18\x01 \x03(\x01\x12\x12\n\nscaleValue\x18\x02 \x03(\x01\x42\x02H\x03\x62\x06proto3') 23 | ) 24 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 25 | 26 | 27 | 28 | 29 | _SCALER = _descriptor.Descriptor( 30 | name='Scaler', 31 | full_name='CoreML.Specification.Scaler', 32 | filename=None, 33 | file=DESCRIPTOR, 34 | containing_type=None, 35 | fields=[ 36 | _descriptor.FieldDescriptor( 37 | name='shiftValue', full_name='CoreML.Specification.Scaler.shiftValue', index=0, 38 | number=1, type=1, cpp_type=5, label=3, 39 | has_default_value=False, default_value=[], 40 | message_type=None, enum_type=None, containing_type=None, 41 | is_extension=False, extension_scope=None, 42 | options=None), 43 | _descriptor.FieldDescriptor( 44 | name='scaleValue', full_name='CoreML.Specification.Scaler.scaleValue', index=1, 45 | number=2, type=1, cpp_type=5, label=3, 46 | has_default_value=False, default_value=[], 47 | message_type=None, enum_type=None, containing_type=None, 48 | is_extension=False, extension_scope=None, 49 | options=None), 50 | ], 51 | extensions=[ 52 | ], 53 | nested_types=[], 54 | enum_types=[ 55 | ], 56 | options=None, 57 | is_extendable=False, 58 | syntax='proto3', 59 | extension_ranges=[], 60 | oneofs=[ 61 | ], 62 | serialized_start=38, 63 | serialized_end=86, 64 | ) 65 | 66 | DESCRIPTOR.message_types_by_name['Scaler'] = _SCALER 67 | 68 | Scaler = _reflection.GeneratedProtocolMessageType('Scaler', (_message.Message,), dict( 69 | DESCRIPTOR = _SCALER, 70 | __module__ = 'Scaler_pb2' 71 | # @@protoc_insertion_point(class_scope:CoreML.Specification.Scaler) 72 | )) 73 | _sym_db.RegisterMessage(Scaler) 74 | 75 | 76 | DESCRIPTOR.has_options = True 77 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) 78 | # @@protoc_insertion_point(module_scope) 79 | -------------------------------------------------------------------------------- /proto/__init__.py: -------------------------------------------------------------------------------- 1 | ### Module for proto generated Python code. 2 | --------------------------------------------------------------------------------
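
Usage sketch for the *_pb2.py modules above: they are descriptor code emitted by protoc (registered with the protobuf symbol database) and are never edited by hand, but the message classes they define can be instantiated directly. The snippet below is a minimal sketch, not repository code; it assumes the proto/ directory is on sys.path (so the bare imports such as "import DataStructures_pb2" used by these modules resolve) and a protobuf runtime contemporary with this generated code. Field and enum names are taken from the descriptors listed above.

    import GLMRegressor_pb2
    import Scaler_pb2
    import DictVectorizer_pb2

    # GLMRegressor: one DoubleArray of weights per output dimension, plus offsets
    # and a post-evaluation transform (NoTransform / Logit / Probit).
    glm = GLMRegressor_pb2.GLMRegressor()
    weights = glm.weights.add()
    weights.value.extend([0.5, -1.25, 2.0])
    glm.offset.append(0.1)
    glm.postEvaluationTransform = GLMRegressor_pb2.GLMRegressor.NoTransform

    # Scaler: parallel shift/scale vectors applied elementwise to the input.
    scaler = Scaler_pb2.Scaler()
    scaler.shiftValue.extend([0.0, 0.0, 0.0])
    scaler.scaleValue.extend([1.0, 2.0, 0.5])

    # The oneof groups (e.g. DictVectorizer.Map) behave as usual for proto3:
    # touching one arm and calling SetInParent() selects it.
    dv = DictVectorizer_pb2.DictVectorizer()
    dv.stringToIndex.SetInParent()
    assert dv.WhichOneof('Map') == 'stringToIndex'

    # Round-trip through the wire format, which is what happens when these
    # messages are embedded into a full .mlmodel specification.
    restored = GLMRegressor_pb2.GLMRegressor()
    restored.ParseFromString(glm.SerializeToString())
    assert list(restored.weights[0].value) == [0.5, -1.25, 2.0]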