├── sklearn_pmml ├── convert │ ├── test │ │ ├── __init__.py │ │ ├── jpmml-csv-evaluator │ │ │ ├── README.md │ │ │ ├── pom.xml │ │ │ └── src │ │ │ │ └── main │ │ │ │ └── java │ │ │ │ └── sklearn │ │ │ │ └── pmml │ │ │ │ └── jpmml │ │ │ │ └── JPMMLCSVEvaluator.java │ │ ├── test_randomForestConverter.py │ │ ├── test_derived_fields.py │ │ ├── test_gradientBoostingConverter.py │ │ ├── test_decisionTreeClassifierConverter.py │ │ └── jpmml_test.py │ ├── __init__.py │ ├── random_forest.py │ ├── features.py │ ├── utils.py │ ├── tree.py │ ├── gbrt.py │ └── model.py ├── __init__.py └── test │ ├── data │ └── gradient_boosting_classifier │ │ ├── context.pkl │ │ ├── document.pmml │ │ └── estimator.pkl │ └── __init__.py ├── MANIFEST.in ├── .travis.yml ├── .gitignore ├── setup.py ├── LICENSE ├── README.md └── examples └── pmml ├── DecisionTreeClassifier.pmml ├── GradientBoostingClassifier.pmml └── RandomForestClassifier.pmml /sklearn_pmml/convert/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sklearn_pmml/__init__.py: -------------------------------------------------------------------------------- 1 | from sklearn_pmml.convert import * -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include requirements.txt -------------------------------------------------------------------------------- /sklearn_pmml/convert/__init__.py: -------------------------------------------------------------------------------- 1 | from sklearn_pmml import pmml 2 | from sklearn_pmml.convert.features import Feature, NumericFeature, CategoricalFeature, RealNumericFeature 3 | from sklearn_pmml.convert.gbrt import * 4 | from sklearn_pmml.convert.tree import * 5 | from sklearn_pmml.convert.random_forest import * 6 | from sklearn_pmml.convert.model import * 7 | from sklearn_pmml.convert.utils import * 8 | 9 | 10 | __all__ = ['TransformationContext', 'EstimatorConverter', 'find_converter', 'GradientBoostingConverter', 'LogOddsEstimatorConverter', 'DecisionTreeConverter', 'features'] 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /sklearn_pmml/convert/test/jpmml-csv-evaluator/README.md: -------------------------------------------------------------------------------- 1 | # About 2 | This is a simple [JPMML](http://github.com/jpmml)-based CLI evaluator for PMML models. 3 | 4 | # Notes 5 | This submodule relies on AGPL library [jpmml-evaluator](http://github.com/jpmml/jpmml-evaluator), 6 | but it's only used for testing and it's not a part of sklearn-pmml distribution. 7 | Since users will not interact with AGPL-licensed library, I think it's OK to use it in tests. 8 | 9 | # Usage 10 | 1. Build the JAR file (make sure you have JDK8 installed): 11 | ``` 12 | mvn clean package 13 | ``` 14 | 2. Run with maven: 15 | ``` 16 | mvn exec:java -e -q \ 17 | -Dexec.mainClass=sklearn.pmml.jpmml.JPMMLCSVEvaluator \ 18 | -Dexec.args=/path/to/pmml /path/to/input.csv /path/to/output.csv 19 | ``` -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.4" 5 | # - "nightly" 6 | # command to install dependencies 7 | before_install: 8 | - sudo add-apt-repository ppa:webupd8team/java -y 9 | - sudo apt-get update -qq 10 | - sudo apt-get install oracle-java8-installer 11 | - sudo apt-get install maven 12 | - export PATH=/usr/bin:$PATH 13 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 14 | - chmod +x miniconda.sh 15 | - ./miniconda.sh -b 16 | - export PATH=/home/travis/miniconda/bin:$PATH 17 | - conda update --yes conda 18 | # install the heaviest dependencies with conda to save some time 19 | - travis_retry conda install --yes python=$TRAVIS_PYTHON_VERSION pip numpy scipy scikit-learn pandas lxml 20 | 21 | install: 22 | - travis_retry pip install . 23 | # command to run tests 24 | script: python setup.py test 25 | cache: apt 26 | -------------------------------------------------------------------------------- /sklearn_pmml/test/data/gradient_boosting_classifier/context.pkl: -------------------------------------------------------------------------------- 1 | ccopy_reg 2 | _reconstructor 3 | p1 4 | (csklearn_pmml.convert 5 | TransformationContext 6 | p2 7 | c__builtin__ 8 | object 9 | p3 10 | NtRp4 11 | (dp5 12 | S'schemas' 13 | p6 14 | (dp7 15 | S'output' 16 | p8 17 | (lp9 18 | g1 19 | (csklearn_pmml.convert.features 20 | RealNumericFeature 21 | p10 22 | g3 23 | NtRp11 24 | (dp12 25 | S'_namespace' 26 | p13 27 | S'' 28 | sS'_invalid_value_treatment' 29 | p14 30 | S'asIs' 31 | p15 32 | sS'_name' 33 | p16 34 | g8 35 | sbasS'input' 36 | p17 37 | (lp18 38 | g1 39 | (csklearn_pmml.convert.features 40 | IntegerNumericFeature 41 | p19 42 | g3 43 | NtRp20 44 | (dp21 45 | g13 46 | S'' 47 | sg14 48 | g15 49 | sg16 50 | S'x1' 51 | p22 52 | sbag1 53 | (csklearn_pmml.convert.features 54 | StringCategoricalFeature 55 | p23 56 | g3 57 | NtRp24 58 | (dp25 59 | S'value_list' 60 | p26 61 | (lp27 62 | S'zero' 63 | p28 64 | aS'one' 65 | p29 66 | asg13 67 | S'' 68 | sg14 69 | g15 70 | sg16 71 | S'x2' 72 | p30 73 | sbassb. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | #java/intellij stuff 60 | *.iml 61 | *.class 62 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Command 2 | 3 | 4 | class PyTest(Command): 5 | user_options = [] 6 | 7 | def initialize_options(self): 8 | pass 9 | 10 | def finalize_options(self): 11 | pass 12 | 13 | def run(self): 14 | import subprocess 15 | import sys 16 | errno = subprocess.call([sys.executable, 'runtests.py']) 17 | raise SystemExit(errno) 18 | 19 | setup( 20 | name='sklearn-pmml', 21 | version='0.1.2', 22 | packages=['sklearn_pmml', 'sklearn_pmml.convert'], 23 | install_requires=[ 24 | "pyxb", 25 | "scikit-learn", 26 | "pandas", 27 | "scipy", 28 | "pytest", 29 | "lxml", 30 | "enum34", 31 | ], 32 | cmdclass={'test': PyTest}, 33 | url='https://github.com/alex-pirozhenko/sklearn-pmml', 34 | license='MIT', 35 | author='Alex Pirozhenko', 36 | author_email='apirozhenko@pulsepoint.com', 37 | description='A library that allows serialization of SciKit-Learn estimators into PMML' 38 | ) 39 | -------------------------------------------------------------------------------- /sklearn_pmml/convert/test/test_randomForestConverter.py: -------------------------------------------------------------------------------- 1 | from sklearn_pmml.convert import IntegerCategoricalFeature 2 | from sklearn_pmml.convert.test.jpmml_test import JPMMLClassificationTest, JPMMLTest, TARGET_NAME 3 | from unittest import TestCase 4 | from sklearn.ensemble import RandomForestClassifier 5 | 6 | __author__ = 'evancox' 7 | 8 | 9 | from sklearn_pmml.convert.random_forest import RandomForestClassifierConverter 10 | 11 | 12 | class TestRandomForestClassifierParity(TestCase, JPMMLClassificationTest): 13 | 14 | @classmethod 15 | def setUpClass(cls): 16 | if JPMMLTest.can_run(): 17 | JPMMLTest.init_jpmml() 18 | 19 | def setUp(self): 20 | self.model = RandomForestClassifier( 21 | n_estimators=3, 22 | max_depth=3 23 | ) 24 | self.init_data() 25 | self.converter = RandomForestClassifierConverter( 26 | estimator=self.model, 27 | context=self.ctx 28 | ) 29 | 30 | @property 31 | def output(self): 32 | return IntegerCategoricalFeature(name=TARGET_NAME, value_list=[0, 1, 2]) 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 alex-pirozhenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/alex-pirozhenko/sklearn-pmml.svg)](https://travis-ci.org/alex-pirozhenko/sklearn-pmml) 2 | [![Join the chat at https://gitter.im/alex-pirozhenko/sklearn-pmml](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/alex-pirozhenko/sklearn-pmml?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 3 | 4 | # sklearn-pmml 5 | 6 | A library that allows serialization of SciKit-Learn estimators into PMML 7 | 8 | # Installation 9 | The easiest way is to use pip: 10 | ``` 11 | pip install sklearn-pmml 12 | ``` 13 | 14 | # Supported models 15 | - DecisionTreeClassifier 16 | - DecisionTreeRegressor 17 | - GradientBoostingClassifier 18 | - RandomForestClassifier 19 | 20 | # PMML output 21 | 22 | ## Classification 23 | Classifier converters can only operate with categorical outputs, and for each categorical output variable ```varname``` 24 | the PMML output contains the following outputs: 25 | - categorical ```varname``` for the predicted label of the instance 26 | - double ```varname.label``` for the probability for a given label 27 | 28 | ## Regression 29 | Regression model PMML outputs the numeric response variable named as the output variable 30 | -------------------------------------------------------------------------------- /sklearn_pmml/convert/test/jpmml-csv-evaluator/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | sklearn.pmml.jpmml 8 | jpmml-csv-evaluator 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | 14 | org.jpmml 15 | pmml-evaluator 16 | 17 | 1.2.5 18 | 19 | 20 | 21 | org.jpmml 22 | pmml-model 23 | 24 | 1.2.6 25 | 26 | 27 | 28 | net.sf.supercsv 29 | super-csv 30 | 2.0.1 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | org.apache.maven.plugins 42 | maven-compiler-plugin 43 | 3.0 44 | 45 | 1.7 46 | 1.7 47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /sklearn_pmml/convert/random_forest.py: -------------------------------------------------------------------------------- 1 | from sklearn_pmml.convert import CategoricalFeature 2 | 3 | __author__ = 'evancox' 4 | 5 | 6 | from sklearn.ensemble import RandomForestClassifier 7 | from sklearn_pmml.convert.model import Schema, ModelMode, ClassifierConverter 8 | from sklearn_pmml.convert.tree import DecisionTreeConverter 9 | from sklearn_pmml.convert.utils import estimator_to_converter 10 | 11 | import sklearn_pmml.pmml as pmml 12 | 13 | 14 | class RandomForestClassifierConverter(ClassifierConverter): 15 | def __init__(self, estimator, context): 16 | super(RandomForestClassifierConverter, self).__init__(estimator, context) 17 | assert isinstance(estimator, RandomForestClassifier), \ 18 | 'This converter can only process RandomForestClassifier instances' 19 | assert len(context.schemas[Schema.OUTPUT]) == 1, 'Only one-label classification is supported' 20 | 21 | def model(self, verification_data=None): 22 | mining_model = pmml.MiningModel(functionName=ModelMode.CLASSIFICATION.value) 23 | mining_model.append(self.mining_schema()) 24 | mining_model.append(self.output()) 25 | mining_model.append(self.segmentation()) 26 | if verification_data is not None: 27 | mining_model.append(self.model_verification(verification_data)) 28 | return mining_model 29 | 30 | def segmentation(self): 31 | """ 32 | Build a segmentation (sequence of estimators) 33 | :return: Segmentation element 34 | """ 35 | segmentation = pmml.Segmentation(multipleModelMethod="weightedAverage") 36 | 37 | for index, est in enumerate(self.estimator.estimators_): 38 | s = pmml.Segment(id=index) 39 | s.append(pmml.True_()) 40 | s.append(DecisionTreeConverter(est, self.context, ModelMode.CLASSIFICATION)._model()) 41 | segmentation.append(s) 42 | 43 | return segmentation 44 | 45 | 46 | estimator_to_converter[RandomForestClassifier] = RandomForestClassifierConverter -------------------------------------------------------------------------------- /sklearn_pmml/convert/test/test_derived_fields.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from sklearn.tree import DecisionTreeClassifier 3 | from sklearn_pmml import EstimatorConverter, TransformationContext, pmml 4 | from sklearn_pmml.convert import Schema, ModelMode 5 | from sklearn_pmml.convert.features import * 6 | import numpy as np 7 | 8 | test_cases = [ 9 | ( 10 | [ 11 | RealNumericFeature(name='f1'), 12 | ], 13 | [ 14 | DerivedFeature( 15 | feature=RealNumericFeature(name='f2'), 16 | transformation=pmml.Discretize(mapMissingTo=0, defaultValue=1, field='f1'), 17 | function=np.vectorize(lambda f1: 0 if f1 is None else 1) 18 | ) 19 | ], 20 | [RealNumericFeature(name='f3')], 21 | 22 | '' 23 | '' 24 | '' 25 | '' 26 | '', 27 | 28 | '' 29 | '' 30 | '' 31 | '' 32 | '' 33 | '' 34 | ) 35 | ] 36 | 37 | @pytest.mark.parametrize("input_fields,derived_fields,output_fields,expected_data_dictionary,expected_transformation_dictionary", test_cases) 38 | def test_transformation_dictionary(input_fields, derived_fields, output_fields, expected_data_dictionary, expected_transformation_dictionary): 39 | converter = EstimatorConverter( 40 | DecisionTreeClassifier(), 41 | context=TransformationContext({ 42 | Schema.INPUT: input_fields, 43 | Schema.DERIVED: derived_fields, 44 | Schema.MODEL: input_fields + derived_fields, 45 | Schema.OUTPUT: output_fields 46 | }), 47 | mode=ModelMode.CLASSIFICATION 48 | ) 49 | 50 | assert converter.data_dictionary().toxml() == expected_data_dictionary, 'Error in data dictionary generation' 51 | assert converter.transformation_dictionary().toxml() == expected_transformation_dictionary,\ 52 | 'Error in transformation dictionary generation' -------------------------------------------------------------------------------- /sklearn_pmml/convert/test/test_gradientBoostingConverter.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from sklearn.ensemble import GradientBoostingClassifier 4 | import numpy as np 5 | 6 | from sklearn_pmml.convert.test.jpmml_test import JPMMLClassificationTest, JPMMLTest, TARGET_NAME 7 | from sklearn_pmml.convert import TransformationContext, Schema 8 | from sklearn_pmml.convert.features import * 9 | from sklearn_pmml.convert.gbrt import GradientBoostingConverter 10 | 11 | 12 | class TestGradientBoostingClassifierConverter(TestCase): 13 | def setUp(self): 14 | np.random.seed(1) 15 | self.est = GradientBoostingClassifier(max_depth=2, n_estimators=10) 16 | self.est.fit([ 17 | [0, 0], 18 | [0, 1], 19 | [1, 0], 20 | [1, 1], 21 | ], [0, 1, 1, 1]) 22 | self.ctx = TransformationContext({ 23 | Schema.INPUT: [ 24 | IntegerNumericFeature('x1'), 25 | StringCategoricalFeature('x2', ['zero', 'one']) 26 | ], 27 | Schema.MODEL: [ 28 | IntegerNumericFeature('x1'), 29 | StringCategoricalFeature('x2', ['zero', 'one']) 30 | ], 31 | Schema.DERIVED: [], 32 | Schema.OUTPUT: [ 33 | IntegerCategoricalFeature('output', [0, 1]) 34 | ] 35 | }) 36 | self.converter = GradientBoostingConverter( 37 | estimator=self.est, 38 | context=self.ctx 39 | ) 40 | 41 | def test_transform(self): 42 | p = self.converter.pmml() 43 | mm = p.MiningModel[0] 44 | assert mm.MiningSchema is not None, 'Missing mining schema' 45 | assert len(mm.MiningSchema.MiningField) == 2, 'Wrong number of mining fields' 46 | assert mm.Segmentation is not None, 'Missing segmentation root' 47 | 48 | def test_transform_with_verification(self): 49 | p = self.converter.pmml([ 50 | {'x1': 0, 'x2': 'zero', 'output#1': self.est.predict_proba([[0, 0]])[0, 1], 'output#0': self.est.predict_proba([[0, 0]])[0, 0], 'output': self.est.predict([[0, 0]])}, 51 | {'x1': 0, 'x2': 'one', 'output#1': self.est.predict_proba([[0, 1]])[0, 1], 'output#0': self.est.predict_proba([[0, 1]])[0, 0], 'output': self.est.predict([[0, 1]])}, 52 | {'x1': 1, 'x2': 'zero', 'output#1': self.est.predict_proba([[1, 0]])[0, 1], 'output#0': self.est.predict_proba([[1, 0]])[0, 0], 'output': self.est.predict([[1, 0]])}, 53 | {'x1': 1, 'x2': 'one', 'output#1': self.est.predict_proba([[1, 1]])[0, 1], 'output#0': self.est.predict_proba([[1, 1]])[0, 0], 'output': self.est.predict([[1, 1]])}, 54 | ]) 55 | mm = p.MiningModel[0] 56 | assert mm.MiningSchema is not None, 'Missing mining schema' 57 | assert len(mm.MiningSchema.MiningField) == 2, 'Wrong number of mining fields' 58 | assert mm.Segmentation is not None, 'Missing segmentation root' 59 | 60 | 61 | class TestGradientBoostingClassifierParity(TestCase, JPMMLClassificationTest): 62 | 63 | @classmethod 64 | def setUpClass(cls): 65 | if JPMMLTest.can_run(): 66 | JPMMLTest.init_jpmml() 67 | 68 | def setUp(self): 69 | self.model = GradientBoostingClassifier(n_estimators=2, max_depth=2) 70 | self.init_data_one_label() 71 | self.converter = GradientBoostingConverter( 72 | estimator=self.model, 73 | context=self.ctx 74 | ) 75 | -------------------------------------------------------------------------------- /sklearn_pmml/test/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | from sklearn.base import BaseEstimator 4 | 5 | try: 6 | import cPickle as pickle 7 | except: 8 | import pickle 9 | from sklearn_pmml.convert import * 10 | from sklearn_pmml import pmml 11 | 12 | 13 | class TestSerializationMeta(type): 14 | TEST_DIR = os.path.dirname(__file__) 15 | DATA_DIR = os.path.join(TEST_DIR, 'data') 16 | ESTIMATOR_FILE_NAME = 'estimator.pkl' 17 | PMML_FILE_NAME = 'document.pmml' 18 | CONTEXT_FILE_NAME = 'context.pkl' 19 | 20 | def __new__(mcs, name, bases, d): 21 | """ 22 | This method overrides default behaviour for creation of new instances. For every directory abc in data it 23 | creates a method called test_abc, with the body of load_and_compare function. 24 | """ 25 | def gen_test(suite_name): 26 | def load_and_compare(self): 27 | # load the context.pkl, document.pmml and estimator.pkl 28 | suite_path = os.path.join(mcs.DATA_DIR, suite_name) 29 | content = os.listdir(suite_path) 30 | assert len(content) == 3, 'There should be exactly two files in the suite directory' 31 | assert mcs.ESTIMATOR_FILE_NAME in content, 'Estimator should be stored in {} file'.format(mcs.ESTIMATOR_FILE_NAME) 32 | assert mcs.PMML_FILE_NAME in content, 'PMML should be stored in {} file'.format(mcs.PMML_FILE_NAME) 33 | assert mcs.CONTEXT_FILE_NAME in content, 'Context should be stored in {} file'.format(mcs.CONTEXT_FILE_NAME) 34 | with open(os.path.join(suite_path, mcs.ESTIMATOR_FILE_NAME), 'r') as est_file: 35 | est = pickle.load(est_file) 36 | assert isinstance(est, BaseEstimator), '{} should be a trained estimator'.format(mcs.ESTIMATOR_FILE_NAME) 37 | with open(os.path.join(suite_path, mcs.CONTEXT_FILE_NAME), 'r') as ctx_file: 38 | ctx = pickle.load(ctx_file) 39 | assert isinstance(ctx, TransformationContext), '{} should be a transformation context'.format(mcs.CONTEXT_FILE_NAME) 40 | converter = find_converter(est) 41 | assert converter is not None, 'Can not find converter for {}'.format(est) 42 | transformed_pmml = converter(est, ctx).pmml() 43 | with open(os.path.join(suite_path, mcs.PMML_FILE_NAME), 'r') as pmml_file: 44 | loaded_pmml = pmml.CreateFromDocument('\n'.join(pmml_file.readlines())) 45 | self.maxDiff = None 46 | # make sure that the expected PMML matches the produced one 47 | self.assertEquals(loaded_pmml.toDOM().toprettyxml(), transformed_pmml.toDOM().toprettyxml()) 48 | 49 | return load_and_compare 50 | 51 | # for every batch in the data dir create a corresponding test method 52 | for case in os.listdir(TestSerializationMeta.DATA_DIR): 53 | test_name = 'test_{}'.format(case) 54 | d[test_name] = gen_test(case) 55 | return type.__new__(mcs, name, bases, d) 56 | 57 | 58 | class TestSerialization(TestCase): 59 | """ 60 | This is an automated tester for serializers. It uses a custom metaclass to define the test cases based on the 61 | content of the data directory. For the logic behind every check see load_and_compare method above. 62 | """ 63 | __metaclass__ = TestSerializationMeta 64 | 65 | 66 | -------------------------------------------------------------------------------- /examples/pmml/DecisionTreeClassifier.pmml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /sklearn_pmml/convert/test/jpmml-csv-evaluator/src/main/java/sklearn/pmml/jpmml/JPMMLCSVEvaluator.java: -------------------------------------------------------------------------------- 1 | package sklearn.pmml.jpmml; 2 | 3 | import com.google.common.collect.Lists; 4 | import com.google.common.collect.Maps; 5 | 6 | import com.google.common.collect.Sets; 7 | import org.dmg.pmml.PMML; 8 | import org.dmg.pmml.FieldName; 9 | import org.jpmml.model.JAXBUtil; 10 | import org.jpmml.model.ImportFilter; 11 | import org.jpmml.evaluator.FieldValue; 12 | import org.jpmml.evaluator.Evaluator; 13 | import org.jpmml.evaluator.ModelEvaluator; 14 | import org.jpmml.evaluator.ModelEvaluatorFactory; 15 | import org.supercsv.io.CsvMapReader; 16 | import org.supercsv.io.CsvMapWriter; 17 | import org.supercsv.prefs.CsvPreference; 18 | import org.xml.sax.SAXException; 19 | import org.xml.sax.InputSource; 20 | 21 | import javax.xml.bind.JAXBException; 22 | 23 | import java.io.FileInputStream; 24 | import java.io.FileReader; 25 | import java.io.FileWriter; 26 | import java.io.IOException; 27 | import java.io.InputStream; 28 | import java.util.Arrays; 29 | import java.util.HashMap; 30 | import java.util.List; 31 | import java.util.Map; 32 | import java.util.Set; 33 | import java.util.logging.Level; 34 | import java.util.logging.Logger; 35 | 36 | /** 37 | * Created by evancox on 7/23/15. 38 | */ 39 | public class JPMMLCSVEvaluator 40 | { 41 | private static final Logger logger = Logger.getLogger(JPMMLCSVEvaluator.class.getCanonicalName()); 42 | 43 | static PMML pmmlFromXml(final InputStream is) 44 | { 45 | try 46 | { 47 | return JAXBUtil.unmarshalPMML(ImportFilter.apply(new InputSource(is))); 48 | } 49 | catch (SAXException | JAXBException e) 50 | { 51 | throw new RuntimeException("Error reading PMML.", e); 52 | } 53 | } 54 | 55 | static Evaluator evaluatorFromPmml(final PMML pmml) 56 | { 57 | ModelEvaluatorFactory modelEvaluatorFactory = ModelEvaluatorFactory.newInstance(); 58 | 59 | ModelEvaluator modelEvaluator = modelEvaluatorFactory.newModelManager(pmml); 60 | 61 | return modelEvaluator; 62 | } 63 | 64 | static Evaluator evaluatorFromXml(final InputStream is) 65 | { 66 | // Adapted from: 67 | // * https://github.com/jpmml/jpmml/blob/master/README.md 68 | // * https://github.com/jpmml/jpmml-example/blob/master/src/main/java/org/jpmml/example/CsvEvaluationExample.java 69 | return evaluatorFromPmml(pmmlFromXml(is)); 70 | } 71 | 72 | static List> getPredictions(Evaluator evaluator, String csvFeaturesFile) throws IOException 73 | { 74 | try (final CsvMapReader csvMapReader = new CsvMapReader(new FileReader(csvFeaturesFile), CsvPreference.STANDARD_PREFERENCE)) { 75 | final String[] headers = csvMapReader.getHeader(true); 76 | final Map fieldNameMap = new HashMap<>(headers.length); 77 | for (String header : Arrays.asList(headers)) 78 | { 79 | fieldNameMap.put(header, new FieldName(header)); 80 | } 81 | 82 | Map rawCsvMap; 83 | final List> predictions = Lists.newArrayList(); 84 | while ((rawCsvMap = csvMapReader.read(headers)) != null) { 85 | final Map featureMap = Maps.newHashMapWithExpectedSize(rawCsvMap.size()); 86 | for (Map.Entry keyValue : rawCsvMap.entrySet()) 87 | { 88 | final FieldName fieldName = fieldNameMap.get(keyValue.getKey()); 89 | final FieldValue fieldValue = evaluator.prepare(fieldName, keyValue.getValue()); 90 | featureMap.put(fieldName, fieldValue); 91 | } 92 | predictions.add(evaluator.evaluate(featureMap)); 93 | } 94 | return predictions; 95 | } 96 | } 97 | 98 | static void writePredictions(Evaluator evaluator, List> predictions, String outputFile) throws IOException 99 | { 100 | final int outputFieldCount = predictions.get(0).keySet().size(); 101 | final Set outputFields = Sets.newHashSetWithExpectedSize(outputFieldCount); 102 | final String[] header = new String[outputFieldCount]; 103 | int index = 0; 104 | for (FieldName fieldName : predictions.get(0).keySet()) 105 | { 106 | if (fieldName != null) { 107 | outputFields.add(fieldName); 108 | header[index++] = fieldName.toString(); 109 | } 110 | } 111 | 112 | try (final CsvMapWriter csvMapWriter = new CsvMapWriter(new FileWriter(outputFile), CsvPreference.STANDARD_PREFERENCE)) 113 | { 114 | csvMapWriter.writeHeader(header); 115 | for (Map prediction : predictions) { 116 | 117 | final Map row = Maps.newHashMapWithExpectedSize(prediction.size()); 118 | for (Map.Entry keyValue : prediction.entrySet()) 119 | { 120 | if (keyValue.getKey() != null) { 121 | row.put(keyValue.getKey().toString(), keyValue.getValue()); 122 | } 123 | } 124 | csvMapWriter.write(row, header); 125 | } 126 | } 127 | } 128 | 129 | public static void main(String[] args) 130 | { 131 | if (args.length != 3) 132 | { 133 | throw new RuntimeException("Expected PMML file, feature data, and output predictions file"); 134 | } 135 | final String pmmlFile = args[0]; 136 | final String csvFeaturesFile = args[1]; 137 | final String outputFile = args[2]; 138 | try 139 | { 140 | Evaluator evaluator = evaluatorFromXml(new FileInputStream(pmmlFile)); 141 | evaluator.verify(); 142 | final List> predictions = getPredictions(evaluator, csvFeaturesFile); 143 | writePredictions(evaluator, predictions, outputFile); 144 | logger.info(String.format("Wrote %d predictions from %s to %s", predictions.size(), csvFeaturesFile, outputFile)); 145 | } 146 | catch (IOException ex) 147 | { 148 | logger.log(Level.SEVERE, "IOException", ex); 149 | System.exit(1); 150 | } 151 | 152 | 153 | } 154 | 155 | } 156 | -------------------------------------------------------------------------------- /sklearn_pmml/convert/test/test_decisionTreeClassifierConverter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn_pmml.convert.test.jpmml_test import JPMMLClassificationTest, JPMMLRegressionTest, TARGET_NAME, TARGET 3 | 4 | from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor 5 | 6 | from sklearn_pmml.convert import TransformationContext, pmml_row, ModelMode, Schema 7 | from sklearn_pmml.convert.features import * 8 | from sklearn_pmml.convert.tree import DecisionTreeConverter 9 | from sklearn_pmml import pmml 10 | 11 | from unittest import TestCase 12 | 13 | 14 | class TestDecisionTreeClassifierConverter(TestCase): 15 | def setUp(self): 16 | np.random.seed(1) 17 | self.est = DecisionTreeClassifier(max_depth=2) 18 | self.est.fit([ 19 | [0, 0], 20 | [0, 1], 21 | [1, 0], 22 | [1, 1], 23 | ], [0, 1, 1, 1]) 24 | self.ctx = TransformationContext({ 25 | Schema.INPUT: [ 26 | IntegerNumericFeature('x1'), 27 | StringCategoricalFeature('x2', ['zero', 'one']) 28 | ], 29 | Schema.MODEL: [ 30 | IntegerNumericFeature('x1'), 31 | StringCategoricalFeature('x2', ['zero', 'one']) 32 | ], 33 | Schema.DERIVED: [], 34 | Schema.OUTPUT: [ 35 | IntegerNumericFeature('output') 36 | ] 37 | }) 38 | self.converter = DecisionTreeConverter( 39 | estimator=self.est, 40 | context=self.ctx, 41 | mode=ModelMode.CLASSIFICATION 42 | ) 43 | 44 | def test_transform(self): 45 | p = self.converter.pmml() 46 | tm = p.TreeModel[0] 47 | assert tm.MiningSchema is not None, 'Missing mining schema' 48 | assert len(tm.MiningSchema.MiningField) == 2, 'Wrong number of mining fields' 49 | assert tm.Node is not None, 'Missing root node' 50 | assert tm.Node.recordCount == 4 51 | assert tm.Node.True_ is not None, 'Root condition should always be True' 52 | 53 | def test_transform_with_derived_field(self): 54 | self.est = DecisionTreeClassifier(max_depth=2) 55 | self.est.fit([ 56 | [0, 0, 0], 57 | [0, 1, 0], 58 | [1, 0, 0], 59 | [1, 1, 1], 60 | ], [0, 1, 1, 1]) 61 | mapping = pmml.MapValues(dataType="double", outputColumn="output") 62 | mapping.append(pmml.FieldColumnPair(column="x1", field="x1")) 63 | mapping.append(pmml.FieldColumnPair(column="x2", field="x2")) 64 | it = pmml.InlineTable() 65 | mapping_df = pd.DataFrame([ 66 | dict(x1=0, x2='zero', output=0), 67 | dict(x1=0, x2='one', output=0), 68 | dict(x1=1, x2='zero', output=0), 69 | dict(x1=1, x2='one', output=1), 70 | ]) 71 | for idx, line in mapping_df.iterrows(): 72 | it.append(pmml_row(**dict(line))) 73 | mapping.append(it) 74 | mapping_df.set_index(keys=['x1', 'x2']) 75 | mapping_f = np.vectorize(lambda x1, x2: mapping_df.ix[x1, x2].output.values[0]) 76 | self.ctx = TransformationContext({ 77 | Schema.INPUT: [ 78 | IntegerNumericFeature('x1'), 79 | StringCategoricalFeature('x2', ['zero', 'one']) 80 | ], 81 | Schema.DERIVED: [ 82 | DerivedFeature( 83 | feature=RealNumericFeature(name='x3'), 84 | transformation=mapping, 85 | function=mapping_f 86 | ) 87 | ], 88 | Schema.MODEL: [ 89 | IntegerNumericFeature('x1'), 90 | StringCategoricalFeature('x2', ['zero', 'one']), 91 | RealNumericFeature(name='x3') 92 | ], 93 | Schema.OUTPUT: [ 94 | IntegerCategoricalFeature('output', ['neg', 'pos']) 95 | ] 96 | }) 97 | self.converter = DecisionTreeConverter( 98 | estimator=self.est, 99 | context=self.ctx, 100 | mode=ModelMode.CLASSIFICATION 101 | ) 102 | self.converter.pmml().toxml() 103 | 104 | 105 | class TestDecisionTreeRegressorConverter(TestCase): 106 | def setUp(self): 107 | np.random.seed(1) 108 | self.est = DecisionTreeRegressor(max_depth=2) 109 | self.est.fit([ 110 | [0, 0], 111 | [0, 1], 112 | [1, 0], 113 | [1, 1], 114 | ], [0, 1, 1, 1]) 115 | self.ctx = TransformationContext({ 116 | Schema.INPUT: [ 117 | IntegerNumericFeature('x1'), 118 | StringCategoricalFeature('x2', ['zero', 'one']) 119 | ], 120 | Schema.MODEL: [ 121 | IntegerNumericFeature('x1'), 122 | StringCategoricalFeature('x2', ['zero', 'one']) 123 | ], 124 | Schema.DERIVED: [], 125 | Schema.OUTPUT: [ 126 | IntegerNumericFeature('output') 127 | ] 128 | }) 129 | self.converter = DecisionTreeConverter( 130 | estimator=self.est, 131 | context=self.ctx, 132 | mode=ModelMode.REGRESSION 133 | ) 134 | 135 | def test_transform(self): 136 | p = self.converter.pmml() 137 | tm = p.TreeModel[0] 138 | assert tm.MiningSchema is not None, 'Missing mining schema' 139 | assert len(tm.MiningSchema.MiningField) == 2, 'Wrong number of mining fields' 140 | assert tm.Node is not None, 'Missing root node' 141 | assert tm.Node.recordCount == 4 142 | assert tm.Node.True_ is not None, 'Root condition should always be True' 143 | 144 | 145 | class TestDecisionTreeClassificationJPMMLParity(TestCase, JPMMLClassificationTest): 146 | 147 | def setUp(self): 148 | self.model = DecisionTreeClassifier(max_depth=2) 149 | self.init_data() 150 | self.converter = DecisionTreeConverter( 151 | estimator=self.model, 152 | context=self.ctx, 153 | mode=ModelMode.CLASSIFICATION 154 | ) 155 | 156 | @property 157 | def output(self): 158 | return IntegerCategoricalFeature(name=TARGET_NAME, value_list=TARGET) 159 | 160 | 161 | class TestDecisionTreeRegressionJPMMLParity(TestCase, JPMMLRegressionTest): 162 | 163 | def setUp(self): 164 | self.model = DecisionTreeRegressor() 165 | self.init_data() 166 | self.converter = DecisionTreeConverter( 167 | estimator=self.model, 168 | context=self.ctx, 169 | mode=ModelMode.REGRESSION 170 | ) 171 | -------------------------------------------------------------------------------- /sklearn_pmml/convert/features.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import pandas as pd 3 | 4 | 5 | class FeatureOpType(Enum): 6 | CATEGORICAL = 'categorical' 7 | CONTINUOUS = 'continuous' 8 | 9 | 10 | class FeatureType(Enum): 11 | DOUBLE = 'double' 12 | INT = 'integer' 13 | STRING = 'string' 14 | 15 | 16 | class InvalidValueTreatment(Enum): 17 | AS_IS = 'asIs' 18 | 19 | 20 | class Feature(object): 21 | def __init__(self, name, namespace='', invalid_value_treatment=InvalidValueTreatment.AS_IS): 22 | """ 23 | Create a new feature 24 | :type name: str 25 | :type namespace: str 26 | :type invalid_value_treatment: InvalidValueTreatment 27 | """ 28 | self._name = str(name) 29 | self._namespace = str(namespace) 30 | self._invalid_value_treatment = invalid_value_treatment 31 | 32 | @property 33 | def name(self): 34 | """ 35 | :rtype: str 36 | """ 37 | return self._name 38 | 39 | @property 40 | def namespace(self): 41 | """ 42 | :rtype: str 43 | """ 44 | return self._namespace 45 | 46 | @property 47 | def full_name(self): 48 | """ 49 | :rtype: str 50 | """ 51 | if self._namespace: 52 | return '{}.{}'.format(self._namespace, self.name) 53 | else: 54 | return self.name 55 | 56 | @property 57 | def invalid_value_treatment(self): 58 | return self._invalid_value_treatment 59 | 60 | @property 61 | def optype(self): 62 | """ 63 | :rtype: FeatureOpType 64 | """ 65 | raise NotImplementedError() 66 | 67 | @property 68 | def data_type(self): 69 | """ 70 | :rtype: FeatureType 71 | """ 72 | raise NotImplementedError() 73 | 74 | def from_number(self, value): 75 | raise NotImplementedError() 76 | 77 | def __str__(self): 78 | return self.name 79 | 80 | def __repr__(self): 81 | return "{}#{}".format(self.name, self.__class__.__name__) 82 | 83 | 84 | class NumericFeature(Feature): 85 | @property 86 | def optype(self): 87 | return FeatureOpType.CONTINUOUS 88 | 89 | def from_number(self, value): 90 | return float(value) 91 | 92 | 93 | class RealNumericFeature(NumericFeature): 94 | @property 95 | def data_type(self): 96 | return FeatureType.DOUBLE 97 | 98 | 99 | class IntegerNumericFeature(NumericFeature): 100 | def from_number(self, value): 101 | return int(value) 102 | 103 | @property 104 | def data_type(self): 105 | return FeatureType.INT 106 | 107 | 108 | class CategoricalFeature(Feature): 109 | """ 110 | Represents a categorical feature. Categorical features are defined with optype 'categorical' and the corresponding 111 | dataType. The corresponding derived field will have a double data type and will be defined as a MapValues PMML 112 | element. 113 | """ 114 | def __init__(self, name, value_list, namespace='', invalid_value_treatment=InvalidValueTreatment.AS_IS, map_missing_to=None): 115 | super(CategoricalFeature, self).__init__(name, namespace, invalid_value_treatment) 116 | self.map_missing_to = map_missing_to 117 | self.value_list = value_list 118 | 119 | @property 120 | def optype(self): 121 | return FeatureOpType.CATEGORICAL 122 | 123 | def from_number(self, value): 124 | assert value >= 0, 'Negative numbers can not be used as categorical indexes' 125 | assert value < len(self.value_list), 'Unknown category index {}'.format(value) 126 | return self.value_list[value] 127 | 128 | def to_number(self, value): 129 | """ 130 | Transform categorical value to the ordinal. Raises ValueError if value is not in self.value_list 131 | """ 132 | try: 133 | return list(self.value_list).index(value) 134 | except ValueError as e: 135 | if self.map_missing_to: 136 | return self.map_missing_to 137 | else: 138 | raise e 139 | 140 | 141 | class IntegerCategoricalFeature(CategoricalFeature): 142 | @property 143 | def data_type(self): 144 | return FeatureType.INT 145 | 146 | 147 | class StringCategoricalFeature(CategoricalFeature): 148 | @property 149 | def data_type(self): 150 | return FeatureType.STRING 151 | 152 | 153 | class DerivedFeature(NumericFeature): 154 | """ 155 | This class represents a derived feature constructed from previously defined features. 156 | The transformation parameter defines the recipe for creating a feature, and will be inserted into pmml.DerivedField 157 | element for this feature. 158 | Note, that the transformation only allows references to the already declare fields. 159 | 160 | For convenience, one can also pass the function that performs the transformation on the input data frame. 161 | """ 162 | 163 | def __init__(self, feature, transformation, function): 164 | """ 165 | Construct a derived feature. 166 | :param feature: declaration of feature (name, data_type and optype) 167 | :type feature: Feature 168 | :param transformation: definition of DerivedField content 169 | :param function: transformation function 170 | :type function: callable 171 | """ 172 | super(DerivedFeature, self).__init__( 173 | name=feature.name, 174 | namespace=feature.namespace, 175 | invalid_value_treatment=feature.invalid_value_treatment 176 | ) 177 | assert isinstance(feature, NumericFeature), 'All derived features must be declared as NumericFeatures' 178 | assert function is not None, 'Function can not be None' 179 | assert callable(function), 'Function must be callable' 180 | self.feature = feature 181 | self.transformation = transformation 182 | self.function = function 183 | 184 | def from_number(self, value): 185 | return self.feature.from_number(value) 186 | 187 | @property 188 | def data_type(self): 189 | return self.feature.data_type 190 | 191 | @property 192 | def optype(self): 193 | return self.feature.optype 194 | 195 | def apply(self, df): 196 | """ 197 | Calculate derived feature's values based on the values in the input data frame. 198 | Note that the input data frame will not be affected by the transformation. 199 | :param df: input data frame 200 | :return: array with results 201 | """ 202 | assert self.function is not None, 'Function was not provided' 203 | assert isinstance(df, pd.DataFrame), 'Input should be a data frame' 204 | return self.function(df.copy(deep=False)) -------------------------------------------------------------------------------- /sklearn_pmml/convert/test/jpmml_test.py: -------------------------------------------------------------------------------- 1 | __author__ = 'evancox' 2 | 3 | import numpy as np 4 | import hashlib 5 | import os 6 | import shutil 7 | import subprocess 8 | import logging 9 | 10 | from sklearn_pmml.convert import TransformationContext, Schema 11 | from sklearn_pmml.convert.features import * 12 | 13 | 14 | TARGET = [0, 1, 2] 15 | TARGET_NAME = 'y' 16 | TEST_DIR = 'jpmml_test_data' 17 | 18 | EPSILON = 0.00001 19 | 20 | logging.basicConfig(format='%(asctime)s %(message)s') 21 | 22 | 23 | # Adapted from http://stackoverflow.com/questions/1724693/find-a-file-in-python 24 | def find_file_or_dir(name): 25 | for root, dirs, files in os.walk(os.path.dirname(__file__)): 26 | if name in files or name in dirs: 27 | return os.path.join(root, name) 28 | 29 | 30 | class JPMMLTest(): 31 | USE_VERIFICATION = True 32 | """ 33 | If true, the PMML will be generated with the ModelVerification section that allows PMML interpreter to check the 34 | correctness of deserialized model. 35 | """ 36 | 37 | def __init__(self): 38 | self.x = None 39 | self.y = None 40 | self.ctx = None 41 | self.converter = None 42 | 43 | @staticmethod 44 | def can_run(): 45 | try: 46 | subprocess.check_call(['java', '-version']) 47 | except OSError: 48 | logging.warning("Couldn't find java to run JPMML integration tests") 49 | return False 50 | 51 | try: 52 | subprocess.check_call(['mvn', '-version']) 53 | except OSError: 54 | logging.warning("Couldn't find maven to run JPMML integration tests") 55 | return False 56 | 57 | return True 58 | 59 | @staticmethod 60 | def init_jpmml(): 61 | result = subprocess.call(['mvn', '-q', 'clean', 'package', '-f', find_file_or_dir('jpmml-csv-evaluator')]) 62 | assert result == 0, "Unable to package jpmml csv evaluator" 63 | return True 64 | 65 | # taken from http://stackoverflow.com/questions/18159221/remove-namespace-and-prefix-from-xml-in-python-using-lxml 66 | @staticmethod 67 | def remove_namespace(doc, namespace): 68 | ns = u'{%s}' % namespace 69 | nsl = len(ns) 70 | for elem in doc.getiterator(): 71 | if elem.tag.startswith(ns): 72 | elem.tag = elem.tag[nsl:] 73 | 74 | @property 75 | def model(self): 76 | if self._model is None: 77 | raise NotImplementedError() 78 | return self._model 79 | 80 | @model.setter 81 | def model(self, model): 82 | self._model = model 83 | 84 | @property 85 | def output(self): 86 | raise NotImplementedError() 87 | 88 | def setup_jpmml_test(self): 89 | if not JPMMLTest.can_run(): 90 | logging.warning("Can't run regression test, java and/or maven not installed") 91 | return None 92 | 93 | if os.path.exists(TEST_DIR): 94 | shutil.rmtree(TEST_DIR) 95 | os.makedirs(TEST_DIR) 96 | 97 | if self.USE_VERIFICATION: 98 | verification_data = self.x.copy() 99 | 100 | xml = self.converter.pmml(verification_data=[ 101 | dict((str(_[0]), _[1]) for _ in dict(row).items()) 102 | for idx, row in verification_data[:10].iterrows() 103 | ]).toDOM().toprettyxml() 104 | else: 105 | xml = self.converter.pmml().toDOM().toprettyxml() 106 | 107 | pmml_hash = hashlib.md5(xml.encode('utf-8')).hexdigest() 108 | pmml_file_path = os.path.join(TEST_DIR, pmml_hash + '.pmml') 109 | with open(pmml_file_path, 'w') as pmml_file: 110 | pmml_file.write(xml) 111 | 112 | input_file_path = os.path.join(TEST_DIR, pmml_hash + '_input.csv') 113 | self.x.to_csv(input_file_path, index=False) 114 | target_file_path = os.path.join(TEST_DIR, pmml_hash + '_output.csv') 115 | 116 | java_args = ' '.join(map("'{}'".format, [ 117 | os.path.abspath(pmml_file_path), 118 | os.path.abspath(input_file_path), 119 | os.path.abspath(target_file_path) 120 | ])) 121 | result = subprocess.call([ 122 | 'mvn', 'package', 'exec:java', '-q', '-e', 123 | '-f', find_file_or_dir('jpmml-csv-evaluator'), 124 | '-Dexec.mainClass=sklearn.pmml.jpmml.JPMMLCSVEvaluator', 125 | '-Dexec.args=' + java_args 126 | ]) 127 | if result: 128 | print(xml) 129 | assert result == 0, 'Executing JPMML evaluator returned non zero result' 130 | return pd.read_csv(target_file_path) 131 | 132 | def init_data(self): 133 | np.random.seed(12363) 134 | self.x = pd.DataFrame(np.random.randn(500, 4), columns=['col_' + str(_) for _ in range(4)]) 135 | self.y = pd.DataFrame({TARGET_NAME: [np.random.choice([0, 1, 2]) for _ in range(self.x.shape[0])]}) 136 | self._model.fit(self.x, np.ravel(self.y)) 137 | self.ctx = TransformationContext() 138 | self.ctx.schemas[Schema.INPUT] = [RealNumericFeature(col) for col in list(self.x)] 139 | self.ctx.schemas[Schema.DERIVED] = [] 140 | self.ctx.schemas[Schema.MODEL] = [RealNumericFeature(col) for col in list(self.x)] 141 | self.ctx.schemas[Schema.OUTPUT] = [self.output] 142 | 143 | def init_data_one_label(self): 144 | np.random.seed(12363) 145 | self.x = pd.DataFrame(np.random.randn(500, 4), columns=['col_' + str(_) for _ in range(4)]) 146 | self.y = pd.DataFrame({TARGET_NAME: [np.random.choice([0, 1]) for _ in range(self.x.shape[0])]}) 147 | self._model.fit(self.x, np.ravel(self.y)) 148 | self.ctx = TransformationContext() 149 | self.ctx.schemas[Schema.INPUT] = [RealNumericFeature(col) for col in list(self.x)] 150 | self.ctx.schemas[Schema.DERIVED] = [] 151 | self.ctx.schemas[Schema.MODEL] = [RealNumericFeature(col) for col in list(self.x)] 152 | self.ctx.schemas[Schema.OUTPUT] = [self.output] 153 | 154 | 155 | class JPMMLRegressionTest(JPMMLTest): 156 | @property 157 | def output(self): 158 | return IntegerNumericFeature(name=TARGET_NAME) 159 | 160 | def test_regression(self): 161 | jpmml_predictions = self.setup_jpmml_test() 162 | if jpmml_predictions is None: 163 | return 164 | 165 | sklearn_predictions = pd.DataFrame({TARGET_NAME: self.converter.estimator.predict(self.x)}) 166 | diff = jpmml_predictions[TARGET_NAME] - sklearn_predictions[TARGET_NAME] 167 | assert np.all(np.abs(diff) < EPSILON) 168 | 169 | 170 | class JPMMLClassificationTest(JPMMLTest): 171 | @property 172 | def output(self): 173 | return StringCategoricalFeature(name=TARGET_NAME, value_list=["negative", "positive"]) 174 | 175 | def test_classification(self): 176 | 177 | jpmml_predictions = self.setup_jpmml_test() 178 | if jpmml_predictions is None: 179 | return 180 | 181 | raw_sklearn_predictions = self.converter.estimator.predict_proba(self.x) 182 | prob_outputs = [self.output.name + '.' + str(clazz) for clazz in self.output.value_list] 183 | sklearn_predictions = pd.DataFrame(columns=prob_outputs) 184 | for index, prediction in enumerate(raw_sklearn_predictions): 185 | sklearn_predictions.loc[index] = list(prediction) 186 | 187 | np.testing.assert_almost_equal( 188 | np.array(jpmml_predictions[list(sklearn_predictions.columns)]), 189 | sklearn_predictions.values, 190 | err_msg='Probability mismatch' 191 | ) 192 | np.testing.assert_equal( 193 | np.array(self.output.value_list)[self.converter.estimator.predict(self.x)], 194 | jpmml_predictions[self.output.name].values, 195 | err_msg='Labels mismatch' 196 | ) -------------------------------------------------------------------------------- /sklearn_pmml/convert/utils.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from sklearn_pmml import pmml 3 | from sklearn_pmml.convert.features import Feature, FeatureType 4 | from pyxb.utils.domutils import BindingDOMSupport as bds 5 | import numpy as np 6 | estimator_to_converter = {} 7 | 8 | 9 | def find_converter(estimator): 10 | # TODO: do the search here 11 | return estimator_to_converter.get(estimator.__class__, None) 12 | 13 | 14 | def pmml_row(**columns): 15 | """ 16 | Creates pmml.row element with columns 17 | :param columns: key-value pairs to be inserted into the row 18 | :return: pmml.row element 19 | """ 20 | r = pmml.row() 21 | for name, value in columns.items(): 22 | el = bds().createChildElement(name) 23 | bds().appendTextChild(value, el) 24 | r.append(el) 25 | return r 26 | 27 | 28 | class DerivedFeatureTransformations(object): 29 | """ 30 | A helper for building Derived Feature transformations. Creates both transformation and the DerivedFeature content. 31 | Typical usage of the methods: 32 | 33 | DerivedFeature( 34 | RealNumericFeature('my_derived_feature'), 35 | **DerivedFeatureTransformations.field_in_list('input_feature', ['A', 'B', 'C']) 36 | ) 37 | """ 38 | TRANSFORMATION = 'transformation' 39 | FUNCTION = 'function' 40 | 41 | @staticmethod 42 | def field_in_list(field, values): 43 | mv = pmml.MapValues(outputColumn='output', defaultValue=0) 44 | mv.append(pmml.FieldColumnPair(field=field, column='input')) 45 | it = pmml.InlineTable() 46 | for v in values: 47 | it.append(pmml_row(input=v, output=1)) 48 | mv.append(it) 49 | return { 50 | DerivedFeatureTransformations.TRANSFORMATION: mv, 51 | DerivedFeatureTransformations.FUNCTION: lambda df: reduce(np.logical_or, [df[field] == _ for _ in values]) 52 | } 53 | 54 | @staticmethod 55 | def field_not_in_list(field, values): 56 | mv = pmml.MapValues(outputColumn='output', defaultValue=1) 57 | mv.append(pmml.FieldColumnPair(field=field, column='input')) 58 | it = pmml.InlineTable() 59 | for v in values: 60 | it.append(pmml_row(input=v, output=0)) 61 | mv.append(it) 62 | return { 63 | DerivedFeatureTransformations.TRANSFORMATION: mv, 64 | DerivedFeatureTransformations.FUNCTION: lambda df: reduce(np.logical_and, [df[field] != _ for _ in values]) 65 | } 66 | 67 | @staticmethod 68 | def map_values(field, value_map, default_value): 69 | mv = pmml.MapValues(outputColumn='output', default_value=default_value) 70 | mv.append(pmml.FieldColumnPair(field=field, column='input')) 71 | it = pmml.InlineTable() 72 | for k, v in value_map.items(): 73 | it.append(pmml_row(input=k, output=v)) 74 | mv.append(it) 75 | return { 76 | DerivedFeatureTransformations.TRANSFORMATION: mv, 77 | DerivedFeatureTransformations.FUNCTION: 78 | lambda df: np.vectorize(partial(value_map.get, default_value))(df[field]) 79 | } 80 | 81 | @staticmethod 82 | def arithmetics(tree): 83 | """ 84 | Takes an arithmetic operations tree (Lisp-styled) as an input 85 | """ 86 | 87 | def basic_function(func_name, args): 88 | expr = pmml.Apply(function=func_name) 89 | for a in args: 90 | expr.append(a) 91 | return expr 92 | 93 | def mod_function(args): 94 | expr = pmml.Apply(function='-') 95 | expr.append(args[0]) 96 | mul = pmml.Apply(function='*') 97 | mul.append(args[1]) 98 | floor = pmml.Apply(function='floor') 99 | mul.append(floor) 100 | div = pmml.Apply(function='/') 101 | floor.append(div) 102 | div.append(args[0]) 103 | div.append(args[1]) 104 | return expr 105 | 106 | # TODO: test me 107 | def greedy_evaluation(node): 108 | if isinstance(node, str): 109 | # field reference 110 | return (lambda df: df[node]), pmml.FieldRef(field=node) 111 | elif isinstance(node, (tuple, list)): 112 | # eval arguments 113 | args = map(greedy_evaluation, node[1:]) 114 | functions = { 115 | '*': lambda df: np.multiply(*[_[0](df) for _ in args]), 116 | '-': lambda df: np.subtract(*[_[0](df) for _ in args]), 117 | '+': lambda df: np.add(*[_[0](df) for _ in args]), 118 | '/': lambda df: np.divide(*[_[0](df) for _ in args]), 119 | '%': lambda df: np.mod(*[_[0](df) for _ in args]), 120 | } 121 | assert isinstance(node[0], str), 'First element should be a code of operation' 122 | assert node[0] in functions, 'Unknown function code {}. Supported codes: {}'.format(node[0], functions.keys()) 123 | expr = { 124 | '*': partial(basic_function, '*'), 125 | '-': partial(basic_function, '-'), 126 | '+': partial(basic_function, '+'), 127 | '/': partial(basic_function, '/'), 128 | '%': mod_function 129 | }.get(node[0])([a[1] for a in args]) 130 | func = functions[node[0]] 131 | return func, expr 132 | else: 133 | # numeric terminal 134 | return lambda df: node, pmml.Constant(node, dataType='double') 135 | 136 | function, transformation = greedy_evaluation(tree) 137 | 138 | return { 139 | DerivedFeatureTransformations.TRANSFORMATION: transformation, 140 | DerivedFeatureTransformations.FUNCTION: function 141 | } 142 | 143 | @staticmethod 144 | def replace_value(field, original, replacement): 145 | if original is not None: 146 | transformation = pmml.Apply(function='if') 147 | cond = pmml.Apply(function='equals') 148 | cond.append(pmml.FieldRef(field=field)) 149 | cond.append(pmml.Constant(original)) 150 | transformation.append(pmml.Constant(replacement)) 151 | transformation.append(pmml.FieldRef(field=field)) 152 | 153 | return { 154 | DerivedFeatureTransformations.TRANSFORMATION: transformation, 155 | DerivedFeatureTransformations.FUNCTION: lambda df: np.where(df[field] == original, replacement, df[field]) 156 | } 157 | else: 158 | transformation = pmml.Apply(function='+', mapMissingTo=replacement) 159 | transformation.append(pmml.Constant(0)) 160 | transformation.append(pmml.FieldRef(field=field)) 161 | return { 162 | DerivedFeatureTransformations.TRANSFORMATION: transformation, 163 | DerivedFeatureTransformations.FUNCTION: lambda df: np.where(df[field].isnull(), replacement, df[field]) 164 | } 165 | 166 | 167 | def assert_equal(feature, expected, actual): 168 | """ 169 | Compare expected and actual values for the feature and raise an exception if they are not equal 170 | :type feature: Feature 171 | :type expected: np.array 172 | :type actual: np.array 173 | """ 174 | # if the feature has the transformation included and the result data is passed, we can compare them 175 | if feature.data_type == FeatureType.STRING: 176 | assert all(actual == expected), \ 177 | 'Some passed values of "{}" don\'t match the evaluated results'.format(feature.full_name) 178 | else: 179 | np.testing.assert_almost_equal( 180 | actual, 181 | expected, 182 | err_msg='Some passed values of "{}" don\'t match the evaluated results'.format(feature.full_name) 183 | ) -------------------------------------------------------------------------------- /sklearn_pmml/convert/tree.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from sklearn.base import ClassifierMixin, RegressorMixin 4 | from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor 5 | from sklearn.tree._tree import Tree, TREE_LEAF 6 | import numpy as np 7 | 8 | from sklearn_pmml.convert.model import EstimatorConverter, ModelMode, Schema 9 | from sklearn_pmml.convert.features import Feature, CategoricalFeature, NumericFeature 10 | import sklearn_pmml.pmml as pmml 11 | from sklearn_pmml.convert.utils import estimator_to_converter 12 | 13 | 14 | class DecisionTreeConverter(EstimatorConverter): 15 | SPLIT_BINARY = 'binarySplit' 16 | OPERATOR_LE = 'lessOrEqual' 17 | NODE_ROOT = 0 18 | OUTPUT_PROBABILITY = 'proba' 19 | OUTPUT_LABEL = 'proba' 20 | 21 | def __init__(self, estimator, context, mode): 22 | super(DecisionTreeConverter, self).__init__(estimator, context, mode) 23 | 24 | assert len(self.context.schemas[Schema.OUTPUT]) == 1, 'Only one-label trees are supported' 25 | assert hasattr(estimator, 'tree_'), 'Estimator has no tree_ attribute' 26 | if mode == ModelMode.CLASSIFICATION: 27 | if isinstance(self.context.schemas[Schema.OUTPUT][0], CategoricalFeature): 28 | self.prediction_output = self.OUTPUT_LABEL 29 | else: 30 | self.prediction_output = self.OUTPUT_PROBABILITY 31 | assert isinstance(self.estimator, ClassifierMixin), \ 32 | 'Only a classifier can be serialized in classification mode' 33 | if mode == ModelMode.REGRESSION: 34 | assert isinstance(self.context.schemas[Schema.OUTPUT][0], NumericFeature), \ 35 | 'Only a numeric feature can be an output of regression' 36 | assert isinstance(self.estimator, RegressorMixin), \ 37 | 'Only a regressor can be serialized in regression mode' 38 | assert estimator.tree_.value.shape[1] == len(self.context.schemas[Schema.OUTPUT]), \ 39 | 'Tree outputs {} results while the schema specifies {} output fields'.format( 40 | estimator.tree_.value.shape[1], len(self.context.schemas[Schema.OUTPUT])) 41 | 42 | # create hidden variables for each categorical output 43 | # TODO: this code is copied from the ClassifierConverter. To make things right, we need an abstract tree 44 | # TODO: converter and subclasses for classifier and regression converters 45 | internal_schema = list(filter(lambda x: isinstance(x, CategoricalFeature), self.context.schemas[Schema.OUTPUT])) 46 | self.context.schemas[Schema.INTERNAL] = internal_schema 47 | 48 | def _model(self): 49 | assert Schema.NUMERIC in self.context.schemas, \ 50 | 'Either build transformation dictionary or provide {} schema in context'.format(Schema.NUMERIC) 51 | tm = pmml.TreeModel(functionName=self.model_function.value, splitCharacteristic=self.SPLIT_BINARY) 52 | tm.append(self.mining_schema()) 53 | tm.append(self.output()) 54 | tm.Node = self._transform_node( 55 | self.estimator.tree_, 56 | self.NODE_ROOT, 57 | self.context.schemas[Schema.NUMERIC], 58 | self.context.schemas[Schema.OUTPUT][0] 59 | ) 60 | return tm 61 | 62 | def model(self, verification_data=None): 63 | assert Schema.NUMERIC in self.context.schemas, \ 64 | 'Either build transformation dictionary or provide {} schema in context'.format(Schema.NUMERIC) 65 | tm = self._model() 66 | if verification_data is not None: 67 | tm.ModelVerification = self.model_verification(verification_data) 68 | return tm 69 | 70 | def _transform_node(self, tree, index, input_schema, output_feature, enter_condition=None): 71 | """ 72 | Recursive mapping of sklearn Tree into PMML Node tree 73 | :return: Node element 74 | """ 75 | assert isinstance(tree, Tree) 76 | assert isinstance(input_schema, list) 77 | assert isinstance(output_feature, Feature) 78 | 79 | node = pmml.Node() 80 | if enter_condition is None: 81 | node.append(pmml.True_()) 82 | else: 83 | node.append(enter_condition) 84 | node.recordCount = tree.n_node_samples[index] 85 | 86 | if tree.children_left[index] != TREE_LEAF: 87 | feature = input_schema[tree.feature[index]] 88 | assert isinstance(feature, Feature) 89 | left_child = self._transform_node( 90 | tree, 91 | tree.children_left[index], 92 | input_schema, 93 | output_feature, 94 | enter_condition=pmml.SimplePredicate( 95 | field=feature.full_name, operator=DecisionTreeConverter.OPERATOR_LE, value_=tree.threshold[index] 96 | ) 97 | ) 98 | right_child = self._transform_node(tree, tree.children_right[index], input_schema, output_feature) 99 | if self.model_function == ModelMode.CLASSIFICATION: 100 | score, score_prob = None, 0.0 101 | for i in range(len(tree.value[index][0])): 102 | left_score = left_child.ScoreDistribution[i] 103 | right_score = right_child.ScoreDistribution[i] 104 | prob = float(left_score.recordCount + right_score.recordCount) / node.recordCount 105 | node.append(pmml.ScoreDistribution( 106 | recordCount=left_score.recordCount + right_score.recordCount, 107 | value_=left_score.value_, 108 | confidence=prob 109 | )) 110 | if score_prob < prob: 111 | score, score_prob = left_score.value_, prob 112 | node.score = score 113 | node.append(left_child).append(right_child) 114 | 115 | else: 116 | node_value = np.array(tree.value[index][0]) 117 | if self.model_function == ModelMode.CLASSIFICATION: 118 | probs = node_value / float(node_value.sum()) 119 | for i in range(len(probs)): 120 | node.append(pmml.ScoreDistribution( 121 | confidence=probs[i], 122 | recordCount=node_value[i], 123 | value_=output_feature.from_number(i) 124 | )) 125 | node.score = output_feature.from_number(probs.argmax()) 126 | elif self.model_function == ModelMode.REGRESSION: 127 | node.score = node_value[0] 128 | 129 | return node 130 | 131 | def output(self): 132 | """ 133 | Output section of PMML contains all model outputs. 134 | Classification tree output contains output variable as a label, 135 | and # as a probability of a value for a variable 136 | :return: pmml.Output 137 | """ 138 | output = pmml.Output() 139 | 140 | # the response variables 141 | for feature in self.context.schemas[Schema.OUTPUT]: 142 | output_field = pmml.OutputField( 143 | name=Schema.OUTPUT.extract_feature_name(feature), 144 | feature='predictedValue', 145 | optype=feature.optype.value, 146 | dataType=feature.data_type.value 147 | ) 148 | output.append(output_field) 149 | 150 | # the probabilities for categories; should only be populated for classification jobs 151 | for feature in self.context.schemas[Schema.CATEGORIES]: 152 | output_field = pmml.OutputField( 153 | name=Schema.CATEGORIES.extract_feature_name(feature), 154 | optype=feature.optype.value, 155 | dataType=feature.data_type.value, 156 | feature='probability', 157 | targetField=Schema.INTERNAL.extract_feature_name(feature.namespace), 158 | value_=feature.name 159 | ) 160 | output.append(output_field) 161 | 162 | return output 163 | 164 | 165 | estimator_to_converter[DecisionTreeClassifier] = partial( 166 | DecisionTreeConverter, mode=ModelMode.CLASSIFICATION 167 | ) 168 | estimator_to_converter[DecisionTreeRegressor] = partial( 169 | DecisionTreeConverter, mode=ModelMode.REGRESSION 170 | ) -------------------------------------------------------------------------------- /sklearn_pmml/convert/gbrt.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | from sklearn.ensemble import GradientBoostingClassifier 3 | 4 | from sklearn.ensemble.gradient_boosting import LogOddsEstimator 5 | 6 | from sklearn_pmml.convert.features import * 7 | from sklearn_pmml.convert.model import EstimatorConverter, ClassifierConverter, ModelMode, RegressionConverter, Schema, \ 8 | TransformationContext 9 | from sklearn_pmml.convert.tree import DecisionTreeConverter 10 | import sklearn_pmml.pmml as pmml 11 | from sklearn_pmml.convert.utils import estimator_to_converter, find_converter 12 | 13 | 14 | class LogOddsEstimatorConverter(RegressionConverter): 15 | REGRESSION_LINEAR = "linearRegression" 16 | 17 | def __init__(self, estimator, context): 18 | super(LogOddsEstimatorConverter, self).__init__(estimator, context) 19 | 20 | assert isinstance(estimator, LogOddsEstimator), 'This converter can only process LogOddsEstimator instances' 21 | 22 | def model(self, verification_data=None): 23 | rm = pmml.RegressionModel(functionName=self.model_function.value, algorithmName=self.REGRESSION_LINEAR) 24 | rm.append(self.mining_schema()) 25 | rm.append(pmml.RegressionTable(intercept=self.estimator.prior)) 26 | if verification_data is not None: 27 | rm.append(self.model_verification(verification_data)) 28 | return rm 29 | 30 | 31 | class GradientBoostingConverter(ClassifierConverter): 32 | """ 33 | Converter for GradientBoostingClassifier model. 34 | 35 | NOTE: at the moment only binary one-label classification is supported. 36 | """ 37 | SCHEMAS_IN_MINING_MODEL = {Schema.INPUT} 38 | 39 | def __init__(self, estimator, context): 40 | super(GradientBoostingConverter, self).__init__(estimator, context) 41 | 42 | assert isinstance(estimator, GradientBoostingClassifier), \ 43 | 'This converter can only process GradientBoostingClassifier instances' 44 | assert len(context.schemas[Schema.OUTPUT]) == 1, 'Only one-label classification is supported' 45 | assert not estimator.loss_.is_multi_class, 'Only one-label classification is supported' 46 | assert context.schemas[Schema.OUTPUT][0].optype == FeatureOpType.CATEGORICAL, \ 47 | 'Classification output must be categorical' 48 | assert len(context.schemas[Schema.OUTPUT][0].value_list) == 2, 'Only binary classifier is supported' 49 | assert find_converter(estimator.init_) is not None, 'Can not find a converter for {}'.format(estimator.init_) 50 | 51 | def model(self, verification_data=None): 52 | # The ensemble of regression models can only be a regression model. Surprise! 53 | mining_model = pmml.MiningModel(functionName=ModelMode.REGRESSION.value) 54 | mining_model.append(self.mining_schema()) 55 | mining_model.append(self.output_transformation()) 56 | mining_model.append(self.segmentation()) 57 | if verification_data is not None: 58 | mining_model.append(self.model_verification(verification_data)) 59 | return mining_model 60 | 61 | def output_transformation(self): 62 | """ 63 | Build sigmoid output transformation: 64 | proba = 1 / (1 + exp(-(initial_estimate + weighted_sum(estimates)))) 65 | :return: Output element 66 | """ 67 | output = pmml.Output() 68 | 69 | # storing the raw prediction into internal::varname variable 70 | for f in self.context.schemas[Schema.INTERNAL]: 71 | output.append(pmml.OutputField(feature='predictedValue', name=Schema.INTERNAL.extract_feature_name(f))) 72 | 73 | # setting up a logistic transformation for the positive label 74 | positive_category = self.context.schemas[Schema.CATEGORIES][1] 75 | output_field = pmml.OutputField( 76 | dataType=positive_category.data_type.value, 77 | feature='transformedValue', 78 | name=Schema.CATEGORIES.extract_feature_name(positive_category), 79 | optype=positive_category.optype.value 80 | ) 81 | neg = pmml.Apply(function='*') 82 | neg.append(pmml.FieldRef(field=Schema.INTERNAL.extract_feature_name(positive_category.namespace))) 83 | neg.append(pmml.Constant( 84 | # there is no notion of weighted sum in segment aggregation, so we used weighted average, 85 | # and now the result should be multiplied by total weight 86 | -(1 + self.estimator.n_estimators * self.estimator.learning_rate), 87 | dataType=FeatureType.DOUBLE.value 88 | )) 89 | exp = pmml.Apply(function='exp') 90 | exp.append(neg) 91 | plus = pmml.Apply(function='+') 92 | plus.append(pmml.Constant(1.0, dataType=FeatureType.DOUBLE.value)) 93 | plus.append(exp) 94 | div = pmml.Apply(function='/') 95 | div.append(pmml.Constant(1.0, dataType=FeatureType.DOUBLE.value)) 96 | div.append(plus) 97 | output_field.append(div) 98 | output.append(output_field) 99 | 100 | # probability of negative label is 1 - positive_proba 101 | negative_category = self.context.schemas[Schema.CATEGORIES][0] 102 | output_field = pmml.OutputField( 103 | dataType=negative_category.data_type.value, 104 | feature='transformedValue', 105 | name=Schema.CATEGORIES.extract_feature_name(negative_category), 106 | optype=negative_category.optype.value 107 | ) 108 | subtract = pmml.Apply(function='-') 109 | subtract.append(pmml.Constant(1, dataType=FeatureType.DOUBLE.value)) 110 | subtract.append(pmml.FieldRef(field=Schema.CATEGORIES.extract_feature_name(positive_category))) 111 | output_field.append(subtract) 112 | output.append(output_field) 113 | 114 | # now we should define a label; we can look at the raw predicted output and compare it with 0 115 | label = self.context.schemas[Schema.OUTPUT][0] 116 | output_field = pmml.OutputField( 117 | feature='transformedValue', 118 | name=Schema.OUTPUT.extract_feature_name(label), 119 | optype=label.optype.value, 120 | dataType=label.data_type.value 121 | ) 122 | discretize = pmml.Discretize(field=Schema.INTERNAL.extract_feature_name(label)) 123 | discretize_bin = pmml.DiscretizeBin(binValue=label.value_list[0]) 124 | discretize_bin.append(pmml.Interval(closure="openOpen", rightMargin=0)) 125 | discretize.append(discretize_bin) 126 | discretize_bin = pmml.DiscretizeBin(binValue=label.value_list[1]) 127 | discretize_bin.append(pmml.Interval(closure="closedOpen", leftMargin=0)) 128 | discretize.append(discretize_bin) 129 | output_field.append(discretize) 130 | output.append(output_field) 131 | 132 | return output 133 | 134 | def segmentation(self): 135 | """ 136 | Build a segmentation (sequence of estimators) 137 | :return: Segmentation element 138 | """ 139 | # there is no notion of weighted sum, so we should take weighted average and multiply result by total weight 140 | # in output transformation 141 | segmentation = pmml.Segmentation(multipleModelMethod="weightedAverage") 142 | 143 | # build the context for the nested regression models by replacing output categorical feature 144 | # with the continuous numeric feature 145 | regression_context = TransformationContext(schemas=dict(self.context.schemas)) 146 | regression_context.schemas[Schema.OUTPUT] = [RealNumericFeature( 147 | name=self.context.schemas[Schema.OUTPUT][0].name, 148 | namespace=Schema.NUMERIC.namespace 149 | )] 150 | 151 | # first, transform initial estimator 152 | init_segment = pmml.Segment(weight=1) 153 | init_segment.append(pmml.True_()) 154 | init_segment.append(find_converter(self.estimator.init_)(self.estimator.init_, regression_context).model()) 155 | segmentation.append(init_segment) 156 | 157 | for est in self.estimator.estimators_[:, 0]: 158 | s = pmml.Segment(weight=self.estimator.learning_rate) 159 | s.append(pmml.True_()) 160 | s.append(DecisionTreeConverter(est, regression_context, ModelMode.REGRESSION)._model()) 161 | segmentation.append(s) 162 | 163 | return segmentation 164 | 165 | 166 | estimator_to_converter[GradientBoostingClassifier] = GradientBoostingConverter 167 | estimator_to_converter[LogOddsEstimator] = LogOddsEstimatorConverter -------------------------------------------------------------------------------- /examples/pmml/GradientBoostingClassifier.pmml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 1.0 31 | 32 | 1.0 33 | 34 | 35 | -1.2 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 1 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /sklearn_pmml/test/data/gradient_boosting_classifier/document.pmml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | zero0 18 | one1 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 1.0 34 | 35 | 1.0 36 | 37 | 38 | -2.0 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | -------------------------------------------------------------------------------- /sklearn_pmml/convert/model.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from enum import Enum 3 | from sklearn.base import ClassifierMixin, RegressorMixin, BaseEstimator 4 | from sklearn_pmml import pmml 5 | from sklearn_pmml.convert.utils import pmml_row, assert_equal 6 | from sklearn_pmml.convert.features import * 7 | from pyxb.utils.domutils import BindingDOMSupport as bds 8 | import numpy as np 9 | 10 | 11 | class TransformationContext(object): 12 | """ 13 | Context holder object 14 | """ 15 | 16 | def __init__(self, schemas=None): 17 | """ 18 | :type schemas: dict[Schema, list[Feature]] | None 19 | """ 20 | if schemas is None: 21 | schemas = {} 22 | self.schemas = schemas 23 | 24 | 25 | class ModelMode(Enum): 26 | CLASSIFICATION = 'classification' 27 | REGRESSION = 'regression' 28 | 29 | 30 | class Schema(Enum): 31 | INPUT = ('input', True, True) 32 | """ 33 | Schema used to define input variables. Short names allowed 34 | """ 35 | 36 | OUTPUT = ('output', True, True) 37 | """ 38 | Schema used to define output variables. Short names allowed. For the categorical variables the continuous 39 | probability variables will be automatically created as . 40 | """ 41 | 42 | DERIVED = ('derived', False, False) 43 | """ 44 | Schema used to define derived features. Short names not allowed due to potential overlap with input variables. 45 | """ 46 | 47 | NUMERIC = ('numeric', False, False) 48 | """ 49 | Schema used to encode categorical features as numbers. Short names not allowed due to their overlap with 50 | input variables 51 | """ 52 | 53 | MODEL = ('model', True, False) 54 | """ 55 | Schema used to define features fed into the sklearn estimator. 56 | Short names allowed because these variables are not going into PMML. 57 | """ 58 | 59 | INTERNAL = ('internal', False, True) 60 | """ 61 | This schema may be used by complex converters to hide the variables used for internal needs 62 | (e.g. the raw predictions of GBRT) 63 | """ 64 | 65 | CATEGORIES = ('categories', True, False) 66 | """ 67 | This schema is used to extend categorical outputs with probabilities of categories 68 | """ 69 | 70 | def __init__(self, name, short_names_allowed, data_dict_elibigle): 71 | self._name = name 72 | self._short_names_allowed = short_names_allowed 73 | self._data_dict_elibigle = data_dict_elibigle 74 | 75 | @property 76 | def namespace(self): 77 | """ 78 | The namespace corresponding to the schema 79 | """ 80 | return self._name 81 | 82 | @property 83 | def short_names_allowed(self): 84 | """ 85 | The schema allows usage of short names instead of fully-qualified names 86 | """ 87 | return self._short_names_allowed 88 | 89 | @property 90 | def eligible_for_data_dictionary(self): 91 | """ 92 | The variables defined in the schema should appear in the DataDictionary 93 | """ 94 | return self._data_dict_elibigle 95 | 96 | def extract_feature_name(self, f): 97 | """ 98 | Extract the printed name of the feature. 99 | :param f: feature to work with 100 | :type f: Feature|str 101 | """ 102 | if self.short_names_allowed: 103 | if isinstance(f, str): 104 | return f 105 | else: 106 | return f.full_name 107 | else: 108 | return "{}.{}".format(self.namespace, f if isinstance(f, str) else f.full_name) 109 | 110 | 111 | class EstimatorConverter(object): 112 | """ 113 | A new base class for the estimator converters 114 | """ 115 | EPSILON = 0.00001 116 | SCHEMAS_IN_MINING_MODEL = {Schema.INPUT, Schema.INTERNAL} 117 | 118 | def __init__(self, estimator, context, mode): 119 | self.model_function = mode 120 | self.estimator = estimator 121 | self.context = context 122 | 123 | assert not any(isinstance(_, DerivedFeature) for _ in context.schemas[Schema.INPUT]), \ 124 | 'Input schema represents the input fields only' 125 | assert all(isinstance(_, DerivedFeature) for _ in context.schemas[Schema.DERIVED]), \ 126 | 'Derived schema represents the set of automatically generated fields' 127 | assert not any(isinstance(_, DerivedFeature) for _ in context.schemas[Schema.OUTPUT]), \ 128 | 'Only regular features allowed in output schema; use Output transformation if you want to transform values' 129 | 130 | # create a new schema for categories probabilities 131 | categories = [] 132 | for feature in context.schemas[Schema.OUTPUT]: 133 | if isinstance(feature, CategoricalFeature): 134 | for value in feature.value_list: 135 | categories.append(RealNumericFeature( 136 | name=value, 137 | namespace=feature.name 138 | )) 139 | context.schemas[Schema.CATEGORIES] = categories 140 | 141 | def data_dictionary(self): 142 | """ 143 | Build a data dictionary and return a DataDictionary element. 144 | 145 | DataDictionary contains feature types for all variables used in the PMML, 146 | except the ones defined as Derived Features 147 | """ 148 | dd = pmml.DataDictionary() 149 | for schema, fields in sorted(self.context.schemas.items(), key=lambda x: x[0].name): 150 | assert isinstance(schema, Schema) 151 | if schema.eligible_for_data_dictionary: 152 | for f in fields: 153 | data_field = pmml.DataField( 154 | dataType=f.data_type.value, 155 | name=schema.extract_feature_name(f), 156 | optype=f.optype.value) 157 | dd.DataField.append(data_field) 158 | if isinstance(f, CategoricalFeature): 159 | for v in f.value_list: 160 | data_field.append(pmml.Value(value_=v)) 161 | return dd 162 | 163 | def output(self): 164 | """ 165 | Output section of PMML contains all model outputs. 166 | :return: pmml.Output 167 | """ 168 | output = pmml.Output() 169 | 170 | # the response variables 171 | for feature in self.context.schemas[Schema.OUTPUT]: 172 | output_field = pmml.OutputField( 173 | name=Schema.OUTPUT.extract_feature_name(feature), 174 | feature='predictedValue' 175 | ) 176 | output.append(output_field) 177 | 178 | return output 179 | 180 | def transformation_dictionary(self): 181 | """ 182 | Build a transformation dictionary and return a TransformationDictionary element 183 | """ 184 | td = pmml.TransformationDictionary() 185 | # define a schema with all variables available for a model 186 | encoded_schema = [] 187 | self.context.schemas[Schema.NUMERIC] = encoded_schema 188 | idx = {} 189 | 190 | # First, populate transformation dictionary for _all_ derived fields, because they can be requested later 191 | for f in self.context.schemas[Schema.DERIVED]: 192 | ef = RealNumericFeature(name=f.name) 193 | df = pmml.DerivedField( 194 | name=ef.full_name, 195 | optype=ef.optype.value, 196 | dataType=ef.data_type.value 197 | ) 198 | df.append(f.transformation) 199 | td.append(df) 200 | assert f.name not in idx, 'Duplicate field definition: {}'.format(f.name) 201 | idx[f.name] = ef 202 | 203 | # second, define the numeric transformations for the categorical variables 204 | for f in self.context.schemas[Schema.INPUT]: 205 | assert f.name not in idx, 'Duplicate field definition: {}'.format(f.name) 206 | if isinstance(f, CategoricalFeature): 207 | ef = RealNumericFeature(name=f.name, namespace=Schema.NUMERIC.namespace) 208 | # create a record in transformation dictionary with mapping from raw values into numbers 209 | df = pmml.DerivedField( 210 | name=ef.full_name, 211 | optype=ef.optype.value, 212 | dataType=ef.data_type.value 213 | ) 214 | mv = pmml.MapValues(outputColumn='output', dataType=ef.data_type.value) 215 | mv.append(pmml.FieldColumnPair(field=f.full_name, column='input')) 216 | it = pmml.InlineTable() 217 | for i, v in enumerate(f.value_list): 218 | it.append(pmml_row(input=v, output=i)) 219 | td.append(df.append(mv.append(it))) 220 | idx[f.name] = ef 221 | else: 222 | idx[f.name] = f 223 | 224 | # now we can build a mirror of model schema into the numeric schema 225 | self.context.schemas[Schema.NUMERIC] = [idx[f.name] for f in self.context.schemas[Schema.MODEL]] 226 | 227 | return td 228 | 229 | def model(self, verification_data=None): 230 | """ 231 | Build a mining model and return one of the MODEL-ELEMENTs 232 | """ 233 | pass 234 | 235 | def model_verification(self, verification_data): 236 | """ 237 | Use the input verification_data, apply the transformations, evaluate the model response and produce the 238 | ModelVerification element 239 | :param verification_data: list of dictionaries or data frame 240 | :type verification_data: dict[str, object]|pd.DataFrame 241 | :return: ModelVerification element 242 | """ 243 | verification_data = pd.DataFrame(verification_data) 244 | assert len(verification_data) > 0, 'Verification data can not be empty' 245 | 246 | verification_input = pd.DataFrame(index=verification_data.index) 247 | verification_model_input = pd.DataFrame(index=verification_data.index) 248 | for key in self.context.schemas[Schema.INPUT]: 249 | # all input features MUST be present in the verification_data 250 | assert key.full_name in verification_data.columns, 'Missing input field "{}"'.format(key.full_name) 251 | verification_input[Schema.INPUT.extract_feature_name(key)] = verification_data[key.full_name] 252 | if isinstance(key, CategoricalFeature): 253 | verification_model_input[Schema.INPUT.extract_feature_name(key)] = np.vectorize(key.to_number)(verification_data[key.full_name]) 254 | else: 255 | verification_model_input[Schema.INPUT.extract_feature_name(key)] = verification_data[key.full_name] 256 | 257 | for key in self.context.schemas[Schema.DERIVED]: 258 | assert isinstance(key, DerivedFeature), 'Only DerivedFeatures are allowed in the DERIVED schema' 259 | verification_model_input[key.full_name] = key.apply(verification_input) 260 | 261 | # at this point we can check that MODEL schema contains only known features 262 | for key in self.context.schemas[Schema.MODEL]: 263 | assert Schema.MODEL.extract_feature_name(key) in verification_model_input.columns, \ 264 | 'Unknown feature "{}" in the MODEL schema'.format(key.full_name) 265 | 266 | # TODO: we can actually support multiple columns, but need to figure out the way to extract the data 267 | # TODO: from the estimator properly 268 | # building model results 269 | assert len(self.context.schemas[Schema.OUTPUT]) == 1, 'Only one output is currently supported' 270 | key = self.context.schemas[Schema.OUTPUT][0] 271 | model_input = verification_model_input[list(map(Schema.MODEL.extract_feature_name, self.context.schemas[Schema.MODEL]))].values 272 | model_results = np.vectorize(key.from_number)(self.estimator.predict(X=model_input)) 273 | if key.full_name in verification_data: 274 | # make sure that if results are provided, the expected and actual values are equal 275 | assert_equal(key, model_results, verification_data[key.full_name].values) 276 | verification_input[Schema.OUTPUT.extract_feature_name(key)] = model_results 277 | 278 | if isinstance(key, CategoricalFeature): 279 | probabilities = self.estimator.predict_proba(X=model_input) 280 | for i, key in enumerate(self.context.schemas[Schema.CATEGORIES]): 281 | verification_input[Schema.CATEGORIES.extract_feature_name(key)] = probabilities[:, i] 282 | 283 | fields = [] 284 | field_names = [] 285 | for s in [Schema.INPUT, Schema.OUTPUT, Schema.CATEGORIES]: 286 | fields += self.context.schemas[s] 287 | field_names += list(map(s.extract_feature_name, self.context.schemas[s])) 288 | 289 | mv = pmml.ModelVerification(recordCount=len(verification_input), fieldCount=len(fields)) 290 | 291 | # step one: build verification schema 292 | verification_fields = pmml.VerificationFields() 293 | for key in fields: 294 | if isinstance(key, NumericFeature): 295 | vf = pmml.VerificationField(field=key.name, column=key.name, precision=self.EPSILON) 296 | else: 297 | vf = pmml.VerificationField(field=key.name, column=key.name) 298 | verification_fields.append(vf) 299 | mv.append(verification_fields) 300 | 301 | # step two: build data table 302 | it = pmml.InlineTable() 303 | for data in verification_input.iterrows(): 304 | data = data[1] 305 | row = pmml.row() 306 | row_empty = True 307 | for key in field_names: 308 | if verification_input[key].dtype == object or not np.isnan(data[key]): 309 | col = bds().createChildElement(key) 310 | bds().appendTextChild(data[key], col) 311 | row.append(col) 312 | row_empty = False 313 | if not row_empty: 314 | it.append(row) 315 | mv.append(it) 316 | 317 | return mv 318 | 319 | def mining_schema(self): 320 | """ 321 | Mining schema contains the model input features. 322 | NOTE: In order to avoid duplicates, I've decided to remove output features from MiningSchema 323 | NOTE: We don't need to specify any DERIVED/NUMERIC fields here, because PMML interpreter will create them 324 | in a lazy manner. 325 | """ 326 | ms = pmml.MiningSchema() 327 | 328 | if Schema.INPUT in self.SCHEMAS_IN_MINING_MODEL: 329 | for f in sorted(self.context.schemas[Schema.INPUT], key=lambda _: _.full_name): 330 | ms.append(pmml.MiningField(invalidValueTreatment=f.invalid_value_treatment.value, name=f.full_name)) 331 | 332 | for s in [Schema.OUTPUT, Schema.INTERNAL]: 333 | if s in self.SCHEMAS_IN_MINING_MODEL: 334 | for f in self.context.schemas.get(s, []): 335 | ms.append(pmml.MiningField( 336 | name=s.extract_feature_name(f), 337 | usageType="predicted" 338 | )) 339 | 340 | return ms 341 | 342 | def header(self): 343 | """ 344 | Build and return Header element 345 | """ 346 | return pmml.Header() 347 | 348 | def pmml(self, verification_data=None): 349 | """ 350 | Build PMML from the context and estimator. 351 | Returns PMML element 352 | """ 353 | p = pmml.PMML(version="4.2") 354 | p.append(self.header()) 355 | p.append(self.data_dictionary()) 356 | p.append(self.transformation_dictionary()) 357 | p.append(self.model(verification_data)) 358 | return p 359 | 360 | 361 | class ClassifierConverter(EstimatorConverter): 362 | """ 363 | Base class for classifier converters. 364 | It is required that the output schema contains only categorical features. 365 | The serializer will output result labels as output::feature_name and probabilities for each value of result feature 366 | as output::feature_name::feature_value. 367 | """ 368 | def __init__(self, estimator, context): 369 | """ 370 | :param estimator: Estimator to convert 371 | :type estimator: BaseEstimator 372 | :param context: context to work with 373 | :type context: TransformationContext 374 | """ 375 | super(ClassifierConverter, self).__init__(estimator, context, ModelMode.CLASSIFICATION) 376 | assert isinstance(estimator, ClassifierMixin), 'Classifier converter should only be applied to the classification models' 377 | for f in context.schemas[Schema.OUTPUT]: 378 | assert isinstance(f, CategoricalFeature), 'Only categorical outputs are supported for classification task' 379 | 380 | # create hidden variables for each categorical output 381 | internal_schema = list(filter(lambda x: isinstance(x, CategoricalFeature), self.context.schemas[Schema.OUTPUT])) 382 | self.context.schemas[Schema.INTERNAL] = internal_schema 383 | 384 | def output(self): 385 | """ 386 | Output section of PMML contains all model outputs. 387 | Classification tree output contains output variable as a label, 388 | and . as a probability of a value for a variable 389 | :return: pmml.Output 390 | """ 391 | output = pmml.Output() 392 | 393 | # the response variables 394 | for feature in self.context.schemas[Schema.OUTPUT]: 395 | output_field = pmml.OutputField( 396 | name=Schema.OUTPUT.extract_feature_name(feature), 397 | feature='predictedValue', 398 | optype=feature.optype.value, 399 | dataType=feature.data_type.value 400 | ) 401 | output.append(output_field) 402 | 403 | # the probabilities for categories; should only be populated for classification jobs 404 | for feature in self.context.schemas[Schema.CATEGORIES]: 405 | output_field = pmml.OutputField( 406 | name=Schema.CATEGORIES.extract_feature_name(feature), 407 | optype=feature.optype.value, 408 | dataType=feature.data_type.value, 409 | feature='probability', 410 | targetField=Schema.INTERNAL.extract_feature_name(feature.namespace), 411 | value_=feature.name 412 | ) 413 | output.append(output_field) 414 | 415 | return output 416 | 417 | 418 | class RegressionConverter(EstimatorConverter): 419 | def __init__(self, estimator, context): 420 | super(RegressionConverter, self).__init__(estimator, context, ModelMode.REGRESSION) -------------------------------------------------------------------------------- /examples/pmml/RandomForestClassifier.pmml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | -------------------------------------------------------------------------------- /sklearn_pmml/test/data/gradient_boosting_classifier/estimator.pkl: -------------------------------------------------------------------------------- 1 | ccopy_reg 2 | _reconstructor 3 | p1 4 | (csklearn.ensemble.gradient_boosting 5 | GradientBoostingClassifier 6 | p2 7 | c__builtin__ 8 | object 9 | p3 10 | NtRp4 11 | (dp5 12 | S'verbose' 13 | p6 14 | I0 15 | sS'classes_' 16 | p7 17 | cnumpy.core.multiarray 18 | _reconstruct 19 | p8 20 | (cnumpy 21 | ndarray 22 | p9 23 | (I0 24 | tS'b' 25 | tRp10 26 | (I1 27 | (I2 28 | tcnumpy 29 | dtype 30 | p11 31 | (S'i8' 32 | I0 33 | I1 34 | tRp12 35 | (I3 36 | S'<' 37 | NNNI-1 38 | I-1 39 | I0 40 | tbI00 41 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00' 42 | tbsS'min_samples_leaf' 43 | p13 44 | I1 45 | sS'max_features' 46 | p14 47 | NsS'n_classes_' 48 | p15 49 | I2 50 | sS'init' 51 | p16 52 | NsS'random_state' 53 | p17 54 | NsS'loss_' 55 | p18 56 | g1 57 | (csklearn.ensemble.gradient_boosting 58 | BinomialDeviance 59 | p19 60 | g3 61 | NtRp20 62 | (dp21 63 | S'K' 64 | I1 65 | sbsS'train_score_' 66 | p22 67 | g8 68 | (g9 69 | (I0 70 | tS'b' 71 | tRp23 72 | (I1 73 | (I10 74 | tg11 75 | (S'f8' 76 | I0 77 | I1 78 | tRp24 79 | (I3 80 | S'<' 81 | NNNI-1 82 | I-1 83 | I0 84 | tbI00 85 | S'A\xbf\xa9\xd2O\xec\xed?\xf0\t\x85=\x1c\x8a\xe9?-\xc1o\n\xce\x1b\xe6?\xfe\x7f8}\x13P\xe3?2\x81V`h\xfa\xe0?\x83\xed\xa1/3\xfe\xdd?\xaf0\xe5;\xde\x96\xda?] /\xe5\x05\xa4\xd7?(\xc4j\xcd\xa8\x11\xd5?\x1d\x1e\x84\x16\xb9\xa2#*\x9c\xc6=|\xe2\xad+}y\xac\xba\x9e\xa1t\x91\xa5\xa3\xc2\xa7\x93\x99-X\xb4\xf1\xff\x91\xd5\xa0\x1b\xe3\x8d\xc1:\xae9R\x18\x1a\xf65T\xac\xcc\xb5wpz\na\xe8fml\x16\x9b\xea\xfdRe\x15\xca\xf5\xd2\x88\xfeC}g\xf3\xd2F2<\x8e\xfcz\xfa\xde\xb5\xb0l\xf0\xc4\xfb\x8bf\xea\xa3\xbd\xcc\x16\t%\xfb\x03/2\xd5\xd2w\x04\xa8\x83@\x89b\x12\x85\xf9\xae\xdb\xeb\xb4\xe6?\xc7\x1d\x06s\xb5\x06X\xec{\xc3H\x1c\x0b\x88I\xa5Q*\x8e\xe2\xb5\x85M\x0f\xfd%\xa4\x9f\xf6\xac\x0fn1\xfa\xee\x92\x83\xca\x7f\xdd\x18&`\x08?%\xb7\xda\xf4\xf1Hby\xb6\xf3\xad\x18\x98\xc9Y"\x1d\x86\xc6\xf5\xd8r\xcf\xcc\xdf\xe2\xc6^\xca\xab!\xcd\x9d\x05Nk}\xab^\xfdg=\x9b\x84\xb0\\\xb4\xba\xa9\x05\x7f\xde\x98n\xe0\xe2M\xa98\x9e\xb4\x93S\x89\xd91\xca\x1c]]\x91\xa4|\x9cR\x90\xc1\xd1\x02*\x192\x1d\x81x\x8dw\x01S(\x81Q\xeb\xa5\xfc\t\x04\xd4\xd4\xcc\xf2\x98\xed]\xf0\x8bCZ\x99\xd4~T1N\x18\\\xd1\x0b\xb3\xdd\x8b\xa0\x9a\x84\xc8~\xe60O9Muh\xea:\xc0\xed1\xf7r\xfb\xfe\xbf\xaf\xbb\xd2\x7f\xfc\xc3\xa8V\xbe\xea\xd0\x13\x9cO\xe0\x1b\xa2\xe6\x15d$j\xd7\x86\x9cY\xe1$\x86\x84\xa0\x00\xc3\xe9\x88\n\xcd\x9d\x84z/\xa7)\x11\xfd\n\xa2*\xe3\x0fF\x9a_.\x9b\x98\xe7<{\xacf\xf2\x87p\xaajy\xf1\x16\xda\xeb\x1b\x1fUBT\x07\x9b\xc210oz\xe1\x98O\x8c]z\xbet\x98W\x88\xa95m\r\xe1W\x87\xc7\x92\x05\xa3Y\xd4\xe4\x99\x90\xdb\xcb\xa3l\xdd\t\xda\x15\x87\t\x97g\xa7z,w:\xdc\x1e\xceC\xe6\xb6-\xa5\xf5p\xc6u\xd6(\xb5_\x18r\x9b$0w\xb8-\x88\xb6\x00\xfc\xf2\xb0\xff\xd5\xdcv\xc8\xae\xdfq\xf1\x84RC\x94\x07\\\x83\xcd\x8a~,\x0e\xc8Y\x17\xac\xbf*\xe4\xd3\x10\x9a\xef\xe9\xfb\x18\xfe]\x06\x9f\xe4\x11\xebR\xf9\xf0\xf3Ic\xe4\xbc\xbak[\xba\x12\x84\xa8\x97M\xe0u\x88\x99od\xac\x12\xc0\x9c\xc5\xd4\xf99\x84:h\x1d\xff\xb1\xf4\x8f\xa7>/\xc9S\xe7\xd0\xe8\xe5@\x03z\xe9\xcb\xa9+N\x9e]\xcf\xa8\x99\xe4\xf2\x85\xec\xe8\xca\xd6S:\xcez\xeeH]\x85n\xb2\x90\t\xe4\x11\xbd\xea8X\xd1R\xb9dR\xb4e\x9c\tn\\\x14?\xe5\xae\x84\xc4\xd4p\xa0\xf1j\xf1p\xf7\x04\xf5!\xf4s\x15\x89\x8d\x1f\xd3p\x8a\x96#\xc5\x0e@2U\xc9x\xe2\x0fa\x12{s\x0fC3\x11H\xe1\xa2"\xb6\x15\xce\x89\xb1)\xda\x06\x16\x1cC\xe4\x96\x11\xfb\x18\x97\xf0\xdf\x81\xea\xe2\x9c\xa4 \x8d\x1b\x93!\x9f\t\x1fau\x08\xe9\xd4\x02\x87\x83\x1a4vud\x00\x85\x07y\xfe\x7fu\x90v$:\x05\xc7\x82L Ne\xa0g\xf1\xe4W+(M\xb9\x10\xcc7}\xb0\x7f\xf4<{g\xfeRT\xdav\xe1B\xc5F)M\xa8\xeb#mGr;\x1cu5\xdf\xe9ea\\\x98}\xeb\x15\t\xf1\x1c\xfe}\xb4\x02`\xf6\xa6\xa6&Mef\xdb\xa1\xa0\xcd\xd68I\xe2\xaf2\xc7\xc5\xa7k\r<\xd6\xd2\xa3\x1ci\xf2&gI\xfb\xc9\xec\xb7\xfaP\x88\x1b>* \xdaw\x8f]\x8fK@\xe2\x9f9\x01\x1b\xd8\xf0\xfe\x91\x0bT\xcc\x0fT\x01\x81\xd1\x00G\xe1\x9e\xc8\xc0\x11\x97\xa6\xd1au\x8e\\\xf8\xff9!\'X\x0e>\xc7n\x1cR\x91\xb4\xc7v\xac\xf5\xd2\xe4\xde\xae0\xf5M\xbfd\xb1p\xb9\xfeV2\xfe\xff\xd3\xed\x91\x1d\xe2\xc2R\x1c\xf9,w$\x13\xc80\x0b\xbd\xbd\xc3i\xd0{\x8cj@\x03\xa0\x95w\xe3T\x0cb\xc5F\xedgN\xe5\x97U|v\xde\xec\x06p3\xa4\x00\xcc\xfd\x83\x9bT\x1f\xe4D\xf7\x85\x94S\x97\xc2\xf5:\xdc"\xf0\xed%\x93\x98oA(\x07D\xe1\x9b\xa5\xfa\x14\xbb\x96\x15\x975\xc8\x04\xcag\xe5?\xe0\xe4\xe2\x10\xdf\x87(o\xe7\xd9\xf1(\x00\x8d\xd4\x17v\xb0\xb2}\xf2c\xf1# \x9e\xd9\xb8\x8f[\xf0!j\x85\\\xbbz|=:\xac\xe2\x92\xfcB\xe8\x0f1 \x06\x9e\xab\xce\xf8(g\xc8\xcf^\xe1\xf1h\xc0\x8b\x05\xbc\xb7o\x19|O!1}GP\xcdx+\x89~\xe5\x00\x12\xc7\xd6\x8a\\\xfc)\xd7\x81\xdeIGJ\xb0d\xb3l\xb9\xf2\\\x1b\x98-\xcf"M\xf5\xd6\xec\x83\xefa\xa2~\xfd\x95\xf7\x02j\xbf\xee\xacY\xd8\x9c\x94\xa3\xcc\xb8\x97Dq\x1c\x069=\t\xe7\xccW>\xf4\x12\xf8@\xd1\x18\xb9d\'\xb9P;y\xb9\x18u_\x92\x85\xc49v/\xdd%\xbf]\x9b\n\x1f\x82\x0b\xb6\xc5\x82\xb1\xf5\x13Q%\xa6\x02\xc99c\xbb\xf9J\xad;\x89\x81X?\r\x04]\n\xba\xfe\xacQ-\xcf\x8e\xe4\x18\xe7\x1a43|\xc7\xf6\xb40\xce\xaf\xa4\x91\xcf\xa5\x17\xa4\x00\xb4\xcf|\xf4KI\x10\xc5\x80\xbb\xeeB\xddB6\x15RB_d\xc5\xa6:\x0e\xf9\xe9\x0b\xea\xce\x0f\x8f\xbeI\x86\xf7\xfe\xfbzk\x88Kvv\xfa8yZ\x1fx{u\xef\xea\xbb[Y\xa0\x85\x96\x9fg\xb8\xc2\x84E\xcf\xedvAA\xbfd\x805\x19\xae\x93\x87\x1c$o#\'\xb7\x1a\xf4\x84,a6\x84\xa1*\x16\x06I\x9c\xdd5\'\xea\xd1\x9c\xf2\x14\x1e\xa2\x08O\xf6\xa4\xddX^M\xbc\xd29C\x05\x83\x81\x81\xba\xe1\xca~\x18\xa5F\ng\x1b\x94\xe8\x8eu\xa9\xf3\xf5\xc1\xbc\xcb,\x84\xff\xe6\xfb\xe6\xb6b\\,\x18o\xb6\x96\xd0\xfc\xde\x0e2\xdb\x96\x90\x88\xa2\x9c\x9ce%\x8a\x8bl\xc9E\xd3\xee>\x9f,\xbf\xbeD\x1c\x0c@\xe0\xe5C\xfb\xf3K\\\xdea\xc5\xd6\xae)\xd2{+\xbd9\xff7k\xc8\xa1\xaf\xce,\xd0>\xfaj}\xf8\x91\xe3/\xcc\x1d;N\xbc9\xb3@\x9a\x7f1\xff\xed\xf9\x80\x8f\xb4a\xc9;\xf7\x10\xc6\x84\xa9\x17)\x954\xd32^-\x0e~E\xb9\xea\xdd\xf0t\x01GQ\xae\xaf\xc2\x01\xe8K7U\xdf*\xff\x9c\n\xd5$\xd0\xfe=9\x14n\xe5\xc6x\xba\xc1]\xdcZ\xcbB\xc8\xb7\xb4P \xc7\xaeC0\xc9\xaeu:\xe2w\x9a\xc4\xde\x93\x1b>3\xbf\x83\xbd\x8f\x1b\x977r\xc3E\x07\x8b\xfe!Lq?\x02\x14\xf1pv+\xff*\x13\x1bJ\x15\x93\x14\x88\xa8\x81\x94<\x9buxt\x0e\xdb\xb3u(>\xe1\xd7NVl\xc5\xa3s\xee\x1b\xfd9\x1a\xae\xf9\xb2x\xf4G\x1c\xd3m:b]*2\xe0\xe8\x81W\x17\xe1\x84\xf4<6\xfb"\x83\xa9\x19\xb9\xd0\xcb\x15a~\x9dT\xb2`\xca\xf6\xa6b\x12\x0f~\xf7T\x03CD\xb8>\xc1\xa8O\x8b@zk\x1f\xd8\xf5g\xab\xa6\x9e\xc8tb\xf9\xacK\xcc\xbcxMy\x1f\xe6\t=\\\xf7\xe7\xfa\x00\x85\xa1\x96\x14\xa5\xbf\x0c\xff0\xe7\x95c\x02\x0c\x92\xfb\x11\x88\x13\xdf\rM\xcd\xf3Z\x90\xe7\x89&\x8b\xa9jR\xc5\x86\x8d\x98\x0e\xea$\x0b2\x1a\xc80\xde\xe1\xb2[LH\x8de\xdc\xfd\x84\xb0B\xa5\x97_\r\x97\xf2\x96\xda\x90\x9a\x1f\xa76n1\xe15\xa5T\x8b\xc9\xe0:7\x87\xd5Y\x96\xd7\xa1\xb4\xe1K\xf9cHd\xce[\x0bP{\x18\x92\x9a\r\xf5\xd2\xb4\xcc\x91!\xb2i<\xc2l\xafmkH(mX\x0f\x1f\xc4\x9e\xf9\xf7\x82\x02\xd2\xc9\x9aHX3\xb8\xb8\xffy\xac"\r\x94\x90\x86\xf64\x9b\x7f\x1eXW?\xd1\xa2\x0f\xb0S\x9fdL\xdd\x05\xa5\xae~\xe6\xda\xe8\x01\x8fZ\xad\xd6\xe2\xef\xa8\xcc\xd7\xc1"2\xb6C\xf9_2 \xb1\xe9\xfa&\xaa@<\xcf\x85\xbb\x11;\xe8\xda\x87&rD\xdf\xd8r\x93\xa2\x950\xb5\xfc\xb9\xcbE\x87XQ"\x81Z\xceR\xa0\xe84\x14\xaf\xbd/\x8c%\x8bg\xf1\xbfLR\xa5M\x04o\xdd\x05Y\xb4\xe9\xe3\xa6\x85!\x89\r\xc8\x01\x0e\x90\xc9`\xee\x98Y\x15]\x80\x9dl\'\x1d\x1bJ\xa1\xa4\x08-\xaa\xad&\t\xb9^F;r\x87\xd7O\x98\xba3\xbd\xa4\x11\xd5\xa5\x02\xee\xdc\n\x86\xebKm\xe6\xa5J\xcf\xbch\x19\xa8\xa1\t\x15\x81(j\xbc\xf3v\x05\t\xef\x912\xd4I\xc8\x88%\xddH",\x86\x19\x17LG/\xfa\xab\xbc\xe9\x01\xb0\xe9\xff}\x1bk\xbc\x07\xe6}\xcc\xe93\xc8\xafbc#\x89\xf5\x9b\xb5\xdcb\xef\xb0\xa4:I\xe5\xf4\xab\x8b\xd4\xb4\x83(\x92\xba\xc1\x12\x04\\h\x0eU\xd0E\xb5\x82`\x01|\x04\x83\x86?\x7f\xc7\xe3\xbbn\xa5j8\xdf\x19\x1b\r\x9b\xd7\xe0\x1d\x02\x1c\xc2]!Xb\xc6|JJ\x018{\xfb\xad\x86\xb4\xe4J\x9bH\xbb\xb3z\x03\x10\xa8\xb1\xf4\x1b`\xbc\xa1\xe5C\xa3ED1\x0fKT}bm\xa6\x1e\x16q\xec\xfdBF\xc0v\xf9>f\x843\x9c,H\xa3\xe4.E\x82\xb5NZE~(\x1f\xd0\x04\xe0c;\xec\x8csG\xf4\x07\xbc\xac\xf0\xf1s,$\xd3\x06\xf6\x01z\x93\xfcSc\xf9K\x01\xc1g\xbb\x1f\x7fd_N\x00\xe6\xfbb\x06\x15 \xe7"?\x16Y#\xf8\x06' 155 | tbI10 156 | I0 157 | F0 158 | tbtRp42 159 | (dbsS'tree_' 160 | p43 161 | csklearn.tree._tree 162 | Tree 163 | p44 164 | (I2 165 | g8 166 | (g9 167 | (I0 168 | tS'b' 169 | tRp45 170 | (I1 171 | (I1 172 | tg12 173 | I00 174 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 175 | tbI1 176 | tRp46 177 | (dp47 178 | S'node_count' 179 | p48 180 | I5 181 | sS'values' 182 | p49 183 | g8 184 | (g9 185 | (I0 186 | tS'b' 187 | tRp50 188 | (I1 189 | (I5 190 | I1 191 | I1 192 | tg24 193 | I00 194 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\xc0UUUUUU\xf5?UUUUUU\xf5?' 195 | tbsS'nodes' 196 | p51 197 | g8 198 | (g9 199 | (I0 200 | tS'b' 201 | tRp52 202 | (I1 203 | (I5 204 | tg11 205 | (S'V56' 206 | I0 207 | I1 208 | tRp53 209 | (I3 210 | S'|' 211 | N(S'left_child' 212 | p54 213 | S'right_child' 214 | p55 215 | S'feature' 216 | p56 217 | S'threshold' 218 | p57 219 | S'impurity' 220 | p58 221 | S'n_node_samples' 222 | p59 223 | S'weighted_n_node_samples' 224 | p60 225 | tp61 226 | (dp62 227 | g60 228 | (g24 229 | I48 230 | tp63 231 | sg58 232 | (g24 233 | I32 234 | tp64 235 | sg55 236 | (g12 237 | I8 238 | tp65 239 | sg56 240 | (g12 241 | I16 242 | tp66 243 | sg57 244 | (g24 245 | I24 246 | tp67 247 | sg54 248 | (g12 249 | I0 250 | tp68 251 | sg59 252 | (g12 253 | I40 254 | tp69 255 | sI56 256 | I1 257 | I16 258 | tbI00 259 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x00\x00\x00\x00\x00\x00\xc8?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x00\x00\x00\x00\x00\x00\xd0?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@' 260 | tbsbsS'n_features_' 261 | p70 262 | I2 263 | sS'n_outputs_' 264 | p71 265 | I1 266 | sg15 267 | cnumpy.core.multiarray 268 | scalar 269 | p72 270 | (g12 271 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 272 | tRp73 273 | sS'max_leaf_nodes' 274 | p74 275 | Nsg7 276 | NsS'max_features_' 277 | p75 278 | I2 279 | sg13 280 | I1 281 | sg14 282 | Nsg17 283 | g38 284 | sS'criterion' 285 | p76 286 | g36 287 | sS'min_samples_split' 288 | p77 289 | I2 290 | sg25 291 | I2 292 | sbag1 293 | (g30 294 | g3 295 | NtRp78 296 | (dp79 297 | g33 298 | g42 299 | sg43 300 | g44 301 | (I2 302 | g8 303 | (g9 304 | (I0 305 | tS'b' 306 | tRp80 307 | (I1 308 | (I1 309 | tg12 310 | I00 311 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 312 | tbI1 313 | tRp81 314 | (dp82 315 | g48 316 | I5 317 | sg49 318 | g8 319 | (g9 320 | (I0 321 | tS'b' 322 | tRp83 323 | (I1 324 | (I5 325 | I1 326 | I1 327 | tg24 328 | I00 329 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb3\xd4\xd5Dr\x16\x08\xc0\xdb\xfa\xfd>\xe7\xaa\xf4?\xdb\xfa\xfd>\xe7\xaa\xf4?' 330 | tbsg51 331 | g8 332 | (g9 333 | (I0 334 | tS'b' 335 | tRp84 336 | (I1 337 | (I5 338 | tg53 339 | I00 340 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?y\xb3\x89Rq+\xc3?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xa2Db\xc3A\x8f\xc9?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00p<\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00`\xbc\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@' 341 | tbsbsg70 342 | I2 343 | sg71 344 | I1 345 | sg15 346 | g72 347 | (g12 348 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 349 | tRp85 350 | sg74 351 | Nsg7 352 | Nsg75 353 | I2 354 | sg13 355 | I1 356 | sg14 357 | Nsg17 358 | g38 359 | sg76 360 | g36 361 | sg77 362 | I2 363 | sg25 364 | I2 365 | sbag1 366 | (g30 367 | g3 368 | NtRp86 369 | (dp87 370 | g33 371 | g42 372 | sg43 373 | g44 374 | (I2 375 | g8 376 | (g9 377 | (I0 378 | tS'b' 379 | tRp88 380 | (I1 381 | (I1 382 | tg12 383 | I00 384 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 385 | tbI1 386 | tRp89 387 | (dp90 388 | g48 389 | I5 390 | sg49 391 | g8 392 | (g9 393 | (I0 394 | tS'b' 395 | tRp91 396 | (I1 397 | (I5 398 | I1 399 | I1 400 | tg24 401 | I00 402 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xef\x1d\xfa\x8f\xad\xe7\x03\xc0F\xf6\x84\x98\x1b\x1a\xf4?F\xf6\x84\x98\x1b\x1a\xf4?' 403 | tbsg51 404 | g8 405 | (g9 406 | (I0 407 | tS'b' 408 | tRp92 409 | (I1 410 | (I5 411 | tg53 412 | I00 413 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?n\xc7b\xd6\xa2\xe2\xbe?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x9e/\x979\x17\x97\xc4?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00p\xbc\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00x\xbc\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@' 414 | tbsbsg70 415 | I2 416 | sg71 417 | I1 418 | sg15 419 | g72 420 | (g12 421 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 422 | tRp93 423 | sg74 424 | Nsg7 425 | Nsg75 426 | I2 427 | sg13 428 | I1 429 | sg14 430 | Nsg17 431 | g38 432 | sg76 433 | g36 434 | sg77 435 | I2 436 | sg25 437 | I2 438 | sbag1 439 | (g30 440 | g3 441 | NtRp94 442 | (dp95 443 | g33 444 | g42 445 | sg43 446 | g44 447 | (I2 448 | g8 449 | (g9 450 | (I0 451 | tS'b' 452 | tRp96 453 | (I1 454 | (I1 455 | tg12 456 | I00 457 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 458 | tbI1 459 | tRp97 460 | (dp98 461 | g48 462 | I5 463 | sg49 464 | g8 465 | (g9 466 | (I0 467 | tS'b' 468 | tRp99 469 | (I1 470 | (I5 471 | I1 472 | I1 473 | tg24 474 | I00 475 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x86Y\xf6\xbeZH\x01\xc0eZZZ \x9e\xf3?eZZZ \x9e\xf3?' 476 | tbsg51 477 | g8 478 | (g9 479 | (I0 480 | tS'b' 481 | tRp100 482 | (I1 483 | (I5 484 | tg53 485 | I00 486 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?krW\xa2\xf7\xfc\xb8?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xf2\xf6\xe4\x16\xa5\xa8\xc0?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00x\xbc\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x84\xbc\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@' 487 | tbsbsg70 488 | I2 489 | sg71 490 | I1 491 | sg15 492 | g72 493 | (g12 494 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 495 | tRp101 496 | sg74 497 | Nsg7 498 | Nsg75 499 | I2 500 | sg13 501 | I1 502 | sg14 503 | Nsg17 504 | g38 505 | sg76 506 | g36 507 | sg77 508 | I2 509 | sg25 510 | I2 511 | sbag1 512 | (g30 513 | g3 514 | NtRp102 515 | (dp103 516 | g33 517 | g42 518 | sg43 519 | g44 520 | (I2 521 | g8 522 | (g9 523 | (I0 524 | tS'b' 525 | tRp104 526 | (I1 527 | (I1 528 | tg12 529 | I00 530 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 531 | tbI1 532 | tRp105 533 | (dp106 534 | g48 535 | I5 536 | sg49 537 | g8 538 | (g9 539 | (I0 540 | tS'b' 541 | tRp107 542 | (I1 543 | (I5 544 | I1 545 | I1 546 | tg24 547 | I00 548 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdc_\xe0\x9cL\xf5\xfe\xbf}v\xafIB3\xf3?}v\xafIB3\xf3?' 549 | tbsg51 550 | g8 551 | (g9 552 | (I0 553 | tS'b' 554 | tRp108 555 | (I1 556 | (I5 557 | tg53 558 | I00 559 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xf6\x8d}\xccKE\xb4?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x9d\x12R\xbb\x0f\x07\xbb?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@' 560 | tbsbsg70 561 | I2 562 | sg71 563 | I1 564 | sg15 565 | g72 566 | (g12 567 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 568 | tRp109 569 | sg74 570 | Nsg7 571 | Nsg75 572 | I2 573 | sg13 574 | I1 575 | sg14 576 | Nsg17 577 | g38 578 | sg76 579 | g36 580 | sg77 581 | I2 582 | sg25 583 | I2 584 | sbag1 585 | (g30 586 | g3 587 | NtRp110 588 | (dp111 589 | g33 590 | g42 591 | sg43 592 | g44 593 | (I2 594 | g8 595 | (g9 596 | (I0 597 | tS'b' 598 | tRp112 599 | (I1 600 | (I1 601 | tg12 602 | I00 603 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 604 | tbI1 605 | tRp113 606 | (dp114 607 | g48 608 | I5 609 | sg49 610 | g8 611 | (g9 612 | (I0 613 | tS'b' 614 | tRp115 615 | (I1 616 | (I5 617 | I1 618 | I1 619 | tg24 620 | I00 621 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd7P\x94@\xa5S\xfc\xbf\xab9\x92\xcf\x9d\xd6\xf2?\xab9\x92\xcf\x9d\xd6\xf2?' 622 | tbsg51 623 | g8 624 | (g9 625 | (I0 626 | tS'b' 627 | tRp116 628 | (I1 629 | (I5 630 | tg53 631 | I00 632 | S"\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xee\xe4\x9f'8y\xb0?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x93\x86*\x8a\xf5\xf6\xb5?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00`\xbc\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00h\xbc\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@" 633 | tbsbsg70 634 | I2 635 | sg71 636 | I1 637 | sg15 638 | g72 639 | (g12 640 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 641 | tRp117 642 | sg74 643 | Nsg7 644 | Nsg75 645 | I2 646 | sg13 647 | I1 648 | sg14 649 | Nsg17 650 | g38 651 | sg76 652 | g36 653 | sg77 654 | I2 655 | sg25 656 | I2 657 | sbag1 658 | (g30 659 | g3 660 | NtRp118 661 | (dp119 662 | g33 663 | g42 664 | sg43 665 | g44 666 | (I2 667 | g8 668 | (g9 669 | (I0 670 | tS'b' 671 | tRp120 672 | (I1 673 | (I1 674 | tg12 675 | I00 676 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 677 | tbI1 678 | tRp121 679 | (dp122 680 | g48 681 | I5 682 | sg49 683 | g8 684 | (g9 685 | (I0 686 | tS'b' 687 | tRp123 688 | (I1 689 | (I5 690 | I1 691 | I1 692 | tg24 693 | I00 694 | S"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x98 \xe9\x9fS\xfa\xbfJ\x89'\xc8\xe8\x85\xf2?J\x89'\xc8\xe8\x85\xf2?" 695 | tbsg51 696 | g8 697 | (g9 698 | (I0 699 | tS'b' 700 | tRp124 701 | (I1 702 | (I5 703 | tg53 704 | I00 705 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?2\x98\xd4\xeep\xcf\xaa?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?we8\x9f\xa0\xdf\xb1?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00P<\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00P<\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@' 706 | tbsbsg70 707 | I2 708 | sg71 709 | I1 710 | sg15 711 | g72 712 | (g12 713 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 714 | tRp125 715 | sg74 716 | Nsg7 717 | Nsg75 718 | I2 719 | sg13 720 | I1 721 | sg14 722 | Nsg17 723 | g38 724 | sg76 725 | g36 726 | sg77 727 | I2 728 | sg25 729 | I2 730 | sbag1 731 | (g30 732 | g3 733 | NtRp126 734 | (dp127 735 | g33 736 | g42 737 | sg43 738 | g44 739 | (I2 740 | g8 741 | (g9 742 | (I0 743 | tS'b' 744 | tRp128 745 | (I1 746 | (I1 747 | tg12 748 | I00 749 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 750 | tbI1 751 | tRp129 752 | (dp130 753 | g48 754 | I5 755 | sg49 756 | g8 757 | (g9 758 | (I0 759 | tS'b' 760 | tRp131 761 | (I1 762 | (I5 763 | I1 764 | I1 765 | tg24 766 | I00 767 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xce\xf2\xdfW\x8a\xc2\xf8\xbf\xe3AS\x88L?\xf2?\xe3AS\x88L?\xf2?' 768 | tbsg51 769 | g8 770 | (g9 771 | (I0 772 | tS'b' 773 | tRp132 774 | (I1 775 | (I5 776 | tg53 777 | I00 778 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xbf\xd0\xfc\xe2\x97\xd6\xa5?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?S\x16Q\xd9\x1f\x1e\xad?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00h<\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00r<\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@' 779 | tbsbsg70 780 | I2 781 | sg71 782 | I1 783 | sg15 784 | g72 785 | (g12 786 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 787 | tRp133 788 | sg74 789 | Nsg7 790 | Nsg75 791 | I2 792 | sg13 793 | I1 794 | sg14 795 | Nsg17 796 | g38 797 | sg76 798 | g36 799 | sg77 800 | I2 801 | sg25 802 | I2 803 | sbag1 804 | (g30 805 | g3 806 | NtRp134 807 | (dp135 808 | g33 809 | g42 810 | sg43 811 | g44 812 | (I2 813 | g8 814 | (g9 815 | (I0 816 | tS'b' 817 | tRp136 818 | (I1 819 | (I1 820 | tg12 821 | I00 822 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 823 | tbI1 824 | tRp137 825 | (dp138 826 | g48 827 | I5 828 | sg49 829 | g8 830 | (g9 831 | (I0 832 | tS'b' 833 | tRp139 834 | (I1 835 | (I5 836 | I1 837 | I1 838 | tg24 839 | I00 840 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbd\x1eJ\xd9\x06\x81\xf7\xbf\xb3\xa4\xad\xb1J\x01\xf2?\xb3\xa4\xad\xb1J\x01\xf2?' 841 | tbsg51 842 | g8 843 | (g9 844 | (I0 845 | tS'b' 846 | tRp140 847 | (I1 848 | (I5 849 | tg53 850 | I00 851 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xa3\xf1\xd00T\xcd\xa1?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x84\x97\x16Ap\xbc\xa7?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00P<\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00@\xbc\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@' 852 | tbsbsg70 853 | I2 854 | sg71 855 | I1 856 | sg15 857 | g72 858 | (g12 859 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 860 | tRp141 861 | sg74 862 | Nsg7 863 | Nsg75 864 | I2 865 | sg13 866 | I1 867 | sg14 868 | Nsg17 869 | g38 870 | sg76 871 | g36 872 | sg77 873 | I2 874 | sg25 875 | I2 876 | sbag1 877 | (g30 878 | g3 879 | NtRp142 880 | (dp143 881 | g33 882 | g42 883 | sg43 884 | g44 885 | (I2 886 | g8 887 | (g9 888 | (I0 889 | tS'b' 890 | tRp144 891 | (I1 892 | (I1 893 | tg12 894 | I00 895 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 896 | tbI1 897 | tRp145 898 | (dp146 899 | g48 900 | I5 901 | sg49 902 | g8 903 | (g9 904 | (I0 905 | tS'b' 906 | tRp147 907 | (I1 908 | (I5 909 | I1 910 | I1 911 | tg24 912 | I00 913 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xce{\xb7\x99\x93z\xf6\xbf\x9d%Me\xa9\xca\xf1?\x9d%Me\xa9\xca\xf1?' 914 | tbsg51 915 | g8 916 | (g9 917 | (I0 918 | tS'b' 919 | tRp148 920 | (I1 921 | (I5 922 | tg53 923 | I00 924 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xae\xc96\x9f\xae\n\x9d?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?t\x86$jt\\\xa3?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00@<\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00@<\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@' 925 | tbsbsg70 926 | I2 927 | sg71 928 | I1 929 | sg15 930 | g72 931 | (g12 932 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 933 | tRp149 934 | sg74 935 | Nsg7 936 | Nsg75 937 | I2 938 | sg13 939 | I1 940 | sg14 941 | Nsg17 942 | g38 943 | sg76 944 | g36 945 | sg77 946 | I2 947 | sg25 948 | I2 949 | sbatbsg74 950 | NsS'learning_rate' 951 | p150 952 | F0.10000000000000001 953 | sS'n_estimators' 954 | p151 955 | I10 956 | sg77 957 | I2 958 | sS'alpha' 959 | p152 960 | F0.90000000000000002 961 | sS'warm_start' 962 | p153 963 | I00 964 | sS'loss' 965 | p154 966 | S'deviance' 967 | p155 968 | sg75 969 | I2 970 | sS'subsample' 971 | p156 972 | F1 973 | sS'init_' 974 | p157 975 | g1 976 | (csklearn.ensemble.gradient_boosting 977 | LogOddsEstimator 978 | p158 979 | g3 980 | NtRp159 981 | (dp160 982 | S'prior' 983 | p161 984 | g72 985 | (g24 986 | S'\x0b\x03\xadz\xea\x93\xf1?' 987 | tRp162 988 | sbsS'n_features' 989 | p163 990 | I2 991 | sb. --------------------------------------------------------------------------------