├── sklearn_pmml
    ├── convert
    │   ├── test
    │   │   ├── __init__.py
    │   │   ├── jpmml-csv-evaluator
    │   │   │   ├── README.md
    │   │   │   ├── pom.xml
    │   │   │   └── src
    │   │   │   │   └── main
    │   │   │   │       └── java
    │   │   │   │           └── sklearn
    │   │   │   │               └── pmml
    │   │   │   │                   └── jpmml
    │   │   │   │                       └── JPMMLCSVEvaluator.java
    │   │   ├── test_randomForestConverter.py
    │   │   ├── test_derived_fields.py
    │   │   ├── test_gradientBoostingConverter.py
    │   │   ├── test_decisionTreeClassifierConverter.py
    │   │   └── jpmml_test.py
    │   ├── __init__.py
    │   ├── random_forest.py
    │   ├── features.py
    │   ├── utils.py
    │   ├── tree.py
    │   ├── gbrt.py
    │   └── model.py
    ├── __init__.py
    └── test
    │   ├── data
    │       └── gradient_boosting_classifier
    │       │   ├── context.pkl
    │       │   ├── document.pmml
    │       │   └── estimator.pkl
    │   └── __init__.py
├── MANIFEST.in
├── .travis.yml
├── .gitignore
├── setup.py
├── LICENSE
├── README.md
└── examples
    └── pmml
        ├── DecisionTreeClassifier.pmml
        ├── GradientBoostingClassifier.pmml
        └── RandomForestClassifier.pmml


/sklearn_pmml/convert/test/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sklearn_pmml/__init__.py:
--------------------------------------------------------------------------------
1 | from sklearn_pmml.convert import *


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include requirements.txt


--------------------------------------------------------------------------------
/sklearn_pmml/convert/__init__.py:
--------------------------------------------------------------------------------
 1 | from sklearn_pmml import pmml
 2 | from sklearn_pmml.convert.features import Feature, NumericFeature, CategoricalFeature, RealNumericFeature
 3 | from sklearn_pmml.convert.gbrt import *
 4 | from sklearn_pmml.convert.tree import *
 5 | from sklearn_pmml.convert.random_forest import *
 6 | from sklearn_pmml.convert.model import *
 7 | from sklearn_pmml.convert.utils import *
 8 | 
 9 | # Names exported by ``from sklearn_pmml.convert import *``; anything not listed here must be imported by name.
10 | __all__ = ['TransformationContext', 'EstimatorConverter', 'find_converter', 'GradientBoostingConverter', 'LogOddsEstimatorConverter', 'DecisionTreeConverter', 'features']
11 | 
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/sklearn_pmml/convert/test/jpmml-csv-evaluator/README.md:
--------------------------------------------------------------------------------
 1 | # About
 2 | This is a simple [JPMML](http://github.com/jpmml)-based CLI evaluator for PMML models.
 3 | 
 4 | # Notes
 5 | This submodule relies on the AGPL-licensed library [jpmml-evaluator](http://github.com/jpmml/jpmml-evaluator),
 6 | but it is only used for testing and is not part of the sklearn-pmml distribution.
 7 | Since users never interact with the AGPL-licensed library, I think it's OK to use it in tests.
 8 |  
 9 | # Usage
10 | 1. Build the JAR file (make sure you have JDK8 installed):
11 | ```
12 | mvn clean package
13 | ```
14 | 2. Run with maven:
15 | ```
16 | mvn exec:java -e -q \
17 | -Dexec.mainClass=sklearn.pmml.jpmml.JPMMLCSVEvaluator \
18 | -Dexec.args="/path/to/pmml /path/to/input.csv /path/to/output.csv"
19 | ```


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "2.7"
 4 |   - "3.4"
 5 | #  - "nightly"
 6 | # command to install dependencies
 7 | before_install:
 8 |   - sudo add-apt-repository ppa:webupd8team/java -y
 9 |   - sudo apt-get update -qq
10 |   - sudo apt-get install oracle-java8-installer
11 |   - sudo apt-get install maven
12 |   - export PATH=/usr/bin:$PATH
13 |   - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
14 |   - chmod +x miniconda.sh
15 |   - ./miniconda.sh -b
16 |   - export PATH=/home/travis/miniconda/bin:$PATH
17 |   - conda update --yes conda
18 |   # install the heaviest dependencies with conda to save some time
19 |   - travis_retry conda install --yes python=$TRAVIS_PYTHON_VERSION pip numpy scipy scikit-learn pandas lxml
20 | 
21 | install:
22 |   - travis_retry pip install .
23 | # command to run tests
24 | script: python setup.py test
25 | cache: apt
26 | 


--------------------------------------------------------------------------------
/sklearn_pmml/test/data/gradient_boosting_classifier/context.pkl:
--------------------------------------------------------------------------------
 1 | ccopy_reg
 2 | _reconstructor
 3 | p1
 4 | (csklearn_pmml.convert
 5 | TransformationContext
 6 | p2
 7 | c__builtin__
 8 | object
 9 | p3
10 | NtRp4
11 | (dp5
12 | S'schemas'
13 | p6
14 | (dp7
15 | S'output'
16 | p8
17 | (lp9
18 | g1
19 | (csklearn_pmml.convert.features
20 | RealNumericFeature
21 | p10
22 | g3
23 | NtRp11
24 | (dp12
25 | S'_namespace'
26 | p13
27 | S''
28 | sS'_invalid_value_treatment'
29 | p14
30 | S'asIs'
31 | p15
32 | sS'_name'
33 | p16
34 | g8
35 | sbasS'input'
36 | p17
37 | (lp18
38 | g1
39 | (csklearn_pmml.convert.features
40 | IntegerNumericFeature
41 | p19
42 | g3
43 | NtRp20
44 | (dp21
45 | g13
46 | S''
47 | sg14
48 | g15
49 | sg16
50 | S'x1'
51 | p22
52 | sbag1
53 | (csklearn_pmml.convert.features
54 | StringCategoricalFeature
55 | p23
56 | g3
57 | NtRp24
58 | (dp25
59 | S'value_list'
60 | p26
61 | (lp27
62 | S'zero'
63 | p28
64 | aS'one'
65 | p29
66 | asg13
67 | S''
68 | sg14
69 | g15
70 | sg16
71 | S'x2'
72 | p30
73 | sbassb.


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | #  Usually these files are written by a python script from a template
28 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 | 
46 | # Translations
47 | *.mo
48 | *.pot
49 | 
50 | # Django stuff:
51 | *.log
52 | 
53 | # Sphinx documentation
54 | docs/_build/
55 | 
56 | # PyBuilder
57 | target/
58 | 
59 | #java/intellij stuff
60 | *.iml
61 | *.class
62 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, Command
 2 | 
 3 | 
 4 | class PyTest(Command):
 5 |     user_options = []
 6 | 
 7 |     def initialize_options(self):
 8 |         pass
 9 | 
10 |     def finalize_options(self):
11 |         pass
12 | 
13 |     def run(self):
14 |         import subprocess
15 |         import sys
16 |         errno = subprocess.call([sys.executable, 'runtests.py'])
17 |         raise SystemExit(errno)
18 | 
19 | setup(
20 |     name='sklearn-pmml',
21 |     version='0.1.2',
22 |     packages=['sklearn_pmml', 'sklearn_pmml.convert'],
23 |     install_requires=[
24 |         "pyxb",
25 |         "scikit-learn",
26 |         "pandas",
27 |         "scipy",
28 |         "pytest",  # NOTE(review): looks like a test-only dependency - confirm it is needed at install time
29 |         "lxml",
30 |         "enum34",  # NOTE(review): backport of the stdlib enum module (Python 3.4+); presumably for Python 2.7 only - confirm
31 |     ],
32 |     cmdclass={'test': PyTest},  # `python setup.py test` runs the PyTest command defined above
33 |     url='https://github.com/alex-pirozhenko/sklearn-pmml',
34 |     license='MIT',
35 |     author='Alex Pirozhenko',
36 |     author_email='apirozhenko@pulsepoint.com',
37 |     description='A library that allows serialization of SciKit-Learn estimators into PMML'
38 | )
39 | 


--------------------------------------------------------------------------------
/sklearn_pmml/convert/test/test_randomForestConverter.py:
--------------------------------------------------------------------------------
 1 | from sklearn_pmml.convert import IntegerCategoricalFeature
 2 | from sklearn_pmml.convert.test.jpmml_test import JPMMLClassificationTest, JPMMLTest, TARGET_NAME
 3 | from unittest import TestCase
 4 | from sklearn.ensemble import RandomForestClassifier
 5 | 
 6 | __author__ = 'evancox'
 7 | 
 8 | 
 9 | from sklearn_pmml.convert.random_forest import RandomForestClassifierConverter
10 | 
11 | 
12 | class TestRandomForestClassifierParity(TestCase, JPMMLClassificationTest):
13 | 
14 |     @classmethod
15 |     def setUpClass(cls):
16 |         if JPMMLTest.can_run():
17 |             JPMMLTest.init_jpmml()
18 | 
19 |     def setUp(self):
20 |         self.model = RandomForestClassifier(
21 |             n_estimators=3,
22 |             max_depth=3
23 |         )
24 |         self.init_data()
25 |         self.converter = RandomForestClassifierConverter(
26 |             estimator=self.model,
27 |             context=self.ctx
28 |         )
29 | 
30 |     @property
31 |     def output(self):
32 |         return IntegerCategoricalFeature(name=TARGET_NAME, value_list=[0, 1, 2])
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 alex-pirozhenko
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Build Status](https://travis-ci.org/alex-pirozhenko/sklearn-pmml.svg)](https://travis-ci.org/alex-pirozhenko/sklearn-pmml)
 2 | [![Join the chat at https://gitter.im/alex-pirozhenko/sklearn-pmml](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/alex-pirozhenko/sklearn-pmml?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 3 | 
 4 | # sklearn-pmml
 5 | 
 6 | A library that allows serialization of SciKit-Learn estimators into PMML
 7 | 
 8 | # Installation
 9 | The easiest way is to use pip:
10 | ```
11 | pip install sklearn-pmml
12 | ```
13 | 
14 | # Supported models
15 | - DecisionTreeClassifier
16 | - DecisionTreeRegressor
17 | - GradientBoostingClassifier
18 | - RandomForestClassifier
19 | 
20 | # PMML output
21 | 
22 | ## Classification
23 | Classifier converters can only operate with categorical outputs, and for each categorical output variable ```varname``` 
24 | the PMML output contains the following outputs:
25 | - categorical ```varname``` for the predicted label of the instance
26 | - double ```varname.label``` for the probability for a given label
27 | 
28 | ## Regression
29 | A regression model's PMML outputs a numeric response variable that has the same name as the output variable.
30 | 


--------------------------------------------------------------------------------
/sklearn_pmml/convert/test/jpmml-csv-evaluator/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 3 |          xmlns="http://maven.apache.org/POM/4.0.0"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <modelVersion>4.0.0</modelVersion>
 6 | 
 7 |     <groupId>sklearn.pmml.jpmml</groupId>
 8 |     <artifactId>jpmml-csv-evaluator</artifactId>
 9 |     <version>1.0-SNAPSHOT</version>
10 | 
11 |     <dependencies>
12 | 
13 |         <dependency>
14 |             <groupId>org.jpmml</groupId>
15 |             <artifactId>pmml-evaluator</artifactId>
16 |             <!-- DO NOT UPGRADE: newer versions are licensed under AGPL -->
17 |             <version>1.2.5</version>
18 |         </dependency>
19 | 
20 |         <dependency>
21 |             <groupId>org.jpmml</groupId>
22 |             <artifactId>pmml-model</artifactId>
23 |             <!-- OK to upgrade this one: released under BSD 3-clause. Must not upgrade pmml-evaluator.  -->
24 |             <version>1.2.6</version>
25 |         </dependency>
26 | 
27 |         <dependency>
28 |             <groupId>net.sf.supercsv</groupId>
29 |             <artifactId>super-csv</artifactId>
30 |             <version>2.0.1</version>
31 |         </dependency>
32 | 
33 |     </dependencies>
34 | 
35 | 
36 | 
37 |     <build>
38 | 
39 |         <plugins>
40 |             <plugin>
41 |                 <groupId>org.apache.maven.plugins</groupId>
42 |                 <artifactId>maven-compiler-plugin</artifactId>
43 |                 <version>3.0</version>
44 |                 <configuration>
45 |                     <source>1.7</source>
46 |                     <target>1.7</target>
47 |                 </configuration>
48 |             </plugin>
49 |         </plugins>
50 |     </build>
51 | 
52 | 
53 | </project>


--------------------------------------------------------------------------------
/sklearn_pmml/convert/random_forest.py:
--------------------------------------------------------------------------------
 1 | from sklearn_pmml.convert import CategoricalFeature
 2 | 
 3 | __author__ = 'evancox'
 4 | 
 5 | 
 6 | from sklearn.ensemble import RandomForestClassifier
 7 | from sklearn_pmml.convert.model import Schema, ModelMode, ClassifierConverter
 8 | from sklearn_pmml.convert.tree import DecisionTreeConverter
 9 | from sklearn_pmml.convert.utils import estimator_to_converter
10 | 
11 | import sklearn_pmml.pmml as pmml
12 | 
13 | 
14 | class RandomForestClassifierConverter(ClassifierConverter):
15 |     def __init__(self, estimator, context):
16 |         super(RandomForestClassifierConverter, self).__init__(estimator, context)
17 |         assert isinstance(estimator, RandomForestClassifier), \
18 |             'This converter can only process RandomForestClassifier instances'
19 |         assert len(context.schemas[Schema.OUTPUT]) == 1, 'Only one-label classification is supported'
20 | 
21 |     def model(self, verification_data=None):
22 |         mining_model = pmml.MiningModel(functionName=ModelMode.CLASSIFICATION.value)
23 |         mining_model.append(self.mining_schema())
24 |         mining_model.append(self.output())
25 |         mining_model.append(self.segmentation())
26 |         if verification_data is not None:
27 |             mining_model.append(self.model_verification(verification_data))
28 |         return mining_model
29 | 
30 |     def segmentation(self):
31 |         """
32 |         Build a segmentation (sequence of estimators)
33 |         :return: Segmentation element
34 |         """
35 |         segmentation = pmml.Segmentation(multipleModelMethod="weightedAverage")
36 | 
37 |         for index, est in enumerate(self.estimator.estimators_):
38 |             s = pmml.Segment(id=index)
39 |             s.append(pmml.True_())
40 |             s.append(DecisionTreeConverter(est, self.context, ModelMode.CLASSIFICATION)._model())
41 |             segmentation.append(s)
42 | 
43 |         return segmentation
44 | 
45 | 
46 | estimator_to_converter[RandomForestClassifier] = RandomForestClassifierConverter


--------------------------------------------------------------------------------
/sklearn_pmml/convert/test/test_derived_fields.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from sklearn.tree import DecisionTreeClassifier
 3 | from sklearn_pmml import EstimatorConverter, TransformationContext, pmml
 4 | from sklearn_pmml.convert import Schema, ModelMode
 5 | from sklearn_pmml.convert.features import *
 6 | import numpy as np
 7 | 
 8 | test_cases = [  # one tuple per case, in the order of the parametrize argument names used below
 9 |     (
10 |         [  # input fields
11 |             RealNumericFeature(name='f1'),
12 |         ],
13 |         [  # derived fields
14 |             DerivedFeature(
15 |                 feature=RealNumericFeature(name='f2'),
16 |                 transformation=pmml.Discretize(mapMissingTo=0, defaultValue=1, field='f1'),
17 |                 function=np.vectorize(lambda f1: 0 if f1 is None else 1)
18 |             )
19 |         ],
20 |         [RealNumericFeature(name='f3')],  # output fields
21 |         # expected data dictionary XML (lists f1 and f3, not the derived field f2)
22 |         '<?xml version="1.0" ?>'
23 |         '<ns1:DataDictionary xmlns:ns1="http://www.dmg.org/PMML-4_2">'
24 |         '<ns1:DataField dataType="double" name="f1" optype="continuous"/>'
25 |         '<ns1:DataField dataType="double" name="f3" optype="continuous"/>'
26 |         '</ns1:DataDictionary>',
27 |         # expected transformation dictionary XML (holds only the derived field f2)
28 |         '<?xml version="1.0" ?>'
29 |         '<ns1:TransformationDictionary xmlns:ns1="http://www.dmg.org/PMML-4_2">'
30 |         '<ns1:DerivedField dataType="double" name="f2" optype="continuous">'
31 |         '<ns1:Discretize defaultValue="1" field="f1" mapMissingTo="0"/>'
32 |         '</ns1:DerivedField>'
33 |         '</ns1:TransformationDictionary>'
34 |     )
35 | ]
36 | 
37 | @pytest.mark.parametrize("input_fields,derived_fields,output_fields,expected_data_dictionary,expected_transformation_dictionary", test_cases)
38 | def test_transformation_dictionary(input_fields, derived_fields, output_fields, expected_data_dictionary, expected_transformation_dictionary):
39 |     converter = EstimatorConverter(
40 |         DecisionTreeClassifier(),
41 |         context=TransformationContext({
42 |             Schema.INPUT: input_fields,
43 |             Schema.DERIVED: derived_fields,
44 |             Schema.MODEL: input_fields + derived_fields,
45 |             Schema.OUTPUT: output_fields
46 |         }),
47 |         mode=ModelMode.CLASSIFICATION
48 |     )
49 | 
50 |     assert converter.data_dictionary().toxml() == expected_data_dictionary, 'Error in data dictionary generation'
51 |     assert converter.transformation_dictionary().toxml() == expected_transformation_dictionary,\
52 |         'Error in transformation dictionary generation'


--------------------------------------------------------------------------------
/sklearn_pmml/convert/test/test_gradientBoostingConverter.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from sklearn.ensemble import GradientBoostingClassifier
 4 | import numpy as np
 5 | 
 6 | from sklearn_pmml.convert.test.jpmml_test import JPMMLClassificationTest, JPMMLTest, TARGET_NAME
 7 | from sklearn_pmml.convert import TransformationContext, Schema
 8 | from sklearn_pmml.convert.features import *
 9 | from sklearn_pmml.convert.gbrt import GradientBoostingConverter
10 | 
11 | 
12 | class TestGradientBoostingClassifierConverter(TestCase):
13 |     def setUp(self):
14 |         np.random.seed(1)
15 |         self.est = GradientBoostingClassifier(max_depth=2, n_estimators=10)
16 |         self.est.fit([
17 |             [0, 0],
18 |             [0, 1],
19 |             [1, 0],
20 |             [1, 1],
21 |         ], [0, 1, 1, 1])
22 |         self.ctx = TransformationContext({
23 |             Schema.INPUT: [
24 |                 IntegerNumericFeature('x1'),
25 |                 StringCategoricalFeature('x2', ['zero', 'one'])
26 |             ],
27 |             Schema.MODEL: [
28 |                 IntegerNumericFeature('x1'),
29 |                 StringCategoricalFeature('x2', ['zero', 'one'])
30 |             ],
31 |             Schema.DERIVED: [],
32 |             Schema.OUTPUT: [
33 |                 IntegerCategoricalFeature('output', [0, 1])
34 |             ]
35 |         })
36 |         self.converter = GradientBoostingConverter(
37 |             estimator=self.est,
38 |             context=self.ctx
39 |         )
40 | 
41 |     def test_transform(self):
42 |         p = self.converter.pmml()
43 |         mm = p.MiningModel[0]
44 |         assert mm.MiningSchema is not None, 'Missing mining schema'
45 |         assert len(mm.MiningSchema.MiningField) == 2, 'Wrong number of mining fields'
46 |         assert mm.Segmentation is not None, 'Missing segmentation root'
47 | 
48 |     def test_transform_with_verification(self):
49 |         p = self.converter.pmml([
50 |             {'x1': 0, 'x2': 'zero', 'output#1': self.est.predict_proba([[0, 0]])[0, 1], 'output#0': self.est.predict_proba([[0, 0]])[0, 0], 'output': self.est.predict([[0, 0]])},
51 |             {'x1': 0, 'x2': 'one', 'output#1': self.est.predict_proba([[0, 1]])[0, 1], 'output#0': self.est.predict_proba([[0, 1]])[0, 0], 'output': self.est.predict([[0, 1]])},
52 |             {'x1': 1, 'x2': 'zero', 'output#1': self.est.predict_proba([[1, 0]])[0, 1], 'output#0': self.est.predict_proba([[1, 0]])[0, 0], 'output': self.est.predict([[1, 0]])},
53 |             {'x1': 1, 'x2': 'one', 'output#1': self.est.predict_proba([[1, 1]])[0, 1], 'output#0': self.est.predict_proba([[1, 1]])[0, 0], 'output': self.est.predict([[1, 1]])},
54 |         ])
55 |         mm = p.MiningModel[0]
56 |         assert mm.MiningSchema is not None, 'Missing mining schema'
57 |         assert len(mm.MiningSchema.MiningField) == 2, 'Wrong number of mining fields'
58 |         assert mm.Segmentation is not None, 'Missing segmentation root'
59 | 
60 | 
61 | class TestGradientBoostingClassifierParity(TestCase, JPMMLClassificationTest):
62 | 
63 |     @classmethod
64 |     def setUpClass(cls):
65 |         if JPMMLTest.can_run():
66 |             JPMMLTest.init_jpmml()
67 | 
68 |     def setUp(self):
69 |         self.model = GradientBoostingClassifier(n_estimators=2, max_depth=2)
70 |         self.init_data_one_label()
71 |         self.converter = GradientBoostingConverter(
72 |             estimator=self.model,
73 |             context=self.ctx
74 |         )
75 | 


--------------------------------------------------------------------------------
/sklearn_pmml/test/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from unittest import TestCase
 3 | from sklearn.base import BaseEstimator
 4 | 
 5 | try:
 6 |     import cPickle as pickle
 7 | except:
 8 |     import pickle
 9 | from sklearn_pmml.convert import *
10 | from sklearn_pmml import pmml
11 | 
12 | 
13 | class TestSerializationMeta(type):
14 |     TEST_DIR = os.path.dirname(__file__)
15 |     DATA_DIR = os.path.join(TEST_DIR, 'data')
16 |     ESTIMATOR_FILE_NAME = 'estimator.pkl'
17 |     PMML_FILE_NAME = 'document.pmml'
18 |     CONTEXT_FILE_NAME = 'context.pkl'
19 | 
20 |     def __new__(mcs, name, bases, d):
21 |         """
22 |         This method overrides default behaviour for creation of new instances. For every directory abc in data it
23 |         creates a method called test_abc, with the body of load_and_compare function.
24 |         """
25 |         def gen_test(suite_name):
26 |             def load_and_compare(self):
27 |                 # load the context.pkl, document.pmml and estimator.pkl
28 |                 suite_path = os.path.join(mcs.DATA_DIR, suite_name)
29 |                 content = os.listdir(suite_path)
30 |                 assert len(content) == 3, 'There should be exactly two files in the suite directory'
31 |                 assert mcs.ESTIMATOR_FILE_NAME in content, 'Estimator should be stored in {} file'.format(mcs.ESTIMATOR_FILE_NAME)
32 |                 assert mcs.PMML_FILE_NAME in content, 'PMML should be stored in {} file'.format(mcs.PMML_FILE_NAME)
33 |                 assert mcs.CONTEXT_FILE_NAME in content, 'Context should be stored in {} file'.format(mcs.CONTEXT_FILE_NAME)
34 |                 with open(os.path.join(suite_path, mcs.ESTIMATOR_FILE_NAME), 'r') as est_file:
35 |                     est = pickle.load(est_file)
36 |                     assert isinstance(est, BaseEstimator), '{} should be a trained estimator'.format(mcs.ESTIMATOR_FILE_NAME)
37 |                 with open(os.path.join(suite_path, mcs.CONTEXT_FILE_NAME), 'r') as ctx_file:
38 |                     ctx = pickle.load(ctx_file)
39 |                     assert isinstance(ctx, TransformationContext), '{} should be a transformation context'.format(mcs.CONTEXT_FILE_NAME)
40 |                 converter = find_converter(est)
41 |                 assert converter is not None, 'Can not find converter for {}'.format(est)
42 |                 transformed_pmml = converter(est, ctx).pmml()
43 |                 with open(os.path.join(suite_path, mcs.PMML_FILE_NAME), 'r') as pmml_file:
44 |                     loaded_pmml = pmml.CreateFromDocument('\n'.join(pmml_file.readlines()))
45 |                 self.maxDiff = None
46 |                 # make sure that the expected PMML matches the produced one
47 |                 self.assertEquals(loaded_pmml.toDOM().toprettyxml(), transformed_pmml.toDOM().toprettyxml())
48 | 
49 |             return load_and_compare
50 | 
51 |         # for every batch in the data dir create a corresponding test method
52 |         for case in os.listdir(TestSerializationMeta.DATA_DIR):
53 |             test_name = 'test_{}'.format(case)
54 |             d[test_name] = gen_test(case)
55 |         return type.__new__(mcs, name, bases, d)
56 | 
57 | 
58 | class TestSerialization(TestCase):
59 |     """
60 |     This is an automated tester for serializers. It uses a custom metaclass to define the test cases based on the
61 |     content of the data directory. For the logic behind every check see load_and_compare method above.
62 |     """
63 |     __metaclass__ = TestSerializationMeta  # NOTE(review): Python 2 syntax; Python 3 ignores it, so no tests are generated there
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/examples/pmml/DecisionTreeClassifier.pmml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" ?>
 2 | <ns1:PMML xmlns:ns1="http://www.dmg.org/PMML-4_2" version="4.2">
 3 |     <ns1:Header/>
 4 |     <ns1:DataDictionary>
 5 |         <ns1:DataField dataType="integer" name="internal::y" optype="categorical">
 6 |             <ns1:Value value="0"/>
 7 |             <ns1:Value value="1"/>
 8 |             <ns1:Value value="2"/>
 9 |         </ns1:DataField>
10 |         <ns1:DataField dataType="integer" name="y" optype="categorical">
11 |             <ns1:Value value="0"/>
12 |             <ns1:Value value="1"/>
13 |             <ns1:Value value="2"/>
14 |         </ns1:DataField>
15 |         <ns1:DataField dataType="double" name="col_0" optype="continuous"/>
16 |         <ns1:DataField dataType="double" name="col_1" optype="continuous"/>
17 |         <ns1:DataField dataType="double" name="col_2" optype="continuous"/>
18 |         <ns1:DataField dataType="double" name="col_3" optype="continuous"/>
19 |     </ns1:DataDictionary>
20 |     <ns1:TransformationDictionary/>
21 |     <ns1:TreeModel functionName="classification" splitCharacteristic="binarySplit">
22 |         <ns1:MiningSchema>
23 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_0"/>
24 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_1"/>
25 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_2"/>
26 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_3"/>
27 |             <ns1:MiningField name="internal::y" usageType="predicted"/>
28 |         </ns1:MiningSchema>
29 |         <ns1:Output>
30 |             <ns1:OutputField dataType="integer" feature="predictedValue" name="y" optype="categorical"/>
31 |             <ns1:OutputField dataType="double" feature="probability" name="y::0" optype="continuous" targetField="internal::y" value="0"/>
32 |             <ns1:OutputField dataType="double" feature="probability" name="y::1" optype="continuous" targetField="internal::y" value="1"/>
33 |             <ns1:OutputField dataType="double" feature="probability" name="y::2" optype="continuous" targetField="internal::y" value="2"/>
34 |         </ns1:Output>
35 |         <ns1:Node recordCount="500.0" score="1">
36 |             <ns1:True/>
37 |             <ns1:ScoreDistribution confidence="0.328" recordCount="164.0" value="0"/>
38 |             <ns1:ScoreDistribution confidence="0.346" recordCount="173.0" value="1"/>
39 |             <ns1:ScoreDistribution confidence="0.326" recordCount="163.0" value="2"/>
40 |             <ns1:Node recordCount="52.0" score="1">
41 |                 <ns1:SimplePredicate field="col_1" operator="lessOrEqual" value="-1.23250246048"/>
42 |                 <ns1:ScoreDistribution confidence="0.115384615385" recordCount="6.0" value="0"/>
43 |                 <ns1:ScoreDistribution confidence="0.596153846154" recordCount="31.0" value="1"/>
44 |                 <ns1:ScoreDistribution confidence="0.288461538462" recordCount="15.0" value="2"/>
45 |                 <ns1:Node recordCount="49.0" score="1">
46 |                     <ns1:SimplePredicate field="col_3" operator="lessOrEqual" value="1.78307962418"/>
47 |                     <ns1:ScoreDistribution confidence="0.122448979592" recordCount="6.0" value="0"/>
48 |                     <ns1:ScoreDistribution confidence="0.632653061224" recordCount="31.0" value="1"/>
49 |                     <ns1:ScoreDistribution confidence="0.244897959184" recordCount="12.0" value="2"/>
50 |                 </ns1:Node>
51 |                 <ns1:Node recordCount="3.0" score="2">
52 |                     <ns1:True/>
53 |                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
54 |                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="1"/>
55 |                     <ns1:ScoreDistribution confidence="1.0" recordCount="3.0" value="2"/>
56 |                 </ns1:Node>
57 |             </ns1:Node>
58 |             <ns1:Node recordCount="448.0" score="0">
59 |                 <ns1:True/>
60 |                 <ns1:ScoreDistribution confidence="0.352678571429" recordCount="158.0" value="0"/>
61 |                 <ns1:ScoreDistribution confidence="0.316964285714" recordCount="142.0" value="1"/>
62 |                 <ns1:ScoreDistribution confidence="0.330357142857" recordCount="148.0" value="2"/>
63 |                 <ns1:Node recordCount="227.0" score="2">
64 |                     <ns1:SimplePredicate field="col_1" operator="lessOrEqual" value="0.21028482914"/>
65 |                     <ns1:ScoreDistribution confidence="0.339207048458" recordCount="77.0" value="0"/>
66 |                     <ns1:ScoreDistribution confidence="0.251101321586" recordCount="57.0" value="1"/>
67 |                     <ns1:ScoreDistribution confidence="0.409691629956" recordCount="93.0" value="2"/>
68 |                 </ns1:Node>
69 |                 <ns1:Node recordCount="221.0" score="1">
70 |                     <ns1:True/>
71 |                     <ns1:ScoreDistribution confidence="0.366515837104" recordCount="81.0" value="0"/>
72 |                     <ns1:ScoreDistribution confidence="0.384615384615" recordCount="85.0" value="1"/>
73 |                     <ns1:ScoreDistribution confidence="0.248868778281" recordCount="55.0" value="2"/>
74 |                 </ns1:Node>
75 |             </ns1:Node>
76 |         </ns1:Node>
77 |     </ns1:TreeModel>
78 | </ns1:PMML>
79 | 


--------------------------------------------------------------------------------
/sklearn_pmml/convert/test/jpmml-csv-evaluator/src/main/java/sklearn/pmml/jpmml/JPMMLCSVEvaluator.java:
--------------------------------------------------------------------------------
  1 | package sklearn.pmml.jpmml;
  2 | 
  3 | import com.google.common.collect.Lists;
  4 | import com.google.common.collect.Maps;
  5 | 
  6 | import com.google.common.collect.Sets;
  7 | import org.dmg.pmml.PMML;
  8 | import org.dmg.pmml.FieldName;
  9 | import org.jpmml.model.JAXBUtil;
 10 | import org.jpmml.model.ImportFilter;
 11 | import org.jpmml.evaluator.FieldValue;
 12 | import org.jpmml.evaluator.Evaluator;
 13 | import org.jpmml.evaluator.ModelEvaluator;
 14 | import org.jpmml.evaluator.ModelEvaluatorFactory;
 15 | import org.supercsv.io.CsvMapReader;
 16 | import org.supercsv.io.CsvMapWriter;
 17 | import org.supercsv.prefs.CsvPreference;
 18 | import org.xml.sax.SAXException;
 19 | import org.xml.sax.InputSource;
 20 | 
 21 | import javax.xml.bind.JAXBException;
 22 | 
 23 | import java.io.FileInputStream;
 24 | import java.io.FileReader;
 25 | import java.io.FileWriter;
 26 | import java.io.IOException;
 27 | import java.io.InputStream;
 28 | import java.util.Arrays;
 29 | import java.util.HashMap;
 30 | import java.util.List;
 31 | import java.util.Map;
 32 | import java.util.Set;
 33 | import java.util.logging.Level;
 34 | import java.util.logging.Logger;
 35 | 
 36 | /**
 37 |  * Created by evancox on 7/23/15.
 38 |  */
 39 | public class JPMMLCSVEvaluator
 40 | {
 41 |     private static final Logger logger = Logger.getLogger(JPMMLCSVEvaluator.class.getCanonicalName());
 42 | 
 43 |     static PMML pmmlFromXml(final InputStream is)
 44 |     {
 45 |         try
 46 |         {
 47 |             return JAXBUtil.unmarshalPMML(ImportFilter.apply(new InputSource(is)));
 48 |         }
 49 |         catch (SAXException | JAXBException e)
 50 |         {
 51 |             throw new RuntimeException("Error reading PMML.", e);
 52 |         }
 53 |     }
 54 | 
 55 |     static Evaluator evaluatorFromPmml(final PMML pmml)
 56 |     {
 57 |         ModelEvaluatorFactory modelEvaluatorFactory = ModelEvaluatorFactory.newInstance();
 58 | 
 59 |         ModelEvaluator<?> modelEvaluator = modelEvaluatorFactory.newModelManager(pmml);
 60 | 
 61 |         return modelEvaluator;
 62 |     }
 63 | 
 64 |     static Evaluator evaluatorFromXml(final InputStream is)
 65 |     {
 66 |         // Adapted from:
 67 |         //   * https://github.com/jpmml/jpmml/blob/master/README.md
 68 |         //   * https://github.com/jpmml/jpmml-example/blob/master/src/main/java/org/jpmml/example/CsvEvaluationExample.java
 69 |         return evaluatorFromPmml(pmmlFromXml(is));
 70 |     }
 71 | 
 72 |     static List<Map<FieldName, ?>> getPredictions(Evaluator evaluator, String csvFeaturesFile) throws IOException
 73 |     {
 74 |         try (final CsvMapReader csvMapReader = new CsvMapReader(new FileReader(csvFeaturesFile), CsvPreference.STANDARD_PREFERENCE)) {
 75 |             final String[] headers = csvMapReader.getHeader(true);
 76 |             final Map<String, FieldName> fieldNameMap = new HashMap<>(headers.length);
 77 |             for (String header : Arrays.asList(headers))
 78 |             {
 79 |                 fieldNameMap.put(header, new FieldName(header));
 80 |             }
 81 | 
 82 |             Map<String, String> rawCsvMap;
 83 |             final List<Map<FieldName, ?>> predictions = Lists.newArrayList();
 84 |             while ((rawCsvMap = csvMapReader.read(headers)) != null) {
 85 |                 final Map<FieldName, FieldValue> featureMap = Maps.newHashMapWithExpectedSize(rawCsvMap.size());
 86 |                 for (Map.Entry<String, String> keyValue : rawCsvMap.entrySet())
 87 |                 {
 88 |                     final FieldName fieldName = fieldNameMap.get(keyValue.getKey());
 89 |                     final FieldValue fieldValue = evaluator.prepare(fieldName, keyValue.getValue());
 90 |                     featureMap.put(fieldName, fieldValue);
 91 |                 }
 92 |                 predictions.add(evaluator.evaluate(featureMap));
 93 |             }
 94 |             return predictions;
 95 |         }
 96 |     }
 97 | 
 98 |     static void writePredictions(Evaluator evaluator, List<Map<FieldName, ?>> predictions, String outputFile) throws IOException
 99 |     {
100 |         final int outputFieldCount = predictions.get(0).keySet().size();
101 |         final Set<FieldName> outputFields = Sets.newHashSetWithExpectedSize(outputFieldCount);
102 |         final String[] header = new String[outputFieldCount];
103 |         int index = 0;
104 |         for (FieldName fieldName : predictions.get(0).keySet())
105 |         {
106 |             if (fieldName != null) {
107 |                 outputFields.add(fieldName);
108 |                 header[index++] = fieldName.toString();
109 |             }
110 |         }
111 | 
112 |         try (final CsvMapWriter csvMapWriter = new CsvMapWriter(new FileWriter(outputFile), CsvPreference.STANDARD_PREFERENCE))
113 |         {
114 |             csvMapWriter.writeHeader(header);
115 |             for (Map<FieldName, ?> prediction : predictions) {
116 | 
117 |                 final Map<String, Object> row = Maps.newHashMapWithExpectedSize(prediction.size());
118 |                 for (Map.Entry<FieldName, ?> keyValue : prediction.entrySet())
119 |                 {
120 |                     if (keyValue.getKey() != null) {
121 |                         row.put(keyValue.getKey().toString(), keyValue.getValue());
122 |                     }
123 |                 }
124 |                 csvMapWriter.write(row, header);
125 |             }
126 |         }
127 |     }
128 | 
129 |     public static void main(String[] args)
130 |     {
131 |         if (args.length != 3)
132 |         {
133 |             throw new RuntimeException("Expected PMML file, feature data, and output predictions file");
134 |         }
135 |         final String pmmlFile = args[0];
136 |         final String csvFeaturesFile = args[1];
137 |         final String outputFile = args[2];
138 |         try
139 |         {
140 |             Evaluator evaluator = evaluatorFromXml(new FileInputStream(pmmlFile));
141 |             evaluator.verify();
142 |             final List<Map<FieldName, ?>> predictions = getPredictions(evaluator, csvFeaturesFile);
143 |             writePredictions(evaluator, predictions, outputFile);
144 |             logger.info(String.format("Wrote %d predictions from %s to %s", predictions.size(), csvFeaturesFile, outputFile));
145 |         }
146 |         catch (IOException ex)
147 |         {
148 |             logger.log(Level.SEVERE, "IOException", ex);
149 |             System.exit(1);
150 |         }
151 | 
152 | 
153 |     }
154 | 
155 | }
156 | 


--------------------------------------------------------------------------------
/sklearn_pmml/convert/test/test_decisionTreeClassifierConverter.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from sklearn_pmml.convert.test.jpmml_test import JPMMLClassificationTest, JPMMLRegressionTest, TARGET_NAME, TARGET
  3 | 
  4 | from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
  5 | 
  6 | from sklearn_pmml.convert import TransformationContext, pmml_row, ModelMode, Schema
  7 | from sklearn_pmml.convert.features import *
  8 | from sklearn_pmml.convert.tree import DecisionTreeConverter
  9 | from sklearn_pmml import pmml
 10 | 
 11 | from unittest import TestCase
 12 | 
 13 | 
 14 | class TestDecisionTreeClassifierConverter(TestCase):
 15 |     def setUp(self):
 16 |         np.random.seed(1)
 17 |         self.est = DecisionTreeClassifier(max_depth=2)
 18 |         self.est.fit([
 19 |             [0, 0],
 20 |             [0, 1],
 21 |             [1, 0],
 22 |             [1, 1],
 23 |         ], [0, 1, 1, 1])
 24 |         self.ctx = TransformationContext({
 25 |             Schema.INPUT: [
 26 |                 IntegerNumericFeature('x1'),
 27 |                 StringCategoricalFeature('x2', ['zero', 'one'])
 28 |             ],
 29 |             Schema.MODEL: [
 30 |                 IntegerNumericFeature('x1'),
 31 |                 StringCategoricalFeature('x2', ['zero', 'one'])
 32 |             ],
 33 |             Schema.DERIVED: [],
 34 |             Schema.OUTPUT: [
 35 |                 IntegerNumericFeature('output')
 36 |             ]
 37 |         })
 38 |         self.converter = DecisionTreeConverter(
 39 |             estimator=self.est,
 40 |             context=self.ctx,
 41 |             mode=ModelMode.CLASSIFICATION
 42 |         )
 43 | 
 44 |     def test_transform(self):
 45 |         p = self.converter.pmml()
 46 |         tm = p.TreeModel[0]
 47 |         assert tm.MiningSchema is not None, 'Missing mining schema'
 48 |         assert len(tm.MiningSchema.MiningField) == 2, 'Wrong number of mining fields'
 49 |         assert tm.Node is not None, 'Missing root node'
 50 |         assert tm.Node.recordCount == 4
 51 |         assert tm.Node.True_ is not None, 'Root condition should always be True'
 52 | 
 53 |     def test_transform_with_derived_field(self):
 54 |         self.est = DecisionTreeClassifier(max_depth=2)
 55 |         self.est.fit([
 56 |             [0, 0, 0],
 57 |             [0, 1, 0],
 58 |             [1, 0, 0],
 59 |             [1, 1, 1],
 60 |         ], [0, 1, 1, 1])
 61 |         mapping = pmml.MapValues(dataType="double", outputColumn="output")
 62 |         mapping.append(pmml.FieldColumnPair(column="x1", field="x1"))
 63 |         mapping.append(pmml.FieldColumnPair(column="x2", field="x2"))
 64 |         it = pmml.InlineTable()
 65 |         mapping_df = pd.DataFrame([
 66 |             dict(x1=0, x2='zero', output=0),
 67 |             dict(x1=0, x2='one', output=0),
 68 |             dict(x1=1, x2='zero', output=0),
 69 |             dict(x1=1, x2='one', output=1),
 70 |         ])
 71 |         for idx, line in mapping_df.iterrows():
 72 |             it.append(pmml_row(**dict(line)))
 73 |         mapping.append(it)
 74 |         mapping_df.set_index(keys=['x1', 'x2'])
 75 |         mapping_f = np.vectorize(lambda x1, x2: mapping_df.ix[x1, x2].output.values[0])
 76 |         self.ctx = TransformationContext({
 77 |             Schema.INPUT: [
 78 |                 IntegerNumericFeature('x1'),
 79 |                 StringCategoricalFeature('x2', ['zero', 'one'])
 80 |             ],
 81 |             Schema.DERIVED: [
 82 |                 DerivedFeature(
 83 |                     feature=RealNumericFeature(name='x3'),
 84 |                     transformation=mapping,
 85 |                     function=mapping_f
 86 |                 )
 87 |             ],
 88 |             Schema.MODEL: [
 89 |                 IntegerNumericFeature('x1'),
 90 |                 StringCategoricalFeature('x2', ['zero', 'one']),
 91 |                 RealNumericFeature(name='x3')
 92 |             ],
 93 |             Schema.OUTPUT: [
 94 |                 IntegerCategoricalFeature('output', ['neg', 'pos'])
 95 |             ]
 96 |         })
 97 |         self.converter = DecisionTreeConverter(
 98 |             estimator=self.est,
 99 |             context=self.ctx,
100 |             mode=ModelMode.CLASSIFICATION
101 |         )
102 |         self.converter.pmml().toxml()
103 | 
104 | 
class TestDecisionTreeRegressorConverter(TestCase):
    """Structural checks of the PMML generated for a tiny DecisionTreeRegressor."""

    def setUp(self):
        """Train a depth-2 regressor on four points and build the converter under test."""
        np.random.seed(1)
        samples = [
            [0, 0],
            [0, 1],
            [1, 0],
            [1, 1],
        ]
        targets = [0, 1, 1, 1]
        self.est = DecisionTreeRegressor(max_depth=2)
        self.est.fit(samples, targets)

        schemas = {
            Schema.INPUT: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one'])
            ],
            Schema.MODEL: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one'])
            ],
            Schema.DERIVED: [],
            Schema.OUTPUT: [
                IntegerNumericFeature('output')
            ]
        }
        self.ctx = TransformationContext(schemas)
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=ModelMode.REGRESSION
        )

    def test_transform(self):
        """The generated tree model has a two-field mining schema and a root node covering all four records."""
        tree_model = self.converter.pmml().TreeModel[0]

        mining_schema = tree_model.MiningSchema
        assert mining_schema is not None, 'Missing mining schema'
        assert len(tree_model.MiningSchema.MiningField) == 2, 'Wrong number of mining fields'

        assert tree_model.Node is not None, 'Missing root node'
        assert tree_model.Node.recordCount == 4
        assert tree_model.Node.True_ is not None, 'Root condition should always be True'
143 | 
144 | 
class TestDecisionTreeClassificationJPMMLParity(TestCase, JPMMLClassificationTest):
    """Runs the shared JPMML classification parity test against a depth-2 decision tree."""

    @property
    def output(self):
        # Integer class labels instead of the string labels declared by the base class.
        return IntegerCategoricalFeature(name=TARGET_NAME, value_list=TARGET)

    def setUp(self):
        self.model = DecisionTreeClassifier(max_depth=2)
        # init_data fits self.model and populates self.ctx, which the converter needs.
        self.init_data()
        converter_args = dict(
            estimator=self.model,
            context=self.ctx,
            mode=ModelMode.CLASSIFICATION,
        )
        self.converter = DecisionTreeConverter(**converter_args)
159 | 
160 | 
class TestDecisionTreeRegressionJPMMLParity(TestCase, JPMMLRegressionTest):
    """Runs the shared JPMML regression parity test against an unconstrained decision tree regressor."""

    def setUp(self):
        self.model = DecisionTreeRegressor()
        # init_data fits self.model and populates self.ctx, which the converter needs.
        self.init_data()
        converter_args = dict(
            estimator=self.model,
            context=self.ctx,
            mode=ModelMode.REGRESSION,
        )
        self.converter = DecisionTreeConverter(**converter_args)
171 | 


--------------------------------------------------------------------------------
/sklearn_pmml/convert/features.py:
--------------------------------------------------------------------------------
  1 | from enum import Enum
  2 | import pandas as pd
  3 | 
  4 | 
class FeatureOpType(Enum):
    """Operational type of a feature; the member values are the strings 'categorical' and 'continuous'."""
    # Feature takes values from a fixed list of categories.
    CATEGORICAL = 'categorical'
    # Feature is a number on a continuous scale.
    CONTINUOUS = 'continuous'
  8 | 
  9 | 
class FeatureType(Enum):
    """Data type of a feature; the member values are the data type names 'double', 'integer' and 'string'."""
    DOUBLE = 'double'
    INT = 'integer'
    STRING = 'string'
 14 | 
 15 | 
class InvalidValueTreatment(Enum):
    """How invalid input values of a feature are treated; only 'asIs' is declared here."""
    AS_IS = 'asIs'
 18 | 
 19 | 
 20 | class Feature(object):
 21 |     def __init__(self, name, namespace='', invalid_value_treatment=InvalidValueTreatment.AS_IS):
 22 |         """
 23 |         Create a new feature
 24 |         :type name: str
 25 |         :type namespace: str
 26 |         :type invalid_value_treatment: InvalidValueTreatment
 27 |         """
 28 |         self._name = str(name)
 29 |         self._namespace = str(namespace)
 30 |         self._invalid_value_treatment = invalid_value_treatment
 31 | 
 32 |     @property
 33 |     def name(self):
 34 |         """
 35 |         :rtype: str
 36 |         """
 37 |         return self._name
 38 | 
 39 |     @property
 40 |     def namespace(self):
 41 |         """
 42 |         :rtype: str
 43 |         """
 44 |         return self._namespace
 45 | 
 46 |     @property
 47 |     def full_name(self):
 48 |         """
 49 |         :rtype: str
 50 |         """
 51 |         if self._namespace:
 52 |             return '{}.{}'.format(self._namespace, self.name)
 53 |         else:
 54 |             return self.name
 55 | 
 56 |     @property
 57 |     def invalid_value_treatment(self):
 58 |         return self._invalid_value_treatment
 59 | 
 60 |     @property
 61 |     def optype(self):
 62 |         """
 63 |         :rtype: FeatureOpType
 64 |         """
 65 |         raise NotImplementedError()
 66 | 
 67 |     @property
 68 |     def data_type(self):
 69 |         """
 70 |         :rtype: FeatureType
 71 |         """
 72 |         raise NotImplementedError()
 73 | 
 74 |     def from_number(self, value):
 75 |         raise NotImplementedError()
 76 | 
 77 |     def __str__(self):
 78 |         return self.name
 79 | 
 80 |     def __repr__(self):
 81 |         return "{}#{}".format(self.name, self.__class__.__name__)
 82 | 
 83 | 
 84 | class NumericFeature(Feature):
 85 |     @property
 86 |     def optype(self):
 87 |         return FeatureOpType.CONTINUOUS
 88 | 
 89 |     def from_number(self, value):
 90 |         return float(value)
 91 | 
 92 | 
class RealNumericFeature(NumericFeature):
    """Numeric feature whose data type is FeatureType.DOUBLE."""
    @property
    def data_type(self):
        return FeatureType.DOUBLE
 97 | 
 98 | 
 99 | class IntegerNumericFeature(NumericFeature):
100 |     def from_number(self, value):
101 |         return int(value)
102 | 
103 |     @property
104 |     def data_type(self):
105 |         return FeatureType.INT
106 | 
107 | 
class CategoricalFeature(Feature):
    """
    Represents a categorical feature. Categorical features are defined with optype 'categorical' and the corresponding
    dataType. The corresponding derived field will have a double data type and will be defined as a MapValues PMML
    element.
    """
    def __init__(self, name, value_list, namespace='', invalid_value_treatment=InvalidValueTreatment.AS_IS, map_missing_to=None):
        """
        Declare a categorical feature.
        :param name: name of the feature
        :param value_list: known categories; the position of a category in this list is its ordinal
        :param namespace: optional namespace of the feature
        :param invalid_value_treatment: treatment of invalid values
        :param map_missing_to: value returned by to_number for unknown categories; None means "raise ValueError"
        """
        super(CategoricalFeature, self).__init__(name, namespace, invalid_value_treatment)
        self.map_missing_to = map_missing_to
        self.value_list = value_list

    @property
    def optype(self):
        return FeatureOpType.CATEGORICAL

    def from_number(self, value):
        """
        Transform the ordinal into the categorical value stored at that position of self.value_list.
        """
        assert value >= 0, 'Negative numbers can not be used as categorical indexes'
        assert value < len(self.value_list), 'Unknown category index {}'.format(value)
        return self.value_list[value]

    def to_number(self, value):
        """
        Transform categorical value to the ordinal. For a value that is not in self.value_list returns
        self.map_missing_to when it is set (any value other than None, including 0), raises ValueError otherwise.
        """
        try:
            return list(self.value_list).index(value)
        except ValueError:
            # Compare with None explicitly: a falsy fallback such as 0 is a legitimate mapping target.
            if self.map_missing_to is not None:
                return self.map_missing_to
            raise
139 | 
140 | 
class IntegerCategoricalFeature(CategoricalFeature):
    """Categorical feature whose data type is FeatureType.INT."""
    @property
    def data_type(self):
        return FeatureType.INT
145 | 
146 | 
class StringCategoricalFeature(CategoricalFeature):
    """Categorical feature whose data type is FeatureType.STRING."""
    @property
    def data_type(self):
        return FeatureType.STRING
151 | 
152 | 
class DerivedFeature(NumericFeature):
    """
    This class represents a derived feature constructed from previously defined features.
    The transformation parameter defines the recipe for creating a feature, and will be inserted into pmml.DerivedField
    element for this feature.
    Note that the transformation only allows references to the already declared fields.

    For convenience, one can also pass the function that performs the transformation on the input data frame.
    """

    def __init__(self, feature, transformation, function):
        """
        Construct a derived feature.
        :param feature: declaration of feature (name, data_type and optype)
        :type feature: Feature
        :param transformation: definition of DerivedField content
        :param function: transformation function
        :type function: callable
        """
        # Validate first: otherwise a wrong `feature` fails on attribute access below with an unrelated error.
        assert isinstance(feature, NumericFeature), 'All derived features must be declared as NumericFeatures'
        assert function is not None, 'Function can not be None'
        assert callable(function), 'Function must be callable'
        super(DerivedFeature, self).__init__(
            name=feature.name,
            namespace=feature.namespace,
            invalid_value_treatment=feature.invalid_value_treatment
        )
        self.feature = feature
        self.transformation = transformation
        self.function = function

    def from_number(self, value):
        """
        Delegates the conversion to the wrapped feature declaration.
        """
        return self.feature.from_number(value)

    @property
    def data_type(self):
        """
        Data type of the wrapped feature declaration.
        """
        return self.feature.data_type

    @property
    def optype(self):
        """
        Optype of the wrapped feature declaration.
        """
        return self.feature.optype

    def apply(self, df):
        """
        Calculate derived feature's values based on the values in the input data frame.
        The function receives a shallow copy of the frame, so columns it adds do not appear in the input frame.
        :param df: input data frame
        :return: whatever the transformation function returns for the frame
        """
        assert self.function is not None, 'Function was not provided'
        assert isinstance(df, pd.DataFrame), 'Input should be a data frame'
        return self.function(df.copy(deep=False))


--------------------------------------------------------------------------------
/sklearn_pmml/convert/test/jpmml_test.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'evancox'
  2 | 
  3 | import numpy as np
  4 | import hashlib
  5 | import os
  6 | import shutil
  7 | import subprocess
  8 | import logging
  9 | 
 10 | from sklearn_pmml.convert import TransformationContext, Schema
 11 | from sklearn_pmml.convert.features import *
 12 | 
 13 | 
# Class labels; used by the tests as the value list of integer categorical outputs.
TARGET = [0, 1, 2]
# Name of the target column and of the model output feature.
TARGET_NAME = 'y'
# Directory (relative to the current working directory) holding the generated PMML and CSV files.
# It is wiped and re-created by every JPMML test run.
TEST_DIR = 'jpmml_test_data'

# Largest absolute difference tolerated between sklearn and JPMML regression predictions.
EPSILON = 0.00001

logging.basicConfig(format='%(asctime)s %(message)s')
 21 | 
 22 | 
 23 | # Adapted from http://stackoverflow.com/questions/1724693/find-a-file-in-python
 24 | def find_file_or_dir(name):
 25 |     for root, dirs, files in os.walk(os.path.dirname(__file__)):
 26 |         if name in files or name in dirs:
 27 |             return os.path.join(root, name)
 28 | 
 29 | 
 30 | class JPMMLTest():
 31 |     USE_VERIFICATION = True
 32 |     """
 33 |     If true, the PMML will be generated with the ModelVerification section that allows PMML interpreter to check the
 34 |     correctness of deserialized model.
 35 |     """
 36 | 
 37 |     def __init__(self):
 38 |         self.x = None
 39 |         self.y = None
 40 |         self.ctx = None
 41 |         self.converter = None
 42 | 
 43 |     @staticmethod
 44 |     def can_run():
 45 |         try:
 46 |             subprocess.check_call(['java', '-version'])
 47 |         except OSError:
 48 |             logging.warning("Couldn't find java to run JPMML integration tests")
 49 |             return False
 50 | 
 51 |         try:
 52 |             subprocess.check_call(['mvn', '-version'])
 53 |         except OSError:
 54 |             logging.warning("Couldn't find maven to run JPMML integration tests")
 55 |             return False
 56 | 
 57 |         return True
 58 | 
 59 |     @staticmethod
 60 |     def init_jpmml():
 61 |         result = subprocess.call(['mvn', '-q', 'clean', 'package', '-f', find_file_or_dir('jpmml-csv-evaluator')])
 62 |         assert result == 0, "Unable to package jpmml csv evaluator"
 63 |         return True
 64 | 
 65 |     # taken from http://stackoverflow.com/questions/18159221/remove-namespace-and-prefix-from-xml-in-python-using-lxml
 66 |     @staticmethod
 67 |     def remove_namespace(doc, namespace):
 68 |         ns = u'{%s}' % namespace
 69 |         nsl = len(ns)
 70 |         for elem in doc.getiterator():
 71 |             if elem.tag.startswith(ns):
 72 |                 elem.tag = elem.tag[nsl:]
 73 | 
 74 |     @property
 75 |     def model(self):
 76 |         if self._model is None:
 77 |             raise NotImplementedError()
 78 |         return self._model
 79 | 
 80 |     @model.setter
 81 |     def model(self, model):
 82 |         self._model = model
 83 | 
 84 |     @property
 85 |     def output(self):
 86 |         raise NotImplementedError()
 87 | 
 88 |     def setup_jpmml_test(self):
 89 |         if not JPMMLTest.can_run():
 90 |             logging.warning("Can't run regression test, java and/or maven not installed")
 91 |             return None
 92 | 
 93 |         if os.path.exists(TEST_DIR):
 94 |             shutil.rmtree(TEST_DIR)
 95 |         os.makedirs(TEST_DIR)
 96 | 
 97 |         if self.USE_VERIFICATION:
 98 |             verification_data = self.x.copy()
 99 | 
100 |             xml = self.converter.pmml(verification_data=[
101 |                 dict((str(_[0]), _[1]) for _ in dict(row).items())
102 |                 for idx, row in verification_data[:10].iterrows()
103 |             ]).toDOM().toprettyxml()
104 |         else:
105 |             xml = self.converter.pmml().toDOM().toprettyxml()
106 | 
107 |         pmml_hash = hashlib.md5(xml.encode('utf-8')).hexdigest()
108 |         pmml_file_path = os.path.join(TEST_DIR, pmml_hash + '.pmml')
109 |         with open(pmml_file_path, 'w') as pmml_file:
110 |             pmml_file.write(xml)
111 | 
112 |         input_file_path = os.path.join(TEST_DIR, pmml_hash + '_input.csv')
113 |         self.x.to_csv(input_file_path, index=False)
114 |         target_file_path = os.path.join(TEST_DIR, pmml_hash + '_output.csv')
115 | 
116 |         java_args = ' '.join(map("'{}'".format, [
117 |             os.path.abspath(pmml_file_path),
118 |             os.path.abspath(input_file_path),
119 |             os.path.abspath(target_file_path)
120 |         ]))
121 |         result = subprocess.call([
122 |             'mvn', 'package', 'exec:java', '-q', '-e',
123 |             '-f', find_file_or_dir('jpmml-csv-evaluator'),
124 |             '-Dexec.mainClass=sklearn.pmml.jpmml.JPMMLCSVEvaluator',
125 |             '-Dexec.args=' + java_args
126 |         ])
127 |         if result:
128 |             print(xml)
129 |         assert result == 0, 'Executing JPMML evaluator returned non zero result'
130 |         return pd.read_csv(target_file_path)
131 | 
132 |     def init_data(self):
133 |         np.random.seed(12363)
134 |         self.x = pd.DataFrame(np.random.randn(500, 4), columns=['col_' + str(_) for _ in range(4)])
135 |         self.y = pd.DataFrame({TARGET_NAME: [np.random.choice([0, 1, 2]) for _ in range(self.x.shape[0])]})
136 |         self._model.fit(self.x, np.ravel(self.y))
137 |         self.ctx = TransformationContext()
138 |         self.ctx.schemas[Schema.INPUT] = [RealNumericFeature(col) for col in list(self.x)]
139 |         self.ctx.schemas[Schema.DERIVED] = []
140 |         self.ctx.schemas[Schema.MODEL] = [RealNumericFeature(col) for col in list(self.x)]
141 |         self.ctx.schemas[Schema.OUTPUT] = [self.output]
142 | 
143 |     def init_data_one_label(self):
144 |         np.random.seed(12363)
145 |         self.x = pd.DataFrame(np.random.randn(500, 4), columns=['col_' + str(_) for _ in range(4)])
146 |         self.y = pd.DataFrame({TARGET_NAME: [np.random.choice([0, 1]) for _ in range(self.x.shape[0])]})
147 |         self._model.fit(self.x, np.ravel(self.y))
148 |         self.ctx = TransformationContext()
149 |         self.ctx.schemas[Schema.INPUT] = [RealNumericFeature(col) for col in list(self.x)]
150 |         self.ctx.schemas[Schema.DERIVED] = []
151 |         self.ctx.schemas[Schema.MODEL] = [RealNumericFeature(col) for col in list(self.x)]
152 |         self.ctx.schemas[Schema.OUTPUT] = [self.output]
153 | 
154 | 
class JPMMLRegressionTest(JPMMLTest):
    """Parity test for regression models: JPMML and sklearn predictions must agree within EPSILON."""

    @property
    def output(self):
        return IntegerNumericFeature(name=TARGET_NAME)

    def test_regression(self):
        jpmml_predictions = self.setup_jpmml_test()
        # None means that JPMML could not be executed (no java/maven), so there is nothing to compare
        if jpmml_predictions is not None:
            expected = pd.DataFrame({TARGET_NAME: self.converter.estimator.predict(self.x)})
            deviation = np.abs(jpmml_predictions[TARGET_NAME] - expected[TARGET_NAME])
            assert np.all(deviation < EPSILON)
168 | 
169 | 
class JPMMLClassificationTest(JPMMLTest):
    """Parity test for classifiers: class probabilities and predicted labels of JPMML and sklearn must agree."""

    @property
    def output(self):
        return StringCategoricalFeature(name=TARGET_NAME, value_list=["negative", "positive"])

    def test_classification(self):
        """
        Compare the per-class probabilities (columns '<output name>.<class>') and the predicted labels.
        The comparison is skipped when JPMML can not be executed.
        """
        jpmml_predictions = self.setup_jpmml_test()
        if jpmml_predictions is None:
            return

        raw_sklearn_predictions = self.converter.estimator.predict_proba(self.x)
        prob_outputs = [self.output.name + '.' + str(clazz) for clazz in self.output.value_list]
        # Build the frame in one go instead of growing it row by row, which copies the data on every insert
        sklearn_predictions = pd.DataFrame(raw_sklearn_predictions, columns=prob_outputs)

        np.testing.assert_almost_equal(
            np.array(jpmml_predictions[list(sklearn_predictions.columns)]),
            sklearn_predictions.values,
            err_msg='Probability mismatch'
        )
        # predict() returns class indexes here, which are translated into the declared output values
        np.testing.assert_equal(
            np.array(self.output.value_list)[self.converter.estimator.predict(self.x)],
            jpmml_predictions[self.output.name].values,
            err_msg='Labels mismatch'
        )


--------------------------------------------------------------------------------
/sklearn_pmml/convert/utils.py:
--------------------------------------------------------------------------------
  1 | from functools import partial
  2 | from sklearn_pmml import pmml
  3 | from sklearn_pmml.convert.features import Feature, FeatureType
  4 | from pyxb.utils.domutils import BindingDOMSupport as bds
  5 | import numpy as np
  6 | estimator_to_converter = {}
  7 | 
  8 | 
  9 | def find_converter(estimator):
 10 |     # TODO: do the search here
 11 |     return estimator_to_converter.get(estimator.__class__, None)
 12 | 
 13 | 
def pmml_row(**columns):
    """
    Creates pmml.row element with columns
    :param columns: key-value pairs to be inserted into the row; each key becomes the name of a child element
    :return: pmml.row element
    """
    r = pmml.row()
    for name, value in columns.items():
        # A new BindingDOMSupport instance is created for each of the two calls below.
        # NOTE(review): presumably the value ends up as the text content of the element - confirm with pyxb docs
        el = bds().createChildElement(name)
        bds().appendTextChild(value, el)
        r.append(el)
    return r
 26 | 
 27 | 
 28 | class DerivedFeatureTransformations(object):
 29 |     """
 30 |     A helper for building Derived Feature transformations. Creates both transformation and the DerivedFeature content.
 31 |     Typical usage of the methods:
 32 | 
 33 |     DerivedFeature(
 34 |             RealNumericFeature('my_derived_feature'),
 35 |             **DerivedFeatureTransformations.field_in_list('input_feature', ['A', 'B', 'C'])
 36 |     )
 37 |     """
 38 |     TRANSFORMATION = 'transformation'
 39 |     FUNCTION = 'function'
 40 | 
 41 |     @staticmethod
 42 |     def field_in_list(field, values):
 43 |         mv = pmml.MapValues(outputColumn='output', defaultValue=0)
 44 |         mv.append(pmml.FieldColumnPair(field=field, column='input'))
 45 |         it = pmml.InlineTable()
 46 |         for v in values:
 47 |             it.append(pmml_row(input=v, output=1))
 48 |         mv.append(it)
 49 |         return {
 50 |             DerivedFeatureTransformations.TRANSFORMATION: mv,
 51 |             DerivedFeatureTransformations.FUNCTION: lambda df: reduce(np.logical_or, [df[field] == _ for _ in values])
 52 |         }
 53 | 
 54 |     @staticmethod
 55 |     def field_not_in_list(field, values):
 56 |         mv = pmml.MapValues(outputColumn='output', defaultValue=1)
 57 |         mv.append(pmml.FieldColumnPair(field=field, column='input'))
 58 |         it = pmml.InlineTable()
 59 |         for v in values:
 60 |             it.append(pmml_row(input=v, output=0))
 61 |         mv.append(it)
 62 |         return {
 63 |             DerivedFeatureTransformations.TRANSFORMATION: mv,
 64 |             DerivedFeatureTransformations.FUNCTION: lambda df: reduce(np.logical_and, [df[field] != _ for _ in values])
 65 |         }
 66 | 
 67 |     @staticmethod
 68 |     def map_values(field, value_map, default_value):
 69 |         mv = pmml.MapValues(outputColumn='output', default_value=default_value)
 70 |         mv.append(pmml.FieldColumnPair(field=field, column='input'))
 71 |         it = pmml.InlineTable()
 72 |         for k, v in value_map.items():
 73 |             it.append(pmml_row(input=k, output=v))
 74 |         mv.append(it)
 75 |         return {
 76 |             DerivedFeatureTransformations.TRANSFORMATION: mv,
 77 |             DerivedFeatureTransformations.FUNCTION:
 78 |                 lambda df: np.vectorize(partial(value_map.get, default_value))(df[field])
 79 |         }
 80 | 
 81 |     @staticmethod
 82 |     def arithmetics(tree):
 83 |         """
 84 |         Takes an arithmetic operations tree (Lisp-styled) as an input
 85 |         """
 86 | 
 87 |         def basic_function(func_name, args):
 88 |             expr = pmml.Apply(function=func_name)
 89 |             for a in args:
 90 |                 expr.append(a)
 91 |             return expr
 92 | 
 93 |         def mod_function(args):
 94 |             expr = pmml.Apply(function='-')
 95 |             expr.append(args[0])
 96 |             mul = pmml.Apply(function='*')
 97 |             mul.append(args[1])
 98 |             floor = pmml.Apply(function='floor')
 99 |             mul.append(floor)
100 |             div = pmml.Apply(function='/')
101 |             floor.append(div)
102 |             div.append(args[0])
103 |             div.append(args[1])
104 |             return expr
105 | 
106 |         # TODO: test me
107 |         def greedy_evaluation(node):
108 |             if isinstance(node, str):
109 |                 # field reference
110 |                 return (lambda df: df[node]), pmml.FieldRef(field=node)
111 |             elif isinstance(node, (tuple, list)):
112 |                 # eval arguments
113 |                 args = map(greedy_evaluation, node[1:])
114 |                 functions = {
115 |                     '*': lambda df: np.multiply(*[_[0](df) for _ in args]),
116 |                     '-': lambda df: np.subtract(*[_[0](df) for _ in args]),
117 |                     '+': lambda df: np.add(*[_[0](df) for _ in args]),
118 |                     '/': lambda df: np.divide(*[_[0](df) for _ in args]),
119 |                     '%': lambda df: np.mod(*[_[0](df) for _ in args]),
120 |                 }
121 |                 assert isinstance(node[0], str), 'First element should be a code of operation'
122 |                 assert node[0] in functions, 'Unknown function code {}. Supported codes: {}'.format(node[0], functions.keys())
123 |                 expr = {
124 |                     '*': partial(basic_function, '*'),
125 |                     '-': partial(basic_function, '-'),
126 |                     '+': partial(basic_function, '+'),
127 |                     '/': partial(basic_function, '/'),
128 |                     '%': mod_function
129 |                 }.get(node[0])([a[1] for a in args])
130 |                 func = functions[node[0]]
131 |                 return func, expr
132 |             else:
133 |                 # numeric terminal
134 |                 return lambda df: node, pmml.Constant(node, dataType='double')
135 | 
136 |         function, transformation = greedy_evaluation(tree)
137 | 
138 |         return {
139 |             DerivedFeatureTransformations.TRANSFORMATION: transformation,
140 |             DerivedFeatureTransformations.FUNCTION: function
141 |         }
142 | 
143 |     @staticmethod
144 |     def replace_value(field, original, replacement):
145 |         if original is not None:
146 |             transformation = pmml.Apply(function='if')
147 |             cond = pmml.Apply(function='equals')
148 |             cond.append(pmml.FieldRef(field=field))
149 |             cond.append(pmml.Constant(original))
150 |             transformation.append(pmml.Constant(replacement))
151 |             transformation.append(pmml.FieldRef(field=field))
152 | 
153 |             return {
154 |                 DerivedFeatureTransformations.TRANSFORMATION: transformation,
155 |                 DerivedFeatureTransformations.FUNCTION: lambda df: np.where(df[field] == original, replacement, df[field])
156 |             }
157 |         else:
158 |             transformation = pmml.Apply(function='+', mapMissingTo=replacement)
159 |             transformation.append(pmml.Constant(0))
160 |             transformation.append(pmml.FieldRef(field=field))
161 |             return {
162 |                 DerivedFeatureTransformations.TRANSFORMATION: transformation,
163 |                 DerivedFeatureTransformations.FUNCTION: lambda df: np.where(df[field].isnull(), replacement, df[field])
164 |             }
165 | 
166 | 
def assert_equal(feature, expected, actual):
    """
    Check that the actual values of the feature match the expected ones and raise an exception otherwise.
    String features are compared for exact equality, all the other features are compared
    with np.testing.assert_almost_equal.
    :type feature: Feature
    :type expected: np.array
    :type actual: np.array
    """
    def mismatch_message():
        return 'Some passed values of "{}" don\'t match the evaluated results'.format(feature.full_name)

    # if the feature has the transformation included and the result data is passed, we can compare them
    is_string_feature = feature.data_type == FeatureType.STRING
    if not is_string_feature:
        np.testing.assert_almost_equal(actual, expected, err_msg=mismatch_message())
        return
    assert all(actual == expected), mismatch_message()


--------------------------------------------------------------------------------
/sklearn_pmml/convert/tree.py:
--------------------------------------------------------------------------------
  1 | from functools import partial
  2 | 
  3 | from sklearn.base import ClassifierMixin, RegressorMixin
  4 | from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
  5 | from sklearn.tree._tree import Tree, TREE_LEAF
  6 | import numpy as np
  7 | 
  8 | from sklearn_pmml.convert.model import EstimatorConverter, ModelMode, Schema
  9 | from sklearn_pmml.convert.features import Feature, CategoricalFeature, NumericFeature
 10 | import sklearn_pmml.pmml as pmml
 11 | from sklearn_pmml.convert.utils import estimator_to_converter
 12 | 
 13 | 
 14 | class DecisionTreeConverter(EstimatorConverter):
 15 |     SPLIT_BINARY = 'binarySplit'
 16 |     OPERATOR_LE = 'lessOrEqual'
 17 |     NODE_ROOT = 0
 18 |     OUTPUT_PROBABILITY = 'proba'
 19 |     OUTPUT_LABEL = 'proba'
 20 | 
 21 |     def __init__(self, estimator, context, mode):
 22 |         super(DecisionTreeConverter, self).__init__(estimator, context, mode)
 23 | 
 24 |         assert len(self.context.schemas[Schema.OUTPUT]) == 1, 'Only one-label trees are supported'
 25 |         assert hasattr(estimator, 'tree_'), 'Estimator has no tree_ attribute'
 26 |         if mode == ModelMode.CLASSIFICATION:
 27 |             if isinstance(self.context.schemas[Schema.OUTPUT][0], CategoricalFeature):
 28 |                 self.prediction_output = self.OUTPUT_LABEL
 29 |             else:
 30 |                 self.prediction_output = self.OUTPUT_PROBABILITY
 31 |             assert isinstance(self.estimator, ClassifierMixin), \
 32 |                 'Only a classifier can be serialized in classification mode'
 33 |         if mode == ModelMode.REGRESSION:
 34 |             assert isinstance(self.context.schemas[Schema.OUTPUT][0], NumericFeature), \
 35 |                 'Only a numeric feature can be an output of regression'
 36 |             assert isinstance(self.estimator, RegressorMixin), \
 37 |                 'Only a regressor can be serialized in regression mode'
 38 |         assert estimator.tree_.value.shape[1] == len(self.context.schemas[Schema.OUTPUT]), \
 39 |             'Tree outputs {} results while the schema specifies {} output fields'.format(
 40 |                 estimator.tree_.value.shape[1], len(self.context.schemas[Schema.OUTPUT]))
 41 | 
 42 |         # create hidden variables for each categorical output
 43 |         # TODO: this code is copied from the ClassifierConverter. To make things right, we need an abstract tree
 44 |         # TODO: converter and subclasses for classifier and regression converters
 45 |         internal_schema = list(filter(lambda x: isinstance(x, CategoricalFeature), self.context.schemas[Schema.OUTPUT]))
 46 |         self.context.schemas[Schema.INTERNAL] = internal_schema
 47 | 
 48 |     def _model(self):
 49 |         assert Schema.NUMERIC in self.context.schemas, \
 50 |             'Either build transformation dictionary or provide {} schema in context'.format(Schema.NUMERIC)
 51 |         tm = pmml.TreeModel(functionName=self.model_function.value, splitCharacteristic=self.SPLIT_BINARY)
 52 |         tm.append(self.mining_schema())
 53 |         tm.append(self.output())
 54 |         tm.Node = self._transform_node(
 55 |             self.estimator.tree_,
 56 |             self.NODE_ROOT,
 57 |             self.context.schemas[Schema.NUMERIC],
 58 |             self.context.schemas[Schema.OUTPUT][0]
 59 |         )
 60 |         return tm
 61 | 
 62 |     def model(self, verification_data=None):
 63 |         assert Schema.NUMERIC in self.context.schemas, \
 64 |             'Either build transformation dictionary or provide {} schema in context'.format(Schema.NUMERIC)
 65 |         tm = self._model()
 66 |         if verification_data is not None:
 67 |             tm.ModelVerification = self.model_verification(verification_data)
 68 |         return tm
 69 | 
 70 |     def _transform_node(self, tree, index, input_schema, output_feature, enter_condition=None):
 71 |         """
 72 |         Recursive mapping of sklearn Tree into PMML Node tree
 73 |         :return: Node element
 74 |         """
 75 |         assert isinstance(tree, Tree)
 76 |         assert isinstance(input_schema, list)
 77 |         assert isinstance(output_feature, Feature)
 78 | 
 79 |         node = pmml.Node()
 80 |         if enter_condition is None:
 81 |             node.append(pmml.True_())
 82 |         else:
 83 |             node.append(enter_condition)
 84 |         node.recordCount = tree.n_node_samples[index]
 85 | 
 86 |         if tree.children_left[index] != TREE_LEAF:
 87 |             feature = input_schema[tree.feature[index]]
 88 |             assert isinstance(feature, Feature)
 89 |             left_child = self._transform_node(
 90 |                 tree,
 91 |                 tree.children_left[index],
 92 |                 input_schema,
 93 |                 output_feature,
 94 |                 enter_condition=pmml.SimplePredicate(
 95 |                     field=feature.full_name, operator=DecisionTreeConverter.OPERATOR_LE, value_=tree.threshold[index]
 96 |                 )
 97 |             )
 98 |             right_child = self._transform_node(tree, tree.children_right[index], input_schema, output_feature)
 99 |             if self.model_function == ModelMode.CLASSIFICATION:
100 |                 score, score_prob = None, 0.0
101 |                 for i in range(len(tree.value[index][0])):
102 |                     left_score = left_child.ScoreDistribution[i]
103 |                     right_score = right_child.ScoreDistribution[i]
104 |                     prob = float(left_score.recordCount + right_score.recordCount) / node.recordCount
105 |                     node.append(pmml.ScoreDistribution(
106 |                         recordCount=left_score.recordCount + right_score.recordCount,
107 |                         value_=left_score.value_,
108 |                         confidence=prob
109 |                     ))
110 |                     if score_prob < prob:
111 |                         score, score_prob = left_score.value_, prob
112 |                 node.score = score
113 |             node.append(left_child).append(right_child)
114 | 
115 |         else:
116 |             node_value = np.array(tree.value[index][0])
117 |             if self.model_function == ModelMode.CLASSIFICATION:
118 |                 probs = node_value / float(node_value.sum())
119 |                 for i in range(len(probs)):
120 |                     node.append(pmml.ScoreDistribution(
121 |                         confidence=probs[i],
122 |                         recordCount=node_value[i],
123 |                         value_=output_feature.from_number(i)
124 |                     ))
125 |                 node.score = output_feature.from_number(probs.argmax())
126 |             elif self.model_function == ModelMode.REGRESSION:
127 |                 node.score = node_value[0]
128 | 
129 |         return node
130 | 
131 |     def output(self):
132 |         """
133 |         Output section of PMML contains all model outputs.
134 |         Classification tree output contains output variable as a label,
135 |         and <variable>#<value> as a probability of a value for a variable
136 |         :return: pmml.Output
137 |         """
138 |         output = pmml.Output()
139 | 
140 |         # the response variables
141 |         for feature in self.context.schemas[Schema.OUTPUT]:
142 |             output_field = pmml.OutputField(
143 |                 name=Schema.OUTPUT.extract_feature_name(feature),
144 |                 feature='predictedValue',
145 |                 optype=feature.optype.value,
146 |                 dataType=feature.data_type.value
147 |             )
148 |             output.append(output_field)
149 | 
150 |         # the probabilities for categories; should only be populated for classification jobs
151 |         for feature in self.context.schemas[Schema.CATEGORIES]:
152 |             output_field = pmml.OutputField(
153 |                 name=Schema.CATEGORIES.extract_feature_name(feature),
154 |                 optype=feature.optype.value,
155 |                 dataType=feature.data_type.value,
156 |                 feature='probability',
157 |                 targetField=Schema.INTERNAL.extract_feature_name(feature.namespace),
158 |                 value_=feature.name
159 |             )
160 |             output.append(output_field)
161 | 
162 |         return output
163 | 
164 | 
# register the tree converter for both sklearn decision trees, with the matching mode bound in advance
estimator_to_converter.update({
    DecisionTreeClassifier: partial(DecisionTreeConverter, mode=ModelMode.CLASSIFICATION),
    DecisionTreeRegressor: partial(DecisionTreeConverter, mode=ModelMode.REGRESSION),
})


--------------------------------------------------------------------------------
/sklearn_pmml/convert/gbrt.py:
--------------------------------------------------------------------------------
  1 | from copy import copy
  2 | from sklearn.ensemble import GradientBoostingClassifier
  3 | 
  4 | from sklearn.ensemble.gradient_boosting import LogOddsEstimator
  5 | 
  6 | from sklearn_pmml.convert.features import *
  7 | from sklearn_pmml.convert.model import EstimatorConverter, ClassifierConverter, ModelMode, RegressionConverter, Schema, \
  8 |     TransformationContext
  9 | from sklearn_pmml.convert.tree import DecisionTreeConverter
 10 | import sklearn_pmml.pmml as pmml
 11 | from sklearn_pmml.convert.utils import estimator_to_converter, find_converter
 12 | 
 13 | 
 14 | class LogOddsEstimatorConverter(RegressionConverter):
 15 |     REGRESSION_LINEAR = "linearRegression"
 16 | 
 17 |     def __init__(self, estimator, context):
 18 |         super(LogOddsEstimatorConverter, self).__init__(estimator, context)
 19 | 
 20 |         assert isinstance(estimator, LogOddsEstimator), 'This converter can only process LogOddsEstimator instances'
 21 | 
 22 |     def model(self, verification_data=None):
 23 |         rm = pmml.RegressionModel(functionName=self.model_function.value, algorithmName=self.REGRESSION_LINEAR)
 24 |         rm.append(self.mining_schema())
 25 |         rm.append(pmml.RegressionTable(intercept=self.estimator.prior))
 26 |         if verification_data is not None:
 27 |             rm.append(self.model_verification(verification_data))
 28 |         return rm
 29 | 
 30 | 
 31 | class GradientBoostingConverter(ClassifierConverter):
 32 |     """
 33 |     Converter for GradientBoostingClassifier model.
 34 | 
 35 |     NOTE: at the moment only binary one-label classification is supported.
 36 |     """
 37 |     SCHEMAS_IN_MINING_MODEL = {Schema.INPUT}
 38 | 
 39 |     def __init__(self, estimator, context):
 40 |         super(GradientBoostingConverter, self).__init__(estimator, context)
 41 | 
 42 |         assert isinstance(estimator, GradientBoostingClassifier), \
 43 |             'This converter can only process GradientBoostingClassifier instances'
 44 |         assert len(context.schemas[Schema.OUTPUT]) == 1, 'Only one-label classification is supported'
 45 |         assert not estimator.loss_.is_multi_class, 'Only one-label classification is supported'
 46 |         assert context.schemas[Schema.OUTPUT][0].optype == FeatureOpType.CATEGORICAL, \
 47 |             'Classification output must be categorical'
 48 |         assert len(context.schemas[Schema.OUTPUT][0].value_list) == 2, 'Only binary classifier is supported'
 49 |         assert find_converter(estimator.init_) is not None, 'Can not find a converter for {}'.format(estimator.init_)
 50 | 
 51 |     def model(self, verification_data=None):
 52 |         # The ensemble of regression models can only be a regression model. Surprise!
 53 |         mining_model = pmml.MiningModel(functionName=ModelMode.REGRESSION.value)
 54 |         mining_model.append(self.mining_schema())
 55 |         mining_model.append(self.output_transformation())
 56 |         mining_model.append(self.segmentation())
 57 |         if verification_data is not None:
 58 |             mining_model.append(self.model_verification(verification_data))
 59 |         return mining_model
 60 | 
 61 |     def output_transformation(self):
 62 |         """
 63 |         Build sigmoid output transformation:
 64 |         proba = 1 / (1 + exp(-(initial_estimate + weighted_sum(estimates))))
 65 |         :return: Output element
 66 |         """
 67 |         output = pmml.Output()
 68 | 
 69 |         # storing the raw prediction into internal::varname variable
 70 |         for f in self.context.schemas[Schema.INTERNAL]:
 71 |             output.append(pmml.OutputField(feature='predictedValue', name=Schema.INTERNAL.extract_feature_name(f)))
 72 | 
 73 |         # setting up a logistic transformation for the positive label
 74 |         positive_category = self.context.schemas[Schema.CATEGORIES][1]
 75 |         output_field = pmml.OutputField(
 76 |             dataType=positive_category.data_type.value,
 77 |             feature='transformedValue',
 78 |             name=Schema.CATEGORIES.extract_feature_name(positive_category),
 79 |             optype=positive_category.optype.value
 80 |         )
 81 |         neg = pmml.Apply(function='*')
 82 |         neg.append(pmml.FieldRef(field=Schema.INTERNAL.extract_feature_name(positive_category.namespace)))
 83 |         neg.append(pmml.Constant(
 84 |             # there is no notion of weighted sum in segment aggregation, so we used weighted average,
 85 |             # and now the result should be multiplied by total weight
 86 |             -(1 + self.estimator.n_estimators * self.estimator.learning_rate),
 87 |             dataType=FeatureType.DOUBLE.value
 88 |         ))
 89 |         exp = pmml.Apply(function='exp')
 90 |         exp.append(neg)
 91 |         plus = pmml.Apply(function='+')
 92 |         plus.append(pmml.Constant(1.0, dataType=FeatureType.DOUBLE.value))
 93 |         plus.append(exp)
 94 |         div = pmml.Apply(function='/')
 95 |         div.append(pmml.Constant(1.0, dataType=FeatureType.DOUBLE.value))
 96 |         div.append(plus)
 97 |         output_field.append(div)
 98 |         output.append(output_field)
 99 | 
100 |         # probability of negative label is 1 - positive_proba
101 |         negative_category = self.context.schemas[Schema.CATEGORIES][0]
102 |         output_field = pmml.OutputField(
103 |             dataType=negative_category.data_type.value,
104 |             feature='transformedValue',
105 |             name=Schema.CATEGORIES.extract_feature_name(negative_category),
106 |             optype=negative_category.optype.value
107 |         )
108 |         subtract = pmml.Apply(function='-')
109 |         subtract.append(pmml.Constant(1, dataType=FeatureType.DOUBLE.value))
110 |         subtract.append(pmml.FieldRef(field=Schema.CATEGORIES.extract_feature_name(positive_category)))
111 |         output_field.append(subtract)
112 |         output.append(output_field)
113 | 
114 |         # now we should define a label; we can look at the raw predicted output and compare it with 0
115 |         label = self.context.schemas[Schema.OUTPUT][0]
116 |         output_field = pmml.OutputField(
117 |             feature='transformedValue',
118 |             name=Schema.OUTPUT.extract_feature_name(label),
119 |             optype=label.optype.value,
120 |             dataType=label.data_type.value
121 |         )
122 |         discretize = pmml.Discretize(field=Schema.INTERNAL.extract_feature_name(label))
123 |         discretize_bin = pmml.DiscretizeBin(binValue=label.value_list[0])
124 |         discretize_bin.append(pmml.Interval(closure="openOpen", rightMargin=0))
125 |         discretize.append(discretize_bin)
126 |         discretize_bin = pmml.DiscretizeBin(binValue=label.value_list[1])
127 |         discretize_bin.append(pmml.Interval(closure="closedOpen", leftMargin=0))
128 |         discretize.append(discretize_bin)
129 |         output_field.append(discretize)
130 |         output.append(output_field)
131 | 
132 |         return output
133 | 
134 |     def segmentation(self):
135 |         """
136 |         Build a segmentation (sequence of estimators)
137 |         :return: Segmentation element
138 |         """
139 |         # there is no notion of weighted sum, so we should take weighted average and multiply result by total weight
140 |         # in output transformation
141 |         segmentation = pmml.Segmentation(multipleModelMethod="weightedAverage")
142 | 
143 |         # build the context for the nested regression models by replacing output categorical feature
144 |         # with the continuous numeric feature
145 |         regression_context = TransformationContext(schemas=dict(self.context.schemas))
146 |         regression_context.schemas[Schema.OUTPUT] = [RealNumericFeature(
147 |             name=self.context.schemas[Schema.OUTPUT][0].name,
148 |             namespace=Schema.NUMERIC.namespace
149 |         )]
150 | 
151 |         # first, transform initial estimator
152 |         init_segment = pmml.Segment(weight=1)
153 |         init_segment.append(pmml.True_())
154 |         init_segment.append(find_converter(self.estimator.init_)(self.estimator.init_, regression_context).model())
155 |         segmentation.append(init_segment)
156 | 
157 |         for est in self.estimator.estimators_[:, 0]:
158 |             s = pmml.Segment(weight=self.estimator.learning_rate)
159 |             s.append(pmml.True_())
160 |             s.append(DecisionTreeConverter(est, regression_context, ModelMode.REGRESSION)._model())
161 |             segmentation.append(s)
162 | 
163 |         return segmentation
164 | 
165 | 
# register the converters of the gradient boosting classifier and of its initial log-odds estimator
estimator_to_converter.update({
    GradientBoostingClassifier: GradientBoostingConverter,
    LogOddsEstimator: LogOddsEstimatorConverter,
})


--------------------------------------------------------------------------------
/examples/pmml/GradientBoostingClassifier.pmml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" ?>
  2 | <ns1:PMML xmlns:ns1="http://www.dmg.org/PMML-4_2" version="4.2">
  3 |     <ns1:Header/>
  4 |     <ns1:DataDictionary>
  5 |         <ns1:DataField dataType="string" name="internal::y" optype="categorical">
  6 |             <ns1:Value value="negative"/>
  7 |             <ns1:Value value="positive"/>
  8 |         </ns1:DataField>
  9 |         <ns1:DataField dataType="string" name="y" optype="categorical">
 10 |             <ns1:Value value="negative"/>
 11 |             <ns1:Value value="positive"/>
 12 |         </ns1:DataField>
 13 |         <ns1:DataField dataType="double" name="col_0" optype="continuous"/>
 14 |         <ns1:DataField dataType="double" name="col_1" optype="continuous"/>
 15 |         <ns1:DataField dataType="double" name="col_2" optype="continuous"/>
 16 |         <ns1:DataField dataType="double" name="col_3" optype="continuous"/>
 17 |     </ns1:DataDictionary>
 18 |     <ns1:TransformationDictionary/>
 19 |     <ns1:MiningModel functionName="regression">
 20 |         <ns1:MiningSchema>
 21 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_0"/>
 22 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_1"/>
 23 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_2"/>
 24 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_3"/>
 25 |         </ns1:MiningSchema>
 26 |         <ns1:Output>
 27 |             <ns1:OutputField feature="predictedValue" name="internal::y"/>
 28 |             <ns1:OutputField dataType="double" feature="transformedValue" name="y::positive" optype="continuous">
 29 |                 <ns1:Apply function="/">
 30 |                     <ns1:Constant dataType="double">1.0</ns1:Constant>
 31 |                     <ns1:Apply function="+">
 32 |                         <ns1:Constant dataType="double">1.0</ns1:Constant>
 33 |                         <ns1:Apply function="exp">
 34 |                             <ns1:Apply function="*">
 35 |                                 <ns1:Constant dataType="double">-1.2</ns1:Constant>
 36 |                                 <ns1:FieldRef field="internal::y"/>
 37 |                             </ns1:Apply>
 38 |                         </ns1:Apply>
 39 |                     </ns1:Apply>
 40 |                 </ns1:Apply>
 41 |             </ns1:OutputField>
 42 |             <ns1:OutputField dataType="double" feature="transformedValue" name="y::negative" optype="continuous">
 43 |                 <ns1:Apply function="-">
 44 |                     <ns1:Constant dataType="double">1</ns1:Constant>
 45 |                     <ns1:FieldRef field="y::positive"/>
 46 |                 </ns1:Apply>
 47 |             </ns1:OutputField>
 48 |             <ns1:OutputField dataType="string" feature="transformedValue" name="y" optype="categorical">
 49 |                 <ns1:Discretize field="internal::y">
 50 |                     <ns1:DiscretizeBin binValue="negative">
 51 |                         <ns1:Interval closure="openOpen" rightMargin="0.0"/>
 52 |                     </ns1:DiscretizeBin>
 53 |                     <ns1:DiscretizeBin binValue="positive">
 54 |                         <ns1:Interval closure="closedOpen" leftMargin="0.0"/>
 55 |                     </ns1:DiscretizeBin>
 56 |                 </ns1:Discretize>
 57 |             </ns1:OutputField>
 58 |         </ns1:Output>
 59 |         <ns1:Segmentation multipleModelMethod="weightedAverage">
 60 |             <ns1:Segment weight="1.0">
 61 |                 <ns1:True/>
 62 |                 <ns1:RegressionModel algorithmName="linearRegression" functionName="regression">
 63 |                     <ns1:MiningSchema>
 64 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_0"/>
 65 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_1"/>
 66 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_2"/>
 67 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_3"/>
 68 |                         <ns1:MiningField name="internal::y" usageType="predicted"/>
 69 |                     </ns1:MiningSchema>
 70 |                     <ns1:RegressionTable intercept="0.104093891043"/>
 71 |                 </ns1:RegressionModel>
 72 |             </ns1:Segment>
 73 |             <ns1:Segment weight="0.1">
 74 |                 <ns1:True/>
 75 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
 76 |                     <ns1:MiningSchema>
 77 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_0"/>
 78 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_1"/>
 79 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_2"/>
 80 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_3"/>
 81 |                     </ns1:MiningSchema>
 82 |                     <ns1:Output>
 83 |                         <ns1:OutputField dataType="double" feature="predictedValue" name="('numeric', False, False)::y" optype="continuous"/>
 84 |                     </ns1:Output>
 85 |                     <ns1:Node recordCount="500.0">
 86 |                         <ns1:True/>
 87 |                         <ns1:Node recordCount="280.0">
 88 |                             <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="0.155709639192"/>
 89 |                             <ns1:Node recordCount="55.0" score="-1.01583773432">
 90 |                                 <ns1:SimplePredicate field="col_3" operator="lessOrEqual" value="-0.81821501255"/>
 91 |                             </ns1:Node>
 92 |                             <ns1:Node recordCount="225.0" score="-0.0775430096314">
 93 |                                 <ns1:True/>
 94 |                             </ns1:Node>
 95 |                         </ns1:Node>
 96 |                         <ns1:Node recordCount="220.0">
 97 |                             <ns1:True/>
 98 |                             <ns1:Node recordCount="78.0" score="0.97556099385">
 99 |                                 <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="0.628501057625"/>
100 |                             </ns1:Node>
101 |                             <ns1:Node recordCount="142.0" score="-0.0195458096157">
102 |                                 <ns1:True/>
103 |                             </ns1:Node>
104 |                         </ns1:Node>
105 |                     </ns1:Node>
106 |                 </ns1:TreeModel>
107 |             </ns1:Segment>
108 |             <ns1:Segment weight="0.1">
109 |                 <ns1:True/>
110 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
111 |                     <ns1:MiningSchema>
112 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_0"/>
113 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_1"/>
114 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_2"/>
115 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_3"/>
116 |                     </ns1:MiningSchema>
117 |                     <ns1:Output>
118 |                         <ns1:OutputField dataType="double" feature="predictedValue" name="('numeric', False, False)::y" optype="continuous"/>
119 |                     </ns1:Output>
120 |                     <ns1:Node recordCount="500.0">
121 |                         <ns1:True/>
122 |                         <ns1:Node recordCount="176.0">
123 |                             <ns1:SimplePredicate field="col_3" operator="lessOrEqual" value="-0.457421511412"/>
124 |                             <ns1:Node recordCount="152.0" score="-0.235672493053">
125 |                                 <ns1:SimplePredicate field="col_3" operator="lessOrEqual" value="-0.541861891747"/>
126 |                             </ns1:Node>
127 |                             <ns1:Node recordCount="24.0" score="-1.28228877166">
128 |                                 <ns1:True/>
129 |                             </ns1:Node>
130 |                         </ns1:Node>
131 |                         <ns1:Node recordCount="324.0">
132 |                             <ns1:True/>
133 |                             <ns1:Node recordCount="142.0" score="0.555457948501">
134 |                                 <ns1:SimplePredicate field="col_3" operator="lessOrEqual" value="0.346540868282"/>
135 |                             </ns1:Node>
136 |                             <ns1:Node recordCount="182.0" score="-0.0667138520138">
137 |                                 <ns1:True/>
138 |                             </ns1:Node>
139 |                         </ns1:Node>
140 |                     </ns1:Node>
141 |                 </ns1:TreeModel>
142 |             </ns1:Segment>
143 |         </ns1:Segmentation>
144 |     </ns1:MiningModel>
145 | </ns1:PMML>
146 | 


--------------------------------------------------------------------------------
/sklearn_pmml/test/data/gradient_boosting_classifier/document.pmml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" ?>
  2 | <ns1:PMML xmlns:ns1="http://www.dmg.org/PMML-4_2" version="4.2">
  3 |     <ns1:Header/>
  4 |     <ns1:DataDictionary>
  5 |         <ns1:DataField dataType="integer" name="x1" optype="continuous"/>
  6 |         <ns1:DataField dataType="string" name="x2" optype="categorical">
  7 |             <ns1:Value value="zero"/>
  8 |             <ns1:Value value="one"/>
  9 |         </ns1:DataField>
 10 |         <ns1:DataField dataType="double" name="output" optype="continuous"/>
 11 |     </ns1:DataDictionary>
 12 |     <ns1:TransformationDictionary>
 13 |         <ns1:DerivedField dataType="double" name="numeric#x2" optype="continuous">
 14 |             <ns1:MapValues dataType="double" outputColumn="output">
 15 |                 <ns1:FieldColumnPair column="input" field="x2"/>
 16 |                 <ns1:InlineTable>
 17 |                     <ns1:row><input>zero</input><output>0</output></ns1:row>
 18 |                     <ns1:row><input>one</input><output>1</output></ns1:row>
 19 |                 </ns1:InlineTable>
 20 |             </ns1:MapValues>
 21 |         </ns1:DerivedField>
 22 |     </ns1:TransformationDictionary>
 23 |     <ns1:MiningModel functionName="regression">
 24 |         <ns1:MiningSchema>
 25 |             <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
 26 |             <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
 27 |             <ns1:MiningField name="output" usageType="predicted"/>
 28 |         </ns1:MiningSchema>
 29 |         <ns1:Output>
 30 |             <ns1:OutputField feature="predictedValue" name="predictedValue"/>
 31 |             <ns1:OutputField dataType="double" feature="transformedValue" name="output" optype="continuous">
 32 |                 <ns1:Apply function="/">
 33 |                     <ns1:Constant dataType="double">1.0</ns1:Constant>
 34 |                     <ns1:Apply function="+">
 35 |                         <ns1:Constant dataType="double">1.0</ns1:Constant>
 36 |                         <ns1:Apply function="exp">
 37 |                             <ns1:Apply function="*">
 38 |                                 <ns1:Constant dataType="double">-2.0</ns1:Constant>
 39 |                                 <ns1:FieldRef field="predictedValue"/>
 40 |                             </ns1:Apply>
 41 |                         </ns1:Apply>
 42 |                     </ns1:Apply>
 43 |                 </ns1:Apply>
 44 |             </ns1:OutputField>
 45 |         </ns1:Output>
 46 |         <ns1:Segmentation multipleModelMethod="weightedAverage">
 47 |             <ns1:Segment weight="1.0">
 48 |                 <ns1:True/>
 49 |                 <ns1:RegressionModel algorithmName="linearRegression" functionName="regression">
 50 |                     <ns1:MiningSchema>
 51 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
 52 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
 53 |                         <ns1:MiningField name="output" usageType="predicted"/>
 54 |                     </ns1:MiningSchema>
 55 |                     <ns1:RegressionTable intercept="1.09861228867"/>
 56 |                 </ns1:RegressionModel>
 57 |             </ns1:Segment>
 58 |             <ns1:Segment weight="0.1">
 59 |                 <ns1:True/>
 60 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
 61 |                     <ns1:MiningSchema>
 62 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
 63 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
 64 |                         <ns1:MiningField name="output" usageType="predicted"/>
 65 |                     </ns1:MiningSchema>
 66 |                     <ns1:Node recordCount="4.0">
 67 |                         <ns1:True/>
 68 |                         <ns1:Node recordCount="2.0">
 69 |                             <ns1:SimplePredicate field="numeric#x2" operator="lessOrEqual" value="0.5"/>
 70 |                             <ns1:Node recordCount="1.0" score="-4.0">
 71 |                                 <ns1:SimplePredicate field="x1" operator="lessOrEqual" value="0.5"/>
 72 |                             </ns1:Node>
 73 |                             <ns1:Node recordCount="1.0" score="1.33333333333">
 74 |                                 <ns1:True/>
 75 |                             </ns1:Node>
 76 |                         </ns1:Node>
 77 |                         <ns1:Node recordCount="2.0" score="1.33333333333">
 78 |                             <ns1:True/>
 79 |                         </ns1:Node>
 80 |                     </ns1:Node>
 81 |                 </ns1:TreeModel>
 82 |             </ns1:Segment>
 83 |             <ns1:Segment weight="0.1">
 84 |                 <ns1:True/>
 85 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
 86 |                     <ns1:MiningSchema>
 87 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
 88 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
 89 |                         <ns1:MiningField name="output" usageType="predicted"/>
 90 |                     </ns1:MiningSchema>
 91 |                     <ns1:Node recordCount="4.0">
 92 |                         <ns1:True/>
 93 |                         <ns1:Node recordCount="2.0">
 94 |                             <ns1:SimplePredicate field="x1" operator="lessOrEqual" value="0.5"/>
 95 |                             <ns1:Node recordCount="1.0" score="-3.01096013811">
 96 |                                 <ns1:SimplePredicate field="numeric#x2" operator="lessOrEqual" value="0.5"/>
 97 |                             </ns1:Node>
 98 |                             <ns1:Node recordCount="1.0" score="1.29172443968">
 99 |                                 <ns1:True/>
100 |                             </ns1:Node>
101 |                         </ns1:Node>
102 |                         <ns1:Node recordCount="2.0" score="1.29172443968">
103 |                             <ns1:True/>
104 |                         </ns1:Node>
105 |                     </ns1:Node>
106 |                 </ns1:TreeModel>
107 |             </ns1:Segment>
108 |             <ns1:Segment weight="0.1">
109 |                 <ns1:True/>
110 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
111 |                     <ns1:MiningSchema>
112 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
113 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
114 |                         <ns1:MiningField name="output" usageType="predicted"/>
115 |                     </ns1:MiningSchema>
116 |                     <ns1:Node recordCount="4.0">
117 |                         <ns1:True/>
118 |                         <ns1:Node recordCount="2.0">
119 |                             <ns1:SimplePredicate field="numeric#x2" operator="lessOrEqual" value="0.5"/>
120 |                             <ns1:Node recordCount="1.0" score="-2.48812401278">
121 |                                 <ns1:SimplePredicate field="x1" operator="lessOrEqual" value="0.5"/>
122 |                             </ns1:Node>
123 |                             <ns1:Node recordCount="1.0" score="1.25637397364">
124 |                                 <ns1:True/>
125 |                             </ns1:Node>
126 |                         </ns1:Node>
127 |                         <ns1:Node recordCount="2.0" score="1.25637397364">
128 |                             <ns1:True/>
129 |                         </ns1:Node>
130 |                     </ns1:Node>
131 |                 </ns1:TreeModel>
132 |             </ns1:Segment>
133 |             <ns1:Segment weight="0.1">
134 |                 <ns1:True/>
135 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
136 |                     <ns1:MiningSchema>
137 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
138 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
139 |                         <ns1:MiningField name="output" usageType="predicted"/>
140 |                     </ns1:MiningSchema>
141 |                     <ns1:Node recordCount="4.0">
142 |                         <ns1:True/>
143 |                         <ns1:Node recordCount="2.0">
144 |                             <ns1:SimplePredicate field="x1" operator="lessOrEqual" value="0.5"/>
145 |                             <ns1:Node recordCount="1.0" score="-2.16032933416">
146 |                                 <ns1:SimplePredicate field="numeric#x2" operator="lessOrEqual" value="0.5"/>
147 |                             </ns1:Node>
148 |                             <ns1:Node recordCount="1.0" score="1.22610507292">
149 |                                 <ns1:True/>
150 |                             </ns1:Node>
151 |                         </ns1:Node>
152 |                         <ns1:Node recordCount="2.0" score="1.22610507292">
153 |                             <ns1:True/>
154 |                         </ns1:Node>
155 |                     </ns1:Node>
156 |                 </ns1:TreeModel>
157 |             </ns1:Segment>
158 |             <ns1:Segment weight="0.1">
159 |                 <ns1:True/>
160 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
161 |                     <ns1:MiningSchema>
162 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
163 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
164 |                         <ns1:MiningField name="output" usageType="predicted"/>
165 |                     </ns1:MiningSchema>
166 |                     <ns1:Node recordCount="4.0">
167 |                         <ns1:True/>
168 |                         <ns1:Node recordCount="2.0">
169 |                             <ns1:SimplePredicate field="numeric#x2" operator="lessOrEqual" value="0.5"/>
170 |                             <ns1:Node recordCount="1.0" score="-1.93488751678">
171 |                                 <ns1:SimplePredicate field="x1" operator="lessOrEqual" value="0.5"/>
172 |                             </ns1:Node>
173 |                             <ns1:Node recordCount="1.0" score="1.20001438888">
174 |                                 <ns1:True/>
175 |                             </ns1:Node>
176 |                         </ns1:Node>
177 |                         <ns1:Node recordCount="2.0" score="1.20001438888">
178 |                             <ns1:True/>
179 |                         </ns1:Node>
180 |                     </ns1:Node>
181 |                 </ns1:TreeModel>
182 |             </ns1:Segment>
183 |             <ns1:Segment weight="0.1">
184 |                 <ns1:True/>
185 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
186 |                     <ns1:MiningSchema>
187 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
188 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
189 |                         <ns1:MiningField name="output" usageType="predicted"/>
190 |                     </ns1:MiningSchema>
191 |                     <ns1:Node recordCount="4.0">
192 |                         <ns1:True/>
193 |                         <ns1:Node recordCount="2.0">
194 |                             <ns1:SimplePredicate field="numeric#x2" operator="lessOrEqual" value="0.5"/>
195 |                             <ns1:Node recordCount="1.0" score="-1.77042126871">
196 |                                 <ns1:SimplePredicate field="x1" operator="lessOrEqual" value="0.5"/>
197 |                             </ns1:Node>
198 |                             <ns1:Node recordCount="1.0" score="1.17739659388">
199 |                                 <ns1:True/>
200 |                             </ns1:Node>
201 |                         </ns1:Node>
202 |                         <ns1:Node recordCount="2.0" score="1.17739659388">
203 |                             <ns1:True/>
204 |                         </ns1:Node>
205 |                     </ns1:Node>
206 |                 </ns1:TreeModel>
207 |             </ns1:Segment>
208 |             <ns1:Segment weight="0.1">
209 |                 <ns1:True/>
210 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
211 |                     <ns1:MiningSchema>
212 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
213 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
214 |                         <ns1:MiningField name="output" usageType="predicted"/>
215 |                     </ns1:MiningSchema>
216 |                     <ns1:Node recordCount="4.0">
217 |                         <ns1:True/>
218 |                         <ns1:Node recordCount="2.0">
219 |                             <ns1:SimplePredicate field="numeric#x2" operator="lessOrEqual" value="0.5"/>
220 |                             <ns1:Node recordCount="1.0" score="-1.64541617456">
221 |                                 <ns1:SimplePredicate field="x1" operator="lessOrEqual" value="0.5"/>
222 |                             </ns1:Node>
223 |                             <ns1:Node recordCount="1.0" score="1.1576927012">
224 |                                 <ns1:True/>
225 |                             </ns1:Node>
226 |                         </ns1:Node>
227 |                         <ns1:Node recordCount="2.0" score="1.1576927012">
228 |                             <ns1:True/>
229 |                         </ns1:Node>
230 |                     </ns1:Node>
231 |                 </ns1:TreeModel>
232 |             </ns1:Segment>
233 |             <ns1:Segment weight="0.1">
234 |                 <ns1:True/>
235 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
236 |                     <ns1:MiningSchema>
237 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
238 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
239 |                         <ns1:MiningField name="output" usageType="predicted"/>
240 |                     </ns1:MiningSchema>
241 |                     <ns1:Node recordCount="4.0">
242 |                         <ns1:True/>
243 |                         <ns1:Node recordCount="2.0">
244 |                             <ns1:SimplePredicate field="numeric#x2" operator="lessOrEqual" value="0.5"/>
245 |                             <ns1:Node recordCount="1.0" score="-1.54749521566">
246 |                                 <ns1:SimplePredicate field="x1" operator="lessOrEqual" value="0.5"/>
247 |                             </ns1:Node>
248 |                             <ns1:Node recordCount="1.0" score="1.14045384647">
249 |                                 <ns1:True/>
250 |                             </ns1:Node>
251 |                         </ns1:Node>
252 |                         <ns1:Node recordCount="2.0" score="1.14045384647">
253 |                             <ns1:True/>
254 |                         </ns1:Node>
255 |                     </ns1:Node>
256 |                 </ns1:TreeModel>
257 |             </ns1:Segment>
258 |             <ns1:Segment weight="0.1">
259 |                 <ns1:True/>
260 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
261 |                     <ns1:MiningSchema>
262 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
263 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
264 |                         <ns1:MiningField name="output" usageType="predicted"/>
265 |                     </ns1:MiningSchema>
266 |                     <ns1:Node recordCount="4.0">
267 |                         <ns1:True/>
268 |                         <ns1:Node recordCount="2.0">
269 |                             <ns1:SimplePredicate field="numeric#x2" operator="lessOrEqual" value="0.5"/>
270 |                             <ns1:Node recordCount="1.0" score="-1.46900067214">
271 |                                 <ns1:SimplePredicate field="x1" operator="lessOrEqual" value="0.5"/>
272 |                             </ns1:Node>
273 |                             <ns1:Node recordCount="1.0" score="1.12531537443">
274 |                                 <ns1:True/>
275 |                             </ns1:Node>
276 |                         </ns1:Node>
277 |                         <ns1:Node recordCount="2.0" score="1.12531537443">
278 |                             <ns1:True/>
279 |                         </ns1:Node>
280 |                     </ns1:Node>
281 |                 </ns1:TreeModel>
282 |             </ns1:Segment>
283 |             <ns1:Segment weight="0.1">
284 |                 <ns1:True/>
285 |                 <ns1:TreeModel functionName="regression" splitCharacteristic="binarySplit">
286 |                     <ns1:MiningSchema>
287 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x1"/>
288 |                         <ns1:MiningField invalidValueTreatment="asIs" name="x2"/>
289 |                         <ns1:MiningField name="output" usageType="predicted"/>
290 |                     </ns1:MiningSchema>
291 |                     <ns1:Node recordCount="4.0">
292 |                         <ns1:True/>
293 |                         <ns1:Node recordCount="2.0">
294 |                             <ns1:SimplePredicate field="x1" operator="lessOrEqual" value="0.5"/>
295 |                             <ns1:Node recordCount="1.0" score="-1.40492591901">
296 |                                 <ns1:SimplePredicate field="numeric#x2" operator="lessOrEqual" value="0.5"/>
297 |                             </ns1:Node>
298 |                             <ns1:Node recordCount="1.0" score="1.11197795459">
299 |                                 <ns1:True/>
300 |                             </ns1:Node>
301 |                         </ns1:Node>
302 |                         <ns1:Node recordCount="2.0" score="1.11197795459">
303 |                             <ns1:True/>
304 |                         </ns1:Node>
305 |                     </ns1:Node>
306 |                 </ns1:TreeModel>
307 |             </ns1:Segment>
308 |         </ns1:Segmentation>
309 |     </ns1:MiningModel>
310 | </ns1:PMML>


--------------------------------------------------------------------------------
/sklearn_pmml/convert/model.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | from enum import Enum
  3 | from sklearn.base import ClassifierMixin, RegressorMixin, BaseEstimator
  4 | from sklearn_pmml import pmml
  5 | from sklearn_pmml.convert.utils import pmml_row, assert_equal
  6 | from sklearn_pmml.convert.features import *
  7 | from pyxb.utils.domutils import BindingDOMSupport as bds
  8 | import numpy as np
  9 | 
 10 | 
 11 | class TransformationContext(object):
 12 |     """
 13 |     Context holder object
 14 |     """
 15 | 
 16 |     def __init__(self, schemas=None):
 17 |         """
 18 |         :type schemas: dict[Schema, list[Feature]] | None
 19 |         """
 20 |         if schemas is None:
 21 |             schemas = {}
 22 |         self.schemas = schemas
 23 | 
 24 | 
class ModelMode(Enum):
    """
    Enumeration of the model modes: classification and regression.

    NOTE(review): presumably this is what converters receive as their ``mode`` argument - confirm against callers.
    """
    CLASSIFICATION = 'classification'
    REGRESSION = 'regression'
 28 | 
 29 | 
 30 | class Schema(Enum):
 31 |     INPUT = ('input', True, True)
 32 |     """
 33 |     Schema used to define input variables. Short names allowed
 34 |     """
 35 | 
 36 |     OUTPUT = ('output', True, True)
 37 |     """
 38 |     Schema used to define output variables. Short names allowed. For the categorical variables the continuous
 39 |     probability variables will be automatically created as <feature_name>.<feature_value>
 40 |     """
 41 | 
 42 |     DERIVED = ('derived', False, False)
 43 |     """
 44 |     Schema used to define derived features. Short names not allowed due to potential overlap with input variables.
 45 |     """
 46 | 
 47 |     NUMERIC = ('numeric', False, False)
 48 |     """
 49 |     Schema used to encode categorical features as numbers. Short names not allowed due to their overlap with
 50 |     input variables
 51 |     """
 52 | 
 53 |     MODEL = ('model', True, False)
 54 |     """
 55 |     Schema used to define features fed into the sklearn estimator.
 56 |     Short names allowed because these variables are not going into PMML.
 57 |     """
 58 | 
 59 |     INTERNAL = ('internal', False, True)
 60 |     """
 61 |     This schema may be used by complex converters to hide the variables used for internal needs
 62 |     (e.g. the raw predictions of GBRT)
 63 |     """
 64 | 
 65 |     CATEGORIES = ('categories', True, False)
 66 |     """
 67 |     This schema is used to extend categorical outputs with probabilities of categories
 68 |     """
 69 | 
 70 |     def __init__(self, name, short_names_allowed, data_dict_elibigle):
 71 |         self._name = name
 72 |         self._short_names_allowed = short_names_allowed
 73 |         self._data_dict_elibigle = data_dict_elibigle
 74 | 
 75 |     @property
 76 |     def namespace(self):
 77 |         """
 78 |         The namespace corresponding to the schema
 79 |         """
 80 |         return self._name
 81 | 
 82 |     @property
 83 |     def short_names_allowed(self):
 84 |         """
 85 |         The schema allows usage of short names instead of fully-qualified names
 86 |         """
 87 |         return self._short_names_allowed
 88 | 
 89 |     @property
 90 |     def eligible_for_data_dictionary(self):
 91 |         """
 92 |         The variables defined in the schema should appear in the DataDictionary
 93 |         """
 94 |         return self._data_dict_elibigle
 95 | 
 96 |     def extract_feature_name(self, f):
 97 |         """
 98 |         Extract the printed name of the feature.
 99 |         :param f: feature to work with
100 |         :type f: Feature|str
101 |         """
102 |         if self.short_names_allowed:
103 |             if isinstance(f, str):
104 |                 return f
105 |             else:
106 |                 return f.full_name
107 |         else:
108 |             return "{}.{}".format(self.namespace, f if isinstance(f, str) else f.full_name)
109 | 
110 | 
111 | class EstimatorConverter(object):
112 |     """
113 |     A new base class for the estimator converters
114 |     """
115 |     EPSILON = 0.00001
116 |     SCHEMAS_IN_MINING_MODEL = {Schema.INPUT, Schema.INTERNAL}
117 | 
118 |     def __init__(self, estimator, context, mode):
119 |         self.model_function = mode
120 |         self.estimator = estimator
121 |         self.context = context
122 | 
123 |         assert not any(isinstance(_, DerivedFeature) for _ in context.schemas[Schema.INPUT]), \
124 |             'Input schema represents the input fields only'
125 |         assert all(isinstance(_, DerivedFeature) for _ in context.schemas[Schema.DERIVED]), \
126 |             'Derived schema represents the set of automatically generated fields'
127 |         assert not any(isinstance(_, DerivedFeature) for _ in context.schemas[Schema.OUTPUT]), \
128 |             'Only regular features allowed in output schema; use Output transformation if you want to transform values'
129 | 
130 |         # create a new schema for categories probabilities
131 |         categories = []
132 |         for feature in context.schemas[Schema.OUTPUT]:
133 |             if isinstance(feature, CategoricalFeature):
134 |                 for value in feature.value_list:
135 |                     categories.append(RealNumericFeature(
136 |                         name=value,
137 |                         namespace=feature.name
138 |                     ))
139 |         context.schemas[Schema.CATEGORIES] = categories
140 | 
141 |     def data_dictionary(self):
142 |         """
143 |         Build a data dictionary and return a DataDictionary element.
144 | 
145 |         DataDictionary contains feature types for all variables used in the PMML,
146 |         except the ones defined as Derived Features
147 |         """
148 |         dd = pmml.DataDictionary()
149 |         for schema, fields in sorted(self.context.schemas.items(), key=lambda x: x[0].name):
150 |             assert isinstance(schema, Schema)
151 |             if schema.eligible_for_data_dictionary:
152 |                 for f in fields:
153 |                     data_field = pmml.DataField(
154 |                         dataType=f.data_type.value,
155 |                         name=schema.extract_feature_name(f),
156 |                         optype=f.optype.value)
157 |                     dd.DataField.append(data_field)
158 |                     if isinstance(f, CategoricalFeature):
159 |                         for v in f.value_list:
160 |                             data_field.append(pmml.Value(value_=v))
161 |         return dd
162 | 
163 |     def output(self):
164 |         """
165 |         Output section of PMML contains all model outputs.
166 |         :return: pmml.Output
167 |         """
168 |         output = pmml.Output()
169 | 
170 |         # the response variables
171 |         for feature in self.context.schemas[Schema.OUTPUT]:
172 |             output_field = pmml.OutputField(
173 |                 name=Schema.OUTPUT.extract_feature_name(feature),
174 |                 feature='predictedValue'
175 |             )
176 |             output.append(output_field)
177 | 
178 |         return output
179 | 
180 |     def transformation_dictionary(self):
181 |         """
182 |         Build a transformation dictionary and return a TransformationDictionary element
183 |         """
184 |         td = pmml.TransformationDictionary()
185 |         # define a schema with all variables available for a model
186 |         encoded_schema = []
187 |         self.context.schemas[Schema.NUMERIC] = encoded_schema
188 |         idx = {}
189 | 
190 |         # First, populate transformation dictionary for _all_ derived fields, because they can be requested later
191 |         for f in self.context.schemas[Schema.DERIVED]:
192 |             ef = RealNumericFeature(name=f.name)
193 |             df = pmml.DerivedField(
194 |                 name=ef.full_name,
195 |                 optype=ef.optype.value,
196 |                 dataType=ef.data_type.value
197 |             )
198 |             df.append(f.transformation)
199 |             td.append(df)
200 |             assert f.name not in idx, 'Duplicate field definition: {}'.format(f.name)
201 |             idx[f.name] = ef
202 | 
203 |         # second, define the numeric transformations for the categorical variables
204 |         for f in self.context.schemas[Schema.INPUT]:
205 |             assert f.name not in idx, 'Duplicate field definition: {}'.format(f.name)
206 |             if isinstance(f, CategoricalFeature):
207 |                 ef = RealNumericFeature(name=f.name, namespace=Schema.NUMERIC.namespace)
208 |                 # create a record in transformation dictionary with mapping from raw values into numbers
209 |                 df = pmml.DerivedField(
210 |                     name=ef.full_name,
211 |                     optype=ef.optype.value,
212 |                     dataType=ef.data_type.value
213 |                 )
214 |                 mv = pmml.MapValues(outputColumn='output', dataType=ef.data_type.value)
215 |                 mv.append(pmml.FieldColumnPair(field=f.full_name, column='input'))
216 |                 it = pmml.InlineTable()
217 |                 for i, v in enumerate(f.value_list):
218 |                     it.append(pmml_row(input=v, output=i))
219 |                 td.append(df.append(mv.append(it)))
220 |                 idx[f.name] = ef
221 |             else:
222 |                 idx[f.name] = f
223 | 
224 |         # now we can build a mirror of model schema into the numeric schema
225 |         self.context.schemas[Schema.NUMERIC] = [idx[f.name] for f in self.context.schemas[Schema.MODEL]]
226 | 
227 |         return td
228 | 
229 |     def model(self, verification_data=None):
230 |         """
231 |         Build a mining model and return one of the MODEL-ELEMENTs
232 |         """
233 |         pass
234 | 
235 |     def model_verification(self, verification_data):
236 |         """
237 |         Use the input verification_data, apply the transformations, evaluate the model response and produce the
238 |         ModelVerification element
239 |         :param verification_data: list of dictionaries or data frame
240 |         :type verification_data: dict[str, object]|pd.DataFrame
241 |         :return: ModelVerification element
242 |         """
243 |         verification_data = pd.DataFrame(verification_data)
244 |         assert len(verification_data) > 0, 'Verification data can not be empty'
245 | 
246 |         verification_input = pd.DataFrame(index=verification_data.index)
247 |         verification_model_input = pd.DataFrame(index=verification_data.index)
248 |         for key in self.context.schemas[Schema.INPUT]:
249 |             # all input features MUST be present in the verification_data
250 |             assert key.full_name in verification_data.columns, 'Missing input field "{}"'.format(key.full_name)
251 |             verification_input[Schema.INPUT.extract_feature_name(key)] = verification_data[key.full_name]
252 |             if isinstance(key, CategoricalFeature):
253 |                 verification_model_input[Schema.INPUT.extract_feature_name(key)] = np.vectorize(key.to_number)(verification_data[key.full_name])
254 |             else:
255 |                 verification_model_input[Schema.INPUT.extract_feature_name(key)] = verification_data[key.full_name]
256 | 
257 |         for key in self.context.schemas[Schema.DERIVED]:
258 |             assert isinstance(key, DerivedFeature), 'Only DerivedFeatures are allowed in the DERIVED schema'
259 |             verification_model_input[key.full_name] = key.apply(verification_input)
260 | 
261 |         # at this point we can check that MODEL schema contains only known features
262 |         for key in self.context.schemas[Schema.MODEL]:
263 |             assert Schema.MODEL.extract_feature_name(key) in verification_model_input.columns, \
264 |                 'Unknown feature "{}" in the MODEL schema'.format(key.full_name)
265 | 
266 |         # TODO: we can actually support multiple columns, but need to figure out the way to extract the data
267 |         # TODO: from the estimator properly
268 |         # building model results
269 |         assert len(self.context.schemas[Schema.OUTPUT]) == 1, 'Only one output is currently supported'
270 |         key = self.context.schemas[Schema.OUTPUT][0]
271 |         model_input = verification_model_input[list(map(Schema.MODEL.extract_feature_name, self.context.schemas[Schema.MODEL]))].values
272 |         model_results = np.vectorize(key.from_number)(self.estimator.predict(X=model_input))
273 |         if key.full_name in verification_data:
274 |             # make sure that if results are provided, the expected and actual values are equal
275 |             assert_equal(key, model_results, verification_data[key.full_name].values)
276 |         verification_input[Schema.OUTPUT.extract_feature_name(key)] = model_results
277 | 
278 |         if isinstance(key, CategoricalFeature):
279 |             probabilities = self.estimator.predict_proba(X=model_input)
280 |             for i, key in enumerate(self.context.schemas[Schema.CATEGORIES]):
281 |                 verification_input[Schema.CATEGORIES.extract_feature_name(key)] = probabilities[:, i]
282 | 
283 |         fields = []
284 |         field_names = []
285 |         for s in [Schema.INPUT, Schema.OUTPUT, Schema.CATEGORIES]:
286 |             fields += self.context.schemas[s]
287 |             field_names += list(map(s.extract_feature_name, self.context.schemas[s]))
288 | 
289 |         mv = pmml.ModelVerification(recordCount=len(verification_input), fieldCount=len(fields))
290 | 
291 |         # step one: build verification schema
292 |         verification_fields = pmml.VerificationFields()
293 |         for key in fields:
294 |             if isinstance(key, NumericFeature):
295 |                 vf = pmml.VerificationField(field=key.name, column=key.name, precision=self.EPSILON)
296 |             else:
297 |                 vf = pmml.VerificationField(field=key.name, column=key.name)
298 |             verification_fields.append(vf)
299 |         mv.append(verification_fields)
300 | 
301 |         # step two: build data table
302 |         it = pmml.InlineTable()
303 |         for data in verification_input.iterrows():
304 |             data = data[1]
305 |             row = pmml.row()
306 |             row_empty = True
307 |             for key in field_names:
308 |                 if verification_input[key].dtype == object or not np.isnan(data[key]):
309 |                     col = bds().createChildElement(key)
310 |                     bds().appendTextChild(data[key], col)
311 |                     row.append(col)
312 |                     row_empty = False
313 |             if not row_empty:
314 |                 it.append(row)
315 |         mv.append(it)
316 | 
317 |         return mv
318 | 
319 |     def mining_schema(self):
320 |         """
321 |         Mining schema contains the model input features.
322 |         NOTE: In order to avoid duplicates, I've decided to remove output features from MiningSchema
323 |         NOTE: We don't need to specify any DERIVED/NUMERIC fields here, because PMML interpreter will create them
324 |         in a lazy manner.
325 |         """
326 |         ms = pmml.MiningSchema()
327 | 
328 |         if Schema.INPUT in self.SCHEMAS_IN_MINING_MODEL:
329 |             for f in sorted(self.context.schemas[Schema.INPUT], key=lambda _: _.full_name):
330 |                 ms.append(pmml.MiningField(invalidValueTreatment=f.invalid_value_treatment.value, name=f.full_name))
331 | 
332 |         for s in [Schema.OUTPUT, Schema.INTERNAL]:
333 |             if s in self.SCHEMAS_IN_MINING_MODEL:
334 |                 for f in self.context.schemas.get(s, []):
335 |                     ms.append(pmml.MiningField(
336 |                         name=s.extract_feature_name(f),
337 |                         usageType="predicted"
338 |                     ))
339 | 
340 |         return ms
341 | 
342 |     def header(self):
343 |         """
344 |         Build and return Header element
345 |         """
346 |         return pmml.Header()
347 | 
348 |     def pmml(self, verification_data=None):
349 |         """
350 |         Build PMML from the context and estimator.
351 |         Returns PMML element
352 |         """
353 |         p = pmml.PMML(version="4.2")
354 |         p.append(self.header())
355 |         p.append(self.data_dictionary())
356 |         p.append(self.transformation_dictionary())
357 |         p.append(self.model(verification_data))
358 |         return p
359 | 
360 | 
class ClassifierConverter(EstimatorConverter):
    """
    Base class for classifier converters.
    Every feature of the output schema must be categorical.
    The produced Output section holds one label field per output feature and one probability field per
    feature of the CATEGORIES schema (in the bundled example these are named like "y" and "y::0", with
    the probabilities targeting "internal::y").
    """
    def __init__(self, estimator, context):
        """
        Set up the converter in classification mode and register the INTERNAL schema.
        :param estimator: Estimator to convert
        :type estimator: BaseEstimator
        :param context: context to work with
        :type context: TransformationContext
        """
        super(ClassifierConverter, self).__init__(estimator, context, ModelMode.CLASSIFICATION)
        assert isinstance(estimator, ClassifierMixin), 'Classifier converter should only be applied to the classification models'
        for output_feature in context.schemas[Schema.OUTPUT]:
            assert isinstance(output_feature, CategoricalFeature), 'Only categorical outputs are supported for classification task'

        # create hidden variables for each categorical output
        self.context.schemas[Schema.INTERNAL] = [
            candidate
            for candidate in self.context.schemas[Schema.OUTPUT]
            if isinstance(candidate, CategoricalFeature)
        ]

    def output(self):
        """
        Build the Output section of the model.
        It contains a 'predictedValue' field for every feature of the OUTPUT schema, followed by a
        'probability' field for every feature of the CATEGORIES schema.
        :return: pmml.Output
        """
        output = pmml.Output()

        # the response variables
        for response in self.context.schemas[Schema.OUTPUT]:
            output.append(pmml.OutputField(
                name=Schema.OUTPUT.extract_feature_name(response),
                feature='predictedValue',
                optype=response.optype.value,
                dataType=response.data_type.value
            ))

        # the probabilities for categories; should only be populated for classification jobs
        for category in self.context.schemas[Schema.CATEGORIES]:
            probability_attributes = dict(
                name=Schema.CATEGORIES.extract_feature_name(category),
                optype=category.optype.value,
                dataType=category.data_type.value,
                feature='probability',
                # the probability refers to the hidden (INTERNAL) counterpart of the category's namespace
                targetField=Schema.INTERNAL.extract_feature_name(category.namespace),
                value_=category.name
            )
            output.append(pmml.OutputField(**probability_attributes))

        return output
416 | 
417 | 
class RegressionConverter(EstimatorConverter):
    """
    Converter that initializes the base EstimatorConverter with ModelMode.REGRESSION.
    """
    def __init__(self, estimator, context):
        """
        :param estimator: Estimator to convert
        :param context: context to work with
        """
        regression_mode = ModelMode.REGRESSION
        super(RegressionConverter, self).__init__(estimator, context, regression_mode)


--------------------------------------------------------------------------------
/examples/pmml/RandomForestClassifier.pmml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" ?>
  2 | <ns1:PMML xmlns:ns1="http://www.dmg.org/PMML-4_2" version="4.2">
  3 |     <ns1:Header/>
  4 |     <ns1:DataDictionary>
  5 |         <ns1:DataField dataType="integer" name="internal::y" optype="categorical">
  6 |             <ns1:Value value="0"/>
  7 |             <ns1:Value value="1"/>
  8 |             <ns1:Value value="2"/>
  9 |         </ns1:DataField>
 10 |         <ns1:DataField dataType="integer" name="y" optype="categorical">
 11 |             <ns1:Value value="0"/>
 12 |             <ns1:Value value="1"/>
 13 |             <ns1:Value value="2"/>
 14 |         </ns1:DataField>
 15 |         <ns1:DataField dataType="double" name="col_0" optype="continuous"/>
 16 |         <ns1:DataField dataType="double" name="col_1" optype="continuous"/>
 17 |         <ns1:DataField dataType="double" name="col_2" optype="continuous"/>
 18 |         <ns1:DataField dataType="double" name="col_3" optype="continuous"/>
 19 |     </ns1:DataDictionary>
 20 |     <ns1:TransformationDictionary/>
 21 |     <ns1:MiningModel functionName="classification">
 22 |         <ns1:MiningSchema>
 23 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_0"/>
 24 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_1"/>
 25 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_2"/>
 26 |             <ns1:MiningField invalidValueTreatment="asIs" name="col_3"/>
 27 |             <ns1:MiningField name="internal::y" usageType="predicted"/>
 28 |         </ns1:MiningSchema>
 29 |         <ns1:Output>
 30 |             <ns1:OutputField dataType="integer" feature="predictedValue" name="y" optype="categorical"/>
 31 |             <ns1:OutputField dataType="double" feature="probability" name="y::0" optype="continuous" targetField="internal::y" value="0"/>
 32 |             <ns1:OutputField dataType="double" feature="probability" name="y::1" optype="continuous" targetField="internal::y" value="1"/>
 33 |             <ns1:OutputField dataType="double" feature="probability" name="y::2" optype="continuous" targetField="internal::y" value="2"/>
 34 |         </ns1:Output>
 35 |         <ns1:Segmentation multipleModelMethod="weightedAverage">
 36 |             <ns1:Segment id="0">
 37 |                 <ns1:True/>
 38 |                 <ns1:TreeModel functionName="classification" splitCharacteristic="binarySplit">
 39 |                     <ns1:MiningSchema>
 40 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_0"/>
 41 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_1"/>
 42 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_2"/>
 43 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_3"/>
 44 |                         <ns1:MiningField name="internal::y" usageType="predicted"/>
 45 |                     </ns1:MiningSchema>
 46 |                     <ns1:Output>
 47 |                         <ns1:OutputField dataType="integer" feature="predictedValue" name="y" optype="categorical"/>
 48 |                         <ns1:OutputField dataType="double" feature="probability" name="y::0" optype="continuous" targetField="internal::y" value="0"/>
 49 |                         <ns1:OutputField dataType="double" feature="probability" name="y::1" optype="continuous" targetField="internal::y" value="1"/>
 50 |                         <ns1:OutputField dataType="double" feature="probability" name="y::2" optype="continuous" targetField="internal::y" value="2"/>
 51 |                     </ns1:Output>
 52 |                     <ns1:Node recordCount="321.0" score="0">
 53 |                         <ns1:True/>
 54 |                         <ns1:ScoreDistribution confidence="0.535825545171" recordCount="172.0" value="0"/>
 55 |                         <ns1:ScoreDistribution confidence="0.489096573209" recordCount="157.0" value="1"/>
 56 |                         <ns1:ScoreDistribution confidence="0.532710280374" recordCount="171.0" value="2"/>
 57 |                         <ns1:Node recordCount="11.0" score="1">
 58 |                             <ns1:SimplePredicate field="col_1" operator="lessOrEqual" value="-2.0007185936"/>
 59 |                             <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
 60 |                             <ns1:ScoreDistribution confidence="1.27272727273" recordCount="14.0" value="1"/>
 61 |                             <ns1:ScoreDistribution confidence="0.181818181818" recordCount="2.0" value="2"/>
 62 |                             <ns1:Node recordCount="9.0" score="1">
 63 |                                 <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="0.638656973839"/>
 64 |                                 <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
 65 |                                 <ns1:ScoreDistribution confidence="1.0" recordCount="12.0" value="1"/>
 66 |                                 <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="2"/>
 67 |                             </ns1:Node>
 68 |                             <ns1:Node recordCount="2.0" score="1">
 69 |                                 <ns1:True/>
 70 |                                 <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
 71 |                                 <ns1:ScoreDistribution confidence="1.0" recordCount="2.0" value="1"/>
 72 |                                 <ns1:ScoreDistribution confidence="1.0" recordCount="2.0" value="2"/>
 73 |                                 <ns1:Node recordCount="1.0" score="1">
 74 |                                     <ns1:SimplePredicate field="col_0" operator="lessOrEqual" value="-0.382083296776"/>
 75 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
 76 |                                     <ns1:ScoreDistribution confidence="1.0" recordCount="2.0" value="1"/>
 77 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="2"/>
 78 |                                 </ns1:Node>
 79 |                                 <ns1:Node recordCount="1.0" score="2">
 80 |                                     <ns1:True/>
 81 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
 82 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="1"/>
 83 |                                     <ns1:ScoreDistribution confidence="1.0" recordCount="2.0" value="2"/>
 84 |                                 </ns1:Node>
 85 |                             </ns1:Node>
 86 |                         </ns1:Node>
 87 |                         <ns1:Node recordCount="310.0" score="0">
 88 |                             <ns1:True/>
 89 |                             <ns1:ScoreDistribution confidence="0.554838709677" recordCount="172.0" value="0"/>
 90 |                             <ns1:ScoreDistribution confidence="0.461290322581" recordCount="143.0" value="1"/>
 91 |                             <ns1:ScoreDistribution confidence="0.545161290323" recordCount="169.0" value="2"/>
 92 |                             <ns1:Node recordCount="252.0" score="2">
 93 |                                 <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="0.997073411942"/>
 94 |                                 <ns1:ScoreDistribution confidence="0.575396825397" recordCount="145.0" value="0"/>
 95 |                                 <ns1:ScoreDistribution confidence="0.428571428571" recordCount="108.0" value="1"/>
 96 |                                 <ns1:ScoreDistribution confidence="0.587301587302" recordCount="148.0" value="2"/>
 97 |                                 <ns1:Node recordCount="217.0" score="2">
 98 |                                     <ns1:SimplePredicate field="col_0" operator="lessOrEqual" value="1.07116675377"/>
 99 |                                     <ns1:ScoreDistribution confidence="0.31884057971" recordCount="110.0" value="0"/>
100 |                                     <ns1:ScoreDistribution confidence="0.28115942029" recordCount="97.0" value="1"/>
101 |                                     <ns1:ScoreDistribution confidence="0.4" recordCount="138.0" value="2"/>
102 |                                 </ns1:Node>
103 |                                 <ns1:Node recordCount="35.0" score="0">
104 |                                     <ns1:True/>
105 |                                     <ns1:ScoreDistribution confidence="0.625" recordCount="35.0" value="0"/>
106 |                                     <ns1:ScoreDistribution confidence="0.196428571429" recordCount="11.0" value="1"/>
107 |                                     <ns1:ScoreDistribution confidence="0.178571428571" recordCount="10.0" value="2"/>
108 |                                 </ns1:Node>
109 |                             </ns1:Node>
110 |                             <ns1:Node recordCount="58.0" score="1">
111 |                                 <ns1:True/>
112 |                                 <ns1:ScoreDistribution confidence="0.465517241379" recordCount="27.0" value="0"/>
113 |                                 <ns1:ScoreDistribution confidence="0.603448275862" recordCount="35.0" value="1"/>
114 |                                 <ns1:ScoreDistribution confidence="0.362068965517" recordCount="21.0" value="2"/>
115 |                                 <ns1:Node recordCount="34.0" score="1">
116 |                                     <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="1.56480169296"/>
117 |                                     <ns1:ScoreDistribution confidence="0.291666666667" recordCount="14.0" value="0"/>
118 |                                     <ns1:ScoreDistribution confidence="0.520833333333" recordCount="25.0" value="1"/>
119 |                                     <ns1:ScoreDistribution confidence="0.1875" recordCount="9.0" value="2"/>
120 |                                 </ns1:Node>
121 |                                 <ns1:Node recordCount="24.0" score="0">
122 |                                     <ns1:True/>
123 |                                     <ns1:ScoreDistribution confidence="0.371428571429" recordCount="13.0" value="0"/>
124 |                                     <ns1:ScoreDistribution confidence="0.285714285714" recordCount="10.0" value="1"/>
125 |                                     <ns1:ScoreDistribution confidence="0.342857142857" recordCount="12.0" value="2"/>
126 |                                 </ns1:Node>
127 |                             </ns1:Node>
128 |                         </ns1:Node>
129 |                     </ns1:Node>
130 |                 </ns1:TreeModel>
131 |             </ns1:Segment>
132 |             <ns1:Segment id="1">
133 |                 <ns1:True/>
134 |                 <ns1:TreeModel functionName="classification" splitCharacteristic="binarySplit">
135 |                     <ns1:MiningSchema>
136 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_0"/>
137 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_1"/>
138 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_2"/>
139 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_3"/>
140 |                         <ns1:MiningField name="internal::y" usageType="predicted"/>
141 |                     </ns1:MiningSchema>
142 |                     <ns1:Output>
143 |                         <ns1:OutputField dataType="integer" feature="predictedValue" name="y" optype="categorical"/>
144 |                         <ns1:OutputField dataType="double" feature="probability" name="y::0" optype="continuous" targetField="internal::y" value="0"/>
145 |                         <ns1:OutputField dataType="double" feature="probability" name="y::1" optype="continuous" targetField="internal::y" value="1"/>
146 |                         <ns1:OutputField dataType="double" feature="probability" name="y::2" optype="continuous" targetField="internal::y" value="2"/>
147 |                     </ns1:Output>
148 |                     <ns1:Node recordCount="336.0" score="1">
149 |                         <ns1:True/>
150 |                         <ns1:ScoreDistribution confidence="0.449404761905" recordCount="151.0" value="0"/>
151 |                         <ns1:ScoreDistribution confidence="0.5625" recordCount="189.0" value="1"/>
152 |                         <ns1:ScoreDistribution confidence="0.47619047619" recordCount="160.0" value="2"/>
153 |                         <ns1:Node recordCount="12.0" score="1">
154 |                             <ns1:SimplePredicate field="col_1" operator="lessOrEqual" value="-1.92965960503"/>
155 |                             <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
156 |                             <ns1:ScoreDistribution confidence="1.41666666667" recordCount="17.0" value="1"/>
157 |                             <ns1:ScoreDistribution confidence="0.166666666667" recordCount="2.0" value="2"/>
158 |                             <ns1:Node recordCount="10.0" score="1">
159 |                                 <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="0.709010601044"/>
160 |                                 <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
161 |                                 <ns1:ScoreDistribution confidence="1.6" recordCount="16.0" value="1"/>
162 |                                 <ns1:ScoreDistribution confidence="0.1" recordCount="1.0" value="2"/>
163 |                                 <ns1:Node recordCount="3.0" score="1">
164 |                                     <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="-1.03807127476"/>
165 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
166 |                                     <ns1:ScoreDistribution confidence="0.8" recordCount="4.0" value="1"/>
167 |                                     <ns1:ScoreDistribution confidence="0.2" recordCount="1.0" value="2"/>
168 |                                 </ns1:Node>
169 |                                 <ns1:Node recordCount="7.0" score="1">
170 |                                     <ns1:True/>
171 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
172 |                                     <ns1:ScoreDistribution confidence="1.0" recordCount="12.0" value="1"/>
173 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="2"/>
174 |                                 </ns1:Node>
175 |                             </ns1:Node>
176 |                             <ns1:Node recordCount="2.0" score="1">
177 |                                 <ns1:True/>
178 |                                 <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
179 |                                 <ns1:ScoreDistribution confidence="0.5" recordCount="1.0" value="1"/>
180 |                                 <ns1:ScoreDistribution confidence="0.5" recordCount="1.0" value="2"/>
181 |                                 <ns1:Node recordCount="1.0" score="1">
182 |                                     <ns1:SimplePredicate field="col_1" operator="lessOrEqual" value="-2.05933618546"/>
183 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
184 |                                     <ns1:ScoreDistribution confidence="1.0" recordCount="1.0" value="1"/>
185 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="2"/>
186 |                                 </ns1:Node>
187 |                                 <ns1:Node recordCount="1.0" score="2">
188 |                                     <ns1:True/>
189 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
190 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="1"/>
191 |                                     <ns1:ScoreDistribution confidence="1.0" recordCount="1.0" value="2"/>
192 |                                 </ns1:Node>
193 |                             </ns1:Node>
194 |                         </ns1:Node>
195 |                         <ns1:Node recordCount="324.0" score="1">
196 |                             <ns1:True/>
197 |                             <ns1:ScoreDistribution confidence="0.466049382716" recordCount="151.0" value="0"/>
198 |                             <ns1:ScoreDistribution confidence="0.530864197531" recordCount="172.0" value="1"/>
199 |                             <ns1:ScoreDistribution confidence="0.487654320988" recordCount="158.0" value="2"/>
200 |                             <ns1:Node recordCount="290.0" score="2">
201 |                                 <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="1.3240275383"/>
202 |                                 <ns1:ScoreDistribution confidence="0.472413793103" recordCount="137.0" value="0"/>
203 |                                 <ns1:ScoreDistribution confidence="0.506896551724" recordCount="147.0" value="1"/>
204 |                                 <ns1:ScoreDistribution confidence="0.520689655172" recordCount="151.0" value="2"/>
205 |                                 <ns1:Node recordCount="279.0" score="2">
206 |                                     <ns1:SimplePredicate field="col_3" operator="lessOrEqual" value="1.9689218998"/>
207 |                                     <ns1:ScoreDistribution confidence="0.313397129187" recordCount="131.0" value="0"/>
208 |                                     <ns1:ScoreDistribution confidence="0.327751196172" recordCount="137.0" value="1"/>
209 |                                     <ns1:ScoreDistribution confidence="0.358851674641" recordCount="150.0" value="2"/>
210 |                                 </ns1:Node>
211 |                                 <ns1:Node recordCount="11.0" score="1">
212 |                                     <ns1:True/>
213 |                                     <ns1:ScoreDistribution confidence="0.352941176471" recordCount="6.0" value="0"/>
214 |                                     <ns1:ScoreDistribution confidence="0.588235294118" recordCount="10.0" value="1"/>
215 |                                     <ns1:ScoreDistribution confidence="0.0588235294118" recordCount="1.0" value="2"/>
216 |                                 </ns1:Node>
217 |                             </ns1:Node>
218 |                             <ns1:Node recordCount="34.0" score="1">
219 |                                 <ns1:True/>
220 |                                 <ns1:ScoreDistribution confidence="0.411764705882" recordCount="14.0" value="0"/>
221 |                                 <ns1:ScoreDistribution confidence="0.735294117647" recordCount="25.0" value="1"/>
222 |                                 <ns1:ScoreDistribution confidence="0.205882352941" recordCount="7.0" value="2"/>
223 |                                 <ns1:Node recordCount="2.0" score="1">
224 |                                     <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="1.34579753876"/>
225 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
226 |                                     <ns1:ScoreDistribution confidence="1.0" recordCount="4.0" value="1"/>
227 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="2"/>
228 |                                 </ns1:Node>
229 |                                 <ns1:Node recordCount="32.0" score="1">
230 |                                     <ns1:True/>
231 |                                     <ns1:ScoreDistribution confidence="0.333333333333" recordCount="14.0" value="0"/>
232 |                                     <ns1:ScoreDistribution confidence="0.5" recordCount="21.0" value="1"/>
233 |                                     <ns1:ScoreDistribution confidence="0.166666666667" recordCount="7.0" value="2"/>
234 |                                 </ns1:Node>
235 |                             </ns1:Node>
236 |                         </ns1:Node>
237 |                     </ns1:Node>
238 |                 </ns1:TreeModel>
239 |             </ns1:Segment>
240 |             <ns1:Segment id="2">
241 |                 <ns1:True/>
242 |                 <ns1:TreeModel functionName="classification" splitCharacteristic="binarySplit">
243 |                     <ns1:MiningSchema>
244 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_0"/>
245 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_1"/>
246 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_2"/>
247 |                         <ns1:MiningField invalidValueTreatment="asIs" name="col_3"/>
248 |                         <ns1:MiningField name="internal::y" usageType="predicted"/>
249 |                     </ns1:MiningSchema>
250 |                     <ns1:Output>
251 |                         <ns1:OutputField dataType="integer" feature="predictedValue" name="y" optype="categorical"/>
252 |                         <ns1:OutputField dataType="double" feature="probability" name="y::0" optype="continuous" targetField="internal::y" value="0"/>
253 |                         <ns1:OutputField dataType="double" feature="probability" name="y::1" optype="continuous" targetField="internal::y" value="1"/>
254 |                         <ns1:OutputField dataType="double" feature="probability" name="y::2" optype="continuous" targetField="internal::y" value="2"/>
255 |                     </ns1:Output>
256 |                     <ns1:Node recordCount="321.0" score="1">
257 |                         <ns1:True/>
258 |                         <ns1:ScoreDistribution confidence="0.510903426791" recordCount="164.0" value="0"/>
259 |                         <ns1:ScoreDistribution confidence="0.570093457944" recordCount="183.0" value="1"/>
260 |                         <ns1:ScoreDistribution confidence="0.476635514019" recordCount="153.0" value="2"/>
261 |                         <ns1:Node recordCount="21.0" score="1">
262 |                             <ns1:SimplePredicate field="col_0" operator="lessOrEqual" value="-1.47007727623"/>
263 |                             <ns1:ScoreDistribution confidence="0.333333333333" recordCount="7.0" value="0"/>
264 |                             <ns1:ScoreDistribution confidence="1.19047619048" recordCount="25.0" value="1"/>
265 |                             <ns1:ScoreDistribution confidence="0.190476190476" recordCount="4.0" value="2"/>
266 |                             <ns1:Node recordCount="6.0" score="0">
267 |                                 <ns1:SimplePredicate field="col_0" operator="lessOrEqual" value="-2.19679045677"/>
268 |                                 <ns1:ScoreDistribution confidence="0.833333333333" recordCount="5.0" value="0"/>
269 |                                 <ns1:ScoreDistribution confidence="0.5" recordCount="3.0" value="1"/>
270 |                                 <ns1:ScoreDistribution confidence="0.333333333333" recordCount="2.0" value="2"/>
271 |                                 <ns1:Node recordCount="2.0" score="1">
272 |                                     <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="0.17954005301"/>
273 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
274 |                                     <ns1:ScoreDistribution confidence="1.0" recordCount="3.0" value="1"/>
275 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="2"/>
276 |                                 </ns1:Node>
277 |                                 <ns1:Node recordCount="4.0" score="0">
278 |                                     <ns1:True/>
279 |                                     <ns1:ScoreDistribution confidence="0.714285714286" recordCount="5.0" value="0"/>
280 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="1"/>
281 |                                     <ns1:ScoreDistribution confidence="0.285714285714" recordCount="2.0" value="2"/>
282 |                                 </ns1:Node>
283 |                             </ns1:Node>
284 |                             <ns1:Node recordCount="15.0" score="1">
285 |                                 <ns1:True/>
286 |                                 <ns1:ScoreDistribution confidence="0.133333333333" recordCount="2.0" value="0"/>
287 |                                 <ns1:ScoreDistribution confidence="1.46666666667" recordCount="22.0" value="1"/>
288 |                                 <ns1:ScoreDistribution confidence="0.133333333333" recordCount="2.0" value="2"/>
289 |                                 <ns1:Node recordCount="12.0" score="1">
290 |                                     <ns1:SimplePredicate field="col_2" operator="lessOrEqual" value="0.618381023407"/>
291 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
292 |                                     <ns1:ScoreDistribution confidence="0.952380952381" recordCount="20.0" value="1"/>
293 |                                     <ns1:ScoreDistribution confidence="0.047619047619" recordCount="1.0" value="2"/>
294 |                                 </ns1:Node>
295 |                                 <ns1:Node recordCount="3.0" score="0">
296 |                                     <ns1:True/>
297 |                                     <ns1:ScoreDistribution confidence="0.4" recordCount="2.0" value="0"/>
298 |                                     <ns1:ScoreDistribution confidence="0.4" recordCount="2.0" value="1"/>
299 |                                     <ns1:ScoreDistribution confidence="0.2" recordCount="1.0" value="2"/>
300 |                                 </ns1:Node>
301 |                             </ns1:Node>
302 |                         </ns1:Node>
303 |                         <ns1:Node recordCount="300.0" score="1">
304 |                             <ns1:True/>
305 |                             <ns1:ScoreDistribution confidence="0.523333333333" recordCount="157.0" value="0"/>
306 |                             <ns1:ScoreDistribution confidence="0.526666666667" recordCount="158.0" value="1"/>
307 |                             <ns1:ScoreDistribution confidence="0.496666666667" recordCount="149.0" value="2"/>
308 |                             <ns1:Node recordCount="39.0" score="2">
309 |                                 <ns1:SimplePredicate field="col_0" operator="lessOrEqual" value="-0.854827046394"/>
310 |                                 <ns1:ScoreDistribution confidence="0.487179487179" recordCount="19.0" value="0"/>
311 |                                 <ns1:ScoreDistribution confidence="0.230769230769" recordCount="9.0" value="1"/>
312 |                                 <ns1:ScoreDistribution confidence="0.74358974359" recordCount="29.0" value="2"/>
313 |                                 <ns1:Node recordCount="2.0" score="1">
314 |                                     <ns1:SimplePredicate field="col_1" operator="lessOrEqual" value="-1.53912472725"/>
315 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
316 |                                     <ns1:ScoreDistribution confidence="1.0" recordCount="2.0" value="1"/>
317 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="2"/>
318 |                                 </ns1:Node>
319 |                                 <ns1:Node recordCount="37.0" score="2">
320 |                                     <ns1:True/>
321 |                                     <ns1:ScoreDistribution confidence="0.345454545455" recordCount="19.0" value="0"/>
322 |                                     <ns1:ScoreDistribution confidence="0.127272727273" recordCount="7.0" value="1"/>
323 |                                     <ns1:ScoreDistribution confidence="0.527272727273" recordCount="29.0" value="2"/>
324 |                                 </ns1:Node>
325 |                             </ns1:Node>
326 |                             <ns1:Node recordCount="261.0" score="1">
327 |                                 <ns1:True/>
328 |                                 <ns1:ScoreDistribution confidence="0.528735632184" recordCount="138.0" value="0"/>
329 |                                 <ns1:ScoreDistribution confidence="0.570881226054" recordCount="149.0" value="1"/>
330 |                                 <ns1:ScoreDistribution confidence="0.459770114943" recordCount="120.0" value="2"/>
331 |                                 <ns1:Node recordCount="7.0" score="1">
332 |                                     <ns1:SimplePredicate field="col_1" operator="lessOrEqual" value="-2.00775241852"/>
333 |                                     <ns1:ScoreDistribution confidence="0.0" recordCount="0.0" value="0"/>
334 |                                     <ns1:ScoreDistribution confidence="0.9" recordCount="9.0" value="1"/>
335 |                                     <ns1:ScoreDistribution confidence="0.1" recordCount="1.0" value="2"/>
336 |                                 </ns1:Node>
337 |                                 <ns1:Node recordCount="254.0" score="1">
338 |                                     <ns1:True/>
339 |                                     <ns1:ScoreDistribution confidence="0.347607052897" recordCount="138.0" value="0"/>
340 |                                     <ns1:ScoreDistribution confidence="0.352644836272" recordCount="140.0" value="1"/>
341 |                                     <ns1:ScoreDistribution confidence="0.299748110831" recordCount="119.0" value="2"/>
342 |                                 </ns1:Node>
343 |                             </ns1:Node>
344 |                         </ns1:Node>
345 |                     </ns1:Node>
346 |                 </ns1:TreeModel>
347 |             </ns1:Segment>
348 |         </ns1:Segmentation>
349 |     </ns1:MiningModel>
350 | </ns1:PMML>
351 | 


--------------------------------------------------------------------------------
/sklearn_pmml/test/data/gradient_boosting_classifier/estimator.pkl:
--------------------------------------------------------------------------------
  1 | ccopy_reg
  2 | _reconstructor
  3 | p1
  4 | (csklearn.ensemble.gradient_boosting
  5 | GradientBoostingClassifier
  6 | p2
  7 | c__builtin__
  8 | object
  9 | p3
 10 | NtRp4
 11 | (dp5
 12 | S'verbose'
 13 | p6
 14 | I0
 15 | sS'classes_'
 16 | p7
 17 | cnumpy.core.multiarray
 18 | _reconstruct
 19 | p8
 20 | (cnumpy
 21 | ndarray
 22 | p9
 23 | (I0
 24 | tS'b'
 25 | tRp10
 26 | (I1
 27 | (I2
 28 | tcnumpy
 29 | dtype
 30 | p11
 31 | (S'i8'
 32 | I0
 33 | I1
 34 | tRp12
 35 | (I3
 36 | S'<'
 37 | NNNI-1
 38 | I-1
 39 | I0
 40 | tbI00
 41 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00'
 42 | tbsS'min_samples_leaf'
 43 | p13
 44 | I1
 45 | sS'max_features'
 46 | p14
 47 | NsS'n_classes_'
 48 | p15
 49 | I2
 50 | sS'init'
 51 | p16
 52 | NsS'random_state'
 53 | p17
 54 | NsS'loss_'
 55 | p18
 56 | g1
 57 | (csklearn.ensemble.gradient_boosting
 58 | BinomialDeviance
 59 | p19
 60 | g3
 61 | NtRp20
 62 | (dp21
 63 | S'K'
 64 | I1
 65 | sbsS'train_score_'
 66 | p22
 67 | g8
 68 | (g9
 69 | (I0
 70 | tS'b'
 71 | tRp23
 72 | (I1
 73 | (I10
 74 | tg11
 75 | (S'f8'
 76 | I0
 77 | I1
 78 | tRp24
 79 | (I3
 80 | S'<'
 81 | NNNI-1
 82 | I-1
 83 | I0
 84 | tbI00
 85 | S'A\xbf\xa9\xd2O\xec\xed?\xf0\t\x85=\x1c\x8a\xe9?-\xc1o\n\xce\x1b\xe6?\xfe\x7f8}\x13P\xe3?2\x81V`h\xfa\xe0?\x83\xed\xa1/3\xfe\xdd?\xaf0\xe5;\xde\x96\xda?] /\xe5\x05\xa4\xd7?(\xc4j\xcd\xa8\x11\xd5?<i\xe1f[\xd0\xd2?'
 86 | tbsS'max_depth'
 87 | p25
 88 | I2
 89 | sS'estimators_'
 90 | p26
 91 | g8
 92 | (g9
 93 | (I0
 94 | tS'b'
 95 | tRp27
 96 | (I1
 97 | (I10
 98 | I1
 99 | tg11
100 | (S'O8'
101 | I0
102 | I1
103 | tRp28
104 | (I3
105 | S'|'
106 | NNNI-1
107 | I-1
108 | I63
109 | tbI00
110 | (lp29
111 | g1
112 | (csklearn.tree.tree
113 | DecisionTreeRegressor
114 | p30
115 | g3
116 | NtRp31
117 | (dp32
118 | S'splitter'
119 | p33
120 | csklearn.tree._tree
121 | PresortBestSplitter
122 | p34
123 | (csklearn.tree._tree
124 | RegressionCriterion
125 | p35
126 | (I1
127 | tRp36
128 | (dbI2
129 | I1
130 | cnumpy.random
131 | __RandomState_ctor
132 | p37
133 | (tRp38
134 | (S'MT19937'
135 | p39
136 | g8
137 | (g9
138 | (I0
139 | tS'b'
140 | tRp40
141 | (I1
142 | (I624
143 | tg11
144 | (S'u4'
145 | I0
146 | I1
147 | tRp41
148 | (I3
149 | S'<'
150 | NNNI-1
151 | I-1
152 | I0
153 | tbI00
154 | S'\x9az\xb4\x9c\x08\x91\xd1\xb1\x17\x17\xa0(\xe10l\xf0\xe6\xc0@/=<\x1c\xc5\xbbH\x89\x052?fj;\x86\x84\xc8\xa3\xff+\x8f\x8a\x977\xc0\xb2\x91BD\xbd\xd4\x90\xe1\xcc\xc0"t\xd1\x0c\x01\x9fJ_\xe7\x9aa\x1a\xf46&\x027P\x8f0\x96\xef?\x12<\xad`TrPI\x00\x8d\x9fu\xeaq\xfaF\x06`w\xc4\xd0\x02\xd3u@\xd3\xfb?6;\xda\xc3w\xebe~\x18\x05lTJ;\xb1\xd4q\xe8^\xa5\xfc\xc4\xcd\xbb\xca\xe6\xd5<Nq\xc0\x1f\xbb9\xbe/r\xde\xc3\xfb\x85c\x9d<\xc1\x97\xb8\xa9$O\xdf\xf3\xa4\x9f\x88\x89\xdb\x01\x81\xf7fB^{*\xe0\x8f\xc7\x031\x97\x88\xc3\xb5\n\xb2\xfd\xafa%\xe7\x0f\xbc\xfaQ\xafV|]o\xf1\xbc\x003$>\x1d\x1e\x84\x16\xb9\xa2#*\x9c\xc6=|\xe2\xad+}y\xac\xba\x9e\xa1t\x91\xa5\xa3\xc2\xa7\x93\x99-X\xb4\xf1\xff\x91\xd5\xa0\x1b\xe3\x8d\xc1:\xae9R\x18\x1a\xf65T\xac\xcc\xb5wpz\na\xe8fml\x16\x9b\xea\xfdRe\x15\xca\xf5\xd2\x88\xfeC}g\xf3\xd2F2<\x8e\xfcz\xfa\xde\xb5\xb0l\xf0\xc4\xfb\x8bf\xea\xa3\xbd\xcc\x16\t%\xfb\x03/2\xd5\xd2w\x04\xa8\x83@\x89b\x12\x85\xf9\xae\xdb\xeb\xb4\xe6?\xc7\x1d\x06s\xb5\x06X\xec{\xc3H\x1c\x0b\x88I\xa5Q*\x8e\xe2\xb5\x85M\x0f\xfd%\xa4\x9f\xf6\xac\x0fn1\xfa\xee\x92\x83\xca\x7f\xdd\x18&`\x08?%\xb7\xda\xf4\xf1Hby\xb6\xf3\xad\x18\x98\xc9Y"\x1d\x86\xc6\xf5\xd8r\xcf\xcc\xdf\xe2\xc6^\xca\xab!\xcd\x9d\x05Nk}\xab^\xfdg=\x9b\x84\xb0\\\xb4\xba\xa9\x05\x7f\xde\x98n\xe0\xe2M\xa98\x9e\xb4\x93S\x89\xd91\xca\x1c]]\x91\xa4|\x9cR\x90\xc1\xd1\x02*\x192\x1d\x81x\x8dw\x01S(\x81Q\xeb\xa5\xfc\t\x04\xd4\xd4\xcc\xf2\x98\xed]\xf0\x8bCZ\x99\xd4~T1N\x18\\\xd1\x0b\xb3\xdd\x8b\xa0\x9a\x84\xc8~\xe60O9Muh\xea:\xc0\xed1\xf7r\xfb\xfe\xbf\xaf\xbb\xd2\x7f\xfc\xc3\xa8V\xbe\xea\xd0\x13\x9cO\xe0\x1b\xa2\xe6\x15d$j\xd7\x86\x9cY\xe1$\x86\x84\xa0\x00\xc3\xe9\x88\n\xcd\x9d\x84z/\xa7)\x11\xfd\n\xa2*\xe3\x0fF\x9a_.\x9b\x98\xe7<{\xacf\xf2\x87p\xaajy\xf1\x16\xda\xeb\x1b\x1fUBT\x07\x9b\xc210oz\xe1\x98O\x8c]z\xbet\x98W\x88\xa95m\r\xe1W\x87\xc7\x92\x05\xa3Y\xd4\xe4\x99\x90\xdb\xcb\xa3l\xdd\t\xda\x15\x87\t\x97g\xa7z,w:\xdc\x1e\xceC\xe6\xb6-\xa5\xf5p\xc6u\xd6(\xb5_\x18r\x9b$0w\xb8-\x88\xb6\x00\xfc\xf2\xb0\xff\xd5\xdcv\xc8\xae\xdfq\xf1\x84RC\x9
4\x07\\\x83\xcd\x8a~,\x0e\xc8Y\x17\xac\xbf*\xe4\xd3\x10\x9a\xef\xe9\xfb\x18\xfe]\x06<s\xfd\xe9Y\x8cg$sA\xce\x10rH2m\xcfq.\x9f\xe0\x1d^<\xc4\xfe\xc9X+U\xc3Qi\x14\xc7\x96\x98\xe67$O\x0e\xef\xea\x80g\xe6\xbe\t\x8b\xf4\x10f^X\xdc\x85\xa7h\x13\xd0=>\x9f\xe4\x11\xebR\xf9\xf0\xf3Ic\xe4\xbc\xbak[\xba\x12\x84\xa8\x97M\xe0u\x88\x99od\xac\x12\xc0\x9c\xc5\xd4\xf99\x84:h\x1d\xff\xb1\xf4\x8f\xa7>/\xc9S\xe7\xd0\xe8\xe5@\x03z\xe9\xcb\xa9+N\x9e]\xcf\xa8\x99\xe4\xf2\x85\xec\xe8\xca\xd6S:\xcez\xeeH]\x85n\xb2\x90\t\xe4\x11\xbd\xea8X\xd1R\xb9dR\xb4e\x9c\tn\\\x14?\xe5\xae\x84\xc4\xd4p\xa0\xf1j\xf1p\xf7\x04\xf5!\xf4s\x15\x89\x8d\x1f\xd3p\x8a\x96#\xc5\x0e@2U\xc9x\xe2\x0fa\x12{s\x0fC3\x11H\xe1\xa2"\xb6\x15\xce\x89\xb1)\xda\x06\x16\x1cC\xe4\x96\x11\xfb\x18\x97\xf0\xdf\x81\xea\xe2\x9c\xa4 \x8d\x1b\x93!\x9f\t\x1fau\x08\xe9\xd4\x02\x87\x83\x1a4vud\x00\x85\x07y\xfe\x7fu\x90v$:\x05\xc7\x82L Ne\xa0g\xf1\xe4W+(M\xb9\x10\xcc7}\xb0\x7f\xf4<{g\xfeRT\xdav\xe1B\xc5F)M\xa8\xeb#mGr;\x1cu5\xdf\xe9ea\\\x98}\xeb\x15\t\xf1\x1c\xfe}\xb4\x02`\xf6\xa6\xa6&Mef\xdb\xa1\xa0\xcd\xd68I\xe2\xaf2\xc7\xc5\xa7k\r<\xd6\xd2\xa3\x1ci\xf2&gI\xfb\xc9\xec\xb7\xfaP\x88\x1b>* \xdaw\x8f]\x8fK@\xe2\x9f9\x01\x1b\xd8\xf0\xfe\x91\x0bT\xcc\x0fT\x01\x81\xd1\x00G\xe1\x9e\xc8\xc0\x11\x97\xa6\xd1au\x8e\\\xf8\xff9!\'X\x0e>\xc7n\x1cR\x91\xb4\xc7v\xac\xf5\xd2\xe4\xde\xae0\xf5M\xbfd\xb1p\xb9\xfeV2\xfe\xff\xd3\xed\x91\x1d\xe2\xc2R\x1c\xf9,w$\x13\xc80\x0b\xbd\xbd\xc3i\xd0{\x8cj@\x03\xa0\x95w\xe3T\x0cb\xc5F\xedgN\xe5\x97U|v\xde\xec\x06p3\xa4\x00\xcc\xfd\x83\x9bT\x1f\xe4D\xf7\x85\x94S\x97\xc2\xf5:\xdc"\xf0\xed%\x93\x98oA(\x07D\xe1\x9b\xa5\xfa\x14\xbb\x96\x15\x975\xc8\x04\xcag\xe5?\xe0\xe4\xe2\x10\xdf\x87(o\xe7\xd9\xf1(\x00\x8d\xd4\x17v\xb0\xb2}\xf2c\xf1# \x9e\xd9\xb8\x8f[\xf0!j\x85\\\xbbz|=:\xac\xe2\x92\xfcB\xe8\x0f1 
\x06\x9e\xab\xce\xf8(g\xc8\xcf^\xe1\xf1h\xc0\x8b\x05\xbc\xb7o\x19|O!1}GP\xcdx+\x89~\xe5\x00\x12\xc7\xd6\x8a\\\xfc)\xd7\x81\xdeIGJ\xb0d\xb3l\xb9\xf2\\\x1b\x98-\xcf"M\xf5\xd6\xec\x83\xefa\xa2~\xfd\x95\xf7\x02j\xbf\xee\xacY\xd8\x9c\x94\xa3\xcc\xb8\x97Dq\x1c\x069=\t\xe7\xccW>\xf4\x12\xf8@\xd1\x18\xb9d\'\xb9P;y\xb9\x18u_\x92\x85\xc49v/\xdd%\xbf]\x9b\n\x1f\x82\x0b\xb6\xc5\x82\xb1\xf5\x13Q%\xa6\x02\xc99c\xbb\xf9J\xad;\x89\x81X?\r\x04]\n\xba\xfe\xacQ-\xcf\x8e\xe4\x18\xe7\x1a43|\xc7\xf6\xb40\xce\xaf\xa4\x91\xcf\xa5\x17\xa4\x00\xb4\xcf|\xf4KI\x10\xc5\x80\xbb\xeeB\xddB6\x15RB_d\xc5\xa6:\x0e\xf9\xe9\x0b\xea\xce\x0f\x8f\xbeI\x86\xf7\xfe\xfbzk\x88Kvv\xfa8yZ\x1fx{u\xef\xea\xbb[Y\xa0\x85\x96\x9fg\xb8\xc2\x84E\xcf\xedvAA\xbfd\x805\x19\xae\x93\x87\x1c$o#\'\xb7\x1a\xf4\x84,a6\x84\xa1*\x16\x06I\x9c\xdd5\'\xea\xd1\x9c\xf2\x14\x1e\xa2\x08O\xf6\xa4\xddX^M\xbc\xd29C\x05\x83\x81\x81\xba\xe1\xca~\x18\xa5F\ng\x1b\x94\xe8\x8eu\xa9\xf3\xf5\xc1\xbc\xcb,\x84\xff\xe6\xfb\xe6\xb6b\\,\x18o\xb6\x96\xd0\xfc\xde\x0e2\xdb\x96\x90\x88\xa2\x9c\x9ce%\x8a\x8bl\xc9E\xd3\xee>\x9f,\xbf\xbeD\x1c\x0c@\xe0\xe5C\xfb\xf3K\\\xdea\xc5\xd6\xae)\xd2{+\xbd9\xff7k\xc8\xa1\xaf\xce,\xd0>\xfaj}\xf8\x91\xe3/\xcc\x1d;N\xbc9\xb3@\x9a\x7f1\xff\xed\xf9\x80\x8f\xb4a\xc9;\xf7\x10\xc6\x84\xa9\x17)\x954\xd32^-\x0e~E\xb9\xea\xdd\xf0t\x01GQ\xae\xaf\xc2\x01\xe8K7U\xdf*\xff\x9c\n\xd5$\xd0\xfe=9\x14n\xe5\xc6x\xba\xc1]\xdcZ\xcbB\xc8\xb7\xb4P 
\xc7\xaeC0\xc9\xaeu:\xe2w\x9a\xc4\xde\x93\x1b>3\xbf\x83\xbd\x8f\x1b\x977r\xc3E\x07\x8b\xfe!Lq?\x02\x14\xf1pv+\xff*\x13\x1bJ\x15\x93\x14\x88\xa8\x81\x94<\x9buxt\x0e\xdb\xb3u(>\xe1\xd7NVl\xc5\xa3s\xee\x1b\xfd9\x1a\xae\xf9\xb2x\xf4G\x1c\xd3m:b]*2\xe0\xe8\x81W\x17\xe1\x84\xf4<6\xfb"\x83\xa9\x19\xb9\xd0\xcb\x15a~\x9dT\xb2`\xca\xf6\xa6b\x12\x0f~\xf7T\x03CD\xb8>\xc1\xa8O\x8b@zk\x1f\xd8\xf5g\xab\xa6\x9e\xc8tb\xf9\xacK\xcc\xbcxMy\x1f\xe6\t=\\\xf7\xe7\xfa\x00\x85\xa1\x96\x14\xa5\xbf\x0c\xff0\xe7\x95c\x02\x0c\x92\xfb\x11\x88\x13\xdf\rM\xcd\xf3Z\x90\xe7\x89&\x8b\xa9jR\xc5\x86\x8d\x98\x0e\xea$\x0b2\x1a\xc80\xde\xe1\xb2[LH\x8de\xdc\xfd\x84\xb0B\xa5\x97_\r\x97\xf2\x96\xda\x90\x9a\x1f\xa76n1\xe15\xa5T\x8b\xc9\xe0:7\x87\xd5Y\x96\xd7\xa1\xb4\xe1K\xf9cHd\xce[\x0bP{\x18\x92\x9a\r\xf5\xd2\xb4\xcc\x91!\xb2i<\xc2l\xafmkH(mX\x0f\x1f\xc4\x9e\xf9\xf7\x82\x02\xd2\xc9\x9aHX3\xb8\xb8\xffy\xac"\r\x94\x90\x86\xf64\x9b\x7f\x1eXW?\xd1\xa2\x0f\xb0S\x9fdL\xdd\x05\xa5\xae~\xe6\xda\xe8\x01\x8fZ\xad\xd6\xe2\xef\xa8\xcc\xd7\xc1"2\xb6C\xf9_2 
\xb1\xe9\xfa&\xaa@<\xcf\x85\xbb\x11;\xe8\xda\x87&rD\xdf\xd8r\x93\xa2\x950\xb5\xfc\xb9\xcbE\x87XQ"\x81Z\xceR\xa0\xe84\x14\xaf\xbd/\x8c%\x8bg\xf1\xbfLR\xa5M\x04o\xdd\x05Y\xb4\xe9\xe3\xa6\x85!\x89\r\xc8\x01\x0e\x90\xc9`\xee\x98Y\x15]\x80\x9dl\'\x1d\x1bJ\xa1\xa4\x08-\xaa\xad&\t\xb9^F;r\x87\xd7O\x98\xba3\xbd\xa4\x11\xd5\xa5\x02\xee\xdc\n\x86\xebKm\xe6\xa5J\xcf\xbch\x19\xa8\xa1\t\x15\x81(j\xbc\xf3v\x05\t\xef\x912\xd4I\xc8\x88%\xddH",\x86\x19\x17LG/\xfa\xab\xbc\xe9\x01\xb0\xe9\xff}\x1bk\xbc\x07\xe6}\xcc\xe93\xc8\xafbc#\x89\xf5\x9b\xb5\xdcb\xef\xb0\xa4:I\xe5\xf4\xab\x8b\xd4\xb4\x83(\x92\xba\xc1\x12\x04\\h\x0eU\xd0E\xb5\x82`\x01|\x04\x83\x86?\x7f\xc7\xe3\xbbn\xa5j8\xdf\x19\x1b\r\x9b\xd7\xe0\x1d\x02\x1c\xc2]!Xb\xc6|JJ\x018{\xfb\xad\x86\xb4\xe4J\x9bH\xbb\xb3z\x03\x10\xa8\xb1\xf4\x1b`\xbc\xa1\xe5C\xa3ED1\x0fKT}bm\xa6\x1e\x16q\xec\xfdBF\xc0v\xf9>f\x843\x9c,H\xa3\xe4.E\x82\xb5NZE~(\x1f\xd0\x04\xe0c;\xec\x8csG\xf4\x07\xbc\xac\xf0\xf1s,$\xd3\x06\xf6\x01z\x93\xfcSc\xf9K\x01\xc1g\xbb\x1f\x7fd_N\x00\xe6\xfbb\x06\x15 \xe7"?\x16Y#\xf8\x06'
155 | tbI10
156 | I0
157 | F0
158 | tbtRp42
159 | (dbsS'tree_'
160 | p43
161 | csklearn.tree._tree
162 | Tree
163 | p44
164 | (I2
165 | g8
166 | (g9
167 | (I0
168 | tS'b'
169 | tRp45
170 | (I1
171 | (I1
172 | tg12
173 | I00
174 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
175 | tbI1
176 | tRp46
177 | (dp47
178 | S'node_count'
179 | p48
180 | I5
181 | sS'values'
182 | p49
183 | g8
184 | (g9
185 | (I0
186 | tS'b'
187 | tRp50
188 | (I1
189 | (I5
190 | I1
191 | I1
192 | tg24
193 | I00
194 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\xc0UUUUUU\xf5?UUUUUU\xf5?'
195 | tbsS'nodes'
196 | p51
197 | g8
198 | (g9
199 | (I0
200 | tS'b'
201 | tRp52
202 | (I1
203 | (I5
204 | tg11
205 | (S'V56'
206 | I0
207 | I1
208 | tRp53
209 | (I3
210 | S'|'
211 | N(S'left_child'
212 | p54
213 | S'right_child'
214 | p55
215 | S'feature'
216 | p56
217 | S'threshold'
218 | p57
219 | S'impurity'
220 | p58
221 | S'n_node_samples'
222 | p59
223 | S'weighted_n_node_samples'
224 | p60
225 | tp61
226 | (dp62
227 | g60
228 | (g24
229 | I48
230 | tp63
231 | sg58
232 | (g24
233 | I32
234 | tp64
235 | sg55
236 | (g12
237 | I8
238 | tp65
239 | sg56
240 | (g12
241 | I16
242 | tp66
243 | sg57
244 | (g24
245 | I24
246 | tp67
247 | sg54
248 | (g12
249 | I0
250 | tp68
251 | sg59
252 | (g12
253 | I40
254 | tp69
255 | sI56
256 | I1
257 | I16
258 | tbI00
259 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x00\x00\x00\x00\x00\x00\xc8?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x00\x00\x00\x00\x00\x00\xd0?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@'
260 | tbsbsS'n_features_'
261 | p70
262 | I2
263 | sS'n_outputs_'
264 | p71
265 | I1
266 | sg15
267 | cnumpy.core.multiarray
268 | scalar
269 | p72
270 | (g12
271 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
272 | tRp73
273 | sS'max_leaf_nodes'
274 | p74
275 | Nsg7
276 | NsS'max_features_'
277 | p75
278 | I2
279 | sg13
280 | I1
281 | sg14
282 | Nsg17
283 | g38
284 | sS'criterion'
285 | p76
286 | g36
287 | sS'min_samples_split'
288 | p77
289 | I2
290 | sg25
291 | I2
292 | sbag1
293 | (g30
294 | g3
295 | NtRp78
296 | (dp79
297 | g33
298 | g42
299 | sg43
300 | g44
301 | (I2
302 | g8
303 | (g9
304 | (I0
305 | tS'b'
306 | tRp80
307 | (I1
308 | (I1
309 | tg12
310 | I00
311 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
312 | tbI1
313 | tRp81
314 | (dp82
315 | g48
316 | I5
317 | sg49
318 | g8
319 | (g9
320 | (I0
321 | tS'b'
322 | tRp83
323 | (I1
324 | (I5
325 | I1
326 | I1
327 | tg24
328 | I00
329 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb3\xd4\xd5Dr\x16\x08\xc0\xdb\xfa\xfd>\xe7\xaa\xf4?\xdb\xfa\xfd>\xe7\xaa\xf4?'
330 | tbsg51
331 | g8
332 | (g9
333 | (I0
334 | tS'b'
335 | tRp84
336 | (I1
337 | (I5
338 | tg53
339 | I00
340 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?y\xb3\x89Rq+\xc3?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xa2Db\xc3A\x8f\xc9?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00p<\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00`\xbc\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@'
341 | tbsbsg70
342 | I2
343 | sg71
344 | I1
345 | sg15
346 | g72
347 | (g12
348 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
349 | tRp85
350 | sg74
351 | Nsg7
352 | Nsg75
353 | I2
354 | sg13
355 | I1
356 | sg14
357 | Nsg17
358 | g38
359 | sg76
360 | g36
361 | sg77
362 | I2
363 | sg25
364 | I2
365 | sbag1
366 | (g30
367 | g3
368 | NtRp86
369 | (dp87
370 | g33
371 | g42
372 | sg43
373 | g44
374 | (I2
375 | g8
376 | (g9
377 | (I0
378 | tS'b'
379 | tRp88
380 | (I1
381 | (I1
382 | tg12
383 | I00
384 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
385 | tbI1
386 | tRp89
387 | (dp90
388 | g48
389 | I5
390 | sg49
391 | g8
392 | (g9
393 | (I0
394 | tS'b'
395 | tRp91
396 | (I1
397 | (I5
398 | I1
399 | I1
400 | tg24
401 | I00
402 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xef\x1d\xfa\x8f\xad\xe7\x03\xc0F\xf6\x84\x98\x1b\x1a\xf4?F\xf6\x84\x98\x1b\x1a\xf4?'
403 | tbsg51
404 | g8
405 | (g9
406 | (I0
407 | tS'b'
408 | tRp92
409 | (I1
410 | (I5
411 | tg53
412 | I00
413 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?n\xc7b\xd6\xa2\xe2\xbe?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x9e/\x979\x17\x97\xc4?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00p\xbc\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00x\xbc\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@'
414 | tbsbsg70
415 | I2
416 | sg71
417 | I1
418 | sg15
419 | g72
420 | (g12
421 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
422 | tRp93
423 | sg74
424 | Nsg7
425 | Nsg75
426 | I2
427 | sg13
428 | I1
429 | sg14
430 | Nsg17
431 | g38
432 | sg76
433 | g36
434 | sg77
435 | I2
436 | sg25
437 | I2
438 | sbag1
439 | (g30
440 | g3
441 | NtRp94
442 | (dp95
443 | g33
444 | g42
445 | sg43
446 | g44
447 | (I2
448 | g8
449 | (g9
450 | (I0
451 | tS'b'
452 | tRp96
453 | (I1
454 | (I1
455 | tg12
456 | I00
457 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
458 | tbI1
459 | tRp97
460 | (dp98
461 | g48
462 | I5
463 | sg49
464 | g8
465 | (g9
466 | (I0
467 | tS'b'
468 | tRp99
469 | (I1
470 | (I5
471 | I1
472 | I1
473 | tg24
474 | I00
475 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x86Y\xf6\xbeZH\x01\xc0eZZZ \x9e\xf3?eZZZ \x9e\xf3?'
476 | tbsg51
477 | g8
478 | (g9
479 | (I0
480 | tS'b'
481 | tRp100
482 | (I1
483 | (I5
484 | tg53
485 | I00
486 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?krW\xa2\xf7\xfc\xb8?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xf2\xf6\xe4\x16\xa5\xa8\xc0?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00x\xbc\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x84\xbc\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@'
487 | tbsbsg70
488 | I2
489 | sg71
490 | I1
491 | sg15
492 | g72
493 | (g12
494 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
495 | tRp101
496 | sg74
497 | Nsg7
498 | Nsg75
499 | I2
500 | sg13
501 | I1
502 | sg14
503 | Nsg17
504 | g38
505 | sg76
506 | g36
507 | sg77
508 | I2
509 | sg25
510 | I2
511 | sbag1
512 | (g30
513 | g3
514 | NtRp102
515 | (dp103
516 | g33
517 | g42
518 | sg43
519 | g44
520 | (I2
521 | g8
522 | (g9
523 | (I0
524 | tS'b'
525 | tRp104
526 | (I1
527 | (I1
528 | tg12
529 | I00
530 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
531 | tbI1
532 | tRp105
533 | (dp106
534 | g48
535 | I5
536 | sg49
537 | g8
538 | (g9
539 | (I0
540 | tS'b'
541 | tRp107
542 | (I1
543 | (I5
544 | I1
545 | I1
546 | tg24
547 | I00
548 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdc_\xe0\x9cL\xf5\xfe\xbf}v\xafIB3\xf3?}v\xafIB3\xf3?'
549 | tbsg51
550 | g8
551 | (g9
552 | (I0
553 | tS'b'
554 | tRp108
555 | (I1
556 | (I5
557 | tg53
558 | I00
559 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xf6\x8d}\xccKE\xb4?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x9d\x12R\xbb\x0f\x07\xbb?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@'
560 | tbsbsg70
561 | I2
562 | sg71
563 | I1
564 | sg15
565 | g72
566 | (g12
567 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
568 | tRp109
569 | sg74
570 | Nsg7
571 | Nsg75
572 | I2
573 | sg13
574 | I1
575 | sg14
576 | Nsg17
577 | g38
578 | sg76
579 | g36
580 | sg77
581 | I2
582 | sg25
583 | I2
584 | sbag1
585 | (g30
586 | g3
587 | NtRp110
588 | (dp111
589 | g33
590 | g42
591 | sg43
592 | g44
593 | (I2
594 | g8
595 | (g9
596 | (I0
597 | tS'b'
598 | tRp112
599 | (I1
600 | (I1
601 | tg12
602 | I00
603 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
604 | tbI1
605 | tRp113
606 | (dp114
607 | g48
608 | I5
609 | sg49
610 | g8
611 | (g9
612 | (I0
613 | tS'b'
614 | tRp115
615 | (I1
616 | (I5
617 | I1
618 | I1
619 | tg24
620 | I00
621 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd7P\x94@\xa5S\xfc\xbf\xab9\x92\xcf\x9d\xd6\xf2?\xab9\x92\xcf\x9d\xd6\xf2?'
622 | tbsg51
623 | g8
624 | (g9
625 | (I0
626 | tS'b'
627 | tRp116
628 | (I1
629 | (I5
630 | tg53
631 | I00
632 | S"\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xee\xe4\x9f'8y\xb0?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x93\x86*\x8a\xf5\xf6\xb5?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00`\xbc\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00h\xbc\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@"
633 | tbsbsg70
634 | I2
635 | sg71
636 | I1
637 | sg15
638 | g72
639 | (g12
640 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
641 | tRp117
642 | sg74
643 | Nsg7
644 | Nsg75
645 | I2
646 | sg13
647 | I1
648 | sg14
649 | Nsg17
650 | g38
651 | sg76
652 | g36
653 | sg77
654 | I2
655 | sg25
656 | I2
657 | sbag1
658 | (g30
659 | g3
660 | NtRp118
661 | (dp119
662 | g33
663 | g42
664 | sg43
665 | g44
666 | (I2
667 | g8
668 | (g9
669 | (I0
670 | tS'b'
671 | tRp120
672 | (I1
673 | (I1
674 | tg12
675 | I00
676 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
677 | tbI1
678 | tRp121
679 | (dp122
680 | g48
681 | I5
682 | sg49
683 | g8
684 | (g9
685 | (I0
686 | tS'b'
687 | tRp123
688 | (I1
689 | (I5
690 | I1
691 | I1
692 | tg24
693 | I00
694 | S"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x98 \xe9\x9fS\xfa\xbfJ\x89'\xc8\xe8\x85\xf2?J\x89'\xc8\xe8\x85\xf2?"
695 | tbsg51
696 | g8
697 | (g9
698 | (I0
699 | tS'b'
700 | tRp124
701 | (I1
702 | (I5
703 | tg53
704 | I00
705 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?2\x98\xd4\xeep\xcf\xaa?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?we8\x9f\xa0\xdf\xb1?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00P<\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00P<\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@'
706 | tbsbsg70
707 | I2
708 | sg71
709 | I1
710 | sg15
711 | g72
712 | (g12
713 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
714 | tRp125
715 | sg74
716 | Nsg7
717 | Nsg75
718 | I2
719 | sg13
720 | I1
721 | sg14
722 | Nsg17
723 | g38
724 | sg76
725 | g36
726 | sg77
727 | I2
728 | sg25
729 | I2
730 | sbag1
731 | (g30
732 | g3
733 | NtRp126
734 | (dp127
735 | g33
736 | g42
737 | sg43
738 | g44
739 | (I2
740 | g8
741 | (g9
742 | (I0
743 | tS'b'
744 | tRp128
745 | (I1
746 | (I1
747 | tg12
748 | I00
749 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
750 | tbI1
751 | tRp129
752 | (dp130
753 | g48
754 | I5
755 | sg49
756 | g8
757 | (g9
758 | (I0
759 | tS'b'
760 | tRp131
761 | (I1
762 | (I5
763 | I1
764 | I1
765 | tg24
766 | I00
767 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xce\xf2\xdfW\x8a\xc2\xf8\xbf\xe3AS\x88L?\xf2?\xe3AS\x88L?\xf2?'
768 | tbsg51
769 | g8
770 | (g9
771 | (I0
772 | tS'b'
773 | tRp132
774 | (I1
775 | (I5
776 | tg53
777 | I00
778 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xbf\xd0\xfc\xe2\x97\xd6\xa5?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?S\x16Q\xd9\x1f\x1e\xad?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00h<\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00r<\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@'
779 | tbsbsg70
780 | I2
781 | sg71
782 | I1
783 | sg15
784 | g72
785 | (g12
786 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
787 | tRp133
788 | sg74
789 | Nsg7
790 | Nsg75
791 | I2
792 | sg13
793 | I1
794 | sg14
795 | Nsg17
796 | g38
797 | sg76
798 | g36
799 | sg77
800 | I2
801 | sg25
802 | I2
803 | sbag1
804 | (g30
805 | g3
806 | NtRp134
807 | (dp135
808 | g33
809 | g42
810 | sg43
811 | g44
812 | (I2
813 | g8
814 | (g9
815 | (I0
816 | tS'b'
817 | tRp136
818 | (I1
819 | (I1
820 | tg12
821 | I00
822 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
823 | tbI1
824 | tRp137
825 | (dp138
826 | g48
827 | I5
828 | sg49
829 | g8
830 | (g9
831 | (I0
832 | tS'b'
833 | tRp139
834 | (I1
835 | (I5
836 | I1
837 | I1
838 | tg24
839 | I00
840 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbd\x1eJ\xd9\x06\x81\xf7\xbf\xb3\xa4\xad\xb1J\x01\xf2?\xb3\xa4\xad\xb1J\x01\xf2?'
841 | tbsg51
842 | g8
843 | (g9
844 | (I0
845 | tS'b'
846 | tRp140
847 | (I1
848 | (I5
849 | tg53
850 | I00
851 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xa3\xf1\xd00T\xcd\xa1?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\x84\x97\x16Ap\xbc\xa7?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00P<\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00@\xbc\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@'
852 | tbsbsg70
853 | I2
854 | sg71
855 | I1
856 | sg15
857 | g72
858 | (g12
859 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
860 | tRp141
861 | sg74
862 | Nsg7
863 | Nsg75
864 | I2
865 | sg13
866 | I1
867 | sg14
868 | Nsg17
869 | g38
870 | sg76
871 | g36
872 | sg77
873 | I2
874 | sg25
875 | I2
876 | sbag1
877 | (g30
878 | g3
879 | NtRp142
880 | (dp143
881 | g33
882 | g42
883 | sg43
884 | g44
885 | (I2
886 | g8
887 | (g9
888 | (I0
889 | tS'b'
890 | tRp144
891 | (I1
892 | (I1
893 | tg12
894 | I00
895 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
896 | tbI1
897 | tRp145
898 | (dp146
899 | g48
900 | I5
901 | sg49
902 | g8
903 | (g9
904 | (I0
905 | tS'b'
906 | tRp147
907 | (I1
908 | (I5
909 | I1
910 | I1
911 | tg24
912 | I00
913 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xce{\xb7\x99\x93z\xf6\xbf\x9d%Me\xa9\xca\xf1?\x9d%Me\xa9\xca\xf1?'
914 | tbsg51
915 | g8
916 | (g9
917 | (I0
918 | tS'b'
919 | tRp148
920 | (I1
921 | (I5
922 | tg53
923 | I00
924 | S'\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?\xae\xc96\x9f\xae\n\x9d?\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10@\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0?t\x86$jt\\\xa3?\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00@<\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00@<\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@'
925 | tbsbsg70
926 | I2
927 | sg71
928 | I1
929 | sg15
930 | g72
931 | (g12
932 | S'\x01\x00\x00\x00\x00\x00\x00\x00'
933 | tRp149
934 | sg74
935 | Nsg7
936 | Nsg75
937 | I2
938 | sg13
939 | I1
940 | sg14
941 | Nsg17
942 | g38
943 | sg76
944 | g36
945 | sg77
946 | I2
947 | sg25
948 | I2
949 | sbatbsg74
950 | NsS'learning_rate'
951 | p150
952 | F0.10000000000000001
953 | sS'n_estimators'
954 | p151
955 | I10
956 | sg77
957 | I2
958 | sS'alpha'
959 | p152
960 | F0.90000000000000002
961 | sS'warm_start'
962 | p153
963 | I00
964 | sS'loss'
965 | p154
966 | S'deviance'
967 | p155
968 | sg75
969 | I2
970 | sS'subsample'
971 | p156
972 | F1
973 | sS'init_'
974 | p157
975 | g1
976 | (csklearn.ensemble.gradient_boosting
977 | LogOddsEstimator
978 | p158
979 | g3
980 | NtRp159
981 | (dp160
982 | S'prior'
983 | p161
984 | g72
985 | (g24
986 | S'\x0b\x03\xadz\xea\x93\xf1?'
987 | tRp162
988 | sbsS'n_features'
989 | p163
990 | I2
991 | sb.


--------------------------------------------------------------------------------