├── pmml-sparkml-lightgbm
├── src
│ ├── test
│ │ ├── resources
│ │ │ ├── pipeline
│ │ │ │ └── .gitkeep
│ │ │ ├── schema
│ │ │ │ └── .gitkeep
│ │ │ ├── README.md
│ │ │ └── main.scala
│ │ └── java
│ │ │ └── org
│ │ │ └── jpmml
│ │ │ └── sparkml
│ │ │ └── lightgbm
│ │ │ └── testing
│ │ │ └── LightGBMTest.java
│ └── main
│ │ ├── resources
│ │ └── META-INF
│ │ │ └── sparkml2pmml.properties
│ │ └── java
│ │ └── org
│ │ └── jpmml
│ │ └── sparkml
│ │ └── lightgbm
│ │ ├── LightGBMRegressionModelConverter.java
│ │ ├── LightGBMClassificationModelConverter.java
│ │ └── BoosterUtil.java
└── pom.xml
├── pmml-sparkml-xgboost
├── src
│ ├── test
│ │ └── resources
│ │ │ ├── pipeline
│ │ │ └── .gitkeep
│ │ │ ├── schema
│ │ │ └── .gitkeep
│ │ │ └── README.md
│ └── main
│ │ ├── resources
│ │ └── META-INF
│ │ │ └── sparkml2pmml.properties
│ │ └── java
│ │ └── org
│ │ └── jpmml
│ │ └── sparkml
│ │ └── xgboost
│ │ ├── XGBoostClassificationModelConverter.java
│ │ └── XGBoostRegressionModelConverter.java
└── pom.xml
├── pmml-sparkml
├── src
│ ├── test
│ │ ├── resources
│ │ │ ├── requirements.txt
│ │ │ ├── pipeline
│ │ │ │ ├── GBTAudit.zip
│ │ │ │ ├── GBTAuto.zip
│ │ │ │ ├── GLMAudit.zip
│ │ │ │ ├── GLMAuto.zip
│ │ │ │ ├── GLMVisit.zip
│ │ │ │ ├── GLMHousing.zip
│ │ │ │ ├── KMeansIris.zip
│ │ │ │ ├── GLMSentiment.zip
│ │ │ │ ├── ModelChainAuto.zip
│ │ │ │ ├── ModelChainIris.zip
│ │ │ │ ├── NaiveBayesIris.zip
│ │ │ │ ├── DecisionTreeAuto.zip
│ │ │ │ ├── DecisionTreeIris.zip
│ │ │ │ ├── FPGrowthShopping.zip
│ │ │ │ ├── ModelChainAudit.zip
│ │ │ │ ├── NaiveBayesAudit.zip
│ │ │ │ ├── RandomForestAuto.zip
│ │ │ │ ├── RandomForestIris.zip
│ │ │ │ ├── DecisionTreeAudit.zip
│ │ │ │ ├── DecisionTreeHousing.zip
│ │ │ │ ├── LinearSVCSentiment.zip
│ │ │ │ ├── NeuralNetworkAudit.zip
│ │ │ │ ├── NeuralNetworkIris.zip
│ │ │ │ ├── RandomForestAudit.zip
│ │ │ │ ├── RandomForestHousing.zip
│ │ │ │ ├── DecisionTreeSentiment.zip
│ │ │ │ ├── LinearRegressionAuto.zip
│ │ │ │ ├── RandomForestSentiment.zip
│ │ │ │ ├── LinearRegressionAutoNA.zip
│ │ │ │ ├── LinearRegressionHousing.zip
│ │ │ │ ├── LogisticRegressionAudit.zip
│ │ │ │ ├── LogisticRegressionIris.zip
│ │ │ │ ├── IsotonicRegressionDecrAuto.zip
│ │ │ │ ├── IsotonicRegressionIncrAuto.zip
│ │ │ │ ├── LinearRegressionHousingVec.zip
│ │ │ │ ├── LogisticRegressionAuditNA.zip
│ │ │ │ └── LogisticRegressionIrisVec.zip
│ │ │ ├── schema
│ │ │ │ ├── Sentiment.json
│ │ │ │ ├── Shopping.json
│ │ │ │ ├── Iris.json
│ │ │ │ ├── Auto.json
│ │ │ │ ├── AutoNA.json
│ │ │ │ ├── Visit.json
│ │ │ │ ├── AuditNA.json
│ │ │ │ ├── Audit.json
│ │ │ │ ├── IrisVec.json
│ │ │ │ ├── HousingVec.json
│ │ │ │ └── Housing.json
│ │ │ ├── README.md
│ │ │ ├── csv
│ │ │ │ └── KMeansIris.csv
│ │ │ ├── data.py
│ │ │ └── main.scala
│ │ └── java
│ │ │ └── org
│ │ │ └── jpmml
│ │ │ └── sparkml
│ │ │ ├── testing
│ │ │ ├── SparkMLDatasets.java
│ │ │ ├── ClusteringTest.java
│ │ │ ├── SparkMLAlgorithms.java
│ │ │ ├── SimpleSparkMLEncoderBatchTest.java
│ │ │ └── AssociationRulesTest.java
│ │ │ ├── TermUtilTest.java
│ │ │ ├── SparkMLTest.java
│ │ │ ├── RegexKeyTest.java
│ │ │ ├── PipelineModelUtilTest.java
│ │ │ ├── PMMLBuilderTest.java
│ │ │ ├── AliasExpressionTest.java
│ │ │ ├── SparkMLEncoderTest.java
│ │ │ └── feature
│ │ │ ├── VectorDisassemblerTest.java
│ │ │ └── DomainTest.java
│ └── main
│ │ ├── java
│ │ └── org
│ │ │ └── jpmml
│ │ │ └── sparkml
│ │ │ ├── model
│ │ │ ├── HasFeatureImportances.java
│ │ │ ├── HasRegressionTableOptions.java
│ │ │ ├── HasPredictionModelOptions.java
│ │ │ ├── LinearRegressionModelConverter.java
│ │ │ ├── HasTreeOptions.java
│ │ │ ├── DecisionTreeRegressionModelConverter.java
│ │ │ ├── DecisionTreeClassificationModelConverter.java
│ │ │ ├── LogisticRegressionModelConverter.java
│ │ │ ├── NaiveBayesModelConverter.java
│ │ │ ├── RandomForestRegressionModelConverter.java
│ │ │ ├── RandomForestClassificationModelConverter.java
│ │ │ ├── GBTRegressionModelConverter.java
│ │ │ ├── KMeansModelConverter.java
│ │ │ ├── LinearSVCModelConverter.java
│ │ │ └── GBTClassificationModelConverter.java
│ │ │ ├── HasSparkMLOptions.java
│ │ │ ├── feature
│ │ │ ├── ColumnPrunerConverter.java
│ │ │ ├── VectorAttributeRewriterConverter.java
│ │ │ ├── VectorDisassemblerConverter.java
│ │ │ ├── SparseToDenseTransformerConverter.java
│ │ │ ├── NGramConverter.java
│ │ │ ├── VectorSizeHintConverter.java
│ │ │ ├── VectorSlicerConverter.java
│ │ │ ├── ChiSqSelectorModelConverter.java
│ │ │ ├── VectorAssemblerConverter.java
│ │ │ ├── IndexToStringConverter.java
│ │ │ ├── TokenizerConverter.java
│ │ │ ├── RegexTokenizerConverter.java
│ │ │ ├── IDFModelConverter.java
│ │ │ ├── RFormulaModelConverter.java
│ │ │ ├── BinarizerConverter.java
│ │ │ ├── StopWordsRemoverConverter.java
│ │ │ ├── DomainUtil.java
│ │ │ ├── MaxAbsScalerModelConverter.java
│ │ │ ├── PCAModelConverter.java
│ │ │ ├── InteractionConverter.java
│ │ │ ├── ContinuousDomainModelConverter.java
│ │ │ ├── DomainModelConverter.java
│ │ │ └── MinMaxScalerModelConverter.java
│ │ │ ├── ScalaUtil.java
│ │ │ ├── ItemSetFeature.java
│ │ │ ├── VectorUtil.java
│ │ │ ├── AssociationRulesModelConverter.java
│ │ │ ├── SparkSessionUtil.java
│ │ │ ├── TransformerConverter.java
│ │ │ ├── TermUtil.java
│ │ │ ├── PredictionModelConverter.java
│ │ │ ├── MatrixUtil.java
│ │ │ ├── RegexKey.java
│ │ │ ├── MultiFeatureConverter.java
│ │ │ ├── WeightedTermFeature.java
│ │ │ ├── BinarizedCategoricalFeature.java
│ │ │ ├── testing
│ │ │ └── SparkMLEncoderBatchTest.java
│ │ │ ├── ProbabilisticClassificationModelConverter.java
│ │ │ ├── ClusteringModelConverter.java
│ │ │ ├── AliasExpression.java
│ │ │ └── DocumentFeature.java
│ │ └── scala
│ │ └── org
│ │ └── jpmml
│ │ └── sparkml
│ │ └── feature
│ │ ├── package.scala
│ │ └── SparseToDenseTransformer.scala
└── pom.xml
├── .github
└── workflows
│ └── maven.yml
├── pmml-sparkml-example
└── src
│ └── main
│ └── java
│ └── org
│ └── jpmml
│ └── sparkml
│ └── example
│ └── NullSplitter.java
└── pmml-sparkml-evaluator
├── pom.xml
└── src
└── main
└── java
└── org
└── jpmml
└── sparkml
└── evaluator
└── SparkMLFunctionRegistry.java
/pmml-sparkml-lightgbm/src/test/resources/pipeline/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pmml-sparkml-lightgbm/src/test/resources/schema/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pmml-sparkml-xgboost/src/test/resources/pipeline/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pmml-sparkml-xgboost/src/test/resources/schema/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/requirements.txt:
--------------------------------------------------------------------------------
1 | pyspark2pmml==0.8.0
2 |
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/GBTAudit.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/GBTAudit.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/GBTAuto.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/GBTAuto.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/GLMAudit.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/GLMAudit.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/GLMAuto.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/GLMAuto.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/GLMVisit.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/GLMVisit.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/GLMHousing.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/GLMHousing.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/KMeansIris.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/KMeansIris.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/GLMSentiment.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/GLMSentiment.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/ModelChainAuto.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/ModelChainAuto.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/ModelChainIris.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/ModelChainIris.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/NaiveBayesIris.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/NaiveBayesIris.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/DecisionTreeAuto.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/DecisionTreeAuto.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/DecisionTreeIris.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/DecisionTreeIris.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/FPGrowthShopping.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/FPGrowthShopping.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/ModelChainAudit.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/ModelChainAudit.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/NaiveBayesAudit.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/NaiveBayesAudit.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/RandomForestAuto.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/RandomForestAuto.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/RandomForestIris.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/RandomForestIris.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/DecisionTreeAudit.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/DecisionTreeAudit.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/DecisionTreeHousing.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/DecisionTreeHousing.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/LinearSVCSentiment.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/LinearSVCSentiment.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/NeuralNetworkAudit.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/NeuralNetworkAudit.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/NeuralNetworkIris.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/NeuralNetworkIris.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/RandomForestAudit.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/RandomForestAudit.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/RandomForestHousing.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/RandomForestHousing.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/DecisionTreeSentiment.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/DecisionTreeSentiment.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/LinearRegressionAuto.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/LinearRegressionAuto.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/RandomForestSentiment.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/RandomForestSentiment.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/LinearRegressionAutoNA.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/LinearRegressionAutoNA.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/LinearRegressionHousing.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/LinearRegressionHousing.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/LogisticRegressionAudit.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/LogisticRegressionAudit.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/LogisticRegressionIris.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/LogisticRegressionIris.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/IsotonicRegressionDecrAuto.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/IsotonicRegressionDecrAuto.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/IsotonicRegressionIncrAuto.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/IsotonicRegressionIncrAuto.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/LinearRegressionHousingVec.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/LinearRegressionHousingVec.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/LogisticRegressionAuditNA.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/LogisticRegressionAuditNA.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/pipeline/LogisticRegressionIrisVec.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpmml/jpmml-sparkml/HEAD/pmml-sparkml/src/test/resources/pipeline/LogisticRegressionIrisVec.zip
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/Sentiment.json:
--------------------------------------------------------------------------------
1 | {"fields":[{"metadata":{},"name":"Sentence","nullable":true,"type":"string"},{"metadata":{},"name":"Score","nullable":true,"type":"integer"}],"type":"struct"}
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/Shopping.json:
--------------------------------------------------------------------------------
1 | {"fields":[{"metadata":{},"name":"transaction","nullable":true,"type":"integer"},{"metadata":{},"name":"items","nullable":false,"type":{"containsNull":false,"elementType":"string","type":"array"}}],"type":"struct"}
--------------------------------------------------------------------------------
/pmml-sparkml-xgboost/src/main/resources/META-INF/sparkml2pmml.properties:
--------------------------------------------------------------------------------
1 | ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel = org.jpmml.sparkml.xgboost.XGBoostClassificationModelConverter
2 | ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel = org.jpmml.sparkml.xgboost.XGBoostRegressionModelConverter
3 |
--------------------------------------------------------------------------------
/pmml-sparkml-lightgbm/src/main/resources/META-INF/sparkml2pmml.properties:
--------------------------------------------------------------------------------
1 | com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassificationModel = org.jpmml.sparkml.lightgbm.LightGBMClassificationModelConverter
2 | com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressionModel = org.jpmml.sparkml.lightgbm.LightGBMRegressionModelConverter
3 |
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/Iris.json:
--------------------------------------------------------------------------------
1 | {"fields":[{"metadata":{},"name":"Sepal_Length","nullable":true,"type":"double"},{"metadata":{},"name":"Sepal_Width","nullable":true,"type":"double"},{"metadata":{},"name":"Petal_Length","nullable":true,"type":"double"},{"metadata":{},"name":"Petal_Width","nullable":true,"type":"double"},{"metadata":{},"name":"Species","nullable":true,"type":"string"}],"type":"struct"}
--------------------------------------------------------------------------------
/pmml-sparkml-xgboost/src/test/resources/README.md:
--------------------------------------------------------------------------------
1 | Launch `spark-shell`:
2 |
3 | ```bash
4 | $SPARK_HOME/bin/spark-shell --jars ../../../../pmml-sparkml-example/target/pmml-sparkml-example-executable-3.2-SNAPSHOT.jar --packages ml.dmlc:xgboost4j-spark_2.12:${xgboost4j-spark.version}
5 | ```
6 |
7 | Load scripts:
8 |
9 | ```spark-shell
10 | :load ../../../../pmml-sparkml/src/test/resources/common.scala
11 | :load main.scala
12 | ```
13 |
--------------------------------------------------------------------------------
/pmml-sparkml-lightgbm/src/test/resources/README.md:
--------------------------------------------------------------------------------
1 | Launch `spark-shell`:
2 |
3 | ```bash
4 | $SPARK_HOME/bin/spark-shell --jars "../../../../pmml-sparkml-example/target/pmml-sparkml-example-executable-3.2-SNAPSHOT.jar,scala-library-2.12.20.jar" --packages com.microsoft.azure:synapseml-lightgbm_2.12:${synapseml-lightgbm.version}
5 | ```
6 |
7 | Load scripts:
8 |
9 | ```spark-shell
10 | :load ../../../../pmml-sparkml/src/test/resources/common.scala
11 | :load main.scala
12 | ```
13 |
--------------------------------------------------------------------------------
/.github/workflows/maven.yml:
--------------------------------------------------------------------------------
1 | name: maven
2 |
3 | on:
4 | push:
5 | branches: [ '3.0.X', '3.1.X', master ]
6 |
7 | jobs:
8 | build:
9 |
10 | runs-on: ubuntu-latest
11 | strategy:
12 | matrix:
13 | java: [ 17, 21 ]
14 |
15 | steps:
16 | - uses: actions/checkout@v4
17 | - uses: actions/setup-java@v4
18 | with:
19 | distribution: 'zulu'
20 | java-version: ${{ matrix.java }}
21 | cache: 'maven'
22 | - run: mvn -B package --file pom.xml
23 |
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/README.md:
--------------------------------------------------------------------------------
1 | Run `spark-submit`:
2 |
3 | ```bash
4 | $SPARK_HOME/bin/spark-submit --jars ../../../../pmml-sparkml-example/target/pmml-sparkml-example-executable-3.2-SNAPSHOT.jar main.py
5 | ```
6 |
7 | Launch `spark-shell`:
8 |
9 | ```bash
10 | $SPARK_HOME/bin/spark-shell --jars ../../../../pmml-sparkml-example/target/pmml-sparkml-example-executable-3.2-SNAPSHOT.jar
11 | ```
12 |
13 | Load scripts:
14 |
15 | ```spark-shell
16 | :load common.scala
17 | :load main.scala
18 | ```
19 |
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/Auto.json:
--------------------------------------------------------------------------------
1 | {"fields":[{"metadata":{},"name":"displacement","nullable":true,"type":"double"},{"metadata":{},"name":"horsepower","nullable":true,"type":"integer"},{"metadata":{},"name":"weight","nullable":true,"type":"integer"},{"metadata":{},"name":"acceleration","nullable":true,"type":"double"},{"metadata":{},"name":"mpg","nullable":true,"type":"double"},{"metadata":{},"name":"cylinders","nullable":true,"type":"string"},{"metadata":{},"name":"model_year","nullable":true,"type":"string"},{"metadata":{},"name":"origin","nullable":true,"type":"string"}],"type":"struct"}
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/AutoNA.json:
--------------------------------------------------------------------------------
1 | {"fields":[{"metadata":{},"name":"mpg","nullable":true,"type":"double"},{"metadata":{},"name":"cylinders","nullable":true,"type":"string"},{"metadata":{},"name":"model_year","nullable":true,"type":"string"},{"metadata":{},"name":"origin","nullable":true,"type":"string"},{"metadata":{},"name":"acceleration","nullable":true,"type":"double"},{"metadata":{},"name":"displacement","nullable":true,"type":"double"},{"metadata":{},"name":"horsepower","nullable":true,"type":"double"},{"metadata":{},"name":"weight","nullable":true,"type":"double"}],"type":"struct"}
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/Visit.json:
--------------------------------------------------------------------------------
1 | {"fields":[{"metadata":{},"name":"edlevel","nullable":true,"type":"string"},{"metadata":{},"name":"age","nullable":true,"type":"integer"},{"metadata":{},"name":"outwork","nullable":true,"type":"integer"},{"metadata":{},"name":"female","nullable":true,"type":"integer"},{"metadata":{},"name":"married","nullable":true,"type":"integer"},{"metadata":{},"name":"kids","nullable":true,"type":"integer"},{"metadata":{},"name":"hhninc","nullable":true,"type":"double"},{"metadata":{},"name":"educ","nullable":true,"type":"double"},{"metadata":{},"name":"self","nullable":true,"type":"integer"},{"metadata":{},"name":"docvis","nullable":true,"type":"integer"}],"type":"struct"}
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/AuditNA.json:
--------------------------------------------------------------------------------
1 | {"fields":[{"metadata":{},"name":"Employment","nullable":true,"type":"string"},{"metadata":{},"name":"Education","nullable":true,"type":"string"},{"metadata":{},"name":"Marital","nullable":true,"type":"string"},{"metadata":{},"name":"Occupation","nullable":true,"type":"string"},{"metadata":{},"name":"Gender","nullable":true,"type":"string"},{"metadata":{},"name":"Deductions","nullable":true,"type":"string"},{"metadata":{},"name":"Adjusted","nullable":true,"type":"string"},{"metadata":{},"name":"Age","nullable":true,"type":"double"},{"metadata":{},"name":"Income","nullable":true,"type":"double"},{"metadata":{},"name":"Hours","nullable":true,"type":"double"}],"type":"struct"}
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/Audit.json:
--------------------------------------------------------------------------------
1 | {"fields":[{"metadata":{},"name":"Age","nullable":true,"type":"integer"},{"metadata":{},"name":"Employment","nullable":true,"type":"string"},{"metadata":{},"name":"Education","nullable":true,"type":"string"},{"metadata":{},"name":"Marital","nullable":true,"type":"string"},{"metadata":{},"name":"Occupation","nullable":true,"type":"string"},{"metadata":{},"name":"Income","nullable":true,"type":"double"},{"metadata":{},"name":"Gender","nullable":true,"type":"string"},{"metadata":{},"name":"Adjusted","nullable":true,"type":"integer"},{"metadata":{},"name":"Deductions","nullable":true,"type":"boolean"},{"metadata":{},"name":"Hours","nullable":true,"type":"double"}],"type":"struct"}
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/IrisVec.json:
--------------------------------------------------------------------------------
1 | {"type":"struct","fields":[{"name":"label","type":"integer","nullable":true,"metadata":{}},{"name":"features","type":{"type":"udt","class":"org.apache.spark.ml.linalg.VectorUDT","pyClass":"pyspark.ml.linalg.VectorUDT","sqlType":{"type":"struct","fields":[{"name":"type","type":"byte","nullable":false,"metadata":{}},{"name":"size","type":"integer","nullable":true,"metadata":{}},{"name":"indices","type":{"type":"array","elementType":"integer","containsNull":false},"nullable":true,"metadata":{}},{"name":"values","type":{"type":"array","elementType":"double","containsNull":false},"nullable":true,"metadata":{}}]}},"nullable":true,"metadata":{"numFeatures":4,"ml_attr":{"num_attrs":4}}}]}
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/HousingVec.json:
--------------------------------------------------------------------------------
1 | {"type":"struct","fields":[{"name":"label","type":"double","nullable":true,"metadata":{}},{"name":"features","type":{"type":"udt","class":"org.apache.spark.ml.linalg.VectorUDT","pyClass":"pyspark.ml.linalg.VectorUDT","sqlType":{"type":"struct","fields":[{"name":"type","type":"byte","nullable":false,"metadata":{}},{"name":"size","type":"integer","nullable":true,"metadata":{}},{"name":"indices","type":{"type":"array","elementType":"integer","containsNull":false},"nullable":true,"metadata":{}},{"name":"values","type":{"type":"array","elementType":"double","containsNull":false},"nullable":true,"metadata":{}}]}},"nullable":true,"metadata":{"numFeatures":13,"ml_attr":{"num_attrs":13}}}]}
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/schema/Housing.json:
--------------------------------------------------------------------------------
1 | {"fields":[{"metadata":{},"name":"CRIM","nullable":true,"type":"double"},{"metadata":{},"name":"ZN","nullable":true,"type":"double"},{"metadata":{},"name":"INDUS","nullable":true,"type":"double"},{"metadata":{},"name":"CHAS","nullable":true,"type":"integer"},{"metadata":{},"name":"NOX","nullable":true,"type":"double"},{"metadata":{},"name":"RM","nullable":true,"type":"double"},{"metadata":{},"name":"AGE","nullable":true,"type":"double"},{"metadata":{},"name":"DIS","nullable":true,"type":"double"},{"metadata":{},"name":"RAD","nullable":true,"type":"integer"},{"metadata":{},"name":"TAX","nullable":true,"type":"double"},{"metadata":{},"name":"PTRATIO","nullable":true,"type":"double"},{"metadata":{},"name":"B","nullable":true,"type":"double"},{"metadata":{},"name":"LSTAT","nullable":true,"type":"double"},{"metadata":{},"name":"MEDV","nullable":true,"type":"double"}],"type":"struct"}
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/model/HasFeatureImportances.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.model;
20 |
21 | import org.apache.spark.ml.linalg.Vector;
22 |
23 | public interface HasFeatureImportances {
24 | /** Returns the feature importances as a Spark ML {@link Vector}. */
25 | Vector getFeatureImportances();
26 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/HasSparkMLOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import org.jpmml.converter.HasOptions;
22 |
23 | /** Options interface of JPMML-SparkML; declares no members of its own beyond those inherited from {@link HasOptions}.
24 | * @see TransformerConverter#getOption(String, Object)
25 | */
26 | public interface HasSparkMLOptions extends HasOptions {
27 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/java/org/jpmml/sparkml/testing/SparkMLDatasets.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.testing;
20 |
21 | import org.jpmml.converter.testing.Datasets;
22 |
23 | interface SparkMLDatasets extends Datasets {
24 | // Dataset names derived from the HOUSING and IRIS names by appending a "Vec" suffix
25 | String HOUSING_VEC = HOUSING + "Vec";
26 | String IRIS_VEC = IRIS + "Vec";
27 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/model/HasRegressionTableOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.model;
20 |
21 | import org.jpmml.sparkml.HasSparkMLOptions;
22 |
23 | public interface HasRegressionTableOptions extends HasSparkMLOptions {
24 | /** Option name {@code "representation"}. */
25 | String OPTION_REPRESENTATION = "representation";
26 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/model/HasPredictionModelOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.model;
20 |
21 | import org.jpmml.sparkml.HasSparkMLOptions;
22 |
23 | public interface HasPredictionModelOptions extends HasSparkMLOptions {
24 | /** Option name {@code "keep_predictionCol"}. */
25 | String OPTION_KEEP_PREDICTIONCOL = "keep_predictionCol";
26 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/feature/ColumnPrunerConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.feature;
20 |
21 | import org.apache.spark.ml.feature.ColumnPruner;
22 | import org.jpmml.sparkml.FeatureConverter;
23 |
24 | /**
25 |  * Converter for the {@link ColumnPruner} transformer.
26 |  *
27 |  * Declares no conversion logic of its own; all behavior is inherited from {@link FeatureConverter}.
28 |  */
29 | public class ColumnPrunerConverter extends FeatureConverter<ColumnPruner> {
30 |
31 |     /**
32 |      * @param transformer The transformer to convert; handed to the superclass as-is.
33 |      */
34 |     public ColumnPrunerConverter(ColumnPruner transformer){
35 |         super(transformer);
36 |     }
37 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/csv/KMeansIris.csv:
--------------------------------------------------------------------------------
1 | cluster
2 | 1
3 | 10
4 | 10
5 | 10
6 | 1
7 | 1
8 | 10
9 | 1
10 | 10
11 | 10
12 | 1
13 | 10
14 | 10
15 | 10
16 | 1
17 | 1
18 | 1
19 | 1
20 | 1
21 | 1
22 | 1
23 | 1
24 | 10
25 | 1
26 | 10
27 | 10
28 | 1
29 | 1
30 | 1
31 | 10
32 | 10
33 | 1
34 | 1
35 | 1
36 | 10
37 | 10
38 | 1
39 | 1
40 | 10
41 | 1
42 | 1
43 | 10
44 | 10
45 | 1
46 | 1
47 | 10
48 | 1
49 | 10
50 | 1
51 | 10
52 | 3
53 | 3
54 | 3
55 | 6
56 | 3
57 | 2
58 | 3
59 | 5
60 | 3
61 | 6
62 | 5
63 | 2
64 | 6
65 | 9
66 | 6
67 | 3
68 | 2
69 | 6
70 | 9
71 | 6
72 | 4
73 | 6
74 | 9
75 | 9
76 | 3
77 | 3
78 | 3
79 | 3
80 | 2
81 | 6
82 | 6
83 | 6
84 | 6
85 | 4
86 | 2
87 | 2
88 | 3
89 | 9
90 | 2
91 | 6
92 | 2
93 | 2
94 | 6
95 | 5
96 | 2
97 | 2
98 | 2
99 | 2
100 | 5
101 | 2
102 | 7
103 | 4
104 | 8
105 | 8
106 | 8
107 | 0
108 | 2
109 | 0
110 | 8
111 | 0
112 | 8
113 | 8
114 | 8
115 | 4
116 | 4
117 | 7
118 | 8
119 | 0
120 | 0
121 | 9
122 | 8
123 | 4
124 | 0
125 | 9
126 | 8
127 | 0
128 | 9
129 | 4
130 | 8
131 | 8
132 | 0
133 | 0
134 | 8
135 | 9
136 | 4
137 | 0
138 | 7
139 | 8
140 | 4
141 | 8
142 | 8
143 | 8
144 | 4
145 | 8
146 | 7
147 | 8
148 | 9
149 | 8
150 | 7
151 | 4
152 |
--------------------------------------------------------------------------------
/pmml-sparkml-example/src/main/java/org/jpmml/sparkml/example/NullSplitter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.example;
20 |
21 | import java.util.Collections;
22 | import java.util.List;
23 |
24 | import com.beust.jcommander.converters.IParameterSplitter;
25 |
26 | /**
27 |  * A parameter splitter that does not split: the whole parameter value is kept as one element.
28 |  */
29 | public class NullSplitter implements IParameterSplitter {
30 |
31 |     /**
32 |      * @param value The raw parameter value.
33 |      * @return An immutable single-element list holding {@code value} unchanged.
34 |      */
35 |     @Override
36 |     public List<String> split(String value){
37 |         return Collections.singletonList(value);
38 |     }
39 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/scala/org/jpmml/sparkml/feature/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml
20 |
21 | /** Declares the Scaladoc member groups (display names and priorities) of the `feature` package.
22 | * @groupname param Parameters
23 | * @groupprio param -3
24 | *
25 | * @groupname setParam Parameter setters
26 | * @groupprio setParam -2
27 | *
28 | * @groupname getParam Parameter getters
29 | * @groupprio getParam -1
30 | *
31 | * @groupname Ungrouped Members
32 | * @groupprio Ungrouped 0
33 | */
34 | package object feature
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/feature/VectorAttributeRewriterConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.feature;
20 |
21 | import org.apache.spark.ml.feature.VectorAttributeRewriter;
22 | import org.jpmml.sparkml.FeatureConverter;
23 |
24 | /**
25 |  * Converter for the {@link VectorAttributeRewriter} transformer.
26 |  *
27 |  * Declares no conversion logic of its own; all behavior is inherited from {@link FeatureConverter}.
28 |  */
29 | public class VectorAttributeRewriterConverter extends FeatureConverter<VectorAttributeRewriter> {
30 |
31 |     /**
32 |      * @param transformer The transformer to convert; handed to the superclass as-is.
33 |      */
34 |     public VectorAttributeRewriterConverter(VectorAttributeRewriter transformer){
35 |         super(transformer);
36 |     }
37 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/resources/data.py:
--------------------------------------------------------------------------------
1 | from sklearn.preprocessing import LabelEncoder
2 |
3 | import math
4 | import pandas
5 |
6 | def read_csv(name):
7 | df = pandas.read_csv("csv/" + name + ".csv", na_values = ["", "NA", "N/A"])
8 |
9 | X = df.iloc[:, :-1].values
10 | y = df.iloc[:, -1].values
11 |
12 | return (X, y)
13 |
14 | def write_libsvm(X, y, name):
15 | n_rows, n_cols = X.shape
16 |
17 | with open("libsvm/" + name + ".libsvm", "w") as file:
18 | for row in range(n_rows):
19 | cells = []
20 | label = y[row]
21 | cells.append("{:g}".format(label))
22 | for col in range(n_cols):
23 | value = X[row, col]
24 | if not math.isnan(value):
25 | cells.append("{}:{:g}".format(col + 1, value))
26 | file.write(" ".join(cells) + "\n")
27 |
28 | #
29 | # Auto (both the "Auto" and "AutoNA" CSV files)
30 | #
31 |
32 | auto_X, auto_y = read_csv("Auto")
33 |
34 | write_libsvm(auto_X, auto_y, "Auto")
35 |
36 | auto_X, auto_y = read_csv("AutoNA")
37 |
38 | write_libsvm(auto_X, auto_y, "AutoNA")
39 |
40 | #
41 | # Housing
42 | #
43 |
44 | housing_X, housing_y = read_csv("Housing")
45 |
46 | write_libsvm(housing_X, housing_y, "Housing")
47 |
48 | #
49 | # Iris
50 | #
51 |
52 | iris_X, iris_y = read_csv("Iris")
53 | # Encode the labels as integers, because write_libsvm formats each label with the numeric "{:g}" format
54 | iris_le = LabelEncoder()
55 | iris_y = iris_le.fit_transform(iris_y)
56 |
57 | write_libsvm(iris_X, iris_y, "Iris")
58 |
--------------------------------------------------------------------------------
/pmml-sparkml-evaluator/pom.xml:
--------------------------------------------------------------------------------
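1 | <?xml version="1.0" ?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |     <modelVersion>4.0.0</modelVersion>
4 |
5 |     <parent>
6 |         <groupId>org.jpmml</groupId>
7 |         <artifactId>jpmml-sparkml</artifactId>
8 |         <version>3.2-SNAPSHOT</version>
9 |     </parent>
10 |
11 |     <groupId>org.jpmml</groupId>
12 |     <artifactId>pmml-sparkml-evaluator</artifactId>
13 |     <packaging>jar</packaging>
14 |
15 |     <name>JPMML Spark ML JPMML-Evaluator integration</name>
16 |     <description>JPMML Apache Spark ML JPMML-Evaluator integration</description>
17 |
18 |     <licenses>
19 |         <license>
20 |             <name>GNU Affero General Public License (AGPL) version 3.0</name>
21 |             <url>http://www.gnu.org/licenses/agpl-3.0.html</url>
22 |             <distribution>repo</distribution>
23 |         </license>
24 |     </licenses>
25 |
26 |     <dependencies>
27 |         <dependency>
28 |             <groupId>org.jpmml</groupId>
29 |             <artifactId>pmml-evaluator</artifactId>
30 |             <scope>provided</scope>
31 |         </dependency>
32 |         <dependency>
33 |             <groupId>org.jpmml</groupId>
34 |             <artifactId>pmml-evaluator-testing</artifactId>
35 |             <scope>provided</scope>
36 |         </dependency>
37 |     </dependencies>
38 | </project>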
39 |
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/java/org/jpmml/sparkml/testing/ClusteringTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.testing;
20 |
21 | import org.jpmml.converter.FieldNameUtil;
22 | import org.junit.jupiter.api.Test;
23 |
24 | /**
25 |  * Batch tests for clustering models.
26 |  */
27 | public class ClusteringTest extends SimpleSparkMLEncoderBatchTest implements SparkMLAlgorithms, SparkMLDatasets {
28 |
29 |     /**
30 |      * Evaluates the K-Means model on the Iris dataset, excluding the field
31 |      * whose name is built from {@code "pmml"} and {@code "cluster"}.
32 |      */
33 |     @Test
34 |     public void evaluateKMeansIris() throws Exception {
35 |         String excludedField = FieldNameUtil.create("pmml", "cluster");
36 |
37 |         evaluate(K_MEANS, IRIS, excludeFields(new String[]{excludedField}));
38 |     }
39 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/ScalaUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import java.util.ArrayList;
22 | import java.util.List;
23 |
24 | import scala.collection.Seq;
25 |
26 | public class ScalaUtil {
27 |
28 | private ScalaUtil(){
29 | }
30 |
31 | static
32 | public List seqAsJavaList(Seq seq){
33 | List result = new ArrayList<>();
34 |
35 | for(int i = 0, max = seq.length(); i < max; i++){
36 | E element = seq.apply(i);
37 |
38 | result.add(element);
39 | }
40 |
41 | return result;
42 | }
43 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/ItemSetFeature.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import org.dmg.pmml.Field;
22 | import org.jpmml.converter.ContinuousFeature;
23 | import org.jpmml.converter.Feature;
24 |
25 | /**
26 |  * A feature that takes its name and data type from a PMML field
27 |  * and cannot be converted to a continuous feature.
28 |  */
29 | public class ItemSetFeature extends Feature {
30 |
31 |     /**
32 |      * @param encoder The encoder that owns this feature.
33 |      * @param field The field that supplies the required name and data type.
34 |      */
35 |     public ItemSetFeature(SparkMLEncoder encoder, Field<?> field){
36 |         super(encoder, field.requireName(), field.requireDataType());
37 |     }
38 |
39 |     /**
40 |      * @throws UnsupportedOperationException Always.
41 |      */
42 |     @Override
43 |     public ContinuousFeature toContinuousFeature(){
44 |         throw new UnsupportedOperationException();
45 |     }
46 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/VectorUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import java.util.List;
22 |
23 | import com.google.common.primitives.Doubles;
24 | import org.apache.spark.ml.linalg.DenseVector;
25 | import org.apache.spark.ml.linalg.Vector;
26 |
27 | /**
28 |  * Static helpers for working with Spark ML vectors.
29 |  */
30 | public class VectorUtil {
31 |
32 |     private VectorUtil(){
33 |     }
34 |
35 |     /**
36 |      * Exposes the values of a vector as a list of doubles.
37 |      *
38 |      * @param vector The vector; it is converted to dense form first.
39 |      * @return A list view of the dense value array, as produced by {@link Doubles#asList(double...)}.
40 |      */
41 |     static
42 |     public List<Double> toList(Vector vector){
43 |         DenseVector denseVector = vector.toDense();
44 |
45 |         double[] values = denseVector.values();
46 |
47 |         return Doubles.asList(values);
48 |     }
49 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/AssociationRulesModelConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import org.apache.spark.ml.Model;
22 | import org.apache.spark.ml.param.shared.HasPredictionCol;
23 | import org.dmg.pmml.MiningFunction;
24 |
25 | /**
26 |  * Base class for converters of models whose mining function is
27 |  * {@link MiningFunction#ASSOCIATION_RULES}.
28 |  *
29 |  * @param <T> The model type; it must also expose a prediction column.
30 |  */
31 | abstract
32 | public class AssociationRulesModelConverter<T extends Model<T> & HasPredictionCol> extends ModelConverter<T> {
33 |
34 |     /**
35 |      * @param model The model to convert; handed to the superclass as-is.
36 |      */
37 |     public AssociationRulesModelConverter(T model){
38 |         super(model);
39 |     }
40 |
41 |     /**
42 |      * @return Always {@link MiningFunction#ASSOCIATION_RULES}.
43 |      */
44 |     @Override
45 |     public MiningFunction getMiningFunction(){
46 |         return MiningFunction.ASSOCIATION_RULES;
47 |     }
48 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/java/org/jpmml/sparkml/TermUtilTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import org.junit.jupiter.api.Test;
22 |
23 | import static org.junit.jupiter.api.Assertions.assertFalse;
24 | import static org.junit.jupiter.api.Assertions.assertTrue;
25 |
26 | public class TermUtilTest {
27 |
28 | @Test
29 | public void hasPunctuation(){
30 | assertFalse(TermUtil.hasPunctuation("one"));
31 | assertTrue(TermUtil.hasPunctuation("one?"));
32 | // A hyphen inside a term is expected not to count as punctuation
33 | assertFalse(TermUtil.hasPunctuation("one-half"));
34 | // A space is expected not to count as punctuation either, but a comma is
35 | assertFalse(TermUtil.hasPunctuation("one two"));
36 | assertTrue(TermUtil.hasPunctuation("one, two"));
37 | }
38 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/java/org/jpmml/sparkml/SparkMLTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import org.apache.spark.sql.SparkSession;
22 | import org.junit.jupiter.api.AfterAll;
23 | import org.junit.jupiter.api.BeforeAll;
24 |
25 | abstract
26 | public class SparkMLTest {
27 |
28 | @BeforeAll
29 | static
30 | public void createSparkSession(){
31 | SparkMLTest.sparkSession = SparkSessionUtil.createSparkSession();
32 | }
33 |
34 | @AfterAll
35 | static
36 | public void destroySparkSession(){
37 | SparkMLTest.sparkSession = SparkSessionUtil.destroySparkSession(SparkMLTest.sparkSession);
38 | }
39 |
40 | public static SparkSession sparkSession = null;
41 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/java/org/jpmml/sparkml/testing/SparkMLAlgorithms.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2021 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.testing;
20 |
21 | interface SparkMLAlgorithms {
22 | // Algorithm name constants for the test classes of this package
23 | String DECISION_TREE = "DecisionTree";
24 | String FP_GROWTH = "FPGrowth";
25 | String GBT = "GBT";
26 | String GLM = "GLM";
27 | String ISOTONIC_REGRESSION = "IsotonicRegression";
28 | String K_MEANS = "KMeans";
29 | String LINEAR_REGRESION = "LinearRegression"; // NOTE(review): identifier is misspelled ("REGRESION"); renaming requires updating all callers
30 | String LINEAR_SVC = "LinearSVC";
31 | String LOGISTIC_REGRESSION = "LogisticRegression";
32 | String MODEL_CHAIN = "ModelChain";
33 | String NAIVE_BAYES = "NaiveBayes";
34 | String NEURAL_NETWORK = "NeuralNetwork";
35 | String RANDOM_FOREST = "RandomForest";
36 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/java/org/jpmml/sparkml/RegexKeyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import java.util.regex.Pattern;
22 |
23 | import org.junit.jupiter.api.Test;
24 |
25 | import static org.junit.jupiter.api.Assertions.assertFalse;
26 | import static org.junit.jupiter.api.Assertions.assertTrue;
27 |
28 | /**
29 |  * Tests {@code RegexKey} with the same pattern text compiled as a regular expression and as a literal.
30 |  */
31 | public class RegexKeyTest {
32 |
33 |     @Test
34 |     public void compile(){
35 |         // ".*" compiled as a regular expression: accepts both inputs
36 |         RegexKey wildcardKey = new RegexKey(Pattern.compile(".*"));
37 |
38 |         assertTrue(wildcardKey.test(""));
39 |         assertTrue(wildcardKey.test(".*"));
40 |
41 |         // ".*" compiled with Pattern.LITERAL: accepts only the text ".*" itself
42 |         RegexKey literalKey = new RegexKey(Pattern.compile(".*", Pattern.LITERAL));
43 |
44 |         assertFalse(literalKey.test(""));
45 |         assertTrue(literalKey.test(".*"));
46 |     }
47 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/feature/VectorDisassemblerConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.feature;
20 |
21 | import java.util.List;
22 |
23 | import org.jpmml.converter.Feature;
24 | import org.jpmml.sparkml.FeatureConverter;
25 | import org.jpmml.sparkml.SparkMLEncoder;
26 |
27 | /**
28 |  * Converter for the {@code VectorDisassembler} transformer.
29 |  */
30 | public class VectorDisassemblerConverter extends FeatureConverter<VectorDisassembler> {
31 |
32 |     /**
33 |      * @param transformer The transformer to convert; handed to the superclass as-is.
34 |      */
35 |     public VectorDisassemblerConverter(VectorDisassembler transformer){
36 |         super(transformer);
37 |     }
38 |
39 |     /**
40 |      * Returns the features that the encoder holds for the transformer's input column.
41 |      *
42 |      * @param encoder The encoder to look the input column up in.
43 |      * @return The result of {@code encoder.getFeatures(inputCol)}.
44 |      */
45 |     @Override
46 |     public List<Feature> encodeFeatures(SparkMLEncoder encoder){
47 |         VectorDisassembler transformer = getTransformer();
48 |
49 |         String inputCol = transformer.getInputCol();
50 |
51 |         return encoder.getFeatures(inputCol);
52 |     }
53 | }
--------------------------------------------------------------------------------
/pmml-sparkml-lightgbm/src/main/java/org/jpmml/sparkml/lightgbm/LightGBMRegressionModelConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.lightgbm;
20 |
21 | import com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressionModel;
22 | import org.dmg.pmml.mining.MiningModel;
23 | import org.jpmml.converter.Schema;
24 | import org.jpmml.sparkml.RegressionModelConverter;
25 |
26 | /**
27 |  * Regression model converter for {@link LightGBMRegressionModel}.
28 |  */
29 | public class LightGBMRegressionModelConverter extends RegressionModelConverter<LightGBMRegressionModel> {
30 |
31 |     public LightGBMRegressionModelConverter(LightGBMRegressionModel model){
32 |         super(model);
33 |     }
34 |
35 |     /**
36 |      * Delegates the encoding to {@code BoosterUtil.encodeModel}, passing this converter and the schema.
37 |      *
38 |      * @param schema The schema that is handed over to {@code BoosterUtil}.
39 |      * @return The mining model produced by {@code BoosterUtil}.
40 |      */
41 |     @Override
42 |     public MiningModel encodeModel(Schema schema){
43 |         // The model is not read here; BoosterUtil receives the converter itself.
44 |         return BoosterUtil.encodeModel(this, schema);
45 |     }
46 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/model/LinearRegressionModelConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.model;
20 |
21 | import org.apache.spark.ml.regression.LinearRegressionModel;
22 | import org.dmg.pmml.Model;
23 | import org.jpmml.converter.Schema;
24 | import org.jpmml.sparkml.RegressionModelConverter;
25 |
26 | /**
27 |  * Regression model converter for {@link LinearRegressionModel}.
28 |  */
29 | public class LinearRegressionModelConverter extends RegressionModelConverter<LinearRegressionModel> implements HasRegressionTableOptions {
30 |
31 |     public LinearRegressionModelConverter(LinearRegressionModel model){
32 |         super(model);
33 |     }
34 |
35 |     /**
36 |      * Hands the model's coefficients and intercept, together with this converter
37 |      * and the schema, to {@code LinearModelUtil.createRegression}.
38 |      *
39 |      * @param schema The schema that is passed through to {@code LinearModelUtil}.
40 |      * @return The model created by {@code LinearModelUtil.createRegression}.
41 |      */
42 |     @Override
43 |     public Model encodeModel(Schema schema){
44 |         LinearRegressionModel model = getModel();
45 |
46 |         return LinearModelUtil.createRegression(this, model.coefficients(), model.intercept(), schema);
47 |     }
48 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/feature/SparseToDenseTransformerConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.feature;
20 |
21 | import java.util.List;
22 |
23 | import org.jpmml.converter.Feature;
24 | import org.jpmml.sparkml.FeatureConverter;
25 | import org.jpmml.sparkml.SparkMLEncoder;
26 |
27 | /**
28 |  * Feature converter for {@link SparseToDenseTransformer} transformers.
29 |  *
30 |  * The conversion is a pass-through: the features that the encoder returns
31 |  * for the transformer's input column are handed back unchanged.
32 |  */
33 | public class SparseToDenseTransformerConverter extends FeatureConverter<SparseToDenseTransformer> {
34 |
35 |     public SparseToDenseTransformerConverter(SparseToDenseTransformer transformer){
36 |         super(transformer);
37 |     }
38 |
39 |     /**
40 |      * @param encoder The encoder that is queried for the features of the input column.
41 |      * @return The features of the transformer's input column, exactly as returned by the encoder.
42 |      */
43 |     @Override
44 |     public List<Feature> encodeFeatures(SparkMLEncoder encoder){
45 |         SparseToDenseTransformer transformer = getTransformer();
46 |
47 |         List<Feature> features = encoder.getFeatures(transformer.getInputCol());
48 |
49 |         return features;
50 |     }
51 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/java/org/jpmml/sparkml/PipelineModelUtilTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import org.apache.spark.ml.PipelineModel;
22 | import org.apache.spark.ml.Transformer;
23 | import org.apache.spark.ml.feature.SQLTransformer;
24 | import org.junit.jupiter.api.Test;
25 |
26 | import static org.junit.jupiter.api.Assertions.assertArrayEquals;
27 | import static org.junit.jupiter.api.Assertions.assertEquals;
28 |
29 | /**
30 |  * Unit test for {@code PipelineModelUtil}.
31 |  */
32 | public class PipelineModelUtilTest {
33 |
34 |     /**
35 |      * Checks that {@code PipelineModelUtil.create} yields a pipeline model
36 |      * that carries the requested uid and the supplied stages.
37 |      */
38 |     // Without this annotation JUnit Jupiter does not discover the method, and the assertions never run.
39 |     @Test
40 |     public void create(){
41 |         Transformer identityTransformer = new SQLTransformer()
42 |             .setStatement("SELECT * FROM __THIS__");
43 |
44 |         PipelineModel pipelineModel = PipelineModelUtil.create("test", new Transformer[]{identityTransformer});
45 |
46 |         assertEquals("test", pipelineModel.uid());
47 |         assertArrayEquals(new Transformer[]{identityTransformer}, pipelineModel.stages());
48 |     }
49 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/model/HasTreeOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.model;
20 |
21 | import java.util.LinkedHashMap;
22 | import java.util.Map;
23 |
24 | import org.jpmml.converter.HasNativeConfiguration;
25 | import org.jpmml.sparkml.HasSparkMLOptions;
26 | import org.jpmml.sparkml.visitors.TreeModelCompactor;
27 |
28 | /**
29 |  * Option keys and the default native configuration for converters that implement this interface.
30 |  */
31 | public interface HasTreeOptions extends HasSparkMLOptions, HasNativeConfiguration {
32 |
33 |     /**
34 |      * @see TreeModelCompactor
35 |      */
36 |     String OPTION_COMPACT = "compact";
37 |
38 |     String OPTION_ESTIMATE_FEATURE_IMPORTANCES = "estimate_featureImportances";
39 |
40 |     /**
41 |      * @return A newly created, insertion-ordered map that maps {@link #OPTION_COMPACT} to {@link Boolean#FALSE}.
42 |      */
43 |     @Override
44 |     default
45 |     public Map<String, ?> getNativeConfiguration(){
46 |         Map<String, Object> result = new LinkedHashMap<>();
47 |         result.put(HasTreeOptions.OPTION_COMPACT, Boolean.FALSE);
48 |
49 |         return result;
50 |     }
51 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/feature/NGramConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.feature;
20 |
21 | import java.util.Collections;
22 | import java.util.List;
23 |
24 | import org.apache.spark.ml.feature.NGram;
25 | import org.jpmml.converter.Feature;
26 | import org.jpmml.sparkml.DocumentFeature;
27 | import org.jpmml.sparkml.FeatureConverter;
28 | import org.jpmml.sparkml.SparkMLEncoder;
29 |
30 | /**
31 |  * Feature converter for {@link NGram} transformers.
32 |  */
33 | public class NGramConverter extends FeatureConverter<NGram> {
34 |
35 |     public NGramConverter(NGram transformer){
36 |         super(transformer);
37 |     }
38 |
39 |     /**
40 |      * Passes the document feature of the input column through as the sole result.
41 |      * Apart from the input column, no transformer parameter is read in this method.
42 |      *
43 |      * @param encoder The encoder that is queried for the only feature of the input column.
44 |      * @return A single-element list that holds the input column's {@link DocumentFeature}.
45 |      * @throws ClassCastException If the only feature of the input column is not a {@link DocumentFeature}.
46 |      */
47 |     @Override
48 |     public List<Feature> encodeFeatures(SparkMLEncoder encoder){
49 |         NGram transformer = getTransformer();
50 |
51 |         DocumentFeature documentFeature = (DocumentFeature)encoder.getOnlyFeature(transformer.getInputCol());
52 |
53 |         return Collections.singletonList(documentFeature);
54 |     }
55 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/feature/VectorSizeHintConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2019 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.feature;
20 |
21 | import java.util.List;
22 |
23 | import org.apache.spark.ml.feature.VectorSizeHint;
24 | import org.jpmml.converter.Feature;
25 | import org.jpmml.converter.SchemaUtil;
26 | import org.jpmml.sparkml.FeatureConverter;
27 | import org.jpmml.sparkml.SparkMLEncoder;
28 |
29 | /**
30 |  * Feature converter for {@link VectorSizeHint} transformers.
31 |  */
32 | public class VectorSizeHintConverter extends FeatureConverter<VectorSizeHint> {
33 |
34 |     public VectorSizeHintConverter(VectorSizeHint transformer){
35 |         super(transformer);
36 |     }
37 |
38 |     /**
39 |      * Checks the features of the input column against the transformer's size and returns them unchanged.
40 |      *
41 |      * @param encoder The encoder that is queried for the features of the input column.
42 |      * @return The features of the transformer's input column, exactly as returned by the encoder.
43 |      */
44 |     @Override
45 |     public List<Feature> encodeFeatures(SparkMLEncoder encoder){
46 |         VectorSizeHint transformer = getTransformer();
47 |
48 |         int size = transformer.getSize();
49 |
50 |         List<Feature> features = encoder.getFeatures(transformer.getInputCol());
51 |
52 |         // NOTE(review): presumably fails when the feature count differs from the declared size - confirm against SchemaUtil
53 |         SchemaUtil.checkSize(size, features);
54 |
55 |         return features;
56 |     }
57 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/feature/VectorSlicerConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.feature;
20 |
21 | import java.util.List;
22 |
23 | import org.apache.spark.ml.feature.VectorSlicer;
24 | import org.jpmml.converter.Feature;
25 | import org.jpmml.sparkml.FeatureConverter;
26 | import org.jpmml.sparkml.SparkMLEncoder;
27 |
28 | /**
29 |  * Feature converter for {@link VectorSlicer} transformers.
30 |  *
31 |  * Only index-based slicing is handled; a transformer that declares names is rejected.
32 |  */
33 | public class VectorSlicerConverter extends FeatureConverter<VectorSlicer> {
34 |
35 |     public VectorSlicerConverter(VectorSlicer transformer){
36 |         super(transformer);
37 |     }
38 |
39 |     /**
40 |      * @param encoder The encoder that is queried with the input column and the transformer's indices.
41 |      * @return The features that the encoder returns for the input column and the given indices.
42 |      * @throws IllegalArgumentException If the transformer declares one or more names.
43 |      */
44 |     @Override
45 |     public List<Feature> encodeFeatures(SparkMLEncoder encoder){
46 |         VectorSlicer transformer = getTransformer();
47 |
48 |         String[] names = transformer.getNames();
49 |         if(names != null && names.length > 0){
50 |             throw new IllegalArgumentException("Expected index mode, got name mode");
51 |         }
52 |
53 |         return encoder.getFeatures(transformer.getInputCol(), transformer.getIndices());
54 |     }
55 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/SparkSessionUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import org.apache.spark.SparkContext;
22 | import org.apache.spark.sql.SparkSession;
23 |
24 | public class SparkSessionUtil {
25 |
26 | private SparkSessionUtil(){
27 | }
28 |
29 | static
30 | public SparkSession createSparkSession(){
31 | return createSparkSession("local");
32 | }
33 |
34 | static
35 | public SparkSession createSparkSession(String master){
36 | SparkSession.Builder builder = SparkSession.builder()
37 | .master(master)
38 | .config("spark.ui.enabled", false);
39 |
40 | SparkSession sparkSession = builder.getOrCreate();
41 |
42 | SparkContext sparkContext = sparkSession.sparkContext();
43 | sparkContext.setLogLevel("ERROR");
44 |
45 | return sparkSession;
46 | }
47 |
48 | static
49 | public SparkSession destroySparkSession(SparkSession sparkSession){
50 | sparkSession.stop();
51 |
52 | return null;
53 | }
54 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/model/DecisionTreeRegressionModelConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.model;
20 |
21 | import org.apache.spark.ml.linalg.Vector;
22 | import org.apache.spark.ml.regression.DecisionTreeRegressionModel;
23 | import org.dmg.pmml.tree.TreeModel;
24 | import org.jpmml.converter.Schema;
25 | import org.jpmml.sparkml.RegressionModelConverter;
26 |
27 | /**
28 |  * Regression model converter for {@link DecisionTreeRegressionModel}.
29 |  */
30 | public class DecisionTreeRegressionModelConverter extends RegressionModelConverter<DecisionTreeRegressionModel> implements HasFeatureImportances, HasTreeOptions {
31 |
32 |     public DecisionTreeRegressionModelConverter(DecisionTreeRegressionModel model){
33 |         super(model);
34 |     }
35 |
36 |     /**
37 |      * @return The value of the model's {@code featureImportances()} method.
38 |      */
39 |     @Override
40 |     public Vector getFeatureImportances(){
41 |         DecisionTreeRegressionModel model = getModel();
42 |
43 |         return model.featureImportances();
44 |     }
45 |
46 |     /**
47 |      * Delegates the encoding to {@code TreeModelUtil.encodeDecisionTree}, passing this converter and the schema.
48 |      *
49 |      * @param schema The schema that is handed over to {@code TreeModelUtil}.
50 |      * @return The tree model produced by {@code TreeModelUtil}.
51 |      */
52 |     @Override
53 |     public TreeModel encodeModel(Schema schema){
54 |         return TreeModelUtil.encodeDecisionTree(this, schema);
55 |     }
56 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/feature/ChiSqSelectorModelConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.feature;
20 |
21 | import java.util.Arrays;
22 | import java.util.List;
23 |
24 | import org.apache.spark.ml.feature.ChiSqSelectorModel;
25 | import org.jpmml.converter.Feature;
26 | import org.jpmml.sparkml.FeatureConverter;
27 | import org.jpmml.sparkml.SparkMLEncoder;
28 |
29 | /**
30 |  * Feature converter for {@link ChiSqSelectorModel} transformers.
31 |  */
32 | public class ChiSqSelectorModelConverter extends FeatureConverter<ChiSqSelectorModel> {
33 |
34 |     public ChiSqSelectorModelConverter(ChiSqSelectorModel transformer){
35 |         super(transformer);
36 |     }
37 |
38 |     /**
39 |      * Selects the features of the features column by the model's selected indices, in ascending index order.
40 |      *
41 |      * @param encoder The encoder that is queried with the features column and the sorted indices.
42 |      * @return The features that the encoder returns for the features column and the sorted indices.
43 |      */
44 |     @Override
45 |     public List<Feature> encodeFeatures(SparkMLEncoder encoder){
46 |         ChiSqSelectorModel transformer = getTransformer();
47 |
48 |         int[] indices = transformer.selectedFeatures();
49 |         if(indices.length > 0){
50 |             // Sort a copy, so that the array obtained from the model is left untouched
51 |             indices = indices.clone();
52 |
53 |             Arrays.sort(indices);
54 |         }
55 |
56 |         return encoder.getFeatures(transformer.getFeaturesCol(), indices);
57 |     }
58 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/feature/VectorAssemblerConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.feature;
20 |
21 | import java.util.ArrayList;
22 | import java.util.List;
23 |
24 | import org.apache.spark.ml.feature.VectorAssembler;
25 | import org.jpmml.converter.Feature;
26 | import org.jpmml.sparkml.FeatureConverter;
27 | import org.jpmml.sparkml.SparkMLEncoder;
28 |
29 | /**
30 |  * Feature converter for {@link VectorAssembler} transformers.
31 |  */
32 | public class VectorAssemblerConverter extends FeatureConverter<VectorAssembler> {
33 |
34 |     public VectorAssemblerConverter(VectorAssembler transformer){
35 |         super(transformer);
36 |     }
37 |
38 |     /**
39 |      * Concatenates the features of all input columns, in the order in which the columns are declared.
40 |      *
41 |      * @param encoder The encoder that is queried for the features of each input column.
42 |      * @return A new list that holds the features of every input column, one column after another.
43 |      */
44 |     @Override
45 |     public List<Feature> encodeFeatures(SparkMLEncoder encoder){
46 |         VectorAssembler transformer = getTransformer();
47 |
48 |         List<Feature> result = new ArrayList<>();
49 |
50 |         String[] inputCols = transformer.getInputCols();
51 |         for(String inputCol : inputCols){
52 |             List<Feature> features = encoder.getFeatures(inputCol);
53 |
54 |             result.addAll(features);
55 |         }
56 |
57 |         return result;
58 |     }
59 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/main/java/org/jpmml/sparkml/TransformerConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml;
20 |
21 | import java.util.Map;
22 |
23 | import org.apache.spark.ml.Transformer;
24 |
25 | /**
26 |  * Base class that pairs a transformer object with an optional map of conversion options.
27 |  *
28 |  * @param <T> The type of the wrapped transformer.
29 |  */
30 | abstract
31 | public class TransformerConverter<T extends Transformer> {
32 |
33 |     private T object = null;
34 |
35 |     private Map<String, ?> options = null;
36 |
37 |
38 |     public TransformerConverter(T object){
39 |         setObject(object);
40 |     }
41 |
42 |     /**
43 |      * Looks up an option value.
44 |      *
45 |      * @param key The option key.
46 |      * @param defaultValue The value to fall back to.
47 |      * @return The mapped value (which may be {@code null}) if the options map is set and contains the key;
48 |      * the default value otherwise.
49 |      */
50 |     public Object getOption(String key, Object defaultValue){
51 |         Map<String, ?> options = getOptions();
52 |
53 |         // containsKey, rather than a null check on the value, so that an explicit null mapping wins over the default
54 |         if(options != null && options.containsKey(key)){
55 |             return options.get(key);
56 |         }
57 |
58 |         return defaultValue;
59 |     }
60 |
61 |     public T getObject(){
62 |         return this.object;
63 |     }
64 |
65 |     private void setObject(T object){
66 |         this.object = object;
67 |     }
68 |
69 |     /**
70 |      * @return The options map; {@code null} until {@link #setOptions(Map)} has been called with a non-null map.
71 |      */
72 |     public Map<String, ?> getOptions(){
73 |         return this.options;
74 |     }
75 |
76 |     public void setOptions(Map<String, ?> options){
77 |         this.options = options;
78 |     }
79 | }
--------------------------------------------------------------------------------
/pmml-sparkml/src/test/java/org/jpmml/sparkml/testing/SimpleSparkMLEncoderBatchTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022 Villu Ruusmann
3 | *
4 | * This file is part of JPMML-SparkML
5 | *
6 | * JPMML-SparkML is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Affero General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * JPMML-SparkML is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Affero General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU Affero General Public License
17 | * along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
18 | */
19 | package org.jpmml.sparkml.testing;
20 |
21 | import java.util.function.Predicate;
22 |
23 | import com.google.common.base.Equivalence;
24 | import org.jpmml.evaluator.ResultField;
25 | import org.junit.jupiter.api.AfterAll;
26 | import org.junit.jupiter.api.BeforeAll;
27 |
28 | abstract
29 | public class SimpleSparkMLEncoderBatchTest extends SparkMLEncoderBatchTest {
30 |
31 | @Override
32 | public SparkMLEncoderBatch createBatch(String algorithm, String dataset, Predicate columnFilter, Equivalence