├── project
│   ├── build.properties
│   ├── Version.scala
│   └── Dependencies.scala
├── src
│   └── main
│       └── scala
│           └── io
│               └── picnicml
│                   └── doddlemodel
│                       └── examples
│                           ├── misc
│                           │   ├── ShuffleDatasetExample.scala
│                           │   ├── ReadingDataExample.scala
│                           │   ├── SplitDatasetExample.scala
│                           │   └── EstimatorPersistenceExample.scala
│                           ├── preprocessing
│                           │   ├── NormalizerExample.scala
│                           │   ├── RangeScalerExample.scala
│                           │   ├── OneHotEncoderExample.scala
│                           │   ├── BinarizerExample.scala
│                           │   └── StandardScalerExample.scala
│                           ├── impute
│                           │   ├── MeanValueImputerExample.scala
│                           │   └── MostFrequentValueImputerExample.scala
│                           ├── modelselection
│                           │   ├── KFoldExample.scala
│                           │   ├── GroupKFoldExample.scala
│                           │   ├── GridSearchExample.scala
│                           │   └── RandomSearchExample.scala
│                           ├── dummy
│                           │   ├── MeanRegressorExample.scala
│                           │   ├── MedianRegressorExample.scala
│                           │   ├── UniformClassifierExample.scala
│                           │   ├── StratifiedClassifierExample.scala
│                           │   └── MostFrequentClassifierExample.scala
│                           ├── linear
│                           │   ├── LinearRegressionExample.scala
│                           │   ├── LogisticRegressionExample.scala
│                           │   ├── PoissonRegressionExample.scala
│                           │   └── SoftmaxClassifierExample.scala
│                           ├── pipeline
│                           │   └── PipelineExample.scala
│                           └── usecase
│                               └── HousePrices.scala
├── .gitignore
├── LICENSE
└── README.md
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version = 1.2.8
2 | 
--------------------------------------------------------------------------------
/project/Version.scala:
--------------------------------------------------------------------------------
1 | import scala.util.Properties.envOrNone
2 | 
3 | object Version {
4 | 
5 |   val baseVersion = "0.0.1"
6 | 
7 |   def apply(): String =
8 |     envOrNone("PRE_RELEASE").fold(baseVersion)(preRelease => s"$baseVersion-$preRelease")
9 | }
10 | 
--------------------------------------------------------------------------------
/project/Dependencies.scala:
--------------------------------------------------------------------------------
1 | import sbt._
2 | 
3 | object Dependencies {
4 | 
5 |   object DependencyVersion {
6 |     val doddle = "0.0.1-beta4"
7 |     val breeze = "1.0"
8 |     val slf4j = "1.7.26"
9 |   }
10 | 
11 |   val compileDependencies: Seq[ModuleID] = Seq(
12 |     "io.github.picnicml" %% "doddle-model" % DependencyVersion.doddle,
13 |     "org.scalanlp" %% "breeze-natives" % DependencyVersion.breeze,
14 |     "org.slf4j" % "slf4j-nop" % DependencyVersion.slf4j
15 |   )
16 | 
17 |   def settings: Seq[ModuleID] = {
18 |     compileDependencies
19 |   }
20 | }
21 | 
--------------------------------------------------------------------------------
/src/main/scala/io/picnicml/doddlemodel/examples/misc/ShuffleDatasetExample.scala:
--------------------------------------------------------------------------------
1 | package io.picnicml.doddlemodel.examples.misc
2 | 
3 | import io.picnicml.doddlemodel.data.DatasetUtils.shuffleDataset
4 | import io.picnicml.doddlemodel.data.loadIrisDataset
5 | 
6 | import scala.util.Random
7 | 
8 | object ShuffleDatasetExample extends App {
9 |   // set random seed
10 |   implicit val rand: Random = new Random(42)
11 | 
12 |   // load the data and shuffle afterwards
13 |   val (x, y, featureIndex) = loadIrisDataset
14 |   val (xShuffled, yShuffled) = shuffleDataset(x, y)
15 | }
16 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Intellij ###
2 | .idea/
3 | *.ipr
4 | *.iws
5 | 
6 | ## File-based project format:
7 | *.iws
8 | 
9 | ## Plugin-specific files:
10 | /out/
11 | 
12 | # mpeltonen/sbt-idea
plugin 13 | .idea_modules/ 14 | 15 | ### Intellij Patch ### 16 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 17 | 18 | # *.iml 19 | # modules.xml 20 | # .idea/misc.xml 21 | # *.ipr 22 | 23 | ### Scala ### 24 | *.class 25 | *.log 26 | target 27 | logs 28 | lib_managed 29 | dist/ 30 | data/ 31 | *.class 32 | *.log 33 | *.iml 34 | *.zip 35 | *.jar 36 | *.env 37 | .env 38 | project/project/ 39 | project/plugins/project/ 40 | !lib/*.jar 41 | 42 | # Custom 43 | .DS_Store 44 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/preprocessing/NormalizerExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.preprocessing 2 | 3 | import breeze.linalg.DenseMatrix 4 | import io.picnicml.doddlemodel.preprocessing.Normalizer 5 | import io.picnicml.doddlemodel.preprocessing.Norms.L2Norm 6 | import io.picnicml.doddlemodel.syntax.TransformerSyntax._ 7 | 8 | object NormalizerExample extends App { 9 | val x = DenseMatrix( 10 | List(1.0, 1.0, 1.0), 11 | List(3.0, 0.0, 1.5), 12 | List(6.0, 2.0, 0.0) 13 | ) 14 | 15 | val normalizer = Normalizer(L2Norm) 16 | val fittedNormalizer = normalizer.fit(x) 17 | println(s"normalized rows:\n${fittedNormalizer.transform(x)}") 18 | } 19 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/misc/ReadingDataExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.misc 2 | 3 | import java.io.File 4 | 5 | import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset 6 | 7 | object ReadingDataExample extends App { 8 | val dataPath = "/path/to/local/dataset.csv" 9 | val (data, featureIndex) = loadCsvDataset(new File(dataPath)) 10 | 11 | // 'label' is the last column, drop it from features and feature index 12 | val (x, y) = (data(::, 0 to -2), data(::, -1)) 13 | val fixedFeatureIndex = featureIndex.drop(x.cols) 14 | 15 | println(s"features shape: ${x.rows}, ${x.cols}") 16 | println(s"target shape: ${y.length}") 17 | println(s"features: $fixedFeatureIndex") 18 | } 19 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/impute/MeanValueImputerExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.impute 2 | 3 | import breeze.linalg.DenseMatrix 4 | import io.picnicml.doddlemodel.data.Feature.FeatureIndex 5 | import io.picnicml.doddlemodel.impute.MeanValueImputer 6 | import io.picnicml.doddlemodel.syntax.TransformerSyntax._ 7 | 8 | object MeanValueImputerExample extends App { 9 | val xMissing = DenseMatrix( 10 | List(Double.NaN, 1.0, 2.0), 11 | List(3.0, Double.NaN, 5.0), 12 | List(6.0, 7.0, 8.0) 13 | ) 14 | val featureIndex = FeatureIndex.numerical(xMissing.cols) 15 | 16 | // only impute the last two columns 17 | val imputer = MeanValueImputer(featureIndex.subset(1 to 2)) 18 | val trainedImputer = imputer.fit(xMissing) 19 | println(s"imputed data:\n${trainedImputer.transform(xMissing)}") 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/impute/MostFrequentValueImputerExample.scala: -------------------------------------------------------------------------------- 1 | 
package io.picnicml.doddlemodel.examples.impute 2 | 3 | import breeze.linalg.DenseMatrix 4 | import io.picnicml.doddlemodel.data.Feature.FeatureIndex 5 | import io.picnicml.doddlemodel.impute.MostFrequentValueImputer 6 | import io.picnicml.doddlemodel.syntax.TransformerSyntax._ 7 | 8 | object MostFrequentValueImputerExample extends App { 9 | val xMissing = DenseMatrix( 10 | List(Double.NaN, 1.0, 2.0), 11 | List(3.0, Double.NaN, 5.0), 12 | List(6.0, 7.0, 8.0), 13 | List(6.0, 7.0, 8.0) 14 | ) 15 | val featureIndex = FeatureIndex.categorical(xMissing.cols) 16 | 17 | // only impute the first and the last column 18 | val imputer = MostFrequentValueImputer(featureIndex.subset(0, 2)) 19 | val trainedImputer = imputer.fit(xMissing) 20 | println(s"imputed data:\n${trainedImputer.transform(xMissing)}") 21 | } 22 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/misc/SplitDatasetExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.misc 2 | 3 | import breeze.linalg.DenseVector 4 | import io.picnicml.doddlemodel.data.DatasetUtils.{splitDataset, splitDatasetWithGroups} 5 | import io.picnicml.doddlemodel.data.loadIrisDataset 6 | 7 | object SplitDatasetExample extends App { 8 | val (x, y, featureIndex) = loadIrisDataset 9 | println(s"features: $featureIndex") 10 | 11 | // if not specified, the default value of 0.5 is used for proportionTrain 12 | val split = splitDataset(x, y, proportionTrain = 0.9) 13 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 14 | 15 | // split with groups 16 | val groups = DenseVector((0 until x.rows).map(x => x % 4):_*) 17 | val groupsSplit = splitDatasetWithGroups(x, y, groups, proportionTrain = 0.9) 18 | println(s"training set size: ${groupsSplit.xTr.rows}, test set size: ${groupsSplit.xTe.rows}") 19 | } 20 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/modelselection/KFoldExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.modelselection 2 | 3 | import io.picnicml.doddlemodel.data.loadBreastCancerDataset 4 | import io.picnicml.doddlemodel.linear.LogisticRegression 5 | import io.picnicml.doddlemodel.metrics.accuracy 6 | import io.picnicml.doddlemodel.modelselection.{CrossValidation, KFoldSplitter} 7 | 8 | import scala.util.Random 9 | 10 | object KFoldExample extends App { 11 | val (x, y, featureIndex) = loadBreastCancerDataset 12 | println(s"features: $featureIndex") 13 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 14 | 15 | // lambda is L2 regularization strength 16 | val model = LogisticRegression(lambda = 1.5) 17 | val cv = CrossValidation(accuracy, KFoldSplitter(numFolds = 10)) 18 | 19 | implicit val rand: Random = new Random(42) 20 | val score = cv.score(model, x, y) 21 | println(f"cross-validation accuracy: $score%1.4f") 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/dummy/MeanRegressorExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.dummy 2 | 3 | import io.picnicml.doddlemodel.data.DatasetUtils.splitDataset 4 | import io.picnicml.doddlemodel.data.loadBostonDataset 5 | import 
io.picnicml.doddlemodel.dummy.regression.MeanRegressor 6 | import io.picnicml.doddlemodel.metrics.rmse 7 | import io.picnicml.doddlemodel.syntax.RegressorSyntax._ 8 | 9 | object MeanRegressorExample extends App { 10 | val (x, y, featureIndex) = loadBostonDataset 11 | println(s"features: $featureIndex") 12 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 13 | 14 | val split = splitDataset(x, y) 15 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 16 | 17 | val model = MeanRegressor() 18 | val trainedModel = model.fit(split.xTr, split.yTr) 19 | 20 | val score = rmse(split.yTe, trainedModel.predict(split.xTe)) 21 | println(f"test root mean squared error: $score%1.4f") 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/dummy/MedianRegressorExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.dummy 2 | 3 | import io.picnicml.doddlemodel.data.loadBostonDataset 4 | import io.picnicml.doddlemodel.data.DatasetUtils.splitDataset 5 | import io.picnicml.doddlemodel.dummy.regression.MedianRegressor 6 | import io.picnicml.doddlemodel.metrics.rmse 7 | import io.picnicml.doddlemodel.syntax.RegressorSyntax._ 8 | 9 | object MedianRegressorExample extends App { 10 | val (x, y, featureIndex) = loadBostonDataset 11 | println(s"features: $featureIndex") 12 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 13 | 14 | val split = splitDataset(x, y) 15 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 16 | 17 | val model = MedianRegressor() 18 | val trainedModel = model.fit(split.xTr, split.yTr) 19 | 20 | val score = rmse(split.yTe, trainedModel.predict(split.xTe)) 21 | println(f"test root mean squared error: $score%1.4f") 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/linear/LinearRegressionExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.linear 2 | 3 | import io.picnicml.doddlemodel.data.DatasetUtils.splitDataset 4 | import io.picnicml.doddlemodel.data.loadBostonDataset 5 | import io.picnicml.doddlemodel.linear.LinearRegression 6 | import io.picnicml.doddlemodel.metrics.rmse 7 | import io.picnicml.doddlemodel.syntax.RegressorSyntax._ 8 | 9 | object LinearRegressionExample extends App { 10 | val (x, y, featureIndex) = loadBostonDataset 11 | println(s"features: $featureIndex") 12 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 13 | 14 | val split = splitDataset(x, y) 15 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 16 | 17 | // lambda is L2 regularization strength 18 | val model = LinearRegression(lambda = 1.5) 19 | val trainedModel = model.fit(split.xTr, split.yTr) 20 | 21 | val score = rmse(split.yTe, trainedModel.predict(split.xTe)) 22 | println(f"test root mean squared error: $score%1.4f") 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/linear/LogisticRegressionExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.linear 2 | 3 | import io.picnicml.doddlemodel.data.DatasetUtils.splitDataset 4 
| import io.picnicml.doddlemodel.data.loadBreastCancerDataset 5 | import io.picnicml.doddlemodel.linear.LogisticRegression 6 | import io.picnicml.doddlemodel.metrics.accuracy 7 | import io.picnicml.doddlemodel.syntax.ClassifierSyntax._ 8 | 9 | object LogisticRegressionExample extends App { 10 | val (x, y, featureIndex) = loadBreastCancerDataset 11 | println(s"features: $featureIndex") 12 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 13 | 14 | val split = splitDataset(x, y) 15 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 16 | 17 | // lambda is L2 regularization strength 18 | val model = LogisticRegression(lambda = 1.5) 19 | val trainedModel = model.fit(split.xTr, split.yTr) 20 | 21 | val score = accuracy(split.yTe, trainedModel.predict(split.xTe)) 22 | println(f"test accuracy: $score%1.4f") 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/linear/PoissonRegressionExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.linear 2 | 3 | import io.picnicml.doddlemodel.data.DatasetUtils.splitDataset 4 | import io.picnicml.doddlemodel.data.loadHighSchoolTestDataset 5 | import io.picnicml.doddlemodel.linear.PoissonRegression 6 | import io.picnicml.doddlemodel.metrics.rmse 7 | import io.picnicml.doddlemodel.syntax.RegressorSyntax._ 8 | 9 | object PoissonRegressionExample extends App { 10 | val (x, y, featureIndex) = loadHighSchoolTestDataset 11 | println(s"features: $featureIndex") 12 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 13 | 14 | val split = splitDataset(x, y) 15 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 16 | 17 | // lambda is L2 regularization strength 18 | val model = PoissonRegression(lambda = 1.5) 19 | val trainedModel = model.fit(split.xTr, split.yTr) 20 | 21 | val score = rmse(split.yTe, trainedModel.predict(split.xTe)) 22 | println(f"test root mean squared error: $score%1.4f") 23 | } 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Picnic Machine Learning 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/modelselection/GroupKFoldExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.modelselection 2 | 3 | import breeze.linalg.DenseVector 4 | import io.picnicml.doddlemodel.data.loadBreastCancerDataset 5 | import io.picnicml.doddlemodel.linear.LogisticRegression 6 | import io.picnicml.doddlemodel.metrics.accuracy 7 | import io.picnicml.doddlemodel.modelselection.{CrossValidation, GroupKFoldSplitter} 8 | 9 | import scala.util.Random 10 | 11 | object GroupKFoldExample extends App { 12 | val (x, y, featureIndex) = loadBreastCancerDataset 13 | println(s"features: $featureIndex") 14 | 15 | // e.g. we have 10 patients 16 | val groups = Some(DenseVector((0 until x.rows).map(x => x % 10):_*)) 17 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 18 | 19 | // lambda is L2 regularization strength 20 | val model = LogisticRegression(lambda = 1.5) 21 | val cv = CrossValidation(accuracy, GroupKFoldSplitter(numFolds = 10)) 22 | 23 | implicit val rand: Random = new Random(42) 24 | val score = cv.score(model, x, y, groups) 25 | println(f"cross-validation accuracy: $score%1.4f") 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/preprocessing/RangeScalerExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.preprocessing 2 | 3 | import breeze.linalg.DenseMatrix 4 | import io.picnicml.doddlemodel.data.Feature.{CategoricalFeature, FeatureIndex, NumericalFeature} 5 | import io.picnicml.doddlemodel.preprocessing.RangeScaler 6 | import io.picnicml.doddlemodel.syntax.TransformerSyntax._ 7 | 8 | object RangeScalerExample extends App { 9 | val x = DenseMatrix( 10 | List(1.0, 1.0, 1.0), 11 | List(3.0, 0.0, 1.5), 12 | List(6.0, 2.0, 0.0) 13 | ) 14 | val featureIndex = FeatureIndex(List(NumericalFeature, CategoricalFeature, NumericalFeature)) 15 | println(s"features: $featureIndex") 16 | 17 | // scale all numerical features 18 | val scaler = RangeScaler(range = (0, 1), featureIndex = featureIndex) 19 | val fittedScaler = scaler.fit(x) 20 | println(s"scaled data:\n${fittedScaler.transform(x)}") 21 | 22 | // only scale the last feature 23 | val scalerSubset = RangeScaler(range = (0, 1), featureIndex = featureIndex.subset(2)) 24 | val fittedScalerSubset = scalerSubset.fit(x) 25 | println(s"scaled data:\n${fittedScalerSubset.transform(x)}") 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/dummy/UniformClassifierExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.dummy 2 | 3 | import io.picnicml.doddlemodel.data.loadIrisDataset 4 | import io.picnicml.doddlemodel.data.DatasetUtils.{shuffleDataset, splitDataset} 5 | import io.picnicml.doddlemodel.dummy.classification.UniformClassifier 6 | import io.picnicml.doddlemodel.metrics.accuracy 7 | import io.picnicml.doddlemodel.syntax.ClassifierSyntax._ 8 | 9 | import scala.util.Random 10 | 11 | object UniformClassifierExample extends App { 12 | implicit val rand: Random = new Random(42) 13 | val (features, target, featureIndex) = loadIrisDataset 14 | println(s"features: $featureIndex") 15 | 
16 | val (x, y) = shuffleDataset(features, target) 17 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 18 | 19 | val split = splitDataset(x, y) 20 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 21 | 22 | val model = UniformClassifier() 23 | val trainedModel = model.fit(split.xTr, split.yTr) 24 | 25 | val score = accuracy(split.yTe, trainedModel.predict(split.xTe)) 26 | println(f"test accuracy: $score%1.4f") 27 | } 28 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/misc/EstimatorPersistenceExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.misc 2 | 3 | import io.picnicml.doddlemodel.data.loadBreastCancerDataset 4 | import io.picnicml.doddlemodel.linear.LogisticRegression 5 | import io.picnicml.doddlemodel.loadEstimator 6 | import io.picnicml.doddlemodel.metrics.accuracy 7 | import io.picnicml.doddlemodel.syntax.ClassifierSyntax._ 8 | 9 | object EstimatorPersistenceExample extends App { 10 | val (x, y, _) = loadBreastCancerDataset 11 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 12 | 13 | val model = LogisticRegression() 14 | val trainedModel = model.fit(x, y) 15 | val score = accuracy(y, trainedModel.predict(x)) 16 | println(f"training accuracy: $score%1.4f") 17 | 18 | // save the model 19 | val tempFilePath = s"${System.getProperty("java.io.tmpdir")}model.ser" 20 | trainedModel.save(tempFilePath) 21 | 22 | // load the model 23 | val loadedModel = loadEstimator[LogisticRegression](tempFilePath) 24 | 25 | val scoreAfterLoading = accuracy(y, loadedModel.predict(x)) 26 | println(f"training accuracy of the loaded model: $scoreAfterLoading%1.4f") 27 | } 28 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/dummy/StratifiedClassifierExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.dummy 2 | 3 | import io.picnicml.doddlemodel.data.loadIrisDataset 4 | import io.picnicml.doddlemodel.data.DatasetUtils.{shuffleDataset, splitDataset} 5 | import io.picnicml.doddlemodel.dummy.classification.StratifiedClassifier 6 | import io.picnicml.doddlemodel.metrics.accuracy 7 | import io.picnicml.doddlemodel.syntax.ClassifierSyntax._ 8 | 9 | import scala.util.Random 10 | 11 | object StratifiedClassifierExample extends App { 12 | implicit val rand: Random = new Random(42) 13 | val (features, target, featureIndex) = loadIrisDataset 14 | println(s"features: $featureIndex") 15 | 16 | val (x, y) = shuffleDataset(features, target) 17 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 18 | 19 | val split = splitDataset(x, y) 20 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 21 | 22 | val model = StratifiedClassifier() 23 | val trainedModel = model.fit(split.xTr, split.yTr) 24 | 25 | val score = accuracy(split.yTe, trainedModel.predict(split.xTe)) 26 | println(f"test accuracy: $score%1.4f") 27 | } 28 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/preprocessing/OneHotEncoderExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.preprocessing 2 | 3 | import 
breeze.linalg.DenseMatrix 4 | import io.picnicml.doddlemodel.data.Feature.{CategoricalFeature, FeatureIndex, NumericalFeature} 5 | import io.picnicml.doddlemodel.preprocessing.OneHotEncoder 6 | import io.picnicml.doddlemodel.syntax.TransformerSyntax._ 7 | 8 | object OneHotEncoderExample extends App { 9 | val x = DenseMatrix( 10 | List(1.0, 1.0, 1.0), 11 | List(3.0, 0.0, 1.0), 12 | List(6.0, 2.0, 0.0) 13 | ) 14 | val featureIndex = FeatureIndex(List(CategoricalFeature, NumericalFeature, CategoricalFeature)) 15 | println(s"features: $featureIndex") 16 | 17 | // encode all categorical features 18 | val oneHotEncoder = OneHotEncoder(featureIndex) 19 | val fittedOneHotEncoder = oneHotEncoder.fit(x) 20 | println(s"encoded data:\n${fittedOneHotEncoder.transform(x)}") 21 | 22 | // only encode the last feature 23 | val oneHotEncoderSubset = OneHotEncoder(featureIndex.subset(2)) 24 | val fittedOneHotEncoderSubset = oneHotEncoderSubset.fit(x) 25 | println(s"encoded data:\n${fittedOneHotEncoderSubset.transform(x)}") 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/dummy/MostFrequentClassifierExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.dummy 2 | 3 | import io.picnicml.doddlemodel.data.loadIrisDataset 4 | import io.picnicml.doddlemodel.data.DatasetUtils.{shuffleDataset, splitDataset} 5 | import io.picnicml.doddlemodel.dummy.classification.MostFrequentClassifier 6 | import io.picnicml.doddlemodel.metrics.accuracy 7 | import io.picnicml.doddlemodel.syntax.ClassifierSyntax._ 8 | 9 | import scala.util.Random 10 | 11 | object MostFrequentClassifierExample extends App { 12 | implicit val rand: Random = new Random(42) 13 | val (features, target, featureIndex) = loadIrisDataset 14 | println(s"features: $featureIndex") 15 | 16 | val (x, y) = shuffleDataset(features, target) 17 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 18 | 19 | val split = splitDataset(x, y) 20 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 21 | 22 | val model = MostFrequentClassifier() 23 | val trainedModel = model.fit(split.xTr, split.yTr) 24 | 25 | val score = accuracy(split.yTe, trainedModel.predict(split.xTe)) 26 | println(f"test accuracy: $score%1.4f") 27 | } 28 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/preprocessing/BinarizerExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.preprocessing 2 | 3 | import breeze.linalg.DenseMatrix 4 | import io.picnicml.doddlemodel.data.Feature.{CategoricalFeature, FeatureIndex, NumericalFeature} 5 | import io.picnicml.doddlemodel.preprocessing.Binarizer 6 | import io.picnicml.doddlemodel.syntax.TransformerSyntax._ 7 | 8 | object BinarizerExample extends App { 9 | val x = DenseMatrix( 10 | List(1.0, 1.0, 1.0), 11 | List(3.0, 0.0, 1.5), 12 | List(6.0, 2.0, 0.0) 13 | ) 14 | val featureIndex = FeatureIndex(List(NumericalFeature, CategoricalFeature, NumericalFeature)) 15 | println(s"features: $featureIndex") 16 | 17 | // binarize all numerical features 18 | val binarizer = Binarizer(threshold = 1.0, featureIndex = featureIndex) 19 | val fittedBinarizer = binarizer.fit(x) 20 | println(s"binarized data:\n${fittedBinarizer.transform(x)}") 21 | 22 | // only binarize the last feature 
23 | val binarizerSubset = Binarizer(threshold = 1.0, featureIndex = featureIndex.subset(2)) 24 | val fittedBinarizerSubset = binarizerSubset.fit(x) 25 | println(s"binarized data:\n${fittedBinarizerSubset.transform(x)}") 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/linear/SoftmaxClassifierExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.linear 2 | 3 | import io.picnicml.doddlemodel.data.DatasetUtils.{shuffleDataset, splitDataset} 4 | import io.picnicml.doddlemodel.data.loadIrisDataset 5 | import io.picnicml.doddlemodel.linear.SoftmaxClassifier 6 | import io.picnicml.doddlemodel.metrics.accuracy 7 | import io.picnicml.doddlemodel.syntax.ClassifierSyntax._ 8 | 9 | import scala.util.Random 10 | 11 | object SoftmaxClassifierExample extends App { 12 | implicit val rand: Random = new Random(42) 13 | val (features, target, featureIndex) = loadIrisDataset 14 | println(s"features: $featureIndex") 15 | 16 | val (x, y) = shuffleDataset(features, target) 17 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 18 | 19 | val split = splitDataset(x, y) 20 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 21 | 22 | // lambda is L2 regularization strength 23 | val model = SoftmaxClassifier(lambda = 1.5) 24 | val trainedModel = model.fit(split.xTr, split.yTr) 25 | 26 | val score = accuracy(split.yTe, trainedModel.predict(split.xTe)) 27 | println(f"test accuracy: $score%1.4f") 28 | } 29 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/modelselection/GridSearchExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.modelselection 2 | 3 | import io.picnicml.doddlemodel.data.DatasetUtils.splitDataset 4 | import io.picnicml.doddlemodel.data.loadBreastCancerDataset 5 | import io.picnicml.doddlemodel.linear.LogisticRegression 6 | import io.picnicml.doddlemodel.metrics.accuracy 7 | import io.picnicml.doddlemodel.modelselection.{CrossValidation, HyperparameterSearch, KFoldSplitter} 8 | import io.picnicml.doddlemodel.syntax.ClassifierSyntax._ 9 | 10 | import scala.util.Random 11 | 12 | object GridSearchExample extends App { 13 | val (x, y, featureIndex) = loadBreastCancerDataset 14 | println(s"features: $featureIndex") 15 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 16 | 17 | val split = splitDataset(x, y) 18 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 19 | 20 | val numSearchIterations = 100 21 | val cv: CrossValidation = CrossValidation(accuracy, KFoldSplitter(numFolds = 5)) 22 | val search = HyperparameterSearch(numSearchIterations, cv) 23 | 24 | implicit val rand: Random = new Random(42) 25 | val grid = (0 until numSearchIterations).iterator.map(_.toDouble) 26 | val bestModel = search.bestOf(split.xTr, split.yTr) { 27 | LogisticRegression(lambda = grid.next) 28 | } 29 | 30 | val score = accuracy(split.yTe, bestModel.predict(split.xTe)) 31 | println(f"test accuracy: $score%1.4f") 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/preprocessing/StandardScalerExample.scala: 
-------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.preprocessing 2 | 3 | import breeze.linalg.* 4 | import breeze.stats.{mean, stddev} 5 | import io.picnicml.doddlemodel.data.loadIrisDataset 6 | import io.picnicml.doddlemodel.preprocessing.StandardScaler 7 | import io.picnicml.doddlemodel.syntax.TransformerSyntax._ 8 | 9 | object StandardScalerExample extends App { 10 | val (x, _, featureIndex) = loadIrisDataset 11 | println(s"features: $featureIndex") 12 | println(s"mean values before preprocessing:\n${mean(x(::, *)).t}") 13 | println(s"standard deviations before preprocessing:\n${stddev(x(::, *)).t}\n") 14 | 15 | val scaler = StandardScaler(featureIndex) 16 | val trainedScaler = scaler.fit(x) 17 | val xTransformed = trainedScaler.transform(x) 18 | println(s"mean values after preprocessing:\n${mean(xTransformed(::, *)).t}") 19 | println(s"standard deviations after preprocessing:\n${stddev(xTransformed(::, *)).t}\n") 20 | 21 | // only transform a subset of columns 22 | val scalerSubset = StandardScaler(featureIndex.subset("sepal_length", "petal_width")) 23 | val trainedScalerSubset = scalerSubset.fit(x) 24 | val xTransformedSubset = trainedScalerSubset.transform(x) 25 | println(s"mean values after preprocessing a subset of features:\n${mean(xTransformedSubset(::, *)).t}") 26 | println(s"standard deviations after preprocessing a subset of features:\n${stddev(xTransformedSubset(::, *)).t}") 27 | } 28 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/modelselection/RandomSearchExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.modelselection 2 | 3 | import breeze.stats.distributions.Gamma 4 | import io.picnicml.doddlemodel.data.DatasetUtils.splitDataset 5 | import io.picnicml.doddlemodel.data.loadBreastCancerDataset 6 | import io.picnicml.doddlemodel.linear.LogisticRegression 7 | import io.picnicml.doddlemodel.metrics.accuracy 8 | import io.picnicml.doddlemodel.modelselection.{CrossValidation, HyperparameterSearch, KFoldSplitter} 9 | import io.picnicml.doddlemodel.syntax.ClassifierSyntax._ 10 | 11 | import scala.util.Random 12 | 13 | object RandomSearchExample extends App { 14 | val (x, y, featureIndex) = loadBreastCancerDataset 15 | println(s"features: $featureIndex") 16 | println(s"number of examples: ${x.rows}, number of features: ${x.cols}") 17 | 18 | val split = splitDataset(x, y) 19 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 20 | 21 | val numSearchIterations = 100 22 | val cv: CrossValidation = CrossValidation(accuracy, KFoldSplitter(numFolds = 5)) 23 | val search = HyperparameterSearch(numSearchIterations, cv) 24 | 25 | implicit val rand: Random = new Random(42) 26 | val gamma = Gamma(shape = 2, scale = 2) 27 | val bestModel = search.bestOf(split.xTr, split.yTr) { 28 | LogisticRegression(lambda = gamma.draw()) 29 | } 30 | 31 | val score = accuracy(split.yTe, bestModel.predict(split.xTe)) 32 | println(f"test accuracy: $score%1.4f") 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/pipeline/PipelineExample.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.pipeline 2 | 3 | import 
io.picnicml.doddlemodel.data.DatasetUtils.{shuffleDataset, splitDataset} 4 | import io.picnicml.doddlemodel.data.loadIrisDataset 5 | import io.picnicml.doddlemodel.impute.MeanValueImputer 6 | import io.picnicml.doddlemodel.linear.SoftmaxClassifier 7 | import io.picnicml.doddlemodel.metrics.accuracy 8 | import io.picnicml.doddlemodel.pipeline.Pipeline.pipe 9 | import io.picnicml.doddlemodel.pipeline.{Pipeline, PipelineTransformers} 10 | import io.picnicml.doddlemodel.preprocessing.StandardScaler 11 | import io.picnicml.doddlemodel.syntax.PredictorSyntax._ 12 | 13 | import scala.util.Random 14 | 15 | object PipelineExample extends App { 16 | // load and shuffle the data 17 | implicit val rand: Random = new Random(42) 18 | val (features, target, featureIndex) = loadIrisDataset 19 | println(s"features: $featureIndex") 20 | 21 | val (x, y) = shuffleDataset(features, target) 22 | 23 | val split = splitDataset(x, y) 24 | println(s"training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}") 25 | 26 | val transformers: PipelineTransformers = List( 27 | pipe(MeanValueImputer(featureIndex)), 28 | pipe(StandardScaler(featureIndex)) 29 | ) 30 | val pipeline = Pipeline(transformers)(pipe(SoftmaxClassifier())) 31 | val trainedPipeline = pipeline.fit(split.xTr, split.yTr) 32 | 33 | val score = accuracy(split.yTe, trainedPipeline.predict(split.xTe)) 34 | println(f"test accuracy: $score%1.4f") 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/io/picnicml/doddlemodel/examples/usecase/HousePrices.scala: -------------------------------------------------------------------------------- 1 | package io.picnicml.doddlemodel.examples.usecase 2 | 3 | import java.io.File 4 | 5 | import breeze.numerics.log 6 | import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset 7 | import io.picnicml.doddlemodel.data.DatasetUtils.{shuffleDataset, splitDataset} 8 | import io.picnicml.doddlemodel.data.Feature.FeatureIndex 9 | import io.picnicml.doddlemodel.data.{Features, Target, TrainTestSplit} 10 | import io.picnicml.doddlemodel.impute.{MeanValueImputer, MostFrequentValueImputer} 11 | import io.picnicml.doddlemodel.linear.LinearRegression 12 | import io.picnicml.doddlemodel.metrics.rmse 13 | import io.picnicml.doddlemodel.modelselection.{CrossValidation, HyperparameterSearch, KFoldSplitter} 14 | import io.picnicml.doddlemodel.pipeline.Pipeline.pipe 15 | import io.picnicml.doddlemodel.pipeline.{Pipeline, PipelineTransformers} 16 | import io.picnicml.doddlemodel.preprocessing.{OneHotEncoder, StandardScaler} 17 | import io.picnicml.doddlemodel.syntax.PredictorSyntax._ 18 | 19 | import scala.util.Random 20 | 21 | object HousePrices extends App { 22 | implicit val seed: Random = new Random(0) 23 | 24 | // data downloaded from https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data 25 | // note that an additional header line that encodes feature types as described on 26 | // https://github.com/picnicml/doddle-model-examples/wiki/Reading-CSV-Data is required 27 | val (x, y, featureIndex) = loadData("/path/to/house-prices.csv") 28 | val split = shuffleSplitData(x, y) 29 | val selectedModel = gridSearch(split, featureIndex) 30 | 31 | val score = rmse(split.yTe, selectedModel.predict(split.xTe)) 32 | println(f"Test RMSE of the selected model: $score%1.4f") 33 | 34 | def loadData(path: String): (Features, Target, FeatureIndex) = { 35 | println("Loading data") 36 | val (data, allFeatures) = loadCsvDataset(new File(path)) 37 | val (x, y) = (data(::, 0 
to -2), data(::, -1))
38 |     val featureIndex = allFeatures.drop(x.cols)
39 | 
40 |     println(s"Features: $featureIndex")
41 |     println(s"Number of rows: ${x.rows}")
42 |     println(s"Number of columns: ${x.cols}\n")
43 | 
44 |     // we'll be predicting a logarithm of the price
45 |     (x, log(y), featureIndex)
46 |   }
47 | 
48 |   def shuffleSplitData(x: Features, y: Target): TrainTestSplit = {
49 |     println("Shuffling and splitting data")
50 |     val (xShuffled, yShuffled) = shuffleDataset(x, y)
51 |     val split = splitDataset(xShuffled, yShuffled, proportionTrain = 0.85)
52 | 
53 |     println(s"Training set size: ${split.xTr.rows}, test set size: ${split.xTe.rows}\n")
54 |     split
55 |   }
56 | 
57 |   def gridSearch(split: TrainTestSplit, featureIndex: FeatureIndex): Pipeline = {
58 |     val numGridSearchIterations = 50
59 | 
60 |     val cv: CrossValidation = CrossValidation(rmse, KFoldSplitter(numFolds = 10))
61 |     val search = HyperparameterSearch(numGridSearchIterations, cv)
62 | 
63 |     val (start, end, step) = (1e-5, 5.0, (5.0 - 1e-5) / numGridSearchIterations)
64 |     val grid = Range.BigDecimal(start, end, step).map(_.toDouble).iterator
65 | 
66 |     println("Searching the hyperparameter space")
67 |     search.bestOf(split.xTr, split.yTr) { generateModel(lambda = grid.next) }
68 |   }
69 | 
70 |   def generateModel(lambda: Double): Pipeline = {
71 |     val transformers: PipelineTransformers = List(
72 |       // numerical features
73 |       pipe(MeanValueImputer(featureIndex)),
74 |       pipe(StandardScaler(featureIndex)),
75 |       // categorical features
76 |       pipe(MostFrequentValueImputer(featureIndex)),
77 |       pipe(OneHotEncoder(featureIndex))
78 |     )
79 |     // lambda is L2 regularization strength
80 |     Pipeline(transformers)(pipe(LinearRegression(lambda)))
81 |   }
82 | }
83 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## doddle-model examples
2 | This repository contains code examples for the [doddle-model](https://github.com/picnicml/doddle-model) machine learning library.
3 | 
4 | ### Table of Contents
5 | 
6 | #### 1. Feature Preprocessing
7 | * [Standard Scaler](src/main/scala/io/picnicml/doddlemodel/examples/preprocessing/StandardScalerExample.scala)
8 | * [Range Scaler](src/main/scala/io/picnicml/doddlemodel/examples/preprocessing/RangeScalerExample.scala)
9 | * [Binarizer](src/main/scala/io/picnicml/doddlemodel/examples/preprocessing/BinarizerExample.scala)
10 | * [Normalizer](src/main/scala/io/picnicml/doddlemodel/examples/preprocessing/NormalizerExample.scala)
11 | * [One-Hot Encoder](src/main/scala/io/picnicml/doddlemodel/examples/preprocessing/OneHotEncoderExample.scala)
12 | * [Mean Value Imputation](src/main/scala/io/picnicml/doddlemodel/examples/impute/MeanValueImputerExample.scala)
13 | * [Most Frequent Value Imputation](src/main/scala/io/picnicml/doddlemodel/examples/impute/MostFrequentValueImputerExample.scala)
14 | 
15 | #### 2. Metrics
16 | * [Classification Metrics](https://github.com/picnicml/doddle-model/blob/master/src/main/scala/io/picnicml/doddlemodel/metrics/ClassificationMetrics.scala)
17 | * [Regression Metrics](https://github.com/picnicml/doddle-model/blob/master/src/main/scala/io/picnicml/doddlemodel/metrics/RegressionMetrics.scala)
18 | * [Ranking Metrics](https://github.com/picnicml/doddle-model/blob/master/src/main/scala/io/picnicml/doddlemodel/metrics/RankingMetrics.scala)
19 | * [ROC curve visualization](https://picnicml.github.io/doddle-model-examples/roc-curve-visualization.html)
20 | 
21 | #### 3. Baseline Models
22 | * [Most Frequent Classifier](src/main/scala/io/picnicml/doddlemodel/examples/dummy/MostFrequentClassifierExample.scala)
23 | * [Stratified Classifier](src/main/scala/io/picnicml/doddlemodel/examples/dummy/StratifiedClassifierExample.scala)
24 | * [Uniform Classifier](src/main/scala/io/picnicml/doddlemodel/examples/dummy/UniformClassifierExample.scala)
25 | * [Mean Regressor](src/main/scala/io/picnicml/doddlemodel/examples/dummy/MeanRegressorExample.scala)
26 | * [Median Regressor](src/main/scala/io/picnicml/doddlemodel/examples/dummy/MedianRegressorExample.scala)
27 | 
28 | #### 4. Linear Models
29 | * [Linear Regression](src/main/scala/io/picnicml/doddlemodel/examples/linear/LinearRegressionExample.scala)
30 | * [Logistic Regression](src/main/scala/io/picnicml/doddlemodel/examples/linear/LogisticRegressionExample.scala)
31 | * [Softmax Classifier](src/main/scala/io/picnicml/doddlemodel/examples/linear/SoftmaxClassifierExample.scala)
32 | * [Poisson Regression](src/main/scala/io/picnicml/doddlemodel/examples/linear/PoissonRegressionExample.scala)
33 | 
34 | #### 5. Model Selection
35 | * [K-Fold Cross-Validation](src/main/scala/io/picnicml/doddlemodel/examples/modelselection/KFoldExample.scala)
36 | * [Group K-Fold Cross-Validation](src/main/scala/io/picnicml/doddlemodel/examples/modelselection/GroupKFoldExample.scala)
37 | * [Grid Search](src/main/scala/io/picnicml/doddlemodel/examples/modelselection/GridSearchExample.scala)
38 | * [Random Search](src/main/scala/io/picnicml/doddlemodel/examples/modelselection/RandomSearchExample.scala)
39 | 
40 | #### 6. Miscellaneous
41 | * [Reading Data](https://github.com/picnicml/doddle-model-examples/wiki/Reading-CSV-Data)
42 | * [Shuffling Data](src/main/scala/io/picnicml/doddlemodel/examples/misc/ShuffleDatasetExample.scala)
43 | * [Splitting Data](src/main/scala/io/picnicml/doddlemodel/examples/misc/SplitDatasetExample.scala)
44 | * [Feature Preprocessing Pipeline](src/main/scala/io/picnicml/doddlemodel/examples/pipeline/PipelineExample.scala)
45 | * [Estimator Persistence](src/main/scala/io/picnicml/doddlemodel/examples/misc/EstimatorPersistenceExample.scala)
46 | 
47 | #### 7. Use Cases
48 | * [Kaggle House Prices](src/main/scala/io/picnicml/doddlemodel/examples/usecase/HousePrices.scala)
49 | 
--------------------------------------------------------------------------------
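
The listing above contains the sbt helpers `project/Version.scala` and `project/Dependencies.scala`, but the `build.sbt` that consumes them is not part of the export. A minimal sketch of how they are typically wired together could look like the following; the project name and Scala version are assumptions rather than values taken from the listing:

```scala
// build.sbt (hypothetical sketch, not a file from the listing above)
name := "doddle-model-examples"               // assumed project name
scalaVersion := "2.12.8"                      // assumed; use a Scala version supported by doddle-model
version := Version()                          // "0.0.1", or "0.0.1-<suffix>" when PRE_RELEASE is set
libraryDependencies ++= Dependencies.settings // doddle-model, breeze-natives and slf4j-nop
```

Scala objects under `project/` are compiled into the build definition, which is why `Version()` and `Dependencies.settings` can be referenced directly from `build.sbt`. With a build definition along these lines, any of the example objects (they all extend `App`) should be runnable from the repository root with, for example, `sbt "runMain io.picnicml.doddlemodel.examples.linear.LinearRegressionExample"`.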