├── .editorconfig ├── .gitattributes ├── .github └── workflows │ └── gradle-wrapper-validation.yml ├── .gitignore ├── .gitpod.yml ├── LICENSE ├── README.md ├── binder ├── environment.yml └── postBuild ├── build.gradle ├── buildSrc └── src │ └── main │ └── groovy │ ├── FileUtil.groovy │ └── JavaFXUtil.groovy ├── docs ├── RunningBeakerX.md ├── RunningConsole.md ├── RunningGitpod.md ├── RunningLocal.md └── images │ ├── 2020-Nov-28_KatePetrovaOnTwitter.png │ ├── BeakerX.png │ ├── BeakerXRun.png │ ├── Chimpanzee.png │ ├── Gitpod.png │ ├── GitpodFailedToShowImageDownload.png │ ├── GitpodFailedToShowImageGradleError.png │ ├── GitpodFailedToShowImageSavedFile.png │ ├── GitpodResult.png │ ├── GroovyConsole.png │ ├── GroovyWebConsole.png │ ├── Intellij.png │ ├── IrisClasses.png │ ├── IrisDecisionTree.png │ ├── IrisPCA.png │ ├── IrisSpecies.png │ ├── candles.png │ ├── clustering_bottles.jpg │ ├── clustering_centroids.png │ ├── clustering_dendogram.png │ ├── clustering_jfreechart.png │ ├── clustering_kmeans.png │ ├── clustering_scree.png │ ├── clustering_som_heatmap.png │ ├── clustering_various.png │ ├── cottage.png │ ├── cp_screenshot.png │ ├── houses.png │ ├── iris.png │ ├── lang_detect_notebook.png │ ├── lp_screenshot.png │ ├── mnist_gui.png │ ├── mxnet.png │ ├── reviews.png │ ├── scented.png │ ├── textsimularityheatmap.png │ ├── unscented.png │ └── whiskey.png ├── gradle.properties ├── gradle ├── LICENSE_HEADER └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle └── subprojects ├── Candles ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ ├── CandleRatings.groovy │ ├── CandleReviews.groovy │ ├── CandleReviewsApachePoiGinq.groovy │ └── TablesawHelper.groovy │ ├── notebook │ └── Candles.ipynb │ └── resources │ ├── Scented_all.xlsx │ └── Unscented_all.xlsx ├── ChartUtil ├── build.gradle └── src │ └── main │ └── groovy │ ├── JFreeChartUtil.groovy │ └── SwingUtil.groovy ├── DeepLearningMxnet ├── 
README.md ├── build.gradle └── src │ └── main │ └── groovy │ └── ObjectDetect.groovy ├── HousePrices ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ ├── ExploreOutlierClass_JacksonCsvMapper.groovy │ ├── ExploreOutlierClass_OpenCSV.groovy │ ├── ExploreOutlierRecord_JacksonCsvMapper_JDK17.groovy │ ├── ExploreOutlier_CommonsCSV.groovy │ ├── ExploreOutlier_JacksonCsvMapper.groovy │ ├── Explore_Tablesaw.groovy │ ├── FindOutlierSVM_Tribuo.groovy │ ├── FindOutlierZScore_CommonsMath.groovy │ ├── HistogramPrice_JoineryCommonsMathXChart.groovy │ ├── MultiRegressionCART_TribuoXChart.groovy │ ├── MultiRegressionOLS_CommonsMathXChart.groovy │ ├── MultiRegressionOLS_Smile.groovy │ ├── MultiRegressionOLS_WekaXChart.groovy │ ├── MultiRegressionSGD_TribuoXChart.groovy │ ├── MultiRegressionSGD_WekaXChart.groovy │ ├── MultiRegressionSVM_TribuoXChart.groovy │ ├── SplitData.groovy │ └── TablesawUtil.groovy │ ├── notebook │ └── HousePrices.ipynb │ └── resources │ ├── kc_house_data.csv │ └── kc_house_data_source.txt ├── HousePricesBeam ├── build.gradle └── src │ └── main │ ├── groovy │ ├── AggregateModelStats.groovy │ ├── EvaluateModel.java │ ├── HousePricesBeam.groovy │ ├── HousePricesBeamMeta.groovy │ ├── Log.groovy │ └── MeanDoubleArrayCols.groovy │ └── resources │ ├── kc_house_data.csv │ └── kc_house_data_source.txt ├── HousePricesCamel ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ └── ExploreOutlier_ApacheCamelCSV.groovy │ └── resources │ ├── kc_house_data.csv │ └── kc_house_data_source.txt ├── HousePricesGPars ├── build.gradle └── src │ └── main │ ├── groovy │ ├── HelloActors.groovy │ ├── HelloAgent.groovy │ ├── HelloDataflow.groovy │ ├── HelloParallelCollectionProcessing.groovy │ ├── HelloStm.groovy │ └── LinearRegressionMultiGPars.groovy │ └── resources │ ├── kc_house_data.csv │ └── kc_house_data_source.txt ├── HousePricesGroovyFX ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ ├── HistogramBedrooms_CommonsMathGroovyFX.groovy │ 
├── HistogramPrice_CommonsMathGroovyFX.groovy │ ├── MultiRegressionOLS_CommonsMathGroovyFX.groovy │ ├── SimpleRegressionErrors_CommonsMathGroovyFX.groovy │ └── SimpleRegressionOLS_CommonsMathGroovyFX.groovy │ └── resources │ ├── kc_house_data.csv │ ├── kc_house_data_source.txt │ └── style.css ├── HousePricesIgnite ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ └── HousePricesIgnite.groovy │ └── resources │ ├── kc_house_data.csv │ └── kc_house_data_source.txt ├── HousePricesSpark ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ └── HousePricesSpark.groovy │ └── resources │ ├── kc_house_data.csv │ └── kc_house_data_source.txt ├── Iris ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ ├── InstallWekaPackages.groovy │ ├── J48Tree_WekaXChart.groovy │ ├── KNN_TablesawSmile.groovy │ ├── LogisticRegression_WekaXChart.groovy │ ├── MLP_Weka.groovy │ ├── NNFF_DeepNetts.groovy │ ├── NNFF_Dl4j.groovy │ ├── NNFF_Encog.groovy │ ├── NaiveBayes_JsatJavaFX.groovy │ ├── NaiveBayes_WekaXChart.groovy │ ├── SOM_WekaXChart.groovy │ ├── SoftMaxRegression_Datumbox.groovy │ ├── SupportVectorMachine_Datumbox.groovy │ ├── TablesawUtil.groovy │ └── Various_Tribuo.groovy │ ├── notebook │ └── Iris.ipynb │ └── resources │ ├── iris_data.csv │ └── iris_data_source.txt ├── IrisGraalVM ├── README.md ├── build.gradle ├── iris.groovy └── iris_data.csv ├── LanguageProcessing ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ ├── DetectEntities_OpenNLP_JDK11.groovy │ ├── DetectEntities_OpenNLP_JDK8only.groovy │ ├── DetectLanguageWithFail.groovy │ ├── DetectLanguage_Datumbox_JDK11.groovy │ ├── DetectLanguage_OpenNLP.groovy │ ├── DetectPOS_NLP4j.groovy │ ├── DetectPOS_OpenNLP_JDK11.groovy │ ├── DetectPOS_OpenNLP_JDK8only.groovy │ ├── DetectPOS_Smile.groovy │ ├── DetectPOS_TikaOpenNLP.groovy │ ├── DetectSentences_OpenNLP.groovy │ ├── DetectSentences_Smile.groovy │ ├── DetectTriplesAnnotation_CoreNLP.groovy │ ├── DetectTriplesPOS_CoreNLP.groovy │ 
├── DetectTriples_MinIE.groovy │ ├── ResourceHelper.groovy │ ├── SentimentAnalysis_CoreNLP.groovy │ ├── SentimentAnalysis_Datumbox_JDK11.groovy │ └── SentimentAnalysis_OpenNLP.groovy │ ├── notebook │ └── LanguageProcessing.ipynb │ └── resources │ ├── OutputTransforms.groovy │ ├── PartsOfSpeech.pdf │ ├── dummy.txt │ ├── rt-polarity-source.txt │ ├── rt-polarity.neg │ ├── rt-polarity.pos │ ├── training.language.de.txt │ ├── training.language.en.txt │ ├── training.language.es.txt │ ├── training.language.fr.txt │ ├── training.language.id.txt │ └── training.language.source.txt ├── LanguageProcessingDjl ├── README.md ├── build.gradle └── src │ └── main │ └── groovy │ └── UniversalSentenceEncoder.groovy ├── LanguageProcessingNLPCraft ├── build.gradle └── src │ └── main │ └── groovy │ └── Lights.groovy ├── LanguageProcessingSparkNLP ├── README.md ├── build.gradle └── src │ └── main │ └── groovy │ └── DetectEntities_SparkNLP.groovy ├── Mnist ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ ├── GroovyFXUtil.groovy │ ├── Gui.groovy │ ├── MnistInfer.groovy │ ├── MnistReader.groovy │ ├── MnistTrainer.groovy │ ├── Mnist_Tribuo.groovy │ ├── OneLayerMLP.groovy │ ├── TwoLayerMLP.groovy │ └── Util.groovy │ └── resources │ └── weights ├── Whiskey ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ ├── CobwebPca_WekaJFreeChart.groovy │ ├── ExploreStrength_CommonsCSV.groovy │ ├── GMeansPca_TablesawSmile.groovy │ ├── HierClustPcaBeer_Smile.groovy │ ├── HierClustPca_Smile.groovy │ ├── HierClustPca_WekaJFreeChart.groovy │ ├── InstallWekaPackages.groovy │ ├── KMeansPcaBeer_TablesawSmile.groovy │ ├── KMeansPcaCentroidsMedoids_CommonsMathJFreeChart.groovy │ ├── KMeansPcaCentroidsMedoids_HipparchusJFreeChart.groovy │ ├── KMeansPcaCentroids_DatumboxJFreeChart.groovy │ ├── KMeansPcaCentroids_TablesawSmile.groovy │ ├── KMeansPcaCentroids_WekaJFreeChart.groovy │ ├── KMeansPcaScree_Smile.groovy │ ├── KMeansPca_TablesawSmile.groovy │ ├── KMeans_Elki.groovy │ ├── 
KMeans_Encog.groovy │ ├── KMeans_Tribuo.groovy │ ├── PairwiseAutoGrid_Smile.groovy │ ├── PairwiseHistogram_Smile.groovy │ ├── PairwiseManualGrid_Smile.groovy │ ├── SomHeatmapFancy_Smile.groovy │ ├── SomHeatmap_Smile.groovy │ ├── StandaloneSpiderPlot.groovy │ ├── TablesawUtil.groovy │ ├── VariousPca_WekaJFreeChart.groovy │ └── XMeansPca_TablesawSmile.groovy │ ├── notebook │ └── Whiskey.ipynb │ └── resources │ ├── beer.csv │ ├── beer_source.txt │ ├── whiskey.csv │ └── whiskey_source.txt ├── WhiskeyBeam ├── build.gradle └── src │ └── main │ ├── groovy │ ├── AssignClusters.java │ ├── Log.groovy │ ├── MeanDoubleArrayCols.groovy │ ├── Point.groovy │ ├── Points.groovy │ ├── Squash.groovy │ ├── WhiskeyBeam.groovy │ └── WhiskeyBeamMeta.groovy │ └── resources │ ├── whiskey.csv │ └── whiskey_source.txt ├── WhiskeyFlink ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ ├── WhiskeyFlink.groovy │ └── WhiskeyFlinkOnlineKmeans.groovy │ └── resources │ ├── whiskey.csv │ └── whiskey_source.txt ├── WhiskeyIgnite ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ └── WhiskeyIgnite.groovy │ ├── notebook │ ├── DrunkenSailor.ipynb │ └── WhiskeyIgnite.ipynb │ └── resources │ ├── whiskey.csv │ └── whiskey_source.txt ├── WhiskeySpark ├── README.md ├── build.gradle └── src │ └── main │ ├── groovy │ └── WhiskeySpark.groovy │ └── resources │ ├── whiskey.csv │ └── whiskey_source.txt └── WhiskeyWayang ├── README.md ├── build.gradle └── src └── main ├── groovy ├── WhiskeyWayang.groovy └── WhiskeyWayangML.groovy ├── notebook └── WhiskeyWayang.ipynb └── resources ├── whiskey.csv ├── whiskey_noheader.csv └── whiskey_source.txt /.editorconfig: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 
4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | root = true 16 | 17 | [*] 18 | charset = utf-8 19 | indent_size = 4 20 | insert_final_newline = true 21 | trim_trailing_whitespace = true 22 | 23 | [*.{yml,yaml}] 24 | indent_size = 2 25 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | 3 | *.bat text eol=crlf 4 | *.jar binary -------------------------------------------------------------------------------- /.github/workflows/gradle-wrapper-validation.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | name: "Validate Gradle Wrapper" 17 | on: [push, pull_request] 18 | 19 | jobs: 20 | validation: 21 | name: "Validation" 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v2 25 | - uses: gradle/wrapper-validation-action@e2c57acffb2c9aa5a8dc6eda2bbae0b6e495bd4c 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | user.gradle 2 | .gradle/ 3 | target/ 4 | ignite/ 5 | build/ 6 | out/ 7 | testoutput/ 8 | 9 | *.DS_Store 10 | *.class 11 | *.swp 12 | *~ 13 | 14 | .idea 15 | *.iml 16 | *.ipr 17 | *.iws 18 | .shelf 19 | *.model 20 | 21 | .settings/ 22 | .classpath 23 | .project 24 | bin/ 25 | work/ 26 | .jqwik-database 27 | .ipynb_checkpoints/ 28 | /subprojects/Iris/logs/ 29 | /subprojects/IrisGraalVM/conf/ 30 | /subprojects/IrisGraalVM/iris 31 | /subprojects/IrisGraalVM/iris.build_artifacts.txt 32 | -------------------------------------------------------------------------------- /.gitpod.yml: -------------------------------------------------------------------------------- 1 | tasks: 2 | - init: ./gradlew -q projects 3 | -------------------------------------------------------------------------------- /binder/environment.yml: -------------------------------------------------------------------------------- 1 | name: beakerx 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - numpy 6 | -------------------------------------------------------------------------------- /binder/postBuild: -------------------------------------------------------------------------------- 1 | conda install beakerx -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: 
// Root build script: configures the dependency-update report and the
// repositories shared by every project in the build.

// NOTE(review): this plugin presumably contributes the `dependencyUpdates`
// task that is configured below - confirm against the plugin docs.
plugins {
    id 'com.github.ben-manes.versions' version '0.51.0'
}

// Pattern used below (with ==~, i.e. a whole-string match) against lower-cased
// version strings. It matches either:
//   * digits/commas/dots/dashes, then '.' or '-', then alpha|beta|rc,
//     optionally followed by more dots, digits or dashes, or
//   * the specific version string 20030203.000550 (the unescaped '.' matches
//     any single character).
ext.UNSTABLE = /^([0-9,.-]+[.-](alpha|beta|rc)[.\d-]*)|20030203.000550$/
// ignore non-stable releases
tasks.named("dependencyUpdates").configure {
    gradleReleaseChannel = 'current'
    outputFormatter = 'plain'
    rejectVersionIf {
        // Reject a candidate only when it matches UNSTABLE while the version
        // currently in use does not; a dependency that is already on an
        // UNSTABLE version is still offered UNSTABLE candidates.
        !(it.currentVersion.toLowerCase() ==~ UNSTABLE) && it.candidate.version.toLowerCase() ==~ UNSTABLE
    }
}

// Every project (root and subprojects) resolves dependencies from Maven Central.
allprojects {
    repositories {
        mavenCentral()
    }
}
/**
 * Static helpers for turning a set of source files into a list of base names
 * (file name with a known extension removed).
 */
class FileUtil {
    private FileUtil() {}

    /**
     * Lists the base names of the files directly inside {@code subdir} that end
     * with {@code ext}.
     *
     * @param subdir     directory to scan (not recursive)
     * @param exclusions base names to leave out of the result
     * @param ext        file-name suffix to select and strip
     * @return matching base names; an empty list when {@code subdir} does not
     *         exist, is not a directory, or cannot be read
     */
    static List baseNames(File subdir, List exclusions = [], String ext = '.groovy') {
        // File.listFiles() returns null (not an empty array) for a missing or
        // non-directory path; treat that as "no files" instead of failing.
        def found = subdir.listFiles()
        baseNames(found ? found.toList() : [], exclusions, [ext])
    }

    /**
     * Computes base names for the given files, once per extension, and
     * concatenates the per-extension results in the order of {@code exts}.
     * Names listed in {@code exclusions} and names ending in {@code Util} are
     * dropped.
     *
     * @param files      objects exposing a {@code name} property (e.g. {@link File})
     * @param exclusions base names to leave out of the result
     * @param exts       file-name suffixes to select and strip
     * @return the collected base names; an empty list when nothing matches or
     *         {@code exts} is empty
     */
    static List baseNames(Collection files, List exclusions = [], List exts = ['.groovy']) {
        // collectMany flattens the per-extension lists and yields [] (rather
        // than null) when exts is empty.
        exts.collectMany { ext ->
            files*.name
                .findAll { it.endsWith(ext) }
                // strip only the trailing extension, not its first occurrence
                .collect { it.substring(0, it.length() - ext.length()) }
                .findAll { !(it in exclusions) && !it.endsWith('Util') }
        }
    }
}
/**
 * Build-time probe for JavaFX availability.
 */
class JavaFXUtil {
    private JavaFXUtil() {}

    /**
     * Reports whether a JavaFX class can be loaded by the running JVM.
     *
     * Ideally this would look for jfxrt.jar on sourceSets.main.runtimeClasspath,
     * but that is not available before configuration and no toolchains are in
     * use, so a simple class lookup is considered good enough here.
     *
     * @return true when {@code javafx.beans.DefaultProperty} loads, false when
     *         the lookup throws an Exception
     */
    static boolean checkForJavaFX() {
        boolean javaFxPresent = true
        try {
            Class.forName('javafx.beans.DefaultProperty')
        } catch (Exception ignore) {
            javaFxPresent = false
        }
        javaFxPresent
    }
}
Run the example script and the output should be similar to: 7 | 8 | ![GroovyConsole Result](images/GroovyConsole.png) 9 | 10 | For scripts not requiring an external library (limitation to be removed in a future version of the web console), 11 | you can run via the Groovy Web Console: 12 | 13 | ![GroovyWebConsole Result](images/GroovyWebConsole.png) 14 | -------------------------------------------------------------------------------- /docs/RunningGitpod.md: -------------------------------------------------------------------------------- 1 | ## Running via Gitpod 2 | 3 | [![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/paulk-asert/groovy-data-science) 4 | 5 | To run via Gitpod, click on the Gitpod button on the GitHub site. 6 | Once loaded, run one of the available script tasks. To find 7 | available script tasks you can try something like: 8 | 9 | ``` 10 | > ./gradlew :HousePricesBeam:tasks --group="Application" 11 | ``` 12 | 13 | You should see something like below: 14 | ![Gitpod tasks](images/Gitpod.png) 15 | 16 | And you can run a script with something like: 17 | ``` 18 | > ./gradlew :HousePricesBeam:runHousePricesBeam 19 | ``` 20 | 21 | With the following result: 22 | ![Gitpod result](images/GitpodResult.png) 23 | 24 | ### Troubleshooting 25 | 26 | Gitpod isn't currently set up for scripts which 27 | use GroovyFX/JavaFX or display plots using Swing (since they'd display on 28 | some server in the cloud - not on your machine). 29 | The same also applies for scripts which open a browser 30 | but there is a workaround in some cases.
31 | 32 | If you see an error in the Gradle output which mentions a file in your workspace (under the `build/resources/main` directory in our case), e.g.: 33 | 34 | ![Gitpod open browser error](images/GitpodFailedToShowImageGradleError.png) 35 | 36 | then you should be able to find that file in the Gitpod explorer, e.g.: 37 | 38 | ![Gitpod open browser error](images/GitpodFailedToShowImageSavedFile.png) 39 | 40 | Earlier versions of Gitpod had an option to preview such files but this 41 | capability seemed to break on the change to using VS Code. 42 | There are some workarounds which involve using some VS Code extensions 43 | but there are compromises with the variants I tried. 44 | Similarly, command-line solutions, e.g. `gp preview file://path/to/file/MyFile.html` 45 | had issues on various systems I tried. 46 | 47 | By the time you read this, this issue might be fixed, but 48 | as a workaround, I suggest _downloading_ such files for the time being 49 | and viewing them on your local machine, e.g.: 50 | ![Gitpod open browser error](images/GitpodFailedToShowImageDownload.png) 51 | -------------------------------------------------------------------------------- /docs/images/2020-Nov-28_KatePetrovaOnTwitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/2020-Nov-28_KatePetrovaOnTwitter.png -------------------------------------------------------------------------------- /docs/images/BeakerX.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/BeakerX.png -------------------------------------------------------------------------------- /docs/images/BeakerXRun.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/BeakerXRun.png -------------------------------------------------------------------------------- /docs/images/Chimpanzee.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/Chimpanzee.png -------------------------------------------------------------------------------- /docs/images/Gitpod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/Gitpod.png -------------------------------------------------------------------------------- /docs/images/GitpodFailedToShowImageDownload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/GitpodFailedToShowImageDownload.png -------------------------------------------------------------------------------- /docs/images/GitpodFailedToShowImageGradleError.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/GitpodFailedToShowImageGradleError.png -------------------------------------------------------------------------------- /docs/images/GitpodFailedToShowImageSavedFile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/GitpodFailedToShowImageSavedFile.png -------------------------------------------------------------------------------- /docs/images/GitpodResult.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/GitpodResult.png -------------------------------------------------------------------------------- /docs/images/GroovyConsole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/GroovyConsole.png -------------------------------------------------------------------------------- /docs/images/GroovyWebConsole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/GroovyWebConsole.png -------------------------------------------------------------------------------- /docs/images/Intellij.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/Intellij.png -------------------------------------------------------------------------------- /docs/images/IrisClasses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/IrisClasses.png -------------------------------------------------------------------------------- /docs/images/IrisDecisionTree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/IrisDecisionTree.png -------------------------------------------------------------------------------- /docs/images/IrisPCA.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/IrisPCA.png -------------------------------------------------------------------------------- /docs/images/IrisSpecies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/IrisSpecies.png -------------------------------------------------------------------------------- /docs/images/candles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/candles.png -------------------------------------------------------------------------------- /docs/images/clustering_bottles.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/clustering_bottles.jpg -------------------------------------------------------------------------------- /docs/images/clustering_centroids.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/clustering_centroids.png -------------------------------------------------------------------------------- /docs/images/clustering_dendogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/clustering_dendogram.png -------------------------------------------------------------------------------- 
/docs/images/clustering_jfreechart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/clustering_jfreechart.png -------------------------------------------------------------------------------- /docs/images/clustering_kmeans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/clustering_kmeans.png -------------------------------------------------------------------------------- /docs/images/clustering_scree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/clustering_scree.png -------------------------------------------------------------------------------- /docs/images/clustering_som_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/clustering_som_heatmap.png -------------------------------------------------------------------------------- /docs/images/clustering_various.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/clustering_various.png -------------------------------------------------------------------------------- /docs/images/cottage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/cottage.png 
-------------------------------------------------------------------------------- /docs/images/cp_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/cp_screenshot.png -------------------------------------------------------------------------------- /docs/images/houses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/houses.png -------------------------------------------------------------------------------- /docs/images/iris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/iris.png -------------------------------------------------------------------------------- /docs/images/lang_detect_notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/lang_detect_notebook.png -------------------------------------------------------------------------------- /docs/images/lp_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/lp_screenshot.png -------------------------------------------------------------------------------- /docs/images/mnist_gui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/mnist_gui.png 
-------------------------------------------------------------------------------- /docs/images/mxnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/mxnet.png -------------------------------------------------------------------------------- /docs/images/reviews.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/reviews.png -------------------------------------------------------------------------------- /docs/images/scented.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/scented.png -------------------------------------------------------------------------------- /docs/images/textsimularityheatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/textsimularityheatmap.png -------------------------------------------------------------------------------- /docs/images/unscented.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/unscented.png -------------------------------------------------------------------------------- /docs/images/whiskey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/docs/images/whiskey.png 
-------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | # 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | version = 0.0.1 18 | group = org.groovycookbook 19 | sourceCompatibility = 1.8 20 | targetCompatibility = 1.8 21 | 22 | #arpackNgPlatformVersion = 3.9.1-1.5.10 23 | arpackNgPlatformVersion = 3.9.0-1.5.9 24 | beamVersion = 2.56.0 25 | camelLegacyVersion = 3.14.4 26 | camelVersion = 3.20.2 27 | chocoVersion = 4.10.5 28 | commonsCsvVersion = 1.11.0 29 | commonsIoVersion = 2.11.0 30 | commonsMath3Version = 3.6.1 31 | commonsMath4Version = 4.0-beta1 32 | djlVersion = 0.28.0 33 | flinkVersion = 1.18.1 34 | flinkMlVersion = 2.3.0 35 | flinkStatefunVersion = 3.2.0 36 | groovy3Version = 3.0.21 37 | groovy4Version = 4.0.21 38 | groovy5Version = 5.0.0-alpha-12 39 | igniteVersion = 2.16.0 40 | igniteMlVersion = 2.15.0 41 | jacksonVersion = 2.17.1 42 | jfreechartVersion = 1.5.4 43 | junitVersion = 4.13.1 44 | knowmXchartVersion = 3.8.8 45 | log4j2Version = 2.23.1 46 | nd4jVersion = 1.0.0-M2.1 47 | nlpcraftVersion = 0.9.0 48 | openblasPlatformVersion = 0.3.23-1.5.9 49 | #openblasPlatformVersion = 0.3.26-1.5.10 50 | opencsvVersion = 5.9 51 | opennlpJdk8Version = 1.9.4 52 | opennlpJdk11Version = 2.2.0 53 | opennlpLatestVersion = 2.3.3 54 | slf4jVersion = 
2.0.16 55 | smilePreviousVersion = 3.0.3 56 | smileVersion = 3.1.1 57 | tablesawVersion = 0.43.1 58 | tikaVersion = 2.9.2 59 | tribuoVersion = 4.3.1 60 | wekaVersion = 3.9.6 61 | -------------------------------------------------------------------------------- /gradle/LICENSE_HEADER: -------------------------------------------------------------------------------- 1 | SPDX-License-Identifier: Apache-2.0 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.12.1-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * 
SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | buildscript { 17 | repositories { 18 | gradlePluginPortal() 19 | } 20 | } 21 | 22 | rootProject.name = 'groovy-data-science' 23 | // Names of the projects to include; each name is also the project's folder name under subprojects/ 24 | def subprojects = [ 25 | 'Candles', 26 | 'ChartUtil', 27 | 'DeepLearningMxnet', 28 | 'HousePrices', 29 | 'HousePricesBeam', 30 | 'HousePricesCamel', 31 | 'HousePricesGPars', 32 | 'HousePricesGroovyFX', 33 | 'HousePricesIgnite', 34 | 'HousePricesSpark', 35 | 'Iris', 36 | 'IrisGraalVM', 37 | 'LanguageProcessing', 38 | 'LanguageProcessingDjl', 39 | 'LanguageProcessingNLPCraft', 40 | 'LanguageProcessingSparkNLP', 41 | 'Mnist', 42 | 'Whiskey', 43 | 'WhiskeyBeam', 44 | 'WhiskeyFlink', 45 | 'WhiskeyIgnite', 46 | 'WhiskeySpark', 47 | 'WhiskeyWayang' 48 | ] 49 | 50 | include(subprojects as String[]) 51 | // Relocate every included project so that its directory is rootDir/subprojects/NAME 52 | rootProject.children.each { prj -> 53 | prj.projectDir = new File("$rootDir/subprojects/$prj.name") 54 | } 55 | -------------------------------------------------------------------------------- /subprojects/Candles/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | apply plugin: 'groovy' 17 | // Aggregate task: made to depend on every per-script run task registered below 18 | def runAll = tasks.register('runAll') { 19 | group 'Application' 20 | } 21 | // One JavaExec task per base name returned by FileUtil.baseNames (the ['TablesawHelper'] argument is presumably an exclusion list - confirm in buildSrc/FileUtil.groovy) 22 | FileUtil.baseNames(sourceSets.main.allSource.files, ['TablesawHelper']).each { name -> 23 | def subtask = tasks.register("run$name", JavaExec) { 24 | dependsOn compileGroovy 25 | group 'Application' 26 | description "Run ${name}.groovy as a JVM application/Groovy script" 27 | classpath = sourceSets.main.runtimeClasspath 28 | mainClass = name 29 | } 30 | runAll.configure { 31 | dependsOn subtask 32 | } 33 | } 34 | 35 | dependencies { 36 | implementation "org.apache.groovy:groovy:$groovy4Version" 37 | implementation "org.apache.groovy:groovy-dateutil:$groovy4Version" 38 | implementation "org.apache.groovy:groovy-ginq:$groovy4Version" 39 | implementation "org.apache.groovy:groovy-macro:$groovy4Version" 40 | implementation "tech.tablesaw:tablesaw-core:$tablesawVersion" 41 | implementation "tech.tablesaw:tablesaw-excel:$tablesawVersion" 42 | implementation "tech.tablesaw:tablesaw-aggregate:$tablesawVersion" 43 | implementation "org.slf4j:slf4j-simple:$slf4jVersion" 44 | implementation "org.apache.poi:poi-ooxml:5.2.5" 45 | runtimeOnly "org.apache.logging.log4j:log4j-core:$log4j2Version" 46 | } 47 | -------------------------------------------------------------------------------- /subprojects/Candles/src/main/groovy/CandleReviewsApachePoiGinq.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under
the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import org.apache.poi.xssf.usermodel.XSSFWorkbook 18 | import java.text.SimpleDateFormat 19 | // candidates: regex fragments; a review matching any of them is flagged as NoScent 20 | var url = getClass().classLoader.getResource('Scented_all.xlsx') 21 | var table = [] 22 | var candidates = ['[Nn]o scent', '[Nn]o smell', '[Ff]aint smell', 23 | '[Ff]aint scent', "[Cc]an't smell", '[Dd]oes not smell like', 24 | "[Dd]oesn't smell like", '[Cc]annot smell', "[Dd]on't smell", 25 | '[Ll]ike nothing'] 26 | url.withInputStream { ins -> 27 | // withCloseable closes the workbook once every row has been copied into table 28 | new XSSFWorkbook(ins).withCloseable { workbook -> 29 | workbook.getSheetAt(0).eachWithIndex { row, idx -> 30 | if (idx > 0) { // skip header 31 | var date = row.getCell(1).dateCellValue 32 | var month = date.format('MMM') 33 | var review = row.getCell(3) 34 | var noscent = candidates.any { review =~ it } 35 | table << [NoScent: noscent, Date: date, Month: month] 36 | } 37 | } 38 | } 39 | } 40 | // For reviews dated after 1 Jan 2020, print per month: number flagged NoScent divided by number of reviews 41 | var sdf = new SimpleDateFormat('dd-MMM-yyyy', Locale.US) 42 | var start2020 = sdf.parse('01-Jan-2020') 43 | println GQL { 44 | from row in table 45 | where row.Date > start2020 46 | groupby row.Month 47 | orderby row.Date 48 | select row.Month, agg(_g.toList().count{ it.row.NoScent }) / count(row.Date) 49 | }.join('\n') 50 | -------------------------------------------------------------------------------- /subprojects/Candles/src/main/groovy/TablesawHelper.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4
| * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import tech.tablesaw.plotly.Plot 18 | import tech.tablesaw.plotly.components.Figure 19 | /** Hands Tablesaw figures to Plot.show, targeting .html files in one fixed directory. */ 20 | class TablesawHelper { 21 | private File parent // directory that receives the generated .html files 22 | 23 | /** 24 | * Creates the plot files in a suitable temporary location 25 | * determined from the parent of the passed file - typically 26 | * a build folder or IDE temporary folder. 27 | * 28 | * @param filename Of a file in a suitable temporary directory 29 | */ 30 | TablesawHelper(String filename) { 31 | parent = new File(filename).parentFile 32 | } 33 | /** Calls Plot.show(figure, parent/filename.html); any exception is reported with println instead of being rethrown. */ 34 | def show(Figure figure, String filename) { 35 | def file = new File(parent, filename + '.html') 36 | try { 37 | Plot.show(figure, file) 38 | } catch(ex) { // best effort: report the problem and let the calling script continue 39 | println "Unable to show file '$file' due to '$ex.message'" 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /subprojects/Candles/src/main/resources/Scented_all.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/subprojects/Candles/src/main/resources/Scented_all.xlsx -------------------------------------------------------------------------------- /subprojects/Candles/src/main/resources/Unscented_all.xlsx: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/subprojects/Candles/src/main/resources/Unscented_all.xlsx -------------------------------------------------------------------------------- /subprojects/ChartUtil/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | plugins { 17 | id 'java-library' 18 | id 'groovy' 19 | } 20 | 21 | repositories { 22 | mavenCentral() 23 | } 24 | // jfreechart uses 'api' so projects depending on ChartUtil also see it at compile time; the Groovy modules stay internal 25 | dependencies { 26 | api "org.jfree:jfreechart:$jfreechartVersion" 27 | implementation "org.apache.groovy:groovy:$groovy4Version" 28 | implementation "org.apache.groovy:groovy-swing:$groovy4Version" 29 | } 30 | -------------------------------------------------------------------------------- /subprojects/DeepLearningMxnet/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Deep-learning with DJL and Apache MXNet 18 | 19 | Neural networks with numerous layers of nodes allow for more complex, rich and _deeper_ processing and understanding. 20 | This example detects objects within an image. 21 | It uses a pre-trained model and the 22 | [Deep Java Library](https://djl.ai/) backed by the 23 | [Apache MXNet](https://mxnet.apache.org/) engine. 
24 | 25 | ![MXNet.groovy](../../docs/images/mxnet.png) 26 | 27 | Groovy code examples can be found in the [DeepLearningMxnet](src/main/groovy) subproject. 28 | If you have opened the repo in IntelliJ (or your favourite IDE) you should be able to execute the examples directly in the IDE. 29 | 30 | __Requirements__: The code has been tested on JDK8, JDK11 and JDK17. 31 | -------------------------------------------------------------------------------- /subprojects/DeepLearningMxnet/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | apply plugin: 'groovy' 17 | apply plugin: 'application' 18 | // No repository is declared here (only a commented-out Apache snapshots entry); presumably the root build supplies them - confirm 19 | repositories { 20 | // maven { 21 | // url 'https://repository.apache.org/content/groups/snapshots' 22 | // } 23 | } 24 | // Script/class name shared by the application main class and the run task description 25 | ext.appName = 'ObjectDetect' 26 | 27 | application { 28 | mainClass = appName 29 | } 30 | 31 | tasks.named('run').configure { 32 | description = "Run $appName as a JVM application/Groovy script" 33 | } 34 | // Compile against the DJL API and Groovy only; model zoos, MXNet engine/native binaries, groovy-nio and the slf4j binding are runtime-only 35 | dependencies { 36 | implementation "ai.djl:api:$djlVersion" 37 | implementation "org.apache.groovy:groovy:$groovy4Version" 38 | implementation "org.apache.groovy:groovy-swing:$groovy4Version" 39 | runtimeOnly "ai.djl:model-zoo:$djlVersion" 40 | runtimeOnly "ai.djl.mxnet:mxnet-engine:$djlVersion" 41 | runtimeOnly "ai.djl.mxnet:mxnet-model-zoo:$djlVersion" 42 | runtimeOnly "ai.djl.mxnet:mxnet-native-auto:1.8.0" 43 | runtimeOnly "org.apache.groovy:groovy-nio:$groovy4Version" 44 | runtimeOnly "org.slf4j:slf4j-jdk14:$slf4jVersion" 45 | } 46 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/ExploreOutlierClass_JacksonCsvMapper.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | import com.fasterxml.jackson.annotation.JsonIncludeProperties 17 | import com.fasterxml.jackson.annotation.JsonProperty 18 | import com.fasterxml.jackson.annotation.JsonPropertyOrder 19 | import com.fasterxml.jackson.dataformat.csv.CsvMapper 20 | import com.fasterxml.jackson.dataformat.csv.CsvSchema 21 | import groovy.transform.ToString 22 | 23 | import static com.fasterxml.jackson.dataformat.csv.CsvParser.Feature.IGNORE_TRAILING_UNMAPPABLE 24 | // Bean for the three columns of interest; the sqft_lot column is bound to the area_lot property 25 | @JsonPropertyOrder(['bedrooms', 'bathrooms', 'sqft_lot']) 26 | @JsonIncludeProperties(['bedrooms', 'bathrooms', 'sqft_lot']) 27 | @ToString(includeNames=true) 28 | class HouseClass { 29 | Integer bedrooms 30 | String bathrooms 31 | @JsonProperty("sqft_lot") Integer area_lot 32 | } 33 | // Resolve through toURI(): URL.file keeps percent-encoding (e.g. %20 for a space), so the File would not be found 34 | def data = new File(getClass().classLoader.getResource('kc_house_data.csv').toURI()) 35 | // Only the leading columns are declared, in file order; the columns after sqft_lot are left to IGNORE_TRAILING_UNMAPPABLE 36 | var schema = CsvSchema.builder() 37 | .addColumn("id") 38 | .addColumn("date") 39 | .addColumn("price") 40 | .addColumn("bedrooms") 41 | .addColumn("bathrooms") 42 | .addColumn("sqft_living") 43 | .addColumn("sqft_lot") 44 | .build() 45 | .withHeader() 46 | 47 | def mapper = new CsvMapper().configure(IGNORE_TRAILING_UNMAPPABLE, true) 48 | def records = mapper.readerFor(HouseClass).with(schema).readValues(data).readAll() 49 | 50 | records.findAll{ it.bedrooms > 10 }.each{ println it } 51 | 52 | /* 53 | HouseClass(bedrooms:11, bathrooms:3, area_lot:4960) 54 | HouseClass(bedrooms:33, bathrooms:1.75, area_lot:6000) 55 | */ 56 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/ExploreOutlierClass_OpenCSV.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import com.opencsv.bean.* 17 | import groovy.transform.ToString 18 | // Bean filled by OpenCSV from the CSV header names; the sqft_lot column is bound to area_lot 19 | @ToString(includeNames = true) 20 | class House { 21 | @CsvBindByName 22 | Integer bedrooms 23 | 24 | @CsvBindByName 25 | String bathrooms 26 | 27 | @CsvBindByName(column = 'sqft_lot') 28 | Integer area_lot 29 | } 30 | // Resolve through toURI(): URL.file keeps percent-encoding (e.g. %20 for a space), so the File would not be found 31 | def full = new File(getClass().classLoader.getResource('kc_house_data.csv').toURI()) 32 | // withReader closes the file reader as soon as all beans have been parsed 33 | def records = full.withReader { reader -> 34 | new CsvToBeanBuilder(reader).withType(House).build().parse() 35 | } 36 | records.findAll{ it.bedrooms > 10 }.each{ println it } 37 | 38 | /* 39 | House(bedrooms:11, bathrooms:3, area_lot:4960) 40 | House(bedrooms:33, bathrooms:1.75, area_lot:6000) 41 | */ 42 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/ExploreOutlierRecord_JacksonCsvMapper_JDK17.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | //import com.fasterxml.jackson.annotation.JsonCreator 18 | import com.fasterxml.jackson.annotation.JsonIncludeProperties 19 | //import com.fasterxml.jackson.annotation.JsonProperty 20 | import com.fasterxml.jackson.annotation.JsonPropertyOrder 21 | import com.fasterxml.jackson.dataformat.csv.CsvMapper 22 | import com.fasterxml.jackson.dataformat.csv.CsvSchema 23 | 24 | import static com.fasterxml.jackson.dataformat.csv.CsvParser.Feature.IGNORE_TRAILING_UNMAPPABLE 25 | // Record for the three columns of interest; component names match the CSV column names (the commented-out variant renames sqft_lot) 26 | @JsonPropertyOrder(['bedrooms', 'bathrooms', 'sqft_lot']) 27 | @JsonIncludeProperties(['bedrooms', 'bathrooms', 'sqft_lot']) 28 | //@JsonCreator 29 | //record HouseRecord( 30 | // @JsonProperty("bedrooms") Integer bedrooms, 31 | // @JsonProperty("bathrooms") String bathrooms, 32 | // @JsonProperty("sqft_lot") Integer area_lot) { } 33 | record HouseRecord(Integer bedrooms, String bathrooms, Integer sqft_lot/*, @JsonProperty("sqft_lot") Integer area_lot*/) { } 34 | // Resolve through toURI(): URL.file keeps percent-encoding (e.g. %20 for a space), so the File would not be found 35 | def full = new File(getClass().classLoader.getResource('kc_house_data.csv').toURI()) 36 | // Only the leading columns are declared, in file order; the columns after sqft_lot are left to IGNORE_TRAILING_UNMAPPABLE 37 | var schema = CsvSchema.builder() 38 | .addColumn("id") 39 | .addColumn("date") 40 | .addColumn("price") 41 | .addColumn("bedrooms") 42 | .addColumn("bathrooms") 43 | .addColumn("sqft_living") 44 | .addColumn("sqft_lot") 45 | .build() 46 | .withHeader() 47 | 48 | def mapper = new CsvMapper().configure(IGNORE_TRAILING_UNMAPPABLE, true) 49 | def records = mapper.readerFor(HouseRecord).with(schema).readValues(full).readAll() 50 | 51 | records.findAll{ it.bedrooms > 10 }.each{ println it } 52 | 53 | /* 54 | HouseRecord[bedrooms=11, bathrooms=3, sqft_lot=4960] 55 | HouseRecord[bedrooms=33, bathrooms=1.75, sqft_lot=6000] 56 | */ 57 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/ExploreOutlier_CommonsCSV.groovy: 
-------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV 17 | // Resolve through toURI(): URL.file keeps percent-encoding (e.g. %20 for a space), so the file would not be found 18 | //def file = 'kc_house_data.csv' as File 19 | def file = new File(getClass().classLoader.getResource('kc_house_data.csv').toURI()) 20 | file.withReader { reader -> // records are read lazily, so they are consumed inside the closure; the reader is closed when it returns 21 | CSV.withFirstRecordAsHeader().parse(reader).findAll{ it.bedrooms.toInteger() > 10 }.each{ println it.toMap() as TreeMap } 22 | } 23 | /* 24 | [bathrooms:3, bedrooms:11, condition:3, date:20140821T000000, floors:2, grade:7, id:1773100755, lat:47.556, 25 | long:-122.363, price:520000, sqft_above:2400, sqft_basement:600, sqft_living:3000, sqft_living15:1420, 26 | sqft_lot:4960, sqft_lot15:4960, view:0, waterfront:0, yr_built:1918, yr_renovated:1999, zipcode:98106] 27 | [bathrooms:1.75, bedrooms:33, condition:5, date:20140625T000000, floors:1, grade:7, id:2402100895, lat:47.6878, 28 | long:-122.331, price:640000, sqft_above:1040, sqft_basement:580, sqft_living:1620, sqft_living15:1330, 29 | sqft_lot:6000, sqft_lot15:4700, view:0, waterfront:0, yr_built:1947, yr_renovated:0, zipcode:98103] 30 | */ 31 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/ExploreOutlier_JacksonCsvMapper.groovy: 
-------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import com.fasterxml.jackson.dataformat.csv.CsvMapper 17 | import com.fasterxml.jackson.dataformat.csv.CsvSchema 18 | 19 | import static com.fasterxml.jackson.dataformat.csv.CsvParser.Feature.IGNORE_TRAILING_UNMAPPABLE 20 | // Resolve through toURI(): URL.file keeps percent-encoding (e.g. %20 for a space), so the File would not be found 21 | def data = new File(getClass().classLoader.getResource('kc_house_data.csv').toURI()) 22 | // Only the leading columns are declared, in file order; the columns after sqft_lot are left to IGNORE_TRAILING_UNMAPPABLE 23 | var schema = CsvSchema.builder() 24 | .addColumn("id") 25 | .addColumn("date") 26 | .addColumn("price") 27 | .addColumn("bedrooms") 28 | .addColumn("bathrooms") 29 | .addColumn("sqft_living") 30 | .addColumn("sqft_lot") 31 | .build() 32 | .withHeader() 33 | // Every row is read as a map of column name to String value, hence the toInteger() below 34 | def mapper = new CsvMapper().configure(IGNORE_TRAILING_UNMAPPABLE, true) 35 | def records = mapper.readerForMapOf(String).with(schema).readValues(data).readAll() 36 | 37 | records.findAll{ it.bedrooms.toInteger() > 10 }.each{ println it } 38 | 39 | /* 40 | [id:1773100755, date:20140821T000000, price:520000, bedrooms:11, bathrooms:3, sqft_living:3000, sqft_lot:4960] 41 | [id:2402100895, date:20140625T000000, price:640000, bedrooms:33, bathrooms:1.75, sqft_living:1620, sqft_lot:6000] 42 | */ 43 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/Explore_Tablesaw.groovy: 
-------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import tech.tablesaw.api.* 18 | import tech.tablesaw.plotly.api.* 19 | 20 | import static tech.tablesaw.aggregate.AggregateFunctions.* 21 | // Resolve through toURI(): URL.file keeps percent-encoding (e.g. %20 for a space); .path keeps file a String as before 22 | //def file = '/path/to/kc_house_data.csv' as File 23 | def file = new File(getClass().classLoader.getResource('kc_house_data.csv').toURI()).path 24 | def helper = new TablesawUtil(file) 25 | Table rows = Table.read().csv(file) 26 | 27 | println rows.shape() 28 | 29 | println rows.structure() 30 | 31 | println rows.column("bedrooms").summary().print() 32 | 33 | println rows.where(rows.column("bedrooms").isGreaterThan(10)) 34 | // Rows with more than 30 bedrooms are dropped before summarising and plotting 35 | def cleaned = rows.dropWhere(rows.column("bedrooms").isGreaterThan(30)) 36 | println cleaned.shape() 37 | println cleaned.summarize("price", mean, min, max).by("bedrooms") 38 | 39 | helper.show(ScatterPlot.create("Price x bathrooms x grade", cleaned, "bathrooms", "price", 'grade'), 'PriceBathroomsGrade') 40 | // Derived columns used by the plots below: a text label for waterfront, grade doubled, price divided by 100000 41 | cleaned.addColumns( 42 | StringColumn.create("waterfrontDesc", cleaned.column("waterfront").collect{ it ? 
'waterfront' : 'interior' }), 43 | DoubleColumn.create("scaledGrade", cleaned.column("grade").collect{ it * 2 }), 44 | DoubleColumn.create("scaledPrice", cleaned.column("price").collect{ it / 100000 }) 45 | ) 46 | 47 | helper.show(BubblePlot.create("Price vs living area and grade (bubble size)", 48 | cleaned, "sqft_living", "price", "scaledGrade", "waterfrontDesc"), 'LivingPriceGradeWaterfront') 49 | 50 | helper.show(Scatter3DPlot.create("Grade, living space, bathrooms and price (bubble size)", 51 | cleaned, "sqft_living", "bathrooms", "grade", "scaledPrice", "waterfrontDesc"), 'LivingBathroomsGradePriceWaterfront') 52 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/HistogramPrice_JoineryCommonsMathXChart.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | // joinery has some neat capabilities but has a restrictive GPL3 license 17 | import joinery.DataFrame 18 | import org.apache.commons.math4.legacy.distribution.EmpiricalDistribution 19 | import org.knowm.xchart.CategoryChartBuilder 20 | import org.knowm.xchart.Histogram 21 | import org.knowm.xchart.SwingWrapper 22 | // select keeps rows whose value at index 3 (presumably bedrooms - confirm against the CSV header) is below 30; retain keeps only the price column 23 | def binCount = 50 24 | def is = getClass().classLoader.getResourceAsStream('kc_house_data.csv') 25 | def price = DataFrame.readCsv(is).select{ values -> values[3] < 30 }.retain("price") 26 | def dist = EmpiricalDistribution.from(binCount, price.toArray(double[])) 27 | def hist1 = new DataFrame("idx", "price") 28 | dist.binStats.withIndex().each { v, i -> hist1.append([i, v.n]) } 29 | hist1 = hist1.retain("price") 30 | hist1.plot(DataFrame.PlotType.BAR) 31 | 32 | // hist1.plot uses an older version of xchart under the covers 33 | // we can also use xchart directly (using the newer version shown) 34 | 35 | def hist2 = new Histogram(price.collect{ it[0] }, binCount) 36 | def chart = new CategoryChartBuilder().width(900).height(450) 37 | .title("Price Histogram").xAxisTitle("Price").yAxisTitle("Count").build() 38 | chart.addSeries("Price", hist2.xAxisData, hist2.yAxisData) 39 | chart.styler.with { 40 | XAxisLabelRotation = 90 41 | availableSpaceFill = 0.98 42 | XAxisMin = 0 43 | XAxisMax = 8_000_000 44 | } 45 | new SwingWrapper(chart).displayChart() 46 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/MultiRegressionOLS_CommonsMathXChart.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import org.apache.commons.math4.legacy.stat.regression.OLSMultipleLinearRegression 17 | import org.knowm.xchart.SwingWrapper 18 | import org.knowm.xchart.XYChartBuilder 19 | 20 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV 21 | import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Line 22 | import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Scatter 23 | import static org.knowm.xchart.style.markers.SeriesMarkers.NONE 24 | /* Fits a multiple linear regression of price on every CSV column after it, times the fit, and charts actual vs predicted prices. */ 25 | def file = getClass().classLoader.getResource('kc_house_data.csv').file 26 | def csv = CSV.withFirstRecordAsHeader().parse(new FileReader(file)) 27 | def all = csv.toList() 28 | def price = all.collect{ it[2].toDouble() } /* column index 2 is the regression target */ 29 | def features = all.collect{ it.toList()[3..-1]*.toDouble() } /* every column from index 3 to the end becomes a numeric feature */ 30 | def start = System.currentTimeMillis() 31 | def reg = new OLSMultipleLinearRegression() 32 | reg.newSampleData(price as double[], features as double[][]) 33 | def betas = reg.estimateRegressionParameters() 34 | def end = System.currentTimeMillis() 35 | println "Took ${end - start}ms" 36 | def predicted = features.collect{ row -> row.indices.collect{ i -> betas[i+1] * row[i] }.sum() + betas[0] } /* manual prediction: betas[0] is used as the intercept and betas[i+1] as the coefficient of feature i */ 37 | 38 | def chart = new XYChartBuilder().width(900).height(450).title("Actual vs predicted price").xAxisTitle("Actual").yAxisTitle("Predicted").build() 39 | chart.addSeries("Price", price as double[], predicted as double[]).with { 40 | XYSeriesRenderStyle = Scatter 41 | } 42 | def from = [price.min(), predicted.min()].min() /* smallest value across actual and predicted prices */ 43 | def to = [price.max(), predicted.max()].max() /* largest value across actual and predicted prices */ 44 |
chart.addSeries("Ideal", [from, to] as double[], [from, to] as double[]).with { /* reference line y = x: the same two values are used for both axes */ 45 | marker = NONE 46 | XYSeriesRenderStyle = Line 47 | } 48 | new SwingWrapper(chart).displayChart() 49 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/MultiRegressionOLS_Smile.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | import smile.data.DataFrame 17 | import smile.data.formula.Formula 18 | import smile.io.Read 19 | import smile.plot.swing.LinePlot 20 | import smile.plot.swing.ScatterPlot 21 | import smile.regression.OLS 22 | 23 | import static java.awt.Color.BLUE 24 | import static java.awt.Color.RED 25 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV 26 | import static smile.plot.swing.Line.Style.DASH 27 | /* Fits a Smile OLS model with 'price' as the response and plots actual vs predicted prices with a dashed reference line. */ 28 | var file = getClass().classLoader.getResource('kc_house_data.csv').file as File 29 | var table = Read.csv(file.toPath(), CSV.withFirstRecordAsHeader()) 30 | table = table.drop(0, 1) // remove 'id' and 'date' 31 | var filtered = table.toList().findAll { it.apply('bedrooms') <= 30 } /* keep only rows with at most 30 bedrooms */ 32 | table = DataFrame.of(filtered) 33 | 34 | var price = table.column('price').toDoubleArray() 35 | var model = OLS.fit(Formula.lhs('price'), table) /* only the response column is named; NOTE(review): presumably all remaining columns act as predictors - confirm in the Smile Formula docs */ 36 | var predicted = model.predict(table) 37 | double[][] data = [price, predicted].transpose() /* rows of [actual, predicted] */ 38 | 39 | var from = [price.toList().min(), predicted.min()].min() 40 | var to = [price.toList().max(), predicted.max()].max() 41 | var pts = [[from, from], [to, to]] /* end points of the y = x reference line */ 42 | var ideal = LinePlot.of(pts as double[][], DASH, RED) 43 | 44 | ScatterPlot.of(data, BLUE).canvas().with { 45 | title = 'Actual vs predicted price' 46 | setAxisLabels('Actual', 'Predicted') 47 | add(ideal) 48 | window() 49 | } 50 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/MultiRegressionOLS_WekaXChart.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import org.knowm.xchart.SwingWrapper 17 | import org.knowm.xchart.XYChartBuilder 18 | import weka.classifiers.functions.LinearRegression 19 | import weka.core.converters.CSVLoader 20 | import weka.filters.Filter 21 | import weka.filters.unsupervised.attribute.Remove 22 | 23 | import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Line 24 | import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Scatter 25 | import static org.knowm.xchart.style.markers.SeriesMarkers.NONE 26 | /* Trains a Weka LinearRegression on the house data with price as the class attribute and charts actual vs predicted prices. */ 27 | def file = getClass().classLoader.getResource('kc_house_data.csv').file as File 28 | 29 | def loader = new CSVLoader(file: file) 30 | def model = new LinearRegression() 31 | def allInstances = loader.dataSet 32 | def priceIndex = 2 33 | allInstances.classIndex = priceIndex 34 | // remove "id" and "date" columns 35 | def rm = new Remove(attributeIndices: '1,2', inputFormat: allInstances) 36 | def instances = Filter.useFilter(allInstances, rm) 37 | model.buildClassifier(instances) 38 | println model 39 | 40 | def actual = instances.collect{ it.value(0).toDouble() } /* with the first two columns removed, price is read from attribute 0 */ 41 | def predicted = instances.collect{ model.classifyInstance(it) } 42 | 43 | def chart = new XYChartBuilder().width(900).height(450).title("Actual vs predicted price").xAxisTitle("Actual").yAxisTitle("Predicted").build() 44 | chart.addSeries("Price", actual as double[], predicted as double[]).with { 45 | XYSeriesRenderStyle = Scatter 46 | } 47 | def from = [actual.min(), predicted.min()].min() /* smallest value across actual and predicted prices */ 48 | def to = [actual.max(), predicted.max()].max() /* largest value across both series; taking the smaller maximum here would cut the reference line short */ 49 |
chart.addSeries("Ideal", [from, to] as double[], [from, to] as double[]).with { /* reference line y = x: the same two values are used for both axes */ 50 | marker = NONE 51 | XYSeriesRenderStyle = Line 52 | } 53 | new SwingWrapper(chart).displayChart() 54 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/MultiRegressionSGD_WekaXChart.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | import org.knowm.xchart.SwingWrapper 17 | import org.knowm.xchart.XYChartBuilder 18 | import weka.classifiers.functions.SGD 19 | import weka.core.converters.CSVLoader 20 | import weka.filters.Filter 21 | import weka.filters.unsupervised.attribute.Remove 22 | 23 | import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Line 24 | import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Scatter 25 | import static org.knowm.xchart.style.markers.SeriesMarkers.NONE 26 | /* Trains a Weka SGD model on the house data with price as the class attribute and charts actual vs predicted prices. */ 27 | def file = getClass().classLoader.getResource('kc_house_data.csv').file as File 28 | 29 | def loader = new CSVLoader(file: file) 30 | def model = new SGD() 31 | model.options = ['-F', '4', '-N'] as String[] // Huber loss, unscaled 32 | def allInstances = loader.dataSet 33 | def priceIndex = 2 34 | allInstances.classIndex = priceIndex 35 | // remove "id", "date", 'zip', 'lat', 'long' columns 36 | def rm = new Remove(attributeIndices: '1,2,17,18,19', inputFormat: allInstances) 37 | def instances = Filter.useFilter(allInstances, rm) 38 | model.buildClassifier(instances) 39 | println model 40 | 41 | def actual = instances.collect{ it.value(0).toDouble() } /* with the first two columns removed, price is read from attribute 0 */ 42 | def predicted = instances.collect{ model.classifyInstance(it) } 43 | 44 | def chart = new XYChartBuilder().width(900).height(450).title("Actual vs predicted price").xAxisTitle("Actual").yAxisTitle("Predicted").build() 45 | chart.addSeries("Price", actual as double[], predicted as double[]).with { 46 | XYSeriesRenderStyle = Scatter 47 | } 48 | def from = [actual.min(), predicted.min()].min() /* smallest value across actual and predicted prices */ 49 | def to = [actual.max(), predicted.max()].max() /* largest value across both series; taking the smaller maximum here would cut the reference line short */ 50 | chart.addSeries("Ideal", [from, to] as double[], [from, to] as double[]).with { /* reference line y = x */ 51 | marker = NONE 52 | XYSeriesRenderStyle = Line 53 | } 54 | new SwingWrapper(chart).displayChart() 55 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/SplitData.groovy:
-------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | def full = getClass().classLoader.getResource('kc_house_data.csv').file as File /* Splits kc_house_data.csv into house_train.csv and house_test.csv, written next to the source file; prints the line count of each. */ 17 | def parent = full.parentFile 18 | def lines = full.readLines() 19 | println lines.size() 20 | 21 | def (trainLines, testLines) = lines.chop(lines.size() * 0.8 as int, -1) /* first 80% of the lines (starting with lines[0]) go to train; NOTE(review): the -1 chunk size presumably means "everything remaining" - confirm in the GDK chop docs */ 22 | 23 | def train = new File(parent, 'house_train.csv') 24 | train.text = trainLines.join('\n') 25 | println train.readLines().size() 26 | 27 | def test = new File(parent, 'house_test.csv') 28 | test.delete() /* start from an empty file because the next line appends */ 29 | test << lines[0] << '\n' << testLines.join('\n') /* lines[0] is written first so the test file begins with the same first line as the source */ 30 | println test.readLines().size() 31 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/groovy/TablesawUtil.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import tech.tablesaw.plotly.Plot 18 | import tech.tablesaw.plotly.components.Figure 19 | /** Helper that shows Tablesaw figures through HTML files placed in one fixed directory. */ 20 | class TablesawUtil { 21 | private File parent /* directory that receives the generated .html files */ 22 | 23 | /** 24 | * Creates the plot files in a suitable temporary location 25 | * determined from the parent of the passed file - typically 26 | * a build folder or IDE temporary folder. 27 | * 28 | * @param filename Of a file in a suitable temporary directory 29 | */ 30 | TablesawUtil(String filename) { 31 | parent = new File(filename).parentFile 32 | } 33 | /** Passes the figure to Plot.show with the target file "<filename>.html" inside parent; any failure is printed instead of rethrown. */ 34 | def show(Figure figure, String filename) { 35 | def file = new File(parent, filename + '.html') 36 | try { 37 | Plot.show(figure, file) 38 | } catch(ex) { /* best effort: report the problem and carry on */ 39 | println "Unable to show file '$file' due to '$ex.message'" 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /subprojects/HousePrices/src/main/resources/kc_house_data_source.txt: -------------------------------------------------------------------------------- 1 | source: https://www.kaggle.com/harlfoxem/housesalesprediction/data 2 | 3 | See also: 4 | https://nicolai92.github.io/posts/pentaho-weka-prediction 5 | -------------------------------------------------------------------------------- /subprojects/HousePricesBeam/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the
License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | apply plugin: 'groovy' 17 | 18 | sourceCompatibility = 1.8 19 | /* Library versions ($beamVersion, $smileVersion, ...) are not defined in this file; presumably they come from the root gradle.properties - confirm. */ 20 | dependencies { 21 | implementation "org.apache.beam:beam-sdks-java-core:$beamVersion" 22 | implementation "org.apache.beam:beam-runners-direct-java:$beamVersion" 23 | implementation "org.slf4j:slf4j-api:$slf4jVersion" 24 | implementation "org.apache.groovy:groovy:$groovy4Version" 25 | implementation("com.github.haifengl:smile-core:$smileVersion") { 26 | transitive = false /* take only this artifact, not its transitive dependencies */ 27 | } 28 | implementation("com.github.haifengl:smile-base:$smileVersion") { 29 | transitive = false 30 | } 31 | implementation "org.apache.commons:commons-csv:$commonsCsvVersion" 32 | implementation "org.apache.commons:commons-math4-legacy:$commonsMath4Version" 33 | runtimeOnly "org.slf4j:slf4j-jdk14:$slf4jVersion" 34 | runtimeOnly "org.bytedeco:openblas-platform:$openblasPlatformVersion" 35 | } 36 | /* Registers one run<Name> JavaExec task for each source base name that starts with 'House'. */ 37 | FileUtil.baseNames(sourceSets.main.allSource.files).each { name -> 38 | if (name.startsWith('House')) { 39 | tasks.register("run$name", JavaExec) { 40 | dependsOn compileGroovy 41 | group 'Application' 42 | description "Run ${name}.groovy as a JVM application/Groovy script" 43 | classpath = sourceSets.main.runtimeClasspath 44 | mainClass = name 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /subprojects/HousePricesBeam/src/main/groovy/AggregateModelStats.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | *
Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import org.apache.beam.sdk.transforms.SerializableFunction 17 | 18 | import static java.lang.Math.sqrt 19 | 20 | class AggregateModelStats implements SerializableFunction, double[]> { 21 | @Override 22 | double[] apply(Iterable input) { 23 | double[] sum = null 24 | for (double[] next : input) { 25 | if (sum == null) { 26 | sum = new double[next.size()] 27 | (0.. { 23 | private PCollectionView model; 24 | private Closure clos; 25 | 26 | public EvaluateModel(PCollectionView model, Closure clos) { 27 | this.model = model; 28 | this.clos = clos; 29 | } 30 | 31 | @DoFn.ProcessElement 32 | public void processElement(@DoFn.Element double[][] chunk, DoFn.OutputReceiver out, ProcessContext c) throws IOException { 33 | out.output(clos.call(chunk, c.sideInput(model))); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /subprojects/HousePricesBeam/src/main/groovy/Log.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import org.apache.beam.sdk.transforms.DoFn; 17 | import org.apache.beam.sdk.transforms.PTransform; 18 | import org.apache.beam.sdk.transforms.ParDo; 19 | import org.apache.beam.sdk.values.PCollection; 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | class Log { 24 | private static final Logger LOGGER = LoggerFactory.getLogger(Log.class); 25 | private Log() { } 26 | 27 | static PTransform, PCollection> ofElements() { 28 | return new LoggingTransform<>() 29 | } 30 | 31 | private static class LoggingTransform extends PTransform, PCollection> { 32 | @Override 33 | PCollection expand(PCollection input) { 34 | return input.apply(ParDo.of(new DoFn() { 35 | @ProcessElement 36 | void processElement(@Element T element, OutputReceiver out) { 37 | LOGGER.info(element.toString()) 38 | out.output(element) 39 | } 40 | })); 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /subprojects/HousePricesBeam/src/main/groovy/MeanDoubleArrayCols.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import org.apache.beam.sdk.transforms.SerializableFunction 17 | 18 | class MeanDoubleArrayCols implements SerializableFunction, double[]> { 19 | @Override 20 | double[] apply(Iterable input) { 21 | double[] sum = null 22 | def count = 0 23 | for (double[] next : input) { 24 | if (sum == null) { 25 | sum = new double[next.size()] 26 | (0.. 40 | def subtask = tasks.register("run$name", JavaExec) { 41 | dependsOn compileGroovy 42 | group 'Application' 43 | description "Run ${name}.groovy as a JVM application/Groovy script" 44 | classpath = sourceSets.main.runtimeClasspath 45 | mainClass = name 46 | } 47 | runall.configure { 48 | dependsOn subtask 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /subprojects/HousePricesGPars/src/main/groovy/HelloActors.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import static groovyx.gpars.actor.Actors.actor 17 | /* Two actors: one replies to every String it receives with the reversed text, the other sends it a reversed sentence and prints the reply. */ 18 | def decryptor = actor { 19 | loop { 20 | react { String message -> 21 | reply message.reverse() 22 | } 23 | } 24 | } 25 | 26 | def console = actor { 27 | decryptor << 'lellarap si yvoorG' 28 | react { 29 | println 'Decrypted message: ' + it 30 | } 31 | } 32 | 33 | console.join() /* keep the script alive until the console actor has finished */ 34 | -------------------------------------------------------------------------------- /subprojects/HousePricesGPars/src/main/groovy/HelloAgent.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import groovyx.gpars.agent.Agent 17 | /* An Agent wrapping a list; each closure sent to it appends one name to that list. */ 18 | def agent = new Agent([]) 19 | agent {it << 'Dave'} 20 | agent {it << 'Joe'} 21 | assert agent.val.size() == 2 /* both appends are expected to be visible through val */ 22 | -------------------------------------------------------------------------------- /subprojects/HousePricesGPars/src/main/groovy/HelloDataflow.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import groovyx.gpars.dataflow.Dataflows 17 | import static groovyx.gpars.dataflow.Dataflow.task 18 | /* Three tasks that communicate only through the dataflow variables x, y and z. */ 19 | def df = new Dataflows() 20 | 21 | task { 22 | df.z = df.x + df.y /* declared first even though x and y are assigned by the tasks below */ 23 | } 24 | 25 | task { 26 | df.x = 10 27 | } 28 | 29 | task { 30 | df.y = 5 31 | } 32 | 33 | assert df.z == 15 /* 10 + 5 */ 34 | -------------------------------------------------------------------------------- /subprojects/HousePricesGPars/src/main/groovy/HelloParallelCollectionProcessing.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | import groovyx.gpars.GParsPool 17 | /* Doubles each list element with collectParallel inside a GParsPool block and checks that the result keeps the input order. */ 18 | GParsPool.withPool { 19 | def result = [1, 2, 3, 4, 5].collectParallel{it * 2} 20 | assert result == [2, 4, 6, 8, 10] 21 | } 22 | -------------------------------------------------------------------------------- /subprojects/HousePricesGPars/src/main/groovy/HelloStm.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | import static groovyx.gpars.stm.GParsStm.* 17 | import static org.multiverse.api.StmUtils.newTxnInteger 18 | /* Account whose balance is a transactional integer that is read and updated only inside atomic blocks. */ 19 | class Account { 20 | private amount = newTxnInteger(0) /* balance, initialised to 0 */ 21 | /* Adds a to the balance inside an atomic block. */ 22 | void transfer(final int a) { 23 | atomic { 24 | amount.increment(a) 25 | } 26 | } 27 | /* Returns the balance read inside an atomicWithInt block. */ 28 | int getCurrentAmount() { 29 | atomicWithInt { 30 | amount.get() 31 | } 32 | } 33 | } 34 | 35 | def a = new Account() 36 | a.transfer(10) 37 | a.transfer(15) 38 | assert a.currentAmount == 25 /* 10 + 15 */ 39 | -------------------------------------------------------------------------------- /subprojects/HousePricesGPars/src/main/resources/kc_house_data_source.txt: -------------------------------------------------------------------------------- 1 | source: https://www.kaggle.com/harlfoxem/housesalesprediction/data 2 | 3 | See also: 4 | https://nicolai92.github.io/posts/pentaho-weka-prediction 5 | -------------------------------------------------------------------------------- /subprojects/HousePricesGroovyFX/src/main/groovy/HistogramBedrooms_CommonsMathGroovyFX.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | import org.apache.commons.math4.legacy.distribution.EmpiricalDistribution 17 | import org.apache.commons.math4.legacy.stat.descriptive.SummaryStatistics 18 | 19 | import static groovyx.javafx.GroovyFX.start 20 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV 21 | /* Prints summary statistics for the bedroom counts and shows them as a GroovyFX bar-chart histogram. */ 22 | def full = getClass().classLoader.getResource('kc_house_data.csv').file 23 | def csv = CSV.withFirstRecordAsHeader().parse(new FileReader(full)) 24 | def all = csv.collect { it.bedrooms.toInteger() }.findAll{ it < 30 } /* bedroom counts, keeping only values below 30 */ 25 | 26 | def stats = new SummaryStatistics() 27 | all.each{ stats.addValue(it as double) } 28 | println stats.summary 29 | 30 | def dist = EmpiricalDistribution.from(all.max(), all as double[]) /* the bin count equals the largest bedroom count */ 31 | def bins = dist.binStats.withIndex().collectMany { v, i -> [i.toString(), v.n] } /* flat list alternating bin label and bin sample count */ 32 | 33 | start { 34 | stage(title: 'Number of bedrooms histogram', show: true, width: 800, height: 600) { 35 | scene { 36 | barChart(title: 'Bedroom count', barGap: 0, categoryGap: 2) { 37 | series(name: 'Number of properties', data: bins) 38 | } 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /subprojects/HousePricesGroovyFX/src/main/groovy/HistogramPrice_CommonsMathGroovyFX.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | import org.apache.commons.math4.legacy.distribution.EmpiricalDistribution 18 | import org.apache.commons.math4.legacy.stat.descriptive.SummaryStatistics 19 | 20 | import static groovyx.javafx.GroovyFX.start 21 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV 22 | /* Shows house prices as a 100-bin GroovyFX bar chart whose title carries the min, mean and max price. */ 23 | def file = getClass().classLoader.getResource('kc_house_data.csv').file 24 | def csv = CSV.withFirstRecordAsHeader().parse(new FileReader(file)) 25 | def all = csv.findAll { it.bedrooms.toInteger() < 30 }.collect { it.price.toDouble() } /* prices of the records with fewer than 30 bedrooms */ 26 | def info = new SummaryStatistics(); all.each(info::addValue) 27 | def head = "Price percentile (min=\$$info.min, mean=\$${info.mean as int}, max=\$$info.max)" 28 | def dist = EmpiricalDistribution.from(100, all as double[]) 29 | def bins = dist.binStats.withIndex().collectMany { v, i -> [i.toString(), v.n] } /* flat list alternating bin label and bin sample count */ 30 | //println info 31 | 32 | start { 33 | stage(title: 'Price histogram', show: true, width: 800, height: 600) { 34 | scene { 35 | barChart(title: head, barGap: 0, categoryGap: 0) { 36 | series(name: 'Number of properties', data: bins) 37 | } 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /subprojects/HousePricesGroovyFX/src/main/groovy/SimpleRegressionOLS_CommonsMathGroovyFX.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import org.apache.commons.math4.legacy.stat.regression.SimpleRegression 17 | import static groovyx.javafx.GroovyFX.start 18 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV 19 | /* Fits a single-feature linear regression of price on the chosen feature and plots the data points together with the fitted line. */ 20 | def feature = 'bedrooms' 21 | def nonOutliers = feature == 'bedrooms' ? { it[0] < 30 } : { true } /* the filter applies only to the bedrooms feature; any other feature keeps every row */ 22 | def file = getClass().classLoader.getResource('kc_house_data.csv').file 23 | def csv = CSV.withFirstRecordAsHeader().parse(new FileReader(file)) 24 | def all = csv.collect { [it[feature].toDouble(), it.price.toDouble()] }.findAll(nonOutliers) /* [feature value, price] pairs */ 25 | def reg = new SimpleRegression().tap{ addData(all as double[][]) } 26 | def (min, max) = all.transpose().with{ [it[0].min(), it[0].max()] } /* smallest and largest feature value */ 27 | def predicted = [[min, reg.predict(min)], [max, reg.predict(max)]] /* two end points are enough to draw the fitted straight line */ 28 | 29 | start { 30 | stage(title: "Price vs $feature", show: true, width: 800, height: 600) { 31 | scene { 32 | // NOTE using css trick to allow multiple chart types 33 | // TODO consider using JavaFXMultiChart 34 | // scatterChart(opacity: 50) { 35 | // series(name: 'Actual', data: all) 36 | // } 37 | // lineChart { 38 | lineChart(stylesheets: resource('/style.css')) { 39 | series(name: 'Actual', data: all) 40 | series(name: 'Predicted', data: predicted) 41 | } 42 | } 43 | } 44 | } 45 | /* */ 46 | -------------------------------------------------------------------------------- /subprojects/HousePricesGroovyFX/src/main/resources/kc_house_data_source.txt: -------------------------------------------------------------------------------- 1 | source: https://www.kaggle.com/harlfoxem/housesalesprediction/data 2 | 3 | See also: 4 | https://nicolai92.github.io/posts/pentaho-weka-prediction 5 | -------------------------------------------------------------------------------- /subprojects/HousePricesGroovyFX/src/main/resources/style.css:
-------------------------------------------------------------------------------- 1 | /** 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | .default-color0.chart-series-line { 17 | -fx-stroke: transparent; 18 | } 19 | 20 | .default-color1.chart-series-line { /* blue */ 21 | -fx-stroke: #0181e2; 22 | } 23 | 24 | .default-color1.chart-line-symbol { /* blue cross */ 25 | -fx-background-color: #0181e2; 26 | -fx-background-radius: 0; 27 | -fx-background-insets: 0; 28 | -fx-shape: "M2,0 L5,4 L8,0 L10,0 L10,2 L6,5 L10,8 L10,10 L8,10 L5,6 L2,10 L0,10 L0,8 L4,5 L0,2 L0,0 Z"; 29 | } 30 | -------------------------------------------------------------------------------- /subprojects/HousePricesIgnite/README.md: -------------------------------------------------------------------------------- 1 | # House price prediction with Apache Ignite 2 | 3 | This project looks at scaling up the [HousePrices](../HousePrices/) 4 | project using [Apache Ignite](https://ignite.apache.org/). 5 | 6 | house 7 | 8 | ## Ignite overview 9 | 10 | [Apache Ignite](https://ignite.apache.org/) is a "distributed database" for high-performance computing with in-memory speed. 11 | 12 | ## Implementation overview 13 | 14 | Linear regression is a common algorithm used for training a model which can then be used for prediction. 
It is described further in the 15 | main [HousePrices](../HousePrices#linear-regression) project. 16 | Ignite supports a machine learning library `ML` which includes a clustered regression implementation (we used `LSQRTrainer`). 17 | 18 | ### Running the examples 19 | 20 | Groovy code examples can be found in the [src/main/groovy](src/main/groovy) directory. 21 | 22 | You have several options for running the programs (see more details from the main [README](../../README.md#running-the-examples) in the root project): 23 | 24 | * If you have opened the repo in IntelliJ (or your favourite IDE) you should be able to execute the examples directly in the IDE. 25 | 26 | * From the command line, invoke the application using gradlew (use `./gradlew` on unix-like systems) with the run command.\ 27 | `gradlew :HousePricesIgnite:run` 28 | 29 | * If the example has @Grab statements commented out at the top, you can cut and paste the examples into the groovyConsole 30 | and uncomment the grab statements. Make sure to cut and paste any helper classes too if appropriate. 31 | 32 | ### Requirements 33 | 34 | It has been tested on JDK8, JDK11 and JDK17. 35 | -------------------------------------------------------------------------------- /subprojects/HousePricesIgnite/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | apply plugin: 'groovy' 17 | apply plugin: 'application' 18 | 19 | ext.appName = 'HousePricesIgnite' 20 | 21 | application { 22 | mainClass = appName 23 | if (JavaVersion.current().isCompatibleWith(JavaVersion.VERSION_17)) { 24 | applicationDefaultJvmArgs = [ 25 | '--add-opens=java.base/java.io=ALL-UNNAMED', 26 | '--add-opens=java.base/java.lang=ALL-UNNAMED', 27 | '--add-opens=java.base/java.nio=ALL-UNNAMED', 28 | '--add-opens=java.base/java.util=ALL-UNNAMED' 29 | ] 30 | } 31 | } 32 | 33 | tasks.named('run').configure { 34 | description = "Run $appName as a JVM application/Groovy script" 35 | } 36 | 37 | dependencies { 38 | implementation "org.apache.groovy:groovy:$groovy4Version" 39 | implementation "org.apache.ignite:ignite-core:$igniteVersion" 40 | implementation "org.apache.ignite:ignite-ml:$igniteMlVersion" 41 | runtimeOnly "org.apache.ignite:ignite-spring:$igniteVersion" 42 | runtimeOnly "org.slf4j:slf4j-simple:$slf4jVersion" 43 | implementation "tech.tablesaw:tablesaw-core:$tablesawVersion" 44 | implementation "com.google.guava:guava:32.1.3-jre" // non-vulnerable dependency tablesaw-core 45 | } 46 | 47 | tasks.register('copyToLib', Copy) { 48 | into layout.buildDirectory.dir('lib') 49 | from configurations.runtimeClasspath 50 | } 51 | 52 | tasks.register('versionInfo') { 53 | doLast { 54 | File javaHome = new File(System.getProperty('java.home')) 55 | logger.lifecycle "Using Java from $javaHome (version ${System.getProperty('java.version')})" 56 | } 57 | } 58 | 59 | run.dependsOn versionInfo 60 | -------------------------------------------------------------------------------- /subprojects/HousePricesIgnite/src/main/resources/kc_house_data_source.txt: -------------------------------------------------------------------------------- 1 | source: https://www.kaggle.com/harlfoxem/housesalesprediction/ 2 | 
-------------------------------------------------------------------------------- /subprojects/HousePricesSpark/README.md: -------------------------------------------------------------------------------- 1 | # House price prediction with Apache Spark 2 | 3 | This project looks at scaling up the [HousePrices](../HousePrices/) 4 | project using [Apache Spark](https://spark.apache.org/). 5 | 6 | house 7 | 8 | ## Spark overview 9 | 10 | [Apache Spark™](https://spark.apache.org/) is a multi-language engine for 11 | executing data engineering, data science, and machine learning on single-node 12 | machines or clusters. 13 | 14 | ## Implementation overview 15 | 16 | Linear regression is a common algorithm used for training a model which can then be used for prediction. It is described further in the 17 | main [HousePrices](../HousePrices#linear-regression) project. 18 | Spark supports a machine learning library `MLlib` which includes a 19 | scalable implementation, `LinearRegression`. 20 | 21 | ### Running the examples 22 | 23 | Groovy code examples can be found in the [src/main/groovy](src/main/groovy) directory. 24 | 25 | You have several options for running the programs (see more details from the main [README](../../README.md#running-the-examples) in the root project): 26 | 27 | * If you have opened the repo in IntelliJ (or your favourite IDE) you should be able to execute the examples directly in the IDE. 28 | 29 | * From the command line, invoke the application using gradlew (use `./gradlew` on unix-like systems) with the run command.\ 30 | `gradlew :HousePricesSpark:run` 31 | 32 | * If the example has @Grab statements commented out at the top, you can cut and paste the examples into the groovyConsole 33 | and uncomment the grab statements. Make sure to cut and paste any helper classes too if appropriate. 34 | 35 | ### Requirements 36 | 37 | It has been tested on JDK8 and JDK11. The current Spark versions are not compatible with JDK17. 
38 | -------------------------------------------------------------------------------- /subprojects/HousePricesSpark/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | apply plugin: 'groovy' 17 | apply plugin: 'application' 18 | 19 | ext { 20 | appName = 'HousePricesSpark' 21 | sparkVariant = '2.13' 22 | sparkVersion = '3.5.1' 23 | } 24 | 25 | application { 26 | mainClass = appName 27 | } 28 | 29 | tasks.named('run').configure { 30 | description = "Run $appName as a JVM application/Groovy script" 31 | } 32 | 33 | dependencies { 34 | implementation "org.apache.groovy:groovy:$groovy4Version" 35 | implementation "org.apache.spark:spark-sql_$sparkVariant:$sparkVersion" 36 | implementation "org.apache.avro:avro:1.11.3" // non-vulnerable dependency spark-sql 37 | implementation "org.apache.avro:avro-mapred:1.11.3" // non-vulnerable dependency spark-sql 38 | implementation "org.apache.spark:spark-mllib_$sparkVariant:$sparkVersion" 39 | implementation "com.fasterxml.jackson:jackson-bom:$jacksonVersion" 40 | runtimeOnly "org.apache.spark:spark-core_$sparkVariant:$sparkVersion" 41 | } 42 | 43 | tasks.register('copyToLib', Copy) { 44 | into layout.buildDirectory.dir('deps') 45 | from configurations.runtimeClasspath 46 | } 47 | 48 | tasks.register('versionInfo') { 49 | doLast { 50 | File 
javaHome = new File(System.getProperty('java.home')) 51 | logger.lifecycle "Using Java from $javaHome (version ${JavaVersion.current()})" 52 | assert JavaVersion.current().isJava8() || JavaVersion.current().isJava11(), "JDK8 or JDK11 is required but found ${JavaVersion.current()}!" 53 | } 54 | } 55 | 56 | run.dependsOn versionInfo 57 | -------------------------------------------------------------------------------- /subprojects/HousePricesSpark/src/main/resources/kc_house_data_source.txt: -------------------------------------------------------------------------------- 1 | source: https://www.kaggle.com/harlfoxem/housesalesprediction/ 2 | -------------------------------------------------------------------------------- /subprojects/Iris/src/main/groovy/InstallWekaPackages.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | import weka.core.WekaPackageManager 17 | 18 | def pkgMgr = new WekaPackageManager() 19 | pkgMgr.startupCheck(false, System.out) 20 | 21 | // install each listed Weka package unless it is already installed 22 | def packages = ['wekaDeeplearning4j', 'SelfOrganizingMap'] 23 | packages.each { pkgName -> 24 | def pkg = pkgMgr.getRepositoryPackageInfo(pkgName) 25 | if (pkg.installed) println "$pkgName already installed" 26 | else { 27 | // announce the package before installing so the details are shown even if the install fails 28 | println "Installing $pkgName ($pkg.packageMetaData.Category):" 29 | println "Title: $pkg.packageMetaData.Title" 30 | println "Description: $pkg.packageMetaData.Description" 31 | pkg.install() 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /subprojects/Iris/src/main/groovy/J48Tree_WekaXChart.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | import org.knowm.xchart.SwingWrapper 17 | import org.knowm.xchart.XYChartBuilder 18 | import weka.classifiers.trees.J48 19 | import weka.core.converters.CSVLoader 20 | 21 | import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Scatter 22 | 23 | def file = getClass().classLoader.getResource('iris_data.csv').file as File 24 | 25 | def species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'] 26 | def loader = new CSVLoader(file: file) 27 | def model = new J48() 28 | def allInstances = loader.dataSet 29 | allInstances.classIndex = 4 30 | model.buildClassifier(allInstances) 31 | println model 32 | 33 | double[] actual = allInstances.collect{ it.value(4) } 34 | double[] predicted = allInstances.collect{ model.classifyInstance(it) } 35 | double[] petalW = allInstances.collect{ it.value(2) } 36 | double[] petalL = allInstances.collect{ it.value(3) } 37 | def indices = actual.indices 38 | 39 | def chart = new XYChartBuilder().width(900).height(450). 40 | title("Species").xAxisTitle("Petal length").yAxisTitle("Petal width").build() 41 | species.eachWithIndex{ String name, int i -> 42 | def groups = indices.findAll{ predicted[it] == i }.groupBy{ actual[it] == i } 43 | Collection found = groups[true] ?: [] 44 | Collection errors = groups[false] ?: [] 45 | println "$name: ${found.size()} correct, ${errors.size()} incorrect" 46 | chart.addSeries("$name correct", petalW[found], petalL[found]).with { 47 | XYSeriesRenderStyle = Scatter 48 | } 49 | if (errors) { 50 | chart.addSeries("$name incorrect", petalW[errors], petalL[errors]).with { 51 | XYSeriesRenderStyle = Scatter 52 | } 53 | } 54 | } 55 | new SwingWrapper(chart).displayChart() 56 | -------------------------------------------------------------------------------- /subprojects/Iris/src/main/groovy/LogisticRegression_WekaXChart.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache 
License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import org.knowm.xchart.SwingWrapper 17 | import org.knowm.xchart.XYChartBuilder 18 | import weka.classifiers.functions.SimpleLogistic 19 | import weka.core.converters.CSVLoader 20 | 21 | import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Scatter 22 | 23 | def file = getClass().classLoader.getResource('iris_data.csv').file as File 24 | 25 | def species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'] 26 | def loader = new CSVLoader(file: file) 27 | def model = new SimpleLogistic() 28 | def allInstances = loader.dataSet 29 | allInstances.classIndex = 4 30 | model.buildClassifier(allInstances) 31 | 32 | double[] actual = allInstances.collect{ it.value(4) } 33 | double[] predicted = allInstances.collect{ model.classifyInstance(it) } 34 | double[] petalW = allInstances.collect{ it.value(2) } 35 | double[] petalL = allInstances.collect{ it.value(3) } 36 | def indices = actual.indices 37 | 38 | def chart = new XYChartBuilder().width(900).height(450). 
39 | title("Species").xAxisTitle("Petal length").yAxisTitle("Petal width").build() 40 | species.eachWithIndex{ String name, int i -> 41 | def groups = indices.findAll{ predicted[it] == i }.groupBy{ actual[it] == i } 42 | Collection found = groups[true] ?: [] 43 | Collection errors = groups[false] ?: [] 44 | println "$name: ${found.size()} correct, ${errors.size()} incorrect" 45 | chart.addSeries("$name correct", petalW[found], petalL[found]).with { 46 | XYSeriesRenderStyle = Scatter 47 | } 48 | if (errors) { 49 | chart.addSeries("$name incorrect", petalW[errors], petalL[errors]).with { 50 | XYSeriesRenderStyle = Scatter 51 | } 52 | } 53 | } 54 | new SwingWrapper(chart).displayChart() 55 | -------------------------------------------------------------------------------- /subprojects/Iris/src/main/groovy/MLP_Weka.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import weka.classifiers.AbstractClassifier 18 | import weka.classifiers.Evaluation 19 | import weka.core.Instances 20 | import weka.core.Utils 21 | import weka.core.WekaPackageManager 22 | import weka.core.converters.CSVLoader 23 | 24 | WekaPackageManager.loadPackages(true) 25 | 26 | def file = getClass().classLoader.getResource('iris_data.csv').file as File 27 | def loader = new CSVLoader(file: file) 28 | def data = loader.dataSet 29 | data.classIndex = 4 30 | 31 | def options = Utils.splitOptions("-S 1 -numEpochs 10 -layer \"weka.dl4j.layers.OutputLayer -activation weka.dl4j.activations.ActivationSoftmax -lossFn weka.dl4j.lossfunctions.LossMCXENT\"") 32 | AbstractClassifier myClassifier = Utils.forName(AbstractClassifier, "weka.classifiers.functions.Dl4jMlpClassifier", options) 33 | 34 | // Stratify and split 35 | Random rand = new Random(0) 36 | Instances randData = new Instances(data) 37 | randData.randomize(rand) 38 | randData.stratify(3) 39 | Instances train = randData.trainCV(3, 0) 40 | Instances test = randData.testCV(3, 0) 41 | 42 | // Build the classifier on the training data 43 | myClassifier.buildClassifier(train) 44 | 45 | // Evaluate the model on test data 46 | Evaluation eval = new Evaluation(test) 47 | eval.evaluateModel(myClassifier, test) 48 | 49 | println eval.toSummaryString() 50 | println eval.toMatrixString() 51 | -------------------------------------------------------------------------------- /subprojects/Iris/src/main/groovy/NaiveBayes_WekaXChart.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import org.knowm.xchart.SwingWrapper 17 | import org.knowm.xchart.XYChartBuilder 18 | import weka.classifiers.bayes.NaiveBayes 19 | import weka.core.converters.CSVLoader 20 | 21 | import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Scatter 22 | 23 | def file = getClass().classLoader.getResource('iris_data.csv').file as File 24 | 25 | def species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'] 26 | def loader = new CSVLoader(file: file) 27 | def model = new NaiveBayes() 28 | def allInstances = loader.dataSet 29 | allInstances.classIndex = 4 30 | model.buildClassifier(allInstances) 31 | 32 | double[] actual = allInstances.collect{ it.value(4) } 33 | double[] predicted = allInstances.collect{ model.classifyInstance(it) } 34 | double[] petalW = allInstances.collect{ it.value(2) } 35 | double[] petalL = allInstances.collect{ it.value(3) } 36 | def indices = actual.indices 37 | 38 | def chart = new XYChartBuilder().width(900).height(450). 
39 | title("Species").xAxisTitle("Petal length").yAxisTitle("Petal width").build() 40 | species.eachWithIndex{ String name, int i -> 41 | def groups = indices.findAll{ predicted[it] == i }.groupBy{ actual[it] == i } 42 | Collection found = groups[true] ?: [] 43 | Collection errors = groups[false] ?: [] 44 | println "$name: ${found.size()} correct, ${errors.size()} incorrect" 45 | chart.addSeries("$name correct", petalW[found], petalL[found]).with { 46 | XYSeriesRenderStyle = Scatter 47 | } 48 | if (errors) { 49 | chart.addSeries("$name incorrect", petalW[errors], petalL[errors]).with { 50 | XYSeriesRenderStyle = Scatter 51 | } 52 | } 53 | } 54 | new SwingWrapper(chart).displayChart() 55 | -------------------------------------------------------------------------------- /subprojects/Iris/src/main/groovy/SOM_WekaXChart.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | import org.knowm.xchart.SwingWrapper 17 | import org.knowm.xchart.XYChartBuilder 18 | import weka.core.Instances 19 | import weka.core.converters.CSVLoader 20 | 21 | import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Scatter 22 | 23 | def file = getClass().classLoader.getResource('iris_data.csv').file as File 24 | 25 | def loader = new CSVLoader(file: file) 26 | // the SelfOrganizingMap clusterer comes from an optional Weka package, so it is looked up by name 27 | def somClass 28 | try { 29 | somClass = Class.forName("weka.clusterers.SelfOrganizingMap") 30 | } catch(ex) { 31 | println "Optional Weka package 'SelfOrganizingMap' not found: $ex.message" 32 | // without the class there is nothing to run: stop here instead of hitting a NullPointerException below 33 | return 34 | } 35 | 36 | def model = somClass.getConstructor().newInstance() 37 | def allInstances = loader.dataSet 38 | model.buildClusterer(allInstances) 39 | println model 40 | 41 | def chart = new XYChartBuilder().width(900).height(450). 42 | title("Species").xAxisTitle("Petal length").yAxisTitle("Petal width").build() 43 | model.clusterInstances.eachWithIndex { Instances instances, int i -> 44 | println "Cluster $i:\n" + instances.join('\n') 45 | chart.addSeries("Cluster $i", instances.attributeToDoubleArray(0), instances.attributeToDoubleArray(1)).with { 46 | XYSeriesRenderStyle = Scatter 47 | } 48 | } 49 | new SwingWrapper(chart).displayChart() 50 | -------------------------------------------------------------------------------- /subprojects/Iris/src/main/groovy/TablesawUtil.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import tech.tablesaw.plotly.Plot 18 | import tech.tablesaw.plotly.components.Figure 19 | 20 | class TablesawUtil { 21 | private File parent 22 | 23 | /** 24 | * Creates the plot files in a suitable temporary location 25 | * determined from the parent of the passed file - typically 26 | * a build folder or IDE temporary folder. 27 | * 28 | * @param filename Of a file in a suitable temporary directory 29 | */ 30 | TablesawUtil(String filename) { 31 | parent = new File(filename).parentFile 32 | } 33 | 34 | def show(Figure figure, String filename) { 35 | def file = new File(parent, filename + '.html') 36 | try { 37 | Plot.show(figure, file) 38 | } catch(ex) { 39 | println "Unable to show file '$file' due to '$ex.message'" 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /subprojects/Iris/src/main/resources/iris_data_source.txt: -------------------------------------------------------------------------------- 1 | https://archive.ics.uci.edu/ml/machine-learning-databases/iris/ -------------------------------------------------------------------------------- /subprojects/IrisGraalVM/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'groovy' 3 | } 4 | 5 | dependencies { 6 | implementation "org.apache.groovy:groovy:$groovy4Version" 7 | implementation("com.deepnetts:deepnetts-core:1.13.2") { 8 | exclude(group: 'org.apache.logging.log4j', module: 'log4j-core') 9 | exclude(group: 'org.apache.commons', module: 'commons-lang3') 10 | exclude(group: 'org.apache.commons', module: 'commons-configuration2') 11 | exclude(group: 'org.json') 12 | } 13 | runtimeOnly "org.apache.logging.log4j:log4j-to-slf4j:$log4j2Version" 14 | runtimeOnly "org.slf4j:slf4j-simple:$slf4jVersion" 15 | } 16 | 17 | sourceSets { 18 | main { 19 | groovy { 20 | 
srcDirs = ['.'] 21 | } 22 | } 23 | } 24 | 25 | // copies the runtime classpath into the build directory's lib folder and lists the copied files 26 | tasks.register("copyDependenciesToLib", Copy) { 27 | into layout.buildDirectory.dir('lib') 28 | from configurations.runtimeClasspath 29 | doLast { 30 | println "copyDependenciesToLib:\n ${source.files*.absolutePath.join('\n ')}\n ->\n $destinationDir" 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/groovy/DetectLanguageWithFail.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | //@Grab('org.apache.opennlp:opennlp-tools:2.0.0') // or 1.9.4 on JDK8 17 | import opennlp.tools.langdetect.* 18 | 19 | u = "https://dlcdn.apache.org/opennlp/models/langdetect/1.8.3/langdetect-183.bin" 20 | d = new LanguageDetectorME(new LanguageDetectorModel(new URL(u))) 21 | a = 'Bienvenue à Paris' 22 | b = 'Velkommen til København' 23 | 24 | // Expected to fail (French != Danish) to illustrate Groovy's power assert feature 25 | assert d.predictLanguage(a).lang == d.predictLanguage(b).lang 26 | 27 | /* 28 | 29 | Assertion failed: 30 | 31 | assert d.predictLanguage(a).lang == d.predictLanguage(b).lang 32 | | | | | | | | | | 33 | | | | 'fra'| | | | 'dan' 34 | | | | | | | 'Velkommen til København' 35 | | | | | | dan (0.024727160814654276) 36 | | | | | opennlp.tools.langdetect.LanguageDetectorME@184497d1 37 | | | | false 38 | | | 'Bienvenue à Paris' 39 | | fra (0.018630393459062138) 40 | opennlp.tools.langdetect.LanguageDetectorME@184497d1 41 | 42 | */ 43 | -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/groovy/DetectLanguage_OpenNLP.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | //@Grab('org.apache.opennlp:opennlp-tools:2.0.0') // or 1.9.4 on JDK8 17 | import opennlp.tools.langdetect.* 18 | 19 | // use a helper to cache models 20 | def helper = new ResourceHelper('https://dlcdn.apache.org/opennlp/models/langdetect/1.8.3/') 21 | def model = new LanguageDetectorModel(helper.load('langdetect-183')) 22 | def detector = new LanguageDetectorME(model) 23 | 24 | [ 25 | spa: 'Bienvenido a Madrid', 26 | fra: 'Bienvenue à Paris', 27 | dan: 'Velkommen til København', 28 | bul: 'Добре дошли в София' 29 | ].each { k, v -> 30 | assert detector.predictLanguage(v).lang == k 31 | } 32 | -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/groovy/DetectPOS_NLP4j.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | import nlp4j.impl.DefaultDocument 17 | import nlp4j.krmj.annotator.KuromojiAnnotator 18 | import nlp4j.stanford.StanfordPosAnnotator 19 | 20 | var doc = new DefaultDocument() 21 | doc.putAttribute('text', 'I eat sushi with chopsticks.') 22 | var ann = new StanfordPosAnnotator() 23 | ann.setProperty('target', 'text') 24 | ann.annotate(doc) 25 | println doc.keywords.collect{ k -> "${k.facet - 'word.'}(${k.str})" }.join(' ') 26 | 27 | doc = new DefaultDocument() 28 | doc.putAttribute('text', '私は学校に行きました。') 29 | ann = new KuromojiAnnotator() 30 | ann.setProperty('target', 'text') 31 | ann.annotate(doc) 32 | println doc.keywords.collect{ k -> "${k.facet}(${k.str})" }.join(' ') 33 | -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/groovy/DetectPOS_OpenNLP_JDK11.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
*/
//@Grab('org.apache.opennlp:opennlp-tools:2.0.0')
import opennlp.tools.postag.*
import opennlp.tools.tokenize.SimpleTokenizer

def sentences = [
    'Paul has two sisters, Maree and Christine.',
    'No wise fish would go anywhere without a porpoise',
    'His bark was much worse than his bite',
    'Turn on the lights to the main bedroom',
    "Light 'em all up",
    'Make it dark downstairs'
]

def tokenizer = SimpleTokenizer.INSTANCE
// The tagger does not depend on the sentence, so build it once instead of once per
// iteration (presumably each construction resolves/loads the 'en' model again - TODO confirm).
def posTagger = new POSTaggerME('en')

sentences.each { sentence ->
    String[] tokens = tokenizer.tokenize(sentence)
    String[] tags = posTagger.tag(tokens)
    // print TAG(token); when the tag equals the token itself (e.g. punctuation) print it once
    println tokens.indices.collect { i ->
        tags[i] == tokens[i] ? tags[i] : "${tags[i]}(${tokens[i]})"
    }.join(' ')
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessing/src/main/groovy/DetectPOS_OpenNLP_JDK8only.groovy:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/
//@Grab('org.apache.opennlp:opennlp-tools:1.9.4')
import opennlp.tools.postag.*
import opennlp.tools.tokenize.SimpleTokenizer

// use a helper to cache models
// (https, matching the other scripts that download from the same host)
def helper = new ResourceHelper('https://opennlp.sourceforge.net/models-1.5')
def sentences = [
    'Paul has two sisters, Maree and Christine.',
    'No wise fish would go anywhere without a porpoise',
    'His bark was much worse than his bite',
    'Turn on the lights to the main bedroom',
    "Light 'em all up",
    'Make it dark downstairs'
]
def tokenizer = SimpleTokenizer.INSTANCE

// Model and tagger are the same for every sentence: read the model file and
// build the tagger once rather than on each loop iteration.
def model = new POSModel(helper.load('en-pos-maxent'))
def posTagger = new POSTaggerME(model)

sentences.each { sentence ->
    String[] tokens = tokenizer.tokenize(sentence)
    String[] tags = posTagger.tag(tokens)
    // print TAG(token); when the tag equals the token itself (e.g. punctuation) print it once
    println tokens.indices.collect { i ->
        tags[i] == tokens[i] ? tags[i] : "${tags[i]}(${tokens[i]})"
    }.join(' ')
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessing/src/main/groovy/DetectPOS_Smile.groovy:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/
import smile.nlp.pos.HMMPOSTagger
import smile.nlp.tokenizer.SimpleTokenizer

def sentences = [
    'Paul has two sisters, Maree and Christine.',
    'No wise fish would go anywhere without a porpoise',
    'His bark was much worse than his bite',
    'Turn on the lights to the main bedroom',
    "Light 'em all up",
    'Make it dark downstairs'
]
def tokenizer = new SimpleTokenizer(true)

// Tokenize each sentence, tag it with Smile's default HMM tagger and print TAG(token) pairs.
for (sentence in sentences) {
    String[] tokens = tokenizer.split(sentence)
    // tags are rendered via toString() so they can be compared with the token text below
    def tags = HMMPOSTagger.default.tag(tokens).collect { tag -> tag.toString() }
    def rendered = (0..<tokens.length).collect { i ->
        // when the tag text equals the token itself, print it once
        tags[i] == tokens[i] ? tags[i] : "${tags[i]}(${tokens[i]})"
    }
    println rendered.join(' ')
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessing/src/main/groovy/DetectPOS_TikaOpenNLP.groovy:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/
//@Grab('org.apache.opennlp:opennlp-tools:2.0.0') // or 1.9.4 on JDK8
//@Grab('org.apache.tika:tika-parsers-standard-package:2.4.1')
import opennlp.tools.postag.*
import opennlp.tools.tokenize.SimpleTokenizer
import org.apache.tika.metadata.Metadata
import org.apache.tika.parser.ParseContext
import org.apache.tika.parser.pdf.PDFParser
import org.apache.tika.sax.BodyContentHandler

// use a helper to cache models
def helper = new ResourceHelper('https://opennlp.sourceforge.net/models-1.5')
def tokenizer = SimpleTokenizer.INSTANCE

// The POS model and tagger are the same for every line of the PDF:
// read the model file and build the tagger once, not once per sentence.
def model = new POSModel(helper.load('en-pos-maxent'))
def posTagger = new POSTaggerME(model)

// Read the PDF straight from the classpath stream; unlike converting URL.file to a File,
// this is not affected by URL-encoded characters (e.g. spaces) in the resource path.
def pdf = getClass().classLoader.getResourceAsStream("PartsOfSpeech.pdf")
assert pdf != null : 'PartsOfSpeech.pdf not found on the classpath'
pdf.withStream { is ->
    def handler = new BodyContentHandler()
    def metadata = new Metadata()
    def context = new ParseContext()
    def parser = new PDFParser()
    parser.parse(is, handler, metadata, context)

    // extract and tag content (grep() drops the empty lines)
    def sentences = handler.toString().readLines().grep()
    sentences.each { sentence ->
        String[] tokens = tokenizer.tokenize(sentence)
        String[] tags = posTagger.tag(tokens)
        // print TAG(token); when the tag equals the token itself print it once
        println tokens.indices.collect { i ->
            tags[i] == tokens[i] ? tags[i] : "${tags[i]}(${tokens[i]})"
        }.join(' ')
    }

    // extract some metadata
    def metadataOfInterest = ['dc:creator', 'Content-Type', 'pdf:encrypted', 'dc:title']
    def expectedMetadataValues = ['Paul King', 'application/pdf', 'false', 'POS.txt']
    assert metadataOfInterest.every{ it in metadata.names() }
    assert metadataOfInterest.collect{metadata.get(it) } == expectedMetadataValues
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessing/src/main/groovy/DetectSentences_OpenNLP.groovy:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
//@Grab('org.apache.opennlp:opennlp-tools:2.0.0')

import groovy.test.GroovyAssert
import opennlp.tools.sentdetect.*

def text = '''
The most referenced scientific paper of all time is "Protein measurement with the
Folin phenol reagent" by Lowry, O. H., Rosebrough, N. J., Farr, A. L. & Randall,
R. J. and was published in the J. BioChem. in 1951. It describes a method for
measuring the amount of protein (even as small as 0.2 γ, were γ is the specific
weight) in solutions and has been cited over 300,000 times and can be found here:
https://www.jbc.org/content/193/1/265.full.pdf. Dr. Lowry completed
two doctoral degrees under an M.D.-Ph.D. program from the University of Chicago
before moving to Harvard under A. Baird Hastings. He was also the H.O.D of
Pharmacology at Washington University in St. Louis for 29 years.
'''

// use a helper to cache models
def helper = new ResourceHelper('https://opennlp.sourceforge.net/models-1.5')

def model = new SentenceModel(helper.load('en-sent'))
def detector = new SentenceDetectorME(model)
def sentences = detector.sentDetect(text)
assert text.count('.') == 28
assert sentences.size() == 4
println "Found ${sentences.size()} sentences:\n" + sentences.join('\n\n')

if (GroovyAssert.isAtLeastJdk('11.0')) {
    // also try the built-in model
    detector = new SentenceDetectorME('en')
    sentences = detector.sentDetect(text)
    println "\nThe built-in model found ${sentences.size()} sentences:\n" + sentences.join('\n\n')
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessing/src/main/groovy/DetectSentences_Smile.groovy:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

import smile.nlp.normalizer.SimpleNormalizer
import smile.nlp.tokenizer.SimpleSentenceSplitter

def text = '''
The most referenced scientific paper of all time is "Protein measurement with the
Folin phenol reagent" by Lowry, O. H., Rosebrough, N. J., Farr, A. L. & Randall,
R. J. and was published in the J. BioChem. in 1951. It describes a method for
measuring the amount of protein (even as small as 0.2 γ, were γ is the specific
weight) in solutions and has been cited over 300,000 times and can be found here:
https://www.jbc.org/content/193/1/265.full.pdf. Dr. Lowry completed
two doctoral degrees under an M.D.-Ph.D. program from the University of Chicago
before moving to Harvard under A. Baird Hastings. He was also the H.O.D of
Pharmacology at Washington University in St. Louis for 29 years.
'''

// normalize the trimmed text first, then split the normalized text into sentences
def normalized = SimpleNormalizer.instance.normalize(text.trim())
def sentences = SimpleSentenceSplitter.instance.split(normalized)
def found = sentences.size()

// the sample contains 28 periods, so splitting on '.' alone would be wrong
assert text.count('.') == 28
assert found == 6 // 2 cases detected by OpenNLP not handled here
println "Found ${found} sentences:\n" + sentences.join('\n\n')

--------------------------------------------------------------------------------
/subprojects/LanguageProcessing/src/main/groovy/DetectTriplesAnnotation_CoreNLP.groovy:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
 * limitations under the License.
 */

import edu.stanford.nlp.pipeline.Annotation
import edu.stanford.nlp.pipeline.StanfordCoreNLP

import static edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation
import static edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation
import static edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation
import static edu.stanford.nlp.semgraph.SemanticGraph.OutputFormat.LIST
import static edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation

// Sample text: one sentence per line.
def text = '''
Paul has two sisters, Maree and Christine.
No wise fish would go anywhere without a porpoise.
His bark was much worse than his bite.
The Groovy in Action book is a bargain at $50, or indeed any price.
The conference wrapped up yesterday at 5:30 p.m. in Copenhagen, Denmark.
I saw Ms. May Smith waving to June Jones.
The parcel was passed from May to June.
'''

// Build a pipeline with the listed annotators and run it over the whole text.
def props = [annotators: 'tokenize,ssplit,pos,lemma,depparse,natlog,openie'] as Properties
def pipeline = new StanfordCoreNLP(props)

def doc = new Annotation(text)
pipeline.annotate(doc)

// For every detected sentence print its text, its enhanced dependencies in LIST format
// and, when any were produced, its relation triples.
int sentNo = 0
for (sentence in doc.get(SentencesAnnotation)) {
    println "\nSentence #${++sentNo}: ${sentence.get(TextAnnotation)}"

    println sentence.get(EnhancedDependenciesAnnotation).toString(LIST)

    def triples = sentence.get(RelationTriplesAnnotation)
    if (triples) {
        println 'Triples:'
        for (triple in triples) {
            // inside 'with', confidence and the *Gloss() calls resolve against the triple
            triple.with {
                println "$confidence\t${subjectGloss()}\t${relationGloss()}\t${objectGloss()}"
            }
        }
    }
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessing/src/main/groovy/DetectTriplesPOS_CoreNLP.groovy:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import edu.stanford.nlp.simple.Document

// Same sample text as the annotation-based variant of this script.
def text = '''
Paul has two sisters, Maree and Christine.
No wise fish would go anywhere without a porpoise.
His bark was much worse than his bite.
The Groovy in Action book is a bargain at $50, or indeed any price.
The conference wrapped up yesterday at 5:30 p.m. in Copenhagen, Denmark.
I saw Ms. May Smith waving to June Jones.
The parcel was passed from May to June.
'''

// For each sentence print TAG(word) for every word, then the sentence's OpenIE triples.
def doc = new Document(text)
for (sent in doc.sentences()) {
    print '\nPOS:\n\t'
    def words = sent.words()
    // posTags() is indexed in step with words(): tag i is printed next to word i
    sent.posTags().eachWithIndex { String entry, int i ->
        print "$entry(${words[i]}) "
    }
    println('\nTriples:')
    for (triple in sent.openieTriples()) {
        // inside 'with', confidence and the *Gloss() calls resolve against the triple
        triple.with {
            println "\t$confidence\t${subjectGloss()}\t${relationGloss()}\t${objectGloss()}"
        }
    }
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessing/src/main/groovy/DetectTriples_MinIE.groovy:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import de.uni_mannheim.minie.MinIE
import de.uni_mannheim.utils.coreNLP.CoreNLPUtils

// Sample sentences; each one is processed independently below.
def sentences = [
    'Paul has two sisters, Maree and Christine.',
    'No wise fish would go anywhere without a porpoise',
    'His bark was much worse than his bite',
    'The Groovy in Action book is a bargain at $50, or indeed any price.',
    'The conference wrapped up yesterday at 5:30 p.m. in Copenhagen, Denmark.',
    'I saw Ms. May Smith waving to June Jones.',
    'The parcel was passed from May to June.'
]

def parser = CoreNLPUtils.StanfordDepNNParser()
sentences.each { sentence ->
    def minie = new MinIE(sentence, parser, MinIE.Mode.SAFE)

    println "\nInput sentence: $sentence"
    println '============================='
    println 'Extractions:'
    for (ap in minie.propositions) {
        println "\tTriple: $ap.tripleAsString"
        def attr = ap.attribution.attributionPhrase ? ap.attribution.toStringCompact() : 'NONE'
        println "\tFactuality: $ap.factualityAsString\tAttribution: $attr"
        println '\t----------'
    }
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessing/src/main/groovy/ResourceHelper.groovy:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.nio.channels.Channels
import java.nio.channels.ReadableByteChannel

/**
 * Helper class to save downloading bin models over the net each time.
 * A model is downloaded on first use and stored as a local '.bin' file;
 * later calls return the stored file without touching the network.
 */
class ResourceHelper {
    private File parent // place to store file
    private String urlPrefix

    /**
     * @param urlPrefix Where to download resource from; trailing slashes are ignored
     */
    ResourceHelper(String urlPrefix) {
        // load() adds the '/' separator itself, so drop any trailing ones to avoid '//'
        this.urlPrefix = urlPrefix.replaceAll('/+$', '')
        // place them alongside wherever build tool/IDE placed dummy resource
        // (toURI() decodes escaped characters such as %20 that URL.file would keep)
        parent = new File(getClass().classLoader.getResource('dummy.txt').toURI()).parentFile
    }

    /**
     * Returns the local copy of "${suffix}.bin", downloading it first if it is not stored yet.
     *
     * @param suffix model name without the '.bin' extension
     * @return the stored model file
     * @throws IOException if the download or the final move fails; no partial '.bin' is left behind
     */
    File load(String suffix) {
        def file = new File(parent, suffix + '.bin')
        if (!file.exists()) {
            URL url = new URL("$urlPrefix/${suffix}.bin")
            println "Downloading $suffix"
            // Download into a side file and move it into place only once complete, so an
            // interrupted transfer is never mistaken for a cached model on the next run.
            def partial = new File(parent, suffix + '.bin.part')
            try {
                // withCloseable closes both streams (and their channels) even on failure
                url.openStream().withCloseable { input ->
                    new FileOutputStream(partial).withCloseable { output ->
                        ReadableByteChannel source = Channels.newChannel(input)
                        output.channel.transferFrom(source, 0, Long.MAX_VALUE)
                    }
                }
                java.nio.file.Files.move(partial.toPath(), file.toPath(),
                        java.nio.file.StandardCopyOption.REPLACE_EXISTING)
            } finally {
                partial.delete() // no-op after a successful move
            }
        }
        file
    }
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessing/src/main/groovy/SentimentAnalysis_CoreNLP.groovy:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import edu.stanford.nlp.simple.Document

def doc = new Document('''
StanfordNLP is fantastic!
Groovy is great fun!
Math can be hard!
22 | ''') 23 | for (sent in doc.sentences()) { 24 | println "${sent.toString().padRight(40)} ${sent.sentiment()}" 25 | } 26 | -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/resources/OutputTransforms.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import javax.swing.JLabel 17 | 18 | transforms << { result -> 19 | if (result.getClass().name.endsWith('List')) { 20 | return new JLabel(pretty(result)) 21 | } 22 | } 23 | 24 | def pretty(lines) { 25 | binding.nextColor = 0 26 | binding.colors = [:].withDefault{ 27 | def COLORS = [ 28 | '#0088FF', '#2B5F19', '#DF401C', '#A4772B', '#C54AA8', 29 | '#895C9F', '#5B6AA4', '#5B6633', '#FC5F00', '#561B06', 30 | '#32CD32', '#0000CD', '#CD853F', '#8B4513', '#57411B'] 31 | result = COLORS[binding.nextColor] 32 | binding.nextColor = (binding.nextColor + 1) % COLORS.size() 33 | result 34 | } 35 | """ 36 |
40 | ${lines.collect{line -> prettyLine(line)}.join('\n')} 41 |
42 | """ 43 | } 44 | 45 | def prettyLine(line) { 46 | def result = '' 47 | def PAT = ~/([^(]+)?(\b[\p{IsHan}\w][\p{IsHan}\w$]*)\(([^)]*)\)\s*/ 48 | def m = line =~ PAT 49 | (0..
54 | $b
55 | $a
/$ 56 | } 57 | if (!m.matches()) result += "
" 58 | result + '
${line.substring(m.last)}
' 59 | } 60 | 61 | -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/resources/PartsOfSpeech.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/subprojects/LanguageProcessing/src/main/resources/PartsOfSpeech.pdf -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/resources/dummy.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/subprojects/LanguageProcessing/src/main/resources/dummy.txt -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/resources/rt-polarity-source.txt: -------------------------------------------------------------------------------- 1 | Source: https://github.com/datumbox/NaiveBayesClassifier/tree/master/resources/datasets 2 | Original source: http://www.cs.cornell.edu/people/pabo/movie-review-data/ -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/resources/rt-polarity.neg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/subprojects/LanguageProcessing/src/main/resources/rt-polarity.neg -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/resources/training.language.en.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/subprojects/LanguageProcessing/src/main/resources/training.language.en.txt -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/resources/training.language.es.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/subprojects/LanguageProcessing/src/main/resources/training.language.es.txt -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/resources/training.language.fr.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/subprojects/LanguageProcessing/src/main/resources/training.language.fr.txt -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/resources/training.language.id.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/subprojects/LanguageProcessing/src/main/resources/training.language.id.txt -------------------------------------------------------------------------------- /subprojects/LanguageProcessing/src/main/resources/training.language.source.txt: -------------------------------------------------------------------------------- 1 | de: https://github.com/datumbox/NaiveBayesClassifier/tree/master/resources/datasets/training.language.de.txt 2 | en,es,fr,id: derived from https://www.kaggle.com/zarajamshaid/language-identification-datasst -------------------------------------------------------------------------------- 
/subprojects/LanguageProcessingDjl/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Language processing with DJL and TensorFlow 18 | 19 | Neural networks with numerous layers of nodes allow for more complex, rich and _deeper_ processing and understanding. 20 | This example uses a universal sentence encoder model from TensorFlow Hub. 21 | It uses a pre-trained model and the 22 | [Deep Java Library](https://djl.ai/) backed by the 23 | [TensorFlow](https://www.tensorflow.org/) engine. 24 | 25 | ![Text similarity heatmap](../../docs/images/textsimularityheatmap.png) 26 | 27 | Groovy code examples can be found in the [src/main/groovy](src/main/groovy) subdirectory. 28 | If you have opened the repo in IntelliJ (or your favourite IDE) you should be able to execute the examples directly in the IDE. 29 | 30 | __Requirements__: The code has been tested on JDK8, JDK11 and JDK17. 31 | -------------------------------------------------------------------------------- /subprojects/LanguageProcessingDjl/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
*/
import org.apache.tools.ant.taskdefs.condition.Os

apply plugin: 'groovy'
apply plugin: 'application'

repositories {
    mavenCentral()
}

ext {
    appName = 'UniversalSentenceEncoder'
    // Smile version: the current one when running on JDK17 or later, otherwise the previous one
    sv = JavaVersion.current().isCompatibleWith(JavaVersion.VERSION_17) ? smileVersion : smilePreviousVersion
}

application {
    mainClass = appName
}

tasks.named('run') {
    description = "Run $appName as a JVM application/Groovy script"
}

dependencies {
    implementation "ai.djl:api:$djlVersion"
    implementation "org.apache.groovy:groovy:$groovy4Version"
    // the Smile modules are added without their transitive dependencies
    ['core', 'base', 'plot'].each { module ->
        implementation("com.github.haifengl:smile-$module:$sv") {
            transitive = false
        }
    }
    runtimeOnly "ai.djl.tensorflow:tensorflow-engine:$djlVersion"
    runtimeOnly "ai.djl.tensorflow:tensorflow-model-zoo:$djlVersion"
    //runtimeOnly "ai.djl.tensorflow:tensorflow-native-auto:2.10.1"
    // pick the native TensorFlow classifier for the build host;
    // MAC is tested before UNIX, in the same order as the original if/else chain
    def nativeClassifier = Os.isFamily(Os.FAMILY_WINDOWS) ? 'win-x86_64'
            : Os.isFamily(Os.FAMILY_MAC) ? 'osx-x86_64'
            : Os.isFamily(Os.FAMILY_UNIX) ? 'linux-x86_64'
            : null
    if (nativeClassifier) {
        runtimeOnly "ai.djl.tensorflow:tensorflow-native-cpu:2.10.1:$nativeClassifier"
    }
    runtimeOnly "org.slf4j:slf4j-jdk14:$slf4jVersion"
    // used by smile-plot
    runtimeOnly 'org.swinglabs:swingx:1.6.1'
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessingNLPCraft/build.gradle:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 *
you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
plugins {
    // avoids "The filename or extension is too long" issue on windows
    id "com.github.ManifestClasspath" version "0.1.0-RELEASE"
}
apply plugin: 'groovy'
apply plugin: 'application'

// Configure the main class through the application extension, as the other build files
// in this repo do, instead of the deprecated project-level 'mainClassName' property.
application {
    mainClass = 'Lights'
}

repositories {
//    mavenLocal()
    mavenCentral()
}

dependencies {
    // Using Groovy 3 because model is compiled with Groovy 3 and calls super
    implementation "org.codehaus.groovy:groovy:$groovy3Version"
    implementation "org.codehaus.groovy:groovy-ant:$groovy3Version"
    // only required for LightSwitchKotlinModel
    implementation 'org.jetbrains.kotlin:kotlin-stdlib:1.5.32'

    implementation "org.apache.nlpcraft:nlpcraft:$nlpcraftVersion"
    implementation "org.apache.nlpcraft:nlpcraft-example-lightswitch:$nlpcraftVersion"

    // Alternative: instead of above two lines
    // If you have trouble running the example in the normal way,
    // first consider running on the command-line using Gradle.
    // If that fails consider downloading the nlpcraft "all" jar to the lib dir.
    // Comment out the above nlpcraft line and uncomment the next line instead.
//    implementation fileTree(dir: 'lib', include: '*.jar')
}

// Fails the build early when it is not running on JDK11.
tasks.register('checkCompatibility') {
    doLast {
        assert JavaVersion.current().isJava11(), "JDK11 is required but found ${JavaVersion.current()}!"
    }
}

// lazily wire the JDK check in front of 'run' (no eager task realisation)
tasks.named('run') {
    dependsOn 'checkCompatibility'
}

--------------------------------------------------------------------------------
/subprojects/LanguageProcessingSparkNLP/README.md:
--------------------------------------------------------------------------------
# Language processing with Spark NLP

--------------------------------------------------------------------------------
/subprojects/LanguageProcessingSparkNLP/build.gradle:
--------------------------------------------------------------------------------
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
15 |  */
16 | apply plugin: 'groovy'
17 | apply plugin: 'application'
18 | // Build script for the Spark NLP entity-detection example (main class named by appName below).
19 | repositories {
20 |     mavenCentral()
21 | //    mavenLocal()
22 | }
23 |
24 | ext {
25 |     appName = 'DetectEntities_SparkNLP'
26 |     sparkVariant = '2.12' // artifact-name suffix shared by every Spark-related module below
27 |     sparkVersion = '3.4.0'
28 | }
29 |
30 | application {
31 |     mainClass = appName
32 | }
33 |
34 | tasks.named('run').configure {
35 |     description = "Run $appName as a JVM application/Groovy script"
36 | }
37 | // The suffix comes from sparkVariant everywhere; exclude maps use '+' so the values stay plain Strings.
38 | dependencies {
39 |     implementation "org.apache.groovy:groovy:$groovy4Version"
40 |     implementation("org.apache.spark:spark-mllib_$sparkVariant:$sparkVersion") {
41 |         exclude(group: 'org.scala-lang.modules', module: 'scala-parser-combinators_' + sparkVariant)
42 |         exclude(group: 'org.apache.spark', module: 'spark-graphx_' + sparkVariant)
43 |         exclude(group: 'com.fasterxml.jackson', module: 'jackson-bom')
44 |     }
45 |     implementation("com.johnsnowlabs.nlp:spark-nlp_$sparkVariant:4.4.3") {
46 |         exclude(group: 'com.github.universal-automata', module: 'liblevenshtein')
47 |         exclude(group: 'com.navigamez', module: 'greex')
48 |     }
49 |     implementation "com.fasterxml.jackson:jackson-bom:$jacksonVersion"
50 |     runtimeOnly "org.apache.spark:spark-core_$sparkVariant:$sparkVersion"
51 | }
52 | // versionInfo logs the Java home and version in use and stops unless running on JDK 8 or JDK 11.
53 | tasks.register('versionInfo') {
54 |     doLast {
55 |         File javaHome = new File(System.getProperty('java.home'))
56 |         logger.lifecycle "Using Java from $javaHome (version ${System.getProperty('java.version')})"
57 |         assert JavaVersion.current().isJava8() || JavaVersion.current().isJava11(), "JDK8 or JDK11 is required but found ${JavaVersion.current()}!"
58 |     }
59 | }
60 |
61 | run.dependsOn versionInfo
62 |
--------------------------------------------------------------------------------
/subprojects/Mnist/src/main/groovy/GroovyFXUtil.groovy:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     https://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | import groovy.transform.CompileStatic
17 | import javafx.embed.swing.SwingFXUtils
18 | import javafx.scene.canvas.Canvas
19 | import javafx.scene.canvas.GraphicsContext
20 | import javafx.scene.image.Image
21 | import javafx.scene.image.PixelFormat
22 | import javafx.scene.paint.Color
23 |
24 | import javax.imageio.ImageIO
25 |
26 | /**
27 |  * Static JavaFX helpers: wiping a drawing surface, reading an image's pixels
28 |  * and capturing a canvas as a small image.
29 |  */
30 | @CompileStatic
31 | class GroovyFXUtil {
32 |     // static helpers only, never instantiated
33 |     private GroovyFXUtil() {}
34 |
35 |     /** Fills a white square of side {@code size} at the origin, then leaves the fill colour set to black. */
36 |     static clear(GraphicsContext g, double size) {
37 |         g.fill = Color.WHITE
38 |         g.fillRect(0, 0, size, size)
39 |         g.fill = Color.BLACK
40 |     }
41 |
42 |     /** Returns the pixels of {@code img} as int ARGB values, one image row after another. */
43 |     static int[] imageToArray(Image img) {
44 |         int width = img.width as int
45 |         int height = img.height as int
46 |         int[] pixels = new int[height * width]
47 |         img.pixelReader.getPixels(0, 0, width, height, PixelFormat.intArgbInstance, pixels, 0, width)
48 |         return pixels
49 |     }
50 |
51 |     /** Encodes a snapshot of {@code canvas} as PNG in memory and reloads it with a requested size of 28 x 28. */
52 |     static Image snapshot(Canvas canvas) {
53 |         def fxImage = canvas.snapshot(null, null)
54 |         def pngBytes = new ByteArrayOutputStream()
55 |         ImageIO.write(SwingFXUtils.fromFXImage(fxImage, null), "png", pngBytes)
56 |         return new Image(new ByteArrayInputStream(pngBytes.toByteArray()), 28, 28, true, true)
57 |     }
58 | }
59
|
--------------------------------------------------------------------------------
/subprojects/Mnist/src/main/groovy/Gui.groovy:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     https://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | import static GroovyFXUtil.*
17 | import static MnistInfer.*
18 | import static Util.displayResult
19 | import static Util.maxIndex
20 | import static groovyx.javafx.GroovyFX.start
21 | import static javafx.scene.paint.Color.WHITE
22 | // GroovyFX window: a drawing canvas on top, Clear/Predict buttons in the middle, a result text area below.
23 | def size = 280 // canvas width and height; also the side of the square wiped by clear()
24 | def pen = 12 // each drag event paints an oval 2 * pen wide, centred on the pointer
25 | load(getClass()) // MnistInfer.load: reads the two weight matrices from the 'weights' resource
26 |
27 | start {
28 |     stage(title: 'MNIST', visible: true) {
29 |         scene(id: "scene", fill: WHITE) {
30 |             borderPane {
31 |                 top {
32 |                     canvas(id: "canvas", width: size, height: size)
33 |                     def g = canvas.graphicsContext2D
34 |                     clear(g, size) // start from a white canvas with a black fill colour
35 |                     canvas.onMouseDragged { e -> g.fillOval e.x - pen, e.y - pen, pen * 2, pen * 2 }
36 |                 }
37 |                 center {
38 |                     hbox(alignment: 'Center') {
39 |                         button('Clear', onAction: { // wipe the drawing and reset the text area to ten empty entries
40 |                             clear(canvas.graphicsContext2D, size)
41 |                             out.text = displayResult([null] * 10, null)
42 |                         })
43 |                         button('Predict', onAction: { // canvas snapshot -> ARGB ints -> normalized doubles -> network output
44 |                             def result = query(normalize(imageToArray(snapshot(canvas))))
45 |                             def predictLabel = maxIndex(result)
46 |                             out.text = displayResult(result.data.collect{ it[0] }, predictLabel) // first element of each output row
47 |                         })
48 |                     }
49 |                 }
50 |                 bottom {
51 |                     textArea(displayResult([null] * 10, null), id: 'out', editable: false, prefColumnCount: 16)
52 |                 }
53 |             }
54 |         }
55 |     }
56 | }
57 |
--------------------------------------------------------------------------------
/subprojects/Mnist/src/main/groovy/MnistInfer.groovy:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     https://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | import groovy.transform.CompileStatic
17 | import org.apache.commons.math3.linear.MatrixUtils
18 | import org.apache.commons.math3.linear.RealMatrix
19 |
20 | import static Util.scalarSigmoid
21 | import static org.apache.commons.math3.linear.MatrixUtils.createColumnRealMatrix
22 |
23 | // inspired from https://github.com/ralscha/blog/tree/master/mnist
24 | @CompileStatic
25 | class MnistInfer {
26 |     private static RealMatrix inputHidden
27 |     private static RealMatrix hiddenOutput
28 |
29 |     static void load(Class klass) {
30 |         def weights = klass.classLoader.getResourceAsStream('weights')
31 |         try (ObjectInputStream ois = new ObjectInputStream(weights)) {
32 |             inputHidden = MatrixUtils.createRealMatrix((double[][]) ois.readObject())
33 |             hiddenOutput = MatrixUtils.createRealMatrix((double[][]) ois.readObject())
34 |         }
35 |     }
36 |
37 |     static RealMatrix query(double[] inputArray) {
38 |         RealMatrix inputs = createColumnRealMatrix(inputArray)
39 |         RealMatrix hiddenInputs = inputHidden * inputs
40 |         RealMatrix hiddenOutputs =
scalarSigmoid(hiddenInputs) 41 | RealMatrix finalInputs = hiddenOutput * hiddenOutputs 42 | return scalarSigmoid(finalInputs) 43 | } 44 | 45 | static double[] normalize(int[] img) { 46 | double[] result = new double[img.length] 47 | for (i in 0..> 16) & 0xff 49 | int green = (img[i] >> 8) & 0xff 50 | int blue = img[i] & 0xff 51 | result[i] = 1 - ((red + green + blue) / 765.0 * 999 + 1) / 1000 52 | } 53 | return result 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /subprojects/Mnist/src/main/groovy/MnistReader.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import java.nio.ByteBuffer 17 | import java.nio.file.Files 18 | import java.nio.file.Path 19 | import java.util.zip.GZIPInputStream 20 | 21 | static int[] getLabels(Path labelsFile) { 22 | ByteBuffer bb = ByteBuffer.wrap(decompress(labelsFile.bytes)) 23 | if (bb.int != 2049) throw new IOException("not a labels file") 24 | int numLabels = bb.int 25 | (0.. getImages(Path imagesFile) { 29 | ByteBuffer bb = ByteBuffer.wrap(decompress(Files.readAllBytes(imagesFile))) 30 | if (bb.int != 2051) throw new IOException("not an images file") 31 | 32 | int numImages = bb.int 33 | int numRows = bb.int 34 | int numColumns = bb.int 35 | List images = (0.. 
0) { 55 | out.write(buf, 0, n) 56 | } 57 | return out.toByteArray() 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /subprojects/Mnist/src/main/groovy/Mnist_Tribuo.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import org.tribuo.MutableDataset 18 | import org.tribuo.classification.LabelFactory 19 | import org.tribuo.classification.evaluation.LabelEvaluator 20 | import org.tribuo.classification.sgd.linear.LinearSGDTrainer 21 | import org.tribuo.classification.sgd.objectives.LogMulticlass 22 | import org.tribuo.datasource.IDXDataSource 23 | import org.tribuo.math.optimisers.AdaGrad 24 | 25 | import java.nio.file.Paths 26 | 27 | def labelFactory = new LabelFactory() 28 | def trainSource = new IDXDataSource(Paths.get("/path/to/train-images-idx3-ubyte.gz"), 29 | Paths.get("/path/to/train-labels-idx1-ubyte.gz"), labelFactory) 30 | def train = new MutableDataset(trainSource) 31 | def trainer = new LinearSGDTrainer(new LogMulticlass(), new AdaGrad(0.5), 5, 42) 32 | 33 | println '\nTraining ...' 34 | def model = trainer.train(train) 35 | 36 | def testSource = new IDXDataSource<>(Paths.get("/path/to/t10k-images-idx3-ubyte.gz"), 37 | Paths.get("/path/to/t10k-labels-idx1-ubyte.gz"), labelFactory) 38 | println '\nTesting ...' 
39 | def predictions = model.predict(testSource) 40 | 41 | def evaluator = new LabelEvaluator() 42 | def evaluation = evaluator.evaluate(model, predictions, testSource.provenance) 43 | println "\nEvaluation:\n$evaluation" 44 | -------------------------------------------------------------------------------- /subprojects/Mnist/src/main/resources/weights: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulk-asert/groovy-data-science/1f1e0134fd6e1bed5a4644ecf8816adbf973b635/subprojects/Mnist/src/main/resources/weights -------------------------------------------------------------------------------- /subprojects/Whiskey/src/main/groovy/ExploreStrength_CommonsCSV.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 |  */
16 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV
17 |
18 | // Reports which entries of whiskey.csv have the most '0' scores and which have the most '4' scores
19 | // across the twelve columns that follow the name column.
20 |
21 | // Resolve the resource through its URI so escaped characters in the path (e.g. spaces) are decoded
22 | def file = new File(getClass().classLoader.getResource('whiskey.csv').toURI())
23 | def parser = CSV.builder().setHeader().setSkipHeaderRecord(true).build()
24 | // Map each record's second column to the values of columns 2..13; withCloseable closes the reader
25 | // once every record has been consumed (it was previously left open)
26 | def rows = new FileReader(file).withCloseable { reader ->
27 |     parser.parse(reader).collectEntries { row -> [row[1], row.toList()[2..13]] }
28 | }
29 | def zeros = rows.collectEntries { e -> [e.key, e.value.count { it == '0' }] }
30 | def fours = rows.collectEntries { e -> [e.key, e.value.count { it == '4' }] }
31 | def maxZeros = zeros.values().max()
32 | def maxFours = fours.values().max()
33 | println "Distinctively flavored (most zeros): ${zeros.findAll{ e -> e.value == maxZeros }*.key.join(', ')}"
34 | println "Powerfully flavored (most fours): ${fours.findAll{ e -> e.value == maxFours }*.key.join(', ')}"
35 | /*
36 | Distinctively flavored (most zeros): Glenfiddich
37 | Powerfully flavored (most fours): Ardbeg, Lagavulin, Laphroig
38 | */
39 |
--------------------------------------------------------------------------------
/subprojects/Whiskey/src/main/groovy/GMeansPca_TablesawSmile.groovy:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     https://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 | */ 16 | import smile.clustering.GMeans 17 | import smile.feature.extraction.PCA 18 | import tech.tablesaw.api.* 19 | import tech.tablesaw.plotly.api.* 20 | 21 | def file = getClass().classLoader.getResource('whiskey.csv').file 22 | def helper = new TablesawUtil(file) 23 | def rows = Table.read().csv(file) 24 | 25 | def cols = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey', 26 | 'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral'] 27 | def data = rows.as().doubleMatrix(*cols) 28 | 29 | def dims = 4 // can be 2, 3 or 4 30 | def pca = PCA.fit(data).getProjection(dims) 31 | def projected = pca.apply(data) 32 | def adj = [1, 1, 1, 5] // adjustment to make graph pretty 33 | def kmax = 10 34 | def clusters = GMeans.fit(data, kmax) 35 | def labels = clusters.y.collect { "Cluster " + (it + 1) } 36 | rows = rows.addColumns( 37 | *(0.. 38 | DoubleColumn.create("PCA${idx+1}", (0.. 23 | def pkg = pkgMgr.getRepositoryPackageInfo(pkgName) 24 | if (pkg.installed) println "$pkgName already installed" 25 | else { 26 | pkg.install() 27 | println "Installing $pkgName ($pkg.packageMetaData.Category):" 28 | println "Title: $pkg.packageMetaData.Title" 29 | println "Description: $pkg.packageMetaData.Description" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /subprojects/Whiskey/src/main/groovy/KMeansPcaBeer_TablesawSmile.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import smile.clustering.KMeans 17 | import smile.data.DataFrame 18 | import smile.feature.transform.Standardizer 19 | import smile.feature.extraction.PCA 20 | import tech.tablesaw.api.* 21 | import tech.tablesaw.plotly.api.* 22 | 23 | def file = getClass().classLoader.getResource('beer.csv').file 24 | def helper = new TablesawUtil(file) 25 | def rows = Table.read().csv(file) 26 | 27 | String[] cols = ['alcohol', 'calories', 'sodium', 'cost'] 28 | def df = DataFrame.of(rows.as().doubleMatrix(*cols), cols) 29 | def scaler = Standardizer.fit(df) 30 | def data = scaler.apply(df).toArray() 31 | 32 | def dims = 4 // can be 2, 3 or 4 33 | def pca = PCA.fit(data).getProjection(dims) 34 | def projected = pca.apply(data) 35 | def adj = [1, 1, 1, 5] // scaling factor to make graph pretty 36 | def clusters = KMeans.fit(data, 3) 37 | println clusters 38 | def labels = clusters.y.collect { 'Cluster ' + (it + 1) } 39 | rows = rows.addColumns( 40 | *(0.. 41 | DoubleColumn.create("PCA${idx+1}", (0.. 36 | DoubleColumn.create("PCA${idx+1}", (0.. 46 | toAdd[0].setString('Cluster', 'Cluster ' + (idx+1)) 47 | (1..3).each { toAdd[0].setDouble('PCA' + it, centroids[idx][it-1]) } 48 | toAdd[0].setDouble('Centroid', 50) 49 | rows.append(toAdd) 50 | } 51 | 52 | def title = 'Clusters x Principal Components w/ centroids' 53 | def type = dims == 2 ? 
ScatterPlot : Scatter3DPlot 54 | helper.show(type.create(title, rows, *(1..dims).collect { "PCA$it" }, 'Centroid', 'Cluster'), 'KMeansClustersPcaCentroids') 55 | -------------------------------------------------------------------------------- /subprojects/Whiskey/src/main/groovy/KMeansPcaScree_Smile.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 |  */
16 | import smile.clustering.KMeans
17 | import smile.io.Read
18 | import smile.plot.swing.PlotGrid
19 | import smile.plot.swing.ScatterPlot
20 | import smile.plot.swing.ScreePlot
21 | import smile.feature.extraction.PCA
22 |
23 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV
24 |
25 | // Shows one grid holding a PCA scree plot followed by k-means scatter plots (k = 2..6) of whiskey.csv.
26 |
27 | // Resolve the resource through its URI so escaped characters in the path (e.g. spaces) are decoded
28 | def file = new File(getClass().classLoader.getResource('whiskey.csv').toURI())
29 | // Builder form of the deprecated CSVFormat.withFirstRecordAsHeader()
30 | def table = Read.csv(file.toPath(), CSV.builder().setHeader().setSkipHeaderRecord(true).build())
31 |
32 | String[] cols = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey',
33 |                  'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral']
34 | def data = table.select(cols).toArray()
35 |
36 | def p = 2 // number of dimensions in projection (2 or 3)
37 | def pca = PCA.fit(data).getProjection(p)
38 | def plots = [new ScreePlot(pca.varianceProportion()).canvas()]
39 | def projected = pca.apply(data)
40 | char mark = '#'
41 | String[] labels = (1..p).collect { "PCA$it" }
42 |
43 | // Clusters are fitted on the original columns but drawn on the PCA projection
44 | (2..6).each { k ->
45 |     println "Processing cluster size $k"
46 |     def clusters = KMeans.fit(data, k)
47 |     plots << ScatterPlot.of(projected, clusters.y, mark).canvas().tap { setAxisLabels(labels) }
48 | }
49 |
50 | new PlotGrid(*plots*.panel()).window()
51 |
--------------------------------------------------------------------------------
/subprojects/Whiskey/src/main/groovy/KMeansPca_TablesawSmile.groovy:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     https://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import smile.clustering.KMeans 17 | import smile.feature.extraction.PCA 18 | import tech.tablesaw.api.* 19 | import tech.tablesaw.plotly.api.* 20 | 21 | def file = getClass().classLoader.getResource('whiskey.csv').file 22 | def helper = new TablesawUtil(file) 23 | def rows = Table.read().csv(file) 24 | 25 | def cols = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey', 26 | 'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral'] 27 | def data = rows.as().doubleMatrix(*cols) 28 | 29 | def dims = 4 // can be 2, 3 or 4 30 | def pca = PCA.fit(data).getProjection(dims) 31 | def projected = pca.apply(data) 32 | def adj = [1, 1, 1, 5] // scaling factor to make graph pretty 33 | def clusters = KMeans.fit(data, 5) 34 | println clusters 35 | def labels = clusters.y.collect { 'Cluster ' + (it + 1) } 36 | rows = rows.addColumns( 37 | *(0.. 38 | DoubleColumn.create("PCA${i+1}", (0.. 39 | adj[i] * (projected[j][i] + adj[i]) 40 | }) 41 | }, 42 | StringColumn.create('Cluster', labels) 43 | ) 44 | 45 | def title = 'Clusters x Principal Components' 46 | def type = dims == 2 ? ScatterPlot : Scatter3DPlot 47 | helper.show(type.create(title, rows, *(1..dims).collect { "PCA$it" }, 'Cluster'), 'KMeansClustersPca') 48 | -------------------------------------------------------------------------------- /subprojects/Whiskey/src/main/groovy/KMeans_Elki.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     https://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | //@Grab('de.lmu.ifi.dbs.elki:elki:0.7.5')
17 | import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd
18 | import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase
19 | import de.lmu.ifi.dbs.elki.database.ids.DBIDIter
20 | import de.lmu.ifi.dbs.elki.result.AutomaticVisualization
21 | import de.lmu.ifi.dbs.elki.utilities.ELKIBuilder
22 | //import de.lmu.ifi.dbs.elki.visualization.VisualizerParameterizer
23 |
24 | // Runs ELKI's Lloyd k-means (k = 4) on whiskey.csv, hands the result to ELKI's automatic
25 | // visualization and prints each cluster's members followed by the cluster prototypes.
26 |
27 | def cols = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey',
28 |             'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral']
29 |
30 | def csvPath = getClass().classLoader.getResource('whiskey.csv').file
31 | def db = new ELKIBuilder(StaticArrayDatabase)
32 |     .with('parser.labelIndices', '0,1')
33 |     .with('dbc.in', csvPath)
34 |     .build()
35 | db.initialize()
36 |
37 | def clustering = new ELKIBuilder(KMeansLloyd).with('kmeans.k', 4).build().run(db)
38 |
39 | def visualizer = new ELKIBuilder(AutomaticVisualization)
40 |     .with(AutomaticVisualization.Parameterizer.WINDOW_TITLE_ID, 'Whiskey clusters')
41 |     //.with(VisualizerParameterizer.Parameterizer.ENABLEVIS_ID, 'scatter|parallel|key')
42 |     .build()
43 |
44 | // Attach the clustering to the database's result hierarchy before notifying the visualizer
45 | def hierarchy = db.hierarchy
46 | hierarchy.add(db, clustering)
47 | visualizer.processNewResult(hierarchy, clustering)
48 |
49 | def centroids = []
50 | clustering.allClusters.eachWithIndex { cluster, idx ->
51 |     def coords = cluster.model.prototype.collect { coord -> sprintf('%.3f', coord) }
52 |     centroids << "${cluster.nameAutomatic}${idx}: ${coords.join(', ')}"
53 |     println "${cluster.nameAutomatic}${idx}, ${cluster.size()} distilleries:"
54 |     def members = []
55 |     def ids = cluster.IDs.iter()
56 |     while (ids.valid()) {
57 |         members << db.getBundle(ids).data(2) // labels
58 |         ids.advance()
59 |     }
60 |     println members.join(', ')
61 | }
62 | println '\nCentroids: ' + cols.join(', ')
63 | for (line in centroids) {
64 |     println line
65 | }
66 |
--------------------------------------------------------------------------------
/subprojects/Whiskey/src/main/groovy/KMeans_Encog.groovy:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     https://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | import org.encog.ml.data.basic.BasicMLData
17 | import org.encog.ml.data.basic.BasicMLDataSet
18 | import org.encog.ml.kmeans.KMeansClustering
19 | import org.encog.util.csv.CSVFormat
20 | import org.encog.util.csv.ReadCSV
21 |
22 | // Loads the twelve score columns of whiskey.csv into an Encog data set, runs k-means with
23 | // three clusters (iteration(100)) and prints the input rows of each cluster.
24 |
25 | def cols = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey',
26 |             'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral']
27 |
28 | def csvPath = getClass().classLoader.getResource('whiskey.csv').file
29 | def reader = new ReadCSV(csvPath, true, CSVFormat.EG_FORMAT)
30 | def samples = new BasicMLDataSet()
31 | while (reader.next()) {
32 |     // one sample per CSV row, columns looked up by header name
33 |     def features = cols.collect { name -> reader.getDouble(name) } as double[]
34 |     samples.add(new BasicMLData(features))
35 | }
36 |
37 | def kmeans = new KMeansClustering(3, samples)
38 | kmeans.iteration(100)
39 |
40 | def found = kmeans.clusters
41 | for (idx in 0..<found.size()) {
42 |     println "Cluster $idx: "
43 |     for (pair in found[idx].createDataSet()) {
44 |         println pair.inputArray
45 |     }
46 | }
47 |
--------------------------------------------------------------------------------
/subprojects/Whiskey/src/main/groovy/PairwiseAutoGrid_Smile.groovy:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     https://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | import smile.io.Read
17 | import smile.plot.swing.PlotGrid
18 | import java.awt.Color
19 |
20 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV
21 |
22 | // Shows Smile's scatter-plot matrix (splom) for the twelve score columns of whiskey.csv.
23 |
24 | // Resolve the resource through its URI so escaped characters in the path (e.g. spaces) are decoded
25 | def file = new File(getClass().classLoader.getResource('whiskey.csv').toURI())
26 | // Builder form of the deprecated CSVFormat.withFirstRecordAsHeader()
27 | def table = Read.csv(file.toPath(), CSV.builder().setHeader().setSkipHeaderRecord(true).build())
28 |
29 | String[] cols = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey',
30 |                  'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral']
31 | table = table.select(cols)
32 | char mark = '#'
33 |
34 | PlotGrid.splom(table, mark, Color.BLUE).window()
35 |
--------------------------------------------------------------------------------
/subprojects/Whiskey/src/main/groovy/PairwiseHistogram_Smile.groovy:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     https://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | import smile.io.Read
17 | import smile.plot.swing.Histogram3D
18 | import smile.plot.swing.PlotGrid
19 | import java.awt.Color
20 |
21 | import static java.awt.Color.*
22 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV
23 |
24 | // Shows a grid with one 3D histogram per ordered pair of the twelve score columns of whiskey.csv.
25 |
26 | // Resolve the resource through its URI so escaped characters in the path (e.g. spaces) are decoded
27 | def file = new File(getClass().classLoader.getResource('whiskey.csv').toURI())
28 | // Builder form of the deprecated CSVFormat.withFirstRecordAsHeader()
29 | def table = Read.csv(file.toPath(), CSV.builder().setHeader().setSkipHeaderRecord(true).build())
30 |
31 | String[] cols = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey',
32 |                  'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral']
33 |
34 | Color[] colors = [CYAN, PINK, MAGENTA, ORANGE, GREEN, BLUE, RED, YELLOW]
35 |
36 | new PlotGrid(
37 |     *[cols, cols].combinations().collect { first, second ->
38 |         def f = table.column(first).toDoubleArray()
39 |         def s = table.column(second).toDoubleArray()
40 |         // transpose pairs the two columns up row by row: [[f0, s0], [f1, s1], ...]
41 |         Histogram3D.of([f, s].transpose() as double[][], 4, colors)
42 |     }*.canvas()*.panel()
43 | ).window()
44 |
--------------------------------------------------------------------------------
/subprojects/Whiskey/src/main/groovy/PairwiseManualGrid_Smile.groovy:
--------------------------------------------------------------------------------
1 | /*
2 |  * SPDX-License-Identifier: Apache-2.0
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     https://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 | */ 16 | import smile.io.Read 17 | import smile.plot.swing.PlotGrid 18 | import smile.plot.swing.ScatterPlot 19 | import java.awt.Color 20 | import java.awt.Font 21 | 22 | import static org.apache.commons.csv.CSVFormat.RFC4180 as CSV 23 | 24 | def file = new File(getClass().classLoader.getResource('whiskey.csv').file) 25 | def table = Read.csv(file.toPath(), CSV.withFirstRecordAsHeader()) 26 | 27 | def cols = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey', 28 | 'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral'] 29 | table = table.select(*cols) 30 | char mark = '#' 31 | 32 | new PlotGrid( 33 | *[cols, cols].combinations().collect { first, second -> 34 | def (f, s) = [cols.indexOf(first), cols.indexOf(second)] 35 | def color = new Color(72 + (f * 16), 72 + (s * 16), 200 - (f * 4) - (s * 4)) 36 | ScatterPlot.of(table, first, second, mark, color).canvas().tap { 37 | margin = 0.28 38 | setAxisLabels('', '') 39 | title = first.take(6) + ' x ' + second.take(6) 40 | titleColor = Color.DARK_GRAY 41 | titleFont = new Font("Arial", Font.ITALIC, 12) 42 | }.panel() 43 | } 44 | ).window() 45 | -------------------------------------------------------------------------------- /subprojects/Whiskey/src/main/groovy/StandaloneSpiderPlot.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | import static JFreeChartUtil.* 17 | 18 | def cols = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey', 19 | 'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral'] 20 | 21 | def data = [ 22 | [2.7037, 2.4444, 1.4074, 0.0370, 0.0000, 1.8519, 1.6667, 1.8519, 1.8889, 2.0370, 2.1481, 1.6667], 23 | [1.8500, 1.9000, 2.0000, 0.9500, 0.1500, 1.1000, 1.5000, 0.6000, 1.5500, 1.7000, 1.3000, 1.5000], 24 | [1.2667, 2.1333, 0.9333, 0.1333, 0.0000, 1.0667, 0.8000, 0.5333, 1.8000, 1.7333, 2.2667, 2.2667], 25 | [3.6667, 1.5000, 3.6667, 3.3333, 0.6667, 0.1667, 1.6667, 0.5000, 1.1667, 1.3333, 1.1667, 0.1667], 26 | [1.5000, 2.8889, 1.0000, 0.2778, 0.1667, 1.0000, 1.2222, 0.6111, 0.5556, 1.7778, 1.6667, 2.0000] 27 | ] 28 | def centroids = categoryDataset() 29 | data.eachWithIndex { nums, i -> 30 | nums.eachWithIndex { val, j -> centroids.addValue(val, "Cluster ${i + 1}", cols[j]) } 31 | } 32 | 33 | def centroidPlot = spiderWebPlot(dataset: centroids) 34 | def centroidChart = chart('Centroid spider plot', centroidPlot) 35 | 36 | SwingUtil.showH(centroidChart, size: [400, 400], title: 'Whiskey clusters') 37 | -------------------------------------------------------------------------------- /subprojects/Whiskey/src/main/groovy/TablesawUtil.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import tech.tablesaw.plotly.Plot 18 | import tech.tablesaw.plotly.components.Figure 19 | 20 | class TablesawUtil { 21 | private File parent 22 | 23 | /** 24 | * Creates the plot files in a suitable temporary location 25 | * determined from the parent of the passed file - typically 26 | * a build folder or IDE temporary folder. 27 | * 28 | * @param filename Of a file in a suitable temporary directory 29 | */ 30 | TablesawUtil(String filename) { 31 | parent = new File(filename).parentFile 32 | } 33 | 34 | def show(Figure figure, String filename) { 35 | def file = new File(parent, filename + '.html') 36 | try { 37 | Plot.show(figure, file) 38 | } catch(ex) { 39 | println "Unable to show file '$file' due to '$ex.message'" 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /subprojects/Whiskey/src/main/groovy/XMeansPca_TablesawSmile.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | import smile.clustering.XMeans 17 | import smile.feature.extraction.PCA 18 | import tech.tablesaw.api.* 19 | import tech.tablesaw.plotly.api.* 20 | 21 | def file = getClass().classLoader.getResource('whiskey.csv').file 22 | def helper = new TablesawUtil(file) 23 | def rows = Table.read().csv(file) 24 | 25 | def cols = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey', 26 | 'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral'] 27 | def data = rows.as().doubleMatrix(*cols) 28 | 29 | def dims = 4 // can be 2, 3 or 4 30 | def pca = PCA.fit(data).getProjection(dims) 31 | def projected = pca.apply(data) 32 | def adj = [1, 1, 1, 5] 33 | def kmax = 10 34 | def clusters = XMeans.fit(data, kmax) 35 | def labels = clusters.y.collect { 'Cluster ' + (it + 1) } 36 | rows = rows.addColumns( 37 | *(0.. 38 | DoubleColumn.create("PCA${idx+1}", (0.. 43 | if (name.startsWith('Whiskey')) { 44 | def subtask = tasks.register("run$name", JavaExec) { 45 | dependsOn compileGroovy 46 | group 'Application' 47 | description "Run ${name}.groovy as a JVM application/Groovy script" 48 | classpath = sourceSets.main.runtimeClasspath 49 | mainClass = name 50 | } 51 | runAll.configure { 52 | dependsOn subtask 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /subprojects/WhiskeyBeam/src/main/groovy/AssignClusters.java: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import groovy.lang.Closure; 17 | import org.apache.beam.sdk.transforms.DoFn; 18 | import org.apache.beam.sdk.values.KV; 19 | import org.apache.beam.sdk.values.PCollectionView; 20 | 21 | import java.io.IOException; 22 | 23 | public class AssignClusters extends DoFn> { 24 | final private PCollectionView centroidsView; 25 | final private Closure> clos; 26 | 27 | public AssignClusters(PCollectionView centroidsView, Closure> clos) { 28 | this.centroidsView = centroidsView; 29 | this.clos = clos; 30 | } 31 | 32 | @ProcessElement 33 | public void processElement(@Element Point pt, OutputReceiver> out, ProcessContext c) throws IOException { 34 | out.output(clos.call(pt, c.sideInput(centroidsView))); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /subprojects/WhiskeyBeam/src/main/groovy/Log.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import org.apache.beam.sdk.transforms.DoFn 17 | import org.apache.beam.sdk.transforms.PTransform 18 | import org.apache.beam.sdk.transforms.ParDo 19 | import org.apache.beam.sdk.values.PCollection 20 | import org.slf4j.Logger 21 | import org.slf4j.LoggerFactory 22 | 23 | class Log { 24 | private static final Logger LOGGER = LoggerFactory.getLogger(Log.class) 25 | private Log() { } 26 | 27 | static PTransform, PCollection> ofElements() { 28 | new LoggingTransform<>() 29 | } 30 | 31 | private static class LoggingTransform extends PTransform, PCollection> { 32 | @Override 33 | PCollection expand(PCollection input) { 34 | return input.apply(ParDo.of(new DoFn() { 35 | @DoFn.ProcessElement 36 | void processElement(@DoFn.Element T element, DoFn.OutputReceiver out) { 37 | LOGGER.info(element.toString()) 38 | out.output(element) 39 | } 40 | })) 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /subprojects/WhiskeyBeam/src/main/groovy/MeanDoubleArrayCols.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | import org.apache.beam.sdk.transforms.SerializableFunction 17 | 18 | class MeanDoubleArrayCols implements SerializableFunction, Point> { 19 | @Override 20 | Point apply(Iterable inputs) { 21 | double[] result = new double[12] 22 | int count = 0 23 | for (Point input : inputs) { 24 | result.indices.each { 25 | result[it] += input.pts()[it] 26 | } 27 | count++ 28 | } 29 | result.indices.each { 30 | result[it] /= count 31 | } 32 | new Point(result) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /subprojects/WhiskeyBeam/src/main/groovy/Point.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | record Point(double[] pts) implements Serializable { 18 | private static Random r = new Random() 19 | private static Closure randomPoint = { dims -> 20 | (1..dims).collect { r.nextGaussian() + 2 } as double[] 21 | } 22 | 23 | static Point ofRandom(int dims) { 24 | new Point(randomPoint(dims)) 25 | } 26 | 27 | String toString() { 28 | "Point[${pts.collect{ sprintf '%.2f', it }.join('. 
')}]" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /subprojects/WhiskeyBeam/src/main/groovy/Points.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | record Points(List pts) implements Serializable { } 18 | -------------------------------------------------------------------------------- /subprojects/WhiskeyBeam/src/main/groovy/Squash.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | import groovy.transform.CompileStatic 18 | import groovy.transform.stc.POJO 19 | import org.apache.beam.sdk.transforms.Combine 20 | import org.apache.beam.sdk.values.KV 21 | 22 | @CompileStatic 23 | @POJO 24 | class Squash extends Combine.CombineFn, Accum, Points> { 25 | int k, dims 26 | 27 | @Override 28 | Accum createAccumulator() { 29 | new Accum() 30 | } 31 | 32 | @Override 33 | Accum addInput(Accum mutableAccumulator, KV input) { 34 | mutableAccumulator.pts << input.value 35 | mutableAccumulator 36 | } 37 | 38 | @Override 39 | Accum mergeAccumulators(Iterable accumulators) { 40 | Accum result = createAccumulator() 41 | accumulators.each { 42 | result.pts += it.pts 43 | } 44 | result 45 | } 46 | 47 | @Override 48 | Points extractOutput(Accum accumulator) { 49 | var pts = accumulator.pts 50 | if (k && dims) { 51 | while (pts.size() < k) { 52 | pts << Point.ofRandom(dims) 53 | } 54 | } 55 | new Points(pts) 56 | } 57 | 58 | static class Accum implements Serializable { 59 | List pts = [] 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /subprojects/WhiskeyBeam/src/main/resources/whiskey_source.txt: -------------------------------------------------------------------------------- 1 | source: https://www.niss.org/sites/default/files/ScotchWhisky01.txt -------------------------------------------------------------------------------- /subprojects/WhiskeyFlink/README.md: -------------------------------------------------------------------------------- 1 | # Whiskey clustering with Apache Flink® 2 | 3 | This project looks at scaling up the [Whiskey](../Whiskey/) 4 | project using [Apache Flink](https://flink.apache.org/). 5 | 6 | ![Clustering](../../docs/images/clustering_bottles.jpg) 7 | 8 | ## Flink overview 9 | 10 | [Apache Flink](https://flink.apache.org/) is a framework and distributed processing engine for stateful computations over unbounded and bounded data streams. 
11 | 12 | ## Implementation overview 13 | 14 | K-Means is the most common form of _centroid_ clustering 15 | and is described further in the main [Whiskey](../Whiskey#kmeans) project. 16 | Flink supports a machine learning library `ML` which includes a K-Means implementation. 17 | 18 | ### Running the examples 19 | 20 | Groovy code examples can be found in the [src/main/groovy](src/main/groovy) directory. 21 | 22 | You have several options for running the programs (see more details from the main [README](../../README.md#running-the-examples) in the root project): 23 | 24 | * If you have opened the repo in IntelliJ (or your favourite IDE) you should be able to execute the examples directly in the IDE. 25 | 26 | * From the command line, invoke the application using gradlew (use `./gradlew` on unix-like systems) with the run command.\ 27 | `gradlew :WhiskeyFlink:run` 28 | 29 | ### Requirements 30 | 31 | It has been tested on JDK8, JDK11 and JDK17. 32 | -------------------------------------------------------------------------------- /subprojects/WhiskeyFlink/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | apply plugin: 'groovy' 18 | apply plugin: 'application' 19 | 20 | ext.appName = 'WhiskeyFlink' 21 | 22 | application { 23 | mainClass = appName 24 | } 25 | 26 | tasks.named('run').configure { 27 | description = "Run $appName as a JVM application/Groovy script" 28 | } 29 | 30 | dependencies { 31 | implementation "org.apache.groovy:groovy:$groovy4Version" 32 | // implementation project(':ChartUtil') 33 | implementation("org.apache.flink:statefun-flink-core:$flinkStatefunVersion") { 34 | exclude(group: 'org.apache.flink', module: 'flink-streaming-java_2.12') 35 | exclude(group: 'org.apache.flink', module: 'flink-metrics-dropwizard') 36 | } 37 | implementation "org.apache.flink:flink-ml-uber-1.17:$flinkMlVersion" 38 | // implementation "org.apache.flink:flink-csv:$flinkVersion" 39 | implementation "org.apache.flink:flink-table-runtime:$flinkVersion" 40 | implementation "org.apache.flink:flink-connector-files:$flinkVersion" 41 | runtimeOnly "org.apache.flink:flink-table-api-java-bridge:$flinkVersion" 42 | runtimeOnly "org.apache.flink:flink-table-planner-loader:$flinkVersion" 43 | runtimeOnly "org.apache.flink:flink-clients:$flinkVersion" 44 | runtimeOnly "org.slf4j:slf4j-simple:$slf4jVersion" 45 | // implementation "org.apache.commons:commons-csv:$commonsCsvVersion" 46 | } 47 | 48 | tasks.register('versionInfo') { 49 | doLast { 50 | File javaHome = new File(System.getProperty('java.home')) 51 | logger.lifecycle "Using Java from $javaHome (version ${System.getProperty('java.version')})" 52 | } 53 | } 54 | 55 | run.dependsOn versionInfo 56 | -------------------------------------------------------------------------------- /subprojects/WhiskeyFlink/src/main/resources/whiskey_source.txt: -------------------------------------------------------------------------------- 1 | source: https://www.niss.org/sites/default/files/ScotchWhisky01.txt -------------------------------------------------------------------------------- /subprojects/WhiskeyIgnite/README.md: 
-------------------------------------------------------------------------------- 1 | # Whiskey clustering with Apache Ignite 2 | 3 | This project looks at scaling up the [Whiskey](../Whiskey/) 4 | project using [Apache Ignite](https://ignite.apache.org/). 5 | 6 | Additional slides related to this example can be found 7 | [here](https://speakerdeck.com/paulk/whiskey-groovy-ignite). 8 | 9 | ![Clustering](../../docs/images/clustering_bottles.jpg) 10 | 11 | ## Ignite overview 12 | 13 | [Apache Ignite](https://ignite.apache.org/) is a "distributed database" for high-performance computing with in-memory speed. 14 | 15 | ## Implementation overview 16 | 17 | K-Means is the most common form of _centroid_ clustering 18 | and is described further in the main [Whiskey](../Whiskey#kmeans) project. 19 | Ignite supports a machine learning library `ML` which includes a clustered K-Means implementation. 20 | 21 | ### Running the examples 22 | 23 | Groovy code examples can be found in the [src/main/groovy](src/main/groovy) directory. 24 | 25 | You have several options for running the programs (see more details from the main [README](../../README.md#running-the-examples) in the root project): 26 | 27 | * You can run the main examples online using a Jupyter/BeakerX notebook: 28 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/paulk-asert/groovy-data-science/master?filepath=subprojects%2FWhiskeyIgnite%2Fsrc%2Fmain%2Fnotebook%2FWhiskeyIgnite.ipynb) 29 | 30 | * If you have opened the repo in IntelliJ (or your favourite IDE) you should be able to execute the examples directly in the IDE. 31 | 32 | * From the command line, invoke the application using gradlew (use `./gradlew` on unix-like systems) with the run command.\ 33 | `gradlew :WhiskeyIgnite:run` 34 | 35 | * If the example has @Grab statements commented out at the top, you can cut and paste the examples into the groovyConsole 36 | and uncomment the grab statements. 
Make sure to cut and paste any helper classes too if appropriate. 37 | 38 | ### Requirements 39 | 40 | It has been tested on JDK8, JDK11 and JDK17. 41 | -------------------------------------------------------------------------------- /subprojects/WhiskeyIgnite/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | apply plugin: 'groovy' 18 | apply plugin: 'application' 19 | 20 | ext.appName = 'WhiskeyIgnite' 21 | 22 | application { 23 | mainClass = appName 24 | if (JavaVersion.current().isCompatibleWith(JavaVersion.VERSION_17)) { 25 | applicationDefaultJvmArgs = [ 26 | '--add-opens=java.base/java.io=ALL-UNNAMED', 27 | '--add-opens=java.base/java.lang=ALL-UNNAMED', 28 | '--add-opens=java.base/java.nio=ALL-UNNAMED', 29 | '--add-opens=java.base/java.util=ALL-UNNAMED' 30 | ] 31 | } 32 | } 33 | 34 | tasks.named('run').configure { 35 | description = "Run $appName as a JVM application/Groovy script" 36 | } 37 | 38 | dependencies { 39 | implementation "org.apache.groovy:groovy:$groovy4Version" 40 | // implementation project(':ChartUtil') 41 | implementation "org.apache.ignite:ignite-core:$igniteVersion" 42 | implementation "org.apache.ignite:ignite-ml:$igniteMlVersion" 43 | runtimeOnly "org.apache.ignite:ignite-spring:$igniteVersion" 44 | runtimeOnly 
"org.slf4j:slf4j-simple:$slf4jVersion" 45 | implementation "org.apache.commons:commons-csv:$commonsCsvVersion" 46 | } 47 | 48 | tasks.register('versionInfo') { 49 | doLast { 50 | File javaHome = new File(System.getProperty('java.home')) 51 | logger.lifecycle "Using Java from $javaHome (version ${System.getProperty('java.version')})" 52 | } 53 | } 54 | 55 | run.dependsOn versionInfo 56 | -------------------------------------------------------------------------------- /subprojects/WhiskeyIgnite/src/main/notebook/DrunkenSailor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "70ecff8d", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "// generate a random walk\n", 11 | "steps = 10000\n", 12 | "random = new Random()\n", 13 | "def walk(ssize) {\n", 14 | " def cur = 0.0\n", 15 | " def x = []\n", 16 | " def y = []\n", 17 | " for (i in 0..steps) {\n", 18 | " y[i] = cur\n", 19 | " x[i] = i\n", 20 | " cur += random.nextGaussian() * ssize\n", 21 | " }\n", 22 | " return [x:x, y:y]\n", 23 | "}\n", 24 | "// now x is time for the x-axis, and y is the random variable\n", 25 | "beer = walk(10)\n", 26 | "whiskey = walk(100)\n", 27 | "OutputCell.HIDDEN" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "id": "39f2a84b", 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "application/vnd.jupyter.widget-view+json": { 39 | "model_id": "4a6244d8-417a-4728-9a57-302179ef5950", 40 | "version_major": 2, 41 | "version_minor": 0 42 | }, 43 | "method": "display_data" 44 | }, 45 | "metadata": {}, 46 | "output_type": "display_data" 47 | } 48 | ], 49 | "source": [ 50 | "p = new TimePlot(title: \"Drunken Sailor Walks\", showLegend: true, lodThreshold: 2000)\n", 51 | "p << new Line(x:beer.x, y:beer.y, displayName:\"Beer Walk\")\n", 52 | "p << new Line(x:whiskey.x, y:whiskey.y, displayName:\"Whiskey Walk\")" 53 | ] 54 | 
} 55 | ], 56 | "metadata": { 57 | "kernelspec": { 58 | "display_name": "Groovy", 59 | "language": "groovy", 60 | "name": "groovy" 61 | }, 62 | "language_info": { 63 | "codemirror_mode": "groovy", 64 | "file_extension": ".groovy", 65 | "mimetype": "", 66 | "name": "Groovy", 67 | "nbconverter_exporter": "", 68 | "version": "2.5.6" 69 | }, 70 | "toc": { 71 | "base_numbering": 1, 72 | "nav_menu": {}, 73 | "number_sections": false, 74 | "sideBar": false, 75 | "skip_h1_title": false, 76 | "title_cell": "Table of Contents", 77 | "title_sidebar": "Contents", 78 | "toc_cell": false, 79 | "toc_position": {}, 80 | "toc_section_display": false, 81 | "toc_window_display": false 82 | } 83 | }, 84 | "nbformat": 4, 85 | "nbformat_minor": 5 86 | } 87 | -------------------------------------------------------------------------------- /subprojects/WhiskeyIgnite/src/main/resources/whiskey_source.txt: -------------------------------------------------------------------------------- 1 | source: https://www.niss.org/sites/default/files/ScotchWhisky01.txt -------------------------------------------------------------------------------- /subprojects/WhiskeySpark/README.md: -------------------------------------------------------------------------------- 1 | # Whiskey clustering with Apache Spark 2 | 3 | This project looks at scaling up the [Whiskey](../Whiskey/) 4 | project using [Apache Spark](https://spark.apache.org/). 5 | 6 | ![Clustering](../../docs/images/clustering_bottles.jpg) 7 | 8 | ## Spark overview 9 | 10 | [Apache Spark™](https://spark.apache.org/) is a multi-language engine for 11 | executing data engineering, data science, and machine learning on single-node 12 | machines or clusters. 13 | 14 | ## Implementation overview 15 | 16 | K-Means is the most common form of _centroid_ clustering 17 | and is described further in the main [Whiskey](../Whiskey#kmeans) project. 18 | Spark supports a machine learning library `MLlib` which includes a scalable K-Means implementation. 
19 | The `MLlib` K-Means implementation is a parallelized variant of the [k-means++](http://en.wikipedia.org/wiki/K-means%2B%2B) method 20 | called [kmeans||](http://theory.stanford.edu/~sergei/papers/vldb12-kmpar.pdf). 21 | 22 | ### Running the examples 23 | 24 | Groovy code examples can be found in the [src/main/groovy](src/main/groovy) directory. 25 | 26 | You have several options for running the programs (see more details from the main [README](../../README.md#running-the-examples) in the root project): 27 | 28 | * If you have opened the repo in IntelliJ (or your favourite IDE) you should be able to execute the examples directly in the IDE. 29 | 30 | * From the command line, invoke the application using gradlew (use `./gradlew` on unix-like systems) with the run command.\ 31 | `gradlew :WhiskeySpark:run` 32 | 33 | * If the example has @Grab statements commented out at the top, you can cut and paste the examples into the groovyConsole 34 | and uncomment the grab statements. Make sure to cut and paste any helper classes too if appropriate. 35 | 36 | ### Requirements 37 | 38 | It has been tested on JDK8 and JDK11. The current Spark versions are not compatible with JDK17. 39 | -------------------------------------------------------------------------------- /subprojects/WhiskeySpark/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * https://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | apply plugin: 'groovy' 17 | apply plugin: 'application' 18 | 19 | ext { 20 | appName = 'WhiskeySpark' 21 | sparkVariant = '2.13' 22 | sparkVersion = '3.5.1' 23 | } 24 | 25 | application { 26 | mainClass = appName 27 | } 28 | 29 | tasks.named('run').configure { 30 | description = "Run $appName as a JVM application/Groovy script" 31 | } 32 | 33 | dependencies { 34 | implementation "org.apache.groovy:groovy:$groovy5Version" 35 | implementation "org.apache.spark:spark-mllib_$sparkVariant:$sparkVersion" 36 | implementation "org.apache.spark:spark-sql_$sparkVariant:$sparkVersion" 37 | implementation "com.fasterxml.jackson:jackson-bom:$jacksonVersion" 38 | } 39 | 40 | tasks.register('versionInfo') { 41 | doLast { 42 | File javaHome = new File(System.getProperty('java.home')) 43 | logger.lifecycle "Using Java from $javaHome (version ${System.getProperty('java.version')})" 44 | assert JavaVersion.current().isJava8() || JavaVersion.current().isJava11(), "JDK8 or JDK11 is required but found ${JavaVersion.current()}!" 45 | } 46 | } 47 | 48 | run.dependsOn versionInfo 49 | -------------------------------------------------------------------------------- /subprojects/WhiskeySpark/src/main/resources/whiskey_source.txt: -------------------------------------------------------------------------------- 1 | source: https://www.niss.org/sites/default/files/ScotchWhisky01.txt -------------------------------------------------------------------------------- /subprojects/WhiskeyWayang/src/main/resources/whiskey_source.txt: -------------------------------------------------------------------------------- 1 | source: https://www.niss.org/sites/default/files/ScotchWhisky01.txt --------------------------------------------------------------------------------