├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── build.yml │ ├── generate_docs.yml │ ├── publish_dev_version.yml │ └── publish_release_version.yml ├── .gitignore ├── .grenrc.yml ├── .travis.yml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── build.gradle.kts ├── buildSrc ├── build.gradle.kts └── src │ └── main │ └── kotlin │ ├── Dependencies.kt │ ├── Helpers.kt │ ├── Plugins.kt │ ├── Projects.kt │ └── Versions.kt ├── core ├── build.gradle.kts └── src │ └── main │ └── scala │ └── org │ ├── apache │ └── spark │ │ └── sql │ │ ├── KotlinReflection.scala │ │ ├── KotlinWrappers.scala │ │ └── catalyst │ │ └── CatalystTypeConverters.scala │ └── jetbrains │ └── kotlinx │ └── spark │ └── extensions │ ├── DemoCaseClass.scala │ ├── KSparkExtensions.scala │ └── VarargUnwrapper.scala ├── docs ├── _config.yml └── quick-start-guide.md ├── examples ├── build.gradle.kts └── src │ └── main │ ├── kotlin │ └── org │ │ └── jetbrains │ │ └── kotlinx │ │ └── spark │ │ └── examples │ │ ├── Broadcasting.kt │ │ ├── CachedOperations.kt │ │ ├── Collect.kt │ │ ├── Group.kt │ │ ├── Join.kt │ │ ├── JupyterExample.ipynb │ │ ├── MLlib.kt │ │ ├── Main.kt │ │ ├── MapAndListOperations.kt │ │ ├── RddGroupCalculation.kt │ │ ├── UDFs.kt │ │ ├── UdtRegistration.kt │ │ ├── WordCount.kt │ │ └── streaming │ │ ├── JupyterStreamingExample.ipynb │ │ ├── KotlinDirectKafkaWordCount.kt │ │ ├── KotlinRecoverableNetworkWordCount.kt │ │ ├── KotlinSqlNetworkWordCount.kt │ │ ├── KotlinStatefulNetworkCount.kt │ │ └── Streaming.kt │ └── resources │ └── the-catcher-in-the-rye.txt ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── gradlew_all_versions ├── jupyter ├── build.gradle.kts └── src │ ├── main │ ├── kotlin │ │ └── org │ │ │ └── jetbrains │ │ │ └── kotlinx │ │ │ └── spark │ │ │ └── api │ │ │ └── jupyter │ │ │ ├── HtmlRendering.kt │ │ │ ├── Integration.kt │ │ │ ├── JupyterConfiguration.kt │ │ │ ├── Properties.kt │ │ │ ├── SparkIntegration.kt │ │ │ └── SparkStreamingIntegration.kt │ └── resources │ │ └── table.css │ └── test │ └── kotlin │ └── org │ └── jetbrains │ └── kotlinx │ └── spark │ └── api │ └── jupyter │ └── JupyterTests.kt ├── kotlin-spark-api ├── build.gradle.kts └── src │ ├── main │ └── kotlin │ │ └── org │ │ └── jetbrains │ │ └── kotlinx │ │ └── spark │ │ └── api │ │ ├── Arities.kt │ │ ├── Column.kt │ │ ├── Conversions.kt │ │ ├── DataStreamWriter.kt │ │ ├── Dataset.kt │ │ ├── Encoding.kt │ │ ├── GroupState.kt │ │ ├── Iterators.kt │ │ ├── KeyValueGroupedDataset.kt │ │ ├── Rdd.kt │ │ ├── RddDouble.kt │ │ ├── RddKeyValue.kt │ │ ├── Seq.kt │ │ ├── SparkSession.kt │ │ ├── StreamingKeyValues.kt │ │ ├── UDFRegister.kt │ │ ├── UserDefinedAggregateFunction.kt │ │ ├── UserDefinedFunction.kt │ │ ├── UserDefinedFunctionVararg.kt │ │ └── UserDefinedFunctions.kt │ └── test │ └── kotlin │ └── org │ └── jetbrains │ └── kotlinx │ └── spark │ └── api │ ├── ApiTest.kt │ ├── DatasetFunctionTest.kt │ ├── EncodingTest.kt │ ├── KafkaStreamingTest.kt │ ├── ProjectConfig.kt │ ├── RddTest.kt │ ├── StreamingTest.kt │ ├── TypeInferenceTest.kt │ ├── UDFTest.kt │ ├── UdtTest.kt │ └── struct │ └── model │ └── models.kt ├── qodana.yaml ├── scala-tuples-in-kotlin ├── build.gradle.kts └── src │ ├── main │ └── kotlin │ │ └── org │ │ └── jetbrains │ │ └── kotlinx │ │ └── spark │ │ └── api │ │ ├── Conversions.kt │ │ └── tuples │ │ ├── DestructuredTupleBuilders.kt │ │ ├── DropFunctions.kt │ │ ├── EmptyTuple.kt │ │ ├── MapTuples.kt │ │ ├── 
ProductDestructuring.kt │ │ ├── ProductExtensions.kt │ │ ├── ProductTextualAccessors.kt │ │ ├── TupleBuilders.kt │ │ ├── TupleConcatenation.kt │ │ ├── TupleCopy.kt │ │ ├── TupleDrop.kt │ │ ├── TupleExtending.kt │ │ ├── TupleSplit.kt │ │ ├── TupleTake.kt │ │ ├── TupleZip.kt │ │ └── TypedProductExtensions.kt │ └── test │ └── kotlin │ └── org │ └── jetbrains │ └── kotlinx │ └── spark │ └── api │ └── tuples │ └── TuplesTest.kt └── settings.gradle.kts /.gitattributes: -------------------------------------------------------------------------------- 1 | kotlin-spark-api/src/main/kotlin/org/jetbrains/spark/api/VarArities.kt linguist-generated 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "maven" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: 4 | push: 5 | branches: '*' 6 | # pull_request: 7 | # branches: '*' 8 | 9 | jobs: 10 | build-all-versions: 11 | timeout-minutes: 30 12 | strategy: 13 | matrix: 14 | scala: [ "2.12.17", "2.13.10" ] 15 | spark: [ "3.3.2", "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ] 16 | exclude: 17 | - scala: "2.13.10" 18 | spark: "3.1.3" 19 | - scala: "2.13.10" 20 | spark: "3.1.2" 21 | - scala: "2.13.10" 22 | spark: "3.1.1" 23 | - scala: "2.13.10" 24 | spark: "3.1.0" 25 | - scala: "2.13.10" 26 | spark: "3.0.3" 27 | - scala: "2.13.10" 28 | spark: "3.0.2" 29 | - scala: "2.13.10" 30 | spark: "3.0.1" 31 | - scala: "2.13.10" 32 | spark: "3.0.0" 33 | runs-on: ubuntu-latest 34 | 35 | steps: 36 | - uses: actions/checkout@v3 37 | 38 | - name: Set up JDK 11 39 | uses: actions/setup-java@v3 40 | with: 41 | distribution: adopt 42 | java-version: 11 43 | check-latest: true 44 | 45 | - name: Cache Gradle packages 46 | uses: actions/cache@v3 47 | with: 48 | path: | 49 | ~/.gradle/caches 50 | ~/.gradle/wrapper 51 | ~/.gradle/jdks 52 | key: ${{ runner.os }}-gradle-spark-${{ matrix.spark }}-${{ matrix.scala }} 53 | restore-keys: | 54 | ${{ runner.os }}-gradle- 55 | 56 | - name: Build with Gradle 57 | uses: gradle/gradle-build-action@v2 58 | with: 59 | arguments: | 60 | -Pspark=${{ matrix.spark }} 61 | -Pscala=${{ matrix.scala }} 62 | clean 63 | test 64 | --scan 65 | 66 | # qodana: 67 | # runs-on: ubuntu-latest 68 | # steps: 69 | # - uses: actions/checkout@v3 70 | # - name: 'Qodana Scan' 71 | # uses: JetBrains/qodana-action@v5.0.2 72 | 73 | 74 | 75 | # vim: ts=2:sts=2:sw=2:expandtab 76 | -------------------------------------------------------------------------------- /.github/workflows/generate_docs.yml: -------------------------------------------------------------------------------- 1 | name: Generate and publish docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - "release" 7 | 8 
| jobs: 9 | generate-and-publish-docs: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v3 14 | 15 | - name: Set up JDK 11 16 | uses: actions/setup-java@v3 17 | with: 18 | distribution: adopt 19 | java-version: 11 20 | check-latest: true 21 | 22 | - name: Cache Gradle packages 23 | uses: actions/cache@v3 24 | with: 25 | path: | 26 | ~/.gradle/caches 27 | ~/.gradle/wrapper 28 | ~/.gradle/jdks 29 | key: ${{ runner.os }}-gradle-spark-${{ matrix.spark }}-${{ matrix.scala }} 30 | restore-keys: | 31 | ${{ runner.os }}-gradle- 32 | 33 | - name: Set Swap Space 34 | uses: pierotofy/set-swap-space@master 35 | with: 36 | swap-size-gb: 12 37 | 38 | - name: Generate docs with Gradle 39 | uses: gradle/gradle-build-action@v2 40 | with: 41 | arguments: | 42 | clean 43 | build 44 | dokkaHtmlMultiModule 45 | --scan 46 | 47 | - name: Copy docs to "docs" branch 48 | uses: peaceiris/actions-gh-pages@v3 49 | with: 50 | github_token: ${{ secrets.GITHUB_TOKEN }} 51 | publish_branch: docs 52 | publish_dir: ./build/dokka/htmlMultiModule 53 | force_orphan: true 54 | 55 | 56 | -------------------------------------------------------------------------------- /.github/workflows/publish_dev_version.yml: -------------------------------------------------------------------------------- 1 | name: Deploy dev version to GH packages 2 | 3 | on: 4 | push: 5 | branches: 6 | - "main" 7 | 8 | jobs: 9 | build-and-deploy: 10 | strategy: 11 | matrix: 12 | scala: [ "2.12.17", "2.13.10" ] 13 | spark: [ "3.3.2", "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ] 14 | exclude: 15 | - scala: "2.13.10" 16 | spark: "3.1.3" 17 | - scala: "2.13.10" 18 | spark: "3.1.2" 19 | - scala: "2.13.10" 20 | spark: "3.1.1" 21 | - scala: "2.13.10" 22 | spark: "3.1.0" 23 | - scala: "2.13.10" 24 | spark: "3.0.3" 25 | - scala: "2.13.10" 26 | spark: "3.0.2" 27 | - scala: "2.13.10" 28 | spark: "3.0.1" 29 | - scala: "2.13.10" 30 | spark: "3.0.0" 31 | runs-on: ubuntu-latest 32 | permissions: 33 | contents: read 34 | packages: write 35 | 36 | steps: 37 | - uses: actions/checkout@v3 38 | 39 | - name: Set up JDK 11 40 | uses: actions/setup-java@v3 41 | with: 42 | distribution: adopt 43 | java-version: 11 44 | check-latest: true 45 | 46 | - name: Cache Gradle packages 47 | uses: actions/cache@v3 48 | with: 49 | path: | 50 | ~/.gradle/caches 51 | ~/.gradle/wrapper 52 | ~/.gradle/jdks 53 | key: ${{ runner.os }}-gradle-spark-${{ matrix.spark }}-${{ matrix.scala }} 54 | restore-keys: | 55 | ${{ runner.os }}-gradle- 56 | 57 | - name: Validate Gradle wrapper 58 | uses: gradle/wrapper-validation-action@v1 59 | 60 | - name: Setup Gradle 61 | uses: gradle/gradle-build-action@v2 62 | 63 | - name: Set Swap Space 64 | uses: pierotofy/set-swap-space@master 65 | with: 66 | swap-size-gb: 12 67 | 68 | - name: Deploy to GH Packages with Gradle 69 | env: 70 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 71 | run: > 72 | ./gradlew 73 | -Pspark=${{ matrix.spark }} 74 | -Pscala=${{ matrix.scala }} 75 | -PskipScalaTuplesInKotlin=${{ !(matrix.spark == '3.0.0' || matrix.scala == '2.13.10' && matrix.spark == '3.2.0') }} 76 | clean 77 | publishMavenPublicationToGitHubPackagesRepository 78 | --scan 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /.github/workflows/publish_release_version.yml: -------------------------------------------------------------------------------- 1 | name: Deploy release version to Maven Central 2 | 3 | on: 4 | 
release: 5 | types: [created] 6 | 7 | jobs: 8 | build-and-deploy-mvn-central: 9 | strategy: 10 | matrix: 11 | scala: [ "2.12.17", "2.13.10" ] 12 | spark: [ "3.3.2", "3.3.1", "3.3.0", "3.2.3", "3.2.2", "3.2.1", "3.2.0", "3.1.3", "3.1.2", "3.1.1", "3.1.0", "3.0.3", "3.0.2", "3.0.1", "3.0.0" ] 13 | exclude: 14 | - scala: "2.13.10" 15 | spark: "3.1.3" 16 | - scala: "2.13.10" 17 | spark: "3.1.2" 18 | - scala: "2.13.10" 19 | spark: "3.1.1" 20 | - scala: "2.13.10" 21 | spark: "3.1.0" 22 | - scala: "2.13.10" 23 | spark: "3.0.3" 24 | - scala: "2.13.10" 25 | spark: "3.0.2" 26 | - scala: "2.13.10" 27 | spark: "3.0.1" 28 | - scala: "2.13.10" 29 | spark: "3.0.0" 30 | runs-on: ubuntu-latest 31 | permissions: 32 | contents: read 33 | packages: write 34 | 35 | steps: 36 | - uses: actions/checkout@v3 37 | 38 | - name: Set up JDK 11 39 | uses: actions/setup-java@v3 40 | with: 41 | distribution: adopt 42 | java-version: 11 43 | check-latest: true 44 | 45 | - name: Cache Gradle packages 46 | uses: actions/cache@v3 47 | with: 48 | path: | 49 | ~/.gradle/caches 50 | ~/.gradle/wrapper 51 | ~/.gradle/jdks 52 | key: ${{ runner.os }}-gradle-spark-${{ matrix.spark }}-${{ matrix.scala }} 53 | restore-keys: | 54 | ${{ runner.os }}-gradle- 55 | 56 | - name: Validate Gradle wrapper 57 | uses: gradle/wrapper-validation-action@v1 58 | 59 | - name: Setup Gradle 60 | uses: gradle/gradle-build-action@v2 61 | 62 | - name: Set Swap Space 63 | uses: pierotofy/set-swap-space@master 64 | with: 65 | swap-size-gb: 12 66 | 67 | - name: Upload to Maven Central with Gradle 68 | env: 69 | ORG_GRADLE_PROJECT_mavenCentralUsername: ${{ secrets.OSSRH_USERNAME }} 70 | ORG_GRADLE_PROJECT_mavenCentralPassword: ${{ secrets.OSSRH_TOKEN }} 71 | ORG_GRADLE_PROJECT_signingInMemoryKey: ${{ secrets.MAVEN_GPG_PRIVATE_KEY }} 72 | ORG_GRADLE_PROJECT_signingInMemoryKeyPassword: ${{ secrets.MAVEN_GPG_PASSPHRASE }} 73 | run: > 74 | ./gradlew 75 | -Pspark=${{ matrix.spark }} 76 | -Pscala=${{ matrix.scala }} 77 | -PskipScalaTuplesInKotlin=${{ !(matrix.spark == '3.0.0' || matrix.scala == '2.13.10' && matrix.spark == '3.2.0') }} 78 | clean 79 | publishMavenPublicationToMavenCentralRepository 80 | --scan 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/git,vim,maven,emacs,spark,kotlin,jetbrains+all,visualstudiocode 3 | # Edit at https://www.gitignore.io/?templates=git,vim,maven,emacs,spark,kotlin,jetbrains+all,visualstudiocode 4 | 5 | ### Emacs ### 6 | # -*- mode: gitignore; -*- 7 | *~ 8 | \#*\# 9 | /.emacs.desktop 10 | /.emacs.desktop.lock 11 | *.elc 12 | auto-save-list 13 | tramp 14 | .\#* 15 | 16 | # Org-mode 17 | .org-id-locations 18 | *_archive 19 | 20 | # flymake-mode 21 | *_flymake.* 22 | 23 | # eshell files 24 | /eshell/history 25 | /eshell/lastdir 26 | 27 | # elpa packages 28 | /elpa/ 29 | 30 | # reftex files 31 | *.rel 32 | 33 | # AUCTeX auto folder 34 | /auto/ 35 | 36 | # cask packages 37 | .cask/ 38 | dist/ 39 | 40 | # Flycheck 41 | flycheck_*.el 42 | 43 | # server auth directory 44 | /server/ 45 | 46 | # projectiles files 47 | .projectile 48 | 49 | # directory configuration 50 | .dir-locals.el 51 | 52 | # network security 53 | /network-security.data 54 | 55 | 56 | ### Git ### 57 | # Created by git for backups. 
To disable backups in Git: 58 | # $ git config --global mergetool.keepBackup false 59 | *.orig 60 | 61 | # Created by git when using merge tools for conflicts 62 | *.BACKUP.* 63 | *.BASE.* 64 | *.LOCAL.* 65 | *.REMOTE.* 66 | *_BACKUP_*.txt 67 | *_BASE_*.txt 68 | *_LOCAL_*.txt 69 | *_REMOTE_*.txt 70 | 71 | ### JetBrains+all ### 72 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 73 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 74 | 75 | # User-specific stuff 76 | .idea/**/workspace.xml 77 | .idea/**/tasks.xml 78 | .idea/**/usage.statistics.xml 79 | .idea/**/dictionaries 80 | .idea/**/shelf 81 | 82 | # Generated files 83 | .idea/**/contentModel.xml 84 | 85 | # Sensitive or high-churn files 86 | .idea/**/dataSources/ 87 | .idea/**/dataSources.ids 88 | .idea/**/dataSources.local.xml 89 | .idea/**/sqlDataSources.xml 90 | .idea/**/dynamic.xml 91 | .idea/**/uiDesigner.xml 92 | .idea/**/dbnavigator.xml 93 | 94 | # Gradle 95 | .idea/**/gradle.xml 96 | .idea/**/libraries 97 | 98 | # Gradle and Maven with auto-import 99 | # When using Gradle or Maven with auto-import, you should exclude module files, 100 | # since they will be recreated, and may cause churn. Uncomment if using 101 | # auto-import. 102 | # .idea/modules.xml 103 | # .idea/*.iml 104 | # .idea/modules 105 | # *.iml 106 | # *.ipr 107 | 108 | # CMake 109 | cmake-build-*/ 110 | 111 | # Mongo Explorer plugin 112 | .idea/**/mongoSettings.xml 113 | 114 | # File-based project format 115 | *.iws 116 | 117 | # IntelliJ 118 | out/ 119 | 120 | # mpeltonen/sbt-idea plugin 121 | .idea_modules/ 122 | 123 | # JIRA plugin 124 | atlassian-ide-plugin.xml 125 | 126 | # Cursive Clojure plugin 127 | .idea/replstate.xml 128 | 129 | # Crashlytics plugin (for Android Studio and IntelliJ) 130 | com_crashlytics_export_strings.xml 131 | crashlytics.properties 132 | crashlytics-build.properties 133 | fabric.properties 134 | 135 | # Editor-based Rest Client 136 | .idea/httpRequests 137 | 138 | # Android studio 3.1+ serialized cache file 139 | .idea/caches/build_file_checksums.ser 140 | 141 | ### JetBrains+all Patch ### 142 | # Ignores the whole .idea folder and all .iml files 143 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 144 | 145 | .idea/ 146 | 147 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 148 | 149 | *.iml 150 | modules.xml 151 | .idea/misc.xml 152 | *.ipr 153 | 154 | # Sonarlint plugin 155 | .idea/sonarlint 156 | 157 | ### Kotlin ### 158 | # Compiled class file 159 | *.class 160 | 161 | # Log file 162 | *.log 163 | 164 | # BlueJ files 165 | *.ctxt 166 | 167 | # Mobile Tools for Java (J2ME) 168 | .mtj.tmp/ 169 | 170 | # Package Files # 171 | *.jar 172 | *.war 173 | *.nar 174 | *.ear 175 | *.zip 176 | *.tar.gz 177 | *.rar 178 | 179 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 180 | hs_err_pid* 181 | 182 | ### Maven ### 183 | target/ 184 | pom*.xml.tag 185 | pom*.xml.bak 186 | pom*.xml.releaseBackup 187 | pom*.xml.versionsBackup 188 | pom*.xml.next 189 | release.properties 190 | dependency-reduced-pom.xml 191 | buildNumber.properties 192 | .mvn/timing.properties 193 | .mvn/wrapper/maven-wrapper.jar 194 | .flattened-pom.xml 195 | 196 | ### Spark ### 197 | *#*# 198 | *.#* 199 | *.pyc 200 | *.pyo 201 | *.swp 202 | .DS_Store 203 | .cache 204 | .classpath 205 | .ensime 206 | .ensime_cache/ 207 | .ensime_lucene 208 | 
.generated-mima* 209 | .project 210 | .pydevproject 211 | .scala_dependencies 212 | .settings 213 | /lib/ 214 | R-unit-tests.log 215 | R/unit-tests.out 216 | R/cran-check.out 217 | R/pkg/vignettes/sparkr-vignettes.html 218 | R/pkg/tests/fulltests/Rplots.pdf 219 | build/*.jar 220 | build/apache-maven* 221 | build/scala* 222 | build/zinc* 223 | cache 224 | checkpoint 225 | conf/*.cmd 226 | conf/*.conf 227 | conf/*.properties 228 | conf/*.sh 229 | conf/*.xml 230 | conf/java-opts 231 | conf/slaves 232 | derby.log 233 | dev/create-release/*final 234 | dev/create-release/*txt 235 | dev/pr-deps/ 236 | docs/_site 237 | docs/api 238 | sql/docs 239 | sql/site 240 | lib_managed/ 241 | lint-r-report.log 242 | log/ 243 | logs/ 244 | project/boot/ 245 | project/build/target/ 246 | project/plugins/lib_managed/ 247 | project/plugins/project/build.properties 248 | project/plugins/src_managed/ 249 | project/plugins/target/ 250 | python/lib/pyspark.zip 251 | python/deps 252 | python/test_coverage/coverage_data 253 | python/test_coverage/htmlcov 254 | python/pyspark/python 255 | reports/ 256 | scalastyle-on-compile.generated.xml 257 | scalastyle-output.xml 258 | scalastyle.txt 259 | spark-*-bin-*.tgz 260 | spark-tests.log 261 | src_managed/ 262 | streaming-tests.log 263 | unit-tests.log 264 | work/ 265 | docs/.jekyll-metadata 266 | 267 | # For Hive 268 | TempStatsStore/ 269 | metastore/ 270 | metastore_db/ 271 | sql/hive-thriftserver/test_warehouses 272 | warehouse/ 273 | spark-warehouse/ 274 | 275 | # For R session data 276 | .RData 277 | .RHistory 278 | .Rhistory 279 | *.Rproj 280 | *.Rproj.* 281 | 282 | .Rproj.user 283 | 284 | # For SBT 285 | .jvmopts 286 | 287 | 288 | ### Vim ### 289 | # Swap 290 | [._]*.s[a-v][a-z] 291 | [._]*.sw[a-p] 292 | [._]s[a-rt-v][a-z] 293 | [._]ss[a-gi-z] 294 | [._]sw[a-p] 295 | 296 | # Session 297 | Session.vim 298 | Sessionx.vim 299 | 300 | # Temporary 301 | .netrwhist 302 | # Auto-generated tag files 303 | tags 304 | # Persistent undo 305 | [._]*.un~ 306 | 307 | ### VisualStudioCode ### 308 | .vscode/* 309 | !.vscode/settings.json 310 | !.vscode/tasks.json 311 | !.vscode/launch.json 312 | !.vscode/extensions.json 313 | 314 | ### VisualStudioCode Patch ### 315 | # Ignore all local history of files 316 | .history 317 | 318 | # End of https://www.gitignore.io/api/git,vim,maven,emacs,spark,kotlin,jetbrains+all,visualstudiocode 319 | 320 | 321 | # Created by https://www.gitignore.io/api/scala,gradle,kotlin 322 | # Edit at https://www.gitignore.io/?templates=scala,gradle,kotlin 323 | 324 | ### Kotlin ### 325 | # Compiled class file 326 | *.class 327 | 328 | # Log file 329 | *.log 330 | 331 | # BlueJ files 332 | *.ctxt 333 | 334 | # Mobile Tools for Java (J2ME) 335 | .mtj.tmp/ 336 | 337 | # Package Files # 338 | *.jar 339 | *.war 340 | *.nar 341 | *.ear 342 | *.zip 343 | *.tar.gz 344 | *.rar 345 | 346 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 347 | hs_err_pid* 348 | 349 | ### Scala ### 350 | *.metals 351 | 352 | ### Gradle ### 353 | .gradle 354 | build/ 355 | 356 | # Ignore Gradle GUI config 357 | gradle-app.setting 358 | 359 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) 360 | !gradle-wrapper.jar 361 | 362 | # Cache of project 363 | .gradletasknamecache 364 | 365 | # # Work around https://youtrack.jetbrains.com/issue/IDEA-116898 366 | # gradle/wrapper/gradle-wrapper.properties 367 | 368 | ### Gradle Patch ### 369 | **/build/ 370 | 371 | # End of https://www.gitignore.io/api/scala,gradle,kotlin 372 | 373 | 
csvpath/ 374 | orcpath/ 375 | 376 | .env 377 | **/.allure/ 378 | **/allure-results/ 379 | /generated_* 380 | -------------------------------------------------------------------------------- /.grenrc.yml: -------------------------------------------------------------------------------- 1 | --- 2 | dataSource: "commits" 3 | prefix: "" 4 | includeMessages: "commits" 5 | changelogFilename: "CHANGELOG.md" 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | jdk: 3 | openjdk8 4 | cache: 5 | directories: 6 | - $HOME/.m2 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.3.1 (05/08/2020) 4 | - [chore: 🤖 Version bump to 0.3.1](https://github.com/JetBrains/kotlin-spark-api/commit/88d4e31cd6d76fe1fc0b9c10a078658aa6178e36) - @asm0dey 5 | - [test: 💍 Adds integration and more unit tests](https://github.com/JetBrains/kotlin-spark-api/commit/bca98f77b743c9456882f49702f9cea55d153320) - @asm0dey 6 | - [fix: 🐛 Fixes incorrect order of fields in data class](https://github.com/JetBrains/kotlin-spark-api/commit/56bc82f9ff0de6948386e8c4a330ba715285ad55) - @asm0dey 7 | - [fix: 🐛 Fixes incorrect handling of primitive types](https://github.com/JetBrains/kotlin-spark-api/commit/868e47919d41309e1ebebc327acdd53e3df094ea) - @asm0dey 8 | - [Bump klaxon from 5.2 to 5.3](https://github.com/JetBrains/kotlin-spark-api/commit/edfd06d4895ded7772a42d7e13e2eb2aed8031bf) - @dependabot[bot] 9 | - [Updated README.md (#32)](https://github.com/JetBrains/kotlin-spark-api/commit/c2cc6fe7e7a4d5e0d501ab0a75e371c6cc041d77) - @MKhalusova 10 | - [Bump kotest.version from 4.1.1 to 4.1.3](https://github.com/JetBrains/kotlin-spark-api/commit/a1ca5fe8c11c7aeeb70a8335c2252549e1cc7f81) - @dependabot[bot] 11 | 12 | --- 13 | 14 | ## 0.3.0 (08/07/2020) 15 | - [Version bump to 0.3.0](https://github.com/JetBrains/kotlin-spark-api/commit/0d5bcf57575d8906a219b4143a67df8939c46b0c) - @asm0dey 16 | - [Inproves README — adds information on collection of data from Dataset](https://github.com/JetBrains/kotlin-spark-api/commit/d81b98622ac816c9224f980d92407249333cf6d0) - @asm0dey 17 | - [Fixes #27](https://github.com/JetBrains/kotlin-spark-api/commit/5c05b6f02e30289535fe5f2d45dc99ede3c1eff3) - @asm0dey 18 | - [Fixes #26](https://github.com/JetBrains/kotlin-spark-api/commit/ffdb41d418b53e9336b73d51f3e5237d09c06ef2) - @asm0dey 19 | - [Bump maven-site-plugin from 3.9.0 to 3.9.1](https://github.com/JetBrains/kotlin-spark-api/commit/ee5a4ea1512d756e19549bc50f5c73ecf61108d2) - @dependabot[bot] 20 | - [Create CODE_OF_CONDUCT.md](https://github.com/JetBrains/kotlin-spark-api/commit/96dcfbbf8882de0e8446db3c4485e9febccaf8c5) - @asm0dey 21 | - [Fixes changelog](https://github.com/JetBrains/kotlin-spark-api/commit/1121550089cbec91b8ac915260a9d3593bb0138b) - @asm0dey 22 | 23 | --- 24 | 25 | ## 0.2.3 (23/06/2020) 26 | - [Updates version to 0.2.3](https://github.com/JetBrains/kotlin-spark-api/commit/91ee4faf392792642be5a8c58800b343df02da5b) - @asm0dey 27 | - [Fixes #21](https://github.com/JetBrains/kotlin-spark-api/commit/e8c1c5973087b3dd3f755d9d408893d3d2f19c94) - @asm0dey 28 | - [Updates kotest to 4.1.0 and moves it to property](https://github.com/JetBrains/kotlin-spark-api/commit/c26ad2e514421c4a1e8eaa10a76c035d8c0a0f11) - @asm0dey 29 | - [Fixes 
#20](https://github.com/JetBrains/kotlin-spark-api/commit/0b1bd9875cbb9ea85f1ccb66250d434ba5384c06) - @asm0dey 30 | - [Fixes #16](https://github.com/JetBrains/kotlin-spark-api/commit/875709459df946542bd133c2a3164deda5909fbc) - @asm0dey 31 | - [Bump kotest-assertions-core-jvm from 4.0.6 to 4.1.0](https://github.com/JetBrains/kotlin-spark-api/commit/b072f8fc2b4b30d40c8fee08f598941c896175bd) - @dependabot[bot] 32 | 33 | --- 34 | 35 | ## Fixes for #16 and #17 (22/06/2020) 36 | 37 | --- 38 | 39 | ## Fixes #15 (22/06/2020) 40 | 41 | --- 42 | 43 | ## Update to Spark 3.0.0 release (18/06/2020) 44 | 45 | --- 46 | 47 | ## 0.1.0 (01/06/2020) 48 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # JetBrains Open Source and Community Code of Conduct 2 | 3 | This code of conduct outlines our expectations for all those who participate in our open source projects and communities (community programs), as well as the consequences for unacceptable behaviour. We invite all those who participate to help us create safe and positive experiences for everyone. Communities mirror the societies in which they exist and positive action is essential to counteract the many forms of inequality and abuses of power that exist in society. 4 | 5 | ## How to behave 6 | 7 | The following behaviours are expected and requested of all community members: 8 | 9 | * Participate in an authentic and active way. In doing so, you contribute to the health and longevity of this community. 10 | * Exercise consideration, respect and empathy in your speech and actions. Remember, we have all been through different stages of learning when adopting technologies. 11 | * Refrain from demeaning, discriminatory, or harassing behaviour and speech. 12 | * Disagreements on things are fine, argumentative behaviour or trolling are not. 13 | 14 | ## How not to behave 15 | 16 | * Do not perform threats of violence or use violent language directed against another person. 17 | * Do not make jokes of sexist, racist, homophobic, transphobic, ableist or otherwise discriminatory nature, or use language of this nature. 18 | * Do not post or display sexually explicit or violent material. 19 | * Do not post or threaten to post other people’s personally identifying information ("doxing"). 20 | * Do not make personal insults, particularly those related to gender, sexual orientation, race, religion, or disability. 21 | * Do not engage in sexual attention. This includes, sexualised comments or jokes and sexual advances. 22 | * Do not advocate for, or encourage, any of the above behaviour. 23 | 24 | 25 | Please take into account that online communities bring together people from many different cultures and backgrounds. It's important to understand that sometimes the combination of cultural differences and online interaction can lead to misunderstandings. That is why having empathy is very important. 26 | 27 | ## How to report issues 28 | 29 | If someone is acting inappropriately or violating this Code of Conduct in any shape or form, and they are not receptive to your feedback or you prefer not to confront them, please reach out to JetBrains via codeofconduct@jetbrains.com 30 | 31 | ## Consequences of Unacceptable Behaviour 32 | 33 | Unacceptable behaviour from any community member will not be tolerated. Anyone asked to stop unacceptable behaviour is expected to comply immediately. 
If a community member engages in unacceptable behaviour, JetBrains and/or community organisers may take any action they deem appropriate, up to and including a temporary ban or permanent expulsion from the community without warning. 34 | 35 | ## License and attribution 36 | 37 | The license is based off of The Citizen Code of Conduct is distributed by [Stumptown Syndicate](http://stumptownsyndicate.org/) under a [Creative Commons Attribution-ShareAlike license](http://creativecommons.org/licenses/by-sa/3.0/). 38 | -------------------------------------------------------------------------------- /build.gradle.kts: -------------------------------------------------------------------------------- 1 | @file:Suppress("UnstableApiUsage") 2 | 3 | buildscript { 4 | repositories { 5 | mavenCentral() 6 | } 7 | dependencies { 8 | classpath(jcp) 9 | classpath(mavenPublish) 10 | } 11 | } 12 | 13 | 14 | plugins { 15 | mavenPublish version Versions.mavenPublish 16 | dokka version Versions.dokka 17 | idea 18 | kotlin version Versions.kotlin apply false 19 | } 20 | 21 | group = Versions.groupID 22 | version = Versions.project 23 | 24 | tasks.withType().configureEach { 25 | useJUnitPlatform() 26 | } 27 | 28 | repositories { 29 | mavenCentral() 30 | } 31 | 32 | allprojects { 33 | plugins.withId(mavenPublishBase) { 34 | group = Versions.groupID 35 | version = Versions.project 36 | 37 | publishing { 38 | repositories { 39 | maven { 40 | name = "GitHubPackages" 41 | url = uri("https://maven.pkg.github.com/Kotlin/kotlin-spark-api") 42 | credentials { 43 | username = project.findProperty("gpr.user") as String? 44 | ?: System.getenv("GITHUB_ACTOR") 45 | password = project.findProperty("gpr.key") as String? 46 | ?: System.getenv("GITHUB_TOKEN") 47 | } 48 | } 49 | } 50 | } 51 | 52 | mavenPublishing { 53 | pomFromGradleProperties() 54 | publishToMavenCentral() 55 | // The username and password for Sonatype OSS can be provided as Gradle properties 56 | // called mavenCentralUsername and mavenCentralPassword to avoid having to commit them. 57 | // You can also supply them as environment variables called 58 | // ORG_GRADLE_PROJECT_mavenCentralUsername and 59 | // ORG_GRADLE_PROJECT_mavenCentralPassword. 
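// For example, a hypothetical ~/.gradle/gradle.properties could contain (illustrative values only, not real credentials):
//   mavenCentralUsername=sonatype-user
//   mavenCentralPassword=sonatype-token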
60 | 61 | // also ORG_GRADLE_PROJECT_signingInMemoryKey=exported_ascii_armored_key 62 | // # optional 63 | // ORG_GRADLE_PROJECT_signingInMemoryKeyId=24875D73 64 | // # if key was created with a password 65 | // ORG_GRADLE_PROJECT_signingInMemoryKeyPassword=secret 66 | 67 | signAllPublications() 68 | pom { 69 | name.set("Kotlin Spark API") 70 | description.set("Kotlin for Apache Spark") 71 | packaging = "pom" 72 | 73 | url.set("https://maven.apache.org") 74 | inceptionYear.set("2019") 75 | 76 | organization { 77 | name.set("JetBrains") 78 | url.set("https://www.jetbrains.com/") 79 | } 80 | 81 | licenses { 82 | license { 83 | name.set("Apache License, Version 2.0") 84 | url.set("https://www.apache.org/licenses/LICENSE-2.0.txt") 85 | } 86 | } 87 | 88 | developers { 89 | developer { 90 | id.set("asm0dey") 91 | name.set("Pasha Finkelshteyn") 92 | email.set("asm0dey@jetbrains.com") 93 | timezone.set("GMT+3") 94 | } 95 | developer { 96 | id.set("vitaly.khudobakhshov") 97 | name.set("Vitaly Khudobakhshov") 98 | email.set("vitaly.khudobakhshov@jetbrains.com") 99 | timezone.set("GMT+3") 100 | } 101 | developer { 102 | id.set("Jolanrensen") 103 | name.set("Jolan Rensen") 104 | email.set("jolan.rensen@jetbrains.com") 105 | timezone.set("GMT+1") 106 | } 107 | } 108 | 109 | scm { 110 | connection.set("scm:git:https://github.com/Kotlin/kotlin-spark-api.git") 111 | url.set("https://github.com/Kotlin/kotlin-spark-api") 112 | tag.set("HEAD") 113 | } 114 | } 115 | } 116 | } 117 | } -------------------------------------------------------------------------------- /buildSrc/build.gradle.kts: -------------------------------------------------------------------------------- 1 | import org.gradle.kotlin.dsl.`kotlin-dsl` 2 | import org.jetbrains.kotlin.gradle.tasks.KotlinCompile 3 | 4 | plugins { 5 | `kotlin-dsl` 6 | } 7 | 8 | repositories { 9 | mavenCentral() 10 | } 11 | -------------------------------------------------------------------------------- /buildSrc/src/main/kotlin/Dependencies.kt: -------------------------------------------------------------------------------- 1 | object Dependencies { 2 | inline val kotlinStdLib get() = "org.jetbrains.kotlin:kotlin-stdlib-jdk8:${Versions.kotlin}" 3 | inline val reflect get() = "org.jetbrains.kotlin:kotlin-reflect:${Versions.kotlin}" 4 | inline val scalaLibrary get() = "org.scala-lang:scala-library:${Versions.scala}" 5 | inline val kotlinxHtml get() = "org.jetbrains.kotlinx:kotlinx-html-jvm:${Versions.kotlinxHtml}" 6 | inline val sparkSql get() = "org.apache.spark:spark-sql_${Versions.scalaCompat}:${Versions.spark}" 7 | inline val sparkMl get() = "org.apache.spark:spark-mllib_${Versions.scalaCompat}:${Versions.spark}" 8 | inline val sparkStreaming get() = "org.apache.spark:spark-streaming_${Versions.scalaCompat}:${Versions.spark}" 9 | inline val hadoopClient get() = "org.apache.hadoop:hadoop-client:${Versions.hadoop}" 10 | inline val sparkRepl get() = "org.apache.spark:spark-repl_${Versions.scalaCompat}:${Versions.spark}" 11 | inline val jupyter get() = "org.jetbrains.kotlinx:kotlin-jupyter-api:${Versions.jupyter}" 12 | inline val junit get() = "org.junit.jupiter:junit-jupiter-engine:5.8.1" 13 | inline val sparkStreamingKafka get() = "org.apache.spark:spark-streaming-kafka-0-10_${Versions.scalaCompat}:${Versions.spark}" 14 | inline val kotest get() = "io.kotest:kotest-runner-junit5:${Versions.kotest}" 15 | inline val kotestTestcontainers get() = "io.kotest.extensions:kotest-extensions-testcontainers:${Versions.kotestTestContainers}" 16 | inline val klaxon get() = 
"com.beust:klaxon:${Versions.klaxon}" 17 | inline val atrium get() = "ch.tutteli.atrium:atrium-fluent-en_GB:${Versions.atrium}" 18 | inline val kafkaStreamsTestUtils get() = "org.apache.kafka:kafka-streams-test-utils:${Versions.kafkaStreamsTestUtils}" 19 | inline val jupyterTest get() = "org.jetbrains.kotlinx:kotlin-jupyter-test-kit:${Versions.jupyter}" 20 | inline val kotlinTest get() = "org.jetbrains.kotlin:kotlin-test:${Versions.kotlin}" 21 | inline val kotlinScriptingCommon get() = "org.jetbrains.kotlin:kotlin-scripting-common" 22 | inline val kotlinScriptingJvm get() = "org.jetbrains.kotlin:kotlin-scripting-jvm" 23 | inline val jacksonDatabind get() = "com.fasterxml.jackson.core:jackson-databind:${Versions.jacksonDatabind}" 24 | } 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /buildSrc/src/main/kotlin/Helpers.kt: -------------------------------------------------------------------------------- 1 | import org.gradle.api.artifacts.Dependency 2 | import org.gradle.api.artifacts.ProjectDependency 3 | import org.gradle.api.artifacts.dsl.DependencyHandler 4 | 5 | fun DependencyHandler.testApi(vararg dependencyNotations: Any): List = 6 | dependencyNotations.map { 7 | add("testApi", it) 8 | } 9 | 10 | fun DependencyHandler.api(vararg dependencyNotations: Any): List = 11 | dependencyNotations.map { 12 | add("api", it) 13 | } 14 | 15 | 16 | fun DependencyHandler.testImplementation(vararg dependencyNotations: Any): List = 17 | dependencyNotations.map { 18 | add("testImplementation", it) 19 | } 20 | 21 | fun DependencyHandler.implementation(vararg dependencyNotations: Any): List = 22 | dependencyNotations.map { 23 | add("implementation", it) 24 | } 25 | 26 | fun DependencyHandler.runtimeOnly(vararg dependencyNotations: Any): List = 27 | dependencyNotations.map { 28 | add("runtimeOnly", it) 29 | } 30 | 31 | fun DependencyHandler.project( 32 | path: String, 33 | configuration: String? 
= null 34 | ): ProjectDependency = project( 35 | if (configuration != null) mapOf("path" to path, "configuration" to configuration) 36 | else mapOf("path" to path) 37 | ) as ProjectDependency 38 | -------------------------------------------------------------------------------- /buildSrc/src/main/kotlin/Plugins.kt: -------------------------------------------------------------------------------- 1 | import org.gradle.api.Project 2 | import org.gradle.kotlin.dsl.* 3 | import org.gradle.plugin.use.PluginDependenciesSpec 4 | 5 | 6 | inline val PluginDependenciesSpec.kotlin 7 | get() = kotlin("jvm") 8 | 9 | inline val PluginDependenciesSpec.dokka 10 | get() = id("org.jetbrains.dokka") 11 | 12 | inline val PluginDependenciesSpec.license 13 | get() = id("com.github.hierynomus.license") version Versions.licenseGradlePluginVersion 14 | 15 | inline val PluginDependenciesSpec.jcp 16 | get() = id("com.igormaznitsa.jcp") 17 | 18 | inline val DependencyHandlerScope.jcp 19 | get() = "com.igormaznitsa:jcp:${Versions.jcp}" 20 | 21 | inline val DependencyHandlerScope.mavenPublish 22 | get() = "com.vanniktech:gradle-maven-publish-plugin:${Versions.mavenPublish}" 23 | 24 | inline val PluginDependenciesSpec.mavenPublish 25 | get() = id("com.vanniktech.maven.publish") 26 | 27 | inline val PluginDependenciesSpec.mavenPublishBase 28 | get() = id("com.vanniktech.maven.publish.base") 29 | 30 | inline val Project.mavenPublishBase 31 | get() = "com.vanniktech.maven.publish.base" 32 | 33 | inline val PluginDependenciesSpec.jupyter 34 | get() = kotlin("jupyter.api") version Versions.jupyter 35 | 36 | -------------------------------------------------------------------------------- /buildSrc/src/main/kotlin/Projects.kt: -------------------------------------------------------------------------------- 1 | @file:Suppress("NOTHING_TO_INLINE") 2 | 3 | import org.gradle.api.Project 4 | import org.gradle.api.artifacts.dsl.DependencyHandler 5 | import org.gradle.kotlin.dsl.support.delegates.ProjectDelegate 6 | 7 | object Projects { 8 | 9 | inline fun Project.searchProject(name: String): Project = 10 | rootProject 11 | .childProjects 12 | .filterKeys { name in it } 13 | .entries 14 | .singleOrNull() 15 | ?.value ?: error("Project $name not found") 16 | 17 | inline val Project.kotlinSparkApi 18 | get() = searchProject("kotlin-spark-api") 19 | 20 | inline val Project.core 21 | get() = searchProject("core") 22 | 23 | inline val Project.examples 24 | get() = searchProject("examples") 25 | 26 | inline val Project.jupyter 27 | get() = searchProject("jupyter") 28 | 29 | inline val Project.scalaTuplesInKotlin 30 | get() = searchProject("scala-tuples-in-kotlin") 31 | } -------------------------------------------------------------------------------- /buildSrc/src/main/kotlin/Versions.kt: -------------------------------------------------------------------------------- 1 | object Versions { 2 | const val project = "1.2.4" 3 | const val groupID = "org.jetbrains.kotlinx.spark" 4 | const val kotlin = "1.8.20" 5 | const val jvmTarget = "8" 6 | const val jupyterJvmTarget = "8" 7 | 8 | inline val spark get() = System.getProperty("spark") as String 9 | inline val scala get() = System.getProperty("scala") as String 10 | inline val sparkMinor get() = spark.substringBeforeLast('.') 11 | inline val scalaCompat get() = scala.substringBeforeLast('.') 12 | 13 | const val jupyter = "0.12.0-32-1" 14 | const val kotest = "5.5.4" 15 | const val kotestTestContainers = "1.3.3" 16 | const val dokka = "1.8.20" 17 | const val jcp = "7.0.5" 18 | const val 
mavenPublish = "0.20.0" 19 | const val atrium = "0.17.0" 20 | const val licenseGradlePluginVersion = "0.15.0" 21 | const val kafkaStreamsTestUtils = "3.1.0" 22 | const val hadoop = "3.3.6" 23 | const val kotlinxHtml = "0.7.5" 24 | const val klaxon = "5.5" 25 | const val jacksonDatabind = "2.13.4.2" 26 | 27 | inline val versionMap 28 | get() = mapOf( 29 | "kotlin" to kotlin, 30 | "scala" to scala, 31 | "scalaCompat" to scalaCompat, 32 | "spark" to spark, 33 | "sparkMinor" to sparkMinor, 34 | "version" to project, 35 | ) 36 | 37 | } 38 | -------------------------------------------------------------------------------- /core/build.gradle.kts: -------------------------------------------------------------------------------- 1 | @file:Suppress("UnstableApiUsage", "NOTHING_TO_INLINE") 2 | 3 | import com.igormaznitsa.jcp.gradle.JcpTask 4 | import com.vanniktech.maven.publish.JavaLibrary 5 | import com.vanniktech.maven.publish.JavadocJar.Javadoc 6 | 7 | plugins { 8 | scala 9 | `java-library` 10 | jcp 11 | mavenPublishBase 12 | } 13 | 14 | group = Versions.groupID 15 | version = Versions.project 16 | 17 | repositories { 18 | mavenCentral() 19 | } 20 | 21 | dependencies { 22 | 23 | with(Dependencies) { 24 | api( 25 | scalaLibrary, 26 | reflect, 27 | ) 28 | 29 | // https://github.com/FasterXML/jackson-bom/issues/52 30 | if (Versions.spark == "3.3.1") implementation(jacksonDatabind) 31 | 32 | implementation( 33 | sparkSql, 34 | ) 35 | } 36 | } 37 | 38 | 39 | java { 40 | toolchain { 41 | if (Versions.scalaCompat.toDouble() > 2.12) { // scala 2.12 will always target java 8 42 | languageVersion.set( 43 | JavaLanguageVersion.of(Versions.jvmTarget) 44 | ) 45 | } else if (Versions.jvmTarget == "1.8" || Versions.jvmTarget == "8") { 46 | languageVersion.set( 47 | JavaLanguageVersion.of(8) 48 | ) 49 | } 50 | } 51 | } 52 | 53 | tasks.withType { 54 | if (Versions.scalaCompat.toDouble() > 2.12) { // scala 2.12 will always target java 8 55 | targetCompatibility = Versions.jvmTarget 56 | } else if (Versions.jvmTarget == "1.8" || Versions.jvmTarget == "8") { 57 | targetCompatibility = "1.8" 58 | } 59 | } 60 | 61 | val scalaMainSources = sourceSets.main.get().scala.sourceDirectories 62 | 63 | val preprocessMain by tasks.creating(JcpTask::class) { 64 | sources.set(scalaMainSources) 65 | clearTarget.set(true) 66 | fileExtensions.set(listOf("scala")) 67 | vars.set(Versions.versionMap) 68 | outputs.upToDateWhen { target.get().exists() } 69 | } 70 | 71 | tasks.compileScala { 72 | dependsOn(preprocessMain) 73 | outputs.upToDateWhen { 74 | preprocessMain.outcomingFiles.files.isEmpty() 75 | } 76 | 77 | doFirst { 78 | scala { 79 | sourceSets { 80 | main { 81 | scala.setSrcDirs(listOf(preprocessMain.target.get())) 82 | } 83 | } 84 | } 85 | } 86 | 87 | doLast { 88 | scala { 89 | sourceSets { 90 | main { 91 | scala.setSrcDirs(scalaMainSources) 92 | } 93 | } 94 | } 95 | } 96 | } 97 | 98 | mavenPublishing { 99 | configure(JavaLibrary(Javadoc())) 100 | } 101 | 102 | -------------------------------------------------------------------------------- /core/src/main/scala/org/jetbrains/kotlinx/spark/extensions/DemoCaseClass.scala: -------------------------------------------------------------------------------- 1 | package org.jetbrains.kotlinx.spark.extensions 2 | 3 | case class DemoCaseClass[T](a: Int, b: T) 4 | -------------------------------------------------------------------------------- /core/src/main/scala/org/jetbrains/kotlinx/spark/extensions/KSparkExtensions.scala: 
-------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.extensions 21 | 22 | import org.apache.spark.SparkContext 23 | import org.apache.spark.sql._ 24 | import java.util 25 | import scala.reflect.ClassTag 26 | 27 | object KSparkExtensions { 28 | 29 | val kotlinVersion = /*$"\""+kotlin+"\""$*/ /*-*/ "" 30 | val scalaVersion = /*$"\""+scala+"\""$*/ /*-*/ "" 31 | val scalaCompatVersion = /*$"\""+scalaCompat+"\""$*/ /*-*/ "" 32 | val sparkVersion = /*$"\""+spark+"\""$*/ /*-*/ "" 33 | val sparkMinorVersion = /*$"\""+sparkMinor+"\""$*/ /*-*/ "" 34 | 35 | def col(d: Dataset[_], name: String): Column = d.col(name) 36 | 37 | def col(name: String): Column = functions.col(name) 38 | 39 | def lit(literal: Any): Column = functions.lit(literal) 40 | 41 | def collectAsList[T](ds: Dataset[T]): util.List[T] = { 42 | //#if scalaCompat >= 2.13 43 | scala.jdk.javaapi.CollectionConverters.asJava(ds.collect()) 44 | //#else 45 | //$scala.collection.JavaConverters.seqAsJavaList(ds.collect()) 46 | //#endif 47 | } 48 | 49 | 50 | def debugCodegen(df: Dataset[_]): Unit = { 51 | import org.apache.spark.sql.execution.debug._ 52 | df.debugCodegen() 53 | } 54 | 55 | def debug(df: Dataset[_]): Unit = { 56 | import org.apache.spark.sql.execution.debug._ 57 | df.debug() 58 | } 59 | 60 | def sparkContext(s: SparkSession): SparkContext = s.sparkContext 61 | 62 | /** 63 | * Produces a ClassTag[T], which is actually just a casted ClassTag[AnyRef]. 64 | * 65 | * This method is used to keep ClassTags out of the external Java API, as the Java compiler 66 | * cannot produce them automatically. While this ClassTag-faking does please the compiler, 67 | * it can cause problems at runtime if the Scala API relies on ClassTags for correctness. 68 | * 69 | * Often, though, a ClassTag[AnyRef] will not lead to incorrect behavior, just worse performance 70 | * or security issues. For instance, an Array[AnyRef] can hold any type T, but may lose primitive 71 | * specialization. 72 | */ 73 | def fakeClassTag[T]: ClassTag[T] = ClassTag.AnyRef.asInstanceOf[ClassTag[T]] 74 | } 75 | -------------------------------------------------------------------------------- /core/src/main/scala/org/jetbrains/kotlinx/spark/extensions/VarargUnwrapper.scala: -------------------------------------------------------------------------------- 1 | package org.jetbrains.kotlinx.spark.extensions 2 | 3 | import org.apache.spark.sql.api.java.{UDF1, UDF2} 4 | 5 | /** 6 | * Allows any simple vararg function reference to be treated as 23 different Scala functions. 7 | * Used to make vararg UDFs for `ScalaUDF`. 
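 * A minimal construction sketch, showing how one vararg-style `UDF1` plus an array builder
 * covers every arity (hypothetical example, not part of the original file):
 * {{{
 *   val sum: UDF1[Array[Int], Int] = (xs: Array[Int]) => xs.sum
 *   val unwrapped = new VarargUnwrapper[Int, Array[Int], Int](
 *     sum,
 *     (size: Integer, get: UDF1[Integer, Int]) => Array.tabulate(size.intValue)(i => get.call(Int.box(i)))
 *   )
 *   unwrapped(1, 2, 3) // dispatches through Function3.apply and yields 6
 * }}}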
8 | * 9 | * @param varargFunc 10 | * @param newArray 11 | * @tparam T 12 | * @tparam Array 13 | * @tparam R 14 | */ 15 | class VarargUnwrapper[T, Array, R]( 16 | val varargFunc: UDF1[Array, R], 17 | val newArray: UDF2[Integer, UDF1[Integer, T], Array], 18 | ) extends Serializable 19 | with Function0[R] 20 | with Function1[T, R] 21 | with Function2[T, T, R] 22 | with Function3[T, T, T, R] 23 | with Function4[T, T, T, T, R] 24 | with Function5[T, T, T, T, T, R] 25 | with Function6[T, T, T, T, T, T, R] 26 | with Function7[T, T, T, T, T, T, T, R] 27 | with Function8[T, T, T, T, T, T, T, T, R] 28 | with Function9[T, T, T, T, T, T, T, T, T, R] 29 | with Function10[T, T, T, T, T, T, T, T, T, T, R] 30 | with Function11[T, T, T, T, T, T, T, T, T, T, T, R] 31 | with Function12[T, T, T, T, T, T, T, T, T, T, T, T, R] 32 | with Function13[T, T, T, T, T, T, T, T, T, T, T, T, T, R] 33 | with Function14[T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] 34 | with Function15[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] 35 | with Function16[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] 36 | with Function17[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] 37 | with Function18[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] 38 | with Function19[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] 39 | with Function20[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] 40 | with Function21[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] 41 | with Function22[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] { 42 | 43 | private def vararg(t: T*): R = varargFunc.call(newArray.call(t.size, { t(_) })) 44 | 45 | override def curried: Nothing = throw new UnsupportedOperationException() 46 | override def tupled: Nothing = throw new UnsupportedOperationException() 47 | 48 | override def apply(): R = vararg() 49 | 50 | override def apply(v0: T): R = vararg(v0) 51 | 52 | override def apply(v0: T, v1: T): R = vararg(v0, v1) 53 | 54 | override def apply(v0: T, v1: T, v2: T): R = vararg(v0, v1, v2) 55 | 56 | override def apply(v0: T, v1: T, v2: T, v3: T): R = vararg(v0, v1, v2, v3) 57 | 58 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T): R = vararg(v0, v1, v2, v3, v4) 59 | 60 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T): R = vararg(v0, v1, v2, v3, v4, v5) 61 | 62 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T): R = vararg(v0, v1, v2, v3, v4, v5, v6) 63 | 64 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7) 65 | 66 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8) 67 | 68 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9) 69 | 70 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) 71 | 72 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) 73 | 74 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) 75 | 76 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: 
T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) 77 | 78 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) 79 | 80 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) 81 | 82 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) 83 | 84 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T, v17: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) 85 | 86 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T, v17: T, v18: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) 87 | 88 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T, v17: T, v18: T, v19: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) 89 | 90 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T, v17: T, v18: T, v19: T, v20: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) 91 | 92 | override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T, v17: T, v18: T, v19: T, v20: T, v21: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) 93 | } 94 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-dinky -------------------------------------------------------------------------------- /docs/quick-start-guide.md: -------------------------------------------------------------------------------- 1 | # Quick Start Guide 2 | 3 | This tutorial provides instructions to help you get started with Kotlin Spark API. We use an example similar to the official [Apache Spark 4 | Quick Start Guide](https://spark.apache.org/docs/3.0.0/quick-start.html#self-contained-applications). 5 | You'll learn what you need to set up your environment, how to write, package and execute a simple self-contained application. 6 | 7 | Prerequisites: 8 | - You need to have Java installed and have the JAVA_HOME environment variable pointing to the Java installation. 9 | - You need to have Apache Spark installed and have SPARK_HOME environment variable pointing to the Spark installation. 10 | We recommend using Apache Spark 3.0.0 version. You can download it from the [Spark official website](https://spark.apache.org/downloads.html). 
11 | 12 | 13 | ## Self-contained application 14 | 15 | For the purposes of this tutorial, let's write a Kotlin program that counts the number of lines containing 'a', 16 | and the number containing 'b' in the Spark README. Note that you'll need to replace `YOUR_SPARK_HOME` with the 17 | location where Spark is installed: 18 | 19 | ```kotlin 20 | /* SimpleApp.kt */ 21 | @file:JvmName("SimpleApp") 22 | import org.jetbrains.kotlinx.spark.api.* 23 | 24 | fun main() { 25 | val logFile = "YOUR_SPARK_HOME/README.md" // Change to your Spark Home path 26 | withSpark { 27 | spark.read().textFile(logFile).withCached { 28 | val numAs = filter { it.contains("a") }.count() 29 | val numBs = filter { it.contains("b") }.count() 30 | println("Lines with a: $numAs, lines with b: $numBs") 31 | } 32 | } 33 | } 34 | ``` 35 | 36 | ## Building the application 37 | Because Kotlin Spark API is not part of the official Apache Spark distribution yet, it is not enough to add Spark 38 | as a dependency to your build file. 39 | You need to: 40 | - Add Spark as a dependency 41 | - Add Kotlin Spark API as a dependency 42 | - Add Kotlin Standard Library as a dependency 43 | 44 | When packaging your project into a jar file, you need to explicitly include Kotlin Spark API and Kotlin Standard Library 45 | dependencies. Here you can find an example of building your application with Maven, and with Gradle. 46 | 47 | ### Building the application with Maven 48 | 49 | Here's what the `pom.xml` looks like for this example: 50 | ```xml 51 | 52 | 4.0.0 53 | 54 | org.example 55 | kotlin-spark-example 56 | 1.0-SNAPSHOT 57 | 58 | Sample Project 59 | jar 60 | 61 | 62 | UTF-8 63 | 1.8.0 64 | official 65 | 66 | 67 | 68 | 69 | org.jetbrains.kotlin 70 | kotlin-stdlib 71 | ${kotlin.version} 72 | 73 | 74 | org.jetbrains.kotlinx.spark 75 | kotlin-spark-api_3.3.2_2.13 76 | 1.2.3 77 | 78 | 79 | org.apache.spark 80 | spark-sql_2.12 81 | 3.3.2 82 | 83 | 84 | 85 | 86 | 87 | 88 | org.apache.maven.plugins 89 | maven-shade-plugin 90 | 3.2.4 91 | 92 | 93 | package 94 | 95 | shade 96 | 97 | 98 | 99 | 100 | org.jetbrains.kotlinx.spark:* 101 | org.jetbrains.kotlin:* 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | org.jetbrains.kotlin 111 | kotlin-maven-plugin 112 | ${kotlin.version} 113 | 114 | src/main/kotlin 115 | 1.8 116 | true 117 | 118 | 119 | 120 | compile 121 | 122 | compile 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | ``` 131 | 132 | Here's what the project structure should look like: 133 | ```bash 134 | ./pom.xml 135 | ./src 136 | ./src/main 137 | ./src/main/kotlin 138 | ./src/main/kotlin/SimpleApp.kt 139 | ``` 140 | 141 | 142 | Now you can package the application using Maven: 143 | `mvn package` 144 | 145 | ### Building the application with Gradle 146 | 147 | Here's what the `build.gradle` looks like for this example: 148 | 149 | ```groovy 150 | plugins { 151 | id 'org.jetbrains.kotlin.jvm' version '1.4.0' 152 | id 'com.github.johnrengelman.shadow' version '5.2.0' 153 | } 154 | 155 | group = 'org.example' 156 | version = '1.0-SNAPSHOT' 157 | 158 | repositories { 159 | mavenCentral() 160 | } 161 | 162 | dependencies { 163 | // Kotlin stdlib 164 | implementation 'org.jetbrains.kotlin:kotlin-stdlib:1.8.0' 165 | // Kotlin Spark API 166 | implementation 'org.jetbrains.kotlinx.spark:kotlin-spark-api_3.3.2_2.13:1.2.3' // Apache Spark 167 | compileOnly 'org.apache.spark:spark-sql_2.12:3.3.2' 168 | } 169 | 170 | compileKotlin { 171 | kotlinOptions.jvmTarget = '1.8' 172 | } 173 | 174 | shadowJar { 175 | dependencies { 176 | exclude(dependency { 
177 | it.moduleGroup == 'org.apache.spark' || it.moduleGroup == "org.scala-lang" 178 | }) 179 | } 180 | } 181 | ``` 182 | 183 | build.gradle.kts (Kotlin DSL) 184 | ``` 185 | import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar 186 | import org.jetbrains.kotlin.gradle.tasks.KotlinCompile 187 | 188 | plugins { 189 | id ("org.jetbrains.kotlin.jvm") version "1.8.0" 190 | id ("com.github.johnrengelman.shadow") version "5.2.0" 191 | } 192 | 193 | repositories { 194 | mavenCentral() 195 | } 196 | 197 | dependencies { 198 | // Kotlin stdlib 199 | implementation ("org.jetbrains.kotlin:kotlin-stdlib:1.4.0") 200 | // Kotlin Spark API 201 | implementation ("org.jetbrains.kotlinx.spark:kotlin-spark-api_3.3.2_2.13:1.2.3") 202 | // Apache Spark 203 | compileOnly ("org.apache.spark:spark-sql_2.12:3.3.2") 204 | } 205 | 206 | compileKotlin.kotlinOptions.jvmTarget = "1.8" 207 | 208 | tasks { 209 | named("shadowJar") { 210 | dependencies { 211 | exclude{ 212 | it.moduleGroup == "org.apache.spark" || it.moduleGroup == "org.scala-lang" 213 | } 214 | } 215 | } 216 | } 217 | ``` 218 | 219 | 220 | Now you can package the application using Gradle: 221 | `gradle shadowJar` 222 | 223 | 224 | ## Executing the application with spark-submit 225 | 226 | Once you have your jar, you can execute the packaged application with `./bin/spark-submit`: 227 | 228 | `YOUR_SPARK_HOME/bin/spark-submit --class "SimpleApp" --master local [path to your jar]` 229 | 230 | This example is also available as a [GitHub repo](https://github.com/MKhalusova/kotlin-spark-example), feel free to give it a try. 231 | -------------------------------------------------------------------------------- /examples/build.gradle.kts: -------------------------------------------------------------------------------- 1 | import org.jetbrains.kotlin.gradle.tasks.KotlinCompile 2 | 3 | plugins { 4 | kotlin 5 | idea 6 | } 7 | 8 | group = Versions.groupID 9 | version = Versions.project 10 | 11 | repositories { 12 | mavenCentral() 13 | } 14 | 15 | dependencies { 16 | 17 | with(Projects) { 18 | implementation( 19 | kotlinSparkApi, 20 | ) 21 | } 22 | 23 | with(Dependencies) { 24 | 25 | // https://github.com/FasterXML/jackson-bom/issues/52 26 | if (Versions.spark == "3.3.1") implementation(jacksonDatabind) 27 | 28 | implementation( 29 | sparkSql, 30 | sparkMl, 31 | sparkStreaming, 32 | sparkStreamingKafka, 33 | ) 34 | 35 | } 36 | } 37 | 38 | kotlin { 39 | jvmToolchain { 40 | languageVersion.set( 41 | JavaLanguageVersion.of(Versions.jvmTarget) 42 | ) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Broadcasting.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples for Spark 2.4+ (Scala 2.11) 4 | * ---------- 5 | * Copyright (C) 2019 - 2021 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples 21 | 22 | import org.jetbrains.kotlinx.spark.api.broadcast 23 | import org.jetbrains.kotlinx.spark.api.map 24 | import org.jetbrains.kotlinx.spark.api.withSpark 25 | import java.io.Serializable 26 | 27 | // (data) class must be Serializable to be broadcast 28 | data class SomeClass(val a: IntArray, val b: Int) : Serializable 29 | 30 | fun main() = withSpark { 31 | val broadcastVariable = spark.broadcast(SomeClass(a = intArrayOf(5, 6), b = 3)) 32 | val result = listOf(1, 2, 3, 4, 5) 33 | .toDS() 34 | .map { 35 | val receivedBroadcast = broadcastVariable.value 36 | it + receivedBroadcast.a.first() 37 | } 38 | .collectAsList() 39 | 40 | println(result) 41 | } 42 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/CachedOperations.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples 21 | 22 | import org.jetbrains.kotlinx.spark.api.* 23 | import org.jetbrains.kotlinx.spark.api.tuples.* 24 | 25 | fun main() { 26 | withSpark { 27 | dsOf(1, 2, 3, 4, 5) 28 | .map { it X (it + 2) } 29 | .withCached { 30 | showDS() 31 | 32 | filter { it._1 % 2 == 0 }.showDS() 33 | } 34 | .map { it.appendedBy(it._1 + it._2 * 2) } 35 | .show() 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Collect.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples 21 | 22 | import org.apache.spark.sql.Row 23 | import org.jetbrains.kotlinx.spark.api.* 24 | import org.jetbrains.kotlinx.spark.api.tuples.* 25 | 26 | fun main() { 27 | withSpark { 28 | val sd = dsOf(1, 2, 3) 29 | sd.createOrReplaceTempView("ds") 30 | spark.sql("select * from ds") 31 | .withCached { 32 | println("asList: ${toList()}") 33 | println("asArray: ${toArray().contentToString()}") 34 | this 35 | } 36 | .to() 37 | .withCached { 38 | println("typed collect: " + (collect() as Array).contentToString()) 39 | println("type collectAsList: " + collectAsList()) 40 | } 41 | 42 | dsOf(1, 2, 3) 43 | .map { t(it, it + 1, it + 2) } 44 | .to() 45 | .select("_1") 46 | .collectAsList() 47 | .forEach { println(it) } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Group.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples for Spark 2.4+ (Scala 2.11) 4 | * ---------- 5 | * Copyright (C) 2019 - 2021 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples 21 | 22 | import org.jetbrains.kotlinx.spark.api.* 23 | import org.jetbrains.kotlinx.spark.api.tuples.* 24 | 25 | fun main() { 26 | withSpark { 27 | dsOf( 28 | 1 X "a", 29 | 1 X "b", 30 | 2 X "c", 31 | ) 32 | .groupByKey { it._1 } 33 | .reduceGroupsK { a, b -> 34 | tupleOf(a._1 + b._1, a._2 + b._2) 35 | } 36 | .show() 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Join.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples 21 | 22 | import org.jetbrains.kotlinx.spark.api.* 23 | import org.jetbrains.kotlinx.spark.api.tuples.* 24 | 25 | 26 | data class Left(val id: Int, val name: String) 27 | 28 | data class Right(val id: Int, val value: Int) 29 | 30 | 31 | fun main() { 32 | withSpark(logLevel = SparkLogLevel.INFO) { 33 | val first = dsOf(Left(1, "a"), Left(2, "b")) 34 | val second = dsOf(Right(1, 100), Right(3, 300)) 35 | first 36 | .leftJoin(second, first.col("id") eq second.col("id")) 37 | .debugCodegen() 38 | .also { it.show() } 39 | .map { (left, right) -> 40 | left.id X left.name X right?.value 41 | } 42 | .show() 43 | 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples 21 | 22 | import org.apache.spark.sql.Dataset 23 | import org.jetbrains.kotlinx.spark.api.* 24 | import org.jetbrains.kotlinx.spark.api.tuples.* 25 | import scala.Tuple2 26 | import scala.Tuple3 27 | 28 | data class Q(val id: Int, val text: T) 29 | 30 | @Suppress("RedundantLambdaArrow", "UsePropertyAccessSyntax") 31 | object Main { 32 | 33 | @JvmStatic 34 | fun main(args: Array) { 35 | val spark = SparkSession 36 | .builder() 37 | .master("local[2]") 38 | .appName("Simple Application") 39 | .getOrCreate() 40 | 41 | val triples: Dataset> = spark 42 | .toDS( 43 | listOf( 44 | Q(1, 1 X null), 45 | Q(2, 2 X "22"), 46 | Q(3, 3 X "333"), 47 | ) 48 | ) 49 | .map { (a, b) -> t(a + b._1, b._2?.length) } 50 | .map { it: Tuple2 -> it + 1 } // add counter 51 | 52 | val pairs = spark 53 | .toDS( 54 | listOf( 55 | 2 X "hell", 56 | 4 X "moon", 57 | 6 X "berry", 58 | ) 59 | ) 60 | 61 | triples 62 | .leftJoin( 63 | right = pairs, 64 | col = triples.col("_1").multiply(2) eq pairs.col("_1"), 65 | ) 66 | // .also { it.printSchema() } 67 | .map { (triple, pair) -> Five(triple._1, triple._2, triple._3, pair?._1, pair?._2) } 68 | .groupByKey { it.a } 69 | .reduceGroupsK { v1, v2 -> v1.copy(a = v1.a + v2.a, b = v1.a + v2.a) } 70 | .map { it._2 } 71 | .repartition(1) 72 | .withCached { 73 | write() 74 | .also { it.csv("csvpath") } 75 | .also { it.orc("orcpath") } 76 | showDS() 77 | } 78 | 79 | 80 | 81 | spark.stop() 82 | } 83 | 84 | data class Five(val a: A, val b: B, val c: C, val d: D, val e: E) 85 | } 86 | 87 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/MapAndListOperations.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin 
Spark API: Examples 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples 21 | 22 | import org.jetbrains.kotlinx.spark.api.* 23 | import org.jetbrains.kotlinx.spark.api.tuples.* 24 | 25 | fun main() { 26 | withSpark(props = mapOf("spark.sql.codegen.wholeStage" to true)) { 27 | dsOf( 28 | mapOf(1 to t(1, 2, 3), 2 to t(1, 2, 3)), 29 | mapOf(3 to t(1, 2, 3), 4 to t(1, 2, 3)), 30 | ) 31 | .flatMap { 32 | it.toList() 33 | .map { (first, tuple) -> (first + tuple).toList() } 34 | .iterator() 35 | } 36 | .flatten() 37 | .map { tupleOf(it) } 38 | .also { it.printSchema() } 39 | .distinct() 40 | .sort("_1") 41 | .debugCodegen() 42 | .show() 43 | } 44 | } 45 | 46 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/UdtRegistration.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples 21 | 22 | import org.apache.spark.sql.catalyst.InternalRow 23 | import org.apache.spark.sql.catalyst.expressions.GenericInternalRow 24 | import org.apache.spark.sql.types.* 25 | import org.apache.spark.unsafe.types.UTF8String 26 | import org.glassfish.jersey.internal.guava.MoreObjects 27 | import org.jetbrains.kotlinx.spark.api.* 28 | import org.jetbrains.kotlinx.spark.api.tuples.tupleOf 29 | 30 | class CityUserDefinedType : UserDefinedType() { 31 | 32 | override fun sqlType(): DataType = DATA_TYPE 33 | 34 | override fun serialize(city: City): InternalRow = GenericInternalRow(2).apply { 35 | setInt(DEPT_NUMBER_INDEX, city.departmentNumber) 36 | update(NAME_INDEX, UTF8String.fromString(city.name)) 37 | } 38 | 39 | override fun deserialize(datum: Any): City = 40 | if (datum is InternalRow) 41 | City( 42 | name = datum.getString(NAME_INDEX), 43 | departmentNumber = datum.getInt(DEPT_NUMBER_INDEX), 44 | ) 45 | else throw IllegalStateException("Unsupported conversion") 46 | 47 | override fun userClass(): Class = City::class.java 48 | 49 | companion object { 50 | private const val DEPT_NUMBER_INDEX = 0 51 | private const val NAME_INDEX = 1 52 | private val DATA_TYPE = DataTypes.createStructType( 53 | arrayOf( 54 | DataTypes.createStructField( 55 | "departmentNumber", 56 | DataTypes.IntegerType, 57 | false, 58 | MetadataBuilder().putLong("maxNumber", 99).build(), 59 | ), 60 | DataTypes.createStructField("name", DataTypes.StringType, false) 61 | ) 62 | ) 63 | } 64 | } 65 | 66 | @SQLUserDefinedType(udt = CityUserDefinedType::class) 67 | class City(val name: String, val departmentNumber: Int) { 68 | 69 | override fun toString(): String = 70 | MoreObjects 71 | .toStringHelper(this) 72 | .add("name", name) 73 | .add("departmentNumber", departmentNumber) 74 | .toString() 75 | } 76 | 77 | fun main() = withSpark { 78 | 79 | // Either use @SQLUserDefinedType or: 80 | // UDTRegistration.register(org.jetbrains.kotlinx.spark.examples.City::class.jvmName, org.jetbrains.kotlinx.spark.examples.CityUserDefinedType::class.jvmName) 81 | 82 | val items = listOf( 83 | City("Amsterdam", 1), 84 | City("Breda", 2), 85 | City("Oosterhout", 3), 86 | ) 87 | 88 | val ds = items.map(::tupleOf).toDS() 89 | ds.showDS() 90 | 91 | // Unlike in Scala, you can also directly encode UDT registered types to a Dataset! 92 | val ds2 = items.toDS() 93 | ds2.showDS() 94 | } 95 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/WordCount.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples 21 | 22 | import org.apache.spark.sql.Dataset 23 | import org.jetbrains.kotlinx.spark.api.* 24 | import org.jetbrains.kotlinx.spark.api.tuples.* 25 | 26 | const val MEANINGFUL_WORD_LENGTH = 4 27 | 28 | fun main() { 29 | withSpark { 30 | spark 31 | .read() 32 | .textFile(this::class.java.classLoader.getResource("the-catcher-in-the-rye.txt")?.path) 33 | .map { it.split(Regex("\\s")) } 34 | .flatten() 35 | .cleanup() 36 | .groupByKey { it } 37 | .mapGroups { k, iter -> k X iter.asSequence().count() } 38 | .sort { arrayOf(it.col("_2").desc()) } 39 | .limit(20) 40 | .map { it.swap() } 41 | .show(false) 42 | } 43 | } 44 | 45 | fun Dataset.cleanup(): Dataset = 46 | filter { it.isNotBlank() } 47 | .map { it.trim(',', ' ', '\n', ':', '.', ';', '?', '!', '"', '\'', '\t', ' ') } 48 | .filter { !it.endsWith("n’t") } 49 | .filter { it.length >= MEANINGFUL_WORD_LENGTH } 50 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/streaming/JupyterStreamingExample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "By default the latest version of the API and the latest supported Spark version is chosen. To specify your own: %use spark-streaming(spark=3.2, v=1.1.0)" 7 | ], 8 | "metadata": { 9 | "collapsed": false, 10 | "pycharm": { 11 | "name": "#%% md\n" 12 | } 13 | } 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "To start a spark streaming session, simply use `withSparkStreaming { }` inside a cell. To use Spark normally, use `withSpark { }` in a cell, or use `%use spark` to start a Spark session for the whole notebook.\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "%use spark-streaming" 29 | ], 30 | "metadata": { 31 | "collapsed": false, 32 | "pycharm": { 33 | "name": "#%%\n" 34 | } 35 | } 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "source": [ 40 | "Let's define some data class to work with." 41 | ], 42 | "metadata": { 43 | "collapsed": false, 44 | "pycharm": { 45 | "name": "#%% md\n" 46 | } 47 | } 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "outputs": [], 53 | "source": [ 54 | "data class TestRow(\n", 55 | " val word: String,\n", 56 | ")" 57 | ], 58 | "metadata": { 59 | "collapsed": false, 60 | "pycharm": { 61 | "name": "#%%\n" 62 | } 63 | } 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "source": [ 68 | "To run this on your local machine, you need to first run a Netcat server: `$ nc -lk 9999`.\n", 69 | "\n", 70 | "This example will collect the data from this stream for 10 seconds and 1 second intervals, splitting and counting the input per word." 
71 | ], 72 | "metadata": { 73 | "collapsed": false, 74 | "pycharm": { 75 | "name": "#%% md\n" 76 | } 77 | } 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": [ 87 | "+---+--------+\n", 88 | "|key|count(1)|\n", 89 | "+---+--------+\n", 90 | "+---+--------+\n", 91 | "\n", 92 | "+-----+--------+\n", 93 | "| key|count(1)|\n", 94 | "+-----+--------+\n", 95 | "|hello| 8|\n", 96 | "|Hello| 6|\n", 97 | "|world| 3|\n", 98 | "| | 2|\n", 99 | "| test| 4|\n", 100 | "+-----+--------+\n", 101 | "\n", 102 | "+-----+--------+\n", 103 | "| key|count(1)|\n", 104 | "+-----+--------+\n", 105 | "|hello| 3|\n", 106 | "+-----+--------+\n", 107 | "\n", 108 | "+---+--------+\n", 109 | "|key|count(1)|\n", 110 | "+---+--------+\n", 111 | "+---+--------+\n", 112 | "\n", 113 | "+---+--------+\n", 114 | "|key|count(1)|\n", 115 | "+---+--------+\n", 116 | "+---+--------+\n", 117 | "\n", 118 | "+---+--------+\n", 119 | "|key|count(1)|\n", 120 | "+---+--------+\n", 121 | "+---+--------+\n", 122 | "\n", 123 | "+---+--------+\n", 124 | "|key|count(1)|\n", 125 | "+---+--------+\n", 126 | "+---+--------+\n", 127 | "\n", 128 | "+---+--------+\n", 129 | "|key|count(1)|\n", 130 | "+---+--------+\n", 131 | "+---+--------+\n", 132 | "\n", 133 | "+-----+--------+\n", 134 | "| key|count(1)|\n", 135 | "+-----+--------+\n", 136 | "|hello| 1|\n", 137 | "|world| 2|\n", 138 | "+-----+--------+\n", 139 | "\n", 140 | "+---+--------+\n", 141 | "|key|count(1)|\n", 142 | "+---+--------+\n", 143 | "+---+--------+\n", 144 | "\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "withSparkStreaming(batchDuration = Durations.seconds(1), timeout = 10_000) { // this: KSparkStreamingSession\n", 150 | "\n", 151 | " val lines: JavaReceiverInputDStream = ssc.socketTextStream(\"localhost\", 9999)\n", 152 | " val words: JavaDStream = lines.flatMap { it.split(\" \").iterator() }\n", 153 | "\n", 154 | " words.foreachRDD { rdd: JavaRDD, _: Time ->\n", 155 | " withSpark(rdd) { // this: KSparkSession\n", 156 | " val dataframe: Dataset = rdd.map { TestRow(it) }.toDS()\n", 157 | " dataframe\n", 158 | " .groupByKey { it.word }\n", 159 | " .count()\n", 160 | " .show()\n", 161 | " }\n", 162 | " }\n", 163 | "}" 164 | ], 165 | "metadata": { 166 | "collapsed": false, 167 | "pycharm": { 168 | "name": "#%%\n" 169 | } 170 | } 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "Kotlin", 176 | "language": "kotlin", 177 | "name": "kotlin" 178 | }, 179 | "language_info": { 180 | "name": "kotlin", 181 | "version": "1.7.0-dev-1825", 182 | "mimetype": "text/x-kotlin", 183 | "file_extension": ".kt", 184 | "pygments_lexer": "kotlin", 185 | "codemirror_mode": "text/x-kotlin", 186 | "nbconvert_exporter": "" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 0 191 | } -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/streaming/KotlinDirectKafkaWordCount.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 
9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples.streaming 21 | 22 | import org.apache.kafka.clients.consumer.ConsumerConfig.* 23 | import org.apache.kafka.clients.consumer.ConsumerRecord 24 | import org.apache.kafka.common.serialization.StringDeserializer 25 | import org.apache.spark.streaming.Durations 26 | import org.apache.spark.streaming.api.java.JavaDStream 27 | import org.apache.spark.streaming.api.java.JavaInputDStream 28 | import org.apache.spark.streaming.kafka010.ConsumerStrategies 29 | import org.apache.spark.streaming.kafka010.KafkaUtils 30 | import org.apache.spark.streaming.kafka010.LocationStrategies 31 | import org.jetbrains.kotlinx.spark.api.reduceByKey 32 | import org.jetbrains.kotlinx.spark.api.tuples.* 33 | import org.jetbrains.kotlinx.spark.api.withSparkStreaming 34 | import scala.Tuple2 35 | import java.io.Serializable 36 | import java.util.regex.Pattern 37 | import kotlin.system.exitProcess 38 | 39 | 40 | /** 41 | * Src: https://github.com/apache/spark/blob/master/examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java 42 | * 43 | * Consumes messages from one or more topics in Kafka and does wordcount. 44 | * Usage: JavaDirectKafkaWordCount 45 | * is a list of one or more Kafka brokers 46 | * is a consumer group name to consume from topics 47 | * is a list of one or more kafka topics to consume from 48 | * 49 | * Example: 50 | * 51 | * First make sure you have a Kafka producer running. 
For instance, when running locally: 52 | * $ kafka-console-producer.sh --topic quickstart-events --bootstrap-server localhost:9092 53 | * 54 | * Then start the program normally or like this: 55 | * $ bin/run-example streaming.JavaDirectKafkaWordCount broker1-host:port,broker2-host:port \ 56 | * consumer-group topic1,topic2 57 | */ 58 | object KotlinDirectKafkaWordCount { 59 | 60 | private val SPACE = Pattern.compile(" ") 61 | 62 | private const val DEFAULT_BROKER = "localhost:9092" 63 | private const val DEFAULT_GROUP_ID = "consumer-group" 64 | private const val DEFAULT_TOPIC = "quickstart-events" 65 | 66 | @JvmStatic 67 | fun main(args: Array) { 68 | if (args.size < 3 && args.isNotEmpty()) { 69 | System.err.println( 70 | """Usage: JavaDirectKafkaWordCount 71 | is a list of one or more Kafka brokers 72 | is a consumer group name to consume from topics 73 | is a list of one or more kafka topics to consume from 74 | """.trimIndent() 75 | ) 76 | exitProcess(1) 77 | } 78 | 79 | val brokers: String = args.getOrElse(0) { DEFAULT_BROKER } 80 | val groupId: String = args.getOrElse(1) { DEFAULT_GROUP_ID } 81 | val topics: String = args.getOrElse(2) { DEFAULT_TOPIC } 82 | 83 | // Create context with a 2 seconds batch interval 84 | withSparkStreaming(batchDuration = Durations.seconds(2), appName = "KotlinDirectKafkaWordCount") { 85 | 86 | val topicsSet: Set = topics.split(',').toSet() 87 | 88 | val kafkaParams: Map = mapOf( 89 | BOOTSTRAP_SERVERS_CONFIG to brokers, 90 | GROUP_ID_CONFIG to groupId, 91 | KEY_DESERIALIZER_CLASS_CONFIG to StringDeserializer::class.java, 92 | VALUE_DESERIALIZER_CLASS_CONFIG to StringDeserializer::class.java, 93 | ) 94 | 95 | // Create direct kafka stream with brokers and topics 96 | val messages: JavaInputDStream> = KafkaUtils.createDirectStream( 97 | ssc, 98 | LocationStrategies.PreferConsistent(), 99 | ConsumerStrategies.Subscribe(topicsSet, kafkaParams), 100 | ) 101 | 102 | // Get the lines, split them into words, count the words and print 103 | val lines: JavaDStream = messages.map { it.value() } 104 | val words: JavaDStream = lines.flatMap { it.split(SPACE).iterator() } 105 | 106 | val wordCounts: JavaDStream> = words 107 | .map { it X 1 } 108 | .reduceByKey { a: Int, b: Int -> a + b } 109 | 110 | wordCounts.print() 111 | 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/streaming/KotlinSqlNetworkWordCount.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples.streaming 21 | 22 | import org.apache.spark.api.java.JavaRDD 23 | import org.apache.spark.api.java.StorageLevels 24 | import org.apache.spark.streaming.Durations 25 | import org.apache.spark.streaming.Time 26 | import org.jetbrains.kotlinx.spark.api.withSparkStreaming 27 | import java.io.Serializable 28 | import java.util.regex.Pattern 29 | import kotlin.system.exitProcess 30 | 31 | 32 | /** 33 | * Use DataFrames and SQL to count words in UTF8 encoded, '\n' delimited text received from the 34 | * network every second. 35 | * 36 | * Usage: KotlinSqlNetworkWordCount 37 | * and describe the TCP server that Spark Streaming would connect to receive data. 38 | * 39 | * To run this on your local machine, you need to first run a Netcat server 40 | * `$ nc -lk 9999` 41 | * and then run the example 42 | * `$ bin/run-example org.apache.spark.examples.streaming.KotlinSqlNetworkWordCount localhost 9999` 43 | */ 44 | object KotlinSqlNetworkWordCount { 45 | 46 | private val SPACE = Pattern.compile(" ") 47 | 48 | private const val DEFAULT_IP = "localhost" 49 | private const val DEFAULT_PORT = "9999" 50 | 51 | @Throws(Exception::class) 52 | @JvmStatic 53 | fun main(args: Array) { 54 | if (args.size < 2 && args.isNotEmpty()) { 55 | System.err.println("Usage: KotlinNetworkWordCount ") 56 | exitProcess(1) 57 | } 58 | 59 | // Create the context with a 1 second batch size 60 | withSparkStreaming( 61 | batchDuration = Durations.seconds(1), 62 | appName = "KotlinSqlNetworkWordCount", 63 | ) { 64 | 65 | 66 | // Create a KotlinReceiverInputDStream on target ip:port and count the 67 | // words in input stream of \n delimited text (e.g. generated by 'nc') 68 | // Note that no duplication in storage level only for running locally. 69 | // Replication necessary in distributed scenario for fault tolerance. 70 | val lines = ssc.socketTextStream( 71 | args.getOrElse(0) { DEFAULT_IP }, 72 | args.getOrElse(1) { DEFAULT_PORT }.toInt(), 73 | StorageLevels.MEMORY_AND_DISK_SER, 74 | ) 75 | val words = lines.flatMap { it.split(SPACE).iterator() } 76 | 77 | // Convert RDDs of the words DStream to DataFrame and run SQL query 78 | words.foreachRDD { rdd: JavaRDD, time: Time -> 79 | withSpark(rdd) { 80 | 81 | // Convert JavaRDD to JavaRDD to DataFrame (Dataset) 82 | val rowRDD = rdd.map(::KotlinRecord) 83 | val wordsDataFrame = rowRDD.toDF() 84 | 85 | // Creates a temporary view using the DataFrame 86 | wordsDataFrame.createOrReplaceTempView("words") 87 | 88 | // Do word count on table using SQL and print it 89 | val wordCountsDataFrame = 90 | spark.sql("select word, count(*) as total from words group by word") 91 | println("========= $time=========") 92 | wordCountsDataFrame.show() 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | data class KotlinRecord(val word: String): Serializable 100 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/streaming/KotlinStatefulNetworkCount.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 
9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples.streaming 21 | 22 | import org.apache.spark.api.java.Optional 23 | import org.apache.spark.api.java.StorageLevels 24 | import org.apache.spark.streaming.Durations 25 | import org.apache.spark.streaming.State 26 | import org.apache.spark.streaming.StateSpec 27 | import org.jetbrains.kotlinx.spark.api.* 28 | import org.jetbrains.kotlinx.spark.api.tuples.X 29 | import java.util.regex.Pattern 30 | import kotlin.system.exitProcess 31 | 32 | 33 | /** 34 | * Src: https://github.com/apache/spark/blob/master/examples/src/main/java/org/apache/spark/examples/streaming/JavaStatefulNetworkWordCount.java 35 | * 36 | * Counts words cumulatively in UTF8 encoded, '\n' delimited text received from the network every 37 | * second starting with initial value of word count. 38 | * Usage: JavaStatefulNetworkWordCount 39 | * and describe the TCP server that Spark Streaming would connect to receive 40 | * data. 41 | * 42 | * 43 | * To run this on your local machine, you need to first run a Netcat server 44 | * `$ nc -lk 9999` 45 | * and then run the example 46 | * `$ bin/run-example 47 | * org.apache.spark.examples.streaming.JavaStatefulNetworkWordCount localhost 9999` */ 48 | object KotlinStatefulNetworkCount { 49 | 50 | private val SPACE = Pattern.compile(" ") 51 | 52 | private const val DEFAULT_HOSTNAME = "localhost" 53 | private const val DEFAULT_PORT = "9999" 54 | 55 | @Throws(Exception::class) 56 | @JvmStatic 57 | fun main(args: Array) { 58 | if (args.size < 2 && args.isNotEmpty()) { 59 | System.err.println("Usage: JavaStatefulNetworkWordCount ") 60 | exitProcess(1) 61 | } 62 | 63 | // Create the context with a 1 second batch size 64 | withSparkStreaming( 65 | batchDuration = Durations.seconds(1), 66 | checkpointPath = ".", 67 | appName = "JavaStatefulNetworkWordCount", 68 | ) { 69 | 70 | // Initial state RDD input to mapWithState 71 | val tuples = arrayOf("hello" X 1, "world" X 1) 72 | val initialRDD = ssc.sparkContext().rddOf(*tuples) 73 | 74 | val lines = ssc.socketTextStream( 75 | args.getOrElse(0) { DEFAULT_HOSTNAME }, 76 | args.getOrElse(1) { DEFAULT_PORT }.toInt(), 77 | StorageLevels.MEMORY_AND_DISK_SER_2, 78 | ) 79 | val words = lines.flatMap { it.split(SPACE).iterator() } 80 | 81 | val wordsDstream = words.map { it X 1 } 82 | 83 | // Update the cumulative count function 84 | val mappingFunc = { word: String, one: Optional, state: State -> 85 | val sum = one.getOrElse(0) + state.getOrElse(0) 86 | val output = word X sum 87 | state.update(sum) 88 | output 89 | } 90 | 91 | // DStream made of get cumulative counts that get updated in every batch 92 | val stateDstream = wordsDstream.mapWithState( 93 | StateSpec 94 | .function(mappingFunc) 95 | .initialState(initialRDD.toJavaPairRDD()) 96 | ) 97 | 98 | stateDstream.print() 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/streaming/Streaming.kt: 
-------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: Examples for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.examples.streaming 21 | 22 | import org.apache.spark.api.java.JavaRDD 23 | import org.apache.spark.sql.Dataset 24 | import org.apache.spark.streaming.Durations 25 | import org.apache.spark.streaming.Time 26 | import org.apache.spark.streaming.api.java.JavaDStream 27 | import org.apache.spark.streaming.api.java.JavaReceiverInputDStream 28 | import org.jetbrains.kotlinx.spark.api.* 29 | 30 | data class TestRow( 31 | val word: String, 32 | ) 33 | 34 | /** 35 | * To run this on your local machine, you need to first run a Netcat server 36 | * 37 | * `$ nc -lk 9999` 38 | */ 39 | fun main() = withSparkStreaming(batchDuration = Durations.seconds(1), timeout = 10_000) { // this: KSparkStreamingSession 40 | 41 | val lines: JavaReceiverInputDStream = ssc.socketTextStream("localhost", 9999) 42 | val words: JavaDStream = lines.flatMap { it.split(" ").iterator() } 43 | 44 | words.foreachRDD { rdd: JavaRDD, _: Time -> 45 | withSpark(rdd) { // this: KSparkSession 46 | val dataframe: Dataset = rdd.map { TestRow(it) }.toDS() 47 | dataframe 48 | .groupByKey { it.word } 49 | .count() 50 | .show() 51 | } 52 | } 53 | } -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | kotlin.daemon.jvmargs=-Xmx8g 2 | org.gradle.jvmargs=-Xmx8g -XX:MaxMetaspaceSize=1g -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8 3 | mavenCentralUsername=dummy 4 | mavenCentralPassword=dummy 5 | 6 | GROUP=org.jetbrains.kotlinx.spark 7 | 8 | # Controls the spark and scala version for the entire project 9 | # can also be defined like ./gradlew -Pspark=X.X.X -Pscala=X.X.X build 10 | spark=3.3.2 11 | scala=2.13.10 12 | # scala=2.12.17 13 | skipScalaTuplesInKotlin=false 14 | 15 | org.gradle.caching=true 16 | org.gradle.parallel=false 17 | #kotlin.incremental.useClasspathSnapshot=true 18 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kotlin/kotlin-spark-api/470bcf4dd6a0318a1cd0e947670f921f8f62969e/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.6-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | 
zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 
71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit 84 | 85 | APP_NAME="Gradle" 86 | APP_BASE_NAME=${0##*/} 87 | 88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! -x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 133 | fi 134 | else 135 | JAVACMD=java 136 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | 142 | # Increase the maximum file descriptors if we can. 143 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 144 | case $MAX_FD in #( 145 | max*) 146 | MAX_FD=$( ulimit -H -n ) || 147 | warn "Could not query maximum file descriptor limit" 148 | esac 149 | case $MAX_FD in #( 150 | '' | soft) :;; #( 151 | *) 152 | ulimit -n "$MAX_FD" || 153 | warn "Could not set maximum file descriptor limit to $MAX_FD" 154 | esac 155 | fi 156 | 157 | # Collect all arguments for the java command, stacking in reverse order: 158 | # * args from the command line 159 | # * the main class name 160 | # * -classpath 161 | # * -D...appname settings 162 | # * --module-path (only if needed) 163 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
164 | 165 | # For Cygwin or MSYS, switch paths to Windows format before running java 166 | if "$cygwin" || "$msys" ; then 167 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 168 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 169 | 170 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 171 | 172 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 173 | for arg do 174 | if 175 | case $arg in #( 176 | -*) false ;; # don't mess with options #( 177 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 178 | [ -e "$t" ] ;; #( 179 | *) false ;; 180 | esac 181 | then 182 | arg=$( cygpath --path --ignore --mixed "$arg" ) 183 | fi 184 | # Roll the args list around exactly as many times as the number of 185 | # args, so each arg winds up back in the position where it started, but 186 | # possibly modified. 187 | # 188 | # NB: a `for` loop captures its iteration list before it begins, so 189 | # changing the positional parameters here affects neither the number of 190 | # iterations, nor the values presented in `arg`. 191 | shift # remove old arg 192 | set -- "$@" "$arg" # push replacement arg 193 | done 194 | fi 195 | 196 | # Collect all arguments for the java command; 197 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of 198 | # shell script including quotes and variable substitutions, so put them in 199 | # double quotes to make sure that they get re-expanded; and 200 | # * put everything else in single quotes, so that it's not re-expanded. 201 | 202 | set -- \ 203 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 204 | -classpath "$CLASSPATH" \ 205 | org.gradle.wrapper.GradleWrapperMain \ 206 | "$@" 207 | 208 | # Use "xargs" to parse quoted args. 209 | # 210 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 211 | # 212 | # In Bash we could simply go: 213 | # 214 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 215 | # set -- "${ARGS[@]}" "$@" 216 | # 217 | # but POSIX shell has neither arrays nor command substitution, so instead we 218 | # post-process each arg (as a line of input to sed) to backslash-escape any 219 | # character that might be a shell metacharacter, then use eval to reverse 220 | # that process (while maintaining the separation between arguments), and wrap 221 | # the whole thing up as a single "set" statement. 222 | # 223 | # This will of course break if any of these variables contains a newline or 224 | # an unmatched quote. 225 | # 226 | 227 | eval "set -- $( 228 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 229 | xargs -n1 | 230 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 231 | tr '\n' ' ' 232 | )" '"$@"' 233 | 234 | exec "$JAVACMD" "$@" 235 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /gradlew_all_versions: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | # Run like you would `./gradlew arguments` 5 | # but now like `./gradlew_all_versions arguments`. 6 | 7 | DRY_RUN=${DRY_RUN:-false} 8 | SCALA2_12VERSION="2.12.16" 9 | SCALA2_13VERSION="2.13.8" 10 | SparkVersionsForBothScalaVersions=("3.3.0" "3.2.1" "3.2.0") 11 | SparkVersionsForScala2_12=("3.1.3" "3.1.2" "3.1.1" "3.1.0" "3.0.3" "3.0.2" "3.0.1" "3.0.0") 12 | 13 | echo Running for "$(expr ${#SparkVersionsForBothScalaVersions[@]} \* 2 + ${#SparkVersionsForScala2_12[@]}) versions of the library." 14 | 15 | echo "Cleaning the project first." 
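# Honour DRY_RUN (defaults to false above): when DRY_RUN=true the Gradle commands are only echoed, nothing is executed.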
16 | if [ "$DRY_RUN" = false ]; then 17 | ./gradlew clean 18 | fi 19 | 20 | ARGS=("$@") 21 | execute() { 22 | echo "running ./gradlew -Pspark=$SPARK -Pscala=$SCALA -PskipScalaTuplesInKotlin=$SKIP_SCALA_TUPLES -PenforceCleanJCP=true ${ARGS[*]}" 23 | if [ "$DRY_RUN" = false ]; then 24 | ./gradlew -Pspark="$SPARK" -Pscala="$SCALA" -PskipScalaTuplesInKotlin="$SKIP_SCALA_TUPLES" "${ARGS[@]}" 25 | fi 26 | } 27 | 28 | SCALA="$SCALA2_12VERSION" 29 | SKIP_SCALA_TUPLES=false 30 | for spark in "${SparkVersionsForScala2_12[@]}"; do 31 | SPARK="$spark" 32 | execute 33 | SKIP_SCALA_TUPLES=true 34 | done 35 | 36 | 37 | execute_for_both_scala_versions() { 38 | for spark in "${SparkVersionsForBothScalaVersions[@]}"; do 39 | SPARK="$spark" 40 | execute 41 | SKIP_SCALA_TUPLES=true 42 | done 43 | } 44 | SCALA="$SCALA2_12VERSION" 45 | execute_for_both_scala_versions 46 | 47 | SCALA="$SCALA2_13VERSION" 48 | SKIP_SCALA_TUPLES=false 49 | execute_for_both_scala_versions 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /jupyter/build.gradle.kts: -------------------------------------------------------------------------------- 1 | @file:Suppress("UnstableApiUsage", "NOTHING_TO_INLINE") 2 | 3 | import com.igormaznitsa.jcp.gradle.JcpTask 4 | import com.vanniktech.maven.publish.JavadocJar.Dokka 5 | import com.vanniktech.maven.publish.KotlinJvm 6 | import org.jetbrains.dokka.gradle.AbstractDokkaLeafTask 7 | import org.jetbrains.kotlin.gradle.tasks.KotlinCompile 8 | 9 | plugins { 10 | scala 11 | kotlin 12 | dokka 13 | mavenPublishBase 14 | jupyter 15 | jcp 16 | } 17 | 18 | group = Versions.groupID 19 | version = Versions.project 20 | 21 | repositories { 22 | mavenCentral() 23 | maven(url = "https://maven.pkg.jetbrains.space/public/p/kotlinx-html/maven") 24 | maven(url = "https://maven.pkg.jetbrains.space/kotlin/p/kotlin/dev") 25 | } 26 | 27 | tasks.withType().configureEach { 28 | useJUnitPlatform() 29 | maxHeapSize = "2g" 30 | } 31 | 32 | tasks.processJupyterApiResources { 33 | libraryProducers = listOf( 34 | "org.jetbrains.kotlinx.spark.api.jupyter.SparkIntegration", 35 | "org.jetbrains.kotlinx.spark.api.jupyter.SparkStreamingIntegration", 36 | ) 37 | } 38 | 39 | dependencies { 40 | with(Projects) { 41 | api( 42 | kotlinSparkApi, 43 | ) 44 | } 45 | 46 | with(Dependencies) { 47 | 48 | // https://github.com/FasterXML/jackson-bom/issues/52 49 | if (Versions.spark == "3.3.1") implementation(jacksonDatabind) 50 | 51 | api( 52 | kotlinxHtml, 53 | sparkSql, 54 | sparkRepl, 55 | sparkStreaming, 56 | hadoopClient, 57 | ) 58 | 59 | implementation( 60 | kotlinStdLib, 61 | ) 62 | 63 | testImplementation( 64 | kotest, 65 | kotlinScriptingCommon, 66 | kotlinScriptingJvm, 67 | ) 68 | 69 | } 70 | } 71 | 72 | // Setup preprocessing with JCP for main sources 73 | 74 | val kotlinMainSources = kotlin.sourceSets.main.get().kotlin.sourceDirectories 75 | 76 | val preprocessMain by tasks.creating(JcpTask::class) { 77 | sources.set(kotlinMainSources) 78 | clearTarget.set(true) 79 | fileExtensions.set(listOf("kt")) 80 | vars.set(Versions.versionMap) 81 | outputs.upToDateWhen { target.get().exists() } 82 | } 83 | 84 | tasks.compileKotlin { 85 | dependsOn(preprocessMain) 86 | outputs.upToDateWhen { preprocessMain.outcomingFiles.files.isEmpty() } 87 | doFirst { 88 | kotlin { 89 | sourceSets { 90 | main { 91 | kotlin.setSrcDirs(listOf(preprocessMain.target.get())) 92 | } 93 | } 94 | } 95 | } 96 | 97 | doLast { 98 | kotlin { 99 | sourceSets { 100 | main { 101 | kotlin.setSrcDirs(kotlinMainSources) 
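                    // doLast: point the main source set back at the original, unpreprocessed sources once compilation is done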
102 | } 103 | } 104 | } 105 | } 106 | } 107 | 108 | // Setup preprocessing with JCP for test sources 109 | 110 | val kotlinTestSources = kotlin.sourceSets.test.get().kotlin.sourceDirectories 111 | 112 | val preprocessTest by tasks.creating(JcpTask::class) { 113 | sources.set(kotlinTestSources) 114 | clearTarget.set(true) 115 | fileExtensions.set(listOf("java", "kt")) 116 | vars.set(Versions.versionMap) 117 | outputs.upToDateWhen { target.get().exists() } 118 | } 119 | 120 | tasks.compileTestKotlin { 121 | dependsOn(preprocessTest) 122 | outputs.upToDateWhen { preprocessTest.outcomingFiles.files.isEmpty() } 123 | doFirst { 124 | kotlin { 125 | sourceSets { 126 | test { 127 | kotlin.setSrcDirs(listOf(preprocessTest.target.get())) 128 | } 129 | } 130 | } 131 | } 132 | 133 | doLast { 134 | kotlin { 135 | sourceSets { 136 | test { 137 | kotlin.setSrcDirs(kotlinTestSources) 138 | } 139 | } 140 | } 141 | } 142 | } 143 | 144 | kotlin { 145 | jvmToolchain { 146 | languageVersion.set( 147 | JavaLanguageVersion.of(Versions.jupyterJvmTarget) 148 | ) 149 | } 150 | } 151 | 152 | tasks.withType { 153 | dokkaSourceSets { 154 | all { 155 | sourceRoot(preprocessMain.target.get()) 156 | } 157 | } 158 | } 159 | 160 | 161 | mavenPublishing { 162 | configure(KotlinJvm(Dokka("dokkaHtml"))) 163 | } -------------------------------------------------------------------------------- /jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/HtmlRendering.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.api.jupyter 21 | 22 | import kotlinx.html.* 23 | import kotlinx.html.stream.appendHTML 24 | import org.apache.spark.SparkException 25 | import org.apache.spark.api.java.JavaRDDLike 26 | import org.apache.spark.sql.Dataset 27 | import org.apache.spark.unsafe.array.ByteArrayMethods 28 | import org.jetbrains.kotlinx.spark.api.asKotlinIterable 29 | import org.jetbrains.kotlinx.spark.api.asKotlinIterator 30 | import org.jetbrains.kotlinx.spark.api.asKotlinList 31 | import scala.Product 32 | import java.io.InputStreamReader 33 | import java.io.Serializable 34 | 35 | private fun createHtmlTable(fillTable: TABLE.() -> Unit): String = buildString { 36 | appendHTML().head { 37 | style("text/css") { 38 | unsafe { 39 | val resource = "/table.css" 40 | val res = SparkIntegration::class.java 41 | .getResourceAsStream(resource) ?: error("Resource '$resource' not found") 42 | val readRes = InputStreamReader(res).readText() 43 | raw("\n" + readRes) 44 | } 45 | } 46 | } 47 | 48 | appendHTML().table("dataset", fillTable) 49 | } 50 | 51 | 52 | internal fun JavaRDDLike.toHtml(limit: Int = 20, truncate: Int = 30): String = try { 53 | createHtmlTable { 54 | val numRows = limit.coerceIn(0 until ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) 55 | val tmpRows = take(numRows).toList() 56 | 57 | val hasMoreData = tmpRows.size - 1 > numRows 58 | val rows = tmpRows.take(numRows) 59 | 60 | tr { th { +"Values" } } 61 | 62 | for (row in rows) tr { 63 | td { 64 | val string = when (row) { 65 | is ByteArray -> row.joinToString(prefix = "[", postfix = "]") { "%02X".format(it) } 66 | 67 | is CharArray -> row.iterator().asSequence().toList().toString() 68 | is ShortArray -> row.iterator().asSequence().toList().toString() 69 | is IntArray -> row.iterator().asSequence().toList().toString() 70 | is LongArray -> row.iterator().asSequence().toList().toString() 71 | is FloatArray -> row.iterator().asSequence().toList().toString() 72 | is DoubleArray -> row.iterator().asSequence().toList().toString() 73 | is BooleanArray -> row.iterator().asSequence().toList().toString() 74 | is Array<*> -> row.iterator().asSequence().toList().toString() 75 | is Iterable<*> -> row.iterator().asSequence().toList().toString() 76 | is scala.collection.Iterable<*> -> row.asKotlinIterable().iterator().asSequence().toList().toString() 77 | is Iterator<*> -> row.asSequence().toList().toString() 78 | is scala.collection.Iterator<*> -> row.asKotlinIterator().asSequence().toList().toString() 79 | is Product -> row.productIterator().asKotlinIterator().asSequence().toList().toString() 80 | is Serializable -> row.toString() 81 | // maybe others? 82 | 83 | is Any? -> row.toString() 84 | else -> row.toString() 85 | } 86 | 87 | +if (truncate > 0 && string.length > truncate) { 88 | // do not show ellipses for strings shorter than 4 characters. 89 | if (truncate < 4) string.substring(0, truncate) 90 | else string.substring(0, truncate - 3) + "..." 91 | } else { 92 | string 93 | } 94 | } 95 | } 96 | 97 | if (hasMoreData) tr { 98 | +"only showing top $numRows ${if (numRows == 1) "row" else "rows"}" 99 | } 100 | } 101 | } catch (e: SparkException) { 102 | // Whenever toString() on the contents doesn't work, since the class might be unknown... 
103 | """${toString()} 104 | |Cannot render this RDD of this class.""".trimMargin() 105 | } 106 | 107 | internal fun Dataset.toHtml(limit: Int = 20, truncate: Int = 30): String = createHtmlTable { 108 | val numRows = limit.coerceIn(0 until ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) 109 | val tmpRows = getRows(numRows, truncate).asKotlinList().map { it.asKotlinList() } 110 | 111 | val hasMoreData = tmpRows.size - 1 > numRows 112 | val rows = tmpRows.take(numRows + 1) 113 | 114 | tr { 115 | for (header in rows.first()) th { 116 | +if (truncate > 0 && header.length > truncate) { 117 | // do not show ellipses for strings shorter than 4 characters. 118 | if (truncate < 4) header.substring(0, truncate) 119 | else header.substring(0, truncate - 3) + "..." 120 | } else { 121 | header 122 | } 123 | 124 | } 125 | } 126 | 127 | for (row in rows.drop(1)) tr { 128 | for (item in row) td { 129 | +item 130 | } 131 | } 132 | 133 | if (hasMoreData) tr { 134 | +"only showing top $numRows ${if (numRows == 1) "row" else "rows"}" 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.api.jupyter 21 | 22 | import kotlinx.serialization.Serializable 23 | import kotlinx.serialization.json.* 24 | import org.apache.spark.api.java.JavaRDDLike 25 | import org.apache.spark.rdd.RDD 26 | import org.apache.spark.sql.Dataset 27 | import org.intellij.lang.annotations.Language 28 | import org.jetbrains.kotlinx.jupyter.api.* 29 | import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration 30 | import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.displayLimitName 31 | import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.displayTruncateName 32 | import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.scalaName 33 | import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.sparkName 34 | import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.sparkPropertiesName 35 | import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.versionName 36 | import kotlin.reflect.KProperty1 37 | import kotlin.reflect.typeOf 38 | 39 | 40 | abstract class Integration(private val notebook: Notebook, private val options: MutableMap) : 41 | JupyterIntegration() { 42 | 43 | protected val kotlinVersion = /*$"\""+kotlin+"\""$*/ /*-*/ "" 44 | protected val scalaCompatVersion = /*$"\""+scalaCompat+"\""$*/ /*-*/ "" 45 | protected val scalaVersion = /*$"\""+scala+"\""$*/ /*-*/ "" 46 | protected val sparkVersion = /*$"\""+spark+"\""$*/ /*-*/ "" 47 | protected val version = /*$"\""+version+"\""$*/ /*-*/ "" 48 | 49 | protected val displayLimitOld = "DISPLAY_LIMIT" 50 | protected val displayTruncateOld = "DISPLAY_TRUNCATE" 51 | 52 | protected val properties: Properties 53 | get() = notebook 54 | .variablesState[sparkPropertiesName]!! 
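// (Added note: the shared Properties instance is looked up from the notebook's variable state on
//  every access, so options passed at %use time and later mutations of `sparkProperties` in cells
//  are both observed.)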
55 | .value 56 | .getOrThrow() as Properties 57 | 58 | 59 | protected open val usingProperties = arrayOf( 60 | displayLimitName, 61 | displayTruncateName, 62 | sparkName, 63 | scalaName, 64 | versionName, 65 | ) 66 | 67 | /** 68 | * Will be run after importing all dependencies 69 | */ 70 | open fun KotlinKernelHost.onLoaded() = Unit 71 | 72 | open fun KotlinKernelHost.onShutdown() = Unit 73 | 74 | open fun KotlinKernelHost.onInterrupt() = Unit 75 | 76 | open fun KotlinKernelHost.beforeCellExecution() = Unit 77 | 78 | open fun KotlinKernelHost.afterCellExecution(snippetInstance: Any, result: FieldValue) = Unit 79 | 80 | open fun Builder.onLoadedAlsoDo() = Unit 81 | 82 | open val dependencies: Array = arrayOf( 83 | "org.apache.spark:spark-repl_$scalaCompatVersion:$sparkVersion", 84 | "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlinVersion", 85 | "org.jetbrains.kotlin:kotlin-reflect:$kotlinVersion", 86 | "org.apache.spark:spark-sql_$scalaCompatVersion:$sparkVersion", 87 | "org.apache.spark:spark-yarn_$scalaCompatVersion:$sparkVersion", 88 | "org.apache.spark:spark-streaming_$scalaCompatVersion:$sparkVersion", 89 | "org.apache.spark:spark-mllib_$scalaCompatVersion:$sparkVersion", 90 | "org.apache.spark:spark-sql_$scalaCompatVersion:$sparkVersion", 91 | "org.apache.spark:spark-graphx_$scalaCompatVersion:$sparkVersion", 92 | "org.apache.spark:spark-launcher_$scalaCompatVersion:$sparkVersion", 93 | "org.apache.spark:spark-catalyst_$scalaCompatVersion:$sparkVersion", 94 | "org.apache.spark:spark-streaming_$scalaCompatVersion:$sparkVersion", 95 | "org.apache.spark:spark-core_$scalaCompatVersion:$sparkVersion", 96 | "org.scala-lang:scala-library:$scalaVersion", 97 | "org.scala-lang.modules:scala-xml_$scalaCompatVersion:2.0.1", 98 | "org.scala-lang:scala-reflect:$scalaVersion", 99 | "org.scala-lang:scala-compiler:$scalaVersion", 100 | "commons-io:commons-io:2.11.0", 101 | ) 102 | 103 | open val imports: Array = arrayOf( 104 | "org.jetbrains.kotlinx.spark.api.*", 105 | "org.jetbrains.kotlinx.spark.api.tuples.*", 106 | *(1..22).map { "scala.Tuple$it" }.toTypedArray(), 107 | "org.apache.spark.sql.functions.*", 108 | "org.apache.spark.*", 109 | "org.apache.spark.sql.*", 110 | "org.apache.spark.api.java.*", 111 | "scala.collection.Seq", 112 | "org.apache.spark.rdd.*", 113 | "java.io.Serializable", 114 | "org.apache.spark.streaming.api.java.*", 115 | "org.apache.spark.streaming.api.*", 116 | "org.apache.spark.streaming.*", 117 | ) 118 | 119 | override fun Builder.onLoaded() { 120 | dependencies(*dependencies) 121 | import(*imports) 122 | 123 | onLoaded { 124 | 125 | val mutableOptions = options.toMutableMap() 126 | 127 | declare( 128 | VariableDeclaration( 129 | name = sparkPropertiesName, 130 | value = object : Properties, MutableMap by mutableOptions { 131 | override fun toString(): String = "Properties: $mutableOptions" 132 | }, 133 | type = typeOf(), 134 | isMutable = true, 135 | ) 136 | ) 137 | 138 | @Language("kts") 139 | val _0 = execute( 140 | """ 141 | @Deprecated("Use ${displayLimitName}=${properties.displayLimit} in %use magic or ${sparkPropertiesName}.${displayLimitName} = ${properties.displayLimit} instead", ReplaceWith("${sparkPropertiesName}.${displayLimitName}")) 142 | var $displayLimitOld: Int 143 | get() = ${sparkPropertiesName}.${displayLimitName} 144 | set(value) { 145 | println("$displayLimitOld is deprecated: Use ${sparkPropertiesName}.${displayLimitName} instead") 146 | ${sparkPropertiesName}.${displayLimitName} = value 147 | } 148 | 149 | @Deprecated("Use 
${displayTruncateName}=${properties.displayTruncate} in %use magic or ${sparkPropertiesName}.${displayTruncateName} = ${properties.displayTruncate} instead", ReplaceWith("${sparkPropertiesName}.${displayTruncateName}")) 150 | var $displayTruncateOld: Int 151 | get() = ${sparkPropertiesName}.${displayTruncateName} 152 | set(value) { 153 | println("$displayTruncateOld is deprecated: Use ${sparkPropertiesName}.${displayTruncateName} instead") 154 | ${sparkPropertiesName}.${displayTruncateName} = value 155 | } 156 | """.trimIndent() 157 | ) 158 | 159 | onLoaded() 160 | } 161 | 162 | beforeCellExecution { 163 | if (scalaCompatVersion.toDouble() >= 2.13) 164 | execute("scala.`Console\$`.`MODULE\$`.setOutDirect(System.out)") 165 | else 166 | execute("""scala.Console.setOut(System.out)""") 167 | 168 | beforeCellExecution() 169 | } 170 | 171 | afterCellExecution { snippetInstance, result -> 172 | afterCellExecution(snippetInstance, result) 173 | } 174 | 175 | onInterrupt { 176 | onInterrupt() 177 | } 178 | 179 | onShutdown { 180 | onShutdown() 181 | } 182 | 183 | 184 | // Render Dataset 185 | render> { 186 | with(properties) { 187 | HTML(it.toHtml(limit = displayLimit, truncate = displayTruncate)) 188 | } 189 | } 190 | 191 | render> { 192 | with(properties) { 193 | HTML(it.toJavaRDD().toHtml(limit = displayLimit, truncate = displayTruncate)) 194 | } 195 | } 196 | 197 | render> { 198 | with(properties) { 199 | HTML(it.toHtml(limit = displayLimit, truncate = displayTruncate)) 200 | } 201 | 202 | } 203 | 204 | onLoadedAlsoDo() 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterConfiguration.kt: -------------------------------------------------------------------------------- 1 | package org.jetbrains.kotlinx.spark.api.jupyter 2 | 3 | //class JupyterConfiguration(init: JupyterConfiguration.() -> Unit = {}) { 4 | // init { invoke(init) } 5 | // val sparkProps: MutableMap = mutableMapOf() 6 | // operator fun invoke(block: JupyterConfiguration.() -> Unit): JupyterConfiguration { 7 | // block(this) 8 | // return this 9 | // } 10 | //} 11 | 12 | interface JupyterConfiguration { 13 | val sparkProps: MutableMap 14 | 15 | operator fun invoke(block: JupyterConfiguration.() -> Unit): JupyterConfiguration { 16 | block(this) 17 | return this 18 | } 19 | } -------------------------------------------------------------------------------- /jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Properties.kt: -------------------------------------------------------------------------------- 1 | package org.jetbrains.kotlinx.spark.api.jupyter 2 | 3 | import kotlinx.serialization.Serializable 4 | import kotlinx.serialization.json.Json 5 | import kotlinx.serialization.json.buildJsonObject 6 | import kotlinx.serialization.json.decodeFromJsonElement 7 | import kotlinx.serialization.json.put 8 | 9 | interface Properties : MutableMap { 10 | 11 | companion object { 12 | internal const val sparkPropertiesName = "sparkProperties" 13 | 14 | internal const val sparkMasterName = "spark.master" 15 | internal const val appNameName = "spark.app.name" 16 | internal const val sparkName = "spark" 17 | internal const val scalaName = "scala" 18 | internal const val versionName = "v" 19 | internal const val displayLimitName = "displayLimit" 20 | internal const val displayTruncateName = "displayTruncate" 21 | } 22 | 23 | /** The value which limits the number of rows while displaying an RDD or Dataset. 
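 * For example (editor's illustration): `sparkProperties.displayLimit = 5` caps every rendered
 * table at 5 rows.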
24 | * Default: 20 25 | */ 26 | var displayLimit: Int 27 | set(value) { this[displayLimitName] = value.toString() } 28 | get() = this[displayLimitName]?.toIntOrNull() ?: 20 29 | 30 | /** The value which limits the number characters per cell while displaying an RDD or Dataset. 31 | * `-1` for no limit. 32 | * Default: 30 33 | */ 34 | var displayTruncate: Int 35 | set(value) { this[displayTruncateName] = value.toString() } 36 | get() = this[displayTruncateName]?.toIntOrNull() ?: 30 37 | 38 | 39 | operator fun invoke(block: Properties.() -> Unit): Properties = apply(block) 40 | } 41 | -------------------------------------------------------------------------------- /jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | @file:Suppress("UsePropertyAccessSyntax") 21 | 22 | package org.jetbrains.kotlinx.spark.api.jupyter 23 | 24 | 25 | import org.intellij.lang.annotations.Language 26 | import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost 27 | import org.jetbrains.kotlinx.jupyter.api.Notebook 28 | import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.appNameName 29 | import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.sparkMasterName 30 | 31 | 32 | /** 33 | * %use spark 34 | */ 35 | @Suppress("UNUSED_VARIABLE", "LocalVariableName") 36 | @OptIn(ExperimentalStdlibApi::class) 37 | class SparkIntegration(notebook: Notebook, options: MutableMap) : Integration(notebook, options) { 38 | 39 | override fun KotlinKernelHost.onLoaded() { 40 | val _0 = execute("""%dumpClassesForSpark""") 41 | 42 | properties { 43 | getOrPut(sparkMasterName) { "local[*]" } 44 | getOrPut(appNameName) { "Kotlin Spark API - Jupyter" } 45 | getOrPut("spark.sql.codegen.wholeStage") { "false" } 46 | getOrPut("fs.hdfs.impl") { org.apache.hadoop.hdfs.DistributedFileSystem::class.java.name } 47 | getOrPut("fs.file.impl") { org.apache.hadoop.fs.LocalFileSystem::class.java.name } 48 | } 49 | 50 | @Language("kts") 51 | val _1 = listOf( 52 | """ 53 | val spark = org.jetbrains.kotlinx.spark.api.SparkSession 54 | .builder() 55 | .apply { 56 | ${ 57 | buildString { 58 | val sparkProps = properties.filterKeys { it !in usingProperties } 59 | println("received properties: $properties, providing Spark with: $sparkProps") 60 | 61 | sparkProps.forEach { (key, value) -> 62 | appendLine("config(\"${key}\", \"$value\")") 63 | } 64 | } 65 | } 66 | } 67 | .getOrCreate()""".trimIndent(), 68 | """ 69 | spark.sparkContext.setLogLevel(org.jetbrains.kotlinx.spark.api.SparkLogLevel.ERROR)""".trimIndent(), 70 | """ 71 | val sc by lazy { 72 | org.apache.spark.api.java.JavaSparkContext(spark.sparkContext) 73 | }""".trimIndent(), 74 | """ 75 | 
println("Spark session (Spark: $sparkVersion, Scala: $scalaCompatVersion, v: $version) has been started and is running. No `withSpark { }` necessary, you can access `spark` and `sc` directly. To use Spark streaming, use `%use spark-streaming` instead.")""".trimIndent(), 76 | """ 77 | inline fun List.toDS(): Dataset = toDS(spark)""".trimIndent(), 78 | """ 79 | inline fun List.toDF(vararg colNames: String): Dataset = toDF(spark, *colNames)""".trimIndent(), 80 | """ 81 | inline fun Array.toDS(): Dataset = toDS(spark)""".trimIndent(), 82 | """ 83 | inline fun Array.toDF(vararg colNames: String): Dataset = toDF(spark, *colNames)""".trimIndent(), 84 | """ 85 | inline fun dsOf(vararg arg: T): Dataset = spark.dsOf(*arg)""".trimIndent(), 86 | """ 87 | inline fun dfOf(vararg arg: T): Dataset = spark.dfOf(*arg)""".trimIndent(), 88 | """ 89 | inline fun emptyDataset(): Dataset = spark.emptyDataset(encoder())""".trimIndent(), 90 | """ 91 | inline fun dfOf(colNames: Array, vararg arg: T): Dataset = spark.dfOf(colNames, *arg)""".trimIndent(), 92 | """ 93 | inline fun RDD.toDS(): Dataset = toDS(spark)""".trimIndent(), 94 | """ 95 | inline fun JavaRDDLike.toDS(): Dataset = toDS(spark)""".trimIndent(), 96 | """ 97 | inline fun RDD.toDF(vararg colNames: String): Dataset = toDF(spark, *colNames)""".trimIndent(), 98 | """ 99 | inline fun JavaRDDLike.toDF(vararg colNames: String): Dataset = toDF(spark, *colNames)""".trimIndent(), 100 | """ 101 | fun List.toRDD(numSlices: Int = sc.defaultParallelism()): JavaRDD = sc.toRDD(this, numSlices)""".trimIndent(), 102 | """ 103 | fun rddOf(vararg elements: T, numSlices: Int = sc.defaultParallelism()): JavaRDD = sc.toRDD(elements.toList(), numSlices)""".trimIndent(), 104 | """ 105 | val udf: UDFRegistration get() = spark.udf()""".trimIndent(), 106 | """ 107 | inline fun > NAMED_UDF.register(): NAMED_UDF = spark.udf().register(namedUdf = this)""".trimIndent(), 108 | """ 109 | inline fun > UserDefinedFunction.register(name: String): NAMED_UDF = spark.udf().register(name = name, udf = this)""".trimIndent(), 110 | ).map(::execute) 111 | } 112 | 113 | override fun KotlinKernelHost.onShutdown() { 114 | execute("""spark.stop()""") 115 | } 116 | } 117 | 118 | -------------------------------------------------------------------------------- /jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkStreamingIntegration.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.api.jupyter 21 | 22 | 23 | import org.apache.spark.streaming.StreamingContextState 24 | import org.apache.spark.streaming.api.java.JavaStreamingContext 25 | import org.intellij.lang.annotations.Language 26 | import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost 27 | import org.jetbrains.kotlinx.jupyter.api.Notebook 28 | import org.jetbrains.kotlinx.jupyter.api.VariableDeclaration 29 | import org.jetbrains.kotlinx.jupyter.api.declare 30 | import kotlin.reflect.typeOf 31 | 32 | /** 33 | * %use spark-streaming 34 | */ 35 | @Suppress("UNUSED_VARIABLE", "LocalVariableName") 36 | class SparkStreamingIntegration(notebook: Notebook, options: MutableMap) : Integration(notebook, options) { 37 | 38 | override val imports: Array = super.imports + arrayOf( 39 | "org.apache.spark.deploy.SparkHadoopUtil", 40 | "org.apache.hadoop.conf.Configuration", 41 | ) 42 | 43 | private val sscCollection = mutableSetOf() 44 | 45 | override fun KotlinKernelHost.onLoaded() { 46 | 47 | declare( 48 | VariableDeclaration( 49 | name = ::sscCollection.name, 50 | value = sscCollection, 51 | isMutable = false, 52 | type = typeOf>(), 53 | ) 54 | ) 55 | 56 | val _0 = execute("""%dumpClassesForSpark""") 57 | 58 | @Language("kts") 59 | val _1 = listOf( 60 | """ 61 | @JvmOverloads 62 | fun withSparkStreaming( 63 | batchDuration: Duration = Durations.seconds(1L), 64 | checkpointPath: String? = null, 65 | hadoopConf: Configuration = SparkHadoopUtil.get().conf(), 66 | createOnError: Boolean = false, 67 | props: Map = emptyMap(), 68 | master: String = SparkConf().get("spark.master", "local[*]"), 69 | appName: String = "Kotlin Spark Sample", 70 | timeout: Long = -1L, 71 | startStreamingContext: Boolean = true, 72 | func: KSparkStreamingSession.() -> Unit, 73 | ) { 74 | 75 | // will only be set when a new context is created 76 | var kSparkStreamingSession: KSparkStreamingSession? = null 77 | 78 | val creatingFunc = { 79 | val sc = SparkConf() 80 | .setAppName(appName) 81 | .setMaster(master) 82 | .setAll( 83 | props 84 | .map { (key, value) -> key X value.toString() } 85 | .asScalaIterable() 86 | ) 87 | 88 | val ssc = JavaStreamingContext(sc, batchDuration) 89 | ssc.checkpoint(checkpointPath) 90 | 91 | kSparkStreamingSession = KSparkStreamingSession(ssc) 92 | func(kSparkStreamingSession!!) 93 | 94 | ssc 95 | } 96 | 97 | val ssc = when { 98 | checkpointPath != null -> 99 | JavaStreamingContext.getOrCreate(checkpointPath, creatingFunc, hadoopConf, createOnError) 100 | 101 | else -> creatingFunc() 102 | } 103 | sscCollection += ssc 104 | 105 | if (startStreamingContext) { 106 | ssc.start() 107 | kSparkStreamingSession?.invokeRunAfterStart() 108 | } 109 | ssc.awaitTerminationOrTimeout(timeout) 110 | ssc.stop() 111 | } 112 | """.trimIndent(), 113 | """ 114 | println("To start a Spark (Spark: $sparkVersion, Scala: $scalaCompatVersion, v: $version) Streaming session, simply use `withSparkStreaming { }` inside a cell. To use Spark normally, use `withSpark { }` in a cell, or use `%use spark` to start a Spark session for the whole notebook.")""".trimIndent(), 115 | ).map(::execute) 116 | } 117 | 118 | private fun cleanUp(e: Throwable): String { 119 | while (sscCollection.isNotEmpty()) 120 | sscCollection.first().let { 121 | while (it.state != StreamingContextState.STOPPED) { 122 | try { 123 | it.stop(true, true) 124 | } catch (_: Exception) { 125 | } 126 | } 127 | sscCollection.remove(it) 128 | } 129 | 130 | return "Spark streams cleaned up. 
Cause: $e" 131 | } 132 | 133 | override fun Builder.onLoadedAlsoDo() { 134 | renderThrowable { 135 | cleanUp(it) 136 | } 137 | } 138 | 139 | override fun KotlinKernelHost.onInterrupt() { 140 | println( 141 | cleanUp(InterruptedException("Kernel was interrupted.")) 142 | ) 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /jupyter/src/main/resources/table.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --background: #fff; 3 | --background-odd: #f5f5f5; 4 | --background-hover: #d9edfd; 5 | --header-text-color: #474747; 6 | --text-color: #848484; 7 | --text-color-dark: #000; 8 | --text-color-medium: #737373; 9 | --text-color-pale: #b3b3b3; 10 | --inner-border-color: #aaa; 11 | --bold-border-color: #000; 12 | --link-color: #296eaa; 13 | --link-color-pale: #296eaa; 14 | --link-hover: #1a466c; 15 | } 16 | 17 | :root[theme="dark"], :root [data-jp-theme-light="false"]{ 18 | --background: #303030; 19 | --background-odd: #3c3c3c; 20 | --background-hover: #464646; 21 | --header-text-color: #dddddd; 22 | --text-color: #b3b3b3; 23 | --text-color-dark: #dddddd; 24 | --text-color-medium: #b2b2b2; 25 | --text-color-pale: #737373; 26 | --inner-border-color: #707070; 27 | --bold-border-color: #777777; 28 | --link-color: #008dc0; 29 | --link-color-pale: #97e1fb; 30 | --link-hover: #00688e; 31 | } 32 | 33 | table.dataset { 34 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; 35 | font-size: 12px; 36 | background-color: var(--background); 37 | color: var(--text-color); 38 | border: none; 39 | border-collapse: collapse; 40 | } 41 | 42 | table.dataset th, td { 43 | padding: 6px; 44 | border: 1px solid transparent; 45 | text-align: left; 46 | } 47 | 48 | table.dataset th { 49 | background-color: var(--background); 50 | color: var(--header-text-color); 51 | } 52 | 53 | table.dataset td { 54 | vertical-align: top; 55 | } 56 | 57 | table.dataset th.bottomBorder { 58 | border-bottom-color: var(--bold-border-color); 59 | } 60 | 61 | table.dataset tbody > tr:nth-child(odd) { 62 | background: var(--background-odd); 63 | } 64 | 65 | table.dataset tbody > tr:nth-child(even) { 66 | background: var(--background); 67 | } 68 | 69 | table.dataset tbody > tr:hover { 70 | background: var(--background-hover); 71 | } 72 | 73 | table.dataset a { 74 | cursor: pointer; 75 | color: var(--link-color); 76 | text-decoration: none; 77 | } 78 | 79 | table.dataset tr:hover > td a { 80 | color: var(--link-color-pale); 81 | } 82 | 83 | table.dataset a:hover { 84 | color: var(--link-hover); 85 | text-decoration: underline; 86 | } 87 | 88 | table.dataset img { 89 | max-width: fit-content; 90 | } 91 | 92 | table.dataset th.complex { 93 | background-color: var(--background); 94 | border: 1px solid var(--background); 95 | } 96 | 97 | table.dataset .leftBorder { 98 | border-left-color: var(--inner-border-color); 99 | } 100 | 101 | table.dataset .rightBorder { 102 | border-right-color: var(--inner-border-color); 103 | } 104 | 105 | table.dataset .rightAlign { 106 | text-align: right; 107 | } 108 | 109 | table.dataset .expanderSvg { 110 | width: 8px; 111 | height: 8px; 112 | margin-right: 3px; 113 | } 114 | 115 | table.dataset .expander { 116 | display: flex; 117 | align-items: center; 118 | } 119 | 120 | /* formatting */ 121 | 122 | table.dataset .null { 123 | color: var(--text-color-pale); 124 | } 125 | 126 | table.dataset .structural { 127 | color: var(--text-color-medium); 128 | font-weight: bold; 129 | } 130 | 131 | 
table.dataset .datasetCaption { 132 | font-weight: bold; 133 | } 134 | 135 | table.dataset .numbers { 136 | color: var(--text-color-dark); 137 | } 138 | 139 | table.dataset td:hover .formatted .structural, .null { 140 | color: var(--text-color-dark); 141 | } 142 | 143 | table.dataset tr:hover .formatted .structural, .null { 144 | color: var(--text-color-dark); 145 | } 146 | 147 | -------------------------------------------------------------------------------- /kotlin-spark-api/build.gradle.kts: -------------------------------------------------------------------------------- 1 | @file:Suppress("UnstableApiUsage", "NOTHING_TO_INLINE") 2 | 3 | import com.igormaznitsa.jcp.gradle.JcpTask 4 | import com.vanniktech.maven.publish.JavadocJar.Dokka 5 | import com.vanniktech.maven.publish.KotlinJvm 6 | import org.jetbrains.dokka.gradle.AbstractDokkaLeafTask 7 | 8 | plugins { 9 | kotlin 10 | dokka 11 | mavenPublishBase 12 | jcp 13 | idea 14 | } 15 | 16 | group = Versions.groupID 17 | version = Versions.project 18 | 19 | 20 | repositories { 21 | mavenCentral() 22 | } 23 | 24 | tasks.withType().configureEach { 25 | useJUnitPlatform() 26 | maxHeapSize = "8g" 27 | } 28 | 29 | dependencies { 30 | 31 | with(Projects) { 32 | api( 33 | core, 34 | scalaTuplesInKotlin, 35 | ) 36 | } 37 | 38 | with(Dependencies) { 39 | 40 | // https://github.com/FasterXML/jackson-bom/issues/52 41 | if (Versions.spark == "3.3.1") implementation(jacksonDatabind) 42 | 43 | implementation( 44 | kotlinStdLib, 45 | reflect, 46 | sparkSql, 47 | sparkStreaming, 48 | hadoopClient, 49 | ) 50 | 51 | testImplementation( 52 | sparkStreamingKafka, 53 | kotest, 54 | kotestTestcontainers, 55 | klaxon, 56 | atrium, 57 | sparkStreaming, 58 | kafkaStreamsTestUtils, 59 | sparkMl, 60 | ) 61 | } 62 | } 63 | 64 | // Setup preprocessing with JCP for main sources 65 | 66 | val kotlinMainSources = kotlin.sourceSets.main.get().kotlin.sourceDirectories 67 | 68 | val preprocessMain by tasks.creating(JcpTask::class) { 69 | sources.set(kotlinMainSources) 70 | clearTarget.set(true) 71 | fileExtensions.set(listOf("kt")) 72 | vars.set(Versions.versionMap) 73 | outputs.upToDateWhen { target.get().exists() } 74 | } 75 | 76 | tasks.compileKotlin { 77 | dependsOn(preprocessMain) 78 | outputs.upToDateWhen { 79 | preprocessMain.outcomingFiles.files.isEmpty() 80 | } 81 | 82 | doFirst { 83 | kotlin { 84 | sourceSets { 85 | main { 86 | kotlin.setSrcDirs(listOf(preprocessMain.target.get())) 87 | } 88 | } 89 | } 90 | } 91 | 92 | doLast { 93 | kotlin { 94 | sourceSets { 95 | main { 96 | kotlin.setSrcDirs(kotlinMainSources) 97 | } 98 | } 99 | } 100 | } 101 | } 102 | 103 | // Setup preprocessing with JCP for test sources 104 | 105 | val kotlinTestSources = kotlin.sourceSets.test.get().kotlin.sourceDirectories 106 | 107 | val preprocessTest by tasks.creating(JcpTask::class) { 108 | sources.set(kotlinTestSources) 109 | clearTarget.set(true) 110 | fileExtensions.set(listOf("kt")) 111 | vars.set(Versions.versionMap) 112 | outputs.upToDateWhen { target.get().exists() } 113 | } 114 | 115 | tasks.compileTestKotlin { 116 | dependsOn(preprocessTest) 117 | outputs.upToDateWhen { 118 | preprocessTest.outcomingFiles.files.isEmpty() 119 | } 120 | 121 | doFirst { 122 | kotlin { 123 | sourceSets { 124 | test { 125 | kotlin.setSrcDirs(listOf(preprocessTest.target.get())) 126 | } 127 | } 128 | } 129 | } 130 | 131 | doLast { 132 | kotlin { 133 | sourceSets { 134 | test { 135 | kotlin.setSrcDirs(kotlinTestSources) 136 | } 137 | } 138 | } 139 | } 140 | } 141 | 142 | kotlin { 143 | jvmToolchain { 
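// (Added note: the toolchain below pins compilation to the Java version named by
//  Versions.jvmTarget, rather than whichever JDK happens to run Gradle.)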
144 | languageVersion.set( 145 | JavaLanguageVersion.of(Versions.jvmTarget) 146 | ) 147 | } 148 | } 149 | 150 | tasks.withType { 151 | dokkaSourceSets { 152 | all { 153 | sourceRoot(preprocessMain.target.get()) 154 | } 155 | } 156 | } 157 | 158 | mavenPublishing { 159 | configure(KotlinJvm(Dokka("dokkaHtml"))) 160 | } 161 | 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/DataStreamWriter.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.api 21 | 22 | import org.apache.spark.api.java.function.VoidFunction2 23 | import org.apache.spark.sql.Dataset 24 | import org.apache.spark.sql.streaming.DataStreamWriter 25 | 26 | /** 27 | * :: Experimental :: 28 | * 29 | * (Scala-specific) Sets the output of the streaming query to be processed using the provided 30 | * function. This is supported only in the micro-batch execution modes (that is, when the 31 | * trigger is not continuous). In every micro-batch, the provided function will be called in 32 | * every micro-batch with (i) the output rows as a Dataset and (ii) the batch identifier. 33 | * The batchId can be used to deduplicate and transactionally write the output 34 | * (that is, the provided Dataset) to external systems. The output Dataset is guaranteed 35 | * to be exactly the same for the same batchId (assuming all operations are deterministic 36 | * in the query). 37 | * 38 | * @since 2.4.0 39 | */ 40 | fun DataStreamWriter.forEachBatch( 41 | func: (batch: Dataset, batchId: Long) -> Unit, 42 | ): DataStreamWriter = foreachBatch(VoidFunction2(func)) 43 | -------------------------------------------------------------------------------- /kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/GroupState.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: API for Spark 3.0+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2021 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | 21 | /** 22 | * This file contains some helper functions to more easily work with [GroupState] from Kotlin. 23 | */ 24 | 25 | package org.jetbrains.kotlinx.spark.api 26 | 27 | import org.apache.spark.sql.streaming.GroupState 28 | import kotlin.reflect.KProperty 29 | 30 | /** 31 | * (Kotlin-specific) 32 | * Returns the group state value if it exists, else `null`. 33 | * This is comparable to [GroupState.getOption], but instead utilises Kotlin's nullability features 34 | * to get the same result. 35 | */ 36 | fun GroupState.getOrNull(): S? = if (exists()) get() else null 37 | 38 | /** 39 | * (Kotlin-specific) 40 | * Allows the group state object to be used as a delegate. Will be `null` if it does not exist. 41 | * 42 | * For example: 43 | * ```kotlin 44 | * groupedDataset.mapGroupsWithState(GroupStateTimeout.NoTimeout()) { key, values, state: GroupState -> 45 | * var s by state 46 | * ... 47 | * } 48 | * ``` 49 | */ 50 | operator fun GroupState.getValue(thisRef: Any?, property: KProperty<*>): S? = getOrNull() 51 | 52 | /** 53 | * (Kotlin-specific) 54 | * Allows the group state object to be used as a delegate. Will be `null` if it does not exist. 55 | * 56 | * For example: 57 | * ```kotlin 58 | * groupedDataset.mapGroupsWithState(GroupStateTimeout.NoTimeout()) { key, values, state: GroupState -> 59 | * var s by state 60 | * ... 61 | * } 62 | * ``` 63 | */ 64 | operator fun GroupState.setValue(thisRef: Any?, property: KProperty<*>, value: S?): Unit = update(value) 65 | -------------------------------------------------------------------------------- /kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Iterators.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | 21 | /** 22 | * This file contains several ways to wrap and modify iterators lazily. 23 | * This includes mapping, filtering, and partitioning. 24 | */ 25 | 26 | package org.jetbrains.kotlinx.spark.api 27 | 28 | /** Partitions the values of the iterator lazily in groups of [size]. */ 29 | class PartitioningIterator( 30 | private val source: Iterator, 31 | private val size: Int, 32 | private val cutIncomplete: Boolean = false, 33 | ) : AbstractIterator>() { 34 | 35 | override fun computeNext() { 36 | if (!source.hasNext()) return done() 37 | val interimResult = arrayListOf() 38 | repeat(size) { 39 | if (source.hasNext()) 40 | interimResult.add(source.next()) 41 | else 42 | return if (cutIncomplete) 43 | done() 44 | else 45 | setNext(interimResult) 46 | } 47 | setNext(interimResult) 48 | } 49 | 50 | } 51 | 52 | /** Maps the values of the iterator lazily using [func]. 
*/ 53 | @Deprecated("[Iterator.map] now uses the [Sequence.map] function") 54 | class MappingIterator( 55 | private val source: Iterator, 56 | private val func: (T) -> R, 57 | ) : AbstractIterator() { 58 | 59 | override fun computeNext(): Unit = 60 | if (source.hasNext()) 61 | setNext(func(source.next())) 62 | else 63 | done() 64 | } 65 | 66 | /** Filters the values of the iterator lazily using [predicate]. */ 67 | @Deprecated("[Iterator.filter] now uses the [Sequence.filter] function") 68 | class FilteringIterator( 69 | private val source: Iterator, 70 | private val predicate: (T) -> Boolean, 71 | ) : AbstractIterator() { 72 | 73 | override fun computeNext() { 74 | while (source.hasNext()) { 75 | val next = source.next() 76 | if (predicate(next)) { 77 | setNext(next) 78 | return 79 | } 80 | } 81 | done() 82 | } 83 | 84 | } 85 | 86 | /** Allows to transform an Iterator using the Sequence functions. */ 87 | fun Iterator.transformAsSequence(func: Sequence.() -> Sequence): Iterator = 88 | func(this.asSequence()).iterator() 89 | 90 | /** Flattens iterator. */ 91 | fun Iterator>.flatten(): Iterator = transformAsSequence { flatMap { it.asSequence() } } 92 | 93 | /** Maps the values of the iterator lazily using [func]. */ 94 | fun Iterator.map(func: (T) -> R): Iterator = transformAsSequence { map(func) } 95 | 96 | /** Filters the values of the iterator lazily using [predicate]. */ 97 | fun Iterator.filter(predicate: (T) -> Boolean): Iterator = transformAsSequence { filter(predicate) } 98 | 99 | /** Partitions the values of the iterator lazily in groups of [size]. */ 100 | fun Iterator.partition(size: Int, cutIncomplete: Boolean = false): Iterator> = 101 | PartitioningIterator(this, size, cutIncomplete) 102 | 103 | -------------------------------------------------------------------------------- /kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Rdd.kt: -------------------------------------------------------------------------------- 1 | package org.jetbrains.kotlinx.spark.api 2 | 3 | import org.apache.spark.api.java.JavaRDD 4 | import org.apache.spark.api.java.JavaSparkContext 5 | import java.io.Serializable 6 | 7 | /** 8 | * Utility method to create an RDD from a list. 9 | * NOTE: [T] must be [Serializable]. 10 | */ 11 | fun JavaSparkContext.rddOf( 12 | vararg elements: T, 13 | numSlices: Int = defaultParallelism(), 14 | ): JavaRDD = parallelize(elements.toList(), numSlices) 15 | 16 | /** 17 | * Utility method to create an RDD from a list. 18 | * NOTE: [T] must be [Serializable]. 19 | */ 20 | fun JavaSparkContext.toRDD( 21 | elements: List, 22 | numSlices: Int = defaultParallelism(), 23 | ): JavaRDD = parallelize(elements, numSlices) 24 | 25 | /** 26 | * Returns the minimum element from this RDD as defined by the specified 27 | * [Comparator]. 28 | * 29 | * @return the minimum of the RDD 30 | */ 31 | fun > JavaRDD.min(): T = min( 32 | object : Comparator, Serializable { 33 | override fun compare(o1: T, o2: T): Int = o1.compareTo(o2) 34 | } 35 | ) 36 | 37 | /** 38 | * Returns the maximum element from this RDD as defined by the specified 39 | * [Comparator]. 
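 * For example (illustrative, assuming a running [JavaSparkContext] named `sc`):
 * ```kotlin
 * sc.rddOf(1, 3, 2).max() // == 3
 * ```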
40 | * 41 | * @return the maximum of the RDD 42 | */ 43 | fun > JavaRDD.max(): T = max( 44 | object : Comparator, Serializable { 45 | override fun compare(o1: T, o2: T): Int = o1.compareTo(o2) 46 | } 47 | ) 48 | -------------------------------------------------------------------------------- /kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/RddDouble.kt: -------------------------------------------------------------------------------- 1 | package org.jetbrains.kotlinx.spark.api 2 | 3 | import org.apache.spark.api.java.JavaDoubleRDD 4 | import org.apache.spark.api.java.JavaRDD 5 | import org.apache.spark.partial.BoundedDouble 6 | import org.apache.spark.partial.PartialResult 7 | import org.apache.spark.rdd.RDD 8 | import org.apache.spark.util.StatCounter 9 | import scala.Tuple2 10 | 11 | /** Utility method to convert [JavaRDD]<[Number]> to [JavaDoubleRDD]. */ 12 | @Suppress("UNCHECKED_CAST") 13 | inline fun JavaRDD.toJavaDoubleRDD(): JavaDoubleRDD = 14 | JavaDoubleRDD.fromRDD( 15 | when (T::class) { 16 | Double::class -> this 17 | else -> map(Number::toDouble) 18 | }.rdd() as RDD 19 | ) 20 | 21 | /** Utility method to convert [JavaDoubleRDD] to [JavaRDD]<[Double]>. */ 22 | @Suppress("UNCHECKED_CAST") 23 | fun JavaDoubleRDD.toDoubleRDD(): JavaRDD = 24 | JavaDoubleRDD.toRDD(this).toJavaRDD() as JavaRDD 25 | 26 | /** Add up the elements in this RDD. */ 27 | inline fun JavaRDD.sum(): Double = toJavaDoubleRDD().sum() 28 | 29 | /** 30 | * Return a [org.apache.spark.util.StatCounter] object that captures the mean, variance and 31 | * count of the RDD's elements in one operation. 32 | */ 33 | inline fun JavaRDD.stats(): StatCounter = toJavaDoubleRDD().stats() 34 | 35 | /** Compute the mean of this RDD's elements. */ 36 | inline fun JavaRDD.mean(): Double = toJavaDoubleRDD().mean() 37 | 38 | /** Compute the population variance of this RDD's elements. */ 39 | inline fun JavaRDD.variance(): Double = toJavaDoubleRDD().variance() 40 | 41 | /** Compute the population standard deviation of this RDD's elements. */ 42 | inline fun JavaRDD.stdev(): Double = toJavaDoubleRDD().stdev() 43 | 44 | /** 45 | * Compute the sample standard deviation of this RDD's elements (which corrects for bias in 46 | * estimating the standard deviation by dividing by N-1 instead of N). 47 | */ 48 | inline fun JavaRDD.sampleStdev(): Double = toJavaDoubleRDD().sampleStdev() 49 | 50 | /** 51 | * Compute the sample variance of this RDD's elements (which corrects for bias in 52 | * estimating the variance by dividing by N-1 instead of N). 53 | */ 54 | inline fun JavaRDD.sampleVariance(): Double = toJavaDoubleRDD().sampleVariance() 55 | 56 | /** Compute the population standard deviation of this RDD's elements. */ 57 | inline fun JavaRDD.popStdev(): Double = toJavaDoubleRDD().popStdev() 58 | 59 | /** Compute the population variance of this RDD's elements. */ 60 | inline fun JavaRDD.popVariance(): Double = toJavaDoubleRDD().popVariance() 61 | 62 | /** Approximate operation to return the mean within a timeout. */ 63 | inline fun JavaRDD.meanApprox( 64 | timeout: Long, 65 | confidence: Double = 0.95, 66 | ): PartialResult = toJavaDoubleRDD().meanApprox(timeout, confidence) 67 | 68 | /** Approximate operation to return the sum within a timeout. 
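 * For example (illustrative): `rdd.sumApprox(timeout = 1000L, confidence = 0.9)` waits at most one
 * second and yields a [PartialResult] wrapping a [BoundedDouble] estimate of the sum.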
*/ 69 | inline fun JavaRDD.sumApprox( 70 | timeout: Long, 71 | confidence: Double = 0.95, 72 | ): PartialResult = toJavaDoubleRDD().sumApprox(timeout, confidence) 73 | 74 | /** 75 | * Compute a histogram of the data using bucketCount number of buckets evenly 76 | * spaced between the minimum and maximum of the RDD. For example if the min 77 | * value is 0 and the max is 100 and there are two buckets the resulting 78 | * buckets will be `[0, 50)` `[50, 100]`. bucketCount must be at least 1 79 | * If the RDD contains infinity, NaN throws an exception 80 | * If the elements in RDD do not vary (max == min) always returns a single bucket. 81 | */ 82 | inline fun JavaRDD.histogram(bucketCount: Int): Tuple2 = 83 | toJavaDoubleRDD().histogram(bucketCount) 84 | 85 | /** 86 | * Compute a histogram using the provided buckets. The buckets are all open 87 | * to the right except for the last which is closed. 88 | * e.g. for the array 89 | * `[1, 10, 20, 50]` the buckets are `[1, 10) [10, 20) [20, 50]` 90 | * e.g. ` <=x<10, 10<=x<20, 20<=x<=50` 91 | * And on the input of 1 and 50 we would have a histogram of 1, 0, 1 92 | * 93 | * Note: If your histogram is evenly spaced (e.g. `[0, 10, 20, 30]`) this can be switched 94 | * from an O(log n) insertion to O(1) per element. (where n = # buckets) if you set evenBuckets 95 | * to true. 96 | * buckets must be sorted and not contain any duplicates. 97 | * buckets array must be at least two elements 98 | * All NaN entries are treated the same. If you have a NaN bucket it must be 99 | * the maximum value of the last position and all NaN entries will be counted 100 | * in that bucket. 101 | */ 102 | inline fun JavaRDD.histogram( 103 | buckets: Array, 104 | evenBuckets: Boolean = false, 105 | ): LongArray = toJavaDoubleRDD().histogram(buckets, evenBuckets) -------------------------------------------------------------------------------- /kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Seq.kt: -------------------------------------------------------------------------------- 1 | package org.jetbrains.kotlinx.spark.api 2 | 3 | import scala.collection.immutable.`Seq$`.`MODULE$` as Seq 4 | import scala.collection.immutable.Seq as Seq 5 | import scala.collection.mutable.`Seq$`.`MODULE$` as MutableSeq 6 | import scala.collection.mutable.Seq as MutableSeq 7 | 8 | /** Returns a new empty immutable Seq. */ 9 | fun emptySeq(): Seq = Seq.empty() as Seq 10 | 11 | /** Returns a new immutable Seq with the given elements. */ 12 | fun seqOf(vararg elements: T): Seq = 13 | if (elements.isEmpty()) 14 | emptySeq() 15 | else 16 | Seq.newBuilder().apply { 17 | for (it in elements) 18 | `$plus$eq`(it) 19 | }.result() as Seq 20 | 21 | /** Returns a new mutable Seq with the given elements. */ 22 | fun emptyMutableSeq(): MutableSeq = MutableSeq.empty() as MutableSeq 23 | 24 | /** Returns a new mutable Seq with the given elements. 
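 * For example (illustrative): `mutableSeqOf(1, 2, 3)` builds a `scala.collection.mutable.Seq`
 * that can be handed to Spark APIs expecting a Scala collection.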
*/ 25 | fun mutableSeqOf(vararg elements: T): MutableSeq = 26 | if (elements.isEmpty()) 27 | emptyMutableSeq() 28 | else 29 | MutableSeq.newBuilder().apply { 30 | for (it in elements) 31 | `$plus$eq`(it) 32 | }.result() as MutableSeq 33 | -------------------------------------------------------------------------------- /kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedFunction.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | @file:Suppress("unused") 21 | 22 | package org.jetbrains.kotlinx.spark.api 23 | 24 | import org.apache.spark.sql.* 25 | import org.apache.spark.sql.types.DataType 26 | import scala.collection.Seq 27 | import java.io.Serializable 28 | import kotlin.reflect.KClass 29 | import kotlin.reflect.KProperty 30 | import kotlin.reflect.full.isSubclassOf 31 | import kotlin.reflect.full.primaryConstructor 32 | import org.apache.spark.sql.expressions.UserDefinedFunction as SparkUserDefinedFunction 33 | 34 | /** Unwraps [DataTypeWithClass]. */ 35 | fun DataType.unWrap(): DataType = 36 | when (this) { 37 | is DataTypeWithClass -> DataType.fromJson(dt().json()) 38 | else -> this 39 | } 40 | 41 | /** 42 | * Checks if [this] is of a valid type for a UDF, otherwise it throws a [TypeOfUDFParameterNotSupportedException] 43 | */ 44 | @PublishedApi 45 | internal fun KClass<*>.checkForValidType(parameterName: String) { 46 | if (this == String::class || isSubclassOf(Seq::class) 47 | //#if scalaCompat < 2.13 48 | //$|| isSubclassOf(scala.collection.mutable.WrappedArray::class) 49 | //#endif 50 | ) 51 | return // Most of the time we need strings or WrappedArrays/Seqs 52 | 53 | if (isSubclassOf(Iterable::class) 54 | || java.isArray 55 | || isSubclassOf(Char::class) 56 | || isSubclassOf(Map::class) 57 | || isSubclassOf(Array::class) 58 | || isSubclassOf(ByteArray::class) 59 | || isSubclassOf(CharArray::class) 60 | || isSubclassOf(ShortArray::class) 61 | || isSubclassOf(IntArray::class) 62 | || isSubclassOf(LongArray::class) 63 | || isSubclassOf(FloatArray::class) 64 | || isSubclassOf(DoubleArray::class) 65 | || isSubclassOf(BooleanArray::class) 66 | ) throw TypeOfUDFParameterNotSupportedException(this, parameterName) 67 | } 68 | 69 | /** 70 | * An exception thrown when the UDF is generated with illegal types for the parameters 71 | */ 72 | class TypeOfUDFParameterNotSupportedException(kClass: KClass<*>, parameterName: String) : IllegalArgumentException( 73 | "Parameter $parameterName is subclass of ${kClass.qualifiedName}. If you need to process an array use ${Seq::class.qualifiedName}. You can convert any typed array/list-like column using [asSeq()]." 
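// (Added note: the asSeq() overloads below perform exactly this conversion for typed array and
//  collection columns.)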
74 | ) 75 | 76 | @JvmName("arrayColumnAsSeq") 77 | fun TypedColumn>.asSeq(): TypedColumn> = typed() 78 | @JvmName("iterableColumnAsSeq") 79 | fun > TypedColumn.asSeq(): TypedColumn> = typed() 80 | @JvmName("byteArrayColumnAsSeq") 81 | fun TypedColumn.asSeq(): TypedColumn> = typed() 82 | @JvmName("charArrayColumnAsSeq") 83 | fun TypedColumn.asSeq(): TypedColumn> = typed() 84 | @JvmName("shortArrayColumnAsSeq") 85 | fun TypedColumn.asSeq(): TypedColumn> = typed() 86 | @JvmName("intArrayColumnAsSeq") 87 | fun TypedColumn.asSeq(): TypedColumn> = typed() 88 | @JvmName("longArrayColumnAsSeq") 89 | fun TypedColumn.asSeq(): TypedColumn> = typed() 90 | @JvmName("floatArrayColumnAsSeq") 91 | fun TypedColumn.asSeq(): TypedColumn> = typed() 92 | @JvmName("doubleArrayColumnAsSeq") 93 | fun TypedColumn.asSeq(): TypedColumn> = typed() 94 | @JvmName("booleanArrayColumnAsSeq") 95 | fun TypedColumn.asSeq(): TypedColumn> = typed() 96 | 97 | 98 | /** 99 | * Registers a user-defined function (UDF) with name, for a UDF that's already defined using the Dataset 100 | * API (i.e. of type [NamedUserDefinedFunction]). 101 | * @see UDFRegistration.register 102 | */ 103 | inline fun > UDFRegistration.register( 104 | namedUdf: NamedUdf, 105 | ): NamedUdf = namedUdf.copy(udf = register(namedUdf.name, namedUdf.udf)) 106 | 107 | inline fun > UDFRegistration.register( 108 | name: String, 109 | udf: UserDefinedFunction, 110 | ): NamedUdf = udf.withName(name).copy(udf = register(name, udf.udf)) 111 | 112 | /** 113 | * Typed wrapper around [SparkUserDefinedFunction] with defined encoder. 114 | * 115 | * @param Return the return type of the udf 116 | * @param NamedUdf a type reference to the named version of the [SparkUserDefinedFunction] implementing class 117 | */ 118 | sealed interface UserDefinedFunction : Serializable { 119 | val udf: SparkUserDefinedFunction 120 | val encoder: Encoder 121 | 122 | /** Returns true when the UDF can return a nullable value. */ 123 | val nullable: Boolean get() = udf.nullable() 124 | 125 | /** Returns true iff the UDF is deterministic, i.e. the UDF produces the same output given the same input. */ 126 | val deterministic: Boolean get() = udf.deterministic() 127 | 128 | /** Converts this [UserDefinedFunction] to a [NamedUserDefinedFunction]. */ 129 | fun withName(name: String): NamedUdf 130 | 131 | /** 132 | * Converts this [UserDefinedFunction] to a [NamedUserDefinedFunction]. 133 | * @see withName 134 | */ 135 | operator fun getValue(thisRef: Any?, property: KProperty<*>): NamedUdf 136 | } 137 | 138 | /** 139 | * Typed and named wrapper around [SparkUserDefinedFunction] with defined encoder. 140 | * 141 | * @param Return the return type of the udf 142 | * @param This a self reference to the named version of the [SparkUserDefinedFunction] implementing class. 143 | * Unfortunately needed to allow functions to treat any [NamedTypedUserDefinedFunction] as a normal [TypedUserDefinedFunction]. 144 | */ 145 | sealed interface NamedUserDefinedFunction : UserDefinedFunction { 146 | val name: String 147 | } 148 | 149 | /** Copy method for all [NamedUserDefinedFunction] functions. 
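 * For example (illustrative): `myNamedUdf.copy(name = "renamed")` keeps the underlying udf and
 * encoder but changes only the name under which it will be registered; `myNamedUdf` stands in for
 * any [NamedUserDefinedFunction] instance.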
*/ 150 | inline fun > T.copy( 151 | name: String = this.name, 152 | udf: SparkUserDefinedFunction = this.udf, 153 | encoder: Encoder = this.encoder, 154 | ): T = T::class.primaryConstructor!!.run { 155 | callBy( 156 | parameters.associateWith { 157 | when (it.name) { 158 | NamedUserDefinedFunction<*, *>::name.name -> name 159 | NamedUserDefinedFunction<*, *>::udf.name -> udf 160 | NamedUserDefinedFunction<*, *>::encoder.name -> encoder 161 | else -> error("Wrong arguments") 162 | } 163 | } 164 | ) 165 | } 166 | 167 | -------------------------------------------------------------------------------- /kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt: -------------------------------------------------------------------------------- 1 | package org.jetbrains.kotlinx.spark.api/*- 2 | * =LICENSE= 3 | * Kotlin Spark API 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | import ch.tutteli.atrium.api.fluent.en_GB.* 21 | import ch.tutteli.atrium.api.verbs.expect 22 | import io.kotest.core.spec.style.ShouldSpec 23 | import io.kotest.matchers.shouldBe 24 | import scala.collection.Seq 25 | import java.io.Serializable 26 | import kotlin.collections.Iterator 27 | import scala.collection.Iterator as ScalaIterator 28 | import scala.collection.Map as ScalaMap 29 | import scala.collection.mutable.Map as ScalaMutableMap 30 | 31 | class ApiTest : ShouldSpec({ 32 | 33 | context("miscellaneous integration tests") { 34 | withSpark(props = mapOf("spark.sql.codegen.comments" to true)) { 35 | 36 | should("Create Seqs") { 37 | spark.createDataset(seqOf(1, 2, 3), encoder()) 38 | .collectAsList() shouldBe listOf(1, 2, 3) 39 | 40 | 41 | seqOf(1, 2, 3) shouldBe seqOf(1, 2, 3) 42 | mutableSeqOf(1, 2, 3) shouldBe mutableSeqOf(1, 2, 3) 43 | 44 | seqOf() shouldBe emptySeq() 45 | mutableSeqOf() shouldBe emptyMutableSeq() 46 | } 47 | 48 | @OptIn(ExperimentalStdlibApi::class) 49 | should("broadcast variables") { 50 | val largeList = (1..15).map { SomeClass(a = (it..15).toList().toIntArray(), b = it) } 51 | val broadcast = spark.broadcast(largeList) 52 | val broadcast2 = spark.broadcast(arrayOf(doubleArrayOf(1.0, 2.0, 3.0, 4.0))) 53 | 54 | val result: List = listOf(1, 2, 3, 4, 5) 55 | .toDS() 56 | .mapPartitions { iterator -> 57 | val receivedBroadcast = broadcast.value 58 | val receivedBroadcast2 = broadcast2.value 59 | 60 | buildList { 61 | iterator.forEach { 62 | this.add(it + receivedBroadcast[it].b * receivedBroadcast2[0][0]) 63 | } 64 | }.iterator() 65 | } 66 | .collectAsList() 67 | 68 | expect(result).toContain.inOrder.only.values(3.0, 5.0, 7.0, 9.0, 11.0) 69 | } 70 | 71 | should("Handle JavaConversions in Kotlin") { 72 | // Test the iterator conversion 73 | val scalaIterator: ScalaIterator = listOf("test1", "test2").iterator().asScalaIterator() 74 | scalaIterator.next() shouldBe "test1" 75 | 76 | val kotlinIterator: Iterator = scalaIterator.asKotlinIterator() 
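// (Added note: asKotlinIterator() wraps the same underlying Scala iterator, which was already
// advanced past "test1" above, so the next element observed here is "test2".)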
77 | kotlinIterator.next() shouldBe "test2" 78 | 79 | 80 | val scalaMap: ScalaMap = mapOf(1 to "a", 2 to "b").asScalaMap() 81 | scalaMap.get(1).get() shouldBe "a" 82 | scalaMap.get(2).get() shouldBe "b" 83 | 84 | val kotlinMap: Map = scalaMap.asKotlinMap() 85 | kotlinMap[1] shouldBe "a" 86 | kotlinMap[2] shouldBe "b" 87 | 88 | 89 | val scalaMutableMap: ScalaMutableMap = mutableMapOf(1 to "a").asScalaMutableMap() 90 | scalaMutableMap.get(1).get() shouldBe "a" 91 | 92 | scalaMutableMap.put(2, "b") 93 | 94 | val kotlinMutableMap: MutableMap = scalaMutableMap.asKotlinMutableMap() 95 | kotlinMutableMap[1] shouldBe "a" 96 | kotlinMutableMap[2] shouldBe "b" 97 | 98 | val scalaSeq: Seq = listOf("a", "b").iterator().asScalaIterator().toSeq() 99 | scalaSeq.take(1).toList().last() shouldBe "a" 100 | scalaSeq.take(2).toList().last() shouldBe "b" 101 | 102 | val kotlinList: List = scalaSeq.asKotlinList() 103 | kotlinList.first() shouldBe "a" 104 | kotlinList.last() shouldBe "b" 105 | } 106 | 107 | should("Map iterators") { 108 | val data = (1..50).toList() 109 | val iterator = iterator { yieldAll(data) } 110 | .map { it.toString() } 111 | 112 | iterator.asSequence().toList() shouldBe data.map { it.toString() } 113 | } 114 | 115 | should("Filter iterators") { 116 | val data = (1..50).toList() 117 | val iterator = iterator { yieldAll(data) } 118 | .filter { it % 2 == 0 } 119 | 120 | iterator.asSequence().toList() shouldBe data.filter { it % 2 == 0 } 121 | } 122 | 123 | should("Partition iterators") { 124 | val data = (1..50).toList() 125 | 126 | val iterator1 = iterator { yieldAll(data) } 127 | .partition(8, cutIncomplete = false) 128 | val result1 = iterator1.asSequence().toList() 129 | result1.size shouldBe (50 / 8 + 1) 130 | result1.map { it.size }.distinct().size shouldBe 2 // two difference sizes should exist, 8 and the rest 131 | 132 | val iterator2 = iterator { yieldAll(data) } 133 | .partition(8, cutIncomplete = true) 134 | 135 | val result2 = iterator2.asSequence().toList() 136 | result2.size shouldBe (50 / 8) 137 | result2.forEach { it.size shouldBe 8 } 138 | } 139 | 140 | should("Flatten iterators") { 141 | val data = (1..50).toList() 142 | val (data1, data2) = data.partition { it <= 25 } 143 | val iterator = iterator { 144 | yield(data1.iterator()) 145 | yield(data2.iterator()) 146 | }.flatten() 147 | 148 | iterator.asSequence().toList() shouldBe data 149 | } 150 | 151 | should("Flatmap iterators using transformAsSequence") { 152 | val data = (1..50).toList() 153 | val iterator = data.iterator() 154 | .transformAsSequence { 155 | flatMap { 156 | listOf(it.toDouble(), it + 0.5) 157 | } 158 | } 159 | 160 | iterator.asSequence().toList() shouldBe data.flatMap { listOf(it.toDouble(), it + 0.5) } 161 | } 162 | } 163 | } 164 | }) 165 | 166 | 167 | // (data) class must be Serializable to be broadcast 168 | data class SomeClass(val a: IntArray, val b: Int) : Serializable 169 | -------------------------------------------------------------------------------- /kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/KafkaStreamingTest.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 
9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.api 21 | 22 | import io.kotest.core.Tag 23 | import io.kotest.core.extensions.install 24 | import io.kotest.core.spec.style.FunSpec 25 | import io.kotest.extensions.testcontainers.TestContainerExtension 26 | import io.kotest.extensions.testcontainers.kafka.createStringStringConsumer 27 | import io.kotest.extensions.testcontainers.kafka.createStringStringProducer 28 | import io.kotest.matchers.collections.shouldContain 29 | import io.kotest.matchers.collections.shouldContainAll 30 | import org.apache.kafka.clients.consumer.ConsumerConfig 31 | import org.apache.kafka.clients.consumer.ConsumerRecord 32 | import org.apache.kafka.clients.producer.ProducerRecord 33 | import org.apache.kafka.common.serialization.StringDeserializer 34 | import org.apache.spark.streaming.Durations 35 | import org.apache.spark.streaming.api.java.JavaInputDStream 36 | import org.apache.spark.streaming.kafka010.ConsumerStrategies 37 | import org.apache.spark.streaming.kafka010.KafkaUtils 38 | import org.apache.spark.streaming.kafka010.LocationStrategies 39 | import org.jetbrains.kotlinx.spark.api.tuples.* 40 | import org.testcontainers.containers.ContainerLaunchException 41 | import org.testcontainers.containers.KafkaContainer 42 | import org.testcontainers.utility.DockerImageName 43 | import scala.Tuple3 44 | import java.io.Serializable 45 | import java.time.Duration 46 | 47 | object Kafka : Tag() 48 | 49 | class KafkaStreamingTest : FunSpec() { 50 | 51 | init { 52 | 53 | tags(Kafka) 54 | 55 | val kafka = run { 56 | var attempts = 0 57 | while (true) { 58 | try { 59 | return@run install( 60 | TestContainerExtension( 61 | KafkaContainer(DockerImageName.parse("confluentinc/cp-kafka:7.0.1")) 62 | ) 63 | ) { 64 | withEmbeddedZookeeper() 65 | withEnv("KAFKA_AUTO_CREATE_TOPICS_ENABLE", "true") 66 | } 67 | } catch (e: ContainerLaunchException) { 68 | attempts++ 69 | if (attempts > 10) throw e 70 | } 71 | } 72 | @Suppress("UNREACHABLE_CODE") 73 | error("Unreachable") 74 | } 75 | 76 | println(kafka.bootstrapServers) 77 | test("Streaming should support kafka") { 78 | val topic1 = "test1" 79 | val topic2 = "test2" 80 | 81 | val resultLists = mapOf( 82 | topic1 to listOf( 83 | "Hello" X 1, 84 | "this" X 1, 85 | "is" X 1, 86 | "a" X 1, 87 | "test" X 3, 88 | ), 89 | topic2 to listOf( 90 | "This" X 1, 91 | "is" X 1, 92 | "also" X 2, 93 | "a" X 1, 94 | "test" X 2, 95 | "something" X 1, 96 | ) 97 | ) 98 | val data = arrayListOf>>() 99 | 100 | withSparkStreaming( 101 | batchDuration = Durations.milliseconds(1000), 102 | appName = "KotlinDirectKafkaWordCount", 103 | timeout = 10_000L, 104 | master = "local" 105 | ) { 106 | 107 | setRunAfterStart { 108 | val producer = autoClose(kafka.createStringStringProducer()) 109 | producer.send(ProducerRecord(topic1, "Hello this is a test test test")) 110 | producer.send(ProducerRecord(topic2, "This is also also a test test something")) 111 | } 112 | 113 | val kafkaParams: Map = mapOf( 114 | ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG to 
"${kafka.host}:${kafka.getMappedPort(KafkaContainer.KAFKA_PORT)}", 115 | ConsumerConfig.GROUP_ID_CONFIG to "consumer-group", 116 | ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG to StringDeserializer::class.java, 117 | ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG to StringDeserializer::class.java, 118 | ) 119 | // Create direct kafka stream with brokers and topics 120 | val messages: JavaInputDStream> = KafkaUtils.createDirectStream( 121 | ssc, 122 | LocationStrategies.PreferBrokers(), 123 | ConsumerStrategies.Subscribe(setOf(topic1, topic2), kafkaParams), 124 | ) 125 | 126 | // Get the lines, split them into words, count the words and print 127 | 128 | val wordCounts = messages 129 | .map { it.topic() X it.value() } 130 | .flatMapValues { it.split(" ").iterator() } 131 | .map { t(it, 1) } 132 | .reduceByKey { a: Int, b: Int -> a + b } 133 | .map { (tup, counter) -> tup + counter } 134 | 135 | 136 | wordCounts.foreachRDD { rdd, _ -> 137 | data.add(rdd.collect()) 138 | } 139 | } 140 | 141 | val resultList = resultLists.flatMap { (topic, tuples) -> 142 | tuples.map { it.prependedBy(topic) } 143 | } 144 | data.flatten() shouldContainAll resultList 145 | } 146 | } 147 | } -------------------------------------------------------------------------------- /kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ProjectConfig.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.api 21 | 22 | import io.kotest.core.config.AbstractProjectConfig 23 | 24 | @Suppress("unused") 25 | object ProjectConfig : AbstractProjectConfig() { 26 | 27 | } 28 | -------------------------------------------------------------------------------- /kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/RddTest.kt: -------------------------------------------------------------------------------- 1 | package org.jetbrains.kotlinx.spark.api 2 | 3 | import io.kotest.core.spec.style.ShouldSpec 4 | import io.kotest.matchers.collections.shouldContainAll 5 | import io.kotest.matchers.shouldBe 6 | import org.apache.spark.api.java.JavaRDD 7 | import org.jetbrains.kotlinx.spark.api.tuples.* 8 | import scala.Tuple2 9 | 10 | class RddTest : ShouldSpec({ 11 | context("RDD extension functions") { 12 | 13 | withSpark(logLevel = SparkLogLevel.DEBUG) { 14 | 15 | context("Key/value") { 16 | should("work with spark example") { 17 | val rdd = rddOf(1, 1, 2, 2, 2, 3).map(Int::toString) 18 | 19 | val pairs = rdd.map { it X 1 } 20 | val counts = pairs.reduceByKey { a, b -> a + b } 21 | val list = counts.collect().toList() 22 | list.shouldContainAll("1" X 2, "2" X 3, "3" X 1) 23 | } 24 | 25 | should("Have handy functions") { 26 | val rdd = rddOf( 27 | 1 X "a", 28 | 2 X "b", 29 | 3 X "c", 30 | 4 X "d", 31 | 5 X "e", 32 | 6 X "f", 33 | ) 34 | 35 | //#if sparkMinor >= 3.1 36 | val rangeFiltered: JavaRDD> = rdd.filterByRange(2..5) 37 | rangeFiltered.collect().shouldContainAll( 38 | 2 X "b", 39 | 3 X "c", 40 | 4 X "d", 41 | 5 X "e", 42 | ) 43 | //#endif 44 | 45 | val result = rdd 46 | .flatMapValues { 47 | listOf(it + 1, it + 2, it + 3, it + 4).iterator() 48 | } 49 | .also { 50 | it.countByKey().values.forEach { it shouldBe 4 } 51 | } 52 | .foldByKey("", String::plus) // (1,"a1a2a3a4") etc. 53 | .mapValues { it.toSortedSet().fold("", String::plus) } // (1,"1234a") etc. 54 | .map { it.swap() } // ("1234a",1) etc. 55 | .mapKeys { it.take(4) } // ("1234",1) etc. 
56 | .groupByKey() 57 | .mapValues { it.toList().sorted() } // ("1234",[1,2,3,4,5,6]) 58 | .collect() 59 | .single() 60 | 61 | result shouldBe t("1234", listOf(1, 2, 3, 4, 5, 6)) 62 | } 63 | } 64 | 65 | context("Double functions") { 66 | should("get max/min") { 67 | val rdd = rddOf(1, 1, 2, 2, 2, 3) 68 | 69 | rdd.max() shouldBe 3.0 70 | rdd.min() shouldBe 1.0 71 | } 72 | 73 | context("Work with any number") { 74 | 75 | should("Work with Bytes") { 76 | val data = listOf(1, 1, 2, 2, 2, 3).map(Int::toByte) 77 | val rdd = data.toRDD() 78 | rdd.sum() shouldBe data.sum().toDouble() 79 | } 80 | 81 | should("Work with Shorts") { 82 | val data = listOf(1, 1, 2, 2, 2, 3).map(Int::toShort) 83 | val rdd = data.toRDD() 84 | rdd.sum() shouldBe data.sum().toDouble() 85 | } 86 | 87 | should("Work with Ints") { 88 | val data = listOf(1, 1, 2, 2, 2, 3) 89 | val rdd = data.toRDD() 90 | rdd.sum() shouldBe data.sum().toDouble() 91 | } 92 | 93 | should("Work with Longs") { 94 | val data = listOf(1, 1, 2, 2, 2, 3).map(Int::toLong) 95 | val rdd = data.toRDD() 96 | rdd.sum() shouldBe data.sum().toDouble() 97 | } 98 | 99 | should("Work with Floats") { 100 | val data = listOf(1, 1, 2, 2, 2, 3).map(Int::toFloat) 101 | val rdd = data.toRDD() 102 | rdd.sum() shouldBe data.sum().toDouble() 103 | } 104 | 105 | should("Work with Doubles") { 106 | val data = listOf(1, 1, 2, 2, 2, 3).map(Int::toDouble) 107 | val rdd = data.toRDD().toJavaDoubleRDD() 108 | rdd.sum() shouldBe data.sum().toDouble() 109 | } 110 | } 111 | } 112 | } 113 | } 114 | }) -------------------------------------------------------------------------------- /kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UdtTest.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2022 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.api 21 | 22 | import io.kotest.core.spec.style.ShouldSpec 23 | import io.kotest.matchers.shouldBe 24 | import org.glassfish.jersey.internal.guava.MoreObjects 25 | import org.apache.spark.ml.linalg.* 26 | import org.apache.spark.sql.catalyst.InternalRow 27 | import org.apache.spark.sql.catalyst.expressions.GenericInternalRow 28 | import org.apache.spark.sql.types.* 29 | import org.apache.spark.unsafe.types.UTF8String 30 | import org.jetbrains.kotlinx.spark.api.tuples.t 31 | import kotlin.reflect.jvm.jvmName 32 | 33 | class UdtTest : ShouldSpec({ 34 | context("udt") { 35 | withSpark { 36 | should("Recognize UDTs from libraries like MlLib") { 37 | val input = t( 38 | Vectors.dense(doubleArrayOf(1.0, 2.0, 3.0)), 39 | DenseVector(doubleArrayOf(1.0, 2.0, 3.0)), 40 | SparseVector(3, intArrayOf(0, 1, 2), doubleArrayOf(1.0, 2.0, 3.0)), 41 | Matrices.eye(1), 42 | DenseMatrix.eye(2), 43 | SparseMatrix.speye(2), 44 | ) 45 | 46 | val ds = dsOf(input) 47 | 48 | ds.collectAsList().single() shouldBe input 49 | } 50 | 51 | should("Recognize locally registered UDTs with annotation") { 52 | val input = t( 53 | City("Amsterdam", 1), 54 | City("Breda", 2), 55 | City("Oosterhout", 3), 56 | ) 57 | 58 | val ds = dsOf(input) 59 | 60 | ds.collectAsList().single() shouldBe input 61 | } 62 | 63 | should("Recognize locally registered UDTs with register function") { 64 | UDTRegistration.register(City::class.jvmName, CityUserDefinedType::class.jvmName) 65 | 66 | val input = t( 67 | City("Amsterdam", 1), 68 | City("Breda", 2), 69 | City("Oosterhout", 3), 70 | ) 71 | 72 | val ds = dsOf(input) 73 | 74 | ds.collectAsList().single() shouldBe input 75 | } 76 | 77 | should("Be able to create encoder from UDT too") { 78 | 79 | val input = listOf( 80 | City("Amsterdam", 1), 81 | City("Breda", 2), 82 | City("Oosterhout", 3), 83 | ) 84 | 85 | val ds = input.toDS() 86 | 87 | ds.collectAsList() shouldBe input 88 | } 89 | } 90 | } 91 | }) 92 | 93 | class CityUserDefinedType : UserDefinedType() { 94 | 95 | override fun sqlType(): DataType = DATA_TYPE 96 | 97 | override fun serialize(city: City): InternalRow = GenericInternalRow(2).apply { 98 | setInt(DEPT_NUMBER_INDEX, city.departmentNumber) 99 | update(NAME_INDEX, UTF8String.fromString(city.name)) 100 | } 101 | 102 | override fun deserialize(datum: Any): City = 103 | if (datum is InternalRow) 104 | City( 105 | name = datum.getString(NAME_INDEX), 106 | departmentNumber = datum.getInt(DEPT_NUMBER_INDEX), 107 | ) 108 | else throw IllegalStateException("Unsupported conversion") 109 | 110 | override fun userClass(): Class = City::class.java 111 | 112 | companion object { 113 | private const val DEPT_NUMBER_INDEX = 0 114 | private const val NAME_INDEX = 1 115 | private val DATA_TYPE = DataTypes.createStructType( 116 | arrayOf( 117 | DataTypes.createStructField( 118 | "departmentNumber", 119 | DataTypes.IntegerType, 120 | false, 121 | MetadataBuilder().putLong("maxNumber", 99).build(), 122 | ), 123 | DataTypes.createStructField("name", DataTypes.StringType, false) 124 | ) 125 | ) 126 | } 127 | } 128 | 129 | @SQLUserDefinedType(udt = CityUserDefinedType::class) 130 | class City(val name: String, val departmentNumber: Int) { 131 | 132 | override fun toString(): String = 133 | MoreObjects 134 | .toStringHelper(this) 135 | .add("name", name) 136 | .add("departmentNumber", departmentNumber) 137 | .toString() 138 | 139 | override fun equals(other: Any?): Boolean { 140 | if (this === other) return true 141 | if (javaClass != 
other?.javaClass) return false 142 | 143 | other as City 144 | 145 | if (name != other.name) return false 146 | if (departmentNumber != other.departmentNumber) return false 147 | 148 | return true 149 | } 150 | 151 | override fun hashCode(): Int { 152 | var result = name.hashCode() 153 | result = 31 * result + departmentNumber 154 | return result 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API 4 | * ---------- 5 | * Copyright (C) 2019 - 2020 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | * =LICENSEEND= 19 | */ 20 | package org.jetbrains.kotlinx.spark.api.struct.model 21 | 22 | import com.beust.klaxon.Converter 23 | import com.beust.klaxon.JsonObject 24 | import com.beust.klaxon.JsonValue 25 | import com.beust.klaxon.Klaxon 26 | 27 | private fun Klaxon.convert( 28 | k: kotlin.reflect.KClass<*>, 29 | fromJson: (JsonValue) -> T, 30 | toJson: (T) -> String, 31 | isUnion: Boolean = false, 32 | ) = 33 | this.converter(object : Converter { 34 | @Suppress("UNCHECKED_CAST") 35 | override fun toJson(value: Any) = toJson(value as T) 36 | 37 | override fun fromJson(jv: JsonValue) = fromJson(jv) as Any 38 | override fun canConvert(cls: Class<*>) = cls == k.java || (isUnion && cls.superclass == k.java) 39 | }) 40 | 41 | private val klaxon = Klaxon() 42 | .convert(JsonObject::class, { it.obj!! }, { it.toJsonString() }) 43 | .convert(DataType::class, { DataType.fromJson(it) }, { it.toJson() }, true) 44 | .convert(ElementType::class, { ElementType.fromJson(it) }, { it.toJson() }, true) 45 | 46 | data class Struct( 47 | val type: String, 48 | val fields: List? = null, 49 | val containsNull: Boolean? = null, 50 | val elementType: ElementType? = null, 51 | ) { 52 | public fun toJson() = klaxon.toJsonString(this) 53 | 54 | companion object { 55 | public fun fromJson(json: String) = klaxon.parse(json) 56 | } 57 | } 58 | 59 | data class StructField( 60 | val name: String, 61 | val type: DataType, 62 | val nullable: Boolean, 63 | val metadata: Metadata, 64 | ) 65 | 66 | typealias Metadata = JsonObject 67 | 68 | sealed class DataType { 69 | data class StructType(val value: Struct) : DataType() 70 | data class TypeName(val value: String) : DataType() 71 | 72 | public fun toJson(): String = klaxon.toJsonString(when (this) { 73 | is StructType -> this.value 74 | is TypeName -> this.value 75 | }) 76 | 77 | companion object { 78 | public fun fromJson(jv: JsonValue): DataType = when (jv.inside) { 79 | is JsonObject -> StructType(jv.obj?.let { klaxon.parseFromJsonObject(it) }!!) 80 | is String -> TypeName(jv.string!!) 
81 | else -> throw IllegalArgumentException() 82 | } 83 | } 84 | } 85 | 86 | sealed class ElementType { 87 | data class SimpleElement(val value: String) : ElementType() 88 | data class ComplexElement(val value: Struct) : ElementType() 89 | 90 | public fun toJson(): String = klaxon.toJsonString(when (this) { 91 | is SimpleElement -> this.value 92 | is ComplexElement -> this.value 93 | }) 94 | 95 | companion object { 96 | public fun fromJson(jv: JsonValue): ElementType = when (jv.inside) { 97 | is JsonObject -> ComplexElement(jv.obj?.let { klaxon.parseFromJsonObject(it) }!!) 98 | is String -> SimpleElement(jv.string!!) 99 | else -> throw IllegalArgumentException() 100 | } 101 | } 102 | 103 | } 104 | 105 | -------------------------------------------------------------------------------- /qodana.yaml: -------------------------------------------------------------------------------- 1 | version: "1.0" 2 | linter: jetbrains/qodana-jvm-community:2021.3 3 | profile: 4 | name: qodana.recommended 5 | exclude: 6 | - name: All 7 | paths: 8 | - scala-tuples-in-kotlin/src/main/kotlin/org/jetbrains/kotlinx/spark/api/tuples 9 | - kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Arities.kt 10 | -------------------------------------------------------------------------------- /scala-tuples-in-kotlin/build.gradle.kts: -------------------------------------------------------------------------------- 1 | @file:Suppress("UnstableApiUsage") 2 | 3 | import com.vanniktech.maven.publish.JavadocJar.Dokka 4 | import com.vanniktech.maven.publish.KotlinJvm 5 | import org.jetbrains.dokka.gradle.AbstractDokkaLeafTask 6 | import org.jetbrains.dokka.gradle.DokkaTask 7 | import org.jetbrains.dokka.gradle.DokkaTaskPartial 8 | import org.jetbrains.kotlin.gradle.tasks.KotlinCompile 9 | 10 | plugins { 11 | scala 12 | kotlin 13 | dokka 14 | mavenPublishBase 15 | } 16 | 17 | group = Versions.groupID 18 | version = Versions.project 19 | 20 | repositories { 21 | mavenCentral() 22 | } 23 | 24 | tasks.withType().configureEach { 25 | useJUnitPlatform() 26 | maxHeapSize = "4g" 27 | } 28 | 29 | dependencies { 30 | with(Dependencies) { 31 | implementation( 32 | kotlinStdLib, 33 | scalaLibrary, 34 | ) 35 | testImplementation( 36 | kotest, 37 | atrium, 38 | kotlinTest, 39 | ) 40 | } 41 | } 42 | 43 | 44 | kotlin { 45 | jvmToolchain { 46 | languageVersion.set( 47 | JavaLanguageVersion.of(Versions.jvmTarget) 48 | ) 49 | } 50 | } 51 | 52 | 53 | tasks.withType { 54 | dokkaSourceSets { 55 | all { 56 | sourceRoot( 57 | kotlin.sourceSets 58 | .main.get() 59 | .kotlin 60 | .srcDirs 61 | .first { it.path.endsWith("kotlin") } 62 | ) 63 | } 64 | } 65 | } 66 | 67 | mavenPublishing { 68 | configure(KotlinJvm(Dokka("dokkaHtml"))) 69 | } 70 | 71 | 72 | // Publishing of scala-tuples-in-kotlin can be skipped since it's only dependent on the Scala version 73 | val skipScalaTuplesInKotlin = System.getProperty("skipScalaTuplesInKotlin").toBoolean() 74 | tasks 75 | .filter { "publish" in it.name } 76 | .forEach { it.onlyIf { !skipScalaTuplesInKotlin } } 77 | 78 | -------------------------------------------------------------------------------- /scala-tuples-in-kotlin/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Conversions.kt: -------------------------------------------------------------------------------- 1 | /*- 2 | * =LICENSE= 3 | * Kotlin Spark API: API for Spark 3.0+ (Scala 2.12) 4 | * ---------- 5 | * Copyright (C) 2019 - 2021 JetBrains 6 | * ---------- 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may 
not use this file except in compliance with the License.
9 |  * You may obtain a copy of the License at
10 |  * 
11 |  * http://www.apache.org/licenses/LICENSE-2.0
12 |  * 
13 |  * Unless required by applicable law or agreed to in writing, software
14 |  * distributed under the License is distributed on an "AS IS" BASIS,
15 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 |  * See the License for the specific language governing permissions and
17 |  * limitations under the License.
18 |  * =LICENSEEND=
19 |  */
20 | 
21 | /**
22 |  * This file contains conversions of Tuples between the Scala-
23 |  * and Kotlin/Java variants.
24 |  */
25 | 
26 | @file:Suppress("NOTHING_TO_INLINE", "RemoveExplicitTypeArguments", "unused")
27 | 
28 | package org.jetbrains.kotlinx.spark.api
29 | 
30 | import scala.*
31 | 
32 | 
33 | /**
34 |  * Returns a new [Tuple2] based on the arguments in the current [Pair].
35 |  */
36 | fun <A, B> Pair<A, B>.toTuple(): Tuple2<A, B> = Tuple2(first, second)
37 | 
38 | /**
39 |  * Returns a new [Pair] based on the arguments in the current [Tuple2].
40 |  */
41 | fun <A, B> Tuple2<A, B>.toPair(): Pair<A, B> = Pair(_1(), _2())
42 | 
43 | /**
44 |  * Returns a new [Tuple3] based on the arguments in the current [Triple].
45 |  */
46 | fun <A, B, C> Triple<A, B, C>.toTuple(): Tuple3<A, B, C> = Tuple3(first, second, third)
47 | 
48 | /**
49 |  * Returns a new [Triple] based on the arguments in the current [Tuple3].
50 |  */
51 | fun <A, B, C> Tuple3<A, B, C>.toTriple(): Triple<A, B, C> = Triple(_1(), _2(), _3())
52 | 
--------------------------------------------------------------------------------
/scala-tuples-in-kotlin/src/main/kotlin/org/jetbrains/kotlinx/spark/api/tuples/EmptyTuple.kt:
--------------------------------------------------------------------------------
1 | /*-
2 |  * =LICENSE=
3 |  * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12)
4 |  * ----------
5 |  * Copyright (C) 2019 - 2022 JetBrains
6 |  * ----------
7 |  * Licensed under the Apache License, Version 2.0 (the "License");
8 |  * you may not use this file except in compliance with the License.
9 |  * You may obtain a copy of the License at
10 |  * 
11 |  * http://www.apache.org/licenses/LICENSE-2.0
12 |  * 
13 |  * Unless required by applicable law or agreed to in writing, software
14 |  * distributed under the License is distributed on an "AS IS" BASIS,
15 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 |  * See the License for the specific language governing permissions and
17 |  * limitations under the License.
18 |  * =LICENSEEND=
19 |  */
20 | package org.jetbrains.kotlinx.spark.api.tuples
21 | 
22 | import scala.*
23 | import java.io.Serializable
24 | 
25 | /**
26 |  * Just as in Scala3, we provide the [EmptyTuple]. It is the result of dropping the last item from a [Tuple1]
27 |  * or when calling `tupleOf()` for instance.
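 *
 * A small illustration (`tupleOf()` is the builder mentioned above; `emptyTuple()` is defined below):
 *
 * ```kotlin
 * emptyTuple() == tupleOf()        // both yield the single EmptyTuple instance
 * emptyTuple().productArity() == 0 // it carries no elements
 * ```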
28 |  */
29 | 
30 | object EmptyTuple : Product, Serializable {
31 |     override fun canEqual(that: Any?): Boolean = that == EmptyTuple
32 |     override fun productElement(n: Int): Nothing = throw IndexOutOfBoundsException("EmptyTuple has no members")
33 |     override fun productArity(): Int = 0
34 |     override fun toString(): String = "()"
35 | }
36 | 
37 | public fun emptyTuple(): EmptyTuple = EmptyTuple
38 | 
--------------------------------------------------------------------------------
/scala-tuples-in-kotlin/src/main/kotlin/org/jetbrains/kotlinx/spark/api/tuples/ProductExtensions.kt:
--------------------------------------------------------------------------------
1 | /*-
2 |  * =LICENSE=
3 |  * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12)
4 |  * ----------
5 |  * Copyright (C) 2019 - 2022 JetBrains
6 |  * ----------
7 |  * Licensed under the Apache License, Version 2.0 (the "License");
8 |  * you may not use this file except in compliance with the License.
9 |  * You may obtain a copy of the License at
10 |  * 
11 |  * http://www.apache.org/licenses/LICENSE-2.0
12 |  * 
13 |  * Unless required by applicable law or agreed to in writing, software
14 |  * distributed under the License is distributed on an "AS IS" BASIS,
15 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 |  * See the License for the specific language governing permissions and
17 |  * limitations under the License.
18 |  * =LICENSEEND=
19 |  */
20 | package org.jetbrains.kotlinx.spark.api.tuples
21 | 
22 | import scala.Product
23 | import scala.collection.JavaConverters
24 | import kotlin.jvm.Throws
25 | 
26 | /**
27 |  * Extra extensions for Scala [Product]s such as Tuples.
28 |  * In most cases, the functions of `SameTypeProductExtensions.kt` will be used
29 |  * instead of these. But these help for the overview and generic case.
30 |  *
31 |  * For example:
32 |  *
33 |  * ```kotlin
34 |  * 1 in tupleOf(1, 2, 3) == true
35 |  *
36 |  * for (x in tupleOf("a", "b", "c")) { ... }
37 |  *
38 |  * val a: List<Any?> = tupleOf(1, "a", 3L).asIterable().toList()
39 |  *
40 |  * tupleOf(1, 2, 3).size == 3
41 |  *
42 |  * tupleOf(1, 2, 3)[0] == 1
43 |  *
44 |  * tupleOf(1, 1, 2)[1..2] == tupleOf(1, 2, 2)[0..1]
45 |  * ```
46 |  *
47 |  */
48 | 
49 | /** Tests whether this iterator contains a given value as an element.
50 |  * Note: may not terminate for infinite iterators.
51 |  *
52 |  * @param item the element to test.
53 |  * @return `true` if this iterator produces some value that
54 |  * is equal (as determined by `==`) to `elem`, `false` otherwise.
55 |  * @note Reuse: After calling this method, one should discard the iterator it was called on.
56 |  * Using it is undefined and subject to change.
57 |  */
58 | operator fun Product.contains(item: Any?): Boolean = productIterator().contains(item)
59 | 
60 | /**
61 |  * An iterator over all the elements of this product.
62 |  * @return in the default implementation, an `Iterator`
63 |  */
64 | operator fun Product.iterator(): Iterator<Any?> = JavaConverters.asJavaIterator(productIterator())
65 | 
66 | /**
67 |  * Converts this product to an `Any?` iterable.
68 |  */
69 | fun Product.asIterable(): Iterable<Any?> = object : Iterable<Any?> {
70 |     override fun iterator(): Iterator<Any?> = JavaConverters.asJavaIterator(productIterator())
71 | }
72 | 
73 | /** The size of this product.
74 |  * @return for a product `A(x,,1,,, ..., x,,k,,)`, returns `k`
75 |  */
76 | val Product.size: Int
77 |     get() = productArity()
78 | 
79 | /** The n'th element of this product, 0-based.
In other words, for a 80 | * product `A(x,,1,,, ..., x,,k,,)`, returns `x,,(n+1),,` where `0 <= n < k`. 81 | * 82 | * @param n the index of the element to return 83 | * @throws IndexOutOfBoundsException 84 | * @return the element `n` elements after the first element 85 | */ 86 | @Throws(IndexOutOfBoundsException::class) 87 | operator fun Product.get(n: Int): Any? = productElement(n) 88 | 89 | /** The n'th element of this product, 0-based. In other words, for a 90 | * product `A(x,,1,,, ..., x,,k,,)`, returns `x,,(n+1),,` where `0 <= n < k`. 91 | * 92 | * @param n the index of the element to return 93 | * @return the element `n` elements after the first element, `null` if out of bounds 94 | */ 95 | fun Product.getOrNull(n: Int): Any? = if (n in 0 until size) productElement(n) else null 96 | 97 | /** The n'th element of this product, 0-based. In other words, for a 98 | * product `A(x,,1,,, ..., x,,k,,)`, returns `x,,(n+1),,` where `0 <= n < k`. 99 | * The result is cast to the given type [T]. 100 | * 101 | * @param n the index of the element to return 102 | * @throws IndexOutOfBoundsException 103 | * @throws ClassCastException 104 | * @return the element `n` elements after the first element 105 | */ 106 | @Suppress("UNCHECKED_CAST") 107 | @Throws(IndexOutOfBoundsException::class, ClassCastException::class) 108 | inline fun Product.getAs(n: Int): T = productElement(n) as T 109 | 110 | /** The n'th element of this product, 0-based. In other words, for a 111 | * product `A(x,,1,,, ..., x,,k,,)`, returns `x,,(n+1),,` where `0 <= n < k`. 112 | * The result is cast to the given type [T]. 113 | * 114 | * @param n the index of the element to return 115 | * @return the element `n` elements after the first element, `null` if out of bounds or unable to be cast 116 | */ 117 | @Suppress("UNCHECKED_CAST") 118 | inline fun Product.getAsOrNull(n: Int): T? = getOrNull(n) as? T 119 | 120 | /** The range of n'th elements of this product, 0-based. In other words, for a 121 | * product `A(x,,1,,, ..., x,,k,,)`, returns `x,,(n+1),,` where `0 <= n < k`. 122 | * 123 | * @param indexRange the indices of the elements to return 124 | * @throws IndexOutOfBoundsException 125 | * @return the elements in [indexRange] 126 | */ 127 | @Throws(IndexOutOfBoundsException::class) 128 | operator fun Product.get(indexRange: IntRange): List = indexRange.map(::get) 129 | 130 | /** The range of n'th elements of this product, 0-based. In other words, for a 131 | * product `A(x,,1,,, ..., x,,k,,)`, returns `x,,(n+1),,` where `0 <= n < k`. 132 | * 133 | * @param indexRange the indices of the elements to return 134 | * @return the elements in [indexRange], `null` if out of bounds 135 | */ 136 | fun Product.getOrNull(indexRange: IntRange): List = indexRange.map(::getOrNull) 137 | 138 | /** The range of n'th elements of this product, 0-based. In other words, for a 139 | * product `A(x,,1,,, ..., x,,k,,)`, returns `x,,(n+1),,` where `0 <= n < k`. 140 | * The results are cast to the given type [T]. 141 | * 142 | * @param indexRange the indices of the elements to return 143 | * @throws IndexOutOfBoundsException 144 | * @throws ClassCastException 145 | * @return the elements in [indexRange] 146 | */ 147 | @Throws(IndexOutOfBoundsException::class, ClassCastException::class) 148 | inline fun Product.getAs(indexRange: IntRange): List = indexRange.map(::getAs) 149 | 150 | /** The range of n'th elements of this product, 0-based. In other words, for a 151 | * product `A(x,,1,,, ..., x,,k,,)`, returns `x,,(n+1),,` where `0 <= n < k`. 
152 |  * The results are cast to the given type [T].
153 |  *
154 |  * @param indexRange the indices of the elements to return
155 |  * @return the elements in [indexRange], `null` if out of bounds or unable to be cast
156 |  */
157 | inline fun <reified T> Product.getAsOrNull(indexRange: IntRange): List<T?> = indexRange.map(::getAsOrNull)
158 | 
--------------------------------------------------------------------------------
/settings.gradle.kts:
--------------------------------------------------------------------------------
1 | plugins {
2 |     id("com.gradle.enterprise") version "3.10.3"
3 | }
4 | 
5 | gradleEnterprise {
6 |     buildScan {
7 |         termsOfServiceUrl = "https://gradle.com/terms-of-service"
8 |         termsOfServiceAgree = "yes"
9 |     }
10 | }
11 | 
12 | 
13 | val spark: String by settings
14 | val scala: String by settings
15 | val skipScalaTuplesInKotlin: String by settings
16 | System.setProperty("spark", spark)
17 | System.setProperty("scala", scala)
18 | System.setProperty("skipScalaTuplesInKotlin", skipScalaTuplesInKotlin)
19 | 
20 | 
21 | val scalaCompat
22 |     get() = scala.substringBeforeLast('.')
23 | 
24 | val versions = "${spark}_${scalaCompat}"
25 | 
26 | rootProject.name = "kotlin-spark-api-parent_$versions"
27 | 
28 | include("core")
29 | include("scala-tuples-in-kotlin")
30 | include("kotlin-spark-api")
31 | include("jupyter")
32 | include("examples")
33 | 
34 | project(":core").name = "core_$versions"
35 | project(":scala-tuples-in-kotlin").name = "scala-tuples-in-kotlin_$scalaCompat"
36 | project(":kotlin-spark-api").name = "kotlin-spark-api_$versions"
37 | project(":jupyter").name = "jupyter_$versions"
38 | project(":examples").name = "examples_$versions"
39 | 
40 | buildCache {
41 |     local {
42 |         removeUnusedEntriesAfterDays = 30
43 |     }
44 | }
45 | 
--------------------------------------------------------------------------------
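The tuple utilities in `scala-tuples-in-kotlin` are meant to compose. The sketch below shows one way they can fit together; it only uses the conversions from `Conversions.kt`, the `Product` extensions from `ProductExtensions.kt`, and `emptyTuple()` from `EmptyTuple.kt`, and it assumes that `tupleOf` (from `TupleBuilders.kt`, listed in the tree but not reproduced in this dump) builds a `Tuple3` for three arguments, as its usage in the `ProductExtensions.kt` KDoc suggests.

```kotlin
import org.jetbrains.kotlinx.spark.api.toPair
import org.jetbrains.kotlinx.spark.api.toTuple
import org.jetbrains.kotlinx.spark.api.tuples.*
import scala.Tuple2

fun main() {
    // Pair <-> Tuple2 round trip via Conversions.kt
    val tuple: Tuple2<Int, String> = (1 to "a").toTuple()
    val pair: Pair<Int, String> = tuple.toPair()
    println(pair)                      // (1, a)

    // Generic Product accessors from ProductExtensions.kt
    val t3 = tupleOf(1, "a", 3L)       // assumed builder, as used in the ProductExtensions KDoc
    println(t3.size)                   // 3
    println(t3[0])                     // 1 (0-based operator get)
    println(t3.getAs<String>(1))       // "a" (element 1 cast to String)
    println("a" in t3)                 // true (operator contains)
    for (x in t3) println(x)           // iterates all elements as Any?

    // EmptyTuple is the zero-arity case
    println(emptyTuple().size)         // 0
}
```

Note the design split visible in `ProductExtensions.kt`: `getAs` throws on an out-of-bounds index or a failed cast, while the `getAsOrNull` variants return `null` instead.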