├── .cargo └── config.toml ├── .git-blame-ignore-revs ├── .github ├── dependabot.yml └── workflows │ ├── pr-lint.yml │ └── release.yml ├── .gitignore ├── .scalafmt.conf ├── LICENSE ├── README.md ├── build.sbt ├── core └── src │ ├── main │ └── scala │ │ └── org │ │ └── polars │ │ └── scala │ │ └── polars │ │ ├── NativeLoader.scala │ │ ├── Polars.scala │ │ ├── api │ │ ├── DataFrame.scala │ │ ├── JSeries.java │ │ ├── LazyFrame.scala │ │ ├── Row.scala │ │ ├── Series.scala │ │ ├── expressions │ │ │ ├── Column.scala │ │ │ └── Expression.scala │ │ ├── io │ │ │ ├── Scannable.scala │ │ │ └── Writeable.scala │ │ └── types │ │ │ ├── DataTypes.scala │ │ │ └── Schema.scala │ │ ├── config │ │ ├── Config.scala │ │ └── constants.scala │ │ ├── functions.scala │ │ ├── internal │ │ └── jni │ │ │ ├── Natively.scala │ │ │ ├── common.scala │ │ │ ├── data_frame.scala │ │ │ ├── expressions │ │ │ ├── column_expr.scala │ │ │ └── literal_expr.scala │ │ │ ├── io │ │ │ ├── scan.scala │ │ │ └── write.scala │ │ │ ├── lazy_frame.scala │ │ │ ├── row.scala │ │ │ └── series.scala │ │ └── package.scala │ └── site │ └── index.html ├── examples └── src │ └── main │ ├── java │ └── examples │ │ └── java │ │ ├── InstantiateDataFrame.java │ │ ├── InstantiateSeries.java │ │ ├── configuration │ │ └── ConfiguringPolars.java │ │ ├── expressions │ │ └── ApplyingSimpleExpressions.java │ │ └── io │ │ ├── LazyAndEagerAPI.java │ │ ├── ReadingFileDatasets.java │ │ └── WritingToFileDatasets.java │ ├── resources │ └── files │ │ └── web-ds │ │ ├── data.csv │ │ ├── data.ipc │ │ ├── data.json │ │ └── data.parquet │ └── scala │ └── examples │ └── scala │ ├── InstantiateDataFrame.scala │ ├── InstantiateSeries.scala │ ├── configuration │ └── ConfiguringPolars.scala │ ├── expressions │ └── ApplyingSimpleExpressions.scala │ ├── io │ ├── LazyAndEagerAPI.scala │ ├── ReadingFileDatasets.scala │ └── WritingToFileDatasets.scala │ └── utils │ └── CommonUtils.scala ├── native ├── Cargo.lock ├── Cargo.toml ├── rustfmt.toml └── src │ ├── internal_jni │ ├── expr │ │ ├── column.rs │ │ ├── literal.rs │ │ └── mod.rs │ ├── frame.rs │ ├── io │ │ ├── mod.rs │ │ ├── scan │ │ │ ├── csv.rs │ │ │ ├── ipc.rs │ │ │ ├── json_lines.rs │ │ │ ├── mod.rs │ │ │ └── parquet.rs │ │ └── write │ │ │ ├── avro.rs │ │ │ ├── csv.rs │ │ │ ├── ipc.rs │ │ │ ├── json.rs │ │ │ ├── mod.rs │ │ │ └── parquet.rs │ ├── lazy.rs │ ├── mod.rs │ ├── row.rs │ ├── series.rs │ └── utils.rs │ ├── lib.rs │ └── utils │ ├── error.rs │ └── mod.rs ├── project ├── DocSettings.scala ├── ExtraCommands.scala ├── GeneralSettings.scala ├── NativeBuildSettings.scala ├── ProjectDependencies.scala ├── PublishingSettings.scala ├── Utils.scala ├── build.properties └── plugins.sbt └── version.sbt /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.aarch64-unknown-linux-gnu] 2 | linker = "aarch64-linux-gnu-gcc" 3 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Scala Steward: Reformat with scalafmt 3.8.6 2 | 7500ed15d9d50a19828222fc9a521c84d7d8b2e1 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 
-------------------------------------------------------------------------------- /.github/workflows/pr-lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint All 2 | 3 | on: [ pull_request ] 4 | 5 | permissions: 6 | contents: read 7 | 8 | env: 9 | RUSTFLAGS: -C debuginfo=0 10 | 11 | jobs: 12 | check-formatting: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_TOKEN: ${{ secrets.MY_GITHUB_TOKEN }} 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Set up JDK 20 | uses: actions/setup-java@v4 21 | with: 22 | java-version: "8" 23 | distribution: 'temurin' 24 | cache: 'sbt' 25 | 26 | - name: Install rust toolchain 27 | uses: dtolnay/rust-toolchain@nightly 28 | with: 29 | components: "clippy, rustfmt" 30 | 31 | - uses: Swatinem/rust-cache@v2 32 | with: 33 | workspaces: native 34 | prefix-key: lint 35 | 36 | - name: Check all formatting 37 | run: | 38 | cargo install cargo-sort 39 | sbt fmtCheckAll 40 | 41 | - name: Check doc issues 42 | run: sbt makeSite 43 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Artifacts 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | permissions: 8 | contents: read 9 | 10 | env: 11 | NATIVE_LIB_LOCATION: /tmp/native-libs/ 12 | SBT_OPTS: "-Dsbt.ci=true" 13 | JAVA_OPTS: "-XX:+UseG1GC -Xms2G -Xmx8G -Xss6M -XX:ReservedCodeCacheSize=256M -Dfile.encoding=UTF-8" 14 | 15 | jobs: 16 | check-formatting: 17 | runs-on: ubuntu-latest 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.MY_GITHUB_TOKEN }} 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - name: Set up JDK 24 | uses: actions/setup-java@v4 25 | with: 26 | java-version: "8" 27 | distribution: "zulu" 28 | cache: "sbt" 29 | 30 | - uses: sbt/setup-sbt@v1 31 | 32 | - name: Install rust toolchain 33 | uses: dtolnay/rust-toolchain@nightly 34 | with: 35 | components: "clippy, rustfmt" 36 | 37 | - uses: Swatinem/rust-cache@v2 38 | with: 39 | workspaces: native 40 | prefix-key: lint 41 | 42 | - name: Check all formatting 43 | run: | 44 | cargo install cargo-sort 45 | sbt fmtCheckAll 46 | 47 | - name: Check doc issues 48 | run: sbt makeSite 49 | 50 | build-natives: 51 | name: ${{ matrix.arch }} build 52 | runs-on: ${{ matrix.os }} 53 | env: 54 | TARGET_TRIPLE: ${{ matrix.arch }} 55 | needs: [check-formatting] 56 | strategy: 57 | fail-fast: true 58 | matrix: 59 | include: 60 | - os: ubuntu-latest 61 | arch: aarch64-unknown-linux-gnu 62 | packages: "sudo apt update && sudo apt-get install gcc-aarch64-linux-gnu" 63 | 64 | - os: ubuntu-latest 65 | arch: x86_64-unknown-linux-gnu 66 | packages: "" 67 | 68 | - os: windows-latest 69 | arch: aarch64-pc-windows-msvc 70 | packages: "" 71 | 72 | - os: windows-latest 73 | arch: x86_64-pc-windows-msvc 74 | packages: "" 75 | 76 | - os: macos-latest 77 | arch: x86_64-apple-darwin 78 | packages: "brew install sbt" 79 | 80 | - os: macos-latest 81 | arch: aarch64-apple-darwin 82 | packages: "" 83 | steps: 84 | - name: Install system packages 85 | run: ${{ matrix.packages }} 86 | 87 | - uses: actions/checkout@v4 88 | 89 | - name: Set up JDK 90 | uses: actions/setup-java@v4 91 | with: 92 | java-version: "8" 93 | distribution: "zulu" 94 | cache: "sbt" 95 | 96 | - uses: sbt/setup-sbt@v1 97 | 98 | - name: Install rust toolchain 99 | uses: dtolnay/rust-toolchain@nightly 100 | 101 | - uses: Swatinem/rust-cache@v2 102 | with: 103 | workspaces: native 104 | prefix-key: ${{ 
matrix.arch }} 105 | 106 | - name: Cross publish artifacts containing native library 107 | run: | 108 | rustup target add ${{ matrix.arch }} 109 | sbt generateNativeLibrary 110 | 111 | - name: Temporarily save native library for ${{ matrix.arch }} 112 | uses: actions/upload-artifact@v4 113 | with: 114 | name: native_libs-${{ matrix.arch }} 115 | path: ${{env.NATIVE_LIB_LOCATION}} 116 | retention-days: 1 117 | if-no-files-found: error 118 | 119 | test-build: 120 | name: ${{ matrix.os }} ${{ matrix.java }} test 121 | runs-on: ${{ matrix.os }} 122 | needs: [build-natives] 123 | env: 124 | SKIP_NATIVE_GENERATION: true 125 | strategy: 126 | fail-fast: false 127 | matrix: 128 | java: ["8", "11", "17", "21"] 129 | os: ["ubuntu-latest", "windows-latest", "macos-latest"] 130 | 131 | steps: 132 | - uses: actions/checkout@v4 133 | 134 | - name: Set up JDK 135 | uses: actions/setup-java@v4 136 | with: 137 | java-version: ${{ matrix.java }} 138 | distribution: "zulu" 139 | cache: "sbt" 140 | 141 | - uses: sbt/setup-sbt@v1 142 | 143 | - name: Download artifacts 144 | uses: actions/download-artifact@v4 145 | with: 146 | pattern: native_libs-* 147 | path: ${{env.NATIVE_LIB_LOCATION}} 148 | merge-multiple: true 149 | 150 | - name: Test for ${{ matrix.os }} ${{ matrix.java }} 151 | run: | 152 | sbt +assembly 153 | java -cp ./examples/target/scala-2.12/scala-polars-examples-assembly-0.1.0-SNAPSHOT.jar examples.scala.io.LazyAndEagerAPI 154 | java -cp ./examples/target/scala-2.13/scala-polars-examples-assembly-0.1.0-SNAPSHOT.jar examples.scala.io.LazyAndEagerAPI 155 | java -cp ./examples/target/scala-3.3.4/scala-polars-examples-assembly-0.1.0-SNAPSHOT.jar examples.scala.io.LazyAndEagerAPI 156 | 157 | publish: 158 | timeout-minutes: 15 159 | runs-on: ubuntu-latest 160 | env: 161 | GITHUB_TOKEN: ${{ secrets.MY_GITHUB_TOKEN }} 162 | SKIP_NATIVE_GENERATION: true 163 | needs: [test-build] 164 | steps: 165 | - uses: actions/checkout@v4 166 | 167 | - name: Configure SSH 168 | uses: webfactory/ssh-agent@v0.9.1 169 | with: 170 | ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }} 171 | 172 | - name: Set up JDK 173 | uses: actions/setup-java@v4 174 | with: 175 | java-version: "8" 176 | distribution: "zulu" 177 | cache: "sbt" 178 | 179 | - uses: sbt/setup-sbt@v1 180 | 181 | - name: Download artifacts 182 | uses: actions/download-artifact@v4 183 | with: 184 | pattern: native_libs-* 185 | path: ${{env.NATIVE_LIB_LOCATION}} 186 | merge-multiple: true 187 | 188 | - name: List the built artifacts 189 | run: ls -lhtR 190 | working-directory: ${{env.NATIVE_LIB_LOCATION}} 191 | 192 | - name: Publish Artifacts 193 | run: sbt +aetherDeploy 194 | 195 | - name: Publish API Docs 196 | run: | 197 | git config --global user.email "git@github.com" 198 | git config --global user.name "git" 199 | sbt ghpagesPushSite 200 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | .DS_Store 4 | 5 | # IntelliJ IDEA specific 6 | .idea/ 7 | .fleet/ 8 | *.iml 9 | 10 | # SBT specific 11 | .bsp/ 12 | coverage.xml 13 | target/ 14 | .classpath 15 | .project 16 | .settings/ 17 | .metals 18 | .bloop 19 | metals.sbt 20 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = 3.8.6 2 | runner.dialect = scala213 3 | project.git = true 4 | maxColumn = 98 5 | rewrite { 6 | rules = [ 
7 | Imports, 8 | RedundantBraces, 9 | RedundantParens 10 | ] 11 | imports { 12 | sort = ascii 13 | groups = [ 14 | ["javax?\\..*"], 15 | ["sbt\\..*"], 16 | ["scala\\..*"], 17 | ["org\\..*"] 18 | ] 19 | } 20 | } 21 | align.tokens = none 22 | assumeStandardLibraryStripMargin = true 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | scala-polars 2 | ============ 3 | 4 | `scala-polars` is a library for using the awesome [Polars](https://www.pola.rs/) DataFrame library in 5 | Scala and Java projects. 6 | 7 | ## About 8 | 9 | ### About Polars 10 | 11 | Polars is a blazing fast DataFrames library implemented in Rust using 12 | [Apache Arrow Columnar Format](https://arrow.apache.org/docs/format/Columnar.html) as the memory model. 13 | 14 | - Lazy / eager execution 15 | - Multithreaded 16 | - SIMD 17 | - Query optimization 18 | - Powerful expression API 19 | - Hybrid Streaming (larger than RAM datasets) 20 | - Rust | Python | NodeJS | ... 21 | 22 | ### About scala-polars 23 | 24 | This library is written mostly in Scala and leverages [JNI](https://en.wikipedia.org/wiki/Java_Native_Interface) 25 | to offload heavy data processing tasks to its native counterpart written completely in Rust. The aim of this library is 26 | to provide an easy-to-use interface through which Scala/Java developers can leverage the amazing Polars library 27 | in their existing projects. 28 | 29 | The project is mainly divided into 2 submodules, 30 | 31 | - `core` - Contains the user-facing interfaces written in Scala that are used to work with data. Internally, this 32 | module relies on the `native` submodule. 33 | - `native` - An internal module written in Rust which relies on the official Rust implementation of Polars. 34 | 35 | ### Examples 36 | 37 | - [Java Examples](examples/src/main/java/examples/java/) 38 | - [Scala Examples](examples/src/main/scala/examples/scala/) 39 | 40 | ## Compatibility 41 | 42 | - JDK version `>=8` 43 | - Scala version `2.12.x`, `2.13.x` and `3.3.x`. Default is `2.13.x` 44 | - Rust version `>=1.58` 45 | 46 | ## Building 47 | 48 | ### Prerequisites 49 | 50 | The following tooling is required to start building `scala-polars`, 51 | 52 | - JDK 8+ ([OpenJDK](https://openjdk.org/projects/jdk/) 53 | or [Oracle Java SE](https://www.oracle.com/java/technologies/javase/)) 54 | - [Rust](https://www.rust-lang.org/tools/install) (cargo, rustc etc.) 55 | - [sbt](https://www.scala-sbt.org/index.html) 56 | 57 | ### How to Compile? 58 | 59 | sbt is the primary build tool for this project, and all the required interlinking has been done in such a way that your 60 | IntelliJ IDE build or an external build works in the same way. This means that whether you are in development mode or want to 61 | build to distribute, the build process remains the same and is more or less abstracted. 62 | 63 | The build process that sbt triggers involves the following steps, 64 | 65 | - Compile the Rust code present in the `native` module to a binary. 66 | - Compile the Scala and Java (if any) facade code. 67 | - Copy the built Rust binary to a fixed location on the classpath of the Scala code during its build. 68 | 69 | All of the above steps happen automatically when you run an sbt build job that triggers the `compile` phase. Other than 70 | this, during the package phase, the Scala and Java code and the built Rust binary are added to the built jar(s). 
To keep 71 | everything monolithic, the `native` module is not packaged as a jar; only the `core` module is. 72 | 73 | The above process might look complicated, and it actually is 😂, but since all the internal sbt wiring is already in 74 | place, the user-facing process is fairly straightforward. To get started, first ensure that JDK 8+, sbt and the latest Rust 75 | compiler are installed, and then follow the 76 | commands below as per the need. 77 | 78 | **Compilation** 79 | 80 | ```shell 81 | # To compile the whole project (Scala/Java/Rust) in one go 82 | sbt compile 83 | ``` 84 | 85 | **Local packaging/installation** 86 | 87 | ```shell 88 | # To package the project and install locally as slim jars with the default Scala version. 89 | sbt publishLocal 90 | 91 | # To package the project and install locally as slim jars for all supported Scala versions. 92 | sbt +publishLocal 93 | ``` 94 | 95 | **Build Assembly (fat jar)** 96 | 97 | ```shell 98 | # To package the project as a fat jar with the default Scala version. 99 | sbt assembly 100 | 101 | # To package the project as fat jars for all supported Scala versions. 102 | sbt +assembly 103 | ``` 104 | 105 | **Generate Native Binary Only** 106 | 107 | ```shell 108 | # To compile only the native module containing Rust code to a binary. 109 | sbt generateNativeLibrary 110 | ``` 111 | 112 | ## License 113 | 114 | Apache License 2.0, see [LICENSE](LICENSE). 115 | 116 | ## Community 117 | 118 | Reach out to the Polars community on [Discord](https://discord.gg/4UfP5cfBE7). 119 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import DocSettings.* 2 | import Utils.* 3 | 4 | ThisBuild / publish / skip := true 5 | ThisBuild / publishArtifact := false 6 | 7 | /* 8 | *********************** 9 | * Core Module * 10 | *********************** 11 | */ 12 | 13 | lazy val core = project 14 | .in(file("core")) 15 | .withId("scala-polars") 16 | .settings(name := "scala-polars") 17 | .enablePlugins(GhpagesPlugin, SiteScaladocPlugin) 18 | .settings( 19 | // unidocSourceFilePatterns := Nil, 20 | git.remoteRepo := "git@github.com:chitralverma/scala-polars.git", 21 | SiteScaladoc / siteSubdirName := "api/latest" 22 | ) 23 | .settings(ProjectDependencies.dependencies) 24 | .settings(GeneralSettings.commonSettings) 25 | .settings(PublishingSettings.settings) 26 | .settings( 27 | nativeRoot := baseDirectory.value.toPath.resolveSibling("native").toFile, 28 | inConfig(Compile)(NativeBuildSettings.settings) 29 | ) 30 | .settings(ExtraCommands.commands) 31 | .settings(ExtraCommands.commandAliases) 32 | // .configureUnidoc("scala-polars API Reference") 33 | 34 | /* 35 | *********************** 36 | * Examples Module * 37 | *********************** 38 | */ 39 | 40 | lazy val examples = project 41 | .in(file("examples")) 42 | .withId("scala-polars-examples") 43 | .settings(name := "scala-polars-examples") 44 | .settings(GeneralSettings.commonSettings) 45 | .settings( 46 | Compile / packageBin / publishArtifact := false, 47 | Compile / packageDoc / publishArtifact := false, 48 | Compile / packageSrc / publishArtifact := false 49 | ) 50 | .dependsOn(core) 51 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/NativeLoader.scala: 
-------------------------------------------------------------------------------- 1 | package org.polars.scala.polars 2 | 3 | import java.nio.file._ 4 | 5 | class NativeLoader(nativeLibrary: String) { 6 | NativeLoader.load(nativeLibrary) 7 | } 8 | 9 | object NativeLoader { 10 | def load(nativeLibrary: String): Unit = { 11 | def loadPackaged(arch: String): Unit = { 12 | val lib: String = System.mapLibraryName(nativeLibrary) 13 | val resourcePath: String = s"/native/$arch/$lib" 14 | 15 | val resourceStream = Option( 16 | this.getClass.getResourceAsStream(resourcePath) 17 | ) match { 18 | case Some(s) => s 19 | case None => 20 | throw new UnsatisfiedLinkError( 21 | s"Native library $lib ($resourcePath) cannot be found on the classpath." 22 | ) 23 | } 24 | 25 | val tmp: Path = Files.createTempDirectory("jni-") 26 | val extractedPath = tmp.resolve(lib) 27 | 28 | try 29 | Files.copy(resourceStream, extractedPath) 30 | catch { 31 | case ex: Exception => 32 | throw new UnsatisfiedLinkError( 33 | s"Error while extracting native library:\n$ex" 34 | ) 35 | } 36 | 37 | System.load(extractedPath.toAbsolutePath.toString) 38 | } 39 | 40 | def load(): Unit = try 41 | System.loadLibrary(nativeLibrary) 42 | catch { 43 | case e: Throwable => 44 | try 45 | loadPackaged("aarch64") 46 | catch { 47 | case t: Throwable => 48 | t.addSuppressed(e) 49 | try 50 | loadPackaged("x86_64") 51 | catch { 52 | case ex: Throwable => 53 | ex.addSuppressed(t) 54 | throw new IllegalStateException( 55 | s"Unable to load the provided native library '$nativeLibrary'.", 56 | ex 57 | ) 58 | } 59 | } 60 | 61 | } 62 | 63 | load() 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/Polars.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars 2 | 3 | import org.polars.scala.polars.api.io.Scannable 4 | import org.polars.scala.polars.api.{DataFrame, LazyFrame} 5 | import org.polars.scala.polars.config.Config 6 | import org.polars.scala.polars.internal.jni.{common, data_frame, lazy_frame} 7 | 8 | object Polars { 9 | 10 | def config: Config = Config.getConfig 11 | 12 | def version(): String = common.version() 13 | 14 | /** Returns a [[org.polars.scala.polars.api.io.Scannable Scannable]] that can be used to lazily 15 | * scan datasets of various formats ([[org.polars.scala.polars.api.io.Scannable.parquet 16 | * parquet]], [[org.polars.scala.polars.api.io.Scannable.ipc ipc]], 17 | * [[org.polars.scala.polars.api.io.Scannable.csv csv]] and 18 | * [[org.polars.scala.polars.api.io.Scannable.jsonLines jsonLines]]) from local filesystems and 19 | * cloud object stores (aws, gcp and azure) as a 20 | * [[org.polars.scala.polars.api.LazyFrame LazyFrame]]. 
21 | * @return 22 | * [[org.polars.scala.polars.api.io.Scannable Scannable]] 23 | */ 24 | def scan: Scannable = new Scannable() 25 | 26 | def concat(lazyFrame: LazyFrame, lazyFrames: Array[LazyFrame]): LazyFrame = 27 | concat(lazyFrame, lazyFrames, reChunk = false, parallel = true) 28 | 29 | def concat( 30 | lazyFrame: LazyFrame, 31 | lazyFrames: Array[LazyFrame], 32 | reChunk: Boolean = false, 33 | parallel: Boolean = true 34 | ): LazyFrame = 35 | if (lazyFrames.isEmpty) lazyFrame 36 | else { 37 | val ptr = 38 | lazy_frame.concatLazyFrames( 39 | lazyFrames.+:(lazyFrame).map(_.ptr), 40 | reChunk = reChunk, 41 | parallel = parallel 42 | ) 43 | 44 | LazyFrame.withPtr(ptr) 45 | } 46 | 47 | def concat(dataFrame: DataFrame, dataFrames: Array[DataFrame]): DataFrame = 48 | if (dataFrames.isEmpty) dataFrame 49 | else { 50 | val ptr = data_frame.concatDataFrames(dataFrames.+:(dataFrame).map(_.ptr)) 51 | 52 | DataFrame.withPtr(ptr) 53 | } 54 | 55 | } 56 | 57 | private[polars] object LibraryStates extends Enumeration { 58 | type LibraryState = Value 59 | 60 | val NOT_LOADED, LOADING, LOADED = Value 61 | } 62 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/api/DataFrame.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.api 2 | 3 | import java.util.Collections 4 | 5 | import scala.annotation.varargs 6 | import scala.jdk.CollectionConverters._ 7 | 8 | import org.polars.scala.polars.api.expressions.Expression 9 | import org.polars.scala.polars.api.io.Writeable 10 | import org.polars.scala.polars.api.types.Schema 11 | import org.polars.scala.polars.config.UniqueKeepStrategies 12 | import org.polars.scala.polars.internal.jni.data_frame 13 | 14 | class DataFrame private (private[polars] val ptr: Long) { 15 | 16 | val schema: Schema = { 17 | val schemaString = data_frame.schemaString(ptr) 18 | Schema.fromString(schemaString) 19 | } 20 | 21 | val width: Int = schema.getFields.length 22 | 23 | val height: Long = count() 24 | 25 | val shape: (Long, Int) = (height, width) 26 | 27 | @varargs 28 | def select(colName: String, colNames: String*): DataFrame = 29 | toLazy.select(colName, colNames: _*).collect(noOptimization = true) 30 | 31 | @varargs 32 | def select(column: Expression, columns: Expression*): DataFrame = 33 | toLazy.select(column, columns: _*).collect(noOptimization = true) 34 | 35 | def filter(predicate: Expression): DataFrame = 36 | toLazy.filter(predicate).collect(noOptimization = true) 37 | 38 | def sort( 39 | cols: Array[String], 40 | descending: Array[Boolean], 41 | nullLast: Array[Boolean], 42 | maintainOrder: Boolean 43 | ): DataFrame = 44 | toLazy.sort(cols, descending, nullLast, maintainOrder).collect(noOptimization = true) 45 | 46 | def sort( 47 | expr: String, 48 | descending: Boolean, 49 | nullLast: Boolean, 50 | maintainOrder: Boolean 51 | ): DataFrame = 52 | toLazy 53 | .sort( 54 | cols = Array(expr), 55 | descending = Array(descending), 56 | nullLast = Array(nullLast), 57 | maintainOrder = maintainOrder 58 | ) 59 | .collect(noOptimization = true) 60 | 61 | def sort( 62 | exprs: Array[Expression], 63 | null_last: Array[Boolean], 64 | maintain_order: Boolean 65 | ): DataFrame = 66 | toLazy.sort(exprs, null_last, maintain_order).collect(noOptimization = true) 67 | 68 | def sort(expr: Expression, null_last: Boolean, maintain_order: Boolean): DataFrame = 69 | toLazy 70 | .sort(Array(expr), Array(null_last), maintainOrder = maintain_order) 
71 | .collect(noOptimization = true) 72 | 73 | def set_sorted(mapping: Map[String, Boolean]): DataFrame = 74 | set_sorted(mapping.asJava) 75 | 76 | def set_sorted(mapping: java.util.Map[String, Boolean]): DataFrame = 77 | toLazy.set_sorted(mapping).collect(noOptimization = true) 78 | 79 | def top_k( 80 | k: Int, 81 | cols: Array[String], 82 | descending: Array[Boolean], 83 | nullLast: Array[Boolean], 84 | maintainOrder: Boolean 85 | ): DataFrame = 86 | toLazy 87 | .top_k(k, cols, descending, nullLast, maintainOrder) 88 | .collect(projectionPushdown = false, predicatePushdown = false, commSubplanElim = false) 89 | 90 | def top_k( 91 | k: Int, 92 | expr: String, 93 | descending: Boolean, 94 | nullLast: Boolean, 95 | maintainOrder: Boolean 96 | ): DataFrame = 97 | toLazy 98 | .top_k( 99 | k = k, 100 | cols = Array(expr), 101 | descending = Array(descending), 102 | nullLast = Array(nullLast), 103 | maintainOrder = maintainOrder 104 | ) 105 | .collect(projectionPushdown = false, predicatePushdown = false, commSubplanElim = false) 106 | 107 | def top_k( 108 | k: Int, 109 | exprs: Array[Expression], 110 | null_last: Array[Boolean], 111 | maintain_order: Boolean 112 | ): DataFrame = 113 | toLazy 114 | .top_k(k, exprs, null_last, maintain_order) 115 | .collect(projectionPushdown = false, predicatePushdown = false, commSubplanElim = false) 116 | 117 | def top_k(k: Int, expr: Expression, null_last: Boolean, maintain_order: Boolean): DataFrame = 118 | toLazy 119 | .top_k(k, Array(expr), Array(null_last), maintainOrder = maintain_order) 120 | .collect(projectionPushdown = false, predicatePushdown = false, commSubplanElim = false) 121 | 122 | def limit(n: Long): DataFrame = DataFrame.withPtr(data_frame.limit(ptr, n)) 123 | 124 | def head(n: Long): DataFrame = limit(n) 125 | 126 | def first(): DataFrame = limit(1) 127 | 128 | def tail(n: Long): DataFrame = DataFrame.withPtr(data_frame.tail(ptr, n)) 129 | 130 | def last(): DataFrame = tail(1) 131 | 132 | def with_column(name: String, expr: Expression): DataFrame = 133 | toLazy.with_column(name, expr).collect(noOptimization = true) 134 | 135 | @varargs 136 | def drop(colName: String, colNames: String*): DataFrame = 137 | toLazy.drop(colName, colNames: _*).collect(noOptimization = true) 138 | 139 | def drop_nulls: DataFrame = drop_nulls() 140 | 141 | def drop_nulls( 142 | subset: Array[String] = Array.empty 143 | ): DataFrame = 144 | toLazy.drop_nulls(subset).collect(noOptimization = true) 145 | 146 | def rename(oldName: String, newName: String): DataFrame = 147 | rename(Collections.singletonMap(oldName, newName)) 148 | 149 | def rename(mapping: Map[String, String]): DataFrame = 150 | rename(mapping.asJava) 151 | 152 | def rename(mapping: java.util.Map[String, String]): DataFrame = 153 | toLazy.rename(mapping).collect(noOptimization = true) 154 | 155 | def unique: DataFrame = unique() 156 | 157 | def unique( 158 | subset: Array[String] = Array.empty, 159 | keep: UniqueKeepStrategies.UniqueKeepStrategy = UniqueKeepStrategies.any, 160 | maintainOrder: Boolean = false 161 | ): DataFrame = 162 | toLazy.unique(subset, keep, maintainOrder).collect(noOptimization = true) 163 | 164 | def toLazy: LazyFrame = LazyFrame.withPtr(data_frame.toLazy(ptr)) 165 | 166 | def show(): Unit = data_frame.show(ptr) 167 | 168 | def count(): Long = data_frame.count(ptr) 169 | 170 | /** Provides an iterator to traverse a specified number of rows from the DataFrame. 
171 | * @param nRows 172 | * number of rows to traverse 173 | * @note 174 | * if `nRows` is greater than the total rows in DataFrame then all rows are included. 175 | * @return 176 | * Iterator of [[Row]] 177 | */ 178 | def rows(nRows: Long): Iterator[Row] = RowIterator.withPtr(ptr).lazyIterator(nRows) 179 | 180 | /** Provides an iterator to traverse all rows from the DataFrame. 181 | * @return 182 | * Iterator of [[Row]] 183 | */ 184 | def rows(): Iterator[Row] = rows(-1L) 185 | 186 | def write(): Writeable = new Writeable(ptr) 187 | 188 | } 189 | 190 | object DataFrame { 191 | 192 | private[polars] def withPtr(ptr: Long) = new DataFrame(ptr) 193 | 194 | /** Initialize new [[org.polars.scala.polars.api.DataFrame]] from one or more 195 | * [[org.polars.scala.polars.api.Series]]. The name of a series is used as column name and its 196 | * values are the values of this column. 197 | * 198 | * @param series 199 | * Series 200 | * @param more 201 | * Series as a scala or java array 202 | * 203 | * @return 204 | * [[org.polars.scala.polars.api.DataFrame]] formed from the provided 205 | * [[org.polars.scala.polars.api.Series]] 206 | */ 207 | @varargs 208 | def fromSeries(series: Series, more: Series*): DataFrame = 209 | DataFrame.withPtr(data_frame.fromSeries(more.+:(series).map(_.ptr).toArray)) 210 | 211 | /** Initialize new [[org.polars.scala.polars.api.DataFrame]] from one or more 212 | * [[org.polars.scala.polars.api.Series]]. The name of a series is used as column name and its 213 | * values are the values of this column. 214 | * 215 | * @param series 216 | * Series 217 | * @param more 218 | * Series as a scala iterable 219 | * 220 | * @return 221 | * [[org.polars.scala.polars.api.DataFrame]] formed from the provided 222 | * [[org.polars.scala.polars.api.Series]] 223 | */ 224 | def fromSeries(series: Series, more: Iterable[Series]): DataFrame = 225 | DataFrame.withPtr(data_frame.fromSeries(more.toSeq.+:(series).map(_.ptr).toArray)) 226 | 227 | /** Initialize new [[org.polars.scala.polars.api.DataFrame]] from one or more 228 | * [[org.polars.scala.polars.api.Series]]. The name of a series is used as column name and its 229 | * values are the values of this column. 
230 | * 231 | * @param series 232 | * Series 233 | * @param more 234 | * Series as a java iterable 235 | * 236 | * @return 237 | * [[org.polars.scala.polars.api.DataFrame]] formed from the provided 238 | * [[org.polars.scala.polars.api.Series]] 239 | */ 240 | def fromSeries(series: Series, more: java.lang.Iterable[Series]): DataFrame = 241 | fromSeries(series, more.asScala) 242 | 243 | } 244 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/api/JSeries.java: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.api; 2 | 3 | import scala.Boolean; 4 | import scala.Int; 5 | import scala.jdk.javaapi.CollectionConverters; 6 | 7 | import java.time.LocalDate; 8 | import java.time.LocalDateTime; 9 | import java.util.ArrayList; 10 | import java.util.Arrays; 11 | import java.util.Iterator; 12 | import java.util.List; 13 | import java.util.stream.Collectors; 14 | import java.util.stream.StreamSupport; 15 | 16 | import org.polars.scala.polars.internal.jni.series; 17 | 18 | class JSeries { 19 | final static String EmptyString = ""; 20 | 21 | @SuppressWarnings({ "unchecked", "rawtypes" }) 22 | static Series ofList(String name, Iterable values) { 23 | Iterator valuesIter = values.iterator(); 24 | List sList = new ArrayList<>(); 25 | 26 | while (valuesIter.hasNext()) { 27 | Iterable subList = valuesIter.next(); 28 | Object head = subList.iterator().next(); 29 | 30 | Series thisSeries; 31 | if (head instanceof Integer || head instanceof Int) { 32 | thisSeries = Series.ofInt(EmptyString, subList); 33 | } else if (head instanceof Long) { 34 | thisSeries = Series.ofLong(EmptyString, subList); 35 | } else if (head instanceof Float) { 36 | thisSeries = Series.ofFloat(EmptyString, subList); 37 | } else if (head instanceof Double) { 38 | thisSeries = Series.ofDouble(EmptyString, subList); 39 | } else if (head instanceof Boolean) { 40 | thisSeries = Series.ofBoolean(EmptyString, subList); 41 | } else if (head instanceof LocalDate) { 42 | thisSeries = Series.ofDate(EmptyString, subList); 43 | } else if (head instanceof LocalDateTime) { 44 | thisSeries = Series.ofDateTime(EmptyString, subList); 45 | } else if (head instanceof String) { 46 | thisSeries = Series.ofString(EmptyString, subList); 47 | } else if (head instanceof java.lang.Iterable) { 48 | thisSeries = ofList(EmptyString, subList); 49 | } else if (head instanceof scala.collection.Iterable) { 50 | Iterable s = (Iterable) StreamSupport.stream(subList.spliterator(), false) 51 | .map(v -> CollectionConverters.asJava((scala.collection.Iterable) v)) 52 | .collect(Collectors.toList()); 53 | 54 | thisSeries = ofList(EmptyString, s); 55 | } else if (head.getClass().isArray()) { 56 | Iterable s = (Iterable) StreamSupport.stream(subList.spliterator(), false) 57 | .map(v -> Arrays.asList((Object[]) v)) 58 | .collect(Collectors.toList()); 59 | 60 | thisSeries = ofList(EmptyString, s); 61 | } else { 62 | throw new IllegalArgumentException( 63 | String.format("Nested series of provided internal type `%s` is currently not supported.", head.getClass().getSimpleName()) 64 | ); 65 | } 66 | 67 | sList.add(thisSeries); 68 | } 69 | 70 | long[] ptrs = sList.stream().map(Series::ptr).mapToLong(Long::longValue).toArray(); 71 | return Series.withPtr(series.new_list_series(name, ptrs)); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- 
/core/src/main/scala/org/polars/scala/polars/api/LazyFrame.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.api 2 | 3 | import java.util.Collections 4 | 5 | import scala.annotation.varargs 6 | import scala.jdk.CollectionConverters._ 7 | 8 | import org.polars.scala.polars.api.expressions.Expression 9 | import org.polars.scala.polars.api.types.Schema 10 | import org.polars.scala.polars.config.UniqueKeepStrategies 11 | import org.polars.scala.polars.internal.jni.expressions.column_expr 12 | import org.polars.scala.polars.internal.jni.lazy_frame 13 | 14 | class LazyFrame private (private[polars] val ptr: Long) { 15 | 16 | val schema: Schema = { 17 | val schemaString = lazy_frame.schemaString(ptr) 18 | Schema.fromString(schemaString) 19 | } 20 | 21 | val width: Int = schema.getFields.length 22 | 23 | @varargs 24 | def select(colName: String, colNames: String*): LazyFrame = { 25 | val ldfPtr = lazy_frame.selectFromStrings(ptr, colNames.+:(colName).distinct.toArray) 26 | 27 | LazyFrame.withPtr(ldfPtr) 28 | } 29 | 30 | @varargs 31 | def select(column: Expression, columns: Expression*): LazyFrame = { 32 | val ldfPtr = lazy_frame.selectFromExprs(ptr, columns.+:(column).map(_.ptr).distinct.toArray) 33 | 34 | LazyFrame.withPtr(ldfPtr) 35 | } 36 | 37 | def filter(predicate: Expression): LazyFrame = { 38 | val ldfPtr = lazy_frame.filterFromExprs(ptr, predicate.ptr) 39 | 40 | LazyFrame.withPtr(ldfPtr) 41 | } 42 | 43 | def sort( 44 | cols: Array[String], 45 | descending: Array[Boolean], 46 | nullLast: Array[Boolean], 47 | maintainOrder: Boolean 48 | ): LazyFrame = { 49 | assert( 50 | cols.length == descending.length, 51 | s"Length of provided list columns(${cols.length}) and their " + 52 | s"sorting directions((${descending.length})) is not equal." 53 | ) 54 | 55 | val exprs = cols.zip(descending).map { case (column, bool) => 56 | Expression.withPtr(column_expr.sort_column_by_name(column, bool)) 57 | } 58 | 59 | sort(exprs, nullLast, maintainOrder = maintainOrder) 60 | } 61 | 62 | def sort( 63 | col: String, 64 | descending: Boolean, 65 | nullLast: Boolean, 66 | maintainOrder: Boolean 67 | ): LazyFrame = 68 | sort(Array(col), Array(descending), Array(nullLast), maintainOrder = maintainOrder) 69 | 70 | def sort( 71 | exprs: Array[Expression], 72 | null_last: Array[Boolean], 73 | maintainOrder: Boolean 74 | ): LazyFrame = { 75 | assert( 76 | exprs.length == null_last.length, 77 | s"Length of provided expressions (${exprs.length}) and their " + 78 | s"null_last (${null_last.length}) is not equal." 79 | ) 80 | 81 | val ldfPtr = 82 | lazy_frame.sortFromExprs(ptr, exprs.map(_.ptr).distinct, null_last, maintainOrder) 83 | 84 | LazyFrame.withPtr(ldfPtr) 85 | } 86 | 87 | def sort(expr: Expression, nullLast: Boolean, maintainOrder: Boolean): LazyFrame = 88 | sort(Array(expr), Array(nullLast), maintainOrder = maintainOrder) 89 | 90 | def set_sorted(mapping: Map[String, Boolean]): LazyFrame = 91 | set_sorted(mapping.asJava) 92 | 93 | def set_sorted(mapping: java.util.Map[String, Boolean]): LazyFrame = { 94 | val ldfPtr = lazy_frame.set_sorted(ptr, mapping) 95 | 96 | LazyFrame.withPtr(ldfPtr) 97 | } 98 | 99 | def top_k( 100 | k: Int, 101 | exprs: Array[Expression], 102 | null_last: Array[Boolean], 103 | maintainOrder: Boolean 104 | ): LazyFrame = { 105 | assert( 106 | exprs.length == null_last.length, 107 | s"Length of provided expressions (${exprs.length}) and their " + 108 | s"null_last (${null_last.length}) is not equal." 
109 | ) 110 | val ldfPtr = 111 | lazy_frame.topKFromExprs(ptr, k, exprs.map(_.ptr).distinct, null_last, maintainOrder) 112 | 113 | LazyFrame.withPtr(ldfPtr) 114 | } 115 | 116 | def top_k(k: Int, expr: Expression, nullLast: Boolean, maintainOrder: Boolean): LazyFrame = 117 | top_k(k, Array(expr), Array(nullLast), maintainOrder = maintainOrder) 118 | 119 | def top_k( 120 | k: Int, 121 | cols: Array[String], 122 | descending: Array[Boolean], 123 | nullLast: Array[Boolean], 124 | maintainOrder: Boolean 125 | ): LazyFrame = { 126 | assert( 127 | cols.length == descending.length, 128 | s"Length of provided list columns (${cols.length}) and their " + 129 | s"sorting directions (${descending.length}) is not equal." 130 | ) 131 | 132 | val exprs = cols.zip(descending).map { case (column, bool) => 133 | Expression.withPtr(column_expr.sort_column_by_name(column, bool)) 134 | } 135 | 136 | top_k(k, exprs, null_last = nullLast, maintainOrder = maintainOrder) 137 | } 138 | 139 | def top_k( 140 | k: Int, 141 | col: String, 142 | descending: Boolean, 143 | nullLast: Boolean, 144 | maintainOrder: Boolean 145 | ): LazyFrame = 146 | top_k(k, Array(col), Array(descending), Array(nullLast), maintainOrder = maintainOrder) 147 | 148 | def limit(n: Long): LazyFrame = LazyFrame.withPtr(lazy_frame.limit(ptr, n)) 149 | 150 | def head(n: Long): LazyFrame = limit(n) 151 | 152 | def first(): LazyFrame = limit(1) 153 | 154 | def tail(n: Long): LazyFrame = LazyFrame.withPtr(lazy_frame.tail(ptr, n)) 155 | 156 | def last(): LazyFrame = tail(1) 157 | 158 | @varargs 159 | def drop(colName: String, colNames: String*): LazyFrame = { 160 | val ldfPtr = lazy_frame.drop(ptr, colNames.+:(colName).distinct.toArray) 161 | 162 | LazyFrame.withPtr(ldfPtr) 163 | } 164 | 165 | def with_column(name: String, expr: Expression): LazyFrame = { 166 | val ldfPtr = lazy_frame.withColumn(ptr, name, expr.ptr) 167 | 168 | LazyFrame.withPtr(ldfPtr) 169 | } 170 | 171 | def rename(oldName: String, newName: String): LazyFrame = 172 | rename(Collections.singletonMap(oldName, newName)) 173 | 174 | def rename(mapping: Map[String, String]): LazyFrame = rename(mapping.asJava) 175 | 176 | def rename(mapping: java.util.Map[String, String]): LazyFrame = { 177 | val ldfPtr = lazy_frame.rename(ptr, mapping) 178 | 179 | LazyFrame.withPtr(ldfPtr) 180 | } 181 | 182 | def unique: LazyFrame = unique() 183 | 184 | def unique( 185 | subset: Array[String] = Array.empty, 186 | keep: UniqueKeepStrategies.UniqueKeepStrategy = UniqueKeepStrategies.any, 187 | maintainOrder: Boolean = false 188 | ): LazyFrame = { 189 | val ldfPtr = lazy_frame.unique(ptr, subset, keep.toString, maintainOrder) 190 | 191 | LazyFrame.withPtr(ldfPtr) 192 | } 193 | 194 | def drop_nulls: LazyFrame = drop_nulls() 195 | 196 | def drop_nulls( 197 | subset: Array[String] = Array.empty 198 | ): LazyFrame = { 199 | val ldfPtr = lazy_frame.drop_nulls(ptr, subset) 200 | 201 | LazyFrame.withPtr(ldfPtr) 202 | } 203 | 204 | def explain: Unit = explain() 205 | 206 | def explain( 207 | optimized: Boolean = true, 208 | typeCoercion: Boolean = true, 209 | predicatePushdown: Boolean = true, 210 | projectionPushdown: Boolean = true, 211 | simplifyExpression: Boolean = true, 212 | slicePushdown: Boolean = true, 213 | commSubplanElim: Boolean = true, 214 | commSubexprElim: Boolean = true, 215 | streaming: Boolean = false, 216 | treeFormat: Boolean = false 217 | ): Unit = { 218 | val planStr = if (optimized) { 219 | lazy_frame.explain( 220 | lazy_frame.optimization_toggle( 221 | ptr, 222 | typeCoercion = typeCoercion, 
223 | predicatePushdown = predicatePushdown, 224 | projectionPushdown = projectionPushdown, 225 | simplifyExpr = simplifyExpression, 226 | slicePushdown = slicePushdown, 227 | commSubplanElim = commSubplanElim, 228 | commSubexprElim = commSubexprElim, 229 | streaming = streaming 230 | ), 231 | optimized = true, 232 | treeFormat 233 | ) 234 | } else lazy_frame.explain(ptr, optimized = false, treeFormat) 235 | 236 | println(planStr) 237 | } 238 | 239 | def cache: LazyFrame = { 240 | val ldfPtr = lazy_frame.cache(ptr) 241 | 242 | LazyFrame.withPtr(ldfPtr) 243 | } 244 | 245 | def collect: DataFrame = collect() 246 | 247 | def collect( 248 | typeCoercion: Boolean = true, 249 | predicatePushdown: Boolean = true, 250 | projectionPushdown: Boolean = true, 251 | simplifyExpression: Boolean = true, 252 | noOptimization: Boolean = false, 253 | slicePushdown: Boolean = true, 254 | commSubplanElim: Boolean = true, 255 | commSubexprElim: Boolean = true, 256 | streaming: Boolean = false 257 | ): DataFrame = { 258 | val ldf = LazyFrame.withPtr( 259 | lazy_frame.optimization_toggle( 260 | ptr, 261 | typeCoercion = typeCoercion, 262 | predicatePushdown = if (noOptimization) false else predicatePushdown, 263 | projectionPushdown = if (noOptimization) false else projectionPushdown, 264 | simplifyExpr = simplifyExpression, 265 | slicePushdown = if (noOptimization) false else slicePushdown, 266 | commSubplanElim = if (noOptimization || streaming) false else commSubplanElim, 267 | commSubexprElim = if (noOptimization) false else commSubexprElim, 268 | streaming = streaming 269 | ) 270 | ) 271 | 272 | val dfPtr = lazy_frame.collect(ldf.ptr) 273 | DataFrame.withPtr(dfPtr) 274 | } 275 | 276 | } 277 | 278 | object LazyFrame { 279 | 280 | def withPtr(ptr: Long) = new LazyFrame(ptr) 281 | } 282 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/api/expressions/Column.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.api.expressions 2 | 3 | import org.polars.scala.polars.functions.lit 4 | import org.polars.scala.polars.internal.jni.expressions.column_expr 5 | 6 | object UnaryOperators extends Enumeration { 7 | type UnaryOperator = Value 8 | 9 | val NOT, IS_NULL, IS_NOT_NULL, IS_NAN, IS_NOT_NAN, BETWEEN, IS_IN, LIKE, CAST = Value 10 | } 11 | 12 | object BinaryOperators extends Enumeration { 13 | type BinaryOperator = Value 14 | 15 | val EQUAL_TO, NOT_EQUAL_TO, LESS_THAN, LESS_THAN_EQUAL_TO, GREATER_THAN, GREATER_THAN_EQUAL_TO, 16 | OR, AND, PLUS, MINUS, MULTIPLY, DIVIDE, MODULUS = Value 17 | } 18 | 19 | class Column private (override protected[polars] val ptr: Long) extends Expression(ptr) { 20 | import BinaryOperators._ 21 | import UnaryOperators._ 22 | 23 | /** Not. */ 24 | def unary_! : Column = Column.withPtr(column_expr.applyUnary(ptr, NOT.id)) 25 | 26 | /** Is Null. */ 27 | def isNull: Column = Column.withPtr(column_expr.applyUnary(ptr, IS_NULL.id)) 28 | 29 | /** Is Not Null. */ 30 | def isNotNull: Column = Column.withPtr(column_expr.applyUnary(ptr, IS_NOT_NULL.id)) 31 | 32 | /** Is NaN. */ 33 | def isNaN: Column = Column.withPtr(column_expr.applyUnary(ptr, IS_NAN.id)) 34 | 35 | /** Is Not NaN. */ 36 | def isNotNaN: Column = Column.withPtr(column_expr.applyUnary(ptr, IS_NOT_NAN.id)) 37 | 38 | /** Plus. 
*/ 39 | def +(value: Any): Column = { 40 | val rightPtr = lit(value).ptr 41 | 42 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, PLUS.id)) 43 | } 44 | 45 | def plus(other: Any): Column = this + other 46 | 47 | /** Minus. */ 48 | def -(value: Any): Column = { 49 | val rightPtr = lit(value).ptr 50 | 51 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, MINUS.id)) 52 | } 53 | 54 | def minus(other: Any): Column = this - other 55 | 56 | /** Multiply. */ 57 | def *(value: Any): Column = { 58 | val rightPtr = lit(value).ptr 59 | 60 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, MULTIPLY.id)) 61 | } 62 | 63 | def multiply(other: Any): Column = this * other 64 | 65 | /** Divide. */ 66 | def /(value: Any): Column = { 67 | val rightPtr = lit(value).ptr 68 | 69 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, DIVIDE.id)) 70 | } 71 | 72 | def divide(other: Any): Column = this / other 73 | 74 | /** Modulus. */ 75 | def %(value: Any): Column = { 76 | val rightPtr = lit(value).ptr 77 | 78 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, MODULUS.id)) 79 | } 80 | 81 | def mod(other: Any): Column = this % other 82 | 83 | /** And. */ 84 | def &&(value: Any): Column = { 85 | val rightPtr = lit(value).ptr 86 | 87 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, AND.id)) 88 | } 89 | 90 | def and(other: Any): Column = this && other 91 | 92 | /** Or. */ 93 | def ||(value: Any): Column = { 94 | val rightPtr = lit(value).ptr 95 | 96 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, OR.id)) 97 | } 98 | 99 | def or(other: Any): Column = this || other 100 | 101 | /** EqualTo. */ 102 | def ===(value: Any): Column = { 103 | val rightPtr = lit(value).ptr 104 | 105 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, EQUAL_TO.id)) 106 | } 107 | 108 | def equalTo(other: Any): Column = this === other 109 | 110 | /** NotEqualTo. */ 111 | def <>(value: Any): Column = { 112 | val rightPtr = lit(value).ptr 113 | 114 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, NOT_EQUAL_TO.id)) 115 | } 116 | 117 | def notEqualTo(other: Any): Column = this <> other 118 | 119 | /** LessThan. */ 120 | def <(value: Any): Column = { 121 | val rightPtr = lit(value).ptr 122 | 123 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, LESS_THAN.id)) 124 | } 125 | 126 | def lessThan(other: Any): Column = this < other 127 | 128 | /** LessThanEqualTo. */ 129 | def <=(value: Any): Column = { 130 | val rightPtr = lit(value).ptr 131 | 132 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, LESS_THAN_EQUAL_TO.id)) 133 | 134 | } 135 | 136 | def lessThanEqualTo(other: Any): Column = this <= other 137 | 138 | /** GreaterThan. */ 139 | def >(value: Any): Column = { 140 | val rightPtr = lit(value).ptr 141 | 142 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, GREATER_THAN.id)) 143 | 144 | } 145 | 146 | def greaterThan(other: Any): Column = this > other 147 | 148 | /** GreaterThanEqualTo. 
*/ 149 | def >=(value: Any): Column = { 150 | val rightPtr = lit(value).ptr 151 | 152 | Column.withPtr(column_expr.applyBinary(ptr, rightPtr, GREATER_THAN_EQUAL_TO.id)) 153 | } 154 | 155 | def greaterThanEqualTo(other: Any): Column = this >= other 156 | 157 | } 158 | 159 | object Column { 160 | 161 | private[polars] def withPtr(ptr: Long) = new Column(ptr) 162 | 163 | private[polars] def from(name: String): Column = { 164 | val ptr = column_expr.column(name) 165 | new Column(ptr) 166 | } 167 | 168 | } 169 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/api/expressions/Expression.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.api.expressions 2 | 3 | class Expression(protected[polars] val ptr: Long) {} 4 | 5 | object Expression { 6 | 7 | private[polars] def withPtr(ptr: Long) = new Expression(ptr) 8 | 9 | } 10 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/api/io/Writeable.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.api.io 2 | 3 | import scala.collection.mutable.{Map => MutableMap} 4 | import scala.jdk.CollectionConverters._ 5 | 6 | import org.polars.scala.polars.api.DataFrame 7 | import org.polars.scala.polars.internal.jni.io.write._ 8 | 9 | /** Interface used to write a [[DataFrame]] in various formats to local filesystems and cloud 10 | * object stores (aws, gcp and azure). Use [[DataFrame.write write()]] to access this. 11 | * 12 | * Cloud options are global and can be set by methods like [[option option[s]()]] 13 | * - For amazon s3 options, see 14 | * [[https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants here]] 15 | * - For google cloud options, see 16 | * [[https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants here]] 17 | * - For azure options, see 18 | * [[https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants here]] 19 | * 20 | * This interface also supports the following global options, 21 | * - `write_mode`: Specifies the behavior when data already exists at provided path. Supported 22 | * values 'overwrite', 'error'. Default: error. 23 | * - overwrite: Overwrites the existing data at the provided location. 24 | * - error: Throw an exception if data already exists at the provided location. 25 | */ 26 | class Writeable private[polars] (ptr: Long) { 27 | import org.polars.scala.polars.jsonMapper 28 | 29 | private val _options: MutableMap[String, String] = MutableMap("write_mode" -> "error") 30 | 31 | /** Adds options for the underlying output format. */ 32 | def options(opts: Iterable[(String, String)]): Writeable = synchronized { 33 | opts.foreach { case (key, value) => option(key, value) } 34 | this 35 | } 36 | 37 | /** Adds options for the underlying output format. */ 38 | def options(opts: java.util.Map[String, String]): Writeable = synchronized { 39 | opts.asScala.foreach { case (key, value) => option(key, value) } 40 | this 41 | } 42 | 43 | /** Adds an option for the underlying output format. 
*/ 44 | def option(key: String, value: String): Writeable = synchronized { 45 | if (Option(key).exists(_.trim.isEmpty) || Option(value).exists(_.trim.isEmpty)) { 46 | throw new IllegalArgumentException("Option key or value cannot be null or empty.") 47 | } 48 | 49 | _options.put(key.trim, value.trim) 50 | this 51 | } 52 | 53 | /** Saves the content of the [[DataFrame]] in Parquet format at the specified path (local and 54 | * cloud). 55 | * 56 | * Supported options: 57 | * - `write_parquet_parallel`: Serializes columns in parallel. Default: true. 58 | * - `write_parquet_data_page_size`: Sets the maximum bytes size of a data page. Default: 59 | * 1024^2^ bytes. 60 | * - `write_parquet_row_group_size`: Sets the row group size (in number of rows) during 61 | * writing. This can reduce memory pressure and improve writing performance. Default: 62 | * 512^2^ rows. 63 | * - `write_compression`: Sets the compression codec used for pages, for more compatibility 64 | * guarantees, consider using Snappy. Supported values 'uncompressed', 'snappy', 'gzip', 65 | * 'lzo', 'brotli', 'lz4', 'zstd'. Default: zstd. 66 | * - `write_compression_level`: Sets a valid level for codecs like 'gzip', 'brotli', 'zstd'. 67 | * Defaults to compression default. 68 | * - `write_parquet_stats`: Allows computation and writing of column statistics. Supported 69 | * values 'full', 'none', 'some'. Default: some 70 | * 71 | * @param filePath 72 | * output file location 73 | */ 74 | def parquet(filePath: String): Unit = 75 | writeParquet( 76 | ptr = ptr, 77 | filePath = filePath, 78 | options = jsonMapper.writeValueAsString(_options) 79 | ) 80 | 81 | /** Saves the content of the [[DataFrame]] in IPC format at the specified path (local and 82 | * cloud). 83 | * 84 | * Supported options: 85 | * - `write_ipc_compat_level`: Sets compatibility. Supported values 'oldest', 'newest'. 86 | * Default: newest. 87 | * - `write_compression`: Sets the compression codec used for pages. Supported values 88 | * 'uncompressed', 'lz4', 'zstd'. Default: zstd. 89 | * 90 | * @param filePath 91 | * output file location 92 | */ 93 | def ipc(filePath: String): Unit = 94 | writeIPC( 95 | ptr = ptr, 96 | filePath = filePath, 97 | options = jsonMapper.writeValueAsString(_options) 98 | ) 99 | 100 | /** Saves the content of the [[DataFrame]] in Avro format at the specified path (local and 101 | * cloud). 102 | * 103 | * Supported options: 104 | * - `write_avro_record_name`: Sets the name of avro record. Default: "". 105 | * - `write_compression`: Sets the compression codec used for blocks. Supported values 106 | * 'uncompressed', 'deflate', 'snappy'. Default: uncompressed. 107 | * 108 | * @param filePath 109 | * output file location 110 | */ 111 | def avro(filePath: String): Unit = 112 | writeAvro( 113 | ptr = ptr, 114 | filePath = filePath, 115 | options = jsonMapper.writeValueAsString(_options) 116 | ) 117 | 118 | /** Saves the content of the [[DataFrame]] in CSV format at the specified path (local and 119 | * cloud). 120 | * 121 | * Supported options: 122 | * - `write_csv_include_bom`: Sets whether to include UTF-8 Byte Order Mark (BOM) in the CSV 123 | * output. Default: `false`. 124 | * - `write_csv_include_header`: Sets whether to include header in the CSV output. Default: 125 | * `true`. 126 | * - `write_csv_float_scientific`: Sets whether to use scientific form always (true), never 127 | * (false), or automatically (if not set) for `Float` and `Double` datatypes. 
128 | * - `write_csv_float_precision`: Sets the number of decimal places to write for `Float` and 129 | * `Double` datatypes. 130 | * - `write_csv_separator`: Sets the CSV file's column separator, defaulting to `,` 131 | * character. 132 | * - `write_csv_quote_char`: Sets the single byte character used for quoting, defaulting to 133 | * `"` character. 134 | * - `write_csv_date_format`: Sets the CSV file's date format defined by 135 | * [[https://docs.rs/chrono/latest/chrono/format/strftime/index.html chrono]]. If no format 136 | * specified, the default fractional-second precision is inferred from the maximum timeunit 137 | * found in the frame's Datetime cols (if any). 138 | * - `write_csv_time_format`: Sets the CSV file's time format defined by 139 | * [[https://docs.rs/chrono/latest/chrono/format/strftime/index.html chrono]]. 140 | * - `write_csv_datetime_format`: Sets the CSV file's datetime format defined by 141 | * [[https://docs.rs/chrono/latest/chrono/format/strftime/index.html chrono]]. 142 | * - `write_csv_line_terminator`: Sets the CSV file's line terminator. Default: "\n". 143 | * - `write_csv_null_value`: Sets the CSV file's null value representation defaulting to the 144 | * empty string. 145 | * - `write_csv_quote_style`: Sets the CSV file's quoting style which indicates when to 146 | * insert quotes around a field. Supported values 'necessary', 'always', 'non_numeric', 147 | * 'never'. 148 | * - necessary (default): This puts quotes around fields only when necessary. They are 149 | * necessary when fields contain a quote, separator or record terminator. Quotes are also 150 | * necessary when writing an empty record (which is indistinguishable from a record with 151 | * one empty field). 152 | * - always: This puts quotes around every field. Always. 153 | * - never: This never puts quotes around fields, even if that results in invalid CSV data 154 | * (e.g.: by not quoting strings containing the separator). 155 | * - non_numeric: This puts quotes around all fields that are non-numeric. Namely, when 156 | * writing a field that does not parse as a valid float or integer, then quotes will be 157 | * used even if they aren't strictly necessary. 158 | * 159 | * @note 160 | * compression is not supported for this format. 161 | * @param filePath 162 | * output file location 163 | */ 164 | def csv(filePath: String): Unit = 165 | writeCSV( 166 | ptr = ptr, 167 | filePath = filePath, 168 | options = jsonMapper.writeValueAsString(_options) 169 | ) 170 | 171 | /** Saves the content of the [[DataFrame]] in JSON format at the specified path (local and 172 | * cloud). 173 | * 174 | * A single JSON array containing each DataFrame row as an object. The length of the array is 175 | * the number of rows in the DataFrame. Use this to create valid JSON that can be deserialized 176 | * back into an array in one fell swoop. 177 | * 178 | * @note 179 | * compression is not supported for this format. 180 | * 181 | * @param filePath 182 | * output file location 183 | */ 184 | def json(filePath: String): Unit = { 185 | option("write_json_format", "json") 186 | writeJson( 187 | ptr = ptr, 188 | filePath = filePath, 189 | options = jsonMapper.writeValueAsString(_options) 190 | ) 191 | } 192 | 193 | /** Saves the content of the [[DataFrame]] in Newline Delimited JSON (ndjson) format at the 194 | * specified path (local and cloud). 195 | * 196 | * Each DataFrame row is serialized as a JSON object on a separate line. The number of lines in 197 | * the output is the number of rows in the DataFrame. 
198 | * 199 | * The [[https://pola-rs.github.io/polars/py-polars/html/reference/config.html JSON Lines]] 200 | * format makes it easy to read records in a streaming fashion, one (line) at a time. But the 201 | * output in its entirety is not valid JSON; only the individual lines are. It is recommended 202 | * to use the file extension `.jsonl` when saving as JSON Lines. 203 | * 204 | * @note 205 | * compression is not supported for this format. 206 | * @param filePath 207 | * output file location 208 | */ 209 | def jsonLines(filePath: String): Unit = { 210 | option("write_json_format", "json_lines") 211 | writeJson( 212 | ptr = ptr, 213 | filePath = filePath, 214 | options = jsonMapper.writeValueAsString(_options) 215 | ) 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/api/types/DataTypes.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.api.types 2 | 3 | import java.time.ZoneId 4 | import java.util.Locale 5 | import java.util.concurrent.TimeUnit 6 | 7 | import scala.reflect.ClassTag 8 | import scala.util.Try 9 | import scala.util.matching.Regex 10 | 11 | trait DataType { 12 | def simpleName: String = 13 | this.getClass.getSimpleName 14 | .stripSuffix("$") 15 | .stripSuffix("Type") 16 | .stripSuffix("UDT") 17 | .toLowerCase(Locale.ROOT) 18 | } 19 | 20 | trait BasicDataType extends DataType 21 | 22 | case object StringType extends BasicDataType 23 | 24 | case object BooleanType extends BasicDataType 25 | 26 | case object IntegerType extends BasicDataType 27 | 28 | case object LongType extends BasicDataType 29 | 30 | case object FloatType extends BasicDataType 31 | 32 | case object DoubleType extends BasicDataType 33 | 34 | case object DateType extends BasicDataType 35 | 36 | case object TimeType extends DataType 37 | 38 | case object DateTimeType extends DataType 39 | 40 | case object ListType extends DataType 41 | 42 | case object StructType extends DataType 43 | 44 | case class TimeType(protected val unitStr: String) extends DataType { 45 | val timeUnit: Option[TimeUnit] = 46 | unitStr match { 47 | case s if s.toLowerCase(Locale.ROOT).contains("nano") => Some(TimeUnit.NANOSECONDS) 48 | case s if s.toLowerCase(Locale.ROOT).contains("micro") => Some(TimeUnit.MICROSECONDS) 49 | case s if s.toLowerCase(Locale.ROOT).contains("milli") => Some(TimeUnit.MILLISECONDS) 50 | case _ => None 51 | } 52 | 53 | override def simpleName: String = timeUnit match { 54 | case Some(TimeUnit.NANOSECONDS) => "time[ns]" 55 | case Some(TimeUnit.MICROSECONDS) => "time[us]" 56 | case Some(TimeUnit.MILLISECONDS) => "time[ms]" 57 | case _ => "time" 58 | } 59 | } 60 | 61 | case class DateTimeType(protected val unitStr: String, protected val tzStr: String) 62 | extends DataType { 63 | val timeUnit: Option[TimeUnit] = 64 | unitStr match { 65 | case null => None 66 | case s if s.toLowerCase(Locale.ROOT).contains("nano") => Some(TimeUnit.NANOSECONDS) 67 | case s if s.toLowerCase(Locale.ROOT).contains("micro") => Some(TimeUnit.MICROSECONDS) 68 | case s if s.toLowerCase(Locale.ROOT).contains("milli") => Some(TimeUnit.MILLISECONDS) 69 | case _ => None 70 | } 71 | 72 | val timeZone: Option[ZoneId] = Try(ZoneId.of(tzStr)).toOption 73 | 74 | override def simpleName: String = { 75 | val tu = timeUnit match { 76 | case Some(TimeUnit.NANOSECONDS) => "ns" 77 | case Some(TimeUnit.MICROSECONDS) => "us" 78 | case Some(TimeUnit.MILLISECONDS) => "ms" 79 | case _ => 
null 80 | } 81 | 82 | val tz = timeZone.orNull 83 | 84 | (tu, tz) match { 85 | case (null, null) => "datetime" 86 | case (tu, null) => s"datetime[$tu]" 87 | case (null, tz) => s"datetime[$tz]" 88 | case (tu, tz) => s"datetime[$tu, $tz]" 89 | } 90 | 91 | } 92 | } 93 | 94 | case class Duration(protected val unitStr: String) extends DataType { 95 | val timeUnit: Option[TimeUnit] = 96 | unitStr match { 97 | case s if s.toLowerCase(Locale.ROOT).contains("nano") => Some(TimeUnit.NANOSECONDS) 98 | case s if s.toLowerCase(Locale.ROOT).contains("micro") => Some(TimeUnit.MICROSECONDS) 99 | case s if s.toLowerCase(Locale.ROOT).contains("milli") => Some(TimeUnit.MILLISECONDS) 100 | case _ => None 101 | } 102 | 103 | override def simpleName: String = timeUnit match { 104 | case Some(TimeUnit.NANOSECONDS) => "duration[ns]" 105 | case Some(TimeUnit.MICROSECONDS) => "duration[us]" 106 | case Some(TimeUnit.MILLISECONDS) => "duration[ms]" 107 | case _ => "duration" 108 | } 109 | } 110 | 111 | case class ListType(tpe: DataType) extends DataType { 112 | override def simpleName: String = "list" 113 | 114 | /** Borrowed from Apache Spark source to represent [[ListType]] as a tree string. */ 115 | private[polars] def buildFormattedString(prefix: String, buffer: StringBuffer): Unit = { 116 | buffer.append(s"$prefix-- element: ${tpe.simpleName}\n") 117 | DataType.buildFormattedString(tpe, s"$prefix |", buffer) 118 | } 119 | 120 | } 121 | 122 | case class StructType(fields: Array[Field]) extends DataType { 123 | override def simpleName: String = "struct" 124 | 125 | def toSchema: Schema = Schema.fromFields(fields) 126 | 127 | /** Borrowed from Apache Spark source to represent [[StructType]] as a tree string. */ 128 | private[polars] def buildFormattedString(prefix: String, buffer: StringBuffer): Unit = 129 | fields.foreach(field => field.buildFormattedString(prefix, buffer)) 130 | } 131 | 132 | object DataType { 133 | 134 | private[polars] final val StringRegex: Regex = """^(?i)Utf8|LargeUtf8|String$""".r 135 | private[polars] final val BooleanRegex: Regex = """^(?i)Boolean$""".r 136 | private[polars] final val IntRegex: Regex = """^(?i)Int8|Int16|Int32|UInt8|UInt16|UInt32$""".r 137 | private[polars] final val LongRegex: Regex = """^(?i)Int64|UInt64$""".r 138 | private[polars] final val FloatRegex: Regex = """^(?i)Float32$""".r 139 | private[polars] final val DoubleRegex: Regex = """^(?i)Float64$""".r 140 | private[polars] final val DateRegex: Regex = """^(?i)Date|Date32|Date64$""".r 141 | 142 | def fromBasicType(typeStr: String): DataType = typeStr match { 143 | case StringRegex() => StringType 144 | case BooleanRegex() => BooleanType 145 | case IntRegex() => IntegerType 146 | case LongRegex() => LongType 147 | case FloatRegex() => FloatType 148 | case DoubleRegex() => DoubleType 149 | case DateRegex() => DateType 150 | case typeStr => 151 | throw new IllegalArgumentException(s"Unknown basic type `$typeStr` is not supported.") 152 | } 153 | 154 | def typeToDataType[T: ClassTag](): DataType = { 155 | val clazz = implicitly[ClassTag[T]].runtimeClass 156 | clazz match { 157 | case c if c == classOf[java.lang.Integer] || c == classOf[Int] => IntegerType 158 | case c if c == classOf[java.lang.Long] || c == classOf[Long] => LongType 159 | case c if c == classOf[java.lang.Boolean] || c == classOf[Boolean] => BooleanType 160 | case c if c == classOf[java.lang.Float] || c == classOf[Float] => FloatType 161 | case c if c == classOf[java.lang.Double] || c == classOf[Double] => DoubleType 162 | case c if c == 
classOf[java.time.LocalDate] => DateType 163 | case c if c == classOf[java.time.LocalTime] => TimeType 164 | case c if c == classOf[java.time.ZonedDateTime] => DateTimeType 165 | case c if c == classOf[java.lang.String] || c == classOf[String] => StringType 166 | case c if c == classOf[java.util.List[_]] => ListType 167 | case c => 168 | throw new IllegalArgumentException( 169 | s"Data type could not be found for class `${c.getSimpleName}`" 170 | ) 171 | } 172 | } 173 | 174 | /** Borrowed from Apache Spark source to represent [[DataType]] as a tree string. */ 175 | private[polars] def buildFormattedString( 176 | dataType: DataType, 177 | prefix: String, 178 | buffer: StringBuffer 179 | ): Unit = 180 | dataType match { 181 | case array: ListType => array.buildFormattedString(prefix, buffer) 182 | case struct: StructType => struct.buildFormattedString(prefix, buffer) 183 | case _ => 184 | } 185 | 186 | } 187 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/api/types/Schema.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.api.types 2 | 3 | import scala.jdk.CollectionConverters._ 4 | import scala.util.Try 5 | 6 | import org.polars.scala.polars.jsonMapper 7 | 8 | import com.fasterxml.jackson.databind.JsonNode 9 | import com.fasterxml.jackson.databind.node.JsonNodeType 10 | 11 | case class Field(name: String, dataType: DataType) { 12 | 13 | /** Borrowed from Apache Spark source to represent [[Field]] as a tree string. */ 14 | private[polars] def buildFormattedString(prefix: String, buffer: StringBuffer): Unit = { 15 | buffer.append(s"$prefix-- $name: ${dataType.simpleName} \n") 16 | DataType.buildFormattedString(dataType, s"$prefix |", buffer) 17 | } 18 | 19 | } 20 | 21 | class Schema { 22 | 23 | private var _fields: Array[Field] = _ 24 | 25 | def getFields: Array[Field] = _fields 26 | 27 | def getFieldNames: Array[String] = _fields.map(f => f.name) 28 | 29 | def getField(i: Int): Option[Field] = Try(getFields(i)).toOption 30 | 31 | def getField(name: String, ignoreCase: Boolean = false): Option[Field] = 32 | getFields.find { field => 33 | val fieldName = field.name 34 | if (ignoreCase) fieldName.equalsIgnoreCase(name) 35 | else fieldName.equals(name) 36 | } 37 | 38 | def getFieldIndex(name: String, ignoreCase: Boolean = false): Option[Int] = 39 | getField(name, ignoreCase).map(f => getFields.indexOf(f)) 40 | 41 | override def toString: String = treeString 42 | 43 | private def toField(field: (String, JsonNode, JsonNodeType)): Field = field match { 44 | // For Basic Types 45 | case (name, node, _ @JsonNodeType.STRING) => 46 | Field(name, DataType.fromBasicType(node.textValue())) 47 | 48 | // For Time Type 49 | case (name, node, _ @JsonNodeType.OBJECT) 50 | if node.hasNonNull("Time") || node.hasNonNull("Time32") || node.hasNonNull("Time64") => 51 | Seq(node.get("Time"), node.get("Time32"), node.get("Time64")) 52 | .map(Option(_)) 53 | .collectFirst { case Some(v) => v } match { 54 | case Some(timeUnit) => Field(name, TimeType(timeUnit.textValue())) 55 | 56 | case None => 57 | throw new IllegalArgumentException("Invalid time cannot be parsed.") 58 | } 59 | 60 | // For Duration Type 61 | case (name, node, _ @JsonNodeType.OBJECT) if node.hasNonNull("Duration") => 62 | val timeUnit = node.get("Duration") 63 | Field(name, Duration(timeUnit.textValue())) 64 | 65 | // For DateTime Type 66 | case (name, node, _ @JsonNodeType.OBJECT) if 
node.hasNonNull("Timestamp") => 67 | node.get("Timestamp").elements().asScala.map(_.asText(null)).toSeq match { 68 | case Seq(tu, tz) => 69 | Field(name, DateTimeType(tu, tz)) 70 | case _ => 71 | Field(name, DateTimeType(null, null)) 72 | } 73 | 74 | // For (Nested) List Type 75 | case (name, node, _ @JsonNodeType.OBJECT) 76 | if node.hasNonNull("List") || node.hasNonNull("LargeList") => 77 | Seq(node.get("List"), node.get("LargeList")) 78 | .map(Option(_)) 79 | .collectFirst { case Some(v) => v } match { 80 | case Some(listNode) => 81 | val listNodeType = listNode.get("dtype") 82 | Field(name, ListType(toField((name, listNodeType, listNodeType.getNodeType)).dataType)) 83 | 84 | case None => 85 | throw new IllegalArgumentException("Invalid list cannot be parsed as a JSON.") 86 | } 87 | 88 | // For (Nested) Struct Type 89 | case (name, node, _ @JsonNodeType.OBJECT) if node.has("Struct") => 90 | val structNode = node.get("Struct") 91 | val structFields = structNode.iterator().asScala 92 | val sf = structFields.map { 93 | case node: JsonNode if node.hasNonNull("name") && node.hasNonNull("dtype") => 94 | val structFieldName: String = node.get("name").textValue() 95 | val structFieldType: JsonNode = node.get("dtype") 96 | 97 | Field( 98 | structFieldName, 99 | toField(name, structFieldType, structFieldType.getNodeType).dataType 100 | ) 101 | 102 | case _ => 103 | throw new IllegalArgumentException("Invalid struct cannot be parsed as a JSON.") 104 | }.toArray 105 | 106 | Field(name, StructType(sf)) 107 | 108 | case _ => 109 | throw new IllegalArgumentException("Invalid field cannot be parsed as a JSON.") 110 | } 111 | 112 | private def setFields(fields: Array[Field]): Schema = { 113 | fields match { 114 | case f if f == null || f.isEmpty => 115 | throw new IllegalArgumentException("Provided fields cannot be null or empty.") 116 | 117 | case _ => 118 | _fields = fields 119 | } 120 | 121 | this 122 | } 123 | 124 | private def deserialize(json: String): Schema = { 125 | Try(jsonMapper.reader.readTree(json)).toOption match { 126 | case None => 127 | throw new IllegalArgumentException("Provided schema string cannot be parsed as a JSON.") 128 | 129 | case Some(node: JsonNode) if node.hasNonNull("fields") => 130 | val fields = node.get("fields").elements().asScala.toList 131 | _fields = fields 132 | .map(f => 133 | toField(f.get("name").textValue(), f.get("dtype"), f.get("dtype").getNodeType) 134 | ) 135 | .toArray 136 | 137 | case _ => 138 | throw new IllegalArgumentException("Provided schema string is an invalid JSON.") 139 | } 140 | 141 | this 142 | } 143 | 144 | /** Borrowed from Apache Spark source to represent Schema as a tree string. 
*/ 145 | private[polars] def treeString: String = { 146 | val stringBuffer = new StringBuffer() 147 | stringBuffer.append("root\n") 148 | val prefix = " |" 149 | getFields.foreach(field => field.buildFormattedString(prefix, stringBuffer)) 150 | 151 | stringBuffer.toString 152 | } 153 | 154 | } 155 | 156 | object Schema { 157 | def fromString(jsonString: String): Schema = new Schema().deserialize(jsonString) 158 | 159 | def fromFields(fields: Array[Field]): Schema = new Schema().setFields(fields) 160 | } 161 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/config/Config.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.config 2 | 3 | import java.io.File 4 | import java.nio.charset.StandardCharsets 5 | import java.nio.file.{Files, Paths, StandardOpenOption} 6 | 7 | import scala.jdk.CollectionConverters._ 8 | 9 | import org.polars.scala.polars.internal.jni.common 10 | import org.polars.scala.polars.jsonMapper 11 | 12 | private case class ConfigExport( 13 | environment: Map[String, String], 14 | local: Map[String, String] = Map.empty[String, String] 15 | ) 16 | 17 | class Config private (val options: Map[String, String]) { 18 | 19 | class ConfigUpdateBuilder private[config] () { 20 | private[this] val options = new java.util.HashMap[String, String]() 21 | 22 | /** Sets a configs from a Java Map. 23 | * 24 | * For more details, see 25 | * [[https://pola-rs.github.io/polars/py-polars/html/reference/config.html this.]] 26 | */ 27 | def withOptions(opts: java.util.Map[String, String]): ConfigUpdateBuilder = synchronized { 28 | withOptions(opts.asScala) 29 | 30 | this 31 | } 32 | 33 | /** Sets a configs from an Iterable of key and value pairs. 34 | * 35 | * For more details, see 36 | * [[https://pola-rs.github.io/polars/py-polars/html/reference/config.html this.]] 37 | */ 38 | def withOptions(opts: Iterable[(String, String)]): ConfigUpdateBuilder = synchronized { 39 | opts.foreach { case (key, value) => withOption(key, value) } 40 | 41 | this 42 | } 43 | 44 | /** Sets a config option from a key and value pair. 45 | * 46 | * For more details, see 47 | * [[https://pola-rs.github.io/polars/py-polars/html/reference/config.html this]] and 48 | * [[https://github.com/pola-rs/polars/blob/d3f4d63d6fcd02e4bddb445dc24ad8533f8b069d/py-polars/polars/config.py#L24 this]]. 49 | */ 50 | def withOption(key: String, value: String): ConfigUpdateBuilder = synchronized { 51 | (key, value) match { 52 | case (_, null) | (null, _) | (null, null) => 53 | throw new IllegalArgumentException("Config key or value cannot be null or empty.") 54 | 55 | case (k, v) => 56 | options.put(k.trim, v.trim) 57 | this 58 | } 59 | } 60 | 61 | /** Sets a configs from an existing file. */ 62 | def fromPath(path: String): ConfigUpdateBuilder = synchronized { 63 | val configFile = new File(path) 64 | 65 | if (!configFile.exists() || !configFile.isFile) 66 | throw new IllegalArgumentException("Provided path must point to an existing file.") 67 | 68 | fromPath(configFile) 69 | } 70 | 71 | /** Sets a configs from an existing file. */ 72 | def fromPath(file: File): ConfigUpdateBuilder = synchronized { 73 | val content: String = 74 | new String(Files.readAllBytes(Paths.get(file.toURI)), StandardCharsets.UTF_8) 75 | 76 | fromString(content) 77 | } 78 | 79 | /** Sets a configs from a JSON config string. 
*/ 80 | def fromString(configStr: String): ConfigUpdateBuilder = synchronized { 81 | val config = jsonMapper.readValue(configStr, classOf[ConfigExport]) 82 | 83 | withOptions(config.environment) 84 | this 85 | } 86 | 87 | /** Set table formatting style. 88 | * 89 | * For more details, see 90 | * [[https://pola-rs.github.io/polars/py-polars/html/reference/api/polars.Config.set_tbl_formatting.html this.]] 91 | */ 92 | def withTableFormatting(format: TableFormats.TableFormat): ConfigUpdateBuilder = 93 | synchronized { 94 | options.put("POLARS_FMT_TABLE_FORMATTING", format.toString) 95 | this 96 | } 97 | 98 | /** Set the max number of columns used to print tables. 99 | * 100 | * If n < 0, then print all the columns. 101 | * 102 | * For more details, see 103 | * [[https://pola-rs.github.io/polars/py-polars/html/reference/api/polars.Config.set_tbl_cols.html this.]] 104 | */ 105 | def withMaxTableColumns(nCols: Int): ConfigUpdateBuilder = synchronized { 106 | options.put("POLARS_FMT_MAX_COLS", nCols.toString) 107 | this 108 | } 109 | 110 | /** Set the max number of rows used to print tables. 111 | * 112 | * If n < 0, then print all the rows. 113 | * 114 | * For more details, see 115 | * [[https://pola-rs.github.io/polars/py-polars/html/reference/api/polars.Config.set_tbl_rows.html this.]] 116 | */ 117 | def withMaxTableRows(nRows: Int): ConfigUpdateBuilder = synchronized { 118 | options.put("POLARS_FMT_MAX_ROWS", nRows.toString) 119 | this 120 | } 121 | 122 | /** Print the dataframe shape below the dataframe when displaying tables. 123 | * 124 | * For more details, see 125 | * [[https://pola-rs.github.io/polars/py-polars/html/reference/api/polars.Config.set_tbl_dataframe_shape_below.html this.]] 126 | */ 127 | def withDataFrameShapeBelow(active: Boolean): ConfigUpdateBuilder = synchronized { 128 | options.put("POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW", if (active) "1" else "0") 129 | this 130 | } 131 | 132 | /** Clear the current state of config. */ 133 | def reset(): ConfigUpdateBuilder = { 134 | options.clear() 135 | this 136 | } 137 | 138 | /** Applies current configuration in a persistent way. */ 139 | def apply(): Boolean = synchronized { 140 | Config.updateConfig(new Config(options.asScala.toMap)) 141 | common.setConfigs(options) 142 | } 143 | } 144 | 145 | /** Creates a builder for Polars [[Config]]. */ 146 | def update(): ConfigUpdateBuilder = new ConfigUpdateBuilder() 147 | 148 | /** Save the config to a specified path as a JSON config string. */ 149 | def saveTo(path: String, overwrite: Boolean): Unit = { 150 | val configFile = new File(path) 151 | 152 | saveTo(configFile, overwrite) 153 | } 154 | 155 | /** Save the config to a specified path as a JSON config string. 
*/ 156 | def saveTo(path: File, overwrite: Boolean): Unit = synchronized { 157 | val configStr = this.toString 158 | 159 | if (path.exists() && path.isDirectory) 160 | throw new IllegalArgumentException("Provided path points to an existing directory.") 161 | 162 | val openOption = 163 | if (overwrite) Nil else Seq(StandardOpenOption.CREATE_NEW) 164 | 165 | Files.write( 166 | Paths.get(path.toURI), 167 | s"$configStr\n".getBytes(StandardCharsets.UTF_8), 168 | openOption: _* 169 | ) 170 | } 171 | 172 | override def toString: String = 173 | jsonMapper.writeValueAsString(ConfigExport(environment = options)) 174 | } 175 | 176 | object Config { 177 | 178 | private var _instance: Config = _ 179 | 180 | private[polars] def updateConfig(config: Config): Unit = synchronized { 181 | _instance = config 182 | } 183 | 184 | private[polars] def getConfig: Config = synchronized { 185 | Option(_instance) match { 186 | case None => 187 | _instance = new Config(Map.empty[String, String]) 188 | 189 | case _ => 190 | } 191 | 192 | _instance 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/config/constants.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.config 2 | 3 | object TableFormats extends Enumeration { 4 | type TableFormat = Value 5 | 6 | val NOTHING, ASCII_FULL, ASCII_FULL_CONDENSED, ASCII_NO_BORDERS, ASCII_BORDERS_ONLY, 7 | ASCII_BORDERS_ONLY_CONDENSED, ASCII_HORIZONTAL_ONLY, ASCII_MARKDOWN, UTF8_FULL, 8 | UTF8_FULL_CONDENSED, UTF8_NO_BORDERS, UTF8_BORDERS_ONLY, UTF8_HORIZONTAL_ONLY = Value 9 | } 10 | 11 | object UniqueKeepStrategies extends Enumeration { 12 | type UniqueKeepStrategy = Value 13 | 14 | val first, last, any, none = Value 15 | } 16 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/functions.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars 2 | 3 | import java.time.format.DateTimeFormatter 4 | import java.time.{LocalDate, LocalTime, ZonedDateTime} 5 | 6 | import org.polars.scala.polars.api.expressions.{Column, Expression} 7 | import org.polars.scala.polars.internal.jni.expressions.{column_expr, literal_expr} 8 | 9 | object functions { 10 | 11 | def col(name: String): Column = Column.from(name) 12 | 13 | def lit(value: Any): Expression = { 14 | val ptr = value match { 15 | case null => literal_expr.nullLit() 16 | case v: Expression => v.ptr 17 | case v: Boolean => literal_expr.fromBool(v) 18 | case v: Int => literal_expr.fromInt(v) 19 | case v: Long => literal_expr.fromLong(v) 20 | case v: Float => literal_expr.fromFloat(v) 21 | case v: Double => literal_expr.fromDouble(v) 22 | case v: LocalDate => 23 | literal_expr.fromDate(java.time.format.DateTimeFormatter.ISO_LOCAL_DATE.format(v)) 24 | case v: LocalTime => 25 | literal_expr.fromTime(java.time.format.DateTimeFormatter.ISO_LOCAL_TIME.format(v)) 26 | case v: ZonedDateTime => 27 | literal_expr.fromDateTime( 28 | java.time.format.DateTimeFormatter.ISO_LOCAL_DATE_TIME.format(v) 29 | ) 30 | case v: String => literal_expr.fromString(v) 31 | case _ => 32 | throw new IllegalArgumentException( 33 | s"Unsupported value `$value` of type `${value.getClass.getSimpleName}` was provided." 
34 | ) 35 | } 36 | 37 | Expression.withPtr(ptr) 38 | } 39 | 40 | def desc(col_name: String): Expression = { 41 | val ptr = column_expr.sort_column_by_name(col_name, descending = true) 42 | Expression.withPtr(ptr) 43 | } 44 | 45 | def asc(col_name: String): Expression = { 46 | val ptr = column_expr.sort_column_by_name(col_name, descending = false) 47 | Expression.withPtr(ptr) 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/internal/jni/Natively.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.internal.jni 2 | 3 | import org.polars.scala.polars.loadLibraryIfRequired 4 | 5 | private[jni] trait Natively { loadLibraryIfRequired() } 6 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/internal/jni/common.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.internal.jni 2 | 3 | private[polars] object common extends Natively { 4 | 5 | @native def version(): String 6 | 7 | @native def setConfigs(options: java.util.Map[String, String]): Boolean 8 | 9 | } 10 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/internal/jni/data_frame.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.internal.jni 2 | 3 | private[polars] object data_frame extends Natively { 4 | 5 | @native def concatDataFrames(ptrs: Array[Long]): Long 6 | 7 | @native def schemaString(ptr: Long): String 8 | 9 | @native def toLazy(ptr: Long): Long 10 | 11 | @native def show(ptr: Long): Unit 12 | 13 | @native def count(ptr: Long): Long 14 | 15 | @native def limit(ptr: Long, n: Long): Long 16 | 17 | @native def tail(ptr: Long, n: Long): Long 18 | 19 | @native def fromSeries(ptrs: Array[Long]): Long 20 | 21 | } 22 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/internal/jni/expressions/column_expr.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.internal.jni.expressions 2 | 3 | import org.polars.scala.polars.internal.jni.Natively 4 | 5 | private[polars] object column_expr extends Natively { 6 | 7 | @native def column(name: String): Long 8 | 9 | @native def sort_column_by_name(name: String, descending: Boolean): Long 10 | 11 | @native def applyUnary(ptr: Long, op: Int): Long 12 | 13 | @native def applyBinary(leftPtr: Long, rightPtr: Long, op: Int): Long 14 | 15 | } 16 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/internal/jni/expressions/literal_expr.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.internal.jni.expressions 2 | 3 | import org.polars.scala.polars.internal.jni.Natively 4 | 5 | private[polars] object literal_expr extends Natively { 6 | 7 | @native def nullLit(): Long 8 | 9 | @native def fromString(value: String): Long 10 | 11 | @native def fromBool(value: Boolean): Long 12 | 13 | @native def fromInt(value: Int): Long 14 | 15 | @native def fromLong(value: Long): Long 16 | 17 | @native def fromFloat(value: Float): Long 18 | 19 | @native def fromDouble(value: 
Double): Long 20 | 21 | @native def fromDate(value: String): Long 22 | 23 | @native def fromTime(value: String): Long 24 | 25 | @native def fromDateTime(value: String): Long 26 | 27 | } 28 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/internal/jni/io/scan.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.internal.jni.io 2 | 3 | import org.polars.scala.polars.internal.jni.Natively 4 | 5 | private[polars] object scan extends Natively { 6 | 7 | @native def scanParquet(paths: Array[String], options: String): Long 8 | 9 | @native def scanIPC(paths: Array[String], options: String): Long 10 | 11 | @native def scanCSV(paths: Array[String], options: String): Long 12 | 13 | @native def scanJsonLines(paths: Array[String], options: String): Long 14 | 15 | } 16 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/internal/jni/io/write.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.internal.jni.io 2 | 3 | import org.polars.scala.polars.internal.jni.Natively 4 | 5 | object write extends Natively { 6 | 7 | @native def writeParquet( 8 | ptr: Long, 9 | filePath: String, 10 | options: String 11 | ): Unit 12 | 13 | @native def writeIPC( 14 | ptr: Long, 15 | filePath: String, 16 | options: String 17 | ): Unit 18 | 19 | @native def writeAvro( 20 | ptr: Long, 21 | filePath: String, 22 | options: String 23 | ): Unit 24 | 25 | @native def writeCSV( 26 | ptr: Long, 27 | filePath: String, 28 | options: String 29 | ): Unit 30 | 31 | @native def writeJson( 32 | ptr: Long, 33 | filePath: String, 34 | options: String 35 | ): Unit 36 | 37 | } 38 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/internal/jni/lazy_frame.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.internal.jni 2 | 3 | private[polars] object lazy_frame extends Natively { 4 | 5 | @native def concatLazyFrames(ptrs: Array[Long], reChunk: Boolean, parallel: Boolean): Long 6 | 7 | @native def schemaString(ptr: Long): String 8 | 9 | @native def selectFromStrings(ptr: Long, cols: Array[String]): Long 10 | 11 | @native def selectFromExprs(ptr: Long, exprs: Array[Long]): Long 12 | 13 | @native def filterFromExprs(ldfPtr: Long, exprPtr: Long): Long 14 | 15 | @native def limit(ptr: Long, n: Long): Long 16 | 17 | @native def tail(ptr: Long, n: Long): Long 18 | 19 | @native def drop(ptr: Long, cols: Array[String]): Long 20 | 21 | @native def drop_nulls(ptr: Long, subset: Array[String]): Long 22 | 23 | @native def rename(ptr: Long, mapping: java.util.Map[String, String]): Long 24 | 25 | @native def sortFromExprs( 26 | ldfPtr: Long, 27 | exprPtrs: Array[Long], 28 | nullLast: Array[Boolean], 29 | maintainOrder: Boolean 30 | ): Long 31 | 32 | @native def topKFromExprs( 33 | ldfPtr: Long, 34 | k: Int, 35 | exprPtrs: Array[Long], 36 | nullLast: Array[Boolean], 37 | maintainOrder: Boolean 38 | ): Long 39 | 40 | @native def withColumn(ldfPtr: Long, name: String, exprPtr: Long): Long 41 | 42 | @native def unique(ptr: Long, subset: Array[String], keep: String, maintainOrder: Boolean): Long 43 | 44 | @native def explain(ptr: Long, optimized: Boolean, tree_format: Boolean): String 45 | 46 | @native def set_sorted(ptr: Long, 
mapping: java.util.Map[String, Boolean]): Long 47 | 48 | @native def cache(ptr: Long): Long 49 | 50 | @native def collect(ptr: Long): Long 51 | 52 | @native def optimization_toggle( 53 | ptr: Long, 54 | typeCoercion: Boolean, 55 | predicatePushdown: Boolean, 56 | projectionPushdown: Boolean, 57 | simplifyExpr: Boolean, 58 | slicePushdown: Boolean, 59 | commSubplanElim: Boolean, 60 | commSubexprElim: Boolean, 61 | streaming: Boolean 62 | ): Long 63 | 64 | } 65 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/internal/jni/row.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.internal.jni 2 | 3 | private[polars] object row extends Natively { 4 | 5 | @native def createIterator(dfPtr: Long, nRows: Long): Long 6 | 7 | @native def advanceIterator(ptr: Long): Array[Object] 8 | 9 | @native def schemaString(ptr: Long): String 10 | 11 | } 12 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/internal/jni/series.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala.polars.internal.jni 2 | 3 | private[polars] object series extends Natively { 4 | 5 | @native def show(ptr: Long): Unit 6 | 7 | @native def new_str_series(name: String, data: Array[String]): Long 8 | 9 | @native def new_int_series(name: String, data: Array[Int]): Long 10 | 11 | @native def new_float_series(name: String, data: Array[Float]): Long 12 | 13 | @native def new_double_series(name: String, data: Array[Double]): Long 14 | 15 | @native def new_long_series(name: String, data: Array[Long]): Long 16 | 17 | @native def new_boolean_series(name: String, data: Array[Boolean]): Long 18 | 19 | @native def new_date_series(name: String, data: Array[String]): Long 20 | 21 | @native def new_datetime_series(name: String, data: Array[String]): Long 22 | 23 | @native def new_time_series(name: String, data: Array[String]): Long 24 | 25 | @native def new_list_series(name: String, ptrs: Array[Long]): Long 26 | 27 | @native def new_struct_series(name: String, ptrs: Array[Long]): Long 28 | 29 | } 30 | -------------------------------------------------------------------------------- /core/src/main/scala/org/polars/scala/polars/package.scala: -------------------------------------------------------------------------------- 1 | package org.polars.scala 2 | 3 | import java.util.concurrent.atomic.AtomicReference 4 | 5 | import scala.util.{Failure, Success, Try} 6 | 7 | import com.fasterxml.jackson.databind.SerializationFeature 8 | import com.fasterxml.jackson.databind.json.JsonMapper 9 | import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule 10 | import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule} 11 | 12 | package object polars { 13 | 14 | private final val NATIVE_LIB_NAME = "scala_polars" 15 | 16 | private[polars] val libraryLoaded = 17 | new AtomicReference[LibraryStates.LibraryState](LibraryStates.NOT_LOADED) 18 | 19 | final val jsonMapper = 20 | JsonMapper 21 | .builder() 22 | .addModules( 23 | DefaultScalaModule, 24 | new JavaTimeModule() 25 | ) 26 | .build() :: ClassTagExtensions 27 | 28 | jsonMapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS) 29 | 30 | private[polars] def loadLibraryIfRequired(): Unit = { 31 | if (libraryLoaded.get() == LibraryStates.LOADED) 32 | return 33 | 34 | if 
(libraryLoaded.compareAndSet(LibraryStates.NOT_LOADED, LibraryStates.LOADING)) { 35 | Try(NativeLoader.load(NATIVE_LIB_NAME)) match { 36 | case Success(_) => 37 | libraryLoaded.set(LibraryStates.LOADED) 38 | 39 | case Failure(e) => 40 | libraryLoaded.set(LibraryStates.NOT_LOADED) 41 | throw new RuntimeException(s"Unable to load the `$NATIVE_LIB_NAME` native library.", e) 42 | } 43 | 44 | return 45 | } 46 | 47 | while (libraryLoaded.get() == LibraryStates.LOADING) 48 | Thread.sleep(10) 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /core/src/site/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Project Documentation 6 | 16 | 17 | 18 | Go to the project documentation 19 | 20 | 21 | -------------------------------------------------------------------------------- /examples/src/main/java/examples/java/InstantiateDataFrame.java: -------------------------------------------------------------------------------- 1 | package examples.java; 2 | 3 | import java.util.Arrays; 4 | import org.polars.scala.polars.api.DataFrame; 5 | import org.polars.scala.polars.api.Series; 6 | 7 | public class InstantiateDataFrame { 8 | 9 | public static void main(String[] args) { 10 | DataFrame.fromSeries(Series.ofBoolean("bool_col", new boolean[] {true, false, true})).show(); 11 | 12 | DataFrame.fromSeries( 13 | Series.ofInt("i32_col", new int[] {1, 2, 3}), 14 | Series.ofLong("i64_col", new long[] {1L, 2L, 3L}), 15 | Series.ofBoolean("bool_col", new boolean[] {true, false, true}), 16 | Series.ofList( 17 | "nested_str_col", 18 | new String[][] { 19 | {"a", "b", "c"}, 20 | {"a", "b", "c"}, 21 | {"a", "b", "c"}, 22 | })) 23 | .show(); 24 | 25 | /* Values as Java array(s) */ 26 | 27 | DataFrame.fromSeries( 28 | Series.ofInt("i32_col", new int[] {1, 2, 3}), 29 | new Series[] { 30 | Series.ofLong("i64_col", new long[] {1L, 2L, 3L}), 31 | Series.ofBoolean("bool_col", new boolean[] {true, false, true}), 32 | Series.ofList( 33 | "nested_str_col", 34 | new String[][] { 35 | {"a", "b", "c"}, 36 | {"a", "b", "c"}, 37 | {"a", "b", "c"}, 38 | }), 39 | }) 40 | .show(); 41 | 42 | DataFrame.fromSeries( 43 | Series.ofInt("i32_col", new Integer[] {1, 2, 3}), 44 | new Series[] { 45 | Series.ofLong("i64_col", new Long[] {1L, 2L, 3L}), 46 | Series.ofBoolean("bool_col", new Boolean[] {true, false, true}), 47 | Series.ofFloat("f32_col", new Float[] {1F, 2F, 3F}), 48 | }) 49 | .show(); 50 | 51 | /* Values as Java lists(s) */ 52 | 53 | DataFrame.fromSeries( 54 | Series.ofInt("i32_col", Arrays.asList(1, 2, 3)), 55 | new Series[] { 56 | Series.ofLong("i64_col", Arrays.asList(1L, 2L, 3L)), 57 | Series.ofBoolean("bool_col", Arrays.asList(true, false, true)), 58 | Series.ofFloat("f32_col", Arrays.asList(1F, 2F, 3F)), 59 | }) 60 | .show(); 61 | 62 | /* Values as a mix of Java lists(s) and array(s) */ 63 | 64 | DataFrame.fromSeries( 65 | Series.ofInt("i32_col", Arrays.asList(1, 2, 3)), 66 | new Series[] { 67 | Series.ofLong("i64_col", new Long[] {1L, 2L, 3L}), 68 | Series.ofBoolean("bool_col", new Boolean[] {true, false, true}), 69 | Series.ofFloat("f32_col", Arrays.asList(1F, 2F, 3F)), 70 | }) 71 | .show(); 72 | 73 | DataFrame.fromSeries( 74 | Series.ofInt("i32_col", Arrays.asList(1, 2, 3)), 75 | new Series[] { 76 | Series.ofLong("i64_col", new Long[] {1L, 2L, 3L}), 77 | Series.ofBoolean("bool_col", new Boolean[] {true, false, true}), 78 | Series.ofSeries( 79 | "struct_col", 80 | new Series[] { 81 | Series.ofLong("i64_col", new 
Long[] {1L, 2L, 3L}), 82 | Series.ofBoolean("bool_col", new Boolean[] {true, false, true}), 83 | Series.ofFloat("f32_col", Arrays.asList(1F, 2F, 3F)), 84 | }), 85 | }) 86 | .show(); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /examples/src/main/java/examples/java/InstantiateSeries.java: -------------------------------------------------------------------------------- 1 | package examples.java; 2 | 3 | import java.util.Arrays; 4 | import java.util.Collections; 5 | import org.polars.scala.polars.api.Series; 6 | 7 | public class InstantiateSeries { 8 | public static void main(String[] args) { 9 | 10 | /* Values as Java array/ list of Basic Types */ 11 | 12 | // int or java.lang.Integer 13 | Series.ofInt("series_i32_java_array_primitive", new int[] {1, 2, 3}).show(); 14 | Series.ofInt("series_i32_java_array", new java.lang.Integer[] {1, 2, 3}).show(); 15 | Series.ofInt("series_i32_java_list", Arrays.asList(1, 2, 3)).show(); 16 | 17 | // long or java.lang.Long 18 | Series.ofLong("series_i64_java_array_primitive", new long[] {1L, 2L, 3L}).show(); 19 | Series.ofLong("series_i64_java_array", new java.lang.Long[] {1L, 2L, 3L}).show(); 20 | Series.ofLong("series_i64_java_list", Arrays.asList(1L, 2L, 3L)).show(); 21 | 22 | // float or java.lang.Float 23 | Series.ofFloat("series_f32_java_array_primitive", new float[] {1f, 2f, 3f}).show(); 24 | Series.ofFloat("series_f32_java_array", new java.lang.Float[] {1f, 2f, 3f}).show(); 25 | Series.ofFloat("series_f32_java_list", Arrays.asList(1f, 2f, 3f)).show(); 26 | 27 | // double or java.lang.Double 28 | Series.ofDouble("series_f64_java_array_primitive", new double[] {1d, 2d, 3d}).show(); 29 | Series.ofDouble("series_f64_java_array", new java.lang.Double[] {1d, 2d, 3d}).show(); 30 | Series.ofDouble("series_f64_java_list", Arrays.asList(1d, 2d, 3d)).show(); 31 | 32 | // boolean or java.lang.Boolean 33 | Series.ofBoolean("series_bool_java_array_primitive", new boolean[] {true, false, true, true}) 34 | .show(); 35 | Series.ofBoolean("series_bool_java_array", new java.lang.Boolean[] {true, false, true, true}) 36 | .show(); 37 | Series.ofBoolean("series_bool_java_list", Arrays.asList(true, false, true, true)).show(); 38 | 39 | // String 40 | Series.ofString("series_str_java_array_primitive", new String[] {"a", "b"}).show(); 41 | Series.ofString("series_str_java_list", Arrays.asList("a", "b")).show(); 42 | 43 | // java.time.LocalDate 44 | Series.ofDate( 45 | "series_date_java_array_primitive", 46 | new java.time.LocalDate[] {java.time.LocalDate.now()}) 47 | .show(); 48 | Series.ofDate("series_date_java_list", Collections.singletonList(java.time.LocalDate.now())) 49 | .show(); 50 | 51 | // java.time.LocalTime 52 | Series.ofTime( 53 | "series_time_java_array_primitive", 54 | new java.time.LocalTime[] {java.time.LocalTime.now()}) 55 | .show(); 56 | Series.ofTime("series_time_java_list", Collections.singletonList(java.time.LocalTime.now())) 57 | .show(); 58 | 59 | // java.time.ZonedDateTime 60 | Series.ofDateTime( 61 | "series_datetime_java_array_primitive", 62 | new java.time.ZonedDateTime[] {java.time.ZonedDateTime.now()}) 63 | .show(); 64 | Series.ofDateTime( 65 | "series_datetime_java_list", Collections.singletonList(java.time.ZonedDateTime.now())) 66 | .show(); 67 | 68 | /* Values as Java array/ list of Nested List Types */ 69 | 70 | // int or java.lang.Integer 71 | Series.ofList("series_list_int_java_array", new java.lang.Integer[][] {{1, 2, 3}}).show(); 72 | Series.ofList("series_list_int_java_list", 
Collections.singletonList(Arrays.asList(1, 2, 3))) 73 | .show(); 74 | 75 | // String 76 | Series.ofList("series_list_str_java_array", new String[][] {{"a", "b"}}).show(); 77 | Series.ofList("series_list_str_java_list", Collections.singletonList(Arrays.asList("a", "b"))) 78 | .show(); 79 | 80 | // Deep Nested 81 | Series.ofList("series_list_list_str_java_array", new String[][][] {{{"a", "b"}}}).show(); 82 | Series.ofList( 83 | "series_list_list_str_java_list", 84 | Collections.singletonList(Collections.singletonList(Arrays.asList("a", "b")))) 85 | .show(); 86 | 87 | /* Values as Java array/ list of Struct Types */ 88 | 89 | Series.ofSeries( 90 | "series_struct_java_array", 91 | new Series[] { 92 | Series.ofInt("int_col", new int[] {1, 2, 3}), 93 | Series.ofString("str_col", new String[] {"a", "b", "c"}), 94 | Series.ofBoolean("bool_col", new boolean[] {true, false, true}), 95 | }) 96 | .show(); 97 | Series.ofSeries( 98 | "series_struct_java_list", 99 | Arrays.asList( 100 | Series.ofInt("int_col", Arrays.asList(1, 2, 3)), 101 | Series.ofString("str_col", Arrays.asList("a", "b", "c")), 102 | Series.ofBoolean("bool_col", Arrays.asList(true, false, true)))) 103 | .show(); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /examples/src/main/java/examples/java/configuration/ConfiguringPolars.java: -------------------------------------------------------------------------------- 1 | package examples.java.configuration; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.nio.file.Files; 6 | import java.nio.file.Path; 7 | import java.util.HashMap; 8 | import org.polars.scala.polars.Polars; 9 | 10 | public class ConfiguringPolars { 11 | 12 | public static void main(String[] args) throws IOException { 13 | 14 | /* Checking the version scala-polars is compiled against. */ 15 | String version = Polars.version(); 16 | System.out.printf("scala-polars has been compiled against version '%s'%n%n", version); 17 | 18 | /* Get default configuration. */ 19 | System.out.printf("Default Configuration:%n%s%n%n", Polars.config()); 20 | 21 | /* Updating configuration. 
*/ 22 | 23 | /* Update the number of rows shown while doing `df.show()` */ 24 | Polars.config().update().withMaxTableRows(20).apply(); 25 | System.out.printf("After updating number of rows:%n%s%n%n", Polars.config()); 26 | 27 | /* Update the number of columns shown while doing `df.show()` */ 28 | Polars.config().update().withMaxTableColumns(20).apply(); 29 | System.out.printf("After updating number of columns:%n%s%n%n", Polars.config()); 30 | 31 | /* Reset config */ 32 | Polars.config().update().reset().apply(); 33 | System.out.printf("After resetting config:%n%s%n%n", Polars.config()); 34 | 35 | /* Chaining configuration options */ 36 | HashMap options = new HashMap<>(); 37 | options.put("POLARS_TABLE_WIDTH", "5000"); 38 | 39 | Polars.config() 40 | .update() 41 | .withMaxTableRows(20) 42 | .withMaxTableColumns(20) 43 | .withOption("POLARS_FMT_TABLE_CELL_ALIGNMENT", "RIGHT") 44 | .withOptions(options) 45 | .apply(); 46 | 47 | System.out.printf("After chained configs:%n%s%n%n", Polars.config()); 48 | 49 | /* Persisting current configuration to file */ 50 | Path tempDirectory = Files.createTempDirectory("polars-config-"); 51 | File tempFile = Files.createTempFile(tempDirectory, "temp-polars-config-", "plcfg").toFile(); 52 | Polars.config().saveTo(tempFile, true); 53 | 54 | /* Reloading current configuration to file */ 55 | Polars.config().update().reset().apply(); 56 | System.out.printf("After resetting config:%n%s%n%n", Polars.config()); 57 | 58 | Polars.config().update().fromPath(tempFile).apply(); 59 | System.out.printf("After reloading config from file path:%n%s%n", Polars.config()); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /examples/src/main/java/examples/java/expressions/ApplyingSimpleExpressions.java: -------------------------------------------------------------------------------- 1 | package examples.java.expressions; 2 | 3 | import static org.polars.scala.polars.functions.*; 4 | 5 | import examples.scala.utils.CommonUtils; 6 | import java.util.Collections; 7 | import java.util.Random; 8 | import org.polars.scala.polars.Polars; 9 | import org.polars.scala.polars.api.DataFrame; 10 | import org.polars.scala.polars.api.LazyFrame; 11 | 12 | public class ApplyingSimpleExpressions { 13 | 14 | public static void main(String[] args) { 15 | /* Read a dataset as a DataFrame lazily or eagerly */ 16 | String path = CommonUtils.getResource("/files/web-ds/data.json"); 17 | LazyFrame input = Polars.scan().jsonLines(path); 18 | 19 | /* Apply multiple operations on the LazyFrame or DataFrame */ 20 | LazyFrame ldf = 21 | input 22 | .cache() 23 | .select("id", "name") 24 | .with_column("lower_than_four", col("id").lessThanEqualTo(4)) 25 | .filter(col("lower_than_four")) 26 | .with_column("long_value", lit(new Random().nextLong())) 27 | .with_column("date", lit(java.time.LocalDate.now())) 28 | .with_column("time", lit(java.time.LocalTime.now())) 29 | .with_column("current_ts", lit(java.time.ZonedDateTime.now())) 30 | .sort(asc("name"), true, false) 31 | .set_sorted(Collections.singletonMap("name", false)) 32 | .top_k(2, "id", true, true, false) 33 | .limit(2) // .head(2) 34 | .tail(2) 35 | .drop("long_value") 36 | .rename("lower_than_four", "less_than_four") 37 | .drop_nulls(); 38 | 39 | ldf = Polars.concat(ldf, new LazyFrame[] {ldf, ldf}); 40 | ldf = ldf.unique(); 41 | 42 | System.out.println("Showing LazyFrame plan to stdout."); 43 | ldf.explain(); 44 | 45 | DataFrame df = ldf.collect(); 46 | 47 | System.out.println("Showing resultant 
DataFrame to stdout."); 48 | df.show(); 49 | 50 | System.out.printf("Total rows: %s%n%n", df.count()); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /examples/src/main/java/examples/java/io/LazyAndEagerAPI.java: -------------------------------------------------------------------------------- 1 | package examples.java.io; 2 | 3 | import examples.scala.utils.CommonUtils; 4 | import org.polars.scala.polars.Polars; 5 | import org.polars.scala.polars.api.DataFrame; 6 | import org.polars.scala.polars.api.LazyFrame; 7 | import org.polars.scala.polars.api.Row; 8 | import scala.collection.Iterator; 9 | 10 | /** 11 | * Polars provides 2 API for reading datasets lazily ({@code scan}) or eagerly ({@code read}). 12 | * 13 | *

These APIs serve different purposes and result in either a {@link LazyFrame} or a {@link 14 | * DataFrame}. A LazyFrame can be materialized to a DataFrame and vice-versa if required. 15 | * 16 | *

17 | * 18 | *

Lazy API

19 | * 20 | * With the lazy API Polars doesn't run each query line-by-line but instead processes the full query 21 | * end-to-end. To get the most out of Polars it is important that you use the lazy API because: 22 | * 23 | *
    24 | *
  • the lazy API allows Polars to apply automatic query optimization with the query optimizer. 25 | *
  • the lazy API allows you to work with larger-than-memory datasets using streaming. 26 | *
  • the lazy API can catch schema errors before processing the data. 27 | *
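As a rough illustration of these points only — not part of the original example file, with a placeholder path and calls mirroring the ones used in the examples further below — a lazy query is built first and only materialized at the end:

import static org.polars.scala.polars.functions.*;

import org.polars.scala.polars.Polars;
import org.polars.scala.polars.api.DataFrame;
import org.polars.scala.polars.api.LazyFrame;

public class LazyQuerySketch {
  public static void main(String[] args) {
    // Build the query lazily; Polars sees the whole plan before reading anything.
    LazyFrame ldf =
        Polars.scan()
            .csv("/path/to/data.csv") // placeholder path
            .select("id", "name")
            .with_column("lower_than_four", col("id").lessThanEqualTo(4))
            .filter(col("lower_than_four"));

    // Only collect() executes the (optimized) plan and returns an eager DataFrame.
    DataFrame df = ldf.collect();
    df.show();
  }
}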
28 | * 29 | *

More info can be found in the Polars documentation. 31 | * 32 | *

33 | * 34 | *

Eager API

35 | * 36 | * With eager API the queries are executed line-by-line in contrast to the lazy API. 37 | */ 38 | public class LazyAndEagerAPI { 39 | 40 | public static void main(String[] args) { 41 | /* Lazily read data from file based datasets */ 42 | String path = CommonUtils.getResource("/files/web-ds/data.csv"); 43 | LazyFrame ldf = Polars.scan().option("scan_csv_n_rows", "2").csv(path); 44 | 45 | /* Materialize LazyFrame to DataFrame */ 46 | DataFrame df = ldf.collect(); 47 | 48 | System.out.println("Showing CSV file as a DataFrame to stdout"); 49 | df.show(); 50 | 51 | System.out.printf("Total rows: %s%n%n", df.count()); 52 | System.out.printf("Total columns: %s%n%n", df.schema().getFields().length); 53 | 54 | /* Lazily read only first 3 rows */ 55 | df = Polars.scan().option("scan_csv_n_rows", "3").csv(path).collect(); 56 | System.out.printf("Total rows: %s%n%n", df.count()); 57 | 58 | System.out.println("Rows:"); 59 | Iterator rows = df.rows(); 60 | 61 | while (rows.hasNext()) { 62 | System.out.println(rows.next()); 63 | } 64 | System.out.println("\n"); 65 | 66 | /* Convert DataFrame back to LazyFrame */ 67 | LazyFrame backToLdf = df.toLazy(); 68 | System.out.printf("Show schema: %s%n%n", backToLdf.schema()); 69 | 70 | /* Eagerly read data from file based datasets */ 71 | df = Polars.scan().csv(path).collect(); 72 | 73 | System.out.println("Showing CSV file as a DataFrame to stdout"); 74 | df.show(); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /examples/src/main/java/examples/java/io/ReadingFileDatasets.java: -------------------------------------------------------------------------------- 1 | package examples.java.io; 2 | 3 | import examples.scala.utils.CommonUtils; 4 | import org.polars.scala.polars.Polars; 5 | import org.polars.scala.polars.api.DataFrame; 6 | import org.polars.scala.polars.api.LazyFrame; 7 | 8 | /** 9 | * Polars supports various input file formats like the following, 10 | * 11 | *
    12 | *
  • {@link org.polars.scala.polars.api.io.Scannable#csv CSV} (delimited format like CSV, TSV, 13 | * etc.) 14 | *
  • {@link org.polars.scala.polars.api.io.Scannable#parquet Apache Parquet} 15 | *
  • {@link org.polars.scala.polars.api.io.Scannable#ipc Apache Arrow IPC} 16 | *
  • {@link org.polars.scala.polars.api.io.Scannable#jsonLines New line delimited JSON} 17 | *
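Each of the formats above has its own entry point on the scan builder; a minimal sketch (the paths are placeholders, not files from this repository) looks like:

import org.polars.scala.polars.Polars;
import org.polars.scala.polars.api.LazyFrame;

public class ScanEntryPointsSketch {
  public static void main(String[] args) {
    // Every call below returns a LazyFrame for the corresponding file format.
    LazyFrame csv = Polars.scan().csv("/path/data.csv");
    LazyFrame parquet = Polars.scan().parquet("/path/data.parquet");
    LazyFrame ipc = Polars.scan().ipc("/path/data.ipc");
    LazyFrame jsonLines = Polars.scan().jsonLines("/path/data.json");
  }
}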
18 | * 19 | *

All the above formats are compatible with the lazy or eager input API and users can supply 1 20 | * or more file paths which will be read in parallel to return a {@link LazyFrame} or a {@link 21 | * DataFrame}. 22 | * 23 | *

Since each format may have its own additional options (for example, the delimiter for the CSV format), 24 | * Polars allows a simple builder pattern which can be used to supply these options. 25 | *
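A rough sketch of that builder pattern — the option keys are the ones used in the example body further below, and the paths are placeholders:

import org.polars.scala.polars.Polars;
import org.polars.scala.polars.api.DataFrame;

public class ScanWithOptionsSketch {
  public static void main(String[] args) {
    // Options are plain string key/value pairs supplied before the input paths;
    // several paths can be passed and are read in parallel.
    DataFrame df =
        Polars.scan()
            .option("scan_parquet_n_rows", "3")
            .option("scan_parquet_cache", "false")
            .parquet("/path/a.parquet", "/path/b.parquet")
            .collect();

    df.show();
  }
}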

While the examples below have been provided for Parquet files only, they apply in the same way 27 | * to the other supported file formats. 28 | *

Some additional examples may also be found in {@link LazyAndEagerAPI}. 30 | */ 31 | public class ReadingFileDatasets { 32 | 33 | public static void main(String[] args) { 34 | 35 | /* For one Parquet file */ 36 | String path = CommonUtils.getResource("/files/web-ds/data.parquet"); 37 | DataFrame df = Polars.scan().parquet(path).collect(); 38 | 39 | System.out.println("Showing parquet file as a DataFrame to stdout."); 40 | df.show(); 41 | 42 | System.out.printf("Total rows: %s%n%n", df.count()); 43 | 44 | /* For multiple Parquet file(s) */ 45 | DataFrame multiLdf = Polars.scan().parquet(path, path, path).collect(); 46 | 47 | System.out.println("Showing multiple parquet files as 1 DataFrame to stdout."); 48 | multiLdf.show(); 49 | System.out.printf("Total rows: %s%n%n", multiLdf.count()); 50 | 51 | /* Providing additional options with Parquet file input */ 52 | DataFrame pqDfWithOpts = 53 | Polars.scan() 54 | .option("scan_parquet_low_memory", "true") 55 | .option("scan_parquet_n_rows", "3") 56 | .option("scan_parquet_cache", "false") 57 | .option("scan_parquet_row_index_name", "SerialNum") 58 | .parquet(path) 59 | .collect(); 60 | 61 | System.out.println("Showing parquet file as a DataFrame to stdout."); 62 | pqDfWithOpts.show(); 63 | 64 | System.out.printf("Total rows: %s%n%n", pqDfWithOpts.count()); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /examples/src/main/java/examples/java/io/WritingToFileDatasets.java: -------------------------------------------------------------------------------- 1 | package examples.java.io; 2 | 3 | import examples.scala.utils.CommonUtils; 4 | import org.polars.scala.polars.Polars; 5 | import org.polars.scala.polars.api.DataFrame; 6 | 7 | /** 8 | * Polars supports various output file formats like the following, 9 | * 10 | *

    11 | *
  • {@link org.polars.scala.polars.api.io.Writeable#parquet(String) Apache Parquet} 12 | *
  • {@link org.polars.scala.polars.api.io.Writeable#ipc(String) Apache Arrow IPC} 13 | *
14 | * 15 | *

A {@link DataFrame} can be written to object storage as a file in one of the supported 16 | * formats mentioned above. 17 | *
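For the object-storage case, a sketch along the lines of the S3 example further below — bucket, region and credentials are placeholders:

import org.polars.scala.polars.Polars;
import org.polars.scala.polars.api.DataFrame;

public class WriteToObjectStorageSketch {
  public static void main(String[] args) {
    DataFrame df = Polars.scan().ipc("/path/data.ipc").collect(); // placeholder local input

    // Storage-specific options are passed through the same writer builder.
    df.write()
        .option("write_mode", "overwrite")
        .option("aws_default_region", "us-east-2")
        .option("aws_access_key_id", "<access-key>")
        .option("aws_secret_access_key", "<secret-key>")
        .parquet("s3://my-bucket/output.pq");
  }
}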

Since each format and storage may have its own additional options, Polars allows a simple 19 | * builder pattern which can be used to supply these options. 20 | * 21 | *
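As a sketch, the same builder also accepts format-specific keys; the CSV keys below come from the writer's option documentation elsewhere in this project, and the paths are placeholders:

import org.polars.scala.polars.Polars;
import org.polars.scala.polars.api.DataFrame;

public class WriteCsvOptionsSketch {
  public static void main(String[] args) {
    DataFrame df = Polars.scan().ipc("/path/data.ipc").collect(); // placeholder input

    // Write CSV output, quoting non-numeric fields and writing "NULL" for missing values.
    df.write()
        .option("write_csv_quote_style", "non_numeric")
        .option("write_csv_null_value", "NULL")
        .option("write_mode", "overwrite")
        .csv("/path/output.csv");
  }
}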

While the examples below have been provided for Parquet files only, they also similarly apply 22 | * on the other supported file formats. 23 | */ 24 | public class WritingToFileDatasets { 25 | 26 | public static void main(String[] args) { 27 | 28 | /* Read a dataset as a DataFrame lazily or eagerly */ 29 | String path = CommonUtils.getResource("/files/web-ds/data.ipc"); 30 | DataFrame df = Polars.scan().ipc(path).collect(); 31 | 32 | System.out.println("Showing ipc file as a DataFrame to stdout."); 33 | df.show(); 34 | 35 | System.out.printf("Total rows: %s%n%n", df.count()); 36 | 37 | /* Write this DataFrame to local filesystem at the provided path */ 38 | String outputPath = CommonUtils.getOutputLocation("output.pq"); 39 | df.write().parquet(outputPath); 40 | System.out.printf("File written to location: %s%n%n", outputPath); 41 | 42 | /* Overwrite output if already exists */ 43 | df.write().option("write_mode", "overwrite").parquet(outputPath); 44 | System.out.printf("File overwritten at location: %s%n%n", outputPath); 45 | 46 | /* Write output file with compression */ 47 | df.write() 48 | .option("write_compression", "zstd") 49 | .option("write_mode", "overwrite") 50 | .option("write_parquet_stats", "full") 51 | .parquet(outputPath); 52 | System.out.printf("File overwritten at location: %s with compression%n%n", outputPath); 53 | 54 | /* Write output file to Amazon S3 object store */ 55 | String s3Path = "s3://bucket/output.pq"; 56 | df.write() 57 | .option("write_compression", "zstd") 58 | .option("write_mode", "overwrite") 59 | .option("write_parquet_stats", "full") 60 | .option("aws_default_region", "us‑east‑2") 61 | .option("aws_access_key_id", "ABC") 62 | .option("aws_secret_access_key", "XYZ") 63 | .parquet(s3Path); 64 | System.out.printf("File overwritten at location: %s with compression%n%n", s3Path); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /examples/src/main/resources/files/web-ds/data.csv: -------------------------------------------------------------------------------- 1 | id,name,created_utc,updated_on,comment_karma,link_karma 2 | 1,truman48lamb_jasonbroken,1397113470,1536527864,0,0 3 | 2,johnethen06_jasonbroken,1397113483,1536527864,0,0 4 | 3,yaseinrez_jasonbroken,1397113483,1536527864,0,1 5 | 4,Valve92_jasonbroken,1397113503,1536527864,0,0 6 | 5,srbhuyan_jasonbroken,1397113506,1536527864,0,0 7 | 6,taojianlong_jasonbroken,1397113510,1536527864,4,0 8 | 7,YourPalGrant92_jasonbroken,1397113513,1536527864,0,0 9 | 8,Lucki87_jasonbroken,1397113515,1536527864,0,0 10 | 9,punkstock_jasonbroken,1397113517,1536527864,0,0 11 | 10,duder_con_chile_jasonbroken,1397113519,1536527864,0,2 12 | 11,IHaveBigBalls_jasonbroken,1397113520,1536527864,0,0 13 | 12,Foggybanana_jasonbroken,1397113523,1536527864,0,0 14 | 13,Thedrinkdriver_jasonbroken,1397113527,1536527864,-9,0 15 | 14,littlemissd_jasonbroken,1397113530,1536527864,0,-3 16 | 15,phonethaway_jasonbroken,1397113537,1536527864,0,0 17 | 16,DreamingOfWinterfell_jasonbroken,1397113538,1536527864,0,0 18 | 17,ssaig_jasonbroken,1397113544,1536527864,1,0 19 | 18,divinetribe_jasonbroken,1397113549,1536527864,0,0 20 | 19,fdbvfdssdgfds_jasonbroken,1397113552,1536527864,3,0 21 | 20,hjtrsh54yh43_jasonbroken,1397113559,1536527864,-1,-1 22 | -------------------------------------------------------------------------------- /examples/src/main/resources/files/web-ds/data.ipc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chitralverma/scala-polars/9997baf445782bf8b5642a19d3346df2595b292b/examples/src/main/resources/files/web-ds/data.ipc -------------------------------------------------------------------------------- /examples/src/main/resources/files/web-ds/data.json: -------------------------------------------------------------------------------- 1 | {"id":1,"name":null,"created_utc":1397113470,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 2 | {"id":2,"name":"johnethen06_jasonbroken","created_utc":1397113483,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 3 | {"id":3,"name":"yaseinrez_jasonbroken","created_utc":1397113483,"updated_on":1536527864,"comment_karma":0,"link_karma":1} 4 | {"id":null,"name":"Valve92_jasonbroken","created_utc":1397113503,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 5 | {"id":5,"name":"srbhuyan_jasonbroken","created_utc":1397113506,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 6 | {"id":6,"name":"taojianlong_jasonbroken","created_utc":1397113510,"updated_on":1536527864,"comment_karma":4,"link_karma":0} 7 | {"id":7,"name":"YourPalGrant92_jasonbroken","created_utc":1397113513,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 8 | {"id":8,"name":"Lucki87_jasonbroken","created_utc":1397113515,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 9 | {"id":9,"name":"punkstock_jasonbroken","created_utc":1397113517,"updated_on":null,"comment_karma":0,"link_karma":0} 10 | {"id":10,"name":"duder_con_chile_jasonbroken","created_utc":1397113519,"updated_on":1536527864,"comment_karma":0,"link_karma":2} 11 | {"id":null,"name":"IHaveBigBalls_jasonbroken","created_utc":1397113520,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 12 | {"id":12,"name":"Foggybanana_jasonbroken","created_utc":1397113523,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 13 | {"id":13,"name":"Thedrinkdriver_jasonbroken","created_utc":1397113527,"updated_on":1536527864,"comment_karma":-9,"link_karma":0} 14 | {"id":14,"name":"littlemissd_jasonbroken","created_utc":1397113530,"updated_on":1536527864,"comment_karma":0,"link_karma":-3} 15 | {"id":15,"name":"phonethaway_jasonbroken","created_utc":1397113537,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 16 | {"id":16,"name":"DreamingOfWinterfell_jasonbroken","created_utc":1397113538,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 17 | {"id":17,"name":"ssaig_jasonbroken","created_utc":1397113544,"updated_on":1536527864,"comment_karma":1,"link_karma":0} 18 | {"id":18,"name":"divinetribe_jasonbroken","created_utc":1397113549,"updated_on":1536527864,"comment_karma":0,"link_karma":0} 19 | {"id":19,"name":"fdbvfdssdgfds_jasonbroken","created_utc":1397113552,"updated_on":1536527864,"comment_karma":3,"link_karma":0} 20 | {"id":20,"name":"hjtrsh54yh43_jasonbroken","created_utc":1397113559,"updated_on":1536527864,"comment_karma":-1,"link_karma":-1} 21 | -------------------------------------------------------------------------------- /examples/src/main/resources/files/web-ds/data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chitralverma/scala-polars/9997baf445782bf8b5642a19d3346df2595b292b/examples/src/main/resources/files/web-ds/data.parquet -------------------------------------------------------------------------------- /examples/src/main/scala/examples/scala/InstantiateDataFrame.scala: -------------------------------------------------------------------------------- 1 | package 
examples.scala 2 | 3 | import org.polars.scala.polars.api.{DataFrame, Series} 4 | 5 | object InstantiateDataFrame { 6 | 7 | def main(args: Array[String]): Unit = { 8 | DataFrame.fromSeries(Series.ofBoolean("bool_col", Array[Boolean](true, false, true))).show() 9 | 10 | DataFrame 11 | .fromSeries( 12 | Series.ofInt("i32_col", Array[Int](1, 2, 3)), 13 | Series.ofLong("i64_col", Array[Long](1L, 2L, 3L)), 14 | Series.ofBoolean("bool_col", Array[Boolean](true, false, true)), 15 | Series.ofList( 16 | "nested_str_col", 17 | Array[Array[String]](Array("a", "b", "c"), Array("a", "b", "c"), Array("a", "b", "c")) 18 | ) 19 | ) 20 | .show() 21 | 22 | /* Values as Scala array(s) */ 23 | DataFrame 24 | .fromSeries( 25 | Series.ofInt("i32_col", Array[Int](1, 2, 3)), 26 | Array[Series]( 27 | Series.ofLong("i64_col", Array[Long](1L, 2L, 3L)), 28 | Series.ofBoolean("bool_col", Array[Boolean](true, false, true)), 29 | Series.ofList( 30 | "nested_str_col", 31 | Array[Array[String]](Array("a", "b", "c"), Array("a", "b", "c"), Array("a", "b", "c")) 32 | ) 33 | ) 34 | ) 35 | .show() 36 | 37 | /* Values as scala lists(s) */ 38 | 39 | DataFrame 40 | .fromSeries( 41 | Series.ofInt("i32_col", Seq(1, 2, 3)), 42 | Array[Series]( 43 | Series.ofLong("i64_col", Seq(1L, 2L, 3L)), 44 | Series.ofBoolean("bool_col", Seq(true, false, true)), 45 | Series.ofFloat("f32_col", Seq(1f, 2f, 3f)) 46 | ) 47 | ) 48 | .show() 49 | 50 | /* Values as a mix of Scala lists(s) and array(s) */ 51 | 52 | DataFrame 53 | .fromSeries( 54 | Series.ofInt("i32_col", Seq(1, 2, 3)), 55 | Array[Series]( 56 | Series.ofLong("i64_col", Array[Long](1L, 2L, 3L)), 57 | Series.ofBoolean("bool_col", Array[Boolean](true, false, true)), 58 | Series.ofFloat("f32_col", Seq(1f, 2f, 3f)) 59 | ) 60 | ) 61 | .show() 62 | 63 | DataFrame 64 | .fromSeries( 65 | Series.ofInt("i32_col", Array[Int](1, 2, 3)), 66 | Series.ofLong("i64_col", Array[Long](1L, 2L, 3L)), 67 | Series.ofBoolean("bool_col", Array[Boolean](true, false, true)), 68 | Series.ofSeries( 69 | "struct_col", 70 | Array[Series]( 71 | Series.ofLong("i64_col", Array[Long](1L, 2L, 3L)), 72 | Series.ofBoolean("bool_col", Array[Boolean](true, false, true)), 73 | Series.ofFloat("f32_col", Seq(1f, 2f, 3f)) 74 | ) 75 | ) 76 | ) 77 | .show() 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /examples/src/main/scala/examples/scala/InstantiateSeries.scala: -------------------------------------------------------------------------------- 1 | package examples.scala 2 | 3 | import org.polars.scala.polars.api.Series 4 | 5 | object InstantiateSeries { 6 | 7 | def main(args: Array[String]): Unit = { 8 | 9 | /* Values as Scala array/ iterable of Basic Types */ 10 | 11 | // Int 12 | Series.ofInt("series_i32_scala_array", Array(1, 2, 3)).show() 13 | Series.ofInt("series_i32_scala_iterable", Seq(1, 2, 3)).show() 14 | 15 | // Long 16 | Series.ofLong("series_i64_scala_array", Array(1L, 2L, 3L)).show() 17 | Series.ofLong("series_i64_scala_iterable", Seq(1L, 2L, 3L)).show() 18 | 19 | // Float 20 | Series.ofFloat("series_f32_scala_array", Array(1f, 2f, 3f)).show() 21 | Series.ofFloat("series_f32_scala_iterable", Seq(1f, 2f, 3f)).show() 22 | 23 | // Double 24 | Series.ofDouble("series_f64_scala_array", Array(1d, 2d, 3d)).show() 25 | Series.ofDouble("series_f64_scala_iterable", Seq(1d, 2d, 3d)).show() 26 | 27 | // Boolean 28 | Series.ofBoolean("series_bool_scala_array", Array(true, false, true, true)).show() 29 | Series.ofBoolean("series_bool_scala_iterable", Seq(true, false, true, 
true)).show() 30 | 31 | // String 32 | Series.ofString("series_str_scala_array", Array("a", "b")).show() 33 | Series.ofString("series_str_scala_iterable", Seq("a", "b")).show() 34 | 35 | // java.time.LocalDate 36 | Series.ofDate("series_date_scala_array", Array(java.time.LocalDate.now())).show() 37 | Series.ofDate("series_date_scala_iterable", Seq(java.time.LocalDate.now())).show() 38 | 39 | // java.time.LocalTime 40 | Series.ofTime("series_time_scala_array", Array(java.time.LocalTime.now())).show() 41 | Series.ofTime("series_time_scala_iterable", Seq(java.time.LocalTime.now())).show() 42 | 43 | // java.time.ZonedDateTime 44 | Series.ofDateTime("series_datetime_scala_array", Array(java.time.ZonedDateTime.now())).show() 45 | Series.ofDateTime("series_datetime_scala_iterable", Seq(java.time.ZonedDateTime.now())).show() 46 | 47 | /* Values as Scala array/ iterable of Nested List Types */ 48 | 49 | // int or java.lang.Integer 50 | Series.ofList("series_list_int_scala_array", Array(Array(1, 2, 3))).show() 51 | Series.ofList("series_list_int_scala_iterable", Seq(Seq(1, 2, 3))).show() 52 | 53 | // String 54 | Series.ofList("series_list_str_scala_array", Array(Array("a", "b"))).show() 55 | Series.ofList("series_list_str_scala_iterable", Seq(Seq("a", "b"))).show() 56 | 57 | // Deep Nested 58 | Series.ofList("series_list_list_str_scala_array", Array(Array(Array("a", "b")))).show() 59 | Series.ofList("series_list_list_str_scala_iterable", Seq(Seq(Seq("a", "b")))).show() 60 | 61 | /* Values as Scala array/ iterable of Struct Types */ 62 | Series 63 | .ofSeries( 64 | "series_struct_scala_array", 65 | Array( 66 | Series.ofInt("int_col", Array(1, 2, 3)), 67 | Series.ofString("str_col", Array("a", "b", "c")), 68 | Series.ofBoolean("bool_col", Array(true, false, true)) 69 | ) 70 | ) 71 | .show() 72 | Series 73 | .ofSeries( 74 | "series_struct_scala_iterable", 75 | Seq( 76 | Series.ofInt("int_col", Seq(1, 2, 3)), 77 | Series.ofString("str_col", Seq("a", "b", "c")), 78 | Series.ofBoolean("bool_col", Seq(true, false, true)) 79 | ) 80 | ) 81 | .show() 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /examples/src/main/scala/examples/scala/configuration/ConfiguringPolars.scala: -------------------------------------------------------------------------------- 1 | package examples.scala.configuration 2 | 3 | import java.io.File 4 | import java.nio.file.{Files, Path} 5 | 6 | import org.polars.scala.polars.Polars 7 | 8 | object ConfiguringPolars { 9 | 10 | def main(args: Array[String]): Unit = { 11 | 12 | /* Checking the version scala-polars is compiled against. */ 13 | val version: String = Polars.version() 14 | printf("scala-polars has been compiled against version '%s'%n%n", version) 15 | 16 | /* Get default configuration. */ 17 | printf("Default Configuration:%n%s%n%n", Polars.config) 18 | 19 | /* Updating configuration.
*/ 20 | 21 | /* Update the number of rows shown while doing `df.show()` */ 22 | Polars.config.update().withMaxTableRows(20).apply() 23 | printf("After updating number of rows:%n%s%n%n", Polars.config) 24 | 25 | /* Update the number of columns shown while doing `df.show()` */ 26 | Polars.config.update().withMaxTableColumns(20).apply() 27 | printf("After updating number of columns:%n%s%n%n", Polars.config) 28 | 29 | /* Reset config */ 30 | Polars.config.update().reset().apply() 31 | printf("After resetting config:%n%s%n%n", Polars.config) 32 | 33 | /* Chaining configuration options */ 34 | val options = Map("POLARS_TABLE_WIDTH" -> "5000") 35 | 36 | Polars.config 37 | .update() 38 | .withMaxTableRows(20) 39 | .withMaxTableColumns(20) 40 | .withOption("POLARS_FMT_TABLE_CELL_ALIGNMENT", "RIGHT") 41 | .withOptions(options) 42 | .apply() 43 | 44 | printf("After chained configs:%n%s%n%n", Polars.config) 45 | 46 | /* Persisting current configuration to file */ 47 | val tempDirectory: Path = Files.createTempDirectory("polars-config-") 48 | val tempFile: File = 49 | Files.createTempFile(tempDirectory, "temp-polars-config-", "plcfg").toFile 50 | Polars.config.saveTo(tempFile, overwrite = true) 51 | 52 | /* Reloading current configuration from file */ 53 | Polars.config.update().reset().apply() 54 | printf("After resetting config:%n%s%n%n", Polars.config) 55 | 56 | Polars.config.update().fromPath(tempFile).apply() 57 | printf("After reloading config from file path:%n%s%n", Polars.config) 58 | 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /examples/src/main/scala/examples/scala/expressions/ApplyingSimpleExpressions.scala: -------------------------------------------------------------------------------- 1 | package examples.scala.expressions 2 | 3 | import scala.util.Random 4 | 5 | import org.polars.scala.polars.Polars 6 | import org.polars.scala.polars.functions._ 7 | 8 | import examples.scala.utils.CommonUtils 9 | 10 | object ApplyingSimpleExpressions { 11 | def main(args: Array[String]): Unit = { 12 | 13 | /* Read a dataset as a DataFrame lazily or eagerly */ 14 | val path = CommonUtils.getResource("/files/web-ds/data.json") 15 | val input = Polars.scan.jsonLines(path) 16 | 17 | /* Apply multiple operations on the LazyFrame or DataFrame */ 18 | var ldf = input.cache 19 | .select("id", "name") 20 | .with_column("lower_than_four", col("id") <= 4) 21 | .filter(col("lower_than_four")) 22 | .with_column("long_value", lit(Random.nextLong())) 23 | .with_column("date", lit(java.time.LocalDate.now())) 24 | .with_column("time", lit(java.time.LocalTime.now())) 25 | .with_column("current_ts", lit(java.time.ZonedDateTime.now())) 26 | .sort(asc("name"), nullLast = true, maintainOrder = false) 27 | .set_sorted(Map("name" -> false)) 28 | .top_k(2, "id", descending = true, nullLast = true, maintainOrder = false) 29 | .limit(2) // .head(2) 30 | .tail(2) 31 | .drop("long_value") 32 | .rename("lower_than_four", "less_than_four") 33 | .drop_nulls() 34 | 35 | ldf = Polars.concat(ldf, Array(ldf, ldf)) 36 | ldf = ldf.unique() 37 | 38 | println("Showing LazyFrame plan to stdout.") 39 | ldf.explain() 40 | 41 | val df = ldf.collect() 42 | 43 | println("Showing resultant DataFrame to stdout.") 44 | df.show() 45 | 46 | printf("Total rows: %s%n%n", df.count()) 47 | 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /examples/src/main/scala/examples/scala/io/LazyAndEagerAPI.scala:
-------------------------------------------------------------------------------- 1 | package examples.scala.io 2 | 3 | import org.polars.scala.polars.Polars 4 | import org.polars.scala.polars.api.{DataFrame, LazyFrame} 5 | 6 | import examples.scala.utils.CommonUtils 7 | 8 | /** Polars provides 2 APIs for reading datasets lazily (`scan`) or eagerly (`read`). 9 | * 10 | * These APIs serve different purposes and result in either a [[LazyFrame]] or a [[DataFrame]]. A 11 | * LazyFrame can be materialized to a DataFrame and vice-versa if required. 12 | * ==Lazy API== 13 | * With the lazy API, Polars doesn't run each query line-by-line but instead processes the full 14 | * query end-to-end. To get the most out of Polars it is important that you use the lazy API 15 | * because: 16 | * 17 | * - the lazy API allows Polars to apply automatic query optimization with the query optimizer. 18 | * - the lazy API allows you to work with larger-than-memory datasets using streaming. 19 | * - the lazy API can catch schema errors before processing the data. 20 | * 21 | * More info can be found 22 | * [[https://pola-rs.github.io/polars-book/user-guide/lazy-api/intro.html here]]. 23 | * ==Eager API== 24 | * With the eager API, queries are executed line-by-line, in contrast to the lazy API. 25 | */ 26 | 27 | object LazyAndEagerAPI { 28 | 29 | def main(args: Array[String]): Unit = { 30 | /* Lazily read data from file-based datasets */ 31 | val path = CommonUtils.getResource("/files/web-ds/data.csv") 32 | val ldf = Polars.scan.csv(path) 33 | 34 | /* Materialize LazyFrame to DataFrame */ 35 | var df: DataFrame = ldf.collect() 36 | 37 | println("Showing CSV file as a DataFrame to stdout.") 38 | df.show() 39 | 40 | printf("Total rows: %s%n%n", df.count()) 41 | printf("Total columns: %s%n%n", df.schema.getFields.length) 42 | 43 | /* Lazily read only the first 3 rows */ 44 | df = Polars.scan.option("scan_csv_n_rows", "3").csv(path).collect() 45 | printf("Total rows: %s%n%n", df.count()) 46 | 47 | println("Rows:") 48 | df.rows().foreach(println) 49 | println("\n") 50 | 51 | /* Convert DataFrame back to LazyFrame */ 52 | val backToLdf: LazyFrame = df.toLazy 53 | printf("Show schema: %s%n%n", backToLdf.schema) 54 | 55 | /* Eagerly read data from file-based datasets */ 56 | df = Polars.scan.csv(path).collect 57 | 58 | println("Showing CSV file as a DataFrame to stdout") 59 | df.show() 60 | 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /examples/src/main/scala/examples/scala/io/ReadingFileDatasets.scala: -------------------------------------------------------------------------------- 1 | package examples.scala.io 2 | 3 | import org.polars.scala.polars.Polars 4 | import org.polars.scala.polars.api.{DataFrame, LazyFrame} 5 | 6 | import examples.scala.utils.CommonUtils 7 | 8 | /** Polars supports various input file formats like the following, 9 | * - [[org.polars.scala.polars.api.io.Scannable.csv CSV]] (delimited format like CSV, TSV, 10 | * etc.) 11 | * - [[org.polars.scala.polars.api.io.Scannable.parquet Apache Parquet]] 12 | * - [[org.polars.scala.polars.api.io.Scannable.ipc Apache Arrow IPC]] 13 | * - [[org.polars.scala.polars.api.io.Scannable.jsonLines Newline-delimited JSON]] 14 | * 15 | * All the above formats are compatible with the lazy or eager input API and users can supply 1 16 | * or more file paths which will be read in parallel to return a [[LazyFrame]] or a 17 | * [[DataFrame]].
18 | * 19 | * Since each format may have its own additional options (example: delimiter for CSV format), 20 | * Polars allows a simple builder pattern which can be used to supply these options. 21 | * 22 | * While the examples below have been provided for Parquet files only, they also similarly apply 23 | * on the other supported file formats. 24 | * 25 | * Some additional examples may also be found in [[examples.scala.io.LazyAndEagerAPI]]. 26 | */ 27 | 28 | object ReadingFileDatasets { 29 | 30 | def main(args: Array[String]): Unit = { 31 | 32 | /* For one Parquet file */ 33 | val path = CommonUtils.getResource("/files/web-ds/data.parquet") 34 | val df = Polars.scan 35 | .parquet(path) 36 | .collect() 37 | 38 | println("Showing parquet file as a DataFrame to stdout.") 39 | df.show() 40 | 41 | printf("Total rows: %s%n%n", df.count()) 42 | 43 | /* For multiple Parquet file(s) */ 44 | val multiLdf = Polars.scan.parquet(path, path, path).collect() 45 | 46 | println("Showing multiple parquet files as 1 DataFrame to stdout.") 47 | multiLdf.show() 48 | printf("Total rows: %s%n%n", multiLdf.count()) 49 | 50 | /* Providing additional options with Parquet file input */ 51 | val pqDfWithOpts = Polars.scan 52 | .option("scan_parquet_low_memory", "true") 53 | .option("scan_parquet_n_rows", "3") 54 | .option("scan_parquet_cache", "false") 55 | .option("scan_parquet_row_index_name", "SerialNum") 56 | .parquet(path) 57 | .collect() 58 | 59 | println("Showing parquet file as a DataFrame to stdout.") 60 | pqDfWithOpts.show() 61 | 62 | printf("Total rows: %s%n%n", pqDfWithOpts.count()) 63 | 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /examples/src/main/scala/examples/scala/io/WritingToFileDatasets.scala: -------------------------------------------------------------------------------- 1 | package examples.scala.io 2 | 3 | import org.polars.scala.polars.Polars 4 | 5 | import examples.scala.utils.CommonUtils 6 | 7 | /** Polars supports various output file formats like the following, 8 | * - [[org.polars.scala.polars.api.io.Writeable.parquet Apache Parquet]] 9 | * - [[org.polars.scala.polars.api.io.Writeable.ipc Apache IPC]] 10 | * 11 | * A [[org.polars.scala.polars.api.DataFrame DataFrame]] can be written to an object storage as a 12 | * file in one of the supported formats mentioned above. 13 | * 14 | * Since each format and storage may have its own additional options, Polars allows a simple 15 | * builder pattern which can be used to supply these options. 16 | * 17 | * While the examples below have been provided for Parquet files only, they also similarly apply 18 | * on the other supported file formats. 
19 | */ 20 | object WritingToFileDatasets { 21 | 22 | def main(args: Array[String]): Unit = { 23 | 24 | /* Read a dataset as a DataFrame lazily or eagerly */ 25 | val path = CommonUtils.getResource("/files/web-ds/data.ipc") 26 | val df = Polars.scan.ipc(path).collect 27 | 28 | println("Showing ipc file as a DataFrame to stdout.") 29 | df.show() 30 | 31 | printf("Total rows: %s%n%n", df.count()) 32 | 33 | /* Write this DataFrame to the local filesystem at the provided path */ 34 | val outputPath = CommonUtils.getOutputLocation("output.pq") 35 | df.write().parquet(outputPath) 36 | printf("File written to location: %s%n%n", outputPath) 37 | 38 | /* Overwrite the output if it already exists */ 39 | df.write().option("write_mode", "overwrite").parquet(outputPath) 40 | printf("File overwritten at location: %s%n%n", outputPath) 41 | 42 | /* Write output file with compression */ 43 | df.write() 44 | .options( 45 | Map( 46 | "write_compression" -> "zstd", 47 | "write_mode" -> "overwrite", 48 | "write_parquet_stats" -> "full" 49 | ) 50 | ) 51 | .parquet(outputPath) 52 | printf("File overwritten at location: %s with compression%n%n", outputPath) 53 | 54 | /* Write output file to Amazon S3 object store */ 55 | val s3Path: String = "s3://bucket/output.pq" 56 | df.write() 57 | .options( 58 | Map( 59 | "write_compression" -> "zstd", 60 | "write_mode" -> "overwrite", 61 | "write_parquet_stats" -> "full", 62 | "aws_default_region" -> "us-east-2", 63 | "aws_access_key_id" -> "ABC", 64 | "aws_secret_access_key" -> "XYZ" 65 | ) 66 | ) 67 | .parquet(s3Path) 68 | printf("File overwritten at location: %s with compression%n%n", s3Path) 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /examples/src/main/scala/examples/scala/utils/CommonUtils.scala: -------------------------------------------------------------------------------- 1 | package examples.scala.utils 2 | 3 | import java.nio.file.{Files, Paths, StandardCopyOption} 4 | 5 | object CommonUtils { 6 | 7 | def getResource(path: String): String = { 8 | val target = 9 | Files.createTempFile("tmp-resource-", s"-${Paths.get(path).getFileName.toString}") 10 | Files.copy( 11 | this.getClass.getResourceAsStream(path), 12 | target, 13 | StandardCopyOption.REPLACE_EXISTING 14 | ) 15 | 16 | target.toAbsolutePath.toString 17 | } 18 | 19 | def getOutputLocation(path: String): String = { 20 | val target = 21 | Files.createTempFile("tmp-resource-", s"-${Paths.get(path).getFileName.toString}") 22 | Files.deleteIfExists(target) 23 | 24 | target.toAbsolutePath.toString 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /native/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "scala-polars-native" 3 | version = "0.1.0" 4 | authors = ["chitralverma "] 5 | edition = "2021" 6 | license = "Apache-2.0" 7 | readme = "../README.md" 8 | publish = false 9 | 10 | [lib] 11 | name = "scala_polars" 12 | crate-type = ["cdylib"] 13 | doc = false 14 | 15 | [dependencies] 16 | anyhow = "1" 17 | jni = "0.21.1" 18 | jni_fn = "0.1" 19 | num-derive = "0.4" 20 | num-traits = "0.2" 21 | object_store = { version = "0.11", features = ["aws", "azure", "gcp", "http"] } 22 | polars-core = { version = "0.45", default-features = false } 23 | rust_decimal = "1.36.0" 24 | serde_json = "1" 25 | toml = "0.8" 26 | 27 | [dependencies.polars] 28 | version = "0.45" 29 | default-features = false 30 | features = [ 31 | "json", 32 | "parquet", 33 | "ipc", 34
| "ipc_streaming", 35 | "avro", 36 | "csv", 37 | "cloud", 38 | "approx_unique", 39 | "array_any_all", 40 | "array_count", 41 | "bitwise", 42 | "is_in", 43 | "repeat_by", 44 | "trigonometry", 45 | "sign", 46 | "list_gather", 47 | "list_count", 48 | "list_sets", 49 | "list_any_all", 50 | "list_drop_nulls", 51 | "list_sample", 52 | "cutqcut", 53 | "rle", 54 | "extract_groups", 55 | "pivot", 56 | "extract_jsonpath", 57 | "asof_join", 58 | "cross_join", 59 | "pct_change", 60 | "search_sorted", 61 | "merge_sorted", 62 | "top_k", 63 | "propagate_nans", 64 | "timezones", 65 | "peaks", 66 | "hist", 67 | "find_many", 68 | "dtype-full", 69 | "meta", 70 | "decompress", 71 | "regex", 72 | "binary_encoding", 73 | "polars_cloud", 74 | "performant", 75 | "lazy", 76 | "fmt", 77 | "temporal", 78 | "strings", 79 | "serde", 80 | "serde-lazy", 81 | "rows", 82 | "async", 83 | "aws", 84 | "gcp", 85 | "azure", 86 | "http", 87 | ] 88 | 89 | [profile.release] 90 | codegen-units = 1 91 | lto = true 92 | -------------------------------------------------------------------------------- /native/rustfmt.toml: -------------------------------------------------------------------------------- 1 | group_imports = "StdExternalCrate" 2 | imports_granularity = "Module" 3 | match_block_trailing_comma = true 4 | -------------------------------------------------------------------------------- /native/src/internal_jni/expr/column.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use std::ops::{Add, Div, Mul, Rem, Sub}; 4 | 5 | use anyhow::Context; 6 | use jni::objects::{JClass, JString}; 7 | use jni::sys::{jint, jlong}; 8 | use jni::JNIEnv; 9 | use jni_fn::jni_fn; 10 | use num_derive::FromPrimitive; 11 | use num_traits::FromPrimitive; 12 | use polars::prelude::*; 13 | 14 | use crate::internal_jni::utils::{j_string_to_string, to_ptr}; 15 | use crate::utils::error::ResultExt; 16 | 17 | #[derive(Clone, PartialEq, Eq, Debug, FromPrimitive)] 18 | pub enum BinaryOperator { 19 | EqualTo = 0, 20 | NotEqualTo = 1, 21 | LessThan = 2, 22 | LessThanEqualTo = 3, 23 | GreaterThan = 4, 24 | GreaterThanEqualTo = 5, 25 | Or = 6, 26 | And = 7, 27 | Plus = 8, 28 | Minus = 9, 29 | Multiply = 10, 30 | Divide = 11, 31 | Modulus = 12, 32 | } 33 | 34 | #[derive(Clone, PartialEq, Eq, Debug, FromPrimitive)] 35 | pub enum UnaryOperator { 36 | NOT = 0, 37 | IsNull = 1, 38 | IsNotNull = 2, 39 | IsNan = 3, 40 | IsNotNan = 4, 41 | Between = 5, 42 | IsIn = 6, 43 | Like = 7, 44 | Cast = 8, 45 | } 46 | 47 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.column_expr$")] 48 | pub fn column(mut env: JNIEnv, _: JClass, value: JString) -> jlong { 49 | let name = j_string_to_string( 50 | &mut env, 51 | &value, 52 | Some("Failed to parse provided column name as string"), 53 | ); 54 | 55 | let expr = col(name.as_str()); 56 | to_ptr(expr) 57 | } 58 | 59 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.column_expr$")] 60 | pub fn sort_column_by_name(mut env: JNIEnv, _: JClass, value: JString, descending: bool) -> jlong { 61 | let name = j_string_to_string( 62 | &mut env, 63 | &value, 64 | Some("Failed to parse provided column name as string"), 65 | ); 66 | 67 | let expr = Expr::Sort { 68 | expr: Arc::new(col(name.as_str())), 69 | options: SortOptions { 70 | descending, 71 | ..Default::default() 72 | }, 73 | }; 74 | 75 | to_ptr(expr) 76 | } 77 | 78 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.column_expr$")] 79 | pub unsafe fn applyUnary(mut env: JNIEnv, _: JClass, 
expr_ptr: *mut Expr, operator: jint) -> jlong { 80 | let l_expr = (*expr_ptr).clone(); 81 | 82 | let expr = UnaryOperator::from_i32(operator) 83 | .and_then(|option| match option { 84 | UnaryOperator::NOT => Some(l_expr.not()), 85 | UnaryOperator::IsNull => Some(l_expr.is_null()), 86 | UnaryOperator::IsNotNull => Some(l_expr.is_not_null()), 87 | UnaryOperator::IsNan => Some(l_expr.is_nan()), 88 | UnaryOperator::IsNotNan => Some(l_expr.is_not_nan()), 89 | _ => None, 90 | }) 91 | .context(format!( 92 | "Failed to parse provided ID `{operator}` as unary operator." 93 | )) 94 | .unwrap_or_throw(&mut env); 95 | 96 | to_ptr(expr) 97 | } 98 | 99 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.column_expr$")] 100 | pub unsafe fn applyBinary( 101 | mut env: JNIEnv, 102 | _: JClass, 103 | left_ptr: *mut Expr, 104 | right_ptr: *mut Expr, 105 | operator: jint, 106 | ) -> jlong { 107 | let l_expr = (*left_ptr).clone(); 108 | let r_expr = (*right_ptr).clone(); 109 | 110 | let expr = BinaryOperator::from_i32(operator) 111 | .map(|option| match option { 112 | BinaryOperator::EqualTo => l_expr.eq(r_expr), 113 | BinaryOperator::NotEqualTo => l_expr.neq(r_expr), 114 | BinaryOperator::LessThan => l_expr.lt(r_expr), 115 | BinaryOperator::LessThanEqualTo => l_expr.lt_eq(r_expr), 116 | BinaryOperator::GreaterThan => l_expr.gt(r_expr), 117 | BinaryOperator::GreaterThanEqualTo => l_expr.gt_eq(r_expr), 118 | BinaryOperator::Or => l_expr.or(r_expr), 119 | BinaryOperator::And => l_expr.and(r_expr), 120 | BinaryOperator::Plus => l_expr.add(r_expr), 121 | BinaryOperator::Minus => l_expr.sub(r_expr), 122 | BinaryOperator::Multiply => l_expr.mul(r_expr), 123 | BinaryOperator::Divide => l_expr.div(r_expr), 124 | BinaryOperator::Modulus => l_expr.rem(r_expr), 125 | }) 126 | .context(format!( 127 | "Failed to parse provided ID `{operator}` as binary operator." 
128 | )) 129 | .unwrap_or_throw(&mut env); 130 | 131 | to_ptr(expr) 132 | } 133 | -------------------------------------------------------------------------------- /native/src/internal_jni/expr/literal.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use anyhow::Context; 4 | use jni::objects::{JClass, JString}; 5 | use jni::sys::{jboolean, jdouble, jfloat, jint, jlong}; 6 | use jni::JNIEnv; 7 | use jni_fn::jni_fn; 8 | use polars::export::chrono::{NaiveDate, NaiveDateTime}; 9 | use polars::prelude::*; 10 | use polars_core::export::chrono::{NaiveTime, Timelike}; 11 | 12 | use crate::internal_jni::utils::{j_string_to_string, to_ptr}; 13 | use crate::utils::error::ResultExt; 14 | 15 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.literal_expr$")] 16 | pub fn nullLit(_: JNIEnv, _: JClass) -> jlong { 17 | let expr = NULL.lit(); 18 | to_ptr(expr) 19 | } 20 | 21 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.literal_expr$")] 22 | pub fn fromString(mut env: JNIEnv, _: JClass, value: JString) -> jlong { 23 | let string_value = j_string_to_string( 24 | &mut env, 25 | &value, 26 | Some("Failed to parse provided literal value as string"), 27 | ); 28 | let expr = lit(string_value); 29 | to_ptr(expr) 30 | } 31 | 32 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.literal_expr$")] 33 | pub fn fromBool(_: JNIEnv, _: JClass, value: jboolean) -> jlong { 34 | let expr = lit(value); 35 | to_ptr(expr) 36 | } 37 | 38 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.literal_expr$")] 39 | pub fn fromInt(_: JNIEnv, _: JClass, value: jint) -> jlong { 40 | let expr = lit(value); 41 | to_ptr(expr) 42 | } 43 | 44 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.literal_expr$")] 45 | pub fn fromLong(_: JNIEnv, _: JClass, value: jlong) -> jlong { 46 | let expr = lit(value); 47 | to_ptr(expr) 48 | } 49 | 50 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.literal_expr$")] 51 | pub fn fromFloat(_: JNIEnv, _: JClass, value: jfloat) -> jlong { 52 | let expr = lit(value); 53 | to_ptr(expr) 54 | } 55 | 56 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.literal_expr$")] 57 | pub fn fromDouble(_: JNIEnv, _: JClass, value: jdouble) -> jlong { 58 | let expr = lit(value); 59 | to_ptr(expr) 60 | } 61 | 62 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.literal_expr$")] 63 | pub fn fromDate(mut env: JNIEnv, _: JClass, value: JString) -> jlong { 64 | let string_value = j_string_to_string( 65 | &mut env, 66 | &value, 67 | Some("Failed to parse provided literal value as string"), 68 | ); 69 | 70 | let date = NaiveDate::parse_from_str(string_value.as_str(), "%Y-%m-%d") 71 | .context(format!( 72 | "Failed to parse value `{}` as date with format `%Y-%m-%d`", 73 | string_value 74 | )) 75 | .unwrap_or_throw(&mut env); 76 | 77 | let expr = lit(date); 78 | to_ptr(expr) 79 | } 80 | 81 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.literal_expr$")] 82 | pub fn fromTime(mut env: JNIEnv, _: JClass, value: JString) -> jlong { 83 | let string_value = j_string_to_string( 84 | &mut env, 85 | &value, 86 | Some("Failed to parse provided literal value as string"), 87 | ); 88 | 89 | let time = NaiveTime::parse_from_str(string_value.as_str(), "%H:%M:%S%.f") 90 | .context(format!( 91 | "Failed to parse value `{}` as time with format `%H:%M:%S%.f`", 92 | string_value 93 | )) 94 | .unwrap_or_throw(&mut env); 95 | 96 | let total_seconds = time.num_seconds_from_midnight() as 
i64; 97 | let nanos = time.nanosecond() as i64; 98 | 99 | let expr = Expr::Literal(LiteralValue::Time((total_seconds) * 1_000_000_000 + nanos)); 100 | to_ptr(expr) 101 | } 102 | 103 | #[jni_fn("org.polars.scala.polars.internal.jni.expressions.literal_expr$")] 104 | pub fn fromDateTime(mut env: JNIEnv, _: JClass, value: JString) -> jlong { 105 | let string_value = j_string_to_string( 106 | &mut env, 107 | &value, 108 | Some("Failed to parse provided literal value as string"), 109 | ); 110 | 111 | let datetime = NaiveDateTime::parse_from_str(string_value.as_str(), "%FT%T%.f") 112 | .context(format!( 113 | "Failed to parse value `{}` as datetime with format `%FT%T%.f`", 114 | string_value 115 | )) 116 | .unwrap_or_throw(&mut env); 117 | 118 | let expr = lit(datetime); 119 | to_ptr(expr) 120 | } 121 | -------------------------------------------------------------------------------- /native/src/internal_jni/expr/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod column; 2 | pub mod literal; 3 | -------------------------------------------------------------------------------- /native/src/internal_jni/frame.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | use std::borrow::ToOwned; 3 | use std::iter::Iterator; 4 | 5 | use anyhow::Context; 6 | use jni::objects::{JClass, JLongArray}; 7 | use jni::sys::{jlong, jstring}; 8 | use jni::JNIEnv; 9 | use jni_fn::jni_fn; 10 | use polars::prelude::*; 11 | use polars_core::utils::concat_df; 12 | 13 | use crate::internal_jni::utils::*; 14 | use crate::utils::error::ResultExt; 15 | 16 | #[jni_fn("org.polars.scala.polars.internal.jni.data_frame$")] 17 | pub unsafe fn schemaString(mut env: JNIEnv, _: JClass, df_ptr: *mut DataFrame) -> jstring { 18 | let df = &mut *df_ptr; 19 | 20 | serde_json::to_string(&df.schema().to_arrow(CompatLevel::oldest())) 21 | .map(|schema_string| string_to_j_string(&mut env, schema_string, None::<&str>)) 22 | .context("Failed to serialize schema") 23 | .unwrap_or_throw(&mut env) 24 | } 25 | 26 | #[jni_fn("org.polars.scala.polars.internal.jni.data_frame$")] 27 | pub unsafe fn show(_: JNIEnv, _: JClass, df_ptr: *mut DataFrame) { 28 | let df = &mut *df_ptr; 29 | println!("{:?}", df) 30 | } 31 | 32 | #[jni_fn("org.polars.scala.polars.internal.jni.data_frame$")] 33 | pub unsafe fn count(_: JNIEnv, _: JClass, df_ptr: *mut DataFrame) -> jlong { 34 | (*df_ptr).shape().0 as i64 35 | } 36 | 37 | #[jni_fn("org.polars.scala.polars.internal.jni.data_frame$")] 38 | pub unsafe fn concatDataFrames(mut env: JNIEnv, _: JClass, inputs: JLongArray) -> jlong { 39 | let dfs: Vec<_> = JavaArrayToVec::to_vec(&mut env, inputs) 40 | .into_iter() 41 | .map(|ptr| (*(ptr as *mut DataFrame)).to_owned()) 42 | .collect(); 43 | 44 | let concatenated_df = concat_df(dfs.iter()) 45 | .context("Failed to concatenate dataframes") 46 | .unwrap_or_throw(&mut env); 47 | 48 | to_ptr(concatenated_df) 49 | } 50 | 51 | #[jni_fn("org.polars.scala.polars.internal.jni.data_frame$")] 52 | pub unsafe fn toLazy(_: JNIEnv, _: JClass, df_ptr: *mut DataFrame) -> jlong { 53 | let ldf = (*df_ptr).clone().lazy(); 54 | to_ptr(ldf) 55 | } 56 | 57 | #[jni_fn("org.polars.scala.polars.internal.jni.data_frame$")] 58 | pub unsafe fn limit(_: JNIEnv, _: JClass, df_ptr: *mut DataFrame, n: jlong) -> jlong { 59 | let limited_df = (*df_ptr).head(Some(n as usize)); 60 | to_ptr(limited_df) 61 | } 62 | 63 | #[jni_fn("org.polars.scala.polars.internal.jni.data_frame$")] 64 | pub unsafe fn 
tail(_: JNIEnv, _: JClass, df_ptr: *mut DataFrame, n: jlong) -> jlong { 65 | let limited_df = (*df_ptr).tail(Some(n as usize)); 66 | to_ptr(limited_df) 67 | } 68 | 69 | #[jni_fn("org.polars.scala.polars.internal.jni.data_frame$")] 70 | pub unsafe fn fromSeries(mut env: JNIEnv, _: JClass, ptrs: JLongArray) -> jlong { 71 | let data: Vec<_> = JavaArrayToVec::to_vec(&mut env, ptrs) 72 | .into_iter() 73 | .map(|ptr| (*(ptr as *mut Series)).to_owned()) 74 | .collect(); 75 | 76 | let df = DataFrame::from_iter(data); 77 | to_ptr(df) 78 | } 79 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/mod.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Context; 2 | use jni::objects::JString; 3 | use jni::sys::jint; 4 | use jni::JNIEnv; 5 | use polars::io::RowIndex; 6 | use polars::prelude::{IdxSize, PlHashMap}; 7 | 8 | use super::utils::j_string_to_string; 9 | use crate::utils::error::ResultExt; 10 | 11 | pub mod scan; 12 | pub mod write; 13 | 14 | pub fn get_file_path(env: &mut JNIEnv, file_path: JString) -> String { 15 | j_string_to_string(env, &file_path, Some("Failed to get provided path")) 16 | } 17 | 18 | fn parse_json_to_options(env: &mut JNIEnv, options: JString) -> PlHashMap { 19 | Ok(j_string_to_string( 20 | env, 21 | &options, 22 | Some("Failed to deserialize the provided options"), 23 | )) 24 | .and_then(|s| serde_json::from_str(&s)) 25 | .context("Failed to parse the provided options") 26 | .unwrap_or_throw(env) 27 | } 28 | 29 | pub fn get_row_index( 30 | env: &mut JNIEnv, 31 | row_count_col_name: JString, 32 | row_count_col_offset: jint, 33 | ) -> Option { 34 | if !row_count_col_name.is_null() { 35 | Some(RowIndex { 36 | name: j_string_to_string( 37 | env, 38 | &row_count_col_name, 39 | Some("Failed to get the provided row column name"), 40 | ) 41 | .into(), 42 | offset: if row_count_col_offset.is_positive() { 43 | row_count_col_offset as IdxSize 44 | } else { 45 | 0 46 | }, 47 | }) 48 | } else { 49 | None 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/scan/csv.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | use std::sync::Arc; 3 | 4 | use anyhow::Context; 5 | use jni::objects::{JClass, JObject, JObjectArray, JString}; 6 | use jni::sys::jlong; 7 | use jni::JNIEnv; 8 | use jni_fn::jni_fn; 9 | use polars::io::cloud::CloudOptions; 10 | use polars::io::RowIndex; 11 | use polars::prelude::*; 12 | 13 | use crate::internal_jni::io::{get_file_path, parse_json_to_options}; 14 | use crate::internal_jni::utils::{to_ptr, JavaArrayToVec}; 15 | use crate::utils::error::ResultExt; 16 | 17 | #[jni_fn("org.polars.scala.polars.internal.jni.io.scan$")] 18 | pub unsafe fn scanCSV(mut env: JNIEnv, _: JClass, paths: JObjectArray, options: JString) -> jlong { 19 | let mut options = parse_json_to_options(&mut env, options); 20 | 21 | let n_rows = options 22 | .remove("scan_csv_n_rows") 23 | .and_then(|s| s.parse::().ok()); 24 | 25 | let row_index_offset = options 26 | .remove("scan_csv_row_index_offset") 27 | .and_then(|s| s.parse::().ok()) 28 | .unwrap_or(0); 29 | 30 | let row_index = options 31 | .remove("scan_csv_row_index_name") 32 | .map(|name| RowIndex { 33 | name: name.into(), 34 | offset: row_index_offset, 35 | }); 36 | 37 | let cache = options 38 | .remove("scan_csv_cache") 39 | .and_then(|s| s.parse::().ok()) 40 | .unwrap_or(true); 41 | 42 | let 
glob = options 43 | .remove("scan_csv_glob") 44 | .and_then(|s| s.parse::().ok()) 45 | .unwrap_or(true); 46 | 47 | let low_memory = options 48 | .remove("scan_csv_low_memory") 49 | .and_then(|s| s.parse::().ok()) 50 | .unwrap_or(false); 51 | 52 | let rechunk = options 53 | .remove("scan_csv_rechunk") 54 | .and_then(|s| s.parse::().ok()) 55 | .unwrap_or(false); 56 | 57 | let file_path_col = options 58 | .remove("scan_csv_include_file_paths") 59 | .map(PlSmallStr::from_string); 60 | 61 | let raise_if_empty = options 62 | .remove("scan_csv_raise_if_empty") 63 | .and_then(|s| s.parse::().ok()) 64 | .unwrap_or(true); 65 | 66 | let ignore_errors = options 67 | .remove("scan_csv_ignore_errors") 68 | .and_then(|s| s.parse::().ok()) 69 | .unwrap_or(false); 70 | 71 | let has_header = options 72 | .remove("scan_csv_has_header") 73 | .and_then(|s| s.parse::().ok()) 74 | .unwrap_or(true); 75 | 76 | let missing_is_null = options 77 | .remove("scan_csv_missing_is_null") 78 | .and_then(|s| s.parse::().ok()) 79 | .unwrap_or(true); 80 | 81 | let truncate_ragged_lines = options 82 | .remove("scan_csv_truncate_ragged_lines") 83 | .and_then(|s| s.parse::().ok()) 84 | .unwrap_or(false); 85 | 86 | let try_parse_dates = options 87 | .remove("scan_csv_try_parse_dates") 88 | .and_then(|s| s.parse::().ok()) 89 | .unwrap_or(false); 90 | 91 | let decimal_comma = options 92 | .remove("scan_csv_decimal_comma") 93 | .and_then(|s| s.parse::().ok()) 94 | .unwrap_or(false); 95 | 96 | let chunk_size = options 97 | .remove("scan_csv_chunk_size") 98 | .and_then(|s| s.parse::().ok()) 99 | .unwrap_or(1 << 18); 100 | 101 | let skip_rows = options 102 | .remove("scan_csv_skip_rows") 103 | .and_then(|s| s.parse::().ok()) 104 | .unwrap_or(0); 105 | 106 | let skip_rows_after_header = options 107 | .remove("scan_csv_skip_rows_after_header") 108 | .and_then(|s| s.parse::().ok()) 109 | .unwrap_or(0); 110 | 111 | let infer_schema_length = options 112 | .remove("scan_csv_skip_infer_schema_length") 113 | .and_then(|s| s.parse::().ok()) 114 | .map_or(Some(100), Some); 115 | 116 | let separator = options 117 | .remove("scan_csv_separator") 118 | .and_then(|s| s.parse::().ok()) 119 | .unwrap_or(b','); 120 | 121 | let eol_char = options 122 | .remove("scan_csv_eol_char") 123 | .and_then(|s| s.parse::().ok()) 124 | .unwrap_or(b'\n'); 125 | 126 | let quote_char = options 127 | .remove("scan_csv_quote_char") 128 | .and_then(|s| s.parse::().ok()) 129 | .map_or(Some(b'"'), Some); 130 | 131 | let encoding = options 132 | .remove("scan_csv_encoding") 133 | .map(|s| match s.as_str() { 134 | "lossy_utf8" => CsvEncoding::LossyUtf8, 135 | _ => CsvEncoding::Utf8, 136 | }) 137 | .unwrap_or_default(); 138 | 139 | let null_value = options 140 | .remove("scan_csv_null_value") 141 | .map(|s| NullValues::AllColumnsSingle(s.as_str().into())); 142 | 143 | let comment_prefix = options 144 | .remove("scan_csv_comment_prefix") 145 | .map(PlSmallStr::from); 146 | 147 | let paths_vec: Vec = JavaArrayToVec::to_vec(&mut env, paths) 148 | .into_iter() 149 | .map(|o| JObject::from_raw(o)) 150 | .map(|o| get_file_path(&mut env, JString::from(o))) 151 | .map(PathBuf::from) 152 | .collect(); 153 | 154 | let first_path = paths_vec 155 | .first() 156 | .and_then(|p| p.to_str()) 157 | .context("Failed to get first path from provided list of paths") 158 | .unwrap_or_throw(&mut env); 159 | 160 | let cloud_options = CloudOptions::from_untyped_config(first_path, &options).ok(); 161 | 162 | let ldf = LazyCsvReader::new_paths(Arc::from(paths_vec.into_boxed_slice())) 163 | 
.with_glob(glob) 164 | .with_cache(cache) 165 | .with_include_file_paths(file_path_col) 166 | .with_low_memory(low_memory) 167 | .with_rechunk(rechunk) 168 | .with_n_rows(n_rows) 169 | .with_row_index(row_index) 170 | .with_raise_if_empty(raise_if_empty) 171 | .with_ignore_errors(ignore_errors) 172 | .with_has_header(has_header) 173 | .with_missing_is_null(missing_is_null) 174 | .with_truncate_ragged_lines(truncate_ragged_lines) 175 | .with_try_parse_dates(try_parse_dates) 176 | .with_decimal_comma(decimal_comma) 177 | .with_chunk_size(chunk_size) 178 | .with_skip_rows(skip_rows) 179 | .with_skip_rows_after_header(skip_rows_after_header) 180 | .with_infer_schema_length(infer_schema_length) 181 | .with_separator(separator) 182 | .with_quote_char(quote_char) 183 | .with_eol_char(eol_char) 184 | .with_encoding(encoding) 185 | .with_null_values(null_value) 186 | .with_comment_prefix(comment_prefix) 187 | .with_cloud_options(cloud_options) 188 | .finish() 189 | .context("Failed to perform csv scan") 190 | .unwrap_or_throw(&mut env); 191 | 192 | to_ptr(ldf) 193 | } 194 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/scan/ipc.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | use std::sync::Arc; 3 | 4 | use anyhow::Context; 5 | use jni::objects::{JClass, JObject, JObjectArray, JString}; 6 | use jni::sys::jlong; 7 | use jni::JNIEnv; 8 | use jni_fn::jni_fn; 9 | use polars::io::cloud::CloudOptions; 10 | use polars::io::{HiveOptions, RowIndex}; 11 | use polars::prelude::*; 12 | 13 | use crate::internal_jni::io::{get_file_path, parse_json_to_options}; 14 | use crate::internal_jni::utils::{to_ptr, JavaArrayToVec}; 15 | use crate::utils::error::ResultExt; 16 | 17 | #[jni_fn("org.polars.scala.polars.internal.jni.io.scan$")] 18 | pub unsafe fn scanIPC(mut env: JNIEnv, _: JClass, paths: JObjectArray, options: JString) -> jlong { 19 | let mut options = parse_json_to_options(&mut env, options); 20 | 21 | let n_rows = options 22 | .remove("scan_ipc_n_rows") 23 | .and_then(|s| s.parse::().ok()); 24 | 25 | let cache = options 26 | .remove("scan_ipc_cache") 27 | .and_then(|s| s.parse::().ok()) 28 | .unwrap_or(true); 29 | 30 | let rechunk = options 31 | .remove("scan_ipc_rechunk") 32 | .and_then(|s| s.parse::().ok()) 33 | .unwrap_or(false); 34 | 35 | let row_index_offset = options 36 | .remove("scan_ipc_row_index_offset") 37 | .and_then(|s| s.parse::().ok()) 38 | .unwrap_or(0); 39 | 40 | let row_index = options 41 | .remove("scan_ipc_row_index_name") 42 | .map(|name| RowIndex { 43 | name: name.into(), 44 | offset: row_index_offset, 45 | }); 46 | 47 | let file_path_col = options 48 | .remove("scan_ipc_include_file_paths") 49 | .map(PlSmallStr::from_string); 50 | 51 | let hive_scan_partitions = options 52 | .remove("scan_ipc_hive_scan_partitions") 53 | .and_then(|s| s.parse::().ok()) 54 | .map_or(Some(true), Some); 55 | 56 | let hive_try_parse_dates = options 57 | .remove("scan_ipc_hive_try_parse_dates") 58 | .and_then(|s| s.parse::().ok()) 59 | .unwrap_or(true); 60 | 61 | let paths_vec: Vec = JavaArrayToVec::to_vec(&mut env, paths) 62 | .into_iter() 63 | .map(|o| JObject::from_raw(o)) 64 | .map(|o| get_file_path(&mut env, JString::from(o))) 65 | .map(PathBuf::from) 66 | .collect(); 67 | 68 | let first_path = paths_vec 69 | .first() 70 | .and_then(|p| p.to_str()) 71 | .context("Failed to get first path from provided list of paths") 72 | .unwrap_or_throw(&mut env); 73 | 74 | let 
cloud_options = CloudOptions::from_untyped_config(first_path, &options).ok(); 75 | 76 | let scan_args = ScanArgsIpc { 77 | n_rows, 78 | cache, 79 | rechunk, 80 | row_index, 81 | cloud_options, 82 | hive_options: HiveOptions { 83 | enabled: hive_scan_partitions, 84 | hive_start_idx: 0, 85 | schema: None, 86 | try_parse_dates: hive_try_parse_dates, 87 | }, 88 | include_file_paths: file_path_col, 89 | }; 90 | 91 | let ldf = LazyFrame::scan_ipc_files(Arc::from(paths_vec.into_boxed_slice()), scan_args) 92 | .context("Failed to perform ipc scan") 93 | .unwrap_or_throw(&mut env); 94 | 95 | to_ptr(ldf) 96 | } 97 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/scan/json_lines.rs: -------------------------------------------------------------------------------- 1 | use std::num::NonZeroUsize; 2 | use std::path::PathBuf; 3 | use std::str::FromStr; 4 | use std::sync::Arc; 5 | 6 | use anyhow::Context; 7 | use jni::objects::{JClass, JObject, JObjectArray, JString}; 8 | use jni::sys::jlong; 9 | use jni::JNIEnv; 10 | use jni_fn::jni_fn; 11 | use polars::io::cloud::CloudOptions; 12 | use polars::io::RowIndex; 13 | use polars::prelude::*; 14 | 15 | use crate::internal_jni::io::{get_file_path, parse_json_to_options}; 16 | use crate::internal_jni::utils::{to_ptr, JavaArrayToVec}; 17 | use crate::utils::error::ResultExt; 18 | 19 | #[jni_fn("org.polars.scala.polars.internal.jni.io.scan$")] 20 | pub unsafe fn scanJsonLines( 21 | mut env: JNIEnv, 22 | _: JClass, 23 | paths: JObjectArray, 24 | options: JString, 25 | ) -> jlong { 26 | let mut options = parse_json_to_options(&mut env, options); 27 | 28 | let n_rows = options 29 | .remove("scan_ndjson_n_rows") 30 | .and_then(|s| s.parse::().ok()); 31 | 32 | let row_index_offset = options 33 | .remove("scan_ndjson_row_index_offset") 34 | .and_then(|s| s.parse::().ok()) 35 | .unwrap_or(0); 36 | 37 | let row_index = options 38 | .remove("scan_ndjson_row_index_name") 39 | .map(|name| RowIndex { 40 | name: name.into(), 41 | offset: row_index_offset, 42 | }); 43 | 44 | let low_memory = options 45 | .remove("scan_ndjson_low_memory") 46 | .and_then(|s| s.parse::().ok()) 47 | .unwrap_or(false); 48 | 49 | let rechunk = options 50 | .remove("scan_ndjson_rechunk") 51 | .and_then(|s| s.parse::().ok()) 52 | .unwrap_or(false); 53 | 54 | let file_path_col = options 55 | .remove("scan_ndjson_include_file_paths") 56 | .map(PlSmallStr::from_string); 57 | 58 | let ignore_errors = options 59 | .remove("scan_ndjson_ignore_errors") 60 | .and_then(|s| s.parse::().ok()) 61 | .unwrap_or(false); 62 | 63 | let batch_size = options 64 | .remove("scan_ndjson_batch_size") 65 | .and_then(|s| NonZeroUsize::from_str(s.as_str()).ok()); 66 | 67 | let infer_schema_length = options 68 | .remove("scan_ndjson_infer_schema_length") 69 | .and_then(|s| NonZeroUsize::from_str(s.as_str()).ok()) 70 | .map_or(NonZeroUsize::new(100), Some); 71 | 72 | let paths_vec: Vec = JavaArrayToVec::to_vec(&mut env, paths) 73 | .into_iter() 74 | .map(|o| JObject::from_raw(o)) 75 | .map(|o| get_file_path(&mut env, JString::from(o))) 76 | .map(PathBuf::from) 77 | .collect(); 78 | 79 | let first_path = paths_vec 80 | .first() 81 | .and_then(|p| p.to_str()) 82 | .context("Failed to get first path from provided list of paths") 83 | .unwrap_or_throw(&mut env); 84 | 85 | let cloud_options = CloudOptions::from_untyped_config(first_path, &options).ok(); 86 | 87 | let ldf = LazyJsonLineReader::new_paths(Arc::from(paths_vec.into_boxed_slice())) 88 | .low_memory(low_memory) 89 
| .with_rechunk(rechunk) 90 | .with_n_rows(n_rows) 91 | .with_row_index(row_index) 92 | .with_infer_schema_length(infer_schema_length) 93 | .with_ignore_errors(ignore_errors) 94 | .with_batch_size(batch_size) 95 | .with_include_file_paths(file_path_col) 96 | .with_cloud_options(cloud_options) 97 | .finish() 98 | .context("Failed to perform ndjson scan") 99 | .unwrap_or_throw(&mut env); 100 | 101 | to_ptr(ldf) 102 | } 103 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/scan/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod csv; 2 | pub mod ipc; 3 | pub mod json_lines; 4 | pub mod parquet; 5 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/scan/parquet.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | use std::sync::Arc; 3 | 4 | use anyhow::Context; 5 | use jni::objects::{JClass, JObject, JObjectArray, JString}; 6 | use jni::sys::jlong; 7 | use jni::JNIEnv; 8 | use jni_fn::jni_fn; 9 | use polars::io::cloud::CloudOptions; 10 | use polars::io::{HiveOptions, RowIndex}; 11 | use polars::prelude::*; 12 | 13 | use crate::internal_jni::io::{get_file_path, parse_json_to_options}; 14 | use crate::internal_jni::utils::{to_ptr, JavaArrayToVec}; 15 | use crate::utils::error::ResultExt; 16 | 17 | #[jni_fn("org.polars.scala.polars.internal.jni.io.scan$")] 18 | pub unsafe fn scanParquet( 19 | mut env: JNIEnv, 20 | _: JClass, 21 | paths: JObjectArray, 22 | options: JString, 23 | ) -> jlong { 24 | let mut options = parse_json_to_options(&mut env, options); 25 | 26 | let n_rows = options 27 | .remove("scan_parquet_n_rows") 28 | .and_then(|s| s.parse::().ok()); 29 | 30 | let parallel = options 31 | .remove("scan_parquet_parallel") 32 | .map(|s| match s.as_str() { 33 | "columns" => ParallelStrategy::Columns, 34 | "prefiltered" => ParallelStrategy::Prefiltered, 35 | "row_groups" => ParallelStrategy::RowGroups, 36 | "none" => ParallelStrategy::None, 37 | _ => ParallelStrategy::default(), 38 | }) 39 | .unwrap_or_default(); 40 | 41 | let row_index_offset = options 42 | .remove("scan_parquet_row_index_offset") 43 | .and_then(|s| s.parse::().ok()) 44 | .unwrap_or(0); 45 | 46 | let row_index = options 47 | .remove("scan_parquet_row_index_name") 48 | .map(|name| RowIndex { 49 | name: name.into(), 50 | offset: row_index_offset, 51 | }); 52 | 53 | let use_statistics = options 54 | .remove("scan_parquet_use_statistics") 55 | .and_then(|s| s.parse::().ok()) 56 | .unwrap_or(true); 57 | 58 | let cache = options 59 | .remove("scan_parquet_cache") 60 | .and_then(|s| s.parse::().ok()) 61 | .unwrap_or(true); 62 | 63 | let glob = options 64 | .remove("scan_parquet_glob") 65 | .and_then(|s| s.parse::().ok()) 66 | .unwrap_or(true); 67 | 68 | let low_memory = options 69 | .remove("scan_parquet_low_memory") 70 | .and_then(|s| s.parse::().ok()) 71 | .unwrap_or(false); 72 | 73 | let rechunk = options 74 | .remove("scan_parquet_rechunk") 75 | .and_then(|s| s.parse::().ok()) 76 | .unwrap_or(false); 77 | 78 | let allow_missing_columns = options 79 | .remove("scan_parquet_allow_missing_columns") 80 | .and_then(|s| s.parse::().ok()) 81 | .unwrap_or(false); 82 | 83 | let file_path_col = options 84 | .remove("scan_parquet_include_file_paths") 85 | .map(PlSmallStr::from_string); 86 | 87 | let hive_scan_partitions = options 88 | .remove("scan_parquet_hive_scan_partitions") 89 | .and_then(|s| 
s.parse::().ok()) 90 | .map_or(Some(true), Some); 91 | 92 | let hive_try_parse_dates = options 93 | .remove("scan_parquet_hive_try_parse_dates") 94 | .and_then(|s| s.parse::().ok()) 95 | .unwrap_or(true); 96 | 97 | let paths_vec: Vec = JavaArrayToVec::to_vec(&mut env, paths) 98 | .into_iter() 99 | .map(|o| JObject::from_raw(o)) 100 | .map(|o| get_file_path(&mut env, JString::from(o))) 101 | .map(PathBuf::from) 102 | .collect(); 103 | 104 | let first_path = paths_vec 105 | .first() 106 | .and_then(|p| p.to_str()) 107 | .context("Failed to get first path from provided list of paths") 108 | .unwrap_or_throw(&mut env); 109 | 110 | let cloud_options = CloudOptions::from_untyped_config(first_path, &options).ok(); 111 | 112 | let scan_args = ScanArgsParquet { 113 | n_rows, 114 | parallel, 115 | row_index, 116 | use_statistics, 117 | cache, 118 | glob, 119 | low_memory, 120 | rechunk, 121 | allow_missing_columns, 122 | cloud_options, 123 | include_file_paths: file_path_col, 124 | hive_options: HiveOptions { 125 | enabled: hive_scan_partitions, 126 | hive_start_idx: 0, 127 | schema: None, 128 | try_parse_dates: hive_try_parse_dates, 129 | }, 130 | schema: None, 131 | }; 132 | 133 | let ldf = LazyFrame::scan_parquet_files(Arc::from(paths_vec.into_boxed_slice()), scan_args) 134 | .context("Failed to perform parquet scan") 135 | .unwrap_or_throw(&mut env); 136 | 137 | to_ptr(ldf) 138 | } 139 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/write/avro.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use anyhow::Context; 4 | use jni::objects::{JObject, JString}; 5 | use jni::JNIEnv; 6 | use jni_fn::jni_fn; 7 | use polars::io::avro::{AvroCompression, AvroWriter}; 8 | use polars::prelude::*; 9 | 10 | use crate::internal_jni::io::parse_json_to_options; 11 | use crate::internal_jni::io::write::get_df_and_writer; 12 | use crate::utils::error::ResultExt; 13 | 14 | fn parse_avro_compression(compression: Option) -> Option { 15 | match compression { 16 | Some(t) => match t.to_lowercase().as_str() { 17 | "uncompressed" => None, 18 | "deflate" => Some(AvroCompression::Deflate), 19 | "snappy" => Some(AvroCompression::Snappy), 20 | e => { 21 | polars_warn!(format!( 22 | "Compression must be one of {{'uncompressed', 'deflate', 'snappy'}}, got {e}. Using defaults." 
23 | )); 24 | None 25 | }, 26 | }, 27 | _ => None, 28 | } 29 | } 30 | 31 | #[jni_fn("org.polars.scala.polars.internal.jni.io.write$")] 32 | pub fn writeAvro( 33 | mut env: JNIEnv, 34 | _object: JObject, 35 | df_ptr: *mut DataFrame, 36 | filePath: JString, 37 | options: JString, 38 | ) { 39 | let mut options = parse_json_to_options(&mut env, options); 40 | 41 | let record_name = options.remove("write_avro_record_name"); 42 | 43 | let overwrite_mode = options 44 | .remove("write_mode") 45 | .map(|s| matches!(s.to_lowercase().as_str(), "overwrite")) 46 | .unwrap_or(false); 47 | 48 | let compression = options.remove("write_compression"); 49 | 50 | let (mut dataframe, writer) = 51 | get_df_and_writer(&mut env, df_ptr, filePath, overwrite_mode, options); 52 | 53 | let avro_compression = parse_avro_compression(compression); 54 | 55 | let mut avro_writer = AvroWriter::new(writer).with_compression(avro_compression); 56 | 57 | if let Some(value) = record_name { 58 | avro_writer = avro_writer.with_name(value) 59 | } 60 | 61 | avro_writer 62 | .finish(&mut dataframe) 63 | .context("Failed to write Avro data") 64 | .unwrap_or_throw(&mut env); 65 | } 66 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/write/csv.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use anyhow::Context; 4 | use jni::objects::{JObject, JString}; 5 | use jni::JNIEnv; 6 | use jni_fn::jni_fn; 7 | use polars::prelude::*; 8 | 9 | use crate::internal_jni::io::parse_json_to_options; 10 | use crate::internal_jni::io::write::get_df_and_writer; 11 | use crate::utils::error::ResultExt; 12 | 13 | #[jni_fn("org.polars.scala.polars.internal.jni.io.write$")] 14 | pub fn writeCSV( 15 | mut env: JNIEnv, 16 | _object: JObject, 17 | df_ptr: *mut DataFrame, 18 | filePath: JString, 19 | options: JString, 20 | ) { 21 | let mut options = parse_json_to_options(&mut env, options); 22 | 23 | let include_bom = options 24 | .remove("write_csv_include_bom") 25 | .and_then(|s| s.parse::().ok()); 26 | 27 | let include_header = options 28 | .remove("write_csv_include_header") 29 | .and_then(|s| s.parse::().ok()); 30 | 31 | let float_scientific = options 32 | .remove("write_csv_float_scientific") 33 | .and_then(|s| s.parse::().ok()); 34 | 35 | let float_precision = options 36 | .remove("write_csv_float_precision") 37 | .and_then(|s| s.parse::().ok()); 38 | 39 | let separator = options 40 | .remove("write_csv_separator") 41 | .and_then(|s| s.parse::().ok()); 42 | 43 | let quote_char = options 44 | .remove("write_csv_quote_char") 45 | .and_then(|s| s.parse::().ok()); 46 | 47 | let date_format = options.remove("write_csv_date_format"); 48 | let time_format = options.remove("write_csv_time_format"); 49 | let datetime_format = options.remove("write_csv_datetime_format"); 50 | 51 | let line_terminator = options.remove("write_csv_line_terminator"); 52 | let null_value = options.remove("write_csv_null_value"); 53 | 54 | let quote_style = options 55 | .remove("write_csv_quote_style") 56 | .map(|s| match s.as_str() { 57 | "always" => QuoteStyle::Always, 58 | "non_numeric" => QuoteStyle::NonNumeric, 59 | "never" => QuoteStyle::Never, 60 | _ => QuoteStyle::Necessary, 61 | }); 62 | 63 | let overwrite_mode = options 64 | .remove("write_mode") 65 | .map(|s| matches!(s.to_lowercase().as_str(), "overwrite")) 66 | .unwrap_or(false); 67 | 68 | let (mut dataframe, writer) = 69 | get_df_and_writer(&mut env, df_ptr, filePath, overwrite_mode, 
options); 70 | 71 | let mut csv_writer = CsvWriter::new(writer) 72 | .with_date_format(date_format) 73 | .with_time_format(time_format) 74 | .with_datetime_format(datetime_format) 75 | .with_float_precision(float_precision) 76 | .with_float_scientific(float_scientific); 77 | 78 | if let Some(value) = include_bom { 79 | csv_writer = csv_writer.include_bom(value) 80 | } 81 | 82 | if let Some(value) = include_header { 83 | csv_writer = csv_writer.include_header(value) 84 | } 85 | 86 | if let Some(value) = separator { 87 | csv_writer = csv_writer.with_separator(value) 88 | } 89 | 90 | if let Some(value) = quote_char { 91 | csv_writer = csv_writer.with_quote_char(value) 92 | } 93 | 94 | if let Some(value) = line_terminator { 95 | csv_writer = csv_writer.with_line_terminator(value) 96 | } 97 | 98 | if let Some(value) = null_value { 99 | csv_writer = csv_writer.with_null_value(value) 100 | } 101 | 102 | if let Some(value) = quote_style { 103 | csv_writer = csv_writer.with_quote_style(value) 104 | } 105 | 106 | csv_writer 107 | .finish(&mut dataframe) 108 | .context("Failed to write CSV data") 109 | .unwrap_or_throw(&mut env); 110 | } 111 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/write/ipc.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use anyhow::Context; 4 | use jni::objects::{JObject, JString}; 5 | use jni::JNIEnv; 6 | use jni_fn::jni_fn; 7 | use polars::prelude::*; 8 | 9 | use crate::internal_jni::io::parse_json_to_options; 10 | use crate::internal_jni::io::write::get_df_and_writer; 11 | use crate::utils::error::ResultExt; 12 | 13 | fn parse_ipc_compression(compression: Option) -> Option { 14 | match compression { 15 | Some(t) => match t.to_lowercase().as_str() { 16 | "uncompressed" => None, 17 | "lz4" => Some(IpcCompression::LZ4), 18 | "zstd" => Some(IpcCompression::ZSTD), 19 | e => { 20 | polars_warn!(format!( 21 | "Compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {e}. Using defaults." 
22 | )); 23 | None 24 | }, 25 | }, 26 | _ => None, 27 | } 28 | } 29 | 30 | #[jni_fn("org.polars.scala.polars.internal.jni.io.write$")] 31 | pub fn writeIPC( 32 | mut env: JNIEnv, 33 | _object: JObject, 34 | df_ptr: *mut DataFrame, 35 | filePath: JString, 36 | options: JString, 37 | ) { 38 | let mut options = parse_json_to_options(&mut env, options); 39 | 40 | let compat_level = 41 | options 42 | .remove("write_ipc_compat_level") 43 | .map(|s| match s.to_lowercase().as_str() { 44 | "newest" => CompatLevel::newest(), 45 | _ => CompatLevel::oldest(), 46 | }); 47 | 48 | let overwrite_mode = options 49 | .remove("write_mode") 50 | .map(|s| matches!(s.to_lowercase().as_str(), "overwrite")) 51 | .unwrap_or(false); 52 | 53 | let compression = options.remove("write_compression"); 54 | 55 | let (mut dataframe, writer) = 56 | get_df_and_writer(&mut env, df_ptr, filePath, overwrite_mode, options); 57 | 58 | let ipc_compression = parse_ipc_compression(compression); 59 | 60 | let mut ipc_writer = IpcWriter::new(writer).with_compression(ipc_compression); 61 | 62 | if let Some(value) = compat_level { 63 | ipc_writer = ipc_writer.with_compat_level(value) 64 | } 65 | 66 | ipc_writer 67 | .finish(&mut dataframe) 68 | .context("Failed to write IPC data") 69 | .unwrap_or_throw(&mut env); 70 | } 71 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/write/json.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use anyhow::Context; 4 | use jni::objects::{JObject, JString}; 5 | use jni::JNIEnv; 6 | use jni_fn::jni_fn; 7 | use polars::prelude::*; 8 | 9 | use crate::internal_jni::io::parse_json_to_options; 10 | use crate::internal_jni::io::write::get_df_and_writer; 11 | use crate::utils::error::ResultExt; 12 | 13 | #[jni_fn("org.polars.scala.polars.internal.jni.io.write$")] 14 | pub fn writeJson( 15 | mut env: JNIEnv, 16 | _object: JObject, 17 | df_ptr: *mut DataFrame, 18 | filePath: JString, 19 | options: JString, 20 | ) { 21 | let mut options = parse_json_to_options(&mut env, options); 22 | 23 | let json_format = options 24 | .remove("write_json_format") 25 | .and_then(|s| match s.to_lowercase().as_str() { 26 | "json" => Some(JsonFormat::Json), 27 | "json_lines" => Some(JsonFormat::JsonLines), 28 | _ => None, 29 | }) 30 | .unwrap_or(JsonFormat::Json); 31 | 32 | let overwrite_mode = options 33 | .remove("write_mode") 34 | .map(|s| matches!(s.to_lowercase().as_str(), "overwrite")) 35 | .unwrap_or(false); 36 | 37 | let (mut dataframe, writer) = 38 | get_df_and_writer(&mut env, df_ptr, filePath, overwrite_mode, options); 39 | 40 | let mut json_writer = JsonWriter::new(writer).with_json_format(json_format); 41 | 42 | json_writer 43 | .finish(&mut dataframe) 44 | .context("Failed to write JSON data") 45 | .unwrap_or_throw(&mut env); 46 | } 47 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/write/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod avro; 2 | pub mod csv; 3 | pub mod ipc; 4 | pub mod json; 5 | pub mod parquet; 6 | 7 | use std::sync::Arc; 8 | 9 | use anyhow::Context; 10 | use jni::objects::JString; 11 | use jni::JNIEnv; 12 | use object_store::path::Path; 13 | use object_store::ObjectStore; 14 | use polars::io::cloud::{build_object_store, CloudOptions, CloudWriter}; 15 | use polars::io::pl_async::get_runtime; 16 | use polars::prelude::*; 17 | 18 | use 
super::get_file_path; 19 | use crate::utils::error::ResultExt; 20 | 21 | async fn ensure_write_mode( 22 | object_store_ref: &Arc<dyn ObjectStore>, 23 | uri: &str, 24 | prefix: &str, 25 | overwrite_mode: bool, 26 | ) -> PolarsResult<()> { 27 | let meta = object_store_ref.head(&Path::from(prefix)).await; 28 | match meta { 29 | Err(object_store::Error::NotFound { .. }) => Ok(()), 30 | Err(e) => Err(PolarsError::IO { 31 | error: Arc::new(e.into()), 32 | msg: Some("Failed to connect to object store, recheck the provided options".into()), 33 | }), 34 | Ok(_) if !overwrite_mode => Err( 35 | polars_err!(ComputeError: "File already exists at the provided location `{uri}` and overwrite option is not set"), 36 | ), 37 | _ => Ok(()), 38 | } 39 | } 40 | 41 | async fn create_cloud_writer( 42 | uri: &str, 43 | cloud_options: Option<&CloudOptions>, 44 | overwrite_mode: bool, 45 | ) -> PolarsResult<CloudWriter> { 46 | let (cloud_location, object_store) = build_object_store(uri, cloud_options, false).await?; 47 | let dyn_store = object_store.to_dyn_object_store().await; 48 | ensure_write_mode( 49 | &dyn_store, 50 | uri, 51 | cloud_location.prefix.as_ref(), 52 | overwrite_mode, 53 | ) 54 | .await?; 55 | 56 | let cloud_writer = CloudWriter::new_with_object_store( 57 | dyn_store.clone(), 58 | cloud_location.prefix.clone().into(), 59 | )?; 60 | 61 | Ok(cloud_writer) 62 | } 63 | 64 | fn get_df_and_writer( 65 | env: &mut JNIEnv, 66 | df_ptr: *mut DataFrame, 67 | filePath: JString, 68 | overwrite_mode: bool, 69 | writer_options: PlHashMap<String, String>, 70 | ) -> (DataFrame, CloudWriter) { 71 | let full_path = get_file_path(env, filePath); 72 | let uri = full_path.as_str(); 73 | 74 | let cloud_options = CloudOptions::from_untyped_config(uri, &writer_options); 75 | let writer: CloudWriter = get_runtime() 76 | .block_on_potential_spawn(async { 77 | create_cloud_writer(uri, cloud_options.ok().as_ref(), overwrite_mode).await 78 | }) 79 | .context("Failed to create writer") 80 | .unwrap_or_throw(env); 81 | 82 | let dataframe = unsafe { &*df_ptr }.clone(); 83 | (dataframe, writer) 84 | } 85 | -------------------------------------------------------------------------------- /native/src/internal_jni/io/write/parquet.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use anyhow::Context; 4 | use jni::objects::{JObject, JString}; 5 | use jni::JNIEnv; 6 | use jni_fn::jni_fn; 7 | use num_traits::ToPrimitive; 8 | use polars::prelude::*; 9 | 10 | use crate::internal_jni::io::parse_json_to_options; 11 | use crate::internal_jni::io::write::get_df_and_writer; 12 | use crate::utils::error::ResultExt; 13 | 14 | fn parse_parquet_compression( 15 | compression: Option<String>, 16 | compression_level: Option<i32>, 17 | ) -> Option<ParquetCompression> { 18 | match (compression, compression_level) { 19 | (Some(t), l) => match t.to_lowercase().as_str() { 20 | "uncompressed" => Some(ParquetCompression::Uncompressed), 21 | "snappy" => Some(ParquetCompression::Snappy), 22 | "lz4" => Some(ParquetCompression::Lz4Raw), 23 | "lzo" => Some(ParquetCompression::Lzo), 24 | "gzip" => { 25 | let level = l.and_then(|v| GzipLevel::try_new(v.to_u8()?).ok()); 26 | Some(ParquetCompression::Gzip(level)) 27 | }, 28 | "brotli" => { 29 | let level = l.and_then(|v| BrotliLevel::try_new(v.to_u32()?).ok()); 30 | Some(ParquetCompression::Brotli(level)) 31 | }, 32 | "zstd" => { 33 | let level = l.and_then(|v| ZstdLevel::try_new(v).ok()); 34 | Some(ParquetCompression::Zstd(level)) 35 | }, 36 | e => { 37 | polars_warn!(format!("Compression must be one of {{'uncompressed', 
'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}. Using defaults.")); 38 | None 39 | }, 40 | }, 41 | _ => None, 42 | } 43 | } 44 | 45 | #[jni_fn("org.polars.scala.polars.internal.jni.io.write$")] 46 | pub fn writeParquet( 47 | mut env: JNIEnv, 48 | _object: JObject, 49 | df_ptr: *mut DataFrame, 50 | filePath: JString, 51 | options: JString, 52 | ) { 53 | let mut options = parse_json_to_options(&mut env, options); 54 | 55 | let is_parallel = options 56 | .remove("write_parquet_parallel") 57 | .and_then(|s| s.parse::<bool>().ok()); 58 | 59 | let data_page_size = options 60 | .remove("write_parquet_data_page_size") 61 | .and_then(|s| s.parse::<usize>().ok()); 62 | 63 | let row_group_size = options 64 | .remove("write_parquet_row_group_size") 65 | .and_then(|s| s.parse::<usize>().ok()); 66 | 67 | let overwrite_mode = options 68 | .remove("write_mode") 69 | .map(|s| matches!(s.to_lowercase().as_str(), "overwrite")) 70 | .unwrap_or(false); 71 | 72 | let compression = options.remove("write_compression"); 73 | let compression_level = options 74 | .remove("write_compression_level") 75 | .and_then(|s| s.parse::<i32>().ok()); 76 | 77 | let write_stats = options 78 | .remove("write_parquet_stats") 79 | .map(|s| match s.as_str() { 80 | "full" => StatisticsOptions::full(), 81 | "none" => StatisticsOptions::empty(), 82 | _ => StatisticsOptions::default(), 83 | }); 84 | 85 | let (mut dataframe, writer) = 86 | get_df_and_writer(&mut env, df_ptr, filePath, overwrite_mode, options); 87 | 88 | let parquet_compression = parse_parquet_compression(compression, compression_level); 89 | 90 | let mut parquet_writer = ParquetWriter::new(writer) 91 | .with_data_page_size(data_page_size) 92 | .with_row_group_size(row_group_size); 93 | 94 | if let Some(value) = is_parallel { 95 | parquet_writer = parquet_writer.set_parallel(value) 96 | } 97 | 98 | if let Some(value) = write_stats { 99 | parquet_writer = parquet_writer.with_statistics(value) 100 | } 101 | 102 | if let Some(value) = parquet_compression { 103 | parquet_writer = parquet_writer.with_compression(value) 104 | } 105 | 106 | parquet_writer 107 | .finish(&mut dataframe) 108 | .context("Failed to write Parquet data") 109 | .unwrap_or_throw(&mut env); 110 | } 111 | -------------------------------------------------------------------------------- /native/src/internal_jni/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod expr; 2 | pub mod frame; 3 | pub mod io; 4 | pub mod lazy; 5 | pub mod row; 6 | pub mod series; 7 | pub mod utils; 8 | -------------------------------------------------------------------------------- /native/src/internal_jni/series.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use std::iter::Iterator; 4 | 5 | use anyhow::{Context, Error}; 6 | use jni::objects::*; 7 | use jni::sys::jlong; 8 | use jni::JNIEnv; 9 | use jni_fn::jni_fn; 10 | use polars::export::chrono::{NaiveDate, NaiveDateTime, NaiveTime}; 11 | use polars::prelude::*; 12 | 13 | use crate::internal_jni::utils::{j_string_to_string, to_ptr, JavaArrayToVec}; 14 | use crate::utils::error::ResultExt; 15 | 16 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 17 | pub unsafe fn new_str_series( 18 | mut env: JNIEnv, 19 | _: JClass, 20 | name: JString, 21 | values: JObjectArray, 22 | ) -> jlong { 23 | let data: Vec<String> = JavaArrayToVec::to_vec(&mut env, values) 24 | .into_iter() 25 | .map(|o| JObject::from_raw(o)) 26 | .map(|o| { 27 | j_string_to_string( 28 | &mut env, 29 | 
&JString::from(o), 30 | Some("Failed to parse the provided value as a series element"), 31 | ) 32 | }) 33 | .collect(); 34 | 35 | let series_name = j_string_to_string( 36 | &mut env, 37 | &name, 38 | Some("Failed to parse the provided value as a series name"), 39 | ); 40 | let series = Series::new(PlSmallStr::from_string(series_name), data); 41 | 42 | to_ptr(series) 43 | } 44 | 45 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 46 | pub fn new_long_series(mut env: JNIEnv, _: JClass, name: JString, values: JLongArray) -> jlong { 47 | let data = JavaArrayToVec::to_vec(&mut env, values); 48 | 49 | let series_name = j_string_to_string( 50 | &mut env, 51 | &name, 52 | Some("Failed to parse the provided value as a series name"), 53 | ); 54 | let series = Series::new(PlSmallStr::from_string(series_name), data); 55 | 56 | to_ptr(series) 57 | } 58 | 59 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 60 | pub fn new_int_series(mut env: JNIEnv, _: JClass, name: JString, values: JIntArray) -> jlong { 61 | let data = JavaArrayToVec::to_vec(&mut env, values); 62 | 63 | let series_name = j_string_to_string( 64 | &mut env, 65 | &name, 66 | Some("Failed to parse the provided value as a series name"), 67 | ); 68 | let series = Series::new(PlSmallStr::from_string(series_name), data); 69 | 70 | to_ptr(series) 71 | } 72 | 73 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 74 | pub fn new_float_series(mut env: JNIEnv, _: JClass, name: JString, values: JFloatArray) -> jlong { 75 | let data = JavaArrayToVec::to_vec(&mut env, values); 76 | 77 | let series_name = j_string_to_string( 78 | &mut env, 79 | &name, 80 | Some("Failed to parse the provided value as a series name"), 81 | ); 82 | let series = Series::new(PlSmallStr::from_string(series_name), data); 83 | 84 | to_ptr(series) 85 | } 86 | 87 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 88 | pub fn new_double_series(mut env: JNIEnv, _: JClass, name: JString, values: JDoubleArray) -> jlong { 89 | let data = JavaArrayToVec::to_vec(&mut env, values); 90 | 91 | let series_name = j_string_to_string( 92 | &mut env, 93 | &name, 94 | Some("Failed to parse the provided value as a series name"), 95 | ); 96 | let series = Series::new(PlSmallStr::from_string(series_name), data); 97 | 98 | to_ptr(series) 99 | } 100 | 101 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 102 | pub fn new_boolean_series( 103 | mut env: JNIEnv, 104 | _: JClass, 105 | name: JString, 106 | values: JBooleanArray, 107 | ) -> jlong { 108 | let data = JavaArrayToVec::to_vec(&mut env, values); 109 | 110 | let series_name = j_string_to_string( 111 | &mut env, 112 | &name, 113 | Some("Failed to parse the provided value as a series name"), 114 | ); 115 | let series = Series::new(PlSmallStr::from_string(series_name), data); 116 | 117 | to_ptr(series) 118 | } 119 | 120 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 121 | pub unsafe fn new_date_series( 122 | mut env: JNIEnv, 123 | _: JClass, 124 | name: JString, 125 | values: JObjectArray, 126 | ) -> jlong { 127 | let data: Vec<NaiveDate> = JavaArrayToVec::to_vec(&mut env, values) 128 | .into_iter() 129 | .map(|o| JObject::from_raw(o)) 130 | .map(|o| { 131 | j_string_to_string( 132 | &mut env, 133 | &JString::from(o), 134 | Some("Failed to parse the provided value as a series element"), 135 | ) 136 | }) 137 | .map(|s| { 138 | let lit = s.as_str(); 139 | NaiveDate::parse_from_str(lit, "%Y-%m-%d").context(format!( 140 | "Failed to parse value `{}` as date with format `%Y-%m-%d`", 141 | lit 142 | ))
143 | }) 144 | .collect::<Result<Vec<_>, Error>>() 145 | .unwrap_or_throw(&mut env); 146 | 147 | let series_name = j_string_to_string( 148 | &mut env, 149 | &name, 150 | Some("Failed to parse the provided value as a series name"), 151 | ); 152 | let series = Series::new(PlSmallStr::from_string(series_name), data); 153 | 154 | to_ptr(series) 155 | } 156 | 157 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 158 | pub unsafe fn new_time_series( 159 | mut env: JNIEnv, 160 | _: JClass, 161 | name: JString, 162 | values: JObjectArray, 163 | ) -> jlong { 164 | let data: Vec<NaiveTime> = JavaArrayToVec::to_vec(&mut env, values) 165 | .into_iter() 166 | .map(|o| JObject::from_raw(o)) 167 | .map(|o| { 168 | j_string_to_string( 169 | &mut env, 170 | &JString::from(o), 171 | Some("Failed to parse the provided value as a series element"), 172 | ) 173 | }) 174 | .map(|s| { 175 | let lit = s.as_str(); 176 | NaiveTime::parse_from_str(lit, "%H:%M:%S%.f").context(format!( 177 | "Failed to parse value `{}` as time with format `%H:%M:%S%.f`", 178 | lit 179 | )) 180 | }) 181 | .collect::<Result<Vec<_>, Error>>() 182 | .unwrap_or_throw(&mut env); 183 | 184 | let series_name = j_string_to_string( 185 | &mut env, 186 | &name, 187 | Some("Failed to parse the provided value as a series name"), 188 | ); 189 | let series = Series::new(PlSmallStr::from_string(series_name), data); 190 | 191 | to_ptr(series) 192 | } 193 | 194 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 195 | pub unsafe fn new_datetime_series( 196 | mut env: JNIEnv, 197 | _: JClass, 198 | name: JString, 199 | values: JObjectArray, 200 | ) -> jlong { 201 | let data: Vec<NaiveDateTime> = JavaArrayToVec::to_vec(&mut env, values) 202 | .into_iter() 203 | .map(|o| JObject::from_raw(o)) 204 | .map(|o| { 205 | j_string_to_string( 206 | &mut env, 207 | &JString::from(o), 208 | Some("Failed to parse the provided value as a series element"), 209 | ) 210 | }) 211 | .map(|s| { 212 | let lit = s.as_str(); 213 | NaiveDateTime::parse_from_str(lit, "%FT%T%.f").context(format!( 214 | "Failed to parse value `{}` as datetime with format `%FT%T%.f`", 215 | lit 216 | )) 217 | }) 218 | .collect::<Result<Vec<_>, Error>>() 219 | .unwrap_or_throw(&mut env); 220 | 221 | let series_name = j_string_to_string( 222 | &mut env, 223 | &name, 224 | Some("Failed to parse the provided value as a series name"), 225 | ); 226 | let series = Series::new(PlSmallStr::from_string(series_name), data); 227 | 228 | to_ptr(series) 229 | } 230 | 231 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 232 | pub unsafe fn new_list_series( 233 | mut env: JNIEnv, 234 | _: JClass, 235 | name: JString, 236 | values: JLongArray, 237 | ) -> jlong { 238 | let data: Vec<Series> = JavaArrayToVec::to_vec(&mut env, values) 239 | .into_iter() 240 | .map(|ptr| (*(ptr as *mut Series)).to_owned()) 241 | .collect(); 242 | 243 | let series_name = j_string_to_string( 244 | &mut env, 245 | &name, 246 | Some("Failed to parse the provided value as a series name"), 247 | ); 248 | let series = Series::new(PlSmallStr::from_string(series_name), data); 249 | 250 | to_ptr(series) 251 | } 252 | 253 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 254 | pub unsafe fn new_struct_series( 255 | mut env: JNIEnv, 256 | _: JClass, 257 | name: JString, 258 | values: JLongArray, 259 | ) -> jlong { 260 | let data: Vec<Series> = JavaArrayToVec::to_vec(&mut env, values) 261 | .into_iter() 262 | .map(|ptr| (*(ptr as *mut Series)).to_owned()) 263 | .collect(); 264 | 265 | let series_name = j_string_to_string( 266 | &mut env, 267 | &name, 268 | Some("Failed to parse the provided 
value as a series name"), 269 | ); 270 | let series = StructChunked::from_series( 271 | PlSmallStr::from_string(series_name), 272 | data.len(), 273 | data.iter(), 274 | ) 275 | .context("Failed to create struct series from provided list of series") 276 | .unwrap_or_throw(&mut env) 277 | .into_series(); 278 | 279 | to_ptr(series) 280 | } 281 | 282 | #[jni_fn("org.polars.scala.polars.internal.jni.series$")] 283 | pub unsafe fn show(_: JNIEnv, _: JClass, series_ptr: *mut Series) { 284 | let series = &*series_ptr; 285 | println!("{:?}", series) 286 | } 287 | -------------------------------------------------------------------------------- /native/src/internal_jni/utils.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | use anyhow::Context; 4 | use jni::objects::ReleaseMode::NoCopyBack; 5 | use jni::objects::*; 6 | use jni::strings::JNIString; 7 | use jni::sys::*; 8 | use jni::JNIEnv; 9 | 10 | use crate::utils::error::ResultExt; 11 | 12 | pub trait JavaArrayToVec { 13 | type Output; 14 | type InternalType; 15 | 16 | fn get_elements<'local, 'array, 'other_local, 'env>( 17 | env: &'env mut JNIEnv<'local>, 18 | array: &'array JPrimitiveArray<'other_local, <Self as JavaArrayToVec>::InternalType>, 19 | ) -> AutoElementsCritical<'local, 'other_local, 'array, 'env, Self::InternalType> 20 | where 21 | <Self as JavaArrayToVec>::InternalType: TypeArray, 22 | { 23 | unsafe { 24 | let mut cloned_env = env.unsafe_clone(); 25 | env.get_array_elements_critical(array, NoCopyBack) 26 | .context("Failed to get elements of the array") 27 | .unwrap_or_throw(&mut cloned_env) 28 | } 29 | } 30 | 31 | fn to_vec(env: &mut JNIEnv, array: Self) -> Vec<Self::Output>; 32 | } 33 | 34 | impl JavaArrayToVec for JBooleanArray<'_> { 35 | type Output = bool; 36 | type InternalType = jboolean; 37 | 38 | fn to_vec(env: &mut JNIEnv, array: Self) -> Vec<Self::Output> { 39 | let arr = Self::get_elements(env, &array); 40 | arr.iter().map(|&jb| jb == JNI_TRUE).collect() 41 | } 42 | } 43 | 44 | impl JavaArrayToVec for JIntArray<'_> { 45 | type Output = i32; 46 | type InternalType = jint; 47 | 48 | fn to_vec(env: &mut JNIEnv, array: Self) -> Vec<Self::Output> { 49 | let arr = Self::get_elements(env, &array); 50 | arr.iter().copied().collect() 51 | } 52 | } 53 | 54 | impl JavaArrayToVec for JLongArray<'_> { 55 | type Output = i64; 56 | type InternalType = jlong; 57 | 58 | fn to_vec(env: &mut JNIEnv, array: Self) -> Vec<Self::Output> { 59 | let arr = Self::get_elements(env, &array); 60 | arr.iter().copied().collect() 61 | } 62 | } 63 | 64 | impl JavaArrayToVec for JFloatArray<'_> { 65 | type Output = f32; 66 | type InternalType = jfloat; 67 | 68 | fn to_vec(env: &mut JNIEnv, array: Self) -> Vec<Self::Output> { 69 | let arr = Self::get_elements(env, &array); 70 | arr.iter().copied().collect() 71 | } 72 | } 73 | 74 | impl JavaArrayToVec for JDoubleArray<'_> { 75 | type Output = f64; 76 | type InternalType = jdouble; 77 | 78 | fn to_vec(env: &mut JNIEnv, array: Self) -> Vec<Self::Output> { 79 | let arr = Self::get_elements(env, &array); 80 | arr.iter().copied().collect() 81 | } 82 | } 83 | 84 | impl JavaArrayToVec for JObjectArray<'_> { 85 | type Output = jobject; 86 | type InternalType = jobject; 87 | fn to_vec(env: &mut JNIEnv, array: Self) -> Vec<Self::Output> { 88 | let len = env 89 | .get_array_length(&array) 90 | .context("Error getting length of the array") 91 | .unwrap_or_throw(env); 92 | let mut result = Vec::with_capacity(len as usize); 93 | 94 | for i in 0..len { 95 | let obj = env 96 | .get_object_array_element(&array, i) 97 | .context("Error getting element of the array") 98 | .unwrap_or_throw(env); 99 | 
result.push(obj.into_raw()); 100 | } 101 | 102 | result 103 | } 104 | } 105 | 106 | pub fn string_to_j_string<T, S: Into<JNIString>>(env: &mut JNIEnv, s: S, msg: Option<T>) -> jstring 107 | where 108 | T: AsRef<str> + Send + Sync + Display + 'static, 109 | { 110 | if let Some(c) = msg { 111 | env.new_string(s).context(c) 112 | } else { 113 | env.new_string(s) 114 | .context("Error converting JString to Rust String") 115 | } 116 | .unwrap_or_throw(env) 117 | .as_raw() 118 | } 119 | 120 | pub fn j_string_to_string<T>(env: &mut JNIEnv, s: &JString, msg: Option<T>) -> String 121 | where 122 | T: AsRef<str> + Send + Sync + Display + 'static, 123 | { 124 | if let Some(c) = msg { 125 | env.get_string(s).context(c) 126 | } else { 127 | env.get_string(s) 128 | .context("Error converting JString to Rust String") 129 | } 130 | .unwrap_or_throw(env) 131 | .into() 132 | } 133 | 134 | pub fn get_n_rows(n_rows: jlong) -> Option<usize> { 135 | if n_rows.is_positive() { 136 | Some(n_rows as usize) 137 | } else { 138 | None 139 | } 140 | } 141 | 142 | pub fn to_ptr<T: Clone>(v: T) -> jlong { 143 | Box::into_raw(Box::new(v.clone())) as jlong 144 | } 145 | 146 | pub fn find_java_class<'a>(env: &mut JNIEnv<'a>, class: &str) -> JClass<'a> { 147 | env.find_class(class) 148 | .context(format!( 149 | "Error finding Java class for provided value `{class}`" 150 | )) 151 | .unwrap_or_throw(env) 152 | } 153 | -------------------------------------------------------------------------------- /native/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![allow(clippy::missing_safety_doc)] 3 | #![allow(clippy::expect_fun_call)] 4 | 5 | use anyhow::Context; 6 | use internal_jni::utils::{j_string_to_string, string_to_j_string}; 7 | use jni::objects::{JObject, JString}; 8 | use jni::sys::{jboolean, jstring, JNI_TRUE}; 9 | use jni::JNIEnv; 10 | use jni_fn::jni_fn; 11 | use utils::error::ResultExt; 12 | 13 | pub mod internal_jni; 14 | pub mod utils; 15 | 16 | #[jni_fn("org.polars.scala.polars.internal.jni.common$")] 17 | pub fn version(mut env: JNIEnv, _object: JObject) -> jstring { 18 | let cargo_toml_raw = include_str!("../Cargo.toml"); 19 | let cargo_toml_res: anyhow::Result<toml::Value> = 20 | toml::from_str(cargo_toml_raw).context("Failed to parse Cargo.toml"); 21 | 22 | cargo_toml_res 23 | .map(|cargo_toml| { 24 | let polars_version = cargo_toml 25 | .get("dependencies") 26 | .and_then(|v| v.get("polars")) 27 | .and_then(|v| v.get("version")); 28 | 29 | let polars_version = match polars_version { 30 | Some(toml::Value::String(s)) => s.as_str(), 31 | _ => "unknown", 32 | }; 33 | 34 | string_to_j_string(&mut env, polars_version, None::<&str>) 35 | }) 36 | .context("Failed to get polars_rs version") 37 | .unwrap_or_throw(&mut env) 38 | } 39 | 40 | #[jni_fn("org.polars.scala.polars.internal.jni.common$")] 41 | pub fn setConfigs(mut env: JNIEnv, _object: JObject, options: JObject) -> jboolean { 42 | let map = env 43 | .get_map(&options) 44 | .context("Failed to get the provided config options as a map") 45 | .unwrap_or_throw(&mut env); 46 | 47 | let mut map_iterator = map 48 | .iter(&mut env) 49 | .context("Failed to iterate over the provided config options") 50 | .unwrap_or_throw(&mut env); 51 | 52 | while let Ok(Some((key, value))) = map_iterator.next(&mut env) { 53 | let key_str = j_string_to_string( 54 | &mut env, 55 | &JString::from(key), 56 | Some("Failed to parse the provided config key as string"), 57 | ); 58 | 59 | let value_str = j_string_to_string( 60 | &mut env, 61 | &JString::from(value), 62 | Some("Failed to parse the provided config value as 
string"), 63 | ); 64 | 65 | std::env::set_var(key_str, value_str); 66 | } 67 | 68 | JNI_TRUE 69 | } 70 | -------------------------------------------------------------------------------- /native/src/utils/error.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Error; 2 | use jni::errors::Result as JniResult; 3 | use jni::JNIEnv; 4 | 5 | use crate::internal_jni::utils::find_java_class; 6 | 7 | fn format_nested_error(error: &Error) -> String { 8 | let mut formatted = String::new(); 9 | 10 | for (i, cause) in error.chain().enumerate() { 11 | if i == 0 { 12 | formatted.push_str(&format!("{cause}\n",)); 13 | } else { 14 | formatted.push_str(&format!(" Caused by: {cause}\n",)); 15 | } 16 | } 17 | 18 | formatted.trim_end().to_string() 19 | } 20 | 21 | pub fn throw_java_exception(env: &mut JNIEnv, err: Error) -> JniResult<()> { 22 | // Find the Java exception class 23 | let exception_class = find_java_class(env, "java/lang/RuntimeException"); 24 | 25 | // Throw the exception with the provided message 26 | env.throw_new(exception_class, format_nested_error(&err))?; 27 | Ok(()) 28 | } 29 | 30 | /// Trait to unwrap `Result` or throw an exception. 31 | pub trait ResultExt<T> { 32 | fn unwrap_or_throw(self, env: &mut JNIEnv) -> T; 33 | } 34 | 35 | impl<T> ResultExt<T> for Result<T, Error> { 36 | fn unwrap_or_throw(self, env: &mut JNIEnv) -> T { 37 | match self { 38 | Ok(val) => val, 39 | Err(err) => { 40 | // Map the error to a Java exception 41 | let _ = throw_java_exception(env, err); 42 | 43 | // Describe the pending Java exception and abort, since no valid value can be returned to JNI 44 | env.exception_describe().unwrap_or(()); 45 | std::process::abort(); 46 | }, 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /native/src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod error; 2 | -------------------------------------------------------------------------------- /project/DocSettings.scala: -------------------------------------------------------------------------------- 1 | import sbt.* 2 | import sbt.Keys.* 3 | 4 | import sbtunidoc.* 5 | import sbtunidoc.BaseUnidocPlugin.autoImport.* 6 | import sbtunidoc.JavaUnidocPlugin.autoImport.* 7 | import sbtunidoc.ScalaUnidocPlugin.autoImport.* 8 | 9 | /* Borrowed from delta-io/delta */ 10 | 11 | object DocSettings { 12 | val unidocSourceFilePatterns = settingKey[Seq[SourceFilePattern]]( 13 | "Patterns to match (simple substring match) against full source file paths. " + 14 | "Matched files will be selected for generating API docs." 15 | ) 16 | 17 | implicit class PatternsHelper(patterns: Seq[SourceFilePattern]) { 18 | def scopeToProject(projectToAdd: Project): Seq[SourceFilePattern] = 19 | patterns.map(_.copy(project = Some(projectToAdd))) 20 | } 21 | implicit class UnidocHelper(val projectToUpdate: Project) { 22 | def configureUnidoc(docTitle: String = null): Project = 23 | projectToUpdate 24 | .enablePlugins(ScalaUnidocPlugin, GenJavadocPlugin, JavaUnidocPlugin) 25 | .settings( 26 | libraryDependencies ++= Seq( 27 | // Ensure genJavaDoc plugin is of the right version that works with Scala 2.12 28 | compilerPlugin( 29 | "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full 30 | ) 31 | ), 32 | generateUnidocSettings(docTitle), 33 | 34 | // Ensure unidoc is run with tests. 
35 | (Test / test) := ((Test / test) dependsOn (Compile / unidoc)).value 36 | ) 37 | 38 | private def generateUnidocSettings(customDocTitle: String): Def.SettingsDefinition = { 39 | 40 | val internalFilePattern = Seq("/internal/", "/execution/", "$") 41 | 42 | // Generate the full doc title 43 | def fullDocTitle(projectName: String, version: String, isScalaDoc: Boolean): String = { 44 | val namePart = Option(customDocTitle).getOrElse { 45 | projectName.split("-").map(_.capitalize).mkString(" ") 46 | } 47 | val versionPart = version.replaceAll("-SNAPSHOT", "") 48 | val langPart = if (isScalaDoc) "Scala API Docs" else "Java API Docs" 49 | s"$namePart $versionPart - $langPart" 50 | } 51 | 52 | // Remove source files that does not match the pattern 53 | def ignoreUndocumentedSources( 54 | allSourceFiles: Seq[Seq[java.io.File]], 55 | sourceFilePatternsToKeep: Seq[SourceFilePattern] 56 | ): Seq[Seq[java.io.File]] = { 57 | if (sourceFilePatternsToKeep.isEmpty) return Nil 58 | 59 | val projectSrcDirToFilePatternsToKeep = sourceFilePatternsToKeep.map { 60 | case SourceFilePattern(dirs, projOption) => 61 | val projectPath = projOption.getOrElse(projectToUpdate).base.getCanonicalPath 62 | projectPath -> dirs 63 | }.toMap 64 | 65 | def shouldKeep(path: String): Boolean = { 66 | projectSrcDirToFilePatternsToKeep.foreach { case (projBaseDir, filePatterns) => 67 | def isInProjectSrcDir = 68 | path.contains(s"$projBaseDir/src") || path.contains(s"$projBaseDir/target/java/") 69 | def matchesFilePattern = filePatterns.exists(path.contains(_)) 70 | def matchesInternalFilePattern = internalFilePattern.exists(path.contains(_)) 71 | if (isInProjectSrcDir && matchesFilePattern && !matchesInternalFilePattern) 72 | return true 73 | } 74 | false 75 | } 76 | allSourceFiles.map(_.filter(f => shouldKeep(f.getCanonicalPath))) 77 | } 78 | 79 | val javaUnidocSettings = Seq( 80 | // Configure Java unidoc 81 | JavaUnidoc / unidoc / javacOptions := Seq( 82 | "-public", 83 | "-windowtitle", 84 | fullDocTitle((projectToUpdate / name).value, version.value, isScalaDoc = false), 85 | "-noqualifier", 86 | "java.lang", 87 | "-tag", 88 | "implNote:a:Implementation Note:", 89 | "-tag", 90 | "apiNote:a:API Note:", 91 | "-Xdoclint:none" 92 | ), 93 | JavaUnidoc / unidoc / unidocAllSources := 94 | ignoreUndocumentedSources( 95 | allSourceFiles = (JavaUnidoc / unidoc / unidocAllSources).value, 96 | sourceFilePatternsToKeep = unidocSourceFilePatterns.value 97 | ), 98 | 99 | // Settings for plain, old Java doc needed for successful doc generation during publishing. 100 | Compile / doc / javacOptions ++= Seq( 101 | "-public", 102 | "-noqualifier", 103 | "java.lang", 104 | "-tag", 105 | "implNote:a:Implementation Note:", 106 | "-tag", 107 | "apiNote:a:API Note:", 108 | "-Xdoclint:all" 109 | ) 110 | ) 111 | 112 | val scalaUnidocSettings = Seq( 113 | // Configure Scala unidoc 114 | ScalaUnidoc / unidoc / scalacOptions ++= Seq( 115 | "-doc-title", 116 | fullDocTitle((projectToUpdate / name).value, version.value, isScalaDoc = true) 117 | ), 118 | ScalaUnidoc / unidoc / unidocAllSources := 119 | ignoreUndocumentedSources( 120 | allSourceFiles = (ScalaUnidoc / unidoc / unidocAllSources).value, 121 | sourceFilePatternsToKeep = unidocSourceFilePatterns.value 122 | ) 123 | ) 124 | 125 | javaUnidocSettings ++ scalaUnidocSettings 126 | } 127 | } 128 | 129 | /** Patterns are strings to do simple substring matches on the full path of every source file. 
130 | */ 131 | case class SourceFilePattern(patterns: Seq[String], project: Option[Project] = None) 132 | 133 | object SourceFilePattern { 134 | def apply(patterns: String*): SourceFilePattern = SourceFilePattern(patterns.toSeq, None) 135 | } 136 | 137 | } 138 | -------------------------------------------------------------------------------- /project/ExtraCommands.scala: -------------------------------------------------------------------------------- 1 | import sbt.* 2 | import sbt.Keys.* 3 | 4 | import Utils.* 5 | import com.github.sbt.jni.plugins.JniJavah.autoImport.javah 6 | 7 | object ExtraCommands { 8 | 9 | lazy val cleanHeaders = 10 | taskKey[Unit]("Removes all previously generated headers") 11 | lazy val cargoFmt = 12 | taskKey[Unit]("Formats native module and its Cargo.toml.") 13 | lazy val cargoCheck = 14 | taskKey[Unit]("Checks the formatting of native module and its Cargo.toml.") 15 | 16 | lazy val commandAliases: Seq[Setting[_]] = Seq( 17 | addCommandAlias("cleanAll", ";cleanHeaders; clean; cleanFiles; reload"), 18 | addCommandAlias("genHeaders", ";cleanHeaders; javah"), 19 | addCommandAlias("fmtAll", ";scalafmtAll; scalafmtSbt; javafmtAll; cargoFmt; reload"), 20 | addCommandAlias( 21 | "fmtCheckAll", 22 | ";scalafmtCheckAll; scalafmtSbtCheck; javafmtCheckAll; cargoCheck" 23 | ) 24 | ).flatten 25 | 26 | lazy val commands: Seq[Setting[_]] = Seq( 27 | cleanHeaders := { 28 | import scala.reflect.io.Directory 29 | 30 | val headerDir = (javah / target).value 31 | val directory = new Directory(headerDir) 32 | 33 | directory.deleteRecursively() 34 | sLog.value.info(s"Removed headers directory $headerDir") 35 | }, 36 | cargoFmt := { 37 | val nativeRootDir = nativeRoot.value: @sbtUnchecked 38 | val cmds = Seq( 39 | "cargo fix --allow-dirty --allow-staged", 40 | "cargo sort", 41 | "cargo fmt --verbose --all" 42 | ) 43 | 44 | executeProcesses(cmds, cwd = Some(nativeRootDir), sLog.value, infoOnly = true) 45 | }, 46 | cargoCheck := { 47 | val nativeRootDir = nativeRoot.value: @sbtUnchecked 48 | val cmds = Seq( 49 | "cargo fmt --check --all", 50 | "cargo sort --check", 51 | "cargo clippy -- -D warnings" 52 | ) 53 | 54 | executeProcesses(cmds, cwd = Some(nativeRootDir), sLog.value, infoOnly = true) 55 | } 56 | ) 57 | 58 | } 59 | -------------------------------------------------------------------------------- /project/GeneralSettings.scala: -------------------------------------------------------------------------------- 1 | import sbt.* 2 | import sbt.Keys.* 3 | 4 | import Utils.* 5 | import sbtassembly.AssemblyPlugin.autoImport.* 6 | 7 | object GeneralSettings { 8 | 9 | val scala212 = "2.12.20" 10 | val scala213 = "2.13.15" 11 | val scala33 = "3.3.4" 12 | 13 | val defaultScalaVersion: String = scala213 14 | val supportedScalaVersions: Seq[String] = Seq(scala212, scala213, scala33) 15 | 16 | lazy val commonSettings = Seq( 17 | organization := "org.polars", 18 | versionScheme := Some("early-semver"), 19 | licenses := List("Apache-2.0" -> url("https://www.apache.org/licenses/LICENSE-2.0")), 20 | developers := List( 21 | Developer( 22 | id = "chitralverma", 23 | name = "Chitral Verma", 24 | email = "chitral.verma@gmail.com", 25 | url = url("https://github.com/chitralverma") 26 | ) 27 | ), 28 | scalaVersion := defaultScalaVersion, 29 | crossScalaVersions := supportedScalaVersions, 30 | scalacOptions ++= Seq( 31 | "-encoding", 32 | "utf8", 33 | "-deprecation", 34 | "-feature", 35 | "-language:existentials", 36 | "-language:implicitConversions", 37 | "-language:reflectiveCalls", 38 | 
"-language:higherKinds", 39 | "-language:postfixOps", 40 | "-unchecked", 41 | "-Xfatal-warnings" 42 | ) ++ (if (priorTo213(scalaVersion.value)) Seq("-target:jvm-1.8") 43 | else Seq("-release", "8")), 44 | fork := true, 45 | turbo := true, 46 | assembly / assemblyMergeStrategy := { 47 | case PathList("META-INF", xs @ _*) => MergeStrategy.discard 48 | case x => MergeStrategy.first 49 | } 50 | ) 51 | 52 | lazy val settings: Seq[Setting[_]] = Seq( 53 | name := "scala-polars", 54 | nativeRoot := baseDirectory.value.toPath.resolveSibling("native").toFile 55 | ) 56 | 57 | } 58 | -------------------------------------------------------------------------------- /project/NativeBuildSettings.scala: -------------------------------------------------------------------------------- 1 | import java.nio.file.* 2 | 3 | import sbt.* 4 | import sbt.Keys.* 5 | 6 | import scala.collection.JavaConverters.* 7 | import scala.sys.process.* 8 | 9 | import Utils.* 10 | 11 | object NativeBuildSettings { 12 | 13 | lazy val generateNativeLibrary = taskKey[Unit]( 14 | "Generates native library using Cargo which can be added as managed resource to classpath." 15 | ) 16 | 17 | lazy val managedNativeLibraries = taskKey[Seq[Path]]( 18 | "Maps locally built, platform-dependant libraries to their locations on the classpath." 19 | ) 20 | 21 | lazy val settings: Seq[Setting[_]] = Seq( 22 | generateNativeLibrary := Def 23 | .taskDyn[Unit] { 24 | Def.task { 25 | val logger: Logger = sLog.value 26 | 27 | sys.env.get("SKIP_NATIVE_GENERATION") match { 28 | case None => 29 | val processLogger = getProcessLogger(sLog.value, infoOnly = true) 30 | 31 | val targetTriple = sys.env.getOrElse( 32 | "TARGET_TRIPLE", { 33 | logger.warn( 34 | "Environment variable TARGET_TRIPLE was not set, getting value from `rustc`." 35 | ) 36 | 37 | s"rustc -vV".!!.split("\n") 38 | .map(_.trim) 39 | .find(_.startsWith("host")) 40 | .map(_.split(" ")(1).trim) 41 | .getOrElse(throw new IllegalStateException("No target triple found.")) 42 | } 43 | ) 44 | 45 | val arch = targetTriple.toLowerCase(java.util.Locale.ROOT).split("-").head 46 | 47 | val nativeOutputDir = resourceManaged.value.toPath.resolve(s"native/$arch/") 48 | val cargoTomlPath = s"${baseDirectory.value.getParent}/native/Cargo.toml" 49 | 50 | // Build native project using cargo 51 | val cmd = 52 | s"""cargo build 53 | |-Z unstable-options 54 | |--release 55 | |--lib 56 | |--target $targetTriple 57 | |--artifact-dir $nativeOutputDir""".stripMargin.replaceAll("\n", " ") 58 | 59 | executeProcess(cmd = cmd, cwd = Some(nativeRoot.value), sLog.value, infoOnly = true) 60 | logger.success(s"Successfully built native library at location '$nativeOutputDir'") 61 | 62 | sys.env.get("NATIVE_LIB_LOCATION") match { 63 | case Some(path) => 64 | val dest = Paths.get(path, arch).toAbsolutePath 65 | logger.info( 66 | "Environment variable NATIVE_LIB_LOCATION is set, " + 67 | s"copying built native library from location '$nativeOutputDir' to '$dest'." 68 | ) 69 | 70 | IO.copyDirectory(nativeOutputDir.toFile, dest.toFile) 71 | 72 | case None => 73 | } 74 | 75 | case Some(_) => 76 | logger.info( 77 | "Environment variable SKIP_NATIVE_GENERATION is set, skipping cargo build." 
78 | ) 79 | } 80 | } 81 | } 82 | .value, 83 | managedNativeLibraries := Def 84 | .taskDyn[Seq[Path]] { 85 | Def.task { 86 | val managedLibs = sys.env.get("SKIP_NATIVE_GENERATION") match { 87 | case None => 88 | Files 89 | .find( 90 | resourceManaged.value.toPath.resolve("native/"), 91 | Int.MaxValue, 92 | (filePath, _) => filePath.toFile.isFile 93 | ) 94 | .iterator() 95 | .asScala 96 | .toSeq 97 | 98 | case Some(_) => Seq.empty[Path] 99 | } 100 | 101 | val externalNativeLibs = sys.env.get("NATIVE_LIB_LOCATION") match { 102 | case Some(path) => 103 | Files 104 | .find( 105 | Paths.get(path), 106 | Int.MaxValue, 107 | (filePath, _) => filePath.toFile.isFile 108 | ) 109 | .iterator() 110 | .asScala 111 | .toSeq 112 | 113 | case None => Seq.empty[Path] 114 | } 115 | 116 | // Collect paths of built resources to later include in classpath 117 | (managedLibs ++ externalNativeLibs).distinct.map(_.toAbsolutePath) 118 | } 119 | } 120 | .dependsOn(generateNativeLibrary) 121 | .value, 122 | resourceGenerators += Def.task { 123 | // Add all generated resources to manage resources' classpath 124 | managedNativeLibraries.value 125 | .map { path => 126 | val pathStr = path.toString 127 | val arch = path.getParent.getFileName.toString 128 | 129 | val libraryFile = path.toFile 130 | 131 | // native library as a managed resource file 132 | val resource = resourceManaged.value / "native" / arch / libraryFile.getName 133 | 134 | // copy native library to a managed resource, so that it is always available 135 | // on the classpath, even when not packaged as a jar 136 | IO.copyDirectory(libraryFile, resource) 137 | 138 | sLog.value.success( 139 | s"Added resource from location '$pathStr' " + 140 | s"(size: ${libraryFile.length() / (1024 * 1024)} MBs) to classpath." 141 | ) 142 | 143 | resource 144 | } 145 | }.taskValue 146 | ) 147 | 148 | } 149 | -------------------------------------------------------------------------------- /project/ProjectDependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt.* 2 | import sbt.Keys.* 3 | 4 | import Utils.* 5 | import Versions.* 6 | 7 | object ProjectDependencies { 8 | 9 | lazy val dependencies: Seq[Setting[_]] = Seq( 10 | libraryDependencies ++= 11 | Seq( 12 | "org.scala-lang.modules" %% "scala-collection-compat" % scalaCollectionCompat, 13 | "com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion, 14 | "com.fasterxml.jackson.module" %% "jackson-module-scala" % jacksonVersion, 15 | "com.fasterxml.jackson.datatype" % "jackson-datatype-jsr310" % jacksonVersion 16 | ) ++ 17 | (if (!priorTo213(scalaVersion.value)) 18 | Seq( 19 | "org.scala-lang.modules" %% "scala-parallel-collections" % scalaParallelCollections 20 | ) 21 | else Nil) ++ 22 | ( 23 | scalaVersion.value match { 24 | // Only include scala-reflect for Scala 2 25 | case v if v.startsWith("2.") => Seq("org.scala-lang" % "scala-reflect" % v) 26 | // No scala-reflect for Scala 3 27 | case _ => Seq.empty 28 | } 29 | ) 30 | ) 31 | 32 | } 33 | 34 | object Versions { 35 | val scalaCollectionCompat = "2.13.0" 36 | val scalaParallelCollections = "1.1.0" 37 | val jacksonVersion = "2.18.4" 38 | } 39 | -------------------------------------------------------------------------------- /project/PublishingSettings.scala: -------------------------------------------------------------------------------- 1 | import sbt.* 2 | import sbt.Keys.* 3 | 4 | object PublishingSettings { 5 | 6 | lazy val settings: Seq[Setting[_]] = Seq( 7 | publish / skip := false, 8 | 
publishArtifact := true, 9 | publishMavenStyle := true, 10 | externalResolvers += "GitHub Package Registry" at "https://maven.pkg.github.com/chitralverma/scala-polars", 11 | publishTo := Some( 12 | "GitHub Package Registry" at "https://maven.pkg.github.com/chitralverma/scala-polars" 13 | ), 14 | credentials += Credentials( 15 | realm = "GitHub Package Registry", 16 | host = "maven.pkg.github.com", 17 | userName = "chitralverma", 18 | passwd = sys.env.getOrElse("GITHUB_TOKEN", "") 19 | ) 20 | ) 21 | 22 | } 23 | -------------------------------------------------------------------------------- /project/Utils.scala: -------------------------------------------------------------------------------- 1 | import sbt.* 2 | 3 | import scala.sys.process.* 4 | 5 | object Utils { 6 | 7 | lazy val nativeRoot = taskKey[File]("Directory pointing to the native project root.") 8 | 9 | def executeProcesses( 10 | cmds: Seq[String], 11 | cwd: Option[File] = None, 12 | logger: Logger, 13 | infoOnly: Boolean = false, 14 | extraEnv: Seq[(String, String)] = Nil 15 | ): Unit = cmds.foreach(cmd => executeProcess(cmd, cwd, logger, infoOnly, extraEnv)) 16 | 17 | def executeProcess( 18 | cmd: String, 19 | cwd: Option[File] = None, 20 | logger: Logger, 21 | infoOnly: Boolean = false, 22 | extraEnv: Seq[(String, String)] = Nil 23 | ): Unit = { 24 | val exitCode = 25 | Process(cmd, cwd, extraEnv: _*).run(getProcessLogger(logger, infoOnly)).exitValue() 26 | 27 | if (exitCode != 0) { 28 | logger.error(s"Failed to execute command `$cmd` with exit code $exitCode.") 29 | System.exit(exitCode) 30 | } else { 31 | logger.success(s"Successfully executed command `$cmd`.") 32 | } 33 | } 34 | 35 | def priorTo213(scalaVersion: String): Boolean = 36 | CrossVersion.partialVersion(scalaVersion) match { 37 | case Some((2, minor)) if minor < 13 => true 38 | case _ => false 39 | } 40 | 41 | def getProcessLogger(logger: Logger, infoOnly: Boolean = false): ProcessLogger = 42 | ProcessLogger( 43 | (o: String) => logger.info(o), 44 | (e: String) => if (infoOnly) logger.info(e) else logger.error(e) 45 | ) 46 | 47 | } 48 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.10.7 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.github.sbt" % "sbt-jni" % "1.7.0") 2 | 3 | addSbtPlugin("com.github.sbt" % "sbt-java-formatter" % "0.10.0") 4 | 5 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.4") 6 | 7 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.3.1") 8 | 9 | addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.6.4") 10 | 11 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.3.0") 12 | 13 | addSbtPlugin("no.arktekk.sbt" % "aether-deploy" % "0.29.1") 14 | 15 | addSbtPlugin("com.github.sbt" % "sbt-dynver" % "5.1.0") 16 | 17 | addSbtPlugin("com.github.sbt" % "sbt-ghpages" % "0.8.0") 18 | 19 | addSbtPlugin("com.github.sbt" % "sbt-unidoc" % "0.5.0") 20 | 21 | addSbtPlugin("com.thoughtworks.sbt-api-mappings" % "sbt-api-mappings" % "3.0.2") 22 | -------------------------------------------------------------------------------- /version.sbt: -------------------------------------------------------------------------------- 1 | ThisBuild / version := "0.1.0-SNAPSHOT" 2 | 
--------------------------------------------------------------------------------