├── .github ├── release-drafter.yml └── workflows │ ├── ci.yml │ ├── clean.yml │ ├── release-drafter.yml │ └── release.yml ├── .gitignore ├── .scalafix.conf ├── .scalafmt.conf ├── LICENSE ├── README.md ├── build.sbt ├── docs ├── logo.png └── scala-cli │ ├── Datafusion.scala │ ├── IPC.scala │ ├── TabularSchemaEncoder.scala │ ├── TabularVectorSchemaRoot.scala │ ├── ValueVectorCodecs.scala │ ├── ValueVectorCodecsNullable.scala │ └── test.csv ├── modules ├── core │ └── src │ │ ├── main │ │ └── scala │ │ │ └── me │ │ │ └── mnedokushev │ │ │ └── zio │ │ │ └── apache │ │ │ └── arrow │ │ │ └── core │ │ │ ├── Allocator.scala │ │ │ ├── Tabular.scala │ │ │ ├── ValidationError.scala │ │ │ ├── Vector.scala │ │ │ ├── codec │ │ │ ├── DecoderError.scala │ │ │ ├── EncoderError.scala │ │ │ ├── SchemaEncoder.scala │ │ │ ├── SchemaEncoderDeriver.scala │ │ │ ├── ValueDecoder.scala │ │ │ ├── ValueEncoder.scala │ │ │ ├── ValueVectorCodec.scala │ │ │ ├── ValueVectorDecoder.scala │ │ │ ├── ValueVectorDecoderDeriver.scala │ │ │ ├── ValueVectorEncoder.scala │ │ │ ├── ValueVectorEncoderDeriver.scala │ │ │ ├── VectorSchemaRootCodec.scala │ │ │ ├── VectorSchemaRootDecoder.scala │ │ │ ├── VectorSchemaRootDecoderDeriver.scala │ │ │ ├── VectorSchemaRootEncoder.scala │ │ │ ├── VectorSchemaRootEncoderDeriver.scala │ │ │ └── package.scala │ │ │ ├── ipc │ │ │ └── package.scala │ │ │ └── package.scala │ │ └── test │ │ └── scala │ │ └── me │ │ └── mnedokushev │ │ └── zio │ │ └── apache │ │ └── arrow │ │ └── core │ │ ├── Fixtures.scala │ │ ├── TabularSpec.scala │ │ ├── VectorSpec.scala │ │ ├── codec │ │ ├── CodecSpec.scala │ │ └── SchemaEncoderSpec.scala │ │ └── ipc │ │ └── IpcSpec.scala └── datafusion │ └── src │ ├── main │ └── scala │ │ └── me │ │ └── mnedokushev │ │ └── zio │ │ └── apache │ │ └── arrow │ │ └── datafusion │ │ ├── Context.scala │ │ └── Dataframe.scala │ └── test │ ├── resources │ └── test.csv │ └── scala │ └── me │ └── mnedokushev │ └── zio │ └── apache │ └── arrow │ └── datafusion │ 
└── DataframeSpec.scala └── project ├── BuildHelper.scala ├── Dep.scala ├── build.properties └── plugins.sbt /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$RESOLVED_VERSION 🌈' 2 | tag-template: 'v$RESOLVED_VERSION' 3 | categories: 4 | - title: '🚀 Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: '🐛 Bug Fixes' 9 | labels: 10 | - 'fix' 11 | - 'bugfix' 12 | - 'bug' 13 | - title: '🧰 Maintenance' 14 | label: 'chore' 15 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 16 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 17 | version-resolver: 18 | major: 19 | labels: 20 | - 'major' 21 | minor: 22 | labels: 23 | - 'minor' 24 | patch: 25 | labels: 26 | - 'patch' 27 | default: patch 28 | template: | 29 | ## Changes 30 | 31 | $CHANGES -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by sbt-github-actions using the 2 | # githubWorkflowGenerate task. You should add and commit this file to 3 | # your git repository. It goes without saying that you shouldn't edit 4 | # this file by hand! Instead, if you wish to make changes, you should 5 | # change your sbt build configuration to revise the workflow description 6 | # to meet your needs, then regenerate this file. 
7 | 8 | name: Continuous Integration 9 | 10 | on: 11 | pull_request: 12 | branches: ['**'] 13 | push: 14 | branches: ['**'] 15 | 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | jobs: 20 | build: 21 | name: Build and Test 22 | strategy: 23 | matrix: 24 | os: [ubuntu-latest] 25 | scala: [2.12.20, 2.13.16, 3.6.4] 26 | java: [temurin@11, temurin@17] 27 | runs-on: ${{ matrix.os }} 28 | steps: 29 | - name: Checkout current branch (full) 30 | uses: actions/checkout@v4 31 | with: 32 | fetch-depth: 0 33 | 34 | - name: Setup Java (temurin@11) 35 | if: matrix.java == 'temurin@11' 36 | uses: actions/setup-java@v4 37 | with: 38 | distribution: temurin 39 | java-version: 11 40 | cache: sbt 41 | 42 | - name: Setup Java (temurin@17) 43 | if: matrix.java == 'temurin@17' 44 | uses: actions/setup-java@v4 45 | with: 46 | distribution: temurin 47 | java-version: 17 48 | cache: sbt 49 | 50 | - name: Setup sbt 51 | uses: sbt/setup-sbt@v1 52 | 53 | - name: Lint Scala code 54 | run: sbt '++ ${{ matrix.scala }}' 'scalafix --check' scalafmtCheckAll 55 | 56 | - name: Check that workflows are up to date 57 | run: sbt '++ ${{ matrix.scala }}' githubWorkflowCheck 58 | 59 | - name: Build project 60 | run: sbt '++ ${{ matrix.scala }}' test 61 | -------------------------------------------------------------------------------- /.github/workflows/clean.yml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by sbt-github-actions using the 2 | # githubWorkflowGenerate task. You should add and commit this file to 3 | # your git repository. It goes without saying that you shouldn't edit 4 | # this file by hand! Instead, if you wish to make changes, you should 5 | # change your sbt build configuration to revise the workflow description 6 | # to meet your needs, then regenerate this file. 
7 | 8 | name: Clean 9 | 10 | on: push 11 | 12 | jobs: 13 | delete-artifacts: 14 | name: Delete Artifacts 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - name: Delete artifacts 20 | shell: bash {0} 21 | run: | 22 | # Customize those three lines with your repository and credentials: 23 | REPO=${GITHUB_API_URL}/repos/${{ github.repository }} 24 | 25 | # A shortcut to call GitHub API. 26 | ghapi() { curl --silent --location --user _:$GITHUB_TOKEN "$@"; } 27 | 28 | # A temporary file which receives HTTP response headers. 29 | TMPFILE=$(mktemp) 30 | 31 | # An associative array, key: artifact name, value: number of artifacts of that name. 32 | declare -A ARTCOUNT 33 | 34 | # Process all artifacts on this repository, loop on returned "pages". 35 | URL=$REPO/actions/artifacts 36 | while [[ -n "$URL" ]]; do 37 | 38 | # Get current page, get response headers in a temporary file. 39 | JSON=$(ghapi --dump-header $TMPFILE "$URL") 40 | 41 | # Get URL of next page. Will be empty if we are at the last page. 42 | URL=$(grep '^Link:' "$TMPFILE" | tr ',' '\n' | grep 'rel="next"' | head -1 | sed -e 's/.*<//' -e 's/>.*//') 43 | rm -f $TMPFILE 44 | 45 | # Number of artifacts on this page: 46 | COUNT=$(( $(jq <<<$JSON -r '.artifacts | length') )) 47 | 48 | # Loop on all artifacts on this page. 49 | for ((i=0; $i < $COUNT; i++)); do 50 | 51 | # Get name of artifact and count instances of this name. 
52 | name=$(jq <<<$JSON -r ".artifacts[$i].name?") 53 | ARTCOUNT[$name]=$(( $(( ${ARTCOUNT[$name]} )) + 1)) 54 | 55 | id=$(jq <<<$JSON -r ".artifacts[$i].id?") 56 | size=$(( $(jq <<<$JSON -r ".artifacts[$i].size_in_bytes?") )) 57 | printf "Deleting '%s' #%d, %'d bytes\n" $name ${ARTCOUNT[$name]} $size 58 | ghapi -X DELETE $REPO/actions/artifacts/$id 59 | done 60 | done 61 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: ['main'] 6 | 7 | jobs: 8 | update_release_draft: 9 | runs-on: ubuntu-22.04 10 | steps: 11 | - uses: release-drafter/release-drafter@v5 12 | env: 13 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 14 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | branches: [master, main] 5 | tags: ["*"] 6 | jobs: 7 | publish: 8 | runs-on: ubuntu-20.04 9 | steps: 10 | - uses: actions/checkout@v3 11 | with: 12 | fetch-depth: 0 13 | - uses: actions/setup-java@v3 14 | with: 15 | distribution: temurin 16 | java-version: 17 17 | cache: sbt 18 | - run: sbt ci-release 19 | env: 20 | PGP_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }} 21 | PGP_SECRET: ${{ secrets.PGP_SECRET }} 22 | SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} 23 | SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }} 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | .idea/ 4 | .bsp/ 5 | target/ 6 | */target/ 7 | private/ 8 | 9 | .bloop/ 10 | .metals/ 11 | .vscode/ 12 | metals.sbt 13 | .ammonite/ 14 | *.sc 15 | .scala-build 16 | 
-------------------------------------------------------------------------------- /.scalafix.conf: -------------------------------------------------------------------------------- 1 | rules = [ 2 | DisableSyntax 3 | LeakingImplicitClassVal 4 | NoAutoTupling 5 | NoValInForComprehension 6 | OrganizeImports 7 | ] 8 | 9 | Disable { 10 | ifSynthetic = [ 11 | "scala/Option.option2Iterable" 12 | "scala/Predef.any2stringadd" 13 | ] 14 | } 15 | 16 | OrganizeImports { 17 | # Align with IntelliJ IDEA so that they don't fight each other 18 | groupedImports = Merge 19 | removeUnused = false 20 | } 21 | 22 | RemoveUnused { 23 | imports = false // handled by OrganizeImports 24 | } 25 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "3.9.5" 2 | maxColumn = 120 3 | align.preset = most 4 | continuationIndent.defnSite = 2 5 | assumeStandardLibraryStripMargin = true 6 | docstrings.style = Asterisk 7 | lineEndings = preserve 8 | includeCurlyBraceInSelectChains = false 9 | danglingParentheses.preset = true 10 | spaces { 11 | inImportCurlyBraces = true 12 | } 13 | optIn.annotationNewlines = true 14 | runner.dialect = scala3 15 | rewrite.rules = [SortImports, RedundantBraces] 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](docs/logo.png) 2 | 3 | 4 | ![Build status](https://github.com/grouzen/zio-apache-arrow/actions/workflows/ci.yml/badge.svg) 5 | ![Sonatype Nexus (Releases)](https://img.shields.io/nexus/r/me.mnedokushev/zio-apache-arrow-core_2.12?server=https%3A%2F%2Foss.sonatype.org) 6 | ![Sonatype Nexus (Snapshots)](https://img.shields.io/nexus/s/me.mnedokushev/zio-apache-arrow-core_2.13?server=https%3A%2F%2Foss.sonatype.org) 7 | [![Scala Steward badge](https://img.shields.io/badge/Scala_Steward-helping-blue.svg?style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA4AAAAQCAMAAAARSr4IAAAAVFBMVEUAAACHjojlOy5NWlrKzcYRKjGFjIbp293YycuLa3pYY2LSqql4f3pCUFTgSjNodYRmcXUsPD/NTTbjRS+2jomhgnzNc223cGvZS0HaSD0XLjbaSjElhIr+AAAAAXRSTlMAQObYZgAAAHlJREFUCNdNyosOwyAIhWHAQS1Vt7a77/3fcxxdmv0xwmckutAR1nkm4ggbyEcg/wWmlGLDAA3oL50xi6fk5ffZ3E2E3QfZDCcCN2YtbEWZt+Drc6u6rlqv7Uk0LdKqqr5rk2UCRXOk0vmQKGfc94nOJyQjouF9H/wCc9gECEYfONoAAAAASUVORK5CYII=)](https://scala-steward.org) 8 | 9 
| # ZIO Apache Arrow 10 | 11 | ZIO based wrapper for [Apache Arrow Java Implementation](https://arrow.apache.org/docs/java/index.html) that leverages 12 | [ZIO Schema](https://zio.dev/zio-schema/) library to derive codecs for [ValueVector](https://arrow.apache.org/docs/java/reference/index.html) and [VectorSchemaRoot](https://arrow.apache.org/docs/java/reference/index.html). 13 | 14 | **To the best of my knowledge at the time of writing, this is the only library that provides automatic codecs derivation for Apache Arrow among all type-safe programming languages.** 15 | 16 | Want more? Check out my ZIO-powered library for Apache Parquet - [ZIO Apache Parquet](https://github.com/grouzen/zio-apache-parquet). 17 | 18 | ## Why? 19 | 20 | - **ZIO native** - utilizes various ZIO features to offer a FP-oriented way of working with the Arrow API. 21 | - **Resource management** - guarantees an automatic [memory management](https://arrow.apache.org/docs/java/memory.html) by wrapping up operations on [BufferAllocator](https://arrow.apache.org/docs/java/memory.html#bufferallocator) in ZIO's [Scope](https://zio.dev/reference/resource/scope/). 22 | - **ZIO Schema** - automatically derives codecs for Arrow's ValueVector and VectorSchemaRoot data types, enabling a seamless integration with the rest of the ZIO ecosystem. 23 | - **Integration with Arrow ecosystem** - the [Core](#core) module provides the groundwork for integrating your application or library with various parts of the Arrow ecosystem such as [Datafusion](https://datafusion.apache.org), [Polars](https://pola.rs), [Apache Iceberg](https://iceberg.apache.org), and more. 
24 | 25 | 26 | ## Contents 27 | 28 | - [Installation](#installation) 29 | - [Usage](#usage) 30 | - [Core](#core) 31 | - [Codecs](#codecs) 32 | - [ValueVector](#valuevector) 33 | - [Built-in primitive and complex types](#built-in-primitive-and-complex-types) 34 | - [Nullable](#nullable) 35 | - [Tabular](#tabular) 36 | - [SchemaEncoder](#schemaencoder) 37 | - [VectorSchemaRoot](#vectorschemaroot) 38 | - [IPC](#ipc) 39 | - [Datafusion](#datafusion) 40 | 41 | ## Installation 42 | 43 | ```scala 44 | libraryDependencies += "me.mnedokushev" %% "zio-apache-arrow-core" % "@VERSION@" 45 | libraryDependencies += "me.mnedokushev" %% "zio-apache-arrow-datafusion" % "@VERSION@" 46 | ``` 47 | 48 | ## Usage 49 | 50 | All examples are self-contained [Scala CLI](https://scala-cli.virtuslab.org) snippets. You can find copies of them in `docs/scala-cli`. 51 | 52 | ### Core 53 | 54 | The module does all the grunt work to implement a familiar FP-oriented API for some of the basic blocks of the Java API of Apache Arrow such as: 55 | - [Memory Management](https://arrow.apache.org/docs/java/memory.html) 56 | - [Value Vector](https://arrow.apache.org/docs/java/vector.html) 57 | - [Tabular data](https://arrow.apache.org/docs/java/vector_schema_root.html) 58 | - [Reading/Writing IPC formats](https://arrow.apache.org/docs/java/ipc.html) 59 | 60 | #### Codecs 61 | 62 | It helps you to avoid dealing with a low-level, convoluted and error-prone Java API by providing codecs for a [bunch of primitive](https://zio.dev/zio-schema/standard-type-reference), arbitrary nested complex ([lists](https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout) and [structs](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)), nullable, and user-defined tabular types. 63 | 64 | ##### ValueVector 65 | 66 | https://arrow.apache.org/docs/java/vector.html 67 | > an abstraction that is used to store a sequence of values having the same type in an individual column. 
68 | 69 | ###### Built-in primitive and complex types 70 | 71 | ```scala 72 | //> using scala "3.4.3" 73 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.4 74 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.1.0 75 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 76 | 77 | import me.mnedokushev.zio.apache.arrow.core.Allocator 78 | import me.mnedokushev.zio.apache.arrow.core.codec.* 79 | import zio.* 80 | import zio.schema.* 81 | import zio.schema.Factory.* 82 | 83 | object ValueVectorCodecs extends ZIOAppDefault: 84 | 85 | case class MyData(a: Int, b: String) 86 | 87 | object MyData: 88 | implicit val schema: Schema[MyData] = DeriveSchema.gen[MyData] 89 | 90 | val intCodec = ValueVectorCodec.intCodec 91 | val listStringCodec = ValueVectorCodec.listChunkCodec[String] 92 | val structMyDataCodec = ValueVectorCodec.structCodec[MyData] 93 | 94 | override def run = 95 | ZIO 96 | .scoped( 97 | for { 98 | intVec <- intCodec.encodeZIO(Chunk(1, 2, 3)) 99 | intResult <- intCodec.decodeZIO(intVec) 100 | listStringVec <- listStringCodec.encodeZIO(Chunk(Chunk("a", "b"), Chunk("c"))) 101 | listStringResult <- listStringCodec.decodeZIO(listStringVec) 102 | structMyDataVec <- structMyDataCodec.encodeZIO(Chunk(MyData(1, "a"), MyData(2, "b"))) 103 | structMyDataResult <- structMyDataCodec.decodeZIO(structMyDataVec) 104 | _ <- Console.printLine(intVec) 105 | _ <- Console.printLine(intResult) 106 | _ <- Console.printLine(listStringVec) 107 | _ <- Console.printLine(listStringResult) 108 | _ <- Console.printLine(structMyDataVec) 109 | _ <- Console.printLine(structMyDataResult) 110 | } yield () 111 | ) 112 | .provide(Allocator.rootLayer()) 113 | // Outputs: 114 | // [1, 2, 3] 115 | // Chunk(1,2,3) 116 | // [["a","b"], ["c"]] 117 | // Chunk(Chunk(a,b),Chunk(c)) 118 | // [{"a":1,"b":"a"}, {"a":2,"b":"b"}] 119 | // Chunk(MyData(1,a),MyData(2,b)) 120 | ``` 121 | 122 | ###### Nullable 123 | 124 | ```scala 125 | //> using scala "3.4.3" 126 | //> using dep 
me.mnedokushev::zio-apache-arrow-core:0.1.4 127 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.1.0 128 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 129 | 130 | import me.mnedokushev.zio.apache.arrow.core.Allocator 131 | import me.mnedokushev.zio.apache.arrow.core.codec.* 132 | import org.apache.arrow.vector.IntVector 133 | import zio.* 134 | import zio.schema.* 135 | import zio.schema.Factory.* 136 | 137 | object ValueVectorCodecsNullable extends ZIOAppDefault: 138 | 139 | val intCodec = ValueVectorCodec.optionCodec[IntVector, Int] 140 | val listStringCodec = ValueVectorCodec.listChunkOptionCodec[String] 141 | val optionListStringCodec = ValueVectorCodec.optionListChunkCodec[String] 142 | 143 | override def run = 144 | ZIO 145 | .scoped( 146 | for { 147 | intVec <- intCodec.encodeZIO(Chunk(Some(1), None, Some(2))) 148 | intResult <- intCodec.decodeZIO(intVec) 149 | listStringVec <- listStringCodec.encodeZIO(Chunk(Chunk(Some("a"), None), Chunk(Some("b")))) 150 | listStringResult <- listStringCodec.decodeZIO(listStringVec) 151 | optionListStringVec <- optionListStringCodec.encodeZIO(Chunk(Some(Chunk("a", "b")), None, Some(Chunk("c")))) 152 | optionListStringResult <- optionListStringCodec.decodeZIO(optionListStringVec) 153 | _ <- Console.printLine(intVec) 154 | _ <- Console.printLine(intResult) 155 | _ <- Console.printLine(listStringVec) 156 | _ <- Console.printLine(listStringResult) 157 | _ <- Console.printLine(optionListStringVec) 158 | _ <- Console.printLine(optionListStringResult) 159 | } yield () 160 | ) 161 | .provide(Allocator.rootLayer()) 162 | // Outputs: 163 | // [1, null, 2] 164 | // Chunk(Some(1),None,Some(2)) 165 | // [["a",null], ["b"]] 166 | // Chunk(Chunk(Some(a),None),Chunk(None)) 167 | // [["a","b"], null, ["c"]] 168 | // Chunk(Some(Chunk(a,b)),None,Some(Chunk(c))) 169 | ``` 170 | 171 | ##### Tabular 172 | 173 | https://arrow.apache.org/docs/java/vector_schema_root.html 174 | > The recommended usage is to create a single 
VectorSchemaRoot based on a known schema and populate data over and over into that root in a stream of batches, rather than creating a new instance each time. 175 | 176 | The API is similar to the ValueVectorCodec. The main difference is that it is supposed to be used with user-defined case classes (aka 2D datasets) only. 177 | 178 | ###### SchemaEncoder 179 | 180 | > describes the overall structure consisting of any number of columns. It holds a sequence of fields together with some optional schema-wide metadata (in addition to per-field metadata). 181 | 182 | When working with tabular data, we need a way to convert ZIO Schema into [Arrow Schema](https://arrow.apache.org/docs/java/reference/org/apache/arrow/vector/types/pojo/Schema.html). 183 | 184 | ```scala 185 | //> using scala "3.4.3" 186 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.4 187 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.1.0 188 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 189 | 190 | import me.mnedokushev.zio.apache.arrow.core.codec.* 191 | import zio.* 192 | import zio.schema.* 193 | 194 | object TabularSchemaEncoder extends ZIOAppDefault: 195 | 196 | case class MyData(a: Int, b: String) 197 | 198 | object MyData: 199 | implicit val schema: Schema[MyData] = 200 | DeriveSchema.gen[MyData] 201 | implicit val schemaEncoder: SchemaEncoder[MyData] = 202 | SchemaEncoder.fromDefaultDeriver[MyData] 203 | 204 | override def run = 205 | for { 206 | schema <- ZIO.fromEither(MyData.schemaEncoder.encode(MyData.schema)) 207 | _ <- Console.printLine(schema) 208 | } yield () 209 | // Outputs: 210 | // Schema 211 | ``` 212 | 213 | ###### VectorSchemaRoot 214 | 215 | ```scala 216 | //> using scala "3.4.3" 217 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.4 218 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.1.0 219 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 220 | 221 | import me.mnedokushev.zio.apache.arrow.core.Allocator 222 | 
import me.mnedokushev.zio.apache.arrow.core.codec.* 223 | import me.mnedokushev.zio.apache.arrow.core.Tabular.* 224 | import me.mnedokushev.zio.apache.arrow.core.Tabular 225 | import zio.* 226 | import zio.schema.* 227 | import zio.schema.Factory.* 228 | 229 | object TabularVectorSchemaRoot extends ZIOAppDefault: 230 | 231 | case class MyData(a: Int, b: String) 232 | 233 | object MyData: 234 | implicit val schema: Schema[MyData] = 235 | DeriveSchema.gen[MyData] 236 | implicit val schemaEncoder: SchemaEncoder[MyData] = 237 | SchemaEncoder.fromDefaultDeriver[MyData] 238 | 239 | val myDataCodec = VectorSchemaRootCodec.codec[MyData] 240 | 241 | override def run = 242 | ZIO 243 | .scoped( 244 | for { 245 | root <- Tabular.empty[MyData] 246 | myDataVec <- myDataCodec.encodeZIO(Chunk(MyData(1, "a"), MyData(2, "b")), root) 247 | myDataResult <- myDataCodec.decodeZIO(myDataVec) 248 | _ <- Console.printLine(myDataResult) 249 | } yield () 250 | ) 251 | .provide(Allocator.rootLayer()) 252 | // Outputs: 253 | // Chunk(MyData(1,a),MyData(2,b)) 254 | ``` 255 | 256 | You also can use methods from `Tabular` to simplify encoding/decoding: 257 | 258 | ```scala 259 | ZIO.scoped( 260 | for { 261 | myDataVec <- Tabular.fromChunk(Chunk(MyData(1, "a"), MyData(2, "b"))) 262 | myDataResult <- Tabular.toChunk(myDataVec) 263 | } yield () 264 | ).provide(Allocator.rootLayer()) 265 | ``` 266 | 267 | #### IPC 268 | 269 | https://arrow.apache.org/docs/java/ipc.html 270 | > Arrow defines two types of binary formats for serializing record batches: 271 | > - Streaming format: for sending an arbitrary number of record batches. The format must be processed from start to end, and does not support random access 272 | > - File or Random Access format: for serializing a fixed number of record batches. 
It supports random access, and thus is very useful when used with memory maps 273 | 274 | Now, knowing how to define codecs we may use this knowledge to make some real-world serialization/deserialization: 275 | 276 | ```scala 277 | //> using scala "3.4.3" 278 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.4 279 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.1.0 280 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 281 | 282 | import me.mnedokushev.zio.apache.arrow.core.Allocator 283 | import me.mnedokushev.zio.apache.arrow.core.codec.* 284 | import me.mnedokushev.zio.apache.arrow.core.ipc.* 285 | import java.io.ByteArrayInputStream 286 | import zio.* 287 | import zio.stream.* 288 | import zio.schema.* 289 | import zio.schema.Factory.* 290 | 291 | object IPC extends ZIOAppDefault: 292 | 293 | case class MyData(a: Int, b: String) 294 | 295 | object MyData: 296 | implicit val schema: Schema[MyData] = 297 | DeriveSchema.gen[MyData] 298 | implicit val schemaEncoder: SchemaEncoder[MyData] = 299 | SchemaEncoder.fromDefaultDeriver[MyData] 300 | implicit val decoder: VectorSchemaRootDecoder[MyData] = 301 | VectorSchemaRootDecoder.fromDefaultDeriver[MyData] 302 | implicit val encoder: VectorSchemaRootEncoder[MyData] = 303 | VectorSchemaRootEncoder.fromDefaultDeriver[MyData] 304 | 305 | val payload = (1 to 8096).map(i => MyData(i, s"string ${i.toString}")) 306 | 307 | override def run = 308 | ZIO 309 | .scoped( 310 | for { 311 | out <- writeStreaming[Any, MyData](ZStream.from(payload)) 312 | result <- readStreaming[MyData](new ByteArrayInputStream(out.toByteArray)).runCollect 313 | _ <- Console.printLine(result) 314 | } yield () 315 | ) 316 | .provide(Allocator.rootLayer()) 317 | // Outputs: 318 | // Chunk(MyData(1,string 1), ..., MyData(8096,string 8096)) 319 | ``` 320 | 321 | ### Datafusion 322 | 323 | This module provides a thin wrapper around [Apache Arrow's DataFusion library](https://github.com/G-Research/datafusion-java).
It enables running SQL queries on data loaded from CSV or Parquet files, and then processing the results using the power of ZIO and ZIO Streams. 324 | 325 | For this example we will use the following CSV file: 326 | ```csv 327 | fname,lname,address,age 328 | Bob,Dylan,Hollywood,80 329 | Dog,Cat,NY,3 330 | John,Doe,London,99 331 | ``` 332 | 333 | ```scala 334 | //> using scala "3.4.3" 335 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.4 336 | //> using dep me.mnedokushev::zio-apache-arrow-datafusion:0.1.4 337 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.1.0 338 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 339 | 340 | import me.mnedokushev.zio.apache.arrow.core.Allocator 341 | import me.mnedokushev.zio.apache.arrow.core.codec.* 342 | import me.mnedokushev.zio.apache.arrow.datafusion.* 343 | import zio.* 344 | import zio.schema.* 345 | 346 | import java.nio.file.Paths 347 | import java.io.File 348 | 349 | object Datafusion extends ZIOAppDefault: 350 | 351 | case class User(fname: String, lname: String, address: String, age: Long) 352 | 353 | object User: 354 | implicit val schema: Schema[User] = 355 | DeriveSchema.gen[User] 356 | implicit val schemaEncoder: SchemaEncoder[User] = 357 | Derive.derive[SchemaEncoder, User](SchemaEncoderDeriver.default) 358 | implicit val vectorSchemaRootDecoder: VectorSchemaRootDecoder[User] = 359 | VectorSchemaRootDecoder.fromDefaultDeriver[User] 360 | 361 | override def run = 362 | ( 363 | ZIO 364 | .serviceWithZIO[Context] { context => 365 | for { 366 | _ <- context.registerCsv("test", Paths.get(new File("test.csv").toURI)) 367 | df <- context.sql("SELECT * FROM test WHERE fname = 'Dog'") 368 | result <- df.collect[User].runCollect 369 | _ <- Console.printLine(result) 370 | } yield () 371 | } 372 | ) 373 | .provide(Context.create, Allocator.rootLayer()) 374 | // Outputs: 375 | // Chunk(User(Dog,Cat,NY,3)) 376 | ``` 377 | 378 | You can also write the data back to CSV or Parquet files using 
`df.writeCsv` and `df.writeParquet` methods. -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import BuildHelper._ 2 | 3 | inThisBuild( 4 | List( 5 | name := "ZIO Apache Arrow", 6 | organization := "me.mnedokushev", 7 | homepage := Some(url("https://github.com/grouzen/zio-apache-arrow")), 8 | licenses := List("Apache-2.0" -> url("http://www.apache.org/licenses/LICENSE-2.0")), 9 | developers := List( 10 | Developer( 11 | "grouzen", 12 | "Mykhailo Nedokushev", 13 | "michael.nedokushev@gmail.com", 14 | url("https://github.com/grouzen") 15 | ) 16 | ), 17 | scmInfo := Some( 18 | ScmInfo( 19 | url("https://github.com/grouzen/zio-apache-arrow"), 20 | "scm:git:git@github.com:grouzen/zio-apache-arrow.git" 21 | ) 22 | ), 23 | crossScalaVersions := Seq(Scala212, Scala213, Scala3), 24 | githubWorkflowJavaVersions := Seq(JavaSpec.temurin("11"), JavaSpec.temurin("17")), 25 | githubWorkflowPublishTargetBranches := Seq(), 26 | githubWorkflowBuildPreamble := Seq( 27 | WorkflowStep.Sbt( 28 | List( 29 | "scalafix --check", 30 | "scalafmtCheckAll" 31 | ), 32 | name = Some("Lint Scala code") 33 | ) 34 | ) 35 | ) 36 | ) 37 | 38 | lazy val root = 39 | project 40 | .in(file(".")) 41 | .aggregate(core, datafusion) 42 | .settings(publish / skip := true) 43 | 44 | lazy val core = 45 | project 46 | .in(file("modules/core")) 47 | .settings( 48 | stdSettings("core"), 49 | libraryDependencies ++= Dep.core, 50 | testFrameworks += new TestFramework("zio.test.sbt.ZTestFramework") 51 | ) 52 | 53 | lazy val datafusion = 54 | project 55 | .in(file("modules/datafusion")) 56 | .dependsOn(core) 57 | .settings( 58 | stdSettings("datafusion"), 59 | libraryDependencies ++= Dep.datafusion, 60 | testFrameworks += new TestFramework("zio.test.sbt.ZTestFramework") 61 | ) 62 | 63 | //lazy val docs = 64 | // project 65 | // .in(file("docs")) 66 | // .dependsOn(core) 67 | // .settings( 68 
| // name := "zio-apache-arrow-docs", 69 | // organization := "me.mnedokushev", 70 | // publish / skip := true, 71 | // mdocIn := file("docs/src/main/mdoc"), 72 | // mdocVariables := Map( 73 | // "VERSION" -> version.value 74 | // ) 75 | // ) 76 | // .enablePlugins(MdocPlugin) 77 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grouzen/zio-apache-arrow/2589613bbf82738ae4ef2d4812b5cff62f5eb281/docs/logo.png -------------------------------------------------------------------------------- /docs/scala-cli/Datafusion.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.4.3" 2 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.8 3 | //> using dep me.mnedokushev::zio-apache-arrow-datafusion:0.1.8 4 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.2.0 5 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 6 | 7 | import me.mnedokushev.zio.apache.arrow.core.Allocator 8 | import me.mnedokushev.zio.apache.arrow.core.codec.* 9 | import me.mnedokushev.zio.apache.arrow.datafusion.* 10 | import zio.* 11 | import zio.schema.* 12 | 13 | import java.nio.file.Paths 14 | import java.io.File 15 | 16 | object Datafusion extends ZIOAppDefault: 17 | 18 | case class User(fname: String, lname: String, address: String, age: Long) 19 | 20 | object User: 21 | implicit val schema: Schema[User] = 22 | DeriveSchema.gen[User] 23 | implicit val schemaEncoder: SchemaEncoder[User] = 24 | Derive.derive[SchemaEncoder, User](SchemaEncoderDeriver.default) 25 | implicit val vectorSchemaRootDecoder: VectorSchemaRootDecoder[User] = 26 | VectorSchemaRootDecoder.fromDefaultDeriver[User] 27 | 28 | override def run = 29 | ( 30 | ZIO 31 | .serviceWithZIO[Context] { context => 32 | for { 33 | _ <- context.registerCsv("test", Paths.get(new File("test.csv").toURI)) 34 | df 
<- context.sql("SELECT * FROM test WHERE fname = 'Dog'") 35 | result <- df.collect[User].runCollect 36 | _ <- Console.printLine(result) 37 | } yield () 38 | } 39 | ) 40 | .provide(Context.create, Allocator.rootLayer()) 41 | // Outputs: 42 | // Chunk(User(Dog,Cat,NY,3)) 43 | -------------------------------------------------------------------------------- /docs/scala-cli/IPC.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.4.3" 2 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.8 3 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.2.0 4 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 5 | 6 | import me.mnedokushev.zio.apache.arrow.core.Allocator 7 | import me.mnedokushev.zio.apache.arrow.core.codec.* 8 | import me.mnedokushev.zio.apache.arrow.core.ipc.* 9 | import java.io.ByteArrayInputStream 10 | import zio.* 11 | import zio.stream.* 12 | import zio.schema.* 13 | import zio.schema.Factory.* 14 | 15 | object IPC extends ZIOAppDefault: 16 | 17 | case class MyData(a: Int, b: String) 18 | 19 | object MyData: 20 | implicit val schema: Schema[MyData] = 21 | DeriveSchema.gen[MyData] 22 | implicit val schemaEncoder: SchemaEncoder[MyData] = 23 | SchemaEncoder.fromDefaultDeriver[MyData] 24 | implicit val encoder: VectorSchemaRootDecoder[MyData] = 25 | VectorSchemaRootDecoder.fromDefaultDeriver[MyData] 26 | implicit val decoder: VectorSchemaRootEncoder[MyData] = 27 | VectorSchemaRootEncoder.fromDefaultDeriver[MyData] 28 | 29 | val payload = (1 to 8096).map(i => MyData(i, s"string ${i.toString}")) 30 | 31 | override def run = 32 | ZIO 33 | .scoped( 34 | for { 35 | out <- writeStreaming[Any, MyData](ZStream.from(payload)) 36 | result <- readStreaming[MyData](new ByteArrayInputStream(out.toByteArray)).runCollect 37 | _ <- Console.printLine(result) 38 | } yield () 39 | ) 40 | .provide(Allocator.rootLayer()) 41 | // Outputs: 42 | // Chunk(MyData(1,string 1), ..., MyData(8096,string 8096)) 43 
| -------------------------------------------------------------------------------- /docs/scala-cli/TabularSchemaEncoder.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.4.3" 2 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.8 3 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.2.0 4 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 5 | 6 | import me.mnedokushev.zio.apache.arrow.core.codec.* 7 | import zio.* 8 | import zio.schema.* 9 | 10 | object TabularSchemaEncoder extends ZIOAppDefault: 11 | 12 | case class MyData(a: Int, b: String) 13 | 14 | object MyData: 15 | implicit val schema: Schema[MyData] = 16 | DeriveSchema.gen[MyData] 17 | implicit val schemaEncoder: SchemaEncoder[MyData] = 18 | SchemaEncoder.fromDefaultDeriver[MyData] 19 | 20 | override def run = 21 | for { 22 | schema <- ZIO.fromEither(MyData.schemaEncoder.encode(MyData.schema)) 23 | _ <- Console.printLine(schema) 24 | } yield () 25 | // Outputs: 26 | // Schema 27 | -------------------------------------------------------------------------------- /docs/scala-cli/TabularVectorSchemaRoot.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.4.3" 2 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.8 3 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.2.0 4 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 5 | 6 | import me.mnedokushev.zio.apache.arrow.core.Allocator 7 | import me.mnedokushev.zio.apache.arrow.core.codec.* 8 | import me.mnedokushev.zio.apache.arrow.core.Tabular.* 9 | import me.mnedokushev.zio.apache.arrow.core.Tabular 10 | import zio.* 11 | import zio.schema.* 12 | import zio.schema.Factory.* 13 | 14 | object TabularVectorSchemaRoot extends ZIOAppDefault: 15 | 16 | case class MyData(a: Int, b: String) 17 | 18 | object MyData: 19 | implicit val schema: Schema[MyData] = 20 | DeriveSchema.gen[MyData] 21 | implicit val 
schemaEncoder: SchemaEncoder[MyData] = 22 | SchemaEncoder.fromDefaultDeriver[MyData] 23 | 24 | val myDataCodec = VectorSchemaRootCodec.codec[MyData] 25 | 26 | override def run = 27 | ZIO 28 | .scoped( 29 | for { 30 | root <- Tabular.empty[MyData] 31 | myDataVec <- myDataCodec.encodeZIO(Chunk(MyData(1, "a"), MyData(2, "b")), root) 32 | myDataResult <- myDataCodec.decodeZIO(myDataVec) 33 | _ <- Console.printLine(myDataResult) 34 | } yield () 35 | ) 36 | .provide(Allocator.rootLayer()) 37 | // Outputs: 38 | // Chunk(MyData(1,a),MyData(2,b)) 39 | -------------------------------------------------------------------------------- /docs/scala-cli/ValueVectorCodecs.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.4.3" 2 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.8 3 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.2.0 4 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 5 | 6 | import me.mnedokushev.zio.apache.arrow.core.Allocator 7 | import me.mnedokushev.zio.apache.arrow.core.codec.* 8 | import zio.* 9 | import zio.schema.* 10 | import zio.schema.Factory.* 11 | 12 | object ValueVectorCodecs extends ZIOAppDefault: 13 | 14 | case class MyData(a: Int, b: String) 15 | 16 | object MyData: 17 | implicit val schema: Schema[MyData] = DeriveSchema.gen[MyData] 18 | 19 | val intCodec = ValueVectorCodec.intCodec 20 | val listStringCodec = ValueVectorCodec.listChunkCodec[String] 21 | val structMyDataCodec = ValueVectorCodec.structCodec[MyData] 22 | 23 | override def run = 24 | ZIO 25 | .scoped( 26 | for { 27 | intVec <- intCodec.encodeZIO(Chunk(1, 2, 3)) 28 | intResult <- intCodec.decodeZIO(intVec) 29 | listStringVec <- listStringCodec.encodeZIO(Chunk(Chunk("a", "b"), Chunk("c"))) 30 | listStringResult <- listStringCodec.decodeZIO(listStringVec) 31 | structMyDataVec <- structMyDataCodec.encodeZIO(Chunk(MyData(1, "a"), MyData(2, "b"))) 32 | structMyDataResult <- 
structMyDataCodec.decodeZIO(structMyDataVec) 33 | _ <- Console.printLine(intVec) 34 | _ <- Console.printLine(intResult) 35 | _ <- Console.printLine(listStringVec) 36 | _ <- Console.printLine(listStringResult) 37 | _ <- Console.printLine(structMyDataVec) 38 | _ <- Console.printLine(structMyDataResult) 39 | } yield () 40 | ) 41 | .provide(Allocator.rootLayer()) 42 | // Outputs: 43 | // [1, 2, 3] 44 | // Chunk(1,2,3) 45 | // [["a","b"], ["c"]] 46 | // Chunk(Chunk(a,b),Chunk(c)) 47 | // [{"a":1,"b":"a"}, {"a":2,"b":"b"}] 48 | // Chunk(MyData(1,a),MyData(2,b)) 49 | -------------------------------------------------------------------------------- /docs/scala-cli/ValueVectorCodecsNullable.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.4.3" 2 | //> using dep me.mnedokushev::zio-apache-arrow-core:0.1.8 3 | //> using dep org.apache.arrow:arrow-memory-unsafe:18.2.0 4 | //> using javaOpt --add-opens=java.base/java.nio=ALL-UNNAMED 5 | 6 | import me.mnedokushev.zio.apache.arrow.core.Allocator 7 | import me.mnedokushev.zio.apache.arrow.core.codec.* 8 | import org.apache.arrow.vector.IntVector 9 | import zio.* 10 | import zio.schema.* 11 | import zio.schema.Factory.* 12 | 13 | object ValueVectorCodecsNullable extends ZIOAppDefault: 14 | 15 | val intCodec = ValueVectorCodec.optionCodec[IntVector, Int] 16 | val listStringCodec = ValueVectorCodec.listChunkOptionCodec[String] 17 | val optionListStringCodec = ValueVectorCodec.optionListChunkCodec[String] 18 | 19 | override def run = 20 | ZIO 21 | .scoped( 22 | for { 23 | intVec <- intCodec.encodeZIO(Chunk(Some(1), None, Some(2))) 24 | intResult <- intCodec.decodeZIO(intVec) 25 | listStringVec <- listStringCodec.encodeZIO(Chunk(Chunk(Some("a"), None), Chunk(Some("b")))) 26 | listStringResult <- listStringCodec.decodeZIO(listStringVec) 27 | optionListStringVec <- optionListStringCodec.encodeZIO(Chunk(Some(Chunk("a", "b")), None, Some(Chunk("c")))) 28 | optionListStringResult 
<- optionListStringCodec.decodeZIO(optionListStringVec) 29 | _ <- Console.printLine(intVec) 30 | _ <- Console.printLine(intResult) 31 | _ <- Console.printLine(listStringVec) 32 | _ <- Console.printLine(listStringResult) 33 | _ <- Console.printLine(optionListStringVec) 34 | _ <- Console.printLine(optionListStringResult) 35 | } yield () 36 | ) 37 | .provide(Allocator.rootLayer()) 38 | // Outputs: 39 | // [1, null, 2] 40 | // Chunk(Some(1),None,Some(2)) 41 | // [["a",null], ["b"]] 42 | // Chunk(Chunk(Some(a),None),Chunk(None)) 43 | // [["a","b"], null, ["c"]] 44 | // Chunk(Some(Chunk(a,b)),None,Some(Chunk(c))) 45 | -------------------------------------------------------------------------------- /docs/scala-cli/test.csv: -------------------------------------------------------------------------------- 1 | fname,lname,address,age 2 | Bob,Dylan,Hollywood,80 3 | Dog,Cat,NY,3 4 | John,Doe,London,99 -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/Allocator.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core 2 | 3 | import org.apache.arrow.memory.RootAllocator 4 | import zio._ 5 | 6 | object Allocator { 7 | 8 | def root(limit: Long = Long.MaxValue): ZIO[Scope, Throwable, RootAllocator] = 9 | ZIO.fromAutoCloseable(ZIO.attempt(new RootAllocator(limit))) 10 | 11 | def rootLayer(limit: Long = Long.MaxValue): TaskLayer[RootAllocator] = 12 | ZLayer.scoped(root(limit)) 13 | 14 | } 15 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/Tabular.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core 2 | 3 | import me.mnedokushev.zio.apache.arrow.core.codec._ 4 | import org.apache.arrow.memory.BufferAllocator 5 | import 
org.apache.arrow.vector.VectorSchemaRoot 6 | import zio._ 7 | import zio.schema.{ Schema => ZSchema } 8 | import zio.stream.ZStream 9 | 10 | import scala.jdk.CollectionConverters._ 11 | 12 | object Tabular { 13 | 14 | def empty[A: ZSchema](implicit 15 | schemaEncoder: SchemaEncoder[A] 16 | ): RIO[Scope & BufferAllocator, VectorSchemaRoot] = 17 | ZIO.fromAutoCloseable( 18 | ZIO.serviceWithZIO[BufferAllocator] { implicit alloc => 19 | for { 20 | schema0 <- ZIO.fromEither(schemaEncoder.encode) 21 | vectors <- ZIO.foreach(schema0.getFields.asScala.toList) { field => 22 | for { 23 | vec <- ZIO.attempt(field.createVector(alloc)) 24 | _ <- ZIO.attempt(vec.allocateNew()) 25 | } yield vec 26 | } 27 | root <- ZIO.attempt(new VectorSchemaRoot(schema0.getFields, vectors.asJava)) 28 | } yield root 29 | } 30 | ) 31 | 32 | def fromChunk[A: ZSchema: SchemaEncoder](chunk: Chunk[A])(implicit 33 | encoder: VectorSchemaRootEncoder[A] 34 | ): RIO[Scope & BufferAllocator, VectorSchemaRoot] = 35 | for { 36 | root <- empty 37 | _ <- encoder.encodeZIO(chunk, root) 38 | } yield root 39 | 40 | def fromStream[R, A: ZSchema: SchemaEncoder](stream: ZStream[R, Throwable, A])(implicit 41 | encoder: VectorSchemaRootEncoder[A] 42 | ): RIO[R & Scope & BufferAllocator, VectorSchemaRoot] = 43 | for { 44 | chunk <- stream.runCollect 45 | root <- fromChunk(chunk) 46 | } yield root 47 | 48 | def toChunk[A](root: VectorSchemaRoot)(implicit decoder: VectorSchemaRootDecoder[A]): Task[Chunk[A]] = 49 | decoder.decodeZIO(root) 50 | 51 | def toStream[A](root: VectorSchemaRoot)(implicit decoder: VectorSchemaRootDecoder[A]): ZStream[Any, Throwable, A] = 52 | ZStream.fromIterableZIO(decoder.decodeZIO(root)) 53 | 54 | } 55 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/ValidationError.scala: -------------------------------------------------------------------------------- 1 | package 
me.mnedokushev.zio.apache.arrow.core 2 | 3 | final case class ValidationError( 4 | message: String, 5 | cause: Option[Throwable] = None 6 | ) extends IllegalArgumentException(message, cause.getOrElse(new Throwable())) 7 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/Vector.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core 2 | 3 | import me.mnedokushev.zio.apache.arrow.core.codec.{ ValueVectorDecoder, ValueVectorEncoder } 4 | import org.apache.arrow.memory.BufferAllocator 5 | import org.apache.arrow.vector.ValueVector 6 | import zio._ 7 | import zio.stream.ZStream 8 | 9 | object Vector { 10 | 11 | def fromChunk[V <: ValueVector]: FromChunkPartiallyApplied[V] = 12 | new FromChunkPartiallyApplied[V] 13 | 14 | final class FromChunkPartiallyApplied[V <: ValueVector](private val dummy: Boolean = true) extends AnyVal { 15 | def apply[A](chunk: Chunk[A])(implicit encoder: ValueVectorEncoder[V, A]): RIO[Scope & BufferAllocator, V] = 16 | encoder.encodeZIO(chunk) 17 | } 18 | 19 | def fromStream[V <: ValueVector]: FromStreamPartiallyApplied[V] = 20 | new FromStreamPartiallyApplied[V] 21 | 22 | final class FromStreamPartiallyApplied[V <: ValueVector](private val dummy: Boolean = true) extends AnyVal { 23 | def apply[R, A]( 24 | stream: ZStream[R, Throwable, A] 25 | )(implicit encoder: ValueVectorEncoder[V, A]): RIO[R & Scope & BufferAllocator, V] = 26 | for { 27 | chunk <- stream.runCollect 28 | vec <- fromChunk(chunk) 29 | } yield vec 30 | } 31 | 32 | def toChunk[A]: ToChunkPartiallyApplied[A] = 33 | new ToChunkPartiallyApplied[A] 34 | 35 | final class ToChunkPartiallyApplied[A](private val dummy: Boolean = true) extends AnyVal { 36 | def apply[V <: ValueVector](vec: V)(implicit decoder: ValueVectorDecoder[V, A]): Task[Chunk[A]] = 37 | decoder.decodeZIO(vec) 38 | } 39 | 40 | def 
toStream[A]: ToStreamPartiallyApplied[A] = 41 | new ToStreamPartiallyApplied[A] 42 | 43 | final class ToStreamPartiallyApplied[A](private val dummy: Boolean = true) extends AnyVal { 44 | def apply[V <: ValueVector](vec: V)(implicit decoder: ValueVectorDecoder[V, A]): ZStream[Any, Throwable, A] = 45 | ZStream.fromIterableZIO(decoder.decodeZIO(vec)) 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/DecoderError.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import java.io.IOException 4 | 5 | final case class DecoderError( 6 | message: String, 7 | cause: Option[Throwable] = None 8 | ) extends IOException(message, cause.getOrElse(new Throwable())) 9 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/EncoderError.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import java.io.IOException 4 | 5 | final case class EncoderError( 6 | message: String, 7 | cause: Option[Throwable] = None 8 | ) extends IOException(message, cause.getOrElse(new Throwable())) 9 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/SchemaEncoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import org.apache.arrow.vector.types.pojo.{ ArrowType, Field, FieldType, Schema => JSchema } 4 | import zio.schema.{ Deriver, Factory, Schema, StandardType } 5 | 6 | import scala.annotation.nowarn 7 | import scala.jdk.CollectionConverters._ 8 | 9 | trait SchemaEncoder[A] { self => 
10 | 11 | def encode(implicit schema: Schema[A]): Either[Throwable, JSchema] = 12 | Left(EncoderError(s"Given ZIO schema $schema mut be of type Schema.Record[A]")) 13 | 14 | def encodeField(name: String, nullable: Boolean): Field 15 | 16 | } 17 | 18 | object SchemaEncoder { 19 | 20 | def primitive[A](encode0: (String, Boolean) => Field)(implicit @nowarn ev: StandardType[A]): SchemaEncoder[A] = 21 | new SchemaEncoder[A] { 22 | 23 | override def encodeField(name: String, nullable: Boolean): Field = 24 | encode0(name, nullable) 25 | } 26 | 27 | implicit def encoder[A: Factory: Schema](deriver: Deriver[SchemaEncoder]): SchemaEncoder[A] = 28 | implicitly[Factory[A]].derive[SchemaEncoder](deriver) 29 | 30 | def fromDeriver[A: Factory: Schema](deriver: Deriver[SchemaEncoder]): SchemaEncoder[A] = 31 | implicitly[Factory[A]].derive[SchemaEncoder](deriver) 32 | 33 | def fromDefaultDeriver[A: Factory: Schema]: SchemaEncoder[A] = 34 | fromDeriver(SchemaEncoderDeriver.default) 35 | 36 | def fromSummonedDeriver[A: Factory: Schema]: SchemaEncoder[A] = 37 | fromDeriver(SchemaEncoderDeriver.summoned) 38 | 39 | def primitiveField(name: String, tpe: ArrowType.PrimitiveType, nullable: Boolean): Field = 40 | new Field(name, new FieldType(nullable, tpe, null), null) 41 | 42 | def listField(name: String, child: Field, nullable: Boolean): Field = 43 | new Field(name, new FieldType(nullable, new ArrowType.List, null), List(child).asJava) 44 | 45 | def structField(name: String, fields: List[Field], nullable: Boolean): Field = 46 | new Field(name, new FieldType(nullable, new ArrowType.Struct, null), fields.asJava) 47 | 48 | } 49 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/SchemaEncoderDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import 
org.apache.arrow.vector.types.FloatingPointPrecision 4 | import org.apache.arrow.vector.types.pojo.{ ArrowType, Field, Schema => JSchema } 5 | import zio.Chunk 6 | import zio.schema.{ Deriver, Schema, StandardType } 7 | 8 | import scala.jdk.CollectionConverters._ 9 | import scala.util.control.NonFatal 10 | 11 | object SchemaEncoderDeriver { 12 | 13 | val default: Deriver[SchemaEncoder] = new Deriver[SchemaEncoder] { 14 | 15 | override def deriveRecord[A]( 16 | record: Schema.Record[A], 17 | fields: => Chunk[Deriver.WrappedF[SchemaEncoder, ?]], 18 | summoned: => Option[SchemaEncoder[A]] 19 | ): SchemaEncoder[A] = new SchemaEncoder[A] { 20 | 21 | override def encode(implicit schema: Schema[A]): Either[Throwable, JSchema] = 22 | try { 23 | val fields0 = 24 | record.fields.zip(fields.map(_.unwrap)).map { case (field, encoder) => 25 | encoder.encodeField(field.name, nullable = false) 26 | } 27 | 28 | Right(new JSchema(fields0.toList.asJava)) 29 | } catch { 30 | case encodeError: EncoderError => Left(encodeError) 31 | case NonFatal(ex) => Left(EncoderError("Error encoding schema", Some(ex))) 32 | } 33 | 34 | override def encodeField(name: String, nullable: Boolean): Field = { 35 | val fields0 = record.fields 36 | .zip(fields.map(_.unwrap)) 37 | .map { case (field, encoder) => 38 | encoder.encodeField(field.name, nullable = false) 39 | } 40 | .toList 41 | 42 | SchemaEncoder.structField(name, fields0, nullable) 43 | } 44 | 45 | } 46 | 47 | override def deriveEnum[A]( 48 | `enum`: Schema.Enum[A], 49 | cases: => Chunk[Deriver.WrappedF[SchemaEncoder, ?]], 50 | summoned: => Option[SchemaEncoder[A]] 51 | ): SchemaEncoder[A] = 52 | new SchemaEncoder[A] { 53 | 54 | override def encodeField(name: String, nullable: Boolean): Field = 55 | throw EncoderError(s"Unsupported ZIO Schema type ${`enum`}") 56 | 57 | } 58 | 59 | override def derivePrimitive[A]( 60 | st: StandardType[A], 61 | summoned: => Option[SchemaEncoder[A]] 62 | ): SchemaEncoder[A] = new SchemaEncoder[A] { 63 | 64 | 
override def encodeField(name: String, nullable: Boolean): Field = { 65 | def namedField(arrowType: ArrowType.PrimitiveType) = 66 | SchemaEncoder.primitiveField(name, arrowType, nullable) 67 | 68 | st match { 69 | case StandardType.StringType => 70 | namedField(new ArrowType.Utf8) 71 | case StandardType.BoolType => 72 | namedField(new ArrowType.Bool) 73 | case StandardType.ByteType => 74 | namedField(new ArrowType.Int(8, false)) 75 | case StandardType.ShortType => 76 | namedField(new ArrowType.Int(16, true)) 77 | case StandardType.IntType => 78 | namedField(new ArrowType.Int(32, true)) 79 | case StandardType.LongType => 80 | namedField(new ArrowType.Int(64, true)) 81 | case StandardType.FloatType => 82 | namedField(new ArrowType.FloatingPoint(FloatingPointPrecision.HALF)) 83 | case StandardType.DoubleType => 84 | namedField(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)) 85 | case StandardType.BinaryType => 86 | namedField(new ArrowType.Binary) 87 | case StandardType.CharType => 88 | namedField(new ArrowType.Int(16, false)) 89 | case StandardType.UUIDType => 90 | namedField(new ArrowType.FixedSizeBinary(8)) 91 | case StandardType.BigDecimalType => 92 | namedField(new ArrowType.Decimal(11, 2, 128)) 93 | case StandardType.BigIntegerType => 94 | namedField(new ArrowType.FixedSizeBinary(8)) 95 | case StandardType.DayOfWeekType => 96 | namedField(new ArrowType.Int(3, false)) 97 | case StandardType.MonthType => 98 | namedField(new ArrowType.Int(4, false)) 99 | case StandardType.MonthDayType => 100 | namedField(new ArrowType.Int(64, false)) 101 | case StandardType.PeriodType => 102 | namedField(new ArrowType.FixedSizeBinary(8)) 103 | case StandardType.YearType => 104 | namedField(new ArrowType.Int(16, false)) 105 | case StandardType.YearMonthType => 106 | namedField(new ArrowType.Int(64, false)) 107 | case StandardType.ZoneIdType => 108 | namedField(new ArrowType.Utf8) 109 | case StandardType.ZoneOffsetType => 110 | namedField(new ArrowType.Utf8) 111 | case 
StandardType.DurationType => 112 | namedField(new ArrowType.Int(64, false)) 113 | case StandardType.InstantType => 114 | namedField(new ArrowType.Int(64, false)) 115 | case StandardType.LocalDateType => 116 | namedField(new ArrowType.Utf8) 117 | case StandardType.LocalTimeType => 118 | namedField(new ArrowType.Utf8) 119 | case StandardType.LocalDateTimeType => 120 | namedField(new ArrowType.Utf8) 121 | case StandardType.OffsetTimeType => 122 | namedField(new ArrowType.Utf8) 123 | case StandardType.OffsetDateTimeType => 124 | namedField(new ArrowType.Utf8) 125 | case StandardType.ZonedDateTimeType => 126 | namedField(new ArrowType.Utf8) 127 | case other => 128 | throw EncoderError(s"Unsupported ZIO Schema StandardType $other") 129 | } 130 | } 131 | 132 | } 133 | 134 | override def deriveOption[A]( 135 | option: Schema.Optional[A], 136 | inner: => SchemaEncoder[A], 137 | summoned: => Option[SchemaEncoder[Option[A]]] 138 | ): SchemaEncoder[Option[A]] = new SchemaEncoder[Option[A]] { 139 | 140 | override def encodeField(name: String, nullable: Boolean): Field = 141 | inner.encodeField(name, nullable = true) 142 | 143 | } 144 | 145 | override def deriveSequence[C[_], A]( 146 | sequence: Schema.Sequence[C[A], A, ?], 147 | inner: => SchemaEncoder[A], 148 | summoned: => Option[SchemaEncoder[C[A]]] 149 | ): SchemaEncoder[C[A]] = new SchemaEncoder[C[A]] { 150 | 151 | override def encodeField(name: String, nullable: Boolean): Field = 152 | SchemaEncoder.listField(name, inner.encodeField("element", nullable = false), nullable) 153 | 154 | } 155 | 156 | override def deriveMap[K, V]( 157 | map: Schema.Map[K, V], 158 | key: => SchemaEncoder[K], 159 | value: => SchemaEncoder[V], 160 | summoned: => Option[SchemaEncoder[Map[K, V]]] 161 | ): SchemaEncoder[Map[K, V]] = new SchemaEncoder[Map[K, V]] { 162 | 163 | override def encodeField(name: String, nullable: Boolean): Field = 164 | throw EncoderError(s"Unsupported ZIO Schema type $map") 165 | 166 | } 167 | 168 | override def 
deriveTransformedRecord[A, B]( 169 | record: Schema.Record[A], 170 | transform: Schema.Transform[A, B, ?], 171 | fields: => Chunk[Deriver.WrappedF[SchemaEncoder, ?]], 172 | summoned: => Option[SchemaEncoder[B]] 173 | ): SchemaEncoder[B] = new SchemaEncoder[B] { 174 | 175 | override def encodeField(name: String, nullable: Boolean): Field = 176 | throw EncoderError(s"Unsupported ZIO Schema type $record") 177 | 178 | } 179 | 180 | }.cached 181 | 182 | def summoned: Deriver[SchemaEncoder] = default.autoAcceptSummoned 183 | 184 | } 185 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/ValueDecoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import org.apache.arrow.vector.ValueVector 4 | import org.apache.arrow.vector.complex.reader.FieldReader 5 | import zio.schema.{ DynamicValue, _ } 6 | import zio.{ Duration, _ } 7 | 8 | import java.nio.ByteBuffer 9 | import java.time._ 10 | import java.util.UUID 11 | import scala.collection.immutable.ListMap 12 | 13 | trait ValueDecoder[+A] { 14 | 15 | def decodeValue[V0 <: ValueVector](name: Option[String], reader: FieldReader, vec: V0, idx: Int): DynamicValue 16 | 17 | } 18 | 19 | object ValueDecoder { 20 | 21 | private[codec] def decodeStruct[V0 <: ValueVector, A]( 22 | fields: Chunk[Schema.Field[A, ?]], 23 | decoders: Chunk[ValueDecoder[?]], 24 | reader: FieldReader, 25 | vec: V0, 26 | idx: Int 27 | ): DynamicValue = { 28 | val values = ListMap( 29 | fields 30 | .zip(decoders) 31 | .map { case (field, decoder) => 32 | val value: DynamicValue = decoder.decodeValue(Some(field.name), reader, vec, idx) 33 | 34 | field.name.toString -> value 35 | }* 36 | ) 37 | 38 | DynamicValue.Record(TypeId.Structural, values) 39 | } 40 | 41 | private[codec] def decodeList[V0 <: ValueVector, A]( 42 | decoder: ValueDecoder[A], 43 | reader: 
FieldReader, 44 | vec: V0, 45 | idx: Int 46 | ): DynamicValue = { 47 | val builder = ChunkBuilder.make[DynamicValue]() 48 | var idx0 = idx 49 | 50 | while (reader.next()) { 51 | builder.addOne(decoder.decodeValue(None, reader, vec, idx0)) 52 | idx0 += 1 53 | } 54 | 55 | DynamicValue.Sequence(builder.result()) 56 | } 57 | 58 | private[codec] def decodePrimitive[A](standardType: StandardType[A], reader: FieldReader): DynamicValue = 59 | standardType match { 60 | case t: StandardType.StringType.type => 61 | DynamicValue.Primitive[String](reader.readText().toString, t) 62 | case t: StandardType.BoolType.type => 63 | DynamicValue.Primitive[Boolean](reader.readBoolean(), t) 64 | case t: StandardType.ByteType.type => 65 | DynamicValue.Primitive[Byte](reader.readByte(), t) 66 | case t: StandardType.ShortType.type => 67 | DynamicValue.Primitive[Short](reader.readShort(), t) 68 | case t: StandardType.IntType.type => 69 | DynamicValue.Primitive[Int](reader.readInteger(), t) 70 | case t: StandardType.LongType.type => 71 | DynamicValue.Primitive[Long](reader.readLong(), t) 72 | case t: StandardType.FloatType.type => 73 | DynamicValue.Primitive[Float](reader.readFloat(), t) 74 | case t: StandardType.DoubleType.type => 75 | DynamicValue.Primitive[Double](reader.readDouble(), t) 76 | case t: StandardType.BinaryType.type => 77 | DynamicValue.Primitive[Chunk[Byte]](Chunk.fromArray(reader.readByteArray()), t) 78 | case t: StandardType.CharType.type => 79 | DynamicValue.Primitive[Char](reader.readCharacter(), t) 80 | case t: StandardType.UUIDType.type => 81 | val bb = ByteBuffer.wrap(reader.readByteArray()) 82 | DynamicValue.Primitive[UUID](new UUID(bb.getLong(0), bb.getLong(8)), t) 83 | case t: StandardType.BigDecimalType.type => 84 | DynamicValue.Primitive[java.math.BigDecimal](reader.readBigDecimal(), t) 85 | case t: StandardType.BigIntegerType.type => 86 | DynamicValue.Primitive[java.math.BigInteger](new java.math.BigInteger(reader.readByteArray()), t) 87 | case t: 
StandardType.DayOfWeekType.type => 88 | DynamicValue.Primitive[DayOfWeek](DayOfWeek.of(reader.readInteger()), t) 89 | case t: StandardType.MonthType.type => 90 | DynamicValue.Primitive[Month](Month.of(reader.readInteger()), t) 91 | case t: StandardType.MonthDayType.type => 92 | val bb = ByteBuffer.allocate(8).putLong(reader.readLong()) 93 | DynamicValue.Primitive[MonthDay](MonthDay.of(bb.getInt(0), bb.getInt(4)), t) 94 | case t: StandardType.PeriodType.type => 95 | val bb = ByteBuffer.wrap(reader.readByteArray()) 96 | DynamicValue.Primitive[Period](Period.of(bb.getInt(0), bb.getInt(4), bb.getInt(8)), t) 97 | case t: StandardType.YearType.type => 98 | DynamicValue.Primitive[Year](Year.of(reader.readInteger()), t) 99 | case t: StandardType.YearMonthType.type => 100 | val bb = ByteBuffer.allocate(8).putLong(reader.readLong()) 101 | DynamicValue.Primitive[YearMonth](YearMonth.of(bb.getInt(0), bb.getInt(4)), t) 102 | case t: StandardType.ZoneIdType.type => 103 | DynamicValue.Primitive[ZoneId](ZoneId.of(reader.readText().toString), t) 104 | case t: StandardType.ZoneOffsetType.type => 105 | DynamicValue.Primitive[ZoneOffset](ZoneOffset.of(reader.readText().toString), t) 106 | case t: StandardType.DurationType.type => 107 | DynamicValue.Primitive[Duration](Duration.fromMillis(reader.readLong()), t) 108 | case t: StandardType.InstantType.type => 109 | DynamicValue.Primitive[Instant](Instant.ofEpochMilli(reader.readLong()), t) 110 | case t: StandardType.LocalDateType.type => 111 | DynamicValue.Primitive[LocalDate](LocalDate.parse(reader.readText().toString), t) 112 | case t: StandardType.LocalTimeType.type => 113 | DynamicValue.Primitive[LocalTime](LocalTime.parse(reader.readText().toString), t) 114 | case t: StandardType.LocalDateTimeType.type => 115 | DynamicValue.Primitive[LocalDateTime](LocalDateTime.parse(reader.readText().toString), t) 116 | case t: StandardType.OffsetTimeType.type => 117 | 
DynamicValue.Primitive[OffsetTime](OffsetTime.parse(reader.readText().toString), t) 118 | case t: StandardType.OffsetDateTimeType.type => 119 | DynamicValue.Primitive[OffsetDateTime](OffsetDateTime.parse(reader.readText().toString), t) 120 | case t: StandardType.ZonedDateTimeType.type => 121 | DynamicValue.Primitive[ZonedDateTime](ZonedDateTime.parse(reader.readText().toString), t) 122 | case other => 123 | throw DecoderError(s"Unsupported ZIO Schema type $other") 124 | } 125 | 126 | } 127 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/ValueEncoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import org.apache.arrow.memory.{ ArrowBuf, BufferAllocator } 4 | import org.apache.arrow.vector.complex.writer.{ FieldWriter, _ } 5 | import zio.Chunk 6 | import zio.schema.{ Schema, StandardType } 7 | 8 | import java.nio.charset.StandardCharsets 9 | import java.time._ 10 | import java.util.UUID 11 | 12 | trait ValueEncoder[-A] { 13 | 14 | def encodeValue( 15 | value: A, 16 | name: Option[String], 17 | writer: FieldWriter 18 | )(implicit alloc: BufferAllocator): Unit 19 | 20 | } 21 | 22 | object ValueEncoder { 23 | 24 | private[codec] def encodeStruct[A]( 25 | value: A, 26 | fields: Chunk[Schema.Field[A, ?]], 27 | encoders: Chunk[ValueEncoder[?]], 28 | writer: FieldWriter 29 | )(implicit alloc: BufferAllocator): Unit = { 30 | 31 | def encodeValue0[A1]( 32 | encoder: ValueEncoder[A1], 33 | value: A, 34 | get: A => Any, 35 | name: Option[String], 36 | writer: FieldWriter 37 | )(implicit alloc: BufferAllocator) = 38 | encoder.encodeValue(get(value).asInstanceOf[A1], name, writer) 39 | 40 | writer.start() 41 | fields.zip(encoders).foreach { case (Schema.Field(name, _, _, _, get, _), encoder) => 42 | encodeValue0(encoder, value, get, Some(name), writer) 43 | } 44 | 
writer.end() 45 | } 46 | 47 | private[codec] def encodeList[A]( 48 | chunk: Chunk[A], 49 | encoder: ValueEncoder[A], 50 | writer: FieldWriter 51 | )(implicit alloc: BufferAllocator): Unit = { 52 | val it = chunk.iterator 53 | 54 | writer.startList() 55 | it.foreach(encoder.encodeValue(_, None, writer)) 56 | writer.endList() 57 | } 58 | 59 | private[codec] def encodePrimitive0[A]( 60 | nested: Boolean, 61 | standardType: StandardType[A], 62 | value: A, 63 | writer: FieldWriter, 64 | name: Option[String] 65 | )(implicit alloc: BufferAllocator): Unit = { 66 | 67 | def resolveWriter[W <: BaseWriter](f1: => W)(f2: String => W): W = 68 | if (nested) 69 | name.fold(f1)(f2) 70 | else 71 | writer.asInstanceOf[W] 72 | 73 | def withBuffer(size: Long)(fn: ArrowBuf => Unit) = { 74 | val buffer = alloc.buffer(size) 75 | fn(buffer) 76 | buffer.close() 77 | } 78 | 79 | def writeString(s: String) = 80 | withBuffer(s.length.toLong) { buffer => 81 | buffer.writeBytes(s.getBytes(StandardCharsets.UTF_8)) 82 | resolveWriter(writer.varChar)(writer.varChar).writeVarChar(0, s.length, buffer) 83 | } 84 | 85 | def writeLong[A1](v: A1)(fst: A1 => Int)(snd: A1 => Int) = 86 | withBuffer(8) { buffer => 87 | buffer.writeInt(fst(v)) 88 | buffer.writeInt(snd(v)) 89 | resolveWriter(writer.bigInt)(writer.bigInt).writeBigInt(buffer.getLong(0)) 90 | } 91 | 92 | (standardType, value) match { 93 | case (StandardType.StringType, v: String) => 94 | writeString(v) 95 | case (StandardType.BoolType, v: Boolean) => 96 | resolveWriter(writer.bit)(writer.bit).writeBit(if (v) 1 else 0) 97 | case (StandardType.ByteType, v: Byte) => 98 | resolveWriter(writer.uInt1)(writer.uInt1).writeUInt1(v) 99 | case (StandardType.ShortType, v: Short) => 100 | resolveWriter(writer.smallInt)(writer.smallInt).writeSmallInt(v) 101 | case (StandardType.IntType, v: Int) => 102 | resolveWriter(writer.integer)(writer.integer).writeInt(v) 103 | case (StandardType.LongType, v: Long) => 104 | 
resolveWriter(writer.bigInt)(writer.bigInt).writeBigInt(v) 105 | case (StandardType.FloatType, v: Float) => 106 | resolveWriter(writer.float4)(writer.float4).writeFloat4(v) 107 | case (StandardType.DoubleType, v: Double) => 108 | resolveWriter(writer.float8)(writer.float8).writeFloat8(v) 109 | case (StandardType.BinaryType, v: Chunk[_]) => 110 | withBuffer(v.length.toLong) { buffer => 111 | buffer.writeBytes(v.asInstanceOf[Chunk[Byte]].toArray) 112 | resolveWriter(writer.largeVarBinary)(writer.largeVarBinary).writeLargeVarBinary(0L, v.length.toLong, buffer) 113 | } 114 | case (StandardType.CharType, v: Char) => 115 | resolveWriter(writer.uInt2)(writer.uInt2).writeUInt2(v) 116 | case (StandardType.UUIDType, v: UUID) => 117 | withBuffer(16) { buffer => 118 | buffer.writeLong(v.getMostSignificantBits) 119 | buffer.writeLong(v.getLeastSignificantBits) 120 | resolveWriter(writer.varBinary)(writer.varBinary).writeVarBinary(0, 16, buffer) 121 | } 122 | case (StandardType.BigDecimalType, v: java.math.BigDecimal) => 123 | resolveWriter(writer.decimal)(writer.decimal).writeDecimal(v) 124 | case (StandardType.BigIntegerType, v: java.math.BigInteger) => 125 | val bb = v.toByteArray 126 | withBuffer(bb.length.toLong) { buffer => 127 | buffer.writeBytes(bb) 128 | resolveWriter(writer.varBinary)(writer.varBinary).writeVarBinary(0, bb.length, buffer) 129 | } 130 | case (StandardType.DayOfWeekType, v: DayOfWeek) => 131 | resolveWriter(writer.integer)(writer.integer).writeInt(v.getValue) 132 | case (StandardType.MonthType, v: Month) => 133 | resolveWriter(writer.integer)(writer.integer).writeInt(v.getValue) 134 | case (StandardType.MonthDayType, v: MonthDay) => 135 | writeLong(v)(_.getDayOfMonth)(_.getMonthValue) 136 | case (StandardType.PeriodType, v: Period) => 137 | withBuffer(12) { buffer => 138 | buffer.writeInt(v.getDays) 139 | buffer.writeInt(v.getMonths) 140 | buffer.writeInt(v.getYears) 141 | resolveWriter(writer.varBinary)(writer.varBinary).writeVarBinary(0, 12, buffer) 
142 | } 143 | case (StandardType.YearType, v: Year) => 144 | resolveWriter(writer.integer)(writer.integer).writeInt(v.getValue) 145 | case (StandardType.YearMonthType, v: YearMonth) => 146 | writeLong(v)(_.getMonthValue)(_.getYear) 147 | case (StandardType.ZoneIdType, v: ZoneId) => 148 | writeString(v.toString) 149 | case (StandardType.ZoneOffsetType, v: ZoneOffset) => 150 | writeString(v.toString) 151 | case (StandardType.DurationType, v: Duration) => 152 | resolveWriter(writer.bigInt)(writer.bigInt).writeBigInt(v.toMillis) 153 | case (StandardType.InstantType, v: Instant) => 154 | resolveWriter(writer.bigInt)(writer.bigInt).writeBigInt(v.toEpochMilli) 155 | case (StandardType.LocalDateType, v: LocalDate) => 156 | writeString(v.toString) 157 | case (StandardType.LocalTimeType, v: LocalTime) => 158 | writeString(v.toString) 159 | case (StandardType.LocalDateTimeType, v: LocalDateTime) => 160 | writeString(v.toString) 161 | case (StandardType.OffsetTimeType, v: OffsetTime) => 162 | writeString(v.toString) 163 | case (StandardType.OffsetDateTimeType, v: OffsetDateTime) => 164 | writeString(v.toString) 165 | case (StandardType.ZonedDateTimeType, v: ZonedDateTime) => 166 | writeString(v.toString) 167 | case (other, _) => 168 | throw EncoderError(s"Unsupported ZIO Schema StandardType $other") 169 | } 170 | 171 | } 172 | 173 | private[codec] def encodePrimitive[A]( 174 | standardType: StandardType[A], 175 | value: A, 176 | name: Option[String], 177 | writer: FieldWriter 178 | )(implicit alloc: BufferAllocator): Unit = 179 | encodePrimitive0(nested = true, standardType, value, writer, name) 180 | 181 | private[codec] def encodePrimitive[A]( 182 | standardType: StandardType[A], 183 | value: A, 184 | writer: FieldWriter 185 | )(implicit alloc: BufferAllocator): Unit = 186 | encodePrimitive0(nested = false, standardType, value, writer, name = None) 187 | 188 | } 189 | -------------------------------------------------------------------------------- 
/modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/ValueVectorCodec.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core.codec

import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector.complex.{ ListVector, StructVector }
import org.apache.arrow.vector.{ ValueVector, _ }
import zio._
import zio.schema.Schema

/**
 * Pairs a [[ValueVectorEncoder]] with a [[ValueVectorDecoder]] for the same
 * Arrow vector type `V` and Scala value type `A`, exposing the round-trip
 * encode/decode entry points of both.
 */
final case class ValueVectorCodec[V <: ValueVector, A](
  encoder: ValueVectorEncoder[V, A],
  decoder: ValueVectorDecoder[V, A]
) { self =>

  /** Decodes the vector into a chunk, surfacing failures in the ZIO error channel. */
  def decodeZIO(vec: V): Task[Chunk[A]] =
    decoder.decodeZIO(vec)

  /** Decodes the vector into a chunk, returning failures as `Left`. */
  def decode(vec: V): Either[Throwable, Chunk[A]] =
    decoder.decode(vec)

  /** Encodes the chunk into a new vector within a `Scope`, using the allocator from the environment. */
  def encodeZIO(chunk: Chunk[A]): RIO[Scope & BufferAllocator, V] =
    encoder.encodeZIO(chunk)

  /** Encodes the chunk into a new vector using the implicit allocator, returning failures as `Left`. */
  def encode(chunk: Chunk[A])(implicit alloc: BufferAllocator): Either[Throwable, V] =
    encoder.encode(chunk)

  /** Derives a codec for `B` from `A` via `f`/`g`; for lawful round-trips they should be mutual inverses. */
  def transform[B](f: A => B, g: B => A)(implicit schemaSrc: Schema[A], schemaDst: Schema[B]): ValueVectorCodec[V, B] =
    ValueVectorCodec(encoder.contramap(g), decoder.map(f))

}

object ValueVectorCodec {

  /** Assembles a codec from implicitly available encoder/decoder instances. */
  implicit def codec[V <: ValueVector, A](implicit
    encoder: ValueVectorEncoder[V, A],
    decoder: ValueVectorDecoder[V, A]
  ): ValueVectorCodec[V, A] =
    ValueVectorCodec[V, A](encoder, decoder)

  // Pre-wired codecs for primitive types; each Scala type is paired with the
  // Arrow vector class its derived encoder/decoder operate on.
  implicit val stringCodec: ValueVectorCodec[VarCharVector, String] =
    codec[VarCharVector, String]
  implicit val boolCodec: ValueVectorCodec[BitVector, Boolean] =
    codec[BitVector, Boolean]
  implicit val byteCodec: ValueVectorCodec[UInt1Vector, Byte] =
    codec[UInt1Vector, Byte]
  implicit val shortCodec: ValueVectorCodec[SmallIntVector, Short] =
    codec[SmallIntVector, Short]
  implicit val intCodec: ValueVectorCodec[IntVector, Int] =
    codec[IntVector, Int]
  implicit val longCodec:
ValueVectorCodec[BigIntVector, Long] = 50 | codec[BigIntVector, Long] 51 | implicit val floatCodec: ValueVectorCodec[Float4Vector, Float] = 52 | codec[Float4Vector, Float] 53 | implicit val doubleCodec: ValueVectorCodec[Float8Vector, Double] = 54 | codec[Float8Vector, Double] 55 | implicit val binaryCodec: ValueVectorCodec[LargeVarBinaryVector, Chunk[Byte]] = 56 | codec[LargeVarBinaryVector, Chunk[Byte]] 57 | implicit val charCodec: ValueVectorCodec[UInt2Vector, Char] = 58 | codec[UInt2Vector, Char] 59 | implicit val uuidCodec: ValueVectorCodec[VarBinaryVector, java.util.UUID] = 60 | codec[VarBinaryVector, java.util.UUID] 61 | implicit val bigDecimalCodec: ValueVectorCodec[DecimalVector, java.math.BigDecimal] = 62 | codec[DecimalVector, java.math.BigDecimal] 63 | implicit val bigIntegerCodec: ValueVectorCodec[VarBinaryVector, java.math.BigInteger] = 64 | codec[VarBinaryVector, java.math.BigInteger] 65 | implicit val dayOfWeekCodec: ValueVectorCodec[IntVector, java.time.DayOfWeek] = 66 | codec[IntVector, java.time.DayOfWeek] 67 | implicit val monthCodec: ValueVectorCodec[IntVector, java.time.Month] = 68 | codec[IntVector, java.time.Month] 69 | implicit val monthDayCodec: ValueVectorCodec[BigIntVector, java.time.MonthDay] = 70 | codec[BigIntVector, java.time.MonthDay] 71 | implicit val periodCodec: ValueVectorCodec[VarBinaryVector, java.time.Period] = 72 | codec[VarBinaryVector, java.time.Period] 73 | implicit val yearCodec: ValueVectorCodec[IntVector, java.time.Year] = 74 | codec[IntVector, java.time.Year] 75 | implicit val yearMonthCodec: ValueVectorCodec[BigIntVector, java.time.YearMonth] = 76 | codec[BigIntVector, java.time.YearMonth] 77 | implicit val zoneIdCodec: ValueVectorCodec[VarCharVector, java.time.ZoneId] = 78 | codec[VarCharVector, java.time.ZoneId] 79 | implicit val zoneOffsetCodec: ValueVectorCodec[VarCharVector, java.time.ZoneOffset] = 80 | codec[VarCharVector, java.time.ZoneOffset] 81 | implicit val durationCodec: ValueVectorCodec[BigIntVector, 
Duration] = 82 | codec[BigIntVector, Duration] 83 | implicit val instantCodec: ValueVectorCodec[BigIntVector, java.time.Instant] = 84 | codec[BigIntVector, java.time.Instant] 85 | implicit val localDateCodec: ValueVectorCodec[VarCharVector, java.time.LocalDate] = 86 | codec[VarCharVector, java.time.LocalDate] 87 | implicit val localTimeCodec: ValueVectorCodec[VarCharVector, java.time.LocalTime] = 88 | codec[VarCharVector, java.time.LocalTime] 89 | implicit val localDateTimeCodec: ValueVectorCodec[VarCharVector, java.time.LocalDateTime] = 90 | codec[VarCharVector, java.time.LocalDateTime] 91 | implicit val offsetTimeCodec: ValueVectorCodec[VarCharVector, java.time.OffsetTime] = 92 | codec[VarCharVector, java.time.OffsetTime] 93 | implicit val offsetDateTimeCodec: ValueVectorCodec[VarCharVector, java.time.OffsetDateTime] = 94 | codec[VarCharVector, java.time.OffsetDateTime] 95 | implicit val zonedDateTimeCodec: ValueVectorCodec[VarCharVector, java.time.ZonedDateTime] = 96 | codec[VarCharVector, java.time.ZonedDateTime] 97 | 98 | implicit def listCodec[A, C[_]](implicit 99 | encoder: ValueVectorEncoder[ListVector, C[A]], 100 | decoder: ValueVectorDecoder[ListVector, C[A]] 101 | ): ValueVectorCodec[ListVector, C[A]] = 102 | codec[ListVector, C[A]] 103 | 104 | implicit def listChunkCodec[A](implicit 105 | encoder: ValueVectorEncoder[ListVector, Chunk[A]], 106 | decoder: ValueVectorDecoder[ListVector, Chunk[A]] 107 | ): ValueVectorCodec[ListVector, Chunk[A]] = 108 | listCodec[A, Chunk] 109 | 110 | implicit def listOptionCodec[A, C[_]](implicit 111 | encoder: ValueVectorEncoder[ListVector, C[Option[A]]], 112 | decoder: ValueVectorDecoder[ListVector, C[Option[A]]] 113 | ): ValueVectorCodec[ListVector, C[Option[A]]] = 114 | listCodec[Option[A], C] 115 | 116 | implicit def listChunkOptionCodec[A](implicit 117 | encoder: ValueVectorEncoder[ListVector, Chunk[Option[A]]], 118 | decoder: ValueVectorDecoder[ListVector, Chunk[Option[A]]] 119 | ): ValueVectorCodec[ListVector, 
Chunk[Option[A]]] = 120 | listChunkCodec[Option[A]] 121 | 122 | implicit def structCodec[A](implicit 123 | encoder: ValueVectorEncoder[StructVector, A], 124 | decoder: ValueVectorDecoder[StructVector, A] 125 | ): ValueVectorCodec[StructVector, A] = 126 | codec[StructVector, A] 127 | 128 | implicit def optionCodec[V <: ValueVector, A](implicit 129 | encoder: ValueVectorEncoder[V, Option[A]], 130 | decoder: ValueVectorDecoder[V, Option[A]] 131 | ): ValueVectorCodec[V, Option[A]] = 132 | codec[V, Option[A]] 133 | 134 | implicit def optionListCodec[A, C[_]](implicit 135 | encoder: ValueVectorEncoder[ListVector, Option[C[A]]], 136 | decoder: ValueVectorDecoder[ListVector, Option[C[A]]] 137 | ): ValueVectorCodec[ListVector, Option[C[A]]] = 138 | optionCodec[ListVector, C[A]] 139 | 140 | implicit def optionListChunkCodec[A](implicit 141 | encoder: ValueVectorEncoder[ListVector, Option[Chunk[A]]], 142 | decoder: ValueVectorDecoder[ListVector, Option[Chunk[A]]] 143 | ): ValueVectorCodec[ListVector, Option[Chunk[A]]] = 144 | optionListCodec[A, Chunk] 145 | 146 | } 147 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/ValueVectorDecoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import org.apache.arrow.vector._ 4 | import org.apache.arrow.vector.complex.reader.FieldReader 5 | import org.apache.arrow.vector.complex.{ ListVector, StructVector } 6 | import zio._ 7 | import zio.schema.{ Derive, Deriver, DynamicValue, Factory, Schema, StandardType } 8 | 9 | import scala.util.control.NonFatal 10 | 11 | trait ValueVectorDecoder[V <: ValueVector, A] extends ValueDecoder[A] { self => 12 | 13 | final def decodeZIO(vec: V): Task[Chunk[A]] = 14 | ZIO.fromEither(decode(vec)) 15 | 16 | final def decode(vec: V): Either[Throwable, Chunk[A]] = 17 | try 18 | Right(decodeUnsafe(vec)) 
19 | catch { 20 | case decoderError: DecoderError => Left(decoderError) 21 | case NonFatal(ex) => Left(DecoderError("Error decoding vector", Some(ex))) 22 | } 23 | 24 | def decodeUnsafe(vec: V): Chunk[A] 25 | 26 | def decodeNullableUnsafe(vec: V): Chunk[Option[A]] 27 | 28 | final def map[B](f: A => B)(implicit schemaSrc: Schema[A], schemaDst: Schema[B]): ValueVectorDecoder[V, B] = 29 | new ValueVectorDecoder[V, B] { 30 | 31 | override def decodeUnsafe(vec: V): Chunk[B] = 32 | self.decodeUnsafe(vec).map(f) 33 | 34 | override def decodeNullableUnsafe(vec: V): Chunk[Option[B]] = 35 | self.decodeNullableUnsafe(vec).map(_.map(f)) 36 | 37 | override def decodeValue[V0 <: ValueVector]( 38 | name: Option[String], 39 | reader: FieldReader, 40 | vec: V0, 41 | idx: Int 42 | ): DynamicValue = 43 | self 44 | .decodeValue(name, reader, vec, idx) 45 | .toValue(schemaSrc) 46 | .map(a => schemaDst.toDynamic(f(a))) 47 | .toTry 48 | .get 49 | 50 | } 51 | 52 | } 53 | 54 | object ValueVectorDecoder { 55 | 56 | def primitive[V <: ValueVector, A]( 57 | decode0: (StandardType[A], FieldReader) => DynamicValue 58 | )(implicit st: StandardType[A]): ValueVectorDecoder[V, A] = 59 | new ValueVectorDecoder[V, A] { 60 | 61 | override def decodeUnsafe(vec: V): Chunk[A] = { 62 | var idx = 0 63 | val len = vec.getValueCount 64 | val builder = ChunkBuilder.make[A](len) 65 | val reader = vec.getReader 66 | 67 | while (idx < len) { 68 | reader.setPosition(idx) 69 | val dynamicValue = decode0(st, reader) 70 | 71 | dynamicValue.toTypedValue(Schema.primitive(st)) match { 72 | case Right(v) => 73 | builder.addOne(v) 74 | idx += 1 75 | case Left(message) => 76 | throw DecoderError(message) 77 | } 78 | } 79 | 80 | builder.result() 81 | } 82 | 83 | override def decodeNullableUnsafe(vec: V): Chunk[Option[A]] = { 84 | var idx = 0 85 | val len = vec.getValueCount 86 | val builder = ChunkBuilder.make[Option[A]](len) 87 | val reader = vec.getReader 88 | 89 | while (idx < len) { 90 | if (!vec.isNull(idx)) { 91 | 
reader.setPosition(idx) 92 | val dynamicValue = decode0(st, reader) 93 | 94 | dynamicValue.toTypedValue(Schema.primitive(st)) match { 95 | case Right(v) => 96 | builder.addOne(Some(v)) 97 | case Left(message) => 98 | throw DecoderError(message) 99 | } 100 | } else { 101 | builder.addOne(None) 102 | } 103 | 104 | idx += 1 105 | } 106 | 107 | builder.result() 108 | } 109 | 110 | override def decodeValue[V0 <: ValueVector]( 111 | name: Option[String], 112 | reader: FieldReader, 113 | vec: V0, 114 | idx: Int 115 | ): DynamicValue = 116 | decode0(st, resolveReaderByName(name, reader)) 117 | 118 | } 119 | 120 | implicit def decoder[V <: ValueVector, A: Schema](deriver: Deriver[ValueVectorDecoder[V, *]])(implicit 121 | factory: Factory[A] 122 | ): ValueVectorDecoder[V, A] = 123 | factory.derive(deriver) 124 | 125 | implicit val stringDecoder: ValueVectorDecoder[VarCharVector, String] = 126 | decoder[VarCharVector, String](ValueVectorDecoderDeriver.default) 127 | implicit val boolDecoder: ValueVectorDecoder[BitVector, Boolean] = 128 | decoder[BitVector, Boolean](ValueVectorDecoderDeriver.default) 129 | implicit val byteDecoder: ValueVectorDecoder[UInt1Vector, Byte] = 130 | decoder[UInt1Vector, Byte](ValueVectorDecoderDeriver.default) 131 | implicit val shortDecoder: ValueVectorDecoder[SmallIntVector, Short] = 132 | decoder[SmallIntVector, Short](ValueVectorDecoderDeriver.default) 133 | implicit val intDecoder: ValueVectorDecoder[IntVector, Int] = 134 | decoder[IntVector, Int](ValueVectorDecoderDeriver.default) 135 | implicit val longDecoder: ValueVectorDecoder[BigIntVector, Long] = 136 | decoder[BigIntVector, Long](ValueVectorDecoderDeriver.default) 137 | implicit val floatDecoder: ValueVectorDecoder[Float4Vector, Float] = 138 | decoder[Float4Vector, Float](ValueVectorDecoderDeriver.default) 139 | implicit val doubleDecoder: ValueVectorDecoder[Float8Vector, Double] = 140 | decoder[Float8Vector, Double](ValueVectorDecoderDeriver.default) 141 | implicit val binaryDecoder: 
ValueVectorDecoder[LargeVarBinaryVector, Chunk[Byte]] = 142 | decoder[LargeVarBinaryVector, Chunk[Byte]](ValueVectorDecoderDeriver.default) 143 | implicit val charDecoder: ValueVectorDecoder[UInt2Vector, Char] = 144 | decoder[UInt2Vector, Char](ValueVectorDecoderDeriver.default) 145 | implicit val uuidDecoder: ValueVectorDecoder[VarBinaryVector, java.util.UUID] = 146 | decoder[VarBinaryVector, java.util.UUID](ValueVectorDecoderDeriver.default) 147 | implicit val bigDecimalDecoder: ValueVectorDecoder[DecimalVector, java.math.BigDecimal] = 148 | decoder[DecimalVector, java.math.BigDecimal](ValueVectorDecoderDeriver.default) 149 | implicit val bigIntegerDecoder: ValueVectorDecoder[VarBinaryVector, java.math.BigInteger] = 150 | decoder[VarBinaryVector, java.math.BigInteger](ValueVectorDecoderDeriver.default) 151 | implicit val dayOfWeekDecoder: ValueVectorDecoder[IntVector, java.time.DayOfWeek] = 152 | decoder[IntVector, java.time.DayOfWeek](ValueVectorDecoderDeriver.default) 153 | implicit val monthDecoder: ValueVectorDecoder[IntVector, java.time.Month] = 154 | decoder[IntVector, java.time.Month](ValueVectorDecoderDeriver.default) 155 | implicit val monthDayDecoder: ValueVectorDecoder[BigIntVector, java.time.MonthDay] = 156 | decoder[BigIntVector, java.time.MonthDay](ValueVectorDecoderDeriver.default) 157 | implicit val periodDecoder: ValueVectorDecoder[VarBinaryVector, java.time.Period] = 158 | decoder[VarBinaryVector, java.time.Period](ValueVectorDecoderDeriver.default) 159 | implicit val yearDecoder: ValueVectorDecoder[IntVector, java.time.Year] = 160 | decoder[IntVector, java.time.Year](ValueVectorDecoderDeriver.default) 161 | implicit val yearMonthDecoder: ValueVectorDecoder[BigIntVector, java.time.YearMonth] = 162 | decoder[BigIntVector, java.time.YearMonth](ValueVectorDecoderDeriver.default) 163 | implicit val zoneIdDecoder: ValueVectorDecoder[VarCharVector, java.time.ZoneId] = 164 | decoder[VarCharVector, java.time.ZoneId](ValueVectorDecoderDeriver.default) 
165 | implicit val zoneOffsetDecoder: ValueVectorDecoder[VarCharVector, java.time.ZoneOffset] = 166 | decoder[VarCharVector, java.time.ZoneOffset](ValueVectorDecoderDeriver.default) 167 | implicit val durationDecoder: ValueVectorDecoder[BigIntVector, java.time.Duration] = 168 | decoder[BigIntVector, java.time.Duration](ValueVectorDecoderDeriver.default) 169 | implicit val instantDecoder: ValueVectorDecoder[BigIntVector, java.time.Instant] = 170 | decoder[BigIntVector, java.time.Instant](ValueVectorDecoderDeriver.default) 171 | implicit val localDateDecoder: ValueVectorDecoder[VarCharVector, java.time.LocalDate] = 172 | decoder[VarCharVector, java.time.LocalDate](ValueVectorDecoderDeriver.default) 173 | implicit val localTimeDecoder: ValueVectorDecoder[VarCharVector, java.time.LocalTime] = 174 | decoder[VarCharVector, java.time.LocalTime](ValueVectorDecoderDeriver.default) 175 | implicit val localDateTimeDecoder: ValueVectorDecoder[VarCharVector, java.time.LocalDateTime] = 176 | decoder[VarCharVector, java.time.LocalDateTime](ValueVectorDecoderDeriver.default) 177 | implicit val offsetTimeDecoder: ValueVectorDecoder[VarCharVector, java.time.OffsetTime] = 178 | decoder[VarCharVector, java.time.OffsetTime](ValueVectorDecoderDeriver.default) 179 | implicit val offsetDateTimeDecoder: ValueVectorDecoder[VarCharVector, java.time.OffsetDateTime] = 180 | decoder[VarCharVector, java.time.OffsetDateTime](ValueVectorDecoderDeriver.default) 181 | implicit val zonedDateTimeDecoder: ValueVectorDecoder[VarCharVector, java.time.ZonedDateTime] = 182 | decoder[VarCharVector, java.time.ZonedDateTime](ValueVectorDecoderDeriver.default) 183 | 184 | implicit def listDecoder[A, C[_]](implicit 185 | factory: Factory[C[A]], 186 | schema: Schema[C[A]] 187 | ): ValueVectorDecoder[ListVector, C[A]] = 188 | listDecoderFromDefaultDeriver[A, C] 189 | 190 | implicit def listChunkDecoder[A](implicit 191 | factory: Factory[Chunk[A]], 192 | schema: Schema[Chunk[A]] 193 | ): 
ValueVectorDecoder[ListVector, Chunk[A]] = 194 | listDecoder[A, Chunk] 195 | 196 | implicit def listOptionDecoder[A, C[_]](implicit 197 | factory: Factory[C[Option[A]]], 198 | schema: Schema[C[Option[A]]] 199 | ): ValueVectorDecoder[ListVector, C[Option[A]]] = 200 | listDecoder[Option[A], C] 201 | 202 | implicit def listChunkOptionDecoder[A](implicit 203 | factory: Factory[Chunk[Option[A]]], 204 | schema: Schema[Chunk[Option[A]]] 205 | ): ValueVectorDecoder[ListVector, Chunk[Option[A]]] = 206 | listChunkDecoder[Option[A]] 207 | 208 | def listDecoderFromDeriver[A, C[_]]( 209 | deriver: Deriver[ValueVectorDecoder[ListVector, *]] 210 | )(implicit factory: Factory[C[A]], schema: Schema[C[A]]): ValueVectorDecoder[ListVector, C[A]] = 211 | factory.derive[ValueVectorDecoder[ListVector, *]](deriver) 212 | 213 | def listDecoderFromDefaultDeriver[A, C[_]](implicit 214 | factory: Factory[C[A]], 215 | schema: Schema[C[A]] 216 | ): ValueVectorDecoder[ListVector, C[A]] = 217 | listDecoderFromDeriver[A, C](ValueVectorDecoderDeriver.default[ListVector]) 218 | 219 | def listDecoderFromSummonedDeriver[A, C[_]](implicit 220 | factory: Factory[C[A]], 221 | schema: Schema[C[A]] 222 | ): ValueVectorDecoder[ListVector, C[A]] = 223 | listDecoderFromDeriver(ValueVectorDecoderDeriver.summoned[ListVector]) 224 | 225 | implicit def structDecoder[A](implicit 226 | factory: Factory[A], 227 | schema: Schema[A] 228 | ): ValueVectorDecoder[StructVector, A] = 229 | structDecoderFromDefaultDeriver[A] 230 | 231 | def structDecoderFromDeriver[A]( 232 | deriver: Deriver[ValueVectorDecoder[StructVector, *]] 233 | )(implicit factory: Factory[A], schema: Schema[A]): ValueVectorDecoder[StructVector, A] = 234 | factory.derive[ValueVectorDecoder[StructVector, *]](deriver) 235 | 236 | def structDecoderFromDefaultDeriver[A](implicit 237 | factory: Factory[A], 238 | schema: Schema[A] 239 | ): ValueVectorDecoder[StructVector, A] = 240 | structDecoderFromDeriver(ValueVectorDecoderDeriver.default[StructVector]) 
241 | 242 | implicit def optionDecoder[V <: ValueVector, A](implicit 243 | factory: Factory[Option[A]], 244 | schema: Schema[Option[A]] 245 | ): ValueVectorDecoder[V, Option[A]] = 246 | optionDecoderFromDefaultDeriver[V, A] 247 | 248 | implicit def optionListDecoder[A, C[_]](implicit 249 | factory: Factory[Option[C[A]]], 250 | schema: Schema[Option[C[A]]] 251 | ): ValueVectorDecoder[ListVector, Option[C[A]]] = 252 | optionDecoder[ListVector, C[A]] 253 | 254 | implicit def optionListChunkDecoder[A](implicit 255 | factory: Factory[Option[Chunk[A]]], 256 | schema: Schema[Option[Chunk[A]]] 257 | ): ValueVectorDecoder[ListVector, Option[Chunk[A]]] = 258 | optionDecoder[ListVector, Chunk[A]] 259 | 260 | def optionDecoderFromDeriver[V <: ValueVector, A]( 261 | deriver: Deriver[ValueVectorDecoder[V, *]] 262 | )(implicit factory: Factory[Option[A]], schema: Schema[Option[A]]): ValueVectorDecoder[V, Option[A]] = 263 | factory.derive[ValueVectorDecoder[V, *]](deriver) 264 | 265 | def optionDecoderFromDefaultDeriver[V <: ValueVector, A](implicit 266 | factory: Factory[Option[A]], 267 | schema: Schema[Option[A]] 268 | ): ValueVectorDecoder[V, Option[A]] = 269 | optionDecoderFromDeriver(ValueVectorDecoderDeriver.default[V]) 270 | 271 | } 272 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/ValueVectorDecoderDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import org.apache.arrow.vector.ValueVector 4 | import org.apache.arrow.vector.complex.ListVector 5 | import org.apache.arrow.vector.complex.reader.FieldReader 6 | import zio.schema.{ Deriver, DynamicValue, Schema, StandardType } 7 | import zio.{ Chunk, ChunkBuilder } 8 | 9 | object ValueVectorDecoderDeriver { 10 | 11 | def default[V1 <: ValueVector]: Deriver[ValueVectorDecoder[V1, *]] = new 
Deriver[ValueVectorDecoder[V1, *]] { 12 | 13 | override def deriveRecord[A]( 14 | record: Schema.Record[A], 15 | fields: => Chunk[Deriver.WrappedF[ValueVectorDecoder[V1, *], ?]], 16 | summoned: => Option[ValueVectorDecoder[V1, A]] 17 | ): ValueVectorDecoder[V1, A] = new ValueVectorDecoder[V1, A] { 18 | 19 | private val decoders = fields.map(_.unwrap) 20 | 21 | override def decodeUnsafe(vec: V1): Chunk[A] = { 22 | var idx = 0 23 | val len = vec.getValueCount 24 | val builder = ChunkBuilder.make[A](len) 25 | val reader = vec.getReader 26 | 27 | while (idx < len) { 28 | reader.setPosition(idx) 29 | val dynamicValue = ValueDecoder.decodeStruct(record.fields, decoders, reader, vec, idx) 30 | 31 | dynamicValue.toTypedValue(record) match { 32 | case Right(v) => 33 | builder.addOne(v) 34 | case Left(message) => 35 | throw DecoderError(message) 36 | } 37 | 38 | idx += 1 39 | } 40 | 41 | builder.result() 42 | } 43 | 44 | override def decodeNullableUnsafe(vec: V1): Chunk[Option[A]] = { 45 | var idx = 0 46 | val len = vec.getValueCount 47 | val builder = ChunkBuilder.make[Option[A]](len) 48 | val reader = vec.getReader 49 | 50 | while (idx < len) { 51 | if (!vec.isNull(idx)) { 52 | reader.setPosition(idx) 53 | val dynamicValue = ValueDecoder.decodeStruct(record.fields, decoders, reader, vec, idx) 54 | 55 | dynamicValue.toTypedValue(record) match { 56 | case Right(v) => 57 | builder.addOne(Some(v)) 58 | case Left(message) => 59 | throw DecoderError(message) 60 | } 61 | } else { 62 | builder.addOne(None) 63 | } 64 | 65 | idx += 1 66 | } 67 | 68 | builder.result() 69 | } 70 | 71 | override def decodeValue[V0 <: ValueVector]( 72 | name: Option[String], 73 | reader: FieldReader, 74 | vec: V0, 75 | idx: Int 76 | ): DynamicValue = 77 | ValueDecoder.decodeStruct(record.fields, decoders, resolveReaderByName(name, reader), vec, idx) 78 | 79 | } 80 | 81 | override def deriveEnum[A]( 82 | `enum`: Schema.Enum[A], 83 | cases: => Chunk[Deriver.WrappedF[ValueVectorDecoder[V1, *], ?]], 84 | 
summoned: => Option[ValueVectorDecoder[V1, A]] 85 | ): ValueVectorDecoder[V1, A] = ??? 86 | 87 | override def derivePrimitive[A]( 88 | st: StandardType[A], 89 | summoned: => Option[ValueVectorDecoder[V1, A]] 90 | ): ValueVectorDecoder[V1, A] = 91 | ValueVectorDecoder.primitive[V1, A](ValueDecoder.decodePrimitive)(st) 92 | 93 | override def deriveOption[A]( 94 | option: Schema.Optional[A], 95 | inner: => ValueVectorDecoder[V1, A], 96 | summoned: => Option[ValueVectorDecoder[V1, Option[A]]] 97 | ): ValueVectorDecoder[V1, Option[A]] = new ValueVectorDecoder[V1, Option[A]] { 98 | 99 | // TODO: figure out the proper implementation 100 | override def decodeNullableUnsafe(vec: V1): Chunk[Option[Option[A]]] = 101 | inner.decodeNullableUnsafe(vec).map(Some(_)) 102 | 103 | override def decodeUnsafe(vec: V1): Chunk[Option[A]] = 104 | inner.decodeNullableUnsafe(vec) 105 | 106 | override def decodeValue[V0 <: ValueVector]( 107 | name: Option[String], 108 | reader: FieldReader, 109 | vec: V0, 110 | idx: Int 111 | ): DynamicValue = 112 | if (vec.isNull(idx)) 113 | DynamicValue.NoneValue 114 | else 115 | DynamicValue.SomeValue(inner.decodeValue(name, reader, vec, idx)) 116 | 117 | } 118 | 119 | override def deriveSequence[C[_], A]( 120 | sequence: Schema.Sequence[C[A], A, ?], 121 | inner: => ValueVectorDecoder[V1, A], 122 | summoned: => Option[ValueVectorDecoder[V1, C[A]]] 123 | ): ValueVectorDecoder[V1, C[A]] = new ValueVectorDecoder[V1, C[A]] { 124 | 125 | override def decodeUnsafe(vec: V1): Chunk[C[A]] = { 126 | var idx = 0 127 | val len = vec.getValueCount 128 | val builder = ChunkBuilder.make[C[A]](len) 129 | val reader = vec.getReader 130 | val innerVec = vec.asInstanceOf[ListVector].getDataVector() 131 | 132 | while (idx < len) { 133 | reader.setPosition(idx) 134 | val dynamicValue = ValueDecoder.decodeList(inner, reader, innerVec, idx) 135 | 136 | dynamicValue.toTypedValue(sequence) match { 137 | case Right(v) => 138 | builder.addOne(v) 139 | case Left(message) => 140 | 
throw DecoderError(message) 141 | } 142 | 143 | idx += 1 144 | } 145 | 146 | builder.result() 147 | } 148 | 149 | override def decodeNullableUnsafe(vec: V1): Chunk[Option[C[A]]] = { 150 | var idx = 0 151 | val len = vec.getValueCount 152 | val builder = ChunkBuilder.make[Option[C[A]]](len) 153 | val reader = vec.getReader 154 | val innerVec = vec.asInstanceOf[ListVector].getDataVector() 155 | 156 | while (idx < len) { 157 | if (!vec.isNull(idx)) { 158 | reader.setPosition(idx) 159 | val dynamicValue = ValueDecoder.decodeList(inner, reader, innerVec, idx) 160 | 161 | dynamicValue.toTypedValue(sequence) match { 162 | case Right(v) => 163 | builder.addOne(Some(v)) 164 | case Left(message) => 165 | throw DecoderError(message) 166 | } 167 | } else { 168 | builder.addOne(None) 169 | } 170 | 171 | idx += 1 172 | } 173 | 174 | builder.result() 175 | } 176 | 177 | override def decodeValue[V0 <: ValueVector]( 178 | name: Option[String], 179 | reader: FieldReader, 180 | vec: V0, 181 | idx: Int 182 | ): DynamicValue = 183 | ValueDecoder.decodeList(inner, resolveReaderByName(name, reader), vec, idx) 184 | 185 | } 186 | 187 | override def deriveMap[K, V]( 188 | map: Schema.Map[K, V], 189 | key: => ValueVectorDecoder[V1, K], 190 | value: => ValueVectorDecoder[V1, V], 191 | summoned: => Option[ValueVectorDecoder[V1, Map[K, V]]] 192 | ): ValueVectorDecoder[V1, Map[K, V]] = ??? 193 | 194 | override def deriveTransformedRecord[A, B]( 195 | record: Schema.Record[A], 196 | transform: Schema.Transform[A, B, ?], 197 | fields: => Chunk[Deriver.WrappedF[ValueVectorDecoder[V1, *], ?]], 198 | summoned: => Option[ValueVectorDecoder[V1, B]] 199 | ): ValueVectorDecoder[V1, B] = ??? 
200 | 201 | }.cached 202 | 203 | def summoned[V1 <: ValueVector]: Deriver[ValueVectorDecoder[V1, *]] = 204 | default.autoAcceptSummoned 205 | 206 | } 207 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/ValueVectorEncoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import org.apache.arrow.memory.BufferAllocator 4 | import org.apache.arrow.vector._ 5 | import org.apache.arrow.vector.complex.writer.FieldWriter 6 | import org.apache.arrow.vector.complex.{ ListVector, StructVector } 7 | import zio._ 8 | import zio.schema.{ Derive, Deriver, Factory, Schema, StandardType } 9 | 10 | import scala.annotation.nowarn 11 | import scala.util.control.NonFatal 12 | 13 | trait ValueVectorEncoder[V <: ValueVector, -A] extends ValueEncoder[A] { self => 14 | 15 | final def encodeZIO(chunk: Chunk[A]): RIO[Scope & BufferAllocator, V] = 16 | ZIO.fromAutoCloseable( 17 | ZIO.serviceWithZIO[BufferAllocator] { implicit alloc => 18 | ZIO.fromEither(encode(chunk)) 19 | } 20 | ) 21 | 22 | final def encode(chunk: Chunk[A])(implicit alloc: BufferAllocator): Either[Throwable, V] = 23 | try 24 | Right(encodeUnsafe(chunk.map(Some(_)), nullable = false)) 25 | catch { 26 | case encoderError: EncoderError => Left(encoderError) 27 | case NonFatal(ex) => Left(EncoderError("Error encoding vector", Some(ex))) 28 | 29 | } 30 | 31 | def encodeUnsafe(chunk: Chunk[Option[A]], nullable: Boolean)(implicit alloc: BufferAllocator): V 32 | 33 | final def contramap[B](f: B => A): ValueVectorEncoder[V, B] = 34 | new ValueVectorEncoder[V, B] { 35 | 36 | override def encodeUnsafe(chunk: Chunk[Option[B]], nullable: Boolean)(implicit alloc: BufferAllocator): V = 37 | self.encodeUnsafe(chunk.map(_.map(f)), nullable) 38 | 39 | override def encodeValue(value: B, name: Option[String], writer: FieldWriter)(implicit 
40 | alloc: BufferAllocator 41 | ): Unit = 42 | self.encodeValue(f(value), name, writer) 43 | 44 | } 45 | 46 | } 47 | 48 | object ValueVectorEncoder { 49 | 50 | def primitive[V <: ValueVector, A]( 51 | allocateVec: BufferAllocator => V, 52 | getWriter: V => FieldWriter, 53 | encodeTopLevel: (A, FieldWriter, BufferAllocator) => Unit, 54 | encodeNested: (A, Option[String], FieldWriter, BufferAllocator) => Unit 55 | )(implicit @nowarn ev: StandardType[A]): ValueVectorEncoder[V, A] = 56 | new ValueVectorEncoder[V, A] { 57 | 58 | override def encodeUnsafe(chunk: Chunk[Option[A]], nullable: Boolean)(implicit alloc: BufferAllocator): V = { 59 | val vec = allocateVec(alloc) 60 | val writer: FieldWriter = getWriter(vec) 61 | val len = chunk.length 62 | val it = chunk.iterator.zipWithIndex 63 | 64 | it.foreach { case (v, i) => 65 | writer.setPosition(i) 66 | 67 | if (nullable && v.isEmpty) 68 | writer.writeNull() 69 | else 70 | encodeTopLevel(v.get, writer, alloc) 71 | } 72 | 73 | vec.setValueCount(len) 74 | vec.asInstanceOf[V] 75 | } 76 | 77 | override def encodeValue(value: A, name: Option[String], writer: FieldWriter)(implicit 78 | alloc: BufferAllocator 79 | ): Unit = 80 | encodeNested(value, name, writer, alloc) 81 | 82 | } 83 | 84 | implicit def encoder[V <: ValueVector, A: Schema](deriver: Deriver[ValueVectorEncoder[V, *]])(implicit 85 | factory: Factory[A] 86 | ): ValueVectorEncoder[V, A] = 87 | factory.derive(deriver) 88 | 89 | implicit val stringEncoder: ValueVectorEncoder[VarCharVector, String] = 90 | encoder[VarCharVector, String](ValueVectorEncoderDeriver.default) 91 | implicit val boolEncoder: ValueVectorEncoder[BitVector, Boolean] = 92 | encoder[BitVector, Boolean](ValueVectorEncoderDeriver.default) 93 | implicit val byteEncoder: ValueVectorEncoder[UInt1Vector, Byte] = 94 | encoder[UInt1Vector, Byte](ValueVectorEncoderDeriver.default) 95 | implicit val shortEncoder: ValueVectorEncoder[SmallIntVector, Short] = 96 | encoder[SmallIntVector, 
Short](ValueVectorEncoderDeriver.default) 97 | implicit val intEncoder: ValueVectorEncoder[IntVector, Int] = 98 | encoder[IntVector, Int](ValueVectorEncoderDeriver.default) 99 | implicit val longEncoder: ValueVectorEncoder[BigIntVector, Long] = 100 | encoder[BigIntVector, Long](ValueVectorEncoderDeriver.default) 101 | implicit val floatEncoder: ValueVectorEncoder[Float4Vector, Float] = 102 | encoder[Float4Vector, Float](ValueVectorEncoderDeriver.default) 103 | implicit val doubleEncoder: ValueVectorEncoder[Float8Vector, Double] = 104 | encoder[Float8Vector, Double](ValueVectorEncoderDeriver.default) 105 | implicit val binaryEncoder: ValueVectorEncoder[LargeVarBinaryVector, Chunk[Byte]] = 106 | encoder[LargeVarBinaryVector, Chunk[Byte]](ValueVectorEncoderDeriver.default) 107 | implicit val charEncoder: ValueVectorEncoder[UInt2Vector, Char] = 108 | encoder[UInt2Vector, Char](ValueVectorEncoderDeriver.default) 109 | implicit val uuidEncoder: ValueVectorEncoder[VarBinaryVector, java.util.UUID] = 110 | encoder[VarBinaryVector, java.util.UUID](ValueVectorEncoderDeriver.default) 111 | implicit val bigDecimalEncoder: ValueVectorEncoder[DecimalVector, java.math.BigDecimal] = 112 | encoder[DecimalVector, java.math.BigDecimal](ValueVectorEncoderDeriver.default) 113 | implicit val bigIntegerEncoder: ValueVectorEncoder[VarBinaryVector, java.math.BigInteger] = 114 | encoder[VarBinaryVector, java.math.BigInteger](ValueVectorEncoderDeriver.default) 115 | implicit val dayOfWeekEncoder: ValueVectorEncoder[IntVector, java.time.DayOfWeek] = 116 | encoder[IntVector, java.time.DayOfWeek](ValueVectorEncoderDeriver.default) 117 | implicit val monthEncoder: ValueVectorEncoder[IntVector, java.time.Month] = 118 | encoder[IntVector, java.time.Month](ValueVectorEncoderDeriver.default) 119 | implicit val monthDayEncoder: ValueVectorEncoder[BigIntVector, java.time.MonthDay] = 120 | encoder[BigIntVector, java.time.MonthDay](ValueVectorEncoderDeriver.default) 121 | implicit val periodEncoder: 
ValueVectorEncoder[VarBinaryVector, java.time.Period] = 122 | encoder[VarBinaryVector, java.time.Period](ValueVectorEncoderDeriver.default) 123 | implicit val yearEncoder: ValueVectorEncoder[IntVector, java.time.Year] = 124 | encoder[IntVector, java.time.Year](ValueVectorEncoderDeriver.default) 125 | implicit val yearMonthEncoder: ValueVectorEncoder[BigIntVector, java.time.YearMonth] = 126 | encoder[BigIntVector, java.time.YearMonth](ValueVectorEncoderDeriver.default) 127 | implicit val zoneIdEncoder: ValueVectorEncoder[VarCharVector, java.time.ZoneId] = 128 | encoder[VarCharVector, java.time.ZoneId](ValueVectorEncoderDeriver.default) 129 | implicit val zoneOffsetEncoder: ValueVectorEncoder[VarCharVector, java.time.ZoneOffset] = 130 | encoder[VarCharVector, java.time.ZoneOffset](ValueVectorEncoderDeriver.default) 131 | implicit val durationEncoder: ValueVectorEncoder[BigIntVector, java.time.Duration] = 132 | encoder[BigIntVector, java.time.Duration](ValueVectorEncoderDeriver.default) 133 | implicit val instantEncoder: ValueVectorEncoder[BigIntVector, java.time.Instant] = 134 | encoder[BigIntVector, java.time.Instant](ValueVectorEncoderDeriver.default) 135 | implicit val localDateEncoder: ValueVectorEncoder[VarCharVector, java.time.LocalDate] = 136 | encoder[VarCharVector, java.time.LocalDate](ValueVectorEncoderDeriver.default) 137 | implicit val localTimeEncoder: ValueVectorEncoder[VarCharVector, java.time.LocalTime] = 138 | encoder[VarCharVector, java.time.LocalTime](ValueVectorEncoderDeriver.default) 139 | implicit val localDateTimeEncoder: ValueVectorEncoder[VarCharVector, java.time.LocalDateTime] = 140 | encoder[VarCharVector, java.time.LocalDateTime](ValueVectorEncoderDeriver.default) 141 | implicit val offsetTimeEncoder: ValueVectorEncoder[VarCharVector, java.time.OffsetTime] = 142 | encoder[VarCharVector, java.time.OffsetTime](ValueVectorEncoderDeriver.default) 143 | implicit val offsetDateTimeEncoder: ValueVectorEncoder[VarCharVector, 
java.time.OffsetDateTime] = 144 | encoder[VarCharVector, java.time.OffsetDateTime](ValueVectorEncoderDeriver.default) 145 | implicit val zonedDateTimeEncoder: ValueVectorEncoder[VarCharVector, java.time.ZonedDateTime] = 146 | encoder[VarCharVector, java.time.ZonedDateTime](ValueVectorEncoderDeriver.default) 147 | 148 | implicit def listEncoder[A, C[_]](implicit 149 | factory: Factory[C[A]], 150 | schema: Schema[C[A]] 151 | ): ValueVectorEncoder[ListVector, C[A]] = 152 | listEncoderFromDefaultDeriver[A, C] 153 | 154 | implicit def listChunkEncoder[A](implicit 155 | factory: Factory[Chunk[A]], 156 | schema: Schema[Chunk[A]] 157 | ): ValueVectorEncoder[ListVector, Chunk[A]] = 158 | listEncoder[A, Chunk] 159 | 160 | implicit def listOptionEncoder[A, C[_]](implicit 161 | factory: Factory[C[Option[A]]], 162 | schema: Schema[C[Option[A]]] 163 | ): ValueVectorEncoder[ListVector, C[Option[A]]] = 164 | listEncoder[Option[A], C] 165 | 166 | implicit def listChunkOptionEncoder[A](implicit 167 | factory: Factory[Chunk[Option[A]]], 168 | schema: Schema[Chunk[Option[A]]] 169 | ): ValueVectorEncoder[ListVector, Chunk[Option[A]]] = 170 | listChunkEncoder[Option[A]] 171 | 172 | def listEncoderFromDeriver[A, C[_]]( 173 | deriver: Deriver[ValueVectorEncoder[ListVector, *]] 174 | )(implicit factory: Factory[C[A]], schema: Schema[C[A]]): ValueVectorEncoder[ListVector, C[A]] = 175 | factory.derive[ValueVectorEncoder[ListVector, *]](deriver) 176 | 177 | def listEncoderFromDefaultDeriver[A, C[_]](implicit 178 | factory: Factory[C[A]], 179 | schema: Schema[C[A]] 180 | ): ValueVectorEncoder[ListVector, C[A]] = 181 | listEncoderFromDeriver[A, C](ValueVectorEncoderDeriver.default[ListVector]) 182 | 183 | def listEncoderFromSummonedDeriver[A, C[_]](implicit 184 | factory: Factory[C[A]], 185 | schema: Schema[C[A]] 186 | ): ValueVectorEncoder[ListVector, C[A]] = 187 | listEncoderFromDeriver[A, C](ValueVectorEncoderDeriver.summoned[ListVector]) 188 | 189 | implicit def structEncoder[A](implicit 
190 | factory: Factory[A], 191 | schema: Schema[A] 192 | ): ValueVectorEncoder[StructVector, A] = 193 | structEncoderFromDefaultDeriver[A] 194 | 195 | def structEncoderFromDeriver[A]( 196 | deriver: Deriver[ValueVectorEncoder[StructVector, *]] 197 | )(implicit factory: Factory[A], schema: Schema[A]): ValueVectorEncoder[StructVector, A] = 198 | factory.derive[ValueVectorEncoder[StructVector, *]](deriver) 199 | 200 | def structEncoderFromDefaultDeriver[A](implicit 201 | factory: Factory[A], 202 | schema: Schema[A] 203 | ): ValueVectorEncoder[StructVector, A] = 204 | structEncoderFromDeriver[A](ValueVectorEncoderDeriver.default[StructVector]) 205 | 206 | implicit def optionEncoder[V <: ValueVector, A](implicit 207 | factory: Factory[Option[A]], 208 | schema: Schema[Option[A]] 209 | ): ValueVectorEncoder[V, Option[A]] = 210 | optionEncoderFromDefaultDeriver[V, A] 211 | 212 | implicit def optionListEncoder[A, C[_]](implicit 213 | factory: Factory[Option[C[A]]], 214 | schema: Schema[Option[C[A]]] 215 | ): ValueVectorEncoder[ListVector, Option[C[A]]] = 216 | optionEncoder[ListVector, C[A]] 217 | 218 | implicit def optionListChunkEncoder[A](implicit 219 | factory: Factory[Option[Chunk[A]]], 220 | schema: Schema[Option[Chunk[A]]] 221 | ): ValueVectorEncoder[ListVector, Option[Chunk[A]]] = 222 | optionEncoder[ListVector, Chunk[A]] 223 | 224 | def optionEncoderFromDeriver[V <: ValueVector, A](deriver: Deriver[ValueVectorEncoder[V, *]])(implicit 225 | factory: Factory[Option[A]], 226 | schema: Schema[Option[A]] 227 | ): ValueVectorEncoder[V, Option[A]] = 228 | factory.derive[ValueVectorEncoder[V, *]](deriver) 229 | 230 | def optionEncoderFromDefaultDeriver[V <: ValueVector, A](implicit 231 | factory: Factory[Option[A]], 232 | schema: Schema[Option[A]] 233 | ): ValueVectorEncoder[V, Option[A]] = 234 | optionEncoderFromDeriver(ValueVectorEncoderDeriver.default[V]) 235 | 236 | } 237 | -------------------------------------------------------------------------------- 
/modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/ValueVectorEncoderDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import org.apache.arrow.memory.BufferAllocator 4 | import org.apache.arrow.vector.complex.impl.{ PromotableWriter, UnionListWriter } 5 | import org.apache.arrow.vector.complex.writer.FieldWriter 6 | import org.apache.arrow.vector.complex.{ ListVector, StructVector } 7 | import org.apache.arrow.vector.{ ValueVector, _ } 8 | import zio.Chunk 9 | import zio.schema.{ Deriver, Schema, StandardType } 10 | 11 | object ValueVectorEncoderDeriver { 12 | 13 | def default[V1 <: ValueVector]: Deriver[ValueVectorEncoder[V1, *]] = new Deriver[ValueVectorEncoder[V1, *]] { 14 | 15 | override def deriveRecord[A]( 16 | record: Schema.Record[A], 17 | fields: => Chunk[Deriver.WrappedF[ValueVectorEncoder[V1, *], ?]], 18 | summoned: => Option[ValueVectorEncoder[V1, A]] 19 | ): ValueVectorEncoder[V1, A] = new ValueVectorEncoder[V1, A] { 20 | 21 | private val encoders = fields.map(_.unwrap) 22 | 23 | override def encodeUnsafe(chunk: Chunk[Option[A]], nullable: Boolean)(implicit alloc: BufferAllocator): V1 = { 24 | val vec = StructVector.empty("structVector", alloc) 25 | val writer = vec.getWriter 26 | val len = chunk.length 27 | val it = chunk.iterator.zipWithIndex 28 | 29 | it.foreach { case (v, i) => 30 | writer.setPosition(i) 31 | 32 | if (nullable && v.isEmpty) 33 | writer.writeNull() 34 | else 35 | ValueEncoder.encodeStruct(v.get, record.fields, encoders, writer) 36 | 37 | vec.setIndexDefined(i) 38 | } 39 | writer.setValueCount(len) 40 | 41 | vec.asInstanceOf[V1] 42 | } 43 | 44 | override def encodeValue(value: A, name: Option[String], writer: FieldWriter)(implicit 45 | alloc: BufferAllocator 46 | ): Unit = { 47 | val writer0 = name.fold[FieldWriter](writer.struct().asInstanceOf[UnionListWriter])( 48 | 
writer.struct(_).asInstanceOf[PromotableWriter] 49 | ) 50 | 51 | ValueEncoder.encodeStruct(value, record.fields, encoders, writer0) 52 | 53 | } 54 | 55 | } 56 | 57 | override def deriveEnum[A]( 58 | `enum`: Schema.Enum[A], 59 | cases: => Chunk[Deriver.WrappedF[ValueVectorEncoder[V1, *], ?]], 60 | summoned: => Option[ValueVectorEncoder[V1, A]] 61 | ): ValueVectorEncoder[V1, A] = ??? 62 | 63 | override def derivePrimitive[A]( 64 | st: StandardType[A], 65 | summoned: => Option[ValueVectorEncoder[V1, A]] 66 | ): ValueVectorEncoder[V1, A] = 67 | ValueVectorEncoder.primitive[V1, A]( 68 | allocateVec = { alloc => 69 | val vec = st match { 70 | case StandardType.StringType => 71 | new VarCharVector("stringVector", alloc) 72 | case StandardType.BoolType => 73 | new BitVector("boolVector", alloc) 74 | case StandardType.ByteType => 75 | new UInt1Vector("byteVector", alloc) 76 | case StandardType.ShortType => 77 | new SmallIntVector("shortVector", alloc) 78 | case StandardType.IntType => 79 | new IntVector("intVector", alloc) 80 | case StandardType.LongType => 81 | new BigIntVector("longVector", alloc) 82 | case StandardType.FloatType => 83 | new Float4Vector("floatVector", alloc) 84 | case StandardType.DoubleType => 85 | new Float8Vector("doubleVector", alloc) 86 | case StandardType.BinaryType => 87 | new LargeVarBinaryVector("binaryVector", alloc) 88 | case StandardType.CharType => 89 | new UInt2Vector("charVector", alloc) 90 | case StandardType.UUIDType => 91 | new VarBinaryVector("uuidVector", alloc) 92 | case StandardType.BigDecimalType => 93 | new DecimalVector("bigDecimalVector", alloc, 11, 2) 94 | case StandardType.BigIntegerType => 95 | new VarBinaryVector("bigIntVector", alloc) 96 | case StandardType.DayOfWeekType => 97 | new IntVector("dayOfWeekVector", alloc) 98 | case StandardType.MonthType => 99 | new IntVector("monthVector", alloc) 100 | case StandardType.MonthDayType => 101 | new BigIntVector("monthDayVector", alloc) 102 | case StandardType.PeriodType => 103 | 
new VarBinaryVector("periodVector", alloc) 104 | case StandardType.YearType => 105 | new IntVector("yearVector", alloc) 106 | case StandardType.YearMonthType => 107 | new BigIntVector("yearMonthVector", alloc) 108 | case StandardType.ZoneIdType => 109 | new VarCharVector("zoneIdVector", alloc) 110 | case StandardType.ZoneOffsetType => 111 | new VarCharVector("zoneOffsetVector", alloc) 112 | case StandardType.DurationType => 113 | new BigIntVector("durationVector", alloc) 114 | case StandardType.InstantType => 115 | new BigIntVector("instantVector", alloc) 116 | case StandardType.LocalDateType => 117 | new VarCharVector("localDateVector", alloc) 118 | case StandardType.LocalTimeType => 119 | new VarCharVector("localTimeVector", alloc) 120 | case StandardType.LocalDateTimeType => 121 | new VarCharVector("localDateTimeVector", alloc) 122 | case StandardType.OffsetTimeType => 123 | new VarCharVector("offsetTimeVector", alloc) 124 | case StandardType.OffsetDateTimeType => 125 | new VarCharVector("offsetDateTimeVector", alloc) 126 | case StandardType.ZonedDateTimeType => 127 | new VarCharVector("zoneDateTimeVector", alloc) 128 | case other => 129 | throw EncoderError(s"Unsupported ZIO Schema StandardType $other") 130 | } 131 | vec.asInstanceOf[V1] 132 | }, 133 | getWriter = vec => primitiveWriter(st, vec.asInstanceOf[FieldVector]), 134 | encodeTopLevel = (v, writer, alloc) => ValueEncoder.encodePrimitive(st, v, writer)(alloc), 135 | encodeNested = (v, name, writer, alloc) => ValueEncoder.encodePrimitive(st, v, name, writer)(alloc) 136 | )(st) 137 | 138 | override def deriveOption[A]( 139 | option: Schema.Optional[A], 140 | inner: => ValueVectorEncoder[V1, A], 141 | summoned: => Option[ValueVectorEncoder[V1, Option[A]]] 142 | ): ValueVectorEncoder[V1, Option[A]] = new ValueVectorEncoder[V1, Option[A]] { 143 | 144 | override def encodeUnsafe(chunk: Chunk[Option[Option[A]]], nullable: Boolean)(implicit 145 | alloc: BufferAllocator 146 | ): V1 = 147 | 
inner.encodeUnsafe(chunk.map(_.get), nullable = true) 148 | 149 | override def encodeValue(value: Option[A], name: Option[String], writer: FieldWriter)(implicit 150 | alloc: BufferAllocator 151 | ): Unit = 152 | value match { 153 | case Some(value0) => 154 | inner.encodeValue(value0, name, writer) 155 | case None => 156 | writer.writeNull() 157 | } 158 | 159 | } 160 | 161 | override def deriveSequence[C[_], A]( 162 | sequence: Schema.Sequence[C[A], A, ?], 163 | inner: => ValueVectorEncoder[V1, A], 164 | summoned: => Option[ValueVectorEncoder[V1, C[A]]] 165 | ): ValueVectorEncoder[V1, C[A]] = 166 | new ValueVectorEncoder[V1, C[A]] { 167 | 168 | override def encodeUnsafe(chunk: Chunk[Option[C[A]]], nullable: Boolean)(implicit 169 | alloc: BufferAllocator 170 | ): V1 = { 171 | val vec = ListVector.empty("listVector", alloc) 172 | val writer = vec.getWriter 173 | val len = chunk.length 174 | val it = chunk.iterator.zipWithIndex 175 | 176 | it.foreach { case (vs, i) => 177 | writer.setPosition(i) 178 | 179 | if (nullable && vs.isEmpty) 180 | writer.writeNull() 181 | else 182 | ValueEncoder.encodeList(sequence.toChunk(vs.get), inner, writer) 183 | } 184 | 185 | vec.setValueCount(len) 186 | vec.asInstanceOf[V1] 187 | } 188 | 189 | override def encodeValue(value: C[A], name: Option[String], writer: FieldWriter)(implicit 190 | alloc: BufferAllocator 191 | ): Unit = { 192 | val writer0 = name.fold(writer.list)(writer.list).asInstanceOf[PromotableWriter] 193 | 194 | ValueEncoder.encodeList(sequence.toChunk(value), inner, writer0) 195 | } 196 | 197 | } 198 | 199 | override def deriveMap[K, V]( 200 | map: Schema.Map[K, V], 201 | key: => ValueVectorEncoder[V1, K], 202 | value: => ValueVectorEncoder[V1, V], 203 | summoned: => Option[ValueVectorEncoder[V1, Map[K, V]]] 204 | ): ValueVectorEncoder[V1, Map[K, V]] = ??? 
205 | 206 | override def deriveTransformedRecord[A, B]( 207 | record: Schema.Record[A], 208 | transform: Schema.Transform[A, B, ?], 209 | fields: => Chunk[Deriver.WrappedF[ValueVectorEncoder[V1, *], ?]], 210 | summoned: => Option[ValueVectorEncoder[V1, B]] 211 | ): ValueVectorEncoder[V1, B] = ??? 212 | 213 | }.cached 214 | 215 | def summoned[V1 <: ValueVector]: Deriver[ValueVectorEncoder[V1, *]] = 216 | default.autoAcceptSummoned 217 | 218 | } 219 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/VectorSchemaRootCodec.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import org.apache.arrow.memory.BufferAllocator 4 | import org.apache.arrow.vector.VectorSchemaRoot 5 | import zio._ 6 | import zio.schema.Schema 7 | 8 | final case class VectorSchemaRootCodec[A]( 9 | encoder: VectorSchemaRootEncoder[A], 10 | decoder: VectorSchemaRootDecoder[A] 11 | ) { self => 12 | 13 | def decodeZIO(root: VectorSchemaRoot): Task[Chunk[A]] = 14 | decoder.decodeZIO(root) 15 | 16 | def decode(root: VectorSchemaRoot): Either[Throwable, Chunk[A]] = 17 | decoder.decode(root) 18 | 19 | def encodeZIO(chunk: Chunk[A], root: VectorSchemaRoot): RIO[Scope & BufferAllocator, VectorSchemaRoot] = 20 | encoder.encodeZIO(chunk, root) 21 | 22 | def encode( 23 | chunk: Chunk[A], 24 | root: VectorSchemaRoot 25 | )(implicit alloc: BufferAllocator): Either[Throwable, VectorSchemaRoot] = 26 | encoder.encode(chunk, root) 27 | 28 | def transform[B](f: A => B, g: B => A)(implicit 29 | schemaSrc: Schema[A], 30 | schemaDst: Schema[B] 31 | ): VectorSchemaRootCodec[B] = 32 | VectorSchemaRootCodec(encoder.contramap(g), decoder.map(f)) 33 | 34 | } 35 | 36 | object VectorSchemaRootCodec { 37 | 38 | implicit def codec[A](implicit 39 | encoder: VectorSchemaRootEncoder[A], 40 | decoder: VectorSchemaRootDecoder[A] 41 | 
): VectorSchemaRootCodec[A] = 42 | VectorSchemaRootCodec(encoder, decoder) 43 | 44 | } 45 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/VectorSchemaRootDecoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core.codec 2 | 3 | import org.apache.arrow.vector.complex.reader.FieldReader 4 | import org.apache.arrow.vector.{ ValueVector, VectorSchemaRoot } 5 | import zio._ 6 | import zio.schema.{ Deriver, DynamicValue, Factory, Schema, StandardType } 7 | 8 | import scala.annotation.unused 9 | import scala.util.control.NonFatal 10 | 11 | trait VectorSchemaRootDecoder[A] extends ValueDecoder[A] { self => 12 | 13 | final def decodeZIO(root: VectorSchemaRoot): Task[Chunk[A]] = 14 | ZIO.fromEither(decode(root)) 15 | 16 | final def decode(root: VectorSchemaRoot): Either[Throwable, Chunk[A]] = 17 | try 18 | Right(decodeUnsafe(root)) 19 | catch { 20 | case decoderError: DecoderError => Left(decoderError) 21 | case NonFatal(ex) => Left(DecoderError("Error decoding vector schema root", Some(ex))) 22 | } 23 | 24 | protected def decodeUnsafe(@unused root: VectorSchemaRoot): Chunk[A] = 25 | throw DecoderError(s"Given ZIO schema must be of type Schema.Record[A]") 26 | 27 | def decodeField[V0 <: ValueVector](reader: FieldReader, vec: V0, idx: Int): DynamicValue 28 | 29 | final def map[B](f: A => B)(implicit schemaSrc: Schema[A], schemaDst: Schema[B]): VectorSchemaRootDecoder[B] = 30 | new VectorSchemaRootDecoder[B] { 31 | 32 | override protected def decodeUnsafe(root: VectorSchemaRoot): Chunk[B] = 33 | self.decodeUnsafe(root).map(f) 34 | 35 | override def decodeValue[V0 <: ValueVector]( 36 | name: Option[String], 37 | reader: FieldReader, 38 | vec: V0, 39 | idx: Int 40 | ): DynamicValue = 41 | self 42 | .decodeValue(name, reader, vec, idx) 43 | .toValue(schemaSrc) 44 | .map(a => 
schemaDst.toDynamic(f(a)))
          .toTry
          .get

      // Decodes with the source decoder, materializes the value through
      // `schemaSrc`, applies `f`, then re-encodes it via `schemaDst`.
      // Conversion failures are rethrown (`.toTry.get`).
      override def decodeField[V0 <: ValueVector](reader: FieldReader, vec: V0, idx: Int): DynamicValue =
        self
          .decodeField(reader, vec, idx)
          .toValue(schemaSrc)
          .map(a => schemaDst.toDynamic(f(a)))
          .toTry
          .get

    }

}

object VectorSchemaRootDecoder {

  /**
   * Builds a decoder for a primitive type from a function that reads a single
   * value from an Arrow `FieldReader` as a `DynamicValue`.
   */
  def primitive[A](
    decode0: (StandardType[A], FieldReader) => DynamicValue
  )(implicit st: StandardType[A]): VectorSchemaRootDecoder[A] =
    new VectorSchemaRootDecoder[A] {

      // Nested access: resolve a child reader by name when one is given,
      // otherwise use the plain (positional) reader.
      override def decodeValue[V0 <: ValueVector](
        name: Option[String],
        reader: FieldReader,
        vec: V0,
        idx: Int
      ): DynamicValue =
        decode0(st, resolveReaderByName(name, reader))

      // Top-level field access: the reader is already positioned by the caller.
      override def decodeField[V0 <: ValueVector](reader: FieldReader, vec: V0, idx: Int): DynamicValue =
        decode0(st, reader)

    }

  // Default implicit instance, backed by the default deriver.
  implicit def decoder[A: Factory: Schema]: VectorSchemaRootDecoder[A] =
    fromDefaultDeriver[A]

  // Derives a decoder for `A` using the supplied deriver.
  def fromDeriver[A: Factory: Schema](deriver: Deriver[VectorSchemaRootDecoder]): VectorSchemaRootDecoder[A] =
    implicitly[Factory[A]].derive[VectorSchemaRootDecoder](deriver)

  def fromDefaultDeriver[A: Factory: Schema]: VectorSchemaRootDecoder[A] =
    fromDeriver[A](VectorSchemaRootDecoderDeriver.default)

  // Variant that prefers user-summoned instances over derived ones.
  def fromSummonedDeriver[A: Factory: Schema]: VectorSchemaRootDecoder[A] =
    fromDeriver[A](VectorSchemaRootDecoderDeriver.summoned)

}
--------------------------------------------------------------------------------
/modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/VectorSchemaRootDecoderDeriver.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core.codec

import org.apache.arrow.vector.complex.ListVector
import org.apache.arrow.vector.complex.reader.FieldReader
import org.apache.arrow.vector.{ ValueVector, VectorSchemaRoot }
import zio.schema.{ Deriver, DynamicValue, Schema, StandardType, TypeId }
import zio.{ Chunk, ChunkBuilder }

import scala.collection.immutable.ListMap

object VectorSchemaRootDecoderDeriver {

  // Derives `VectorSchemaRootDecoder` instances from ZIO Schema descriptions.
  // Enums, maps and transformed records are not implemented yet (`???`).
  val default: Deriver[VectorSchemaRootDecoder] = new Deriver[VectorSchemaRootDecoder] {

    override def deriveRecord[A](
      record: Schema.Record[A],
      fields: => Chunk[Deriver.WrappedF[VectorSchemaRootDecoder, ?]],
      summoned: => Option[VectorSchemaRootDecoder[A]]
    ): VectorSchemaRootDecoder[A] = new VectorSchemaRootDecoder[A] {

      // One decoder per record field, in declaration order.
      private val decoders = fields.map(_.unwrap)

      // Decodes every row of `root` into a Chunk[A]: resolves one vector per
      // record field (looked up by field name), then row-by-row assembles a
      // dynamic record and converts it back to `A` through the schema.
      // Throws DecoderError on a missing vector or a failed conversion.
      override protected def decodeUnsafe(root: VectorSchemaRoot): Chunk[A] = {
        val fields0 = record.fields.zip(decoders).map { case (field, decoder) =>
          val vec    =
            Option(root.getVector(field.name))
              .getOrElse(throw DecoderError(s"Couldn't get vector by name ${field.name}"))
          val reader = vec.getReader

          (decoder, field.name.toString, reader, vec)
        }

        var idx     = 0
        val len     = root.getRowCount
        val builder = ChunkBuilder.make[A]()

        while (idx < len) {
          // Each field's reader must be repositioned to the current row
          // before decoding that field.
          val values = ListMap(fields0.map { case (decoder, name, reader, vec) =>
            reader.setPosition(idx)
            val value = decoder.decodeField(reader, vec, idx)

            name.toString -> value
          }*)

          DynamicValue.Record(TypeId.Structural, values).toTypedValue(record) match {
            case Right(v)      =>
              builder.addOne(v)
              idx += 1
            case Left(message) =>
              throw DecoderError(message)
          }
        }

        builder.result()
      }

      override def decodeValue[V0 <: ValueVector](
        name: Option[String],
        reader: FieldReader,
        vec: V0,
        idx: Int
      ): DynamicValue =
        ValueDecoder.decodeStruct(record.fields, decoders, resolveReaderByName(name, reader), vec, idx)

      override def decodeField[V0 <: ValueVector](reader: FieldReader, vec: V0, idx: Int): DynamicValue =
        ValueDecoder.decodeStruct(record.fields, decoders, reader, vec, idx)

    }

    // Not supported yet.
    override def deriveEnum[A](
      `enum`: Schema.Enum[A],
      cases: => Chunk[Deriver.WrappedF[VectorSchemaRootDecoder, ?]],
      summoned: => Option[VectorSchemaRootDecoder[A]]
    ): VectorSchemaRootDecoder[A] = ???

    override def derivePrimitive[A](
      st: StandardType[A],
      summoned: => Option[VectorSchemaRootDecoder[A]]
    ): VectorSchemaRootDecoder[A] =
      VectorSchemaRootDecoder.primitive[A](ValueDecoder.decodePrimitive)(st)

    // Optional values: Arrow nulls map to DynamicValue.NoneValue.
    override def deriveOption[A](
      option: Schema.Optional[A],
      inner: => VectorSchemaRootDecoder[A],
      summoned: => Option[VectorSchemaRootDecoder[Option[A]]]
    ): VectorSchemaRootDecoder[Option[A]] = new VectorSchemaRootDecoder[Option[A]] {

      override def decodeValue[V0 <: ValueVector](
        name: Option[String],
        reader: FieldReader,
        vec: V0,
        idx: Int
      ): DynamicValue =
        if (vec.isNull(idx))
          DynamicValue.NoneValue
        else
          DynamicValue.SomeValue(inner.decodeValue(name, reader, vec, idx))

      override def decodeField[V0 <: ValueVector](reader: FieldReader, vec: V0, idx: Int): DynamicValue =
        if (vec.isNull(idx))
          DynamicValue.NoneValue
        else
          DynamicValue.SomeValue(inner.decodeField(reader, vec, idx))

    }

    // Sequences: elements are decoded out of the ListVector's inner data
    // vector. Assumes `vec` is a ListVector (unchecked cast).
    override def deriveSequence[C[_], A](
      sequence: Schema.Sequence[C[A], A, ?],
      inner: => VectorSchemaRootDecoder[A],
      summoned: => Option[VectorSchemaRootDecoder[C[A]]]
    ): VectorSchemaRootDecoder[C[A]] = new VectorSchemaRootDecoder[C[A]] {

      override def decodeValue[V0 <: ValueVector](
        name: Option[String],
        reader: FieldReader,
        vec: V0,
        idx: Int
      ): DynamicValue = {
        val innerVec = vec.asInstanceOf[ListVector].getDataVector()

        ValueDecoder.decodeList(inner, resolveReaderByName(name, reader), innerVec, idx)
      }

      // Same as decodeValue but with the reader already resolved by the caller.
      override def decodeField[V0 <: ValueVector](reader: FieldReader, vec: V0, idx: Int): DynamicValue = {
        val innerVec = vec.asInstanceOf[ListVector].getDataVector()

        ValueDecoder.decodeList(inner, reader, innerVec, idx)
      }

    }

    // Not supported yet.
    override def deriveMap[K, V](
      map: Schema.Map[K, V],
      key: => VectorSchemaRootDecoder[K],
      value: => VectorSchemaRootDecoder[V],
      summoned: => Option[VectorSchemaRootDecoder[Map[K, V]]]
    ): VectorSchemaRootDecoder[Map[K, V]] = ???

    // Not supported yet.
    override def deriveTransformedRecord[A, B](
      record: Schema.Record[A],
      transform: Schema.Transform[A, B, ?],
      fields: => Chunk[Deriver.WrappedF[VectorSchemaRootDecoder, ?]],
      summoned: => Option[VectorSchemaRootDecoder[B]]
    ): VectorSchemaRootDecoder[B] = ???

  }.cached

  // Accepts user-provided (summoned) instances where available, falling back
  // to the default derivation otherwise.
  def summoned: Deriver[VectorSchemaRootDecoder] = default.autoAcceptSummoned

}
--------------------------------------------------------------------------------
/modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/VectorSchemaRootEncoder.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core.codec

import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector.complex.writer.FieldWriter
import org.apache.arrow.vector.{ FieldVector, VectorSchemaRoot }
import zio._
import zio.schema.{ Deriver, Factory, Schema, StandardType }

import scala.annotation.{ nowarn, unused }
import scala.util.control.NonFatal

// Encodes a Chunk[A] into an Arrow VectorSchemaRoot.
trait VectorSchemaRootEncoder[-A] extends ValueEncoder[A] { self =>

  // Effectful encoding. NOTE(review): `fromAutoCloseable` ties the
  // *caller-supplied* root's lifetime to the surrounding Scope — confirm this
  // ownership transfer is intended (the decoder side follows the same pattern).
  final def encodeZIO(chunk: Chunk[A], root: VectorSchemaRoot): RIO[Scope & BufferAllocator, VectorSchemaRoot] =
    ZIO.fromAutoCloseable(
      ZIO.serviceWithZIO[BufferAllocator] { implicit alloc =>
        ZIO.fromEither(encode(chunk, root))
      }
    )

  // Pure wrapper over encodeUnsafe: EncoderErrors pass through, any other
  // non-fatal exception is wrapped in an EncoderError.
  final def encode(
    chunk: Chunk[A],
    root: VectorSchemaRoot
  )(implicit alloc: BufferAllocator): Either[Throwable, VectorSchemaRoot] =
    try
      Right(encodeUnsafe(chunk, root))
    catch {
      case encoderError: EncoderError => Left(encoderError)
      case NonFatal(ex)               => Left(EncoderError("Error encoding vector schema root", Some(ex)))
    }

  // Only record encoders (see the deriver) override this; every other shape
  // fails, since a VectorSchemaRoot represents a table of named columns.
  protected def encodeUnsafe(
    @unused chunk: Chunk[A],
    @unused root: VectorSchemaRoot
  )(implicit @unused alloc: BufferAllocator): VectorSchemaRoot =
    throw EncoderError(s"Given ZIO schema must be of type Schema.Record[A]")

  // Writes a single value at the writer's current position.
  def encodeField(value: A, writer: FieldWriter)(implicit alloc: BufferAllocator): Unit

  // Resolves the writer implementation matching this encoder's vector type.
  def getWriter(vec: FieldVector): FieldWriter

  // Contravariant mapping: encode B by first converting it to A.
  final def contramap[B](f: B => A): VectorSchemaRootEncoder[B] =
    new VectorSchemaRootEncoder[B] {

      override protected def encodeUnsafe(chunk: Chunk[B], root: VectorSchemaRoot)(implicit
        alloc: BufferAllocator
      ): VectorSchemaRoot =
        self.encodeUnsafe(chunk.map(f), root)

      override def encodeValue(value: B, name: Option[String], writer: FieldWriter)(implicit
        alloc: BufferAllocator
      ): Unit =
        self.encodeValue(f(value), name, writer)

      override def encodeField(value: B, writer: FieldWriter)(implicit alloc: BufferAllocator): Unit =
        self.encodeField(f(value), writer)

      override def getWriter(vec: FieldVector): FieldWriter =
        self.getWriter(vec)

    }

}

object VectorSchemaRootEncoder {

  /**
   * Builds an encoder for a primitive type from the three pieces of writer
   * plumbing: nested-value writing, top-level field writing, and writer
   * resolution for the backing vector.
   */
  def primitive[A](
    encodeValue0: (A, Option[String], FieldWriter, BufferAllocator) => Unit,
    encodeField0: (A, FieldWriter, BufferAllocator) => Unit,
    getWriter0: FieldVector => FieldWriter
  )(implicit @nowarn ev: StandardType[A]): VectorSchemaRootEncoder[A] =
    new VectorSchemaRootEncoder[A] {

      override def encodeValue(value: A, name: Option[String], writer: FieldWriter)(implicit
        alloc: BufferAllocator
      ): Unit =
        encodeValue0(value, name, writer, alloc)

      override def encodeField(value: A, writer: FieldWriter)(implicit alloc: BufferAllocator): Unit =
        encodeField0(value, writer, alloc)

      override def getWriter(vec: FieldVector): FieldWriter =
        getWriter0(vec)

    }

  // Default implicit instance, backed by the default deriver.
  implicit def encoder[A: Factory: Schema]: VectorSchemaRootEncoder[A] =
    fromDefaultDeriver[A]

  def fromDeriver[A: Factory: Schema](deriver: Deriver[VectorSchemaRootEncoder]): VectorSchemaRootEncoder[A] =
    implicitly[Factory[A]].derive[VectorSchemaRootEncoder](deriver)

  def fromDefaultDeriver[A: Factory: Schema]: VectorSchemaRootEncoder[A] =
    fromDeriver[A](VectorSchemaRootEncoderDeriver.default)

  // Variant that prefers user-summoned instances over derived ones.
  def fromSummonedDeriver[A: Factory: Schema]: VectorSchemaRootEncoder[A] =
    fromDeriver[A](VectorSchemaRootEncoderDeriver.summoned)

}
--------------------------------------------------------------------------------
/modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/VectorSchemaRootEncoderDeriver.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core.codec

import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector.complex.impl.{ PromotableWriter, UnionListWriter }
import org.apache.arrow.vector.complex.writer.FieldWriter
import org.apache.arrow.vector.complex.{ ListVector, StructVector }
import org.apache.arrow.vector.{ FieldVector, VectorSchemaRoot }
import zio.Chunk
import zio.schema.{ Deriver, Schema, StandardType }

object VectorSchemaRootEncoderDeriver {

  // Derives `VectorSchemaRootEncoder` instances from ZIO Schema descriptions.
  // Enums, maps and transformed records are not implemented yet (`???`).
  val default: Deriver[VectorSchemaRootEncoder] = new Deriver[VectorSchemaRootEncoder] {

    override def deriveRecord[A](
      record: Schema.Record[A],
      fields: => Chunk[Deriver.WrappedF[VectorSchemaRootEncoder, ?]],
      summoned: => Option[VectorSchemaRootEncoder[A]]
    ): VectorSchemaRootEncoder[A] = new VectorSchemaRootEncoder[A] {

      private val encoders = fields.map(_.unwrap)

      // Bridges the existential field encoder to the concrete field value
      // type via an unchecked cast (safe by construction: encoders are zipped
      // with their own fields).
      private def encodeField0[A1](
        encoder: VectorSchemaRootEncoder[?],
        value: A1,
        writer: FieldWriter
      )(implicit
        alloc: BufferAllocator
      ) =
        encoder.asInstanceOf[VectorSchemaRootEncoder[A1]].encodeField(value, writer)

      // Encodes the chunk column-by-column: resets each field's vector,
      // writes every row at its position, then sets value/row counts.
      override protected def encodeUnsafe(
        chunk: Chunk[A],
        root: VectorSchemaRoot
      )(implicit alloc: BufferAllocator): VectorSchemaRoot = {
        val fields0 =
          record.fields.zip(encoders).map { case (Schema.Field(name, _, _, _, g, _), encoder) =>
            val vec = Option(root.getVector(name))
              .getOrElse(throw EncoderError(s"Couldn't find vector by name $name"))

            vec.reset()

            // val writer = resolveWriter(fieldSchema, vec)
            val writer = encoder.getWriter(vec)

            (encoder, vec, writer, g)
          }

        val len = chunk.length
        val it  = chunk.iterator.zipWithIndex

        it.foreach { case (v, i) =>
          fields0.foreach { case (encoder, _, writer, get) =>
            // The writer must be positioned at the row before each write.
            writer.setPosition(i)
            encodeField0(encoder, get(v), writer)
          }
        }

        fields0.foreach { case (_, vec, _, _) =>
          vec.setValueCount(len)
        }

        root.setRowCount(len)
        root
      }

      // Nested struct value: named => child struct writer, unnamed => list
      // element struct writer.
      def encodeValue(
        value: A,
        name: Option[String],
        writer: FieldWriter
      )(implicit alloc: BufferAllocator): Unit = {
        val writer0 = name.fold[FieldWriter](writer.struct().asInstanceOf[UnionListWriter])(
          writer.struct(_).asInstanceOf[PromotableWriter]
        )

        ValueEncoder.encodeStruct(value, record.fields, encoders, writer0)
      }

      override def encodeField(value: A, writer: FieldWriter)(implicit alloc: BufferAllocator): Unit =
        ValueEncoder.encodeStruct(value, record.fields, encoders, writer)

      override def getWriter(vec: FieldVector): FieldWriter =
        vec.asInstanceOf[StructVector].getWriter

    }

    // Not supported yet.
    override def deriveEnum[A](
      `enum`: Schema.Enum[A],
      cases: => Chunk[Deriver.WrappedF[VectorSchemaRootEncoder, ?]],
      summoned: => Option[VectorSchemaRootEncoder[A]]
    ): VectorSchemaRootEncoder[A] = ???

    override def derivePrimitive[A](
      st: StandardType[A],
      summoned: => Option[VectorSchemaRootEncoder[A]]
    ): VectorSchemaRootEncoder[A] =
      VectorSchemaRootEncoder.primitive[A](
        encodeValue0 = (v, name, writer, alloc) => ValueEncoder.encodePrimitive(st, v, name, writer)(alloc),
        encodeField0 = (v, writer, alloc) => ValueEncoder.encodePrimitive(st, v, writer)(alloc),
        getWriter0 = vec => primitiveWriter(st, vec)
      )(st)

    // Optional values: None writes an Arrow null, Some delegates to `inner`.
    override def deriveOption[A](
      option: Schema.Optional[A],
      inner: => VectorSchemaRootEncoder[A],
      summoned: => Option[VectorSchemaRootEncoder[Option[A]]]
    ): VectorSchemaRootEncoder[Option[A]] = new VectorSchemaRootEncoder[Option[A]] {

      override def encodeValue(value: Option[A], name: Option[String], writer: FieldWriter)(implicit
        alloc: BufferAllocator
      ): Unit =
        value match {
          case Some(value0) =>
            inner.encodeValue(value0, name, writer)
          case None         =>
            writer.writeNull()
        }

      override def encodeField(value: Option[A], writer: FieldWriter)(implicit alloc: BufferAllocator): Unit =
        value match {
          case Some(value0) =>
            inner.encodeField(value0, writer)
          case None         =>
            writer.writeNull()
        }

      override def getWriter(vec: FieldVector): FieldWriter =
        inner.getWriter(vec)

    }

    // Sequences are written through a ListVector writer.
    override def deriveSequence[C[_], A](
      sequence: Schema.Sequence[C[A], A, ?],
      inner: => VectorSchemaRootEncoder[A],
      summoned: => Option[VectorSchemaRootEncoder[C[A]]]
    ): VectorSchemaRootEncoder[C[A]] = new VectorSchemaRootEncoder[C[A]] {

      override def encodeValue(value: C[A], name: Option[String], writer: FieldWriter)(implicit
        alloc: BufferAllocator
      ): Unit = {
        // Named => child list writer, unnamed => element list writer.
        val writer0 = name.fold(writer.list)(writer.list).asInstanceOf[PromotableWriter]

        ValueEncoder.encodeList(sequence.toChunk(value), inner, writer0)
      }

      override def encodeField(value: C[A], writer: FieldWriter)(implicit alloc: BufferAllocator): Unit =
        ValueEncoder.encodeList(sequence.toChunk(value), inner, writer)

      override def getWriter(vec: FieldVector): FieldWriter =
        vec.asInstanceOf[ListVector].getWriter

    }

    // Not supported yet.
    override def deriveMap[K, V](
      map: Schema.Map[K, V],
      key: => VectorSchemaRootEncoder[K],
      value: => VectorSchemaRootEncoder[V],
      summoned: => Option[VectorSchemaRootEncoder[Map[K, V]]]
    ): VectorSchemaRootEncoder[Map[K, V]] = ???

    // Not supported yet.
    override def deriveTransformedRecord[A, B](
      record: Schema.Record[A],
      transform: Schema.Transform[A, B, ?],
      fields: => Chunk[Deriver.WrappedF[VectorSchemaRootEncoder, ?]],
      summoned: => Option[VectorSchemaRootEncoder[B]]
    ): VectorSchemaRootEncoder[B] = ???
167 | 168 | }.cached 169 | 170 | def summoned: Deriver[VectorSchemaRootEncoder] = default.autoAcceptSummoned 171 | 172 | } 173 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/codec/package.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.arrow.core 2 | 3 | import org.apache.arrow.vector.complex.impl._ 4 | import org.apache.arrow.vector.complex.reader.FieldReader 5 | import org.apache.arrow.vector.complex.writer.FieldWriter 6 | import org.apache.arrow.vector.{ FieldVector, _ } 7 | import zio.schema.StandardType 8 | 9 | package object codec { 10 | 11 | def primitiveWriter(st: StandardType[?], vec: FieldVector): FieldWriter = 12 | (st, vec) match { 13 | case (StandardType.StringType, vec0: VarCharVector) => 14 | new VarCharWriterImpl(vec0) 15 | case (StandardType.BoolType, vec0: BitVector) => 16 | new BitWriterImpl(vec0) 17 | case (StandardType.ByteType, vec0: UInt1Vector) => 18 | new UInt1WriterImpl(vec0) 19 | case (StandardType.ShortType, vec0: SmallIntVector) => 20 | new SmallIntWriterImpl(vec0) 21 | case (StandardType.IntType, vec0: IntVector) => 22 | new IntWriterImpl(vec0) 23 | case (StandardType.LongType, vec0: BigIntVector) => 24 | new BigIntWriterImpl(vec0) 25 | case (StandardType.FloatType, vec0: Float4Vector) => 26 | new Float4WriterImpl(vec0) 27 | case (StandardType.DoubleType, vec0: Float8Vector) => 28 | new Float8WriterImpl(vec0) 29 | case (StandardType.BinaryType, vec0: LargeVarBinaryVector) => 30 | new LargeVarBinaryWriterImpl(vec0) 31 | case (StandardType.CharType, vec0: UInt2Vector) => 32 | new UInt2WriterImpl(vec0) 33 | case (StandardType.UUIDType, vec0: VarBinaryVector) => 34 | new VarBinaryWriterImpl(vec0) 35 | case (StandardType.BigDecimalType, vec0: DecimalVector) => 36 | new DecimalWriterImpl(vec0) 37 | case (StandardType.BigIntegerType, vec0: VarBinaryVector) => 38 | 
new VarBinaryWriterImpl(vec0) 39 | case (StandardType.DayOfWeekType, vec0: IntVector) => 40 | new IntWriterImpl(vec0) 41 | case (StandardType.MonthType, vec0: IntVector) => 42 | new IntWriterImpl(vec0) 43 | case (StandardType.MonthDayType, vec0: BigIntVector) => 44 | new BigIntWriterImpl(vec0) 45 | case (StandardType.PeriodType, vec0: VarBinaryVector) => 46 | new VarBinaryWriterImpl(vec0) 47 | case (StandardType.YearType, vec0: IntVector) => 48 | new IntWriterImpl(vec0) 49 | case (StandardType.YearMonthType, vec0: BigIntVector) => 50 | new BigIntWriterImpl(vec0) 51 | case (StandardType.ZoneIdType, vec0: VarCharVector) => 52 | new VarCharWriterImpl(vec0) 53 | case (StandardType.ZoneOffsetType, vec0: VarCharVector) => 54 | new VarCharWriterImpl(vec0) 55 | case (StandardType.DurationType, vec0: BigIntVector) => 56 | new BigIntWriterImpl(vec0) 57 | case (StandardType.InstantType, vec0: BigIntVector) => 58 | new BigIntWriterImpl(vec0) 59 | case (StandardType.LocalDateType, vec0: VarCharVector) => 60 | new VarCharWriterImpl(vec0) 61 | case (StandardType.LocalTimeType, vec0: VarCharVector) => 62 | new VarCharWriterImpl(vec0) 63 | case (StandardType.LocalDateTimeType, vec0: VarCharVector) => 64 | new VarCharWriterImpl(vec0) 65 | case (StandardType.OffsetTimeType, vec0: VarCharVector) => 66 | new VarCharWriterImpl(vec0) 67 | case (StandardType.OffsetDateTimeType, vec0: VarCharVector) => 68 | new VarCharWriterImpl(vec0) 69 | case (StandardType.ZonedDateTimeType, vec0: VarCharVector) => 70 | new VarCharWriterImpl(vec0) 71 | case (other, _) => 72 | throw EncoderError(s"Unsupported ZIO Schema StandardType $other") 73 | } 74 | 75 | def resolveReaderByName(name: Option[String], reader: FieldReader) = 76 | name.fold[FieldReader](reader.reader())(reader.reader(_)) 77 | 78 | } 79 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/ipc/package.scala: 
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core

import me.mnedokushev.zio.apache.arrow.core.codec.{ SchemaEncoder, VectorSchemaRootDecoder, VectorSchemaRootEncoder }
import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector.ipc.{ ArrowStreamReader, ArrowStreamWriter }
import zio._
import zio.schema.Schema
import zio.stream.ZStream

import java.io.{ ByteArrayOutputStream, InputStream }
import java.nio.channels.Channels

package object ipc {

  /**
   * Reads values of `A` from an Arrow IPC stream. The stream's schema is
   * validated against the derived schema of `A` before any batch is decoded;
   * batches are then loaded and decoded one by one and flattened into a
   * ZStream of elements.
   */
  def readStreaming[A: Schema: SchemaEncoder](
    in: InputStream
  )(implicit
    decoder: VectorSchemaRootDecoder[A]
  ): ZStream[Scope & BufferAllocator, Throwable, A] =
    for {
      (reader, root) <- ZStream
                          .fromZIO(
                            ZIO.serviceWithZIO[BufferAllocator] { implicit alloc =>
                              for {
                                reader <- ZIO.fromAutoCloseable(ZIO.attempt(new ArrowStreamReader(in, alloc)))
                                root   <- ZIO.attemptBlockingIO(reader.getVectorSchemaRoot)
                                _      <- validateSchema(root.getSchema())
                              } yield (reader, root)
                            }
                          )
      // `loadNextBatch` returns false at end-of-stream; `None` terminates
      // the repeatZIOOption loop.
      chunk          <- ZStream.repeatZIOOption(
                          ZIO
                            .attemptBlockingIO(reader.loadNextBatch())
                            .asSomeError
                            .filterOrFail(_ == true)(None)
                            .zipRight(decoder.decodeZIO(root).asSomeError)
                        )
      elem           <- ZStream.fromIterable(chunk)
    } yield elem

  /**
   * Writes a stream of `A` as an Arrow IPC stream into an in-memory buffer,
   * encoding `batchSize` elements per record batch.
   */
  def writeStreaming[R, A: Schema: SchemaEncoder](
    in: ZStream[R, Throwable, A],
    // TODO: benchmark which value is more performant. See https://wesmckinney.com/blog/arrow-streaming-columnar/
    // TODO: ArrowBuf size is limited
    batchSize: Int = 2048
  )(implicit
    encoder: VectorSchemaRootEncoder[A]
  ): ZIO[R & Scope & BufferAllocator, Throwable, ByteArrayOutputStream] = {
    val out = new ByteArrayOutputStream()

    for {
      root   <- Tabular.empty[A]
      // `null` dictionary provider: no dictionary-encoded vectors are written.
      writer <- ZIO.fromAutoCloseable(ZIO.attempt(new ArrowStreamWriter(root, null, Channels.newChannel(out))))
      _      <- ZIO.attempt(writer.start())
      _      <- in.rechunk(batchSize).chunks.foreach { chunk =>
                  for {
                    _ <- encoder.encodeZIO(chunk, root)
                    _ <- ZIO.attempt(writer.writeBatch())
                  } yield ()
                }
      // Fix: write the end-of-stream marker eagerly so `out` holds a complete
      // IPC stream when this effect finishes, not only once the enclosing
      // Scope closes the writer. ArrowStreamWriter.end() is idempotent, so
      // the later close() is safe.
      _      <- ZIO.attempt(writer.end())
    } yield out
  }

}
--------------------------------------------------------------------------------
/modules/core/src/main/scala/me/mnedokushev/zio/apache/arrow/core/package.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow

import me.mnedokushev.zio.apache.arrow.core.codec.SchemaEncoder
import org.apache.arrow.vector.types.pojo.Schema
import zio._
import zio.schema.{ Schema => ZSchema }

package object core {

  /**
   * Runs `ifValid` when the Arrow schema derived for `A` equals `schema`.
   * Throws ValidationError on mismatch and rethrows schema-encoding failures.
   */
  def whenSchemaValid[A: ZSchema, B](schema: Schema)(
    ifValid: => B
  )(implicit schemaEncoder: SchemaEncoder[A]): B =
    // TODO: cache result of `schemaRoot` for better performance
    schemaEncoder.encode match {
      case Right(s) if s == schema => ifValid
      case Right(s)                => throw ValidationError(s"Schemas are not equal $s != $schema")
      case Left(error)             => throw error
    }

  // Effectful wrapper: schema mismatch surfaces as a failed Task.
  def validateSchema[A: ZSchema: SchemaEncoder](schema: Schema): Task[Unit] =
    ZIO.attempt(whenSchemaValid[A, Unit](schema)(()))

}
--------------------------------------------------------------------------------
/modules/core/src/test/scala/me/mnedokushev/zio/apache/arrow/core/Fixtures.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core

import me.mnedokushev.zio.apache.arrow.core.codec.{
  SchemaEncoder,
  SchemaEncoderDeriver,
  VectorSchemaRootCodec,
  VectorSchemaRootDecoder
}
import zio.schema.Factory._
import zio.schema._

// Shared test fixtures: case classes covering the supported shapes
// (primitives, options, lists, structs and their nestings), each with the
// derived instances the specs need.
object Fixtures {

  final case class Primitives(a: Int, b: Double, c: String)
  object Primitives {
    implicit val schema: Schema[Primitives]                              =
      DeriveSchema.gen[Primitives]
    implicit val schemaEncoder: SchemaEncoder[Primitives]                =
      Derive.derive[SchemaEncoder, Primitives](SchemaEncoderDeriver.default)
    implicit val deriverFactory: Factory[Primitives]                     = factory[Primitives]
    implicit val vectorSchemaRootDecoder: VectorSchemaRootDecoder[Primitives] =
      VectorSchemaRootDecoder.decoder[Primitives]
    implicit val vectorSchemaRootCodec: VectorSchemaRootCodec[Primitives]     =
      VectorSchemaRootCodec.codec[Primitives]

  }

  final case class NullablePrimitives(a: Option[Int], b: Option[Double])
  object NullablePrimitives {
    implicit val schema: Schema[NullablePrimitives]                                    =
      DeriveSchema.gen[NullablePrimitives]
    implicit val schemaEncoder: SchemaEncoder[NullablePrimitives]                      =
      Derive.derive[SchemaEncoder, NullablePrimitives](SchemaEncoderDeriver.default)
    implicit val deriverFactory: Factory[NullablePrimitives]                           = factory[NullablePrimitives]
    implicit val vectorSchemaRootDecoder: VectorSchemaRootDecoder[NullablePrimitives]  =
      VectorSchemaRootDecoder.decoder[NullablePrimitives]
  }

  final case class StructOfPrimitives(struct: Primitives)
  object StructOfPrimitives {
    implicit val schema: Schema[StructOfPrimitives]               =
      DeriveSchema.gen[StructOfPrimitives]
    implicit val schemaEncoder: SchemaEncoder[StructOfPrimitives] =
      Derive.derive[SchemaEncoder, StructOfPrimitives](SchemaEncoderDeriver.default)
  }

  final case class StructOfLists(struct: ListOfPrimitives)
  object StructOfLists {
    implicit val schema: Schema[StructOfLists]               =
      DeriveSchema.gen[StructOfLists]
    implicit val schemaEncoder: SchemaEncoder[StructOfLists] =
      Derive.derive[SchemaEncoder, StructOfLists](SchemaEncoderDeriver.default)
  }

  final case class StructOfStructs(struct: StructOfPrimitives)
  object StructOfStructs {
    implicit val schema: Schema[StructOfStructs]               =
      DeriveSchema.gen[StructOfStructs]
    implicit val schemaEncoder: SchemaEncoder[StructOfStructs] =
      Derive.derive[SchemaEncoder, StructOfStructs](SchemaEncoderDeriver.default)
  }

  final case class StructOfListsOfStructs(struct: ListOfStructs)
  object StructOfListsOfStructs {
    implicit val schema: Schema[StructOfListsOfStructs]               =
      DeriveSchema.gen[StructOfListsOfStructs]
    implicit val schemaEncoder: SchemaEncoder[StructOfListsOfStructs] =
      Derive.derive[SchemaEncoder, StructOfListsOfStructs](SchemaEncoderDeriver.default)
  }

  final case class ListOfPrimitives(list: List[Int])
  object ListOfPrimitives {
    implicit val schema: Schema[ListOfPrimitives]               =
      DeriveSchema.gen[ListOfPrimitives]
    implicit val schemaEncoder: SchemaEncoder[ListOfPrimitives] =
      Derive.derive[SchemaEncoder, ListOfPrimitives](SchemaEncoderDeriver.default)
  }

  final case class NullableListOfPrimitives(list: Option[List[Int]])
  object NullableListOfPrimitives {
    implicit val schema: Schema[NullableListOfPrimitives]               =
      DeriveSchema.gen[NullableListOfPrimitives]
    implicit val schemaEncoder: SchemaEncoder[NullableListOfPrimitives] =
      Derive.derive[SchemaEncoder, NullableListOfPrimitives](SchemaEncoderDeriver.default)
    implicit val deriverFactory: Factory[NullableListOfPrimitives]      =
      factory[NullableListOfPrimitives]
    implicit val vectorSchemaRootDecoder: VectorSchemaRootDecoder[NullableListOfPrimitives] =
      VectorSchemaRootDecoder.decoder[NullableListOfPrimitives]
  }

  final case class ListOfStructs(list: List[Primitives])
  object ListOfStructs {
    implicit val schema: Schema[ListOfStructs]               =
      DeriveSchema.gen[ListOfStructs]
    implicit val schemaEncoder: SchemaEncoder[ListOfStructs] =
      Derive.derive[SchemaEncoder, ListOfStructs](SchemaEncoderDeriver.default)
  }

  final case class ListOfLists(list: List[List[Int]])
  object ListOfLists {
    implicit val schema: Schema[ListOfLists]               =
      DeriveSchema.gen[ListOfLists]
    implicit val schemaEncoder: SchemaEncoder[ListOfLists] =
      Derive.derive[SchemaEncoder, ListOfLists](SchemaEncoderDeriver.default)
  }

  final case class ListOfStructsOfLists(list: List[ListOfPrimitives])
  object ListOfStructsOfLists {
    implicit val schema: Schema[ListOfStructsOfLists]               =
      DeriveSchema.gen[ListOfStructsOfLists]
    implicit val schemaEncoder: SchemaEncoder[ListOfStructsOfLists] =
      Derive.derive[SchemaEncoder, ListOfStructsOfLists](SchemaEncoderDeriver.default)
  }

}
--------------------------------------------------------------------------------
/modules/core/src/test/scala/me/mnedokushev/zio/apache/arrow/core/TabularSpec.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core

import me.mnedokushev.zio.apache.arrow.core.Fixtures._
import zio._
import zio.stream.ZStream
import zio.test.Assertion._
import zio.test._

// Round-trip tests for the Tabular API (VectorSchemaRoot <-> Chunk/ZStream).
object TabularSpec extends ZIOSpecDefault {

  override def spec: Spec[TestEnvironment & Scope, Any] =
    suite("Tabular")(
      test("empty") {
        ZIO.scoped(
          for {
            root   <- Tabular.empty[Primitives]
            result <- Primitives.vectorSchemaRootCodec.decodeZIO(root)
          } yield assert(result)(isEmpty)
        )
      },
      test("fromChunk") {
        val payload = Chunk(Primitives(1, 1.0, "1"), Primitives(2, 2.0, "2"))

        ZIO.scoped(
          for {
            root   <- Tabular.fromChunk(payload)
            result <- Primitives.vectorSchemaRootCodec.decodeZIO(root)
          } yield assert(result)(equalTo(payload))
        )
      },
      test("fromStream") {
        val payload = Chunk(Primitives(1, 1.0, "1"), Primitives(2, 2.0, "2"))

        ZIO.scoped(
          for {
            root   <- Tabular.fromStream(ZStream.fromChunk(payload))
            result <- Primitives.vectorSchemaRootCodec.decodeZIO(root)
          } yield assert(result)(equalTo(payload))
        )
      },
      test("toChunk") {
        val payload = Chunk(Primitives(1, 1.0, "1"), Primitives(2, 2.0, "2"))

        ZIO.scoped(
          for {
            root   <- Tabular.fromChunk(payload)
            result <- Tabular.toChunk[Primitives](root)
          } yield assert(result)(equalTo(payload))
        )
      },
      test("toStream") {
        val payload = Chunk(Primitives(1, 1.0, "1"), Primitives(2, 2.0, "2"))

        ZIO.scoped(
          for {
            root   <- Tabular.fromChunk(payload)
            result <- Tabular.toStream[Primitives](root).runCollect
          } yield assert(result)(equalTo(payload))
        )
      }
    ).provideLayerShared(Allocator.rootLayer())

}
--------------------------------------------------------------------------------
/modules/core/src/test/scala/me/mnedokushev/zio/apache/arrow/core/VectorSpec.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core

import me.mnedokushev.zio.apache.arrow.core.codec.ValueVectorDecoder
import org.apache.arrow.vector.IntVector
import zio._
import zio.stream.ZStream
import zio.test.Assertion._
import zio.test._

// Round-trip tests for the Vector API (ValueVector <-> Chunk/ZStream).
object VectorSpec extends ZIOSpecDefault {

  override def spec: Spec[TestEnvironment & Scope, Any] =
    suite("Vector")(
      test("fromChunk") {
        val payload = Chunk(1, 2, 3)

        ZIO.scoped(
          for {
            vec    <- Vector.fromChunk[IntVector](payload)
            result <- ValueVectorDecoder.intDecoder.decodeZIO(vec)
          } yield assert(result)(equalTo(payload))
        )
      },
      test("fromStream") {
        val payload = Chunk(1, 2, 3)

        ZIO.scoped(
          for {
            vec    <- Vector.fromStream[IntVector](ZStream.from(payload))
            result <- ValueVectorDecoder.intDecoder.decodeZIO(vec)
          } yield assert(result)(equalTo(payload))
        )
      },
      test("toChunk") {
        val payload = Chunk(1, 2, 3)

        ZIO.scoped(
          for {
            vec    <- Vector.fromChunk[IntVector](payload)
            result <- Vector.toChunk[Int](vec)
          } yield assert(result)(equalTo(payload))
        )
      },
      test("toStream") {
        val payload = Chunk(1, 2, 3)

        ZIO.scoped(
          for {
            vec    <- Vector.fromChunk[IntVector](payload)
            result <- Vector.toStream[Int](vec).runCollect
          } yield assert(result)(equalTo(payload))
        )
      }
    ).provideLayerShared(Allocator.rootLayer())

}
--------------------------------------------------------------------------------
/modules/core/src/test/scala/me/mnedokushev/zio/apache/arrow/core/codec/CodecSpec.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core.codec

// import me.mnedokushev.zio.apache.arrow.core.Fixtures._
// import me.mnedokushev.zio.apache.arrow.core.{ Allocator, Tabular }
import me.mnedokushev.zio.apache.arrow.core.Fixtures._
import me.mnedokushev.zio.apache.arrow.core.{ Allocator, Tabular }
import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector._
import org.apache.arrow.vector.complex.StructVector
import org.apache.arrow.vector.complex.impl.VarCharWriterImpl
import org.apache.arrow.vector.types.pojo.ArrowType
import zio._
import zio.schema.Factory._
import zio.schema.Schema._
import zio.schema._
import zio.test.Assertion._
import zio.test.{ Spec, _ }

import
java.nio.charset.StandardCharsets
// import java.util.UUID

object CodecSpec extends ZIOSpecDefault {

  // Fixture exercising *summoned* value-vector instances: a hand-written
  // Int encoder that stores ints as UTF-8 text in a VarCharVector.
  final case class Summoned(a: Int, b: String)
  object Summoned {
    implicit val schema: Schema[Summoned] =
      DeriveSchema.gen[Summoned]

    // TODO: fix summoning
    implicit val intEncoder: ValueVectorEncoder[VarCharVector, Int] =
      ValueVectorEncoder.primitive(
        allocateVec = alloc => new VarCharVector("stringVector", alloc),
        getWriter = vec => new VarCharWriterImpl(vec),
        encodeTopLevel = { case (v, writer, alloc) =>
          // Render the Int as text, copy its UTF-8 bytes into an Arrow
          // buffer, hand the buffer to the writer and release it.
          val s      = v.toString
          val len    = s.length
          val buffer = alloc.buffer(len.toLong)

          buffer.writeBytes(s.getBytes(StandardCharsets.UTF_8))
          writer.writeVarChar(0, len, buffer)

          buffer.close()
        },
        encodeNested = { case (v, name, writer, alloc) =>
          val s      = v.toString
          val len    = s.length
          val buffer = alloc.buffer(len.toLong)

          buffer.writeBytes(s.getBytes(StandardCharsets.UTF_8))
          // Named => struct child writer, unnamed => list element writer.
          name.fold(writer.varChar)(writer.varChar).writeVarChar(0, len, buffer)

          buffer.close()
        }
      )

    // implicit val intEncoder: ValueVectorEncoder[IntVector, String] =
    //   ValueVectorEncoder.intEncoder.contramap[String](_.toInt)

    implicit val encoder: ValueVectorEncoder[StructVector, Summoned] =
      Derive.derive[ValueVectorEncoder[StructVector, *], Summoned](ValueVectorEncoderDeriver.summoned[StructVector])
    implicit val decoder: ValueVectorDecoder[StructVector, Summoned] =
      Derive.derive[ValueVectorDecoder[StructVector, *], Summoned](ValueVectorDecoderDeriver.summoned[StructVector])
  }

  // Fixture exercising summoned *vector-schema-root* instances: Int columns
  // are declared as Utf8 in the schema and round-tripped through text.
  final case class Summoned0(a: Int, b: String)
  object Summoned0 {
    implicit val schema: Schema[Summoned0] =
      DeriveSchema.gen[Summoned0]

    implicit val intSchemaEncoder: SchemaEncoder[Int] =
      SchemaEncoder.primitive[Int] { case (name, nullable) =>
        SchemaEncoder.primitiveField(name, new ArrowType.Utf8, nullable)
      }
    implicit val schemaEncoder: SchemaEncoder[Summoned0] =
      Derive.derive[SchemaEncoder, Summoned0](SchemaEncoderDeriver.summoned)

    implicit val intEncoder: VectorSchemaRootEncoder[Int] =
      VectorSchemaRootEncoder.primitive(
        encodeValue0 = { case (v, name, writer, alloc) =>
          val s      = v.toString
          val len    = s.length
          val buffer = alloc.buffer(len.toLong)

          buffer.writeBytes(s.getBytes(StandardCharsets.UTF_8))
          name.fold(writer.varChar)(writer.varChar).writeVarChar(0, len, buffer)

          buffer.close()
        },
        encodeField0 = { case (v, writer, alloc) =>
          val s      = v.toString
          val len    = s.length
          val buffer = alloc.buffer(len.toLong)

          buffer.writeBytes(s.getBytes(StandardCharsets.UTF_8))
          writer.writeVarChar(0, len, buffer)

          buffer.close()
        },
        getWriter0 = vec => new VarCharWriterImpl(vec.asInstanceOf[VarCharVector])
      )

    implicit val intDecoder: VectorSchemaRootDecoder[Int] =
      VectorSchemaRootDecoder.primitive(
        decode0 = (st, reader) => DynamicValue.Primitive[Int](reader.readText().toString.toInt, st)
      )

    implicit val encoder: VectorSchemaRootEncoder[Summoned0] =
      Derive.derive[VectorSchemaRootEncoder, Summoned0](VectorSchemaRootEncoderDeriver.summoned)
    implicit val decoder: VectorSchemaRootDecoder[Summoned0] =
      Derive.derive[VectorSchemaRootDecoder, Summoned0](VectorSchemaRootDecoderDeriver.summoned)
  }

  override def spec: Spec[TestEnvironment & Scope, Any] =
    suite("Codec")(
      valueVectorDecoderSpec,
      valueVectorEncoderSpec,
      valueVectorCodecSpec,
      vectorSchemaRootDecoderSpec,
      vectorSchemaRootEncoderSpec,
      vectorSchemaRootCodecSpec
    ).provideLayerShared(Allocator.rootLayer())

  val valueVectorDecoderSpec: Spec[BufferAllocator, Throwable] =
    suite("ValueVectorDecoder")(
      test("map") {
        import
ValueVectorCodec._ 127 | 128 | ZIO.scoped( 129 | for { 130 | intVec <- intCodec.encodeZIO(Chunk(1, 2, 3)) 131 | result <- intCodec.decoder.map(_.toString).decodeZIO(intVec) 132 | } yield assert(result)(equalTo(Chunk("1", "2", "3"))) 133 | ) 134 | } 135 | ) 136 | 137 | val valueVectorEncoderSpec: Spec[BufferAllocator, Throwable] = 138 | suite("ValueVectorEncoder")( 139 | test("contramap") { 140 | import ValueVectorCodec._ 141 | 142 | ZIO.scoped( 143 | for { 144 | intVec <- intCodec.encoder.contramap[String](s => s.toInt).encodeZIO(Chunk("1", "2", "3")) 145 | result <- intCodec.decodeZIO(intVec) 146 | } yield assert(result)(equalTo(Chunk(1, 2, 3))) 147 | ) 148 | } 149 | ) 150 | 151 | val valueVectorCodecSpec: Spec[BufferAllocator, Throwable] = 152 | suite("ValueVectorCodec")( 153 | test("primitive") { 154 | import ValueVectorCodec._ 155 | 156 | val emptyPayload = Chunk[Int]() 157 | val stringPaylod = Chunk("zio", "cats", "monix") 158 | val boolPayload = Chunk(true, false) 159 | val bytePayload = Chunk[Byte](1, 2) 160 | val shortPayload = Chunk[Short](1, 2) 161 | val intPayload = Chunk(1, 2, 3) 162 | val longPayload = Chunk(1L, 2L, 3L) 163 | val floatPayload = Chunk(0.5f, 1.5f, 2.5f) 164 | val doublePayload = Chunk(0.5d, 1.5d, 2.5d) 165 | val binaryPayload: Chunk[Chunk[Byte]] = Chunk(Chunk(1, 2, 3), Chunk(4, 5, 6)) 166 | val charPayload = Chunk('a', 'b') 167 | // val uuidPayload = Chunk(UUID.randomUUID(), UUID.randomUUID()) 168 | val bigDecimalPayload = Chunk(new java.math.BigDecimal("12312.33"), new java.math.BigDecimal("9990221.33")) 169 | val bigIntegerPayload = Chunk(new java.math.BigInteger("1231233999"), new java.math.BigInteger("9990221001223")) 170 | val dayOfWeekPayload = Chunk(java.time.DayOfWeek.MONDAY, java.time.DayOfWeek.TUESDAY) 171 | val monthPayload = Chunk(java.time.Month.JANUARY, java.time.Month.FEBRUARY) 172 | val monthDayPayload = 173 | Chunk(java.time.MonthDay.of(java.time.Month.JANUARY, 1), java.time.MonthDay.of(java.time.Month.FEBRUARY, 2)) 174 
| // val periodPayload = Chunk(java.time.Period.ofDays(1), java.time.Period.ofMonths(2)) 175 | val yearPayload = Chunk(java.time.Year.of(2019), java.time.Year.of(2020)) 176 | val yearMonthPayload = Chunk(java.time.YearMonth.of(2019, 3), java.time.YearMonth.of(2020, 4)) 177 | // val zoneIdPayload = Chunk(java.time.ZoneId.of("Australia/Sydney"), java.time.ZoneId.of("Africa/Harare")) 178 | val zoneOffsetPayload = Chunk(java.time.ZoneOffset.of("+1"), java.time.ZoneOffset.of("+3")) 179 | val durationPayload = Chunk(java.time.Duration.ofDays(2), java.time.Duration.ofHours(4)) 180 | val instantPayload = Chunk(java.time.Instant.ofEpochMilli(123), java.time.Instant.ofEpochMilli(999312)) 181 | val localDatePayload = Chunk(java.time.LocalDate.of(2018, 5, 7), java.time.LocalDate.of(2019, 6, 4)) 182 | val localTimePayload = Chunk(java.time.LocalTime.of(12, 30), java.time.LocalTime.of(8, 45)) 183 | val localDateTimePayload = 184 | Chunk(java.time.LocalDateTime.of(2018, 3, 12, 12, 30), java.time.LocalDateTime.of(2019, 4, 5, 7, 45)) 185 | val offsetTimePayload = 186 | Chunk( 187 | java.time.OffsetTime.of(java.time.LocalTime.of(3, 12), java.time.ZoneOffset.of("+1")), 188 | java.time.OffsetTime.of(java.time.LocalTime.of(4, 12), java.time.ZoneOffset.of("-2")) 189 | ) 190 | val offsetDateTimePayload = 191 | Chunk( 192 | java.time.OffsetDateTime 193 | .of(java.time.LocalDate.of(1970, 1, 3), java.time.LocalTime.of(3, 12), java.time.ZoneOffset.of("+1")), 194 | java.time.OffsetDateTime 195 | .of(java.time.LocalDate.of(1989, 5, 31), java.time.LocalTime.of(4, 12), java.time.ZoneOffset.of("-2")) 196 | ) 197 | val zonedDateTimePayload = 198 | Chunk( 199 | java.time.ZonedDateTime 200 | .of(java.time.LocalDateTime.of(1970, 1, 3, 3, 12), java.time.ZoneId.of("Asia/Dhaka")), 201 | java.time.ZonedDateTime 202 | .of(java.time.LocalDateTime.of(1989, 5, 31, 3, 12), java.time.ZoneId.of("Asia/Shanghai")) 203 | ) 204 | 205 | ZIO.scoped( 206 | for { 207 | emptyVec <- intCodec.encodeZIO(emptyPayload) 208 | 
emptyResult <- intCodec.decodeZIO(emptyVec) 209 | stringVec <- stringCodec.encodeZIO(stringPaylod) 210 | stringResult <- stringCodec.decodeZIO(stringVec) 211 | boolVec <- boolCodec.encodeZIO(boolPayload) 212 | boolResult <- boolCodec.decodeZIO(boolVec) 213 | byteVec <- byteCodec.encodeZIO(bytePayload) 214 | byteResult <- byteCodec.decodeZIO(byteVec) 215 | shortVec <- shortCodec.encodeZIO(shortPayload) 216 | shortResult <- shortCodec.decodeZIO(shortVec) 217 | intVec <- intCodec.encodeZIO(intPayload) 218 | intResult <- intCodec.decodeZIO(intVec) 219 | longVec <- longCodec.encodeZIO(longPayload) 220 | longResult <- longCodec.decodeZIO(longVec) 221 | floatVec <- floatCodec.encodeZIO(floatPayload) 222 | floatResult <- floatCodec.decodeZIO(floatVec) 223 | doubleVec <- doubleCodec.encodeZIO(doublePayload) 224 | doubleResult <- doubleCodec.decodeZIO(doubleVec) 225 | binaryVec <- binaryCodec.encodeZIO(binaryPayload) 226 | binaryResult <- binaryCodec.decodeZIO(binaryVec) 227 | charVec <- charCodec.encodeZIO(charPayload) 228 | charResult <- charCodec.decodeZIO(charVec) 229 | // uuidVec <- uuidCodec.encodeZIO(uuidPayload) 230 | // uuidResult <- uuidCodec.decodeZIO(uuidVec) 231 | bigDecimalVec <- bigDecimalCodec.encodeZIO(bigDecimalPayload) 232 | bigDecimalResult <- bigDecimalCodec.decodeZIO(bigDecimalVec) 233 | bigIntegerVec <- bigIntegerCodec.encodeZIO(bigIntegerPayload) 234 | bigIntegerResult <- bigIntegerCodec.decodeZIO(bigIntegerVec) 235 | dayOfWeekVec <- dayOfWeekCodec.encodeZIO(dayOfWeekPayload) 236 | dayOfWeekResult <- dayOfWeekCodec.decodeZIO(dayOfWeekVec) 237 | monthVec <- monthCodec.encodeZIO(monthPayload) 238 | monthResult <- monthCodec.decodeZIO(monthVec) 239 | monthDayVec <- monthDayCodec.encodeZIO(monthDayPayload) 240 | monthDayResult <- monthDayCodec.decodeZIO(monthDayVec) 241 | // periodVec <- periodCodec.encodeZIO(periodPayload) 242 | // periodResult <- periodCodec.decodeZIO(periodVec) 243 | yearVec <- yearCodec.encodeZIO(yearPayload) 244 | yearResult <- 
yearCodec.decodeZIO(yearVec) 245 | yearMonthVec <- yearMonthCodec.encodeZIO(yearMonthPayload) 246 | yearMonthResult <- yearMonthCodec.decodeZIO(yearMonthVec) 247 | // zoneIdVec <- zoneIdCodec.encodeZIO(zoneIdPayload) 248 | // zoneIdResult <- zoneIdCodec.decodeZIO(zoneIdVec) 249 | zoneOffsetVec <- zoneOffsetCodec.encodeZIO(zoneOffsetPayload) 250 | zoneOffsetResult <- zoneOffsetCodec.decodeZIO(zoneOffsetVec) 251 | durationVec <- durationCodec.encodeZIO(durationPayload) 252 | durationResult <- durationCodec.decodeZIO(durationVec) 253 | instantVec <- instantCodec.encodeZIO(instantPayload) 254 | instantResult <- instantCodec.decodeZIO(instantVec) 255 | localDateVec <- localDateCodec.encodeZIO(localDatePayload) 256 | localDateResult <- localDateCodec.decodeZIO(localDateVec) 257 | localTimeVec <- localTimeCodec.encodeZIO(localTimePayload) 258 | localTimeResult <- localTimeCodec.decodeZIO(localTimeVec) 259 | localDateTimeVec <- localDateTimeCodec.encodeZIO(localDateTimePayload) 260 | localDateTimeResult <- localDateTimeCodec.decodeZIO(localDateTimeVec) 261 | offsetTimeVec <- offsetTimeCodec.encodeZIO(offsetTimePayload) 262 | offsetTimeResult <- offsetTimeCodec.decodeZIO(offsetTimeVec) 263 | offsetDateTimeVec <- offsetDateTimeCodec.encodeZIO(offsetDateTimePayload) 264 | offsetDateTimeResult <- offsetDateTimeCodec.decodeZIO(offsetDateTimeVec) 265 | zonedDateTimeVec <- zonedDateTimeCodec.encodeZIO(zonedDateTimePayload) 266 | zonedDateTimeResult <- zonedDateTimeCodec.decodeZIO(zonedDateTimeVec) 267 | } yield assertTrue( 268 | emptyResult == emptyPayload, 269 | stringResult == stringPaylod, 270 | boolResult == boolPayload, 271 | byteResult == bytePayload, 272 | shortResult == shortPayload, 273 | intResult == intPayload, 274 | longResult == longPayload, 275 | floatResult == floatPayload, 276 | doubleResult == doublePayload, 277 | binaryResult == binaryPayload, 278 | charResult == charPayload, 279 | // uuidResult == uuidPayload, 280 | bigDecimalResult == bigDecimalPayload, 281 | 
bigIntegerResult == bigIntegerPayload, 282 | dayOfWeekResult == dayOfWeekPayload, 283 | monthResult == monthPayload, 284 | monthDayResult == monthDayPayload, 285 | // periodResult == periodPayload, 286 | yearResult == yearPayload, 287 | yearMonthResult == yearMonthPayload, 288 | // zoneIdResult == zoneIdPayload, 289 | zoneOffsetResult == zoneOffsetPayload, 290 | durationResult == durationPayload, 291 | instantResult == instantPayload, 292 | localDateResult == localDatePayload, 293 | localTimeResult == localTimePayload, 294 | localDateTimeResult == localDateTimePayload, 295 | offsetTimeResult == offsetTimePayload, 296 | offsetDateTimeResult == offsetDateTimePayload, 297 | zonedDateTimeResult == zonedDateTimePayload 298 | ) 299 | ) 300 | }, 301 | test("list") { 302 | import ValueVectorEncoder._ 303 | import ValueVectorDecoder._ 304 | import ValueVectorCodec._ 305 | 306 | val stringCodec = listChunkCodec[String] 307 | val boolCodec = listChunkCodec[Boolean] 308 | val byteCodec = listCodec[Byte, List] 309 | val shortCodec = listChunkCodec[Short] 310 | val intCodec = listChunkCodec[Int] 311 | val longCodec = listChunkCodec[Long] 312 | val floatCodec = listChunkCodec[Float] 313 | val doubleCodec = listChunkCodec[Double] 314 | // FIX: me.mnedokushev.zio.apache.arrow.core.codec.DecoderError: Failed to cast Primitive(Chunk(1,2,3),binary) to schema Sequence(Primitive(byte,Chunk()), Chunk) 315 | // val binaryCodec = listCodec(listEncoder[Chunk[Byte], List], listDecoder[Chunk[Byte], List]) 316 | val charCodec = listChunkCodec[Char] 317 | // FIX: Chunk(Chunk(f04b88ba-6004-4e64-a903-f271c4672a95, ec472cee-ab02-0e20-ca14-e3208adfef9e)) was not equal to Chunk(Chunk(644e0460-ba88-4bf0-952a-67c471f203a9, 200e02ab-ee2c-47ec-9eef-df8a20e314ca)) 318 | // val uuidCodec = listChunkCodec(listChunkEncoder[java.util.UUID], listChunkDecoder[java.util.UUID]) 319 | val bigDecimalCodec = listChunkCodec[java.math.BigDecimal] 320 | val bigIntegerCodec = listChunkCodec[java.math.BigInteger] 321 | 
val dayOfWeekCodec = listChunkCodec[java.time.DayOfWeek] 322 | val monthCodec = listChunkCodec[java.time.Month] 323 | val monthDayCodec = listChunkCodec[java.time.MonthDay] 324 | // val periodCodec = listChunkCodec(listChunkEncoder[java.time.Period], listChunkDecoder[java.time.Period]) 325 | val yearCodec = listChunkCodec[java.time.Year] 326 | val yearMonthCodec = listChunkCodec[java.time.YearMonth] 327 | val zoneIdCodec = listChunkCodec[java.time.ZoneId] 328 | // TODO: fix compilation for scala 3.3 329 | // val zoneOffsetCodec = listChunkCodec[java.time.ZoneOffset] 330 | val durationCodec = listChunkCodec[java.time.Duration] 331 | val instantCodec = listChunkCodec[java.time.Instant] 332 | val localDateCodec = listChunkCodec[java.time.LocalDate] 333 | val localTimeCodec = listChunkCodec[java.time.LocalTime] 334 | val localDateTimeCodec = listChunkCodec[java.time.LocalDateTime] 335 | val offsetTimeCodec = listChunkCodec[java.time.OffsetTime] 336 | val offsetDateTimeCodec = listChunkCodec[java.time.OffsetDateTime] 337 | val zonedDateTimeCodec = listChunkCodec[java.time.ZonedDateTime] 338 | 339 | val primitivesCodec = listChunkCodec[Primitives] 340 | val optionStringCodec = listChunkOptionCodec[String] 341 | 342 | val stringPayload = Chunk(Chunk("zio"), Chunk("cats", "monix")) 343 | val boolPayload = Chunk(Chunk(true), Chunk(false)) 344 | val bytePayload: Chunk[List[Byte]] = Chunk(List(100, 99), List(23)) 345 | val shortPayload: Chunk[Chunk[Short]] = Chunk(Chunk(12, 23), Chunk(12)) 346 | val intPayload = Chunk(Chunk(-456789, -123456789)) 347 | val longPayload = Chunk(Chunk(123L, 90021L)) 348 | val floatPayload = Chunk(Chunk(1.2f), Chunk(-3.4f)) 349 | val doublePayload = Chunk(Chunk(-1.2d, 3.456789d)) 350 | // val binaryPayload: Chunk[List[Chunk[Byte]]] = Chunk(List(Chunk(1, 2, 3), Chunk(1, 3))) 351 | val charPayload = Chunk(Chunk('a', 'b')) 352 | // val uuidPayload = Chunk(Chunk(java.util.UUID.randomUUID(), java.util.UUID.randomUUID())) 353 | val bigDecimalPayload = 
Chunk(Chunk(new java.math.BigDecimal("0"), new java.math.BigDecimal("-456789"))) 354 | val bigIntegerPayload = Chunk(Chunk(new java.math.BigInteger("123")), Chunk(new java.math.BigInteger("-123"))) 355 | val dayOfWeekPayload = Chunk(Chunk(java.time.DayOfWeek.MONDAY), Chunk(java.time.DayOfWeek.SUNDAY)) 356 | val monthPayload = Chunk(Chunk(java.time.Month.JANUARY), Chunk(java.time.Month.DECEMBER)) 357 | val monthDayPayload = Chunk(Chunk(java.time.MonthDay.of(1, 2)), Chunk(java.time.MonthDay.of(3, 4))) 358 | // val periodPayload = Chunk( 359 | // Chunk(java.time.Period.ofDays(5), java.time.Period.ofWeeks(6)), 360 | // Chunk(java.time.Period.ofYears(123)) 361 | // ) 362 | val yearPayload = Chunk(Chunk(java.time.Year.of(2024))) 363 | val yearMonthPayload = Chunk(Chunk(java.time.YearMonth.of(4, 3)), Chunk(java.time.YearMonth.of(5, 6))) 364 | val zoneIdPayload = Chunk( 365 | Chunk(java.time.ZoneId.of("Europe/Paris")), 366 | Chunk(java.time.ZoneId.of("America/New_York")) 367 | ) 368 | // val zoneOffsetPayload = Chunk( 369 | // Chunk(java.time.ZoneOffset.of("+1")), 370 | // Chunk(java.time.ZoneOffset.of("-3")) 371 | // ) 372 | val durationPayload = Chunk( 373 | Chunk(java.time.Duration.ofDays(5)), 374 | Chunk(java.time.Duration.ofHours(6)) 375 | ) 376 | val instantPayload = Chunk( 377 | Chunk(java.time.Instant.ofEpochMilli(5L)) 378 | ) 379 | val localDatePayload = Chunk( 380 | Chunk(java.time.LocalDate.of(4, 3, 2)), 381 | Chunk(java.time.LocalDate.of(5, 6, 7)) 382 | ) 383 | val localTimePayload = Chunk( 384 | Chunk(java.time.LocalTime.of(4, 5)), 385 | Chunk(java.time.LocalTime.of(6, 7)) 386 | ) 387 | val localDateTimePayload = Chunk( 388 | Chunk(java.time.LocalDateTime.of(4, 3, 2, 5, 3)), 389 | Chunk(java.time.LocalDateTime.of(5, 6, 7, 8, 10)) 390 | ) 391 | val offsetTimePayload = Chunk( 392 | Chunk(java.time.OffsetTime.of(java.time.LocalTime.of(13, 1), java.time.ZoneOffset.of("+1"))), 393 | Chunk(java.time.OffsetTime.of(java.time.LocalTime.of(16, 15), 
java.time.ZoneOffset.of("+3"))) 394 | ) 395 | val offsetDateTimePayload = Chunk( 396 | Chunk( 397 | java.time.OffsetDateTime 398 | .of(java.time.LocalDate.of(1970, 1, 1), java.time.LocalTime.of(13, 1), java.time.ZoneOffset.of("+1")) 399 | ), 400 | Chunk( 401 | java.time.OffsetDateTime 402 | .of(java.time.LocalDate.of(1989, 5, 31), java.time.LocalTime.of(16, 15), java.time.ZoneOffset.of("+3")) 403 | ) 404 | ) 405 | val zonedDateTimePayload = Chunk( 406 | Chunk( 407 | java.time.ZonedDateTime 408 | .of(java.time.LocalDateTime.of(2019, 5, 31, 8, 30), java.time.ZoneId.of("Africa/Cairo")) 409 | ), 410 | Chunk( 411 | java.time.ZonedDateTime 412 | .of(java.time.LocalDateTime.of(2017, 4, 6, 19, 50), java.time.ZoneId.of("Asia/Tokyo")) 413 | ) 414 | ) 415 | 416 | // TODO: think to move them into separate "list of option", "list of struct" test cases 417 | val primitivesPayload = Chunk(Chunk(Primitives(1, 2.0, "3"))) 418 | val optionStringPayload = Chunk(Chunk(Some("zio")), Chunk(None, Some("cats"))) 419 | 420 | ZIO.scoped( 421 | for { 422 | stringVec <- stringCodec.encodeZIO(stringPayload) 423 | stringResult <- stringCodec.decodeZIO(stringVec) 424 | boolVec <- boolCodec.encodeZIO(boolPayload) 425 | boolResult <- boolCodec.decodeZIO(boolVec) 426 | byteVec <- byteCodec.encodeZIO(bytePayload) 427 | byteResult <- byteCodec.decodeZIO(byteVec) 428 | shortVec <- shortCodec.encodeZIO(shortPayload) 429 | shortResult <- shortCodec.decodeZIO(shortVec) 430 | intVec <- intCodec.encodeZIO(intPayload) 431 | intResult <- intCodec.decodeZIO(intVec) 432 | longVec <- longCodec.encodeZIO(longPayload) 433 | longResult <- longCodec.decodeZIO(longVec) 434 | floatVec <- floatCodec.encodeZIO(floatPayload) 435 | floatResult <- floatCodec.decodeZIO(floatVec) 436 | doubleVec <- doubleCodec.encodeZIO(doublePayload) 437 | doubleResult <- doubleCodec.decodeZIO(doubleVec) 438 | // binaryVec <- binaryCodec.encodeZIO(binaryPayload) 439 | // binaryResult <- binaryCodec.decodeZIO(binaryVec) 440 | charVec <- 
charCodec.encodeZIO(charPayload) 441 | charResult <- charCodec.decodeZIO(charVec) 442 | // uuidVec <- uuidCodec.encodeZIO(uuidPayload) 443 | // uuidResult <- uuidCodec.decodeZIO(uuidVec) 444 | bigDecimalVec <- bigDecimalCodec.encodeZIO(bigDecimalPayload) 445 | bigDecimalResult <- bigDecimalCodec.decodeZIO(bigDecimalVec) 446 | bigIntegerVec <- bigIntegerCodec.encodeZIO(bigIntegerPayload) 447 | bigIntegerResult <- bigIntegerCodec.decodeZIO(bigIntegerVec) 448 | dayOfWeekVec <- dayOfWeekCodec.encodeZIO(dayOfWeekPayload) 449 | dayOfWeekResult <- dayOfWeekCodec.decodeZIO(dayOfWeekVec) 450 | monthVec <- monthCodec.encodeZIO(monthPayload) 451 | monthResult <- monthCodec.decodeZIO(monthVec) 452 | monthDayVec <- monthDayCodec.encodeZIO(monthDayPayload) 453 | monthDayResult <- monthDayCodec.decodeZIO(monthDayVec) 454 | // periodVec <- periodCodec.encodeZIO(periodPayload) 455 | // periodResult <- periodCodec.decodeZIO(periodVec) 456 | yearVec <- yearCodec.encodeZIO(yearPayload) 457 | yearResult <- yearCodec.decodeZIO(yearVec) 458 | yearMonthVec <- yearMonthCodec.encodeZIO(yearMonthPayload) 459 | yearMonthResult <- yearMonthCodec.decodeZIO(yearMonthVec) 460 | zoneIdVec <- zoneIdCodec.encodeZIO(zoneIdPayload) 461 | zoneIdResult <- zoneIdCodec.decodeZIO(zoneIdVec) 462 | // zoneOffsetVec <- zoneOffsetCodec.encodeZIO(zoneOffsetPayload) 463 | // zoneOffsetResult <- zoneOffsetCodec.decodeZIO(zoneOffsetVec) 464 | durationVec <- durationCodec.encodeZIO(durationPayload) 465 | durationResult <- durationCodec.decodeZIO(durationVec) 466 | instantVec <- instantCodec.encodeZIO(instantPayload) 467 | instantResult <- instantCodec.decodeZIO(instantVec) 468 | localDateVec <- localDateCodec.encodeZIO(localDatePayload) 469 | localDateResult <- localDateCodec.decodeZIO(localDateVec) 470 | localTimeVec <- localTimeCodec.encodeZIO(localTimePayload) 471 | localTimeResult <- localTimeCodec.decodeZIO(localTimeVec) 472 | localDateTimeVec <- localDateTimeCodec.encodeZIO(localDateTimePayload) 473 | 
localDateTimeResult <- localDateTimeCodec.decodeZIO(localDateTimeVec) 474 | offsetTimeVec <- offsetTimeCodec.encodeZIO(offsetTimePayload) 475 | offsetTimeResult <- offsetTimeCodec.decodeZIO(offsetTimeVec) 476 | offsetDateTimeVec <- offsetDateTimeCodec.encodeZIO(offsetDateTimePayload) 477 | offsetDateTimeResult <- offsetDateTimeCodec.decodeZIO(offsetDateTimeVec) 478 | zonedDateTimeVec <- zonedDateTimeCodec.encodeZIO(zonedDateTimePayload) 479 | zonedDateTimeResult <- zonedDateTimeCodec.decodeZIO(zonedDateTimeVec) 480 | 481 | primitivesVec <- primitivesCodec.encodeZIO(primitivesPayload) 482 | primitivesResult <- primitivesCodec.decodeZIO(primitivesVec) 483 | optionStringVec <- optionStringCodec.encodeZIO(optionStringPayload) 484 | optionStringResult <- optionStringCodec.decodeZIO(optionStringVec) 485 | } yield assertTrue( 486 | stringResult == stringPayload, 487 | boolResult == boolPayload, 488 | byteResult == bytePayload, 489 | shortResult == shortPayload, 490 | intResult == intPayload, 491 | longResult == longPayload, 492 | floatResult == floatPayload, 493 | doubleResult == doublePayload, 494 | // binaryResult == binaryPayload, 495 | charResult == charPayload, 496 | // uuidResult == uuidPayload, 497 | bigDecimalResult == bigDecimalPayload, 498 | bigIntegerResult == bigIntegerPayload, 499 | dayOfWeekResult == dayOfWeekPayload, 500 | monthResult == monthPayload, 501 | monthDayResult == monthDayPayload, 502 | // periodResult == periodPayload, 503 | yearResult == yearPayload, 504 | yearMonthResult == yearMonthPayload, 505 | zoneIdResult == zoneIdPayload, 506 | // zoneOffsetResult == zoneOffsetPayload, 507 | durationResult == durationPayload, 508 | instantResult == instantPayload, 509 | localDateResult == localDatePayload, 510 | localTimeResult == localTimePayload, 511 | localDateTimeResult == localDateTimePayload, 512 | offsetTimeResult == offsetTimePayload, 513 | offsetDateTimeResult == offsetDateTimePayload, 514 | zonedDateTimeResult == zonedDateTimePayload, 515 | 
primitivesResult == primitivesPayload, 516 | optionStringResult == optionStringPayload 517 | ) 518 | ) 519 | }, 520 | test("struct") { 521 | import ValueVectorCodec._ 522 | 523 | val primitivesCodec = structCodec[Primitives] 524 | val structOfPrimitivesCodec = structCodec[StructOfPrimitives] 525 | val structOfListsCodec = structCodec[StructOfLists] 526 | val structOfListsOfStructsCodec = structCodec[StructOfListsOfStructs] 527 | val structOfStructsCodec = structCodec[StructOfStructs] 528 | val listOfPrimitivesCodec = structCodec[ListOfPrimitives] 529 | val listOfStructsCodec = structCodec[ListOfStructs] 530 | val listOfListsCodec = structCodec[ListOfLists] 531 | val listOfStructsOfListsCodec = structCodec[ListOfStructsOfLists] 532 | 533 | val primitivesPayload = Chunk(Primitives(1, 2.0, "3")) 534 | val structOfPrimitivesPayload = Chunk(StructOfPrimitives(Primitives(1, 2.0, "4"))) 535 | val structOfListsPayload = Chunk(StructOfLists(ListOfPrimitives(List(1, 2, 3)))) 536 | val structOfListsOfStructsPayload = Chunk( 537 | StructOfListsOfStructs(ListOfStructs(List(Primitives(1, 2.0, "3"), Primitives(11, 22.0, "33")))) 538 | ) 539 | val structOfStructsPayload = Chunk(StructOfStructs(StructOfPrimitives(Primitives(1, 2.0, "3")))) 540 | val listOfPrimitivesPayload = Chunk(ListOfPrimitives(List(1, 2, 3))) 541 | val listOfStructsPayload = Chunk(ListOfStructs(List(Primitives(1, 2.0, "3"), Primitives(11, 22.0, "33")))) 542 | val listOfListsPayload = Chunk(ListOfLists(List(List(1, 2), List(3)))) 543 | val listOfStructsOfListsPayload = Chunk( 544 | ListOfStructsOfLists(List(ListOfPrimitives(List(1, 2)), ListOfPrimitives(List(3)))), 545 | ListOfStructsOfLists(List(ListOfPrimitives(List(11, 22)), ListOfPrimitives(List(33)))) 546 | ) 547 | 548 | ZIO.scoped( 549 | for { 550 | primitivesVec <- primitivesCodec.encodeZIO(primitivesPayload) 551 | primitivesResult <- primitivesCodec.decodeZIO(primitivesVec) 552 | structOfPrimitivesVec <- 
structOfPrimitivesCodec.encodeZIO(structOfPrimitivesPayload) 553 | structOfPrimitivesResult <- structOfPrimitivesCodec.decodeZIO(structOfPrimitivesVec) 554 | structOfListsVec <- structOfListsCodec.encodeZIO(structOfListsPayload) 555 | structOfListsResult <- structOfListsCodec.decodeZIO(structOfListsVec) 556 | structOfListsOfStructsVec <- structOfListsOfStructsCodec.encodeZIO(structOfListsOfStructsPayload) 557 | structOfListsOfStructsResult <- structOfListsOfStructsCodec.decodeZIO(structOfListsOfStructsVec) 558 | structOfStructsVec <- structOfStructsCodec.encodeZIO(structOfStructsPayload) 559 | structOfStructsResult <- structOfStructsCodec.decodeZIO(structOfStructsVec) 560 | structOfListOfPrimitivesVec <- listOfPrimitivesCodec.encodeZIO(listOfPrimitivesPayload) 561 | structOfListOfPrimitivesResult <- listOfPrimitivesCodec.decodeZIO(structOfListOfPrimitivesVec) 562 | listOfStructsVec <- listOfStructsCodec.encodeZIO(listOfStructsPayload) 563 | listOfStructsResult <- listOfStructsCodec.decodeZIO(listOfStructsVec) 564 | listOfListsVec <- listOfListsCodec.encodeZIO(listOfListsPayload) 565 | listOfListsResult <- listOfListsCodec.decodeZIO(listOfListsVec) 566 | listOfStructsOfListVec <- listOfStructsOfListsCodec.encodeZIO(listOfStructsOfListsPayload) 567 | listOfListsOfStructsResult <- listOfStructsOfListsCodec.decodeZIO(listOfStructsOfListVec) 568 | } yield assertTrue( 569 | primitivesResult == primitivesPayload, 570 | structOfPrimitivesResult == structOfPrimitivesPayload, 571 | structOfListsResult == structOfListsPayload, 572 | structOfListsOfStructsResult == structOfListsOfStructsPayload, 573 | structOfStructsResult == structOfStructsPayload, 574 | structOfListOfPrimitivesResult == listOfPrimitivesPayload, 575 | listOfStructsResult == listOfStructsPayload, 576 | listOfListsResult == listOfListsPayload, 577 | listOfListsOfStructsResult == listOfStructsOfListsPayload 578 | ) 579 | ) 580 | }, 581 | test("option") { 582 | import ValueVectorEncoder._ 583 | import 
ValueVectorDecoder._ 584 | import ValueVectorCodec._ 585 | 586 | val stringPayload = Chunk(Some("zio"), None, Some("arrow")) 587 | val shortPayload = Chunk[Option[Short]](Some(3), Some(2), None) 588 | val intPayload = Chunk(Some(1), None, Some(3)) 589 | val listStringPayload = Chunk(Some(Chunk("zio", "cats")), None) 590 | 591 | val stringCodec = optionCodec[VarCharVector, String] 592 | val shortCodec = optionCodec[SmallIntVector, Short] 593 | val intCodec = optionCodec[IntVector, Int] 594 | val listStringCodec = optionListChunkCodec[String] 595 | 596 | ZIO.scoped( 597 | for { 598 | stringVec <- stringCodec.encodeZIO(stringPayload) 599 | stringResult <- stringCodec.decodeZIO(stringVec) 600 | shortVec <- shortCodec.encodeZIO(shortPayload) 601 | shortResult <- shortCodec.decodeZIO(shortVec) 602 | intVec <- intCodec.encodeZIO(intPayload) 603 | intResult <- intCodec.decodeZIO(intVec) 604 | listStringVec <- listStringCodec.encodeZIO(listStringPayload) 605 | listStringResult <- listStringCodec.decodeZIO(listStringVec) 606 | } yield assertTrue( 607 | stringResult == stringPayload, 608 | shortResult == shortPayload, 609 | intResult == intPayload, 610 | listStringResult == listStringPayload 611 | ) 612 | ) 613 | } 614 | // TODO: summoning does not work :( 615 | // test("summoned") { 616 | // val payload = Chunk(Summoned(1, "a"), Summoned(2, "b")) 617 | // // val codec = ValueVectorCodec.codec[StructVector, Summoned] 618 | 619 | // ZIO.scoped( 620 | // for { 621 | // vec <- Summoned.encoder.encodeZIO(payload) 622 | // // _ = println(vec) 623 | // result <- Summoned.decoder.decodeZIO(vec) 624 | // } yield assertTrue(result == payload) 625 | // ) 626 | // } 627 | ) 628 | 629 | val vectorSchemaRootCodecSpec: Spec[BufferAllocator, Throwable] = 630 | suite("VectorSchemaRootCodec")( 631 | test("primitives") { 632 | import VectorSchemaRootEncoder._ 633 | import VectorSchemaRootCodec._ 634 | 635 | val primitivesCodec = codec[Primitives] 636 | val nullablePrimitivesCodec = 
codec[NullablePrimitives] 637 | val nullableListOfPrimitivesCodec = codec[NullableListOfPrimitives] 638 | 639 | val primitivesPayload = Chunk(Primitives(1, 2.0, "3"), Primitives(4, 5.0, "6")) 640 | val nullablePrimitivesPayload = Chunk(NullablePrimitives(Some(7), None)) 641 | val nullableListOfPrimitivesPayload = 642 | Chunk(NullableListOfPrimitives(Some(List(1, 2, 3)))) 643 | 644 | ZIO.scoped( 645 | for { 646 | primitivesRoot <- Tabular.empty[Primitives] 647 | primitivesVec <- primitivesCodec.encodeZIO(primitivesPayload, primitivesRoot) 648 | primitivesResult <- primitivesCodec.decodeZIO(primitivesVec) 649 | nullablePrimitivesRoot <- Tabular.empty[NullablePrimitives] 650 | nullablePrimitivesVec <- 651 | nullablePrimitivesCodec.encodeZIO(nullablePrimitivesPayload, nullablePrimitivesRoot) 652 | nullablePrimitivesResult <- nullablePrimitivesCodec.decodeZIO(nullablePrimitivesVec) 653 | nullableListOfPrimitivesRoot <- Tabular.empty[NullableListOfPrimitives] 654 | nullableListOfPrimitivesVec <- 655 | nullableListOfPrimitivesCodec.encodeZIO(nullableListOfPrimitivesPayload, nullableListOfPrimitivesRoot) 656 | nullableListOfPrimitivesResult <- 657 | nullableListOfPrimitivesCodec.decodeZIO(nullableListOfPrimitivesVec) 658 | } yield assertTrue( 659 | primitivesResult == primitivesPayload, 660 | nullablePrimitivesResult == nullablePrimitivesPayload, 661 | nullableListOfPrimitivesResult == nullableListOfPrimitivesPayload 662 | ) 663 | ) 664 | }, 665 | test("summoned") { 666 | val payload = Chunk(Summoned0(1, "a"), Summoned0(2, "b")) 667 | 668 | ZIO.scoped( 669 | for { 670 | root <- Tabular.empty[Summoned0] 671 | vec <- Summoned0.encoder.encodeZIO(payload, root) 672 | result <- Summoned0.decoder.decodeZIO(vec) 673 | } yield assertTrue(result == payload) 674 | ) 675 | } 676 | ) 677 | 678 | val vectorSchemaRootDecoderSpec: Spec[BufferAllocator, Throwable] = 679 | suite("VectorSchemaRootDecoder")( 680 | test("map") { 681 | val codec = VectorSchemaRootCodec.codec[Primitives] 682 | 
        ZIO.scoped(
          for {
            root   <- Tabular.empty[Primitives]
            _      <- codec.encodeZIO(Chunk(Primitives(1, 2.0, "3")), root)
            result <- codec.decoder.map(p => s"${p.a}, ${p.b}, ${p.c}").decodeZIO(root)
          } yield assert(result)(equalTo(Chunk("1, 2.0, 3")))
        )
      }
    )

  val vectorSchemaRootEncoderSpec: Spec[BufferAllocator, Throwable] =
    suite("VectorSchemaRootEncoder")(
      test("contramap") {
        val codec = VectorSchemaRootCodec.codec[Primitives]

        // Encode strings by first adapting them into Primitives via contramap,
        // then decode back and check the round-trip result.
        ZIO.scoped(
          for {
            root   <- Tabular.empty[Primitives]
            _      <- codec.encoder
                        .contramap[String](s => Primitives(s.toInt, s.toDouble, s))
                        .encodeZIO(Chunk("1", "2"), root)
            result <- codec.decodeZIO(root)
          } yield assert(result)(equalTo(Chunk(Primitives(1, 1.0, "1"), Primitives(2, 2.0, "2"))))
        )
      }
    )

}
--------------------------------------------------------------------------------
/modules/core/src/test/scala/me/mnedokushev/zio/apache/arrow/core/codec/SchemaEncoderSpec.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core.codec

import me.mnedokushev.zio.apache.arrow.core.Fixtures._
import org.apache.arrow.vector.types.FloatingPointPrecision
import org.apache.arrow.vector.types.pojo.{ ArrowType, Field, Schema => JSchema }
import zio.Scope
import zio.schema._
import zio.test.Assertion._
import zio.test.{ Spec, _ }

import scala.jdk.CollectionConverters._

// Verifies that SchemaEncoder derives the expected Arrow schema (field names,
// Arrow types, nullability) from zio-schema definitions of the Fixtures types.
object SchemaEncoderSpec extends ZIOSpecDefault {

  override def spec: Spec[TestEnvironment & Scope, Any] =
    suite("SchemaEncoder")(
      encodeFlatSpec
    )

  final case class Summoned(a: Int, b: Double, c: String)
  object Summoned {
    implicit val schema: Schema[Summoned] =
      DeriveSchema.gen[Summoned]
    // Custom Int encoder mapping Scala Int to a 64-bit signed Arrow integer
    // (instead of the usual 32-bit) so the "summoned" test below can observe
    // that the summoned deriver prefers user-provided implicit instances.
    implicit val intSchemaEncoder: SchemaEncoder[Int] =
      SchemaEncoder.primitive[Int] { case (name, nullable) =>
        SchemaEncoder.primitiveField(name, new ArrowType.Int(64, true), nullable)
      }
    // TODO: fix fromSummonedDeriver
    // implicit val schemaEncoder: SchemaEncoder[Summoned] =
    //   SchemaEncoder.fromSummonedDeriver[Summoned]
    // implicit val schemaEncoder: SchemaEncoder[Summoned] =
    //   SchemaEncoder.encoder[Summoned](SchemaEncoderDeriver.summoned)
    implicit val schemaEncoder: SchemaEncoder[Summoned] =
      Derive.derive[SchemaEncoder, Summoned](SchemaEncoderDeriver.summoned)
  }

  val encodeFlatSpec: Spec[Any, Throwable] =
    suite("schemaRoot")(
      test("primitive") {
        // Int -> 32-bit signed int, Double -> double-precision FP, String -> Utf8;
        // non-Option case-class fields are encoded as non-nullable.
        for {
          result <- Primitives.schemaEncoder.encode
          fields  = getFields(result)
        } yield assert(fields)(
          contains(SchemaEncoder.primitiveField("a", new ArrowType.Int(32, true), nullable = false))
        ) &&
          assert(fields)(
            contains(
              SchemaEncoder
                .primitiveField("b", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), nullable = false)
            )
          ) &&
          assert(fields)(contains(SchemaEncoder.primitiveField("c", new ArrowType.Utf8, nullable = false)))
      },
      test("struct") {
        // A nested case class becomes a struct field containing its own children.
        for {
          result <- StructOfPrimitives.schemaEncoder.encode
          fields  = getFields(result)
        } yield assert(fields)(
          contains(
            SchemaEncoder.structField(
              "struct",
              List(
                SchemaEncoder.primitiveField("a", new ArrowType.Int(32, true), nullable = false),
                SchemaEncoder.primitiveField(
                  "b",
                  new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE),
                  nullable = false
                ),
                SchemaEncoder.primitiveField("c", new ArrowType.Utf8, nullable = false)
              ),
              nullable = false
            )
          )
        )
      },
      test("list") {
        // Collections are encoded as an Arrow list field with an "element" child.
        for {
          result <- ListOfPrimitives.schemaEncoder.encode
          fields  = getFields(result)
        } yield assert(fields)(
          contains(
            SchemaEncoder.listField(
              "list",
              SchemaEncoder.primitiveField("element", new ArrowType.Int(32, true), nullable = false),
              nullable = false
            )
          )
        )
      },
      test("nullable primitives") {
        // Option-typed fields flip nullability to true while keeping the same Arrow type.
        for {
          result <- NullablePrimitives.schemaEncoder.encode
          fields  = getFields(result)
        } yield assert(fields)(
          contains(SchemaEncoder.primitiveField("a", new ArrowType.Int(32, true), nullable = true))
        ) &&
          assert(fields)(
            contains(
              SchemaEncoder
                .primitiveField("b", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), nullable = true)
            )
          )
      },
      test("summoned") {
        // Field "a" uses the custom 64-bit Int encoder defined on Summoned above;
        // the remaining fields fall back to the default encodings.
        for {
          result <- Summoned.schemaEncoder.encode
          fields  = getFields(result)
        } yield assert(fields)(
          contains(SchemaEncoder.primitiveField("a", new ArrowType.Int(64, true), nullable = false))
        ) &&
          assert(fields)(
            contains(
              SchemaEncoder.primitiveField(
                "b",
                new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE),
                nullable = false
              )
            )
          ) &&
          assert(fields)(contains(SchemaEncoder.primitiveField("c", new ArrowType.Utf8, nullable = false)))
      }
    )

  // Converts the Java schema's field list into a Scala List for assertions.
  private def getFields(schema: JSchema): List[Field] =
    schema.getFields.asScala.toList

}
--------------------------------------------------------------------------------
/modules/core/src/test/scala/me/mnedokushev/zio/apache/arrow/core/ipc/IpcSpec.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.core.ipc

import me.mnedokushev.zio.apache.arrow.core.Allocator
import me.mnedokushev.zio.apache.arrow.core.Fixtures.Primitives
import zio._
import zio.stream.ZStream
import zio.test.Assertion._
import zio.test._

import java.io.ByteArrayInputStream

object IpcSpec extends ZIOSpecDefault {
  override def spec: Spec[TestEnvironment & Scope, Any] =
    suite("IPC")(
      test("streaming") {
        // Round-trip a multi-batch payload through the Arrow IPC streaming
        // format: write to an in-memory buffer, read it back, compare.
        val payload = (1 to 8096).map(i => Primitives(i, i.toDouble, i.toString))

        ZIO.scoped(
          for {
            out    <- writeStreaming[Any, Primitives](ZStream.from(payload))
            result <- readStreaming[Primitives](new ByteArrayInputStream(out.toByteArray)).runCollect
          } yield assert(result)(equalTo(Chunk.fromIterable(payload)))
        )
      }
    ).provideLayerShared(Allocator.rootLayer())

}
--------------------------------------------------------------------------------
/modules/datafusion/src/main/scala/me/mnedokushev/zio/apache/arrow/datafusion/Context.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.datafusion

import org.apache.arrow.datafusion._
import zio._

import java.nio.file.Path

// Thin ZIO wrapper over datafusion-java's SessionContext: each method lifts
// the underlying CompletableFuture-based API into a ZIO Task.
class Context(underlying: SessionContext) {

  // Executes a SQL query against the session and wraps the resulting frame.
  def sql(query: String): Task[Dataframe] =
    ZIO.fromCompletableFuture(underlying.sql(query)).map(new Dataframe(_))

  // Registers the CSV file at `path` as table `name` for subsequent SQL queries.
  def registerCsv(name: String, path: Path): Task[Unit] =
    ZIO.fromCompletableFuture(underlying.registerCsv(name, path)).unit

  // Registers the Parquet file at `path` as table `name` for subsequent SQL queries.
  def registerParquet(name: String, path: Path): Task[Unit] =
    ZIO.fromCompletableFuture(underlying.registerParquet(name, path)).unit

}

object Context {

  // Layer that creates a SessionContext and closes it when the scope ends
  // (SessionContext is AutoCloseable, released via fromAutoCloseable).
  def create: TaskLayer[Context] =
    ZLayer.scoped(
      ZIO
        .fromAutoCloseable(ZIO.attempt(SessionContexts.create()))
        .map(new Context(_))
    )

}
--------------------------------------------------------------------------------
/modules/datafusion/src/main/scala/me/mnedokushev/zio/apache/arrow/datafusion/Dataframe.scala:
--------------------------------------------------------------------------------
package me.mnedokushev.zio.apache.arrow.datafusion

import me.mnedokushev.zio.apache.arrow.core._
import me.mnedokushev.zio.apache.arrow.core.codec.{ SchemaEncoder, VectorSchemaRootDecoder }
import org.apache.arrow.datafusion.DataFrame
import org.apache.arrow.memory.BufferAllocator
import zio._
import zio.schema.Schema
import zio.stream.ZStream

import java.nio.file.Path

// Thin ZIO wrapper over datafusion-java's DataFrame.
class Dataframe(underlying: DataFrame) {

  // Streams the query result as decoded values of A.
  //
  // Pipeline:
  //  1. acquire an Arrow reader from `underlying.collect(alloc)`, closing it
  //     (errors logged and ignored) when the stream terminates;
  //  2. obtain the reader's VectorSchemaRoot and validate its schema against
  //     the one implied by A's SchemaEncoder;
  //  3. repeatedly load the next record batch and decode it into a Chunk[A];
  //     `filterOrFail(_ == true)(None)` turns `loadNextBatch() == false` into
  //     the `None` error that `repeatZIOOption` interprets as end-of-stream,
  //     while real failures are wrapped in `Some` via `asSomeError`;
  //  4. flatten the per-batch chunks into individual elements.
  def collect[A: Schema: SchemaEncoder](implicit
    decoder: VectorSchemaRootDecoder[A]
  ): ZStream[BufferAllocator, Throwable, A] =
    ZStream.serviceWithStream[BufferAllocator] { alloc =>
      for {
        reader <- ZStream.acquireReleaseWith(
                    ZIO.fromCompletableFuture(underlying.collect(alloc))
                  )(reader => ZIO.attempt(reader.close()).ignoreLogged)
        root   <- ZStream.fromZIO(
                    for {
                      root <- ZIO.attempt(reader.getVectorSchemaRoot)
                      _    <- validateSchema(root.getSchema())
                    } yield root
                  )
        chunk  <- ZStream.repeatZIOOption(
                    ZIO
                      .attempt(reader.loadNextBatch())
                      .asSomeError
                      .filterOrFail(_ == true)(None) *>
                      decoder.decodeZIO(root).asSomeError
                  )
        elem   <- ZStream.fromIterable(chunk)
      } yield elem
    }

  // Prints the frame to stdout via the underlying DataFrame.show().
  def show: Task[Unit] =
    ZIO.fromCompletableFuture(underlying.show()).unit

  // Writes the frame to `path` in Parquet format.
  def writeParquet(path: Path): Task[Unit] =
    ZIO.fromCompletableFuture(underlying.writeParquet(path)).unit

  // Writes the frame to `path` in CSV format.
  def writeCsv(path: Path): Task[Unit] =
    ZIO.fromCompletableFuture(underlying.writeCsv(path)).unit

}
--------------------------------------------------------------------------------
/modules/datafusion/src/test/resources/test.csv:
--------------------------------------------------------------------------------
fname,lname,address,age
Bob,Dylan,Hollywood,80
Dog,Cat,NY,3
John,Doe,London,99
--------------------------------------------------------------------------------
/modules/datafusion/src/test/scala/me/mnedokushev/zio/apache/arrow/datafusion/DataframeSpec.scala:
--------------------------------------------------------------------------------
package
me.mnedokushev.zio.apache.arrow.datafusion

import me.mnedokushev.zio.apache.arrow.core.Allocator
import me.mnedokushev.zio.apache.arrow.core.codec.{ SchemaEncoder, SchemaEncoderDeriver, VectorSchemaRootDecoder }
import zio._
import zio.schema._
import zio.test.Assertion._
import zio.test._

import java.nio.file.Paths

object DataframeSpec extends ZIOSpecDefault {

  // Row type mirroring the columns of src/test/resources/test.csv.
  case class TestData(fname: String, lname: String, address: String, age: Long)
  object TestData {
    implicit val schema: Schema[TestData] =
      DeriveSchema.gen[TestData]
    implicit val schemaEncoder: SchemaEncoder[TestData] =
      Derive.derive[SchemaEncoder, TestData](SchemaEncoderDeriver.default)
    implicit val vectorSchemaRootDecoder: VectorSchemaRootDecoder[TestData] =
      VectorSchemaRootDecoder.decoder[TestData]

  }

  override def spec: Spec[TestEnvironment & Scope, Any] =
    suite("Dataframe")(
      test("collect") {
        // Register the bundled CSV, run a filtered query, and decode the
        // single matching row back into TestData.
        ZIO.serviceWithZIO[Context] { context =>
          for {
            _      <- context.registerCsv("test", Paths.get(getClass.getResource("/test.csv").toURI))
            df     <- context.sql("SELECT * FROM test WHERE fname = 'Dog'")
            result <- df.collect[TestData].runCollect
          } yield assert(result)(equalTo(Chunk(TestData("Dog", "Cat", "NY", 3))))
        }
      }
    ).provide(Context.create, Allocator.rootLayer())

}
--------------------------------------------------------------------------------
/project/BuildHelper.scala:
--------------------------------------------------------------------------------
import sbt._
import sbt.Keys._
import scalafix.sbt.ScalafixPlugin.autoImport.scalafixSemanticdb

object BuildHelper {

  // Common sbt settings shared by all modules of the build.
  def stdSettings(projectName: String): Seq[Def.Setting[_]] = Seq(
    name         := s"zio-apache-arrow-$projectName",
    organization := "me.mnedokushev",
    libraryDependencies ++= betterMonadicFor(scalaVersion.value),
    libraryDependencies ++=
      kindProjector(scalaVersion.value),
    scalacOptions --= disableUnusedImportsWarnings(scalaVersion.value),
    scalacOptions ++= source3Compatibility(scalaVersion.value),
    semanticdbEnabled  := true,
    semanticdbVersion  := scalafixSemanticdb.revision,
    // Forked test JVMs need the extra --add-opens flags computed below.
    Test / javaOptions ++= arrowJavaCompat,
    Test / fork        := true
  )

  val Scala212 = "2.12.20"
  val Scala213 = "2.13.16"
  val Scala3   = "3.6.4"

  // better-monadic-for is a Scala 2-only compiler plugin; empty on Scala 3.
  private def betterMonadicFor(scalaVersion: String) =
    CrossVersion.partialVersion(scalaVersion) match {
      case Some((2, _)) => Seq(compilerPlugin("com.olegpy" %% "better-monadic-for" % "0.3.1"))
      case _            => Seq()
    }

  // kind-projector is a Scala 2-only compiler plugin; empty on Scala 3.
  private def kindProjector(scalaVersion: String) =
    CrossVersion.partialVersion(scalaVersion) match {
      case Some((2, _)) => Seq(compilerPlugin("org.typelevel" % "kind-projector" % "0.13.3" cross CrossVersion.full))
      case _            => Seq()
    }

  // On JDK 9+ the module system blocks Arrow's access to java.nio internals,
  // so open that package to the unnamed module; JDK 8 needs nothing.
  private def arrowJavaCompat =
    if (System.getProperty("java.version").startsWith("1.8"))
      Seq()
    else
      Seq("--add-opens=java.base/java.nio=ALL-UNNAMED")

  // Returned options are REMOVED from scalacOptions (used with `--=` above).
  // TODO: can't figure out why scala 2.13 emits 'unused import' for zio.schema.Derive in ValueVectorDecoder.scala
  private def disableUnusedImportsWarnings(scalaVersion: String) =
    CrossVersion.partialVersion(scalaVersion) match {
      case Some((2, 13)) => Seq("-Wunused:imports")
      case _             => Seq()
    }

  // See https://www.scala-lang.org/api/3.x/docs/docs/reference/changed-features/vararg-splices.html#compatibility-considerations-2
  private def source3Compatibility(scalaVersion: String) =
    CrossVersion.partialVersion(scalaVersion) match {
      case Some((2, _)) => Seq("-Xsource:3")
      case _            => Seq()
    }

}
--------------------------------------------------------------------------------
/project/Dep.scala:
--------------------------------------------------------------------------------
import sbt._

object Dep {

  // Centralized dependency versions.
  object V {
    val zio                   = "2.1.17"
    val zioSchema             = "1.7.0"
    val arrow                 = "18.2.0"
    val scalaCollectionCompat = "2.13.0"
  }

  // Centralized organization (group id) names.
  object O {
    val apacheArrow      = "org.apache.arrow"
    val scalaLang        = "org.scala-lang"
    val zio              = "dev.zio"
    val scalaLangModules = "org.scala-lang.modules"
  }

  lazy val arrowFormat       = O.apacheArrow % "arrow-format"        % V.arrow
  lazy val arrowVector       = O.apacheArrow % "arrow-vector"        % V.arrow
  lazy val arrowMemory       = O.apacheArrow % "arrow-memory"        % V.arrow
  lazy val arrowMemoryUnsafe = O.apacheArrow % "arrow-memory-unsafe" % V.arrow

  lazy val zio                 = O.zio %% "zio"                   % V.zio
  lazy val zioSchema           = O.zio %% "zio-schema"            % V.zioSchema
  lazy val zioSchemaDerivation = O.zio %% "zio-schema-derivation" % V.zioSchema
  lazy val zioTest             = O.zio %% "zio-test"              % V.zio
  lazy val zioTestSbt          = O.zio %% "zio-test-sbt"          % V.zio

  lazy val scalaCollectionCompat = O.scalaLangModules %% "scala-collection-compat" % V.scalaCollectionCompat

  lazy val datafusionJava = "uk.co.gresearch.datafusion" % "datafusion-java" % "0.12.0"

  // Dependencies of the core module.
  lazy val core = Seq(
    arrowFormat,
    arrowVector,
    arrowMemory,
    zio,
    zioSchema,
    zioSchemaDerivation,
    scalaCollectionCompat,
    arrowMemoryUnsafe % Test,
    zioTest           % Test,
    zioTestSbt        % Test
  )

  // Dependencies of the datafusion module (core deps plus datafusion-java).
  lazy val datafusion = Seq(
    arrowFormat,
    arrowVector,
    arrowMemory,
    datafusionJava,
    zio,
    zioSchema,
    zioSchemaDerivation,
    scalaCollectionCompat,
    arrowMemoryUnsafe % Test,
    zioTest           % Test,
    zioTestSbt        % Test
  )

}
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
sbt.version=1.10.11
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
// Linting
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.4")
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.14.2")

// Dependencies management
addSbtPlugin("ch.epfl.scala"    % "sbt-missinglink"           % "0.3.6")
addSbtPlugin("com.github.cb372" % "sbt-explicit-dependencies" % "0.3.1")

// Versioning and release
addSbtPlugin("com.eed3si9n"   % "sbt-buildinfo"      % "0.13.1")
addSbtPlugin("org.typelevel"  % "sbt-tpolecat"       % "0.5.2")
addSbtPlugin("com.github.sbt" % "sbt-ci-release"     % "1.9.3")
addSbtPlugin("com.github.sbt" % "sbt-github-actions" % "0.25.0")

addDependencyTreePlugin
--------------------------------------------------------------------------------