├── .sbtopts ├── project ├── build.properties ├── plugins.sbt ├── BuildHelper.scala └── Dep.scala ├── docs ├── logo.png └── scala-cli │ ├── Schema.scala │ ├── Value.scala │ ├── SchemaArity23.scala │ ├── SchemaSummoned.scala │ ├── ValueSummoned.scala │ ├── ParquetIO.scala │ └── Filtering.scala ├── .git-blame-ignore-revs ├── modules ├── hadoop │ └── src │ │ ├── test │ │ ├── resources │ │ │ └── food.parquet │ │ └── scala │ │ │ └── me │ │ │ └── mnedokushev │ │ │ └── zio │ │ │ └── apache │ │ │ └── parquet │ │ │ └── hadoop │ │ │ ├── ValueConverterSpec.scala │ │ │ └── ParquetIOSpec.scala │ │ └── main │ │ └── scala │ │ └── me │ │ └── mnedokushev │ │ └── zio │ │ └── apache │ │ └── parquet │ │ └── hadoop │ │ ├── Path.scala │ │ ├── WriteSupport.scala │ │ ├── ReadSupport.scala │ │ ├── ParquetWriter.scala │ │ ├── ParquetReader.scala │ │ └── GroupValueConverter.scala └── core │ └── src │ ├── main │ ├── scala │ │ └── me │ │ │ └── mnedokushev │ │ │ └── zio │ │ │ └── apache │ │ │ └── parquet │ │ │ └── core │ │ │ ├── filter │ │ │ ├── FilterError.scala │ │ │ ├── package.scala │ │ │ ├── Filter.scala │ │ │ ├── ExprAccessorBuilder.scala │ │ │ ├── Column.scala │ │ │ ├── Operator.scala │ │ │ ├── TypeTagDeriver.scala │ │ │ ├── Predicate.scala │ │ │ ├── OperatorSupport.scala │ │ │ └── TypeTag.scala │ │ │ ├── codec │ │ │ ├── DecoderError.scala │ │ │ ├── EncoderError.scala │ │ │ ├── ValueDecoder.scala │ │ │ ├── ValueEncoder.scala │ │ │ ├── SchemaEncoder.scala │ │ │ ├── SchemaEncoderDeriver.scala │ │ │ ├── ValueEncoderDeriver.scala │ │ │ └── ValueDecoderDeriver.scala │ │ │ ├── package.scala │ │ │ ├── Schemas.scala │ │ │ └── Value.scala │ ├── scala-2.13 │ │ └── me │ │ │ └── mnedokushev │ │ │ └── zio │ │ │ └── apache │ │ │ └── parquet │ │ │ └── core │ │ │ └── filter │ │ │ ├── internal │ │ │ ├── MacroUtils.scala │ │ │ ├── ColumnPathConcatMacro.scala │ │ │ └── SanitizeOptionalsMacro.scala │ │ │ └── syntax.scala │ └── scala-3 │ │ └── me │ │ └── mnedokushev │ │ └── zio │ │ └── apache │ │ └── parquet │ │ └── core │ │ └── filter │ │ ├── syntax.scala │ │ └── internal │ │ ├── ColumnPathConcatMacro.scala │ │ └── SanitizeOptionalsMacro.scala │ └── test │ ├── scala-2.13+ │ └── me │ │ └── mnedokushev │ │ └── zio │ │ └── apache │ │ └── parquet │ │ └── core │ │ └── Fixtures.scala │ └── scala │ └── me │ └── mnedokushev │ └── zio │ └── apache │ └── parquet │ └── core │ ├── codec │ └── SchemaEncoderDeriverSpec.scala │ └── filter │ └── ExprSpec.scala ├── .gitignore ├── .scalafmt.conf ├── .scalafix.conf ├── .github └── workflows │ ├── release.yml │ ├── ci.yml │ └── clean.yml ├── LICENSE └── README.md /.sbtopts: -------------------------------------------------------------------------------- 1 | -J-Xss4M -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.11.7 2 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grouzen/zio-apache-parquet/HEAD/docs/logo.png -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Scala Steward: Reformat with scalafmt 3.10.2 2 | 49ef9dde9e251e2a6bddf0092a3ba8b4bb1cad1e 3 | -------------------------------------------------------------------------------- 
/modules/hadoop/src/test/resources/food.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grouzen/zio-apache-parquet/HEAD/modules/hadoop/src/test/resources/food.parquet -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | .idea/ 4 | .bsp/ 5 | target/ 6 | */target/ 7 | private/ 8 | 9 | .bloop/ 10 | .metals/ 11 | .vscode/ 12 | metals.sbt 13 | .scala-build 14 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/FilterError.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | final case class FilterError( 4 | message: String, 5 | cause: Option[Throwable] = None 6 | ) extends IllegalArgumentException(message, cause.getOrElse(new Throwable())) 7 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/package.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core 2 | 3 | import org.apache.parquet.filter2.predicate.FilterPredicate 4 | 5 | package object filter { 6 | 7 | type CompiledPredicate = Either[String, FilterPredicate] 8 | 9 | } 10 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/DecoderError.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import java.io.IOException 4 | 5 | final case class DecoderError( 6 | message: String, 7 | cause: Option[Throwable] = None 8 | ) extends IOException(message, cause.getOrElse(new Throwable())) 9 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/EncoderError.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import java.io.IOException 4 | 5 | final case class EncoderError( 6 | message: String, 7 | cause: Option[Throwable] = None 8 | ) extends IOException(message, cause.getOrElse(new Throwable())) 9 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "3.10.2" 2 | runner.dialect = scala3 3 | maxColumn = 120 4 | align.preset = most 5 | continuationIndent.defnSite = 2 6 | assumeStandardLibraryStripMargin = true 7 | docstrings.style = Asterisk 8 | lineEndings = preserve 9 | includeCurlyBraceInSelectChains = false 10 | danglingParentheses.preset = true 11 | spaces { 12 | inImportCurlyBraces = true 13 | } 14 | optIn.annotationNewlines = true 15 | runner.dialect = scala3 16 | rewrite.rules = [SortImports, RedundantBraces] 17 | -------------------------------------------------------------------------------- /.scalafix.conf: -------------------------------------------------------------------------------- 1 | rules = [ 2 | DisableSyntax 3 | LeakingImplicitClassVal 4 | NoAutoTupling 5 | NoValInForComprehension 6 
| OrganizeImports 7 | ] 8 | 9 | Disable { 10 | ifSynthetic = [ 11 | "scala/Option.option2Iterable" 12 | "scala/Predef.any2stringadd" 13 | ] 14 | } 15 | 16 | OrganizeImports { 17 | # Allign with IntelliJ IDEA so that they don't fight each other 18 | groupedImports = Merge 19 | removeUnused = false 20 | } 21 | 22 | RemoveUnused { 23 | imports = false // handled by OrganizeImports 24 | } 25 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-2.13/me/mnedokushev/zio/apache/parquet/core/filter/internal/MacroUtils.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter.internal 2 | 3 | import scala.reflect.macros.blackbox 4 | 5 | abstract class MacroUtils(c: blackbox.Context) { 6 | 7 | import c.universe._ 8 | 9 | private def debugEnabled: Boolean = true 10 | 11 | implicit class Debugged[A](self: A) { 12 | def debugged(): Unit = 13 | if (debugEnabled) 14 | c.info(c.enclosingPosition, s"tree=${showRaw(self)}", force = true) 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/package.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet 2 | 3 | package object core { 4 | 5 | val MILLIS_PER_DAY = 86400000L 6 | val NANOS_PER_DAY = 86400000000000L 7 | val MILLIS_FACTOR = 1000L 8 | val MICROS_FACTOR = 1000000L 9 | val NANOS_FACTOR = 1000000000L 10 | val DECIMAL_PRECISION = 11 11 | val DECIMAL_SCALE = 2 12 | 13 | type Lens[F, S, A] = filter.Column.Named[A, F] 14 | type Prism[F, S, A] = Unit 15 | type Traversal[S, A] = Unit 16 | 17 | } 18 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/ValueDecoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value 4 | import zio._ 5 | 6 | trait ValueDecoder[+A] { self => 7 | 8 | def decode(value: Value): A 9 | 10 | def decodeZIO(value: Value): Task[A] = 11 | ZIO.attempt(decode(value)) 12 | 13 | def map[B](f: A => B): ValueDecoder[B] = 14 | new ValueDecoder[B] { 15 | override def decode(value: Value): B = 16 | f(self.decode(value)) 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/ValueEncoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value 4 | import zio._ 5 | 6 | trait ValueEncoder[-A] { self => 7 | 8 | def encode(value: A): Value 9 | 10 | def encodeZIO(value: A): Task[Value] = 11 | ZIO.attemptBlocking(encode(value)) 12 | 13 | def contramap[B](f: B => A): ValueEncoder[B] = 14 | new ValueEncoder[B] { 15 | override def encode(value: B): Value = 16 | self.encode(f(value)) 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | // Linting 2 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.6") 3 | addSbtPlugin("ch.epfl.scala" % 
"sbt-scalafix" % "0.14.4") 4 | 5 | // Dependencies management 6 | addSbtPlugin("ch.epfl.scala" % "sbt-missinglink" % "0.3.6") 7 | addSbtPlugin("com.github.cb372" % "sbt-explicit-dependencies" % "0.3.1") 8 | 9 | // Versioning and release 10 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.13.1") 11 | addSbtPlugin("org.typelevel" % "sbt-tpolecat" % "0.5.2") 12 | addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.11.2") 13 | addSbtPlugin("com.github.sbt" % "sbt-github-actions" % "0.29.0") 14 | 15 | addDependencyTreePlugin 16 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/SchemaEncoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import org.apache.parquet.schema.Type 4 | import zio._ 5 | import zio.schema._ 6 | 7 | trait SchemaEncoder[A] { self => 8 | 9 | def encode(schema: Schema[A], name: String, optional: Boolean): Type 10 | 11 | def encodeZIO(schema: Schema[A], name: String, optional: Boolean): Task[Type] = 12 | ZIO.attempt(encode(schema, name, optional)) 13 | 14 | def contramap[B](f: Schema[B] => Schema[A]): SchemaEncoder[B] = 15 | new SchemaEncoder[B] { 16 | override def encode(schema: Schema[B], name: String, optional: Boolean): Type = 17 | self.encode(f(schema), name, optional) 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | branches: [master, main] 5 | tags: ["*"] 6 | jobs: 7 | publish: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | with: 12 | fetch-depth: 0 13 | 14 | - uses: actions/setup-java@v4 15 | with: 16 | distribution: temurin 17 | java-version: 17 18 | cache: sbt 19 | 20 | - name: Setup sbt 21 | uses: sbt/setup-sbt@v1 22 | 23 | - run: sbt ci-release 24 | env: 25 | PGP_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }} 26 | PGP_SECRET: ${{ secrets.PGP_SECRET }} 27 | SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} 28 | SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }} 29 | -------------------------------------------------------------------------------- /docs/scala-cli/Schema.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.3 3 | 4 | import zio.schema.* 5 | import me.mnedokushev.zio.apache.parquet.core.codec.* 6 | 7 | object Schema extends App: 8 | 9 | case class MyRecord(a: Int, b: String, c: Option[Long]) 10 | 11 | object MyRecord: 12 | given schema: Schema[MyRecord] = 13 | DeriveSchema.gen[MyRecord] 14 | given schemaEncoder: SchemaEncoder[MyRecord] = 15 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 16 | 17 | val parquetSchema = MyRecord.schemaEncoder.encode(MyRecord.schema, "my_record", optional = false) 18 | 19 | println(parquetSchema) 20 | // Outputs: 21 | // required group my_record { 22 | // required int32 a (INTEGER(32,true)); 23 | // required binary b (STRING); 24 | // optional int64 c (INTEGER(64,true)); 25 | // } 26 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-3/me/mnedokushev/zio/apache/parquet/core/filter/syntax.scala: 
-------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Lens 4 | import me.mnedokushev.zio.apache.parquet.core.filter.internal.{ ColumnPathConcatMacro, SanitizeOptionalsMacro } 5 | 6 | package object syntax extends Predicate.Syntax { 7 | 8 | extension [F, S, A](column: Lens[F, S, Option[A]]) { 9 | def nullable(implicit typeTag: TypeTag[A]): Column.Named[A, column.Identity] = 10 | Column.Named(column.path) 11 | } 12 | 13 | inline def filter[A](inline predicate: Predicate[A]): CompiledPredicate = 14 | ${ SanitizeOptionalsMacro.sanitizeImpl[A]('predicate) } 15 | 16 | inline def concat[A, B, F](inline parent: Column[A], inline child: Column.Named[B, F])(using 17 | ctt: TypeTag[B] 18 | ): Column[B] = 19 | ${ ColumnPathConcatMacro.concatImpl[A, B, F]('parent, 'child, 'ctt) } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-2.13/me/mnedokushev/zio/apache/parquet/core/filter/syntax.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Lens 4 | import me.mnedokushev.zio.apache.parquet.core.filter.CompiledPredicate 5 | import me.mnedokushev.zio.apache.parquet.core.filter.internal.{ ColumnPathConcatMacro, SanitizeOptionalsMacro } 6 | 7 | package object syntax extends Predicate.Syntax { 8 | 9 | implicit class NullableColumnSyntax[F, S, A](val column: Lens[F, S, Option[A]]) { 10 | def nullable(implicit typeTag: TypeTag[A]): Column.Named[A, column.Identity] = 11 | Column.Named(column.path) 12 | } 13 | 14 | def filter[A](predicate: Predicate[A]): CompiledPredicate = macro SanitizeOptionalsMacro.sanitizeImpl[A] 15 | 16 | def concat[A, B, F]( 17 | parent: Column[A], 18 | child: Column.Named[B, F] 19 | ): Column[B] = macro ColumnPathConcatMacro.concatImpl[A, B, F] 20 | 21 | } 22 | -------------------------------------------------------------------------------- /docs/scala-cli/Value.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.3 3 | 4 | import zio.schema.* 5 | import me.mnedokushev.zio.apache.parquet.core.codec.* 6 | 7 | object Value extends App: 8 | 9 | case class MyRecord(a: Int, b: String, c: Option[Long]) 10 | 11 | object MyRecord: 12 | given Schema[MyRecord] = 13 | DeriveSchema.gen[MyRecord] 14 | given encoder: ValueEncoder[MyRecord] = 15 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 16 | given decoder: ValueDecoder[MyRecord] = 17 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 18 | 19 | val value = MyRecord.encoder.encode(MyRecord(3, "zio", None)) 20 | val record = MyRecord.decoder.decode(value) 21 | 22 | println(value) 23 | // Outputs: 24 | // RecordValue(Map(a -> Int32Value(3), b -> BinaryValue(Binary{"zio"}), c -> NullValue)) 25 | println(record) 26 | // Outputs: 27 | // MyRecord(3,zio,None) 28 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/Filter.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.{ Lens, Prism, 
Traversal } 4 | import zio.schema._ 5 | 6 | trait Filter { 7 | 8 | type Columns 9 | 10 | val columns: Columns 11 | 12 | } 13 | 14 | object Filter { 15 | 16 | type Aux[Columns0] = Filter { 17 | type Columns = Columns0 18 | } 19 | 20 | def apply[A](implicit 21 | schema: Schema[A], 22 | typeTag: TypeTag[A] 23 | ): Filter.Aux[schema.Accessors[Lens, Prism, Traversal]] = 24 | new Filter { 25 | val accessorBuilder = 26 | new ExprAccessorBuilder(typeTag.asInstanceOf[TypeTag.Record[A]].columns) 27 | 28 | override type Columns = 29 | schema.Accessors[accessorBuilder.Lens, accessorBuilder.Prism, accessorBuilder.Traversal] 30 | 31 | override val columns: Columns = 32 | schema.makeAccessors(accessorBuilder) 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/ExprAccessorBuilder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import zio.schema.{ AccessorBuilder, Schema } 4 | 5 | final class ExprAccessorBuilder(typeTags: Map[String, TypeTag[?]]) extends AccessorBuilder { 6 | 7 | override type Lens[F, S, A] = Column.Named[A, F] 8 | 9 | override type Prism[F, S, A] = Unit 10 | 11 | override type Traversal[S, A] = Unit 12 | 13 | override def makeLens[F, S, A](product: Schema.Record[S], term: Schema.Field[S, A]): Column.Named[A, F] = { 14 | val name = term.name.toString 15 | implicit val typeTag = typeTags(name).asInstanceOf[TypeTag[A]] 16 | 17 | Column.Named[A, F](name) 18 | } 19 | 20 | override def makePrism[F, S, A](sum: Schema.Enum[S], term: Schema.Case[S, A]): Prism[F, S, A] = 21 | () 22 | 23 | override def makeTraversal[S, A](collection: Schema.Collection[S, A], element: Schema[A]): Traversal[S, A] = 24 | () 25 | 26 | } 27 | -------------------------------------------------------------------------------- /docs/scala-cli/SchemaArity23.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.3 3 | 4 | import zio.schema._ 5 | import me.mnedokushev.zio.apache.parquet.core.codec._ 6 | 7 | object SchemaArity23 extends App { 8 | 9 | final case class Arity23( 10 | a: Int, 11 | b: Option[String], 12 | c: Int, 13 | d: Int, 14 | e: Int, 15 | f: Int, 16 | g: Int, 17 | h: Int, 18 | i: Int, 19 | j: Int, 20 | k: Int, 21 | l: Int, 22 | m: Int, 23 | n: Int, 24 | o: Int, 25 | p: Int, 26 | q: Int, 27 | r: Int, 28 | s: Int, 29 | t: Int, 30 | u: Int, 31 | v: Int, 32 | w: Int 33 | ) 34 | 35 | object Arity23 { 36 | implicit val schema: Schema[Arity23] = 37 | DeriveSchema.gen[Arity23] 38 | implicit val schemaEncoder: SchemaEncoder[Arity23] = 39 | Derive.derive[SchemaEncoder, Arity23](SchemaEncoderDeriver.default) 40 | } 41 | 42 | val arity23Schema = Arity23.schemaEncoder.encode(Arity23.schema, "arity23", optional = false) 43 | 44 | println(arity23Schema) 45 | 46 | } 47 | -------------------------------------------------------------------------------- /docs/scala-cli/SchemaSummoned.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.3 3 | 4 | import me.mnedokushev.zio.apache.parquet.core.Schemas 5 | import zio.schema.* 6 | import me.mnedokushev.zio.apache.parquet.core.codec.* 7 | 8 | object SchemaSummoned extends App: 9 | 10 | case class MyRecord(a: Int, 
b: String, c: Option[Long]) 11 | 12 | object MyRecord: 13 | given schema: Schema[MyRecord] = 14 | DeriveSchema.gen[MyRecord] 15 | // The custom encoder must be defined before the definition for your record type. 16 | given SchemaEncoder[Int] with { 17 | override def encode(schema: Schema[Int], name: String, optional: Boolean) = 18 | Schemas.uuid.optionality(optional).named(name) 19 | } 20 | given schemaEncoder: SchemaEncoder[MyRecord] = 21 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.summoned) 22 | 23 | val parquetSchema = MyRecord.schemaEncoder.encode(MyRecord.schema, "my_record", optional = false) 24 | 25 | println(parquetSchema) 26 | // Outputs: 27 | // required group my_record { 28 | // required fixed_len_byte_array(16) a (UUID); 29 | // required binary b (STRING); 30 | // optional int64 c (INTEGER(64,true)); 31 | // } 32 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-3/me/mnedokushev/zio/apache/parquet/core/filter/internal/ColumnPathConcatMacro.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter.internal 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.filter.{ Column, TypeTag } 4 | 5 | import scala.quoted.* 6 | 7 | object ColumnPathConcatMacro { 8 | 9 | def concatImpl[A: Type, B: Type, F: Type]( 10 | parent: Expr[Column[A]], 11 | child: Expr[Column.Named[B, F]], 12 | childTypeTag: Expr[TypeTag[B]] 13 | )(using 14 | Quotes 15 | ): Expr[Column[B]] = { 16 | import quotes.reflect.* 17 | 18 | val childField = TypeRepr.of[F] match { 19 | case ConstantType(StringConstant(name)) => 20 | name 21 | case tpe => 22 | report.errorAndAbort(s"Couldn't get a name of a singleton type $tpe") 23 | } 24 | val parentFields = TypeRepr.of[A].typeSymbol.caseFields.map(_.name) 25 | 26 | if (parentFields.contains(childField)) { 27 | val concatExpr = '{ ${ parent }.path + "." 
+ ${ child }.path } 28 | 29 | '{ me.mnedokushev.zio.apache.parquet.core.filter.Column.Named[B, F]($concatExpr)(using $childTypeTag) } 30 | } else 31 | report.errorAndAbort(s"Parent column doesn't contain a column named '$childField'") 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/Path.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import org.apache.hadoop.conf.Configuration 4 | import org.apache.hadoop.fs.{ Path => HadoopPath } 5 | import org.apache.parquet.hadoop.util.{ HadoopInputFile, HadoopOutputFile } 6 | import zio._ 7 | 8 | import java.io.IOException 9 | import java.net.URI 10 | import java.nio.file.{ Path => JPath, Paths } 11 | 12 | case class Path(underlying: HadoopPath) { 13 | 14 | def /(child: String): Path = 15 | this.copy(underlying = new HadoopPath(underlying, child)) 16 | 17 | def /(child: JPath): Path = 18 | this.copy(underlying = new HadoopPath(underlying, Path(child).underlying)) 19 | 20 | def toJava: JPath = 21 | Paths.get(underlying.toUri) 22 | 23 | def toHadoop: HadoopPath = 24 | underlying 25 | 26 | def toInputFileZIO(conf: Configuration): IO[IOException, HadoopInputFile] = 27 | ZIO.attemptBlockingIO(HadoopInputFile.fromPath(underlying, conf)) 28 | 29 | def toOutputFileZIO(conf: Configuration): IO[IOException, HadoopOutputFile] = 30 | ZIO.attemptBlockingIO(HadoopOutputFile.fromPath(toHadoop, conf)) 31 | 32 | } 33 | 34 | object Path { 35 | 36 | def apply(path: JPath): Path = 37 | Path(new HadoopPath(new URI("file", null, path.toAbsolutePath.toString, null, null))) 38 | 39 | def apply(uri: URI): Path = 40 | Path(new HadoopPath(uri)) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/WriteSupport.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value.GroupValue.RecordValue 4 | import me.mnedokushev.zio.apache.parquet.core.Value.NullValue 5 | import org.apache.hadoop.conf.Configuration 6 | import org.apache.parquet.hadoop.api.{ WriteSupport => HadoopWriteSupport } 7 | import org.apache.parquet.io.api.RecordConsumer 8 | import org.apache.parquet.schema.MessageType 9 | 10 | import scala.jdk.CollectionConverters._ 11 | 12 | class WriteSupport(schema: MessageType, metadata: Map[String, String]) extends HadoopWriteSupport[RecordValue] { 13 | 14 | override def init(configuration: Configuration): HadoopWriteSupport.WriteContext = 15 | new HadoopWriteSupport.WriteContext(schema, metadata.asJava) 16 | 17 | override def prepareForWrite(recordConsumer: RecordConsumer): Unit = 18 | this.consumer = recordConsumer 19 | 20 | override def write(record: RecordValue): Unit = { 21 | consumer.startMessage() 22 | 23 | record.values.foreach { 24 | case (_, NullValue) => 25 | () 26 | case (name, value) => 27 | val fieldIndex = schema.getFieldIndex(name) 28 | val fieldType = schema.getType(fieldIndex) 29 | 30 | consumer.startField(name, fieldIndex) 31 | value.write(fieldType, consumer) 32 | consumer.endField(name, fieldIndex) 33 | } 34 | 35 | consumer.endMessage() 36 | } 37 | 38 | private var consumer: RecordConsumer = null 39 | 40 | } 41 | 
-------------------------------------------------------------------------------- /modules/core/src/main/scala-2.13/me/mnedokushev/zio/apache/parquet/core/filter/internal/ColumnPathConcatMacro.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter.internal 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.filter.Column 4 | 5 | import scala.reflect.macros.blackbox 6 | 7 | class ColumnPathConcatMacro(val c: blackbox.Context) extends MacroUtils(c) { 8 | import c.universe._ 9 | 10 | def concatImpl[A, B, F](parent: Expr[Column[A]], child: Expr[Column.Named[B, F]])(implicit 11 | ptt: c.WeakTypeTag[A], 12 | ftt: c.WeakTypeTag[F] 13 | ): Tree = { 14 | val childField = getSingletonTypeName(ftt.tpe) 15 | val parentFields = ptt.tpe.members.collect { 16 | case p: TermSymbol if p.isCaseAccessor && !p.isMethod => p.name.toString.trim 17 | }.toList 18 | 19 | if (parentFields.exists(_ == childField)) { 20 | val pathTermName = "path" 21 | val dotStringLiteral = "." 22 | val concatExpr = 23 | q"${parent.tree}.${TermName(pathTermName)} + ${Literal(Constant(dotStringLiteral))} + ${child.tree}.${TermName(pathTermName)}" 24 | 25 | q"_root_.me.mnedokushev.zio.apache.parquet.core.filter.Column.Named($concatExpr)" 26 | } else 27 | c.abort(c.enclosingPosition, s"Parent column doesn't contain a column named '$childField'") 28 | } 29 | 30 | private def getSingletonTypeName(tpe: Type): String = 31 | tpe match { 32 | case ConstantType(Constant(name)) => name.toString 33 | case _ => c.abort(c.enclosingPosition, s"Couldn't get a name of a singleton type ${showRaw(tpe)}") 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /docs/scala-cli/ValueSummoned.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.3 3 | 4 | import me.mnedokushev.zio.apache.parquet.core.Value 5 | import zio.schema.* 6 | import me.mnedokushev.zio.apache.parquet.core.codec.* 7 | 8 | import java.nio.charset.StandardCharsets 9 | 10 | object ValueSummoned extends App: 11 | 12 | case class MyRecord(a: Int, b: String, c: Option[Long]) 13 | 14 | object MyRecord: 15 | given Schema[MyRecord] = 16 | DeriveSchema.gen[MyRecord] 17 | given ValueEncoder[Int] with { 18 | override def encode(value: Int): Value = 19 | Value.string(value.toString) 20 | } 21 | given ValueDecoder[Int] with { 22 | override def decode(value: Value): Int = 23 | value match { 24 | case Value.PrimitiveValue.BinaryValue(v) => 25 | new String(v.getBytes, StandardCharsets.UTF_8).toInt 26 | case other => 27 | throw DecoderError(s"Wrong value: $other") 28 | } 29 | } 30 | given encoder: ValueEncoder[MyRecord] = 31 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.summoned) 32 | given decoder: ValueDecoder[MyRecord] = 33 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.summoned) 34 | 35 | val value = MyRecord.encoder.encode(MyRecord(3, "zio", None)) 36 | val record = MyRecord.decoder.decode(value) 37 | 38 | println(value) 39 | // Outputs: 40 | // RecordValue(Map(a -> BinaryValue(Binary{"3"}), b -> BinaryValue(Binary{"zio"}), c -> NullValue)) 41 | println(record) 42 | // Outputs: 43 | // MyRecord(3,zio,None) 44 | -------------------------------------------------------------------------------- /docs/scala-cli/ParquetIO.scala: -------------------------------------------------------------------------------- 1 | //> 
using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-hadoop:0.3.3 3 | 4 | import zio.schema.* 5 | import me.mnedokushev.zio.apache.parquet.core.codec.* 6 | import me.mnedokushev.zio.apache.parquet.hadoop.{ ParquetReader, ParquetWriter, Path } 7 | import zio.* 8 | 9 | import java.nio.file.Files 10 | 11 | object ParquetIO extends ZIOAppDefault: 12 | 13 | case class MyRecord(a: Int, b: String, c: Option[Long]) 14 | 15 | object MyRecord: 16 | given Schema[MyRecord] = 17 | DeriveSchema.gen[MyRecord] 18 | given SchemaEncoder[MyRecord] = 19 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 20 | given ValueEncoder[MyRecord] = 21 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 22 | given ValueDecoder[MyRecord] = 23 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 24 | 25 | val data = 26 | Chunk( 27 | MyRecord(1, "first", Some(11)), 28 | MyRecord(3, "third", None) 29 | ) 30 | 31 | val recordsFile = Path(Files.createTempDirectory("records")) / "records.parquet" 32 | 33 | override def run = 34 | (for { 35 | writer <- ZIO.service[ParquetWriter[MyRecord]] 36 | reader <- ZIO.service[ParquetReader[MyRecord]] 37 | _ <- writer.writeChunk(recordsFile, data) 38 | fromFile <- reader.readChunk(recordsFile) 39 | _ <- Console.printLine(fromFile) 40 | } yield ()).provide( 41 | ParquetWriter.configured[MyRecord](), 42 | ParquetReader.configured[MyRecord]() 43 | ) 44 | // Outputs: 45 | // Chunk(MyRecord(1,first,Some(11)),MyRecord(3,third,None)) 46 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/ReadSupport.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Schemas 4 | import me.mnedokushev.zio.apache.parquet.core.Value.GroupValue.RecordValue 5 | import me.mnedokushev.zio.apache.parquet.core.codec.SchemaEncoder 6 | import org.apache.hadoop.conf.Configuration 7 | import org.apache.parquet.hadoop.api.{ InitContext, ReadSupport => HadoopReadSupport } 8 | import org.apache.parquet.io.api.{ GroupConverter, RecordMaterializer } 9 | import org.apache.parquet.schema.MessageType 10 | import zio.Tag 11 | import zio.prelude._ 12 | import zio.schema.Schema 13 | 14 | class ReadSupport[A]( 15 | schema: Option[Schema[A]] = None, 16 | schemaEncoder: Option[SchemaEncoder[A]] = None 17 | )(implicit tag: Tag[A]) 18 | extends HadoopReadSupport[RecordValue] { 19 | 20 | override def prepareForRead( 21 | configuration: Configuration, 22 | keyValueMetaData: java.util.Map[String, String], 23 | fileSchema: MessageType, 24 | readContext: HadoopReadSupport.ReadContext 25 | ): RecordMaterializer[RecordValue] = new RecordMaterializer[RecordValue] { 26 | 27 | private val converter = 28 | GroupValueConverter.root(resolveSchema(fileSchema)) 29 | 30 | override def getCurrentRecord: RecordValue = 31 | converter.get 32 | 33 | override def getRootConverter: GroupConverter = 34 | converter 35 | 36 | } 37 | 38 | override def init(context: InitContext): HadoopReadSupport.ReadContext = 39 | new HadoopReadSupport.ReadContext(resolveSchema(context.getFileSchema)) 40 | 41 | private def resolveSchema(contextSchema: MessageType): MessageType = 42 | (schema <*> schemaEncoder).fold(contextSchema) { case (schema0, schemaEncoder0) => 43 | Schemas.asMessageType(schemaEncoder0.encode(schema0, tag.tag.shortName, optional = false)) 44 | } 45 | 
46 | } 47 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by sbt-github-actions using the 2 | # githubWorkflowGenerate task. You should add and commit this file to 3 | # your git repository. It goes without saying that you shouldn't edit 4 | # this file by hand! Instead, if you wish to make changes, you should 5 | # change your sbt build configuration to revise the workflow description 6 | # to meet your needs, then regenerate this file. 7 | 8 | name: Continuous Integration 9 | 10 | on: 11 | pull_request: 12 | branches: ['**'] 13 | push: 14 | branches: ['**'] 15 | 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | jobs: 20 | build: 21 | name: Build and Test 22 | strategy: 23 | matrix: 24 | os: [ubuntu-latest] 25 | scala: [2.13.18, 3.3.6] 26 | java: [temurin@11, temurin@17] 27 | runs-on: ${{ matrix.os }} 28 | steps: 29 | - name: Checkout current branch (full) 30 | uses: actions/checkout@v6 31 | with: 32 | fetch-depth: 0 33 | 34 | - name: Setup Java (temurin@11) 35 | if: matrix.java == 'temurin@11' 36 | uses: actions/setup-java@v5 37 | with: 38 | distribution: temurin 39 | java-version: 11 40 | cache: sbt 41 | 42 | - name: Setup Java (temurin@17) 43 | if: matrix.java == 'temurin@17' 44 | uses: actions/setup-java@v5 45 | with: 46 | distribution: temurin 47 | java-version: 17 48 | cache: sbt 49 | 50 | - name: Setup sbt 51 | uses: sbt/setup-sbt@v1 52 | 53 | - name: Lint Scala code 54 | run: sbt '++ ${{ matrix.scala }}' 'scalafix --check' scalafmtCheckAll 55 | 56 | - name: Check that workflows are up to date 57 | run: sbt '++ ${{ matrix.scala }}' githubWorkflowCheck 58 | 59 | - name: Build project 60 | run: sbt '++ ${{ matrix.scala }}' test 61 | -------------------------------------------------------------------------------- /project/BuildHelper.scala: -------------------------------------------------------------------------------- 1 | import sbt.* 2 | import sbt.Keys.* 3 | import scalafix.sbt.ScalafixPlugin.autoImport.scalafixSemanticdb 4 | 5 | object BuildHelper { 6 | 7 | def stdSettings(projectName: String): Seq[Def.Setting[_]] = Seq( 8 | name := s"zio-apache-parquet-$projectName", 9 | organization := "me.mnedokushev", 10 | libraryDependencies ++= betterMonadicFor(scalaVersion.value), 11 | semanticdbEnabled := true, 12 | semanticdbVersion := scalafixSemanticdb.revision, 13 | Test / fork := true, 14 | Test / unmanagedSourceDirectories ++= crossVersionSources(scalaVersion.value, "test", baseDirectory.value), 15 | Test / unmanagedSourceDirectories ++= crossVersionSources(scalaVersion.value, "main", baseDirectory.value), 16 | libraryDependencies ++= { 17 | CrossVersion.partialVersion(scalaVersion.value) match { 18 | case Some((2, _)) => 19 | Seq(Dep.scalaReflect.value) 20 | case _ => Seq.empty 21 | } 22 | } 23 | ) 24 | 25 | val Scala212 = "2.12.21" 26 | val Scala213 = "2.13.18" 27 | val Scala3 = "3.3.6" 28 | 29 | private def betterMonadicFor(scalaVersion: String) = 30 | CrossVersion.partialVersion(scalaVersion) match { 31 | case Some((2, _)) => Seq(compilerPlugin("com.olegpy" %% "better-monadic-for" % "0.3.1")) 32 | case _ => Seq() 33 | } 34 | 35 | def crossVersionSources(scalaVersion: String, conf: String, baseDirectory: File): List[File] = { 36 | val versions = CrossVersion.partialVersion(scalaVersion) match { 37 | case Some((2, 13)) => 38 | List("2", "2.13", "2.13+") 39 | case Some((3, _)) => 
40 | List("2.13+", "3") 41 | case _ => 42 | List.empty 43 | } 44 | 45 | for { 46 | version <- "scala" :: versions.map("scala-" + _) 47 | file = baseDirectory / "src" / conf / version if file.exists 48 | } yield file 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /project/Dep.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbt.Keys.scalaVersion 3 | 4 | object Dep { 5 | 6 | object V { 7 | val zio = "2.1.23" 8 | val zioSchema = "1.7.5" 9 | val scalaCollectionCompat = "2.13.0" 10 | val apacheParquet = "1.16.0" 11 | val apacheHadoop = "3.4.2" 12 | } 13 | 14 | object O { 15 | val apacheParquet = "org.apache.parquet" 16 | val apacheHadoop = "org.apache.hadoop" 17 | val zio = "dev.zio" 18 | val scalaLang = "org.scala-lang" 19 | val scalaLangModules = s"$scalaLang.modules" 20 | } 21 | 22 | lazy val zio = O.zio %% "zio" % V.zio 23 | lazy val zioSchema = O.zio %% "zio-schema" % V.zioSchema 24 | lazy val zioSchemaDerivation = O.zio %% "zio-schema-derivation" % V.zioSchema 25 | lazy val zioTest = O.zio %% "zio-test" % V.zio 26 | lazy val zioTestSbt = O.zio %% "zio-test-sbt" % V.zio 27 | 28 | lazy val parquetHadoop = O.apacheParquet % "parquet-hadoop" % V.apacheParquet 29 | lazy val parquetColumn = O.apacheParquet % "parquet-column" % V.apacheParquet 30 | 31 | lazy val hadoopCommon = O.apacheHadoop % "hadoop-common" % V.apacheHadoop 32 | lazy val hadoopMapred = O.apacheHadoop % "hadoop-mapred" % "0.22.0" 33 | 34 | lazy val scalaCollectionCompat = O.scalaLangModules %% "scala-collection-compat" % V.scalaCollectionCompat 35 | 36 | lazy val scalaReflect = Def.setting("org.scala-lang" % "scala-reflect" % scalaVersion.value % "provided") 37 | 38 | lazy val core = Seq( 39 | zio, 40 | zioSchema, 41 | zioSchemaDerivation, 42 | scalaCollectionCompat, 43 | parquetHadoop, 44 | parquetColumn, 45 | zioTest % Test, 46 | zioTestSbt % Test 47 | ) 48 | 49 | lazy val hadoop = Seq( 50 | hadoopCommon, 51 | hadoopMapred, 52 | zioTest % Test, 53 | zioTestSbt % Test 54 | ) 55 | 56 | } 57 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-3/me/mnedokushev/zio/apache/parquet/core/filter/internal/SanitizeOptionalsMacro.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter.internal 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.filter.{ CompiledPredicate, Predicate } 4 | import org.apache.parquet.filter2.predicate.FilterPredicate 5 | 6 | import scala.quoted.* 7 | 8 | object SanitizeOptionalsMacro { 9 | 10 | // TODO: tests 11 | def sanitizeImpl[A: Type](predicate: Expr[Predicate[A]])(using Quotes): Expr[CompiledPredicate] = { 12 | import quotes.reflect.* 13 | 14 | // Example of a type representation of A type: 15 | // AndType( 16 | // AndType( 17 | // TypeRef(TermRef(ThisType(TypeRef(NoPrefix(), "scala")), "Predef"), "String"), 18 | // AppliedType( 19 | // TypeRef(TermRef(ThisType(TypeRef(NoPrefix(), "")), "scala"), "Option"), 20 | // List( 21 | // TypeRef(TermRef(ThisType(TypeRef(NoPrefix(), "")), "scala"), "Int") 22 | // ) 23 | // ) 24 | // ), 25 | // TypeRef(TermRef(ThisType(TypeRef(NoPrefix(), "")), "scala"), "Int") 26 | // ) 27 | // TODO: rewrite using limited stack for safety 28 | def containsOptionalValue(tpe: TypeRepr): Boolean = 29 | tpe match { 30 | case AndType(a, b) => 31 | containsOptionalValue(a) || containsOptionalValue(b) 32 | case 
AppliedType(tpe, _) => 33 | containsOptionalValue(tpe) 34 | case TypeRef(_, name) => 35 | List("Option", "Some", "None").contains(name) 36 | } 37 | 38 | if (containsOptionalValue(TypeRepr.of[A])) 39 | report.errorAndAbort( 40 | s""" 41 | | The use of optional columns in filter predicate is prohibited. Please, use .nullable: 42 | | column.nullable > 3 43 | | Predicate tree: ${predicate.show} 44 | """.stripMargin 45 | ) 46 | else 47 | '{ _root_.me.mnedokushev.zio.apache.parquet.core.filter.Predicate.compile0($predicate) } 48 | 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/Column.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | trait Column[A] { self => 4 | 5 | type Identity 6 | 7 | val path: String 8 | val typeTag: TypeTag[A] 9 | 10 | // TODO: overcome the limitation of scala macros for having a better API 11 | // I found out the compiler throws an error that macro is not found as 12 | // the macro itself depends on Column. The only option is to move the definition 13 | // of "concat" outside the Column class. 14 | // def /[B](child: Column[B]): Column[B] = 15 | // ColumnPathConcatMacro.concatImpl[A, B] 16 | 17 | def >(value: A)(implicit ev: OperatorSupport.LtGt[A]): Predicate[A] = 18 | Predicate.Binary(self, value, Operator.Binary.GreaterThen()) 19 | 20 | def <(value: A)(implicit ev: OperatorSupport.LtGt[A]): Predicate[A] = 21 | Predicate.Binary(self, value, Operator.Binary.LessThen()) 22 | 23 | def >=(value: A)(implicit ev: OperatorSupport.LtGt[A]): Predicate[A] = 24 | Predicate.Binary(self, value, Operator.Binary.GreaterEq()) 25 | 26 | def <=(value: A)(implicit ev: OperatorSupport.LtGt[A]): Predicate[A] = 27 | Predicate.Binary(self, value, Operator.Binary.LessEq()) 28 | 29 | def ===(value: A)(implicit ev: OperatorSupport.EqNotEq[A]): Predicate[A] = 30 | Predicate.Binary(self, value, Operator.Binary.Eq()) 31 | 32 | def =!=(value: A)(implicit ev: OperatorSupport.EqNotEq[A]): Predicate[A] = 33 | Predicate.Binary(self, value, Operator.Binary.NotEq()) 34 | 35 | def in(values: Set[A])(implicit ev: OperatorSupport.EqNotEq[A]): Predicate[A] = 36 | Predicate.BinarySet(self, values, Operator.Binary.Set.In()) 37 | 38 | def notIn(values: Set[A])(implicit ev: OperatorSupport.EqNotEq[A]): Predicate[A] = 39 | Predicate.BinarySet(self, values, Operator.Binary.Set.NotIn()) 40 | 41 | } 42 | 43 | object Column { 44 | 45 | final case class Named[A: TypeTag, Identity0](path: String) extends Column[A] { 46 | override type Identity = Identity0 47 | override val typeTag: TypeTag[A] = implicitly[TypeTag[A]] 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-2.13/me/mnedokushev/zio/apache/parquet/core/filter/internal/SanitizeOptionalsMacro.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter.internal 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.filter.Predicate 4 | 5 | import scala.reflect.macros.blackbox 6 | 7 | class SanitizeOptionalsMacro(val c: blackbox.Context) extends MacroUtils(c) { 8 | import c.universe._ 9 | 10 | def sanitizeImpl[A](predicate: Expr[Predicate[A]])(ptt: c.WeakTypeTag[A]): Tree = { 11 | 12 | // Example of a tree for A type: 13 | // RefinedType( 14 | // List( 15 | // 
RefinedType( 16 | // List( 17 | // TypeRef( 18 | // ThisType(java.lang), 19 | // java.lang.String, 20 | // List() 21 | // ), 22 | // TypeRef( 23 | // ThisType(scala), 24 | // scala.Option, 25 | // List( 26 | // TypeRef( 27 | // ThisType(scala), 28 | // scala.Int, 29 | // List() 30 | // ) 31 | // ) 32 | // ) 33 | // ), 34 | // Scope() 35 | // ), 36 | // TypeRef(ThisType(scala), scala.Int, List()) 37 | // ), 38 | // Scope() 39 | // ) 40 | // TODO: rewrite using limited stack for safety 41 | def containsOptionalValue(tpe: Type): Boolean = 42 | tpe match { 43 | case RefinedType(tpes, _) => 44 | tpes.exists(containsOptionalValue) 45 | case TypeRef(_, sym, _) => 46 | List("scala.Option", "scala.Some", "scala.None").contains(sym.fullName) 47 | case _ => 48 | false 49 | } 50 | 51 | if (containsOptionalValue(ptt.tpe)) 52 | c.abort( 53 | c.enclosingPosition, 54 | s""" 55 | | The use of optional columns in filter predicate is prohibited. Please, use .nullable: 56 | | column.nullable > 3 57 | | Predicate: ${predicate.tree} 58 | """.stripMargin 59 | ) 60 | else 61 | q"_root_.me.mnedokushev.zio.apache.parquet.core.filter.Predicate.compile0($predicate)" 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/Operator.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | sealed trait Operator 4 | 5 | object Operator { 6 | 7 | sealed trait Binary[A] extends Operator { 8 | def operatorSupport: OperatorSupport[A] 9 | } 10 | 11 | object Binary { 12 | final case class Eq[A: OperatorSupport.EqNotEq]() extends Binary[A] { 13 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.EqNotEq[A]] 14 | } 15 | final case class NotEq[A: OperatorSupport.EqNotEq]() extends Binary[A] { 16 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.EqNotEq[A]] 17 | } 18 | final case class LessThen[A: OperatorSupport.LtGt]() extends Binary[A] { 19 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.LtGt[A]] 20 | } 21 | final case class LessEq[A: OperatorSupport.LtGt]() extends Binary[A] { 22 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.LtGt[A]] 23 | } 24 | final case class GreaterThen[A: OperatorSupport.LtGt]() extends Binary[A] { 25 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.LtGt[A]] 26 | } 27 | final case class GreaterEq[A: OperatorSupport.LtGt]() extends Binary[A] { 28 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.LtGt[A]] 29 | } 30 | 31 | sealed trait Set[A] extends Binary[A] 32 | 33 | object Set { 34 | 35 | final case class In[A: OperatorSupport.EqNotEq]() extends Set[A] { 36 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.EqNotEq[A]] 37 | } 38 | final case class NotIn[A: OperatorSupport.EqNotEq]() extends Set[A] { 39 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.EqNotEq[A]] 40 | } 41 | 42 | } 43 | 44 | } 45 | 46 | sealed trait Unary[A] extends Operator 47 | 48 | object Unary { 49 | final case class Not[A]() extends Unary[A] 50 | } 51 | 52 | sealed trait Logical[A, B] extends Operator 53 | 54 | object Logical { 55 | final case class And[A, B]() extends Logical[A, B] 56 | final case class Or[A, B]() extends Logical[A, B] 57 | } 58 | 59 | } 60 | 
-------------------------------------------------------------------------------- /modules/hadoop/src/test/scala/me/mnedokushev/zio/apache/parquet/hadoop/ValueConverterSpec.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.codec.{ 4 | SchemaEncoder, 5 | SchemaEncoderDeriver, 6 | ValueDecoder, 7 | ValueDecoderDeriver 8 | } 9 | import me.mnedokushev.zio.apache.parquet.hadoop.ValueConverterSpec.FoodProductName.ProductName 10 | import zio.schema.{ Derive, DeriveSchema, Schema } 11 | import zio.test.{ Spec, TestEnvironment, ZIOSpecDefault, _ } 12 | import zio.{ Scope, _ } 13 | 14 | object ValueConverterSpec extends ZIOSpecDefault { 15 | 16 | val dataPath = 17 | Path(getClass.getResource("/food.parquet").toURI) 18 | 19 | final case class FoodProductName(product_name: Option[List[Option[ProductName]]]) 20 | 21 | object FoodProductName { 22 | 23 | final case class ProductName( 24 | lang: Option[String], 25 | text: Option[String] 26 | ) 27 | 28 | implicit val schema: Schema[FoodProductName] = 29 | DeriveSchema.gen[FoodProductName] 30 | implicit val schemaEncoder: SchemaEncoder[FoodProductName] = 31 | Derive.derive[SchemaEncoder, FoodProductName](SchemaEncoderDeriver.default) 32 | implicit val valueDecoder: ValueDecoder[FoodProductName] = 33 | Derive.derive[ValueDecoder, FoodProductName](ValueDecoderDeriver.default) 34 | } 35 | 36 | final case class FoodBrandTags(brands_tags: Option[List[Option[String]]]) 37 | 38 | object FoodBrandTags { 39 | implicit val schema: Schema[FoodBrandTags] = 40 | DeriveSchema.gen[FoodBrandTags] 41 | implicit val schemaEncoder: SchemaEncoder[FoodBrandTags] = 42 | Derive.derive[SchemaEncoder, FoodBrandTags](SchemaEncoderDeriver.default) 43 | implicit val valueDecoder: ValueDecoder[FoodBrandTags] = 44 | Derive.derive[ValueDecoder, FoodBrandTags](ValueDecoderDeriver.default) 45 | } 46 | 47 | // TODO: add more test cases 48 | override def spec: Spec[TestEnvironment & Scope, Any] = 49 | suite("ValueConvertedSpec")( 50 | test("read list of records") { 51 | for { 52 | reader <- ZIO.service[ParquetReader[FoodProductName]] 53 | result <- reader.readChunk(dataPath) 54 | } yield assertTrue(result.size == 10) 55 | }.provide(ParquetReader.projected[FoodProductName]()), 56 | test("read list of strings") { 57 | for { 58 | reader <- ZIO.service[ParquetReader[FoodBrandTags]] 59 | result <- reader.readChunk(dataPath) 60 | } yield assertTrue(result.size == 10) 61 | }.provide(ParquetReader.projected[FoodBrandTags]()) 62 | ) 63 | 64 | } 65 | -------------------------------------------------------------------------------- /.github/workflows/clean.yml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by sbt-github-actions using the 2 | # githubWorkflowGenerate task. You should add and commit this file to 3 | # your git repository. It goes without saying that you shouldn't edit 4 | # this file by hand! Instead, if you wish to make changes, you should 5 | # change your sbt build configuration to revise the workflow description 6 | # to meet your needs, then regenerate this file. 
7 | 8 | name: Clean 9 | 10 | on: push 11 | 12 | permissions: 13 | actions: write 14 | 15 | jobs: 16 | delete-artifacts: 17 | name: Delete Artifacts 18 | runs-on: ubuntu-latest 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | steps: 22 | - name: Delete artifacts 23 | shell: bash {0} 24 | run: | 25 | # Customize those three lines with your repository and credentials: 26 | REPO=${GITHUB_API_URL}/repos/${{ github.repository }} 27 | 28 | # A shortcut to call GitHub API. 29 | ghapi() { curl --silent --location --user _:$GITHUB_TOKEN "$@"; } 30 | 31 | # A temporary file which receives HTTP response headers. 32 | TMPFILE=$(mktemp) 33 | 34 | # An associative array, key: artifact name, value: number of artifacts of that name. 35 | declare -A ARTCOUNT 36 | 37 | # Process all artifacts on this repository, loop on returned "pages". 38 | URL=$REPO/actions/artifacts 39 | while [[ -n "$URL" ]]; do 40 | 41 | # Get current page, get response headers in a temporary file. 42 | JSON=$(ghapi --dump-header $TMPFILE "$URL") 43 | 44 | # Get URL of next page. Will be empty if we are at the last page. 45 | URL=$(grep '^Link:' "$TMPFILE" | tr ',' '\n' | grep 'rel="next"' | head -1 | sed -e 's/.*<\(.*\)>.*/\1/') 46 | rm -f $TMPFILE 47 | 48 | # Number of artifacts on this page: 49 | COUNT=$(( $(jq <<<$JSON -r '.artifacts | length') )) 50 | 51 | # Loop on all artifacts on this page. 52 | for ((i=0; $i < $COUNT; i++)); do 53 | 54 | # Get name of artifact and count instances of this name. 55 | name=$(jq <<<$JSON -r ".artifacts[$i].name?") 56 | ARTCOUNT[$name]=$(( $(( ${ARTCOUNT[$name]} )) + 1)) 57 | 58 | id=$(jq <<<$JSON -r ".artifacts[$i].id?") 59 | size=$(( $(jq <<<$JSON -r ".artifacts[$i].size_in_bytes?") )) 60 | printf "Deleting '%s' #%d, %'d bytes\n" $name ${ARTCOUNT[$name]} $size 61 | ghapi -X DELETE $REPO/actions/artifacts/$id 62 | done 63 | done 64 | -------------------------------------------------------------------------------- /docs/scala-cli/Filtering.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-hadoop:0.3.3 3 | 4 | import zio.* 5 | import zio.schema.* 6 | import me.mnedokushev.zio.apache.parquet.core.codec.* 7 | import me.mnedokushev.zio.apache.parquet.hadoop.{ ParquetReader, ParquetWriter, Path } 8 | import me.mnedokushev.zio.apache.parquet.core.filter.syntax.* 9 | import me.mnedokushev.zio.apache.parquet.core.filter.* 10 | 11 | import java.nio.file.Files 12 | 13 | object Filtering extends ZIOAppDefault: 14 | 15 | case class MyRecord(a: Int, b: String, c: Option[Long]) 16 | 17 | object MyRecord: 18 | // We need to provide field names using singleton types 19 | given Schema.CaseClass3.WithFields["a", "b", "c", Int, String, Option[Long], MyRecord] = 20 | DeriveSchema.gen[MyRecord] 21 | given SchemaEncoder[MyRecord] = 22 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 23 | given ValueEncoder[MyRecord] = 24 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 25 | given ValueDecoder[MyRecord] = 26 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 27 | given TypeTag[MyRecord] = 28 | Derive.derive[TypeTag, MyRecord](TypeTagDeriver.default) 29 | 30 | // Define accessors to use them later in the filter predicate. 31 | // You can give any names to the accessors as we demonstrate here. 
32 | val (id, name, age) = Filter[MyRecord].columns 33 | 34 | val data = 35 | Chunk( 36 | MyRecord(1, "bob", Some(10L)), 37 | MyRecord(2, "bob", Some(12L)), 38 | MyRecord(3, "alice", Some(13L)), 39 | MyRecord(4, "john", None) 40 | ) 41 | 42 | val recordsFile = Path(Files.createTempDirectory("records")) / "records.parquet" 43 | 44 | override def run = 45 | ( 46 | for { 47 | writer <- ZIO.service[ParquetWriter[MyRecord]] 48 | reader <- ZIO.service[ParquetReader[MyRecord]] 49 | _ <- writer.writeChunk(recordsFile, data) 50 | fromFile <- reader.readChunkFiltered( 51 | recordsFile, 52 | filter( 53 | MyRecord.id > 1 `and` ( 54 | MyRecord.name =!= "bob" `or` 55 | // Use .nullable syntax for optional fields. 56 | MyRecord.age.nullable > 10L 57 | ) 58 | ) 59 | ) 60 | _ <- Console.printLine(fromFile) 61 | } yield () 62 | ).provide( 63 | ParquetWriter.configured[MyRecord](), 64 | ParquetReader.configured[MyRecord]() 65 | ) 66 | // Outputs: 67 | // Chunk(MyRecord(2,bob,Some(12)),MyRecord(3,alice,Some(13)),MyRecord(4,john,None)) 68 | -------------------------------------------------------------------------------- /modules/hadoop/src/test/scala/me/mnedokushev/zio/apache/parquet/hadoop/ParquetIOSpec.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Fixtures._ 4 | import me.mnedokushev.zio.apache.parquet.core.filter.Filter 5 | import me.mnedokushev.zio.apache.parquet.core.filter.syntax._ 6 | import zio._ 7 | import zio.stream._ 8 | import zio.test.Assertion._ 9 | import zio.test.TestAspect._ 10 | import zio.test._ 11 | 12 | import java.nio.file.Files 13 | 14 | object ParquetIOSpec extends ZIOSpecDefault { 15 | 16 | val tmpDir = Path(Files.createTempDirectory("zio-apache-parquet")) 17 | val tmpFile = "parquet-writer-spec.parquet" 18 | val tmpCrcPath = tmpDir / ".parquet-writer-spec.parquet.crc" 19 | val tmpPath = tmpDir / tmpFile 20 | 21 | override def spec: Spec[TestEnvironment & Scope, Any] = 22 | suite("ParquetIOSpec")( 23 | test("write and read - chunk") { 24 | val payload = Chunk( 25 | MyRecordIO(1, "foo", None, List(1, 2), Map("first" -> 1, "second" -> 2)), 26 | MyRecordIO(2, "bar", Some(3L), List.empty, Map("third" -> 3)) 27 | ) 28 | 29 | for { 30 | writer <- ZIO.service[ParquetWriter[MyRecordIO]] 31 | reader <- ZIO.service[ParquetReader[MyRecordIO]] 32 | _ <- writer.writeChunk(tmpPath, payload) 33 | result <- reader.readChunk(tmpPath) 34 | } yield assertTrue(result == payload) 35 | } @@ after(cleanTmpFile(tmpDir)), 36 | test("write and read - stream") { 37 | val payload = Chunk( 38 | MyRecordIO(1, "foo", None, List(1, 2), Map("first" -> 1, "second" -> 2)), 39 | MyRecordIO(2, "bar", Some(3L), List.empty, Map("third" -> 3)) 40 | ) 41 | 42 | for { 43 | writer <- ZIO.service[ParquetWriter[MyRecordIO]] 44 | reader <- ZIO.service[ParquetReader[MyRecordIO]] 45 | _ <- writer.writeStream(tmpPath, ZStream.fromChunk(payload)) 46 | resultStream <- ZIO.scoped[Any](reader.readStream(tmpPath).runCollect) 47 | } yield assertTrue(resultStream == payload) 48 | } @@ after(cleanTmpFile(tmpDir)), 49 | test("write full and read projected") { 50 | val payload = Chunk( 51 | MyRecordIO(1, "foo", None, List(1, 2), Map("first" -> 1, "second" -> 2)), 52 | MyRecordIO(2, "bar", Some(3L), List.empty, Map("third" -> 3)) 53 | ) 54 | val projectedPayload = payload.map { r => 55 | MyProjectedRecordIO(r.a, r.c, r.d, r.e) 56 | } 57 | 58 | for { 59 | writer <- 
ZIO.service[ParquetWriter[MyRecordIO]] 60 | reader <- ZIO.service[ParquetReader[MyProjectedRecordIO]] 61 | _ <- writer.writeChunk(tmpPath, payload) 62 | result <- reader.readChunk(tmpPath) 63 | } yield assertTrue(result == projectedPayload) 64 | } @@ after(cleanTmpFile(tmpDir)), 65 | test("write and read with filter") { 66 | val payload = Chunk( 67 | MyRecordIO(1, "foo", None, List(1, 2), Map("first" -> 1, "second" -> 2)), 68 | MyRecordIO(2, "foo", None, List(1, 2), Map.empty), 69 | MyRecordIO(3, "bar", Some(3L), List.empty, Map("third" -> 3)), 70 | MyRecordIO(4, "baz", None, List.empty, Map("fourth" -> 3)) 71 | ) 72 | val (id, name, _, _, _) = Filter[MyRecordIO].columns 73 | 74 | for { 75 | writer <- ZIO.service[ParquetWriter[MyRecordIO]] 76 | reader <- ZIO.service[ParquetReader[MyRecordIO]] 77 | _ <- writer.writeChunk(tmpPath, payload) 78 | result <- reader.readChunkFiltered(tmpPath, filter(id > 1 `and` name =!= "foo")) 79 | } yield assertTrue(result.size == 2) && assert(result)(equalTo(payload.drop(2))) 80 | } @@ after(cleanTmpFile(tmpDir)) 81 | ).provide( 82 | ParquetWriter.configured[MyRecordIO](), 83 | ParquetReader.configured[MyRecordIO](), 84 | ParquetReader.projected[MyProjectedRecordIO]() 85 | ) @@ sequential 86 | 87 | private def cleanTmpFile(path: Path) = 88 | for { 89 | _ <- ZIO.attemptBlockingIO(Files.delete(tmpCrcPath.toJava)) 90 | _ <- ZIO.attemptBlockingIO(Files.delete(tmpPath.toJava)) 91 | _ <- ZIO.attemptBlockingIO(Files.delete(path.toJava)) 92 | } yield () 93 | 94 | } 95 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTagDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import zio.Chunk 4 | import zio.schema.{ Deriver, Schema, StandardType } 5 | 6 | object TypeTagDeriver { 7 | 8 | val default: Deriver[TypeTag] = new Deriver[TypeTag] { 9 | 10 | override def deriveRecord[A]( 11 | record: Schema.Record[A], 12 | fields: => Chunk[Deriver.WrappedF[TypeTag, ?]], 13 | summoned: => Option[TypeTag[A]] 14 | ): TypeTag[A] = 15 | TypeTag.Record( 16 | record.fields 17 | .map(_.name.toString) 18 | .zip(fields.map(_.unwrap)) 19 | .toMap 20 | ) 21 | 22 | override def deriveEnum[A]( 23 | `enum`: Schema.Enum[A], 24 | cases: => Chunk[Deriver.WrappedF[TypeTag, ?]], 25 | summoned: => Option[TypeTag[A]] 26 | ): TypeTag[A] = { 27 | val casesMap = `enum`.cases.map { case0 => 28 | case0.schema.asInstanceOf[Schema.CaseClass0[A]].defaultConstruct() -> case0.id 29 | }.toMap 30 | 31 | TypeTag.enum0(casesMap) 32 | } 33 | 34 | override def derivePrimitive[A]( 35 | st: StandardType[A], 36 | summoned: => Option[TypeTag[A]] 37 | ): TypeTag[A] = 38 | st match { 39 | case StandardType.StringType => TypeTag.string 40 | case StandardType.BoolType => TypeTag.boolean 41 | case StandardType.ByteType => TypeTag.byte 42 | case StandardType.ShortType => TypeTag.short 43 | case StandardType.IntType => TypeTag.int 44 | case StandardType.LongType => TypeTag.long 45 | case StandardType.FloatType => TypeTag.float 46 | case StandardType.DoubleType => TypeTag.double 47 | case StandardType.BinaryType => TypeTag.binary 48 | case StandardType.CharType => TypeTag.char 49 | case StandardType.UUIDType => TypeTag.uuid 50 | case StandardType.CurrencyType => TypeTag.currency 51 | case StandardType.BigDecimalType => TypeTag.bigDecimal 52 | case StandardType.BigIntegerType => TypeTag.bigInteger 53 | case 
StandardType.DayOfWeekType => TypeTag.dayOfWeek 54 | case StandardType.MonthType => TypeTag.month 55 | case StandardType.MonthDayType => TypeTag.monthDay 56 | case StandardType.PeriodType => TypeTag.period 57 | case StandardType.YearType => TypeTag.year 58 | case StandardType.YearMonthType => TypeTag.yearMonth 59 | case StandardType.ZoneIdType => TypeTag.zoneId 60 | case StandardType.ZoneOffsetType => TypeTag.zoneOffset 61 | case StandardType.DurationType => TypeTag.duration 62 | case StandardType.InstantType => TypeTag.instant 63 | case StandardType.LocalDateType => TypeTag.localDate 64 | case StandardType.LocalTimeType => TypeTag.localTime 65 | case StandardType.LocalDateTimeType => TypeTag.localDateTime 66 | case StandardType.OffsetTimeType => TypeTag.offsetTime 67 | case StandardType.OffsetDateTimeType => TypeTag.offsetDateTime 68 | case StandardType.ZonedDateTimeType => TypeTag.zonedDateTime 69 | case StandardType.UnitType => TypeTag.dummy[A] 70 | } 71 | 72 | override def deriveOption[A]( 73 | option: Schema.Optional[A], 74 | inner: => TypeTag[A], 75 | summoned: => Option[TypeTag[Option[A]]] 76 | ): TypeTag[Option[A]] = 77 | TypeTag.optional[A](using inner) 78 | 79 | override def deriveSequence[C[_], A]( 80 | sequence: Schema.Sequence[C[A], A, ?], 81 | inner: => TypeTag[A], 82 | summoned: => Option[TypeTag[C[A]]] 83 | ): TypeTag[C[A]] = 84 | TypeTag.dummy[C[A]] 85 | 86 | override def deriveMap[K, V]( 87 | map: Schema.Map[K, V], 88 | key: => TypeTag[K], 89 | value: => TypeTag[V], 90 | summoned: => Option[TypeTag[Map[K, V]]] 91 | ): TypeTag[Map[K, V]] = 92 | TypeTag.dummy[Map[K, V]] 93 | 94 | override def deriveTransformedRecord[A, B]( 95 | record: Schema.Record[A], 96 | transform: Schema.Transform[A, B, ?], 97 | fields: => Chunk[Deriver.WrappedF[TypeTag, ?]], 98 | summoned: => Option[TypeTag[B]] 99 | ): TypeTag[B] = 100 | TypeTag.dummy[B] 101 | 102 | }.cached 103 | 104 | val summoned: Deriver[TypeTag] = default.autoAcceptSummoned 105 | 106 | } 107 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/Predicate.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import org.apache.parquet.filter2.predicate.{ FilterApi, FilterPredicate, Operators } 4 | import zio.prelude._ 5 | 6 | sealed trait Predicate[A] { self => 7 | 8 | def and[B](other: Predicate[B]): Predicate[A & B] = 9 | Predicate.Logical[A, B](self, other, Operator.Logical.And[A, B]()) 10 | 11 | def or[B](other: Predicate[B]): Predicate[A & B] = 12 | Predicate.Logical[A, B](self, other, Operator.Logical.Or[A, B]()) 13 | 14 | } 15 | 16 | object Predicate { 17 | 18 | private[filter] trait Syntax { 19 | def not[A](pred: Predicate[A]) = 20 | Predicate.Unary(pred, Operator.Unary.Not[A]()) 21 | } 22 | 23 | final case class Binary[A](column: Column[A], value: A, op: Operator.Binary[A]) extends Predicate[A] 24 | 25 | final case class BinarySet[A](column: Column[A], values: Set[A], op: Operator.Binary.Set[A]) extends Predicate[A] 26 | 27 | final case class Unary[A](predicate: Predicate[A], op: Operator.Unary[A]) extends Predicate[A] 28 | 29 | final case class Logical[A, B](left: Predicate[A], right: Predicate[B], op: Operator.Logical[A, B]) 30 | extends Predicate[A & B] 31 | 32 | private[zio] def compile0[A](predicate: Predicate[A]): Either[String, FilterPredicate] = { 33 | 34 | def error(op: Operator) = 35 | Left(s"Operator $op is not 
supported by $predicate") 36 | 37 | def binarySet[T <: Comparable[T], C <: Operators.Column[T] & Operators.SupportsEqNotEq]( 38 | column: C, 39 | values: java.util.Set[T], 40 | op: Operator.Binary.Set[?] 41 | ) = 42 | op match { 43 | case Operator.Binary.Set.In() => 44 | Right(FilterApi.in(column, values)) 45 | case Operator.Binary.Set.NotIn() => 46 | Right(FilterApi.notIn(column, values)) 47 | } 48 | 49 | predicate match { 50 | case Predicate.Unary(predicate0, op) => 51 | op match { 52 | case Operator.Unary.Not() => 53 | compile0(predicate0).map(FilterApi.not) 54 | } 55 | case Predicate.Logical(left, right, op) => 56 | (compile0(left) <*> compile0(right)).map { case (left0, right0) => 57 | op match { 58 | case Operator.Logical.And() => 59 | FilterApi.and(left0, right0) 60 | case Operator.Logical.Or() => 61 | FilterApi.or(left0, right0) 62 | } 63 | } 64 | case Predicate.Binary(column, value, op) => 65 | column.typeTag match { 66 | case typeTag: TypeTag.EqNotEq[_] => 67 | val typeTag0 = typeTag.cast[A] 68 | val column0 = typeTag0.column(column.path) 69 | val value0 = typeTag0.value(value) 70 | 71 | op match { 72 | case Operator.Binary.Eq() => 73 | Right(FilterApi.eq(column0, value0)) 74 | case Operator.Binary.NotEq() => 75 | Right(FilterApi.notEq(column0, value0)) 76 | case _ => 77 | error(op) 78 | } 79 | case typeTag: TypeTag.LtGt[_] => 80 | val typeTag0 = typeTag.cast[A] 81 | val column0 = typeTag0.column(column.path) 82 | val value0 = typeTag0.value(value) 83 | 84 | op match { 85 | case Operator.Binary.Eq() => 86 | Right(FilterApi.eq(column0, value0)) 87 | case Operator.Binary.NotEq() => 88 | Right(FilterApi.notEq(column0, value0)) 89 | case Operator.Binary.LessThen() => 90 | Right(FilterApi.lt(column0, value0)) 91 | case Operator.Binary.LessEq() => 92 | Right(FilterApi.ltEq(column0, value0)) 93 | case Operator.Binary.GreaterThen() => 94 | Right(FilterApi.gt(column0, value0)) 95 | case Operator.Binary.GreaterEq() => 96 | Right(FilterApi.gtEq(column0, value0)) 97 | case _ => 98 | error(op) 99 | } 100 | case _ => 101 | error(op) 102 | } 103 | case Predicate.BinarySet(column, values, op) => 104 | column.typeTag match { 105 | case typeTag: TypeTag.EqNotEq[_] => 106 | val typeTag0 = typeTag.cast[A] 107 | val column0 = typeTag0.column(column.path) 108 | val values0 = typeTag0.values(values) 109 | 110 | binarySet(column0, values0, op) 111 | case typeTag: TypeTag.LtGt[_] => 112 | val typeTag0 = typeTag.cast[A] 113 | val column0 = typeTag0.column(column.path) 114 | val values0 = typeTag0.values(values) 115 | 116 | binarySet(column0, values0, op) 117 | case _ => 118 | error(op) 119 | } 120 | } 121 | 122 | } 123 | 124 | } 125 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/ParquetWriter.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Schemas 4 | import me.mnedokushev.zio.apache.parquet.core.Value.GroupValue.RecordValue 5 | import me.mnedokushev.zio.apache.parquet.core.codec.{ SchemaEncoder, ValueEncoder } 6 | import org.apache.hadoop.conf.Configuration 7 | import org.apache.parquet.hadoop.api.{ WriteSupport => HadoopWriteSupport } 8 | import org.apache.parquet.hadoop.metadata.CompressionCodecName 9 | import org.apache.parquet.hadoop.{ ParquetFileWriter, ParquetWriter => HadoopParquetWriter } 10 | import org.apache.parquet.io.OutputFile 11 | import 
org.apache.parquet.schema.MessageType 12 | import zio._ 13 | import zio.schema.Schema 14 | import zio.stream._ 15 | 16 | trait ParquetWriter[-A <: Product] { 17 | 18 | def writeChunk(path: Path, data: Chunk[A]): Task[Unit] 19 | 20 | def writeStream[R](path: Path, data: ZStream[R, Throwable, A]): RIO[R, Unit] 21 | 22 | } 23 | 24 | final class ParquetWriterLive[A <: Product]( 25 | writeMode: ParquetFileWriter.Mode, 26 | compressionCodecName: CompressionCodecName, 27 | dictionaryEncodingEnabled: Boolean, 28 | dictionaryPageSize: Int, 29 | maxPaddingSize: Int, 30 | pageSize: Int, 31 | rowGroupSize: Long, 32 | validationEnabled: Boolean, 33 | hadoopConf: Configuration 34 | )(implicit schema: Schema[A], schemaEncoder: SchemaEncoder[A], encoder: ValueEncoder[A], tag: Tag[A]) 35 | extends ParquetWriter[A] { 36 | 37 | override def writeChunk(path: Path, data: Chunk[A]): Task[Unit] = 38 | ZIO.scoped[Any]( 39 | for { 40 | writer <- build(path) 41 | _ <- ZIO.foreachDiscard(data)(writeSingle(writer, _)) 42 | } yield () 43 | ) 44 | 45 | override def writeStream[R](path: Path, stream: ZStream[R, Throwable, A]): RIO[R, Unit] = 46 | ZIO.scoped[R]( 47 | for { 48 | writer <- build(path) 49 | _ <- stream.runForeach(writeSingle(writer, _)) 50 | } yield () 51 | ) 52 | 53 | private def writeSingle(writer: HadoopParquetWriter[RecordValue], value: A): Task[Unit] = 54 | for { 55 | record <- encoder.encodeZIO(value) 56 | _ <- ZIO.attemptBlockingIO(writer.write(record.asInstanceOf[RecordValue])) 57 | } yield () 58 | 59 | private def build(path: Path): RIO[Scope, HadoopParquetWriter[RecordValue]] = 60 | for { 61 | schema <- schemaEncoder.encodeZIO(schema, tag.tag.shortName, optional = false) 62 | messageSchema <- ZIO.attempt(Schemas.asMessageType(schema)) 63 | outputFile <- path.toOutputFileZIO(hadoopConf) 64 | builder = new ParquetWriter.Builder(outputFile, messageSchema) 65 | .withWriteMode(writeMode) 66 | .withCompressionCodec(compressionCodecName) 67 | .withDictionaryEncoding(dictionaryEncodingEnabled) 68 | .withDictionaryPageSize(dictionaryPageSize) 69 | .withMaxPaddingSize(maxPaddingSize) 70 | .withPageSize(pageSize) 71 | .withRowGroupSize(rowGroupSize) 72 | .withValidation(validationEnabled) 73 | .withConf(hadoopConf) 74 | writer <- ZIO.fromAutoCloseable(ZIO.attemptBlockingIO(builder.build())) 75 | } yield writer 76 | 77 | } 78 | 79 | object ParquetWriter { 80 | 81 | final class Builder(file: OutputFile, schema: MessageType) 82 | extends HadoopParquetWriter.Builder[RecordValue, Builder](file) { 83 | 84 | override def self(): Builder = this 85 | 86 | override def getWriteSupport(conf: Configuration): HadoopWriteSupport[RecordValue] = 87 | new WriteSupport(schema, Map.empty) 88 | 89 | } 90 | 91 | def configured[A <: Product: ValueEncoder]( 92 | writeMode: ParquetFileWriter.Mode = ParquetFileWriter.Mode.CREATE, 93 | compressionCodecName: CompressionCodecName = HadoopParquetWriter.DEFAULT_COMPRESSION_CODEC_NAME, 94 | dictionaryEncodingEnabled: Boolean = HadoopParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED, 95 | dictionaryPageSize: Int = HadoopParquetWriter.DEFAULT_PAGE_SIZE, 96 | maxPaddingSize: Int = HadoopParquetWriter.MAX_PADDING_SIZE_DEFAULT, 97 | pageSize: Int = HadoopParquetWriter.DEFAULT_PAGE_SIZE, 98 | rowGroupSize: Long = HadoopParquetWriter.DEFAULT_BLOCK_SIZE, 99 | validationEnabled: Boolean = HadoopParquetWriter.DEFAULT_IS_VALIDATING_ENABLED, 100 | hadoopConf: Configuration = new Configuration() 101 | )(implicit 102 | schema: Schema[A], 103 | schemaEncoder: SchemaEncoder[A], 104 | tag: Tag[A] 105 | ): 
TaskLayer[ParquetWriter[A]] = 106 | ZLayer.succeed( 107 | new ParquetWriterLive[A]( 108 | writeMode, 109 | compressionCodecName, 110 | dictionaryEncodingEnabled, 111 | dictionaryPageSize, 112 | maxPaddingSize, 113 | pageSize, 114 | rowGroupSize, 115 | validationEnabled, 116 | hadoopConf 117 | ) 118 | ) 119 | 120 | } 121 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/OperatorSupport.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import zio.{ Chunk, Duration } 4 | 5 | import java.time.{ 6 | DayOfWeek, 7 | Instant, 8 | LocalDate, 9 | LocalDateTime, 10 | LocalTime, 11 | Month, 12 | MonthDay, 13 | OffsetDateTime, 14 | OffsetTime, 15 | Period, 16 | Year, 17 | YearMonth, 18 | ZoneId, 19 | ZoneOffset, 20 | ZonedDateTime 21 | } 22 | import java.util.UUID 23 | import scala.annotation.implicitNotFound 24 | 25 | sealed trait OperatorSupport[A] 26 | 27 | object OperatorSupport { 28 | 29 | trait Optional[A, S[_] <: OperatorSupport[?]] { 30 | val operatorSupport: S[A] 31 | } 32 | 33 | @implicitNotFound("You can't use this operator for the type ${A}") 34 | abstract class LtGt[A: TypeTag] extends OperatorSupport[A] { 35 | val typeTag: TypeTag[A] = implicitly[TypeTag[A]] 36 | } 37 | 38 | object LtGt { 39 | 40 | implicit def optional[A: TypeTag: LtGt]: LtGt[Option[A]] = 41 | new LtGt[Option[A]] with Optional[A, LtGt] { 42 | override val operatorSupport: LtGt[A] = implicitly[LtGt[A]] 43 | } 44 | 45 | implicit case object byte extends LtGt[Byte] 46 | implicit case object short extends LtGt[Short] 47 | implicit case object int extends LtGt[Int] 48 | implicit case object long extends LtGt[Long] 49 | implicit case object float extends LtGt[Float] 50 | implicit case object double extends LtGt[Double] 51 | implicit case object bigDecimal extends LtGt[java.math.BigDecimal] 52 | implicit case object bigInteger extends LtGt[java.math.BigInteger] 53 | implicit case object dayOfWeek extends LtGt[DayOfWeek] 54 | implicit case object month extends LtGt[Month] 55 | implicit case object monthDay extends LtGt[MonthDay] 56 | implicit case object period extends LtGt[Period] 57 | implicit case object year extends LtGt[Year] 58 | implicit case object yearMonth extends LtGt[YearMonth] 59 | implicit case object duration extends LtGt[Duration] 60 | implicit case object instant extends LtGt[Instant] 61 | implicit case object localDate extends LtGt[LocalDate] 62 | implicit case object localTime extends LtGt[LocalTime] 63 | implicit case object localDateTime extends LtGt[LocalDateTime] 64 | implicit case object offsetTime extends LtGt[OffsetTime] 65 | implicit case object offsetDateTime extends LtGt[OffsetDateTime] 66 | implicit case object zonedDateTime extends LtGt[ZonedDateTime] 67 | 68 | } 69 | 70 | @implicitNotFound("You can't use this operator for the type ${A}") 71 | abstract class EqNotEq[A: TypeTag] extends OperatorSupport[A] { 72 | val typeTag: TypeTag[A] = implicitly[TypeTag[A]] 73 | } 74 | 75 | object EqNotEq { 76 | 77 | implicit def enum0[A: TypeTag]: EqNotEq[A] = new EqNotEq[A] {} 78 | 79 | implicit def optional[A: TypeTag: EqNotEq]: EqNotEq[Option[A]] = 80 | new EqNotEq[Option[A]] with Optional[A, EqNotEq] { 81 | override val operatorSupport: EqNotEq[A] = implicitly[EqNotEq[A]] 82 | } 83 | 84 | implicit case object string extends EqNotEq[String] 85 | implicit case object boolean extends EqNotEq[Boolean] 86 
| implicit case object byte extends EqNotEq[Byte] 87 | implicit case object short extends EqNotEq[Short] 88 | implicit case object int extends EqNotEq[Int] 89 | implicit case object long extends EqNotEq[Long] 90 | implicit case object float extends EqNotEq[Float] 91 | implicit case object double extends EqNotEq[Double] 92 | implicit case object binary extends EqNotEq[Chunk[Byte]] 93 | implicit case object char extends EqNotEq[Char] 94 | implicit case object uuid extends EqNotEq[UUID] 95 | implicit case object currency extends EqNotEq[java.util.Currency] 96 | implicit case object bigDecimal extends EqNotEq[java.math.BigDecimal] 97 | implicit case object bigInteger extends EqNotEq[java.math.BigInteger] 98 | implicit case object dayOfWeek extends EqNotEq[DayOfWeek] 99 | implicit case object month extends EqNotEq[Month] 100 | implicit case object monthDay extends EqNotEq[MonthDay] 101 | implicit case object period extends EqNotEq[Period] 102 | implicit case object year extends EqNotEq[Year] 103 | implicit case object yearMonth extends EqNotEq[YearMonth] 104 | implicit case object zoneId extends EqNotEq[ZoneId] 105 | implicit case object zoneOffset extends EqNotEq[ZoneOffset] 106 | implicit case object duration extends EqNotEq[Duration] 107 | implicit case object instant extends EqNotEq[Instant] 108 | implicit case object localDate extends EqNotEq[LocalDate] 109 | implicit case object localTime extends EqNotEq[LocalTime] 110 | implicit case object localDateTime extends EqNotEq[LocalDateTime] 111 | implicit case object offsetTime extends EqNotEq[OffsetTime] 112 | implicit case object offsetDateTime extends EqNotEq[OffsetDateTime] 113 | implicit case object zonedDateTime extends EqNotEq[ZonedDateTime] 114 | 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/ParquetReader.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value.GroupValue.RecordValue 4 | import me.mnedokushev.zio.apache.parquet.core.codec.{ SchemaEncoder, ValueDecoder } 5 | import me.mnedokushev.zio.apache.parquet.core.filter.CompiledPredicate 6 | import org.apache.hadoop.conf.Configuration 7 | import org.apache.parquet.filter2.compat.FilterCompat 8 | import org.apache.parquet.hadoop.api.{ ReadSupport => HadoopReadSupport } 9 | import org.apache.parquet.hadoop.{ ParquetReader => HadoopParquetReader } 10 | import org.apache.parquet.io.InputFile 11 | import zio._ 12 | import zio.schema.Schema 13 | import zio.stream._ 14 | 15 | import java.io.IOException 16 | 17 | trait ParquetReader[+A <: Product] { 18 | 19 | def readStream(path: Path): ZStream[Scope, Throwable, A] 20 | 21 | def readStreamFiltered(path: Path, filter: CompiledPredicate): ZStream[Scope, Throwable, A] 22 | 23 | def readChunk[B](path: Path): Task[Chunk[A]] 24 | 25 | def readChunkFiltered[B](path: Path, filter: CompiledPredicate): Task[Chunk[A]] 26 | 27 | } 28 | 29 | final class ParquetReaderLive[A <: Product: Tag]( 30 | hadoopConf: Configuration, 31 | schema: Option[Schema[A]] = None, 32 | schemaEncoder: Option[SchemaEncoder[A]] = None 33 | )(implicit decoder: ValueDecoder[A]) 34 | extends ParquetReader[A] { 35 | 36 | override def readStream(path: Path): ZStream[Scope, Throwable, A] = 37 | for { 38 | reader <- ZStream.fromZIO(build(path, None)) 39 | value <- readStream0(reader) 40 | } yield value 41 | 
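// Like readStream, but passes the compiled filter predicate down to the underlying Hadoop reader (see build below).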
42 | override def readStreamFiltered(path: Path, filter: CompiledPredicate): ZStream[Scope, Throwable, A] = 43 | for { 44 | reader <- ZStream.fromZIO(build(path, Some(filter))) 45 | value <- readStream0(reader) 46 | } yield value 47 | 48 | override def readChunk[B](path: Path): Task[Chunk[A]] = 49 | ZIO.scoped( 50 | for { 51 | reader <- build(path, None) 52 | result <- readChunk0(reader) 53 | } yield result 54 | ) 55 | 56 | override def readChunkFiltered[B](path: Path, filter: CompiledPredicate): Task[Chunk[A]] = 57 | ZIO.scoped( 58 | for { 59 | reader <- build(path, Some(filter)) 60 | result <- readChunk0(reader) 61 | } yield result 62 | ) 63 | 64 | private def readStream0(reader: HadoopParquetReader[RecordValue]): ZStream[Any, Throwable, A] = 65 | ZStream.repeatZIOOption( 66 | ZIO 67 | .attemptBlockingIO(reader.read()) 68 | .asSomeError 69 | .filterOrFail(_ != null)(None) 70 | .flatMap(decoder.decodeZIO(_).asSomeError) 71 | ) 72 | 73 | private def readChunk0[B](reader: HadoopParquetReader[RecordValue]): Task[Chunk[A]] = { 74 | val readNext = for { 75 | value <- ZIO.attemptBlockingIO(reader.read()) 76 | record <- if (value != null) 77 | decoder.decodeZIO(value) 78 | else 79 | ZIO.succeed(null.asInstanceOf[A]) 80 | } yield record 81 | val builder = Chunk.newBuilder[A] 82 | 83 | ZIO.scoped( 84 | for { 85 | initial <- readNext 86 | _ <- { 87 | var current = initial 88 | 89 | ZIO.whileLoop(current != null)(readNext) { next => 90 | builder.addOne(current) 91 | current = next 92 | } 93 | } 94 | } yield builder.result() 95 | ) 96 | } 97 | 98 | private def build[B]( 99 | path: Path, 100 | filter: Option[CompiledPredicate] 101 | ): ZIO[Scope, IOException, HadoopParquetReader[RecordValue]] = 102 | for { 103 | inputFile <- path.toInputFileZIO(hadoopConf) 104 | compiledFilter <- ZIO.foreach(filter) { pred => 105 | ZIO 106 | .fromEither(pred) 107 | .mapError(new IOException(_)) 108 | } 109 | reader <- ZIO.fromAutoCloseable( 110 | ZIO.attemptBlockingIO { 111 | val builder = new ParquetReader.Builder(inputFile, schema, schemaEncoder) 112 | 113 | compiledFilter.foreach(pred => builder.withFilter(FilterCompat.get(pred))) 114 | builder.withConf(hadoopConf).build() 115 | } 116 | ) 117 | } yield reader 118 | 119 | } 120 | 121 | object ParquetReader { 122 | 123 | final class Builder[A: Tag]( 124 | file: InputFile, 125 | schema: Option[Schema[A]] = None, 126 | schemaEncoder: Option[SchemaEncoder[A]] = None 127 | ) extends HadoopParquetReader.Builder[RecordValue](file) { 128 | 129 | override protected def getReadSupport: HadoopReadSupport[RecordValue] = 130 | new ReadSupport(schema, schemaEncoder) 131 | 132 | } 133 | 134 | def configured[A <: Product: ValueDecoder: Tag]( 135 | hadoopConf: Configuration = new Configuration() 136 | ): ULayer[ParquetReader[A]] = 137 | ZLayer.succeed(new ParquetReaderLive[A](hadoopConf)) 138 | 139 | def projected[A <: Product: ValueDecoder: Tag]( 140 | hadoopConf: Configuration = new Configuration() 141 | )(implicit 142 | schema: Schema[A], 143 | schemaEncoder: SchemaEncoder[A] 144 | ): ULayer[ParquetReader[A]] = 145 | ZLayer.succeed(new ParquetReaderLive[A](hadoopConf, Some(schema), Some(schemaEncoder))) 146 | 147 | } 148 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/Schemas.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core 2 | 3 | import 
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName 4 | import org.apache.parquet.schema.Type.Repetition 5 | import org.apache.parquet.schema._ 6 | import zio.Chunk 7 | 8 | object Schemas { 9 | 10 | abstract class Def[Self <: Def[?]] { 11 | 12 | def named(name: String): Type 13 | 14 | def optionality(condition: Boolean): Self = 15 | if (condition) optional else required 16 | 17 | def required: Self 18 | 19 | def optional: Self 20 | 21 | } 22 | 23 | case class PrimitiveDef( 24 | typeName: PrimitiveTypeName, 25 | annotation: Option[LogicalTypeAnnotation] = None, 26 | isOptional: Boolean = false, 27 | length: Int = 0 28 | ) extends Def[PrimitiveDef] { 29 | 30 | def named(name: String): Type = { 31 | val builder = Types.primitive(typeName, repetition(isOptional)) 32 | 33 | annotation 34 | .fold(builder)(builder.as) 35 | .length(length) 36 | .named(name) 37 | } 38 | 39 | def length(len: Int): PrimitiveDef = 40 | this.copy(length = len) 41 | 42 | def required: PrimitiveDef = 43 | this.copy(isOptional = false) 44 | 45 | def optional: PrimitiveDef = 46 | this.copy(isOptional = true) 47 | 48 | } 49 | 50 | case class RecordDef(fields: Chunk[Type], isOptional: Boolean = false) extends Def[RecordDef] { 51 | 52 | def named(name: String): Type = { 53 | val builder = Types.buildGroup(repetition(isOptional)) 54 | 55 | fields.foreach(builder.addField) 56 | builder.named(name) 57 | } 58 | 59 | def required: RecordDef = 60 | this.copy(isOptional = false) 61 | 62 | def optional: RecordDef = 63 | this.copy(isOptional = true) 64 | 65 | } 66 | 67 | case class ListDef( 68 | element: Type, 69 | isOptional: Boolean = false 70 | ) extends Def[ListDef] { 71 | 72 | def named(name: String): Type = 73 | Types 74 | .list(repetition(isOptional)) 75 | .element(element) 76 | .named(name) 77 | 78 | def required: ListDef = 79 | this.copy(isOptional = false) 80 | 81 | def optional: ListDef = 82 | this.copy(isOptional = true) 83 | 84 | } 85 | 86 | case class MapDef(key: Type, value: Type, isOptional: Boolean = false) extends Def[MapDef] { 87 | 88 | override def named(name: String): Type = 89 | Types 90 | .map(repetition(isOptional)) 91 | .key(key) 92 | .value(value) 93 | .named(name) 94 | 95 | override def required: MapDef = 96 | this.copy(isOptional = false) 97 | 98 | override def optional: MapDef = 99 | this.copy(isOptional = true) 100 | 101 | } 102 | 103 | def repetition(optional: Boolean): Repetition = 104 | if (optional) Repetition.OPTIONAL else Repetition.REQUIRED 105 | 106 | def asMessageType(schema: Type): MessageType = { 107 | val groupSchema = schema.asGroupType() 108 | val name = groupSchema.getName 109 | val fields = groupSchema.getFields 110 | 111 | new MessageType(name, fields) 112 | } 113 | 114 | import PrimitiveTypeName._ 115 | import LogicalTypeAnnotation._ 116 | 117 | // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md 118 | def enum0: PrimitiveDef = PrimitiveDef(BINARY, Some(enumType())) 119 | val string: PrimitiveDef = PrimitiveDef(BINARY, Some(stringType())) 120 | val boolean: PrimitiveDef = PrimitiveDef(BOOLEAN) 121 | val byte: PrimitiveDef = PrimitiveDef(INT32, Some(intType(8, false))) 122 | val short: PrimitiveDef = PrimitiveDef(INT32, Some(intType(16, true))) 123 | val int: PrimitiveDef = PrimitiveDef(INT32, Some(intType(32, true))) 124 | val long: PrimitiveDef = PrimitiveDef(INT64, Some(intType(64, true))) 125 | val float: PrimitiveDef = PrimitiveDef(FLOAT) 126 | val double: PrimitiveDef = PrimitiveDef(DOUBLE) 127 | val binary: PrimitiveDef = PrimitiveDef(BINARY) 128 | val 
char: PrimitiveDef = byte 129 | val uuid: PrimitiveDef = PrimitiveDef(FIXED_LEN_BYTE_ARRAY, Some(uuidType())).length(16) 130 | val currency: PrimitiveDef = string 131 | val bigDecimal: PrimitiveDef = PrimitiveDef(INT64, Some(decimalType(DECIMAL_PRECISION, DECIMAL_SCALE))) 132 | val bigInteger: PrimitiveDef = PrimitiveDef(BINARY) 133 | val dayOfWeek: PrimitiveDef = byte 134 | val monthType: PrimitiveDef = byte 135 | val monthDay: PrimitiveDef = PrimitiveDef(FIXED_LEN_BYTE_ARRAY).length(2) 136 | val period: PrimitiveDef = PrimitiveDef(FIXED_LEN_BYTE_ARRAY).length(12) 137 | val year: PrimitiveDef = PrimitiveDef(INT32, Some(intType(16, false))) 138 | val yearMonth: PrimitiveDef = PrimitiveDef(FIXED_LEN_BYTE_ARRAY).length(4) 139 | val zoneId: PrimitiveDef = string 140 | val zoneOffset: PrimitiveDef = string 141 | val duration: PrimitiveDef = PrimitiveDef(INT64, Some(intType(64, false))) 142 | val instant: PrimitiveDef = PrimitiveDef(INT64, Some(intType(64, false))) 143 | val localDate: PrimitiveDef = PrimitiveDef(INT32, Some(dateType())) 144 | val localTime: PrimitiveDef = PrimitiveDef(INT32, Some(timeType(true, TimeUnit.MILLIS))) 145 | val localDateTime: PrimitiveDef = PrimitiveDef(INT64, Some(timestampType(true, TimeUnit.MILLIS))) 146 | val offsetTime: PrimitiveDef = PrimitiveDef(INT32, Some(timeType(false, TimeUnit.MILLIS))) 147 | val offsetDateTime: PrimitiveDef = PrimitiveDef(INT64, Some(timestampType(false, TimeUnit.MILLIS))) 148 | val zonedDateTime: PrimitiveDef = offsetDateTime 149 | 150 | def record(fields: Chunk[Type]): RecordDef = RecordDef(fields) 151 | def list(element: Type): ListDef = ListDef(element) 152 | def map(key: Type, value: Type): MapDef = MapDef(key, value) 153 | 154 | } 155 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/GroupValueConverter.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value 4 | import me.mnedokushev.zio.apache.parquet.core.Value.{ GroupValue, PrimitiveValue } 5 | import org.apache.parquet.io.api.{ Binary, Converter, GroupConverter, PrimitiveConverter } 6 | import org.apache.parquet.schema.Type.Repetition 7 | import org.apache.parquet.schema.{ GroupType, LogicalTypeAnnotation } 8 | import zio.Chunk 9 | 10 | import scala.jdk.CollectionConverters._ 11 | 12 | trait GroupValueConverter[V <: GroupValue[V]] extends GroupConverter { 13 | 14 | def get: V = 15 | this.groupValue 16 | 17 | def put(name: String, value: Value): Unit 18 | 19 | protected var groupValue: V = 20 | null.asInstanceOf[V] 21 | 22 | protected val converters: Chunk[Converter] 23 | 24 | override def getConverter(fieldIndex: Int): Converter = 25 | converters(fieldIndex) 26 | 27 | } 28 | 29 | object GroupValueConverter { 30 | 31 | abstract case class Default[V <: GroupValue[V]](schema: GroupType) extends GroupValueConverter[V] { 32 | 33 | override def put(name: String, value: Value): Unit = 34 | this.groupValue = this.groupValue.put(name, value) 35 | 36 | override protected val converters: Chunk[Converter] = 37 | Chunk.fromIterable( 38 | schema.getFields.asScala.toList.map { schema0 => 39 | val name = schema0.getName 40 | 41 | schema0.getLogicalTypeAnnotation match { 42 | case _ if schema0.isPrimitive => 43 | GroupValueConverter.primitive(name, this) 44 | case _: LogicalTypeAnnotation.ListLogicalTypeAnnotation => 45 | 
GroupValueConverter.list(schema0.asGroupType(), name, this) 46 | case _: LogicalTypeAnnotation.MapLogicalTypeAnnotation => 47 | GroupValueConverter.map(schema0.asGroupType(), name, this) 48 | case _ => 49 | (name, schema0.getRepetition) match { 50 | case ("list", Repetition.REPEATED) => 51 | GroupValueConverter.listElement(schema0.asGroupType(), this) 52 | case ("key_value", Repetition.REPEATED) => 53 | GroupValueConverter.mapKeyValue(schema0.asGroupType(), name, this) 54 | case _ => 55 | GroupValueConverter.record(schema0.asGroupType(), name, this) 56 | } 57 | } 58 | } 59 | ) 60 | 61 | } 62 | 63 | abstract case class ByPass[V <: GroupValue[V], S <: GroupValue[S]]( 64 | schema: GroupType, 65 | toSelf: GroupValueConverter[S] 66 | ) extends GroupValueConverter[V] { 67 | 68 | override def put(name: String, value: Value): Unit = 69 | toSelf.groupValue = toSelf.groupValue.put(name, value) 70 | 71 | override protected val converters: Chunk[Converter] = 72 | Chunk.fromIterable( 73 | schema.getFields.asScala.toList.map { schema0 => 74 | val name = schema0.getName 75 | 76 | schema0.getLogicalTypeAnnotation match { 77 | case _ if schema0.isPrimitive => 78 | GroupValueConverter.primitive(name, toSelf) 79 | case _: LogicalTypeAnnotation.ListLogicalTypeAnnotation => 80 | GroupValueConverter.list(schema0.asGroupType(), name, this) 81 | case _: LogicalTypeAnnotation.MapLogicalTypeAnnotation => 82 | GroupValueConverter.map(schema0.asGroupType(), name, this) 83 | case _ => 84 | (name, schema0.getRepetition) match { 85 | case ("list", Repetition.REPEATED) => 86 | GroupValueConverter.listElement(schema0.asGroupType(), this) 87 | case ("key_value", Repetition.REPEATED) => 88 | GroupValueConverter.mapKeyValue(schema0.asGroupType(), name, this) 89 | case _ => 90 | GroupValueConverter.record(schema0.asGroupType(), name, this) 91 | } 92 | } 93 | } 94 | ) 95 | 96 | } 97 | 98 | def primitive[V <: GroupValue[V]](name: String, parent: GroupValueConverter[V]): PrimitiveConverter = 99 | new PrimitiveConverter { 100 | 101 | override def addBinary(value: Binary): Unit = 102 | parent.put(name, PrimitiveValue.BinaryValue(value)) 103 | 104 | override def addBoolean(value: Boolean): Unit = 105 | parent.put(name, PrimitiveValue.BooleanValue(value)) 106 | 107 | override def addDouble(value: Double): Unit = 108 | parent.put(name, PrimitiveValue.DoubleValue(value)) 109 | 110 | override def addFloat(value: Float): Unit = 111 | parent.put(name, PrimitiveValue.FloatValue(value)) 112 | 113 | override def addInt(value: Int): Unit = 114 | parent.put(name, PrimitiveValue.Int32Value(value)) 115 | 116 | override def addLong(value: Long): Unit = 117 | parent.put(name, PrimitiveValue.Int64Value(value)) 118 | 119 | } 120 | 121 | def record[V <: GroupValue[V]]( 122 | schema: GroupType, 123 | name: String, 124 | parent: GroupValueConverter[V] 125 | ): GroupValueConverter[GroupValue.RecordValue] = 126 | new Default[GroupValue.RecordValue](schema) { 127 | 128 | override def start(): Unit = 129 | this.groupValue = Value.record( 130 | this.schema.getFields.asScala.toList.map(_.getName -> Value.nil).toMap 131 | ) 132 | 133 | override def end(): Unit = 134 | parent.put(name, this.groupValue) 135 | 136 | } 137 | 138 | def list[V <: GroupValue[V]]( 139 | schema: GroupType, 140 | name: String, 141 | parent: GroupValueConverter[V] 142 | ): GroupValueConverter[GroupValue.ListValue] = 143 | new Default[GroupValue.ListValue](schema) { 144 | 145 | override def start(): Unit = 146 | this.groupValue = Value.list(Chunk.empty) 147 | 148 | override def end(): 
Unit = 149 | parent.put(name, this.groupValue) 150 | } 151 | 152 | def listElement[V <: GroupValue[V], S <: GroupValue[S]]( 153 | schema: GroupType, 154 | parent: GroupValueConverter[S] 155 | ): GroupValueConverter[GroupValue.RecordValue] = 156 | new ByPass[GroupValue.RecordValue, S](schema, parent) { 157 | 158 | override def start(): Unit = () 159 | 160 | override def end(): Unit = () 161 | 162 | } 163 | 164 | def map[V <: GroupValue[V]]( 165 | schema: GroupType, 166 | name: String, 167 | parent: GroupValueConverter[V] 168 | ): GroupValueConverter[GroupValue.MapValue] = 169 | new Default[GroupValue.MapValue](schema) { 170 | 171 | override def start(): Unit = 172 | this.groupValue = Value.map(Map.empty) 173 | 174 | override def end(): Unit = 175 | parent.put(name, this.groupValue) 176 | } 177 | 178 | def mapKeyValue[V <: GroupValue[V]]( 179 | schema: GroupType, 180 | name: String, 181 | parent: GroupValueConverter[V] 182 | ): GroupValueConverter[GroupValue.RecordValue] = 183 | new Default[GroupValue.RecordValue](schema) { 184 | 185 | override def start(): Unit = 186 | this.groupValue = Value.record(Map("key" -> Value.nil, "value" -> Value.nil)) 187 | 188 | override def end(): Unit = 189 | parent.put(name, this.groupValue) 190 | 191 | } 192 | 193 | def root(schema: GroupType): GroupValueConverter[GroupValue.RecordValue] = 194 | new Default[GroupValue.RecordValue](schema) { 195 | 196 | override def start(): Unit = 197 | this.groupValue = Value.record( 198 | this.schema.getFields.asScala.toList.map(_.getName -> Value.nil).toMap 199 | ) 200 | 201 | override def end(): Unit = () 202 | } 203 | 204 | } 205 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/SchemaEncoderDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Schemas 4 | import me.mnedokushev.zio.apache.parquet.core.Schemas.PrimitiveDef 5 | import org.apache.parquet.schema.Type 6 | import zio.Chunk 7 | import zio.schema.{ Deriver, Schema, StandardType } 8 | 9 | object SchemaEncoderDeriver { 10 | 11 | val default: Deriver[SchemaEncoder] = new Deriver[SchemaEncoder] { 12 | 13 | override def deriveRecord[A]( 14 | record: Schema.Record[A], 15 | fields: => Chunk[Deriver.WrappedF[SchemaEncoder, ?]], 16 | summoned: => Option[SchemaEncoder[A]] 17 | ): SchemaEncoder[A] = new SchemaEncoder[A] { 18 | 19 | private def enc[A1](name0: String, schema0: Schema[A1], encoder: SchemaEncoder[?]) = 20 | encoder.asInstanceOf[SchemaEncoder[A1]].encode(schema0, name0, isSchemaOptional(schema0)) 21 | 22 | override def encode(schema: Schema[A], name: String, optional: Boolean): Type = { 23 | val fieldTypes = record.fields.zip(fields.map(_.unwrap)).map { case (field, encoder) => 24 | enc(field.name, field.schema, encoder) 25 | } 26 | 27 | Schemas.record(fieldTypes).optionality(optional).named(name) 28 | } 29 | } 30 | 31 | override def deriveEnum[A]( 32 | `enum`: Schema.Enum[A], 33 | cases: => Chunk[Deriver.WrappedF[SchemaEncoder, ?]], 34 | summoned: => Option[SchemaEncoder[A]] 35 | ): SchemaEncoder[A] = new SchemaEncoder[A] { 36 | override def encode(schema: Schema[A], name: String, optional: Boolean): Type = 37 | Schemas.enum0.optionality(optional).named(name) 38 | } 39 | 40 | override def derivePrimitive[A]( 41 | st: StandardType[A], 42 | summoned: => Option[SchemaEncoder[A]] 43 | ): SchemaEncoder[A] = 
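// Maps each zio-schema StandardType to the corresponding Parquet primitive definition from Schemas; UnitType has no Parquet representation and raises an EncoderError.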
44 | new SchemaEncoder[A] { 45 | override def encode(schema: Schema[A], name: String, optional: Boolean): Type = { 46 | def tpe(prim: PrimitiveDef) = 47 | prim.optionality(optional).named(name) 48 | 49 | st match { 50 | case StandardType.StringType => 51 | tpe(Schemas.string) 52 | case StandardType.BoolType => 53 | tpe(Schemas.boolean) 54 | case StandardType.ByteType => 55 | tpe(Schemas.byte) 56 | case StandardType.ShortType => 57 | tpe(Schemas.short) 58 | case StandardType.IntType => 59 | tpe(Schemas.int) 60 | case StandardType.LongType => 61 | tpe(Schemas.long) 62 | case StandardType.FloatType => 63 | tpe(Schemas.float) 64 | case StandardType.DoubleType => 65 | tpe(Schemas.double) 66 | case StandardType.BinaryType => 67 | tpe(Schemas.binary) 68 | case StandardType.CharType => 69 | tpe(Schemas.char) 70 | case StandardType.UUIDType => 71 | tpe(Schemas.uuid) 72 | case StandardType.CurrencyType => 73 | tpe(Schemas.currency) 74 | case StandardType.BigDecimalType => 75 | tpe(Schemas.bigDecimal) 76 | case StandardType.BigIntegerType => 77 | tpe(Schemas.bigInteger) 78 | case StandardType.DayOfWeekType => 79 | tpe(Schemas.dayOfWeek) 80 | case StandardType.MonthType => 81 | tpe(Schemas.monthType) 82 | case StandardType.MonthDayType => 83 | tpe(Schemas.monthDay) 84 | case StandardType.PeriodType => 85 | tpe(Schemas.period) 86 | case StandardType.YearType => 87 | tpe(Schemas.year) 88 | case StandardType.YearMonthType => 89 | tpe(Schemas.yearMonth) 90 | case StandardType.ZoneIdType => 91 | tpe(Schemas.zoneId) 92 | case StandardType.ZoneOffsetType => 93 | tpe(Schemas.zoneOffset) 94 | case StandardType.DurationType => 95 | tpe(Schemas.duration) 96 | case StandardType.InstantType => 97 | tpe(Schemas.instant) 98 | case StandardType.LocalDateType => 99 | tpe(Schemas.localDate) 100 | case StandardType.LocalTimeType => 101 | tpe(Schemas.localTime) 102 | case StandardType.LocalDateTimeType => 103 | tpe(Schemas.localDateTime) 104 | case StandardType.OffsetTimeType => 105 | tpe(Schemas.offsetTime) 106 | case StandardType.OffsetDateTimeType => 107 | tpe(Schemas.offsetDateTime) 108 | case StandardType.ZonedDateTimeType => 109 | tpe(Schemas.zonedDateTime) 110 | case StandardType.UnitType => 111 | throw EncoderError("Unit standard type is unsupported") 112 | } 113 | } 114 | } 115 | 116 | override def deriveOption[A]( 117 | option: Schema.Optional[A], 118 | inner: => SchemaEncoder[A], 119 | summoned: => Option[SchemaEncoder[Option[A]]] 120 | ): SchemaEncoder[Option[A]] = new SchemaEncoder[Option[A]] { 121 | override def encode(schema: Schema[Option[A]], name: String, optional: Boolean): Type = 122 | inner.encode(option.schema, name, optional = true) 123 | } 124 | 125 | override def deriveSequence[C[_], A]( 126 | sequence: Schema.Sequence[C[A], A, ?], 127 | inner: => SchemaEncoder[A], 128 | summoned: => Option[SchemaEncoder[C[A]]] 129 | ): SchemaEncoder[C[A]] = new SchemaEncoder[C[A]] { 130 | override def encode(schema: Schema[C[A]], name: String, optional: Boolean): Type = 131 | Schemas 132 | .list(inner.encode(sequence.elementSchema, "element", isSchemaOptional(sequence.elementSchema))) 133 | .optionality(optional) 134 | .named(name) 135 | } 136 | 137 | override def deriveMap[K, V]( 138 | map: Schema.Map[K, V], 139 | key: => SchemaEncoder[K], 140 | value: => SchemaEncoder[V], 141 | summoned: => Option[SchemaEncoder[Map[K, V]]] 142 | ): SchemaEncoder[Map[K, V]] = new SchemaEncoder[Map[K, V]] { 143 | override def encode(schema: Schema[Map[K, V]], name: String, optional: Boolean): Type = 144 | Schemas 145 | .map( 146 
| key.encode(map.keySchema, "key", optional = false), 147 | value.encode(map.valueSchema, "value", optional = isSchemaOptional(map.valueSchema)) 148 | ) 149 | .optionality(optional) 150 | .named(name) 151 | } 152 | 153 | override def deriveTransformedRecord[A, B]( 154 | record: Schema.Record[A], 155 | transform: Schema.Transform[A, B, ?], 156 | fields: => Chunk[Deriver.WrappedF[SchemaEncoder, ?]], 157 | summoned: => Option[SchemaEncoder[B]] 158 | ): SchemaEncoder[B] = summoned.getOrElse { 159 | new SchemaEncoder[B] { 160 | private def enc[A1](name0: String, schema0: Schema[A1], encoder: SchemaEncoder[?]) = 161 | encoder.asInstanceOf[SchemaEncoder[A1]].encode(schema0, name0, isSchemaOptional(schema0)) 162 | 163 | override def encode(schema: Schema[B], name: String, optional: Boolean): Type = { 164 | val fieldTypes = record.fields.zip(fields.map(_.unwrap)).map { case (field, encoder) => 165 | enc(field.name, field.schema, encoder) 166 | } 167 | 168 | Schemas.record(fieldTypes).optionality(optional).named(name) 169 | } 170 | } 171 | } 172 | }.cached 173 | 174 | val summoned: Deriver[SchemaEncoder] = default.autoAcceptSummoned 175 | 176 | private def isSchemaOptional(schema: Schema[?]): Boolean = 177 | schema match { 178 | case _: Schema.Optional[_] => true 179 | case _ => false 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/ValueEncoderDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value 4 | import zio.Chunk 5 | import zio.schema.{ Deriver, Schema, StandardType } 6 | 7 | import java.math.{ BigDecimal, BigInteger } 8 | import java.time.{ 9 | DayOfWeek, 10 | Duration, 11 | Instant, 12 | LocalDate, 13 | LocalDateTime, 14 | LocalTime, 15 | Month, 16 | MonthDay, 17 | OffsetDateTime, 18 | OffsetTime, 19 | Period, 20 | Year, 21 | YearMonth, 22 | ZoneId, 23 | ZoneOffset, 24 | ZonedDateTime 25 | } 26 | import java.util.{ Currency, UUID } 27 | 28 | object ValueEncoderDeriver { 29 | 30 | val default: Deriver[ValueEncoder] = new Deriver[ValueEncoder] { 31 | 32 | override def deriveRecord[A]( 33 | record: Schema.Record[A], 34 | fields: => Chunk[Deriver.WrappedF[ValueEncoder, ?]], 35 | summoned: => Option[ValueEncoder[A]] 36 | ): ValueEncoder[A] = new ValueEncoder[A] { 37 | 38 | private def enc[A1](v: A, field: Schema.Field[A, A1], encoder: ValueEncoder[?]) = 39 | encoder.asInstanceOf[ValueEncoder[A1]].encode(field.get(v)) 40 | 41 | override def encode(value: A): Value = 42 | Value.record( 43 | record.fields 44 | .zip(fields.map(_.unwrap)) 45 | .map { case (field, encoder) => 46 | field.name -> enc(value, field, encoder) 47 | } 48 | .toMap 49 | ) 50 | } 51 | 52 | override def deriveEnum[A]( 53 | `enum`: Schema.Enum[A], 54 | cases: => Chunk[Deriver.WrappedF[ValueEncoder, ?]], 55 | summoned: => Option[ValueEncoder[A]] 56 | ): ValueEncoder[A] = new ValueEncoder[A] { 57 | override def encode(value: A): Value = { 58 | val casesMap = `enum`.cases.map { case0 => 59 | case0.schema.asInstanceOf[Schema.CaseClass0[A]].defaultConstruct() -> case0.id 60 | }.toMap 61 | 62 | derivePrimitive(StandardType.StringType, summoned = None) 63 | .contramap[A] { case0 => 64 | casesMap.getOrElse(case0, throw EncoderError(s"Failed to encode enum for value $case0")) 65 | } 66 | .encode(value) 67 | } 68 | } 69 | 70 | override def derivePrimitive[A]( 
71 | st: StandardType[A], 72 | summoned: => Option[ValueEncoder[A]] 73 | ): ValueEncoder[A] = 74 | new ValueEncoder[A] { 75 | override def encode(value: A): Value = 76 | (st, value) match { 77 | case (StandardType.StringType, v: String) => 78 | Value.string(v) 79 | case (StandardType.BoolType, v: Boolean) => 80 | Value.boolean(v) 81 | case (StandardType.ByteType, v: Byte) => 82 | Value.int(v.toInt) 83 | case (StandardType.ShortType, v: Short) => 84 | Value.short(v) 85 | case (StandardType.IntType, v: Int) => 86 | Value.int(v) 87 | case (StandardType.LongType, v: Long) => 88 | Value.long(v) 89 | case (StandardType.FloatType, v: Float) => 90 | Value.float(v) 91 | case (StandardType.DoubleType, v: Double) => 92 | Value.double(v) 93 | case (StandardType.BinaryType, v: Chunk[_]) => 94 | Value.binary(v.asInstanceOf[Chunk[Byte]]) 95 | case (StandardType.CharType, v: Char) => 96 | Value.char(v) 97 | case (StandardType.UUIDType, v: UUID) => 98 | Value.uuid(v) 99 | case (StandardType.CurrencyType, v: Currency) => 100 | Value.currency(v) 101 | case (StandardType.BigDecimalType, v: BigDecimal) => 102 | Value.bigDecimal(v) 103 | case (StandardType.BigIntegerType, v: BigInteger) => 104 | Value.bigInteger(v) 105 | case (StandardType.DayOfWeekType, v: DayOfWeek) => 106 | Value.dayOfWeek(v) 107 | case (StandardType.MonthType, v: Month) => 108 | Value.month(v) 109 | case (StandardType.MonthDayType, v: MonthDay) => 110 | Value.monthDay(v) 111 | case (StandardType.PeriodType, v: Period) => 112 | Value.period(v) 113 | case (StandardType.YearType, v: Year) => 114 | Value.year(v) 115 | case (StandardType.YearMonthType, v: YearMonth) => 116 | Value.yearMonth(v) 117 | case (StandardType.ZoneIdType, v: ZoneId) => 118 | Value.zoneId(v) 119 | case (StandardType.ZoneOffsetType, v: ZoneOffset) => 120 | Value.zoneOffset(v) 121 | case (StandardType.DurationType, v: Duration) => 122 | Value.duration(v) 123 | case (StandardType.InstantType, v: Instant) => 124 | Value.instant(v) 125 | case (StandardType.LocalDateType, v: LocalDate) => 126 | Value.localDate(v) 127 | case (StandardType.LocalTimeType, v: LocalTime) => 128 | Value.localTime(v) 129 | case (StandardType.LocalDateTimeType, v: LocalDateTime) => 130 | Value.localDateTime(v) 131 | case (StandardType.OffsetTimeType, v: OffsetTime) => 132 | Value.offsetTime(v) 133 | case (StandardType.OffsetDateTimeType, v: OffsetDateTime) => 134 | Value.offsetDateTime(v) 135 | case (StandardType.ZonedDateTimeType, v: ZonedDateTime) => 136 | Value.zonedDateTime(v) 137 | case (other, _) => 138 | throw EncoderError(s"Unsupported ZIO Schema StandardType $other") 139 | } 140 | } 141 | 142 | override def deriveOption[A]( 143 | option: Schema.Optional[A], 144 | inner: => ValueEncoder[A], 145 | summoned: => Option[ValueEncoder[Option[A]]] 146 | ): ValueEncoder[Option[A]] = 147 | new ValueEncoder[Option[A]] { 148 | override def encode(value: Option[A]): Value = 149 | value match { 150 | case Some(v) => inner.encode(v) 151 | case _ => Value.nil 152 | } 153 | } 154 | 155 | override def deriveSequence[C[_], A]( 156 | sequence: Schema.Sequence[C[A], A, ?], 157 | inner: => ValueEncoder[A], 158 | summoned: => Option[ValueEncoder[C[A]]] 159 | ): ValueEncoder[C[A]] = new ValueEncoder[C[A]] { 160 | override def encode(value: C[A]): Value = 161 | Value.list(sequence.toChunk(value).map(inner.encode)) 162 | } 163 | 164 | override def deriveMap[K, V]( 165 | map: Schema.Map[K, V], 166 | key: => ValueEncoder[K], 167 | value: => ValueEncoder[V], 168 | summoned: => Option[ValueEncoder[Map[K, V]]] 169 | ): 
ValueEncoder[Map[K, V]] = new ValueEncoder[Map[K, V]] { 170 | override def encode(value0: Map[K, V]): Value = 171 | Value.map( 172 | value0.map { case (k, v) => 173 | key.encode(k) -> value.encode(v) 174 | } 175 | ) 176 | } 177 | 178 | override def deriveTransformedRecord[A, B]( 179 | record: Schema.Record[A], 180 | transform: Schema.Transform[A, B, ?], 181 | fields: => Chunk[Deriver.WrappedF[ValueEncoder, ?]], 182 | summoned: => Option[ValueEncoder[B]] 183 | ): ValueEncoder[B] = summoned.getOrElse { 184 | new ValueEncoder[B] { 185 | private def enc[A1](v: A, field: Schema.Field[A, A1], encoder: ValueEncoder[?]) = 186 | encoder.asInstanceOf[ValueEncoder[A1]].encode(field.get(v)) 187 | 188 | override def encode(value: B): Value = 189 | transform.g(value) match { 190 | case Right(v) => 191 | Value.record( 192 | record.fields 193 | .zip(fields.map(_.unwrap)) 194 | .map { case (field, encoder) => 195 | field.name -> enc(v, field, encoder) 196 | } 197 | .toMap 198 | ) 199 | case Left(reason) => 200 | throw EncoderError(s"Failed to encode transformed record for value $value: $reason") 201 | } 202 | } 203 | } 204 | }.cached 205 | 206 | val summoned: Deriver[ValueEncoder] = default.autoAcceptSummoned 207 | 208 | } 209 | -------------------------------------------------------------------------------- /modules/core/src/test/scala-2.13+/me/mnedokushev/zio/apache/parquet/core/Fixtures.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.codec.{ 4 | SchemaEncoder, 5 | SchemaEncoderDeriver, 6 | ValueDecoder, 7 | ValueDecoderDeriver, 8 | ValueEncoder, 9 | ValueEncoderDeriver 10 | } 11 | import me.mnedokushev.zio.apache.parquet.core.filter.{ TypeTag, TypeTagDeriver } 12 | import org.apache.parquet.filter2.predicate.FilterApi 13 | import org.apache.parquet.filter2.predicate.Operators.BinaryColumn 14 | import org.apache.parquet.io.api.Binary 15 | import zio.Chunk 16 | import zio.schema._ 17 | 18 | import java.time._ 19 | import java.util.{ Currency, UUID } 20 | 21 | object Fixtures { 22 | 23 | // unable to generate code for case classes with more than 120 int fields due to following error: 24 | // tested with jdk 11.0.23 64bit 25 | // Error while emitting me/mnedokushev/zio/apache/parquet/core/codec/SchemaEncoderDeriverSpec$MaxArityRecord$ 26 | // Method too large: me/mnedokushev/zio/apache/parquet/core/codec/SchemaEncoderDeriverSpec$MaxArityRecord$.derivedSchema0$lzyINIT4$1$$anonfun$364 (Lscala/collection/immutable/ListMap;)Lscala/util/Either; 27 | case class Arity23( 28 | a: Int, 29 | b: Option[String], 30 | c: Int, 31 | d: Int, 32 | e: Int, 33 | f: Int, 34 | g: Int, 35 | h: Int, 36 | i: Int, 37 | j: Int, 38 | k: Int, 39 | l: Int, 40 | m: Int, 41 | n: Int, 42 | o: Int, 43 | p: Int, 44 | q: Int, 45 | r: Int, 46 | s: Int, 47 | t: Int, 48 | u: Int, 49 | v: Int, 50 | w: Int 51 | ) 52 | object Arity23 { 53 | implicit lazy val schema: Schema[Arity23] = 54 | DeriveSchema.gen[Arity23] 55 | } 56 | 57 | case class MyRecord(a: String, b: Int, child: MyRecord.Child, enm: MyRecord.Enum, opt: Option[Int]) 58 | 59 | object MyRecord { 60 | implicit val schema: Schema.CaseClass5.WithFields[ 61 | "a", 62 | "b", 63 | "child", 64 | "enm", 65 | "opt", 66 | String, 67 | Int, 68 | MyRecord.Child, 69 | MyRecord.Enum, 70 | Option[Int], 71 | MyRecord 72 | ] = 73 | DeriveSchema.gen[MyRecord] 74 | implicit val typeTag: TypeTag[MyRecord] = 75 | Derive.derive[TypeTag, 
MyRecord](TypeTagDeriver.default) 76 | 77 | case class Child(c: Int, d: Option[Long]) 78 | object Child { 79 | implicit val schema: Schema.CaseClass2.WithFields["c", "d", Int, Option[Long], MyRecord.Child] = 80 | DeriveSchema.gen[Child] 81 | implicit val typeTag: TypeTag[Child] = 82 | Derive.derive[TypeTag, Child](TypeTagDeriver.default) 83 | } 84 | 85 | sealed trait Enum 86 | object Enum { 87 | case object Started extends Enum 88 | case object InProgress extends Enum 89 | case object Done extends Enum 90 | 91 | implicit val schema: Schema[Enum] = 92 | DeriveSchema.gen[Enum] 93 | implicit val typeTag: TypeTag[Enum] = 94 | Derive.derive[TypeTag, Enum](TypeTagDeriver.default) 95 | } 96 | } 97 | 98 | case class MyRecordSummoned(a: Int, b: String) 99 | 100 | object MyRecordSummoned { 101 | implicit val schema: zio.schema.Schema.CaseClass2.WithFields["a", "b", Int, String, MyRecordSummoned] = 102 | DeriveSchema.gen[MyRecordSummoned] 103 | 104 | implicit val intTypeTag: TypeTag.EqNotEq[Int] = 105 | TypeTag.eqnoteq[Int, Binary, BinaryColumn]( 106 | FilterApi.binaryColumn, 107 | v => Value.string(v.toString).value 108 | ) 109 | implicit val typeTag: TypeTag[MyRecordSummoned] = Derive.derive[TypeTag, MyRecordSummoned](TypeTagDeriver.summoned) 110 | } 111 | 112 | case class MyRecordIO(a: Int, b: String, c: Option[Long], d: List[Int], e: Map[String, Int]) 113 | object MyRecordIO { 114 | implicit val schema: zio.schema.Schema.CaseClass5.WithFields[ 115 | "a", 116 | "b", 117 | "c", 118 | "d", 119 | "e", 120 | Int, 121 | String, 122 | Option[Long], 123 | List[Int], 124 | Map[String, Int], 125 | MyRecordIO 126 | ] = 127 | DeriveSchema.gen[MyRecordIO] 128 | implicit val schemaEncoder: SchemaEncoder[MyRecordIO] = 129 | Derive.derive[SchemaEncoder, MyRecordIO](SchemaEncoderDeriver.summoned) 130 | implicit val valueEncoder: ValueEncoder[MyRecordIO] = 131 | Derive.derive[ValueEncoder, MyRecordIO](ValueEncoderDeriver.summoned) 132 | implicit val valueDecoder: ValueDecoder[MyRecordIO] = 133 | Derive.derive[ValueDecoder, MyRecordIO](ValueDecoderDeriver.summoned) 134 | implicit val typeTag: TypeTag[MyRecordIO] = 135 | Derive.derive[TypeTag, MyRecordIO](TypeTagDeriver.default) 136 | } 137 | 138 | case class MyProjectedRecordIO(a: Int, c: Option[Long], d: List[Int], e: Map[String, Int]) 139 | object MyProjectedRecordIO { 140 | implicit val schema: zio.schema.Schema.CaseClass4.WithFields[ 141 | "a", 142 | "c", 143 | "d", 144 | "e", 145 | Int, 146 | Option[Long], 147 | List[Int], 148 | Map[String, Int], 149 | MyProjectedRecordIO 150 | ] = 151 | DeriveSchema.gen[MyProjectedRecordIO] 152 | implicit val schemaEncoder: SchemaEncoder[MyProjectedRecordIO] = 153 | Derive.derive[SchemaEncoder, MyProjectedRecordIO](SchemaEncoderDeriver.summoned) 154 | implicit val valueEncoder: ValueEncoder[MyProjectedRecordIO] = 155 | Derive.derive[ValueEncoder, MyProjectedRecordIO](ValueEncoderDeriver.summoned) 156 | implicit val valueDecoder: ValueDecoder[MyProjectedRecordIO] = 157 | Derive.derive[ValueDecoder, MyProjectedRecordIO](ValueDecoderDeriver.summoned) 158 | } 159 | 160 | case class MyRecordAllTypes1( 161 | string: String, 162 | boolean: Boolean, 163 | byte: Byte, 164 | short: Short, 165 | int: Int, 166 | long: Long, 167 | float: Float, 168 | double: Double, 169 | binary: Chunk[Byte], 170 | char: Char, 171 | uuid: UUID, 172 | currency: Currency, 173 | bigDecimal: java.math.BigDecimal, 174 | bigInteger: java.math.BigInteger, 175 | dayOfWeek: DayOfWeek, 176 | month: Month, 177 | monthDay: MonthDay, 178 | period: Period, 179 | year: 
Year, 180 | yearMonth: YearMonth, 181 | zoneId: ZoneId, 182 | zoneOffset: ZoneOffset 183 | ) 184 | object MyRecordAllTypes1 { 185 | implicit val schema: zio.schema.Schema.CaseClass22.WithFields[ 186 | "string", 187 | "boolean", 188 | "byte", 189 | "short", 190 | "int", 191 | "long", 192 | "float", 193 | "double", 194 | "binary", 195 | "char", 196 | "uuid", 197 | "currency", 198 | "bigDecimal", 199 | "bigInteger", 200 | "dayOfWeek", 201 | "month", 202 | "monthDay", 203 | "period", 204 | "year", 205 | "yearMonth", 206 | "zoneId", 207 | "zoneOffset", 208 | String, 209 | Boolean, 210 | Byte, 211 | Short, 212 | Int, 213 | Long, 214 | Float, 215 | Double, 216 | zio.Chunk[Byte], 217 | Char, 218 | java.util.UUID, 219 | java.util.Currency, 220 | java.math.BigDecimal, 221 | java.math.BigInteger, 222 | java.time.DayOfWeek, 223 | java.time.Month, 224 | java.time.MonthDay, 225 | java.time.Period, 226 | java.time.Year, 227 | java.time.YearMonth, 228 | java.time.ZoneId, 229 | java.time.ZoneOffset, 230 | MyRecordAllTypes1 231 | ] = 232 | DeriveSchema.gen[MyRecordAllTypes1] 233 | implicit val typeTag: TypeTag[MyRecordAllTypes1] = 234 | Derive.derive[TypeTag, MyRecordAllTypes1](TypeTagDeriver.default) 235 | } 236 | case class MyRecordAllTypes2( 237 | duration: Duration, 238 | instant: Instant, 239 | localDate: LocalDate, 240 | localTime: LocalTime, 241 | localDateTime: LocalDateTime, 242 | offsetTime: OffsetTime, 243 | offsetDateTime: OffsetDateTime, 244 | zonedDateTime: ZonedDateTime 245 | ) 246 | object MyRecordAllTypes2 { 247 | implicit val schema: zio.schema.Schema.CaseClass8.WithFields[ 248 | "duration", 249 | "instant", 250 | "localDate", 251 | "localTime", 252 | "localDateTime", 253 | "offsetTime", 254 | "offsetDateTime", 255 | "zonedDateTime", 256 | java.time.Duration, 257 | java.time.Instant, 258 | java.time.LocalDate, 259 | java.time.LocalTime, 260 | java.time.LocalDateTime, 261 | java.time.OffsetTime, 262 | java.time.OffsetDateTime, 263 | java.time.ZonedDateTime, 264 | MyRecordAllTypes2 265 | ] = 266 | DeriveSchema.gen[MyRecordAllTypes2] 267 | implicit val typeTag: TypeTag[MyRecordAllTypes2] = 268 | Derive.derive[TypeTag, MyRecordAllTypes2](TypeTagDeriver.default) 269 | } 270 | 271 | } 272 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTag.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import _root_.java.time.Instant 4 | import me.mnedokushev.zio.apache.parquet.core.Value 5 | import me.mnedokushev.zio.apache.parquet.core.filter.TypeTag.{ Dummy, EqNotEq, LtGt, Optional, Record } 6 | import org.apache.parquet.filter2.predicate.{ FilterApi, Operators } 7 | import org.apache.parquet.io.api.Binary 8 | import zio.{ Chunk, Duration } 9 | 10 | import java.time.{ 11 | DayOfWeek, 12 | LocalDate, 13 | LocalDateTime, 14 | LocalTime, 15 | Month, 16 | MonthDay, 17 | OffsetDateTime, 18 | OffsetTime, 19 | Period, 20 | Year, 21 | YearMonth, 22 | ZoneId, 23 | ZoneOffset, 24 | ZonedDateTime 25 | } 26 | import java.util.{ Currency, UUID } 27 | import scala.jdk.CollectionConverters._ 28 | 29 | sealed trait TypeTag[+A] { self => 30 | 31 | override def toString: String = 32 | self match { 33 | case _: Dummy[_] => "Dummy[A]" 34 | case _: Optional[_] => "Optional[A]" 35 | case _: Record[_] => "Record[A]" 36 | case _: EqNotEq[_] => "EqNotEq[A]" 37 | case _: LtGt[_] => "LtGt[A]" 38 | } 39 | 40 | } 41 | 42 
| object TypeTag { 43 | 44 | trait Dummy[+A] extends TypeTag[A] 45 | 46 | def dummy[A]: TypeTag.Dummy[A] = 47 | new Dummy[A] {} 48 | 49 | final case class Optional[+A: TypeTag]() extends TypeTag[Option[A]] { 50 | val typeTag: TypeTag[A] = implicitly[TypeTag[A]] 51 | } 52 | 53 | implicit def optional[A: TypeTag]: TypeTag[Option[A]] = 54 | Optional[A]() 55 | 56 | final case class Record[+A](columns: Map[String, TypeTag[?]]) extends TypeTag[A] 57 | 58 | trait EqNotEq[A] extends TypeTag[A] { self => 59 | type T <: Comparable[T] 60 | type C <: Operators.Column[T] & Operators.SupportsEqNotEq 61 | 62 | def cast[A0]: EqNotEq[A0] = self.asInstanceOf[EqNotEq[A0]] 63 | 64 | def column(path: String): C 65 | def value(v: A): T 66 | def values(vs: Set[A]): java.util.Set[T] = 67 | vs.map(value).asJava 68 | } 69 | 70 | trait LtGt[A] extends TypeTag[A] { self => 71 | type T <: Comparable[T] 72 | type C <: Operators.Column[T] & Operators.SupportsLtGt 73 | 74 | def cast[A0]: LtGt[A0] = self.asInstanceOf[LtGt[A0]] 75 | 76 | def column(path: String): C 77 | def value(v: A): T 78 | def values(vs: Set[A]): java.util.Set[T] = 79 | vs.map(value).asJava 80 | } 81 | 82 | def eqnoteq[A, T0 <: Comparable[T0], C0 <: Operators.Column[T0] & Operators.SupportsEqNotEq]( 83 | column0: String => C0, 84 | value0: A => T0 85 | ): TypeTag.EqNotEq[A] = 86 | new TypeTag.EqNotEq[A] { 87 | 88 | override type T = T0 89 | 90 | override type C = C0 91 | 92 | override def column(path: String): C = 93 | column0(path) 94 | 95 | override def value(v: A): T = 96 | value0(v) 97 | 98 | } 99 | 100 | def ltgt[A, T0 <: Comparable[T0], C0 <: Operators.Column[T0] & Operators.SupportsLtGt]( 101 | column0: String => C0, 102 | value0: A => T0 103 | ): TypeTag.LtGt[A] = 104 | new TypeTag.LtGt[A] { 105 | 106 | override type T = T0 107 | 108 | override type C = C0 109 | 110 | override def column(path: String): C = 111 | column0(path) 112 | 113 | override def value(v: A): T = 114 | value0(v) 115 | 116 | } 117 | 118 | def enum0[A](casesMap: Map[A, String]): TypeTag.EqNotEq[A] = 119 | eqnoteq[A, Binary, Operators.BinaryColumn]( 120 | FilterApi.binaryColumn, 121 | v => Value.string(casesMap.getOrElse(v, throw FilterError(s"Failed to encode enum for value $v"))).value 122 | ) 123 | 124 | implicit val string: TypeTag.EqNotEq[String] = 125 | eqnoteq[String, Binary, Operators.BinaryColumn]( 126 | FilterApi.binaryColumn, 127 | Value.string(_).value 128 | ) 129 | implicit val boolean: TypeTag.EqNotEq[Boolean] = 130 | eqnoteq[Boolean, java.lang.Boolean, Operators.BooleanColumn]( 131 | FilterApi.booleanColumn, 132 | Value.boolean(_).value 133 | ) 134 | implicit val byte: TypeTag.LtGt[Byte] = 135 | ltgt[Byte, java.lang.Integer, Operators.IntColumn]( 136 | FilterApi.intColumn, 137 | Value.byte(_).value 138 | ) 139 | implicit val short: TypeTag.LtGt[Short] = 140 | ltgt[Short, java.lang.Integer, Operators.IntColumn]( 141 | FilterApi.intColumn, 142 | Value.short(_).value 143 | ) 144 | implicit val int: TypeTag.LtGt[Int] = 145 | ltgt[Int, java.lang.Integer, Operators.IntColumn]( 146 | FilterApi.intColumn, 147 | Value.int(_).value 148 | ) 149 | implicit val long: TypeTag.LtGt[Long] = 150 | ltgt[Long, java.lang.Long, Operators.LongColumn]( 151 | FilterApi.longColumn, 152 | Value.long(_).value 153 | ) 154 | implicit val float: TypeTag.LtGt[Float] = 155 | ltgt[Float, java.lang.Float, Operators.FloatColumn]( 156 | FilterApi.floatColumn, 157 | Value.float(_).value 158 | ) 159 | implicit val double: TypeTag.LtGt[Double] = 160 | ltgt[Double, java.lang.Double, 
Operators.DoubleColumn]( 161 | FilterApi.doubleColumn, 162 | Value.double(_).value 163 | ) 164 | implicit val binary: TypeTag.EqNotEq[Chunk[Byte]] = 165 | eqnoteq[Chunk[Byte], Binary, Operators.BinaryColumn]( 166 | FilterApi.binaryColumn, 167 | Value.binary(_).value 168 | ) 169 | implicit val char: TypeTag.EqNotEq[Char] = 170 | eqnoteq[Char, java.lang.Integer, Operators.IntColumn]( 171 | FilterApi.intColumn, 172 | Value.char(_).value 173 | ) 174 | implicit val uuid: TypeTag.EqNotEq[UUID] = 175 | eqnoteq[UUID, Binary, Operators.BinaryColumn]( 176 | FilterApi.binaryColumn, 177 | Value.uuid(_).value 178 | ) 179 | implicit val currency: TypeTag.EqNotEq[Currency] = 180 | eqnoteq[Currency, Binary, Operators.BinaryColumn]( 181 | FilterApi.binaryColumn, 182 | Value.currency(_).value 183 | ) 184 | implicit val bigDecimal: TypeTag.LtGt[java.math.BigDecimal] = 185 | ltgt[java.math.BigDecimal, java.lang.Long, Operators.LongColumn]( 186 | FilterApi.longColumn, 187 | Value.bigDecimal(_).value 188 | ) 189 | implicit val bigInteger: TypeTag.LtGt[java.math.BigInteger] = 190 | ltgt[java.math.BigInteger, Binary, Operators.BinaryColumn]( 191 | FilterApi.binaryColumn, 192 | Value.bigInteger(_).value 193 | ) 194 | implicit val dayOfWeek: TypeTag.LtGt[DayOfWeek] = 195 | ltgt[DayOfWeek, java.lang.Integer, Operators.IntColumn]( 196 | FilterApi.intColumn, 197 | Value.dayOfWeek(_).value 198 | ) 199 | implicit val month: TypeTag.LtGt[Month] = 200 | ltgt[Month, java.lang.Integer, Operators.IntColumn]( 201 | FilterApi.intColumn, 202 | Value.month(_).value 203 | ) 204 | implicit val monthDay: TypeTag.LtGt[MonthDay] = 205 | ltgt[MonthDay, Binary, Operators.BinaryColumn]( 206 | FilterApi.binaryColumn, 207 | Value.monthDay(_).value 208 | ) 209 | implicit val period: TypeTag.LtGt[Period] = 210 | ltgt[Period, Binary, Operators.BinaryColumn]( 211 | FilterApi.binaryColumn, 212 | Value.period(_).value 213 | ) 214 | implicit val year: TypeTag.LtGt[Year] = 215 | ltgt[Year, java.lang.Integer, Operators.IntColumn]( 216 | FilterApi.intColumn, 217 | Value.year(_).value 218 | ) 219 | implicit val yearMonth: TypeTag.LtGt[YearMonth] = 220 | ltgt[YearMonth, Binary, Operators.BinaryColumn]( 221 | FilterApi.binaryColumn, 222 | Value.yearMonth(_).value 223 | ) 224 | // NOTE: it is not implicit to make scalac happy since ZoneOffset is a subtype of ZoneId 225 | val zoneId: TypeTag.EqNotEq[ZoneId] = 226 | eqnoteq[ZoneId, Binary, Operators.BinaryColumn]( 227 | FilterApi.binaryColumn, 228 | Value.zoneId(_).value 229 | ) 230 | implicit val zoneOffset: TypeTag.EqNotEq[ZoneOffset] = 231 | eqnoteq[ZoneOffset, Binary, Operators.BinaryColumn]( 232 | FilterApi.binaryColumn, 233 | Value.zoneOffset(_).value 234 | ) 235 | implicit val duration: TypeTag.LtGt[Duration] = 236 | ltgt[Duration, java.lang.Long, Operators.LongColumn]( 237 | FilterApi.longColumn, 238 | Value.duration(_).value 239 | ) 240 | implicit val instant: TypeTag.LtGt[Instant] = 241 | ltgt[Instant, java.lang.Long, Operators.LongColumn]( 242 | FilterApi.longColumn, 243 | Value.instant(_).value 244 | ) 245 | implicit val localDate: TypeTag.LtGt[LocalDate] = 246 | ltgt[LocalDate, java.lang.Integer, Operators.IntColumn]( 247 | FilterApi.intColumn, 248 | Value.localDate(_).value 249 | ) 250 | implicit val localTime: TypeTag.LtGt[LocalTime] = 251 | ltgt[LocalTime, java.lang.Integer, Operators.IntColumn]( 252 | FilterApi.intColumn, 253 | Value.localTime(_).value 254 | ) 255 | implicit val localDateTime: TypeTag.LtGt[LocalDateTime] = 256 | ltgt[LocalDateTime, java.lang.Long, Operators.LongColumn]( 
257 | FilterApi.longColumn, 258 | Value.localDateTime(_).value 259 | ) 260 | implicit val offsetTime: TypeTag.LtGt[OffsetTime] = 261 | ltgt[OffsetTime, java.lang.Integer, Operators.IntColumn]( 262 | FilterApi.intColumn, 263 | Value.offsetTime(_).value 264 | ) 265 | implicit val offsetDateTime: TypeTag.LtGt[OffsetDateTime] = 266 | ltgt[OffsetDateTime, java.lang.Long, Operators.LongColumn]( 267 | FilterApi.longColumn, 268 | Value.offsetDateTime(_).value 269 | ) 270 | implicit val zonedDateTime: TypeTag.LtGt[ZonedDateTime] = 271 | ltgt[ZonedDateTime, java.lang.Long, Operators.LongColumn]( 272 | FilterApi.longColumn, 273 | Value.zonedDateTime(_).value 274 | ) 275 | 276 | } 277 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/ValueDecoderDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value.{ GroupValue, PrimitiveValue } 4 | import me.mnedokushev.zio.apache.parquet.core.{ DECIMAL_SCALE, MICROS_FACTOR, MILLIS_PER_DAY, Value } 5 | import zio._ 6 | import zio.schema._ 7 | 8 | import java.math.{ BigDecimal, BigInteger } 9 | import java.nio.{ ByteBuffer, ByteOrder } 10 | import java.time.{ 11 | DayOfWeek, 12 | Instant, 13 | LocalDate, 14 | LocalDateTime, 15 | LocalTime, 16 | Month, 17 | MonthDay, 18 | OffsetDateTime, 19 | OffsetTime, 20 | Period, 21 | Year, 22 | YearMonth, 23 | ZoneId, 24 | ZoneOffset, 25 | ZonedDateTime 26 | } 27 | import java.util.{ Currency, UUID } 28 | 29 | object ValueDecoderDeriver { 30 | 31 | val default: Deriver[ValueDecoder] = new Deriver[ValueDecoder] { 32 | 33 | override def deriveRecord[A]( 34 | record: Schema.Record[A], 35 | fields: => Chunk[Deriver.WrappedF[ValueDecoder, ?]], 36 | summoned: => Option[ValueDecoder[A]] 37 | ): ValueDecoder[A] = new ValueDecoder[A] { 38 | override def decode(value: Value): A = 39 | value match { 40 | case GroupValue.RecordValue(values) => 41 | Unsafe.unsafe { implicit unsafe => 42 | record.construct( 43 | Chunk 44 | .fromIterable(record.fields.map(f => values(f.name))) 45 | .zip(fields.map(_.unwrap)) 46 | .map { case (v, decoder) => 47 | decoder.decode(v) 48 | } 49 | ) match { 50 | case Right(v) => 51 | v 52 | case Left(reason) => 53 | throw DecoderError(s"Couldn't decode $value: $reason") 54 | } 55 | } 56 | 57 | case other => 58 | throw DecoderError(s"Couldn't decode $other, it must be of type RecordValue") 59 | } 60 | 61 | } 62 | 63 | override def deriveEnum[A]( 64 | `enum`: Schema.Enum[A], 65 | cases: => Chunk[Deriver.WrappedF[ValueDecoder, ?]], 66 | summoned: => Option[ValueDecoder[A]] 67 | ): ValueDecoder[A] = new ValueDecoder[A] { 68 | override def decode(value: Value): A = { 69 | val casesMap = `enum`.cases.map { case0 => 70 | case0.id -> case0.schema.asInstanceOf[Schema.CaseClass0[A]].defaultConstruct() 71 | }.toMap 72 | 73 | derivePrimitive(StandardType.StringType, summoned = None).map { case0 => 74 | casesMap.getOrElse(case0, throw DecoderError(s"Failed to decode enum for id $case0")) 75 | }.decode(value) 76 | } 77 | } 78 | 79 | override def derivePrimitive[A]( 80 | st: StandardType[A], 81 | summoned: => Option[ValueDecoder[A]] 82 | ): ValueDecoder[A] = new ValueDecoder[A] { 83 | 84 | private def localTime(v: Int) = 85 | LocalTime.ofNanoOfDay(v * MICROS_FACTOR) 86 | 87 | private def localDateTime(v: Long) = { 88 | val epochDay = v / MILLIS_PER_DAY 89 | val nanoOfDay 
= (v - (epochDay * MILLIS_PER_DAY)) * MICROS_FACTOR 90 | 91 | LocalDateTime.of(LocalDate.ofEpochDay(epochDay), LocalTime.ofNanoOfDay(nanoOfDay)) 92 | } 93 | 94 | override def decode(value: Value): A = 95 | (st, value) match { 96 | case (StandardType.StringType, PrimitiveValue.BinaryValue(v)) => 97 | v.toStringUsingUTF8 98 | case (StandardType.BoolType, PrimitiveValue.BooleanValue(v)) => 99 | v 100 | case (StandardType.ByteType, PrimitiveValue.Int32Value(v)) => 101 | v.toByte 102 | case (StandardType.ShortType, PrimitiveValue.Int32Value(v)) => 103 | v.toShort 104 | case (StandardType.IntType, PrimitiveValue.Int32Value(v)) => 105 | v 106 | case (StandardType.LongType, PrimitiveValue.Int64Value(v)) => 107 | v 108 | case (StandardType.FloatType, PrimitiveValue.FloatValue(v)) => 109 | v 110 | case (StandardType.DoubleType, PrimitiveValue.DoubleValue(v)) => 111 | v 112 | case (StandardType.BinaryType, PrimitiveValue.BinaryValue(v)) => 113 | Chunk.fromArray(v.getBytes) 114 | case (StandardType.CharType, PrimitiveValue.Int32Value(v)) => 115 | v.toChar 116 | case (StandardType.UUIDType, PrimitiveValue.BinaryValue(v)) => 117 | val bb = ByteBuffer.wrap(v.getBytes) 118 | 119 | new UUID(bb.getLong, bb.getLong) 120 | case (StandardType.CurrencyType, PrimitiveValue.BinaryValue(v)) => 121 | Currency.getInstance(v.toStringUsingUTF8) 122 | case (StandardType.BigDecimalType, PrimitiveValue.Int64Value(v)) => 123 | BigDecimal.valueOf(v, DECIMAL_SCALE) 124 | case (StandardType.BigIntegerType, PrimitiveValue.BinaryValue(v)) => 125 | new BigInteger(v.getBytes) 126 | case (StandardType.DayOfWeekType, PrimitiveValue.Int32Value(v)) => 127 | DayOfWeek.of(v) 128 | case (StandardType.MonthType, PrimitiveValue.Int32Value(v)) => 129 | Month.of(v) 130 | case (StandardType.MonthDayType, PrimitiveValue.BinaryValue(v)) => 131 | val bb = ByteBuffer.wrap(v.getBytes).order(ByteOrder.LITTLE_ENDIAN) 132 | 133 | MonthDay.of(bb.get.toInt, bb.get.toInt) 134 | case (StandardType.PeriodType, PrimitiveValue.BinaryValue(v)) => 135 | val bb = ByteBuffer.wrap(v.getBytes).order(ByteOrder.LITTLE_ENDIAN) 136 | 137 | Period.of(bb.getInt, bb.getInt, bb.getInt) 138 | case (StandardType.YearType, PrimitiveValue.Int32Value(v)) => 139 | Year.of(v) 140 | case (StandardType.YearMonthType, PrimitiveValue.BinaryValue(v)) => 141 | val bb = ByteBuffer.wrap(v.getBytes).order(ByteOrder.LITTLE_ENDIAN) 142 | 143 | YearMonth.of(bb.getShort.toInt, bb.getShort.toInt) 144 | case (StandardType.ZoneIdType, PrimitiveValue.BinaryValue(v)) => 145 | ZoneId.of(v.toStringUsingUTF8) 146 | case (StandardType.ZoneOffsetType, PrimitiveValue.BinaryValue(v)) => 147 | ZoneOffset.of(v.toStringUsingUTF8) 148 | case (StandardType.DurationType, PrimitiveValue.Int64Value(v)) => 149 | Duration.fromMillis(v) 150 | case (StandardType.InstantType, PrimitiveValue.Int64Value(v)) => 151 | Instant.ofEpochMilli(v) 152 | case (StandardType.LocalDateType, PrimitiveValue.Int32Value(v)) => 153 | LocalDate.ofEpochDay(v.toLong) 154 | case (StandardType.LocalTimeType, PrimitiveValue.Int32Value(v)) => 155 | localTime(v) 156 | case (StandardType.LocalDateTimeType, PrimitiveValue.Int64Value(v)) => 157 | localDateTime(v) 158 | case (StandardType.OffsetTimeType, PrimitiveValue.Int32Value(v)) => 159 | OffsetTime.of(localTime(v), ZoneOffset.UTC) 160 | case (StandardType.OffsetDateTimeType, PrimitiveValue.Int64Value(v)) => 161 | OffsetDateTime.of(localDateTime(v), ZoneOffset.UTC) 162 | case (StandardType.ZonedDateTimeType, PrimitiveValue.Int64Value(v)) => 163 | ZonedDateTime.of(localDateTime(v), 
ZoneId.of("Z")) 164 | case (other, _) => 165 | throw DecoderError(s"Unsupported ZIO Schema StandartType $other") 166 | } 167 | } 168 | 169 | override def deriveOption[A]( 170 | option: Schema.Optional[A], 171 | inner: => ValueDecoder[A], 172 | summoned: => Option[ValueDecoder[Option[A]]] 173 | ): ValueDecoder[Option[A]] = new ValueDecoder[Option[A]] { 174 | override def decode(value: Value): Option[A] = 175 | value match { 176 | case Value.NullValue => 177 | None 178 | case _ => 179 | Some(inner.decode(value)) 180 | } 181 | 182 | } 183 | 184 | override def deriveSequence[C[_], A]( 185 | sequence: Schema.Sequence[C[A], A, ?], 186 | inner: => ValueDecoder[A], 187 | summoned: => Option[ValueDecoder[C[A]]] 188 | ): ValueDecoder[C[A]] = new ValueDecoder[C[A]] { 189 | override def decode(value: Value): C[A] = 190 | value match { 191 | case GroupValue.ListValue(values) => 192 | sequence.fromChunk(values.map(inner.decode)) 193 | case other => 194 | throw DecoderError(s"Couldn't decode $other, it must be of type ListValue") 195 | } 196 | } 197 | 198 | override def deriveMap[K, V]( 199 | map: Schema.Map[K, V], 200 | key: => ValueDecoder[K], 201 | value: => ValueDecoder[V], 202 | summoned: => Option[ValueDecoder[Map[K, V]]] 203 | ): ValueDecoder[Map[K, V]] = new ValueDecoder[Map[K, V]] { 204 | override def decode(value0: Value): Map[K, V] = 205 | value0 match { 206 | case GroupValue.MapValue(values) => 207 | values.map { case (k, v) => 208 | key.decode(k) -> value.decode(v) 209 | } 210 | case other => 211 | throw DecoderError(s"Couldn't decode $other, it must be of type MapValue") 212 | } 213 | } 214 | 215 | override def deriveTransformedRecord[A, B]( 216 | record: Schema.Record[A], 217 | transform: Schema.Transform[A, B, ?], 218 | fields: => Chunk[Deriver.WrappedF[ValueDecoder, ?]], 219 | summoned: => Option[ValueDecoder[B]] 220 | ): ValueDecoder[B] = summoned.getOrElse { 221 | new ValueDecoder[B] { 222 | override def decode(value: Value): B = 223 | value match { 224 | case GroupValue.RecordValue(values) => 225 | Unsafe.unsafe { implicit unsafe => 226 | record 227 | .construct( 228 | Chunk 229 | .fromIterable(record.fields.map(f => values(f.name))) 230 | .zip(fields.map(_.unwrap)) 231 | .map { case (v, decoder) => 232 | decoder.decode(v) 233 | } 234 | ) 235 | .flatMap(transform.f) match { 236 | case Right(v) => v 237 | case Left(reason) => 238 | throw DecoderError(s"Couldn't decode $value: $reason") 239 | } 240 | } 241 | 242 | case other => 243 | throw DecoderError(s"Couldn't decode $other, it must be of type RecordValue") 244 | } 245 | } 246 | } 247 | }.cached 248 | 249 | def summoned: Deriver[ValueDecoder] = 250 | default.autoAcceptSummoned 251 | 252 | } 253 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/Value.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core 2 | 3 | import org.apache.parquet.io.api.{ Binary, RecordConsumer } 4 | import org.apache.parquet.schema.Type 5 | import zio.Chunk 6 | 7 | import java.math.{ BigDecimal, BigInteger } 8 | import java.nio.{ ByteBuffer, ByteOrder } 9 | import java.time.{ 10 | DayOfWeek, 11 | Duration, 12 | Instant, 13 | LocalDate, 14 | LocalDateTime, 15 | LocalTime, 16 | Month, 17 | MonthDay, 18 | OffsetDateTime, 19 | OffsetTime, 20 | Period, 21 | Year, 22 | YearMonth, 23 | ZoneId, 24 | ZoneOffset, 25 | ZonedDateTime 26 | } 27 | import java.util.{ Currency, UUID } 28 | 29 
| sealed trait Value { 30 | def write(schema: Type, recordConsumer: RecordConsumer): Unit 31 | } 32 | 33 | object Value { 34 | 35 | case object NullValue extends Value { 36 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 37 | throw new UnsupportedOperationException(s"NullValue cannot be written") 38 | } 39 | 40 | sealed trait PrimitiveValue[A] extends Value { 41 | def value: A 42 | } 43 | 44 | object PrimitiveValue { 45 | 46 | case class BooleanValue(value: Boolean) extends PrimitiveValue[Boolean] { 47 | 48 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 49 | recordConsumer.addBoolean(value) 50 | 51 | } 52 | 53 | case class Int32Value(value: Int) extends PrimitiveValue[Int] { 54 | 55 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 56 | recordConsumer.addInteger(value) 57 | 58 | } 59 | 60 | case class Int64Value(value: Long) extends PrimitiveValue[Long] { 61 | 62 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 63 | recordConsumer.addLong(value) 64 | 65 | } 66 | 67 | case class FloatValue(value: Float) extends PrimitiveValue[Float] { 68 | 69 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 70 | recordConsumer.addFloat(value) 71 | 72 | } 73 | 74 | case class DoubleValue(value: Double) extends PrimitiveValue[Double] { 75 | 76 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 77 | recordConsumer.addDouble(value) 78 | 79 | } 80 | 81 | case class BinaryValue(value: Binary) extends PrimitiveValue[Binary] { 82 | 83 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 84 | recordConsumer.addBinary(value) 85 | 86 | } 87 | 88 | } 89 | 90 | sealed trait GroupValue[Self <: GroupValue[Self]] extends Value { 91 | 92 | def put(name: String, value: Value): Self 93 | 94 | } 95 | 96 | object GroupValue { 97 | 98 | case class RecordValue(values: Map[String, Value]) extends GroupValue[RecordValue] { 99 | 100 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = { 101 | val groupSchema = schema.asGroupType() 102 | 103 | recordConsumer.startGroup() 104 | 105 | values.foreach { case (name, value) => 106 | val fieldIndex = groupSchema.getFieldIndex(name) 107 | val fieldType = groupSchema.getType(name) 108 | 109 | recordConsumer.startField(name, fieldIndex) 110 | value.write(fieldType, recordConsumer) 111 | recordConsumer.endField(name, fieldIndex) 112 | } 113 | 114 | recordConsumer.endGroup() 115 | } 116 | 117 | override def put(name: String, value: Value): RecordValue = 118 | if (values.contains(name)) 119 | this.copy(values.updated(name, value)) 120 | else 121 | throw new IllegalArgumentException(s"Record doesn't contain field $name") 122 | 123 | } 124 | 125 | case class ListValue(values: Chunk[Value]) extends GroupValue[ListValue] { 126 | 127 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = { 128 | recordConsumer.startGroup() 129 | 130 | if (values.nonEmpty) { 131 | val groupSchema = schema.asGroupType() 132 | val listSchema = groupSchema.getFields.get(0).asGroupType() 133 | val listFieldName = listSchema.getName 134 | val elementName = listSchema.getFields.get(0).getName // TODO: validate, must be "element" 135 | val listIndex = groupSchema.getFieldIndex(listFieldName) 136 | 137 | recordConsumer.startField(listFieldName, listIndex) 138 | 139 | values.foreach { value => 140 | RecordValue(Map(elementName -> value)).write(listSchema, recordConsumer) 141 | } 142 | 143 | 
recordConsumer.endField(listFieldName, listIndex) 144 | } 145 | 146 | recordConsumer.endGroup() 147 | } 148 | 149 | override def put(name: String, value: Value): ListValue = 150 | this.copy(values = values :+ value) 151 | 152 | } 153 | 154 | case class MapValue(values: Map[Value, Value]) extends GroupValue[MapValue] { 155 | 156 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = { 157 | recordConsumer.startGroup() 158 | 159 | if (values.nonEmpty) { 160 | val groupSchema = schema.asGroupType() 161 | val mapSchema = groupSchema.getFields.get(0).asGroupType() 162 | val mapFieldName = mapSchema.getName 163 | val mapIndex = groupSchema.getFieldIndex(mapFieldName) 164 | 165 | recordConsumer.startField(mapFieldName, mapIndex) 166 | 167 | values.foreach { case (key, value) => 168 | RecordValue(Map("key" -> key, "value" -> value)).write(mapSchema, recordConsumer) 169 | } 170 | 171 | recordConsumer.endField(mapFieldName, mapIndex) 172 | } 173 | 174 | recordConsumer.endGroup() 175 | } 176 | 177 | override def put(name: String, value: Value): MapValue = 178 | value match { 179 | case RecordValue(values0) => 180 | (values0.get("key"), values0.get("value")) match { 181 | case (Some(k), Some(v)) => 182 | this.copy(values = values.updated(k, v)) 183 | case _ => this 184 | } 185 | case mv: MapValue => mv 186 | case _ => this 187 | } 188 | } 189 | 190 | } 191 | 192 | def nil = 193 | NullValue 194 | 195 | def string(v: String) = 196 | PrimitiveValue.BinaryValue(Binary.fromString(v)) 197 | 198 | def boolean(v: Boolean) = 199 | PrimitiveValue.BooleanValue(v) 200 | 201 | def byte(v: Byte) = 202 | int(v.toInt) 203 | 204 | def short(v: Short) = 205 | int(v.toInt) 206 | 207 | def int(v: Int) = 208 | PrimitiveValue.Int32Value(v) 209 | 210 | def long(v: Long) = 211 | PrimitiveValue.Int64Value(v) 212 | 213 | def float(v: Float) = 214 | PrimitiveValue.FloatValue(v) 215 | 216 | def double(v: Double) = 217 | PrimitiveValue.DoubleValue(v) 218 | 219 | def binary(v: Chunk[Byte]) = 220 | PrimitiveValue.BinaryValue(Binary.fromConstantByteArray(v.toArray)) 221 | 222 | def char(v: Char) = 223 | int(v.toInt) 224 | 225 | def uuid(v: UUID) = { 226 | val bb = ByteBuffer.wrap(Array.ofDim(16)) 227 | 228 | bb.putLong(v.getMostSignificantBits) 229 | bb.putLong(v.getLeastSignificantBits) 230 | 231 | PrimitiveValue.BinaryValue(Binary.fromConstantByteArray(bb.array())) 232 | } 233 | 234 | def currency(v: Currency) = 235 | string(v.getCurrencyCode) 236 | 237 | def bigDecimal(v: BigDecimal) = 238 | long(v.unscaledValue.longValue) 239 | 240 | def bigInteger(v: BigInteger) = 241 | PrimitiveValue.BinaryValue(Binary.fromConstantByteArray(v.toByteArray)) 242 | 243 | def dayOfWeek(v: DayOfWeek) = 244 | byte(v.getValue.toByte) 245 | 246 | def month(v: Month) = 247 | byte(v.getValue.toByte) 248 | 249 | def monthDay(v: MonthDay) = { 250 | val bb = ByteBuffer.allocate(2).order(ByteOrder.LITTLE_ENDIAN) 251 | 252 | bb.put(v.getMonthValue.toByte) 253 | bb.put(v.getDayOfMonth.toByte) 254 | 255 | PrimitiveValue.BinaryValue(Binary.fromReusedByteArray(bb.array())) 256 | } 257 | 258 | def period(v: Period) = { 259 | val bb = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN) 260 | 261 | bb.putInt(v.getYears) 262 | bb.putInt(v.getMonths) 263 | bb.putInt(v.getDays) 264 | 265 | PrimitiveValue.BinaryValue(Binary.fromReusedByteArray(bb.array())) 266 | } 267 | 268 | def year(v: Year) = 269 | short(v.getValue.toShort) 270 | 271 | def yearMonth(v: YearMonth) = { 272 | val bb = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN) 273 | 
274 | bb.putShort(v.getYear.toShort) 275 | bb.putShort(v.getMonthValue.toShort) 276 | 277 | PrimitiveValue.BinaryValue(Binary.fromReusedByteArray(bb.array())) 278 | } 279 | 280 | def zoneId(v: ZoneId) = 281 | string(v.getId) 282 | 283 | def zoneOffset(v: ZoneOffset) = 284 | string(v.getId) 285 | 286 | def duration(v: Duration) = 287 | long(v.toMillis) 288 | 289 | def instant(v: Instant) = 290 | long(v.toEpochMilli) 291 | 292 | def localDate(v: LocalDate) = 293 | int(v.toEpochDay.toInt) 294 | 295 | def localTime(v: LocalTime) = 296 | int((v.toNanoOfDay / MICROS_FACTOR).toInt) 297 | 298 | def localDateTime(v: LocalDateTime) = { 299 | val dateMillis = v.toLocalDate.toEpochDay * MILLIS_PER_DAY 300 | val timeMillis = v.toLocalTime.toNanoOfDay / MICROS_FACTOR 301 | val epochMillis = dateMillis + timeMillis 302 | 303 | long(epochMillis) 304 | } 305 | 306 | def offsetTime(v: OffsetTime) = { 307 | val timeMillis = v.toLocalTime.toNanoOfDay / MICROS_FACTOR 308 | val offsetMillis = v.getOffset.getTotalSeconds * MILLIS_FACTOR 309 | val timeOffsetMillis = timeMillis - offsetMillis 310 | val dayMillis = if (timeOffsetMillis < 0) MILLIS_PER_DAY - timeOffsetMillis else timeOffsetMillis 311 | 312 | int(dayMillis.toInt) 313 | } 314 | 315 | def offsetDateTime(v: OffsetDateTime) = { 316 | val dateMillis = v.toLocalDate.toEpochDay * MILLIS_PER_DAY 317 | val timeMillis = v.toLocalTime.toNanoOfDay / MICROS_FACTOR 318 | val offsetMillis = v.getOffset.getTotalSeconds * MILLIS_FACTOR 319 | val timeOffsetMillis = timeMillis - offsetMillis 320 | val dayMillis = if (timeOffsetMillis < 0) MILLIS_PER_DAY - timeOffsetMillis else timeOffsetMillis 321 | val epochMillis = dateMillis + dayMillis 322 | 323 | long(epochMillis) 324 | } 325 | 326 | def zonedDateTime(v: ZonedDateTime) = { 327 | val dateMillis = v.toLocalDate.toEpochDay * MILLIS_PER_DAY 328 | val timeMillis = v.toLocalTime.toNanoOfDay / MICROS_FACTOR 329 | val offsetMillis = v.getOffset.getTotalSeconds * MILLIS_FACTOR 330 | val timeOffsetMillis = timeMillis - offsetMillis 331 | val dayMillis = if (timeOffsetMillis < 0) MILLIS_PER_DAY - timeOffsetMillis else timeOffsetMillis 332 | val epochMillis = dateMillis + dayMillis 333 | 334 | long(epochMillis) 335 | } 336 | 337 | def record(r: Map[String, Value]) = 338 | GroupValue.RecordValue(r) 339 | 340 | def list(vs: Chunk[Value]) = 341 | GroupValue.ListValue(vs) 342 | 343 | def map(kvs: Map[Value, Value]) = 344 | GroupValue.MapValue(kvs) 345 | } 346 | -------------------------------------------------------------------------------- /modules/core/src/test/scala/me/mnedokushev/zio/apache/parquet/core/codec/SchemaEncoderDeriverSpec.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Schemas.PrimitiveDef 4 | import me.mnedokushev.zio.apache.parquet.core.{ Fixtures, Schemas } 5 | import zio._ 6 | import zio.schema._ 7 | import zio.test._ 8 | 9 | import java.util.UUID 10 | //import scala.annotation.nowarn 11 | 12 | object SchemaEncoderDeriverSpec extends ZIOSpecDefault { 13 | 14 | sealed trait MyEnum 15 | object MyEnum { 16 | case object Started extends MyEnum 17 | case object InProgress extends MyEnum 18 | case object Done extends MyEnum 19 | 20 | implicit val schema: Schema[MyEnum] = DeriveSchema.gen[MyEnum] 21 | } 22 | 23 | case class Record(a: Int, b: Option[String]) 24 | object Record { 25 | implicit val schema: Schema[Record] = DeriveSchema.gen[Record] 26 | } 27 | 28 | // 
Helper for being able to extract type parameter A from a given schema in order to cast the type of encoder< 29 | private def encode[A](encoder: SchemaEncoder[?], schema: Schema[A], name: String, optional: Boolean) = 30 | encoder.asInstanceOf[SchemaEncoder[A]].encode(schema, name, optional) 31 | 32 | override def spec: Spec[TestEnvironment & Scope, Any] = 33 | suite("SchemaEncoderDeriverSpec")( 34 | test("primitive") { 35 | def named(defs: List[PrimitiveDef], names: List[String]) = 36 | defs.zip(names).map { case (schemaDef, name) => 37 | schemaDef.named(name) 38 | } 39 | 40 | val encoders: List[SchemaEncoder[?]] = 41 | List( 42 | Derive.derive[SchemaEncoder, String](SchemaEncoderDeriver.default), 43 | Derive.derive[SchemaEncoder, Boolean](SchemaEncoderDeriver.default), 44 | Derive.derive[SchemaEncoder, Byte](SchemaEncoderDeriver.default), 45 | Derive.derive[SchemaEncoder, Short](SchemaEncoderDeriver.default), 46 | Derive.derive[SchemaEncoder, Int](SchemaEncoderDeriver.default), 47 | Derive.derive[SchemaEncoder, Long](SchemaEncoderDeriver.default), 48 | Derive.derive[SchemaEncoder, UUID](SchemaEncoderDeriver.default) 49 | ) 50 | val schemas: List[Schema[?]] = 51 | List( 52 | Schema.primitive[String], 53 | Schema.primitive[Boolean], 54 | Schema.primitive[Byte], 55 | Schema.primitive[Short], 56 | Schema.primitive[Int], 57 | Schema.primitive[Long], 58 | Schema.primitive[UUID] 59 | ) 60 | val names = 61 | List( 62 | "string", 63 | "boolean", 64 | "byte", 65 | "short", 66 | "int", 67 | "long", 68 | "uuid" 69 | ) 70 | val schemaDefs = List( 71 | Schemas.string, 72 | Schemas.boolean, 73 | Schemas.byte, 74 | Schemas.short, 75 | Schemas.int, 76 | Schemas.long, 77 | Schemas.uuid 78 | ) 79 | val optionalDefs = 80 | schemaDefs.map(_.optional) 81 | val requiredDefs = 82 | schemaDefs.map(_.required) 83 | 84 | val expectedOptional = named(optionalDefs, names) 85 | val expectedRequired = named(requiredDefs, names) 86 | 87 | encoders 88 | .zip(schemas) 89 | .zip(names) 90 | .zip(expectedOptional) 91 | .zip(expectedRequired) 92 | .map { case ((((encoder, schema), name), expOptional), expRequired) => 93 | val tpeOptional = encode(encoder, schema, name, optional = true) 94 | val tpeRequired = encode(encoder, schema, name, optional = false) 95 | 96 | assertTrue(tpeOptional == expOptional, tpeRequired == expRequired) 97 | } 98 | .reduce(_ && _) 99 | }, 100 | test("record") { 101 | val name = "record" 102 | val encoder = Derive.derive[SchemaEncoder, Record](SchemaEncoderDeriver.default) 103 | val tpeOptional = encoder.encode(Record.schema, name, optional = true) 104 | val tpeRequired = encoder.encode(Record.schema, name, optional = false) 105 | val schemaDef = Schemas.record( 106 | Chunk( 107 | Schemas.int.required.named("a"), 108 | Schemas.string.optional.named("b") 109 | ) 110 | ) 111 | 112 | assertTrue( 113 | tpeOptional == schemaDef.optional.named(name), 114 | tpeRequired == schemaDef.required.named(name) 115 | ) 116 | }, 117 | test("record arity > 22") { 118 | val name = "arity" 119 | val encoder = Derive.derive[SchemaEncoder, Fixtures.Arity23](SchemaEncoderDeriver.default) 120 | val tpeOptional = encoder.encode(Fixtures.Arity23.schema, name, optional = true) 121 | val tpeRequired = encoder.encode(Fixtures.Arity23.schema, name, optional = false) 122 | val schemaDef = Schemas.record( 123 | Chunk( 124 | Schemas.int.required.named("a"), 125 | Schemas.string.optional.named("b"), 126 | Schemas.int.required.named("c"), 127 | Schemas.int.required.named("d"), 128 | Schemas.int.required.named("e"), 129 | 
Schemas.int.required.named("f"), 130 | Schemas.int.required.named("g"), 131 | Schemas.int.required.named("h"), 132 | Schemas.int.required.named("i"), 133 | Schemas.int.required.named("j"), 134 | Schemas.int.required.named("k"), 135 | Schemas.int.required.named("l"), 136 | Schemas.int.required.named("m"), 137 | Schemas.int.required.named("n"), 138 | Schemas.int.required.named("o"), 139 | Schemas.int.required.named("p"), 140 | Schemas.int.required.named("q"), 141 | Schemas.int.required.named("r"), 142 | Schemas.int.required.named("s"), 143 | Schemas.int.required.named("t"), 144 | Schemas.int.required.named("u"), 145 | Schemas.int.required.named("v"), 146 | Schemas.int.required.named("w") 147 | ) 148 | ) 149 | 150 | assertTrue( 151 | tpeOptional == schemaDef.optional.named(name), 152 | tpeRequired == schemaDef.required.named(name) 153 | ) 154 | }, 155 | test("sequence") { 156 | val name = "mylist" 157 | val encoders: List[SchemaEncoder[?]] = 158 | List( 159 | Derive.derive[SchemaEncoder, List[String]](SchemaEncoderDeriver.default), 160 | Derive.derive[SchemaEncoder, List[Boolean]](SchemaEncoderDeriver.default), 161 | Derive.derive[SchemaEncoder, List[Byte]](SchemaEncoderDeriver.default), 162 | Derive.derive[SchemaEncoder, List[Short]](SchemaEncoderDeriver.default), 163 | Derive.derive[SchemaEncoder, List[Int]](SchemaEncoderDeriver.default), 164 | Derive.derive[SchemaEncoder, List[Long]](SchemaEncoderDeriver.default), 165 | Derive.derive[SchemaEncoder, List[UUID]](SchemaEncoderDeriver.default), 166 | Derive.derive[SchemaEncoder, List[Option[String]]](SchemaEncoderDeriver.default), 167 | Derive.derive[SchemaEncoder, List[Option[Boolean]]](SchemaEncoderDeriver.default), 168 | Derive.derive[SchemaEncoder, List[Option[Byte]]](SchemaEncoderDeriver.default), 169 | Derive.derive[SchemaEncoder, List[Option[Short]]](SchemaEncoderDeriver.default), 170 | Derive.derive[SchemaEncoder, List[Option[Int]]](SchemaEncoderDeriver.default), 171 | Derive.derive[SchemaEncoder, List[Option[Long]]](SchemaEncoderDeriver.default), 172 | Derive.derive[SchemaEncoder, List[Option[UUID]]](SchemaEncoderDeriver.default) 173 | ) 174 | val schemas: List[Schema[?]] = 175 | List( 176 | Schema.list[String], 177 | Schema.list[Int], 178 | Schema.list[Option[String]], 179 | Schema.list[Option[Int]] 180 | ) 181 | val elements = 182 | List( 183 | Schemas.string, 184 | Schemas.boolean, 185 | Schemas.byte, 186 | Schemas.short, 187 | Schemas.int, 188 | Schemas.long, 189 | Schemas.uuid 190 | ) 191 | val schemaDefs = 192 | (elements.map(_.required) ++ elements.map(_.optional)) 193 | .map(_.named("element")) 194 | .map(Schemas.list) 195 | val expectedOptional = 196 | schemaDefs.map(_.optional.named(name)) 197 | val expectedRequired = 198 | schemaDefs.map(_.required.named(name)) 199 | 200 | encoders 201 | .zip(schemas) 202 | .zip(expectedOptional) 203 | .zip(expectedRequired) 204 | .map { case (((encoder, schema), expOptional), expRequired) => 205 | val tpeOptional = encode(encoder, schema, name, optional = true) 206 | val tpeRequired = encode(encoder, schema, name, optional = false) 207 | 208 | assertTrue( 209 | tpeOptional == expOptional, 210 | tpeRequired == expRequired 211 | ) 212 | } 213 | .reduce(_ && _) 214 | }, 215 | test("map") { 216 | val name = "mymap" 217 | val encoder = Derive.derive[SchemaEncoder, Map[String, Int]](SchemaEncoderDeriver.default) 218 | val tpe = encoder.encode(Schema.map[String, Int], name, optional = true) 219 | 220 | assertTrue( 221 | tpe == Schemas 222 | .map(Schemas.string.required.named("key"), 
Schemas.int.required.named("value")) 223 | .optional 224 | .named(name) 225 | ) 226 | }, 227 | test("enum") { 228 | val name = "myenum" 229 | val encoder = Derive.derive[SchemaEncoder, MyEnum](SchemaEncoderDeriver.default) 230 | val tpe = encoder.encode(Schema[MyEnum], name, optional = true) 231 | 232 | assertTrue(tpe == Schemas.enum0.optional.named(name)) 233 | } 234 | // test("summoned") { 235 | // // @nowarn annotation is needed to avoid having 'variable is not used' compiler error 236 | // @nowarn 237 | // implicit val intEncoder: SchemaEncoder[Int] = new SchemaEncoder[Int] { 238 | // override def encode(schema: Schema[Int], name: String, optional: Boolean): Type = 239 | // Schemas.uuid.optionality(optional).named(name) 240 | // } 241 | // 242 | // val name = "myrecord" 243 | // val encoder = Derive.derive[SchemaEncoder, Record](SchemaEncoderDeriver.summoned) 244 | // val tpe = encoder.encode(Record.schema, name, optional = true) 245 | // 246 | // assertTrue( 247 | // tpe == Schemas 248 | // .record(Chunk(Schemas.uuid.required.named("a"), Schemas.string.optional.named("b"))) 249 | // .optional 250 | // .named(name) 251 | // ) 252 | // } 253 | ) 254 | 255 | } 256 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](docs/logo.png) 2 | 3 | ![Build status](https://github.com/grouzen/zio-apache-parquet/actions/workflows/ci.yml/badge.svg) 4 | ![Maven Central](https://img.shields.io/maven-central/v/me.mnedokushev/zio-apache-parquet-core_2.13.svg?label=Maven%20central) 5 | [![Scala Steward badge](https://img.shields.io/badge/Scala_Steward-helping-blue.svg?style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA4AAAAQCAMAAAARSr4IAAAAVFBMVEUAAACHjojlOy5NWlrKzcYRKjGFjIbp293YycuLa3pYY2LSqql4f3pCUFTgSjNodYRmcXUsPD/NTTbjRS+2jomhgnzNc223cGvZS0HaSD0XLjbaSjElhIr+AAAAAXRSTlMAQObYZgAAAHlJREFUCNdNyosOwyAIhWHAQS1Vt7a77/3fcxxdmv0xwmckutAR1nkm4ggbyEcg/wWmlGLDAA3oL50xi6fk5ffZ3E2E3QfZDCcCN2YtbEWZt+Drc6u6rlqv7Uk0LdKqqr5rk2UCRXOk0vmQKGfc94nOJyQjouF9H/wCc9gECEYfONoAAAAASUVORK5CYII=)](https://scala-steward.org) 6 | 7 | # ZIO Apache Parquet 8 | 9 | A ZIO-powered wrapper for [Apache Parquet's Java implementation](https://github.com/apache/parquet-mr), leveraging [ZIO Schema](https://zio.dev/zio-schema/) to automatically derive codecs and provide type-safe filter predicates. Operate your parquet files easily using a top-notch ZIO-powered ecosystem without running a Spark cluster. 10 | 11 | Ready for more? Check out my other game-changing library that makes working with Apache Arrow format a breeze - [ZIO Apache Arrow](https://github.com/grouzen/zio-apache-arrow). 12 | 13 | ## Why? 14 | 15 | - **No Spark required** - you don't need to run a Spark cluster to read/write Parquet files. 16 | - **ZIO native** - utilizes various ZIO features to offer a FP-oriented way of working with the Parquet API. 17 | - **ZIO Schema** - the backbone that powers all the cool features of this library such as type-safe filter predicates and codecs derivation. 18 | 19 | 20 | ## Contents 21 | 22 | - [Installation](#installation) 23 | - [Usage](#usage) 24 | - [Codecs](#codecs) 25 | - [Schema](#schema) 26 | - [Value](#value) 27 | - [Reading & Writing files](#reading--writing-files) 28 | - [Filtering](#filtering) 29 | - [Resources](#resources) 30 | 31 | ## Installation 32 | 33 | ```scala 34 | libraryDependencies += "me.mnedokushev" %% "zio-apache-parquet-core" % "@VERSION@" 35 | ``` 36 | 37 | ## Usage 38 | 39 | All examples are self-contained [Scala CLI](https://scala-cli.virtuslab.org) snippets. You can find copies of them in `docs/scala-cli`. 40 | 41 | ### Codecs 42 | 43 | To be able to write/read data to/from parquet files you need to define the following schema and value codecs 44 | `SchemaEncoder`, `ValueEncoder`, and `ValueDecoder` for your case classes. 
45 | 46 | #### Schema 47 | 48 | You can get the Java SDK's `Type` by using a `SchemaEncoder` generated by the `SchemaEncoderDeriver.default` ZIO Schema deriver: 49 | 50 | ```scala 51 | //> using scala "3.7.1" 52 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.1 53 | 54 | import zio.schema.* 55 | import me.mnedokushev.zio.apache.parquet.core.codec.* 56 | 57 | object Schema extends App: 58 | 59 | case class MyRecord(a: Int, b: String, c: Option[Long]) 60 | 61 | object MyRecord: 62 | given schema: Schema[MyRecord] = 63 | DeriveSchema.gen[MyRecord] 64 | given schemaEncoder: SchemaEncoder[MyRecord] = 65 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 66 | 67 | val parquetSchema = MyRecord.schemaEncoder.encode(MyRecord.schema, "my_record", optional = false) 68 | 69 | println(parquetSchema) 70 | // Outputs: 71 | // required group my_record { 72 | // required int32 a (INTEGER(32,true)); 73 | // required binary b (STRING); 74 | // optional int64 c (INTEGER(64,true)); 75 | // } 76 | ``` 77 | 78 | Alternatively, you can customize the schemas of [primitive](https://zio.dev/zio-schema/standard-type-reference) fields within your record by defining a custom `SchemaEncoder` 79 | and using the `SchemaEncoderDeriver.summoned` deriver. 80 | 81 | ```scala 82 | //> using scala "3.7.1" 83 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.1 84 | 85 | import me.mnedokushev.zio.apache.parquet.core.Schemas 86 | import zio.schema.* 87 | import me.mnedokushev.zio.apache.parquet.core.codec.* 88 | 89 | object SchemaSummoned extends App: 90 | 91 | case class MyRecord(a: Int, b: String, c: Option[Long]) 92 | 93 | object MyRecord: 94 | given schema: Schema[MyRecord] = 95 | DeriveSchema.gen[MyRecord] 96 | // The custom encoder must be defined before the definition for your record type. 97 | given SchemaEncoder[Int] with { 98 | override def encode(schema: Schema[Int], name: String, optional: Boolean) = 99 | Schemas.uuid.optionality(optional).named(name) 100 | } 101 | given schemaEncoder: SchemaEncoder[MyRecord] = 102 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.summoned) 103 | 104 | val parquetSchema = MyRecord.schemaEncoder.encode(MyRecord.schema, "my_record", optional = false) 105 | 106 | println(parquetSchema) 107 | // Outputs: 108 | // required group my_record { 109 | // required fixed_len_byte_array(16) a (UUID); 110 | // required binary b (STRING); 111 | // optional int64 c (INTEGER(64,true)); 112 | // } 113 | ``` 114 | 115 | Case classes with arity greater than 22 are supported too. Check out the [SchemaArity23.scala Scala CLI example](docs/scala-cli/SchemaArity23.scala)! 116 | 117 | #### Value 118 | 119 | `Value` is a sealed hierarchy of types for interop between Scala values and Parquet readers/writers. 120 | For converting Scala values into `Value` and back we need to define instances of the `ValueEncoder` and `ValueDecoder` 121 | type classes. This can be done by using the `ValueEncoderDeriver.default` and `ValueDecoderDeriver.default` ZIO Schema derivers.
122 | 123 | ```scala 124 | //> using scala "3.7.1" 125 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.1 126 | 127 | import zio.schema.* 128 | import me.mnedokushev.zio.apache.parquet.core.codec.* 129 | 130 | object Value extends App: 131 | 132 | case class MyRecord(a: Int, b: String, c: Option[Long]) 133 | 134 | object MyRecord: 135 | given Schema[MyRecord] = 136 | DeriveSchema.gen[MyRecord] 137 | given encoder: ValueEncoder[MyRecord] = 138 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 139 | given decoder: ValueDecoder[MyRecord] = 140 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 141 | 142 | val value = MyRecord.encoder.encode(MyRecord(3, "zio", None)) 143 | val record = MyRecord.decoder.decode(value) 144 | 145 | println(value) 146 | // Outputs: 147 | // RecordValue(Map(a -> Int32Value(3), b -> BinaryValue(Binary{"zio"}), c -> NullValue)) 148 | println(record) 149 | // Outputs: 150 | // MyRecord(3,zio,None) 151 | ``` 152 | 153 | Same as for `SchemaEncoder`, you can customize the codecs of primitive types by defining custom 154 | `ValueEncoder`/`ValueDecoder` and using `ValueEncoderDeriver.summoned`/`ValueDecoderDeriver.summoned` derivers accordingly. 155 | 156 | ```scala 157 | //> using scala "3.7.1" 158 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.1 159 | 160 | import me.mnedokushev.zio.apache.parquet.core.Value 161 | import zio.schema.* 162 | import me.mnedokushev.zio.apache.parquet.core.codec.* 163 | 164 | import java.nio.charset.StandardCharsets 165 | 166 | object ValueSummoned extends App: 167 | 168 | case class MyRecord(a: Int, b: String, c: Option[Long]) 169 | 170 | object MyRecord: 171 | given Schema[MyRecord] = 172 | DeriveSchema.gen[MyRecord] 173 | given ValueEncoder[Int] with { 174 | override def encode(value: Int): Value = 175 | Value.string(value.toString) 176 | } 177 | given ValueDecoder[Int] with { 178 | override def decode(value: Value): Int = 179 | value match { 180 | case Value.PrimitiveValue.BinaryValue(v) => 181 | new String(v.getBytes, StandardCharsets.UTF_8).toInt 182 | case other => 183 | throw DecoderError(s"Wrong value: $other") 184 | } 185 | } 186 | given encoder: ValueEncoder[MyRecord] = 187 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.summoned) 188 | given decoder: ValueDecoder[MyRecord] = 189 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.summoned) 190 | 191 | val value = MyRecord.encoder.encode(MyRecord(3, "zio", None)) 192 | val record = MyRecord.decoder.decode(value) 193 | 194 | println(value) 195 | // Outputs: 196 | // RecordValue(Map(a -> BinaryValue(Binary{"3"}), b -> BinaryValue(Binary{"zio"}), c -> NullValue)) 197 | println(record) 198 | // Outputs: 199 | // MyRecord(3,zio,None) 200 | ``` 201 | 202 | ### Reading & Writing files 203 | 204 | Finally, to perform some IO operations we need to initialize `ParquetWriter` and `ParquetReader` and use either 205 | `writeChunk`/`readChunk` or `writeStream`/`readStream` methods. 
206 | 207 | ```scala 208 | //> using scala "3.7.1" 209 | //> using dep me.mnedokushev::zio-apache-parquet-hadoop:0.3.1 210 | 211 | import zio.schema.* 212 | import me.mnedokushev.zio.apache.parquet.core.codec.* 213 | import me.mnedokushev.zio.apache.parquet.hadoop.{ ParquetReader, ParquetWriter, Path } 214 | import zio.* 215 | 216 | import java.nio.file.Files 217 | 218 | object ParquetIO extends ZIOAppDefault: 219 | 220 | case class MyRecord(a: Int, b: String, c: Option[Long]) 221 | 222 | object MyRecord: 223 | given Schema[MyRecord] = 224 | DeriveSchema.gen[MyRecord] 225 | given SchemaEncoder[MyRecord] = 226 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 227 | given ValueEncoder[MyRecord] = 228 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 229 | given ValueDecoder[MyRecord] = 230 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 231 | 232 | val data = 233 | Chunk( 234 | MyRecord(1, "first", Some(11)), 235 | MyRecord(3, "third", None) 236 | ) 237 | 238 | val recordsFile = Path(Files.createTempDirectory("records")) / "records.parquet" 239 | 240 | override def run = 241 | (for { 242 | writer <- ZIO.service[ParquetWriter[MyRecord]] 243 | reader <- ZIO.service[ParquetReader[MyRecord]] 244 | _ <- writer.writeChunk(recordsFile, data) 245 | fromFile <- reader.readChunk(recordsFile) 246 | _ <- Console.printLine(fromFile) 247 | } yield ()).provide( 248 | ParquetWriter.configured[MyRecord](), 249 | ParquetReader.configured[MyRecord]() 250 | ) 251 | // Outputs: 252 | // Chunk(MyRecord(1,first,Some(11)),MyRecord(3,third,None)) 253 | ``` 254 | 255 | In the code snippet above we used `ParquetReader.configured[A]()` to initialize a reader that takes its Parquet schema from the file being read. Such a reader always reads all columns of the file. 256 | 257 | If you need to read only a subset of the columns, use `ParquetReader.projected[A]()`. It reads only the columns present in the schema of `A` and skips the rest, saving CPU cycles and time. 258 | 259 | #### Filtering 260 | 261 | Say goodbye to type-unsafe filter predicates such as `Col("foo") != "bar"`. The library takes advantage of an underdocumented feature of ZIO Schema, [Accessors](https://github.com/zio/zio-schema/blob/main/zio-schema/shared/src/main/scala/zio/schema/Schema.scala#L38), a hidden gem that allows extracting type-level information about the fields of case classes. In addition to the codecs shown above, you need to provide an instance of `TypeTag` for your record type. For this, use the `TypeTagDeriver.default` deriver.
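To see what the filter DSL produces on its own, here is a hypothetical standalone sketch that only compiles a predicate, mirroring what `ExprSpec.scala` does later in this repository. The object name `FilterCompile` and the column names are illustrative; `filter(...)` yields an `Either` carrying the compiled Parquet `FilterPredicate` on the right (as the test suite asserts with `isRight`).

```scala
//> using scala "3.7.1"
//> using dep me.mnedokushev::zio-apache-parquet-core:0.3.1

import zio.schema.*
import me.mnedokushev.zio.apache.parquet.core.filter.syntax.*
import me.mnedokushev.zio.apache.parquet.core.filter.*

object FilterCompile extends App:

  case class MyRecord(a: Int, b: String, c: Option[Long])

  object MyRecord:
    // Field names must be provided as singleton types, as in the full example below.
    given Schema.CaseClass3.WithFields["a", "b", "c", Int, String, Option[Long], MyRecord] =
      DeriveSchema.gen[MyRecord]
    given TypeTag[MyRecord] =
      Derive.derive[TypeTag, MyRecord](TypeTagDeriver.default)

    // Column accessors, as in the filtering example below.
    val (a, b, c) = Filter[MyRecord].columns

  // Compile a predicate without touching any files. The Right side of the
  // printed Either carries the underlying Parquet FilterPredicate.
  println(filter(MyRecord.a > 1 `and` MyRecord.b =!= "bar"))
```

The complete end-to-end example below combines this with the codecs, a writer, and a reader: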
262 | 263 | ```scala 264 | //> using scala "3.7.1" 265 | //> using dep me.mnedokushev::zio-apache-parquet-hadoop:0.3.1 266 | 267 | import zio.* 268 | import zio.schema.* 269 | import me.mnedokushev.zio.apache.parquet.core.codec.* 270 | import me.mnedokushev.zio.apache.parquet.hadoop.{ ParquetReader, ParquetWriter, Path } 271 | import me.mnedokushev.zio.apache.parquet.core.filter.syntax.* 272 | import me.mnedokushev.zio.apache.parquet.core.filter.* 273 | 274 | import java.nio.file.Files 275 | 276 | object Filtering extends ZIOAppDefault: 277 | 278 | case class MyRecord(a: Int, b: String, c: Option[Long]) 279 | 280 | object MyRecord: 281 | // We need to provide field names as singleton types 282 | given Schema.CaseClass3.WithFields["a", "b", "c", Int, String, Option[Long], MyRecord] = 283 | DeriveSchema.gen[MyRecord] 284 | given SchemaEncoder[MyRecord] = 285 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 286 | given ValueEncoder[MyRecord] = 287 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 288 | given ValueDecoder[MyRecord] = 289 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 290 | given TypeTag[MyRecord] = 291 | Derive.derive[TypeTag, MyRecord](TypeTagDeriver.default) 292 | 293 | // Define accessors to use them later in the filter predicate. 294 | // You can give the accessors any names, as demonstrated here. 295 | val (id, name, age) = Filter[MyRecord].columns 296 | 297 | val data = 298 | Chunk( 299 | MyRecord(1, "bob", Some(10L)), 300 | MyRecord(2, "bob", Some(12L)), 301 | MyRecord(3, "alice", Some(13L)), 302 | MyRecord(4, "john", None) 303 | ) 304 | 305 | val recordsFile = Path(Files.createTempDirectory("records")) / "records.parquet" 306 | 307 | override def run = 308 | ( 309 | for { 310 | writer <- ZIO.service[ParquetWriter[MyRecord]] 311 | reader <- ZIO.service[ParquetReader[MyRecord]] 312 | _ <- writer.writeChunk(recordsFile, data) 313 | fromFile <- reader.readChunkFiltered( 314 | recordsFile, 315 | filter( 316 | MyRecord.id > 1 `and` ( 317 | MyRecord.name =!= "bob" `or` 318 | // Use .nullable syntax for optional fields. 319 | MyRecord.age.nullable > 10L 320 | ) 321 | ) 322 | ) 323 | _ <- Console.printLine(fromFile) 324 | } yield () 325 | ).provide( 326 | ParquetWriter.configured[MyRecord](), 327 | ParquetReader.configured[MyRecord]() 328 | ) 329 | // Outputs: 330 | // Chunk(MyRecord(2,bob,Some(12)),MyRecord(3,alice,Some(13)),MyRecord(4,john,None)) 331 | ``` 332 | 333 | ## Resources 334 | 335 | - [Unpacking ZIO Schema's Accessors](https://mnedokushev.me/2024/09/05/unpacking-zio-schema-accessors.html) - A post on my personal blog exploring how ZIO Schema's underdocumented Accessors enable type-safe filtering. 336 | - [Scala's Hidden Treasures: Five ZIO-Compatible Libraries you didn't know you needed!](https://jorgevasquez.blog/scalas-hidden-treasures-five-zio-compatible-libraries-you-didnt-know-you-needed) - An article on Jorge Vásquez's blog accompanying his presentation at the [Functional Scala 2024 Conference](https://www.functionalscala.com). You can find more details in the [slides](https://jorge-vasquez-2301.github.io/scalas-hidden-treasures/24), and the recording is available on Ziverge's YouTube channel [here](https://www.youtube.com/watch?v=iFhQibDdqT0&list=PLvdARMfvom9CuM40p_Yr3UAtlADSKC2Js).
337 | - [Overview page on ZIO's official community ecosystem website](https://zio.dev/ecosystem/community/zio-apache-parquet/) - For a brief overview, visit this page on ZIO's official community ecosystem website. 338 | -------------------------------------------------------------------------------- /modules/core/src/test/scala/me/mnedokushev/zio/apache/parquet/core/filter/ExprSpec.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Fixtures._ 4 | import me.mnedokushev.zio.apache.parquet.core.Value 5 | import me.mnedokushev.zio.apache.parquet.core.filter.TypeTag._ 6 | import me.mnedokushev.zio.apache.parquet.core.filter.syntax._ 7 | import org.apache.parquet.filter2.predicate.FilterApi 8 | import zio._ 9 | import zio.test.Assertion.{ equalTo, isRight } 10 | import zio.test._ 11 | 12 | import java.time._ 13 | import java.util.{ Currency, UUID } 14 | import scala.jdk.CollectionConverters._ 15 | 16 | object ExprSpec extends ZIOSpecDefault { 17 | 18 | override def spec: Spec[TestEnvironment & Scope, Any] = 19 | suite("ExprSpec")( 20 | test("compile all operators") { 21 | val (a, b, _, _, _) = Filter[MyRecord].columns 22 | 23 | val result = filter( 24 | not( 25 | (b >= 3 `or` b <= 100 `and` a.in(Set("foo", "bar"))) `or` 26 | (a === "foo" `and` (b === 20 `or` b.notIn(Set(1, 2, 3)))) `or` 27 | (a =!= "foo" `and` b > 2 `and` b < 10) 28 | ) 29 | ) 30 | 31 | val acol = FilterApi.binaryColumn("a") 32 | val bcol = FilterApi.intColumn("b") 33 | val expected = 34 | FilterApi.not( 35 | FilterApi.or( 36 | FilterApi.or( 37 | FilterApi.and( 38 | FilterApi.or( 39 | FilterApi.gtEq(bcol, Int.box(Value.int(3).value)), 40 | FilterApi.ltEq(bcol, Int.box(Value.int(100).value)) 41 | ), 42 | FilterApi.in(acol, Set(Value.string("foo").value, Value.string("bar").value).asJava) 43 | ), 44 | FilterApi.and( 45 | FilterApi.eq(acol, Value.string("foo").value), 46 | FilterApi.or( 47 | FilterApi.eq(bcol, Int.box(Value.int(20).value)), 48 | FilterApi.notIn(bcol, Set(1, 2, 3).map(i => Int.box(Value.int(i).value)).asJava) 49 | ) 50 | ) 51 | ), 52 | FilterApi.and( 53 | FilterApi.and( 54 | FilterApi.notEq(acol, Value.string("foo").value), 55 | FilterApi.gt(bcol, Int.box(Value.int(2).value)) 56 | ), 57 | FilterApi.lt(bcol, Int.box(Value.int(10).value)) 58 | ) 59 | ) 60 | ) 61 | 62 | assert(result)(isRight(equalTo(expected))) 63 | }, 64 | test("compile summoned") { 65 | val (a, b) = Filter[MyRecordSummoned].columns 66 | 67 | val result = filter( 68 | a === 3 `and` b === "foo" 69 | ) 70 | 71 | val acol = FilterApi.binaryColumn("a") 72 | val bcol = FilterApi.binaryColumn("b") 73 | val expected = FilterApi.and( 74 | FilterApi.eq(acol, Value.string("3").value), 75 | FilterApi.eq(bcol, Value.string("foo").value) 76 | ) 77 | 78 | assert(result)(isRight(equalTo(expected))) 79 | }, 80 | test("compile all primitive types") { 81 | val ( 82 | string, 83 | boolean, 84 | byte, 85 | short, 86 | int, 87 | long, 88 | float, 89 | double, 90 | binary, 91 | char, 92 | uuid, 93 | currency, 94 | bigDecimal, 95 | bigInteger, 96 | dayOfWeek, 97 | month, 98 | monthDay, 99 | period, 100 | year, 101 | yearMonth, 102 | zoneId, 103 | zoneOffset 104 | ) = Filter[MyRecordAllTypes1].columns 105 | 106 | val ( 107 | duration, 108 | instant, 109 | localDate, 110 | localTime, 111 | localDateTime, 112 | offsetTime, 113 | offsetDateTime, 114 | zonedDateTime 115 | ) = Filter[MyRecordAllTypes2].columns 116 | 117 | val 
stringPayload = "foo" 118 | val booleanPayload = true 119 | val bytePayload = 1.toByte 120 | val shortPayload = 1.toShort 121 | val intPayload = 1 122 | val longPayload = 1L 123 | val floatPayload = 1.0f 124 | val doublePayload = 1.0 125 | val binaryPayload = Chunk(1.toByte, 2.toByte) 126 | val charPayload = 'c' 127 | val uuidPayload = UUID.randomUUID() 128 | val currencyPayload = Currency.getInstance("USD") 129 | val bigDecimalPayload = new java.math.BigDecimal("1.0") 130 | val bigIntegerPayload = new java.math.BigInteger("99999999999") 131 | val dayOfWeekPayload = DayOfWeek.of(1) 132 | val monthPayload = Month.of(1) 133 | val monthDayPayload = MonthDay.of(1, 1) 134 | val periodPayload = Period.of(1, 1, 1) 135 | val yearPayload = Year.of(1) 136 | val yearMonthPayload = YearMonth.of(1, 1) 137 | val zoneIdPayload = ZoneId.of("Europe/Paris") 138 | val zoneOffsetPayload = ZoneOffset.of("+02:00") 139 | val durationPayload = 1.second 140 | val instantPayload = Instant.ofEpochMilli(1) 141 | val localDatePayload = LocalDate.ofEpochDay(1) 142 | val localTimePayload = LocalTime.ofInstant(instantPayload, zoneIdPayload) 143 | val localDateTimePayload = LocalDateTime.of(localDatePayload, localTimePayload) 144 | val offsetTimePayload = OffsetTime.ofInstant(instantPayload, zoneIdPayload) 145 | val offsetDateTimePayload = OffsetDateTime.ofInstant(instantPayload, zoneIdPayload) 146 | val zonedDateTimePayload = ZonedDateTime.ofInstant(localDateTimePayload, zoneOffsetPayload, zoneIdPayload) 147 | 148 | val stringExpected = FilterApi.eq( 149 | FilterApi.binaryColumn("string"), 150 | Value.string(stringPayload).value 151 | ) 152 | val booleanExpected = FilterApi.eq( 153 | FilterApi.booleanColumn("boolean"), 154 | Boolean.box(Value.boolean(booleanPayload).value) 155 | ) 156 | val byteExpected = FilterApi.eq( 157 | FilterApi.intColumn("byte"), 158 | Int.box(Value.byte(bytePayload).value) 159 | ) 160 | val shortExpected = FilterApi.eq( 161 | FilterApi.intColumn("short"), 162 | Int.box(Value.short(shortPayload).value) 163 | ) 164 | val intExpected = FilterApi.eq( 165 | FilterApi.intColumn("int"), 166 | Int.box(Value.int(intPayload).value) 167 | ) 168 | val longExpected = FilterApi.eq( 169 | FilterApi.longColumn("long"), 170 | Long.box(Value.long(longPayload).value) 171 | ) 172 | val floatExpected = FilterApi.eq( 173 | FilterApi.floatColumn("float"), 174 | Float.box(Value.float(floatPayload).value) 175 | ) 176 | val doubleExpected = FilterApi.eq( 177 | FilterApi.doubleColumn("double"), 178 | Double.box(Value.double(doublePayload).value) 179 | ) 180 | val binaryExpected = FilterApi.eq( 181 | FilterApi.binaryColumn("binary"), 182 | Value.binary(binaryPayload).value 183 | ) 184 | val charExpected = FilterApi.eq( 185 | FilterApi.intColumn("char"), 186 | Int.box(Value.char(charPayload).value) 187 | ) 188 | val uuidExpected = FilterApi.eq( 189 | FilterApi.binaryColumn("uuid"), 190 | Value.uuid(uuidPayload).value 191 | ) 192 | val currencyExpected = FilterApi.eq( 193 | FilterApi.binaryColumn("currency"), 194 | Value.currency(currencyPayload).value 195 | ) 196 | val bigDecimalExpected = FilterApi.eq( 197 | FilterApi.longColumn("bigDecimal"), 198 | Long.box(Value.bigDecimal(bigDecimalPayload).value) 199 | ) 200 | val bigIntegerExpected = FilterApi.eq( 201 | FilterApi.binaryColumn("bigInteger"), 202 | Value.bigInteger(bigIntegerPayload).value 203 | ) 204 | val dayOfWeekExpected = FilterApi.eq( 205 | FilterApi.intColumn("dayOfWeek"), 206 | Int.box(Value.dayOfWeek(dayOfWeekPayload).value) 207 | ) 208 | val monthExpected = 
FilterApi.eq( 209 | FilterApi.intColumn("month"), 210 | Int.box(Value.month(monthPayload).value) 211 | ) 212 | val monthDayExpected = FilterApi.eq( 213 | FilterApi.binaryColumn("monthDay"), 214 | Value.monthDay(monthDayPayload).value 215 | ) 216 | val periodExpected = FilterApi.eq( 217 | FilterApi.binaryColumn("period"), 218 | Value.period(periodPayload).value 219 | ) 220 | val yearExpected = FilterApi.eq( 221 | FilterApi.intColumn("year"), 222 | Int.box(Value.year(yearPayload).value) 223 | ) 224 | val yearMonthExpected = FilterApi.eq( 225 | FilterApi.binaryColumn("yearMonth"), 226 | Value.yearMonth(yearMonthPayload).value 227 | ) 228 | val zoneIdExpected = FilterApi.eq( 229 | FilterApi.binaryColumn("zoneId"), 230 | Value.zoneId(zoneIdPayload).value 231 | ) 232 | val zoneOffsetExpected = FilterApi.eq( 233 | FilterApi.binaryColumn("zoneOffset"), 234 | Value.zoneOffset(zoneOffsetPayload).value 235 | ) 236 | val durationExpected = FilterApi.eq( 237 | FilterApi.longColumn("duration"), 238 | Long.box(Value.duration(durationPayload).value) 239 | ) 240 | val instantExpected = FilterApi.eq( 241 | FilterApi.longColumn("instant"), 242 | Long.box(Value.instant(instantPayload).value) 243 | ) 244 | val localDateExpected = FilterApi.eq( 245 | FilterApi.intColumn("localDate"), 246 | Int.box(Value.localDate(localDatePayload).value) 247 | ) 248 | val localTimeExpected = FilterApi.eq( 249 | FilterApi.intColumn("localTime"), 250 | Int.box(Value.localTime(localTimePayload).value) 251 | ) 252 | val localDateTimeExpected = FilterApi.eq( 253 | FilterApi.longColumn("localDateTime"), 254 | Long.box(Value.localDateTime(localDateTimePayload).value) 255 | ) 256 | val offsetTimeExpected = FilterApi.eq( 257 | FilterApi.intColumn("offsetTime"), 258 | Int.box(Value.offsetTime(offsetTimePayload).value) 259 | ) 260 | val offsetDateTimeExpected = FilterApi.eq( 261 | FilterApi.longColumn("offsetDateTime"), 262 | Long.box(Value.offsetDateTime(offsetDateTimePayload).value) 263 | ) 264 | val zonedDateTimeExpected = FilterApi.eq( 265 | FilterApi.longColumn("zonedDateTime"), 266 | Long.box(Value.zonedDateTime(zonedDateTimePayload).value) 267 | ) 268 | 269 | val stringResul = filter(string === stringPayload) 270 | val booleanResult = filter(boolean === booleanPayload) 271 | val byteResult = filter(byte === bytePayload) 272 | val shortResult = filter(short === shortPayload) 273 | val intResult = filter(int === intPayload) 274 | val longResult = filter(long === longPayload) 275 | val floatResult = filter(float === floatPayload) 276 | val doubleResult = filter(double === doublePayload) 277 | val binaryResult = filter(binary === binaryPayload) 278 | val charResult = filter(char === charPayload) 279 | val uuidResult = filter(uuid === uuidPayload) 280 | val currencyResult = filter(currency === currencyPayload) 281 | val bigDecimalResult = filter(bigDecimal === bigDecimalPayload) 282 | val bigIntegerResult = filter(bigInteger === bigIntegerPayload) 283 | val dayOfWeekResult = filter(dayOfWeek === dayOfWeekPayload) 284 | val monthResult = filter(month === monthPayload) 285 | val monthDayResult = filter(monthDay === monthDayPayload) 286 | val periodResult = filter(period === periodPayload) 287 | val yearResult = filter(year === yearPayload) 288 | val yearMonthResult = filter(yearMonth === yearMonthPayload) 289 | val zoneIdResult = filter(zoneId === zoneIdPayload) 290 | val zoneOffsetResult = filter(zoneOffset === zoneOffsetPayload) 291 | val durationResult = filter(duration === durationPayload) 292 | val instantResult = filter(instant === 
instantPayload) 293 | val localDateResult = filter(localDate === localDatePayload) 294 | val localTimeResult = filter(localTime === localTimePayload) 295 | val localDateTimeResult = filter(localDateTime === localDateTimePayload) 296 | val offsetTimeResult = filter(offsetTime === offsetTimePayload) 297 | val offsetDateTimeResult = filter(offsetDateTime === offsetDateTimePayload) 298 | val zonedDateTimeResult = filter(zonedDateTime === zonedDateTimePayload) 299 | 300 | assert(stringResul)(isRight(equalTo(stringExpected))) && 301 | assert(booleanResult)(isRight(equalTo(booleanExpected))) && 302 | assert(byteResult)(isRight(equalTo(byteExpected))) && 303 | assert(shortResult)(isRight(equalTo(shortExpected))) && 304 | assert(intResult)(isRight(equalTo(intExpected))) && 305 | assert(longResult)(isRight(equalTo(longExpected))) && 306 | assert(floatResult)(isRight(equalTo(floatExpected))) && 307 | assert(doubleResult)(isRight(equalTo(doubleExpected))) && 308 | assert(binaryResult)(isRight(equalTo(binaryExpected))) && 309 | assert(charResult)(isRight(equalTo(charExpected))) && 310 | assert(uuidResult)(isRight(equalTo(uuidExpected))) && 311 | assert(currencyResult)(isRight(equalTo(currencyExpected))) && 312 | assert(bigDecimalResult)(isRight(equalTo(bigDecimalExpected))) && 313 | assert(bigIntegerResult)(isRight(equalTo(bigIntegerExpected))) && 314 | assert(dayOfWeekResult)(isRight(equalTo(dayOfWeekExpected))) && 315 | assert(monthResult)(isRight(equalTo(monthExpected))) && 316 | assert(monthDayResult)(isRight(equalTo(monthDayExpected))) && 317 | assert(periodResult)(isRight(equalTo(periodExpected))) && 318 | assert(yearResult)(isRight(equalTo(yearExpected))) && 319 | assert(yearMonthResult)(isRight(equalTo(yearMonthExpected))) && 320 | assert(zoneIdResult)(isRight(equalTo(zoneIdExpected))) && 321 | assert(zoneOffsetResult)(isRight(equalTo(zoneOffsetExpected))) && 322 | assert(durationResult)(isRight(equalTo(durationExpected))) && 323 | assert(instantResult)(isRight(equalTo(instantExpected))) && 324 | assert(localDateResult)(isRight(equalTo(localDateExpected))) && 325 | assert(localTimeResult)(isRight(equalTo(localTimeExpected))) && 326 | assert(localDateTimeResult)(isRight(equalTo(localDateTimeExpected))) && 327 | assert(offsetTimeResult)(isRight(equalTo(offsetTimeExpected))) && 328 | assert(offsetDateTimeResult)(isRight(equalTo(offsetDateTimeExpected))) && 329 | assert(zonedDateTimeResult)(isRight(equalTo(zonedDateTimeExpected))) 330 | }, 331 | test("compile option") { 332 | // TODO: test failing compile-time cases 333 | val (_, _, _, _, opt) = Filter[MyRecord].columns 334 | 335 | val expected = FilterApi.gt(FilterApi.intColumn("opt"), Int.box(Value.int(3).value)) 336 | val result = filter(opt.nullable > 3) 337 | 338 | assert(result)(isRight(equalTo(expected))) 339 | }, 340 | test("compile enum") { 341 | val (_, _, _, enm, _) = Filter[MyRecord].columns 342 | 343 | val result = filter(enm === MyRecord.Enum.Done) 344 | val expected = FilterApi.eq(FilterApi.binaryColumn("enm"), Value.string("Done").value) 345 | 346 | assert(result)(isRight(equalTo(expected))) 347 | }, 348 | test("column path concatenation") { 349 | // TODO: test failing compile-time cases 350 | // Show the macro determines the names of the parent/child fields no matter how we name 351 | // the variables that represent columns 352 | val (_, _, child0, _, _) = Filter[MyRecord].columns 353 | val (c0, d0) = Filter[MyRecord.Child].columns 354 | 355 | assert(concat(child0, c0).path)(equalTo("child.c")) && 356 | assert(concat(child0, 
d0).path)(equalTo("child.d")) 357 | } 358 | ) 359 | 360 | } 361 | --------------------------------------------------------------------------------