├── .sbtopts ├── project ├── build.properties ├── plugins.sbt ├── BuildHelper.scala └── Dep.scala ├── docs ├── logo.png └── scala-cli │ ├── Schema.scala │ ├── Value.scala │ ├── SchemaArity23.scala │ ├── SchemaSummoned.scala │ ├── ValueSummoned.scala │ ├── ParquetIO.scala │ └── Filtering.scala ├── .git-blame-ignore-revs ├── modules ├── hadoop │ └── src │ │ ├── test │ │ ├── resources │ │ │ └── food.parquet │ │ └── scala │ │ │ └── me │ │ │ └── mnedokushev │ │ │ └── zio │ │ │ └── apache │ │ │ └── parquet │ │ │ └── hadoop │ │ │ ├── ValueConverterSpec.scala │ │ │ └── ParquetIOSpec.scala │ │ └── main │ │ └── scala │ │ └── me │ │ └── mnedokushev │ │ └── zio │ │ └── apache │ │ └── parquet │ │ └── hadoop │ │ ├── Path.scala │ │ ├── WriteSupport.scala │ │ ├── ReadSupport.scala │ │ ├── ParquetWriter.scala │ │ ├── ParquetReader.scala │ │ └── GroupValueConverter.scala └── core │ └── src │ ├── main │ ├── scala │ │ └── me │ │ │ └── mnedokushev │ │ │ └── zio │ │ │ └── apache │ │ │ └── parquet │ │ │ └── core │ │ │ ├── filter │ │ │ ├── FilterError.scala │ │ │ ├── package.scala │ │ │ ├── Filter.scala │ │ │ ├── ExprAccessorBuilder.scala │ │ │ ├── Column.scala │ │ │ ├── Operator.scala │ │ │ ├── TypeTagDeriver.scala │ │ │ ├── Predicate.scala │ │ │ ├── OperatorSupport.scala │ │ │ └── TypeTag.scala │ │ │ ├── codec │ │ │ ├── DecoderError.scala │ │ │ ├── EncoderError.scala │ │ │ ├── ValueDecoder.scala │ │ │ ├── ValueEncoder.scala │ │ │ ├── SchemaEncoder.scala │ │ │ ├── SchemaEncoderDeriver.scala │ │ │ ├── ValueEncoderDeriver.scala │ │ │ └── ValueDecoderDeriver.scala │ │ │ ├── package.scala │ │ │ ├── Schemas.scala │ │ │ └── Value.scala │ ├── scala-2.13 │ │ └── me │ │ │ └── mnedokushev │ │ │ └── zio │ │ │ └── apache │ │ │ └── parquet │ │ │ └── core │ │ │ └── filter │ │ │ ├── internal │ │ │ ├── MacroUtils.scala │ │ │ ├── ColumnPathConcatMacro.scala │ │ │ └── SanitizeOptionalsMacro.scala │ │ │ └── syntax.scala │ └── scala-3 │ │ └── me │ │ └── mnedokushev │ │ └── zio │ │ └── apache │ │ └── parquet │ │ └── core │ │ └── filter │ │ ├── syntax.scala │ │ └── internal │ │ ├── ColumnPathConcatMacro.scala │ │ └── SanitizeOptionalsMacro.scala │ └── test │ ├── scala-2.13+ │ └── me │ │ └── mnedokushev │ │ └── zio │ │ └── apache │ │ └── parquet │ │ └── core │ │ └── Fixtures.scala │ └── scala │ └── me │ └── mnedokushev │ └── zio │ └── apache │ └── parquet │ └── core │ ├── codec │ └── SchemaEncoderDeriverSpec.scala │ └── filter │ └── ExprSpec.scala ├── .gitignore ├── .scalafmt.conf ├── .scalafix.conf ├── .github └── workflows │ ├── release.yml │ ├── ci.yml │ └── clean.yml ├── LICENSE └── README.md /.sbtopts: -------------------------------------------------------------------------------- 1 | -J-Xss4M -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.11.7 2 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grouzen/zio-apache-parquet/HEAD/docs/logo.png -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Scala Steward: Reformat with scalafmt 3.10.2 2 | 49ef9dde9e251e2a6bddf0092a3ba8b4bb1cad1e 3 | -------------------------------------------------------------------------------- 
/modules/hadoop/src/test/resources/food.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grouzen/zio-apache-parquet/HEAD/modules/hadoop/src/test/resources/food.parquet -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | .idea/ 4 | .bsp/ 5 | target/ 6 | */target/ 7 | private/ 8 | 9 | .bloop/ 10 | .metals/ 11 | .vscode/ 12 | metals.sbt 13 | .scala-build 14 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/FilterError.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | final case class FilterError( 4 | message: String, 5 | cause: Option[Throwable] = None 6 | ) extends IllegalArgumentException(message, cause.getOrElse(new Throwable())) 7 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/package.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core 2 | 3 | import org.apache.parquet.filter2.predicate.FilterPredicate 4 | 5 | package object filter { 6 | 7 | type CompiledPredicate = Either[String, FilterPredicate] 8 | 9 | } 10 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/DecoderError.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import java.io.IOException 4 | 5 | final case class DecoderError( 6 | message: String, 7 | cause: Option[Throwable] = None 8 | ) extends IOException(message, cause.getOrElse(new Throwable())) 9 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/EncoderError.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import java.io.IOException 4 | 5 | final case class EncoderError( 6 | message: String, 7 | cause: Option[Throwable] = None 8 | ) extends IOException(message, cause.getOrElse(new Throwable())) 9 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "3.10.2" 2 | runner.dialect = scala3 3 | maxColumn = 120 4 | align.preset = most 5 | continuationIndent.defnSite = 2 6 | assumeStandardLibraryStripMargin = true 7 | docstrings.style = Asterisk 8 | lineEndings = preserve 9 | includeCurlyBraceInSelectChains = false 10 | danglingParentheses.preset = true 11 | spaces { 12 | inImportCurlyBraces = true 13 | } 14 | optIn.annotationNewlines = true 15 | runner.dialect = scala3 16 | rewrite.rules = [SortImports, RedundantBraces] 17 | -------------------------------------------------------------------------------- /.scalafix.conf: -------------------------------------------------------------------------------- 1 | rules = [ 2 | DisableSyntax 3 | LeakingImplicitClassVal 4 | NoAutoTupling 5 | NoValInForComprehension 6 
| OrganizeImports 7 | ] 8 | 9 | Disable { 10 | ifSynthetic = [ 11 | "scala/Option.option2Iterable" 12 | "scala/Predef.any2stringadd" 13 | ] 14 | } 15 | 16 | OrganizeImports { 17 | # Allign with IntelliJ IDEA so that they don't fight each other 18 | groupedImports = Merge 19 | removeUnused = false 20 | } 21 | 22 | RemoveUnused { 23 | imports = false // handled by OrganizeImports 24 | } 25 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-2.13/me/mnedokushev/zio/apache/parquet/core/filter/internal/MacroUtils.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter.internal 2 | 3 | import scala.reflect.macros.blackbox 4 | 5 | abstract class MacroUtils(c: blackbox.Context) { 6 | 7 | import c.universe._ 8 | 9 | private def debugEnabled: Boolean = true 10 | 11 | implicit class Debugged[A](self: A) { 12 | def debugged(): Unit = 13 | if (debugEnabled) 14 | c.info(c.enclosingPosition, s"tree=${showRaw(self)}", force = true) 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/package.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet 2 | 3 | package object core { 4 | 5 | val MILLIS_PER_DAY = 86400000L 6 | val NANOS_PER_DAY = 86400000000000L 7 | val MILLIS_FACTOR = 1000L 8 | val MICROS_FACTOR = 1000000L 9 | val NANOS_FACTOR = 1000000000L 10 | val DECIMAL_PRECISION = 11 11 | val DECIMAL_SCALE = 2 12 | 13 | type Lens[F, S, A] = filter.Column.Named[A, F] 14 | type Prism[F, S, A] = Unit 15 | type Traversal[S, A] = Unit 16 | 17 | } 18 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/ValueDecoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value 4 | import zio._ 5 | 6 | trait ValueDecoder[+A] { self => 7 | 8 | def decode(value: Value): A 9 | 10 | def decodeZIO(value: Value): Task[A] = 11 | ZIO.attempt(decode(value)) 12 | 13 | def map[B](f: A => B): ValueDecoder[B] = 14 | new ValueDecoder[B] { 15 | override def decode(value: Value): B = 16 | f(self.decode(value)) 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/ValueEncoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value 4 | import zio._ 5 | 6 | trait ValueEncoder[-A] { self => 7 | 8 | def encode(value: A): Value 9 | 10 | def encodeZIO(value: A): Task[Value] = 11 | ZIO.attemptBlocking(encode(value)) 12 | 13 | def contramap[B](f: B => A): ValueEncoder[B] = 14 | new ValueEncoder[B] { 15 | override def encode(value: B): Value = 16 | self.encode(f(value)) 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | // Linting 2 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.6") 3 | addSbtPlugin("ch.epfl.scala" % 
"sbt-scalafix" % "0.14.4") 4 | 5 | // Dependencies management 6 | addSbtPlugin("ch.epfl.scala" % "sbt-missinglink" % "0.3.6") 7 | addSbtPlugin("com.github.cb372" % "sbt-explicit-dependencies" % "0.3.1") 8 | 9 | // Versioning and release 10 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.13.1") 11 | addSbtPlugin("org.typelevel" % "sbt-tpolecat" % "0.5.2") 12 | addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.11.2") 13 | addSbtPlugin("com.github.sbt" % "sbt-github-actions" % "0.29.0") 14 | 15 | addDependencyTreePlugin 16 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/SchemaEncoder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import org.apache.parquet.schema.Type 4 | import zio._ 5 | import zio.schema._ 6 | 7 | trait SchemaEncoder[A] { self => 8 | 9 | def encode(schema: Schema[A], name: String, optional: Boolean): Type 10 | 11 | def encodeZIO(schema: Schema[A], name: String, optional: Boolean): Task[Type] = 12 | ZIO.attempt(encode(schema, name, optional)) 13 | 14 | def contramap[B](f: Schema[B] => Schema[A]): SchemaEncoder[B] = 15 | new SchemaEncoder[B] { 16 | override def encode(schema: Schema[B], name: String, optional: Boolean): Type = 17 | self.encode(f(schema), name, optional) 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | branches: [master, main] 5 | tags: ["*"] 6 | jobs: 7 | publish: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | with: 12 | fetch-depth: 0 13 | 14 | - uses: actions/setup-java@v4 15 | with: 16 | distribution: temurin 17 | java-version: 17 18 | cache: sbt 19 | 20 | - name: Setup sbt 21 | uses: sbt/setup-sbt@v1 22 | 23 | - run: sbt ci-release 24 | env: 25 | PGP_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }} 26 | PGP_SECRET: ${{ secrets.PGP_SECRET }} 27 | SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} 28 | SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }} 29 | -------------------------------------------------------------------------------- /docs/scala-cli/Schema.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.3 3 | 4 | import zio.schema.* 5 | import me.mnedokushev.zio.apache.parquet.core.codec.* 6 | 7 | object Schema extends App: 8 | 9 | case class MyRecord(a: Int, b: String, c: Option[Long]) 10 | 11 | object MyRecord: 12 | given schema: Schema[MyRecord] = 13 | DeriveSchema.gen[MyRecord] 14 | given schemaEncoder: SchemaEncoder[MyRecord] = 15 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 16 | 17 | val parquetSchema = MyRecord.schemaEncoder.encode(MyRecord.schema, "my_record", optional = false) 18 | 19 | println(parquetSchema) 20 | // Outputs: 21 | // required group my_record { 22 | // required int32 a (INTEGER(32,true)); 23 | // required binary b (STRING); 24 | // optional int64 c (INTEGER(64,true)); 25 | // } 26 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-3/me/mnedokushev/zio/apache/parquet/core/filter/syntax.scala: 
-------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Lens 4 | import me.mnedokushev.zio.apache.parquet.core.filter.internal.{ ColumnPathConcatMacro, SanitizeOptionalsMacro } 5 | 6 | package object syntax extends Predicate.Syntax { 7 | 8 | extension [F, S, A](column: Lens[F, S, Option[A]]) { 9 | def nullable(implicit typeTag: TypeTag[A]): Column.Named[A, column.Identity] = 10 | Column.Named(column.path) 11 | } 12 | 13 | inline def filter[A](inline predicate: Predicate[A]): CompiledPredicate = 14 | ${ SanitizeOptionalsMacro.sanitizeImpl[A]('predicate) } 15 | 16 | inline def concat[A, B, F](inline parent: Column[A], inline child: Column.Named[B, F])(using 17 | ctt: TypeTag[B] 18 | ): Column[B] = 19 | ${ ColumnPathConcatMacro.concatImpl[A, B, F]('parent, 'child, 'ctt) } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-2.13/me/mnedokushev/zio/apache/parquet/core/filter/syntax.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Lens 4 | import me.mnedokushev.zio.apache.parquet.core.filter.CompiledPredicate 5 | import me.mnedokushev.zio.apache.parquet.core.filter.internal.{ ColumnPathConcatMacro, SanitizeOptionalsMacro } 6 | 7 | package object syntax extends Predicate.Syntax { 8 | 9 | implicit class NullableColumnSyntax[F, S, A](val column: Lens[F, S, Option[A]]) { 10 | def nullable(implicit typeTag: TypeTag[A]): Column.Named[A, column.Identity] = 11 | Column.Named(column.path) 12 | } 13 | 14 | def filter[A](predicate: Predicate[A]): CompiledPredicate = macro SanitizeOptionalsMacro.sanitizeImpl[A] 15 | 16 | def concat[A, B, F]( 17 | parent: Column[A], 18 | child: Column.Named[B, F] 19 | ): Column[B] = macro ColumnPathConcatMacro.concatImpl[A, B, F] 20 | 21 | } 22 | -------------------------------------------------------------------------------- /docs/scala-cli/Value.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.3 3 | 4 | import zio.schema.* 5 | import me.mnedokushev.zio.apache.parquet.core.codec.* 6 | 7 | object Value extends App: 8 | 9 | case class MyRecord(a: Int, b: String, c: Option[Long]) 10 | 11 | object MyRecord: 12 | given Schema[MyRecord] = 13 | DeriveSchema.gen[MyRecord] 14 | given encoder: ValueEncoder[MyRecord] = 15 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 16 | given decoder: ValueDecoder[MyRecord] = 17 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 18 | 19 | val value = MyRecord.encoder.encode(MyRecord(3, "zio", None)) 20 | val record = MyRecord.decoder.decode(value) 21 | 22 | println(value) 23 | // Outputs: 24 | // RecordValue(Map(a -> Int32Value(3), b -> BinaryValue(Binary{"zio"}), c -> NullValue)) 25 | println(record) 26 | // Outputs: 27 | // MyRecord(3,zio,None) 28 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/Filter.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.{ Lens, Prism, 
Traversal } 4 | import zio.schema._ 5 | 6 | trait Filter { 7 | 8 | type Columns 9 | 10 | val columns: Columns 11 | 12 | } 13 | 14 | object Filter { 15 | 16 | type Aux[Columns0] = Filter { 17 | type Columns = Columns0 18 | } 19 | 20 | def apply[A](implicit 21 | schema: Schema[A], 22 | typeTag: TypeTag[A] 23 | ): Filter.Aux[schema.Accessors[Lens, Prism, Traversal]] = 24 | new Filter { 25 | val accessorBuilder = 26 | new ExprAccessorBuilder(typeTag.asInstanceOf[TypeTag.Record[A]].columns) 27 | 28 | override type Columns = 29 | schema.Accessors[accessorBuilder.Lens, accessorBuilder.Prism, accessorBuilder.Traversal] 30 | 31 | override val columns: Columns = 32 | schema.makeAccessors(accessorBuilder) 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/ExprAccessorBuilder.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import zio.schema.{ AccessorBuilder, Schema } 4 | 5 | final class ExprAccessorBuilder(typeTags: Map[String, TypeTag[?]]) extends AccessorBuilder { 6 | 7 | override type Lens[F, S, A] = Column.Named[A, F] 8 | 9 | override type Prism[F, S, A] = Unit 10 | 11 | override type Traversal[S, A] = Unit 12 | 13 | override def makeLens[F, S, A](product: Schema.Record[S], term: Schema.Field[S, A]): Column.Named[A, F] = { 14 | val name = term.name.toString 15 | implicit val typeTag = typeTags(name).asInstanceOf[TypeTag[A]] 16 | 17 | Column.Named[A, F](name) 18 | } 19 | 20 | override def makePrism[F, S, A](sum: Schema.Enum[S], term: Schema.Case[S, A]): Prism[F, S, A] = 21 | () 22 | 23 | override def makeTraversal[S, A](collection: Schema.Collection[S, A], element: Schema[A]): Traversal[S, A] = 24 | () 25 | 26 | } 27 | -------------------------------------------------------------------------------- /docs/scala-cli/SchemaArity23.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.3 3 | 4 | import zio.schema._ 5 | import me.mnedokushev.zio.apache.parquet.core.codec._ 6 | 7 | object SchemaArity23 extends App { 8 | 9 | final case class Arity23( 10 | a: Int, 11 | b: Option[String], 12 | c: Int, 13 | d: Int, 14 | e: Int, 15 | f: Int, 16 | g: Int, 17 | h: Int, 18 | i: Int, 19 | j: Int, 20 | k: Int, 21 | l: Int, 22 | m: Int, 23 | n: Int, 24 | o: Int, 25 | p: Int, 26 | q: Int, 27 | r: Int, 28 | s: Int, 29 | t: Int, 30 | u: Int, 31 | v: Int, 32 | w: Int 33 | ) 34 | 35 | object Arity23 { 36 | implicit val schema: Schema[Arity23] = 37 | DeriveSchema.gen[Arity23] 38 | implicit val schemaEncoder: SchemaEncoder[Arity23] = 39 | Derive.derive[SchemaEncoder, Arity23](SchemaEncoderDeriver.default) 40 | } 41 | 42 | val arity23Schema = Arity23.schemaEncoder.encode(Arity23.schema, "arity23", optional = false) 43 | 44 | println(arity23Schema) 45 | 46 | } 47 | -------------------------------------------------------------------------------- /docs/scala-cli/SchemaSummoned.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.3 3 | 4 | import me.mnedokushev.zio.apache.parquet.core.Schemas 5 | import zio.schema.* 6 | import me.mnedokushev.zio.apache.parquet.core.codec.* 7 | 8 | object SchemaSummoned extends App: 9 | 10 | case class MyRecord(a: Int, 
b: String, c: Option[Long]) 11 | 12 | object MyRecord: 13 | given schema: Schema[MyRecord] = 14 | DeriveSchema.gen[MyRecord] 15 | // The custom encoder must be defined before the definition for your record type. 16 | given SchemaEncoder[Int] with { 17 | override def encode(schema: Schema[Int], name: String, optional: Boolean) = 18 | Schemas.uuid.optionality(optional).named(name) 19 | } 20 | given schemaEncoder: SchemaEncoder[MyRecord] = 21 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.summoned) 22 | 23 | val parquetSchema = MyRecord.schemaEncoder.encode(MyRecord.schema, "my_record", optional = false) 24 | 25 | println(parquetSchema) 26 | // Outputs: 27 | // required group my_record { 28 | // required fixed_len_byte_array(16) a (UUID); 29 | // required binary b (STRING); 30 | // optional int64 c (INTEGER(64,true)); 31 | // } 32 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-3/me/mnedokushev/zio/apache/parquet/core/filter/internal/ColumnPathConcatMacro.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter.internal 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.filter.{ Column, TypeTag } 4 | 5 | import scala.quoted.* 6 | 7 | object ColumnPathConcatMacro { 8 | 9 | def concatImpl[A: Type, B: Type, F: Type]( 10 | parent: Expr[Column[A]], 11 | child: Expr[Column.Named[B, F]], 12 | childTypeTag: Expr[TypeTag[B]] 13 | )(using 14 | Quotes 15 | ): Expr[Column[B]] = { 16 | import quotes.reflect.* 17 | 18 | val childField = TypeRepr.of[F] match { 19 | case ConstantType(StringConstant(name)) => 20 | name 21 | case tpe => 22 | report.errorAndAbort(s"Couldn't get a name of a singleton type $tpe") 23 | } 24 | val parentFields = TypeRepr.of[A].typeSymbol.caseFields.map(_.name) 25 | 26 | if (parentFields.contains(childField)) { 27 | val concatExpr = '{ ${ parent }.path + "." 
+ ${ child }.path } 28 | 29 | '{ me.mnedokushev.zio.apache.parquet.core.filter.Column.Named[B, F]($concatExpr)(using $childTypeTag) } 30 | } else 31 | report.errorAndAbort(s"Parent column doesn't contain a column named '$childField'") 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/Path.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import org.apache.hadoop.conf.Configuration 4 | import org.apache.hadoop.fs.{ Path => HadoopPath } 5 | import org.apache.parquet.hadoop.util.{ HadoopInputFile, HadoopOutputFile } 6 | import zio._ 7 | 8 | import java.io.IOException 9 | import java.net.URI 10 | import java.nio.file.{ Path => JPath, Paths } 11 | 12 | case class Path(underlying: HadoopPath) { 13 | 14 | def /(child: String): Path = 15 | this.copy(underlying = new HadoopPath(underlying, child)) 16 | 17 | def /(child: JPath): Path = 18 | this.copy(underlying = new HadoopPath(underlying, Path(child).underlying)) 19 | 20 | def toJava: JPath = 21 | Paths.get(underlying.toUri) 22 | 23 | def toHadoop: HadoopPath = 24 | underlying 25 | 26 | def toInputFileZIO(conf: Configuration): IO[IOException, HadoopInputFile] = 27 | ZIO.attemptBlockingIO(HadoopInputFile.fromPath(underlying, conf)) 28 | 29 | def toOutputFileZIO(conf: Configuration): IO[IOException, HadoopOutputFile] = 30 | ZIO.attemptBlockingIO(HadoopOutputFile.fromPath(toHadoop, conf)) 31 | 32 | } 33 | 34 | object Path { 35 | 36 | def apply(path: JPath): Path = 37 | Path(new HadoopPath(new URI("file", null, path.toAbsolutePath.toString, null, null))) 38 | 39 | def apply(uri: URI): Path = 40 | Path(new HadoopPath(uri)) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/WriteSupport.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value.GroupValue.RecordValue 4 | import me.mnedokushev.zio.apache.parquet.core.Value.NullValue 5 | import org.apache.hadoop.conf.Configuration 6 | import org.apache.parquet.hadoop.api.{ WriteSupport => HadoopWriteSupport } 7 | import org.apache.parquet.io.api.RecordConsumer 8 | import org.apache.parquet.schema.MessageType 9 | 10 | import scala.jdk.CollectionConverters._ 11 | 12 | class WriteSupport(schema: MessageType, metadata: Map[String, String]) extends HadoopWriteSupport[RecordValue] { 13 | 14 | override def init(configuration: Configuration): HadoopWriteSupport.WriteContext = 15 | new HadoopWriteSupport.WriteContext(schema, metadata.asJava) 16 | 17 | override def prepareForWrite(recordConsumer: RecordConsumer): Unit = 18 | this.consumer = recordConsumer 19 | 20 | override def write(record: RecordValue): Unit = { 21 | consumer.startMessage() 22 | 23 | record.values.foreach { 24 | case (_, NullValue) => 25 | () 26 | case (name, value) => 27 | val fieldIndex = schema.getFieldIndex(name) 28 | val fieldType = schema.getType(fieldIndex) 29 | 30 | consumer.startField(name, fieldIndex) 31 | value.write(fieldType, consumer) 32 | consumer.endField(name, fieldIndex) 33 | } 34 | 35 | consumer.endMessage() 36 | } 37 | 38 | private var consumer: RecordConsumer = null 39 | 40 | } 41 | 
-------------------------------------------------------------------------------- /modules/core/src/main/scala-2.13/me/mnedokushev/zio/apache/parquet/core/filter/internal/ColumnPathConcatMacro.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter.internal 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.filter.Column 4 | 5 | import scala.reflect.macros.blackbox 6 | 7 | class ColumnPathConcatMacro(val c: blackbox.Context) extends MacroUtils(c) { 8 | import c.universe._ 9 | 10 | def concatImpl[A, B, F](parent: Expr[Column[A]], child: Expr[Column.Named[B, F]])(implicit 11 | ptt: c.WeakTypeTag[A], 12 | ftt: c.WeakTypeTag[F] 13 | ): Tree = { 14 | val childField = getSingletonTypeName(ftt.tpe) 15 | val parentFields = ptt.tpe.members.collect { 16 | case p: TermSymbol if p.isCaseAccessor && !p.isMethod => p.name.toString.trim 17 | }.toList 18 | 19 | if (parentFields.exists(_ == childField)) { 20 | val pathTermName = "path" 21 | val dotStringLiteral = "." 22 | val concatExpr = 23 | q"${parent.tree}.${TermName(pathTermName)} + ${Literal(Constant(dotStringLiteral))} + ${child.tree}.${TermName(pathTermName)}" 24 | 25 | q"_root_.me.mnedokushev.zio.apache.parquet.core.filter.Column.Named($concatExpr)" 26 | } else 27 | c.abort(c.enclosingPosition, s"Parent column doesn't contain a column named '$childField'") 28 | } 29 | 30 | private def getSingletonTypeName(tpe: Type): String = 31 | tpe match { 32 | case ConstantType(Constant(name)) => name.toString 33 | case _ => c.abort(c.enclosingPosition, s"Couldn't get a name of a singleton type ${showRaw(tpe)}") 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /docs/scala-cli/ValueSummoned.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.3 3 | 4 | import me.mnedokushev.zio.apache.parquet.core.Value 5 | import zio.schema.* 6 | import me.mnedokushev.zio.apache.parquet.core.codec.* 7 | 8 | import java.nio.charset.StandardCharsets 9 | 10 | object ValueSummoned extends App: 11 | 12 | case class MyRecord(a: Int, b: String, c: Option[Long]) 13 | 14 | object MyRecord: 15 | given Schema[MyRecord] = 16 | DeriveSchema.gen[MyRecord] 17 | given ValueEncoder[Int] with { 18 | override def encode(value: Int): Value = 19 | Value.string(value.toString) 20 | } 21 | given ValueDecoder[Int] with { 22 | override def decode(value: Value): Int = 23 | value match { 24 | case Value.PrimitiveValue.BinaryValue(v) => 25 | new String(v.getBytes, StandardCharsets.UTF_8).toInt 26 | case other => 27 | throw DecoderError(s"Wrong value: $other") 28 | } 29 | } 30 | given encoder: ValueEncoder[MyRecord] = 31 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.summoned) 32 | given decoder: ValueDecoder[MyRecord] = 33 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.summoned) 34 | 35 | val value = MyRecord.encoder.encode(MyRecord(3, "zio", None)) 36 | val record = MyRecord.decoder.decode(value) 37 | 38 | println(value) 39 | // Outputs: 40 | // RecordValue(Map(a -> BinaryValue(Binary{"3"}), b -> BinaryValue(Binary{"zio"}), c -> NullValue)) 41 | println(record) 42 | // Outputs: 43 | // MyRecord(3,zio,None) 44 | -------------------------------------------------------------------------------- /docs/scala-cli/ParquetIO.scala: -------------------------------------------------------------------------------- 1 | //> 
using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-hadoop:0.3.3 3 | 4 | import zio.schema.* 5 | import me.mnedokushev.zio.apache.parquet.core.codec.* 6 | import me.mnedokushev.zio.apache.parquet.hadoop.{ ParquetReader, ParquetWriter, Path } 7 | import zio.* 8 | 9 | import java.nio.file.Files 10 | 11 | object ParquetIO extends ZIOAppDefault: 12 | 13 | case class MyRecord(a: Int, b: String, c: Option[Long]) 14 | 15 | object MyRecord: 16 | given Schema[MyRecord] = 17 | DeriveSchema.gen[MyRecord] 18 | given SchemaEncoder[MyRecord] = 19 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 20 | given ValueEncoder[MyRecord] = 21 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 22 | given ValueDecoder[MyRecord] = 23 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 24 | 25 | val data = 26 | Chunk( 27 | MyRecord(1, "first", Some(11)), 28 | MyRecord(3, "third", None) 29 | ) 30 | 31 | val recordsFile = Path(Files.createTempDirectory("records")) / "records.parquet" 32 | 33 | override def run = 34 | (for { 35 | writer <- ZIO.service[ParquetWriter[MyRecord]] 36 | reader <- ZIO.service[ParquetReader[MyRecord]] 37 | _ <- writer.writeChunk(recordsFile, data) 38 | fromFile <- reader.readChunk(recordsFile) 39 | _ <- Console.printLine(fromFile) 40 | } yield ()).provide( 41 | ParquetWriter.configured[MyRecord](), 42 | ParquetReader.configured[MyRecord]() 43 | ) 44 | // Outputs: 45 | // Chunk(MyRecord(1,first,Some(11)),MyRecord(3,third,None)) 46 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/ReadSupport.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Schemas 4 | import me.mnedokushev.zio.apache.parquet.core.Value.GroupValue.RecordValue 5 | import me.mnedokushev.zio.apache.parquet.core.codec.SchemaEncoder 6 | import org.apache.hadoop.conf.Configuration 7 | import org.apache.parquet.hadoop.api.{ InitContext, ReadSupport => HadoopReadSupport } 8 | import org.apache.parquet.io.api.{ GroupConverter, RecordMaterializer } 9 | import org.apache.parquet.schema.MessageType 10 | import zio.Tag 11 | import zio.prelude._ 12 | import zio.schema.Schema 13 | 14 | class ReadSupport[A]( 15 | schema: Option[Schema[A]] = None, 16 | schemaEncoder: Option[SchemaEncoder[A]] = None 17 | )(implicit tag: Tag[A]) 18 | extends HadoopReadSupport[RecordValue] { 19 | 20 | override def prepareForRead( 21 | configuration: Configuration, 22 | keyValueMetaData: java.util.Map[String, String], 23 | fileSchema: MessageType, 24 | readContext: HadoopReadSupport.ReadContext 25 | ): RecordMaterializer[RecordValue] = new RecordMaterializer[RecordValue] { 26 | 27 | private val converter = 28 | GroupValueConverter.root(resolveSchema(fileSchema)) 29 | 30 | override def getCurrentRecord: RecordValue = 31 | converter.get 32 | 33 | override def getRootConverter: GroupConverter = 34 | converter 35 | 36 | } 37 | 38 | override def init(context: InitContext): HadoopReadSupport.ReadContext = 39 | new HadoopReadSupport.ReadContext(resolveSchema(context.getFileSchema)) 40 | 41 | private def resolveSchema(contextSchema: MessageType): MessageType = 42 | (schema <*> schemaEncoder).fold(contextSchema) { case (schema0, schemaEncoder0) => 43 | Schemas.asMessageType(schemaEncoder0.encode(schema0, tag.tag.shortName, optional = false)) 44 | } 45 | 
46 | } 47 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by sbt-github-actions using the 2 | # githubWorkflowGenerate task. You should add and commit this file to 3 | # your git repository. It goes without saying that you shouldn't edit 4 | # this file by hand! Instead, if you wish to make changes, you should 5 | # change your sbt build configuration to revise the workflow description 6 | # to meet your needs, then regenerate this file. 7 | 8 | name: Continuous Integration 9 | 10 | on: 11 | pull_request: 12 | branches: ['**'] 13 | push: 14 | branches: ['**'] 15 | 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | jobs: 20 | build: 21 | name: Build and Test 22 | strategy: 23 | matrix: 24 | os: [ubuntu-latest] 25 | scala: [2.13.18, 3.3.6] 26 | java: [temurin@11, temurin@17] 27 | runs-on: ${{ matrix.os }} 28 | steps: 29 | - name: Checkout current branch (full) 30 | uses: actions/checkout@v6 31 | with: 32 | fetch-depth: 0 33 | 34 | - name: Setup Java (temurin@11) 35 | if: matrix.java == 'temurin@11' 36 | uses: actions/setup-java@v5 37 | with: 38 | distribution: temurin 39 | java-version: 11 40 | cache: sbt 41 | 42 | - name: Setup Java (temurin@17) 43 | if: matrix.java == 'temurin@17' 44 | uses: actions/setup-java@v5 45 | with: 46 | distribution: temurin 47 | java-version: 17 48 | cache: sbt 49 | 50 | - name: Setup sbt 51 | uses: sbt/setup-sbt@v1 52 | 53 | - name: Lint Scala code 54 | run: sbt '++ ${{ matrix.scala }}' 'scalafix --check' scalafmtCheckAll 55 | 56 | - name: Check that workflows are up to date 57 | run: sbt '++ ${{ matrix.scala }}' githubWorkflowCheck 58 | 59 | - name: Build project 60 | run: sbt '++ ${{ matrix.scala }}' test 61 | -------------------------------------------------------------------------------- /project/BuildHelper.scala: -------------------------------------------------------------------------------- 1 | import sbt.* 2 | import sbt.Keys.* 3 | import scalafix.sbt.ScalafixPlugin.autoImport.scalafixSemanticdb 4 | 5 | object BuildHelper { 6 | 7 | def stdSettings(projectName: String): Seq[Def.Setting[_]] = Seq( 8 | name := s"zio-apache-parquet-$projectName", 9 | organization := "me.mnedokushev", 10 | libraryDependencies ++= betterMonadicFor(scalaVersion.value), 11 | semanticdbEnabled := true, 12 | semanticdbVersion := scalafixSemanticdb.revision, 13 | Test / fork := true, 14 | Test / unmanagedSourceDirectories ++= crossVersionSources(scalaVersion.value, "test", baseDirectory.value), 15 | Test / unmanagedSourceDirectories ++= crossVersionSources(scalaVersion.value, "main", baseDirectory.value), 16 | libraryDependencies ++= { 17 | CrossVersion.partialVersion(scalaVersion.value) match { 18 | case Some((2, _)) => 19 | Seq(Dep.scalaReflect.value) 20 | case _ => Seq.empty 21 | } 22 | } 23 | ) 24 | 25 | val Scala212 = "2.12.21" 26 | val Scala213 = "2.13.18" 27 | val Scala3 = "3.3.6" 28 | 29 | private def betterMonadicFor(scalaVersion: String) = 30 | CrossVersion.partialVersion(scalaVersion) match { 31 | case Some((2, _)) => Seq(compilerPlugin("com.olegpy" %% "better-monadic-for" % "0.3.1")) 32 | case _ => Seq() 33 | } 34 | 35 | def crossVersionSources(scalaVersion: String, conf: String, baseDirectory: File): List[File] = { 36 | val versions = CrossVersion.partialVersion(scalaVersion) match { 37 | case Some((2, 13)) => 38 | List("2", "2.13", "2.13+") 39 | case Some((3, _)) => 
40 | List("2.13+", "3") 41 | case _ => 42 | List.empty 43 | } 44 | 45 | for { 46 | version <- "scala" :: versions.map("scala-" + _) 47 | file = baseDirectory / "src" / conf / version if file.exists 48 | } yield file 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /project/Dep.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbt.Keys.scalaVersion 3 | 4 | object Dep { 5 | 6 | object V { 7 | val zio = "2.1.23" 8 | val zioSchema = "1.7.5" 9 | val scalaCollectionCompat = "2.13.0" 10 | val apacheParquet = "1.16.0" 11 | val apacheHadoop = "3.4.2" 12 | } 13 | 14 | object O { 15 | val apacheParquet = "org.apache.parquet" 16 | val apacheHadoop = "org.apache.hadoop" 17 | val zio = "dev.zio" 18 | val scalaLang = "org.scala-lang" 19 | val scalaLangModules = s"$scalaLang.modules" 20 | } 21 | 22 | lazy val zio = O.zio %% "zio" % V.zio 23 | lazy val zioSchema = O.zio %% "zio-schema" % V.zioSchema 24 | lazy val zioSchemaDerivation = O.zio %% "zio-schema-derivation" % V.zioSchema 25 | lazy val zioTest = O.zio %% "zio-test" % V.zio 26 | lazy val zioTestSbt = O.zio %% "zio-test-sbt" % V.zio 27 | 28 | lazy val parquetHadoop = O.apacheParquet % "parquet-hadoop" % V.apacheParquet 29 | lazy val parquetColumn = O.apacheParquet % "parquet-column" % V.apacheParquet 30 | 31 | lazy val hadoopCommon = O.apacheHadoop % "hadoop-common" % V.apacheHadoop 32 | lazy val hadoopMapred = O.apacheHadoop % "hadoop-mapred" % "0.22.0" 33 | 34 | lazy val scalaCollectionCompat = O.scalaLangModules %% "scala-collection-compat" % V.scalaCollectionCompat 35 | 36 | lazy val scalaReflect = Def.setting("org.scala-lang" % "scala-reflect" % scalaVersion.value % "provided") 37 | 38 | lazy val core = Seq( 39 | zio, 40 | zioSchema, 41 | zioSchemaDerivation, 42 | scalaCollectionCompat, 43 | parquetHadoop, 44 | parquetColumn, 45 | zioTest % Test, 46 | zioTestSbt % Test 47 | ) 48 | 49 | lazy val hadoop = Seq( 50 | hadoopCommon, 51 | hadoopMapred, 52 | zioTest % Test, 53 | zioTestSbt % Test 54 | ) 55 | 56 | } 57 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-3/me/mnedokushev/zio/apache/parquet/core/filter/internal/SanitizeOptionalsMacro.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter.internal 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.filter.{ CompiledPredicate, Predicate } 4 | import org.apache.parquet.filter2.predicate.FilterPredicate 5 | 6 | import scala.quoted.* 7 | 8 | object SanitizeOptionalsMacro { 9 | 10 | // TODO: tests 11 | def sanitizeImpl[A: Type](predicate: Expr[Predicate[A]])(using Quotes): Expr[CompiledPredicate] = { 12 | import quotes.reflect.* 13 | 14 | // Example of a type representation of A type: 15 | // AndType( 16 | // AndType( 17 | // TypeRef(TermRef(ThisType(TypeRef(NoPrefix(), "scala")), "Predef"), "String"), 18 | // AppliedType( 19 | // TypeRef(TermRef(ThisType(TypeRef(NoPrefix(), "")), "scala"), "Option"), 20 | // List( 21 | // TypeRef(TermRef(ThisType(TypeRef(NoPrefix(), "")), "scala"), "Int") 22 | // ) 23 | // ) 24 | // ), 25 | // TypeRef(TermRef(ThisType(TypeRef(NoPrefix(), "")), "scala"), "Int") 26 | // ) 27 | // TODO: rewrite using limited stack for safety 28 | def containsOptionalValue(tpe: TypeRepr): Boolean = 29 | tpe match { 30 | case AndType(a, b) => 31 | containsOptionalValue(a) || containsOptionalValue(b) 32 | case 
AppliedType(tpe, _) => 33 | containsOptionalValue(tpe) 34 | case TypeRef(_, name) => 35 | List("Option", "Some", "None").contains(name) 36 | } 37 | 38 | if (containsOptionalValue(TypeRepr.of[A])) 39 | report.errorAndAbort( 40 | s""" 41 | | The use of optional columns in filter predicate is prohibited. Please, use .nullable: 42 | | column.nullable > 3 43 | | Predicate tree: ${predicate.show} 44 | """.stripMargin 45 | ) 46 | else 47 | '{ _root_.me.mnedokushev.zio.apache.parquet.core.filter.Predicate.compile0($predicate) } 48 | 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/Column.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | trait Column[A] { self => 4 | 5 | type Identity 6 | 7 | val path: String 8 | val typeTag: TypeTag[A] 9 | 10 | // TODO: overcome the limitation of scala macros for having a better API 11 | // I found out the compiler throws an error that macro is not found as 12 | // the macro itself depends on Column. The only option is to move the definition 13 | // of "concat" outside the Column class. 14 | // def /[B](child: Column[B]): Column[B] = 15 | // ColumnPathConcatMacro.concatImpl[A, B] 16 | 17 | def >(value: A)(implicit ev: OperatorSupport.LtGt[A]): Predicate[A] = 18 | Predicate.Binary(self, value, Operator.Binary.GreaterThen()) 19 | 20 | def <(value: A)(implicit ev: OperatorSupport.LtGt[A]): Predicate[A] = 21 | Predicate.Binary(self, value, Operator.Binary.LessThen()) 22 | 23 | def >=(value: A)(implicit ev: OperatorSupport.LtGt[A]): Predicate[A] = 24 | Predicate.Binary(self, value, Operator.Binary.GreaterEq()) 25 | 26 | def <=(value: A)(implicit ev: OperatorSupport.LtGt[A]): Predicate[A] = 27 | Predicate.Binary(self, value, Operator.Binary.LessEq()) 28 | 29 | def ===(value: A)(implicit ev: OperatorSupport.EqNotEq[A]): Predicate[A] = 30 | Predicate.Binary(self, value, Operator.Binary.Eq()) 31 | 32 | def =!=(value: A)(implicit ev: OperatorSupport.EqNotEq[A]): Predicate[A] = 33 | Predicate.Binary(self, value, Operator.Binary.NotEq()) 34 | 35 | def in(values: Set[A])(implicit ev: OperatorSupport.EqNotEq[A]): Predicate[A] = 36 | Predicate.BinarySet(self, values, Operator.Binary.Set.In()) 37 | 38 | def notIn(values: Set[A])(implicit ev: OperatorSupport.EqNotEq[A]): Predicate[A] = 39 | Predicate.BinarySet(self, values, Operator.Binary.Set.NotIn()) 40 | 41 | } 42 | 43 | object Column { 44 | 45 | final case class Named[A: TypeTag, Identity0](path: String) extends Column[A] { 46 | override type Identity = Identity0 47 | override val typeTag: TypeTag[A] = implicitly[TypeTag[A]] 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /modules/core/src/main/scala-2.13/me/mnedokushev/zio/apache/parquet/core/filter/internal/SanitizeOptionalsMacro.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter.internal 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.filter.Predicate 4 | 5 | import scala.reflect.macros.blackbox 6 | 7 | class SanitizeOptionalsMacro(val c: blackbox.Context) extends MacroUtils(c) { 8 | import c.universe._ 9 | 10 | def sanitizeImpl[A](predicate: Expr[Predicate[A]])(ptt: c.WeakTypeTag[A]): Tree = { 11 | 12 | // Example of a tree for A type: 13 | // RefinedType( 14 | // List( 15 | // 
RefinedType( 16 | // List( 17 | // TypeRef( 18 | // ThisType(java.lang), 19 | // java.lang.String, 20 | // List() 21 | // ), 22 | // TypeRef( 23 | // ThisType(scala), 24 | // scala.Option, 25 | // List( 26 | // TypeRef( 27 | // ThisType(scala), 28 | // scala.Int, 29 | // List() 30 | // ) 31 | // ) 32 | // ) 33 | // ), 34 | // Scope() 35 | // ), 36 | // TypeRef(ThisType(scala), scala.Int, List()) 37 | // ), 38 | // Scope() 39 | // ) 40 | // TODO: rewrite using limited stack for safety 41 | def containsOptionalValue(tpe: Type): Boolean = 42 | tpe match { 43 | case RefinedType(tpes, _) => 44 | tpes.exists(containsOptionalValue) 45 | case TypeRef(_, sym, _) => 46 | List("scala.Option", "scala.Some", "scala.None").contains(sym.fullName) 47 | case _ => 48 | false 49 | } 50 | 51 | if (containsOptionalValue(ptt.tpe)) 52 | c.abort( 53 | c.enclosingPosition, 54 | s""" 55 | | The use of optional columns in filter predicate is prohibited. Please, use .nullable: 56 | | column.nullable > 3 57 | | Predicate: ${predicate.tree} 58 | """.stripMargin 59 | ) 60 | else 61 | q"_root_.me.mnedokushev.zio.apache.parquet.core.filter.Predicate.compile0($predicate)" 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/Operator.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | sealed trait Operator 4 | 5 | object Operator { 6 | 7 | sealed trait Binary[A] extends Operator { 8 | def operatorSupport: OperatorSupport[A] 9 | } 10 | 11 | object Binary { 12 | final case class Eq[A: OperatorSupport.EqNotEq]() extends Binary[A] { 13 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.EqNotEq[A]] 14 | } 15 | final case class NotEq[A: OperatorSupport.EqNotEq]() extends Binary[A] { 16 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.EqNotEq[A]] 17 | } 18 | final case class LessThen[A: OperatorSupport.LtGt]() extends Binary[A] { 19 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.LtGt[A]] 20 | } 21 | final case class LessEq[A: OperatorSupport.LtGt]() extends Binary[A] { 22 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.LtGt[A]] 23 | } 24 | final case class GreaterThen[A: OperatorSupport.LtGt]() extends Binary[A] { 25 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.LtGt[A]] 26 | } 27 | final case class GreaterEq[A: OperatorSupport.LtGt]() extends Binary[A] { 28 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.LtGt[A]] 29 | } 30 | 31 | sealed trait Set[A] extends Binary[A] 32 | 33 | object Set { 34 | 35 | final case class In[A: OperatorSupport.EqNotEq]() extends Set[A] { 36 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.EqNotEq[A]] 37 | } 38 | final case class NotIn[A: OperatorSupport.EqNotEq]() extends Set[A] { 39 | override def operatorSupport: OperatorSupport[A] = implicitly[OperatorSupport.EqNotEq[A]] 40 | } 41 | 42 | } 43 | 44 | } 45 | 46 | sealed trait Unary[A] extends Operator 47 | 48 | object Unary { 49 | final case class Not[A]() extends Unary[A] 50 | } 51 | 52 | sealed trait Logical[A, B] extends Operator 53 | 54 | object Logical { 55 | final case class And[A, B]() extends Logical[A, B] 56 | final case class Or[A, B]() extends Logical[A, B] 57 | } 58 | 59 | } 60 | 
-------------------------------------------------------------------------------- /modules/hadoop/src/test/scala/me/mnedokushev/zio/apache/parquet/hadoop/ValueConverterSpec.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.codec.{ 4 | SchemaEncoder, 5 | SchemaEncoderDeriver, 6 | ValueDecoder, 7 | ValueDecoderDeriver 8 | } 9 | import me.mnedokushev.zio.apache.parquet.hadoop.ValueConverterSpec.FoodProductName.ProductName 10 | import zio.schema.{ Derive, DeriveSchema, Schema } 11 | import zio.test.{ Spec, TestEnvironment, ZIOSpecDefault, _ } 12 | import zio.{ Scope, _ } 13 | 14 | object ValueConverterSpec extends ZIOSpecDefault { 15 | 16 | val dataPath = 17 | Path(getClass.getResource("/food.parquet").toURI) 18 | 19 | final case class FoodProductName(product_name: Option[List[Option[ProductName]]]) 20 | 21 | object FoodProductName { 22 | 23 | final case class ProductName( 24 | lang: Option[String], 25 | text: Option[String] 26 | ) 27 | 28 | implicit val schema: Schema[FoodProductName] = 29 | DeriveSchema.gen[FoodProductName] 30 | implicit val schemaEncoder: SchemaEncoder[FoodProductName] = 31 | Derive.derive[SchemaEncoder, FoodProductName](SchemaEncoderDeriver.default) 32 | implicit val valueDecoder: ValueDecoder[FoodProductName] = 33 | Derive.derive[ValueDecoder, FoodProductName](ValueDecoderDeriver.default) 34 | } 35 | 36 | final case class FoodBrandTags(brands_tags: Option[List[Option[String]]]) 37 | 38 | object FoodBrandTags { 39 | implicit val schema: Schema[FoodBrandTags] = 40 | DeriveSchema.gen[FoodBrandTags] 41 | implicit val schemaEncoder: SchemaEncoder[FoodBrandTags] = 42 | Derive.derive[SchemaEncoder, FoodBrandTags](SchemaEncoderDeriver.default) 43 | implicit val valueDecoder: ValueDecoder[FoodBrandTags] = 44 | Derive.derive[ValueDecoder, FoodBrandTags](ValueDecoderDeriver.default) 45 | } 46 | 47 | // TODO: add more test cases 48 | override def spec: Spec[TestEnvironment & Scope, Any] = 49 | suite("ValueConvertedSpec")( 50 | test("read list of records") { 51 | for { 52 | reader <- ZIO.service[ParquetReader[FoodProductName]] 53 | result <- reader.readChunk(dataPath) 54 | } yield assertTrue(result.size == 10) 55 | }.provide(ParquetReader.projected[FoodProductName]()), 56 | test("read list of strings") { 57 | for { 58 | reader <- ZIO.service[ParquetReader[FoodBrandTags]] 59 | result <- reader.readChunk(dataPath) 60 | } yield assertTrue(result.size == 10) 61 | }.provide(ParquetReader.projected[FoodBrandTags]()) 62 | ) 63 | 64 | } 65 | -------------------------------------------------------------------------------- /.github/workflows/clean.yml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by sbt-github-actions using the 2 | # githubWorkflowGenerate task. You should add and commit this file to 3 | # your git repository. It goes without saying that you shouldn't edit 4 | # this file by hand! Instead, if you wish to make changes, you should 5 | # change your sbt build configuration to revise the workflow description 6 | # to meet your needs, then regenerate this file. 
7 | 8 | name: Clean 9 | 10 | on: push 11 | 12 | permissions: 13 | actions: write 14 | 15 | jobs: 16 | delete-artifacts: 17 | name: Delete Artifacts 18 | runs-on: ubuntu-latest 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | steps: 22 | - name: Delete artifacts 23 | shell: bash {0} 24 | run: | 25 | # Customize those three lines with your repository and credentials: 26 | REPO=${GITHUB_API_URL}/repos/${{ github.repository }} 27 | 28 | # A shortcut to call GitHub API. 29 | ghapi() { curl --silent --location --user _:$GITHUB_TOKEN "$@"; } 30 | 31 | # A temporary file which receives HTTP response headers. 32 | TMPFILE=$(mktemp) 33 | 34 | # An associative array, key: artifact name, value: number of artifacts of that name. 35 | declare -A ARTCOUNT 36 | 37 | # Process all artifacts on this repository, loop on returned "pages". 38 | URL=$REPO/actions/artifacts 39 | while [[ -n "$URL" ]]; do 40 | 41 | # Get current page, get response headers in a temporary file. 42 | JSON=$(ghapi --dump-header $TMPFILE "$URL") 43 | 44 | # Get URL of next page. Will be empty if we are at the last page. 45 | URL=$(grep '^Link:' "$TMPFILE" | tr ',' '\n' | grep 'rel="next"' | head -1 | sed -e 's/.*<\(.*\)>.*/\1/') 46 | rm -f $TMPFILE 47 | 48 | # Number of artifacts on this page: 49 | COUNT=$(( $(jq <<<$JSON -r '.artifacts | length') )) 50 | 51 | # Loop on all artifacts on this page. 52 | for ((i=0; $i < $COUNT; i++)); do 53 | 54 | # Get name of artifact and count instances of this name. 55 | name=$(jq <<<$JSON -r ".artifacts[$i].name?") 56 | ARTCOUNT[$name]=$(( $(( ${ARTCOUNT[$name]} )) + 1)) 57 | 58 | id=$(jq <<<$JSON -r ".artifacts[$i].id?") 59 | size=$(( $(jq <<<$JSON -r ".artifacts[$i].size_in_bytes?") )) 60 | printf "Deleting '%s' #%d, %'d bytes\n" $name ${ARTCOUNT[$name]} $size 61 | ghapi -X DELETE $REPO/actions/artifacts/$id 62 | done 63 | done 64 | -------------------------------------------------------------------------------- /docs/scala-cli/Filtering.scala: -------------------------------------------------------------------------------- 1 | //> using scala "3.7.4" 2 | //> using dep me.mnedokushev::zio-apache-parquet-hadoop:0.3.3 3 | 4 | import zio.* 5 | import zio.schema.* 6 | import me.mnedokushev.zio.apache.parquet.core.codec.* 7 | import me.mnedokushev.zio.apache.parquet.hadoop.{ ParquetReader, ParquetWriter, Path } 8 | import me.mnedokushev.zio.apache.parquet.core.filter.syntax.* 9 | import me.mnedokushev.zio.apache.parquet.core.filter.* 10 | 11 | import java.nio.file.Files 12 | 13 | object Filtering extends ZIOAppDefault: 14 | 15 | case class MyRecord(a: Int, b: String, c: Option[Long]) 16 | 17 | object MyRecord: 18 | // We need to provide field names using singleton types 19 | given Schema.CaseClass3.WithFields["a", "b", "c", Int, String, Option[Long], MyRecord] = 20 | DeriveSchema.gen[MyRecord] 21 | given SchemaEncoder[MyRecord] = 22 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 23 | given ValueEncoder[MyRecord] = 24 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 25 | given ValueDecoder[MyRecord] = 26 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 27 | given TypeTag[MyRecord] = 28 | Derive.derive[TypeTag, MyRecord](TypeTagDeriver.default) 29 | 30 | // Define accessors to use them later in the filter predicate. 31 | // You can give any names to the accessors as we demonstrate here. 
32 | val (id, name, age) = Filter[MyRecord].columns 33 | 34 | val data = 35 | Chunk( 36 | MyRecord(1, "bob", Some(10L)), 37 | MyRecord(2, "bob", Some(12L)), 38 | MyRecord(3, "alice", Some(13L)), 39 | MyRecord(4, "john", None) 40 | ) 41 | 42 | val recordsFile = Path(Files.createTempDirectory("records")) / "records.parquet" 43 | 44 | override def run = 45 | ( 46 | for { 47 | writer <- ZIO.service[ParquetWriter[MyRecord]] 48 | reader <- ZIO.service[ParquetReader[MyRecord]] 49 | _ <- writer.writeChunk(recordsFile, data) 50 | fromFile <- reader.readChunkFiltered( 51 | recordsFile, 52 | filter( 53 | MyRecord.id > 1 `and` ( 54 | MyRecord.name =!= "bob" `or` 55 | // Use .nullable syntax for optional fields. 56 | MyRecord.age.nullable > 10L 57 | ) 58 | ) 59 | ) 60 | _ <- Console.printLine(fromFile) 61 | } yield () 62 | ).provide( 63 | ParquetWriter.configured[MyRecord](), 64 | ParquetReader.configured[MyRecord]() 65 | ) 66 | // Outputs: 67 | // Chunk(MyRecord(2,bob,Some(12)),MyRecord(3,alice,Some(13)),MyRecord(4,john,None)) 68 | -------------------------------------------------------------------------------- /modules/hadoop/src/test/scala/me/mnedokushev/zio/apache/parquet/hadoop/ParquetIOSpec.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Fixtures._ 4 | import me.mnedokushev.zio.apache.parquet.core.filter.Filter 5 | import me.mnedokushev.zio.apache.parquet.core.filter.syntax._ 6 | import zio._ 7 | import zio.stream._ 8 | import zio.test.Assertion._ 9 | import zio.test.TestAspect._ 10 | import zio.test._ 11 | 12 | import java.nio.file.Files 13 | 14 | object ParquetIOSpec extends ZIOSpecDefault { 15 | 16 | val tmpDir = Path(Files.createTempDirectory("zio-apache-parquet")) 17 | val tmpFile = "parquet-writer-spec.parquet" 18 | val tmpCrcPath = tmpDir / ".parquet-writer-spec.parquet.crc" 19 | val tmpPath = tmpDir / tmpFile 20 | 21 | override def spec: Spec[TestEnvironment & Scope, Any] = 22 | suite("ParquetIOSpec")( 23 | test("write and read - chunk") { 24 | val payload = Chunk( 25 | MyRecordIO(1, "foo", None, List(1, 2), Map("first" -> 1, "second" -> 2)), 26 | MyRecordIO(2, "bar", Some(3L), List.empty, Map("third" -> 3)) 27 | ) 28 | 29 | for { 30 | writer <- ZIO.service[ParquetWriter[MyRecordIO]] 31 | reader <- ZIO.service[ParquetReader[MyRecordIO]] 32 | _ <- writer.writeChunk(tmpPath, payload) 33 | result <- reader.readChunk(tmpPath) 34 | } yield assertTrue(result == payload) 35 | } @@ after(cleanTmpFile(tmpDir)), 36 | test("write and read - stream") { 37 | val payload = Chunk( 38 | MyRecordIO(1, "foo", None, List(1, 2), Map("first" -> 1, "second" -> 2)), 39 | MyRecordIO(2, "bar", Some(3L), List.empty, Map("third" -> 3)) 40 | ) 41 | 42 | for { 43 | writer <- ZIO.service[ParquetWriter[MyRecordIO]] 44 | reader <- ZIO.service[ParquetReader[MyRecordIO]] 45 | _ <- writer.writeStream(tmpPath, ZStream.fromChunk(payload)) 46 | resultStream <- ZIO.scoped[Any](reader.readStream(tmpPath).runCollect) 47 | } yield assertTrue(resultStream == payload) 48 | } @@ after(cleanTmpFile(tmpDir)), 49 | test("write full and read projected") { 50 | val payload = Chunk( 51 | MyRecordIO(1, "foo", None, List(1, 2), Map("first" -> 1, "second" -> 2)), 52 | MyRecordIO(2, "bar", Some(3L), List.empty, Map("third" -> 3)) 53 | ) 54 | val projectedPayload = payload.map { r => 55 | MyProjectedRecordIO(r.a, r.c, r.d, r.e) 56 | } 57 | 58 | for { 59 | writer <- 
ZIO.service[ParquetWriter[MyRecordIO]] 60 | reader <- ZIO.service[ParquetReader[MyProjectedRecordIO]] 61 | _ <- writer.writeChunk(tmpPath, payload) 62 | result <- reader.readChunk(tmpPath) 63 | } yield assertTrue(result == projectedPayload) 64 | } @@ after(cleanTmpFile(tmpDir)), 65 | test("write and read with filter") { 66 | val payload = Chunk( 67 | MyRecordIO(1, "foo", None, List(1, 2), Map("first" -> 1, "second" -> 2)), 68 | MyRecordIO(2, "foo", None, List(1, 2), Map.empty), 69 | MyRecordIO(3, "bar", Some(3L), List.empty, Map("third" -> 3)), 70 | MyRecordIO(4, "baz", None, List.empty, Map("fourth" -> 3)) 71 | ) 72 | val (id, name, _, _, _) = Filter[MyRecordIO].columns 73 | 74 | for { 75 | writer <- ZIO.service[ParquetWriter[MyRecordIO]] 76 | reader <- ZIO.service[ParquetReader[MyRecordIO]] 77 | _ <- writer.writeChunk(tmpPath, payload) 78 | result <- reader.readChunkFiltered(tmpPath, filter(id > 1 `and` name =!= "foo")) 79 | } yield assertTrue(result.size == 2) && assert(result)(equalTo(payload.drop(2))) 80 | } @@ after(cleanTmpFile(tmpDir)) 81 | ).provide( 82 | ParquetWriter.configured[MyRecordIO](), 83 | ParquetReader.configured[MyRecordIO](), 84 | ParquetReader.projected[MyProjectedRecordIO]() 85 | ) @@ sequential 86 | 87 | private def cleanTmpFile(path: Path) = 88 | for { 89 | _ <- ZIO.attemptBlockingIO(Files.delete(tmpCrcPath.toJava)) 90 | _ <- ZIO.attemptBlockingIO(Files.delete(tmpPath.toJava)) 91 | _ <- ZIO.attemptBlockingIO(Files.delete(path.toJava)) 92 | } yield () 93 | 94 | } 95 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTagDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import zio.Chunk 4 | import zio.schema.{ Deriver, Schema, StandardType } 5 | 6 | object TypeTagDeriver { 7 | 8 | val default: Deriver[TypeTag] = new Deriver[TypeTag] { 9 | 10 | override def deriveRecord[A]( 11 | record: Schema.Record[A], 12 | fields: => Chunk[Deriver.WrappedF[TypeTag, ?]], 13 | summoned: => Option[TypeTag[A]] 14 | ): TypeTag[A] = 15 | TypeTag.Record( 16 | record.fields 17 | .map(_.name.toString) 18 | .zip(fields.map(_.unwrap)) 19 | .toMap 20 | ) 21 | 22 | override def deriveEnum[A]( 23 | `enum`: Schema.Enum[A], 24 | cases: => Chunk[Deriver.WrappedF[TypeTag, ?]], 25 | summoned: => Option[TypeTag[A]] 26 | ): TypeTag[A] = { 27 | val casesMap = `enum`.cases.map { case0 => 28 | case0.schema.asInstanceOf[Schema.CaseClass0[A]].defaultConstruct() -> case0.id 29 | }.toMap 30 | 31 | TypeTag.enum0(casesMap) 32 | } 33 | 34 | override def derivePrimitive[A]( 35 | st: StandardType[A], 36 | summoned: => Option[TypeTag[A]] 37 | ): TypeTag[A] = 38 | st match { 39 | case StandardType.StringType => TypeTag.string 40 | case StandardType.BoolType => TypeTag.boolean 41 | case StandardType.ByteType => TypeTag.byte 42 | case StandardType.ShortType => TypeTag.short 43 | case StandardType.IntType => TypeTag.int 44 | case StandardType.LongType => TypeTag.long 45 | case StandardType.FloatType => TypeTag.float 46 | case StandardType.DoubleType => TypeTag.double 47 | case StandardType.BinaryType => TypeTag.binary 48 | case StandardType.CharType => TypeTag.char 49 | case StandardType.UUIDType => TypeTag.uuid 50 | case StandardType.CurrencyType => TypeTag.currency 51 | case StandardType.BigDecimalType => TypeTag.bigDecimal 52 | case StandardType.BigIntegerType => TypeTag.bigInteger 53 | case 
StandardType.DayOfWeekType => TypeTag.dayOfWeek 54 | case StandardType.MonthType => TypeTag.month 55 | case StandardType.MonthDayType => TypeTag.monthDay 56 | case StandardType.PeriodType => TypeTag.period 57 | case StandardType.YearType => TypeTag.year 58 | case StandardType.YearMonthType => TypeTag.yearMonth 59 | case StandardType.ZoneIdType => TypeTag.zoneId 60 | case StandardType.ZoneOffsetType => TypeTag.zoneOffset 61 | case StandardType.DurationType => TypeTag.duration 62 | case StandardType.InstantType => TypeTag.instant 63 | case StandardType.LocalDateType => TypeTag.localDate 64 | case StandardType.LocalTimeType => TypeTag.localTime 65 | case StandardType.LocalDateTimeType => TypeTag.localDateTime 66 | case StandardType.OffsetTimeType => TypeTag.offsetTime 67 | case StandardType.OffsetDateTimeType => TypeTag.offsetDateTime 68 | case StandardType.ZonedDateTimeType => TypeTag.zonedDateTime 69 | case StandardType.UnitType => TypeTag.dummy[A] 70 | } 71 | 72 | override def deriveOption[A]( 73 | option: Schema.Optional[A], 74 | inner: => TypeTag[A], 75 | summoned: => Option[TypeTag[Option[A]]] 76 | ): TypeTag[Option[A]] = 77 | TypeTag.optional[A](using inner) 78 | 79 | override def deriveSequence[C[_], A]( 80 | sequence: Schema.Sequence[C[A], A, ?], 81 | inner: => TypeTag[A], 82 | summoned: => Option[TypeTag[C[A]]] 83 | ): TypeTag[C[A]] = 84 | TypeTag.dummy[C[A]] 85 | 86 | override def deriveMap[K, V]( 87 | map: Schema.Map[K, V], 88 | key: => TypeTag[K], 89 | value: => TypeTag[V], 90 | summoned: => Option[TypeTag[Map[K, V]]] 91 | ): TypeTag[Map[K, V]] = 92 | TypeTag.dummy[Map[K, V]] 93 | 94 | override def deriveTransformedRecord[A, B]( 95 | record: Schema.Record[A], 96 | transform: Schema.Transform[A, B, ?], 97 | fields: => Chunk[Deriver.WrappedF[TypeTag, ?]], 98 | summoned: => Option[TypeTag[B]] 99 | ): TypeTag[B] = 100 | TypeTag.dummy[B] 101 | 102 | }.cached 103 | 104 | val summoned: Deriver[TypeTag] = default.autoAcceptSummoned 105 | 106 | } 107 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/Predicate.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import org.apache.parquet.filter2.predicate.{ FilterApi, FilterPredicate, Operators } 4 | import zio.prelude._ 5 | 6 | sealed trait Predicate[A] { self => 7 | 8 | def and[B](other: Predicate[B]): Predicate[A & B] = 9 | Predicate.Logical[A, B](self, other, Operator.Logical.And[A, B]()) 10 | 11 | def or[B](other: Predicate[B]): Predicate[A & B] = 12 | Predicate.Logical[A, B](self, other, Operator.Logical.Or[A, B]()) 13 | 14 | } 15 | 16 | object Predicate { 17 | 18 | private[filter] trait Syntax { 19 | def not[A](pred: Predicate[A]) = 20 | Predicate.Unary(pred, Operator.Unary.Not[A]()) 21 | } 22 | 23 | final case class Binary[A](column: Column[A], value: A, op: Operator.Binary[A]) extends Predicate[A] 24 | 25 | final case class BinarySet[A](column: Column[A], values: Set[A], op: Operator.Binary.Set[A]) extends Predicate[A] 26 | 27 | final case class Unary[A](predicate: Predicate[A], op: Operator.Unary[A]) extends Predicate[A] 28 | 29 | final case class Logical[A, B](left: Predicate[A], right: Predicate[B], op: Operator.Logical[A, B]) 30 | extends Predicate[A & B] 31 | 32 | private[zio] def compile0[A](predicate: Predicate[A]): Either[String, FilterPredicate] = { 33 | 34 | def error(op: Operator) = 35 | Left(s"Operator $op is not 
supported by $predicate") 36 | 37 | def binarySet[T <: Comparable[T], C <: Operators.Column[T] & Operators.SupportsEqNotEq]( 38 | column: C, 39 | values: java.util.Set[T], 40 | op: Operator.Binary.Set[?] 41 | ) = 42 | op match { 43 | case Operator.Binary.Set.In() => 44 | Right(FilterApi.in(column, values)) 45 | case Operator.Binary.Set.NotIn() => 46 | Right(FilterApi.notIn(column, values)) 47 | } 48 | 49 | predicate match { 50 | case Predicate.Unary(predicate0, op) => 51 | op match { 52 | case Operator.Unary.Not() => 53 | compile0(predicate0).map(FilterApi.not) 54 | } 55 | case Predicate.Logical(left, right, op) => 56 | (compile0(left) <*> compile0(right)).map { case (left0, right0) => 57 | op match { 58 | case Operator.Logical.And() => 59 | FilterApi.and(left0, right0) 60 | case Operator.Logical.Or() => 61 | FilterApi.or(left0, right0) 62 | } 63 | } 64 | case Predicate.Binary(column, value, op) => 65 | column.typeTag match { 66 | case typeTag: TypeTag.EqNotEq[_] => 67 | val typeTag0 = typeTag.cast[A] 68 | val column0 = typeTag0.column(column.path) 69 | val value0 = typeTag0.value(value) 70 | 71 | op match { 72 | case Operator.Binary.Eq() => 73 | Right(FilterApi.eq(column0, value0)) 74 | case Operator.Binary.NotEq() => 75 | Right(FilterApi.notEq(column0, value0)) 76 | case _ => 77 | error(op) 78 | } 79 | case typeTag: TypeTag.LtGt[_] => 80 | val typeTag0 = typeTag.cast[A] 81 | val column0 = typeTag0.column(column.path) 82 | val value0 = typeTag0.value(value) 83 | 84 | op match { 85 | case Operator.Binary.Eq() => 86 | Right(FilterApi.eq(column0, value0)) 87 | case Operator.Binary.NotEq() => 88 | Right(FilterApi.notEq(column0, value0)) 89 | case Operator.Binary.LessThen() => 90 | Right(FilterApi.lt(column0, value0)) 91 | case Operator.Binary.LessEq() => 92 | Right(FilterApi.ltEq(column0, value0)) 93 | case Operator.Binary.GreaterThen() => 94 | Right(FilterApi.gt(column0, value0)) 95 | case Operator.Binary.GreaterEq() => 96 | Right(FilterApi.gtEq(column0, value0)) 97 | case _ => 98 | error(op) 99 | } 100 | case _ => 101 | error(op) 102 | } 103 | case Predicate.BinarySet(column, values, op) => 104 | column.typeTag match { 105 | case typeTag: TypeTag.EqNotEq[_] => 106 | val typeTag0 = typeTag.cast[A] 107 | val column0 = typeTag0.column(column.path) 108 | val values0 = typeTag0.values(values) 109 | 110 | binarySet(column0, values0, op) 111 | case typeTag: TypeTag.LtGt[_] => 112 | val typeTag0 = typeTag.cast[A] 113 | val column0 = typeTag0.column(column.path) 114 | val values0 = typeTag0.values(values) 115 | 116 | binarySet(column0, values0, op) 117 | case _ => 118 | error(op) 119 | } 120 | } 121 | 122 | } 123 | 124 | } 125 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/ParquetWriter.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Schemas 4 | import me.mnedokushev.zio.apache.parquet.core.Value.GroupValue.RecordValue 5 | import me.mnedokushev.zio.apache.parquet.core.codec.{ SchemaEncoder, ValueEncoder } 6 | import org.apache.hadoop.conf.Configuration 7 | import org.apache.parquet.hadoop.api.{ WriteSupport => HadoopWriteSupport } 8 | import org.apache.parquet.hadoop.metadata.CompressionCodecName 9 | import org.apache.parquet.hadoop.{ ParquetFileWriter, ParquetWriter => HadoopParquetWriter } 10 | import org.apache.parquet.io.OutputFile 11 | import 
org.apache.parquet.schema.MessageType 12 | import zio._ 13 | import zio.schema.Schema 14 | import zio.stream._ 15 | 16 | trait ParquetWriter[-A <: Product] { 17 | 18 | def writeChunk(path: Path, data: Chunk[A]): Task[Unit] 19 | 20 | def writeStream[R](path: Path, data: ZStream[R, Throwable, A]): RIO[R, Unit] 21 | 22 | } 23 | 24 | final class ParquetWriterLive[A <: Product]( 25 | writeMode: ParquetFileWriter.Mode, 26 | compressionCodecName: CompressionCodecName, 27 | dictionaryEncodingEnabled: Boolean, 28 | dictionaryPageSize: Int, 29 | maxPaddingSize: Int, 30 | pageSize: Int, 31 | rowGroupSize: Long, 32 | validationEnabled: Boolean, 33 | hadoopConf: Configuration 34 | )(implicit schema: Schema[A], schemaEncoder: SchemaEncoder[A], encoder: ValueEncoder[A], tag: Tag[A]) 35 | extends ParquetWriter[A] { 36 | 37 | override def writeChunk(path: Path, data: Chunk[A]): Task[Unit] = 38 | ZIO.scoped[Any]( 39 | for { 40 | writer <- build(path) 41 | _ <- ZIO.foreachDiscard(data)(writeSingle(writer, _)) 42 | } yield () 43 | ) 44 | 45 | override def writeStream[R](path: Path, stream: ZStream[R, Throwable, A]): RIO[R, Unit] = 46 | ZIO.scoped[R]( 47 | for { 48 | writer <- build(path) 49 | _ <- stream.runForeach(writeSingle(writer, _)) 50 | } yield () 51 | ) 52 | 53 | private def writeSingle(writer: HadoopParquetWriter[RecordValue], value: A): Task[Unit] = 54 | for { 55 | record <- encoder.encodeZIO(value) 56 | _ <- ZIO.attemptBlockingIO(writer.write(record.asInstanceOf[RecordValue])) 57 | } yield () 58 | 59 | private def build(path: Path): RIO[Scope, HadoopParquetWriter[RecordValue]] = 60 | for { 61 | schema <- schemaEncoder.encodeZIO(schema, tag.tag.shortName, optional = false) 62 | messageSchema <- ZIO.attempt(Schemas.asMessageType(schema)) 63 | outputFile <- path.toOutputFileZIO(hadoopConf) 64 | builder = new ParquetWriter.Builder(outputFile, messageSchema) 65 | .withWriteMode(writeMode) 66 | .withCompressionCodec(compressionCodecName) 67 | .withDictionaryEncoding(dictionaryEncodingEnabled) 68 | .withDictionaryPageSize(dictionaryPageSize) 69 | .withMaxPaddingSize(maxPaddingSize) 70 | .withPageSize(pageSize) 71 | .withRowGroupSize(rowGroupSize) 72 | .withValidation(validationEnabled) 73 | .withConf(hadoopConf) 74 | writer <- ZIO.fromAutoCloseable(ZIO.attemptBlockingIO(builder.build())) 75 | } yield writer 76 | 77 | } 78 | 79 | object ParquetWriter { 80 | 81 | final class Builder(file: OutputFile, schema: MessageType) 82 | extends HadoopParquetWriter.Builder[RecordValue, Builder](file) { 83 | 84 | override def self(): Builder = this 85 | 86 | override def getWriteSupport(conf: Configuration): HadoopWriteSupport[RecordValue] = 87 | new WriteSupport(schema, Map.empty) 88 | 89 | } 90 | 91 | def configured[A <: Product: ValueEncoder]( 92 | writeMode: ParquetFileWriter.Mode = ParquetFileWriter.Mode.CREATE, 93 | compressionCodecName: CompressionCodecName = HadoopParquetWriter.DEFAULT_COMPRESSION_CODEC_NAME, 94 | dictionaryEncodingEnabled: Boolean = HadoopParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED, 95 | dictionaryPageSize: Int = HadoopParquetWriter.DEFAULT_PAGE_SIZE, 96 | maxPaddingSize: Int = HadoopParquetWriter.MAX_PADDING_SIZE_DEFAULT, 97 | pageSize: Int = HadoopParquetWriter.DEFAULT_PAGE_SIZE, 98 | rowGroupSize: Long = HadoopParquetWriter.DEFAULT_BLOCK_SIZE, 99 | validationEnabled: Boolean = HadoopParquetWriter.DEFAULT_IS_VALIDATING_ENABLED, 100 | hadoopConf: Configuration = new Configuration() 101 | )(implicit 102 | schema: Schema[A], 103 | schemaEncoder: SchemaEncoder[A], 104 | tag: Tag[A] 105 | ): 
TaskLayer[ParquetWriter[A]] = 106 | ZLayer.succeed( 107 | new ParquetWriterLive[A]( 108 | writeMode, 109 | compressionCodecName, 110 | dictionaryEncodingEnabled, 111 | dictionaryPageSize, 112 | maxPaddingSize, 113 | pageSize, 114 | rowGroupSize, 115 | validationEnabled, 116 | hadoopConf 117 | ) 118 | ) 119 | 120 | } 121 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/OperatorSupport.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import zio.{ Chunk, Duration } 4 | 5 | import java.time.{ 6 | DayOfWeek, 7 | Instant, 8 | LocalDate, 9 | LocalDateTime, 10 | LocalTime, 11 | Month, 12 | MonthDay, 13 | OffsetDateTime, 14 | OffsetTime, 15 | Period, 16 | Year, 17 | YearMonth, 18 | ZoneId, 19 | ZoneOffset, 20 | ZonedDateTime 21 | } 22 | import java.util.UUID 23 | import scala.annotation.implicitNotFound 24 | 25 | sealed trait OperatorSupport[A] 26 | 27 | object OperatorSupport { 28 | 29 | trait Optional[A, S[_] <: OperatorSupport[?]] { 30 | val operatorSupport: S[A] 31 | } 32 | 33 | @implicitNotFound("You can't use this operator for the type ${A}") 34 | abstract class LtGt[A: TypeTag] extends OperatorSupport[A] { 35 | val typeTag: TypeTag[A] = implicitly[TypeTag[A]] 36 | } 37 | 38 | object LtGt { 39 | 40 | implicit def optional[A: TypeTag: LtGt]: LtGt[Option[A]] = 41 | new LtGt[Option[A]] with Optional[A, LtGt] { 42 | override val operatorSupport: LtGt[A] = implicitly[LtGt[A]] 43 | } 44 | 45 | implicit case object byte extends LtGt[Byte] 46 | implicit case object short extends LtGt[Short] 47 | implicit case object int extends LtGt[Int] 48 | implicit case object long extends LtGt[Long] 49 | implicit case object float extends LtGt[Float] 50 | implicit case object double extends LtGt[Double] 51 | implicit case object bigDecimal extends LtGt[java.math.BigDecimal] 52 | implicit case object bigInteger extends LtGt[java.math.BigInteger] 53 | implicit case object dayOfWeek extends LtGt[DayOfWeek] 54 | implicit case object month extends LtGt[Month] 55 | implicit case object monthDay extends LtGt[MonthDay] 56 | implicit case object period extends LtGt[Period] 57 | implicit case object year extends LtGt[Year] 58 | implicit case object yearMonth extends LtGt[YearMonth] 59 | implicit case object duration extends LtGt[Duration] 60 | implicit case object instant extends LtGt[Instant] 61 | implicit case object localDate extends LtGt[LocalDate] 62 | implicit case object localTime extends LtGt[LocalTime] 63 | implicit case object localDateTime extends LtGt[LocalDateTime] 64 | implicit case object offsetTime extends LtGt[OffsetTime] 65 | implicit case object offsetDateTime extends LtGt[OffsetDateTime] 66 | implicit case object zonedDateTime extends LtGt[ZonedDateTime] 67 | 68 | } 69 | 70 | @implicitNotFound("You can't use this operator for the type ${A}") 71 | abstract class EqNotEq[A: TypeTag] extends OperatorSupport[A] { 72 | val typeTag: TypeTag[A] = implicitly[TypeTag[A]] 73 | } 74 | 75 | object EqNotEq { 76 | 77 | implicit def enum0[A: TypeTag]: EqNotEq[A] = new EqNotEq[A] {} 78 | 79 | implicit def optional[A: TypeTag: EqNotEq]: EqNotEq[Option[A]] = 80 | new EqNotEq[Option[A]] with Optional[A, EqNotEq] { 81 | override val operatorSupport: EqNotEq[A] = implicitly[EqNotEq[A]] 82 | } 83 | 84 | implicit case object string extends EqNotEq[String] 85 | implicit case object boolean extends EqNotEq[Boolean] 86 
| implicit case object byte extends EqNotEq[Byte] 87 | implicit case object short extends EqNotEq[Short] 88 | implicit case object int extends EqNotEq[Int] 89 | implicit case object long extends EqNotEq[Long] 90 | implicit case object float extends EqNotEq[Float] 91 | implicit case object double extends EqNotEq[Double] 92 | implicit case object binary extends EqNotEq[Chunk[Byte]] 93 | implicit case object char extends EqNotEq[Char] 94 | implicit case object uuid extends EqNotEq[UUID] 95 | implicit case object currency extends EqNotEq[java.util.Currency] 96 | implicit case object bigDecimal extends EqNotEq[java.math.BigDecimal] 97 | implicit case object bigInteger extends EqNotEq[java.math.BigInteger] 98 | implicit case object dayOfWeek extends EqNotEq[DayOfWeek] 99 | implicit case object month extends EqNotEq[Month] 100 | implicit case object monthDay extends EqNotEq[MonthDay] 101 | implicit case object period extends EqNotEq[Period] 102 | implicit case object year extends EqNotEq[Year] 103 | implicit case object yearMonth extends EqNotEq[YearMonth] 104 | implicit case object zoneId extends EqNotEq[ZoneId] 105 | implicit case object zoneOffset extends EqNotEq[ZoneOffset] 106 | implicit case object duration extends EqNotEq[Duration] 107 | implicit case object instant extends EqNotEq[Instant] 108 | implicit case object localDate extends EqNotEq[LocalDate] 109 | implicit case object localTime extends EqNotEq[LocalTime] 110 | implicit case object localDateTime extends EqNotEq[LocalDateTime] 111 | implicit case object offsetTime extends EqNotEq[OffsetTime] 112 | implicit case object offsetDateTime extends EqNotEq[OffsetDateTime] 113 | implicit case object zonedDateTime extends EqNotEq[ZonedDateTime] 114 | 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/ParquetReader.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value.GroupValue.RecordValue 4 | import me.mnedokushev.zio.apache.parquet.core.codec.{ SchemaEncoder, ValueDecoder } 5 | import me.mnedokushev.zio.apache.parquet.core.filter.CompiledPredicate 6 | import org.apache.hadoop.conf.Configuration 7 | import org.apache.parquet.filter2.compat.FilterCompat 8 | import org.apache.parquet.hadoop.api.{ ReadSupport => HadoopReadSupport } 9 | import org.apache.parquet.hadoop.{ ParquetReader => HadoopParquetReader } 10 | import org.apache.parquet.io.InputFile 11 | import zio._ 12 | import zio.schema.Schema 13 | import zio.stream._ 14 | 15 | import java.io.IOException 16 | 17 | trait ParquetReader[+A <: Product] { 18 | 19 | def readStream(path: Path): ZStream[Scope, Throwable, A] 20 | 21 | def readStreamFiltered(path: Path, filter: CompiledPredicate): ZStream[Scope, Throwable, A] 22 | 23 | def readChunk[B](path: Path): Task[Chunk[A]] 24 | 25 | def readChunkFiltered[B](path: Path, filter: CompiledPredicate): Task[Chunk[A]] 26 | 27 | } 28 | 29 | final class ParquetReaderLive[A <: Product: Tag]( 30 | hadoopConf: Configuration, 31 | schema: Option[Schema[A]] = None, 32 | schemaEncoder: Option[SchemaEncoder[A]] = None 33 | )(implicit decoder: ValueDecoder[A]) 34 | extends ParquetReader[A] { 35 | 36 | override def readStream(path: Path): ZStream[Scope, Throwable, A] = 37 | for { 38 | reader <- ZStream.fromZIO(build(path, None)) 39 | value <- readStream0(reader) 40 | } yield value 41 | 
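// Like readStream, but passes the compiled filter predicate down to the underlying Hadoop reader (see build below).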
42 | override def readStreamFiltered(path: Path, filter: CompiledPredicate): ZStream[Scope, Throwable, A] = 43 | for { 44 | reader <- ZStream.fromZIO(build(path, Some(filter))) 45 | value <- readStream0(reader) 46 | } yield value 47 | 48 | override def readChunk[B](path: Path): Task[Chunk[A]] = 49 | ZIO.scoped( 50 | for { 51 | reader <- build(path, None) 52 | result <- readChunk0(reader) 53 | } yield result 54 | ) 55 | 56 | override def readChunkFiltered[B](path: Path, filter: CompiledPredicate): Task[Chunk[A]] = 57 | ZIO.scoped( 58 | for { 59 | reader <- build(path, Some(filter)) 60 | result <- readChunk0(reader) 61 | } yield result 62 | ) 63 | 64 | private def readStream0(reader: HadoopParquetReader[RecordValue]): ZStream[Any, Throwable, A] = 65 | ZStream.repeatZIOOption( 66 | ZIO 67 | .attemptBlockingIO(reader.read()) 68 | .asSomeError 69 | .filterOrFail(_ != null)(None) 70 | .flatMap(decoder.decodeZIO(_).asSomeError) 71 | ) 72 | 73 | private def readChunk0[B](reader: HadoopParquetReader[RecordValue]): Task[Chunk[A]] = { 74 | val readNext = for { 75 | value <- ZIO.attemptBlockingIO(reader.read()) 76 | record <- if (value != null) 77 | decoder.decodeZIO(value) 78 | else 79 | ZIO.succeed(null.asInstanceOf[A]) 80 | } yield record 81 | val builder = Chunk.newBuilder[A] 82 | 83 | ZIO.scoped( 84 | for { 85 | initial <- readNext 86 | _ <- { 87 | var current = initial 88 | 89 | ZIO.whileLoop(current != null)(readNext) { next => 90 | builder.addOne(current) 91 | current = next 92 | } 93 | } 94 | } yield builder.result() 95 | ) 96 | } 97 | 98 | private def build[B]( 99 | path: Path, 100 | filter: Option[CompiledPredicate] 101 | ): ZIO[Scope, IOException, HadoopParquetReader[RecordValue]] = 102 | for { 103 | inputFile <- path.toInputFileZIO(hadoopConf) 104 | compiledFilter <- ZIO.foreach(filter) { pred => 105 | ZIO 106 | .fromEither(pred) 107 | .mapError(new IOException(_)) 108 | } 109 | reader <- ZIO.fromAutoCloseable( 110 | ZIO.attemptBlockingIO { 111 | val builder = new ParquetReader.Builder(inputFile, schema, schemaEncoder) 112 | 113 | compiledFilter.foreach(pred => builder.withFilter(FilterCompat.get(pred))) 114 | builder.withConf(hadoopConf).build() 115 | } 116 | ) 117 | } yield reader 118 | 119 | } 120 | 121 | object ParquetReader { 122 | 123 | final class Builder[A: Tag]( 124 | file: InputFile, 125 | schema: Option[Schema[A]] = None, 126 | schemaEncoder: Option[SchemaEncoder[A]] = None 127 | ) extends HadoopParquetReader.Builder[RecordValue](file) { 128 | 129 | override protected def getReadSupport: HadoopReadSupport[RecordValue] = 130 | new ReadSupport(schema, schemaEncoder) 131 | 132 | } 133 | 134 | def configured[A <: Product: ValueDecoder: Tag]( 135 | hadoopConf: Configuration = new Configuration() 136 | ): ULayer[ParquetReader[A]] = 137 | ZLayer.succeed(new ParquetReaderLive[A](hadoopConf)) 138 | 139 | def projected[A <: Product: ValueDecoder: Tag]( 140 | hadoopConf: Configuration = new Configuration() 141 | )(implicit 142 | schema: Schema[A], 143 | schemaEncoder: SchemaEncoder[A] 144 | ): ULayer[ParquetReader[A]] = 145 | ZLayer.succeed(new ParquetReaderLive[A](hadoopConf, Some(schema), Some(schemaEncoder))) 146 | 147 | } 148 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/Schemas.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core 2 | 3 | import 
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName 4 | import org.apache.parquet.schema.Type.Repetition 5 | import org.apache.parquet.schema._ 6 | import zio.Chunk 7 | 8 | object Schemas { 9 | 10 | abstract class Def[Self <: Def[?]] { 11 | 12 | def named(name: String): Type 13 | 14 | def optionality(condition: Boolean): Self = 15 | if (condition) optional else required 16 | 17 | def required: Self 18 | 19 | def optional: Self 20 | 21 | } 22 | 23 | case class PrimitiveDef( 24 | typeName: PrimitiveTypeName, 25 | annotation: Option[LogicalTypeAnnotation] = None, 26 | isOptional: Boolean = false, 27 | length: Int = 0 28 | ) extends Def[PrimitiveDef] { 29 | 30 | def named(name: String): Type = { 31 | val builder = Types.primitive(typeName, repetition(isOptional)) 32 | 33 | annotation 34 | .fold(builder)(builder.as) 35 | .length(length) 36 | .named(name) 37 | } 38 | 39 | def length(len: Int): PrimitiveDef = 40 | this.copy(length = len) 41 | 42 | def required: PrimitiveDef = 43 | this.copy(isOptional = false) 44 | 45 | def optional: PrimitiveDef = 46 | this.copy(isOptional = true) 47 | 48 | } 49 | 50 | case class RecordDef(fields: Chunk[Type], isOptional: Boolean = false) extends Def[RecordDef] { 51 | 52 | def named(name: String): Type = { 53 | val builder = Types.buildGroup(repetition(isOptional)) 54 | 55 | fields.foreach(builder.addField) 56 | builder.named(name) 57 | } 58 | 59 | def required: RecordDef = 60 | this.copy(isOptional = false) 61 | 62 | def optional: RecordDef = 63 | this.copy(isOptional = true) 64 | 65 | } 66 | 67 | case class ListDef( 68 | element: Type, 69 | isOptional: Boolean = false 70 | ) extends Def[ListDef] { 71 | 72 | def named(name: String): Type = 73 | Types 74 | .list(repetition(isOptional)) 75 | .element(element) 76 | .named(name) 77 | 78 | def required: ListDef = 79 | this.copy(isOptional = false) 80 | 81 | def optional: ListDef = 82 | this.copy(isOptional = true) 83 | 84 | } 85 | 86 | case class MapDef(key: Type, value: Type, isOptional: Boolean = false) extends Def[MapDef] { 87 | 88 | override def named(name: String): Type = 89 | Types 90 | .map(repetition(isOptional)) 91 | .key(key) 92 | .value(value) 93 | .named(name) 94 | 95 | override def required: MapDef = 96 | this.copy(isOptional = false) 97 | 98 | override def optional: MapDef = 99 | this.copy(isOptional = true) 100 | 101 | } 102 | 103 | def repetition(optional: Boolean): Repetition = 104 | if (optional) Repetition.OPTIONAL else Repetition.REQUIRED 105 | 106 | def asMessageType(schema: Type): MessageType = { 107 | val groupSchema = schema.asGroupType() 108 | val name = groupSchema.getName 109 | val fields = groupSchema.getFields 110 | 111 | new MessageType(name, fields) 112 | } 113 | 114 | import PrimitiveTypeName._ 115 | import LogicalTypeAnnotation._ 116 | 117 | // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md 118 | def enum0: PrimitiveDef = PrimitiveDef(BINARY, Some(enumType())) 119 | val string: PrimitiveDef = PrimitiveDef(BINARY, Some(stringType())) 120 | val boolean: PrimitiveDef = PrimitiveDef(BOOLEAN) 121 | val byte: PrimitiveDef = PrimitiveDef(INT32, Some(intType(8, false))) 122 | val short: PrimitiveDef = PrimitiveDef(INT32, Some(intType(16, true))) 123 | val int: PrimitiveDef = PrimitiveDef(INT32, Some(intType(32, true))) 124 | val long: PrimitiveDef = PrimitiveDef(INT64, Some(intType(64, true))) 125 | val float: PrimitiveDef = PrimitiveDef(FLOAT) 126 | val double: PrimitiveDef = PrimitiveDef(DOUBLE) 127 | val binary: PrimitiveDef = PrimitiveDef(BINARY) 128 | val 
char: PrimitiveDef = byte 129 | val uuid: PrimitiveDef = PrimitiveDef(FIXED_LEN_BYTE_ARRAY, Some(uuidType())).length(16) 130 | val currency: PrimitiveDef = string 131 | val bigDecimal: PrimitiveDef = PrimitiveDef(INT64, Some(decimalType(DECIMAL_PRECISION, DECIMAL_SCALE))) 132 | val bigInteger: PrimitiveDef = PrimitiveDef(BINARY) 133 | val dayOfWeek: PrimitiveDef = byte 134 | val monthType: PrimitiveDef = byte 135 | val monthDay: PrimitiveDef = PrimitiveDef(FIXED_LEN_BYTE_ARRAY).length(2) 136 | val period: PrimitiveDef = PrimitiveDef(FIXED_LEN_BYTE_ARRAY).length(12) 137 | val year: PrimitiveDef = PrimitiveDef(INT32, Some(intType(16, false))) 138 | val yearMonth: PrimitiveDef = PrimitiveDef(FIXED_LEN_BYTE_ARRAY).length(4) 139 | val zoneId: PrimitiveDef = string 140 | val zoneOffset: PrimitiveDef = string 141 | val duration: PrimitiveDef = PrimitiveDef(INT64, Some(intType(64, false))) 142 | val instant: PrimitiveDef = PrimitiveDef(INT64, Some(intType(64, false))) 143 | val localDate: PrimitiveDef = PrimitiveDef(INT32, Some(dateType())) 144 | val localTime: PrimitiveDef = PrimitiveDef(INT32, Some(timeType(true, TimeUnit.MILLIS))) 145 | val localDateTime: PrimitiveDef = PrimitiveDef(INT64, Some(timestampType(true, TimeUnit.MILLIS))) 146 | val offsetTime: PrimitiveDef = PrimitiveDef(INT32, Some(timeType(false, TimeUnit.MILLIS))) 147 | val offsetDateTime: PrimitiveDef = PrimitiveDef(INT64, Some(timestampType(false, TimeUnit.MILLIS))) 148 | val zonedDateTime: PrimitiveDef = offsetDateTime 149 | 150 | def record(fields: Chunk[Type]): RecordDef = RecordDef(fields) 151 | def list(element: Type): ListDef = ListDef(element) 152 | def map(key: Type, value: Type): MapDef = MapDef(key, value) 153 | 154 | } 155 | -------------------------------------------------------------------------------- /modules/hadoop/src/main/scala/me/mnedokushev/zio/apache/parquet/hadoop/GroupValueConverter.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.hadoop 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value 4 | import me.mnedokushev.zio.apache.parquet.core.Value.{ GroupValue, PrimitiveValue } 5 | import org.apache.parquet.io.api.{ Binary, Converter, GroupConverter, PrimitiveConverter } 6 | import org.apache.parquet.schema.Type.Repetition 7 | import org.apache.parquet.schema.{ GroupType, LogicalTypeAnnotation } 8 | import zio.Chunk 9 | 10 | import scala.jdk.CollectionConverters._ 11 | 12 | trait GroupValueConverter[V <: GroupValue[V]] extends GroupConverter { 13 | 14 | def get: V = 15 | this.groupValue 16 | 17 | def put(name: String, value: Value): Unit 18 | 19 | protected var groupValue: V = 20 | null.asInstanceOf[V] 21 | 22 | protected val converters: Chunk[Converter] 23 | 24 | override def getConverter(fieldIndex: Int): Converter = 25 | converters(fieldIndex) 26 | 27 | } 28 | 29 | object GroupValueConverter { 30 | 31 | abstract case class Default[V <: GroupValue[V]](schema: GroupType) extends GroupValueConverter[V] { 32 | 33 | override def put(name: String, value: Value): Unit = 34 | this.groupValue = this.groupValue.put(name, value) 35 | 36 | override protected val converters: Chunk[Converter] = 37 | Chunk.fromIterable( 38 | schema.getFields.asScala.toList.map { schema0 => 39 | val name = schema0.getName 40 | 41 | schema0.getLogicalTypeAnnotation match { 42 | case _ if schema0.isPrimitive => 43 | GroupValueConverter.primitive(name, this) 44 | case _: LogicalTypeAnnotation.ListLogicalTypeAnnotation => 45 | 
GroupValueConverter.list(schema0.asGroupType(), name, this) 46 | case _: LogicalTypeAnnotation.MapLogicalTypeAnnotation => 47 | GroupValueConverter.map(schema0.asGroupType(), name, this) 48 | case _ => 49 | (name, schema0.getRepetition) match { 50 | case ("list", Repetition.REPEATED) => 51 | GroupValueConverter.listElement(schema0.asGroupType(), this) 52 | case ("key_value", Repetition.REPEATED) => 53 | GroupValueConverter.mapKeyValue(schema0.asGroupType(), name, this) 54 | case _ => 55 | GroupValueConverter.record(schema0.asGroupType(), name, this) 56 | } 57 | } 58 | } 59 | ) 60 | 61 | } 62 | 63 | abstract case class ByPass[V <: GroupValue[V], S <: GroupValue[S]]( 64 | schema: GroupType, 65 | toSelf: GroupValueConverter[S] 66 | ) extends GroupValueConverter[V] { 67 | 68 | override def put(name: String, value: Value): Unit = 69 | toSelf.groupValue = toSelf.groupValue.put(name, value) 70 | 71 | override protected val converters: Chunk[Converter] = 72 | Chunk.fromIterable( 73 | schema.getFields.asScala.toList.map { schema0 => 74 | val name = schema0.getName 75 | 76 | schema0.getLogicalTypeAnnotation match { 77 | case _ if schema0.isPrimitive => 78 | GroupValueConverter.primitive(name, toSelf) 79 | case _: LogicalTypeAnnotation.ListLogicalTypeAnnotation => 80 | GroupValueConverter.list(schema0.asGroupType(), name, this) 81 | case _: LogicalTypeAnnotation.MapLogicalTypeAnnotation => 82 | GroupValueConverter.map(schema0.asGroupType(), name, this) 83 | case _ => 84 | (name, schema0.getRepetition) match { 85 | case ("list", Repetition.REPEATED) => 86 | GroupValueConverter.listElement(schema0.asGroupType(), this) 87 | case ("key_value", Repetition.REPEATED) => 88 | GroupValueConverter.mapKeyValue(schema0.asGroupType(), name, this) 89 | case _ => 90 | GroupValueConverter.record(schema0.asGroupType(), name, this) 91 | } 92 | } 93 | } 94 | ) 95 | 96 | } 97 | 98 | def primitive[V <: GroupValue[V]](name: String, parent: GroupValueConverter[V]): PrimitiveConverter = 99 | new PrimitiveConverter { 100 | 101 | override def addBinary(value: Binary): Unit = 102 | parent.put(name, PrimitiveValue.BinaryValue(value)) 103 | 104 | override def addBoolean(value: Boolean): Unit = 105 | parent.put(name, PrimitiveValue.BooleanValue(value)) 106 | 107 | override def addDouble(value: Double): Unit = 108 | parent.put(name, PrimitiveValue.DoubleValue(value)) 109 | 110 | override def addFloat(value: Float): Unit = 111 | parent.put(name, PrimitiveValue.FloatValue(value)) 112 | 113 | override def addInt(value: Int): Unit = 114 | parent.put(name, PrimitiveValue.Int32Value(value)) 115 | 116 | override def addLong(value: Long): Unit = 117 | parent.put(name, PrimitiveValue.Int64Value(value)) 118 | 119 | } 120 | 121 | def record[V <: GroupValue[V]]( 122 | schema: GroupType, 123 | name: String, 124 | parent: GroupValueConverter[V] 125 | ): GroupValueConverter[GroupValue.RecordValue] = 126 | new Default[GroupValue.RecordValue](schema) { 127 | 128 | override def start(): Unit = 129 | this.groupValue = Value.record( 130 | this.schema.getFields.asScala.toList.map(_.getName -> Value.nil).toMap 131 | ) 132 | 133 | override def end(): Unit = 134 | parent.put(name, this.groupValue) 135 | 136 | } 137 | 138 | def list[V <: GroupValue[V]]( 139 | schema: GroupType, 140 | name: String, 141 | parent: GroupValueConverter[V] 142 | ): GroupValueConverter[GroupValue.ListValue] = 143 | new Default[GroupValue.ListValue](schema) { 144 | 145 | override def start(): Unit = 146 | this.groupValue = Value.list(Chunk.empty) 147 | 148 | override def end(): 
Unit = 149 | parent.put(name, this.groupValue) 150 | } 151 | 152 | def listElement[V <: GroupValue[V], S <: GroupValue[S]]( 153 | schema: GroupType, 154 | parent: GroupValueConverter[S] 155 | ): GroupValueConverter[GroupValue.RecordValue] = 156 | new ByPass[GroupValue.RecordValue, S](schema, parent) { 157 | 158 | override def start(): Unit = () 159 | 160 | override def end(): Unit = () 161 | 162 | } 163 | 164 | def map[V <: GroupValue[V]]( 165 | schema: GroupType, 166 | name: String, 167 | parent: GroupValueConverter[V] 168 | ): GroupValueConverter[GroupValue.MapValue] = 169 | new Default[GroupValue.MapValue](schema) { 170 | 171 | override def start(): Unit = 172 | this.groupValue = Value.map(Map.empty) 173 | 174 | override def end(): Unit = 175 | parent.put(name, this.groupValue) 176 | } 177 | 178 | def mapKeyValue[V <: GroupValue[V]]( 179 | schema: GroupType, 180 | name: String, 181 | parent: GroupValueConverter[V] 182 | ): GroupValueConverter[GroupValue.RecordValue] = 183 | new Default[GroupValue.RecordValue](schema) { 184 | 185 | override def start(): Unit = 186 | this.groupValue = Value.record(Map("key" -> Value.nil, "value" -> Value.nil)) 187 | 188 | override def end(): Unit = 189 | parent.put(name, this.groupValue) 190 | 191 | } 192 | 193 | def root(schema: GroupType): GroupValueConverter[GroupValue.RecordValue] = 194 | new Default[GroupValue.RecordValue](schema) { 195 | 196 | override def start(): Unit = 197 | this.groupValue = Value.record( 198 | this.schema.getFields.asScala.toList.map(_.getName -> Value.nil).toMap 199 | ) 200 | 201 | override def end(): Unit = () 202 | } 203 | 204 | } 205 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/SchemaEncoderDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Schemas 4 | import me.mnedokushev.zio.apache.parquet.core.Schemas.PrimitiveDef 5 | import org.apache.parquet.schema.Type 6 | import zio.Chunk 7 | import zio.schema.{ Deriver, Schema, StandardType } 8 | 9 | object SchemaEncoderDeriver { 10 | 11 | val default: Deriver[SchemaEncoder] = new Deriver[SchemaEncoder] { 12 | 13 | override def deriveRecord[A]( 14 | record: Schema.Record[A], 15 | fields: => Chunk[Deriver.WrappedF[SchemaEncoder, ?]], 16 | summoned: => Option[SchemaEncoder[A]] 17 | ): SchemaEncoder[A] = new SchemaEncoder[A] { 18 | 19 | private def enc[A1](name0: String, schema0: Schema[A1], encoder: SchemaEncoder[?]) = 20 | encoder.asInstanceOf[SchemaEncoder[A1]].encode(schema0, name0, isSchemaOptional(schema0)) 21 | 22 | override def encode(schema: Schema[A], name: String, optional: Boolean): Type = { 23 | val fieldTypes = record.fields.zip(fields.map(_.unwrap)).map { case (field, encoder) => 24 | enc(field.name, field.schema, encoder) 25 | } 26 | 27 | Schemas.record(fieldTypes).optionality(optional).named(name) 28 | } 29 | } 30 | 31 | override def deriveEnum[A]( 32 | `enum`: Schema.Enum[A], 33 | cases: => Chunk[Deriver.WrappedF[SchemaEncoder, ?]], 34 | summoned: => Option[SchemaEncoder[A]] 35 | ): SchemaEncoder[A] = new SchemaEncoder[A] { 36 | override def encode(schema: Schema[A], name: String, optional: Boolean): Type = 37 | Schemas.enum0.optionality(optional).named(name) 38 | } 39 | 40 | override def derivePrimitive[A]( 41 | st: StandardType[A], 42 | summoned: => Option[SchemaEncoder[A]] 43 | ): SchemaEncoder[A] = 
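// Maps each zio-schema StandardType to the corresponding Parquet primitive definition from Schemas; UnitType has no Parquet representation and raises an EncoderError.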
44 | new SchemaEncoder[A] { 45 | override def encode(schema: Schema[A], name: String, optional: Boolean): Type = { 46 | def tpe(prim: PrimitiveDef) = 47 | prim.optionality(optional).named(name) 48 | 49 | st match { 50 | case StandardType.StringType => 51 | tpe(Schemas.string) 52 | case StandardType.BoolType => 53 | tpe(Schemas.boolean) 54 | case StandardType.ByteType => 55 | tpe(Schemas.byte) 56 | case StandardType.ShortType => 57 | tpe(Schemas.short) 58 | case StandardType.IntType => 59 | tpe(Schemas.int) 60 | case StandardType.LongType => 61 | tpe(Schemas.long) 62 | case StandardType.FloatType => 63 | tpe(Schemas.float) 64 | case StandardType.DoubleType => 65 | tpe(Schemas.double) 66 | case StandardType.BinaryType => 67 | tpe(Schemas.binary) 68 | case StandardType.CharType => 69 | tpe(Schemas.char) 70 | case StandardType.UUIDType => 71 | tpe(Schemas.uuid) 72 | case StandardType.CurrencyType => 73 | tpe(Schemas.currency) 74 | case StandardType.BigDecimalType => 75 | tpe(Schemas.bigDecimal) 76 | case StandardType.BigIntegerType => 77 | tpe(Schemas.bigInteger) 78 | case StandardType.DayOfWeekType => 79 | tpe(Schemas.dayOfWeek) 80 | case StandardType.MonthType => 81 | tpe(Schemas.monthType) 82 | case StandardType.MonthDayType => 83 | tpe(Schemas.monthDay) 84 | case StandardType.PeriodType => 85 | tpe(Schemas.period) 86 | case StandardType.YearType => 87 | tpe(Schemas.year) 88 | case StandardType.YearMonthType => 89 | tpe(Schemas.yearMonth) 90 | case StandardType.ZoneIdType => 91 | tpe(Schemas.zoneId) 92 | case StandardType.ZoneOffsetType => 93 | tpe(Schemas.zoneOffset) 94 | case StandardType.DurationType => 95 | tpe(Schemas.duration) 96 | case StandardType.InstantType => 97 | tpe(Schemas.instant) 98 | case StandardType.LocalDateType => 99 | tpe(Schemas.localDate) 100 | case StandardType.LocalTimeType => 101 | tpe(Schemas.localTime) 102 | case StandardType.LocalDateTimeType => 103 | tpe(Schemas.localDateTime) 104 | case StandardType.OffsetTimeType => 105 | tpe(Schemas.offsetTime) 106 | case StandardType.OffsetDateTimeType => 107 | tpe(Schemas.offsetDateTime) 108 | case StandardType.ZonedDateTimeType => 109 | tpe(Schemas.zonedDateTime) 110 | case StandardType.UnitType => 111 | throw EncoderError("Unit standard type is unsupported") 112 | } 113 | } 114 | } 115 | 116 | override def deriveOption[A]( 117 | option: Schema.Optional[A], 118 | inner: => SchemaEncoder[A], 119 | summoned: => Option[SchemaEncoder[Option[A]]] 120 | ): SchemaEncoder[Option[A]] = new SchemaEncoder[Option[A]] { 121 | override def encode(schema: Schema[Option[A]], name: String, optional: Boolean): Type = 122 | inner.encode(option.schema, name, optional = true) 123 | } 124 | 125 | override def deriveSequence[C[_], A]( 126 | sequence: Schema.Sequence[C[A], A, ?], 127 | inner: => SchemaEncoder[A], 128 | summoned: => Option[SchemaEncoder[C[A]]] 129 | ): SchemaEncoder[C[A]] = new SchemaEncoder[C[A]] { 130 | override def encode(schema: Schema[C[A]], name: String, optional: Boolean): Type = 131 | Schemas 132 | .list(inner.encode(sequence.elementSchema, "element", isSchemaOptional(sequence.elementSchema))) 133 | .optionality(optional) 134 | .named(name) 135 | } 136 | 137 | override def deriveMap[K, V]( 138 | map: Schema.Map[K, V], 139 | key: => SchemaEncoder[K], 140 | value: => SchemaEncoder[V], 141 | summoned: => Option[SchemaEncoder[Map[K, V]]] 142 | ): SchemaEncoder[Map[K, V]] = new SchemaEncoder[Map[K, V]] { 143 | override def encode(schema: Schema[Map[K, V]], name: String, optional: Boolean): Type = 144 | Schemas 145 | .map( 146 
| key.encode(map.keySchema, "key", optional = false), 147 | value.encode(map.valueSchema, "value", optional = isSchemaOptional(map.valueSchema)) 148 | ) 149 | .optionality(optional) 150 | .named(name) 151 | } 152 | 153 | override def deriveTransformedRecord[A, B]( 154 | record: Schema.Record[A], 155 | transform: Schema.Transform[A, B, ?], 156 | fields: => Chunk[Deriver.WrappedF[SchemaEncoder, ?]], 157 | summoned: => Option[SchemaEncoder[B]] 158 | ): SchemaEncoder[B] = summoned.getOrElse { 159 | new SchemaEncoder[B] { 160 | private def enc[A1](name0: String, schema0: Schema[A1], encoder: SchemaEncoder[?]) = 161 | encoder.asInstanceOf[SchemaEncoder[A1]].encode(schema0, name0, isSchemaOptional(schema0)) 162 | 163 | override def encode(schema: Schema[B], name: String, optional: Boolean): Type = { 164 | val fieldTypes = record.fields.zip(fields.map(_.unwrap)).map { case (field, encoder) => 165 | enc(field.name, field.schema, encoder) 166 | } 167 | 168 | Schemas.record(fieldTypes).optionality(optional).named(name) 169 | } 170 | } 171 | } 172 | }.cached 173 | 174 | val summoned: Deriver[SchemaEncoder] = default.autoAcceptSummoned 175 | 176 | private def isSchemaOptional(schema: Schema[?]): Boolean = 177 | schema match { 178 | case _: Schema.Optional[_] => true 179 | case _ => false 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/ValueEncoderDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value 4 | import zio.Chunk 5 | import zio.schema.{ Deriver, Schema, StandardType } 6 | 7 | import java.math.{ BigDecimal, BigInteger } 8 | import java.time.{ 9 | DayOfWeek, 10 | Duration, 11 | Instant, 12 | LocalDate, 13 | LocalDateTime, 14 | LocalTime, 15 | Month, 16 | MonthDay, 17 | OffsetDateTime, 18 | OffsetTime, 19 | Period, 20 | Year, 21 | YearMonth, 22 | ZoneId, 23 | ZoneOffset, 24 | ZonedDateTime 25 | } 26 | import java.util.{ Currency, UUID } 27 | 28 | object ValueEncoderDeriver { 29 | 30 | val default: Deriver[ValueEncoder] = new Deriver[ValueEncoder] { 31 | 32 | override def deriveRecord[A]( 33 | record: Schema.Record[A], 34 | fields: => Chunk[Deriver.WrappedF[ValueEncoder, ?]], 35 | summoned: => Option[ValueEncoder[A]] 36 | ): ValueEncoder[A] = new ValueEncoder[A] { 37 | 38 | private def enc[A1](v: A, field: Schema.Field[A, A1], encoder: ValueEncoder[?]) = 39 | encoder.asInstanceOf[ValueEncoder[A1]].encode(field.get(v)) 40 | 41 | override def encode(value: A): Value = 42 | Value.record( 43 | record.fields 44 | .zip(fields.map(_.unwrap)) 45 | .map { case (field, encoder) => 46 | field.name -> enc(value, field, encoder) 47 | } 48 | .toMap 49 | ) 50 | } 51 | 52 | override def deriveEnum[A]( 53 | `enum`: Schema.Enum[A], 54 | cases: => Chunk[Deriver.WrappedF[ValueEncoder, ?]], 55 | summoned: => Option[ValueEncoder[A]] 56 | ): ValueEncoder[A] = new ValueEncoder[A] { 57 | override def encode(value: A): Value = { 58 | val casesMap = `enum`.cases.map { case0 => 59 | case0.schema.asInstanceOf[Schema.CaseClass0[A]].defaultConstruct() -> case0.id 60 | }.toMap 61 | 62 | derivePrimitive(StandardType.StringType, summoned = None) 63 | .contramap[A] { case0 => 64 | casesMap.getOrElse(case0, throw EncoderError(s"Failed to encode enum for value $case0")) 65 | } 66 | .encode(value) 67 | } 68 | } 69 | 70 | override def derivePrimitive[A]( 
71 | st: StandardType[A], 72 | summoned: => Option[ValueEncoder[A]] 73 | ): ValueEncoder[A] = 74 | new ValueEncoder[A] { 75 | override def encode(value: A): Value = 76 | (st, value) match { 77 | case (StandardType.StringType, v: String) => 78 | Value.string(v) 79 | case (StandardType.BoolType, v: Boolean) => 80 | Value.boolean(v) 81 | case (StandardType.ByteType, v: Byte) => 82 | Value.int(v.toInt) 83 | case (StandardType.ShortType, v: Short) => 84 | Value.short(v) 85 | case (StandardType.IntType, v: Int) => 86 | Value.int(v) 87 | case (StandardType.LongType, v: Long) => 88 | Value.long(v) 89 | case (StandardType.FloatType, v: Float) => 90 | Value.float(v) 91 | case (StandardType.DoubleType, v: Double) => 92 | Value.double(v) 93 | case (StandardType.BinaryType, v: Chunk[_]) => 94 | Value.binary(v.asInstanceOf[Chunk[Byte]]) 95 | case (StandardType.CharType, v: Char) => 96 | Value.char(v) 97 | case (StandardType.UUIDType, v: UUID) => 98 | Value.uuid(v) 99 | case (StandardType.CurrencyType, v: Currency) => 100 | Value.currency(v) 101 | case (StandardType.BigDecimalType, v: BigDecimal) => 102 | Value.bigDecimal(v) 103 | case (StandardType.BigIntegerType, v: BigInteger) => 104 | Value.bigInteger(v) 105 | case (StandardType.DayOfWeekType, v: DayOfWeek) => 106 | Value.dayOfWeek(v) 107 | case (StandardType.MonthType, v: Month) => 108 | Value.month(v) 109 | case (StandardType.MonthDayType, v: MonthDay) => 110 | Value.monthDay(v) 111 | case (StandardType.PeriodType, v: Period) => 112 | Value.period(v) 113 | case (StandardType.YearType, v: Year) => 114 | Value.year(v) 115 | case (StandardType.YearMonthType, v: YearMonth) => 116 | Value.yearMonth(v) 117 | case (StandardType.ZoneIdType, v: ZoneId) => 118 | Value.zoneId(v) 119 | case (StandardType.ZoneOffsetType, v: ZoneOffset) => 120 | Value.zoneOffset(v) 121 | case (StandardType.DurationType, v: Duration) => 122 | Value.duration(v) 123 | case (StandardType.InstantType, v: Instant) => 124 | Value.instant(v) 125 | case (StandardType.LocalDateType, v: LocalDate) => 126 | Value.localDate(v) 127 | case (StandardType.LocalTimeType, v: LocalTime) => 128 | Value.localTime(v) 129 | case (StandardType.LocalDateTimeType, v: LocalDateTime) => 130 | Value.localDateTime(v) 131 | case (StandardType.OffsetTimeType, v: OffsetTime) => 132 | Value.offsetTime(v) 133 | case (StandardType.OffsetDateTimeType, v: OffsetDateTime) => 134 | Value.offsetDateTime(v) 135 | case (StandardType.ZonedDateTimeType, v: ZonedDateTime) => 136 | Value.zonedDateTime(v) 137 | case (other, _) => 138 | throw EncoderError(s"Unsupported ZIO Schema StandardType $other") 139 | } 140 | } 141 | 142 | override def deriveOption[A]( 143 | option: Schema.Optional[A], 144 | inner: => ValueEncoder[A], 145 | summoned: => Option[ValueEncoder[Option[A]]] 146 | ): ValueEncoder[Option[A]] = 147 | new ValueEncoder[Option[A]] { 148 | override def encode(value: Option[A]): Value = 149 | value match { 150 | case Some(v) => inner.encode(v) 151 | case _ => Value.nil 152 | } 153 | } 154 | 155 | override def deriveSequence[C[_], A]( 156 | sequence: Schema.Sequence[C[A], A, ?], 157 | inner: => ValueEncoder[A], 158 | summoned: => Option[ValueEncoder[C[A]]] 159 | ): ValueEncoder[C[A]] = new ValueEncoder[C[A]] { 160 | override def encode(value: C[A]): Value = 161 | Value.list(sequence.toChunk(value).map(inner.encode)) 162 | } 163 | 164 | override def deriveMap[K, V]( 165 | map: Schema.Map[K, V], 166 | key: => ValueEncoder[K], 167 | value: => ValueEncoder[V], 168 | summoned: => Option[ValueEncoder[Map[K, V]]] 169 | ): 
ValueEncoder[Map[K, V]] = new ValueEncoder[Map[K, V]] { 170 | override def encode(value0: Map[K, V]): Value = 171 | Value.map( 172 | value0.map { case (k, v) => 173 | key.encode(k) -> value.encode(v) 174 | } 175 | ) 176 | } 177 | 178 | override def deriveTransformedRecord[A, B]( 179 | record: Schema.Record[A], 180 | transform: Schema.Transform[A, B, ?], 181 | fields: => Chunk[Deriver.WrappedF[ValueEncoder, ?]], 182 | summoned: => Option[ValueEncoder[B]] 183 | ): ValueEncoder[B] = summoned.getOrElse { 184 | new ValueEncoder[B] { 185 | private def enc[A1](v: A, field: Schema.Field[A, A1], encoder: ValueEncoder[?]) = 186 | encoder.asInstanceOf[ValueEncoder[A1]].encode(field.get(v)) 187 | 188 | override def encode(value: B): Value = 189 | transform.g(value) match { 190 | case Right(v) => 191 | Value.record( 192 | record.fields 193 | .zip(fields.map(_.unwrap)) 194 | .map { case (field, encoder) => 195 | field.name -> enc(v, field, encoder) 196 | } 197 | .toMap 198 | ) 199 | case Left(reason) => 200 | throw EncoderError(s"Failed to encode transformed record for value $value: $reason") 201 | } 202 | } 203 | } 204 | }.cached 205 | 206 | val summoned: Deriver[ValueEncoder] = default.autoAcceptSummoned 207 | 208 | } 209 | -------------------------------------------------------------------------------- /modules/core/src/test/scala-2.13+/me/mnedokushev/zio/apache/parquet/core/Fixtures.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.codec.{ 4 | SchemaEncoder, 5 | SchemaEncoderDeriver, 6 | ValueDecoder, 7 | ValueDecoderDeriver, 8 | ValueEncoder, 9 | ValueEncoderDeriver 10 | } 11 | import me.mnedokushev.zio.apache.parquet.core.filter.{ TypeTag, TypeTagDeriver } 12 | import org.apache.parquet.filter2.predicate.FilterApi 13 | import org.apache.parquet.filter2.predicate.Operators.BinaryColumn 14 | import org.apache.parquet.io.api.Binary 15 | import zio.Chunk 16 | import zio.schema._ 17 | 18 | import java.time._ 19 | import java.util.{ Currency, UUID } 20 | 21 | object Fixtures { 22 | 23 | // unable to generate code for case classes with more than 120 int fields due to following error: 24 | // tested with jdk 11.0.23 64bit 25 | // Error while emitting me/mnedokushev/zio/apache/parquet/core/codec/SchemaEncoderDeriverSpec$MaxArityRecord$ 26 | // Method too large: me/mnedokushev/zio/apache/parquet/core/codec/SchemaEncoderDeriverSpec$MaxArityRecord$.derivedSchema0$lzyINIT4$1$$anonfun$364 (Lscala/collection/immutable/ListMap;)Lscala/util/Either; 27 | case class Arity23( 28 | a: Int, 29 | b: Option[String], 30 | c: Int, 31 | d: Int, 32 | e: Int, 33 | f: Int, 34 | g: Int, 35 | h: Int, 36 | i: Int, 37 | j: Int, 38 | k: Int, 39 | l: Int, 40 | m: Int, 41 | n: Int, 42 | o: Int, 43 | p: Int, 44 | q: Int, 45 | r: Int, 46 | s: Int, 47 | t: Int, 48 | u: Int, 49 | v: Int, 50 | w: Int 51 | ) 52 | object Arity23 { 53 | implicit lazy val schema: Schema[Arity23] = 54 | DeriveSchema.gen[Arity23] 55 | } 56 | 57 | case class MyRecord(a: String, b: Int, child: MyRecord.Child, enm: MyRecord.Enum, opt: Option[Int]) 58 | 59 | object MyRecord { 60 | implicit val schema: Schema.CaseClass5.WithFields[ 61 | "a", 62 | "b", 63 | "child", 64 | "enm", 65 | "opt", 66 | String, 67 | Int, 68 | MyRecord.Child, 69 | MyRecord.Enum, 70 | Option[Int], 71 | MyRecord 72 | ] = 73 | DeriveSchema.gen[MyRecord] 74 | implicit val typeTag: TypeTag[MyRecord] = 75 | Derive.derive[TypeTag, 
MyRecord](TypeTagDeriver.default) 76 | 77 | case class Child(c: Int, d: Option[Long]) 78 | object Child { 79 | implicit val schema: Schema.CaseClass2.WithFields["c", "d", Int, Option[Long], MyRecord.Child] = 80 | DeriveSchema.gen[Child] 81 | implicit val typeTag: TypeTag[Child] = 82 | Derive.derive[TypeTag, Child](TypeTagDeriver.default) 83 | } 84 | 85 | sealed trait Enum 86 | object Enum { 87 | case object Started extends Enum 88 | case object InProgress extends Enum 89 | case object Done extends Enum 90 | 91 | implicit val schema: Schema[Enum] = 92 | DeriveSchema.gen[Enum] 93 | implicit val typeTag: TypeTag[Enum] = 94 | Derive.derive[TypeTag, Enum](TypeTagDeriver.default) 95 | } 96 | } 97 | 98 | case class MyRecordSummoned(a: Int, b: String) 99 | 100 | object MyRecordSummoned { 101 | implicit val schema: zio.schema.Schema.CaseClass2.WithFields["a", "b", Int, String, MyRecordSummoned] = 102 | DeriveSchema.gen[MyRecordSummoned] 103 | 104 | implicit val intTypeTag: TypeTag.EqNotEq[Int] = 105 | TypeTag.eqnoteq[Int, Binary, BinaryColumn]( 106 | FilterApi.binaryColumn, 107 | v => Value.string(v.toString).value 108 | ) 109 | implicit val typeTag: TypeTag[MyRecordSummoned] = Derive.derive[TypeTag, MyRecordSummoned](TypeTagDeriver.summoned) 110 | } 111 | 112 | case class MyRecordIO(a: Int, b: String, c: Option[Long], d: List[Int], e: Map[String, Int]) 113 | object MyRecordIO { 114 | implicit val schema: zio.schema.Schema.CaseClass5.WithFields[ 115 | "a", 116 | "b", 117 | "c", 118 | "d", 119 | "e", 120 | Int, 121 | String, 122 | Option[Long], 123 | List[Int], 124 | Map[String, Int], 125 | MyRecordIO 126 | ] = 127 | DeriveSchema.gen[MyRecordIO] 128 | implicit val schemaEncoder: SchemaEncoder[MyRecordIO] = 129 | Derive.derive[SchemaEncoder, MyRecordIO](SchemaEncoderDeriver.summoned) 130 | implicit val valueEncoder: ValueEncoder[MyRecordIO] = 131 | Derive.derive[ValueEncoder, MyRecordIO](ValueEncoderDeriver.summoned) 132 | implicit val valueDecoder: ValueDecoder[MyRecordIO] = 133 | Derive.derive[ValueDecoder, MyRecordIO](ValueDecoderDeriver.summoned) 134 | implicit val typeTag: TypeTag[MyRecordIO] = 135 | Derive.derive[TypeTag, MyRecordIO](TypeTagDeriver.default) 136 | } 137 | 138 | case class MyProjectedRecordIO(a: Int, c: Option[Long], d: List[Int], e: Map[String, Int]) 139 | object MyProjectedRecordIO { 140 | implicit val schema: zio.schema.Schema.CaseClass4.WithFields[ 141 | "a", 142 | "c", 143 | "d", 144 | "e", 145 | Int, 146 | Option[Long], 147 | List[Int], 148 | Map[String, Int], 149 | MyProjectedRecordIO 150 | ] = 151 | DeriveSchema.gen[MyProjectedRecordIO] 152 | implicit val schemaEncoder: SchemaEncoder[MyProjectedRecordIO] = 153 | Derive.derive[SchemaEncoder, MyProjectedRecordIO](SchemaEncoderDeriver.summoned) 154 | implicit val valueEncoder: ValueEncoder[MyProjectedRecordIO] = 155 | Derive.derive[ValueEncoder, MyProjectedRecordIO](ValueEncoderDeriver.summoned) 156 | implicit val valueDecoder: ValueDecoder[MyProjectedRecordIO] = 157 | Derive.derive[ValueDecoder, MyProjectedRecordIO](ValueDecoderDeriver.summoned) 158 | } 159 | 160 | case class MyRecordAllTypes1( 161 | string: String, 162 | boolean: Boolean, 163 | byte: Byte, 164 | short: Short, 165 | int: Int, 166 | long: Long, 167 | float: Float, 168 | double: Double, 169 | binary: Chunk[Byte], 170 | char: Char, 171 | uuid: UUID, 172 | currency: Currency, 173 | bigDecimal: java.math.BigDecimal, 174 | bigInteger: java.math.BigInteger, 175 | dayOfWeek: DayOfWeek, 176 | month: Month, 177 | monthDay: MonthDay, 178 | period: Period, 179 | year: 
Year, 180 | yearMonth: YearMonth, 181 | zoneId: ZoneId, 182 | zoneOffset: ZoneOffset 183 | ) 184 | object MyRecordAllTypes1 { 185 | implicit val schema: zio.schema.Schema.CaseClass22.WithFields[ 186 | "string", 187 | "boolean", 188 | "byte", 189 | "short", 190 | "int", 191 | "long", 192 | "float", 193 | "double", 194 | "binary", 195 | "char", 196 | "uuid", 197 | "currency", 198 | "bigDecimal", 199 | "bigInteger", 200 | "dayOfWeek", 201 | "month", 202 | "monthDay", 203 | "period", 204 | "year", 205 | "yearMonth", 206 | "zoneId", 207 | "zoneOffset", 208 | String, 209 | Boolean, 210 | Byte, 211 | Short, 212 | Int, 213 | Long, 214 | Float, 215 | Double, 216 | zio.Chunk[Byte], 217 | Char, 218 | java.util.UUID, 219 | java.util.Currency, 220 | java.math.BigDecimal, 221 | java.math.BigInteger, 222 | java.time.DayOfWeek, 223 | java.time.Month, 224 | java.time.MonthDay, 225 | java.time.Period, 226 | java.time.Year, 227 | java.time.YearMonth, 228 | java.time.ZoneId, 229 | java.time.ZoneOffset, 230 | MyRecordAllTypes1 231 | ] = 232 | DeriveSchema.gen[MyRecordAllTypes1] 233 | implicit val typeTag: TypeTag[MyRecordAllTypes1] = 234 | Derive.derive[TypeTag, MyRecordAllTypes1](TypeTagDeriver.default) 235 | } 236 | case class MyRecordAllTypes2( 237 | duration: Duration, 238 | instant: Instant, 239 | localDate: LocalDate, 240 | localTime: LocalTime, 241 | localDateTime: LocalDateTime, 242 | offsetTime: OffsetTime, 243 | offsetDateTime: OffsetDateTime, 244 | zonedDateTime: ZonedDateTime 245 | ) 246 | object MyRecordAllTypes2 { 247 | implicit val schema: zio.schema.Schema.CaseClass8.WithFields[ 248 | "duration", 249 | "instant", 250 | "localDate", 251 | "localTime", 252 | "localDateTime", 253 | "offsetTime", 254 | "offsetDateTime", 255 | "zonedDateTime", 256 | java.time.Duration, 257 | java.time.Instant, 258 | java.time.LocalDate, 259 | java.time.LocalTime, 260 | java.time.LocalDateTime, 261 | java.time.OffsetTime, 262 | java.time.OffsetDateTime, 263 | java.time.ZonedDateTime, 264 | MyRecordAllTypes2 265 | ] = 266 | DeriveSchema.gen[MyRecordAllTypes2] 267 | implicit val typeTag: TypeTag[MyRecordAllTypes2] = 268 | Derive.derive[TypeTag, MyRecordAllTypes2](TypeTagDeriver.default) 269 | } 270 | 271 | } 272 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/filter/TypeTag.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import _root_.java.time.Instant 4 | import me.mnedokushev.zio.apache.parquet.core.Value 5 | import me.mnedokushev.zio.apache.parquet.core.filter.TypeTag.{ Dummy, EqNotEq, LtGt, Optional, Record } 6 | import org.apache.parquet.filter2.predicate.{ FilterApi, Operators } 7 | import org.apache.parquet.io.api.Binary 8 | import zio.{ Chunk, Duration } 9 | 10 | import java.time.{ 11 | DayOfWeek, 12 | LocalDate, 13 | LocalDateTime, 14 | LocalTime, 15 | Month, 16 | MonthDay, 17 | OffsetDateTime, 18 | OffsetTime, 19 | Period, 20 | Year, 21 | YearMonth, 22 | ZoneId, 23 | ZoneOffset, 24 | ZonedDateTime 25 | } 26 | import java.util.{ Currency, UUID } 27 | import scala.jdk.CollectionConverters._ 28 | 29 | sealed trait TypeTag[+A] { self => 30 | 31 | override def toString: String = 32 | self match { 33 | case _: Dummy[_] => "Dummy[A]" 34 | case _: Optional[_] => "Optional[A]" 35 | case _: Record[_] => "Record[A]" 36 | case _: EqNotEq[_] => "EqNotEq[A]" 37 | case _: LtGt[_] => "LtGt[A]" 38 | } 39 | 40 | } 41 | 42 
| object TypeTag { 43 | 44 | trait Dummy[+A] extends TypeTag[A] 45 | 46 | def dummy[A]: TypeTag.Dummy[A] = 47 | new Dummy[A] {} 48 | 49 | final case class Optional[+A: TypeTag]() extends TypeTag[Option[A]] { 50 | val typeTag: TypeTag[A] = implicitly[TypeTag[A]] 51 | } 52 | 53 | implicit def optional[A: TypeTag]: TypeTag[Option[A]] = 54 | Optional[A]() 55 | 56 | final case class Record[+A](columns: Map[String, TypeTag[?]]) extends TypeTag[A] 57 | 58 | trait EqNotEq[A] extends TypeTag[A] { self => 59 | type T <: Comparable[T] 60 | type C <: Operators.Column[T] & Operators.SupportsEqNotEq 61 | 62 | def cast[A0]: EqNotEq[A0] = self.asInstanceOf[EqNotEq[A0]] 63 | 64 | def column(path: String): C 65 | def value(v: A): T 66 | def values(vs: Set[A]): java.util.Set[T] = 67 | vs.map(value).asJava 68 | } 69 | 70 | trait LtGt[A] extends TypeTag[A] { self => 71 | type T <: Comparable[T] 72 | type C <: Operators.Column[T] & Operators.SupportsLtGt 73 | 74 | def cast[A0]: LtGt[A0] = self.asInstanceOf[LtGt[A0]] 75 | 76 | def column(path: String): C 77 | def value(v: A): T 78 | def values(vs: Set[A]): java.util.Set[T] = 79 | vs.map(value).asJava 80 | } 81 | 82 | def eqnoteq[A, T0 <: Comparable[T0], C0 <: Operators.Column[T0] & Operators.SupportsEqNotEq]( 83 | column0: String => C0, 84 | value0: A => T0 85 | ): TypeTag.EqNotEq[A] = 86 | new TypeTag.EqNotEq[A] { 87 | 88 | override type T = T0 89 | 90 | override type C = C0 91 | 92 | override def column(path: String): C = 93 | column0(path) 94 | 95 | override def value(v: A): T = 96 | value0(v) 97 | 98 | } 99 | 100 | def ltgt[A, T0 <: Comparable[T0], C0 <: Operators.Column[T0] & Operators.SupportsLtGt]( 101 | column0: String => C0, 102 | value0: A => T0 103 | ): TypeTag.LtGt[A] = 104 | new TypeTag.LtGt[A] { 105 | 106 | override type T = T0 107 | 108 | override type C = C0 109 | 110 | override def column(path: String): C = 111 | column0(path) 112 | 113 | override def value(v: A): T = 114 | value0(v) 115 | 116 | } 117 | 118 | def enum0[A](casesMap: Map[A, String]): TypeTag.EqNotEq[A] = 119 | eqnoteq[A, Binary, Operators.BinaryColumn]( 120 | FilterApi.binaryColumn, 121 | v => Value.string(casesMap.getOrElse(v, throw FilterError(s"Failed to encode enum for value $v"))).value 122 | ) 123 | 124 | implicit val string: TypeTag.EqNotEq[String] = 125 | eqnoteq[String, Binary, Operators.BinaryColumn]( 126 | FilterApi.binaryColumn, 127 | Value.string(_).value 128 | ) 129 | implicit val boolean: TypeTag.EqNotEq[Boolean] = 130 | eqnoteq[Boolean, java.lang.Boolean, Operators.BooleanColumn]( 131 | FilterApi.booleanColumn, 132 | Value.boolean(_).value 133 | ) 134 | implicit val byte: TypeTag.LtGt[Byte] = 135 | ltgt[Byte, java.lang.Integer, Operators.IntColumn]( 136 | FilterApi.intColumn, 137 | Value.byte(_).value 138 | ) 139 | implicit val short: TypeTag.LtGt[Short] = 140 | ltgt[Short, java.lang.Integer, Operators.IntColumn]( 141 | FilterApi.intColumn, 142 | Value.short(_).value 143 | ) 144 | implicit val int: TypeTag.LtGt[Int] = 145 | ltgt[Int, java.lang.Integer, Operators.IntColumn]( 146 | FilterApi.intColumn, 147 | Value.int(_).value 148 | ) 149 | implicit val long: TypeTag.LtGt[Long] = 150 | ltgt[Long, java.lang.Long, Operators.LongColumn]( 151 | FilterApi.longColumn, 152 | Value.long(_).value 153 | ) 154 | implicit val float: TypeTag.LtGt[Float] = 155 | ltgt[Float, java.lang.Float, Operators.FloatColumn]( 156 | FilterApi.floatColumn, 157 | Value.float(_).value 158 | ) 159 | implicit val double: TypeTag.LtGt[Double] = 160 | ltgt[Double, java.lang.Double, 
Operators.DoubleColumn]( 161 | FilterApi.doubleColumn, 162 | Value.double(_).value 163 | ) 164 | implicit val binary: TypeTag.EqNotEq[Chunk[Byte]] = 165 | eqnoteq[Chunk[Byte], Binary, Operators.BinaryColumn]( 166 | FilterApi.binaryColumn, 167 | Value.binary(_).value 168 | ) 169 | implicit val char: TypeTag.EqNotEq[Char] = 170 | eqnoteq[Char, java.lang.Integer, Operators.IntColumn]( 171 | FilterApi.intColumn, 172 | Value.char(_).value 173 | ) 174 | implicit val uuid: TypeTag.EqNotEq[UUID] = 175 | eqnoteq[UUID, Binary, Operators.BinaryColumn]( 176 | FilterApi.binaryColumn, 177 | Value.uuid(_).value 178 | ) 179 | implicit val currency: TypeTag.EqNotEq[Currency] = 180 | eqnoteq[Currency, Binary, Operators.BinaryColumn]( 181 | FilterApi.binaryColumn, 182 | Value.currency(_).value 183 | ) 184 | implicit val bigDecimal: TypeTag.LtGt[java.math.BigDecimal] = 185 | ltgt[java.math.BigDecimal, java.lang.Long, Operators.LongColumn]( 186 | FilterApi.longColumn, 187 | Value.bigDecimal(_).value 188 | ) 189 | implicit val bigInteger: TypeTag.LtGt[java.math.BigInteger] = 190 | ltgt[java.math.BigInteger, Binary, Operators.BinaryColumn]( 191 | FilterApi.binaryColumn, 192 | Value.bigInteger(_).value 193 | ) 194 | implicit val dayOfWeek: TypeTag.LtGt[DayOfWeek] = 195 | ltgt[DayOfWeek, java.lang.Integer, Operators.IntColumn]( 196 | FilterApi.intColumn, 197 | Value.dayOfWeek(_).value 198 | ) 199 | implicit val month: TypeTag.LtGt[Month] = 200 | ltgt[Month, java.lang.Integer, Operators.IntColumn]( 201 | FilterApi.intColumn, 202 | Value.month(_).value 203 | ) 204 | implicit val monthDay: TypeTag.LtGt[MonthDay] = 205 | ltgt[MonthDay, Binary, Operators.BinaryColumn]( 206 | FilterApi.binaryColumn, 207 | Value.monthDay(_).value 208 | ) 209 | implicit val period: TypeTag.LtGt[Period] = 210 | ltgt[Period, Binary, Operators.BinaryColumn]( 211 | FilterApi.binaryColumn, 212 | Value.period(_).value 213 | ) 214 | implicit val year: TypeTag.LtGt[Year] = 215 | ltgt[Year, java.lang.Integer, Operators.IntColumn]( 216 | FilterApi.intColumn, 217 | Value.year(_).value 218 | ) 219 | implicit val yearMonth: TypeTag.LtGt[YearMonth] = 220 | ltgt[YearMonth, Binary, Operators.BinaryColumn]( 221 | FilterApi.binaryColumn, 222 | Value.yearMonth(_).value 223 | ) 224 | // NOTE: it is not implicit to make scalac happy since ZoneOffset is a subtype of ZoneId 225 | val zoneId: TypeTag.EqNotEq[ZoneId] = 226 | eqnoteq[ZoneId, Binary, Operators.BinaryColumn]( 227 | FilterApi.binaryColumn, 228 | Value.zoneId(_).value 229 | ) 230 | implicit val zoneOffset: TypeTag.EqNotEq[ZoneOffset] = 231 | eqnoteq[ZoneOffset, Binary, Operators.BinaryColumn]( 232 | FilterApi.binaryColumn, 233 | Value.zoneOffset(_).value 234 | ) 235 | implicit val duration: TypeTag.LtGt[Duration] = 236 | ltgt[Duration, java.lang.Long, Operators.LongColumn]( 237 | FilterApi.longColumn, 238 | Value.duration(_).value 239 | ) 240 | implicit val instant: TypeTag.LtGt[Instant] = 241 | ltgt[Instant, java.lang.Long, Operators.LongColumn]( 242 | FilterApi.longColumn, 243 | Value.instant(_).value 244 | ) 245 | implicit val localDate: TypeTag.LtGt[LocalDate] = 246 | ltgt[LocalDate, java.lang.Integer, Operators.IntColumn]( 247 | FilterApi.intColumn, 248 | Value.localDate(_).value 249 | ) 250 | implicit val localTime: TypeTag.LtGt[LocalTime] = 251 | ltgt[LocalTime, java.lang.Integer, Operators.IntColumn]( 252 | FilterApi.intColumn, 253 | Value.localTime(_).value 254 | ) 255 | implicit val localDateTime: TypeTag.LtGt[LocalDateTime] = 256 | ltgt[LocalDateTime, java.lang.Long, Operators.LongColumn]( 
257 | FilterApi.longColumn, 258 | Value.localDateTime(_).value 259 | ) 260 | implicit val offsetTime: TypeTag.LtGt[OffsetTime] = 261 | ltgt[OffsetTime, java.lang.Integer, Operators.IntColumn]( 262 | FilterApi.intColumn, 263 | Value.offsetTime(_).value 264 | ) 265 | implicit val offsetDateTime: TypeTag.LtGt[OffsetDateTime] = 266 | ltgt[OffsetDateTime, java.lang.Long, Operators.LongColumn]( 267 | FilterApi.longColumn, 268 | Value.offsetDateTime(_).value 269 | ) 270 | implicit val zonedDateTime: TypeTag.LtGt[ZonedDateTime] = 271 | ltgt[ZonedDateTime, java.lang.Long, Operators.LongColumn]( 272 | FilterApi.longColumn, 273 | Value.zonedDateTime(_).value 274 | ) 275 | 276 | } 277 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/codec/ValueDecoderDeriver.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Value.{ GroupValue, PrimitiveValue } 4 | import me.mnedokushev.zio.apache.parquet.core.{ DECIMAL_SCALE, MICROS_FACTOR, MILLIS_PER_DAY, Value } 5 | import zio._ 6 | import zio.schema._ 7 | 8 | import java.math.{ BigDecimal, BigInteger } 9 | import java.nio.{ ByteBuffer, ByteOrder } 10 | import java.time.{ 11 | DayOfWeek, 12 | Instant, 13 | LocalDate, 14 | LocalDateTime, 15 | LocalTime, 16 | Month, 17 | MonthDay, 18 | OffsetDateTime, 19 | OffsetTime, 20 | Period, 21 | Year, 22 | YearMonth, 23 | ZoneId, 24 | ZoneOffset, 25 | ZonedDateTime 26 | } 27 | import java.util.{ Currency, UUID } 28 | 29 | object ValueDecoderDeriver { 30 | 31 | val default: Deriver[ValueDecoder] = new Deriver[ValueDecoder] { 32 | 33 | override def deriveRecord[A]( 34 | record: Schema.Record[A], 35 | fields: => Chunk[Deriver.WrappedF[ValueDecoder, ?]], 36 | summoned: => Option[ValueDecoder[A]] 37 | ): ValueDecoder[A] = new ValueDecoder[A] { 38 | override def decode(value: Value): A = 39 | value match { 40 | case GroupValue.RecordValue(values) => 41 | Unsafe.unsafe { implicit unsafe => 42 | record.construct( 43 | Chunk 44 | .fromIterable(record.fields.map(f => values(f.name))) 45 | .zip(fields.map(_.unwrap)) 46 | .map { case (v, decoder) => 47 | decoder.decode(v) 48 | } 49 | ) match { 50 | case Right(v) => 51 | v 52 | case Left(reason) => 53 | throw DecoderError(s"Couldn't decode $value: $reason") 54 | } 55 | } 56 | 57 | case other => 58 | throw DecoderError(s"Couldn't decode $other, it must be of type RecordValue") 59 | } 60 | 61 | } 62 | 63 | override def deriveEnum[A]( 64 | `enum`: Schema.Enum[A], 65 | cases: => Chunk[Deriver.WrappedF[ValueDecoder, ?]], 66 | summoned: => Option[ValueDecoder[A]] 67 | ): ValueDecoder[A] = new ValueDecoder[A] { 68 | override def decode(value: Value): A = { 69 | val casesMap = `enum`.cases.map { case0 => 70 | case0.id -> case0.schema.asInstanceOf[Schema.CaseClass0[A]].defaultConstruct() 71 | }.toMap 72 | 73 | derivePrimitive(StandardType.StringType, summoned = None).map { case0 => 74 | casesMap.getOrElse(case0, throw DecoderError(s"Failed to decode enum for id $case0")) 75 | }.decode(value) 76 | } 77 | } 78 | 79 | override def derivePrimitive[A]( 80 | st: StandardType[A], 81 | summoned: => Option[ValueDecoder[A]] 82 | ): ValueDecoder[A] = new ValueDecoder[A] { 83 | 84 | private def localTime(v: Int) = 85 | LocalTime.ofNanoOfDay(v * MICROS_FACTOR) 86 | 87 | private def localDateTime(v: Long) = { 88 | val epochDay = v / MILLIS_PER_DAY 89 | val nanoOfDay 
= (v - (epochDay * MILLIS_PER_DAY)) * MICROS_FACTOR 90 | 91 | LocalDateTime.of(LocalDate.ofEpochDay(epochDay), LocalTime.ofNanoOfDay(nanoOfDay)) 92 | } 93 | 94 | override def decode(value: Value): A = 95 | (st, value) match { 96 | case (StandardType.StringType, PrimitiveValue.BinaryValue(v)) => 97 | v.toStringUsingUTF8 98 | case (StandardType.BoolType, PrimitiveValue.BooleanValue(v)) => 99 | v 100 | case (StandardType.ByteType, PrimitiveValue.Int32Value(v)) => 101 | v.toByte 102 | case (StandardType.ShortType, PrimitiveValue.Int32Value(v)) => 103 | v.toShort 104 | case (StandardType.IntType, PrimitiveValue.Int32Value(v)) => 105 | v 106 | case (StandardType.LongType, PrimitiveValue.Int64Value(v)) => 107 | v 108 | case (StandardType.FloatType, PrimitiveValue.FloatValue(v)) => 109 | v 110 | case (StandardType.DoubleType, PrimitiveValue.DoubleValue(v)) => 111 | v 112 | case (StandardType.BinaryType, PrimitiveValue.BinaryValue(v)) => 113 | Chunk.fromArray(v.getBytes) 114 | case (StandardType.CharType, PrimitiveValue.Int32Value(v)) => 115 | v.toChar 116 | case (StandardType.UUIDType, PrimitiveValue.BinaryValue(v)) => 117 | val bb = ByteBuffer.wrap(v.getBytes) 118 | 119 | new UUID(bb.getLong, bb.getLong) 120 | case (StandardType.CurrencyType, PrimitiveValue.BinaryValue(v)) => 121 | Currency.getInstance(v.toStringUsingUTF8) 122 | case (StandardType.BigDecimalType, PrimitiveValue.Int64Value(v)) => 123 | BigDecimal.valueOf(v, DECIMAL_SCALE) 124 | case (StandardType.BigIntegerType, PrimitiveValue.BinaryValue(v)) => 125 | new BigInteger(v.getBytes) 126 | case (StandardType.DayOfWeekType, PrimitiveValue.Int32Value(v)) => 127 | DayOfWeek.of(v) 128 | case (StandardType.MonthType, PrimitiveValue.Int32Value(v)) => 129 | Month.of(v) 130 | case (StandardType.MonthDayType, PrimitiveValue.BinaryValue(v)) => 131 | val bb = ByteBuffer.wrap(v.getBytes).order(ByteOrder.LITTLE_ENDIAN) 132 | 133 | MonthDay.of(bb.get.toInt, bb.get.toInt) 134 | case (StandardType.PeriodType, PrimitiveValue.BinaryValue(v)) => 135 | val bb = ByteBuffer.wrap(v.getBytes).order(ByteOrder.LITTLE_ENDIAN) 136 | 137 | Period.of(bb.getInt, bb.getInt, bb.getInt) 138 | case (StandardType.YearType, PrimitiveValue.Int32Value(v)) => 139 | Year.of(v) 140 | case (StandardType.YearMonthType, PrimitiveValue.BinaryValue(v)) => 141 | val bb = ByteBuffer.wrap(v.getBytes).order(ByteOrder.LITTLE_ENDIAN) 142 | 143 | YearMonth.of(bb.getShort.toInt, bb.getShort.toInt) 144 | case (StandardType.ZoneIdType, PrimitiveValue.BinaryValue(v)) => 145 | ZoneId.of(v.toStringUsingUTF8) 146 | case (StandardType.ZoneOffsetType, PrimitiveValue.BinaryValue(v)) => 147 | ZoneOffset.of(v.toStringUsingUTF8) 148 | case (StandardType.DurationType, PrimitiveValue.Int64Value(v)) => 149 | Duration.fromMillis(v) 150 | case (StandardType.InstantType, PrimitiveValue.Int64Value(v)) => 151 | Instant.ofEpochMilli(v) 152 | case (StandardType.LocalDateType, PrimitiveValue.Int32Value(v)) => 153 | LocalDate.ofEpochDay(v.toLong) 154 | case (StandardType.LocalTimeType, PrimitiveValue.Int32Value(v)) => 155 | localTime(v) 156 | case (StandardType.LocalDateTimeType, PrimitiveValue.Int64Value(v)) => 157 | localDateTime(v) 158 | case (StandardType.OffsetTimeType, PrimitiveValue.Int32Value(v)) => 159 | OffsetTime.of(localTime(v), ZoneOffset.UTC) 160 | case (StandardType.OffsetDateTimeType, PrimitiveValue.Int64Value(v)) => 161 | OffsetDateTime.of(localDateTime(v), ZoneOffset.UTC) 162 | case (StandardType.ZonedDateTimeType, PrimitiveValue.Int64Value(v)) => 163 | ZonedDateTime.of(localDateTime(v), 
ZoneId.of("Z")) 164 | case (other, _) => 165 | throw DecoderError(s"Unsupported ZIO Schema StandartType $other") 166 | } 167 | } 168 | 169 | override def deriveOption[A]( 170 | option: Schema.Optional[A], 171 | inner: => ValueDecoder[A], 172 | summoned: => Option[ValueDecoder[Option[A]]] 173 | ): ValueDecoder[Option[A]] = new ValueDecoder[Option[A]] { 174 | override def decode(value: Value): Option[A] = 175 | value match { 176 | case Value.NullValue => 177 | None 178 | case _ => 179 | Some(inner.decode(value)) 180 | } 181 | 182 | } 183 | 184 | override def deriveSequence[C[_], A]( 185 | sequence: Schema.Sequence[C[A], A, ?], 186 | inner: => ValueDecoder[A], 187 | summoned: => Option[ValueDecoder[C[A]]] 188 | ): ValueDecoder[C[A]] = new ValueDecoder[C[A]] { 189 | override def decode(value: Value): C[A] = 190 | value match { 191 | case GroupValue.ListValue(values) => 192 | sequence.fromChunk(values.map(inner.decode)) 193 | case other => 194 | throw DecoderError(s"Couldn't decode $other, it must be of type ListValue") 195 | } 196 | } 197 | 198 | override def deriveMap[K, V]( 199 | map: Schema.Map[K, V], 200 | key: => ValueDecoder[K], 201 | value: => ValueDecoder[V], 202 | summoned: => Option[ValueDecoder[Map[K, V]]] 203 | ): ValueDecoder[Map[K, V]] = new ValueDecoder[Map[K, V]] { 204 | override def decode(value0: Value): Map[K, V] = 205 | value0 match { 206 | case GroupValue.MapValue(values) => 207 | values.map { case (k, v) => 208 | key.decode(k) -> value.decode(v) 209 | } 210 | case other => 211 | throw DecoderError(s"Couldn't decode $other, it must be of type MapValue") 212 | } 213 | } 214 | 215 | override def deriveTransformedRecord[A, B]( 216 | record: Schema.Record[A], 217 | transform: Schema.Transform[A, B, ?], 218 | fields: => Chunk[Deriver.WrappedF[ValueDecoder, ?]], 219 | summoned: => Option[ValueDecoder[B]] 220 | ): ValueDecoder[B] = summoned.getOrElse { 221 | new ValueDecoder[B] { 222 | override def decode(value: Value): B = 223 | value match { 224 | case GroupValue.RecordValue(values) => 225 | Unsafe.unsafe { implicit unsafe => 226 | record 227 | .construct( 228 | Chunk 229 | .fromIterable(record.fields.map(f => values(f.name))) 230 | .zip(fields.map(_.unwrap)) 231 | .map { case (v, decoder) => 232 | decoder.decode(v) 233 | } 234 | ) 235 | .flatMap(transform.f) match { 236 | case Right(v) => v 237 | case Left(reason) => 238 | throw DecoderError(s"Couldn't decode $value: $reason") 239 | } 240 | } 241 | 242 | case other => 243 | throw DecoderError(s"Couldn't decode $other, it must be of type RecordValue") 244 | } 245 | } 246 | } 247 | }.cached 248 | 249 | def summoned: Deriver[ValueDecoder] = 250 | default.autoAcceptSummoned 251 | 252 | } 253 | -------------------------------------------------------------------------------- /modules/core/src/main/scala/me/mnedokushev/zio/apache/parquet/core/Value.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core 2 | 3 | import org.apache.parquet.io.api.{ Binary, RecordConsumer } 4 | import org.apache.parquet.schema.Type 5 | import zio.Chunk 6 | 7 | import java.math.{ BigDecimal, BigInteger } 8 | import java.nio.{ ByteBuffer, ByteOrder } 9 | import java.time.{ 10 | DayOfWeek, 11 | Duration, 12 | Instant, 13 | LocalDate, 14 | LocalDateTime, 15 | LocalTime, 16 | Month, 17 | MonthDay, 18 | OffsetDateTime, 19 | OffsetTime, 20 | Period, 21 | Year, 22 | YearMonth, 23 | ZoneId, 24 | ZoneOffset, 25 | ZonedDateTime 26 | } 27 | import java.util.{ Currency, UUID } 28 | 29 
| sealed trait Value { 30 | def write(schema: Type, recordConsumer: RecordConsumer): Unit 31 | } 32 | 33 | object Value { 34 | 35 | case object NullValue extends Value { 36 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 37 | throw new UnsupportedOperationException(s"NullValue cannot be written") 38 | } 39 | 40 | sealed trait PrimitiveValue[A] extends Value { 41 | def value: A 42 | } 43 | 44 | object PrimitiveValue { 45 | 46 | case class BooleanValue(value: Boolean) extends PrimitiveValue[Boolean] { 47 | 48 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 49 | recordConsumer.addBoolean(value) 50 | 51 | } 52 | 53 | case class Int32Value(value: Int) extends PrimitiveValue[Int] { 54 | 55 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 56 | recordConsumer.addInteger(value) 57 | 58 | } 59 | 60 | case class Int64Value(value: Long) extends PrimitiveValue[Long] { 61 | 62 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 63 | recordConsumer.addLong(value) 64 | 65 | } 66 | 67 | case class FloatValue(value: Float) extends PrimitiveValue[Float] { 68 | 69 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 70 | recordConsumer.addFloat(value) 71 | 72 | } 73 | 74 | case class DoubleValue(value: Double) extends PrimitiveValue[Double] { 75 | 76 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 77 | recordConsumer.addDouble(value) 78 | 79 | } 80 | 81 | case class BinaryValue(value: Binary) extends PrimitiveValue[Binary] { 82 | 83 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = 84 | recordConsumer.addBinary(value) 85 | 86 | } 87 | 88 | } 89 | 90 | sealed trait GroupValue[Self <: GroupValue[Self]] extends Value { 91 | 92 | def put(name: String, value: Value): Self 93 | 94 | } 95 | 96 | object GroupValue { 97 | 98 | case class RecordValue(values: Map[String, Value]) extends GroupValue[RecordValue] { 99 | 100 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = { 101 | val groupSchema = schema.asGroupType() 102 | 103 | recordConsumer.startGroup() 104 | 105 | values.foreach { case (name, value) => 106 | val fieldIndex = groupSchema.getFieldIndex(name) 107 | val fieldType = groupSchema.getType(name) 108 | 109 | recordConsumer.startField(name, fieldIndex) 110 | value.write(fieldType, recordConsumer) 111 | recordConsumer.endField(name, fieldIndex) 112 | } 113 | 114 | recordConsumer.endGroup() 115 | } 116 | 117 | override def put(name: String, value: Value): RecordValue = 118 | if (values.contains(name)) 119 | this.copy(values.updated(name, value)) 120 | else 121 | throw new IllegalArgumentException(s"Record doesn't contain field $name") 122 | 123 | } 124 | 125 | case class ListValue(values: Chunk[Value]) extends GroupValue[ListValue] { 126 | 127 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = { 128 | recordConsumer.startGroup() 129 | 130 | if (values.nonEmpty) { 131 | val groupSchema = schema.asGroupType() 132 | val listSchema = groupSchema.getFields.get(0).asGroupType() 133 | val listFieldName = listSchema.getName 134 | val elementName = listSchema.getFields.get(0).getName // TODO: validate, must be "element" 135 | val listIndex = groupSchema.getFieldIndex(listFieldName) 136 | 137 | recordConsumer.startField(listFieldName, listIndex) 138 | 139 | values.foreach { value => 140 | RecordValue(Map(elementName -> value)).write(listSchema, recordConsumer) 141 | } 142 | 143 | 
recordConsumer.endField(listFieldName, listIndex) 144 | } 145 | 146 | recordConsumer.endGroup() 147 | } 148 | 149 | override def put(name: String, value: Value): ListValue = 150 | this.copy(values = values :+ value) 151 | 152 | } 153 | 154 | case class MapValue(values: Map[Value, Value]) extends GroupValue[MapValue] { 155 | 156 | override def write(schema: Type, recordConsumer: RecordConsumer): Unit = { 157 | recordConsumer.startGroup() 158 | 159 | if (values.nonEmpty) { 160 | val groupSchema = schema.asGroupType() 161 | val mapSchema = groupSchema.getFields.get(0).asGroupType() 162 | val mapFieldName = mapSchema.getName 163 | val mapIndex = groupSchema.getFieldIndex(mapFieldName) 164 | 165 | recordConsumer.startField(mapFieldName, mapIndex) 166 | 167 | values.foreach { case (key, value) => 168 | RecordValue(Map("key" -> key, "value" -> value)).write(mapSchema, recordConsumer) 169 | } 170 | 171 | recordConsumer.endField(mapFieldName, mapIndex) 172 | } 173 | 174 | recordConsumer.endGroup() 175 | } 176 | 177 | override def put(name: String, value: Value): MapValue = 178 | value match { 179 | case RecordValue(values0) => 180 | (values0.get("key"), values0.get("value")) match { 181 | case (Some(k), Some(v)) => 182 | this.copy(values = values.updated(k, v)) 183 | case _ => this 184 | } 185 | case mv: MapValue => mv 186 | case _ => this 187 | } 188 | } 189 | 190 | } 191 | 192 | def nil = 193 | NullValue 194 | 195 | def string(v: String) = 196 | PrimitiveValue.BinaryValue(Binary.fromString(v)) 197 | 198 | def boolean(v: Boolean) = 199 | PrimitiveValue.BooleanValue(v) 200 | 201 | def byte(v: Byte) = 202 | int(v.toInt) 203 | 204 | def short(v: Short) = 205 | int(v.toInt) 206 | 207 | def int(v: Int) = 208 | PrimitiveValue.Int32Value(v) 209 | 210 | def long(v: Long) = 211 | PrimitiveValue.Int64Value(v) 212 | 213 | def float(v: Float) = 214 | PrimitiveValue.FloatValue(v) 215 | 216 | def double(v: Double) = 217 | PrimitiveValue.DoubleValue(v) 218 | 219 | def binary(v: Chunk[Byte]) = 220 | PrimitiveValue.BinaryValue(Binary.fromConstantByteArray(v.toArray)) 221 | 222 | def char(v: Char) = 223 | int(v.toInt) 224 | 225 | def uuid(v: UUID) = { 226 | val bb = ByteBuffer.wrap(Array.ofDim(16)) 227 | 228 | bb.putLong(v.getMostSignificantBits) 229 | bb.putLong(v.getLeastSignificantBits) 230 | 231 | PrimitiveValue.BinaryValue(Binary.fromConstantByteArray(bb.array())) 232 | } 233 | 234 | def currency(v: Currency) = 235 | string(v.getCurrencyCode) 236 | 237 | def bigDecimal(v: BigDecimal) = 238 | long(v.unscaledValue.longValue) 239 | 240 | def bigInteger(v: BigInteger) = 241 | PrimitiveValue.BinaryValue(Binary.fromConstantByteArray(v.toByteArray)) 242 | 243 | def dayOfWeek(v: DayOfWeek) = 244 | byte(v.getValue.toByte) 245 | 246 | def month(v: Month) = 247 | byte(v.getValue.toByte) 248 | 249 | def monthDay(v: MonthDay) = { 250 | val bb = ByteBuffer.allocate(2).order(ByteOrder.LITTLE_ENDIAN) 251 | 252 | bb.put(v.getMonthValue.toByte) 253 | bb.put(v.getDayOfMonth.toByte) 254 | 255 | PrimitiveValue.BinaryValue(Binary.fromReusedByteArray(bb.array())) 256 | } 257 | 258 | def period(v: Period) = { 259 | val bb = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN) 260 | 261 | bb.putInt(v.getYears) 262 | bb.putInt(v.getMonths) 263 | bb.putInt(v.getDays) 264 | 265 | PrimitiveValue.BinaryValue(Binary.fromReusedByteArray(bb.array())) 266 | } 267 | 268 | def year(v: Year) = 269 | short(v.getValue.toShort) 270 | 271 | def yearMonth(v: YearMonth) = { 272 | val bb = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN) 273 | 
274 | bb.putShort(v.getYear.toShort) 275 | bb.putShort(v.getMonthValue.toShort) 276 | 277 | PrimitiveValue.BinaryValue(Binary.fromReusedByteArray(bb.array())) 278 | } 279 | 280 | def zoneId(v: ZoneId) = 281 | string(v.getId) 282 | 283 | def zoneOffset(v: ZoneOffset) = 284 | string(v.getId) 285 | 286 | def duration(v: Duration) = 287 | long(v.toMillis) 288 | 289 | def instant(v: Instant) = 290 | long(v.toEpochMilli) 291 | 292 | def localDate(v: LocalDate) = 293 | int(v.toEpochDay.toInt) 294 | 295 | def localTime(v: LocalTime) = 296 | int((v.toNanoOfDay / MICROS_FACTOR).toInt) 297 | 298 | def localDateTime(v: LocalDateTime) = { 299 | val dateMillis = v.toLocalDate.toEpochDay * MILLIS_PER_DAY 300 | val timeMillis = v.toLocalTime.toNanoOfDay / MICROS_FACTOR 301 | val epochMillis = dateMillis + timeMillis 302 | 303 | long(epochMillis) 304 | } 305 | 306 | def offsetTime(v: OffsetTime) = { 307 | val timeMillis = v.toLocalTime.toNanoOfDay / MICROS_FACTOR 308 | val offsetMillis = v.getOffset.getTotalSeconds * MILLIS_FACTOR 309 | val timeOffsetMillis = timeMillis - offsetMillis 310 | val dayMillis = if (timeOffsetMillis < 0) MILLIS_PER_DAY - timeOffsetMillis else timeOffsetMillis 311 | 312 | int(dayMillis.toInt) 313 | } 314 | 315 | def offsetDateTime(v: OffsetDateTime) = { 316 | val dateMillis = v.toLocalDate.toEpochDay * MILLIS_PER_DAY 317 | val timeMillis = v.toLocalTime.toNanoOfDay / MICROS_FACTOR 318 | val offsetMillis = v.getOffset.getTotalSeconds * MILLIS_FACTOR 319 | val timeOffsetMillis = timeMillis - offsetMillis 320 | val dayMillis = if (timeOffsetMillis < 0) MILLIS_PER_DAY - timeOffsetMillis else timeOffsetMillis 321 | val epochMillis = dateMillis + dayMillis 322 | 323 | long(epochMillis) 324 | } 325 | 326 | def zonedDateTime(v: ZonedDateTime) = { 327 | val dateMillis = v.toLocalDate.toEpochDay * MILLIS_PER_DAY 328 | val timeMillis = v.toLocalTime.toNanoOfDay / MICROS_FACTOR 329 | val offsetMillis = v.getOffset.getTotalSeconds * MILLIS_FACTOR 330 | val timeOffsetMillis = timeMillis - offsetMillis 331 | val dayMillis = if (timeOffsetMillis < 0) MILLIS_PER_DAY - timeOffsetMillis else timeOffsetMillis 332 | val epochMillis = dateMillis + dayMillis 333 | 334 | long(epochMillis) 335 | } 336 | 337 | def record(r: Map[String, Value]) = 338 | GroupValue.RecordValue(r) 339 | 340 | def list(vs: Chunk[Value]) = 341 | GroupValue.ListValue(vs) 342 | 343 | def map(kvs: Map[Value, Value]) = 344 | GroupValue.MapValue(kvs) 345 | } 346 | -------------------------------------------------------------------------------- /modules/core/src/test/scala/me/mnedokushev/zio/apache/parquet/core/codec/SchemaEncoderDeriverSpec.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.codec 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Schemas.PrimitiveDef 4 | import me.mnedokushev.zio.apache.parquet.core.{ Fixtures, Schemas } 5 | import zio._ 6 | import zio.schema._ 7 | import zio.test._ 8 | 9 | import java.util.UUID 10 | //import scala.annotation.nowarn 11 | 12 | object SchemaEncoderDeriverSpec extends ZIOSpecDefault { 13 | 14 | sealed trait MyEnum 15 | object MyEnum { 16 | case object Started extends MyEnum 17 | case object InProgress extends MyEnum 18 | case object Done extends MyEnum 19 | 20 | implicit val schema: Schema[MyEnum] = DeriveSchema.gen[MyEnum] 21 | } 22 | 23 | case class Record(a: Int, b: Option[String]) 24 | object Record { 25 | implicit val schema: Schema[Record] = DeriveSchema.gen[Record] 26 | } 27 | 28 | // 
Helper for being able to extract type parameter A from a given schema in order to cast the type of encoder< 29 | private def encode[A](encoder: SchemaEncoder[?], schema: Schema[A], name: String, optional: Boolean) = 30 | encoder.asInstanceOf[SchemaEncoder[A]].encode(schema, name, optional) 31 | 32 | override def spec: Spec[TestEnvironment & Scope, Any] = 33 | suite("SchemaEncoderDeriverSpec")( 34 | test("primitive") { 35 | def named(defs: List[PrimitiveDef], names: List[String]) = 36 | defs.zip(names).map { case (schemaDef, name) => 37 | schemaDef.named(name) 38 | } 39 | 40 | val encoders: List[SchemaEncoder[?]] = 41 | List( 42 | Derive.derive[SchemaEncoder, String](SchemaEncoderDeriver.default), 43 | Derive.derive[SchemaEncoder, Boolean](SchemaEncoderDeriver.default), 44 | Derive.derive[SchemaEncoder, Byte](SchemaEncoderDeriver.default), 45 | Derive.derive[SchemaEncoder, Short](SchemaEncoderDeriver.default), 46 | Derive.derive[SchemaEncoder, Int](SchemaEncoderDeriver.default), 47 | Derive.derive[SchemaEncoder, Long](SchemaEncoderDeriver.default), 48 | Derive.derive[SchemaEncoder, UUID](SchemaEncoderDeriver.default) 49 | ) 50 | val schemas: List[Schema[?]] = 51 | List( 52 | Schema.primitive[String], 53 | Schema.primitive[Boolean], 54 | Schema.primitive[Byte], 55 | Schema.primitive[Short], 56 | Schema.primitive[Int], 57 | Schema.primitive[Long], 58 | Schema.primitive[UUID] 59 | ) 60 | val names = 61 | List( 62 | "string", 63 | "boolean", 64 | "byte", 65 | "short", 66 | "int", 67 | "long", 68 | "uuid" 69 | ) 70 | val schemaDefs = List( 71 | Schemas.string, 72 | Schemas.boolean, 73 | Schemas.byte, 74 | Schemas.short, 75 | Schemas.int, 76 | Schemas.long, 77 | Schemas.uuid 78 | ) 79 | val optionalDefs = 80 | schemaDefs.map(_.optional) 81 | val requiredDefs = 82 | schemaDefs.map(_.required) 83 | 84 | val expectedOptional = named(optionalDefs, names) 85 | val expectedRequired = named(requiredDefs, names) 86 | 87 | encoders 88 | .zip(schemas) 89 | .zip(names) 90 | .zip(expectedOptional) 91 | .zip(expectedRequired) 92 | .map { case ((((encoder, schema), name), expOptional), expRequired) => 93 | val tpeOptional = encode(encoder, schema, name, optional = true) 94 | val tpeRequired = encode(encoder, schema, name, optional = false) 95 | 96 | assertTrue(tpeOptional == expOptional, tpeRequired == expRequired) 97 | } 98 | .reduce(_ && _) 99 | }, 100 | test("record") { 101 | val name = "record" 102 | val encoder = Derive.derive[SchemaEncoder, Record](SchemaEncoderDeriver.default) 103 | val tpeOptional = encoder.encode(Record.schema, name, optional = true) 104 | val tpeRequired = encoder.encode(Record.schema, name, optional = false) 105 | val schemaDef = Schemas.record( 106 | Chunk( 107 | Schemas.int.required.named("a"), 108 | Schemas.string.optional.named("b") 109 | ) 110 | ) 111 | 112 | assertTrue( 113 | tpeOptional == schemaDef.optional.named(name), 114 | tpeRequired == schemaDef.required.named(name) 115 | ) 116 | }, 117 | test("record arity > 22") { 118 | val name = "arity" 119 | val encoder = Derive.derive[SchemaEncoder, Fixtures.Arity23](SchemaEncoderDeriver.default) 120 | val tpeOptional = encoder.encode(Fixtures.Arity23.schema, name, optional = true) 121 | val tpeRequired = encoder.encode(Fixtures.Arity23.schema, name, optional = false) 122 | val schemaDef = Schemas.record( 123 | Chunk( 124 | Schemas.int.required.named("a"), 125 | Schemas.string.optional.named("b"), 126 | Schemas.int.required.named("c"), 127 | Schemas.int.required.named("d"), 128 | Schemas.int.required.named("e"), 129 | 
Schemas.int.required.named("f"), 130 | Schemas.int.required.named("g"), 131 | Schemas.int.required.named("h"), 132 | Schemas.int.required.named("i"), 133 | Schemas.int.required.named("j"), 134 | Schemas.int.required.named("k"), 135 | Schemas.int.required.named("l"), 136 | Schemas.int.required.named("m"), 137 | Schemas.int.required.named("n"), 138 | Schemas.int.required.named("o"), 139 | Schemas.int.required.named("p"), 140 | Schemas.int.required.named("q"), 141 | Schemas.int.required.named("r"), 142 | Schemas.int.required.named("s"), 143 | Schemas.int.required.named("t"), 144 | Schemas.int.required.named("u"), 145 | Schemas.int.required.named("v"), 146 | Schemas.int.required.named("w") 147 | ) 148 | ) 149 | 150 | assertTrue( 151 | tpeOptional == schemaDef.optional.named(name), 152 | tpeRequired == schemaDef.required.named(name) 153 | ) 154 | }, 155 | test("sequence") { 156 | val name = "mylist" 157 | val encoders: List[SchemaEncoder[?]] = 158 | List( 159 | Derive.derive[SchemaEncoder, List[String]](SchemaEncoderDeriver.default), 160 | Derive.derive[SchemaEncoder, List[Boolean]](SchemaEncoderDeriver.default), 161 | Derive.derive[SchemaEncoder, List[Byte]](SchemaEncoderDeriver.default), 162 | Derive.derive[SchemaEncoder, List[Short]](SchemaEncoderDeriver.default), 163 | Derive.derive[SchemaEncoder, List[Int]](SchemaEncoderDeriver.default), 164 | Derive.derive[SchemaEncoder, List[Long]](SchemaEncoderDeriver.default), 165 | Derive.derive[SchemaEncoder, List[UUID]](SchemaEncoderDeriver.default), 166 | Derive.derive[SchemaEncoder, List[Option[String]]](SchemaEncoderDeriver.default), 167 | Derive.derive[SchemaEncoder, List[Option[Boolean]]](SchemaEncoderDeriver.default), 168 | Derive.derive[SchemaEncoder, List[Option[Byte]]](SchemaEncoderDeriver.default), 169 | Derive.derive[SchemaEncoder, List[Option[Short]]](SchemaEncoderDeriver.default), 170 | Derive.derive[SchemaEncoder, List[Option[Int]]](SchemaEncoderDeriver.default), 171 | Derive.derive[SchemaEncoder, List[Option[Long]]](SchemaEncoderDeriver.default), 172 | Derive.derive[SchemaEncoder, List[Option[UUID]]](SchemaEncoderDeriver.default) 173 | ) 174 | val schemas: List[Schema[?]] = 175 | List( 176 | Schema.list[String], 177 | Schema.list[Int], 178 | Schema.list[Option[String]], 179 | Schema.list[Option[Int]] 180 | ) 181 | val elements = 182 | List( 183 | Schemas.string, 184 | Schemas.boolean, 185 | Schemas.byte, 186 | Schemas.short, 187 | Schemas.int, 188 | Schemas.long, 189 | Schemas.uuid 190 | ) 191 | val schemaDefs = 192 | (elements.map(_.required) ++ elements.map(_.optional)) 193 | .map(_.named("element")) 194 | .map(Schemas.list) 195 | val expectedOptional = 196 | schemaDefs.map(_.optional.named(name)) 197 | val expectedRequired = 198 | schemaDefs.map(_.required.named(name)) 199 | 200 | encoders 201 | .zip(schemas) 202 | .zip(expectedOptional) 203 | .zip(expectedRequired) 204 | .map { case (((encoder, schema), expOptional), expRequired) => 205 | val tpeOptional = encode(encoder, schema, name, optional = true) 206 | val tpeRequired = encode(encoder, schema, name, optional = false) 207 | 208 | assertTrue( 209 | tpeOptional == expOptional, 210 | tpeRequired == expRequired 211 | ) 212 | } 213 | .reduce(_ && _) 214 | }, 215 | test("map") { 216 | val name = "mymap" 217 | val encoder = Derive.derive[SchemaEncoder, Map[String, Int]](SchemaEncoderDeriver.default) 218 | val tpe = encoder.encode(Schema.map[String, Int], name, optional = true) 219 | 220 | assertTrue( 221 | tpe == Schemas 222 | .map(Schemas.string.required.named("key"), 
Schemas.int.required.named("value")) 223 | .optional 224 | .named(name) 225 | ) 226 | }, 227 | test("enum") { 228 | val name = "myenum" 229 | val encoder = Derive.derive[SchemaEncoder, MyEnum](SchemaEncoderDeriver.default) 230 | val tpe = encoder.encode(Schema[MyEnum], name, optional = true) 231 | 232 | assertTrue(tpe == Schemas.enum0.optional.named(name)) 233 | } 234 | // test("summoned") { 235 | // // @nowarn annotation is needed to avoid having 'variable is not used' compiler error 236 | // @nowarn 237 | // implicit val intEncoder: SchemaEncoder[Int] = new SchemaEncoder[Int] { 238 | // override def encode(schema: Schema[Int], name: String, optional: Boolean): Type = 239 | // Schemas.uuid.optionality(optional).named(name) 240 | // } 241 | // 242 | // val name = "myrecord" 243 | // val encoder = Derive.derive[SchemaEncoder, Record](SchemaEncoderDeriver.summoned) 244 | // val tpe = encoder.encode(Record.schema, name, optional = true) 245 | // 246 | // assertTrue( 247 | // tpe == Schemas 248 | // .record(Chunk(Schemas.uuid.required.named("a"), Schemas.string.optional.named("b"))) 249 | // .optional 250 | // .named(name) 251 | // ) 252 | // } 253 | ) 254 | 255 | } 256 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](docs/logo.png) 2 | 3 | ![Build status](https://github.com/grouzen/zio-apache-parquet/actions/workflows/ci.yml/badge.svg) 4 | ![Maven Central](https://img.shields.io/maven-central/v/me.mnedokushev/zio-apache-parquet-core_2.13.svg?label=Maven%20central) 5 | [![Scala Steward badge](https://img.shields.io/badge/Scala_Steward-helping-blue.svg?style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA4AAAAQCAMAAAARSr4IAAAAVFBMVEUAAACHjojlOy5NWlrKzcYRKjGFjIbp293YycuLa3pYY2LSqql4f3pCUFTgSjNodYRmcXUsPD/NTTbjRS+2jomhgnzNc223cGvZS0HaSD0XLjbaSjElhIr+AAAAAXRSTlMAQObYZgAAAHlJREFUCNdNyosOwyAIhWHAQS1Vt7a77/3fcxxdmv0xwmckutAR1nkm4ggbyEcg/wWmlGLDAA3oL50xi6fk5ffZ3E2E3QfZDCcCN2YtbEWZt+Drc6u6rlqv7Uk0LdKqqr5rk2UCRXOk0vmQKGfc94nOJyQjouF9H/wCc9gECEYfONoAAAAASUVORK5CYII=)](https://scala-steward.org) 6 | 7 | # ZIO Apache Parquet 8 | 9 | A ZIO-powered wrapper for [Apache Parquet's Java implementation](https://github.com/apache/parquet-mr), leveraging [ZIO Schema](https://zio.dev/zio-schema/) to automatically derive codecs and provide type-safe filter predicates. Operate your parquet files easily using a top-notch ZIO-powered ecosystem without running a Spark cluster. 10 | 11 | Ready for more? Check out my other game-changing library that makes working with Apache Arrow format a breeze - [ZIO Apache Arrow](https://github.com/grouzen/zio-apache-arrow). 12 | 13 | ## Why? 14 | 15 | - **No Spark required** - you don't need to run a Spark cluster to read/write Parquet files. 16 | - **ZIO native** - utilizes various ZIO features to offer a FP-oriented way of working with the Parquet API. 17 | - **ZIO Schema** - the backbone that powers all the cool features of this library such as type-safe filter predicates and codecs derivation. 18 | 19 | 20 | ## Contents 21 | 22 | - [Installation](#installation) 23 | - [Usage](#usage) 24 | - [Codecs](#codecs) 25 | - [Schema](#schema) 26 | - [Value](#value) 27 | - [Reading & Writing files](#reading--writing-files) 28 | - [Filtering](#filtering) 29 | - [Resources](#resources) 30 | 31 | ## Installation 32 | 33 | ```scala 34 | libraryDependencies += "me.mnedokushev" %% "zio-apache-parquet-core" % "@VERSION@" 35 | ``` 36 | 37 | ## Usage 38 | 39 | All examples are self-contained [Scala CLI](https://scala-cli.virtuslab.org) snippets. You can find copies of them in `docs/scala-cli`. 40 | 41 | ### Codecs 42 | 43 | To be able to write/read data to/from parquet files you need to define the following schema and value codecs 44 | `SchemaEncoder`, `ValueEncoder`, and `ValueDecoder` for your case classes. 
45 | 46 | #### Schema 47 | 48 | You can get the Java SDK's `Type` by using a `SchemaEncoder` generated by the `SchemaEncoderDeriver.default` ZIO Schema deriver: 49 | 50 | ```scala 51 | //> using scala "3.7.1" 52 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.1 53 | 54 | import zio.schema.* 55 | import me.mnedokushev.zio.apache.parquet.core.codec.* 56 | 57 | object Schema extends App: 58 | 59 | case class MyRecord(a: Int, b: String, c: Option[Long]) 60 | 61 | object MyRecord: 62 | given schema: Schema[MyRecord] = 63 | DeriveSchema.gen[MyRecord] 64 | given schemaEncoder: SchemaEncoder[MyRecord] = 65 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 66 | 67 | val parquetSchema = MyRecord.schemaEncoder.encode(MyRecord.schema, "my_record", optional = false) 68 | 69 | println(parquetSchema) 70 | // Outputs: 71 | // required group my_record { 72 | // required int32 a (INTEGER(32,true)); 73 | // required binary b (STRING); 74 | // optional int64 c (INTEGER(64,true)); 75 | // } 76 | ``` 77 | 78 | Alternatively, you can customize the schemas of [primitive](https://zio.dev/zio-schema/standard-type-reference) fields within your record by defining a custom `SchemaEncoder` 79 | and using the `SchemaEncoderDeriver.summoned` deriver. 80 | 81 | ```scala 82 | //> using scala "3.7.1" 83 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.1 84 | 85 | import me.mnedokushev.zio.apache.parquet.core.Schemas 86 | import zio.schema.* 87 | import me.mnedokushev.zio.apache.parquet.core.codec.* 88 | 89 | object SchemaSummoned extends App: 90 | 91 | case class MyRecord(a: Int, b: String, c: Option[Long]) 92 | 93 | object MyRecord: 94 | given schema: Schema[MyRecord] = 95 | DeriveSchema.gen[MyRecord] 96 | // The custom encoder must be defined before the definition for your record type. 97 | given SchemaEncoder[Int] with { 98 | override def encode(schema: Schema[Int], name: String, optional: Boolean) = 99 | Schemas.uuid.optionality(optional).named(name) 100 | } 101 | given schemaEncoder: SchemaEncoder[MyRecord] = 102 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.summoned) 103 | 104 | val parquetSchema = MyRecord.schemaEncoder.encode(MyRecord.schema, "my_record", optional = false) 105 | 106 | println(parquetSchema) 107 | // Outputs: 108 | // required group my_record { 109 | // required fixed_len_byte_array(16) a (UUID); 110 | // required binary b (STRING); 111 | // optional int64 c (INTEGER(64,true)); 112 | // } 113 | ``` 114 | 115 | Case classes with arity greater than 22 are supported too. Check out the [SchemaArity23.scala Scala CLI example](docs/scala-cli/SchemaArity23.scala)! 116 | 117 | #### Value 118 | 119 | `Value` is a sealed hierarchy of types for interop between Scala values and Parquet readers/writers. 120 | For converting Scala values into `Value` and back we need to define instances of the `ValueEncoder` and `ValueDecoder` 121 | type classes. This can be done by using the `ValueEncoderDeriver.default` and `ValueDecoderDeriver.default` ZIO Schema derivers.
122 | 123 | ```scala 124 | //> using scala "3.7.1" 125 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.1 126 | 127 | import zio.schema.* 128 | import me.mnedokushev.zio.apache.parquet.core.codec.* 129 | 130 | object Value extends App: 131 | 132 | case class MyRecord(a: Int, b: String, c: Option[Long]) 133 | 134 | object MyRecord: 135 | given Schema[MyRecord] = 136 | DeriveSchema.gen[MyRecord] 137 | given encoder: ValueEncoder[MyRecord] = 138 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 139 | given decoder: ValueDecoder[MyRecord] = 140 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 141 | 142 | val value = MyRecord.encoder.encode(MyRecord(3, "zio", None)) 143 | val record = MyRecord.decoder.decode(value) 144 | 145 | println(value) 146 | // Outputs: 147 | // RecordValue(Map(a -> Int32Value(3), b -> BinaryValue(Binary{"zio"}), c -> NullValue)) 148 | println(record) 149 | // Outputs: 150 | // MyRecord(3,zio,None) 151 | ``` 152 | 153 | Same as for `SchemaEncoder`, you can customize the codecs of primitive types by defining custom 154 | `ValueEncoder`/`ValueDecoder` and using `ValueEncoderDeriver.summoned`/`ValueDecoderDeriver.summoned` derivers accordingly. 155 | 156 | ```scala 157 | //> using scala "3.7.1" 158 | //> using dep me.mnedokushev::zio-apache-parquet-core:0.3.1 159 | 160 | import me.mnedokushev.zio.apache.parquet.core.Value 161 | import zio.schema.* 162 | import me.mnedokushev.zio.apache.parquet.core.codec.* 163 | 164 | import java.nio.charset.StandardCharsets 165 | 166 | object ValueSummoned extends App: 167 | 168 | case class MyRecord(a: Int, b: String, c: Option[Long]) 169 | 170 | object MyRecord: 171 | given Schema[MyRecord] = 172 | DeriveSchema.gen[MyRecord] 173 | given ValueEncoder[Int] with { 174 | override def encode(value: Int): Value = 175 | Value.string(value.toString) 176 | } 177 | given ValueDecoder[Int] with { 178 | override def decode(value: Value): Int = 179 | value match { 180 | case Value.PrimitiveValue.BinaryValue(v) => 181 | new String(v.getBytes, StandardCharsets.UTF_8).toInt 182 | case other => 183 | throw DecoderError(s"Wrong value: $other") 184 | } 185 | } 186 | given encoder: ValueEncoder[MyRecord] = 187 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.summoned) 188 | given decoder: ValueDecoder[MyRecord] = 189 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.summoned) 190 | 191 | val value = MyRecord.encoder.encode(MyRecord(3, "zio", None)) 192 | val record = MyRecord.decoder.decode(value) 193 | 194 | println(value) 195 | // Outputs: 196 | // RecordValue(Map(a -> BinaryValue(Binary{"3"}), b -> BinaryValue(Binary{"zio"}), c -> NullValue)) 197 | println(record) 198 | // Outputs: 199 | // MyRecord(3,zio,None) 200 | ``` 201 | 202 | ### Reading & Writing files 203 | 204 | Finally, to perform some IO operations we need to initialize `ParquetWriter` and `ParquetReader` and use either 205 | `writeChunk`/`readChunk` or `writeStream`/`readStream` methods. 
206 | 207 | ```scala 208 | //> using scala "3.7.1" 209 | //> using dep me.mnedokushev::zio-apache-parquet-hadoop:0.3.1 210 | 211 | import zio.schema.* 212 | import me.mnedokushev.zio.apache.parquet.core.codec.* 213 | import me.mnedokushev.zio.apache.parquet.hadoop.{ ParquetReader, ParquetWriter, Path } 214 | import zio.* 215 | 216 | import java.nio.file.Files 217 | 218 | object ParquetIO extends ZIOAppDefault: 219 | 220 | case class MyRecord(a: Int, b: String, c: Option[Long]) 221 | 222 | object MyRecord: 223 | given Schema[MyRecord] = 224 | DeriveSchema.gen[MyRecord] 225 | given SchemaEncoder[MyRecord] = 226 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 227 | given ValueEncoder[MyRecord] = 228 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 229 | given ValueDecoder[MyRecord] = 230 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 231 | 232 | val data = 233 | Chunk( 234 | MyRecord(1, "first", Some(11)), 235 | MyRecord(3, "third", None) 236 | ) 237 | 238 | val recordsFile = Path(Files.createTempDirectory("records")) / "records.parquet" 239 | 240 | override def run = 241 | (for { 242 | writer <- ZIO.service[ParquetWriter[MyRecord]] 243 | reader <- ZIO.service[ParquetReader[MyRecord]] 244 | _ <- writer.writeChunk(recordsFile, data) 245 | fromFile <- reader.readChunk(recordsFile) 246 | _ <- Console.printLine(fromFile) 247 | } yield ()).provide( 248 | ParquetWriter.configured[MyRecord](), 249 | ParquetReader.configured[MyRecord]() 250 | ) 251 | // Outputs: 252 | // Chunk(MyRecord(1,first,Some(11)),MyRecord(3,third,None)) 253 | ``` 254 | 255 | In the code snippet above we used `ParquetReader.configured[A]()` to initialize a reader that takes its Parquet schema from the file being read. Such a reader always reads all columns of the file. 256 | 257 | If you need to read only a subset of the columns, use `ParquetReader.projected[A]()`. It reads only the columns present in the schema of `A` and skips the rest, saving CPU cycles and time. 258 | 259 | #### Filtering 260 | 261 | Say goodbye to type-unsafe filter predicates such as `Col("foo") != "bar"`. The library takes advantage of an underdocumented feature of ZIO Schema, [Accessors](https://github.com/zio/zio-schema/blob/main/zio-schema/shared/src/main/scala/zio/schema/Schema.scala#L38), a hidden gem that allows extracting type-level information about the fields of case classes. In addition to the codecs shown above, you need to provide an instance of `TypeTag` for your record type. For this, use the `TypeTagDeriver.default` deriver.
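To see what the filter DSL produces on its own, here is a hypothetical standalone sketch that only compiles a predicate, mirroring what `ExprSpec.scala` does later in this repository. The object name `FilterCompile` and the column names are illustrative; `filter(...)` yields an `Either` carrying the compiled Parquet `FilterPredicate` on the right (as the test suite asserts with `isRight`).

```scala
//> using scala "3.7.1"
//> using dep me.mnedokushev::zio-apache-parquet-core:0.3.1

import zio.schema.*
import me.mnedokushev.zio.apache.parquet.core.filter.syntax.*
import me.mnedokushev.zio.apache.parquet.core.filter.*

object FilterCompile extends App:

  case class MyRecord(a: Int, b: String, c: Option[Long])

  object MyRecord:
    // Field names must be provided as singleton types, as in the full example below.
    given Schema.CaseClass3.WithFields["a", "b", "c", Int, String, Option[Long], MyRecord] =
      DeriveSchema.gen[MyRecord]
    given TypeTag[MyRecord] =
      Derive.derive[TypeTag, MyRecord](TypeTagDeriver.default)

    // Column accessors, as in the filtering example below.
    val (a, b, c) = Filter[MyRecord].columns

  // Compile a predicate without touching any files. The Right side of the
  // printed Either carries the underlying Parquet FilterPredicate.
  println(filter(MyRecord.a > 1 `and` MyRecord.b =!= "bar"))
```

The complete end-to-end example below combines this with the codecs, a writer, and a reader: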
262 | 263 | ```scala 264 | //> using scala "3.7.1" 265 | //> using dep me.mnedokushev::zio-apache-parquet-hadoop:0.3.1 266 | 267 | import zio.* 268 | import zio.schema.* 269 | import me.mnedokushev.zio.apache.parquet.core.codec.* 270 | import me.mnedokushev.zio.apache.parquet.hadoop.{ ParquetReader, ParquetWriter, Path } 271 | import me.mnedokushev.zio.apache.parquet.core.filter.syntax.* 272 | import me.mnedokushev.zio.apache.parquet.core.filter.* 273 | 274 | import java.nio.file.Files 275 | 276 | object Filtering extends ZIOAppDefault: 277 | 278 | case class MyRecord(a: Int, b: String, c: Option[Long]) 279 | 280 | object MyRecord: 281 | // We need to provide field names as singleton types 282 | given Schema.CaseClass3.WithFields["a", "b", "c", Int, String, Option[Long], MyRecord] = 283 | DeriveSchema.gen[MyRecord] 284 | given SchemaEncoder[MyRecord] = 285 | Derive.derive[SchemaEncoder, MyRecord](SchemaEncoderDeriver.default) 286 | given ValueEncoder[MyRecord] = 287 | Derive.derive[ValueEncoder, MyRecord](ValueEncoderDeriver.default) 288 | given ValueDecoder[MyRecord] = 289 | Derive.derive[ValueDecoder, MyRecord](ValueDecoderDeriver.default) 290 | given TypeTag[MyRecord] = 291 | Derive.derive[TypeTag, MyRecord](TypeTagDeriver.default) 292 | 293 | // Define accessors to use them later in the filter predicate. 294 | // You can give the accessors any names, as demonstrated here. 295 | val (id, name, age) = Filter[MyRecord].columns 296 | 297 | val data = 298 | Chunk( 299 | MyRecord(1, "bob", Some(10L)), 300 | MyRecord(2, "bob", Some(12L)), 301 | MyRecord(3, "alice", Some(13L)), 302 | MyRecord(4, "john", None) 303 | ) 304 | 305 | val recordsFile = Path(Files.createTempDirectory("records")) / "records.parquet" 306 | 307 | override def run = 308 | ( 309 | for { 310 | writer <- ZIO.service[ParquetWriter[MyRecord]] 311 | reader <- ZIO.service[ParquetReader[MyRecord]] 312 | _ <- writer.writeChunk(recordsFile, data) 313 | fromFile <- reader.readChunkFiltered( 314 | recordsFile, 315 | filter( 316 | MyRecord.id > 1 `and` ( 317 | MyRecord.name =!= "bob" `or` 318 | // Use .nullable syntax for optional fields. 319 | MyRecord.age.nullable > 10L 320 | ) 321 | ) 322 | ) 323 | _ <- Console.printLine(fromFile) 324 | } yield () 325 | ).provide( 326 | ParquetWriter.configured[MyRecord](), 327 | ParquetReader.configured[MyRecord]() 328 | ) 329 | // Outputs: 330 | // Chunk(MyRecord(2,bob,Some(12)),MyRecord(3,alice,Some(13)),MyRecord(4,john,None)) 331 | ``` 332 | 333 | ## Resources 334 | 335 | - [Unpacking ZIO Schema's Accessors](https://mnedokushev.me/2024/09/05/unpacking-zio-schema-accessors.html) - A post on my personal blog exploring how ZIO Schema's underdocumented Accessors enable type-safe filtering. 336 | - [Scala's Hidden Treasures: Five ZIO-Compatible Libraries you didn't know you needed!](https://jorgevasquez.blog/scalas-hidden-treasures-five-zio-compatible-libraries-you-didnt-know-you-needed) - An article on Jorge Vásquez's blog accompanying his presentation at the [Functional Scala 2024 Conference](https://www.functionalscala.com). You can find more details in the [slides](https://jorge-vasquez-2301.github.io/scalas-hidden-treasures/24), and the recording is available on Ziverge's YouTube channel [here](https://www.youtube.com/watch?v=iFhQibDdqT0&list=PLvdARMfvom9CuM40p_Yr3UAtlADSKC2Js).
337 | - [Overview page on ZIO's official community ecosystem website](https://zio.dev/ecosystem/community/zio-apache-parquet/) - For a brief overview, visit this page on ZIO's official community ecosystem website. 338 | -------------------------------------------------------------------------------- /modules/core/src/test/scala/me/mnedokushev/zio/apache/parquet/core/filter/ExprSpec.scala: -------------------------------------------------------------------------------- 1 | package me.mnedokushev.zio.apache.parquet.core.filter 2 | 3 | import me.mnedokushev.zio.apache.parquet.core.Fixtures._ 4 | import me.mnedokushev.zio.apache.parquet.core.Value 5 | import me.mnedokushev.zio.apache.parquet.core.filter.TypeTag._ 6 | import me.mnedokushev.zio.apache.parquet.core.filter.syntax._ 7 | import org.apache.parquet.filter2.predicate.FilterApi 8 | import zio._ 9 | import zio.test.Assertion.{ equalTo, isRight } 10 | import zio.test._ 11 | 12 | import java.time._ 13 | import java.util.{ Currency, UUID } 14 | import scala.jdk.CollectionConverters._ 15 | 16 | object ExprSpec extends ZIOSpecDefault { 17 | 18 | override def spec: Spec[TestEnvironment & Scope, Any] = 19 | suite("ExprSpec")( 20 | test("compile all operators") { 21 | val (a, b, _, _, _) = Filter[MyRecord].columns 22 | 23 | val result = filter( 24 | not( 25 | (b >= 3 `or` b <= 100 `and` a.in(Set("foo", "bar"))) `or` 26 | (a === "foo" `and` (b === 20 `or` b.notIn(Set(1, 2, 3)))) `or` 27 | (a =!= "foo" `and` b > 2 `and` b < 10) 28 | ) 29 | ) 30 | 31 | val acol = FilterApi.binaryColumn("a") 32 | val bcol = FilterApi.intColumn("b") 33 | val expected = 34 | FilterApi.not( 35 | FilterApi.or( 36 | FilterApi.or( 37 | FilterApi.and( 38 | FilterApi.or( 39 | FilterApi.gtEq(bcol, Int.box(Value.int(3).value)), 40 | FilterApi.ltEq(bcol, Int.box(Value.int(100).value)) 41 | ), 42 | FilterApi.in(acol, Set(Value.string("foo").value, Value.string("bar").value).asJava) 43 | ), 44 | FilterApi.and( 45 | FilterApi.eq(acol, Value.string("foo").value), 46 | FilterApi.or( 47 | FilterApi.eq(bcol, Int.box(Value.int(20).value)), 48 | FilterApi.notIn(bcol, Set(1, 2, 3).map(i => Int.box(Value.int(i).value)).asJava) 49 | ) 50 | ) 51 | ), 52 | FilterApi.and( 53 | FilterApi.and( 54 | FilterApi.notEq(acol, Value.string("foo").value), 55 | FilterApi.gt(bcol, Int.box(Value.int(2).value)) 56 | ), 57 | FilterApi.lt(bcol, Int.box(Value.int(10).value)) 58 | ) 59 | ) 60 | ) 61 | 62 | assert(result)(isRight(equalTo(expected))) 63 | }, 64 | test("compile summoned") { 65 | val (a, b) = Filter[MyRecordSummoned].columns 66 | 67 | val result = filter( 68 | a === 3 `and` b === "foo" 69 | ) 70 | 71 | val acol = FilterApi.binaryColumn("a") 72 | val bcol = FilterApi.binaryColumn("b") 73 | val expected = FilterApi.and( 74 | FilterApi.eq(acol, Value.string("3").value), 75 | FilterApi.eq(bcol, Value.string("foo").value) 76 | ) 77 | 78 | assert(result)(isRight(equalTo(expected))) 79 | }, 80 | test("compile all primitive types") { 81 | val ( 82 | string, 83 | boolean, 84 | byte, 85 | short, 86 | int, 87 | long, 88 | float, 89 | double, 90 | binary, 91 | char, 92 | uuid, 93 | currency, 94 | bigDecimal, 95 | bigInteger, 96 | dayOfWeek, 97 | month, 98 | monthDay, 99 | period, 100 | year, 101 | yearMonth, 102 | zoneId, 103 | zoneOffset 104 | ) = Filter[MyRecordAllTypes1].columns 105 | 106 | val ( 107 | duration, 108 | instant, 109 | localDate, 110 | localTime, 111 | localDateTime, 112 | offsetTime, 113 | offsetDateTime, 114 | zonedDateTime 115 | ) = Filter[MyRecordAllTypes2].columns 116 | 117 | val 
stringPayload = "foo" 118 | val booleanPayload = true 119 | val bytePayload = 1.toByte 120 | val shortPayload = 1.toShort 121 | val intPayload = 1 122 | val longPayload = 1L 123 | val floatPayload = 1.0f 124 | val doublePayload = 1.0 125 | val binaryPayload = Chunk(1.toByte, 2.toByte) 126 | val charPayload = 'c' 127 | val uuidPayload = UUID.randomUUID() 128 | val currencyPayload = Currency.getInstance("USD") 129 | val bigDecimalPayload = new java.math.BigDecimal("1.0") 130 | val bigIntegerPayload = new java.math.BigInteger("99999999999") 131 | val dayOfWeekPayload = DayOfWeek.of(1) 132 | val monthPayload = Month.of(1) 133 | val monthDayPayload = MonthDay.of(1, 1) 134 | val periodPayload = Period.of(1, 1, 1) 135 | val yearPayload = Year.of(1) 136 | val yearMonthPayload = YearMonth.of(1, 1) 137 | val zoneIdPayload = ZoneId.of("Europe/Paris") 138 | val zoneOffsetPayload = ZoneOffset.of("+02:00") 139 | val durationPayload = 1.second 140 | val instantPayload = Instant.ofEpochMilli(1) 141 | val localDatePayload = LocalDate.ofEpochDay(1) 142 | val localTimePayload = LocalTime.ofInstant(instantPayload, zoneIdPayload) 143 | val localDateTimePayload = LocalDateTime.of(localDatePayload, localTimePayload) 144 | val offsetTimePayload = OffsetTime.ofInstant(instantPayload, zoneIdPayload) 145 | val offsetDateTimePayload = OffsetDateTime.ofInstant(instantPayload, zoneIdPayload) 146 | val zonedDateTimePayload = ZonedDateTime.ofInstant(localDateTimePayload, zoneOffsetPayload, zoneIdPayload) 147 | 148 | val stringExpected = FilterApi.eq( 149 | FilterApi.binaryColumn("string"), 150 | Value.string(stringPayload).value 151 | ) 152 | val booleanExpected = FilterApi.eq( 153 | FilterApi.booleanColumn("boolean"), 154 | Boolean.box(Value.boolean(booleanPayload).value) 155 | ) 156 | val byteExpected = FilterApi.eq( 157 | FilterApi.intColumn("byte"), 158 | Int.box(Value.byte(bytePayload).value) 159 | ) 160 | val shortExpected = FilterApi.eq( 161 | FilterApi.intColumn("short"), 162 | Int.box(Value.short(shortPayload).value) 163 | ) 164 | val intExpected = FilterApi.eq( 165 | FilterApi.intColumn("int"), 166 | Int.box(Value.int(intPayload).value) 167 | ) 168 | val longExpected = FilterApi.eq( 169 | FilterApi.longColumn("long"), 170 | Long.box(Value.long(longPayload).value) 171 | ) 172 | val floatExpected = FilterApi.eq( 173 | FilterApi.floatColumn("float"), 174 | Float.box(Value.float(floatPayload).value) 175 | ) 176 | val doubleExpected = FilterApi.eq( 177 | FilterApi.doubleColumn("double"), 178 | Double.box(Value.double(doublePayload).value) 179 | ) 180 | val binaryExpected = FilterApi.eq( 181 | FilterApi.binaryColumn("binary"), 182 | Value.binary(binaryPayload).value 183 | ) 184 | val charExpected = FilterApi.eq( 185 | FilterApi.intColumn("char"), 186 | Int.box(Value.char(charPayload).value) 187 | ) 188 | val uuidExpected = FilterApi.eq( 189 | FilterApi.binaryColumn("uuid"), 190 | Value.uuid(uuidPayload).value 191 | ) 192 | val currencyExpected = FilterApi.eq( 193 | FilterApi.binaryColumn("currency"), 194 | Value.currency(currencyPayload).value 195 | ) 196 | val bigDecimalExpected = FilterApi.eq( 197 | FilterApi.longColumn("bigDecimal"), 198 | Long.box(Value.bigDecimal(bigDecimalPayload).value) 199 | ) 200 | val bigIntegerExpected = FilterApi.eq( 201 | FilterApi.binaryColumn("bigInteger"), 202 | Value.bigInteger(bigIntegerPayload).value 203 | ) 204 | val dayOfWeekExpected = FilterApi.eq( 205 | FilterApi.intColumn("dayOfWeek"), 206 | Int.box(Value.dayOfWeek(dayOfWeekPayload).value) 207 | ) 208 | val monthExpected = 
FilterApi.eq( 209 | FilterApi.intColumn("month"), 210 | Int.box(Value.month(monthPayload).value) 211 | ) 212 | val monthDayExpected = FilterApi.eq( 213 | FilterApi.binaryColumn("monthDay"), 214 | Value.monthDay(monthDayPayload).value 215 | ) 216 | val periodExpected = FilterApi.eq( 217 | FilterApi.binaryColumn("period"), 218 | Value.period(periodPayload).value 219 | ) 220 | val yearExpected = FilterApi.eq( 221 | FilterApi.intColumn("year"), 222 | Int.box(Value.year(yearPayload).value) 223 | ) 224 | val yearMonthExpected = FilterApi.eq( 225 | FilterApi.binaryColumn("yearMonth"), 226 | Value.yearMonth(yearMonthPayload).value 227 | ) 228 | val zoneIdExpected = FilterApi.eq( 229 | FilterApi.binaryColumn("zoneId"), 230 | Value.zoneId(zoneIdPayload).value 231 | ) 232 | val zoneOffsetExpected = FilterApi.eq( 233 | FilterApi.binaryColumn("zoneOffset"), 234 | Value.zoneOffset(zoneOffsetPayload).value 235 | ) 236 | val durationExpected = FilterApi.eq( 237 | FilterApi.longColumn("duration"), 238 | Long.box(Value.duration(durationPayload).value) 239 | ) 240 | val instantExpected = FilterApi.eq( 241 | FilterApi.longColumn("instant"), 242 | Long.box(Value.instant(instantPayload).value) 243 | ) 244 | val localDateExpected = FilterApi.eq( 245 | FilterApi.intColumn("localDate"), 246 | Int.box(Value.localDate(localDatePayload).value) 247 | ) 248 | val localTimeExpected = FilterApi.eq( 249 | FilterApi.intColumn("localTime"), 250 | Int.box(Value.localTime(localTimePayload).value) 251 | ) 252 | val localDateTimeExpected = FilterApi.eq( 253 | FilterApi.longColumn("localDateTime"), 254 | Long.box(Value.localDateTime(localDateTimePayload).value) 255 | ) 256 | val offsetTimeExpected = FilterApi.eq( 257 | FilterApi.intColumn("offsetTime"), 258 | Int.box(Value.offsetTime(offsetTimePayload).value) 259 | ) 260 | val offsetDateTimeExpected = FilterApi.eq( 261 | FilterApi.longColumn("offsetDateTime"), 262 | Long.box(Value.offsetDateTime(offsetDateTimePayload).value) 263 | ) 264 | val zonedDateTimeExpected = FilterApi.eq( 265 | FilterApi.longColumn("zonedDateTime"), 266 | Long.box(Value.zonedDateTime(zonedDateTimePayload).value) 267 | ) 268 | 269 | val stringResul = filter(string === stringPayload) 270 | val booleanResult = filter(boolean === booleanPayload) 271 | val byteResult = filter(byte === bytePayload) 272 | val shortResult = filter(short === shortPayload) 273 | val intResult = filter(int === intPayload) 274 | val longResult = filter(long === longPayload) 275 | val floatResult = filter(float === floatPayload) 276 | val doubleResult = filter(double === doublePayload) 277 | val binaryResult = filter(binary === binaryPayload) 278 | val charResult = filter(char === charPayload) 279 | val uuidResult = filter(uuid === uuidPayload) 280 | val currencyResult = filter(currency === currencyPayload) 281 | val bigDecimalResult = filter(bigDecimal === bigDecimalPayload) 282 | val bigIntegerResult = filter(bigInteger === bigIntegerPayload) 283 | val dayOfWeekResult = filter(dayOfWeek === dayOfWeekPayload) 284 | val monthResult = filter(month === monthPayload) 285 | val monthDayResult = filter(monthDay === monthDayPayload) 286 | val periodResult = filter(period === periodPayload) 287 | val yearResult = filter(year === yearPayload) 288 | val yearMonthResult = filter(yearMonth === yearMonthPayload) 289 | val zoneIdResult = filter(zoneId === zoneIdPayload) 290 | val zoneOffsetResult = filter(zoneOffset === zoneOffsetPayload) 291 | val durationResult = filter(duration === durationPayload) 292 | val instantResult = filter(instant === 
instantPayload) 293 | val localDateResult = filter(localDate === localDatePayload) 294 | val localTimeResult = filter(localTime === localTimePayload) 295 | val localDateTimeResult = filter(localDateTime === localDateTimePayload) 296 | val offsetTimeResult = filter(offsetTime === offsetTimePayload) 297 | val offsetDateTimeResult = filter(offsetDateTime === offsetDateTimePayload) 298 | val zonedDateTimeResult = filter(zonedDateTime === zonedDateTimePayload) 299 | 300 | assert(stringResul)(isRight(equalTo(stringExpected))) && 301 | assert(booleanResult)(isRight(equalTo(booleanExpected))) && 302 | assert(byteResult)(isRight(equalTo(byteExpected))) && 303 | assert(shortResult)(isRight(equalTo(shortExpected))) && 304 | assert(intResult)(isRight(equalTo(intExpected))) && 305 | assert(longResult)(isRight(equalTo(longExpected))) && 306 | assert(floatResult)(isRight(equalTo(floatExpected))) && 307 | assert(doubleResult)(isRight(equalTo(doubleExpected))) && 308 | assert(binaryResult)(isRight(equalTo(binaryExpected))) && 309 | assert(charResult)(isRight(equalTo(charExpected))) && 310 | assert(uuidResult)(isRight(equalTo(uuidExpected))) && 311 | assert(currencyResult)(isRight(equalTo(currencyExpected))) && 312 | assert(bigDecimalResult)(isRight(equalTo(bigDecimalExpected))) && 313 | assert(bigIntegerResult)(isRight(equalTo(bigIntegerExpected))) && 314 | assert(dayOfWeekResult)(isRight(equalTo(dayOfWeekExpected))) && 315 | assert(monthResult)(isRight(equalTo(monthExpected))) && 316 | assert(monthDayResult)(isRight(equalTo(monthDayExpected))) && 317 | assert(periodResult)(isRight(equalTo(periodExpected))) && 318 | assert(yearResult)(isRight(equalTo(yearExpected))) && 319 | assert(yearMonthResult)(isRight(equalTo(yearMonthExpected))) && 320 | assert(zoneIdResult)(isRight(equalTo(zoneIdExpected))) && 321 | assert(zoneOffsetResult)(isRight(equalTo(zoneOffsetExpected))) && 322 | assert(durationResult)(isRight(equalTo(durationExpected))) && 323 | assert(instantResult)(isRight(equalTo(instantExpected))) && 324 | assert(localDateResult)(isRight(equalTo(localDateExpected))) && 325 | assert(localTimeResult)(isRight(equalTo(localTimeExpected))) && 326 | assert(localDateTimeResult)(isRight(equalTo(localDateTimeExpected))) && 327 | assert(offsetTimeResult)(isRight(equalTo(offsetTimeExpected))) && 328 | assert(offsetDateTimeResult)(isRight(equalTo(offsetDateTimeExpected))) && 329 | assert(zonedDateTimeResult)(isRight(equalTo(zonedDateTimeExpected))) 330 | }, 331 | test("compile option") { 332 | // TODO: test failing compile-time cases 333 | val (_, _, _, _, opt) = Filter[MyRecord].columns 334 | 335 | val expected = FilterApi.gt(FilterApi.intColumn("opt"), Int.box(Value.int(3).value)) 336 | val result = filter(opt.nullable > 3) 337 | 338 | assert(result)(isRight(equalTo(expected))) 339 | }, 340 | test("compile enum") { 341 | val (_, _, _, enm, _) = Filter[MyRecord].columns 342 | 343 | val result = filter(enm === MyRecord.Enum.Done) 344 | val expected = FilterApi.eq(FilterApi.binaryColumn("enm"), Value.string("Done").value) 345 | 346 | assert(result)(isRight(equalTo(expected))) 347 | }, 348 | test("column path concatenation") { 349 | // TODO: test failing compile-time cases 350 | // Show the macro determines the names of the parent/child fields no matter how we name 351 | // the variables that represent columns 352 | val (_, _, child0, _, _) = Filter[MyRecord].columns 353 | val (c0, d0) = Filter[MyRecord.Child].columns 354 | 355 | assert(concat(child0, c0).path)(equalTo("child.c")) && 356 | assert(concat(child0, 
d0).path)(equalTo("child.d")) 357 | } 358 | ) 359 | 360 | } 361 | --------------------------------------------------------------------------------