├── .gitignore ├── project ├── build.properties └── plugins.sbt ├── core └── src │ ├── test │ ├── resources │ │ └── log4j.properties │ ├── scala-2.11 │ │ └── com.mediative.sparrow │ │ │ └── CodecLimitations211Test.scala │ ├── scala-2.10 │ │ └── com.mediative.sparrow │ │ │ └── CodecLimitations210Test.scala │ └── scala │ │ └── com.mediative.sparrow │ │ ├── ConverterTester.scala │ │ ├── DateTimeOptionsSpec.scala │ │ ├── RowConverterEmbeddedSpec.scala │ │ ├── RowConverterFieldNameAdjustmentsSpec.scala │ │ ├── DataFrameReaderMacroFailureTest.scala │ │ ├── CodecLimitationsTest.scala │ │ ├── DataFrameReaderMacroTest.scala │ │ ├── SchemaSpec.scala │ │ └── DataFrameReaderTest.scala │ └── main │ └── scala │ └── com.mediative.sparrow │ ├── RowConverter.scala │ ├── FieldConverter.scala │ └── DataFrameReader.scala ├── NEWS.md ├── .travis.yml ├── CONTRIBUTING.md ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.8 2 | -------------------------------------------------------------------------------- /core/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=ERROR, stderr 3 | log4j.appender.stderr=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stderr.target=System.err 5 | log4j.appender.stderr.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.stderr.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 7 | 8 | log4j.logger.com.mediative=FATAL -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | resolvers += Resolver.url("YPG-Data SBT Plugins", url("https://dl.bintray.com/ypg-data/sbt-plugins"))(Resolver.ivyStylePatterns) 2 | resolvers += "Spark Package Main Repo" at "https://dl.bintray.com/spark-packages/maven" 3 | 4 | addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.2") 5 | addSbtPlugin("com.mediative.sbt" % "sbt-mediative-core" % "0.1.1") 6 | addSbtPlugin("com.mediative.sbt" % "sbt-mediative-oss" % "0.1.1") 7 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # Release notes 2 | 3 | ## Sparrow 0.2.0 4 | 5 | - Bump Spark version to 1.6.0 6 | - Test against Scala 2.10.6 on Travis 7 | - Bump the Macro Paradise plugin to 2.1.0 8 | 9 | ## Sparrow 0.1.2 10 | 11 | - Add tests to document limitations 12 | - Switch to `sbt-mediative` plugins to reduce build.sbt boilerplate 13 | 14 | ## Sparrow 0.1.1 15 | 16 | - Publish Spark package 17 | - Bump Spark version to 1.3.1 18 | 19 | ## Sparrow 0.1.0 20 | 21 | First public release of Sparrow - a Scala library for converting Spark rows to 22 | case classes. 
23 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Use container-based infrastructure 2 | sudo: false 3 | 4 | language: scala 5 | 6 | scala: 7 | - 2.10.5 8 | - 2.10.6 9 | - 2.11.7 10 | 11 | jdk: 12 | - openjdk7 13 | - oraclejdk8 14 | 15 | script: 16 | - sbt ++$TRAVIS_SCALA_VERSION test 17 | 18 | before_cache: 19 | - find $HOME/.sbt -name "*.lock" | xargs rm 20 | - find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm 21 | 22 | cache: 23 | directories: 24 | - $HOME/.ivy2/cache 25 | - $HOME/.sbt/boot/ 26 | 27 | notifications: 28 | webhooks: 29 | urls: 30 | - https://webhooks.gitter.im/e/57534ac5b1f36eaa92c6 31 | on_success: change # options: [always|never|change] default: always 32 | on_failure: always # options: [always|never|change] default: always 33 | on_start: false # default: false 34 | -------------------------------------------------------------------------------- /core/src/test/scala-2.11/com.mediative.sparrow/CodecLimitations211Test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Mediative 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.mediative.sparrow 18 | 19 | class CodecLimitations211Test extends CodecLimitationsTestBase { 20 | import CodecLimitationsTest._ 21 | 22 | "toRDD should" - { 23 | "successfully marshall RDD => DataFrame => RDD an object containing" - { 24 | "Int, Double" in { 25 | assertCodec(TestToRdd4(1, 2.0)) 26 | } 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Bugs and feature requests should be reported in the [GitHub issue 4 | tracker](https://github.com/ypg-data/sparrow/issues/new) and 5 | answer the following questions: 6 | 7 | - Motivation: Why should this be addressed? What is the purpose? 8 | - Input: What are the pre-conditions? 9 | - Output: What is the expected outcome after the issue has been addressed? 10 | - Test: How can the results listed in the "Output" be QA'ed? 11 | 12 | For code contributions, these are the suggested steps: 13 | 14 | - Identify the change you'd like to make, e.g. fix a bug or add a feature. 15 | Larger contributions should always begin with [first creating an 16 | issue](https://github.com/ypg-data/sparrow/issues/new) to ensure 17 | that the change is properly scoped. 18 | - Fork the repository on GitHub. 19 | - Develop your change on a feature branch. 20 | - Write tests to validate your change works as expected. 21 | - Create a pull request. 22 | - Address any issues raised during the code review. 23 | - Once you get a "+1" on the pull request, the change can be merged. 
24 | -------------------------------------------------------------------------------- /core/src/test/scala-2.10/com.mediative.sparrow/CodecLimitations210Test.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Mediative 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.mediative.sparrow 18 | 19 | class CodecLimitations210Test extends CodecLimitationsTestBase { 20 | import CodecLimitationsTest._ 21 | 22 | "toRDD should" - { 23 | "successfully marshall RDD => DataFrame => RDD an object containing" - { 24 | "Int, Double" in { 25 | pendingUntilFixed { 26 | // FIXME: 27 | // "org.apache.spark.SparkException: Job aborted due to stage failure" 28 | // Caused by: java.lang.ClassCastException: java.lang.Double cannot be cast to java.lang.Integer 29 | // at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:106) 30 | assertCodec(TestToRdd4(1, 2.0)) 31 | } 32 | } 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /core/src/test/scala/com.mediative.sparrow/ConverterTester.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Mediative 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.mediative.sparrow 18 | 19 | import org.apache.spark.sql.Row 20 | import org.apache.spark.sql.types.StructType 21 | import org.scalatest.Assertions.fail 22 | import org.scalatest.Matchers 23 | 24 | import scalaz.{ Failure, Success } 25 | 26 | object ConverterTester extends Matchers { 27 | 28 | trait Tpe[T] extends (() => StructType) 29 | implicit def toTpe[T](tpe: StructType): Tpe[T] = new Tpe[T] { 30 | def apply() = tpe 31 | } 32 | 33 | def test[T](row: Row, expected: T)(implicit schema: RowConverter[T], tpe: Tpe[T]) = { 34 | schema.validateAndApply(tpe()) match { 35 | case Success(f) => assert(f(row) == expected) 36 | case Failure(errors) => fail(errors.stream.mkString(". ")) 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /core/src/test/scala/com.mediative.sparrow/DateTimeOptionsSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Mediative 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | 	 * You may obtain a copy of the License at
  7 | 	 *
  8 | 	 *    http://www.apache.org/licenses/LICENSE-2.0
  9 | 	 *
 10 | 	 * Unless required by applicable law or agreed to in writing, software
 11 | 	 * distributed under the License is distributed on an "AS IS" BASIS,
 12 | 	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | 	 * See the License for the specific language governing permissions and
 14 | 	 * limitations under the License.
 15 | 	 */
 16 | 	
 17 | 	package com.mediative.sparrow
 18 | 	
 19 | 	import org.apache.spark.sql.Row
 20 | 	import org.apache.spark.sql.types._
 21 | 	import org.scalatest._
 22 | 	
 23 | 	import com.github.nscala_time.time.Imports._
 24 | 	
 25 | 	import RowConverter._
 26 | 	import RowConverter.syntax._
 27 | 	
 28 | 	class DateTimeOptionsSpec extends FreeSpec {
 29 | 	
 30 | 	  import ConverterTester._
 31 | 	
 32 | 	  case class DateTimeHolder(
 33 | 	    name: String,
 34 | 	    dateTime: DateTime)
 35 | 	
 36 | 	  object DateTimeHolder {
 37 | 	    implicit val schema = (
 38 | 	      field[String]("name") and
 39 | 	      field[DateTime]("dateTime")(DatePattern("dd/MM/yyyy HH:mm:ss"))
 40 | 	    )(apply _)
 41 | 	
 42 | 	    implicit val tpe: Tpe[DateTimeHolder] = StructType(List(
 43 | 	      StructField("name", StringType, nullable = false),
 44 | 	      StructField("dateTime", StringType, nullable = false)
 45 | 	    ))
 46 | 	  }
 47 | 	
 48 | 	  case class LocalDateHolder(
 49 | 	    name: String,
 50 | 	    dateTime: LocalDate)
 51 | 	
 52 | 	  object LocalDateHolder {
 53 | 	    implicit val schema = (
 54 | 	      field[String]("name") and
 55 | 	      field[LocalDate]("dateTime")(DatePattern("dd/MM/yyyy"))
 56 | 	    )(apply _)
 57 | 	
 58 | 	    implicit val tpe: Tpe[LocalDateHolder] = StructType(List(
 59 | 	      StructField("name", StringType, nullable = false),
 60 | 	      StructField("dateTime", StringType, nullable = false)
 61 | 	    ))
 62 | 	  }
 63 | 	
 64 | 	  "DateTimeRowConverter" - {
 65 | 	    "should allow to define a custom date format for DateTime fields" in {
 66 | 	      test(Row("Hello", "25/12/2015 14:40:00"), DateTimeHolder("Hello", DateTime.parse("2015-12-25T14:40:00.00")))
 67 | 	    }
 68 | 	    "should throw an exception if the DateTime value doesn't have the correct format" in {
 69 | 	      val ex = intercept[IllegalArgumentException] {
 70 | 	        test(Row("Hello", "2/212/2015 14:40:00"), DateTimeHolder("Hello", DateTime.parse("2015-12-25T14:40:00.00")))
 71 | 	      }
 72 | 	      assert(ex.getMessage === "Invalid format: \"2/212/2015 14:40:00\" is malformed at \"2/2015 14:40:00\"")
 73 | 	    }
 74 | 	
 75 | 	    "should allow to define a custom date format for LocalDate fields" in {
 76 | 	      test(Row("Hello", "25/12/2015"), LocalDateHolder("Hello", LocalDate.parse("2015-12-25")))
 77 | 	    }
 78 | 	  }
 79 | 	}
 80 | 	
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Sparrow
  2 | 
  3 | Sparrow is a Scala library for converting Spark DataFrame rows to case classes.
  4 | 
  5 | [![Build Status](https://travis-ci.org/mediative/sparrow.svg)](https://travis-ci.org/mediative/sparrow)
  6 | [![Latest version](https://api.bintray.com/packages/ypg-data/maven/sparrow/images/download.svg)](https://bintray.com/ypg-data/maven/sparrow/_latestVersion)
  7 | 
  8 | ## Status
  9 | 
 10 | The project is still in an experimental state and the API is subject to change
 11 | without concern for backward compatibility.
 12 | 
 13 | ## Requirements
 14 | 
 15 | This library requires Spark 1.3+.
 16 | 
 17 | ## Limitations and Known Issues
 18 | 
 19 | - Fields of type `java.sql.Timestamp` are not supported.
 20 | - Custom wrapper field types are not supported.
 21 | - Conversion of certain other field types is not supported.
 22 | 
 23 | See the [CodecLimitationsTest](core/src/test/scala/com.mediative.sparrow/CodecLimitationsTest.scala) for details.
 24 | 
 25 | ## Getting Started
 26 | 
 27 | The best way to get started at this point is to read the [API
 28 | docs](https://mediative.github.io/sparrow/api) and look at the [examples in the
 29 | tests](https://github.com/mediative/sparrow/tree/master/core/src/test/scala/com.mediative.sparrow).
 30 | 
 31 | To use the library in an SBT project, add the following two project settings:
 32 | 
 33 |     resolvers += Resolver.bintrayRepo("ypg-data", "maven")
 34 |     libraryDependencies += "com.mediative" %% "sparrow" % "0.2.0"
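For a quick feel of the API, here is a minimal sketch of reading a JSON file
into an `RDD` of case classes. The `Person` class and the `people.json` path
are hypothetical; the calls are the ones exercised in the tests:

    import org.apache.spark.sql.SQLContext
    import com.mediative.sparrow._
    import com.mediative.sparrow.syntax.df._ // adds DataFrame#toRDD

    case class Person(name: String, age: Long)
    object Person {
      // Derives a RowConverter[Person] using the createSchema macro.
      implicit val schema: RowConverter[Person] = DataFrameReader.createSchema
    }

    // Assumes an existing SparkContext `sc`. toRDD returns a scalaz
    // Validation: a Success wrapping an RDD[Person], or a Failure listing
    // the schema mismatches.
    val sqlContext = new SQLContext(sc)
    val people = sqlContext.read.json("people.json").toRDD[Person]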
 35 | 
 36 | ## Building and Testing
 37 | 
 38 | This library is built with SBT, which needs to be installed. To run the tests
 39 | and build a JAR run the following commands from the project root:
 40 | 
 41 |     $ sbt test
 42 |     $ sbt package
 43 | 
 44 | To build a package for Scala 2.11 run the following command:
 45 | 
 46 |     $ sbt ++2.11.7 test package
 47 | 
 48 | See [CONTRIBUTING.md](CONTRIBUTING.md) for how to contribute.
 49 | 
 50 | ## Releasing
 51 | 
 52 | To release version `x.y.z` run:
 53 | 
 54 |     $ sbt release -Dversion=x.y.z
 55 | 
 56 | This will take care of running tests, tagging and publishing JARs and API docs
 57 | for both Scala 2.10 and 2.11. To publish the Spark package run:
 58 | 
 59 |     $ sbt core/spPublish
 60 |     $ sbt ++2.11.7 core/spPublish
 61 | 
 62 | The above requires that `~/.credentials/spark-packages.properties` exists with
 63 | the following content:
 64 | 
 65 |     realm=Spark Packages
 66 |     host=spark-packages.org
 67 |     user=$GITHUB_USERNAME
 68 |     # Generate token at https://github.com/settings/tokens
 69 |     password=$GITHUB_PERSONAL_ACCESS_TOKEN
 70 | 
 71 | If you see the following error, go to
 72 | [http://spark-packages.org/](http://spark-packages.org/) and log in to grant
 73 | access to your GitHub account:
 74 | 
 75 |     /opt/sparrow#master > sbt core/spPublish
 76 |     ...
 77 |     Zip File created at: /opt/sparrow/core/target/sparrow-0.2.0-s_2.10.zip
 78 | 
 79 |     ERROR: 404 - Error while accessing commit on Github. Are you sure that you pushed your local commit to the remote repository?
 80 | 
 81 | ## License
 82 | 
 83 | Copyright 2016 Mediative
 84 | 
 85 | Licensed under the Apache License, Version 2.0. See LICENSE file for terms and
 86 | conditions for use, reproduction, and distribution.
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/RowConverterEmbeddedSpec.scala:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2016 Mediative
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *    http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
15 | */ 16 | 17 | package com.mediative.sparrow 18 | 19 | import org.apache.spark.sql.Row 20 | import org.apache.spark.sql.types._ 21 | import org.scalatest._ 22 | 23 | import scalaz.{ Failure, Success } 24 | 25 | import com.github.nscala_time.time.Imports._ 26 | 27 | class RowConverterEmbeddedSpec extends FreeSpec { 28 | 29 | trait Tpe[T] extends (() => StructType) 30 | implicit def toTpe[T](tpe: StructType): Tpe[T] = new Tpe[T] { 31 | def apply() = tpe 32 | } 33 | 34 | case class Advertiser( 35 | name: String, 36 | currency: String, 37 | id: Long, 38 | status: String) 39 | 40 | case class Creative( 41 | name: String, 42 | id: Option[Long], 43 | integrationCode: Long, 44 | source: String, 45 | status: String) 46 | 47 | case class ReportRow( 48 | @embedded("Advertiser") advertiser: Advertiser, 49 | clicks: Long, 50 | @embedded("Creative") creative: Creative, 51 | date: LocalDate, 52 | dfaPlacementId: Option[Long]) 53 | 54 | object ReportRow { 55 | 56 | implicit val schema: RowConverter[ReportRow] = DataFrameReader.createSchema(opts) 57 | private def opts = SchemaOptions(RowConverter.lenientEqual) { 58 | case "Creativename" => "Creative" 59 | case "Advertisername" => "Advertiser" 60 | } 61 | 62 | implicit val tpe: Tpe[ReportRow] = StructType(List( 63 | StructField("Advertiser", StringType, nullable = false), 64 | StructField("Advertiser_Currency", StringType, nullable = false), 65 | StructField("Advertiser_ID", LongType, nullable = false), 66 | StructField("Advertiser_Status", StringType, nullable = false), 67 | StructField("Clicks", LongType, nullable = false), 68 | StructField("Creative", StringType, nullable = false), 69 | StructField("Creative_ID", LongType, nullable = false), 70 | StructField("Creative_Integration_Code", LongType, nullable = false), 71 | StructField("Creative_Source", StringType, nullable = false), 72 | StructField("Creative_Status", StringType, nullable = false), 73 | StructField("DATE", StringType, nullable = false), 74 | StructField("DFA_Placement_ID", LongType, nullable = false) 75 | )) 76 | } 77 | 78 | def test[T](row: Row, expected: T)(implicit schema: RowConverter[T], tpe: Tpe[T]) = { 79 | schema.validateAndApply(tpe()) match { 80 | case Success(f) => assert(f(row) == expected) 81 | case Failure(errors) => fail(errors.stream.mkString(". ")) 82 | } 83 | } 84 | 85 | "@embedded" - { 86 | "allow to transform a flat structure to a DAG" in { 87 | 88 | val expected = ReportRow( 89 | Advertiser("Hello", "CAD", 123L, "ACTIVE"), 90 | 123514L, 91 | Creative("Creative Name", None, 13L, "Source!", "ACTIVE"), 92 | new LocalDate(2014, 10, 14), 93 | Some(124L) 94 | ) 95 | val row = Row( 96 | "Hello", "CAD", 123L, "ACTIVE", 123514L, 97 | "Creative Name", null, 13L, "Source!", "ACTIVE", 98 | "2014-10-14", 124L 99 | ) 100 | test(row, expected) 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /core/src/main/scala/com.mediative.sparrow/RowConverter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Mediative 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.mediative.sparrow 18 | 19 | import scalaz._ 20 | import Scalaz.ToApplyOps 21 | import scalaz.syntax.validation._ 22 | 23 | import org.apache.spark.sql._ 24 | import org.apache.spark.sql.types._ 25 | 26 | import play.api.libs.functional.{ Applicative => PApplicative, Functor => PFunctor, FunctionalBuilderOps } 27 | 28 | import Alias._ 29 | 30 | trait RowConverter[T] extends (StructType => V[Row => T]) with Serializable { self => 31 | def validateFields(fields: Set[String]): (V[Unit], Set[String]) 32 | 33 | def validateStruct(schema: StructType): V[Unit] = { 34 | val (v, others) = validateFields(schema.fieldNames.toSet) 35 | val extraFields = 36 | if (others.isEmpty) ().success 37 | else s"There are extra fields: $others".failureNel 38 | 39 | (v |@| extraFields) { (_, _) => () } 40 | } 41 | 42 | def map[U](f: T => U): RowConverter[U] = new RowConverter[U] { 43 | override def validateFields(fields: Set[String]) = self.validateFields(fields) 44 | override def apply(tpe: StructType): V[Row => U] = { 45 | for { 46 | g <- self(tpe) 47 | } yield { 48 | g andThen f 49 | } 50 | } 51 | } 52 | 53 | def validateAndApply(tpe: StructType): V[Row => T] = { 54 | import scalaz.Validation.FlatMap._ 55 | validateStruct(tpe) flatMap { _ => 56 | apply(tpe) 57 | } 58 | } 59 | } 60 | 61 | object RowConverter { 62 | 63 | object syntax { 64 | import play.api.libs.functional.syntax.functionalCanBuildApplicative 65 | 66 | implicit def toFunctionalBuilderOps[A](a: RowConverter[A]): FunctionalBuilderOps[RowConverter, A] = { 67 | val cbf = functionalCanBuildApplicative(RowConverterApplicative) 68 | play.api.libs.functional.syntax.toFunctionalBuilderOps(a)(cbf) 69 | } 70 | } 71 | 72 | implicit object RowConverterApplicative extends PApplicative[RowConverter] with PFunctor[RowConverter] { 73 | def pure[A](a: A): RowConverter[A] = new RowConverter[A] { 74 | override def validateFields(fields: Set[String]) = (().success, fields) 75 | override def apply(tpe: StructType) = Success(_ => a) 76 | } 77 | 78 | def fmap[A, B](m: RowConverter[A], f: A => B): RowConverter[B] = map(m, f) 79 | def map[A, B](m: RowConverter[A], f: A => B): RowConverter[B] = m.map(f) 80 | 81 | def apply[A, B](mf: RowConverter[A => B], ma: RowConverter[A]): RowConverter[B] = new RowConverter[B] { 82 | override def validateFields(fields: Set[String]) = { 83 | val (v1, fs1) = mf.validateFields(fields) 84 | val (v2, fs2) = ma.validateFields(fs1) 85 | (v1 |@| v2)((_, _) => ()) -> fs2 86 | } 87 | override def apply(tpe: StructType): V[Row => B] = { 88 | (ma(tpe) |@| mf(tpe)) { (ra, rab) => 89 | (row: Row) => rab(row)(ra(row)) 90 | } 91 | } 92 | } 93 | } 94 | 95 | val lenientEqual: Equal[String] = { 96 | def normalize(s: String) = s.replaceAllLiterally("_", "").toLowerCase 97 | Equal.equal { (a, b) => 98 | normalize(a) == normalize(b) 99 | } 100 | } 101 | 102 | def field[T](name: String, equal: Equal[String] = Equal.equalA)(implicit fc: FieldConverter[T]): RowConverter[T] = 103 | new RowConverter[T] { 104 | override def validateFields(fields: Set[String]): (V[Unit], Set[String]) = { 105 | val 
(named, others) = fields.partition(equal.equal(_, name))
106 | 
107 |         val v =
108 |           if (named.isEmpty && !fc.isNullable) s"The field '$name' is missing".failureNel
109 |           else ().success
110 |         v -> others
111 |       }
112 | 
113 |       override def apply(tpe: StructType): V[Row => T] = {
114 |         val fieldName = tpe.fieldNames.find(equal.equal(name, _)) getOrElse {
115 |           if (fc.isNullable) name
116 |           else sys.error(
117 |             s"""
118 |                |Assertion failure, the field should have been validated to exist.
119 |                |Field name: $name, StructType: $tpe.
120 |                |""".stripMargin)
121 |         }
122 |         fc(NamedStruct(fieldName, tpe))
123 |       }
124 |     }
125 | }
126 | 
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/RowConverterFieldNameAdjustmentsSpec.scala:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2016 Mediative
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *    http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | package com.mediative.sparrow
 18 | 
 19 | import org.apache.spark.sql.Row
 20 | import org.apache.spark.sql.types._
 21 | import org.scalatest._
 22 | 
 23 | import RowConverter._
 24 | import RowConverter.syntax._
 25 | 
 26 | import scalaz.{ Failure, Success }
 27 | 
 28 | class RowConverterFieldNameAdjustmentsSpec extends FreeSpec {
 29 | 
 30 |   import ConverterTester._
 31 | 
 32 |   case class Simple(name: String, twoWords: Long)
 33 | 
 34 |   object Simple {
 35 |     implicit val schema: RowConverter[Simple] = (
 36 |       field[String]("name", lenientEqual) and
 37 |       field[Long]("twoWords", lenientEqual)
 38 |     )(apply _)
 39 | 
 40 |     implicit val tpe: Tpe[Simple] = StructType(List(
 41 |       StructField("Name", StringType, nullable = false),
 42 |       StructField("two_words", LongType, nullable = false)
 43 |     ))
 44 |   }
 45 | 
 46 |   case class SimpleLenient(name: String, twoWords: Long)
 47 | 
 48 |   object SimpleLenient {
 49 |     implicit val schema: RowConverter[SimpleLenient] = DataFrameReader.createSchema(lenientEqual)
 50 | 
 51 |     implicit val tpe: Tpe[SimpleLenient] = StructType(List(
 52 |       StructField("Name", StringType, nullable = false),
 53 |       StructField("two_words", LongType, nullable = false)
 54 |     ))
 55 |   }
 56 | 
 57 |   case class SimplePartialFunction(name: String, count: Long)
 58 | 
 59 |   object SimplePartialFunction {
 60 | 
 61 |     private val opt: PartialFunction[String, String] = {
 62 |       case "name" => "ID"
 63 |     }
 64 |     implicit val schema: RowConverter[SimplePartialFunction] = DataFrameReader.createSchema(opt)
 65 | 
 66 |     implicit val tpe: Tpe[SimplePartialFunction] = StructType(List(
 67 |       StructField("ID", StringType, nullable = false),
 68 |       StructField("count", LongType, nullable = false)
 69 |     ))
 70 |   }
 71 | 
 72 |   "lenient equal" - {
 73 |     "should tolerate small differences in field names" in {
 74 |       test(Row("Hello", 3L), Simple("Hello", 3))
 75 |     }
 76 | 
 77 |     "should be usable by macros" in {
 78 |       test(Row("Hello", 3L), SimpleLenient("Hello", 3))
 79 |     }
 80 |   }
 81 | 
 82 |   "name transformer" - {
 83 |     "should allow to use a different name for the 
case class than the JSON file" in { 84 | test(Row("Hello", 3L), SimplePartialFunction("Hello", 3)) 85 | } 86 | } 87 | 88 | case class SimpleFieldOption(@fieldName("description") name: String, id: Long) 89 | 90 | object SimpleFieldOption { 91 | implicit val schema: RowConverter[SimpleFieldOption] = DataFrameReader.createSchema 92 | 93 | implicit val tpe: Tpe[SimpleFieldOption] = StructType(List( 94 | StructField("description", StringType, nullable = false), 95 | StructField("id", LongType, nullable = false) 96 | )) 97 | } 98 | 99 | case class SimpleFieldOptionOuter(id: Long, @embedded("Inner") inner: SimpleFieldOptionInner) 100 | case class SimpleFieldOptionInner(@fieldName("") name: String, id: Long) 101 | 102 | object SimpleFieldOptionOuter { 103 | implicit val schema: RowConverter[SimpleFieldOptionOuter] = DataFrameReader.createSchema(lenientEqual) 104 | 105 | implicit val tpe: Tpe[SimpleFieldOptionOuter] = StructType(List( 106 | StructField("id", LongType, nullable = false), 107 | StructField("Inner", StringType, nullable = false), 108 | StructField("Inner_ID", LongType, nullable = false) 109 | )) 110 | } 111 | 112 | object SimpleFieldOptionInner { 113 | implicit val schema: RowConverter[SimpleFieldOptionInner] = DataFrameReader.createSchema(lenientEqual) 114 | 115 | implicit val tpe: Tpe[SimpleFieldOptionInner] = StructType(List( 116 | StructField("", StringType, nullable = false), 117 | StructField("ID", LongType, nullable = false) 118 | )) 119 | } 120 | 121 | "field annotation" - { 122 | "should allow to rename the field" in { 123 | test(Row("Hello", 3L), SimpleFieldOption("Hello", 3)) 124 | } 125 | 126 | "should allow to use empty field name" in { 127 | test(Row("Hello", 3L), SimpleFieldOptionInner("Hello", 3)) 128 | } 129 | 130 | "should allow to use empty field name for embedded fields" in { 131 | test(Row(42L, "Hello", 3L), SimpleFieldOptionOuter(42, SimpleFieldOptionInner("Hello", 3))) 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /core/src/main/scala/com.mediative.sparrow/FieldConverter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Mediative 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 |  */
 16 | 
 17 | package com.mediative.sparrow
 18 | 
 19 | import scala.util.control.NonFatal
 20 | import scala.math.BigDecimal
 21 | 
 22 | import scalaz._
 23 | import scalaz.syntax.validation._
 24 | 
 25 | import org.apache.spark.sql._
 26 | import org.apache.spark.sql.types._
 27 | 
 28 | import com.github.nscala_time.time.Imports._
 29 | import org.joda.time.format.DateTimeFormatter
 30 | 
 31 | import Alias._
 32 | 
 33 | case class NamedStruct(name: String, tpe: StructType) {
 34 |   def index = tpe.fieldNames.indexOf(name)
 35 |   def field = tpe.fields.lift(index) getOrElse {
 36 |     sys.error(
 37 |       s"Cannot find field '$name' in fields: ${tpe.fields.toList} " +
 38 |         s"(field names: ${tpe.fieldNames.toList}, index: $index)")
 39 |   }
 40 | 
 41 |   def description: String = s"$name ($field)"
 42 |   def nullCheck(row: Row): Unit = {
 43 |     if (row.isNullAt(index))
 44 |       throw new NullPointerException(s"The field $description is missing.")
 45 |   }
 46 | }
 47 | 
 48 | trait FieldConverter[T] extends (NamedStruct => V[Row => T]) with Serializable { self =>
 49 |   def isNullable: Boolean = false
 50 | 
 51 |   def map[U](f: T => U) = new FieldConverter[U] {
 52 |     override def isNullable = self.isNullable
 53 |     override def apply(struct: NamedStruct): V[Row => U] =
 54 |       self.apply(struct).map { _ andThen f }
 55 |   }
 56 | }
 57 | 
 58 | object FieldConverter {
 59 | 
 60 |   def convert[A: FieldConverter, B](f: A => B) = reader[A].map(f)
 61 | 
 62 |   def reader[T](implicit fc: FieldConverter[T]): FieldConverter[T] = fc
 63 | 
 64 |   def simple[T](tpe: DataType, f: (Row, Int) => T): FieldConverter[T] = new FieldConverter[T] {
 65 |     override def apply(struct: NamedStruct): V[Row => T] = {
 66 |       val index = struct.index
 67 |       val field = struct.field
 68 |       if (field.dataType != tpe)
 69 |         s"The field '${struct.name}' isn't a $tpe as expected, ${field.dataType} received.".failureNel
 70 |       else Success { row =>
 71 |         struct.nullCheck(row)
 72 |         try f(row, index)
 73 |         catch {
 74 |           case NonFatal(e) =>
 75 |             throw new RuntimeException(s"Failed to read the field ${struct.description}.", e)
 76 |         }
 77 |       }
 78 |     }
 79 |   }
 80 | 
 81 |   implicit def stringConverter: FieldConverter[String] = FieldConverter.simple(StringType, _.getString(_))
 82 |   implicit def intConverter: FieldConverter[Int] = FieldConverter.simple(IntegerType, _.getInt(_))
 83 |   implicit def longConverter: FieldConverter[Long] = FieldConverter.simple(LongType, _.getLong(_))
 84 |   implicit def doubleConverter: FieldConverter[Double] = FieldConverter.simple(DoubleType, _.getDouble(_))
 85 |   implicit def bigDecimalConverter: FieldConverter[BigDecimal] = FieldConverter.simple(DecimalType.Unlimited, _.getDecimal(_))
 86 |   implicit def bigIntConverter: FieldConverter[BigInt] = FieldConverter.reader[BigDecimal].map(_.toBigInt)
 87 | 
 88 |   implicit def localDateConverter: FieldConverter[LocalDate] = stringConverter.map(LocalDate.parse)
 89 |   implicit def dateTimeConverter: FieldConverter[DateTime] = stringConverter.map(DateTime.parse)
 90 |   implicit def dateTimeConverterFromString(pattern: String): FieldConverter[DateTime] = DatePattern(pattern)
 91 |   implicit def dateTimeConverterFromFmt(fmt: DateTimeFormatter): FieldConverter[DateTime] = DatePattern(fmt)
 92 |   implicit def localDateConverterFromString(pattern: String): FieldConverter[LocalDate] = DatePattern(pattern)
 93 |   implicit def localDateConverterFromFmt(fmt: DateTimeFormatter): FieldConverter[LocalDate] = DatePattern(fmt)
 94 | 
 95 |   import java.sql.Timestamp
 96 |   implicit def timestampConverter: FieldConverter[Timestamp] = longConverter.map(new Timestamp(_))
 97 | 
 98 | 
implicit def optionConverter[T](implicit fc: FieldConverter[T]): FieldConverter[Option[T]] = 99 | new FieldConverter[Option[T]] { 100 | override def isNullable: Boolean = true 101 | override def apply(struct: NamedStruct): V[Row => Option[T]] = { 102 | import struct.index 103 | if (index == -1) Success(row => None) 104 | else fc(struct) map { f => row => Some(row).filterNot(_.isNullAt(index)).map(f) } 105 | } 106 | } 107 | 108 | implicit def fieldConverter[T](implicit rc: RowConverter[T]): FieldConverter[T] = 109 | new FieldConverter[T] { 110 | override def apply(struct: NamedStruct): V[Row => T] = { 111 | import struct.index 112 | val dt = struct.field.dataType 113 | dt match { 114 | case tpe: StructType => 115 | rc.validateAndApply(tpe) map { f => 116 | row => 117 | struct.nullCheck(row) 118 | f(row.getAs[Row](index)) 119 | } 120 | case _ => s"StructType expected, received: $dt".failureNel 121 | } 122 | } 123 | } 124 | 125 | implicit def dateTimeFieldConverter(x: UnixTimestamp.type): FieldConverter[DateTime] = 126 | FieldConverter.longConverter.map { seconds => 127 | new DateTime(seconds * 1000) 128 | } 129 | 130 | implicit def dateTimeFieldConverter(x: JavaTimestamp.type): FieldConverter[DateTime] = 131 | FieldConverter.longConverter.map { millis => 132 | new DateTime(millis) 133 | } 134 | } 135 | 136 | case object UnixTimestamp 137 | case object JavaTimestamp 138 | 139 | case class DatePattern(fmt: DateTimeFormatter) 140 | 141 | object DatePattern { 142 | def apply(pattern: String): DatePattern = DatePattern(DateTimeFormat.forPattern(pattern)) 143 | 144 | implicit def toDateTimeFieldConverter(dtp: DatePattern): FieldConverter[DateTime] = { 145 | FieldConverter.stringConverter.map(dtp.fmt.parseDateTime) 146 | } 147 | 148 | implicit def toLocalDateFieldConverter(dtp: DatePattern): FieldConverter[LocalDate] = { 149 | FieldConverter.stringConverter.map(dtp.fmt.parseLocalDate) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /core/src/test/scala/com.mediative.sparrow/DataFrameReaderMacroFailureTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Mediative 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 |  */
 16 | 
 17 | package com.mediative.sparrow
 18 | 
 19 | import org.scalatest._
 20 | import Matchers._
 21 | 
 22 | class DataFrameReaderMacroFailureTest extends FreeSpec {
 23 | 
 24 |   "createSchema" - {
 25 | 
 26 |     "works for a proper case class" in {
 27 |       """
 28 |         import com.mediative.sparrow._
 29 |         object Wrapper {
 30 |           class Simple(first: String, second: Int)
 31 |           object Simple {
 32 |             val options = SchemaOptions()
 33 |             implicit val schema = DataFrameReader.createSchema[Simple](options)
 34 |             def apply(f: String, s: Int) = new Simple(f, s)
 35 |           }
 36 |         }
 37 |       """ should compile
 38 |     }
 39 | 
 40 |     def checkError(error: exceptions.TestFailedException)(expected: String) = {
 41 |       assert(error.getMessage.startsWith(
 42 |         s"""Expected no compiler error, but got the following type error: "$expected", for code:"""))
 43 |     }
 44 | 
 45 |     "fail for less than two fields in a case class" in {
 46 |       val error = intercept[exceptions.TestFailedException] {
 47 |         """
 48 |         import com.mediative.sparrow._
 49 |         object Wrapper {
 50 |           case class Simple(first: String)
 51 |           object Simple {
 52 |             val options = SchemaOptions()
 53 |             DataFrameReader.createSchema[Simple](options)
 54 |           }
 55 |         }
 56 |         """ should compile
 57 |       }
 58 | 
 59 |       checkError(error) {
 60 |         "Only case classes with more than one field are supported."
 61 |       }
 62 |     }
 63 | 
 64 |     "fail if T doesn't have an apply method" in {
 65 |       val error = intercept[exceptions.TestFailedException] {
 66 |         """
 67 |         import com.mediative.sparrow._
 68 |         object Wrapper {
 69 |           class Simple(first: String, second: Int)
 70 |           object Simple {
 71 |             val options = SchemaOptions()
 72 |             DataFrameReader.createSchema[Simple](options)
 73 |           }
 74 |         }
 75 |         """ should compile
 76 |       }
 77 | 
 78 |       checkError(error) {
 79 |         "Cannot find an apply method with the proper signature."
 80 |       }
 81 |     }
 82 | 
 83 |     "fail if T doesn't have an apply method with the proper return type" in {
 84 |       val error = intercept[exceptions.TestFailedException] {
 85 |         """
 86 |         import com.mediative.sparrow._
 87 |         object Wrapper {
 88 |           class Simple(first: String, second: Int)
 89 |           object Simple {
 90 |             val options = SchemaOptions()
 91 |             DataFrameReader.createSchema[Simple](options)
 92 |             def apply(first: String, second: Int) = first + second
 93 |           }
 94 |         }
 95 |         """ should compile
 96 |       }
 97 | 
 98 |       checkError(error) {
 99 |         "Cannot find an apply method with the proper signature."
100 |       }
101 |     }
102 | 
103 |     "fail if T doesn't have an apply method with the proper argument count" in {
104 |       val error = intercept[exceptions.TestFailedException] {
105 |         """
106 |         import com.mediative.sparrow._
107 |         object Wrapper {
108 |           class Simple(first: String, second: Int)
109 |           object Simple {
110 |             val options = SchemaOptions()
111 |             DataFrameReader.createSchema[Simple](options)
112 |             def apply(first: String) = new Simple(first, 3)
113 |           }
114 |         }
115 |         """ should compile
116 |       }
117 | 
118 |       checkError(error) {
119 |         "Cannot find an apply method with the proper signature."
120 |       }
121 |     }
122 | 
123 |     "fail if T doesn't have an apply method with the proper argument types" in {
124 |       val error = intercept[exceptions.TestFailedException] {
125 |         """
126 |         import com.mediative.sparrow._
127 |         object Wrapper {
128 |           class Simple(first: String, second: Int)
129 |           object Simple {
130 |             val options = SchemaOptions()
131 |             DataFrameReader.createSchema[Simple](options)
132 |             def apply(second: Int, first: String) = new Simple(first, second)
133 |           }
134 |         }
135 |         """ should compile
136 |       }
137 | 
138 |       checkError(error) {
139 |         "Cannot find an apply method with the proper signature."
140 |       }
141 |     }
142 | 
143 |     "fail if @embedded and @fieldOptions are used on the same field" in {
144 |       val error = intercept[exceptions.TestFailedException] {
145 |         """
146 |         import com.mediative.sparrow._
147 |         object Wrapper {
148 |           case class Outer(first: String, @embedded("prefix") @fieldName("Inner") inner: Inner)
149 |           case class Inner(first: String, second: Int)
150 |           object Outer {
151 |             val options = SchemaOptions()
152 |             DataFrameReader.createSchema[Outer](options)
153 |           }
154 |         }
155 |         """ should compile
156 |       }
157 | 
158 |       checkError(error) {
159 |         "@embedded and @fieldName or @fieldOption cannot be used on the same field."
160 |       }
161 |     }
162 | 
163 |     "fail if @schema is used on a class that isn't a case class" in {
164 |       val error = intercept[exceptions.TestFailedException] {
165 |         """
166 |         import com.mediative.sparrow._
167 |         object Wrapper {
168 |           @schema
169 |           class Simple(first: String, second: Int)
170 |         }
171 |         """ should compile
172 |       }
173 | 
174 |       checkError(error) {
175 |         "The @schema annotation only support public case classes."
176 |       }
177 |     }
178 | 
179 |     "fail if @schema is used on something other than a case class" in {
180 |       val error = intercept[exceptions.TestFailedException] {
181 |         """
182 |         import com.mediative.sparrow._
183 |         object Wrapper {
184 |           @schema
185 |           val first: String = "first"
186 |         }
187 |         """ should compile
188 |       }
189 | 
190 |       checkError(error) {
191 |         "The @schema annotation only support public case classes."
192 |       }
193 |     }
194 |   }
195 | }
196 | 
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/CodecLimitationsTest.scala:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2016 Mediative
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *    http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | package com.mediative.sparrow
 18 | 
 19 | import java.sql.Timestamp
 20 | import scala.reflect.ClassTag
 21 | 
 22 | import org.scalatest._
 23 | 
 24 | import org.apache.spark.{ SparkContext, SparkConf }
 25 | import org.apache.spark.rdd.RDD
 26 | import org.apache.spark.sql._
 27 | import org.apache.spark.sql.types._
 28 | 
 29 | /**
 30 |  * Reusable base class for codec limitation specs.
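 *
 * Provides a local `SparkContext`/`SQLContext` and the `assertCodec` helper,
 * which round-trips a value through RDD => DataFrame => RDD and asserts
 * that the result equals the original value.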
31 |  */
 32 | trait CodecLimitationsTestBase extends FreeSpec with BeforeAndAfterAll {
 33 | 
 34 |   val sc = new SparkContext("local", "test2", new SparkConf)
 35 |   val sqlContext = new SQLContext(sc)
 36 | 
 37 |   override def afterAll() = sc.stop()
 38 | 
 39 |   // Enables the DataFrame#toRDD syntax used below.
 40 |   import com.mediative.sparrow.syntax.df._
 41 |   import scala.reflect.runtime.universe.TypeTag
 42 | 
 43 |   def assertCodec[T <: Product: ClassTag: TypeTag: RowConverter](value: T): Unit = {
 44 |     val rdd0 = sc.parallelize(List(value))
 45 |     assertResult(1) { rdd0.count }
 46 |     val df = sqlContext.createDataFrame(rdd0)
 47 |     val rdd1Maybe = df.toRDD[T]
 48 |     assert(rdd1Maybe.isSuccess, rdd1Maybe)
 49 |     val rdd1 = rdd1Maybe.toOption.get
 50 |     assertResult(0) { rdd0.subtract(rdd1).count }
 51 |     assertResult(0) { rdd1.subtract(rdd0).count }
 52 |   }
 53 | }
 54 | 
 55 | /**
 56 |  * By design, toRDD requires the classes it works on to take at least two
 57 |  * public constructor arguments.
 58 |  */
 59 | object CodecLimitationsTest {
 60 | 
 61 |   /*
 62 |    * FIXME: Replace the @schema annotations on the following case classes
 63 |    * by direct use of the DSL, to decouple these tests' failures from any
 64 |    * regressions in the @schema annotation.
 65 |    */
 66 | 
 67 |   @schema(equal = RowConverter.lenientEqual)
 68 |   case class TestToRdd1(intVal: Int, stringVal: String)
 69 | 
 70 |   @schema(equal = RowConverter.lenientEqual)
 71 |   case class TestToRdd2(intVal: Int, intOptionVal: Option[Int])
 72 | 
 73 |   @schema(equal = RowConverter.lenientEqual)
 74 |   case class TestToRdd3(stringVal: String, timestampVal: Timestamp)
 75 | 
 76 |   @schema(equal = RowConverter.lenientEqual)
 77 |   case class TestToRdd4(intVal: Int, doubleVal: Double)
 78 | 
 79 |   @schema(equal = RowConverter.lenientEqual)
 80 |   case class TestToRdd5(intVal: Int, doubleVal: Option[Double])
 81 | 
 82 |   import Wrap._
 83 |   @schema(equal = RowConverter.lenientEqual)
 84 |   case class TestToRdd6(intVal: Int, wrappedDoubleVal: Option[Wrapped[Double]])
 85 | 
 86 |   @schema(equal = RowConverter.lenientEqual)
 87 |   case class TestToRdd7(intVal: Int, wrappedStringVal: Option[Wrapped[String]])
 88 | 
 89 |   @schema(equal = RowConverter.lenientEqual)
 90 |   case class TestToRdd8(intVal: Int, wrappedStringVal: Wrapped[String])
 91 | 
 92 |   object Wrap {
 93 |     case class Wrapped[T](unwrap: T)
 94 |     implicit def wrappedDoubleConverter[T: FieldConverter]: FieldConverter[Wrapped[T]] =
 95 |       FieldConverter.reader[T].map(Wrapped(_))
 96 |   }
 97 | }
 98 | 
 99 | /**
100 |  * Specifications to track current limitations related to marshalling.
101 |  *
102 |  * By design, toRDD requires the classes it works on to take at least two
103 |  * public constructor arguments.
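 *
 * Tests wrapped in `pendingUntilFixed` document known failures; see the
 * FIXME comment inside each for the underlying Spark error.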
104 | */ 105 | class CodecLimitationsTest extends CodecLimitationsTestBase { 106 | import CodecLimitationsTest._ 107 | 108 | "toRDD should" - { 109 | 110 | import DataFrameReader._ 111 | 112 | "successfully marshall RDD => DataFrame => RDD an object containing" - { 113 | "Int, String" in { 114 | assertCodec(TestToRdd1(1, "a")) 115 | } 116 | 117 | "Int, Option[Int]" - { 118 | "when Some(Int)" in { 119 | assertCodec(TestToRdd2(1, Option(1))) 120 | } 121 | 122 | "when None" in { 123 | assertCodec(TestToRdd2(1, Option.empty)) 124 | } 125 | } 126 | 127 | "String, java.sql.Timestamp" in { 128 | pendingUntilFixed { 129 | // FIXME: 130 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'timestampVal' isn't a LongType as expected, TimestampType received.)) (DataFrameReaderTest.scala:203) 131 | assertCodec( 132 | TestToRdd3("a", Timestamp.valueOf("2015-07-15 09:00:00")) 133 | ) 134 | } 135 | } 136 | 137 | "Int, Option[Double]" - { 138 | "when Some(Double)" in { 139 | assertCodec(TestToRdd5(1, Some(2.0))) 140 | } 141 | "when None" in { 142 | assertCodec(TestToRdd5(1, None)) 143 | } 144 | } 145 | 146 | "Int, Wrapped[String]" in { 147 | pendingUntilFixed { 148 | // FIXME: 149 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'wrappedStringVal' isn't a StringType as expected, StructType(StructField(unwrap,StringType,true)) received.)) (DataFrameReaderTest.scala:207) 150 | assertCodec(TestToRdd8(1, Wrap.Wrapped("foo"))) 151 | } 152 | } 153 | 154 | "Int, Option[Wrapped[Double]]" - { 155 | "when None" in { 156 | pendingUntilFixed { 157 | // FIXME: 158 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'wrappedDoubleVal' isn't a DoubleType as expected, StructType(StructField(unwrap,DoubleType,false)) received.)) (DataFrameReaderTest.scala:207) 159 | assertCodec(TestToRdd6(1, None)) 160 | } 161 | } 162 | "when Some(Wrapped[Double])" in { 163 | pendingUntilFixed { 164 | // FIXME: 165 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'wrappedDoubleVal' isn't a DoubleType as expected, StructType(StructField(unwrap,DoubleType,false)) received.)) (DataFrameReaderTest.scala:204) 166 | assertCodec(TestToRdd6(1, Some(Wrap.Wrapped(2.0)))) 167 | } 168 | } 169 | } 170 | 171 | "Int, Option[Wrapped[String]]" - { 172 | "when None" in { 173 | pendingUntilFixed { 174 | // FIXME: 175 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'wrappedStringVal' isn't a StringType as expected, StructType(StructField(unwrap,StringType,true)) received.)) (DataFrameReaderTest.scala:204) 176 | assertCodec(TestToRdd7(1, None)) 177 | } 178 | } 179 | "when Some(Wrapped[String])" in { 180 | pendingUntilFixed { 181 | // FIXME: 182 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'wrappedStringVal' isn't a StringType as expected, StructType(StructField(unwrap,StringType,true)) received.)) (DataFrameReaderTest.scala:204) 183 | assertCodec(TestToRdd7(1, Some(Wrap.Wrapped("foo")))) 184 | } 185 | } 186 | } 187 | } 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /core/src/test/scala/com.mediative.sparrow/DataFrameReaderMacroTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Mediative 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.mediative.sparrow 18 | 19 | import scala.reflect.ClassTag 20 | 21 | import scalaz._ 22 | import scalaz.syntax.validation._ 23 | 24 | import org.scalatest._ 25 | 26 | import org.apache.spark.{ SparkContext, SparkConf } 27 | import org.apache.spark.sql._ 28 | import org.apache.spark.sql.types._ 29 | 30 | import Alias._ 31 | 32 | object DataFrameReaderMacroTest { 33 | 34 | case class Simple(name: String, count: Long) 35 | 36 | object Simple { 37 | implicit val schema: RowConverter[Simple] = DataFrameReader.createSchema 38 | } 39 | 40 | case class WithSimpleOption(name: String, count: Long, description: Option[String]) 41 | 42 | object WithSimpleOption { 43 | implicit val schema: RowConverter[WithSimpleOption] = DataFrameReader.createSchema 44 | } 45 | 46 | case class WithNested(name: String, inner: Simple, innerOpt: Option[WithSimpleOption]) 47 | 48 | object WithNested { 49 | implicit val schema: RowConverter[WithNested] = DataFrameReader.createSchema 50 | } 51 | 52 | case class SimpleMap(name: String, count: Int) 53 | 54 | object SimpleMap { 55 | implicit val schema = DataFrameReader.createSchema[SimpleMap] 56 | } 57 | 58 | trait EnumProvider[T] extends (String => T) { 59 | implicit val schema: FieldConverter[T] = 60 | FieldConverter.reader[String].map(apply) 61 | } 62 | 63 | sealed abstract class PetType extends Serializable 64 | case object Dog extends PetType 65 | case object Cat extends PetType 66 | case object Hamster extends PetType 67 | 68 | object PetType extends EnumProvider[PetType] { 69 | def apply(name: String): PetType = name match { 70 | case "dog" => Dog 71 | case "cat" => Cat 72 | case "hamster" => Hamster 73 | } 74 | } 75 | 76 | case class Pet(name: String, `type`: PetType) 77 | 78 | object Pet { 79 | implicit val schema: RowConverter[Pet] = DataFrameReader.createSchema 80 | } 81 | } 82 | 83 | class DataFrameReaderMacroTest extends FreeSpec with BeforeAndAfterAll { 84 | 85 | import DataFrameReaderMacroTest._ 86 | 87 | val sc = new SparkContext("local", "test2", new SparkConf) 88 | 89 | override def afterAll() = sc.stop() 90 | 91 | "RowConverter" - { 92 | 93 | def testSerialization(obj: Any) = { 94 | import java.io._ 95 | val buf = new ByteArrayOutputStream() 96 | val out = new ObjectOutputStream(buf) 97 | out.writeObject(obj) 98 | out.flush() 99 | 100 | val in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray)) 101 | assert(obj.getClass == in.readObject().getClass) 102 | } 103 | 104 | "can be serialized" in { 105 | val simple = StructType(Seq(StructField("name", StringType), StructField("count", LongType))) 106 | val withNested = StructType(Seq( 107 | StructField("name", StringType), 108 | StructField("inner", simple), 109 | StructField("innerOpt", simple, nullable = true))) 110 | 111 | Simple.schema.validateAndApply(simple) match { 112 | case Success(f) => 113 | testSerialization(f) 114 | testSerialization(f(Row("Name", 12L))) 115 | case Failure(e) => fail(e.toString) 116 | } 117 | 118 | WithNested.schema.validateAndApply(withNested) match { 119 | case Success(f) => 120 | 
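// Spark ships converter functions to executors, so both the function
// and the values it produces must survive Java serialization.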
testSerialization(f)
121 |         testSerialization(f(Row("Name", Row("Name", 12L), null)))
122 |       case Failure(e) => fail(e.toString)
123 |     }
124 |   }
125 | }
126 | 
127 | "toRDD" - {
128 |   import DataFrameReader._
129 | 
130 |   def testSuccess[T: RowConverter: ClassTag](json: Array[String], expected: List[T]) = {
131 |     val sqlContext = new SQLContext(sc)
132 |     val df = sqlContext.read.json(sc.parallelize(json))
133 |     val rdd = toRDD[T](df).valueOr { es => fail((es.head :: es.tail).mkString("\n")) }
134 | 
135 |     assert(rdd.collect().toList == expected)
136 |   }
137 | 
138 |   def testFailure[T: RowConverter: ClassTag](json: Array[String], expected: NonEmptyList[String]) = {
139 |     val sqlContext = new SQLContext(sc)
140 |     val df = sqlContext.read.json(sc.parallelize(json))
141 | 
142 |     assert(toRDD[T](df) == expected.failure)
143 |   }
144 | 
145 |   "work for simple case class with only primitives" in {
146 |     val json = Array(
147 |       """{"name": "First's Inner", "count": 121}""",
148 |       """{"name": "Last's inner", "count": 12}"""
149 |     )
150 |     val expected = List(
151 |       Simple("First's Inner", count = 121),
152 |       Simple("Last's inner", count = 12)
153 |     )
154 | 
155 |     testSuccess(json, expected)
156 |   }
157 | 
158 |   "support optional fields" - {
159 |     "when completely missing from the json" in {
160 |       val json = Array(
161 |         """{"name": "First's name", "count": 121}""",
162 |         """{"name": "Last's name", "count": 12}"""
163 |       )
164 |       val expected = List(
165 |         WithSimpleOption("First's name", count = 121, None),
166 |         WithSimpleOption("Last's name", count = 12, None)
167 |       )
168 | 
169 |       testSuccess(json, expected)
170 |     }
171 |     "when partially present in the json" in {
172 |       val json = Array(
173 |         """{"name": "First's name", "count": 121, "description": "abc"}""",
174 |         """{"name": "Last's name", "count": 12}"""
175 |       )
176 |       val expected = List(
177 |         WithSimpleOption("First's name", count = 121, Some("abc")),
178 |         WithSimpleOption("Last's name", count = 12, None)
179 |       )
180 | 
181 |       testSuccess(json, expected)
182 |     }
183 |   }
184 | 
185 |   "support nested objects" in {
186 |     val json = Array(
187 |       """{"name": "Guillaume", "inner": {"name": "First Inner", "count": 121}}""",
188 |       """{"name": "Last", "inner": {"name": "Last Inner", "count": 12}}"""
189 |     )
190 |     val expected = List(
191 |       WithNested("Guillaume", Simple("First Inner", 121), None),
192 |       WithNested("Last", Simple("Last Inner", 12), None)
193 |     )
194 | 
195 |     testSuccess(json, expected)
196 |   }
197 | 
198 |   "validate extra fields" in {
199 |     val json = Array(
200 |       """{"name": "Guillaume", "inner": {"name": "First's Inner", "count": 121, "abc": 244}}""",
201 |       """{"name": "Last", "inner": {"name": "Last's inner", "count": 12}}"""
202 |     )
203 | 
204 |     testFailure[WithNested](json, NonEmptyList.nel("There are extra fields: Set(abc)", Nil))
205 |   }
206 | 
207 |   "validate mixed type for a field with conversion possible (e.g. same column has both String and Int)" in {
208 |     val json = Array(
209 |       """{"name": "First's Inner", "count": 121}""",
210 |       """{"name": 2, "count": 12}"""
211 |     )
212 |     val expected = List(
213 |       Simple("First's Inner", count = 121),
214 |       Simple("2", count = 12)
215 |     )
216 | 
217 |     testSuccess(json, expected)
218 |   }
219 | 
220 |   "validate mixed type for a field without conversion possible (e.g. 
same column has both String and Int)" in {
221 |     val json = Array(
222 |       """{"name": "First's Inner", "count": 121}""",
223 |       """{"name": "Second", "count": "12"}"""
224 |     )
225 |     val expected = List(
226 |       Simple("First's Inner", count = 121),
227 |       Simple("Second", count = 12)
228 |     )
229 | 
230 |     testFailure[Simple](json, NonEmptyList.nel(
231 |       "The field 'count' isn't a LongType as expected, StringType received.", Nil))
232 |   }
233 | 
234 |   "work with ADT enums" in {
235 |     val json = Array(
236 |       """{"name": "Chausette", "type": "dog"}""",
237 |       """{"name": "Mixcer", "type": "cat"}"""
238 |     )
239 |     val expected = List(
240 |       Pet("Chausette", Dog),
241 |       Pet("Mixcer", Cat)
242 |     )
243 | 
244 |     testSuccess(json, expected)
245 |   }
246 | }
247 | 
248 | }
249 | 
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/SchemaSpec.scala:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2016 Mediative
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *    http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | package com.mediative.sparrow
 18 | 
 19 | import org.apache.spark.sql.Row
 20 | import org.apache.spark.sql.types._
 21 | import org.scalatest._
 22 | import ConverterTester._
 23 | 
 24 | import com.github.nscala_time.time.Imports._
 25 | 
 26 | object SchemaSpec {
 27 | 
 28 |   @schema
 29 |   case class WithoutCompanion(name: String, count: Long)
 30 | 
 31 |   implicit val tpe: Tpe[WithoutCompanion] = StructType(List(
 32 |     StructField("name", StringType, nullable = false),
 33 |     StructField("count", LongType, nullable = false)
 34 |   ))
 35 | 
 36 |   @schema
 37 |   case class WithCompanion(name: String, count: Long)
 38 | 
 39 |   object WithCompanion {
 40 |     implicit val tpeWithCompanion: Tpe[WithCompanion] = StructType(List(
 41 |       StructField("name", StringType, nullable = false),
 42 |       StructField("count", LongType, nullable = false)
 43 |     ))
 44 |   }
 45 | 
 46 |   @schema
 47 |   case class WithFieldOptions(
 48 |     @fieldName("NAME") name: String,
 49 |     count: Long)
 50 | 
 51 |   implicit val tpeWithFieldOptions: Tpe[WithFieldOptions] = StructType(List(
 52 |     StructField("NAME", StringType, nullable = false),
 53 |     StructField("count", LongType, nullable = false)
 54 |   ))
 55 | 
 56 |   @schema
 57 |   case class WithBody(name: String, count: Long) {
 58 |     override def toString = name
 59 |     def description = s"$name($count)"
 60 |   }
 61 | 
 62 |   object WithBody {
 63 |     implicit val tpe: Tpe[WithBody] = StructType(List(
 64 |       StructField("name", StringType, nullable = false),
 65 |       StructField("count", LongType, nullable = false)
 66 |     ))
 67 |     // FIXME the create methods should be named apply,
 68 |     // but the (apply _) syntax used by the macro
 69 |     // doesn't support an overloaded apply,
 70 |     // so the function should be called directly
 71 |     // in the generated code instead of
 72 |     // using partial application.
73 | def create: WithBody = create("World") 74 | def create(name: String): WithBody = WithBody(name, 10) 75 | } 76 | 77 | @schema 78 | case class WithSingleStatement(name: String, count: Long) { 79 | def description = s"$name($count)" 80 | } 81 | 82 | object WithSingleStatement { 83 | implicit val tpe: Tpe[WithSingleStatement] = StructType(List( 84 | StructField("name", StringType, nullable = false), 85 | StructField("count", LongType, nullable = false) 86 | )) 87 | } 88 | 89 | @schema(equal = RowConverter.lenientEqual) 90 | case class Lenient(name: String, count: Long) 91 | 92 | object Lenient { 93 | implicit val tpe: Tpe[Lenient] = StructType(List( 94 | StructField("NAME", StringType, nullable = false), 95 | StructField("Count", LongType, nullable = false) 96 | )) 97 | } 98 | 99 | @schema({ 100 | case "name" => "NAME" 101 | }) 102 | case class WithPF(name: String, count: Long) 103 | 104 | object WithPF { 105 | implicit val tpe: Tpe[WithPF] = StructType(List( 106 | StructField("NAME", StringType, nullable = false), 107 | StructField("count", LongType, nullable = false) 108 | )) 109 | } 110 | 111 | @schema({ case "name" => "id" }, equal = RowConverter.lenientEqual) 112 | case class WithSchemaOptions(name: String, count: Long) 113 | 114 | object WithSchemaOptions { 115 | implicit val tpe: Tpe[WithSchemaOptions] = StructType(List( 116 | StructField("ID", StringType, nullable = false), 117 | StructField("Count", LongType, nullable = false) 118 | )) 119 | } 120 | 121 | case class EmbeddedChild(@fieldName("") name: String, count: Long) 122 | 123 | @schema(equal = RowConverter.lenientEqual) 124 | case class Parent(name: String, @embedded(prefix = "Child") child: EmbeddedChild) 125 | 126 | object Parent { 127 | implicit val tpe: Tpe[Parent] = StructType(List( 128 | StructField("Name", StringType, nullable = false), 129 | StructField("Child", StringType, nullable = false), 130 | StructField("Child_Count", LongType, nullable = false) 131 | )) 132 | } 133 | 134 | @schema 135 | case class DateTimeHolder( 136 | name: String, 137 | @fieldOption(DatePattern("dd/MM/yyyy HH:mm:ss")) dateTime: DateTime) 138 | 139 | object DateTimeHolder { 140 | implicit val tpe: Tpe[DateTimeHolder] = StructType(List( 141 | StructField("name", StringType, nullable = false), 142 | StructField("dateTime", StringType, nullable = false) 143 | )) 144 | } 145 | 146 | @schema 147 | case class LocalDateHolder( 148 | name: String, 149 | @fieldOption("dd/MM/yyyy") dateTime: LocalDate) 150 | 151 | object LocalDateHolder { 152 | implicit val tpe: Tpe[LocalDateHolder] = StructType(List( 153 | StructField("name", StringType, nullable = false), 154 | StructField("dateTime", StringType, nullable = false) 155 | )) 156 | } 157 | 158 | @schema 159 | case class UnixDateHolder( 160 | name: String, 161 | @fieldOption(UnixTimestamp) dateTime: DateTime) 162 | 163 | object UnixDateHolder { 164 | implicit val tpe: Tpe[UnixDateHolder] = StructType(List( 165 | StructField("name", StringType, nullable = false), 166 | StructField("dateTime", LongType, nullable = false) 167 | )) 168 | } 169 | 170 | @schema 171 | case class JavaTimestampHolder( 172 | name: String, 173 | @fieldOption(JavaTimestamp) dateTime: DateTime) 174 | 175 | object JavaTimestampHolder { 176 | implicit val tpe: Tpe[JavaTimestampHolder] = StructType(List( 177 | StructField("name", StringType, nullable = false), 178 | StructField("dateTime", LongType, nullable = false) 179 | )) 180 | } 181 | } 182 | 183 | class SchemaSpec extends FreeSpec { 184 | 185 | import SchemaSpec._ 186 | 187 | 
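// Each case below feeds a raw Row through the converter generated by
// @schema for the corresponding fixture above; `test` comes from
// ConverterTester and is assumed to run the implicit RowConverter against
// the declared Tpe and compare the result with the expected instance.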
"@schema" - { 188 | "should generate an implicit schema in an existing companion object" in { 189 | test(Row("Hello", 1L), WithCompanion("Hello", 1)) 190 | } 191 | 192 | "should generate an implicit schema in a new companion object" in { 193 | test(Row("Hello", 1L), WithoutCompanion("Hello", 1)) 194 | } 195 | 196 | "should support @fieldOption name" in { 197 | test(Row("Hello", 1L), WithFieldOptions("Hello", 1)) 198 | } 199 | 200 | "should support case class with multiple statements in the body" in { 201 | // Making sure that no method was removed from WithBody 202 | assert(WithBody.create.description == "World(10)") 203 | test(Row("Hello", 1L), WithBody("Hello", 1)) 204 | } 205 | 206 | "should support case class with a single statement in the body" in { 207 | test(Row("Hello", 1L), WithSingleStatement("Hello", 1)) 208 | } 209 | 210 | "should support lenient equal" in { 211 | test(Row("Hello", 1L), Lenient("Hello", 1)) 212 | } 213 | 214 | "should support partial function rename" in { 215 | test(Row("Hello", 1L), WithPF("Hello", 1)) 216 | } 217 | 218 | "should support both lenient equal and partial function rename on the same case class" in { 219 | test(Row("Hello", 1L), WithSchemaOptions("Hello", 1)) 220 | } 221 | 222 | "should support @embedded" in { 223 | test(Row("Hello", "World", 1L), Parent("Hello", EmbeddedChild("World", 1))) 224 | } 225 | 226 | "should support @fieldOption with DatePattern as option" in { 227 | test(Row("Hello", "25/12/2015 14:40:00"), DateTimeHolder("Hello", DateTime.parse("2015-12-25T14:40:00.00"))) 228 | } 229 | "should support @fieldOption with a string as option" in { 230 | test(Row("Hello", "25/12/2015"), LocalDateHolder("Hello", LocalDate.parse("2015-12-25"))) 231 | } 232 | "should support @fieldOption with a unix timestamp" in { 233 | val seconds = System.currentTimeMillis / 1000 234 | test(Row("Hello", seconds), UnixDateHolder("Hello", new DateTime(seconds * 1000))) 235 | } 236 | "should support @fieldOption with a java timestamp" in { 237 | val now = System.currentTimeMillis 238 | test(Row("Hello", now), JavaTimestampHolder("Hello", new DateTime(now))) 239 | } 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /core/src/test/scala/com.mediative.sparrow/DataFrameReaderTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Mediative 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.mediative.sparrow 18 | 19 | import scala.reflect.ClassTag 20 | 21 | import scalaz._ 22 | import scalaz.syntax.validation._ 23 | 24 | import org.scalatest._ 25 | 26 | import org.apache.spark.{ SparkContext, SparkConf } 27 | import org.apache.spark.rdd.RDD 28 | import org.apache.spark.sql._ 29 | import org.apache.spark.sql.types._ 30 | 31 | import play.api.libs.functional.syntax._ 32 | 33 | import RowConverter._ 34 | 35 | object DataFrameReaderTest { 36 | case class Simple(name: String, count: Long) 37 | 38 | object Simple { 39 | implicit val schema = ( 40 | field[String]("name") and 41 | field[Long]("count") 42 | )(apply _) 43 | } 44 | 45 | case class WithSimpleOption(name: String, count: Long, description: Option[String]) 46 | 47 | object WithSimpleOption { 48 | implicit val schema = ( 49 | field[String]("name") and 50 | field[Long]("count") and 51 | field[Option[String]]("description") 52 | )(apply _) 53 | } 54 | 55 | case class WithNested(name: String, inner: Simple, innerOpt: Option[WithSimpleOption]) 56 | 57 | object WithNested { 58 | implicit val schema = ( 59 | field[String]("name") and 60 | field[Simple]("inner") and 61 | field[Option[WithSimpleOption]]("innerOpt") 62 | )(apply _) 63 | } 64 | 65 | case class SimpleMap(name: String, count: Int) 66 | 67 | object SimpleMap { 68 | implicit val schema = ( 69 | field[String]("name") and 70 | field[String]("count").map(_.toInt) 71 | )(apply _) 72 | } 73 | 74 | sealed abstract class PetType 75 | case object Dog extends PetType 76 | case object Cat extends PetType 77 | case object Hamster extends PetType 78 | 79 | object PetType { 80 | implicit val schema: FieldConverter[PetType] = 81 | FieldConverter.reader[String].map { 82 | case "dog" => Dog 83 | case "cat" => Cat 84 | case "hamster" => Hamster 85 | } 86 | } 87 | 88 | case class Pet(name: String, tpe: PetType) 89 | 90 | object Pet { 91 | implicit val schema = ( 92 | field[String]("name") and 93 | field[PetType]("type") 94 | )(apply _) 95 | } 96 | } 97 | 98 | class DataFrameReaderTest extends FreeSpec with BeforeAndAfterAll { 99 | 100 | import DataFrameReaderTest._ 101 | 102 | val sc = new SparkContext("local", "test2", new SparkConf) 103 | 104 | override def afterAll() = sc.stop() 105 | 106 | "RowConverter" - { 107 | 108 | def testSerialization(obj: Any) = { 109 | import java.io._ 110 | val buf = new ByteArrayOutputStream() 111 | val out = new ObjectOutputStream(buf) 112 | out.writeObject(obj) 113 | out.flush() 114 | 115 | val in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray)) 116 | assert(obj.getClass == in.readObject().getClass) 117 | } 118 | 119 | "can be serialized" in { 120 | val simple = StructType(Seq(StructField("name", StringType), StructField("count", LongType))) 121 | val withNested = StructType(Seq( 122 | StructField("name", StringType), 123 | StructField("inner", simple), 124 | StructField("innerOpt", simple, nullable = true))) 125 | 126 | Simple.schema.validateAndApply(simple) match { 127 | case Success(f) => 128 | testSerialization(f) 129 | testSerialization(f(Row("Name", 12L))) 130 | case Failure(e) => fail(e.toString) 131 | } 132 | 133 | WithNested.schema.validateAndApply(withNested) match { 134 | case Success(f) => 135 | testSerialization(f) 136 | testSerialization(f(Row("Name", Row("Name", 12L), null))) 137 | case Failure(e) => fail(e.toString) 138 | } 139 | } 140 | } 141 | 142 | "toRDD should" - { 143 | 144 | import DataFrameReader._ 145 | 146 | def testSuccess[T: RowConverter: ClassTag](json: Array[String], expected: 
List[T]) = { 147 | val sqlContext = new SQLContext(sc) 148 | val df = sqlContext.read.json(sc.parallelize(json)) 149 | val rdd = toRDD[T](df).valueOr { es => fail((es.head :: es.tail).mkString("\n")) } 150 | 151 | assert(rdd.collect().toList == expected) 152 | } 153 | 154 | def testFailure[T: RowConverter: ClassTag](json: Array[String], expected: NonEmptyList[String]) = { 155 | val sqlContext = new SQLContext(sc) 156 | val df = sqlContext.read.json(sc.parallelize(json)) 157 | 158 | assert(toRDD[T](df) == expected.failure) 159 | } 160 | 161 | "work for simple case class with only primitives" in { 162 | val json = Array( 163 | """{"name": "First's Inner", "count": 121}""", 164 | """{"name": "Last's inner", "count": 12}""" 165 | ) 166 | val expected = List( 167 | Simple("First's Inner", count = 121), 168 | Simple("Last's inner", count = 12) 169 | ) 170 | 171 | testSuccess(json, expected) 172 | } 173 | 174 | "support optional fields" - { 175 | "when completely missing from the json" in { 176 | val json = Array( 177 | """{"name": "First's name", "count": 121}""", 178 | """{"name": "Last's name", "count": 12}""" 179 | ) 180 | val expected = List( 181 | WithSimpleOption("First's name", count = 121, None), 182 | WithSimpleOption("Last's name", count = 12, None) 183 | ) 184 | 185 | testSuccess(json, expected) 186 | } 187 | "when partially present in the json" in { 188 | val json = Array( 189 | """{"name": "First's name", "count": 121, "description": "abc"}""", 190 | """{"name": "Last's name", "count": 12}""" 191 | ) 192 | val expected = List( 193 | WithSimpleOption("First's name", count = 121, Some("abc")), 194 | WithSimpleOption("Last's name", count = 12, None) 195 | ) 196 | 197 | testSuccess(json, expected) 198 | } 199 | } 200 | 201 | "support nested objects" in { 202 | val json = Array( 203 | """{"name": "Guillaume", "inner": {"name": "First Inner", "count": 121}}""", 204 | """{"name": "Last", "inner": {"name": "Last Inner", "count": 12}}""" 205 | ) 206 | val expected = List( 207 | WithNested("Guillaume", Simple("First Inner", 121), None), 208 | WithNested("Last", Simple("Last Inner", 12), None) 209 | ) 210 | 211 | testSuccess(json, expected) 212 | } 213 | 214 | "validate extra fields" in { 215 | val json = Array( 216 | """{"name": "Guillaume", "inner": {"name": "First's Inner", "count": 121, "abc": 244}}""", 217 | """{"name": "Last", "inner": {"name": "Last's inner", "count": 12}}""" 218 | ) 219 | 220 | testFailure[WithNested](json, NonEmptyList.nel("There are extra fields: Set(abc)", Nil)) 221 | } 222 | 223 | "validate mixed type for a field with conversion possible (e.g. same column has both String and Int)" in { 224 | val json = Array( 225 | """{"name": "First's Inner", "count": 121}""", 226 | """{"name": 2, "count": 12}""" 227 | ) 228 | val expected = List( 229 | Simple("First's Inner", count = 121), 230 | Simple("2", count = 12) 231 | ) 232 | 233 | testSuccess(json, expected) 234 | } 235 | 236 | "validate mixed type for a field without conversion possible (e.g. 
same column has both String and Int)" in { 237 | val json = Array( 238 | """{"name": "First's Inner", "count": 121}""", 239 | """{"name": "Second", "count": "12"}""" 240 | ) 241 | val expected = List( 242 | Simple("First's Inner", count = 121), 243 | Simple("Second", count = 12) 244 | ) 245 | 246 | testFailure[Simple](json, NonEmptyList.nel( 247 | "The field 'count' isn't a LongType as expected, StringType received.", Nil)) 248 | } 249 | 250 | "work with ADT enums" in { 251 | val json = Array( 252 | """{"name": "Chausette", "type": "dog"}""", 253 | """{"name": "Mixcer", "type": "cat"}""" 254 | ) 255 | val expected = List( 256 | Pet("Chausette", Dog), 257 | Pet("Mixcer", Cat) 258 | ) 259 | 260 | testSuccess(json, expected) 261 | } 262 | } 263 | } 264 | -------------------------------------------------------------------------------- /core/src/main/scala/com.mediative.sparrow/DataFrameReader.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Mediative 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.mediative.sparrow 18 | 19 | import language.experimental.macros 20 | import reflect.macros.Context 21 | import scala.annotation.StaticAnnotation 22 | import scala.reflect.internal.annotations.compileTimeOnly 23 | import scala.reflect.runtime.universe._ 24 | import scala.reflect.ClassTag 25 | 26 | import scalaz.{ Equal, ValidationNel } 27 | 28 | import org.apache.spark.rdd.RDD 29 | import org.apache.spark.sql._ 30 | 31 | object Alias { 32 | type V[T] = ValidationNel[String, T] 33 | } 34 | import Alias._ 35 | 36 | package syntax { 37 | object df extends ToDataFrameOps 38 | } 39 | 40 | trait ToDataFrameOps { 41 | implicit def ToDataFrameOpsFromDataFrame(a: DataFrame): DataFrameOps = new DataFrameOps(a) 42 | } 43 | 44 | final class DataFrameOps(val self: DataFrame) extends AnyVal { 45 | def toRDD[T: ClassTag: RowConverter]: V[RDD[T]] = { 46 | DataFrameReader.toRDD[T](self) 47 | } 48 | } 49 | 50 | case class SchemaOptions( 51 | nameTransformer: PartialFunction[String, String] = PartialFunction.empty, 52 | equal: Equal[String] = Equal.equalA) { 53 | def transform(name: String): String = nameTransformer.applyOrElse(name, identity[String]) 54 | } 55 | 56 | object SchemaOptions { 57 | implicit def defaultOptions: SchemaOptions = SchemaOptions() 58 | implicit def fromEqual(equal: Equal[String]): SchemaOptions = SchemaOptions(equal = equal) 59 | implicit def fromTransformer(transformer: PartialFunction[String, String]): SchemaOptions = 60 | SchemaOptions(nameTransformer = transformer) 61 | 62 | def apply(equal: Equal[String])(nameTransformer: PartialFunction[String, String]): SchemaOptions = 63 | SchemaOptions(nameTransformer, equal) 64 | 65 | } 66 | 67 | class embedded(prefix: String = "") extends StaticAnnotation 68 | class fieldName(name: String) extends StaticAnnotation 69 | class fieldOption(option: Any) extends StaticAnnotation 70 | 71 | @compileTimeOnly("This annotation 
requires macro paradise.") 72 | class schema( 73 | nameTransformer: PartialFunction[String, String] = PartialFunction.empty, 74 | equal: Equal[String] = Equal.equalA) 75 | extends StaticAnnotation { 76 | 77 | def macroTransform(annottees: Any*): Any = macro DataFrameReader.annotationImpl 78 | } 79 | 80 | object DataFrameReader { 81 | def toRDD[T: ClassTag](df: DataFrame)(implicit rc: RowConverter[T]): V[RDD[T]] = { 82 | rc.validateAndApply(df.schema).map { f => df.map(f) } 83 | } 84 | 85 | /** 86 | * This function will use a macro to inspect the case class' AST and generate code similar to: 87 | * 88 | * 89 | * implicit val schema = ( 90 | * field[String]("name") and 91 | * field[Int]("count") 92 | * )(apply _) 93 | * 94 | * 95 | * This macro also supports an @embedded annotation that will treat a field as another case class 96 | * to embed in the parent one instead of relying on its schema. Thus, for a case class like: 97 | * 98 | * 99 | * case class Child(firstName: String, lastName: String) 100 | * case class Parent(id: Int, @embedded("prefix_") child: Child) 101 | * 102 | * 103 | * the macro will generate code similar to: 104 | * 105 | * 106 | * implicit val schema = ( 107 | * field[Int]("id") and 108 | * ( 109 | * field[String]("prefix_firstName") and 110 | * field[String]("prefix_lastName") 111 | * )(apply _) 112 | * )(apply _) 113 | * 114 | * 115 | * The SchemaOptions case class also allows further customization related to field names. 116 | * Taking into account the options, the generated code will be closer to: 117 | * 118 | * 119 | * implicit val schema = ( 120 | * field[String](options.transform("name"), options.equal) and 121 | * field[Int](options.transform("count"), options.equal) 122 | * )(apply _) 123 | * 124 | * 125 | * The instance of Equal will be used to find a match between the case class field name and 126 | * the DataFrame's field name. The transform function uses the provided partial function 127 | * to support things like more specific renaming of fields. The unit tests provide examples. 
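 *
 * As a concrete sketch, the `Lenient` fixture from SchemaSpec,
 *
 *   @schema(equal = RowConverter.lenientEqual)
 *   case class Lenient(name: String, count: Long)
 *
 * is expected to expand to roughly the following (an approximation of the
 * generated tree, not its exact shape):
 *
 *   object Lenient {
 *     implicit val __schema = {
 *       val options = new SchemaOptions(equal = RowConverter.lenientEqual)
 *       (
 *         field[String](options.transform("name"), options.equal) and
 *         field[Long](options.transform("count"), options.equal)
 *       )(Lenient.apply _)
 *     }
 *   }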
128 | * 129 | * @tparam T the type of the case class for which to generate a RowConverter[T] composite 130 | */ 131 | def createSchema[T](implicit options: SchemaOptions): RowConverter[T] = macro createSchemaImpl[T] 132 | 133 | def createSchemaImpl[T: c.WeakTypeTag](c: Context)(options: c.Expr[SchemaOptions]): c.Expr[RowConverter[T]] = { 134 | import c.universe._ 135 | 136 | val embeddedType = weakTypeOf[embedded] 137 | val fieldNameType = weakTypeOf[fieldName] 138 | val fieldOptionsType = weakTypeOf[fieldOption] 139 | 140 | val optionsDeclaration = q"val options = $options" 141 | val optionsName = optionsDeclaration match { 142 | case q"val $name = $value" => name 143 | } 144 | 145 | def converter(tpe: Type, prefix: Option[Tree]): Tree = { 146 | 147 | val declarations = tpe.declarations 148 | val ctor = declarations.collectFirst { 149 | case m: MethodSymbol if m.isPrimaryConstructor => m 150 | } getOrElse { 151 | val msg = "Cannot find the primary constructor for type " + tpe 152 | c.abort(c.enclosingPosition, msg) 153 | } 154 | 155 | val params = ctor.paramss.head 156 | val fields = params.map { p => 157 | val fieldType = tpe.declaration(p.name).typeSignature 158 | val name = p.name.decoded 159 | 160 | val fieldNameOpt = p.annotations.find(_.tpe == fieldNameType) 161 | val fieldOptions = p.annotations.find(_.tpe == fieldOptionsType) 162 | 163 | p.annotations.find(_.tpe == embeddedType).map { at => 164 | if (fieldNameOpt.isDefined || fieldOptions.isDefined) 165 | c.abort(c.enclosingPosition, "@embedded and @fieldName or @fieldOption cannot be used on the same field.") 166 | converter(fieldType, at.scalaArgs.headOption) 167 | } getOrElse { 168 | val p = prefix.getOrElse(q""" "" """) 169 | 170 | val block = fieldNameOpt.map { at => 171 | val fieldName = at.scalaArgs.head 172 | q"field[$fieldType]($p + $fieldName)" 173 | } getOrElse { 174 | q"field[$fieldType]($optionsName.transform($p + $name), $optionsName.equal)" 175 | } 176 | 177 | fieldOptions.fold(block) { fc => 178 | q"$block(${fc.scalaArgs.head})" 179 | } 180 | } 181 | }.toList 182 | 183 | // 184 | // This macro serves no purpose for a case class without fields, so that will never be supported. 185 | // 186 | // However, case classes with only one field are also not supported right now. While there could 187 | // be some usefulness, there's generally not much value in a case class with only one field 188 | // other than as a wrapper, in which case they should likely be serialized to a string, 189 | // not to a single field case class. 190 | // 191 | // If support is required in the future, it can be implemented. The reason it isn't supported 192 | // by the current code is that the macro would generate something like: 193 | // 194 | // implicit val schema = ( 195 | // field[String]("name") 196 | // )(apply _) 197 | // 198 | // The problem with this is that without the `and`, the RowConverter isn't converted to a 199 | // functional builder and the apply method isn't defined. 
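// As an illustration (`Wrapper` is hypothetical, not a fixture):
//
//   case class Wrapper(name: String)
//   // would generate: (field[String]("name"))(Wrapper.apply _)
//
// and since the lone field[String]("name") is never lifted into a
// functional builder by `and`, the trailing (Wrapper.apply _) application
// does not typecheck.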
200 | // 201 | if (fields.size < 2) { 202 | c.error(c.enclosingPosition, "Only case classes with more than one field are supported.") 203 | } 204 | 205 | val composite = fields.reduceLeft { (left, right) => q"$left and $right" } 206 | val companion = tpe.typeSymbol.companionSymbol 207 | val applies = companion.asModule.typeSignature.members 208 | .filter(_.name.decoded == "apply") 209 | .filter(_.isMethod) 210 | val exists = applies 211 | .exists { apply => 212 | val m = apply.asMethod 213 | m.returnType.typeSymbol == tpe.typeSymbol && { 214 | m.paramss match { 215 | case applyParams :: Nil => 216 | fields.length == applyParams.length && { 217 | (params zip applyParams) forall { 218 | case (x, y) => 219 | x.typeSignature == y.typeSignature 220 | } 221 | } 222 | case _ => false 223 | } 224 | } 225 | } 226 | if (!exists) { 227 | val msg = 228 | s""" 229 | | Cannot find an apply method with the proper signature. 230 | | tpe: $tpe 231 | | apply methods: $applies 232 | """.stripMargin 233 | c.info(c.enclosingPosition, msg, force = true) 234 | c.error(c.enclosingPosition, "Cannot find an apply method with the proper signature.") 235 | } 236 | 237 | q"$composite($companion.apply _)" 238 | } 239 | 240 | val tpe = implicitly[c.WeakTypeTag[T]].tpe 241 | 242 | val code = q""" 243 | import _root_.com.mediative.sparrow.RowConverter._ 244 | import _root_.com.mediative.sparrow.RowConverter.syntax._ 245 | $optionsDeclaration 246 | ${converter(tpe, None)} 247 | """ 248 | 249 | c.Expr[RowConverter[T]](code) 250 | } 251 | 252 | def annotationImpl(c: Context)(annottees: c.Expr[Any]*): c.Expr[Any] = { 253 | import c.universe._ 254 | 255 | val className = annottees.head.tree match { 256 | case q"case class $className(..$args) extends ..$parents { ..$body }" => 257 | className 258 | case _ => 259 | c.abort(c.enclosingPosition, "The @schema annotation only supports public case classes.") 260 | } 261 | 262 | val tpe = className.toTermName 263 | 264 | val schemaOptionsType = c.weakTypeOf[SchemaOptions] 265 | val opts = c.prefix.tree match { 266 | case q"new $atName(..$args)" => 267 | q"new $schemaOptionsType(..$args)" 268 | case _ => 269 | q"new $schemaOptionsType" 270 | } 271 | 272 | val schema = q""" 273 | implicit val __schema = _root_.com.mediative.sparrow.DataFrameReader.createSchema[$className]($opts) 274 | """ 275 | 276 | val companion = annottees.drop(1).headOption.map { obj => 277 | val q"object $objectName extends ..$parents { $self => ..$body }" = obj.tree 278 | q""" 279 | object $objectName extends ..$parents { $self => 280 | ..$body 281 | $schema 282 | } 283 | """ 284 | } getOrElse { 285 | q""" 286 | object $tpe { 287 | $schema 288 | } 289 | """ 290 | } 291 | c.Expr[Any](q"..${List(annottees.head.tree, companion)}") 292 | } 293 | } 294 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | --------------------------------------------------------------------------------