├── .gitignore
├── project
│   ├── build.properties
│   └── plugins.sbt
├── core
│   └── src
│       ├── test
│       │   ├── resources
│       │   │   └── log4j.properties
│       │   ├── scala-2.11
│       │   │   └── com.mediative.sparrow
│       │   │       └── CodecLimitations211Test.scala
│       │   ├── scala-2.10
│       │   │   └── com.mediative.sparrow
│       │   │       └── CodecLimitations210Test.scala
│       │   └── scala
│       │       └── com.mediative.sparrow
│       │           ├── ConverterTester.scala
│       │           ├── DateTimeOptionsSpec.scala
│       │           ├── RowConverterEmbeddedSpec.scala
│       │           ├── RowConverterFieldNameAdjustmentsSpec.scala
│       │           ├── DataFrameReaderMacroFailureTest.scala
│       │           ├── CodecLimitationsTest.scala
│       │           ├── DataFrameReaderMacroTest.scala
│       │           ├── SchemaSpec.scala
│       │           └── DataFrameReaderTest.scala
│       └── main
│           └── scala
│               └── com.mediative.sparrow
│                   ├── RowConverter.scala
│                   ├── FieldConverter.scala
│                   └── DataFrameReader.scala
├── NEWS.md
├── .travis.yml
├── CONTRIBUTING.md
├── README.md
└── LICENSE
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 |
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=0.13.8
2 |
--------------------------------------------------------------------------------
/core/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set everything to be logged to the console
2 | log4j.rootCategory=ERROR, stderr
3 | log4j.appender.stderr=org.apache.log4j.ConsoleAppender
4 | log4j.appender.stderr.target=System.err
5 | log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
6 | log4j.appender.stderr.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
7 |
8 | log4j.logger.com.mediative=FATAL
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | resolvers += Resolver.url("YPG-Data SBT Plugins", url("https://dl.bintray.com/ypg-data/sbt-plugins"))(Resolver.ivyStylePatterns)
2 | resolvers += "Spark Package Main Repo" at "https://dl.bintray.com/spark-packages/maven"
3 |
4 | addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.2")
5 | addSbtPlugin("com.mediative.sbt" % "sbt-mediative-core" % "0.1.1")
6 | addSbtPlugin("com.mediative.sbt" % "sbt-mediative-oss" % "0.1.1")
7 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # Release notes
2 |
3 | ## Sparrow 0.2.0
4 |
5 | - Bump Spark version to 1.6.0
6 | - Test against Scala 2.10.6 on Travis
7 | - Bump the Macro Paradise plugin to 2.1.0
8 |
9 | ## Sparrow 0.1.2
10 |
11 | - Add tests to document limitations
12 | - Switch to `sbt-mediative` plugins to reduce build.sbt boilerplate
13 |
14 | ## Sparrow 0.1.1
15 |
16 | - Publish Spark package
17 | - Bump Spark version to 1.3.1
18 |
19 | ## Sparrow 0.1.0
20 |
21 | First public release of Sparrow - a Scala library for converting Spark rows to
22 | case classes.
23 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # Use container-based infrastructure
2 | sudo: false
3 |
4 | language: scala
5 |
6 | scala:
7 | - 2.10.5
8 | - 2.10.6
9 | - 2.11.7
10 |
11 | jdk:
12 | - openjdk7
13 | - oraclejdk8
14 |
15 | script:
16 | - sbt ++$TRAVIS_SCALA_VERSION test
17 |
18 | before_cache:
19 | - find $HOME/.sbt -name "*.lock" | xargs rm
20 | - find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm
21 |
22 | cache:
23 | directories:
24 | - $HOME/.ivy2/cache
25 | - $HOME/.sbt/boot/
26 |
27 | notifications:
28 | webhooks:
29 | urls:
30 | - https://webhooks.gitter.im/e/57534ac5b1f36eaa92c6
31 | on_success: change # options: [always|never|change] default: always
32 | on_failure: always # options: [always|never|change] default: always
33 | on_start: false # default: false
34 |
--------------------------------------------------------------------------------
/core/src/test/scala-2.11/com.mediative.sparrow/CodecLimitations211Test.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | class CodecLimitations211Test extends CodecLimitationsTestBase {
20 | import CodecLimitationsTest._
21 |
22 | "toRDD should" - {
23 | "successfully marshall RDD => DataFrame => RDD an object containing" - {
24 | "Int, Double" in {
25 | assertCodec(TestToRdd4(1, 2.0))
26 | }
27 | }
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | Bugs and feature requests should be reported in the [GitHub issue
4 | tracker](https://github.com/ypg-data/sparrow/issues/new) and should
5 | answer the following questions:
6 |
7 | - Motivation: Why should this be addressed? What is the purpose?
8 | - Input: What are the pre-conditions?
9 | - Output: What is the expected outcome after the issue has been addressed?
10 | - Test: How can the results listed in the "Output" be QA'ed?
11 |
12 | For code contributions, these are the suggested steps:
13 |
14 | - Identify the change you'd like to make, e.g. fix a bug or add a feature.
15 | Larger contributions should always begin with [first creating an
16 | issue](https://github.com/ypg-data/sparrow/issues/new) to ensure
17 | that the change is properly scoped.
18 | - Fork the repository on GitHub.
19 | - Develop your change on a feature branch.
20 | - Write tests to validate your change works as expected.
21 | - Create a pull request.
22 | - Address any issues raised during the code review.
23 | - Once you get a "+1" on the pull request, the change can be merged.
24 |
--------------------------------------------------------------------------------
/core/src/test/scala-2.10/com.mediative.sparrow/CodecLimitations210Test.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | class CodecLimitations210Test extends CodecLimitationsTestBase {
20 | import CodecLimitationsTest._
21 |
22 | "toRDD should" - {
23 | "successfully marshall RDD => DataFrame => RDD an object containing" - {
24 | "Int, Double" in {
25 | pendingUntilFixed {
26 | // FIXME:
27 | // "org.apache.spark.SparkException: Job aborted due to stage failure"
28 | // Caused by: java.lang.ClassCastException: java.lang.Double cannot be cast to java.lang.Integer
29 | // at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:106)
30 | assertCodec(TestToRdd4(1, 2.0))
31 | }
32 | }
33 | }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/ConverterTester.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import org.apache.spark.sql.Row
20 | import org.apache.spark.sql.types.StructType
21 | import org.scalatest.Assertions.fail
22 | import org.scalatest.Matchers
23 |
24 | import scalaz.{ Failure, Success }
25 |
26 | object ConverterTester extends Matchers {
27 |
28 | trait Tpe[T] extends (() => StructType)
29 | implicit def toTpe[T](tpe: StructType): Tpe[T] = new Tpe[T] {
30 | def apply() = tpe
31 | }
32 |
33 | def test[T](row: Row, expected: T)(implicit schema: RowConverter[T], tpe: Tpe[T]) = {
34 | schema.validateAndApply(tpe()) match {
35 | case Success(f) => assert(f(row) == expected)
36 | case Failure(errors) => fail(errors.stream.mkString(". "))
37 | }
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/DateTimeOptionsSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import org.apache.spark.sql.Row
20 | import org.apache.spark.sql.types._
21 | import org.scalatest._
22 |
23 | import com.github.nscala_time.time.Imports._
24 |
25 | import RowConverter._
26 | import RowConverter.syntax._
27 |
28 | class DateTimeOptionsSpec extends FreeSpec {
29 |
30 | import ConverterTester._
31 |
32 | case class DateTimeHolder(
33 | name: String,
34 | dateTime: DateTime)
35 |
36 | object DateTimeHolder {
37 | implicit val schema = (
38 | field[String]("name") and
39 | field[DateTime]("dateTime")(DatePattern("dd/MM/yyyy HH:mm:ss"))
40 | )(apply _)
41 |
42 | implicit val tpe: Tpe[DateTimeHolder] = StructType(List(
43 | StructField("name", StringType, nullable = false),
44 | StructField("dateTime", StringType, nullable = false)
45 | ))
46 | }
47 |
48 | case class LocalDateHolder(
49 | name: String,
50 | dateTime: LocalDate)
51 |
52 | object LocalDateHolder {
53 | implicit val schema = (
54 | field[String]("name") and
55 | field[LocalDate]("dateTime")(DatePattern("dd/MM/yyyy"))
56 | )(apply _)
57 |
58 | implicit val tpe: Tpe[LocalDateHolder] = StructType(List(
59 | StructField("name", StringType, nullable = false),
60 | StructField("dateTime", StringType, nullable = false)
61 | ))
62 | }
63 |
64 | "DateTimeRowConverter" - {
65 | "should allow define a custom date format for DateTime fields" in {
66 | test(Row("Hello", "25/12/2015 14:40:00"), DateTimeHolder("Hello", DateTime.parse("2015-12-25T14:40:00.00")))
67 | }
68 | "should throw an exception if the DateTime value doesn't have the correct format" in {
69 | val ex = intercept[IllegalArgumentException] {
70 | test(Row("Hello", "2/212/2015 14:40:00"), DateTimeHolder("Hello", DateTime.parse("2015-12-25T14:40:00.00")))
71 | }
72 | assert(ex.getMessage === "Invalid format: \"2/212/2015 14:40:00\" is malformed at \"2/2015 14:40:00\"")
73 | }
74 |
75 | "should allow define a custom date format for LocalDate fields" in {
76 | test(Row("Hello", "25/12/2015"), LocalDateHolder("Hello", LocalDate.parse("2015-12-25")))
77 | }
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Sparrow
2 |
3 | Sparrow is a Scala library for converting Spark DataFrame rows to case classes.
4 |
5 | [](https://travis-ci.org/mediative/sparrow)
6 | [](https://bintray.com/ypg-data/maven/sparrow/_latestVersion)
7 |
8 | ## Status
9 |
10 | The project is still experimental and the API is subject to change
11 | without concern for backward compatibility.
12 |
13 | ## Requirements
14 |
15 | This library requires Spark 1.3+.
16 |
17 | ## Limitations and Known Issues
18 |
19 | - Fields of type `java.sql.Timestamp` are not supported.
20 | - Custom wrapper field types are not supported.
21 | - Conversion of certain other field types is not supported.
22 |
23 | See the [CodecLimitationsTest](core/src/test/scala/com.mediative.sparrow/CodecLimitationsTest.scala) for details.
24 |
25 | ## Getting Started
26 |
27 | The best way to get started at this point is to read the [API
28 | docs](https://mediative.github.io/sparrow/api) and look at the [examples in the
29 | tests](https://github.com/mediative/sparrow/tree/master/core/src/test/scala/com.mediative.sparrow).
30 |
31 | To use the library in an SBT project, add the following two project settings:
32 |
33 | resolvers += Resolver.bintrayRepo("ypg-data", "maven")
34 | libraryDependencies += "com.mediative" %% "sparrow" % "0.2.0"
35 |
36 | ## Building and Testing
37 |
38 | This library is built with SBT, which needs to be installed. To run the tests
39 | and build a JAR, run the following commands from the project root:
40 |
41 | $ sbt test
42 | $ sbt package
43 |
44 | To build a package for Scala 2.11 run the following command:
45 |
46 | $ sbt ++2.11.7 test package
47 |
48 | See [CONTRIBUTING.md](CONTRIBUTING.md) for how to contribute.
49 |
50 | ## Releasing
51 |
52 | To release version `x.y.z` run:
53 |
54 | $ sbt release -Dversion=x.y.z
55 |
56 | This will take care of running tests, tagging and publishing JARs and API docs
57 | for both Scala 2.10 and 2.11. To publish the Spark package run:
58 |
59 | $ sbt core/spPublish
60 | $ sbt ++2.11.7 core/spPublish
61 |
62 | The above requires that `~/.credentials/spark-packages.properties` exists with
63 | the following content:
64 |
65 | realm=Spark Packages
66 | host=spark-packages.org
67 | user=$GITHUB_USERNAME
68 | # Generate token at https://github.com/settings/tokens
69 | password=$GITHUB_PERSONAL_ACCESS_TOKEN
70 |
71 | If you see the following error, go to
72 | [http://spark-packages.org/](http://spark-packages.org/) and log in to grant
73 | access to your GitHub account:
74 |
75 | /opt/sparrow#master > sbt core/spPublish
76 | ...
77 | Zip File created at: /opt/sparrow/core/target/sparrow-0.2.0-s_2.10.zip
78 |
79 | ERROR: 404 - Error while accessing commit on Github. Are you sure that you pushed your local commit to the remote repository?
80 |
81 | ## License
82 |
83 | Copyright 2016 Mediative
84 |
85 | Licensed under the Apache License, Version 2.0. See LICENSE file for terms and
86 | conditions for use, reproduction, and distribution.
87 |
--------------------------------------------------------------------------------
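
To complement the Getting Started and Building sections of the README above, here is a minimal end-to-end usage sketch pieced together from the tests in this repository. The `Example` object, the app name and the JSON input are illustrative only; it assumes the `sparrow` dependency from the README and a local Spark 1.x context:

    import org.apache.spark.{ SparkConf, SparkContext }
    import org.apache.spark.sql.SQLContext

    import com.mediative.sparrow._
    import com.mediative.sparrow.syntax.df._ // adds DataFrame#toRDD

    case class Simple(name: String, count: Long)

    object Simple {
      // Derive the RowConverter for Simple with the createSchema macro.
      implicit val schema: RowConverter[Simple] = DataFrameReader.createSchema
    }

    object Example {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext("local", "sparrow-example", new SparkConf)
        val sqlContext = new SQLContext(sc)

        val df = sqlContext.read.json(sc.parallelize(Seq(
          """{"name": "First", "count": 121}""",
          """{"name": "Second", "count": 12}"""
        )))

        // toRDD returns a scalaz Validation: either the converted RDD or the
        // accumulated schema errors.
        df.toRDD[Simple] match {
          case scalaz.Success(rdd)    => rdd.collect().foreach(println)
          case scalaz.Failure(errors) => sys.error(errors.stream.mkString(". "))
        }

        sc.stop()
      }
    }
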
/core/src/test/scala/com.mediative.sparrow/RowConverterEmbeddedSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import org.apache.spark.sql.Row
20 | import org.apache.spark.sql.types._
21 | import org.scalatest._
22 |
23 | import scalaz.{ Failure, Success }
24 |
25 | import com.github.nscala_time.time.Imports._
26 |
27 | class RowConverterEmbeddedSpec extends FreeSpec {
28 |
29 | trait Tpe[T] extends (() => StructType)
30 | implicit def toTpe[T](tpe: StructType): Tpe[T] = new Tpe[T] {
31 | def apply() = tpe
32 | }
33 |
34 | case class Advertiser(
35 | name: String,
36 | currency: String,
37 | id: Long,
38 | status: String)
39 |
40 | case class Creative(
41 | name: String,
42 | id: Option[Long],
43 | integrationCode: Long,
44 | source: String,
45 | status: String)
46 |
47 | case class ReportRow(
48 | @embedded("Advertiser") advertiser: Advertiser,
49 | clicks: Long,
50 | @embedded("Creative") creative: Creative,
51 | date: LocalDate,
52 | dfaPlacementId: Option[Long])
53 |
54 | object ReportRow {
55 |
56 | implicit val schema: RowConverter[ReportRow] = DataFrameReader.createSchema(opts)
57 | private def opts = SchemaOptions(RowConverter.lenientEqual) {
58 | case "Creativename" => "Creative"
59 | case "Advertisername" => "Advertiser"
60 | }
61 |
62 | implicit val tpe: Tpe[ReportRow] = StructType(List(
63 | StructField("Advertiser", StringType, nullable = false),
64 | StructField("Advertiser_Currency", StringType, nullable = false),
65 | StructField("Advertiser_ID", LongType, nullable = false),
66 | StructField("Advertiser_Status", StringType, nullable = false),
67 | StructField("Clicks", LongType, nullable = false),
68 | StructField("Creative", StringType, nullable = false),
69 | StructField("Creative_ID", LongType, nullable = false),
70 | StructField("Creative_Integration_Code", LongType, nullable = false),
71 | StructField("Creative_Source", StringType, nullable = false),
72 | StructField("Creative_Status", StringType, nullable = false),
73 | StructField("DATE", StringType, nullable = false),
74 | StructField("DFA_Placement_ID", LongType, nullable = false)
75 | ))
76 | }
77 |
78 | def test[T](row: Row, expected: T)(implicit schema: RowConverter[T], tpe: Tpe[T]) = {
79 | schema.validateAndApply(tpe()) match {
80 | case Success(f) => assert(f(row) == expected)
81 | case Failure(errors) => fail(errors.stream.mkString(". "))
82 | }
83 | }
84 |
85 | "@embedded" - {
86 | "allow to transform a flat structure to a DAG" in {
87 |
88 | val expected = ReportRow(
89 | Advertiser("Hello", "CAD", 123L, "ACTIVE"),
90 | 123514L,
91 | Creative("Creative Name", None, 13L, "Source!", "ACTIVE"),
92 | new LocalDate(2014, 10, 14),
93 | Some(124L)
94 | )
95 | val row = Row(
96 | "Hello", "CAD", 123L, "ACTIVE", 123514L,
97 | "Creative Name", null, 13L, "Source!", "ACTIVE",
98 | "2014-10-14", 124L
99 | )
100 | test(row, expected)
101 | }
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/core/src/main/scala/com.mediative.sparrow/RowConverter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import scalaz._
20 | import Scalaz.ToApplyOps
21 | import scalaz.syntax.validation._
22 |
23 | import org.apache.spark.sql._
24 | import org.apache.spark.sql.types._
25 |
26 | import play.api.libs.functional.{ Applicative => PApplicative, Functor => PFunctor, FunctionalBuilderOps }
27 |
28 | import Alias._
29 |
30 | trait RowConverter[T] extends (StructType => V[Row => T]) with Serializable { self =>
31 | def validateFields(fields: Set[String]): (V[Unit], Set[String])
32 |
33 | def validateStruct(schema: StructType): V[Unit] = {
34 | val (v, others) = validateFields(schema.fieldNames.toSet)
35 | val extraFields =
36 | if (others.isEmpty) ().success
37 | else s"There are extra fields: $others".failureNel
38 |
39 | (v |@| extraFields) { (_, _) => () }
40 | }
41 |
42 | def map[U](f: T => U): RowConverter[U] = new RowConverter[U] {
43 | override def validateFields(fields: Set[String]) = self.validateFields(fields)
44 | override def apply(tpe: StructType): V[Row => U] = {
45 | for {
46 | g <- self(tpe)
47 | } yield {
48 | g andThen f
49 | }
50 | }
51 | }
52 |
53 | def validateAndApply(tpe: StructType): V[Row => T] = {
54 | import scalaz.Validation.FlatMap._
55 | validateStruct(tpe) flatMap { _ =>
56 | apply(tpe)
57 | }
58 | }
59 | }
60 |
61 | object RowConverter {
62 |
63 | object syntax {
64 | import play.api.libs.functional.syntax.functionalCanBuildApplicative
65 |
66 | implicit def toFunctionalBuilderOps[A](a: RowConverter[A]): FunctionalBuilderOps[RowConverter, A] = {
67 | val cbf = functionalCanBuildApplicative(RowConverterApplicative)
68 | play.api.libs.functional.syntax.toFunctionalBuilderOps(a)(cbf)
69 | }
70 | }
71 |
72 | implicit object RowConverterApplicative extends PApplicative[RowConverter] with PFunctor[RowConverter] {
73 | def pure[A](a: A): RowConverter[A] = new RowConverter[A] {
74 | override def validateFields(fields: Set[String]) = (().success, fields)
75 | override def apply(tpe: StructType) = Success(_ => a)
76 | }
77 |
78 | def fmap[A, B](m: RowConverter[A], f: A => B): RowConverter[B] = map(m, f)
79 | def map[A, B](m: RowConverter[A], f: A => B): RowConverter[B] = m.map(f)
80 |
81 | def apply[A, B](mf: RowConverter[A => B], ma: RowConverter[A]): RowConverter[B] = new RowConverter[B] {
82 | override def validateFields(fields: Set[String]) = {
83 | val (v1, fs1) = mf.validateFields(fields)
84 | val (v2, fs2) = ma.validateFields(fs1)
85 | (v1 |@| v2)((_, _) => ()) -> fs2
86 | }
87 | override def apply(tpe: StructType): V[Row => B] = {
88 | (ma(tpe) |@| mf(tpe)) { (ra, rab) =>
89 | (row: Row) => rab(row)(ra(row))
90 | }
91 | }
92 | }
93 | }
94 |
95 | val lenientEqual: Equal[String] = {
96 | def normalize(s: String) = s.replaceAllLiterally("_", "").toLowerCase
97 | Equal.equal { (a, b) =>
98 | normalize(a) == normalize(b)
99 | }
100 | }
101 |
102 | def field[T](name: String, equal: Equal[String] = Equal.equalA)(implicit fc: FieldConverter[T]): RowConverter[T] =
103 | new RowConverter[T] {
104 | override def validateFields(fields: Set[String]): (V[Unit], Set[String]) = {
105 | val (named, others) = fields.partition(equal.equal(_, name))
106 |
107 | val v =
108 | if (named.isEmpty && !fc.isNullable) s"The field '$name' is missing".failureNel
109 | else ().success
110 | v -> others
111 | }
112 |
113 | override def apply(tpe: StructType): V[Row => T] = {
114 | val fieldName = tpe.fieldNames.find(equal.equal(name, _)) getOrElse {
115 | if (fc.isNullable) name
116 | else sys.error(
117 | s"""
118 | |Assertion failure, the field should have been validated to exist.
119 | |Field name: $name, StructType: $tpe.
120 | |""".stripMargin)
121 | }
122 | fc(NamedStruct(fieldName, tpe))
123 | }
124 | }
125 | }
126 |
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/RowConverterFieldNameAdjustmentsSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import org.apache.spark.sql.Row
20 | import org.apache.spark.sql.types._
21 | import org.scalatest._
22 |
23 | import RowConverter._
24 | import RowConverter.syntax._
25 |
26 | import scalaz.{ Failure, Success }
27 |
28 | class RowConverterFieldNameAdjustmentsSpec extends FreeSpec {
29 |
30 | import ConverterTester._
31 |
32 | case class Simple(name: String, twoWords: Long)
33 |
34 | object Simple {
35 | implicit val schema: RowConverter[Simple] = (
36 | field[String]("name", lenientEqual) and
37 | field[Long]("twoWords", lenientEqual)
38 | )(apply _)
39 |
40 | implicit val tpe: Tpe[Simple] = StructType(List(
41 | StructField("Name", StringType, nullable = false),
42 | StructField("two_words", LongType, nullable = false)
43 | ))
44 | }
45 |
46 | case class SimpleLenient(name: String, twoWords: Long)
47 |
48 | object SimpleLenient {
49 | implicit val schema: RowConverter[SimpleLenient] = DataFrameReader.createSchema(lenientEqual)
50 |
51 | implicit val tpe: Tpe[SimpleLenient] = StructType(List(
52 | StructField("Name", StringType, nullable = false),
53 | StructField("two_words", LongType, nullable = false)
54 | ))
55 | }
56 |
57 | case class SimplePartialFunction(name: String, count: Long)
58 |
59 | object SimplePartialFunction {
60 |
61 | private val opt: PartialFunction[String, String] = {
62 | case "name" => "ID"
63 | }
64 | implicit val schema: RowConverter[SimplePartialFunction] = DataFrameReader.createSchema(opt)
65 |
66 | implicit val tpe: Tpe[SimplePartialFunction] = StructType(List(
67 | StructField("ID", StringType, nullable = false),
68 | StructField("count", LongType, nullable = false)
69 | ))
70 | }
71 |
72 | "lenient equal" - {
73 | "should tolerate small differences in field names" in {
74 | test(Row("Hello", 3L), Simple("Hello", 3))
75 | }
76 |
77 | "should be usable by macros" in {
78 | test(Row("Hello", 3L), SimpleLenient("Hello", 3))
79 | }
80 | }
81 |
82 | "name transformer" - {
83 | "should allow to use a different name for the case class than the JSON file" in {
84 | test(Row("Hello", 3L), SimplePartialFunction("Hello", 3))
85 | }
86 | }
87 |
88 | case class SimpleFieldOption(@fieldName("description") name: String, id: Long)
89 |
90 | object SimpleFieldOption {
91 | implicit val schema: RowConverter[SimpleFieldOption] = DataFrameReader.createSchema
92 |
93 | implicit val tpe: Tpe[SimpleFieldOption] = StructType(List(
94 | StructField("description", StringType, nullable = false),
95 | StructField("id", LongType, nullable = false)
96 | ))
97 | }
98 |
99 | case class SimpleFieldOptionOuter(id: Long, @embedded("Inner") inner: SimpleFieldOptionInner)
100 | case class SimpleFieldOptionInner(@fieldName("") name: String, id: Long)
101 |
102 | object SimpleFieldOptionOuter {
103 | implicit val schema: RowConverter[SimpleFieldOptionOuter] = DataFrameReader.createSchema(lenientEqual)
104 |
105 | implicit val tpe: Tpe[SimpleFieldOptionOuter] = StructType(List(
106 | StructField("id", LongType, nullable = false),
107 | StructField("Inner", StringType, nullable = false),
108 | StructField("Inner_ID", LongType, nullable = false)
109 | ))
110 | }
111 |
112 | object SimpleFieldOptionInner {
113 | implicit val schema: RowConverter[SimpleFieldOptionInner] = DataFrameReader.createSchema(lenientEqual)
114 |
115 | implicit val tpe: Tpe[SimpleFieldOptionInner] = StructType(List(
116 | StructField("", StringType, nullable = false),
117 | StructField("ID", LongType, nullable = false)
118 | ))
119 | }
120 |
121 | "field annotation" - {
122 | "should allow to rename the field" in {
123 | test(Row("Hello", 3L), SimpleFieldOption("Hello", 3))
124 | }
125 |
126 | "should allow to use empty field name" in {
127 | test(Row("Hello", 3L), SimpleFieldOptionInner("Hello", 3))
128 | }
129 |
130 | "should allow to use empty field name for embedded fields" in {
131 | test(Row(42L, "Hello", 3L), SimpleFieldOptionOuter(42, SimpleFieldOptionInner("Hello", 3)))
132 | }
133 | }
134 | }
135 |
--------------------------------------------------------------------------------
/core/src/main/scala/com.mediative.sparrow/FieldConverter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import scala.util.control.NonFatal
20 | import scala.math.BigDecimal
21 |
22 | import scalaz._
23 | import scalaz.syntax.validation._
24 |
25 | import org.apache.spark.sql._
26 | import org.apache.spark.sql.types._
27 |
28 | import com.github.nscala_time.time.Imports._
29 | import org.joda.time.format.DateTimeFormatter
30 |
31 | import Alias._
32 |
33 | case class NamedStruct(name: String, tpe: StructType) {
34 | def index = tpe.fieldNames.indexOf(name)
35 | def field = tpe.fields.lift(index) getOrElse {
36 | sys.error(
37 | s"Cannot find field '$name' in fields: ${tpe.fields.toList}" +
38 | s"(field names: ${tpe.fieldNames.toList}, index: $index)")
39 | }
40 |
41 | def description: String = s"$name ($field)"
42 | def nullCheck(row: Row): Unit = {
43 | if (row.isNullAt(index))
44 | throw new NullPointerException(s"The field $description is missing.")
45 | }
46 | }
47 |
48 | trait FieldConverter[T] extends (NamedStruct => V[Row => T]) with Serializable { self =>
49 | def isNullable: Boolean = false
50 |
51 | def map[U](f: T => U) = new FieldConverter[U] {
52 | override def isNullable = self.isNullable
53 | override def apply(struct: NamedStruct): V[Row => U] =
54 | self.apply(struct).map { _ andThen f }
55 | }
56 | }
57 |
58 | object FieldConverter {
59 |
60 | def convert[A: FieldConverter, B](f: A => B) = reader[A].map(f)
61 |
62 | def reader[T](implicit fc: FieldConverter[T]): FieldConverter[T] = fc
63 |
64 | def simple[T](tpe: DataType, f: (Row, Int) => T): FieldConverter[T] = new FieldConverter[T] {
65 | override def apply(struct: NamedStruct): V[Row => T] = {
66 | val index = struct.index
67 | val field = struct.field
68 | if (field.dataType != tpe)
69 | s"The field '${struct.name}' isn't a $tpe as expected, ${field.dataType} received.".failureNel
70 | else Success { row =>
71 | struct.nullCheck(row)
72 | try f(row, index)
73 | catch {
74 | case NonFatal(e) =>
75 | throw new RuntimeException(s"Failed to read the field ${struct.description}.", e)
76 | }
77 | }
78 | }
79 | }
80 |
81 | implicit def stringConverter: FieldConverter[String] = FieldConverter.simple(StringType, _.getString(_))
82 | implicit def intConverter: FieldConverter[Int] = FieldConverter.simple(IntegerType, _.getInt(_))
83 | implicit def longConverter: FieldConverter[Long] = FieldConverter.simple(LongType, _.getLong(_))
84 | implicit def doubleConverter: FieldConverter[Double] = FieldConverter.simple(DoubleType, _.getDouble(_))
85 | implicit def bigDecimalConverter: FieldConverter[BigDecimal] = FieldConverter.simple(DecimalType.Unlimited, _.getDecimal(_))
86 | implicit def bigIntConverter: FieldConverter[BigInt] = FieldConverter.reader[BigDecimal].map(_.toBigInt)
87 |
88 | implicit def localDateConverter: FieldConverter[LocalDate] = stringConverter.map(LocalDate.parse)
89 | implicit def dateTimeConverter: FieldConverter[DateTime] = stringConverter.map(DateTime.parse)
90 | implicit def dateTimeConverterFromString(pattern: String): FieldConverter[DateTime] = DatePattern(pattern)
91 | implicit def dateTimeConverterFromFmt(fmt: DateTimeFormatter): FieldConverter[DateTime] = DatePattern(fmt)
92 | implicit def localDateConverterFromString(pattern: String): FieldConverter[LocalDate] = DatePattern(pattern)
93 | implicit def localDateConverterFromFmt(fmt: DateTimeFormatter): FieldConverter[LocalDate] = DatePattern(fmt)
94 |
95 | import java.sql.Timestamp
96 | implicit def timestampConverter: FieldConverter[Timestamp] = longConverter.map(new Timestamp(_))
97 |
98 | implicit def optionConverter[T](implicit fc: FieldConverter[T]): FieldConverter[Option[T]] =
99 | new FieldConverter[Option[T]] {
100 | override def isNullable: Boolean = true
101 | override def apply(struct: NamedStruct): V[Row => Option[T]] = {
102 | import struct.index
103 | if (index == -1) Success(row => None)
104 | else fc(struct) map { f => row => Some(row).filterNot(_.isNullAt(index)).map(f) }
105 | }
106 | }
107 |
108 | implicit def fieldConverter[T](implicit rc: RowConverter[T]): FieldConverter[T] =
109 | new FieldConverter[T] {
110 | override def apply(struct: NamedStruct): V[Row => T] = {
111 | import struct.index
112 | val dt = struct.field.dataType
113 | dt match {
114 | case tpe: StructType =>
115 | rc.validateAndApply(tpe) map { f =>
116 | row =>
117 | struct.nullCheck(row)
118 | f(row.getAs[Row](index))
119 | }
120 | case _ => s"StructType expected, received: $dt".failureNel
121 | }
122 | }
123 | }
124 |
125 | implicit def dateTimeFieldConverter(x: UnixTimestamp.type): FieldConverter[DateTime] =
126 | FieldConverter.longConverter.map { seconds =>
127 | new DateTime(seconds * 1000)
128 | }
129 |
130 | implicit def dateTimeFieldConverter(x: JavaTimestamp.type): FieldConverter[DateTime] =
131 | FieldConverter.longConverter.map { millis =>
132 | new DateTime(millis)
133 | }
134 | }
135 |
136 | case object UnixTimestamp
137 | case object JavaTimestamp
138 |
139 | case class DatePattern(fmt: DateTimeFormatter)
140 |
141 | object DatePattern {
142 | def apply(pattern: String): DatePattern = DatePattern(DateTimeFormat.forPattern(pattern))
143 |
144 | implicit def toDateTimeFieldConverter(dtp: DatePattern): FieldConverter[DateTime] = {
145 | FieldConverter.stringConverter.map(dtp.fmt.parseDateTime)
146 | }
147 |
148 | implicit def toLocalDateFieldConverter(dtp: DatePattern): FieldConverter[LocalDate] = {
149 | FieldConverter.stringConverter.map(dtp.fmt.parseLocalDate)
150 | }
151 | }
152 |
--------------------------------------------------------------------------------
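
FieldConverter instances compose with map, which is how custom field types can be layered on top of the built-in converters (the same pattern as the EnumProvider trait in DataFrameReaderMacroTest.scala further down). A minimal sketch, using a hypothetical Status type for illustration:

    import com.mediative.sparrow.FieldConverter

    // Hypothetical domain type, for illustration only.
    sealed trait Status
    case object Active extends Status
    case object Paused extends Status

    object Status {
      // Reuse the built-in String converter and map its result into Status.
      implicit val converter: FieldConverter[Status] =
        FieldConverter.reader[String].map {
          case "ACTIVE" => Active
          case "PAUSED" => Paused
          case other    => sys.error(s"Unknown status: $other")
        }
    }

With such a converter in implicit scope, a case class field of type Status can be read by `field[Status](...)` or by the `createSchema` macro, in the same way the Pet/PetType test below relies on its implicit FieldConverter.
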
/core/src/test/scala/com.mediative.sparrow/DataFrameReaderMacroFailureTest.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import org.scalatest._
20 | import Matchers._
21 |
22 | class DataFrameReaderMacroFailureTest extends FreeSpec {
23 |
24 | "createSchema" - {
25 |
26 | "works for a proper case class" in {
27 | """
28 | import com.mediative.sparrow._
29 | object Wrapper {
30 | class Simple(first: String, second: Int)
31 | object Simple {
32 | val options = SchemaOptions()
33 | implicit val schema = DataFrameReader.createSchema[Simple](options)
34 | def apply(f: String, s: Int) = new Simple(f, s)
35 | }
36 | }
37 | """ should compile
38 | }
39 |
40 | def checkError(error: exceptions.TestFailedException)(expected: String) = {
41 | assert(error.getMessage.startsWith(
42 | s"""Expected no compiler error, but got the following type error: "$expected", for code:"""))
43 | }
44 |
45 | "fail for less two two fields for a case class" in {
46 | val error = intercept[exceptions.TestFailedException] {
47 | """
48 | import com.mediative.sparrow._
49 | object Wrapper {
50 | case class Simple(first: String)
51 | object Simple {
52 | val options = SchemaOptions()
53 | DataFrameReader.createSchema[Simple](options)
54 | }
55 | }
56 | """ should compile
57 | }
58 |
59 | checkError(error) {
60 | "Only case classes with more than one field are supported."
61 | }
62 | }
63 |
64 | "fail if T doesn't have an apply method" in {
65 | val error = intercept[exceptions.TestFailedException] {
66 | """
67 | import com.mediative.sparrow._
68 | object Wrapper {
69 | class Simple(first: String, second: Int)
70 | object Simple {
71 | val options = SchemaOptions()
72 | DataFrameReader.createSchema[Simple](options)
73 | }
74 | }
75 | """ should compile
76 | }
77 |
78 | checkError(error) {
79 | "Cannot find an apply method with the proper signature."
80 | }
81 | }
82 |
83 | "fail if T doesn't have an apply method with the proper return type" in {
84 | val error = intercept[exceptions.TestFailedException] {
85 | """
86 | import com.mediative.sparrow._
87 | object Wrapper {
88 | class Simple(first: String, second: Int)
89 | object Simple {
90 | val options = SchemaOptions()
91 | DataFrameReader.createSchema[Simple](options)
92 | def apply(first: String, second: Int) = first + second
93 | }
94 | }
95 | """ should compile
96 | }
97 |
98 | checkError(error) {
99 | "Cannot find an apply method with the proper signature."
100 | }
101 | }
102 |
103 | "fail if T doesn't have an apply method with the proper argument count" in {
104 | val error = intercept[exceptions.TestFailedException] {
105 | """
106 | import com.mediative.sparrow._
107 | object Wrapper {
108 | class Simple(first: String, second: Int)
109 | object Simple {
110 | val options = SchemaOptions()
111 | DataFrameReader.createSchema[Simple](options)
112 | def apply(first: String) = new Simple(first, 3)
113 | }
114 | }
115 | """ should compile
116 | }
117 |
118 | checkError(error) {
119 | "Cannot find an apply method with the proper signature."
120 | }
121 | }
122 |
123 | "fail if T doesn't have an apply method with the proper argument types" in {
124 | val error = intercept[exceptions.TestFailedException] {
125 | """
126 | import com.mediative.sparrow._
127 | object Wrapper {
128 | class Simple(first: String, second: Int)
129 | object Simple {
130 | val options = SchemaOptions()
131 | DataFrameReader.createSchema[Simple](options)
132 | def apply(second: Int, first: String) = new Simple(first, second)
133 | }
134 | }
135 | """ should compile
136 | }
137 |
138 | checkError(error) {
139 | "Cannot find an apply method with the proper signature."
140 | }
141 | }
142 |
143 | "fail if @embedded and @fieldOptions is used on the same field" in {
144 | val error = intercept[exceptions.TestFailedException] {
145 | """
146 | import com.mediative.sparrow._
147 | object Wrapper {
148 | case class Outer(first: String, @embedded("prefix") @fieldName("Inner") inner: Inner)
149 | case class Inner(first: String, second: Int)
150 | object Outer {
151 | val options = SchemaOptions()
152 | DataFrameReader.createSchema[Outer](options)
153 | }
154 | }
155 | """ should compile
156 | }
157 |
158 | checkError(error) {
159 | "@embedded and @fieldName or @fieldOption cannot be used on the same field."
160 | }
161 | }
162 |
163 | "fail if @schema is used on a class that isn't a case class" in {
164 | val error = intercept[exceptions.TestFailedException] {
165 | """
166 | import com.mediative.sparrow._
167 | object Wrapper {
168 | @schema
169 | class Simple(first: String, second: Int)
170 | }
171 | """ should compile
172 | }
173 |
174 | checkError(error) {
175 | "The @schema annotation only support public case classes."
176 | }
177 | }
178 |
179 | "fail if @schema is used on something else than a case class" in {
180 | val error = intercept[exceptions.TestFailedException] {
181 | """
182 | import com.mediative.sparrow._
183 | object Wrapper {
184 | @schema
185 | val first: String = "first"
186 | }
187 | """ should compile
188 | }
189 |
190 | checkError(error) {
191 | "The @schema annotation only support public case classes."
192 | }
193 | }
194 | }
195 | }
196 |
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/CodecLimitationsTest.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import java.sql.Timestamp
20 | import scala.reflect.ClassTag
21 |
22 | import org.scalatest._
23 |
24 | import org.apache.spark.{ SparkContext, SparkConf }
25 | import org.apache.spark.rdd.RDD
26 | import org.apache.spark.sql._
27 | import org.apache.spark.sql.types._
28 |
29 | /**
30 | * Reusable base trait for codec limitation specs.
31 | */
32 | trait CodecLimitationsTestBase extends FreeSpec with BeforeAndAfterAll {
33 |
34 | val sc = new SparkContext("local", "test2", new SparkConf)
35 | val sqlContext = new SQLContext(sc)
36 |
37 | override def afterAll() = sc.stop()
38 |
39 | // To get DataFrame#toRDD usage.
40 | import com.mediative.sparrow.syntax.df._
41 | import scala.reflect.runtime.universe.TypeTag
42 |
43 | def assertCodec[T <: Product: ClassTag: TypeTag: RowConverter](value: T): Unit = {
44 | val rdd0 = sc.parallelize(List(value))
45 | assertResult(1) { rdd0.count }
46 | val df = sqlContext.createDataFrame(rdd0)
47 | val rdd1Maybe = df.toRDD[T]
48 | assert(rdd1Maybe.isSuccess, rdd1Maybe)
49 | val rdd1 = rdd1Maybe.toOption.get
50 | assertResult(0) { rdd0.subtract(rdd1).count }
51 | assertResult(0) { rdd1.subtract(rdd0).count }
52 | }
53 | }
54 |
55 | /**
56 | * By design, toRDD requires the classes it works on to take at least two
57 | * public constructor arguments.
58 | */
59 | object CodecLimitationsTest {
60 |
61 | /*
62 | * FIXME: Replace @schema annotations for the following case classes
63 | * by direct use of DSL to decouple its failure from that of any
64 | * regressions wrt @schema annotation.
65 | */
66 |
67 | @schema(equal = RowConverter.lenientEqual)
68 | case class TestToRdd1(intVal: Int, stringVal: String)
69 |
70 | @schema(equal = RowConverter.lenientEqual)
71 | case class TestToRdd2(intVal: Int, intOptionVal: Option[Int])
72 |
73 | @schema(equal = RowConverter.lenientEqual)
74 | case class TestToRdd3(stringVal: String, timestampVal: Timestamp)
75 |
76 | @schema(equal = RowConverter.lenientEqual)
77 | case class TestToRdd4(intVal: Int, doubleVal: Double)
78 |
79 | @schema(equal = RowConverter.lenientEqual)
80 | case class TestToRdd5(intVal: Int, doubleVal: Option[Double])
81 |
82 | import Wrap._
83 | @schema(equal = RowConverter.lenientEqual)
84 | case class TestToRdd6(intVal: Int, wrappedDoubleVal: Option[Wrapped[Double]])
85 |
86 | @schema(equal = RowConverter.lenientEqual)
87 | case class TestToRdd7(intVal: Int, wrappedStringVal: Option[Wrapped[String]])
88 |
89 | @schema(equal = RowConverter.lenientEqual)
90 | case class TestToRdd8(intVal: Int, wrappedStringVal: Wrapped[String])
91 |
92 | object Wrap {
93 | case class Wrapped[T](unwrap: T)
94 | implicit def wrappedDoubleConverter[T: FieldConverter]: FieldConverter[Wrapped[T]] =
95 | FieldConverter.reader[T].map(Wrapped(_))
96 | }
97 | }
98 |
99 | /**
100 | * Specifications to track current limitations related to marshalling.
101 | *
102 | * By design, toRDD requires the classes it works on to take at least two
103 | * public constructor arguments.
104 | */
105 | class CodecLimitationsTest extends CodecLimitationsTestBase {
106 | import CodecLimitationsTest._
107 |
108 | "toRDD should" - {
109 |
110 | import DataFrameReader._
111 |
112 | "successfully marshall RDD => DataFrame => RDD an object containing" - {
113 | "Int, String" in {
114 | assertCodec(TestToRdd1(1, "a"))
115 | }
116 |
117 | "Int, Option[Int]" - {
118 | "when Some(Int)" in {
119 | assertCodec(TestToRdd2(1, Option(1)))
120 | }
121 |
122 | "when None" in {
123 | assertCodec(TestToRdd2(1, Option.empty))
124 | }
125 | }
126 |
127 | "String, java.sql.Timestamp" in {
128 | pendingUntilFixed {
129 | // FIXME:
130 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'timestampVal' isn't a LongType as expected, TimestampType received.)) (DataFrameReaderTest.scala:203)
131 | assertCodec(
132 | TestToRdd3("a", Timestamp.valueOf("2015-07-15 09:00:00"))
133 | )
134 | }
135 | }
136 |
137 | "Int, Option[Double]" - {
138 | "when Some(Double)" in {
139 | assertCodec(TestToRdd5(1, Some(2.0)))
140 | }
141 | "when None" in {
142 | assertCodec(TestToRdd5(1, None))
143 | }
144 | }
145 |
146 | "Int, Wrapped[String]" in {
147 | pendingUntilFixed {
148 | // FIXME:
149 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'wrappedStringVal' isn't a StringType as expected, StructType(StructField(unwrap,StringType,true)) received.)) (DataFrameReaderTest.scala:207)
150 | assertCodec(TestToRdd8(1, Wrap.Wrapped("foo")))
151 | }
152 | }
153 |
154 | "Int, Option[Wrapped[Double]]" - {
155 | "when None" in {
156 | pendingUntilFixed {
157 | // FIXME:
158 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'wrappedDoubleVal' isn't a DoubleType as expected, StructType(StructField(unwrap,DoubleType,false)) received.)) (DataFrameReaderTest.scala:207)
159 | assertCodec(TestToRdd6(1, None))
160 | }
161 | }
162 | "when Some(Wrapped[Double])" in {
163 | pendingUntilFixed {
164 | // FIXME:
165 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'wrappedDoubleVal' isn't a DoubleType as expected, StructType(StructField(unwrap,DoubleType,false)) received.)) (DataFrameReaderTest.scala:204)
166 | assertCodec(TestToRdd6(1, Some(Wrap.Wrapped(2.0))))
167 | }
168 | }
169 | }
170 |
171 | "Int, Option[Wrapped[String]]" - {
172 | "when None" in {
173 | pendingUntilFixed {
174 | // FIXME:
175 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'wrappedStringVal' isn't a StringType as expected, StructType(StructField(unwrap,StringType,true)) received.)) (DataFrameReaderTest.scala:204)
176 | assertCodec(TestToRdd7(1, None))
177 | }
178 | }
179 | "when Some(Wrapped[String])" in {
180 | pendingUntilFixed {
181 | // FIXME:
182 | // rdd1Maybe.isSuccess was false Failure(NonEmptyList(The field 'wrappedStringVal' isn't a StringType as expected, StructType(StructField(unwrap,StringType,true)) received.)) (DataFrameReaderTest.scala:204)
183 | assertCodec(TestToRdd7(1, Some(Wrap.Wrapped("foo"))))
184 | }
185 | }
186 | }
187 | }
188 | }
189 | }
190 |
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/DataFrameReaderMacroTest.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import scala.reflect.ClassTag
20 |
21 | import scalaz._
22 | import scalaz.syntax.validation._
23 |
24 | import org.scalatest._
25 |
26 | import org.apache.spark.{ SparkContext, SparkConf }
27 | import org.apache.spark.sql._
28 | import org.apache.spark.sql.types._
29 |
30 | import Alias._
31 |
32 | object DataFrameReaderMacroTest {
33 |
34 | case class Simple(name: String, count: Long)
35 |
36 | object Simple {
37 | implicit val schema: RowConverter[Simple] = DataFrameReader.createSchema
38 | }
39 |
40 | case class WithSimpleOption(name: String, count: Long, description: Option[String])
41 |
42 | object WithSimpleOption {
43 | implicit val schema: RowConverter[WithSimpleOption] = DataFrameReader.createSchema
44 | }
45 |
46 | case class WithNested(name: String, inner: Simple, innerOpt: Option[WithSimpleOption])
47 |
48 | object WithNested {
49 | implicit val schema: RowConverter[WithNested] = DataFrameReader.createSchema
50 | }
51 |
52 | case class SimpleMap(name: String, count: Int)
53 |
54 | object SimpleMap {
55 | implicit val schema = DataFrameReader.createSchema[SimpleMap]
56 | }
57 |
58 | trait EnumProvider[T] extends (String => T) {
59 | implicit val schema: FieldConverter[T] =
60 | FieldConverter.reader[String].map(apply)
61 | }
62 |
63 | sealed abstract class PetType extends Serializable
64 | case object Dog extends PetType
65 | case object Cat extends PetType
66 | case object Hamster extends PetType
67 |
68 | object PetType extends EnumProvider[PetType] {
69 | def apply(name: String): PetType = name match {
70 | case "dog" => Dog
71 | case "cat" => Cat
72 | case "hamster" => Hamster
73 | }
74 | }
75 |
76 | case class Pet(name: String, `type`: PetType)
77 |
78 | object Pet {
79 | implicit val schema: RowConverter[Pet] = DataFrameReader.createSchema
80 | }
81 | }
82 |
83 | class DataFrameReaderMacroTest extends FreeSpec with BeforeAndAfterAll {
84 |
85 | import DataFrameReaderMacroTest._
86 |
87 | val sc = new SparkContext("local", "test2", new SparkConf)
88 |
89 | override def afterAll() = sc.stop()
90 |
91 | "RowConverter" - {
92 |
93 | def testSerialization(obj: Any) = {
94 | import java.io._
95 | val buf = new ByteArrayOutputStream()
96 | val out = new ObjectOutputStream(buf)
97 | out.writeObject(obj)
98 | out.flush()
99 |
100 | val in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray))
101 | assert(obj.getClass == in.readObject().getClass)
102 | }
103 |
104 | "can be serialized" in {
105 | val simple = StructType(Seq(StructField("name", StringType), StructField("count", LongType)))
106 | val withNested = StructType(Seq(
107 | StructField("name", StringType),
108 | StructField("inner", simple),
109 | StructField("innerOpt", simple, nullable = true)))
110 |
111 | Simple.schema.validateAndApply(simple) match {
112 | case Success(f) =>
113 | testSerialization(f)
114 | testSerialization(f(Row("Name", 12L)))
115 | case Failure(e) => fail(e.toString)
116 | }
117 |
118 | WithNested.schema.validateAndApply(withNested) match {
119 | case Success(f) =>
120 | testSerialization(f)
121 | testSerialization(f(Row("Name", Row("Name", 12L), null)))
122 | case Failure(e) => fail(e.toString)
123 | }
124 | }
125 | }
126 |
127 | "toRDD" - {
128 | import DataFrameReader._
129 |
130 | def testSuccess[T: RowConverter: ClassTag](json: Array[String], expected: List[T]) = {
131 | val sqlContext = new SQLContext(sc)
132 | val df = sqlContext.read.json(sc.parallelize(json))
133 | val rdd = toRDD[T](df).valueOr { es => fail((es.head :: es.tail).mkString("\n")) }
134 |
135 | assert(rdd.collect().toList == expected)
136 | }
137 |
138 | def testFailure[T: RowConverter: ClassTag](json: Array[String], expected: NonEmptyList[String]) = {
139 | val sqlContext = new SQLContext(sc)
140 | val df = sqlContext.read.json(sc.parallelize(json))
141 |
142 | assert(toRDD[T](df) == expected.failure)
143 | }
144 |
145 | "work for simple case class with only primitives" in {
146 | val json = Array(
147 | """{"name": "First's Inner", "count": 121}""",
148 | """{"name": "Last's inner", "count": 12}"""
149 | )
150 | val expected = List(
151 | Simple("First's Inner", count = 121),
152 | Simple("Last's inner", count = 12)
153 | )
154 |
155 | testSuccess(json, expected)
156 | }
157 |
158 | "support optional fields" - {
159 | "when completely missing from the json" in {
160 | val json = Array(
161 | """{"name": "First's name", "count": 121}""",
162 | """{"name": "Last's name", "count": 12}"""
163 | )
164 | val expected = List(
165 | WithSimpleOption("First's name", count = 121, None),
166 | WithSimpleOption("Last's name", count = 12, None)
167 | )
168 |
169 | testSuccess(json, expected)
170 | }
171 | "when partially present in the json" in {
172 | val json = Array(
173 | """{"name": "First's name", "count": 121, "description": "abc"}""",
174 | """{"name": "Last's name", "count": 12}"""
175 | )
176 | val expected = List(
177 | WithSimpleOption("First's name", count = 121, Some("abc")),
178 | WithSimpleOption("Last's name", count = 12, None)
179 | )
180 |
181 | testSuccess(json, expected)
182 | }
183 | }
184 |
185 | "supported nested objects" in {
186 | val json = Array(
187 | """{"name": "Guillaume", "inner": {"name": "First Inner", "count": 121}}""",
188 | """{"name": "Last", "inner": {"name": "Last Inner", "count": 12}}"""
189 | )
190 | val expected = List(
191 | WithNested("Guillaume", Simple("First Inner", 121), None),
192 | WithNested("Last", Simple("Last Inner", 12), None)
193 | )
194 |
195 | testSuccess(json, expected)
196 | }
197 |
198 | "validate extra fields" in {
199 | val json = Array(
200 | """{"name": "Guillaume", "inner": {"name": "First's Inner", "count": 121, "abc": 244}}""",
201 | """{"name": "Last", "inner": {"name": "Last's inner", "count": 12}}"""
202 | )
203 |
204 | testFailure[WithNested](json, NonEmptyList.nel("There are extra fields: Set(abc)", Nil))
205 | }
206 |
207 | "validate mixed type for a field with conversion possible (e.g. same colum has both String and Int)" in {
208 | val json = Array(
209 | """{"name": "First's Inner", "count": 121}""",
210 | """{"name": 2, "count": 12}"""
211 | )
212 | val expected = List(
213 | Simple("First's Inner", count = 121),
214 | Simple("2", count = 12)
215 | )
216 |
217 | testSuccess(json, expected)
218 | }
219 |
220 | "validate mixed type for a field without conversion possible (e.g. same colum has both String and Int)" in {
221 | val json = Array(
222 | """{"name": "First's Inner", "count": 121}""",
223 | """{"name": "Second", "count": "12"}"""
224 | )
225 | val expected = List(
226 | Simple("First's Inner", count = 121),
227 | Simple("Second", count = 12)
228 | )
229 |
230 | testFailure[Simple](json, NonEmptyList.nel(
231 | "The field 'count' isn't a LongType as expected, StringType received.", Nil))
232 | }
233 |
234 | "work with ADT enums" in {
235 | val json = Array(
236 | """{"name": "Chausette", "type": "dog"}""",
237 | """{"name": "Mixcer", "type": "cat"}"""
238 | )
239 | val expected = List(
240 | Pet("Chausette", Dog),
241 | Pet("Mixcer", Cat)
242 | )
243 |
244 | testSuccess(json, expected)
245 | }
246 | }
247 |
248 | }
249 |
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/SchemaSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import org.apache.spark.sql.Row
20 | import org.apache.spark.sql.types._
21 | import org.scalatest._
22 | import ConverterTester._
23 |
24 | import com.github.nscala_time.time.Imports._
25 |
26 | object SchemaSpec {
27 |
28 | @schema
29 | case class WithoutCompanion(name: String, count: Long)
30 |
31 | implicit val tpe: Tpe[WithoutCompanion] = StructType(List(
32 | StructField("name", StringType, nullable = false),
33 | StructField("count", LongType, nullable = false)
34 | ))
35 |
36 | @schema
37 | case class WithCompanion(name: String, count: Long)
38 |
39 | object WithCompanion {
40 | implicit val tpeWithCompanion: Tpe[WithCompanion] = StructType(List(
41 | StructField("name", StringType, nullable = false),
42 | StructField("count", LongType, nullable = false)
43 | ))
44 | }
45 |
46 | @schema
47 | case class WithFieldOptions(
48 | @fieldName("NAME") name: String,
49 | count: Long)
50 |
51 | implicit val tpeWithFieldOptions: Tpe[WithFieldOptions] = StructType(List(
52 | StructField("NAME", StringType, nullable = false),
53 | StructField("count", LongType, nullable = false)
54 | ))
55 |
56 | @schema
57 | case class WithBody(name: String, count: Long) {
58 | override def toString = name
59 | def description = s"$name($count)"
60 | }
61 |
62 | object WithBody {
63 | implicit val tpe: Tpe[WithBody] = StructType(List(
64 | StructField("name", StringType, nullable = false),
65 | StructField("count", LongType, nullable = false)
66 | ))
67 |     // FIXME: the create methods should be named apply,
68 |     // but the (apply _) syntax used by the macro
69 |     // doesn't allow apply to be overloaded,
70 |     // so the function would have to be called directly
71 |     // in the generated code instead of
72 |     // using partial application.
73 | def create: WithBody = create("World")
74 | def create(name: String): WithBody = WithBody(name, 10)
75 | }
76 |
77 | @schema
78 | case class WithSingleStatement(name: String, count: Long) {
79 | def description = s"$name($count)"
80 | }
81 |
82 | object WithSingleStatement {
83 | implicit val tpe: Tpe[WithSingleStatement] = StructType(List(
84 | StructField("name", StringType, nullable = false),
85 | StructField("count", LongType, nullable = false)
86 | ))
87 | }
88 |
89 | @schema(equal = RowConverter.lenientEqual)
90 | case class Lenient(name: String, count: Long)
91 |
92 | object Lenient {
93 | implicit val tpe: Tpe[Lenient] = StructType(List(
94 | StructField("NAME", StringType, nullable = false),
95 | StructField("Count", LongType, nullable = false)
96 | ))
97 | }
98 |
99 | @schema({
100 | case "name" => "NAME"
101 | })
102 | case class WithPF(name: String, count: Long)
103 |
104 | object WithPF {
105 | implicit val tpe: Tpe[WithPF] = StructType(List(
106 | StructField("NAME", StringType, nullable = false),
107 | StructField("count", LongType, nullable = false)
108 | ))
109 | }
110 |
111 | @schema({ case "name" => "id" }, equal = RowConverter.lenientEqual)
112 | case class WithSchemaOptions(name: String, count: Long)
113 |
114 | object WithSchemaOptions {
115 | implicit val tpe: Tpe[WithSchemaOptions] = StructType(List(
116 | StructField("ID", StringType, nullable = false),
117 | StructField("Count", LongType, nullable = false)
118 | ))
119 | }
120 |
121 | case class EmbeddedChild(@fieldName("") name: String, count: Long)
122 |
123 | @schema(equal = RowConverter.lenientEqual)
124 | case class Parent(name: String, @embedded(prefix = "Child") child: EmbeddedChild)
125 |
126 | object Parent {
127 | implicit val tpe: Tpe[Parent] = StructType(List(
128 | StructField("Name", StringType, nullable = false),
129 | StructField("Child", StringType, nullable = false),
130 | StructField("Child_Count", LongType, nullable = false)
131 | ))
132 | }
133 |
134 | @schema
135 | case class DateTimeHolder(
136 | name: String,
137 | @fieldOption(DatePattern("dd/MM/yyyy HH:mm:ss")) dateTime: DateTime)
138 |
139 | object DateTimeHolder {
140 | implicit val tpe: Tpe[DateTimeHolder] = StructType(List(
141 | StructField("name", StringType, nullable = false),
142 | StructField("dateTime", StringType, nullable = false)
143 | ))
144 | }
145 |
146 | @schema
147 | case class LocalDateHolder(
148 | name: String,
149 | @fieldOption("dd/MM/yyyy") dateTime: LocalDate)
150 |
151 | object LocalDateHolder {
152 | implicit val tpe: Tpe[LocalDateHolder] = StructType(List(
153 | StructField("name", StringType, nullable = false),
154 | StructField("dateTime", StringType, nullable = false)
155 | ))
156 | }
157 |
158 | @schema
159 | case class UnixDateHolder(
160 | name: String,
161 | @fieldOption(UnixTimestamp) dateTime: DateTime)
162 |
163 | object UnixDateHolder {
164 | implicit val tpe: Tpe[UnixDateHolder] = StructType(List(
165 | StructField("name", StringType, nullable = false),
166 | StructField("dateTime", LongType, nullable = false)
167 | ))
168 | }
169 |
170 | @schema
171 | case class JavaTimestampHolder(
172 | name: String,
173 | @fieldOption(JavaTimestamp) dateTime: DateTime)
174 |
175 | object JavaTimestampHolder {
176 | implicit val tpe: Tpe[JavaTimestampHolder] = StructType(List(
177 | StructField("name", StringType, nullable = false),
178 | StructField("dateTime", LongType, nullable = false)
179 | ))
180 | }
181 | }
182 |
183 | class SchemaSpec extends FreeSpec {
184 |
185 | import SchemaSpec._
186 |
187 | "@schema" - {
188 | "should generate an implicit schema in an existing companion object" in {
189 | test(Row("Hello", 1L), WithCompanion("Hello", 1))
190 | }
191 |
192 | "should generate an implicit schema in a new companion object" in {
193 | test(Row("Hello", 1L), WithoutCompanion("Hello", 1))
194 | }
195 |
196 |     "should support @fieldName" in {
197 | test(Row("Hello", 1L), WithFieldOptions("Hello", 1))
198 | }
199 |
200 | "should support case class with multiple statements in the body" in {
201 | // Making sure that no method was removed from WithBody
202 | assert(WithBody.create.description == "World(10)")
203 | test(Row("Hello", 1L), WithBody("Hello", 1))
204 | }
205 |
206 | "should support case class with a single statement in the body" in {
207 | test(Row("Hello", 1L), WithSingleStatement("Hello", 1))
208 | }
209 |
210 | "should support lenient equal" in {
211 | test(Row("Hello", 1L), Lenient("Hello", 1))
212 | }
213 |
214 | "should support partial function rename" in {
215 | test(Row("Hello", 1L), WithPF("Hello", 1))
216 | }
217 |
218 | "should support both lenient equal and partial function rename on the same case class" in {
219 | test(Row("Hello", 1L), WithSchemaOptions("Hello", 1))
220 | }
221 |
222 | "should support @embedded" in {
223 | test(Row("Hello", "World", 1L), Parent("Hello", EmbeddedChild("World", 1)))
224 | }
225 |
226 | "should support @fieldOption with DatePattern as option" in {
227 | test(Row("Hello", "25/12/2015 14:40:00"), DateTimeHolder("Hello", DateTime.parse("2015-12-25T14:40:00.00")))
228 | }
229 | "should support @fieldOption with a string as option" in {
230 | test(Row("Hello", "25/12/2015"), LocalDateHolder("Hello", LocalDate.parse("2015-12-25")))
231 | }
232 | "should support @fieldOption with a unix timestamp" in {
233 | val seconds = System.currentTimeMillis / 1000
234 | test(Row("Hello", seconds), UnixDateHolder("Hello", new DateTime(seconds * 1000)))
235 | }
236 | "should support @fieldOption with a java timestamp" in {
237 | val now = System.currentTimeMillis
238 | test(Row("Hello", now), JavaTimestampHolder("Hello", new DateTime(now)))
239 | }
240 | }
241 | }
242 |
--------------------------------------------------------------------------------
/core/src/test/scala/com.mediative.sparrow/DataFrameReaderTest.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import scala.reflect.ClassTag
20 |
21 | import scalaz._
22 | import scalaz.syntax.validation._
23 |
24 | import org.scalatest._
25 |
26 | import org.apache.spark.{ SparkContext, SparkConf }
27 | import org.apache.spark.rdd.RDD
28 | import org.apache.spark.sql._
29 | import org.apache.spark.sql.types._
30 |
31 | import play.api.libs.functional.syntax._
32 |
33 | import RowConverter._
34 |
35 | object DataFrameReaderTest {
36 | case class Simple(name: String, count: Long)
37 |
38 | object Simple {
39 | implicit val schema = (
40 | field[String]("name") and
41 | field[Long]("count")
42 | )(apply _)
43 | }
44 |
45 | case class WithSimpleOption(name: String, count: Long, description: Option[String])
46 |
47 | object WithSimpleOption {
48 | implicit val schema = (
49 | field[String]("name") and
50 | field[Long]("count") and
51 | field[Option[String]]("description")
52 | )(apply _)
53 | }
54 |
55 | case class WithNested(name: String, inner: Simple, innerOpt: Option[WithSimpleOption])
56 |
57 | object WithNested {
58 | implicit val schema = (
59 | field[String]("name") and
60 | field[Simple]("inner") and
61 | field[Option[WithSimpleOption]]("innerOpt")
62 | )(apply _)
63 | }
64 |
65 | case class SimpleMap(name: String, count: Int)
66 |
67 | object SimpleMap {
68 | implicit val schema = (
69 | field[String]("name") and
70 | field[String]("count").map(_.toInt)
71 | )(apply _)
72 | }
73 |
74 | sealed abstract class PetType
75 | case object Dog extends PetType
76 | case object Cat extends PetType
77 | case object Hamster extends PetType
78 |
79 | object PetType {
80 | implicit val schema: FieldConverter[PetType] =
81 | FieldConverter.reader[String].map {
82 | case "dog" => Dog
83 | case "cat" => Cat
84 | case "hamster" => Hamster
85 | }
86 | }
87 |
88 | case class Pet(name: String, tpe: PetType)
89 |
90 | object Pet {
91 | implicit val schema = (
92 | field[String]("name") and
93 | field[PetType]("type")
94 | )(apply _)
95 | }
96 | }
97 |
98 | class DataFrameReaderTest extends FreeSpec with BeforeAndAfterAll {
99 |
100 | import DataFrameReaderTest._
101 |
102 | val sc = new SparkContext("local", "test2", new SparkConf)
103 |
104 | override def afterAll() = sc.stop()
105 |
106 | "RowConverter" - {
107 |
108 | def testSerialization(obj: Any) = {
109 | import java.io._
110 | val buf = new ByteArrayOutputStream()
111 | val out = new ObjectOutputStream(buf)
112 | out.writeObject(obj)
113 | out.flush()
114 |
115 | val in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray))
116 | assert(obj.getClass == in.readObject().getClass)
117 | }
118 |
119 | "can be serialized" in {
120 | val simple = StructType(Seq(StructField("name", StringType), StructField("count", LongType)))
121 | val withNested = StructType(Seq(
122 | StructField("name", StringType),
123 | StructField("inner", simple),
124 | StructField("innerOpt", simple, nullable = true)))
125 |
126 | Simple.schema.validateAndApply(simple) match {
127 | case Success(f) =>
128 | testSerialization(f)
129 | testSerialization(f(Row("Name", 12L)))
130 | case Failure(e) => fail(e.toString)
131 | }
132 |
133 | WithNested.schema.validateAndApply(withNested) match {
134 | case Success(f) =>
135 | testSerialization(f)
136 | testSerialization(f(Row("Name", Row("Name", 12L), null)))
137 | case Failure(e) => fail(e.toString)
138 | }
139 | }
140 | }
141 |
142 | "toRDD should" - {
143 |
144 | import DataFrameReader._
145 |
146 | def testSuccess[T: RowConverter: ClassTag](json: Array[String], expected: List[T]) = {
147 | val sqlContext = new SQLContext(sc)
148 | val df = sqlContext.read.json(sc.parallelize(json))
149 | val rdd = toRDD[T](df).valueOr { es => fail((es.head :: es.tail).mkString("\n")) }
150 |
151 | assert(rdd.collect().toList == expected)
152 | }
153 |
154 | def testFailure[T: RowConverter: ClassTag](json: Array[String], expected: NonEmptyList[String]) = {
155 | val sqlContext = new SQLContext(sc)
156 | val df = sqlContext.read.json(sc.parallelize(json))
157 |
158 | assert(toRDD[T](df) == expected.failure)
159 | }
160 |
161 | "work for simple case class with only primitives" in {
162 | val json = Array(
163 | """{"name": "First's Inner", "count": 121}""",
164 | """{"name": "Last's inner", "count": 12}"""
165 | )
166 | val expected = List(
167 | Simple("First's Inner", count = 121),
168 | Simple("Last's inner", count = 12)
169 | )
170 |
171 | testSuccess(json, expected)
172 | }
173 |
174 | "support optional fields" - {
175 | "when completely missing from the json" in {
176 | val json = Array(
177 | """{"name": "First's name", "count": 121}""",
178 | """{"name": "Last's name", "count": 12}"""
179 | )
180 | val expected = List(
181 | WithSimpleOption("First's name", count = 121, None),
182 | WithSimpleOption("Last's name", count = 12, None)
183 | )
184 |
185 | testSuccess(json, expected)
186 | }
187 | "when partially present in the json" in {
188 | val json = Array(
189 | """{"name": "First's name", "count": 121, "description": "abc"}""",
190 | """{"name": "Last's name", "count": 12}"""
191 | )
192 | val expected = List(
193 | WithSimpleOption("First's name", count = 121, Some("abc")),
194 | WithSimpleOption("Last's name", count = 12, None)
195 | )
196 |
197 | testSuccess(json, expected)
198 | }
199 | }
200 |
201 | "support nested objects" in {
202 | val json = Array(
203 | """{"name": "Guillaume", "inner": {"name": "First Inner", "count": 121}}""",
204 | """{"name": "Last", "inner": {"name": "Last Inner", "count": 12}}"""
205 | )
206 | val expected = List(
207 | WithNested("Guillaume", Simple("First Inner", 121), None),
208 | WithNested("Last", Simple("Last Inner", 12), None)
209 | )
210 |
211 | testSuccess(json, expected)
212 | }
213 |
214 | "validate extra fields" in {
215 | val json = Array(
216 | """{"name": "Guillaume", "inner": {"name": "First's Inner", "count": 121, "abc": 244}}""",
217 | """{"name": "Last", "inner": {"name": "Last's inner", "count": 12}}"""
218 | )
219 |
220 | testFailure[WithNested](json, NonEmptyList.nel("There are extra fields: Set(abc)", Nil))
221 | }
222 |
223 |     "validate mixed type for a field with conversion possible (e.g. same column has both String and Int)" in {
224 | val json = Array(
225 | """{"name": "First's Inner", "count": 121}""",
226 | """{"name": 2, "count": 12}"""
227 | )
228 | val expected = List(
229 | Simple("First's Inner", count = 121),
230 | Simple("2", count = 12)
231 | )
232 |
233 | testSuccess(json, expected)
234 | }
235 |
236 |     "validate mixed type for a field without conversion possible (e.g. same column has both String and Int)" in {
237 | val json = Array(
238 | """{"name": "First's Inner", "count": 121}""",
239 | """{"name": "Second", "count": "12"}"""
240 | )
241 | val expected = List(
242 | Simple("First's Inner", count = 121),
243 | Simple("Second", count = 12)
244 | )
245 |
246 | testFailure[Simple](json, NonEmptyList.nel(
247 | "The field 'count' isn't a LongType as expected, StringType received.", Nil))
248 | }
249 |
250 | "work with ADT enums" in {
251 | val json = Array(
252 | """{"name": "Chausette", "type": "dog"}""",
253 | """{"name": "Mixcer", "type": "cat"}"""
254 | )
255 | val expected = List(
256 | Pet("Chausette", Dog),
257 | Pet("Mixcer", Cat)
258 | )
259 |
260 | testSuccess(json, expected)
261 | }
262 | }
263 | }
264 |
--------------------------------------------------------------------------------
/core/src/main/scala/com.mediative.sparrow/DataFrameReader.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 Mediative
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.mediative.sparrow
18 |
19 | import language.experimental.macros
20 | import reflect.macros.Context
21 | import scala.annotation.StaticAnnotation
22 | import scala.reflect.internal.annotations.compileTimeOnly
23 | import scala.reflect.runtime.universe._
24 | import scala.reflect.ClassTag
25 |
26 | import scalaz.{ Equal, ValidationNel }
27 |
28 | import org.apache.spark.rdd.RDD
29 | import org.apache.spark.sql._
30 |
31 | object Alias {
32 | type V[T] = ValidationNel[String, T]
33 | }
34 | import Alias._
35 |
36 | package syntax {
37 | object df extends ToDataFrameOps
38 | }
39 |
40 | trait ToDataFrameOps {
41 | implicit def ToDataFrameOpsFromDataFrame(a: DataFrame): DataFrameOps = new DataFrameOps(a)
42 | }
43 |
44 | final class DataFrameOps(val self: DataFrame) extends AnyVal {
45 | def toRDD[T: ClassTag: RowConverter]: V[RDD[T]] = {
46 | DataFrameReader.toRDD[T](self)
47 | }
48 | }
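
// Usage sketch (illustrative; `dataFrame` and `Person` are hypothetical, and an implicit
// RowConverter[Person] is assumed to be in scope, e.g. one derived via @schema below):
//
//   import com.mediative.sparrow.syntax.df._
//
//   val people: V[RDD[Person]] = dataFrame.toRDD[Person]
//
// where V[RDD[Person]] is ValidationNel[String, RDD[Person]], as defined by the Alias object above.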
49 |
50 | case class SchemaOptions(
51 | nameTransformer: PartialFunction[String, String] = PartialFunction.empty,
52 | equal: Equal[String] = Equal.equalA) {
53 | def transform(name: String): String = nameTransformer.applyOrElse(name, identity[String])
54 | }
55 |
56 | object SchemaOptions {
57 | implicit def defaultOptions: SchemaOptions = SchemaOptions()
58 | implicit def fromEqual(equal: Equal[String]): SchemaOptions = SchemaOptions(equal = equal)
59 | implicit def fromTransformer(transformer: PartialFunction[String, String]): SchemaOptions =
60 | SchemaOptions(nameTransformer = transformer)
61 |
62 | def apply(equal: Equal[String])(nameTransformer: PartialFunction[String, String]): SchemaOptions =
63 | SchemaOptions(nameTransformer, equal)
64 |
65 | }
66 |
67 | class embedded(prefix: String = "") extends StaticAnnotation
68 | class fieldName(name: String) extends StaticAnnotation
69 | class fieldOption(option: Any) extends StaticAnnotation
70 |
71 | @compileTimeOnly("This annotation requires macro paradise.")
72 | class schema(
73 | nameTransformer: PartialFunction[String, String] = PartialFunction.empty,
74 | equal: Equal[String] = Equal.equalA)
75 | extends StaticAnnotation {
76 |
77 | def macroTransform(annottees: Any*): Any = macro DataFrameReader.annotationImpl
78 | }
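
// Usage sketch, mirroring the SchemaSpec tests: the annotation arguments are forwarded to
// the SchemaOptions constructor by the macro, so both forms below are supported.
//
//   @schema
//   case class Simple(name: String, count: Long)
//
//   @schema({ case "name" => "id" }, equal = RowConverter.lenientEqual)
//   case class WithSchemaOptions(name: String, count: Long)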
79 |
80 | object DataFrameReader {
81 | def toRDD[T: ClassTag](df: DataFrame)(implicit rc: RowConverter[T]): V[RDD[T]] = {
82 | rc.validateAndApply(df.schema).map { f => df.map(f) }
83 | }
84 |
85 | /**
86 |    * This method uses a macro to inspect the case class's AST and generate code similar to:
87 | *
88 | *
89 | * implicit val schema = (
90 | * field[String]("name") and
91 | * field[Int]("count")
92 | * )(apply _)
93 | *
94 | *
95 |    * This macro also supports an @embedded annotation that treats a field as another case class
96 |    * to embed in the parent one instead of relying on its own schema. Given case classes like:
97 | *
98 | *
99 | * case class Child(firstName: String, lastName: String)
100 | * case class Parent(id: Int, @embedded("prefix_") child: Child)
101 | *
102 | *
103 |    * the macro will generate code similar to:
104 | *
105 | *
106 | * implicit val schema = (
107 |    *       field[Int]("id") and
108 | * (
109 | * field[String]("prefix_firstName") and
110 | * field[String]("prefix_lastName")
111 |    *       )(Child.apply _)
112 | * )(apply _)
113 | *
114 | *
115 |    * The SchemaOptions case class also allows further customization related to field names.
116 | * Taking into account the options, the generated code will be closer to:
117 | *
118 | *
119 | * implicit val schema = (
120 | * field[String](options.transform("name"), options.equal) and
121 | * field[Int](options.transform("count"), options.equal)
122 | * )(apply _)
123 | *
124 | *
125 | * The instance of Equal will be used to find a match between the case class field name and
126 | * the DataFrame's field name. The transform function uses the provided partial function
127 |    * to support things like more specific renaming of fields. The unit tests provide examples.
128 | *
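   * As a concrete sketch of how this ties into the @schema annotation (see annotationImpl
   * below), annotating a case class:
   *
   *     @schema
   *     case class Simple(name: String, count: Long)
   *
   * makes the annotation macro add, to a new or existing companion object, roughly:
   *
   *     implicit val __schema = DataFrameReader.createSchema[Simple](new SchemaOptions)
   *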
129 | * @tparam T the type of the case class for which to generate a RowConverter[T] composite
130 | */
131 | def createSchema[T](implicit options: SchemaOptions): RowConverter[T] = macro createSchemaImpl[T]
132 |
133 | def createSchemaImpl[T: c.WeakTypeTag](c: Context)(options: c.Expr[SchemaOptions]): c.Expr[RowConverter[T]] = {
134 | import c.universe._
135 |
136 |     val embeddedType = weakTypeOf[embedded]
137 | val fieldNameType = weakTypeOf[fieldName]
138 | val fieldOptionsType = weakTypeOf[fieldOption]
139 |
140 | val optionsDeclaration = q"val options = $options"
141 | val optionsName = optionsDeclaration match {
142 | case q"val $name = $value" => name
143 | }
144 |
145 | def converter(tpe: Type, prefix: Option[Tree]): Tree = {
146 |
147 | val declarations = tpe.declarations
148 | val ctor = declarations.collectFirst {
149 | case m: MethodSymbol if m.isPrimaryConstructor => m
150 | } getOrElse {
151 | val msg = "Cannot find the primary constructor for type " + tpe
152 | c.abort(c.enclosingPosition, msg)
153 | }
154 |
155 | val params = ctor.paramss.head
156 | val fields = params.map { p =>
157 | val fieldType = tpe.declaration(p.name).typeSignature
158 | val name = p.name.decoded
159 |
160 | val fieldNameOpt = p.annotations.find(_.tpe == fieldNameType)
161 | val fieldOptions = p.annotations.find(_.tpe == fieldOptionsType)
162 |
163 |       p.annotations.find(_.tpe == embeddedType).map { at =>
164 | if (fieldNameOpt.isDefined || fieldOptions.isDefined)
165 | c.abort(c.enclosingPosition, "@embedded and @fieldName or @fieldOption cannot be used on the same field.")
166 | converter(fieldType, at.scalaArgs.headOption)
167 | } getOrElse {
168 | val p = prefix.getOrElse(q""" "" """)
169 |
170 | val block = fieldNameOpt.map { at =>
171 | val fieldName = at.scalaArgs.head
172 | q"field[$fieldType]($p + $fieldName)"
173 | } getOrElse {
174 | q"field[$fieldType]($optionsName.transform($p + $name), $optionsName.equal)"
175 | }
176 |
177 | fieldOptions.fold(block) { fc =>
178 | q"$block(${fc.scalaArgs.head})"
179 | }
180 | }
181 | }.toList
182 |
183 | //
184 |       // This macro serves no purpose for case classes without fields, so they will never be supported.
185 |       //
186 |       // However, case classes with only one field are also not supported right now. While there could
187 |       // be some usefulness, there's generally not much value in a case class with only one field
188 |       // other than as a wrapper, in which case it should likely be serialized as a string,
189 |       // not as a single-field case class.
190 |       //
191 |       // If support is required in the future, it can be implemented. The reason it isn't supported
192 |       // by the current code is that the macro would generate something like:
193 |       //
194 |       //     implicit val schema = (
195 |       //       field[String]("name")
196 |       //     )(apply _)
197 |       //
198 |       // The problem with this is that without the `and`, the RowConverter isn't converted to a
199 |       // functional builder and the apply method isn't defined.
200 | //
201 | if (fields.size < 2) {
202 | c.error(c.enclosingPosition, "Only case classes with more than one field are supported.")
203 | }
204 |
205 | val composite = fields.reduceLeft { (left, right) => q"$left and $right" }
206 | val companion = tpe.typeSymbol.companionSymbol
207 | val applies = companion.asModule.typeSignature.members
208 | .filter(_.name.decoded == "apply")
209 | .filter(_.isMethod)
210 | val exists = applies
211 | .exists { apply =>
212 | val m = apply.asMethod
213 | m.returnType.typeSymbol == tpe.typeSymbol && {
214 | m.paramss match {
215 | case applyParams :: Nil =>
216 | fields.length == applyParams.length && {
217 | (params zip applyParams) forall {
218 | case (x, y) =>
219 | x.typeSignature == y.typeSignature
220 | }
221 | }
222 | case _ => false
223 | }
224 | }
225 | }
226 | if (!exists) {
227 | val msg =
228 | s"""
229 | | Cannot find an apply method with the proper signature.
230 | | tpe: $tpe
231 | | apply methods: $applies
232 | """.stripMargin
233 | c.info(c.enclosingPosition, msg, force = true)
234 | c.error(c.enclosingPosition, "Cannot find an apply method with the proper signature.")
235 | }
236 |
237 | q"$composite($companion.apply _)"
238 | }
239 |
240 | val tpe = implicitly[c.WeakTypeTag[T]].tpe
241 |
242 | val code = q"""
243 | import _root_.com.mediative.sparrow.RowConverter._
244 | import _root_.com.mediative.sparrow.RowConverter.syntax._
245 | $optionsDeclaration
246 | ${converter(tpe, None)}
247 | """
248 |
249 | c.Expr[RowConverter[T]](code)
250 | }
251 |
252 | def annotationImpl(c: Context)(annottees: c.Expr[Any]*): c.Expr[Any] = {
253 | import c.universe._
254 |
255 | val className = annottees.head.tree match {
256 | case q"case class $className(..$args) extends ..$parents { ..$body }" =>
257 | className
258 | case _ =>
259 |         c.abort(c.enclosingPosition, "The @schema annotation only supports public case classes.")
260 | }
261 |
262 | val tpe = className.toTermName
263 |
264 | val schemaOptionsType = c.weakTypeOf[SchemaOptions]
265 | val opts = c.prefix.tree match {
266 | case q"new $atName(..$args)" =>
267 | q"new $schemaOptionsType(..$args)"
268 | case _ =>
269 | q"new $schemaOptionsType"
270 | }
271 |
272 | val schema = q"""
273 | implicit val __schema = _root_.com.mediative.sparrow.DataFrameReader.createSchema[$className]($opts)
274 | """
275 |
276 | val companion = annottees.drop(1).headOption.map { obj =>
277 | val q"object $objectName extends ..$parents { $self => ..$body }" = obj.tree
278 | q"""
279 | object $objectName extends ..$parents { $self =>
280 | ..$body
281 | $schema
282 | }
283 | """
284 | } getOrElse {
285 | q"""
286 | object $tpe {
287 | $schema
288 | }
289 | """
290 | }
291 | c.Expr[Any](q"..${List(annottees.head.tree, companion)}")
292 | }
293 | }
294 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------