├── NOTICE ├── project ├── build.properties └── plugins.sbt ├── version.sbt ├── .jvmopts ├── catalog-info.yaml ├── .gitignore ├── benchmarking ├── README.md └── src │ └── main │ └── scala │ └── com │ └── spotify │ └── elitzur │ ├── BenchmarkCamelToSnake.scala │ ├── ProfilingAvro.scala │ ├── ValidationTypeTestingExamples.scala │ └── Benchmarking.scala ├── elitzur-core └── src │ ├── main │ └── scala │ │ └── com │ │ └── spotify │ │ └── elitzur │ │ ├── validators │ │ ├── ValidatorAccessor.scala │ │ ├── package.scala │ │ ├── ValidationConfig.scala │ │ ├── ValidationType.scala │ │ ├── Companion.scala │ │ ├── DynamicRecordValidator.scala │ │ ├── DerivedValidator.scala │ │ ├── Implicits.scala │ │ ├── ValidatorMacros.scala │ │ └── ValidationStatus.scala │ │ ├── types │ │ └── Owner.scala │ │ ├── CounterTypes.scala │ │ ├── Exceptions.scala │ │ ├── MetricsReporter.scala │ │ └── Utils.scala │ └── test │ └── scala │ └── com │ └── spotify │ └── elitzur │ ├── UtilTests.scala │ ├── validators │ ├── DynamicRecordValidatorTest.scala │ └── ValidatorTest.scala │ └── TestingTypes.scala ├── elitzur-scio └── src │ ├── main │ ├── avro │ │ ├── EnumType.avsc │ │ ├── DynamicType.avsc │ │ ├── TestNestedRecord.avsc │ │ ├── InnerNestedType.avsc │ │ ├── NullableNestedRecord.avsc │ │ ├── RepeatedRecord.avsc │ │ └── TestAvroTypes.avsc │ └── scala │ │ └── com │ │ └── spotify │ │ └── elitzur │ │ └── scio │ │ ├── package.scala │ │ ├── ScioMetricsReporter.scala │ │ ├── ValidatorDoFns.scala │ │ ├── AvroConverterDoFns.scala │ │ ├── Implicits.scala │ │ └── ElitzurMetrics.scala │ └── test │ └── scala │ └── com │ └── spotify │ └── elitzur │ ├── scio │ └── ElitzurMetricsTest.scala │ └── ValidatorDoFnTest.scala ├── .scala-steward.conf ├── elitzur-schemas └── src │ └── main │ └── avro │ ├── InnerNestedOut.avsc │ ├── InnerNestedType.avsc │ ├── TestAvroEnum.avsc │ ├── TestAvroOut.avsc │ ├── TestAvroTypes.avsc │ ├── TestAvroUnionTypes.avsc │ └── TestAvroArrayTypes.avsc ├── elitzur-avro └── src │ ├── main │ └── 
scala │ │ └── com │ │ └── spotify │ │ └── elitzur │ │ └── converters │ │ └── avro │ │ ├── package.scala │ │ ├── dynamic │ │ ├── dsl │ │ │ ├── AvroAccessorException.scala │ │ │ ├── FieldAccessor.scala │ │ │ ├── AvroAccessor.scala │ │ │ ├── AvroObjMapper.scala │ │ │ └── AvroAccessorLogics.scala │ │ ├── DynamicAccessorValidator.scala │ │ └── DynamicAccessorCompanion.scala │ │ ├── AvroElitzurConversionUtils.scala │ │ ├── ConverterMacros.scala │ │ └── Implicits.scala │ └── test │ └── scala │ └── com │ └── spotify │ └── elitzur │ ├── helpers │ ├── SampleAvroRecords.scala │ └── DynamicAccessorValidationUtil.scala │ ├── AvroFieldExtractorBaseTest.scala │ ├── DynamicAccessorValidationUnionTest.scala │ ├── AccessorOpToValidatorOpTest.scala │ ├── AvroFieldExtractorArrayTest.scala │ ├── DynamicAccessorValidationBaseTest.scala │ ├── DynamicAccessorValidationArrayTest.scala │ ├── AvroFieldExtractorUnionTest.scala │ └── AvroConverterTest.scala ├── .circleci └── config.yml ├── elitzur-examples └── src │ └── main │ └── scala │ └── com │ └── spotify │ └── elitzur │ └── examples │ ├── ExampleTypes.scala │ ├── ScioAvro.scala │ └── AvroBasic.scala ├── scalastyle-config.xml ├── CODE_OF_CONDUCT.md └── README.md /NOTICE: -------------------------------------------------------------------------------- 1 | Elitzur 2 | Copyright 2020 Spotify AB 3 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.7 2 | -------------------------------------------------------------------------------- /version.sbt: -------------------------------------------------------------------------------- 1 | ThisBuild / version := "0.7.5-SNAPSHOT" 2 | -------------------------------------------------------------------------------- /.jvmopts: -------------------------------------------------------------------------------- 1 | -Dfile.encoding=UTF8 2 | -Xms1G 3 | 
/**
 * Bundles a `Validator` together with a value of the type it validates and a string label.
 *
 * @param validator validator for values of type `T`
 * @param value the value paired with `validator`
 * @param label label carried alongside the pair
 *              NOTE(review): presumably the name of the field `value` came from -- confirm
 *              against callers
 * @tparam T type of the wrapped value
 */
case class ValidatorAccessor[T](validator: Validator[T], value: T, label: String)
/elitzur-scio/src/main/avro/DynamicType.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "DynamicType", 3 | "namespace": "com.spotify.skeleton.schema", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name": "stringField", 8 | "type": "string", 9 | "doc": "A string" 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /elitzur-scio/src/main/avro/TestNestedRecord.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "TestNestedRecord", 3 | "namespace": "com.spotify.skeleton.schema", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name": "inner", 8 | "type": "com.spotify.skeleton.schema.InnerNestedType" 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /.scala-steward.conf: -------------------------------------------------------------------------------- 1 | updates.ignore = [ 2 | { groupId = "com.softwaremill.magnolia1_2" }, 3 | { groupId = "com.spotify", artifactId = "ratatool-scalacheck" }, 4 | { groupId = "org.apache.avro" }, 5 | { groupId = "org.apache.beam" }, 6 | { groupId = "joda-time", artifactId = "joda-time" }, 7 | ] 8 | -------------------------------------------------------------------------------- /elitzur-schemas/src/main/avro/InnerNestedOut.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "InnerNestedOut", 3 | "namespace": "com.spotify.elitzur.schemas", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name": "countryCode", 8 | "type": "string", 9 | "doc": "{validationType: countryCode}" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /elitzur-schemas/src/main/avro/InnerNestedType.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "InnerNestedType", 3 | "namespace": 
"com.spotify.elitzur.schemas", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name": "userId", 8 | "type": "string", 9 | "doc": "The user id" 10 | }, 11 | { 12 | "name": "countryCode", 13 | "type": "string", 14 | "doc": "{validationType: countryCode}" 15 | }, 16 | { 17 | "name": "playCount", 18 | "type": "long", 19 | "doc": "The track play count for the given user" 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /elitzur-scio/src/main/avro/InnerNestedType.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "InnerNestedType", 3 | "namespace": "com.spotify.skeleton.schema", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name": "userId", 8 | "type": "string", 9 | "doc": "The user id" 10 | }, 11 | { 12 | "name": "countryCode", 13 | "type": "string", 14 | "doc": "{validationType: countryCode}" 15 | }, 16 | { 17 | "name": "playCount", 18 | "type": "long", 19 | "doc": "The track play count for the given user" 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /elitzur-schemas/src/main/avro/TestAvroEnum.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "TestAvroEnum", 3 | "namespace": "com.spotify.elitzur.schemas", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name": "testEnum", 8 | "type": { 9 | "type": "enum", 10 | "name": "TestAvroEnumValue", 11 | "symbols": ["snake_case_aaa", "snake_case_bbb", "snake_case_ccc"] 12 | } 13 | }, 14 | { 15 | "name": "optTestEnum", 16 | "type": ["null", "com.spotify.elitzur.schemas.TestAvroEnumValue"], 17 | "default": null 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /elitzur-schemas/src/main/avro/TestAvroOut.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "TestAvroOut", 3 | "namespace": 
"com.spotify.elitzur.schemas", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name": "userAge", 8 | "type": "long", 9 | "doc": "{validationType: age}" 10 | }, 11 | { 12 | "name": "userFloat", 13 | "type": "float", 14 | "doc": "floating away on the clouds of google" 15 | }, 16 | { 17 | "name": "userLong", 18 | "type": "long", 19 | "doc": "{validationType: nonNegativeLong}" 20 | }, 21 | { 22 | "name": "inner", 23 | "type": "com.spotify.elitzur.schemas.InnerNestedOut" 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /elitzur-scio/src/main/scala/com/spotify/elitzur/scio/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur 18 | 19 | package object scio extends Implicits 20 | -------------------------------------------------------------------------------- /elitzur-core/src/main/scala/com/spotify/elitzur/types/Owner.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
/**
 * Something that is identified by a name.
 *
 * NOTE(review): presumably the team or entity that owns a validation type -- confirm against
 * the implementations and usages of this trait.
 */
trait Owner {
  /** Name identifying this owner. */
  def name: String
}
/**
 * Package object for `com.spotify.elitzur.converters.avro`. Because it extends `Implicits`, the
 * members of that trait become members of this package.
 */
package object avro extends Implicits
/**
 * Enumeration of the two counter kinds: `Valid` and `Invalid`.
 */
object CounterTypes extends Enumeration {
  // Both values are declared in a single statement; ids follow declaration order
  // (Valid = 0, Invalid = 1).
  val Valid, Invalid = Value
}
/**
 * Sink for validation outcomes, reported one at a time as either valid or invalid.
 * Extends `Serializable`, so implementations are expected to be serializable.
 */
trait MetricsReporter extends Serializable {
  /**
   * Reports one valid outcome.
   *
   * @param className name of a class
   *                  NOTE(review): presumably the record type containing the field -- confirm
   * @param fieldName name of the validated field
   * @param validationTypeName name of the validation type that was applied
   */
  def reportValid(className: String, fieldName: String, validationTypeName: String): Unit

  /**
   * Reports one invalid outcome. Parameters have the same meaning as in `reportValid`.
   */
  def reportInvalid(className: String, fieldName: String, validationTypeName: String): Unit
}
/**
 * JMH benchmark for `Utils.camelToSnake`. Each invocation converts one fixed camelCase
 * identifier; results are reported as throughput with milliseconds as the time unit.
 */
class BenchmarkCamelToSnake {
  @Benchmark
  @BenchmarkMode(Array(Mode.Throughput))
  @OutputTimeUnit(TimeUnit.MILLISECONDS)
  def camelToSnake(): String =
    Utils.camelToSnake("testingInputCamelCaseStringForFunction")
}
See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur.example 18 | 19 | import com.spotify.elitzur.Utils._ 20 | import org.scalatest.flatspec.AnyFlatSpec 21 | import org.scalatest.matchers.should.Matchers 22 | 23 | class UtilTests extends AnyFlatSpec with Matchers { 24 | "camelToSnake" should "convert camelCase to snake_case" in { 25 | val inputs = Seq("myField", "inputFieldName", "nocamels") 26 | 27 | inputs.map(camelToSnake) should be (Seq("my_field", "input_field_name", "nocamels")) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /elitzur-scio/src/main/avro/NullableNestedRecord.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "NullableNestedRecord", 3 | "namespace": "com.spotify.skeleton.schema", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name": "inner", 8 | "type": [ 9 | "null", 10 | { 11 | "type": "record", 12 | "name": "NullableInner", 13 | "namespace": "com.spotify.skeleton.schema", 14 | "doc": "Inline defined nullable inner record", 15 | "fields": [ 16 | { 17 | "name": "userId", 18 | "type": "string", 19 | "doc": "The user id" 20 | }, 21 | { 22 | "name": "countryCode", 23 | "type": "string", 24 | "doc": "{validationType: countryCode}" 25 | }, 26 | { 27 | "name": "playCount", 28 | "type": "long", 29 | "doc": "The track play count for the given user" 30 | } 31 | ] 32 | } 33 | ], 34 | "default": null 35 | } 36 | ] 37 | } -------------------------------------------------------------------------------- /elitzur-avro/src/main/scala/com/spotify/elitzur/converters/avro/dynamic/dsl/AvroAccessorException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Spotify AB. 
/**
 * Exception type and error-message constants for the
 * `com.spotify.elitzur.converters.avro.dynamic.dsl` package.
 */
object AvroAccessorException {
  /**
   * Exception carrying `msg` as its message.
   * NOTE(review): presumably raised when a dynamic field path cannot be resolved against a
   * schema -- confirm against the throw sites.
   */
  class InvalidDynamicFieldException(msg: String) extends Exception(msg)

  // TODO: Update docs on Dynamic and Magnolia based Elitzur and link it to exception below
  // Message for Avro map schemas, which this message states are not supported here.
  final val UNSUPPORTED_MAP_SCHEMA =
    "Map schema not supported. Please use Magnolia version of Elitzur."

  // Message for union schemas that contain more than one non-null branch.
  final val INVALID_UNION_SCHEMA =
    "Union schemas containing more than one non-null schemas is not supported."

}
/**
 * [[MetricsReporter]] that records each outcome by incrementing the counter returned by
 * `ElitzurMetrics.getCounter` for the given class, field, validation type and counter kind.
 */
class ScioMetricsReporter extends MetricsReporter {
  /** Increments the `CounterTypes.Valid` counter for the given class/field/validation type. */
  override def reportValid(className: String, fieldName: String, validationType: String): Unit = {
    val validCounter =
      ElitzurMetrics.getCounter(className, fieldName, validationType, CounterTypes.Valid)
    validCounter.inc()
  }

  /** Increments the `CounterTypes.Invalid` counter for the given class/field/validation type. */
  override def reportInvalid(className: String, fieldName: String, validationType: String): Unit = {
    val invalidCounter =
      ElitzurMetrics.getCounter(className, fieldName, validationType, CounterTypes.Invalid)
    invalidCounter.inc()
  }
}
16 | */ 17 | addSbtPlugin("com.cavorite" % "sbt-avro-1-8" % "1.1.9") 18 | addSbtPlugin("com.etsy" % "sbt-checkstyle-plugin" % "3.1.1") 19 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") 20 | addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.7") 21 | addSbtPlugin("org.wartremover" % "sbt-wartremover" % "3.2.5") 22 | addSbtPlugin("com.github.sbt" % "sbt-git" % "2.1.0") 23 | addSbtPlugin("com.github.sbt" % "sbt-release" % "1.4.0") 24 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.12.2") 25 | addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.3.1") 26 | 27 | libraryDependencies ++= Seq( 28 | "com.spotify.checkstyle" % "spotify-checkstyle-config" % "1.0.10", 29 | "com.puppycrawl.tools" % "checkstyle" % "10.21.1" 30 | ) 31 | -------------------------------------------------------------------------------- /elitzur-core/src/main/scala/com/spotify/elitzur/Utils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
private[elitzur] object Utils {
  /** Matches, one character at a time, every ASCII upper-case letter and every ASCII digit. */
  val camelRegEx: Regex = "[A-Z\\d]".r

  /**
   * Bridges the naming mismatch between Avro field names (snake_case) and Scala field names
   * (camelCase).
   *
   * Every character matched by [[camelRegEx]] -- upper-case ASCII letters and digits -- is
   * lower-cased and prefixed with `_`. The one exception is a match on the very first character
   * of the input, which is lower-cased without a leading underscore. Examples:
   * `inputFieldName` => `input_field_name`, `MyField` => `my_field`, `field1` => `field_1`.
   *
   * Lower-casing uses `Locale.ROOT` so the result never depends on the JVM default locale
   * (under a Turkish default locale `"I".toLowerCase()` yields a dotless i, U+0131, which
   * would produce names that no longer match the Avro schema).
   *
   * @param i camelCase identifier to convert
   * @return the snake_case form of `i`; input without upper-case letters or digits is
   *         returned unchanged
   */
  private[elitzur] def camelToSnake(i: String): String = {
    camelRegEx.replaceAllIn(i, { m =>
      val lowered = m.group(0).toLowerCase(java.util.Locale.ROOT)
      // A match that ends at index 1 is the first character of the input: no separator needed.
      if (m.end(0) == 1) lowered else "_" + lowered
    })
  }
}
/** Root of all validation configuration objects; every configuration is serializable. */
trait ValidationConfig extends Serializable

/** Record-level configuration: resolves the field-level configuration for a field name. */
trait ValidationRecordConfig extends ValidationConfig {
  /** Returns the configuration that applies to the field called `name`. */
  def fieldConfig(name: String): ValidationFieldConfig
}

/**
 * [[ValidationRecordConfig]] backed by a map from field name to field configuration.
 *
 * @param m explicit per-field configurations, keyed by field name
 * @param default configuration returned for any field name absent from `m`
 */
class MapConfig(
  m: Map[String, ValidationFieldConfig],
  default: ValidationFieldConfig = DefaultFieldConfig
) extends ValidationRecordConfig {
  override def fieldConfig(name: String): ValidationFieldConfig =
    m.get(name) match {
      case Some(configured) => configured
      case None => default
    }
}

/** Record configuration with no per-field entries: every field gets [[DefaultFieldConfig]]. */
case object DefaultRecordConfig extends MapConfig(Map.empty)

object ValidationRecordConfig {
  /**
   * Builds a record configuration from `fieldName -> config` pairs. When a name occurs more
   * than once the last pair wins; unlisted fields resolve to [[DefaultFieldConfig]].
   */
  def apply(s: (String, ValidationFieldConfig)*): ValidationRecordConfig = {
    val perField = s.toMap
    new MapConfig(perField)
  }
}

/**
 * Closed set of field-level options.
 * NOTE(review): how each option is interpreted is decided by the validators that consume it
 * and is not visible here -- the names suggest "throw on invalid data", "skip metric
 * counters" and "default handling" respectively; confirm against the validator code.
 */
sealed trait ValidationFieldConfig extends ValidationConfig
case object ThrowException extends ValidationFieldConfig
case object NoCounter extends ValidationFieldConfig
case object DefaultFieldConfig extends ValidationFieldConfig
"type": "array", 43 | "items": { 44 | "type": "record", 45 | "name": "RepeatedInnerOptionalRecord", 46 | "doc": "repeated record field", 47 | "fields": [ 48 | { 49 | "name": "string_field", 50 | "type": "string", 51 | "doc": "inner string field" 52 | }, 53 | { 54 | "name": "long_field", 55 | "type": "long", 56 | "doc": "inner long field" 57 | } 58 | ] 59 | } 60 | }], 61 | "default": null 62 | } 63 | ] 64 | } 65 | -------------------------------------------------------------------------------- /elitzur-avro/src/main/scala/com/spotify/elitzur/converters/avro/dynamic/dsl/FieldAccessor.scala: -------------------------------------------------------------------------------- 1 | package com.spotify.elitzur.converters.avro.dynamic.dsl 2 | 3 | import com.spotify.elitzur.converters.avro.dynamic.{ 4 | ArrayValidatorOp, 5 | OptionValidatorOp, 6 | ValidatorOp 7 | } 8 | 9 | import scala.annotation.tailrec 10 | 11 | case class FieldAccessor(accessors: List[BaseAccessor]) extends Serializable { 12 | def combineFns: Any => Any = 13 | accessors.map(_.fn).reduceLeftOption((f, g) => f andThen g).getOrElse(NoopAccessor().fn) 14 | 15 | def toValidatorOp: List[ValidatorOp] = toValidatorOp(this.accessors) 16 | 17 | @tailrec 18 | private def toValidatorOp( 19 | ops: List[BaseAccessor], 20 | modifiers: List[ValidatorOp] = List.empty[ValidatorOp] 21 | ): List[ValidatorOp] = { 22 | if (ops.isEmpty) { 23 | List.empty[ValidatorOp] 24 | } else { 25 | ops.lastOption.get match { 26 | case n: NullableBaseAccessor => 27 | // A sequence of options can be reduce to a single option operation 28 | if (modifiers.lastOption.contains(OptionValidatorOp)) { 29 | toValidatorOp(n.innerOps, modifiers) 30 | } else { 31 | toValidatorOp(n.innerOps, modifiers :+ OptionValidatorOp) 32 | } 33 | case a: ArrayBaseAccessor => 34 | // The DSL will flatten nested arrays into a single array. The first instance of an 35 | // array is captured below. 
36 | if (modifiers.contains(ArrayValidatorOp)) { 37 | toValidatorOp(a.innerOps, modifiers) 38 | } else { 39 | toValidatorOp(a.innerOps, modifiers :+ ArrayValidatorOp) 40 | } 41 | case _: IndexAccessor => modifiers 42 | } 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /elitzur-core/src/main/scala/com/spotify/elitzur/validators/ValidationType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur.validators 18 | 19 | trait BaseValidationType[T] { 20 | def checkValid: Boolean 21 | 22 | def data: T 23 | 24 | override def toString: String = data.toString 25 | } 26 | 27 | //scalastyle:off line.size.limit structural.type 28 | /** 29 | * Validation type that allows dynamic (runtime) setting of an arg that is used in the validation 30 | * function. This accepts any arbitrary args, however has implicit state (whether the arg is set) 31 | * so we recommend using this sparingly. 
32 | * 33 | * @tparam T The field data type 34 | * @tparam U The argument type needed for validation 35 | * @tparam A F-bounded polymorphism type 36 | */ 37 | abstract class DynamicValidationType[T, U, A <: DynamicValidationType[T, U, A]: ({type L[x] = DynamicCompanionImplicit[T, U, x]})#L] 38 | extends BaseValidationType[T] { 39 | 40 | private[elitzur] def arg: Option[U] 41 | 42 | def setArg(a: U): A = { 43 | implicitly[DynamicCompanionImplicit[T, U, A]].companion.parseWithArg(data, a) 44 | } 45 | } 46 | //scalastyle:on line.size.limit structural.type 47 | -------------------------------------------------------------------------------- /elitzur-schemas/src/main/avro/TestAvroArrayTypes.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "TestAvroArrayTypes", 3 | "namespace": "com.spotify.elitzur.schemas", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name":"arrayLongs", 8 | "type":{ 9 | "type": "array", 10 | "items": "long" 11 | } 12 | }, 13 | { 14 | "name":"arrayNullableStrings", 15 | "type":{ 16 | "type": "array", 17 | "items": ["null", "string"] 18 | } 19 | }, 20 | { 21 | "name": "innerArrayRoot", 22 | "type": { 23 | "type": "array", 24 | "items": { 25 | "name": "innerArrayRecord", 26 | "type": "record", 27 | "fields": [ 28 | { 29 | "name": "innerArrayInsideRecord", 30 | "type": { 31 | "type": "array", 32 | "items": "long" 33 | } 34 | }, 35 | { 36 | "name": "userId", 37 | "type": "string" 38 | }, 39 | { 40 | "name": "deepNestedRecord", 41 | "type": { 42 | "type": "record", 43 | "name": "deepNestedRecord", 44 | "fields": [ { 45 | "name": "recordId", 46 | "type": "long" 47 | } ] 48 | } 49 | }, 50 | { 51 | "name": "deeperArrayNestedRecord", 52 | "type": ["null", { 53 | "type": "record", 54 | "name": "DeeperArray", 55 | "fields": [ { 56 | "name": "DeeperArray", 57 | "type": { 58 | "type": "array", 59 | "items": "long" 60 | } 61 | } ] 62 | }], 63 | "default": null 64 | } 65 | ] 66 | } 67 | } 68 | } 69 | ] 70 | } 
71 | -------------------------------------------------------------------------------- /elitzur-scio/src/main/scala/com/spotify/elitzur/scio/ValidatorDoFns.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur.scio 18 | 19 | import com.spotify.elitzur.validators.{ 20 | PostValidation, Unvalidated, ValidationRecordConfig, Validator 21 | } 22 | import com.spotify.scio.coders.Coder 23 | import org.apache.beam.sdk.transforms.DoFn 24 | import org.apache.beam.sdk.transforms.DoFn.ProcessElement 25 | 26 | object ValidatorDoFns { 27 | 28 | class ValidatorDoFn[T: Coder](vr: Validator[T], 29 | config: ValidationRecordConfig = ValidationRecordConfig()) 30 | extends DoFn[T, T] with Serializable { 31 | @ProcessElement 32 | def processElement(c: DoFn[T, T]#ProcessContext): Unit = { 33 | val e = c.element() 34 | c.output(vr.validateRecord(Unvalidated(e), config = config).forceGet) 35 | } 36 | } 37 | 38 | class ValidatorDoFnWithResult[T: Coder](vr: Validator[T], 39 | config: ValidationRecordConfig = ValidationRecordConfig()) 40 | extends DoFn[T, PostValidation[T]] with Serializable { 41 | @ProcessElement 42 | def processElement(c: DoFn[T, PostValidation[T]]#ProcessContext): Unit = { 43 | val e = c.element() 44 | c.output(vr.validateRecord(Unvalidated(e), config = config)) 45 | } 
46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /elitzur-core/src/main/scala/com/spotify/elitzur/validators/Companion.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur.validators 18 | 19 | import com.spotify.elitzur.types.Owner 20 | 21 | 22 | // Companion trait used to implicitly find companions for validation types and reduce the amount 23 | // of reflection being done 24 | private[elitzur] trait CompanionImplicit[T, A, C <: BaseCompanion[T, A]] extends Serializable { 25 | def companion: C 26 | } 27 | 28 | //scalastyle:off line.size.limit 29 | case class SimpleCompanionImplicit[T, A <: com.spotify.elitzur.validators.BaseValidationType[T]](companion: BaseCompanion[T, A]) 30 | extends CompanionImplicit[T, A, BaseCompanion[T, A]] 31 | 32 | case class DynamicCompanionImplicit[T, U, A <: DynamicValidationType[T, U, A]](companion: DynamicCompanion[T, U, A]) 33 | extends CompanionImplicit[T, A, DynamicCompanion[T, U, A]] 34 | //scalastyle:on line.size.limit 35 | 36 | trait BaseCompanion[T, A] extends Serializable { 37 | def validationType: String 38 | def bigQueryType: String 39 | def apply(data: T): A 40 | def parse(data: T): A 41 | def owner: Owner 42 | def description: String 43 | } 44 | 45 | 
//scalastyle:off line.size.limit 46 | trait DynamicCompanion[T, U, A <: DynamicValidationType[T, U, A]] 47 | extends BaseCompanion[T, A] { 48 | //scalastyle:on line.size.limit 49 | def setArg(i: A, a: U): A = i.setArg(a) 50 | def parseWithArg(data: T, arg: U): A 51 | def apply(data: T, arg: U): A = 52 | parseWithArg(data, arg) 53 | } 54 | -------------------------------------------------------------------------------- /elitzur-core/src/main/scala/com/spotify/elitzur/validators/DynamicRecordValidator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur.validators 18 | 19 | import com.spotify.elitzur.MetricsReporter 20 | 21 | 22 | /** Validator for a record supplied as a positional Seq[Any]: the value at index i is validated with validators(i) and labelled labels(i). NOTE(review): assumes the input has no more elements than validators and labels - confirm with callers. */ @SuppressWarnings(Array("org.wartremover.warts.Var")) 23 | final private[elitzur] case class DynamicRecordValidator(validators: Array[Validator[Any]], 24 | labels: Array[String]) 25 | (implicit reporter: MetricsReporter) 26 | extends Validator[Seq[Any]] { 27 | 28 | /** Wraps each value in a ValidatorAccessor and delegates to Validator.validationLoop, rebuilding the record with identity. Although outermostClassName defaults to None, a value is required here: None leads to an Exception because the class name is needed for metrics reporting. */ override def validateRecord(a: PreValidation[Seq[Any]], 29 | path: String = "", 30 | outermostClassName: Option[String] = None, 31 | config: ValidationRecordConfig = DefaultRecordConfig) 32 | : PostValidation[Seq[Any]] = { 33 | val ps = a.forceGet.toArray 34 | val as = new Array[ValidatorAccessor[Any]](ps.length) 35 | var i = 0 36 | 37 | while (i < ps.length) { 38 | val value = ps(i) 39 | as.update(i, ValidatorAccessor(validators(i), value, labels(i)) 40 | .asInstanceOf[ValidatorAccessor[Any]]) 41 | i = i + 1 42 | } 43 | 44 | Validator.validationLoop( 45 | as, 46 | identity[Seq[Any]], 47 | outermostClassName.getOrElse( 48 | throw new Exception("A class name is required for Metrics Reporting")), 49 | path, 50 | config 51 | ) 52 | } 53 | 54 | /** Dynamic records are always validated. */ override def shouldValidate: Boolean = true 55 | } 56 | -------------------------------------------------------------------------------- /elitzur-avro/src/test/scala/com/spotify/elitzur/helpers/SampleAvroRecords.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. 
See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur.helpers 18 | 19 | import java.util 20 | import com.spotify.elitzur.schemas._ 21 | import com.spotify.ratatool.scalacheck.{RichAvroGen, avroOf} 22 | import org.scalacheck.{Arbitrary, Gen} 23 | import com.spotify.ratatool.scalacheck._ 24 | import collection.JavaConverters._ 25 | 26 | case class OptFieldLong(optFieldLong: Option[Long]) 27 | case class OptInnerField(optInnerField: Option[OptFieldLong]) 28 | 29 | object SampleAvroRecords { 30 | 31 | def innerNestedSample(isValid: Boolean = true): InnerNestedType = avroOf[InnerNestedType] 32 | .amend(if (isValid) Gen.const("US") else Gen.const("NYC"))(_.setCountryCode) 33 | .amend(if (isValid) Gen.posNum[Long] else Gen.negNum[Long])(_.setPlayCount) 34 | .sample.get 35 | 36 | def testAvroTypes(isValid: Boolean = true): TestAvroTypes = avroOf[TestAvroTypes] 37 | .amend(innerNestedSample(isValid))(_.setInner) 38 | .sample.get 39 | 40 | def testAvroArrayTypes: TestAvroArrayTypes = { 41 | def innerArrayInsideRecord = Gen.listOfN[Long](2, Arbitrary.arbitrary[Long]) 42 | .sample.get.map(_.asInstanceOf[java.lang.Long]).asJava 43 | def deepArrayRecord = avroOf[DeeperArray] 44 | .amend(innerArrayInsideRecord)(_.setDeeperArray) 45 | def innerArrayRecord = avroOf[innerArrayRecord] 46 | .amend(innerArrayInsideRecord)(_.setInnerArrayInsideRecord) 47 | .amend(deepArrayRecord)(_.setDeeperArrayNestedRecord) 48 | def innerArrayRoot: Gen[util.List[innerArrayRecord]] = 49 | Gen.listOfN(2, innerArrayRecord).map(_.asJava) 50 | 51 | avroOf[TestAvroArrayTypes] 52 | .amend(innerArrayRoot)(_.setInnerArrayRoot) 53 | .sample 54 | .get 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /elitzur-core/src/main/scala/com/spotify/elitzur/validators/DerivedValidator.scala: -------------------------------------------------------------------------------- 1 
| /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur.validators 18 | 19 | import com.spotify.elitzur.MetricsReporter 20 | import magnolia1._ 21 | 22 | 23 | @SuppressWarnings(Array("org.wartremover.warts.Var")) 24 | final private[elitzur] case class DerivedValidator[T] private(caseClass: CaseClass[Validator, T]) 25 | (implicit reporter: MetricsReporter) 26 | extends Validator[T] { 27 | override def validateRecord(a: PreValidation[T], 28 | path: String = "", 29 | outermostClassName: Option[String] = None, 30 | config: ValidationRecordConfig = DefaultRecordConfig) 31 | : PostValidation[T] = { 32 | val ps = caseClass.parameters 33 | val as = new Array[ValidatorAccessor[Any]](ps.length) 34 | var i = 0 35 | 36 | // Loop through parameters once to dereference and avoid leaking magnolia types in APIs 37 | while (i < ps.length) { 38 | val p = ps(i) 39 | val deref = p.dereference(a.forceGet) 40 | as.update(i, ValidatorAccessor(p.typeclass, deref, p.label) 41 | .asInstanceOf[ValidatorAccessor[Any]]) 42 | i = i + 1 43 | } 44 | Validator.validationLoop( 45 | as, 46 | caseClass.rawConstruct, 47 | if (outermostClassName.isEmpty) caseClass.typeName.full else outermostClassName.get, 48 | path, 49 | config 50 | ) 51 | } 52 | 53 | override def shouldValidate: Boolean = { 54 | val ps = caseClass.parameters 55 | var i = 0 56 | var shouldValidate = false 57 | 58 | while 
(i < ps.length) { 59 | val p = ps(i) 60 | if (p.typeclass.shouldValidate) { 61 | shouldValidate = true 62 | } 63 | i = i + 1 64 | } 65 | shouldValidate 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /elitzur-scio/src/main/avro/TestAvroTypes.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "TestAvroTypes", 3 | "namespace": "com.spotify.skeleton.schema", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name": "userAge", 8 | "type": "long", 9 | "doc": "{validationType: age}" 10 | }, 11 | { 12 | "name": "userBoolean", 13 | "type": "boolean", 14 | "doc": "{validationType: nonNullBoolean}" 15 | }, 16 | { 17 | "name": "userFloat", 18 | "type": "float", 19 | "doc": "floating away on the clouds of google" 20 | }, 21 | { 22 | "name": "userDouble", 23 | "type": "double", 24 | "doc": "{validationType: nonNegativeDouble}" 25 | }, 26 | { 27 | "name": "userLong", 28 | "type": "long", 29 | "doc": "{validationType: nonNegativeLong}" 30 | }, 31 | { 32 | "name": "latitude", 33 | "type": "bytes", 34 | "doc": "" 35 | }, 36 | { 37 | "name": "longitude", 38 | "type": "bytes", 39 | "doc": "" 40 | }, 41 | { 42 | "name": "userOptionalLong", 43 | "type": ["null", "long"], 44 | "default": null, 45 | "doc": "{validationType: nonNegativeLong}" 46 | }, 47 | { 48 | "name": "innerOpt", 49 | "type": ["null", "com.spotify.skeleton.schema.InnerNestedType"], 50 | "default": null 51 | }, 52 | { 53 | "name": "inner", 54 | "type": "com.spotify.skeleton.schema.InnerNestedType" 55 | }, 56 | { 57 | "name": "userTypeEnum", 58 | "type" : { 59 | "name": "UserType", 60 | "type": "enum", 61 | "symbols": [ 62 | "USER", 63 | "BOT" 64 | ] 65 | } 66 | }, 67 | { 68 | "name": "userArrayString", 69 | "type" : { 70 | "type": "array", 71 | "items": "string" 72 | } 73 | }, 74 | { 75 | "name": "userMapStringLong", 76 | "type" : { 77 | "type": "map", 78 | "values": "long" 79 | } 80 | }, 81 | { 82 | "name": 
"userFixed", 83 | "type": { 84 | "name": "FIXED_ELEMENT", 85 | "type": "fixed", 86 | "size": 16 87 | } 88 | }, 89 | { 90 | "name": "enum_test", 91 | "type": { 92 | "type": "enum", 93 | "name": "EnumTest", 94 | "symbols": [ 95 | "EndSong", 96 | "EndVideo", 97 | "StreamProgress" 98 | ] 99 | }, 100 | "doc": "Message name, one of {EndSong, EndVideo, StreamProgress}" 101 | } 102 | ] 103 | } -------------------------------------------------------------------------------- /elitzur-scio/src/main/scala/com/spotify/elitzur/scio/AvroConverterDoFns.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur.scio 18 | 19 | import com.spotify.elitzur.converters.avro.{AvroConverter, AvroElitzurConversionUtils} 20 | import com.spotify.scio.coders.Coder 21 | import org.apache.avro.Schema 22 | import org.apache.avro.generic.{GenericData, GenericRecord} 23 | import org.apache.beam.sdk.transforms.DoFn 24 | import org.apache.beam.sdk.transforms.DoFn.ProcessElement 25 | 26 | import scala.reflect.ClassTag 27 | 28 | /** Beam DoFn that converts each incoming Avro record to T via ac.fromAvro, using the record's own schema. */ class FromAvroConverterDoFn[GR <: GenericRecord, T : Coder](ac: AvroConverter[T]) 29 | extends DoFn[GR, T] with Serializable { 30 | @ProcessElement 31 | def processElement(c: DoFn[GR, T]#ProcessContext): Unit = { 32 | val e = c.element() 33 | c.output(ac.fromAvro(e, e.getSchema)) 34 | } 35 | } 36 | 37 | /** Beam DoFn that converts each T to an Avro record of class GR via ac.toAvro. The target schema is read reflectively from GR's static getClassSchema method, kept as a JSON string, and re-parsed lazily into a transient Schema field. */ class ToAvroConverterDoFn[T : Coder, GR <: GenericRecord : Coder : ClassTag](ac: AvroConverter[T]) 38 | extends DoFn[T, GR] with Serializable { 39 | 40 | //We use reflection to get the schema here since it's only invoked per-worker instead of element 41 | val schemaSer: String = implicitly[ClassTag[GR]].runtimeClass.getMethod("getClassSchema") 42 | .invoke(null).asInstanceOf[Schema].toString(false) 43 | @transient lazy val schemaSerDe: Schema = new Schema.Parser().parse(schemaSer) 44 | 45 | @ProcessElement 46 | def processElement(c: DoFn[T, GR]#ProcessContext): Unit = { 47 | val e = c.element() 48 | c.output(ac.toAvro(e, schemaSerDe).asInstanceOf[GR]) 49 | } 50 | } 51 | 52 | /** Beam DoFn that converts each T to GR via ac.toAvroDefault, passing a GenericData.Record copy of defaultValueRecord that is built lazily and held in a transient field. */ class ToAvroDefaultConverterDoFn[T : Coder, GR <: GenericRecord : Coder] 53 | (defaultValueRecord: GR, ac: AvroConverter[T]) 54 | extends DoFn[T, GR] with Serializable { 55 | 56 | @transient lazy val defaultGenericData: GenericData.Record = 57 | AvroElitzurConversionUtils.recordToGenericData(defaultValueRecord) 58 | @ProcessElement 59 | def processElement(c: DoFn[T, GR]#ProcessContext): Unit = { 60 | val e = c.element() 61 | c.output(ac.toAvroDefault(e, defaultGenericData).asInstanceOf[GR]) 62 | } 63 | } 64 | 
-------------------------------------------------------------------------------- /benchmarking/src/main/scala/com/spotify/elitzur/ProfilingAvro.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur 18 | 19 | import com.spotify.elitzur._ 20 | import com.spotify.elitzur.converters.avro.AvroConverter 21 | import com.spotify.elitzur.schemas.{TestAvroOut, TestAvroTypes} 22 | import com.spotify.elitzur.validators._ 23 | import com.spotify.elitzur.converters.avro._ 24 | import com.spotify.ratatool.scalacheck._ 25 | import com.spotify.elitzur.Companions._ 26 | import com.spotify.elitzur.validators.{Unvalidated, Validator} 27 | import org.scalacheck._ 28 | 29 | object ProfilingAvro { 30 | case class TestAvro( 31 | userAge: AgeExample, 32 | userLong: NonNegativeLongExample, 33 | userFloat: Float, 34 | inner: InnerNested 35 | ) 36 | 37 | case class InnerNested(countryCode: CountryCodeExample) 38 | 39 | implicit val metricsReporter: MetricsReporter = new MetricsReporter { 40 | val map : scala.collection.mutable.Map[String, Int] = 41 | scala.collection.mutable.Map[String, Int]().withDefaultValue(0) 42 | override def reportValid(className: String, fieldName: String, validationType: String): Unit = 43 | map(s"$className.$fieldName.$validationType.valid") += 1 44 | override def 
reportInvalid(className: String, fieldName: String, validationType: String): Unit = 45 | map(s"$className.$fieldName.$validationType.invalid") += 1 46 | override def toString: String = map.toString() 47 | } 48 | 49 | //scalastyle:off magic.number 50 | val avroRecords: Seq[TestAvroTypes] = Gen.listOfN(1000, avroOf[TestAvroTypes]).sample.get 51 | //scalastyle:on magic.number 52 | 53 | val c: AvroConverter[TestAvro] = implicitly[AvroConverter[TestAvro]] 54 | val v: Validator[TestAvro] = implicitly[Validator[TestAvro]] 55 | 56 | 57 | def main(args: Array[String]): Unit = { 58 | avroRecords 59 | .map(a => c.fromAvro(a, TestAvroTypes.SCHEMA$)) 60 | .map(a => v.validateRecord(Unvalidated(a))) 61 | .map(a => c.toAvro(a.forceGet, TestAvroOut.SCHEMA$)) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: &version 2.1 2 | 3 | 4 | #################### 5 | # Templates 6 | #################### 7 | 8 | executors: 9 | scala_212: 10 | docker: 11 | - image: circleci/openjdk:11-jdk 12 | environment: 13 | SCALA_VERSION: 2.12.14 14 | scala_213: 15 | docker: 16 | - image: circleci/openjdk:11-jdk 17 | environment: 18 | SCALA_VERSION: 2.13.6 19 | 20 | commands: 21 | build: 22 | parameters: 23 | build-steps: 24 | description: "build steps" 25 | type: steps 26 | default: [] 27 | steps: 28 | - checkout 29 | - run: 30 | command: | 31 | sbt clean "project $TEST_PROJECT" scalastyle test:scalastyle test 32 | 33 | 34 | #################### 35 | # Jobs 36 | #################### 37 | 38 | jobs: 39 | avro_212: 40 | executor: scala_212 41 | environment: 42 | TEST_PROJECT=elitzur-avro 43 | steps: 44 | - build 45 | core_212: 46 | executor: scala_212 47 | environment: 48 | TEST_PROJECT=elitzur-core 49 | steps: 50 | - build 51 | examples_212: 52 | executor: scala_212 53 | environment: 54 | TEST_PROJECT=elitzur-examples 55 | steps: 56 | - build 
57 | schemas_212: 58 | executor: scala_212 59 | environment: 60 | TEST_PROJECT=elitzur-schemas 61 | steps: 62 | - build 63 | scio_212: 64 | executor: scala_212 65 | environment: 66 | TEST_PROJECT=elitzur-scio 67 | steps: 68 | - build 69 | 70 | avro_213: 71 | executor: scala_213 72 | environment: 73 | TEST_PROJECT=elitzur-avro 74 | steps: 75 | - build 76 | core_213: 77 | executor: scala_213 78 | environment: 79 | TEST_PROJECT=elitzur-core 80 | steps: 81 | - build 82 | examples_213: 83 | executor: scala_213 84 | environment: 85 | TEST_PROJECT=elitzur-examples 86 | steps: 87 | - build 88 | schemas_213: 89 | executor: scala_213 90 | environment: 91 | TEST_PROJECT=elitzur-schemas 92 | steps: 93 | - build 94 | scio_213: 95 | executor: scala_213 96 | environment: 97 | TEST_PROJECT=elitzur-scio 98 | steps: 99 | - build 100 | 101 | 102 | #################### 103 | # Workflows 104 | #################### 105 | 106 | workflows: 107 | version: *version 108 | 109 | # Per commit build 110 | build: 111 | jobs: 112 | - avro_212 113 | - core_212 114 | - examples_212 115 | - schemas_212 116 | - scio_212 117 | 118 | - avro_213 119 | - core_213 120 | - examples_213 121 | - schemas_213 122 | - scio_213 123 | -------------------------------------------------------------------------------- /elitzur-avro/src/main/scala/com/spotify/elitzur/converters/avro/AvroElitzurConversionUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. 
See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur.converters.avro 18 | 19 | import java.nio.{Buffer, ByteBuffer} 20 | 21 | import org.apache.avro.Schema 22 | import org.apache.avro.generic.{GenericData, GenericRecord, GenericRecordBuilder} 23 | 24 | import scala.jdk.CollectionConverters._ 25 | 26 | object AvroElitzurConversionUtils { 27 | /** Reads a possibly nested field: walks fieldName through nested GenericRecords and returns the value at the last name; a trailing "innerData" segment is ignored. */ private[elitzur] def getAvroField(r: GenericRecord, fieldName: Seq[String]): Object = { 28 | fieldName match { 29 | case name :: Nil => r.get(name) 30 | case name :: "innerData" :: Nil => r.get(name) 31 | case head :: tail => getAvroField(r.get(head).asInstanceOf[GenericRecord], tail) 32 | } 33 | } 34 | 35 | /** Converts a java.util.Optional into a scala Option. */ private[elitzur] def convertOptional[T](v: java.util.Optional[T]): Option[T] = { 36 | if (v.isPresent) Option(v.get()) else None 37 | } 38 | 39 | /** Copies the remaining bytes of bBuffer into a new array and then restores the buffer's position. */ def byteBufferToByteArray(bBuffer: ByteBuffer): Array[Byte] = { 40 | // http://errorprone.info/bugpattern/ByteBufferBackingArray 41 | val bArray = new Array[Byte](bBuffer.remaining) 42 | bBuffer.get(bArray) 43 | // cast to Buffer to fix cross-compat issue described 44 | // here: https://stackoverflow.com/questions/61267495 45 | (bBuffer: Buffer).position((bBuffer: Buffer).position() - bArray.length) // Restores position 46 | bArray 47 | } 48 | 49 | /** Copies every field of record into a new GenericData.Record built with the record's own schema. */ private[elitzur] def recordToGenericData[T <: GenericRecord](record: T): GenericData.Record = { 50 | val defaultBuilder = new GenericRecordBuilder(record.getSchema) 51 | record.getSchema.getFields.asScala.foreach { f => 52 | defaultBuilder.set(f.name(), record.get(f.name())) 53 | } 54 | defaultBuilder.build() 55 | } 56 | 57 | /** True when schema is a RECORD, or a UNION with at least one RECORD member. */ private[elitzur] def isAvroRecordType(schema: Schema): Boolean = 58 | Schema.Type.RECORD.equals(schema.getType) || 59 | (Schema.Type.UNION.equals(schema.getType) && 60 | schema.getTypes.asScala.map(_.getType).contains(Schema.Type.RECORD)) 61 | 62 | /** True when schema is an ARRAY, or a UNION with at least one ARRAY member. Union members are Schema objects, so their types are compared; a Schema never equals a Schema.Type. */ private[elitzur] def isAvroArrayType(schema: Schema): Boolean = 63 | 
Schema.Type.ARRAY.equals(schema.getType) || 64 | (Schema.Type.UNION.equals(schema.getType) && schema.getTypes.asScala.map(_.getType).contains(Schema.Type.ARRAY)) 65 | } 66 | -------------------------------------------------------------------------------- /elitzur-avro/src/main/scala/com/spotify/elitzur/converters/avro/dynamic/dsl/AvroAccessor.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur.converters.avro.dynamic.dsl 18 | 19 | import org.apache.avro.generic.GenericRecord 20 | 21 | import java.{util => ju} 22 | 23 | /** One step of a field-access path: fn maps the current Avro object to the next value. */ trait BaseAccessor { 24 | def fn: Any => Any 25 | } 26 | 27 | /** Accessor that carries nested steps; innerFn is the composition of innerOps (via FieldAccessor.combineFns). */ trait InnerBaseAccessor extends BaseAccessor { 28 | val innerOps: List[BaseAccessor] 29 | val innerFn: Any => Any = (o: Any) => FieldAccessor(innerOps).combineFns(o) 30 | } 31 | 32 | /** Marker for accessors that step into an array field. */ trait ArrayBaseAccessor extends InnerBaseAccessor 33 | 34 | /** Marker for accessors that step into a nullable field. */ trait NullableBaseAccessor extends InnerBaseAccessor 35 | 36 | /** Identity accessor: returns its input unchanged. */ case class NoopAccessor() extends BaseAccessor { 37 | def fn: Any => Any = (o: Any) => o 38 | } 39 | 40 | /** Reads field from the input, which is cast to GenericRecord. */ case class IndexAccessor(field: String) extends BaseAccessor { 41 | override def fn: Any => Any = (o: Any) => o.asInstanceOf[GenericRecord].get(field) 42 | } 43 | 44 | /** Returns null when field is null on the input record; otherwise applies innerFn to the input record itself (not to the fetched value). NOTE(review): this is only correct if innerOps begins with the accessor for field - confirm where NullableAccessor is constructed. */ case class NullableAccessor(field: String, innerOps: List[BaseAccessor]) 45 | extends NullableBaseAccessor { 46 | override def fn: Any => Any = (o: Any) => { 47 | val innerAvroObj = o.asInstanceOf[GenericRecord].get(field) 48 | if (innerAvroObj == null) null else innerFn(o) 49 | } 50 | } 51 | 52 | /** Reads the java.util.List stored at field, applies innerFn to every element and concatenates the results into one new ArrayList; each innerFn result is cast to java.util.List. */ case class ArrayFlatmapAccessor(field: String, innerOps: List[BaseAccessor]) 53 | extends ArrayBaseAccessor { 54 | override def fn: Any => Any = (o: Any) => { 55 | val innerAvroObj = o.asInstanceOf[GenericRecord].get(field) 56 | val res = new ju.ArrayList[Any] 57 | innerAvroObj.asInstanceOf[ju.List[Any]].forEach( 58 | elem => innerFn(elem).asInstanceOf[ju.List[Any]].forEach(x => res.add(x)) 59 | ) 60 | res 61 | } 62 | } 63 | 64 | /** Reads the java.util.List stored at field, applies innerFn to every element and collects the results in a new ArrayList. */ case class ArrayMapAccessor(field: String, innerOps: List[BaseAccessor]) 65 | extends ArrayBaseAccessor { 66 | override def fn: Any => Any = (o: Any) => { 67 | val innerAvroObj = o.asInstanceOf[GenericRecord].get(field) 68 | val res = new ju.ArrayList[Any] 69 | innerAvroObj.asInstanceOf[ju.List[Any]].forEach(elem => res.add(innerFn(elem))) 70 | res 71 | } 72 | } 73 | 74 | case class ArrayNoopAccessor(field: String, innerOps: List[BaseAccessor]) 75 | 
extends ArrayBaseAccessor { 76 | override def fn: Any => Any = (o: Any) => IndexAccessor(field).fn(o) 77 | } 78 | -------------------------------------------------------------------------------- /elitzur-avro/src/main/scala/com/spotify/elitzur/converters/avro/dynamic/DynamicAccessorValidator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur.converters.avro.dynamic 18 | 19 | import com.spotify.elitzur.MetricsReporter 20 | import com.spotify.elitzur.converters.avro.dynamic.dsl.FieldAccessor 21 | import com.spotify.elitzur.validators.{DynamicRecordValidator, Unvalidated, Validator} 22 | import org.apache.avro.generic.GenericRecord 23 | import org.slf4j.LoggerFactory 24 | 25 | //scalastyle:off line.size.limit 26 | class DynamicAccessorValidator(fieldParsers: Array[DynamicFieldParser])(implicit metricsReporter: MetricsReporter) extends Serializable { 27 | //scalastyle:on line.size.limit 28 | final val className: String = this.getClass.getName 29 | 30 | val validator: DynamicRecordValidator = DynamicRecordValidator( 31 | fieldParsers.map(_.fieldValidator), 32 | fieldParsers.map(_.fieldLabel) 33 | ) 34 | 35 | def validateRecord(avroRecord: GenericRecord): Unit = { 36 | val parseAllResult: Seq[Any] = fieldParsers.map(_.fieldParser(avroRecord)) 37 | validator.validateRecord(Unvalidated(parseAllResult), outermostClassName = Some(className)) 38 | } 39 | } 40 | 41 | class DynamicFieldParser( 42 | accessorInput: String, 43 | accessorCompanion: DynamicAccessorCompanion[_, _], 44 | accessorOps: FieldAccessor 45 | )(implicit metricsReporter: MetricsReporter) extends Serializable { 46 | private val logger = LoggerFactory.getLogger(this.getClass) 47 | 48 | private val validatorOp = accessorOps.toValidatorOp 49 | private val fieldFn: Any => Any = accessorCompanion.getPreprocessorForValidator(validatorOp) 50 | 51 | private[dynamic] val fieldValidator: Validator[Any] = accessorCompanion.getValidator(validatorOp) 52 | private[dynamic] val fieldLabel: String = accessorInput 53 | private[dynamic] def fieldParser(avroRecord: GenericRecord): Any = { 54 | val fieldValue = accessorOps.combineFns(avroRecord) 55 | fieldFn(fieldValue) 56 | } 57 | 58 | logger.info( 59 | s""" 60 | |The field validator input of '$accessorInput' resulted in: 61 | |\tAccessors: ${accessorOps.accessors.toString} 
62 | |\tValidators: ${validatorOp.map(_.getClass.getSimpleName).toString} 63 | |""".stripMargin 64 | ) 65 | } 66 | -------------------------------------------------------------------------------- /elitzur-avro/src/test/scala/com/spotify/elitzur/AvroFieldExtractorBaseTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur 18 | 19 | import com.spotify.elitzur.converters.avro.dynamic.dsl.AvroAccessorException._ 20 | import com.spotify.elitzur.converters.avro.dynamic.dsl.AvroObjMapper 21 | import helpers.SampleAvroRecords._ 22 | import org.scalatest.flatspec.AnyFlatSpec 23 | import org.scalatest.matchers.should.Matchers 24 | import org.apache.avro.SchemaBuilder 25 | import org.apache.avro.generic.GenericRecordBuilder 26 | 27 | class AvroFieldExtractorBaseTest extends AnyFlatSpec with Matchers { 28 | 29 | it should "extract a primitive at the record root level" in { 30 | val testSimpleAvroRecord = innerNestedSample() 31 | val fn = AvroObjMapper.getAvroFun("userId", testSimpleAvroRecord.getSchema) 32 | 33 | fn.combineFns(testSimpleAvroRecord) should be (testSimpleAvroRecord.getUserId) 34 | } 35 | 36 | it should "extract an array at the record root level" in { 37 | val testSimpleAvroRecord = testAvroArrayTypes 38 | val fn = AvroObjMapper.getAvroFun("arrayLongs", testSimpleAvroRecord.getSchema) 39 | 40 | fn.combineFns(testSimpleAvroRecord) should be (testSimpleAvroRecord.getArrayLongs) 41 | } 42 | 43 | it should "extract a nested record" in { 44 | val testSimpleAvroRecord = testAvroTypes() 45 | val fn = AvroObjMapper.getAvroFun("inner.userId", testSimpleAvroRecord.getSchema) 46 | 47 | fn.combineFns(testSimpleAvroRecord) should be (testSimpleAvroRecord.getInner.getUserId) 48 | } 49 | 50 | it should "extract a record if the field has _ in it" in { 51 | val schema = SchemaBuilder 52 | .builder.record("record").fields.requiredLong("_user_id10").endRecord 53 | val testSimpleAvroRecord = new GenericRecordBuilder(schema).set("_user_id10", 1L).build 54 | val fn = AvroObjMapper.getAvroFun("_user_id10", testSimpleAvroRecord.getSchema) 55 | 56 | fn.combineFns(testSimpleAvroRecord) should be (testSimpleAvroRecord.get("_user_id10")) 57 | } 58 | 59 | it should "throw an exception if the field is missing" in { 60 | val testSimpleAvroRecord = testAvroTypes() 61 | 
val thrown = intercept[InvalidDynamicFieldException] { 62 | AvroObjMapper.getAvroFun("notRealField", testSimpleAvroRecord.getSchema) 63 | } 64 | 65 | thrown.getMessage should include("notRealField not found in") 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /elitzur-avro/src/main/scala/com/spotify/elitzur/converters/avro/ConverterMacros.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur.converters.avro 18 | 19 | import scala.reflect.macros.whitebox 20 | 21 | object ConverterMacros { 22 | // Add a level of indirection to prevent the macro from capturing 23 | // $outer which would make the Coder serialization fail 24 | def wrappedRecordConverter[T: c.WeakTypeTag](c: whitebox.Context): c.Tree = { 25 | import c.universe._ 26 | 27 | val magTree = magnolia1.Magnolia.gen[T](c) 28 | 29 | def getLazyVal = 30 | magTree match { 31 | case q"lazy val $_ = $body; $_" => 32 | body 33 | 34 | case q"val $_ = $body; $_" => 35 | body 36 | } 37 | 38 | // Remove annotations from magnolia since they are 39 | // not serializable and we don't use them anyway 40 | // scalastyle:off line.size.limit 41 | val removeAnnotations = 42 | new Transformer { 43 | override def transform(tree: Tree): c.universe.Tree = { 44 | tree match { 45 | case Apply(AppliedTypeTree(Select(pack, TypeName("CaseClass")), ps), 46 | List(typeName, isObject, isValueClass, params, _)) => 47 | Apply(AppliedTypeTree(Select(pack, TypeName("CaseClass")), ps), 48 | List(typeName, isObject, isValueClass, params, q"""Array()""")) 49 | 50 | case q"""new magnolia1.CaseClass[$tc, $t]($typeName, $isObject, $isValueClass, $params, $_){ $body }""" => 51 | q"""_root_.magnolia1.CaseClass[$tc, $t]($typeName, $isObject, $isValueClass, $params, Array()){ $body }""" 52 | 53 | case q"com.spotify.elitzur.AvroConverter.split(new magnolia1.SealedTrait($name, $subtypes, $_))" => 54 | q"_root_.com.spotify.elitzur.AvroConverter.split(new magnolia1.SealedTrait($name, $subtypes, Array()))" 55 | 56 | case q"""magnolia1.Magnolia.param[$tc, $t, $p]($name, $idx, $repeated, $tcParam, $defaultVal, $_)""" => 57 | q"""_root_.magnolia1.Magnolia.param[$tc, $t, $p]($name, $idx, $repeated, $tcParam, $defaultVal, Array())""" 58 | 59 | case _ => 60 | super.transform(tree) 61 | } 62 | } 63 | } 64 | // scalastyle:on line.size.limit 65 | val transformer = removeAnnotations.transform(getLazyVal) 66 | 67 | 
transformer 68 | } 69 | // scalastyle:on method.length 70 | 71 | } 72 | -------------------------------------------------------------------------------- /elitzur-scio/src/main/scala/com/spotify/elitzur/scio/Implicits.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur.scio 18 | 19 | import com.spotify.elitzur.MetricsReporter 20 | import ValidatorDoFns.{ValidatorDoFn, ValidatorDoFnWithResult} 21 | import com.spotify.elitzur.converters.avro.AvroConverter 22 | import com.spotify.elitzur.validators.{PostValidation, ValidationRecordConfig, Validator} 23 | import com.spotify.scio.coders.Coder 24 | import com.spotify.scio.coders.kryo._ 25 | import com.spotify.scio.values.SCollection 26 | import org.apache.avro.generic.GenericRecord 27 | import org.apache.beam.sdk.transforms.ParDo 28 | 29 | import scala.reflect.ClassTag 30 | 31 | trait Implicits { 32 | implicit val metricsReporter: MetricsReporter = new ScioMetricsReporter 33 | 34 | implicit class SCollectionImplicitValidatorFns[T: Coder](sc: SCollection[T])( 35 | implicit vr: Validator[T]) { 36 | 37 | def validate(conf: ValidationRecordConfig = ValidationRecordConfig()) 38 | : SCollection[T] = { 39 | sc.withName("validate").applyTransform(ParDo.of(new ValidatorDoFn(vr, config = conf))) 40 | } 41 | 42 | def 
validateWithResult(conf: ValidationRecordConfig = ValidationRecordConfig()) 43 | : SCollection[PostValidation[T]] = { 44 | sc.withName("validateWithResult") 45 | .applyTransform(ParDo.of(new ValidatorDoFnWithResult[T](vr, conf))) 46 | } 47 | } 48 | 49 | implicit class SCollFromAvroConverter[GR <: GenericRecord : Coder](sc: SCollection[GR]) { 50 | def fromAvro[T: Coder](implicit c: AvroConverter[T]): SCollection[T] = { 51 | sc.withName("fromAvro").applyTransform(ParDo.of(new FromAvroConverterDoFn(c))) 52 | } 53 | } 54 | 55 | implicit class SCollToAvroConverter[T: AvroConverter: Coder](sc: SCollection[T]) { 56 | def toAvro[GR <: GenericRecord: Coder : ClassTag](implicit c: AvroConverter[T]) 57 | : SCollection[GR] = { 58 | sc.withName("toAvro").applyTransform(ParDo.of(new ToAvroConverterDoFn(c))) 59 | } 60 | } 61 | 62 | implicit class SCollToAvroDefaultConverter[T: AvroConverter: Coder](sc: SCollection[T]) { 63 | def toAvroDefault[GR <: GenericRecord: Coder : ClassTag](defaultR: GR) 64 | (implicit c: AvroConverter[T]) 65 | : SCollection[GR] = { 66 | sc.withName("toAvroDefault").applyTransform( 67 | ParDo.of(new ToAvroDefaultConverterDoFn(defaultR, c))) 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /elitzur-core/src/main/scala/com/spotify/elitzur/validators/Implicits.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. 
See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur.validators 18 | 19 | import com.spotify.elitzur.MetricsReporter 20 | 21 | import scala.reflect.ClassTag 22 | 23 | trait Implicits { 24 | //scalastyle:off line.size.limit 25 | import Validator._ 26 | 27 | implicit val stringValidator: FieldValidator[String] = new IgnoreValidator[String] 28 | implicit val longValidator: FieldValidator[Long] = new IgnoreValidator[Long] 29 | implicit val doubleValidator: FieldValidator[Double] = new IgnoreValidator[Double] 30 | implicit val booleanValidator: FieldValidator[Boolean] = new IgnoreValidator[Boolean] 31 | implicit val arrayByteValidator: FieldValidator[Array[Byte]] = new IgnoreValidator[Array[Byte]] 32 | implicit val floatValidator: FieldValidator[Float] = new IgnoreValidator[Float] 33 | implicit val intValidator: FieldValidator[Int] = new IgnoreValidator[Int] 34 | 35 | implicit def baseTypeValidator[T <: BaseValidationType[_]: ClassTag]: FieldValidator[T] = new BaseFieldValidator[T] 36 | implicit def optionTypeValidator[T <: BaseValidationType[_]: FieldValidator: ClassTag]: FieldValidator[Option[T]] = new OptionTypeValidator[T] 37 | implicit def statusTypeValidator[T <: BaseValidationType[_]: FieldValidator: ClassTag]: FieldValidator[ValidationStatus[T]] = new StatusTypeValidator[T] 38 | implicit def statusOptionTypeValidator[T <: BaseValidationType[_]: FieldValidator: ClassTag]: FieldValidator[ValidationStatus[Option[T]]] = new StatusOptionTypeValidator[T] 39 | implicit def wrappedValidator[T: Validator]: Validator[ValidationStatus[T]] = new WrappedValidator[T] 40 | implicit def optionValidator[T: Validator]: Validator[Option[T]] = new OptionValidator[T] 41 | implicit def dynamicTypeValidator[T <: DynamicValidationType[_, _, _]: ClassTag]: DynamicValidator[T] = new DynamicValidator[T] 42 | implicit def seqValidator[T: Validator: ClassTag](implicit reporter: 
MetricsReporter): Validator[Seq[T]] = wrapSeqLikeValidator(() => Seq.newBuilder[T]) 43 | implicit def listValidator[T: Validator: ClassTag](implicit reporter: MetricsReporter): Validator[List[T]] = wrapSeqLikeValidator(() => List.newBuilder[T]) 44 | implicit def arrayValidator[T: Validator: ClassTag](implicit reporter: MetricsReporter): Validator[Array[T]] = wrapSeqLikeValidator(() => Array.newBuilder[T]) 45 | implicit def vectorValidator[T: Validator: ClassTag](implicit reporter: MetricsReporter): Validator[Vector[T]] = wrapSeqLikeValidator(() => Vector.newBuilder[T]) 46 | //scalastyle:on line.size.limit 47 | 48 | } 49 | -------------------------------------------------------------------------------- /elitzur-examples/src/main/scala/com/spotify/elitzur/examples/ExampleTypes.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur.examples 18 | 19 | import java.util.Locale 20 | 21 | import com.spotify.elitzur.types.Owner 22 | import com.spotify.elitzur.validators.{BaseCompanion, BaseValidationType, SimpleCompanionImplicit} 23 | 24 | case object Blizzard extends Owner { 25 | override def name: String = "Blizzard" 26 | } 27 | 28 | object Companions { 29 | implicit val ageC: SimpleCompanionImplicit[Long, Age] = 30 | SimpleCompanionImplicit(AgeCompanion) 31 | implicit val ccC: SimpleCompanionImplicit[String, CountryCode] = 32 | SimpleCompanionImplicit(CountryCompanion) 33 | implicit val nnlC: SimpleCompanionImplicit[Long, NonNegativeLong] = 34 | SimpleCompanionImplicit(NonNegativeLongCompanion) 35 | } 36 | 37 | case class CountryCode(data: String) extends BaseValidationType[String] { 38 | override def checkValid: Boolean = Locale.getISOCountries.contains(data) 39 | } 40 | 41 | case class Age(data: Long) extends BaseValidationType[Long] { 42 | override def checkValid: Boolean = data > 0L && data < 150L 43 | } 44 | 45 | case class NonNegativeLong(data: Long) extends BaseValidationType[Long] { 46 | override def checkValid: Boolean = data >= 0L 47 | } 48 | 49 | object CountryCompanion extends BaseCompanion[String, CountryCode] { 50 | def validationType: String = "CountryCode" 51 | 52 | def bigQueryType: String = "STRING" 53 | 54 | def apply(data: String): CountryCode = CountryCode(data) 55 | 56 | def parse(data: String): CountryCode = CountryCode(data) 57 | 58 | def description: String = "Represents an ISO standard two-letter country code" 59 | 60 | def owner: Owner = Blizzard 61 | } 62 | 63 | object AgeCompanion extends BaseCompanion[Long, Age] { 64 | def validationType: String = "Age" 65 | 66 | def bigQueryType: String = "INTEGER" 67 | 68 | def apply(data: Long): Age = Age(data) 69 | 70 | def parse(data: Long): Age = Age(data) 71 | 72 | def owner: Owner = Blizzard 73 | 74 | def description: String = "This represents an age above 0 and less than 150" 75 | } 
76 | 77 | 78 | object NonNegativeLongCompanion extends BaseCompanion[Long, NonNegativeLong] { 79 | def validationType: String = "NonNegativeLong" 80 | 81 | def bigQueryType: String = "INTEGER" 82 | 83 | def apply(data: Long): NonNegativeLong = NonNegativeLong(data) 84 | 85 | def parse(data: Long): NonNegativeLong = NonNegativeLong(data) 86 | 87 | override def owner: Owner = Blizzard 88 | 89 | override def description: String = "Non negative long" 90 | } 91 | 92 | -------------------------------------------------------------------------------- /elitzur-avro/src/test/scala/com/spotify/elitzur/DynamicAccessorValidationUnionTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur 18 | 19 | import com.spotify.elitzur.converters.avro.dynamic.dsl.AvroObjMapper 20 | import com.spotify.elitzur.converters.avro.dynamic.{ 21 | DynamicAccessorCompanion, 22 | DynamicFieldParser 23 | } 24 | import com.spotify.elitzur.helpers.DynamicAccessorValidatorTestUtils.TestMetricsReporter 25 | import com.spotify.elitzur.schemas.{InnerComplexType, TestAvroUnionTypes} 26 | 27 | import org.scalatest.BeforeAndAfterEach 28 | import org.scalatest.flatspec.AnyFlatSpec 29 | import org.scalatest.matchers.should.Matchers 30 | 31 | class DynamicAccessorValidationUnionTest extends AnyFlatSpec with Matchers with BeforeAndAfterEach { 32 | import com.spotify.elitzur.helpers._ 33 | import Companions._ 34 | 35 | implicit val metricsReporter: MetricsReporter = 36 | DynamicAccessorValidatorTestUtils.metricsReporter() 37 | 38 | override def afterEach(): Unit = { 39 | metricsReporter.asInstanceOf[TestMetricsReporter].cleanSlate() 40 | } 41 | 42 | val userInput: Array[DynamicFieldParser] = Array( 43 | new DynamicFieldParser( 44 | "optRecord.optString:CountryCode", 45 | new DynamicAccessorCompanion[String, CountryCode], 46 | AvroObjMapper.getAvroFun("optRecord.optString", TestAvroUnionTypes.SCHEMA$) 47 | ) 48 | ) 49 | 50 | it should "correctly validate a nullable field if the field exists and is a country code" in { 51 | val testSetUp = new DynamicAccessorValidationHelpers(userInput) 52 | 53 | val validAvroRecord = TestAvroUnionTypes.newBuilder() 54 | .setOptRecord( 55 | InnerComplexType.newBuilder() 56 | .setOptString("US") 57 | .setOptRepeatedArray(null).build() 58 | ).build 59 | 60 | testSetUp.dynamicRecordValidator.validateRecord(validAvroRecord) 61 | 62 | val (countryCodValidCount, countryCodInvalidCount) = testSetUp.getValidAndInvalidCounts( 63 | "optRecord.optString:CountryCode", CountryCompanion) 64 | 65 | (countryCodValidCount, countryCodInvalidCount) should be ((1, 0)) 66 | } 67 | 68 | it should "correctly validate a null value" in { 
69 | val testSetUp = new DynamicAccessorValidationHelpers(userInput) 70 | 71 | val inValidAvroRecord = TestAvroUnionTypes.newBuilder() 72 | .setOptRecord( 73 | InnerComplexType.newBuilder() 74 | .setOptString(null) 75 | .setOptRepeatedArray(null) 76 | .build() 77 | ).build 78 | 79 | testSetUp.dynamicRecordValidator.validateRecord(inValidAvroRecord) 80 | 81 | val (countryCodValidCount, countryCodInvalidCount) = testSetUp.getValidAndInvalidCounts( 82 | "optRecord.optString:CountryCode", CountryCompanion) 83 | 84 | (countryCodValidCount, countryCodInvalidCount) should be ((1, 0)) 85 | } 86 | } 87 | 88 | -------------------------------------------------------------------------------- /elitzur-avro/src/test/scala/com/spotify/elitzur/AccessorOpToValidatorOpTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur 18 | 19 | import com.spotify.elitzur.converters.avro.dynamic.{ 20 | ArrayValidatorOp, 21 | OptionValidatorOp, 22 | ValidatorOp 23 | } 24 | import com.spotify.elitzur.converters.avro.dynamic.dsl.{ 25 | ArrayFlatmapAccessor, 26 | ArrayMapAccessor, 27 | ArrayNoopAccessor, 28 | BaseAccessor, 29 | FieldAccessor, 30 | IndexAccessor, 31 | NullableAccessor 32 | } 33 | import org.scalatest.flatspec.AnyFlatSpec 34 | import org.scalatest.matchers.should.Matchers 35 | 36 | class AccessorOpToValidatorOpTest extends AnyFlatSpec with Matchers { 37 | val DEFAULT_VALUE = "" 38 | 39 | it should "single index accessor should correctly parse" in { 40 | val accessors: List[BaseAccessor] = List(IndexAccessor(DEFAULT_VALUE)) 41 | FieldAccessor(accessors).toValidatorOp should be (List.empty[ValidatorOp]) 42 | } 43 | 44 | it should "subsequent nullable accessor should correctly parse" in { 45 | val accessors: List[BaseAccessor] = 46 | List[BaseAccessor](NullableAccessor(DEFAULT_VALUE, 47 | List[BaseAccessor](IndexAccessor(DEFAULT_VALUE), NullableAccessor(DEFAULT_VALUE, 48 | List[BaseAccessor](IndexAccessor(DEFAULT_VALUE)) 49 | )) 50 | )) 51 | FieldAccessor(accessors).toValidatorOp should be (List(OptionValidatorOp)) 52 | } 53 | 54 | it should "map and nullable accessors should correctly parse" in { 55 | val accessors: List[BaseAccessor] = 56 | List[BaseAccessor](ArrayNoopAccessor(DEFAULT_VALUE, 57 | List[BaseAccessor](IndexAccessor(DEFAULT_VALUE), NullableAccessor(DEFAULT_VALUE, 58 | List[BaseAccessor](IndexAccessor(DEFAULT_VALUE)) 59 | )) 60 | )) 61 | FieldAccessor(accessors).toValidatorOp should be (List(ArrayValidatorOp, OptionValidatorOp)) 62 | } 63 | 64 | it should "only the first map should correctly parse" in { 65 | val accessors: List[BaseAccessor] = 66 | List[BaseAccessor](ArrayFlatmapAccessor(DEFAULT_VALUE, 67 | List[BaseAccessor](IndexAccessor(DEFAULT_VALUE), NullableAccessor(DEFAULT_VALUE, 68 | 
List[BaseAccessor](IndexAccessor(DEFAULT_VALUE), ArrayMapAccessor(DEFAULT_VALUE, 69 | List[BaseAccessor](IndexAccessor(DEFAULT_VALUE)))) 70 | )) 71 | )) 72 | FieldAccessor(accessors).toValidatorOp should be (List(ArrayValidatorOp, OptionValidatorOp)) 73 | } 74 | 75 | it should "null accessors separated by a map accessor should correctly parse" in { 76 | val accessors: List[BaseAccessor] = 77 | List[BaseAccessor](NullableAccessor(DEFAULT_VALUE, 78 | List[BaseAccessor](IndexAccessor(DEFAULT_VALUE), NullableAccessor(DEFAULT_VALUE, 79 | List[BaseAccessor](IndexAccessor(DEFAULT_VALUE), ArrayMapAccessor(DEFAULT_VALUE, 80 | List[BaseAccessor](IndexAccessor(DEFAULT_VALUE), NullableAccessor(DEFAULT_VALUE, 81 | List[BaseAccessor](IndexAccessor(DEFAULT_VALUE)) 82 | )))) 83 | )) 84 | )) 85 | FieldAccessor(accessors).toValidatorOp should be ( 86 | List(OptionValidatorOp, ArrayValidatorOp, OptionValidatorOp)) 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /elitzur-avro/src/test/scala/com/spotify/elitzur/AvroFieldExtractorArrayTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur 18 | 19 | import com.spotify.elitzur.converters.avro.dynamic.dsl.AvroObjMapper 20 | import com.spotify.elitzur.schemas.TestAvroArrayTypes 21 | import helpers.SampleAvroRecords.testAvroArrayTypes 22 | import org.scalatest.flatspec.AnyFlatSpec 23 | import org.scalatest.matchers.should.Matchers 24 | 25 | import collection.JavaConverters._ 26 | 27 | class AvroFieldExtractorArrayTest extends AnyFlatSpec with Matchers { 28 | val testArrayRecord: TestAvroArrayTypes = testAvroArrayTypes 29 | 30 | it should "extract generic records in an array" in { 31 | // Input: {"innerArrayRoot": [{"userId": "one"}, {"userId": "two"}]} 32 | // Output: [{"userId": "one"}, {"userId": "two"}] 33 | val fn = AvroObjMapper.getAvroFun("innerArrayRoot", testArrayRecord.getSchema) 34 | 35 | fn.combineFns(testArrayRecord) should be (testArrayRecord.getInnerArrayRoot) 36 | } 37 | 38 | it should "extract a field from generic records in an array" in { 39 | // Input: {"innerArrayRoot": [{"userId": "one"}, {"userId": "two"}]} 40 | // Output: ["one", "two"] 41 | val fn = AvroObjMapper.getAvroFun("innerArrayRoot.userId", testArrayRecord.getSchema) 42 | 43 | fn.combineFns(testArrayRecord) should be ( 44 | testArrayRecord.getInnerArrayRoot.asScala.map(_.getUserId).asJava) 45 | } 46 | 47 | it should "extract a field from nested generic records in an array" in { 48 | // Input: {"innerArrayRoot": [ 49 | // {"deepNestedRecord": {"recordId": -1}}, 50 | // {"deepNestedRecord": {"recordId": -5}} 51 | // ]} 52 | // Output: [-1, -5] 53 | val fn = AvroObjMapper.getAvroFun( 54 | "innerArrayRoot.deepNestedRecord.recordId", testArrayRecord.getSchema) 55 | 56 | fn.combineFns(testArrayRecord) should be ( 57 | testArrayRecord.getInnerArrayRoot.asScala.map(_.getDeepNestedRecord.getRecordId).asJava) 58 | } 59 | 60 | it should "flatten the resulting array" in { 61 | // Input: {"innerArrayRoot": [ 62 | //
{"innerArrayInsideRecord": [1, 2]}, 63 | // {"innerArrayInsideRecord": [3, 4]} 64 | // ]} 65 | // Output: [1, 2, 3, 4] 66 | val fn = AvroObjMapper.getAvroFun( 67 | "innerArrayRoot.innerArrayInsideRecord", testArrayRecord.getSchema) 68 | 69 | fn.combineFns(testArrayRecord) should be ( 70 | testArrayRecord.getInnerArrayRoot.asScala.flatMap(_.getInnerArrayInsideRecord.asScala).asJava) 71 | } 72 | 73 | it should "flatten resulting array with a nullable field in the path" in { 74 | // Input: {"innerArrayRoot": [ 75 | // {"deeperArrayNestedRecord": {"DeeperArray": [1, 2]}}, 76 | // {"deeperArrayNestedRecord": {"DeeperArray": [3, 4]}} 77 | // ]} 78 | // Output: [1, 2, 3, 4] 79 | val fn = AvroObjMapper.getAvroFun( 80 | "innerArrayRoot.deeperArrayNestedRecord.DeeperArray", testArrayRecord.getSchema) 81 | 82 | fn.combineFns(testArrayRecord) should be ( 83 | testArrayRecord.getInnerArrayRoot 84 | .asScala.flatMap(_.getDeeperArrayNestedRecord.getDeeperArray.asScala).asJava) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /elitzur-avro/src/test/scala/com/spotify/elitzur/DynamicAccessorValidationBaseTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */
package com.spotify.elitzur

import com.spotify.elitzur.converters.avro.dynamic.dsl.AvroObjMapper
import com.spotify.elitzur.converters.avro.dynamic.{DynamicAccessorCompanion, DynamicFieldParser}
import com.spotify.elitzur.helpers.DynamicAccessorValidatorTestUtils.TestMetricsReporter
import com.spotify.elitzur.schemas.TestAvroTypes

import org.scalatest.BeforeAndAfterEach
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
 * Checks the valid/invalid counts reported when two fields nested under `inner`
 * (`playCount` and `countryCode`) of a `TestAvroTypes` record are validated through
 * dynamically configured field parsers.
 */
class DynamicAccessorValidationBaseTest extends AnyFlatSpec with Matchers with BeforeAndAfterEach {
  import com.spotify.elitzur.helpers._
  import Companions._

  implicit val metricsReporter: MetricsReporter =
    DynamicAccessorValidatorTestUtils.metricsReporter()

  // Reset the reporter after every test; presumably so that counts recorded by one test
  // are not visible to the next one.
  override def afterEach(): Unit = {
    metricsReporter.asInstanceOf[TestMetricsReporter].cleanSlate()
  }

  // "<avro field path>:<validation type>" labels, used both to configure the parsers and to
  // look the counters up again.
  private val playCountField = "inner.playCount:NonNegativeLong"
  private val countryCodeField = "inner.countryCode:CountryCode"

  val userInput: Array[DynamicFieldParser] = Array(
    new DynamicFieldParser(
      playCountField,
      new DynamicAccessorCompanion[Long, NonNegativeLong],
      AvroObjMapper.getAvroFun("inner.playCount", TestAvroTypes.SCHEMA$)
    ),
    new DynamicFieldParser(
      countryCodeField,
      new DynamicAccessorCompanion[String, CountryCode],
      AvroObjMapper.getAvroFun("inner.countryCode", TestAvroTypes.SCHEMA$)
    )
  )

  /**
   * Validates one sample record and returns the counters as
   * (playCount valid, playCount invalid, countryCode valid, countryCode invalid).
   *
   * @param isValid forwarded to `SampleAvroRecords.testAvroTypes` to pick the sample record
   */
  private def validateAndCollectCounts(isValid: Boolean) = {
    val testSetUp = new DynamicAccessorValidationHelpers(userInput)
    val sampleRecord = helpers.SampleAvroRecords.testAvroTypes(isValid = isValid)

    // Validate the sample input
    testSetUp.dynamicRecordValidator.validateRecord(sampleRecord)

    val (playCountValidCount, playCountInvalidCount) =
      testSetUp.getValidAndInvalidCounts(playCountField, NonNegativeLongCompanion)
    val (countryCodeValidCount, countryCodeInvalidCount) =
      testSetUp.getValidAndInvalidCounts(countryCodeField, CountryCompanion)

    (playCountValidCount, playCountInvalidCount, countryCodeValidCount, countryCodeInvalidCount)
  }

  it should "correctly count the valid fields" in {
    validateAndCollectCounts(isValid = true) should be ((1, 0, 1, 0))
  }

  it should "correctly count the invalid fields" in {
    validateAndCollectCounts(isValid = false) should be ((0, 1, 0, 1))
  }

}

--------------------------------------------------------------------------------
/elitzur-avro/src/main/scala/com/spotify/elitzur/converters/avro/dynamic/dsl/AvroObjMapper.scala:
--------------------------------------------------------------------------------
1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License.
16 | */
package com.spotify.elitzur.converters.avro.dynamic.dsl

import com.spotify.elitzur.converters.avro.dynamic.dsl.AvroAccessorException._
import org.apache.avro.Schema

import java.{util => ju}
import scala.annotation.tailrec
import scala.collection.mutable
import scala.util.{Failure, Success, Try}

/**
 * Builds (and caches) the [[FieldAccessor]] that extracts the value at a dot-separated
 * field path from an Avro object.
 */
object AvroObjMapper {
  // Cache of already-built accessors. The key includes the schema as well as the path: the
  // same path can exist in several schemas, and an accessor built for one schema must never
  // be handed out for another.
  // NOTE(review): like the original, this map is not synchronized.
  private val mapToAvroFun: mutable.Map[(Schema, String), FieldAccessor] =
    mutable.Map.empty[(Schema, String), FieldAccessor]

  /**
   * Returns the accessor for `avroFieldPath` within `schema`, building it on first use.
   *
   * @param avroFieldPath dot-separated field path, e.g. "inner.playCount"
   * @param schema        schema the path is resolved against
   * @throws InvalidDynamicFieldException if the path cannot be resolved against `schema`
   */
  def getAvroFun(avroFieldPath: String, schema: Schema): FieldAccessor =
    mapToAvroFun.getOrElseUpdate(
      (schema, avroFieldPath),
      new FieldAccessor(getAvroAccessors(avroFieldPath, schema).map(_.ops))
    )

  /**
   * Walks `path` one field at a time and collects the accessor built for every step, in
   * path order. A step that consumes the remainder of the path itself (it reports no
   * `rest`) ends the walk.
   *
   * @param path             remaining dot-separated path
   * @param avroSchema       schema the first field of `path` is looked up in
   * @param accAvroOperators accessors collected so far
   */
  @tailrec
  private[dsl] def getAvroAccessors(
    path: String,
    avroSchema: Schema,
    accAvroOperators: List[AvroAccessorContainer] = List.empty[AvroAccessorContainer]
  ): List[AvroAccessorContainer] = {
    val thisAvroOp = AvroAccessorUtil.mapToAccessors(path, avroSchema)
    val appendedAvroOp = accAvroOperators :+ thisAvroOp
    thisAvroOp.rest match {
      case Some(remainingPath) => getAvroAccessors(remainingPath, thisAvroOp.schema, appendedAvroOp)
      case _ => appendedAvroOp
    }
  }
}

/** Maps a single step of a field path to the accessor logic matching the field's schema. */
object AvroAccessorUtil {
  // Every schema type except ARRAY, MAP and UNION (so this also contains e.g. RECORD);
  // fields of these types are read with a plain index accessor.
  private val PRIMITIVES: ju.EnumSet[Schema.Type] =
    ju.EnumSet.complementOf(ju.EnumSet.of(Schema.Type.ARRAY, Schema.Type.MAP, Schema.Type.UNION))

  // Separator between the fields of a path.
  private val token = '.'

  /**
   * Resolves the first field of `path` in `schema` and builds its accessor.
   *
   * @throws InvalidDynamicFieldException if the field cannot be looked up in `schema`
   */
  def mapToAccessors(path: String, schema: Schema): AvroAccessorContainer = {
    val fieldTokens = pathToTokens(path)
    val fieldSchema = Try(schema.getField(fieldTokens.field).schema()) match {
      case Success(s) => s
      case Failure(_) =>
        // getFields itself throws when `schema` is not a record; fall back to the schema's
        // own description so the caller always receives InvalidDynamicFieldException.
        val available = Try(schema.getFields.toString).getOrElse(schema.toString)
        throw new InvalidDynamicFieldException(s"$path not found in $available")
    }

    mapToAccessors(fieldSchema, fieldTokens)
  }

  /**
   * Builds the accessor for a field whose schema is already known.
   *
   * @throws InvalidDynamicFieldException for MAP schemas, which are not supported
   */
  def mapToAccessors(fieldSchema: Schema, fieldTokens: AvroFieldTokens): AvroAccessorContainer = {
    fieldSchema.getType match {
      case _schema if PRIMITIVES.contains(_schema) =>
        new IndexAccessorLogic(fieldSchema, fieldTokens).avroOp
      case Schema.Type.ARRAY =>
        new ArrayAccessorLogic(fieldSchema.getElementType, fieldTokens).avroOp
      case Schema.Type.UNION =>
        new NullableAccessorLogic(fieldSchema, fieldTokens).avroOp
      case Schema.Type.MAP => throw new InvalidDynamicFieldException(UNSUPPORTED_MAP_SCHEMA)
    }
  }

  // Splits "a.b.c" into field "a" and rest Some("b.c"); a path without a separator (or one
  // that starts with it) is returned whole with no rest.
  private def pathToTokens(path: String): AvroFieldTokens = {
    val idx = path.indexOf(token)
    if (idx > 0) {
      AvroFieldTokens(path.substring(0, idx), Some(path.substring(idx + 1)))
    } else {
      AvroFieldTokens(path, None)
    }
  }
}

/**
 * One step of a resolved path.
 *
 * @param ops    accessor for this step
 * @param schema schema the next step is resolved against
 * @param rest   part of the path that still has to be resolved, if any
 */
case class AvroAccessorContainer(ops: BaseAccessor, schema: Schema, rest: Option[String])

/** A path split into its first field and the remainder after the first separator. */
case class AvroFieldTokens(field: String, rest: Option[String])

--------------------------------------------------------------------------------
/elitzur-avro/src/test/scala/com/spotify/elitzur/DynamicAccessorValidationArrayTest.scala:
--------------------------------------------------------------------------------
1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */
package com.spotify.elitzur

import com.spotify.elitzur.converters.avro.dynamic.dsl.AvroObjMapper
import com.spotify.elitzur.converters.avro.dynamic.{DynamicAccessorCompanion, DynamicFieldParser}
import com.spotify.elitzur.helpers.DynamicAccessorValidatorTestUtils.TestMetricsReporter
import com.spotify.elitzur.schemas.TestAvroArrayTypes
import com.spotify.ratatool.scalacheck.avroOf
import com.spotify.ratatool.scalacheck._
import org.scalatest.BeforeAndAfterEach
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import collection.JavaConverters._

/**
 * Checks the valid/invalid counts reported when array-typed fields of `TestAvroArrayTypes`
 * are validated through dynamically configured field parsers.
 */
class DynamicAccessorValidationArrayTest extends AnyFlatSpec with Matchers with BeforeAndAfterEach {
  import com.spotify.elitzur.helpers._
  import Companions._

  implicit val metricsReporter: MetricsReporter =
    DynamicAccessorValidatorTestUtils.metricsReporter()

  // Reset the reporter after every test.
  override def afterEach(): Unit =
    metricsReporter.asInstanceOf[TestMetricsReporter].cleanSlate()

  it should "correctly validate and invalidate elements in a list (Seq)" in {
    val parsers: Array[DynamicFieldParser] = Array(
      new DynamicFieldParser(
        "arrayLongs:NonNegativeLong",
        new DynamicAccessorCompanion[Long, NonNegativeLong],
        AvroObjMapper.getAvroFun("arrayLongs", TestAvroArrayTypes.SCHEMA$)
      )
    )
    val harness = new DynamicAccessorValidationHelpers(parsers)

    // A generated record: the expected counts are derived from its content further down.
    val generatedRecord: TestAvroArrayTypes = avroOf[TestAvroArrayTypes].sample.get
    harness.dynamicRecordValidator.validateRecord(generatedRecord)

    val (actualValid, actualInvalid) =
      harness.getValidAndInvalidCounts("arrayLongs:NonNegativeLong", NonNegativeLongCompanion)

    // Wrap every element in the validation type and split by validity to get the
    // expected counts.
    val wrappedLongs = generatedRecord.getArrayLongs.asScala.map(NonNegativeLong(_))
    val (expectedValid, expectedInvalid) = wrappedLongs.partition(_.checkValid)

    (actualValid, actualInvalid) should be((expectedValid.length, expectedInvalid.length))
  }

  it should "correctly validate and invalidate nullable elements in a list (Seq.Option)" in {
    val parsers: Array[DynamicFieldParser] = Array(
      new DynamicFieldParser(
        "arrayNullableStrings:CountryCode",
        new DynamicAccessorCompanion[String, CountryCode],
        AvroObjMapper.getAvroFun("arrayNullableStrings", TestAvroArrayTypes.SCHEMA$)
      )
    )
    val harness = new DynamicAccessorValidationHelpers(parsers)

    // Three elements: "SE", "NYC" and null. The expected (2, 1) below implies the null
    // element is counted as valid - presumably because an absent optional value is not
    // treated as a validation failure.
    val amendedRecord: TestAvroArrayTypes = avroOf[TestAvroArrayTypes]
      .amend(List(
        "SE".asInstanceOf[CharSequence],
        "NYC".asInstanceOf[CharSequence],
        null).asJava)(_.setArrayNullableStrings)
      .sample.get

    harness.dynamicRecordValidator.validateRecord(amendedRecord)

    val (actualValid, actualInvalid) =
      harness.getValidAndInvalidCounts("arrayNullableStrings:CountryCode", CountryCompanion)

    (actualValid, actualInvalid) should be((2, 1))
  }

}

--------------------------------------------------------------------------------
/elitzur-examples/src/main/scala/com/spotify/elitzur/examples/ScioAvro.scala:
--------------------------------------------------------------------------------
1 | /* 2 | * Copyright 2020 Spotify AB.
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */
package com.spotify.elitzur.examples


import com.spotify.scio._
import com.spotify.scio.avro._
import com.spotify.elitzur.schemas.{InnerNestedType, TestAvroTypes}
import com.spotify.elitzur.scio._
import com.spotify.elitzur.validators._
import org.slf4j.LoggerFactory
import com.spotify.elitzur.examples.Companions._
import org.apache.beam.sdk.metrics.MetricName

// Example: Reading in Avro records within a Scio job and validating

// Usage:
// sbt "elitzur-examples/runMain com.spotify.elitzur.examples.ScioAvro --runner=DirectRunner"
// Dataflow requires additional params, see https://beam.apache.org/documentation/runners/dataflow/

object ScioAvro {

  private val logger = LoggerFactory.getLogger(this.getClass)

  // Age, NonNegativeLong and CountryCode are validation types; userFloat is a plain Float.
  case class User(
    userAge: Age,
    userLong: NonNegativeLong,
    userFloat: Float,
    inner: InnerNested
  )

  case class InnerNested(countryCode: CountryCode)

  // Both builders are reused for all three sample records; every field is set again before
  // each build().
  val builder :TestAvroTypes.Builder = TestAvroTypes.newBuilder()
  val innerBuilder : InnerNestedType.Builder = InnerNestedType.newBuilder()
  val avroRecords: Seq[TestAvroTypes] = Seq(
    // record with all fields valid
    builder
      .setUserAge(33L)
      .setUserLong(45L)
      .setUserFloat(4f)
      .setInner(innerBuilder.setCountryCode("US").setUserId("182").setPlayCount(72L).build())
      .build(),
    // record with invalid age
    builder
      .setUserAge(-33L)
      .setUserLong(45L)
      .setUserFloat(4f)
      .setInner(innerBuilder.setCountryCode("CA").setUserId("129").setPlayCount(43L).build())
      .build(),
    // record with invalid country code
    builder
      .setUserAge(33L)
      .setUserLong(45L)
      .setUserFloat(4f)
      .setInner(innerBuilder.setCountryCode("USA").setUserId("678").setPlayCount(201L).build())
      .build()
  )

  /*
  Common use case of Elitzur is to read in a schematized file in a standard format, e.g. Avro.

  We define case classes that specify validation types for fields we want to validate, in this
  example, Age, NonNegativeLong and CountryCode and validate records based on those validation types

  One can then go back to a standard file format, e.g. in this case back to Avro to persist the data
   */
  def main(cmdlineArgs: Array[String]): Unit = {
    val (sc, _) = ContextAndArgs(cmdlineArgs)
    // For reading in real avro files, can use
    // val records = sc.typedAvroFile[TestAvroTypes](inputPath) instead
    val records = sc.parallelize(avroRecords)

    records
      .fromAvro[User]
      .validate()

    // To save the validated records as Avro output, uncomment the next two lines:
    // .toAvro[TestAvroOut]
    //.saveAsAvroFile(outputPath)

    val result = sc
      .run()
      .waitUntilDone()

    val elitzurCounters = ElitzurMetrics.getElitzurCounters(result)

    // log to display validation metrics
    logCounters(elitzurCounters)
  }

  /**
   * Logs every counter with its committed value (0 when no committed value is available)
   * as a single info message.
   */
  def logCounters(counters: Map[MetricName, metrics.MetricValue[Long]]) : Unit = {
    val logString = counters
      .map { case (name, value) =>
        s"\n Counter ${name.toString} has value ${value.committed.getOrElse(0L).toString}"
      }
      .mkString

    logger.info(s"Logging Elitzur Counters: $logString \n Done logging Elitzur Counters")
  }


}

--------------------------------------------------------------------------------
/elitzur-avro/src/test/scala/com/spotify/elitzur/AvroFieldExtractorUnionTest.scala:
--------------------------------------------------------------------------------
/*
 * Copyright 2021 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.spotify.elitzur

import com.spotify.elitzur.converters.avro.dynamic.dsl.AvroObjMapper
import com.spotify.elitzur.helpers.SampleAvroRecords.innerNestedSample
import com.spotify.elitzur.schemas.{InnerComplexType, TestAvroUnionTypes}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import collection.JavaConverters._

/**
 * Checks what the accessors built by `AvroObjMapper.getAvroFun` extract from records whose
 * fields are nullable unions (`TestAvroUnionTypes`).
 */
class AvroFieldExtractorUnionTest extends AnyFlatSpec with Matchers {

  it should "extract a null from an Union schema type" in {
    // Input: {"optRecord": null}
    // Output: null
    val fn = AvroObjMapper.getAvroFun("optRecord.optString", TestAvroUnionTypes.SCHEMA$)
    val testNullRecord = TestAvroUnionTypes.newBuilder().setOptRecord(null).build

    fn.combineFns(testNullRecord) should be (testNullRecord.getOptRecord)
  }

  it should "extract a null from a nested Union Avro schema type" in {
    // Input: {"optRecord": {"optString": null}}
    // Output: null
    val fn = AvroObjMapper.getAvroFun("optRecord.optString", TestAvroUnionTypes.SCHEMA$)
    val testInnerNullRecord = TestAvroUnionTypes.newBuilder()
      .setOptRecord(
        InnerComplexType.newBuilder()
          .setOptString(null)
          .setOptRepeatedArray(null)
          .build()
      ).build

    fn.combineFns(testInnerNullRecord) should be (testInnerNullRecord.getOptRecord.getOptString)
  }

  it should "extract a primitive from a Union Avro schema type" in {
    // Input: {"optRecord": {"optString": "abc"}}
    // Output: "abc"
    val fn = AvroObjMapper.getAvroFun("optRecord.optString", TestAvroUnionTypes.SCHEMA$)
    val testInnerNonNullRecord = TestAvroUnionTypes.newBuilder()
      .setOptRecord(
        InnerComplexType.newBuilder()
          .setOptString("abc")
          .setOptRepeatedArray(null).build()
      ).build

    // The opening parenthesis must stay on the same line as `be`: with the expected value
    // alone on the next line, Scala ends the statement after `be` and nothing is asserted.
    fn.combineFns(testInnerNonNullRecord) should be (
      testInnerNonNullRecord.getOptRecord.getOptString)
  }

  it should "return null if child schema is non-nullable" in {
    // Input: {"optRecord": null}
    // Output: null
    val fnNonNull = AvroObjMapper.getAvroFun("optRecord.nonOptString", TestAvroUnionTypes.SCHEMA$)
    val testNullRecord = TestAvroUnionTypes.newBuilder().setOptRecord(null).build

    fnNonNull.combineFns(testNullRecord) should be (testNullRecord.getOptRecord)
  }

  it should "return the elements of an array if array is not null" in {
    // Input: {"optRecord": {"optRepeatedArray": [{"userId": "a", "countryCode": "US"}]}}
    // Output: "a"
    val fnArrayNull = AvroObjMapper.getAvroFun("optRecord.optRepeatedArray.userId",
      TestAvroUnionTypes.SCHEMA$)
    val testInnerNonNullRecord = TestAvroUnionTypes.newBuilder()
      .setOptRecord(
        InnerComplexType.newBuilder()
          .setOptString(null)
          .setOptRepeatedArray(List(innerNestedSample()).asJava).build()
      ).build

    fnArrayNull.combineFns(testInnerNonNullRecord) should be (
      testInnerNonNullRecord.getOptRecord.getOptRepeatedArray.asScala.map(_.getUserId).asJava)
  }

  it should "return null if array is null" in {
    // Input: {"optRecord": {"optRepeatedArray": null}}
    // Output: null
    val fnArrayNull = AvroObjMapper.getAvroFun("optRecord.optRepeatedArray.userId",
      TestAvroUnionTypes.SCHEMA$)
    val testInnerNullRecord = TestAvroUnionTypes.newBuilder()
      .setOptRecord(
        InnerComplexType.newBuilder()
          .setOptString(null)
          .setOptRepeatedArray(null).build()).build

    fnArrayNull.combineFns(testInnerNullRecord) should be (
      testInnerNullRecord.getOptRecord.getOptRepeatedArray)
  }

}

--------------------------------------------------------------------------------
/elitzur-avro/src/main/scala/com/spotify/elitzur/converters/avro/dynamic/dsl/AvroAccessorLogics.scala:
--------------------------------------------------------------------------------
1 | /* 2 | * Copyright 2021 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License.
16 | */
package com.spotify.elitzur.converters.avro.dynamic.dsl

import com.spotify.elitzur.converters.avro.dynamic.dsl.AvroAccessorException._
import com.spotify.elitzur.converters.avro.dynamic.dsl.AvroAccessorUtil.mapToAccessors
import org.apache.avro.Schema

import java.{util => ju}

/** Pairs the accessor built for one step of a field path with its container. */
trait BaseAccessorLogic {
  val accessor: BaseAccessor
  val avroOp: AvroAccessorContainer
}

/**
 * Step for a field read directly by name. The remainder of the path is passed on in the
 * container so the caller resolves it against `schema`.
 */
class IndexAccessorLogic(schema: Schema, fieldTokens: AvroFieldTokens) extends BaseAccessorLogic {
  override val accessor: BaseAccessor = IndexAccessor(fieldTokens.field)
  override val avroOp: AvroAccessorContainer =
    AvroAccessorContainer(accessor, schema, fieldTokens.rest)
}

/**
 * Step for a field whose schema is a union of null and exactly one other type. The whole
 * remainder of the path is resolved here and nested inside the nullable accessor, so the
 * resulting container carries no `rest`.
 *
 * NOTE: the vals below depend on each other and must stay in this order.
 */
class NullableAccessorLogic(
  schema: Schema, fieldTokens: AvroFieldTokens) extends BaseAccessorLogic {

  val nonNullSchema: Schema = getNonNullSchema(schema)
  val headAccessor: AvroAccessorContainer = mapToAccessors(nonNullSchema, fieldTokens)
  override val accessor: BaseAccessor =
    getNullableAccessor(nonNullSchema, fieldTokens.field, headAccessor)
  override val avroOp: AvroAccessorContainer = AvroAccessorContainer(accessor, nonNullSchema, None)

  private def getNullableAccessor(
    innerSchema: Schema, field: String, headAccessor: AvroAccessorContainer
  ): NullableAccessor = {
    headAccessor.rest match {
      case Some(remainingPath) =>
        val recursiveResult = AvroObjMapper.getAvroAccessors(remainingPath, innerSchema)
        // innerOps represents the list of all accessors to be applied if the avro obj is not null
        val innerOps = (headAccessor +: recursiveResult).map(_.ops)
        NullableAccessor(field, innerOps)
      case None =>
        NullableAccessor(field, List(headAccessor.ops))
    }
  }

  /**
   * Returns the single non-null branch of the union.
   *
   * @throws InvalidDynamicFieldException if the union does not have exactly one non-null
   *                                      branch (none at all, or more than one)
   */
  private def getNonNullSchema(schema: Schema): Schema = {
    val nonNullSchemas: ju.ArrayList[Schema] = new ju.ArrayList[Schema]
    schema.getTypes.forEach(s => if (s.getType != Schema.Type.NULL) {nonNullSchemas.add(s)})
    // Exactly one branch is required: a union with no non-null branch used to fall through
    // to get(0) and fail with IndexOutOfBoundsException.
    if (nonNullSchemas.size != 1) {
      throw new InvalidDynamicFieldException(INVALID_UNION_SCHEMA)
    }
    nonNullSchemas.get(0)
  }
}

/**
 * Step for an array field. The remainder of the path is resolved against the element
 * schema and nested inside the array accessor, so the resulting container carries no `rest`.
 */
class ArrayAccessorLogic(
  arrayElemSchema: Schema, fieldTokens: AvroFieldTokens) extends BaseAccessorLogic {

  override val accessor: BaseAccessor = getArrayAccessor(arrayElemSchema, fieldTokens)
  override val avroOp: AvroAccessorContainer =
    AvroAccessorContainer(accessor, arrayElemSchema, None)

  private def getArrayAccessor(innerSchema: Schema, fieldTokens: AvroFieldTokens): BaseAccessor = {
    fieldTokens.rest match {
      case Some(remainingPath) =>
        val recursiveResult = AvroObjMapper.getAvroAccessors(remainingPath, innerSchema)
        // innerOps represents the list of accessors to be applied to each element in an array
        val innerOps = recursiveResult.map(_.ops)
        // flatten when one of the inner operations is itself an array operation
        if (getFlattenFlag(innerOps)) {
          ArrayFlatmapAccessor(fieldTokens.field, innerOps)
        } else {
          ArrayMapAccessor(fieldTokens.field, innerOps)
        }
      case None =>
        val headAccessor: BaseAccessor = mapToAccessors(innerSchema, fieldTokens).ops
        ArrayNoopAccessor(fieldTokens.field, List(headAccessor))
    }
  }

  // True if any accessor in `ops` is an array accessor, looking inside nullable accessors too.
  private def getFlattenFlag(ops: List[BaseAccessor]): Boolean =
    ops.exists {
      case n: NullableAccessor => getFlattenFlag(n.innerOps)
      case _: ArrayMapAccessor | _: ArrayFlatmapAccessor | _: ArrayNoopAccessor => true
      case _ => false
    }
}

--------------------------------------------------------------------------------
/elitzur-scio/src/test/scala/com/spotify/elitzur/scio/ElitzurMetricsTest.scala:
--------------------------------------------------------------------------------
/*
 * Copyright 2020 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.spotify.elitzur.scio

import com.spotify.elitzur.CountryCodeTesting
import com.spotify.elitzur.scio.ElitzurMetrics.getValidationTypeFromCaseClass
import com.spotify.elitzur.validators.ValidationStatus
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import org.scalatest.PrivateMethodTester

//scalastyle:off no.whitespace.before.left.bracket
/**
 * Checks that `ElitzurMetrics.getValidationTypeFromCaseClass` resolves a dot-separated
 * field path inside a case class to the simple name of the validation type found there,
 * for plain, optional, `ValidationStatus`-wrapped and repeated fields.
 */
class ElitzurMetricsTest extends AnyFlatSpec with PrivateMethodTester with Matchers {

  case class Test(test: Int)

  // Fixture records. Field names are referenced by the paths used in the tests below.
  case class HasValidationTypes(
    inner: CountryCodeTesting,
    innerOption: Option[CountryCodeTesting])

  case class HasNestedValidationTypes(
    outer: HasValidationTypes,
    outerOption: Option[HasValidationTypes])

  case class HasWrappedValidationTypes(
    inner: ValidationStatus[CountryCodeTesting],
    innerOption: ValidationStatus[Option[CountryCodeTesting]])

  case class HasNestedWrappedValidationTypes(
    outer: ValidationStatus[HasWrappedValidationTypes],
    outerOption: ValidationStatus[Option[HasValidationTypes]])

  case class HasMixedWrapping(
    outer: ValidationStatus[HasWrappedValidationTypes],
    outerOption: ValidationStatus[Option[HasValidationTypes]])

  case class RepeatedTest(repeated: List[CountryCodeTesting], nested: List[HasValidationTypes])

  // Handle used to call the method through PrivateMethodTester.
  private val validationTypeLookup =
    PrivateMethod[String](Symbol("getValidationTypeFromCaseClass"))

  // Resolves every outer/inner path combination in `recordClass` and expects the same
  // validation type name each time.
  private def expectCountryCodeOnAllNestedPaths(recordClass: Class[_]): Unit = {
    val nestedPaths = Seq(
      "outer.inner",
      "outer.innerOption",
      "outerOption.inner",
      "outerOption.innerOption")

    nestedPaths.foreach { path =>
      val resolvedName = ElitzurMetrics invokePrivate validationTypeLookup(recordClass, path)
      resolvedName shouldBe "CountryCodeTesting"
    }
  }

  "getValidationTypeFromCaseClass" should "return unqualified validation type name" in {
    val resolvedName =
      ElitzurMetrics invokePrivate validationTypeLookup(classOf[HasValidationTypes], "inner")
    resolvedName shouldBe "CountryCodeTesting"
  }

  "getValidationTypeFromCaseClass" should "work for nested fields" in {
    expectCountryCodeOnAllNestedPaths(classOf[HasNestedValidationTypes])
  }

  "getValidationTypeFromCaseClass" should "work for wrapped fields and records" in {
    expectCountryCodeOnAllNestedPaths(classOf[HasNestedWrappedValidationTypes])
  }

  "getValidationTypeFromCaseClass" should
    "work for records where some inner fields are wrapped and some aren't" in {
    expectCountryCodeOnAllNestedPaths(classOf[HasMixedWrapping])
  }

  "getValidationTypeFromCaseClass" should "work for repeated fields" in {
    val resolvedName =
      ElitzurMetrics.getValidationTypeFromCaseClass(classOf[RepeatedTest], "repeated")
    resolvedName shouldBe "CountryCodeTesting"
  }

  "getValidationTypeFromCaseClass" should "work for repeated records" in {
    val resolvedName =
      ElitzurMetrics.getValidationTypeFromCaseClass(classOf[RepeatedTest], "nested.inner")
    resolvedName shouldBe "CountryCodeTesting"
  }
}
//scalastyle:on no.whitespace.before.left.bracket

--------------------------------------------------------------------------------
/elitzur-avro/src/main/scala/com/spotify/elitzur/converters/avro/Implicits.scala:
--------------------------------------------------------------------------------
1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License.
16 | */
package com.spotify.elitzur.converters.avro

import com.spotify.scio.coders.Coder
import org.apache.avro.Schema
import org.apache.avro.generic.GenericContainer
import enumeratum._

import scala.reflect.ClassTag

/**
 * Implicit [[AvroConverter]] instances: primitives, byte arrays, strings, validation
 * types, their `Option` / `ValidationStatus` wrappers, standard collections and
 * enumeratum enums.
 */
trait Implicits {
  import AvroConverter._
  import com.spotify.elitzur.validators._

  // Primitive types
  implicit val intT: PrimitiveConverter[Int] = new PrimitiveConverter[Int]
  implicit val longT: PrimitiveConverter[Long] = new PrimitiveConverter[Long]
  implicit val doubleT: PrimitiveConverter[Double] = new PrimitiveConverter[Double]
  implicit val floatT: PrimitiveConverter[Float] = new PrimitiveConverter[Float]
  implicit val boolT: PrimitiveConverter[Boolean] = new PrimitiveConverter[Boolean]

  // Byte arrays: the incoming value is cast to a ByteBuffer and copied out; outgoing arrays
  // are wrapped in a ByteBuffer.
  implicit val arrBT: AvroConverter[Array[Byte]] = new AvroConverter[Array[Byte]] {
    override def fromAvro(v: Any, schema: Schema, doc: Option[String] = None): Array[Byte] =
      AvroElitzurConversionUtils.byteBufferToByteArray(v.asInstanceOf[java.nio.ByteBuffer])

    override def toAvro(v: Array[Byte], schema: Schema): Any =
      java.nio.ByteBuffer.wrap(v)

    override def toAvroDefault(v: Array[Byte], defaultGenericContainer: GenericContainer): Any =
      java.nio.ByteBuffer.wrap(v)
  }

  // A primitive converter cannot be used for strings: the incoming value may be an
  // org.apache.avro.util.Utf8 (a CharSequence), which cannot be cast to String, so it is
  // converted with toString instead.
  implicit val stringT: AvroConverter[String] = new AvroConverter[String] {
    override def fromAvro(v: Any, schema: Schema, doc: Option[String]): String =
      v.toString

    override def toAvro(v: String, schema: Schema): Any =
      v

    override def toAvroDefault(v: String, defaultGenericContainer: GenericContainer): Any =
      v
  }

  // Validation types
  //scalastyle:off line.size.limit structural.type
  implicit def simpleTypeConverter[LT <: BaseValidationType[T]: ({type L[x] = SimpleCompanionImplicit[T, x]})#L,T: AvroConverter]
  : AvroConverter[LT] =
    new AvroSimpleTypeConverter[LT, T]

  implicit def dynamicTypeConverter[LT <: DynamicValidationType[T, _, LT]: ({type L[x] = DynamicCompanionImplicit[T, _, x]})#L, T: AvroConverter]
  : AvroConverter[LT] =
    new AvroDynamicTypeConverter[LT, T]
  //scalastyle:on line.size.limit structural.type

  // Option / ValidationStatus wrappers around validation types
  implicit def validationTypeOptionConverter[T <: BaseValidationType[_]: AvroConverter]
  : AvroConverter[Option[T]] =
    new AvroOptionConverter[T]

  implicit def wrappedValidationTypeConverter[T <: BaseValidationType[_]: AvroConverter]
  : AvroConverter[ValidationStatus[T]] =
    new AvroStatusConverter[T]

  implicit def statusOptionEncryptionValidator[T <: BaseValidationType[_]: AvroConverter]
  : AvroConverter[ValidationStatus[Option[T]]] =
    new AvroStatusOptionConverter[T]

  // Option / ValidationStatus wrappers around any convertible type
  implicit def optionConverter[T: AvroConverter]: AvroConverter[Option[T]] =
    new OptionConverter[T]

  implicit def wrappedValidationConverter[T: AvroConverter]: AvroConverter[ValidationStatus[T]] =
    new AvroWrappedValidationConverter[T]

  // Collections: one converter per target collection, each handing AvroSeqConverter a
  // factory for that collection's builder.
  implicit def seqConverter[T: AvroConverter: Coder: ClassTag]: AvroConverter[Seq[T]] =
    new AvroSeqConverter[T, Seq](() => Seq.newBuilder[T])

  implicit def listConverter[T: AvroConverter: Coder: ClassTag]: AvroConverter[List[T]] =
    new AvroSeqConverter[T, List](() => List.newBuilder[T])

  implicit def vectorConverter[T: AvroConverter: Coder: ClassTag]: AvroConverter[Vector[T]] =
    new AvroSeqConverter[T, Vector](() => Vector.newBuilder[T])

  implicit def arrayConverter[T: AvroConverter: Coder: ClassTag]: AvroConverter[Array[T]] =
    new AvroSeqConverter[T, Array](() => Array.newBuilder[T])

  // enumeratum enums
  implicit def enumConverter[T <: enumeratum.EnumEntry: Enum]: AvroConverter[T] =
    new AvroEnumConverter[T]

}
103 
// Example: a common Elitzur use case is to start from a schematized dataset (Avro, Protobuf,
// BigQuery, etc.) and transform it into validation types. For instance, a "CountryCode" field
// that is a plain string in the Avro schema is converted into a Scala case class field of type
// CountryCode, which carries a built-in validator. Elitzur handles the conversion from the Avro
// schema to the Scala case class; the built-in validators are then used to check which records
// are valid.

object AvroBasic {

  // Age, NonNegativeLong and CountryCode are validated types;
  // userFloat is a plain field that is deliberately left unvalidated.
  case class User(
    userAge: Age,
    userLong: NonNegativeLong,
    userFloat: Float,
    inner: InnerNested
  )

  case class InnerNested(countryCode: CountryCode)

  // A MetricsReporter keeps count of valid and invalid fields.
  // Scio pipelines do not need to define one explicitly.
  implicit val metricsReporter: MetricsReporter = new MetricsReporter {
    val map: scala.collection.mutable.Map[String, Int] =
      scala.collection.mutable.Map[String, Int]().withDefaultValue(0)

    // Increment the counter stored under `key` (absent keys start from the default of 0).
    private def bump(key: String): Unit = map.update(key, map(key) + 1)

    override def reportValid(className: String, fieldName: String, validationType: String): Unit =
      bump(s"$className.$fieldName.$validationType.valid")

    override def reportInvalid(className: String, fieldName: String, validationType: String): Unit =
      bump(s"$className.$fieldName.$validationType.invalid")

    override def toString: String = map.toString()
  }

  // Deserialized avro records.
  //scalastyle:off magic.number
  val builder: TestAvroTypes.Builder = TestAvroTypes.newBuilder()
  val innerBuilder: InnerNestedType.Builder = InnerNestedType.newBuilder()

  // Builds one sample record with the shared builders; only the age and the nested
  // record differ between the samples below.
  private def sampleRecord(age: Long,
                           countryCode: String,
                           userId: String,
                           playCount: Long): TestAvroTypes = {
    val nested =
      innerBuilder.setCountryCode(countryCode).setUserId(userId).setPlayCount(playCount).build()
    builder
      .setUserAge(age)
      .setUserLong(45L)
      .setUserFloat(4f)
      .setInner(nested)
      .build()
  }

  val avroRecords: Seq[TestAvroTypes] = Seq(
    // record with all fields valid
    sampleRecord(33L, "US", "182", 72L),
    // record with invalid age
    sampleRecord(-33L, "CA", "129", 43L),
    // record with invalid country code
    sampleRecord(33L, "USA", "678", 201L)
  )
  //scalastyle:on magic.number

  val c: AvroConverter[User] = implicitly[AvroConverter[User]]
  val v: Validator[User] = implicitly[Validator[User]]

  def main(args: Array[String]): Unit = {
    // Transform avro records into the case class with validation types. The AvroConverter maps
    // strings, longs, etc. to Age, CountryCode and NonNegativeLong at any nesting depth of the
    // avro schema.
    val users = avroRecords.map(record => c.fromAvro(record, TestAvroTypes.SCHEMA$))
    // validate records
    val validated = users.map(user => v.validateRecord(Unvalidated(user)))
    // map case classes to avro output
    validated.map(status => c.toAvro(status.forceGet, TestAvroOut.SCHEMA$))

    // See valid/invalid counts of fields which have been validated
    //scalastyle:off regex
    println(metricsReporter.toString)
    //scalastyle:on regex
  }
}
/**
 * Exercises the derived [[AvroConverter]] instances: round-trips of enumeratum enums through
 * generic/specific Avro records, and conversion of case classes with nested optional records.
 */
class AvroConverterTest extends AnyFlatSpec with Matchers {

  it should "round-trip via a generic record" in {
    import AvroClassConverterTest._
    import com.spotify.elitzur.converters.avro._
    import com.spotify.elitzur.schemas.TestAvroEnum

    val schema: Schema = TestAvroEnum.getClassSchema
    val converter: AvroConverter[TestEnum] = implicitly
    val encoder = new BinaryMessageEncoder[GenericData.Record](new GenericData(), schema)
    val decoder = new BinaryMessageDecoder[TestAvroEnum](new SpecificData(), schema)

    val original: TestEnum = TestEnum(EnumValue.SnakeCaseBbb, Some(EnumValue.SnakeCaseCcc))

    // case class -> generic record -> bytes -> specific record -> case class
    val generic: GenericData.Record =
      converter.toAvro(original, schema).asInstanceOf[GenericData.Record]
    val payload: ByteBuffer = encoder.encode(generic)
    val specific: TestAvroEnum = decoder.decode(payload)
    val restored: TestEnum = converter.fromAvro(specific, schema)

    restored shouldEqual original
  }

  it should "round-trip enumeratum enums" in {
    import AvroClassConverterTest._
    import com.spotify.elitzur.converters.avro._
    import com.spotify.elitzur.schemas.TestAvroEnum

    val converter: AvroConverter[TestEnum] = implicitly
    val schema: Schema = TestAvroEnum.getClassSchema

    // optional enum present
    val withOptional: TestEnum = TestEnum(EnumValue.SnakeCaseBbb, Some(EnumValue.SnakeCaseCcc))
    val withOptionalBack: TestEnum =
      converter.fromAvro(converter.toAvro(withOptional, schema), schema)
    withOptionalBack shouldEqual withOptional

    // optional enum absent
    val withoutOptional: TestEnum = TestEnum(EnumValue.SnakeCaseAaa, None)
    val withoutOptionalBack: TestEnum =
      converter.fromAvro(converter.toAvro(withoutOptional, schema), schema)
    withoutOptionalBack shouldEqual withoutOptional
  }

  it should "work on nested optional records w/toAvro" in {
    import AvroClassConverterTest._
    import com.spotify.elitzur.converters.avro._
    import com.spotify.elitzur.schemas._

    val converter: AvroConverter[TestTypes] = implicitly
    val emptyInner = Inner("", "", 0L)
    val input: TestTypes = TestTypes(0L, 0F, 0L, Some(emptyInner), emptyInner)

    // Smoke test: the conversion only has to complete without throwing.
    converter.toAvro(input, TestAvroTypes.getClassSchema)
  }

  it should "work on nested optional records w/toAvroDefault" in {
    import AvroClassConverterTest._
    import com.spotify.elitzur.converters.avro._
    import com.spotify.elitzur.schemas._

    val converter: AvroConverter[TestTypes] = implicitly
    val emptyInner = Inner("", "", 0L)
    val input: TestTypes = TestTypes(0L, 0F, 0L, Some(emptyInner), emptyInner)

    val avroInner = InnerNestedType.newBuilder()
      .setUserId("")
      .setCountryCode("")
      .setPlayCount(0L)
      .build()

    val defaultRecord = TestAvroTypes.newBuilder()
      .setUserAge(0L)
      .setUserFloat(0F)
      .setUserLong(0L)
      .setInnerOpt(avroInner)
      .setInner(avroInner)
      .build()

    // Smoke test: the conversion only has to complete without throwing.
    converter.toAvroDefault(input, defaultRecord)
  }
}
-------------------------------------------------------------------------------- /benchmarking/src/main/scala/com/spotify/elitzur/ValidationTypeTestingExamples.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur 18 | 19 | import java.util.Locale 20 | 21 | import com.spotify.elitzur.types.Owner 22 | import com.spotify.elitzur.validators.{BaseCompanion, BaseValidationType, SimpleCompanionImplicit} 23 | 24 | /** 25 | * This file contains 'fake' ValidationType & related implementations, used only to test that 26 | * validation runs consistently. 
/**
 * Holder for the ISO 3166 two-letter country codes known to the JVM.
 *
 * `Locale.getISOCountries` hands back a fresh array on every call, so the codes are read once
 * here and kept as a `Set` for constant-time membership checks.
 */
private object CountryCodeExampleIsoCodes {
  val values: Set[String] = Locale.getISOCountries.toSet
}

/**
 * Example validation type wrapping a country code string.
 *
 * @param data candidate country code; valid only when it is one of the ISO country codes
 *             returned by `java.util.Locale.getISOCountries` (exact, case-sensitive match)
 */
case class CountryCodeExample(data: String)
  extends BaseValidationType[String] {
  // Same membership semantics as scanning the array, without re-copying it per validation.
  override def checkValid: Boolean = CountryCodeExampleIsoCodes.values.contains(data)
}
/**
 * Holder for the referrer buckets accepted by [[BucketizedReferrerExample]].
 *
 * Kept outside the case class so the set is built once instead of once per validated value.
 */
private object BucketizedReferrerExampleValues {
  val values: Set[String] = Set(
    "home",
    "your_library",
    "search",
    "browse",
    "radio",
    "other")
}

/**
 * Example validation type wrapping a bucketized referrer string.
 *
 * @param data candidate referrer; valid only when it is exactly one of
 *             home, your_library, search, browse, radio, other
 */
case class BucketizedReferrerExample(data: String)
  extends BaseValidationType[String] {
  override def checkValid: Boolean = BucketizedReferrerExampleValues.values.contains(data)
}
//scalastyle:off line.size.limit structural.type
/**
 * Bridges the dynamic Avro accessor DSL and a validation type `LT` whose underlying data type
 * is `T`. It resolves the companion of `LT` from the implicit [[SimpleCompanionImplicit]] and
 * uses it to (a) turn raw values read from Avro into `LT` instances and (b) look up the
 * matching [[Validator]] for a given combination of Option/Array modifiers.
 *
 * @tparam T  underlying data type of the validation type (e.g. String, Long, Array[Byte])
 * @tparam LT validation type wrapping `T`
 */
class DynamicAccessorCompanion[T: TypeTag, LT <: BaseValidationType[T]: ClassTag: ({type L[x] = SimpleCompanionImplicit[T, x]})#L] extends Serializable {
  //scalastyle:on line.size.limit structural.type

  private val companion: BaseCompanion[T, LT] =
    implicitly[SimpleCompanionImplicit[T, LT]].companion
  private[dynamic] val validationType: String = companion.validationType

  // Unchecked: the caller is responsible for passing a value that really is a T.
  private def parseUnsafe(v: Any): Any = companion.parse(v.asInstanceOf[T])

  // Normalizes the raw Avro value into the representation `companion.parse` expects.
  // Marked @transient lazy so it is re-derived after deserialization rather than serialized.
  @transient private lazy val preParserProcessor: Any => Any =
    typeOf[T] match {
      // String in Avro can be stored as org.apache.avro.util.Utf8 (a subclass of CharSequence)
      // which cannot be cast to String as-is. toString is called to get a real String.
      case t if t =:= typeOf[String] => (v: Any) => v.toString
      // ByteBuffer in Avro is converted into Array[Byte], which is the format the validation
      // type expects its input to be in.
      case t if t =:= typeOf[Array[Byte]] =>
        (v: Any) => byteBufferToByteArray(v.asInstanceOf[java.nio.ByteBuffer])
      case _ => (v: Any) => v
    }

  /** Function converting one raw Avro value into an instance of the validation type. */
  def parseAvro: Any => Any = (v: Any) => parseUnsafe(preParserProcessor(v))

  // TODO: Optimize the method below by introducing changes to Elitzur-Core to allow non-implicit
  // driven wiring of Validators
  //scalastyle:off line.size.limit
  /**
   * Resolves the validator matching the given modifier stack, outermost modifier first.
   * Supported shapes: LT, Option[LT], Seq[LT], Seq[Option[LT]], Option[Seq[LT]] and
   * Option[Seq[Option[LT]]].
   *
   * @param modifiers Option/Array wrappers around the validation type, outermost first
   * @return the validator, erased to `Validator[Any]`
   * @throws Exception when the modifier combination is not one of the supported shapes
   */
  private[dynamic] def getValidator(modifiers: List[ValidatorOp])(implicit m: MetricsReporter): Validator[Any] = {
    //scalastyle:on line.size.limit
    modifiers match {
      case Nil => implicitly[Validator[LT]].asInstanceOf[Validator[Any]]
      case OptionValidatorOp :: Nil => implicitly[Validator[Option[LT]]]
        .asInstanceOf[Validator[Any]]
      case ArrayValidatorOp :: Nil => implicitly[Validator[Seq[LT]]]
        .asInstanceOf[Validator[Any]]
      case ArrayValidatorOp :: OptionValidatorOp :: Nil => implicitly[Validator[Seq[Option[LT]]]]
        .asInstanceOf[Validator[Any]]
      case OptionValidatorOp :: ArrayValidatorOp :: Nil => implicitly[Validator[Option[Seq[LT]]]]
        .asInstanceOf[Validator[Any]]
      case OptionValidatorOp :: ArrayValidatorOp :: OptionValidatorOp :: Nil =>
        implicitly[Validator[Option[Seq[Option[LT]]]]].asInstanceOf[Validator[Any]]
      // Both ops are case objects, so the message lists readable names.
      case _ => throw new Exception(s"Unsupported validator operation: ${modifiers.mkString(",")}")
    }
  }

  /**
   * The expected input for the dynamic Elitzur is in the form ".path.to.field:thisValidator".
   * The first part of the input (".path.to.field") is used by the DSL to generate an output
   * of Any. Before this output can be mapped to the Elitzur-Core's Validation loop, the output
   * should be wrapped inside of the validator object 'thisValidator'. A few examples of what
   * the method below could output include: thisValidator(Any), Some(thisValidator(Any)) and
   * List(thisValidator(Any)).
   *
   * The modifiers are folded innermost-first, so the first modifier in the list ends up as the
   * outermost wrapper of the resulting function.
   */
  private[dynamic] def getPreprocessorForValidator(modifiers: List[ValidatorOp]): Any => Any = {
    modifiers.reverse.foldLeft(parseAvro)((a, c) => (v: Any) => c.preprocessorOp(v, a))
  }
}


/** A wrapper (Option, Array) around a validation type in a dynamic accessor expression. */
trait ValidatorOp extends Serializable {
  /**
   * Applies `fn` to the element(s) contained in `v`, preserving this wrapper's shape.
   *
   * @param v  raw value produced by the Avro accessor
   * @param fn conversion to apply to each contained element
   */
  def preprocessorOp(v: Any, fn: Any => Any): Any
}

/**
 * Nullable wrapper: a null input becomes None, anything else Some(fn(v)).
 *
 * Declared as a case object, like [[ArrayValidatorOp]], so it has a readable toString when it
 * shows up in error messages.
 */
case object OptionValidatorOp extends ValidatorOp {
  def preprocessorOp(v: Any, fn: Any => Any): Any = Option(v).map(fn)
}

/** Repeated wrapper: treats the input as a java.util.List and maps `fn` over its elements. */
case object ArrayValidatorOp extends ValidatorOp {
  def preprocessorOp(v: Any, fn: Any => Any): Any = v.asInstanceOf[ju.List[Any]].asScala.map(fn)
}
/** Companion metadata and factory methods for the [[NonNegativeLong]] test validation type. */
object NonNegativeLongCompanion extends BaseCompanion[Long, NonNegativeLong] {
  // Descriptive metadata
  def validationType: String = "NonNegativeLong"
  def bigQueryType: String = "INTEGER"
  def description: String = "Non negative long"
  def owner: Owner = Blizzard

  // Construction: parsing is a plain wrap, no transformation of the input is performed
  def apply(data: Long): NonNegativeLong = new NonNegativeLong(data)
  def parse(data: Long): NonNegativeLong = new NonNegativeLong(data)
}
/** Test helpers: an in-memory [[MetricsReporter]] whose counters can be inspected by tests. */
object DynamicAccessorValidatorTestUtils {

  /**
   * Counts valid/invalid reports in a mutable map keyed by
   * "className.fieldName.validationType.valid" or "...invalid"; unseen keys read as 0.
   */
  class TestMetricsReporter extends MetricsReporter {
    val map: scala.collection.mutable.Map[String, Int] =
      scala.collection.mutable.Map[String, Int]().withDefaultValue(0)

    private def validKey(className: String, fieldName: String, validationType: String): String =
      s"$className.$fieldName.$validationType.valid"

    private def invalidKey(className: String, fieldName: String, validationType: String): String =
      s"$className.$fieldName.$validationType.invalid"

    // Read-modify-write of a single counter; a missing key starts from the default of 0.
    private def increment(key: String): Unit = map.update(key, map(key) + 1)

    override def reportValid(className: String, fieldName: String, validationType: String): Unit =
      increment(validKey(className, fieldName, validationType))

    override def reportInvalid(className: String, fieldName: String, validationType: String): Unit =
      increment(invalidKey(className, fieldName, validationType))

    override def toString: String = map.toString()

    /** Number of valid reports recorded for the given class/field/validation type. */
    def getValid(className: String, fieldName: String, validationType: String): Int =
      map(validKey(className, fieldName, validationType))

    /** Number of invalid reports recorded for the given class/field/validation type. */
    def getInvalid(className: String, fieldName: String, validationType: String): Int =
      map(invalidKey(className, fieldName, validationType))

    /** Drops every recorded counter. */
    def cleanSlate(): Unit = map.clear()
  }

  /** Creates a fresh reporter with no recorded counts. */
  def metricsReporter(): MetricsReporter = new TestMetricsReporter
}
BaseCompanion[_, _]): (Int, Int) = { 117 | val m = metricsReporter.asInstanceOf[DynamicAccessorValidatorTestUtils.TestMetricsReporter] 118 | val args = ( 119 | dynamicRecordValidator.className, 120 | fieldLabel, 121 | c.validationType 122 | ) 123 | ((m.getValid _).tupled(args), (m.getInvalid _).tupled(args)) 124 | } 125 | } 126 | 127 | -------------------------------------------------------------------------------- /elitzur-core/src/main/scala/com/spotify/elitzur/validators/ValidatorMacros.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
/**
 * Macro implementations backing Validator derivation: `wrappedValidator` post-processes the
 * tree generated by Magnolia, and `issueFallbackWarning` emits a compile-time notice and falls
 * back to an IgnoreValidator when no implicit Validator is available for a type.
 */
@SuppressWarnings(Array("org.wartremover.warts.StringPlusAny"))
private[elitzur] object ValidatorMacros {

  // Fallback warnings are shown unless explicitly switched off through a macro setting.
  private[this] val ShowWarnDefault = true
  // Matches the "-Xmacro-settings" entry controlling the warning; the group is the flag value.
  private[this] val ShowWarnSettingRegex = "show-validator-fallback=(true|false)".r

  // Add a level of indirection to prevent the macro from capturing
  // $outer which would make the Coder serialization fail
  /**
   * Runs Magnolia's derivation for `T`, takes the body of the (lazy) val Magnolia wraps its
   * result in, and replaces the annotation arguments in the generated tree with empty arrays.
   *
   * @param c macro context
   * @return the derived typeclass tree with annotation arrays emptied
   */
  def wrappedValidator[T: c.WeakTypeTag](c: whitebox.Context): c.Tree = {
    import c.universe._

    val magTree = magnolia1.Magnolia.gen[T](c)

    // Extracts the right-hand side of the leading `lazy val`/`val` definition in Magnolia's
    // output. NOTE(review): any other tree shape results in a MatchError at macro expansion.
    def getLazyVal =
      magTree match {
        case q"lazy val $_ = $body; $_" =>
          body

        case q"val $_ = $body; $_" =>
          body
      }

    // Remove annotations from magnolia since they are
    // not serializable and we don't use them anyway
    // scalastyle:off line.size.limit
    val removeAnnotations =
      new Transformer {
        override def transform(tree: Tree): c.universe.Tree = {
          tree match {
            // CaseClass constructor applied to five arguments: the last one (annotations) is
            // replaced by an empty array.
            case Apply(AppliedTypeTree(Select(pack, TypeName("CaseClass")), ps),
            List(typeName, isObject, isValueClass, params, _)) =>
              Apply(AppliedTypeTree(Select(pack, TypeName("CaseClass")), ps),
                List(typeName, isObject, isValueClass, params, q"""Array()"""))

            // Same replacement for the anonymous-subclass form of CaseClass.
            case q"""new magnolia1.CaseClass[$tc, $t]($typeName, $isObject, $isValueClass, $params,$_){ $body }""" =>
              q"""_root_.magnolia1.CaseClass[$tc, $t]($typeName, $isObject, $isValueClass, $params, Array()){ $body }"""

            // Sealed trait dispatch: the third SealedTrait argument is emptied.
            // NOTE(review): this pattern names com.spotify.elitzur.Validator while this file's
            // package is com.spotify.elitzur.validators - confirm the path is the intended one.
            case q"com.spotify.elitzur.Validator.split(new magnolia1.SealedTrait($name, $subtypes, $_))" =>
              q"_root_.com.spotify.elitzur.Validator.split(new magnolia1.SealedTrait($name, $subtypes, Array()))"

            // Per-parameter metadata: the last argument is emptied.
            case q"""magnolia1.Magnolia.param[$tc, $t, $p]($name, $idx, $repeated, $tcParam, $defaultVal, $_)""" =>
              q"""_root_.magnolia1.Magnolia.param[$tc, $t, $p]($name, $idx, $repeated, $tcParam, $defaultVal, Array())"""

            // Anything else: recurse into the children.
            case _ =>
              super.transform(tree)
          }
        }
      }
    // scalastyle:on line.size.limit
    val validator = removeAnnotations.transform(getLazyVal)

    validator
  }
  // scalastyle:on method.length

  //scalastyle:off line.size.limit
  /**
   * Makes it possible to configure fallback warnings by passing
   * "-Xmacro-settings:show-validator-fallback=true" as a Scalac option.
   * Stolen from scio here:
   * https://github.com/spotify/scio/blob/9379a2b8a6a6b30963841700f99ca2cf04857172/scio-macros/src/main/scala/com/spotify/scio/coders/CoderMacros.scala
   */
  //scalastyle:on line.size.limit
  private[this] def showWarn(c: whitebox.Context) =
    c.settings
      .collectFirst {
        case ShowWarnSettingRegex(value) =>
          value.toBoolean
      }
      .getOrElse(ShowWarnDefault)


  /**
   * Fallback used when no implicit Validator exists for `T`: optionally echoes a compile-time
   * message describing how to add one, then expands to `new IgnoreValidator[T]`.
   *
   * @param c macro context
   * @return a tree constructing an IgnoreValidator for `T`
   */
  def issueFallbackWarning[T: c.WeakTypeTag](c: whitebox.Context): c.Tree = {
    import c.universe._

    val wtt = weakTypeOf[T]
    // Split the type into its symbol and type arguments to render e.g. Name[A,B].
    val TypeRef(_, sym, args) = wtt
    val typeName = sym.name
    // Bracketed, comma-separated type arguments; empty string when there are none.
    val params = args.headOption
      .map { _ =>
        args.mkString("[", ",", "]")
      }
      .getOrElse("")
    val fullType = typeName.toString + params

    val warning =
      s"""
      | Warning: No implicit Validator found for the following type:
      |
      | >> $wtt
      |
      | You can add a Validator for this type like this:
      |
      | implicit val = new IgnoreValidator[$fullType]
      |
      | If this is a primitive or a type other people use please consider contributing this
      | back to elitzur
      | """.stripMargin

    val shouldWarn = showWarn(c)
    // TODO this doesn't show up when using c.warning. We might want to use that
    if (shouldWarn) c.echo(c.enclosingPosition, warning)

    val fallback = q"""new _root_.com.spotify.elitzur.validators.IgnoreValidator[$wtt]"""
    fallback
  }
}
/**
 * Helpers for naming Beam counters after Elitzur validation results and for mapping counter
 * names and case class field paths back to their validation types.
 */
object ElitzurMetrics {

  /** construct Beam counter from parts of counter name */
  def getCounter(className: String,
                 fieldName: String,
                 validationType: String,
                 state: CounterTypes.Value): Counter = {
    // This method is called very frequently (per-elitzur field per record) and building strings
    // the scala way is slower than expected (it seems to create multiple string builders), so a
    // single pre-sized Java builder is used: 1 char for "/" and 8 chars for "/Elitzur".
    val stateName = state.toString
    val capacity = fieldName.length + 1 + validationType.length + 8 + stateName.length
    val nameBuilder = new JStringBuilder(capacity)
    nameBuilder.append(fieldName)
    nameBuilder.append("/")
    nameBuilder.append(validationType)
    nameBuilder.append("/Elitzur")
    nameBuilder.append(stateName)
    ScioMetrics.counter(className, nameBuilder.toString)
  }

  // Returns the zero-based `index`-th "/"-separated segment of a counter name.
  private def counterNameSegment(counterName: String, index: Int): String =
    counterName.split("/")(index)

  private[elitzur] def getClassNameFromCounterName(counterName: String): String =
    counterNameSegment(counterName, 0)

  private[elitzur] def getFieldNameFromCounterName(counterName: String): String =
    counterNameSegment(counterName, 1)

  private[elitzur] def getValidationTypeFromCounterName(counterName: String): String =
    counterNameSegment(counterName, 2)

  private[elitzur] def getCounterTypeFromCounterName(counterName: String): String =
    counterNameSegment(counterName, 3)

  // Resolves a dot-separated field path against `className` and returns the simple name of the
  // class found at the end of the path.
  private[elitzur] def getValidationTypeFromCaseClass(className: Class[_], fieldName: String)
  : String = {
    val path = ArraySeq.unsafeWrapArray(fieldName.split("\\."))
    getValidationTypeFromCaseClass(className, path).getSimpleName
  }

  private def getParameterizedInnerType(genericType: Type): Type = {
    // removes one layer of type nesting from reflection
    // workaround found via https://stackoverflow.com/a/11165045
    val parameterized = genericType.asInstanceOf[ParameterizedType]
    parameterized.getActualTypeArguments()(0)
  }

  private def unwrapOptionType(optType: Type): Class[_] =
    getParameterizedInnerType(optType).asInstanceOf[Class[_]]

  private def unwrapValidationStatus(vsType: Type): Class[_] =
    // assume we either have a ValidationStatus[Option[T]] or a ValidationStatus[T]
    getParameterizedInnerType(vsType) match {
      case plain: Class[_] =>
        // a Class[_] here means the nested type wasn't itself parameterized
        plain
      case nested: ParameterizedType =>
        // we can't cast this to a Class, so it's an Option[T], remove one layer and cast that
        getParameterizedInnerType(nested).asInstanceOf[Class[_]]
    }

  // scalastyle:off cyclomatic.complexity
  // Walks `fieldNames` through nested case classes, peeling Option/Seq/ValidationStatus
  // wrappers along the way, and returns the class of the last field on the path.
  @tailrec
  private def getValidationTypeFromCaseClass(caseClassClass: Class[_],
                                             fieldNames: Seq[String]): Class[_] = {

    val declaredField = caseClassClass.getDeclaredField(fieldNames(0))
    val fieldClass: Class[_] = declaredField.getType
    val fieldGenericType: Type = declaredField.getGenericType

    val isOption = classOf[Option[_]].equals(fieldClass)
    val isWrapped = classOf[ValidationStatus[_]].isAssignableFrom(fieldClass)
    val isSeq = classOf[Seq[_]].isAssignableFrom(fieldClass)

    val remaining = fieldNames.drop(1)
    if (remaining.isEmpty) {
      // last path element: report the (unwrapped) type of this field
      if (isWrapped) {
        unwrapValidationStatus(fieldGenericType)
      } else if (isOption || isSeq) {
        // remove one layer of parameterization only
        getParameterizedInnerType(fieldGenericType).asInstanceOf[Class[_]]
      } else {
        // no parameterization
        fieldClass
      }
    } else {
      // more path elements follow: descend into the (unwrapped) type of this field
      val nextClass: Class[_] =
        if (isOption || isSeq) {
          unwrapOptionType(fieldGenericType)
        } else if (isWrapped) {
          unwrapValidationStatus(fieldGenericType)
        } else {
          fieldClass
        }
      getValidationTypeFromCaseClass(nextClass, remaining)
    }
  }

  // scalastyle:on cyclomatic.complexity

  /** return subset of all Scio counters named with Elitzur */
  def getElitzurCounters(sr: ScioResult): Map[MetricName, metrics.MetricValue[Long]] =
    sr.allCounters.filter { case (metricName, _) => metricName.toString.contains("Elitzur") }
}
16 | */ 17 | 18 | package com.spotify.elitzur.validators 19 | 20 | import org.scalatest.flatspec.AnyFlatSpec 21 | import org.scalatest.matchers.should.Matchers 22 | import java.util.Locale 23 | 24 | import com.spotify.elitzur.MetricsReporter 25 | import com.spotify.elitzur.types.Owner 26 | import com.spotify.elitzur.validators.DynamicRecordValidatorTest.TestMetricsReporter 27 | 28 | case object Blizzard extends Owner { 29 | override def name: String = "Blizzard" 30 | } 31 | 32 | object Companions { 33 | implicit val nnlC: SimpleCompanionImplicit[Long, NonNegativeLong] = 34 | SimpleCompanionImplicit(NonNegativeLongCompanion) 35 | } 36 | 37 | case class NonNegativeLong(data: Long) extends BaseValidationType[Long] { 38 | override def checkValid: Boolean = data >= 0L 39 | } 40 | 41 | object NonNegativeLongCompanion extends BaseCompanion[Long, NonNegativeLong] { 42 | def validationType: String = "NonNegativeLong" 43 | 44 | def bigQueryType: String = "INTEGER" 45 | 46 | def apply(data: Long): NonNegativeLong = NonNegativeLong(data) 47 | 48 | def parse(data: Long): NonNegativeLong = NonNegativeLong(data) 49 | 50 | override def owner: Owner = Blizzard 51 | 52 | override def description: String = "Non negative long" 53 | } 54 | 55 | object DynamicRecordValidatorTest { 56 | class TestMetricsReporter extends MetricsReporter { 57 | val map : scala.collection.mutable.Map[String, Int] = 58 | scala.collection.mutable.Map[String, Int]().withDefaultValue(0) 59 | override def reportValid(className: String, fieldName: String, validationType: String): Unit = 60 | map(s"$className.$fieldName.$validationType.valid") += 1 61 | override def reportInvalid(className: String, fieldName: String, validationType: String): Unit = 62 | map(s"$className.$fieldName.$validationType.invalid") += 1 63 | override def toString: String = map.toString() 64 | def getValid(className: String, fieldName: String, validationType: String): Int = 65 | map(s"$className.$fieldName.$validationType.valid") 66 | def 
getInvalid(className: String, fieldName: String, validationType: String): Int = 67 | map(s"$className.$fieldName.$validationType.invalid") 68 | } 69 | def metricsReporter(): MetricsReporter = new TestMetricsReporter 70 | } 71 | 72 | 73 | class DynamicRecordValidatorTest extends AnyFlatSpec with Matchers { 74 | 75 | it should "validate a simple case" in { 76 | val label = "label" 77 | val className = "com.spotify.DynamicClass" 78 | implicit val metricsReporter: MetricsReporter = DynamicRecordValidatorTest.metricsReporter() 79 | val nnlValidator = implicitly[Validator[NonNegativeLong]] 80 | val recordValidator = DynamicRecordValidator( 81 | Array(nnlValidator).asInstanceOf[Array[Validator[Any]]], Array(label)) 82 | recordValidator.validateRecord( 83 | Unvalidated(Seq(NonNegativeLong(1L)).asInstanceOf[Seq[Any]]), 84 | outermostClassName = Some(className) 85 | ) 86 | metricsReporter.asInstanceOf[TestMetricsReporter].getValid( 87 | className, 88 | label, 89 | NonNegativeLongCompanion.validationType 90 | ) shouldEqual 1 91 | } 92 | 93 | it should "validate a repeated value" in { 94 | val label = "label" 95 | val className = "com.spotify.DynamicClass" 96 | implicit val metricsReporter: MetricsReporter = DynamicRecordValidatorTest.metricsReporter() 97 | val repeatedValidator = implicitly[Validator[Seq[NonNegativeLong]]] 98 | val recordValidator = DynamicRecordValidator( 99 | Array(repeatedValidator).asInstanceOf[Array[Validator[Any]]], Array(label)) 100 | recordValidator.validateRecord( 101 | Unvalidated(Seq(Seq(NonNegativeLong(1L), NonNegativeLong(-1L))).asInstanceOf[Seq[Any]]), 102 | outermostClassName = Some(className) 103 | ) 104 | print(metricsReporter.toString) 105 | metricsReporter.asInstanceOf[TestMetricsReporter].getValid( 106 | className, 107 | label, 108 | NonNegativeLongCompanion.validationType 109 | ) shouldEqual 1 110 | metricsReporter.asInstanceOf[TestMetricsReporter].getInvalid( 111 | className, 112 | label, 113 | NonNegativeLongCompanion.validationType 114 
| ) shouldEqual 1 115 | } 116 | 117 | it should "validate multiple fields" in { 118 | val label1 = "label1" 119 | val label2 = "label2" 120 | val className = "com.spotify.DynamicClass" 121 | implicit val metricsReporter: MetricsReporter = DynamicRecordValidatorTest.metricsReporter() 122 | val nnlValidator = implicitly[Validator[NonNegativeLong]] 123 | val recordValidator = DynamicRecordValidator( 124 | Array(nnlValidator, nnlValidator).asInstanceOf[Array[Validator[Any]]], Array(label1, label2)) 125 | recordValidator.validateRecord( 126 | Unvalidated(Seq(NonNegativeLong(1L), NonNegativeLong(-1L)).asInstanceOf[Seq[Any]]), 127 | outermostClassName = Some(className) 128 | ) 129 | metricsReporter.asInstanceOf[TestMetricsReporter].getValid( 130 | className, 131 | label1, 132 | NonNegativeLongCompanion.validationType 133 | ) shouldEqual 1 134 | metricsReporter.asInstanceOf[TestMetricsReporter].getInvalid( 135 | className, 136 | label2, 137 | NonNegativeLongCompanion.validationType 138 | ) shouldEqual 1 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /elitzur-core/src/main/scala/com/spotify/elitzur/validators/ValidationStatus.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 
16 | */ 17 | package com.spotify.elitzur.validators 18 | 19 | import scala.util.Try 20 | import scala.collection.compat._ 21 | 22 | private[this] case class PostValidationWrapper[A](inner: A) extends PostValidation[A] { 23 | override def isValid: Boolean = inner.asInstanceOf[PostValidation[_]].isValid 24 | 25 | override def get: A = inner 26 | 27 | override def map[B](f: A => B): ValidationStatus[B] = 28 | throw new NotImplementedError("Call forceGet or inner, not implemented") 29 | 30 | override def flatMap[B](f: A => ValidationStatus[B]): ValidationStatus[B] = 31 | throw new NotImplementedError("Call forceGet or inner, not implemented") 32 | 33 | override def forceGet: A = inner 34 | 35 | override def toOption: Option[A] = 36 | throw new NotImplementedError("Call forceGet or inner, not implemented") 37 | 38 | override def isNonvalidated: Boolean = inner.asInstanceOf[PostValidation[_]].isNonvalidated 39 | 40 | override def toString: String = inner.toString 41 | } 42 | 43 | trait ValidationStatus[+A] extends IterableOnce[A] { 44 | def isValid: Boolean 45 | def isNonvalidated: Boolean 46 | def isPostValidation: Boolean 47 | def get: A 48 | def map[B](f: A => B): ValidationStatus[B] 49 | def flatMap[B](f: A => ValidationStatus[B]): ValidationStatus[B] 50 | 51 | def forceGet: A 52 | 53 | def toOption: Option[A] 54 | 55 | def foreach[U](f: A => U): Unit = f(this.forceGet) 56 | 57 | def isEmpty: Boolean = false 58 | 59 | def hasDefiniteSize: Boolean = true 60 | 61 | def seq: IterableOnce[A] = this 62 | 63 | def forall(p: A => Boolean): Boolean = p(this.forceGet) 64 | 65 | def exists(p: A => Boolean): Boolean = p(this.forceGet) 66 | 67 | def find(p: A => Boolean): Option[A] = 68 | if (p(this.forceGet)) Some(this.forceGet) else None 69 | 70 | def copyToArray[B >: A](xs: Array[B], start: Int, len: Int): Unit = 71 | xs.update(start, this.forceGet) 72 | 73 | def toTraversable: Iterable[A] = this.asInstanceOf[Iterable[A]] 74 | 75 | def isTraversableAgain: Boolean = true 76 | 
77 | def toStream: Stream[A] = Stream(this.forceGet) 78 | 79 | def toIterator: Iterator[A] = this.iterator 80 | 81 | def iterator: Iterator[A] = Iterator(this.forceGet) 82 | 83 | } 84 | 85 | abstract class PreValidation[+A] extends ValidationStatus[A] { 86 | def isPostValidation: Boolean = false 87 | 88 | def isInvalid: Boolean = !isValid 89 | } 90 | 91 | abstract class PostValidation[+A] extends ValidationStatus[A] { 92 | def isPostValidation: Boolean = true 93 | 94 | def isInvalid: Boolean = !isValid 95 | 96 | def isNonvalidated: Boolean 97 | } 98 | 99 | case class Unvalidated[+A](x: A) extends PreValidation[A] { 100 | override def isValid: Boolean = false 101 | 102 | override def get: A = throw new Exception("Can't get Unvalidated data, use getOpt") 103 | def getOpt: Option[A] = Some(x) 104 | 105 | override def isNonvalidated: Boolean = false 106 | 107 | override def forceGet: A = x 108 | 109 | override def map[B](f: A => B): ValidationStatus[B] = Unvalidated(f(x)) 110 | 111 | override def flatMap[B](f: A => ValidationStatus[B]): ValidationStatus[B] = f(x) 112 | 113 | override def toOption: Option[A] = Some(x) 114 | 115 | //TODO: Remove this, should not be saving unvalidated data, Only added for simple benchmarking 116 | override def toString: String = x.toString 117 | } 118 | 119 | case class Valid[+A](x: A) extends PostValidation[A] { 120 | def isValid: Boolean = true 121 | def get: A = x 122 | 123 | override def forceGet: A = x 124 | 125 | override def map[B](f: A => B): ValidationStatus[B] = Valid(f(this.x)) 126 | 127 | override def flatMap[B](f: A => ValidationStatus[B]): ValidationStatus[B] = f(this.x) 128 | 129 | override def toOption: Option[A] = Some(x) 130 | 131 | override def isNonvalidated: Boolean = false 132 | 133 | override def toString: String = x.toString 134 | } 135 | 136 | final case class IgnoreValidation[+A](a: A) extends PostValidation[A] { 137 | override def isNonvalidated: Boolean = true 138 | 139 | override def isValid: Boolean = true 
140 | 141 | override def get: A = a 142 | 143 | override def map[B](f: A => B): ValidationStatus[B] = IgnoreValidation(f(a)) 144 | 145 | override def flatMap[B](f: A => ValidationStatus[B]): ValidationStatus[B] = f(a) 146 | 147 | override def toOption: Option[A] = Some(a) 148 | 149 | override def forceGet: A = a 150 | } 151 | 152 | final case class Invalid[+A](x: A) extends PostValidation[A] { 153 | override def isValid: Boolean = false 154 | override def get: A = throw new Exception("Can't get Invalid data, use getInvalid") 155 | def getOpt: Option[A] = Try(recover(identity)).toOption 156 | def getInvalid: A = x 157 | override def map[B](f: A => B): ValidationStatus[B] = Invalid(f(x)) 158 | override def flatMap[B](f: A => ValidationStatus[B]): ValidationStatus[B] = f(x) 159 | def recover[B](f: A => B): B = f(x) 160 | def recoverValid[B](f: A => B): Valid[B] = Valid(recover(f)) 161 | 162 | override def toString: String = x.toString 163 | 164 | override def toOption: Option[A] = None 165 | 166 | override def forceGet: A = x 167 | 168 | override def isNonvalidated: Boolean = false 169 | 170 | // Override this so flatten and flatMap skip Invalid records 171 | override def iterator: Iterator[A] = Iterator() 172 | } 173 | -------------------------------------------------------------------------------- /elitzur-core/src/test/scala/com/spotify/elitzur/TestingTypes.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. 
See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.spotify.elitzur

import java.util.Locale

import com.spotify.elitzur.validators.{BaseCompanion, BaseValidationType, SimpleCompanionImplicit}

//scalastyle:off line.size.limit
import com.spotify.elitzur.types.Owner
import com.spotify.elitzur.validators.{DynamicCompanion, DynamicCompanionImplicit, DynamicValidationType}
//scalastyle:on line.size.limit

/** Implicit companion wiring for every testing validation type declared in this file. */
private[elitzur] object ValidationTypeImplicits {
  implicit val ageC: SimpleCompanionImplicit[Long, AgeTesting] =
    SimpleCompanionImplicit(AgeTestingCompanion)

  implicit val ccC: SimpleCompanionImplicit[String, CountryCodeTesting] =
    SimpleCompanionImplicit(CountryCodeTestingCompanion)

  implicit val nnlC: SimpleCompanionImplicit[Long, NonNegativeLongTesting] =
    SimpleCompanionImplicit(NonNegativeLongTestingCompanion)

  implicit val nndC: SimpleCompanionImplicit[Double, NonNegativeDoubleTesting] =
    SimpleCompanionImplicit(NonNegativeDoubleTestingCompanion)

  implicit val dtC
  : DynamicCompanionImplicit[String, Set[String], DynamicString] =
    DynamicCompanionImplicit(DynamicString)
}

/** Owner reported by every testing companion below. */
private[this] case object Blizzard extends Owner {
  override def name: String = "Blizzard"
}

import com.spotify.elitzur.ValidationTypeImplicits._

// The types below are 'fake' ValidationType implementations (plus their companions) that exist
// only so tests can check that validation runs consistently.
//
// They cannot be private: the Override Type Provider is called from Scio, not from within
// elitzur.

/** Valid when `data` is one of the ISO country codes known to java.util.Locale. */
case class CountryCodeTesting(data: String) extends BaseValidationType[String] {
  override def checkValid: Boolean = {
    val isoCountries = Locale.getISOCountries
    isoCountries.exists(_ == data)
  }
}

object CountryCodeTestingCompanion extends BaseCompanion[String, CountryCodeTesting] {
  def validationType: String = "CountryCode"

  def bigQueryType: String = "STRING"

  def owner: Owner = Blizzard

  def description: String = "Represents an ISO standard two-letter country code"

  def apply(data: String): CountryCodeTesting = CountryCodeTesting(data)

  def parse(data: String): CountryCodeTesting = CountryCodeTesting(data)
}

/** Valid for ages strictly between 0 and 150. */
case class AgeTesting(data: Long) extends BaseValidationType[Long] {
  override def checkValid: Boolean = 0L < data && data < 150L
}

object AgeTestingCompanion extends BaseCompanion[Long, AgeTesting] {
  def validationType: String = "Age"

  def bigQueryType: String = "INTEGER"

  def owner: Owner = Blizzard

  def description: String = "This represents an age above 0 and less than 150"

  def apply(data: Long): AgeTesting = AgeTesting(data)

  def parse(data: Long): AgeTesting = AgeTesting(data)
}

/** Valid for any long greater than or equal to zero. */
case class NonNegativeLongTesting(data: Long) extends BaseValidationType[Long] {
  override def checkValid: Boolean = data >= 0L
}

object NonNegativeLongTestingCompanion extends BaseCompanion[Long, NonNegativeLongTesting] {
  def validationType: String = "NonNegativeLong"

  def bigQueryType: String = "INTEGER"

  override def owner: Owner = Blizzard

  override def description: String = "Non negative long"

  def apply(data: Long): NonNegativeLongTesting = NonNegativeLongTesting(data)

  def parse(data: Long): NonNegativeLongTesting = NonNegativeLongTesting(data)
}

/** Valid for any double greater than or equal to zero. */
case class NonNegativeDoubleTesting(data: Double) extends BaseValidationType[Double] {
  override def checkValid: Boolean = data >= 0.0
}

object NonNegativeDoubleTestingCompanion extends BaseCompanion[Double, NonNegativeDoubleTesting] {
  def validationType: String = "NonNegativeDouble"

  def bigQueryType: String = "FLOAT"

  override def owner: Owner = Blizzard

  override def description: String = "Non negative double"

  def apply(data: Double): NonNegativeDoubleTesting = NonNegativeDoubleTesting(data)

  def parse(data: Double): NonNegativeDoubleTesting = NonNegativeDoubleTesting(data)
}

//scalastyle:off line.size.limit
/** Valid only when an argument set is present and contains `data`. */
case class DynamicString(data: String, arg: Option[Set[String]] = None)
  extends DynamicValidationType[String, Set[String], DynamicString] {
  override def checkValid: Boolean = arg match {
    case Some(allowed) => allowed.contains(data)
    case None => false
  }

}
//scalastyle:on line.size.limit

object DynamicString extends DynamicCompanion[String, Set[String], DynamicString] {
  def validationType: String = "DynamicCountryCode"

  def bigQueryType: String = "STRING"

  def owner: Owner = Blizzard

  def description: String = "Represents a dynamically defined testing type"

  // Without an argument set the resulting value carries `None`.
  override def apply(data: String): DynamicString = DynamicString(data, None)

  override def parse(data: String): DynamicString =
    DynamicString(data, None)

  override def parseWithArg(data: String, arg: Set[String]): DynamicString =
    DynamicString(data, Some(arg))

  override def setArg(i: DynamicString, a: Set[String]): DynamicString =
    i.setArg(a)
}

--------------------------------------------------------------------------------
/*
 * Copyright 2020 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.spotify.elitzur

import java.util.Locale
import com.spotify.elitzur.scio._
import com.spotify.scio.{ContextAndArgs, ScioMetrics}
import com.spotify.scio.testing.PipelineSpec
import org.scalacheck.{Arbitrary, Gen}

import scala.collection.compat.immutable.ArraySeq

/** Record shapes exercised by the validator tests below. */
object TestClasses {
  case class Test(inner: Inner, countryCode: CountryCodeTesting)
  case class Inner(playCount: NonNegativeLongTesting)
  case class DynamicRecord(i: DynamicString, j: NonNegativeLongTesting)

  case class ListTest(t: List[CountryCodeTesting], a: List[String])
  case class VectorTest(t: Vector[CountryCodeTesting])
  case class SeqTest(t: Seq[CountryCodeTesting])
  case class ArrayTest(t: Array[CountryCodeTesting])
  // Test for nested Record too
  case class NestedRecordSequence(nested: ListTest)
}


/** Fixed inputs fed to DummyPipeline. */
object PipelineInput {
  import TestClasses._

  val validListInput =
    List(ListTest(List(CountryCodeTesting("US"), CountryCodeTesting("MX")), List("A", "B")))

  val validVectorInput =
    List(VectorTest(Vector(CountryCodeTesting("US"), CountryCodeTesting("MX"))))

  val invalidSeqInput = List(
    SeqTest(
      // Contains a valid country code but should still return invalid since
      // some of the values are invalid
      Seq(CountryCodeTesting("US"), CountryCodeTesting("sdfsdfd"), CountryCodeTesting("sdfsd"))
    )
  )

  val invalidArrayInput =
    List(ArrayTest(Array(CountryCodeTesting("12"), CountryCodeTesting("2121X"))))

  val nestedRecordValidInput = {
    // Reuse the fields of the first valid list record for the nested case.
    val source = validListInput.headOption.get
    List(NestedRecordSequence(ListTest(source.t, source.a)))
  }
}

/** Pipeline that validates each fixed input once so its counters can be asserted. */
object DummyPipeline {
  import PipelineInput._

  def main(args: Array[String]): Unit = {
    val (context, _) = ContextAndArgs(args)
    context.parallelize(validListInput).validate()
    context.parallelize(invalidSeqInput).validate()
    context.parallelize(invalidArrayInput).validate()
    context.parallelize(validVectorInput).validate()
    context.parallelize(nestedRecordValidInput).validate()

    context.run().waitUntilDone()
  }
}

class ValidatorDoFnTest extends PipelineSpec {


  "Validator SCollection helper" should "validate valid records" in {
    val validRecord = TestClasses.Test(
      TestClasses.Inner(NonNegativeLongTesting(0)),
      CountryCodeTesting("US")
    )

    runWithData(Seq(validRecord))(_.validate().count) shouldBe Seq(1)
  }

  "Validator SCollection helper" should "validateWithResult autogenerated valid records" in {
    val validRecordGen =
      Gen.posNum[Long].flatMap { nnl =>
        Gen.oneOf(ArraySeq.unsafeWrapArray(Locale.getISOCountries)).map { cc =>
          TestClasses.Test(TestClasses.Inner(NonNegativeLongTesting(nnl)), CountryCodeTesting(cc))
        }
      }
    val numberToValidate = 100
    val validRecords = Gen.listOfN(numberToValidate, validRecordGen).sample.get

    runWithData(validRecords) { records =>
      records.validateWithResult().filter(_.isValid).count
    } shouldBe Seq(numberToValidate)
  }

  "Validator SCollection helper" should "validateWithResult autogenerated invalid records" in {
    val invalidRecordGen =
      Gen.negNum[Long].flatMap { nnl =>
        Gen.numStr.map { cc =>
          TestClasses.Test(TestClasses.Inner(NonNegativeLongTesting(nnl)), CountryCodeTesting(cc))
        }
      }
    val numberToValidate = 100
    val invalidRecords = Gen.listOfN(numberToValidate, invalidRecordGen).sample.get

    runWithData(invalidRecords) { records =>
      records.validateWithResult().filter(_.isInvalid).count
    } shouldBe Seq(numberToValidate)
  }

  "Validator SCollection" should "validate dynamic records" in {
    val dynamicGen: Gen[TestClasses.DynamicRecord] =
      Arbitrary.arbString.arbitrary.flatMap { s =>
        Gen.posNum[Long].map { l =>
          TestClasses.DynamicRecord(DynamicString(s, Set(s)), NonNegativeLongTesting(l))
        }
      }

    val input = List(dynamicGen.sample.get)
    runWithData(input)(_.validateWithResult().flatten.count) shouldBe Seq(1)
  }

  "Validator SCollection" should "validate collection of Seq,Vector,List or Array " +
    "and set counters" in {
    // (counter namespace, counter name, expected value), checked in this order
    val expectedCounters = Seq(
      ("com.spotify.elitzur.TestClasses.SeqTest", "t/CountryCodeTesting/ElitzurInvalid", 2),
      ("com.spotify.elitzur.TestClasses.SeqTest", "t/CountryCodeTesting/ElitzurValid", 1),
      ("com.spotify.elitzur.TestClasses.ListTest", "t/CountryCodeTesting/ElitzurValid", 2),
      ("com.spotify.elitzur.TestClasses.ArrayTest", "t/CountryCodeTesting/ElitzurInvalid", 2),
      ("com.spotify.elitzur.TestClasses.VectorTest", "t/CountryCodeTesting/ElitzurValid", 2),
      (
        "com.spotify.elitzur.TestClasses.NestedRecordSequence",
        "nested.t/CountryCodeTesting/ElitzurValid",
        2
      )
    )

    expectedCounters
      .foldLeft(JobTest[DummyPipeline.type ].counters(_.size shouldBe 6)) {
        case (job, (namespace, counterName, expected)) =>
          job.counter(ScioMetrics.counter(namespace, counterName))(_ shouldBe expected)
      }
      .run()
  }
}

--------------------------------------------------------------------------------
/elitzur-core/src/test/scala/com/spotify/elitzur/validators/ValidatorTest.scala:
--------------------------------------------------------------------------------
package com.spotify.elitzur.validators

import com.spotify.elitzur.validators.DynamicRecordValidatorTest.TestMetricsReporter
import com.spotify.elitzur.{AgeTesting, CountryCodeTesting, MetricsReporter}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

case class Outer(
  country: ValidationStatus[CountryCodeTesting],
  innerStatus: ValidationStatus[Inner],
  inner: Inner,
  age: AgeTesting,
  ageOpt: Option[AgeTesting],
  repeatedAge: List[AgeTesting],
  repeatedInner: List[Inner]
)

case class Inner(
  countryStatus: ValidationStatus[CountryCodeTesting],
  country: CountryCodeTesting,
  countryOpt: Option[CountryCodeTesting]
)

//scalastyle:off magic.number
class ValidatorTest extends AnyFlatSpec with Matchers {

  val inner = Inner(
    countryStatus = Unvalidated(CountryCodeTesting("US")),
    country = CountryCodeTesting("CA"),
    countryOpt = Some(CountryCodeTesting("SE"))
  )

  // Class name under which every counter of an Outer record is reported.
  private val outerClassName = "com.spotify.elitzur.validators.Outer"

  /** Builds the Outer record used by both tests, varying only the two inputs that differ. */
  private def outerRecord(countryCode: String, secondAge: Long): Outer =
    Outer(
      country = Unvalidated(CountryCodeTesting(countryCode)),
      innerStatus = Unvalidated(inner),
      inner = inner,
      age = AgeTesting(25L),
      ageOpt = Some(AgeTesting(45L)),
      repeatedAge = List(AgeTesting(50L), AgeTesting(secondAge)),
      repeatedInner = List(inner, inner, inner)
    )

  /** Valid-counter expectations for the three country fields of an Inner found at `prefix`. */
  private def innerCountryCounters(prefix: String,
                                   expected: Int): Seq[(Boolean, String, String, Int)] =
    Seq("countryStatus", "country", "countryOpt").map { field =>
      (true, s"$prefix.$field", "CountryCodeTesting", expected)
    }

  /**
   * Checks each (isValidCounter, fieldName, validationType, expectedCount) entry in order,
   * reading the valid or the invalid counter of `reporter` accordingly.
   */
  private def verifyCounters(reporter: TestMetricsReporter,
                             expectations: Seq[(Boolean, String, String, Int)]): Unit =
    expectations.foreach { case (isValidCounter, fieldName, validationType, expected) =>
      val observed =
        if (isValidCounter) {
          reporter.getValid(outerClassName, fieldName, validationType)
        } else {
          reporter.getInvalid(outerClassName, fieldName, validationType)
        }
      observed shouldEqual expected
    }

  "Validator" should "validate valid record" in {
    implicit val metricsReporter: MetricsReporter = DynamicRecordValidatorTest.metricsReporter()
    val validator = Validator.gen[Outer]
    val result = validator.validateRecord(Unvalidated(outerRecord("US", 10L)))
    result.isValid shouldBe true
    val testMetrics = metricsReporter.asInstanceOf[TestMetricsReporter]
    verifyCounters(
      testMetrics,
      Seq((true, "country", "CountryCodeTesting", 1)) ++
        innerCountryCounters("innerStatus", 1) ++
        innerCountryCounters("inner", 1) ++
        Seq(
          (true, "age", "AgeTesting", 1),
          (true, "ageOpt", "AgeTesting", 1),
          (true, "repeatedAge", "AgeTesting", 2)
        ) ++
        innerCountryCounters("repeatedInner", 3)
    )
  }

  "Validator" should "validate invalid record" in {
    implicit val metricsReporter: MetricsReporter = DynamicRecordValidatorTest.metricsReporter()
    val validator = Validator.gen[Outer]
    val result = validator.validateRecord(Unvalidated(outerRecord("FOO", 1000L)))
    result.isValid shouldBe false
    val testMetrics = metricsReporter.asInstanceOf[TestMetricsReporter]
    verifyCounters(
      testMetrics,
      Seq((false, "country", "CountryCodeTesting", 1)) ++
        innerCountryCounters("innerStatus", 1) ++
        innerCountryCounters("inner", 1) ++
        Seq(
          (true, "age", "AgeTesting", 1),
          (true, "ageOpt", "AgeTesting", 1),
          (true, "repeatedAge", "AgeTesting", 1),
          (false, "repeatedAge", "AgeTesting", 1)
        ) ++
        innerCountryCounters("repeatedInner", 3)
    )
  }
}
//scalastyle:on magic.number

--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Code of Conduct

This code of conduct outlines our expectations for participants within the **Spotify FOSS** community, as well as steps to reporting unacceptable behavior. We are committed to providing a welcoming and inspiring community for all and expect our code of conduct to be honored. Anyone who violates this code of conduct may be banned from the community.

Our open source community strives to:

* **Be friendly and patient.**
* **Be welcoming**: We strive to be a community that welcomes and supports people of all backgrounds and identities. This includes, but is not limited to members of any race, ethnicity, culture, national origin, colour, immigration status, social and economic class, educational level, sex, sexual orientation, gender identity and expression, age, size, family status, political belief, religion, and mental and physical ability.
9 | * **Be considerate**: Your work will be used by other people, and you in turn will depend on the work of others. Any decision you take will affect users and colleagues, and you should take those consequences into account when making decisions. Remember that we're a world-wide community, so you might not be communicating in someone else's primary language. 10 | * **Be respectful**: Not all of us will agree all the time, but disagreement is no excuse for poor behavior and poor manners. We might all experience some frustration now and then, but we cannot allow that frustration to turn into a personal attack. It’s important to remember that a community where people feel uncomfortable or threatened is not a productive one. 11 | * **Be careful in the words that we choose**: we are a community of professionals, and we conduct ourselves professionally. Be kind to others. Do not insult or put down other participants. Harassment and other exclusionary behavior aren't acceptable. 12 | * **Try to understand why we disagree**: Disagreements, both social and technical, happen all the time. It is important that we resolve disagreements and differing views constructively. Remember that we’re different. The strength of our community comes from its diversity, people from a wide range of backgrounds. Different people have different perspectives on issues. Being unable to understand why someone holds a viewpoint doesn’t mean that they’re wrong. Don’t forget that it is human to err and blaming each other doesn’t get us anywhere. Instead, focus on helping to resolve issues and learning from mistakes. 
13 | 14 | ## Definitions 15 | 16 | Harassment includes, but is not limited to: 17 | 18 | - Offensive comments related to gender, gender identity and expression, sexual orientation, disability, mental illness, neuro(a)typicality, physical appearance, body size, race, age, regional discrimination, political or religious affiliation 19 | - Unwelcome comments regarding a person’s lifestyle choices and practices, including those related to food, health, parenting, drugs, and employment 20 | - Deliberate misgendering. This includes deadnaming or persistently using a pronoun that does not correctly reflect a person's gender identity. You must address people by the name they give you when not addressing them by their username or handle 21 | - Physical contact and simulated physical contact (eg, textual descriptions like “*hug*” or “*backrub*”) without consent or after a request to stop 22 | - Threats of violence, both physical and psychological 23 | - Incitement of violence towards any individual, including encouraging a person to commit suicide or to engage in self-harm 24 | - Deliberate intimidation 25 | - Stalking or following 26 | - Harassing photography or recording, including logging online activity for harassment purposes 27 | - Sustained disruption of discussion 28 | - Unwelcome sexual attention, including gratuitous or off-topic sexual images or behaviour 29 | - Pattern of inappropriate social contact, such as requesting/assuming inappropriate levels of intimacy with others 30 | - Continued one-on-one communication after requests to cease 31 | - Deliberate “outing” of any aspect of a person’s identity without their consent except as necessary to protect others from intentional abuse 32 | - Publication of non-harassing private communication 33 | 34 | Our open source community prioritizes marginalized people’s safety over privileged people’s comfort. 
We will not act on complaints regarding: 35 | 36 | - ‘Reverse’ -isms, including ‘reverse racism,’ ‘reverse sexism,’ and ‘cisphobia’ 37 | - Reasonable communication of boundaries, such as “leave me alone,” “go away,” or “I’m not discussing this with you” 38 | - Refusal to explain or debate social justice concepts 39 | - Communicating in a ‘tone’ you don’t find congenial 40 | - Criticizing racist, sexist, cissexist, or otherwise oppressive behavior or assumptions 41 | 42 | 43 | ### Diversity Statement 44 | 45 | We encourage everyone to participate and are committed to building a community for all. Although we will fail at times, we seek to treat everyone both as fairly and equally as possible. Whenever a participant has made a mistake, we expect them to take responsibility for it. If someone has been harmed or offended, it is our responsibility to listen carefully and respectfully, and do our best to right the wrong. 46 | 47 | Although this list cannot be exhaustive, we explicitly honor diversity in age, gender, gender identity or expression, culture, ethnicity, language, national origin, political beliefs, profession, race, religion, sexual orientation, socioeconomic status, and technical ability. We will not tolerate discrimination based on any of the protected 48 | characteristics above, including participants with disabilities. 49 | 50 | ### Reporting Issues 51 | 52 | If you experience or witness unacceptable behavior—or have any other concerns—please report it by contacting us via **fossboard@spotify.com**. All reports will be handled with discretion. In your report please include: 53 | 54 | - Your contact information. 55 | - Names (real, nicknames, or pseudonyms) of any individuals involved. If there are additional witnesses, please 56 | include them as well. Your account of what occurred, and if you believe the incident is ongoing. If there is a publicly available record (e.g. a mailing list archive or a public IRC logger), please include a link. 
57 | - Any additional information that may be helpful. 58 | 59 | After filing a report, a representative will contact you personally, review the incident, follow up with any additional questions, and make a decision as to how to respond. If the person who is harassing you is part of the response team, they will recuse themselves from handling your incident. If the complaint originates from a member of the response team, it will be handled by a different member of the response team. We will respect confidentiality requests for the purpose of protecting victims of abuse. 60 | 61 | ### Attribution & Acknowledgements 62 | 63 | We all stand on the shoulders of giants across many open source communities. We'd like to thank the communities and projects that established codes of conduct and diversity statements as our inspiration: 64 | 65 | * [Django](https://www.djangoproject.com/conduct/reporting/) 66 | * [Python](https://www.python.org/community/diversity/) 67 | * [Ubuntu](http://www.ubuntu.com/about/about-ubuntu/conduct) 68 | * [Contributor Covenant](http://contributor-covenant.org/) 69 | * [Geek Feminism](http://geekfeminism.org/about/code-of-conduct/) 70 | * [Citizen Code of Conduct](http://citizencodeofconduct.org/) 71 | -------------------------------------------------------------------------------- /benchmarking/src/main/scala/com/spotify/elitzur/Benchmarking.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Spotify AB. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied.
See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | package com.spotify.elitzur 18 | 19 | import java.util.concurrent.TimeUnit 20 | 21 | import com.spotify.elitzur.validators.{PostValidation, Unvalidated, Validator} 22 | import com.spotify.elitzur.scio._ 23 | import org.openjdk.jmh.annotations.{Benchmark, BenchmarkMode, Mode, OutputTimeUnit} 24 | 25 | object CaseClassesToValidate { 26 | case class ThreeFields( 27 | age: AgeExample, 28 | countryCode: CountryCodeExample, 29 | nnl: NonNegativeLongExample) 30 | 31 | case class TenFieldsThreeValidation( 32 | age: AgeExample, 33 | countryCode: CountryCodeExample, 34 | nnl: NonNegativeLongExample, 35 | a: String, 36 | b: Long, 37 | c: Long, 38 | d: Double, 39 | e: String, 40 | f: String, 41 | g: String 42 | ) 43 | 44 | 45 | case class TenFields( 46 | age: AgeExample, 47 | countryCode: CountryCodeExample, 48 | nnl: NonNegativeLongExample, 49 | br: BucketizedReferrerExample, 50 | age2: AgeExample, 51 | age3: AgeExample, 52 | age4: AgeExample, 53 | age5: AgeExample, 54 | countryCode2: CountryCodeExample, 55 | nnl2: NonNegativeLongExample 56 | ) 57 | 58 | 59 | case class TwentyFieldsTenValidation( 60 | age: AgeExample, 61 | countryCode: CountryCodeExample, 62 | nnl: NonNegativeLongExample, 63 | br: BucketizedReferrerExample, 64 | age2: AgeExample, 65 | age3: AgeExample, 66 | age4: AgeExample, 67 | age5: AgeExample, 68 | countryCode2: CountryCodeExample, 69 | nnl2: NonNegativeLongExample, 70 | a: String, 71 | b: Long, 72 | c: Long, 73 | d: Double, 74 | e: String, 75 | f: String, 76 | g: String, 77 | h: Long, 78 | i: Double, 79 | j: Long 80 | ) 81 | 82 | 83 | case class FiveNestedFiveFields(field1: FiveFields, 84 | field2: FiveFields, 85 | field3: FiveFields, 86 | field4: FiveFields, 87 | field5: FiveFields) 88 | 89 | case class FiveFields(age: AgeExample, 90 | countryCode: CountryCodeExample, 91 | nnl: NonNegativeLongExample, 92 | br: 
BucketizedReferrerExample, 93 | age2: AgeExample 94 | ) 95 | } 96 | 97 | object CaseClassValidators { 98 | 99 | import CaseClassesToValidate._ 100 | 101 | def genThreeFields(): Validator[ThreeFields] = { 102 | Validator.gen[ThreeFields] 103 | } 104 | 105 | def genTenFieldThreeV(): Validator[TenFieldsThreeValidation] = { 106 | Validator.gen[TenFieldsThreeValidation] 107 | } 108 | 109 | def genTenFields(): Validator[TenFields] = { 110 | Validator.gen[TenFields] 111 | } 112 | 113 | def genTwentyFieldTenV(): Validator[TwentyFieldsTenValidation] = { 114 | Validator.gen[TwentyFieldsTenValidation] 115 | } 116 | 117 | def genFiveNestedFive(): Validator[FiveNestedFiveFields] = { 118 | Validator.gen[FiveNestedFiveFields] 119 | } 120 | 121 | val threeVal: Validator[ThreeFields] = genThreeFields() 122 | val tenVal: Validator[TenFields] = genTenFields() 123 | val tenV3Val: Validator[TenFieldsThreeValidation] = genTenFieldThreeV() 124 | val twentyV10Val: Validator[TwentyFieldsTenValidation] = genTwentyFieldTenV() 125 | val fiveNFiveVal: Validator[FiveNestedFiveFields] = genFiveNestedFive() 126 | } 127 | 128 | //scalastyle:off magic.number 129 | object Fields { 130 | import CaseClassesToValidate._ 131 | 132 | val Three = ThreeFields( 133 | AgeExample(10L), 134 | CountryCodeExample("US"), 135 | NonNegativeLongExample(0L) 136 | ) 137 | 138 | val Ten = TenFields( 139 | AgeExample(1L), 140 | CountryCodeExample("US"), 141 | NonNegativeLongExample(0L), 142 | BucketizedReferrerExample("home"), 143 | AgeExample(5L), 144 | AgeExample(5L), 145 | AgeExample(5L), 146 | AgeExample(5L), 147 | CountryCodeExample("SE"), 148 | NonNegativeLongExample(5L) 149 | ) 150 | 151 | val Twenty = TwentyFieldsTenValidation( 152 | AgeExample(1L), 153 | CountryCodeExample("US"), 154 | NonNegativeLongExample(0L), 155 | BucketizedReferrerExample("home"), 156 | AgeExample(5L), 157 | AgeExample(5L), 158 | AgeExample(5L), 159 | AgeExample(5L), 160 | CountryCodeExample("SE"), 161 | NonNegativeLongExample(5L), 162 | 
"", 163 | 1L, 164 | 0L, 165 | 1.0, 166 | "", 167 | "", 168 | "", 169 | 1L, 170 | 1.0, 171 | 1L 172 | ) 173 | 174 | val TenV3 = TenFieldsThreeValidation( 175 | AgeExample(1L), 176 | CountryCodeExample("US"), 177 | NonNegativeLongExample(0L), 178 | "", 179 | 0L, 180 | 0L, 181 | 1.0, 182 | "", 183 | "", 184 | "" 185 | ) 186 | 187 | val fiveN5 = FiveNestedFiveFields( 188 | FiveFields( 189 | AgeExample(1L), 190 | CountryCodeExample("US"), 191 | NonNegativeLongExample(2L), 192 | BucketizedReferrerExample("home"), 193 | AgeExample(10L) 194 | ), 195 | FiveFields( 196 | AgeExample(2L), 197 | CountryCodeExample("SE"), 198 | NonNegativeLongExample(5L), 199 | BucketizedReferrerExample("search"), 200 | AgeExample(30L) 201 | ), 202 | FiveFields( 203 | AgeExample(3L), 204 | CountryCodeExample("CA"), 205 | NonNegativeLongExample(9L), 206 | BucketizedReferrerExample("browse"), 207 | AgeExample(11L) 208 | ), 209 | FiveFields( 210 | AgeExample(4L), 211 | CountryCodeExample("GY"), 212 | NonNegativeLongExample(32L), 213 | BucketizedReferrerExample("radio"), 214 | AgeExample(15L) 215 | ), 216 | FiveFields( 217 | AgeExample(5L), 218 | CountryCodeExample("MX"), 219 | NonNegativeLongExample(50L), 220 | BucketizedReferrerExample("other"), 221 | AgeExample(60L) 222 | ) 223 | ) 224 | } 225 | 226 | class Benchmarking { 227 | import CaseClassValidators._ 228 | import CaseClassesToValidate._ 229 | import Fields._ 230 | 231 | @Benchmark @BenchmarkMode(Array(Mode.Throughput)) @OutputTimeUnit(TimeUnit.SECONDS) 232 | def validateThree(): PostValidation[ThreeFields] = { 233 | threeVal.validateRecord(Unvalidated(Three)) 234 | } 235 | 236 | @Benchmark @BenchmarkMode(Array(Mode.Throughput)) @OutputTimeUnit(TimeUnit.SECONDS) 237 | def validateTen(): PostValidation[TenFields] = { 238 | tenVal.validateRecord(Unvalidated(Ten)) 239 | } 240 | 241 | @Benchmark @BenchmarkMode(Array(Mode.Throughput)) @OutputTimeUnit(TimeUnit.SECONDS) 242 | def validateTwenty(): PostValidation[TwentyFieldsTenValidation] = { 243 | 
twentyV10Val.validateRecord(Unvalidated(Twenty)) 244 | } 245 | 246 | @Benchmark @BenchmarkMode(Array(Mode.Throughput)) @OutputTimeUnit(TimeUnit.SECONDS) 247 | def validateTenV3(): PostValidation[TenFieldsThreeValidation] = { 248 | tenV3Val.validateRecord(Unvalidated(TenV3)) 249 | } 250 | 251 | @Benchmark @BenchmarkMode(Array(Mode.Throughput)) @OutputTimeUnit(TimeUnit.SECONDS) 252 | def validatefiveN5(): PostValidation[FiveNestedFiveFields] = { 253 | fiveNFiveVal.validateRecord(Unvalidated(fiveN5)) 254 | } 255 | } 256 | //scalastyle:on magic.number 257 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elitzur Data Validation 2 | 3 | [![Lifecycle](https://img.shields.io/badge/lifecycle-production-1ed760.svg)](https://img.shields.io/badge/lifecycle-production-1ed760.svg) 4 | [![Build Status](https://img.shields.io/circleci/project/github/spotify/elitzur/master.svg)](https://circleci.com/gh/spotify/elitzur) 5 | 6 | ## Overview 7 | 8 | This library allows you to: 9 | 10 | * use custom Scala types in your Scio data pipeline 11 | * perform validation on these types 12 | * [filter or transform invalid data](#custom-validation-behaviour) 13 | * use Beam counters to check validity 14 | * convert Avro records into custom-typed case classes 15 | 16 | This README will show you how to incorporate this library into your Scio data pipeline. 17 | 18 | #### What does Elitzur mean? 19 | The [Elitzur-Vaidman bomb tester](https://en.wikipedia.org/wiki/Elitzur%E2%80%93Vaidman_bomb_tester) 20 | is a thought experiment that verifies a bomb is functional without having to detonate it. 21 | 22 | ## How to use in your data pipeline 23 | 24 | ### Step by step integration guide 25 | 26 | 1. You must be using Scio version `0.8.1` or greater. 27 | 28 | 2. 
Add these libraries to your `build.sbt` library dependencies 29 | 30 | You'll always want to add: 31 | 32 | ```sbtshell 33 | "com.spotify" %% "elitzur-core" % "CURRENT_VERSION", 34 | "com.spotify" %% "elitzur-scio" % "CURRENT_VERSION" 35 | ``` 36 | 37 | If you're using Avro case class conversions you'll additionally want: 38 | 39 | ```sbtshell 40 | "com.spotify" %% "elitzur-avro" % "CURRENT_VERSION" 41 | ``` 42 | 43 | The current version is available in the Releases tab on GitHub. 44 | 45 | You will also need to create appropriate validation types for your data. 46 | 47 | 3. Follow instructions below for Avro if you want automatic conversions. However, you can also use Elitzur with any hand-crafted case class simply by changing the types of the fields you want to validate to their corresponding Validation Types. 48 | 49 | For example: 50 | 51 | ```scala 52 | case class User(userId: String, country: String, age: Long) 53 | val user = User("test", "US", 25L) 54 | ``` 55 | would become: 56 | 57 | ```scala 58 | case class User(userId: UserId, country: CountryCode, age: Age) 59 | val user = User(UserId("test"), CountryCode("US"), Age(25L)) 60 | ``` 61 | 62 | 4. Call `.validate()` on your SCollection. This will validate all of the ValidationTypes in 63 | your data and complete the actions specified by your configuration. You'll need these imports to call `.validate()` successfully: 64 | 65 | ```scala 66 | import com.spotify.elitzur.validators._ 67 | import com.spotify.elitzur.scio._ 68 | ``` 69 | 70 | 71 | #### Avro 72 | 1. Manually define case classes containing only the fields you care about for your pipeline replacing 73 | relevant fields with Validation Type classes.
74 | 75 | For example, given the following Avro schema: 76 | 77 | ``` 78 | { 79 | "name": "MyRecordAvro", 80 | "namespace": "com.spotify.skeleton.schema", 81 | "type": "record", 82 | "fields": [ 83 | { 84 | "name": "userAge", 85 | "type": "long", 86 | "doc": "{validationType: age}" 87 | }, 88 | { 89 | "name": "userFloat", 90 | "type": "float", 91 | "doc": "floating" 92 | }, 93 | { 94 | "name": "userDouble", 95 | "type": "double", 96 | "doc": "{validationType: nonNegativeDouble}" 97 | }, 98 | { 99 | "name": "userLong", 100 | "type": "long", 101 | "doc": "{validationType: nonNegativeLong}" 102 | }, 103 | { 104 | "name": "inner", 105 | "type": "com.spotify.skeleton.schema.InnerNestedType" 106 | } 107 | ] 108 | } 109 | ``` 110 | 111 | We could have a case class for a pipeline that only uses `userAge`, `userFloat`, and `userDouble` 112 | defined: 113 | 114 | ```scala 115 | case class MyRecord(userAge: Age, userFloat: Float, userDouble: NonNegativeDouble) 116 | ``` 117 | 118 | Note that if you have a field that is nullable in your Avro schema (i.e. if your field is defined in Avro as a union with null), you'll want to wrap that type in an `Option` in your case class. 119 | 120 | 2. In order to convert to/from Avro you must import our converters. You'll also need to import the validators. 121 | 122 | ```scala 123 | import com.spotify.elitzur.converters.avro._ 124 | import com.spotify.elitzur.validators._ 125 | import com.spotify.elitzur.scio._ 126 | ``` 127 | 128 | These imports contain implicit values which need to be pulled into scope. IntelliJ may mark the imports as unused, but if you don't have them, compilation will fail with a "could not find implicit value" message. 129 | 130 | 3. Call `fromAvro` on your SCollection, providing the target type 131 | 132 | ```scala 133 | val coll: SCollection[MyRecordAvro] = ??? 134 | coll.fromAvro[MyRecord] 135 | ``` 136 | 137 | 4.
After validation or when outputting, you can convert to an Avro record using `toAvro` 138 | ```scala 139 | val coll: SCollection[MyRecord] = ??? 140 | coll.toAvro[OutputAvro] 141 | ``` 142 | 143 | ### Specific Field Types 144 | 145 | #### Using `Option` and `NULLABLE` 146 | 147 | When using fields of type `Option` or `NULLABLE`, it is important to note that when the value is `None` 148 | or missing this will count as **Valid** for now. 149 | 150 | #### Using `List` and `REPEATED` 151 | 152 | This is supported, **but** requires iteration of each List independently. It is up to the user to 153 | determine whether this is more efficient than flattening prior to validation, since this can vary on a case-by-case basis. 154 | 155 | #### Complex Avro Types 156 | 157 | Complex Avro types are currently not supported. This includes Unions (except those used for nullable fields), 158 | Fixed, and Map. 159 | 160 | #### Avro Enums 161 | 162 | Elitzur supports conversion of Avro enums to enums represented with the Scala library `Enumeratum`. 163 | In order to convert an Avro enum, simply use a matching Enumeratum enum as the type for the corresponding field in your case class. 164 | You can read more about Enumeratum [here](https://github.com/lloydmeta/enumeratum). 165 | 166 | ## Additional configuration 167 | 168 | If you would like to customize what happens when a validation rule is violated you can pass in a 169 | `ValidationRecordConfig` when you call `validate`. 170 | 171 | So for example this will throw an exception whenever it sees an invalid country code and won't log 172 | any counters for the age field.
173 | 174 | ```scala 175 | pipe.validate(ValidationRecordConfig("country" -> ThrowException, "age" -> NoCounter)) 176 | ``` 177 | 178 | For nested fields simply separate field names with `.` 179 | 180 | ```scala 181 | pipe.validate(ValidationRecordConfig("nested.country" -> ThrowException, "nested.age" -> NoCounter)) 182 | ``` 183 | 184 | The available configurations are: 185 | 186 | * `ThrowException` controls if an exception is thrown on violation of rules. By default no exception 187 | is thrown. If you'd like to override this add your field to this annotation above the class or case 188 | class 189 | * Example: `ValidationRecordConfig("country" -> ThrowException)` 190 | * `NoCounter` controls if a Beam counter is logged on violation of rules. By default a counter is 191 | recorded. If you'd like to override this add your field to this annotation above the class or case 192 | class 193 | * Example: `ValidationRecordConfig("age" -> NoCounter)` 194 | 195 | 196 | #### Custom Validation Behaviour 197 | When you create your own case classes you have the option to wrap Validation Types in a `ValidationStatus` 198 | type that will let you access the validation status of that field (Valid/Invalid) in code. 199 | For example: 200 | a `ValidationStatus[CountryCode]` will be either `Valid(CountryCode("US"))` or `Invalid(CountryCode("USA"))` 201 | after performing validation. 202 | If these are wrapped in ValidationStatus, they will always come out of `validate()` wrapped in Valid/Invalid. You can also wrap an entire record in Valid/Invalid (it's invalid if at least one field is invalid) by calling `validateWithResult()` on your to-be-validated SCollection. 203 | 204 | This allows you to match on the Validation and respond to invalid results however you see fit. 205 | 206 | Case classes or fields wrapped in the `Invalid` ValidationStatus will be filtered out if you call `flatten` or `flatMap` on a collection or SCollection of them.
207 | 208 | PLEASE NOTE this does have performance costs so it should only be used when additional customization 209 | is necessary. This will increase your data size and slow down shuffles. 210 | 211 | Here is a more complete example: 212 | ```scala 213 | case class Record(userId: UserId, country: ValidationStatus[CountryCode], age: ValidationStatus[Age]) 214 | val pipe: SCollection[Record] = ??? 215 | 216 | pipe.validate() 217 | .filter(_.country.isValid) 218 | .map(r => r.age match { 219 | case Valid(age) => age.data 220 | case Invalid(_) => 0L 221 | }) 222 | ``` 223 | The above code will filter out all Records with invalid countries and return the age where invalid ages 224 | are replaced with zero. 225 | 226 | An example of filtering out Invalid results could look like: 227 | ```scala 228 | case class Record(userId: UserId, country: CountryCode, age: Age) 229 | val pipe: SCollection[Record] = ??? 230 | 231 | pipe.validateWithResult().flatten // contains only Valid values 232 | ``` 233 | 234 | When constructing a case class containing these fields you wrap the Validation Type classes in 235 | `Unvalidated`. 236 | 237 | ```scala 238 | val r = Record(UserId("test"), Unvalidated(CountryCode("US")), Unvalidated(Age(25L))) 239 | ``` 240 | 241 | You can also use this on nested records. It will be Invalid if any Validation Type fields in the 242 | nested record are invalid. Otherwise it will be valid. 243 | 244 | 245 | ## Testing Avro pipelines with toAvroDefault 246 | In your pipeline code you should always use `toAvro` if you want to convert a case class to an Avro record, as it's performance optimized and when you're writing a pipeline you have control of your output schemas. `toAvro` will fail if an Avro record has a required field that is not specified in the case class you're converting to an Avro record. This is a user code error - your pipeline should write an output Avro schema that does not contain required fields that aren't in the case class you're converting.
247 | 248 | If you're writing a unit test and don't have control of the input schema, you may want to use `toAvroDefault` to do an Avro conversion to generate input data from a case class. Note that we do not make performance guarantees around `toAvroDefault`, so it should be used only in tests and not in production workflows. 249 | 250 | The test workflow using `toAvroDefault` works like this: 251 | 252 | 1. Generate case classes containing the fields you care about (this can be done in several ways) 253 | 2. Generate a SpecificRecord of your input type with values in all required fields (this can be done with `specificRecordOf` in [Ratatool-Scalacheck](https://github.com/spotify/ratatool/tree/master/ratatool-scalacheck#usage)) 254 | 3. Use both the case classes you've generated and the SpecificRecord as arguments to `toAvroDefault` 255 | 4. The output of `toAvroDefault` for a case class will be a SpecificRecord where all fields in your case class are copied into the record, and all fields not specified in the case class will have the values given in your "default" record. Repeated nested records will take their default values from the first nested record in the repeated list. 256 | 257 | 258 | ## Code of Conduct 259 | 260 | This project adheres to the Spotify FOSS Code of Conduct. By participating, you are expected to honor this code. 261 | --------------------------------------------------------------------------------