├── project ├── build.properties ├── plugin.sbt ├── Versions.scala ├── Settings.scala └── Dependencies.scala ├── hbase └── src │ ├── test │ ├── resources │ │ ├── application.conf │ │ └── log4j.properties │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── hbase │ │ ├── HBase2Mock.scala │ │ ├── HBaseMock.scala │ │ └── HBaseConnectorSuite.scala │ └── main │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── hbase │ ├── ConfigurationKeys.scala │ ├── HBaseConnectorCreator.scala │ └── HBaseConnector.scala ├── core └── src │ └── main │ ├── resources │ └── reference.conf │ ├── java │ └── it │ │ └── agilelab │ │ └── darwin │ │ ├── annotations │ │ └── AvroSerde.java │ │ └── manager │ │ └── IdSchemaPair.java │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── manager │ ├── CachedEagerAvroSchemaManager.scala │ ├── LazyAvroSchemaManager.scala │ ├── CachedLazyAvroSchemaManager.scala │ ├── AvroSchemaCacheFingerprint.scala │ ├── AvroSchemaCache.scala │ ├── CachedAvroSchemaManager.scala │ └── AvroSchemaManagerFactory.scala ├── spark-application └── src │ ├── dist │ └── conf │ │ └── application.conf │ └── main │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── app │ └── spark │ ├── classes │ ├── Ignored.scala │ ├── Food.scala │ ├── Menu.scala │ ├── Price.scala │ ├── Order.scala │ └── MenuItem.scala │ ├── SparkConfigurationKeys.scala │ ├── SchemaManagerSparkApp.scala │ ├── SparkManager.scala │ └── GenericMainClass.scala ├── .jvmopts ├── docs └── img │ ├── logo │ ├── darwin-icon.ai │ └── darwin-icon.png │ ├── darwin_interaction.jpg │ ├── darwin_lazy_schema.jpg │ ├── darwin_lazy_cached_schema.jpg │ └── darwin_eager_cached_schema.jpg ├── rest └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ ├── it │ │ └── agilelab │ │ │ └── darwin │ │ │ └── connector │ │ │ └── rest │ │ │ ├── RestConnectorCreator.scala │ │ │ ├── RestConnectorOptions.scala │ │ │ ├── JsonProtocol.scala │ │ │ └── RestConnector.scala │ │ └── scalaj │ │ └── http │ │ ├── OAuth.scala │ │ └── DigestAuth.scala │ └── test │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── rest │ └── RestConnectorSuite.scala ├── hbase1 └── src │ └── main │ ├── resources │ └── META-INF │ │ └── services │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── hbase │ └── HBaseUtils.scala ├── hbase2 └── src │ └── main │ ├── resources │ └── META-INF │ │ └── services │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── hbase │ └── HBaseUtils.scala ├── mongo └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── mongo │ │ ├── ConfigurationKeys.scala │ │ ├── ConfigurationMongoModels.scala │ │ ├── MongoConnectorCreator.scala │ │ └── MongoConnector.scala │ └── test │ ├── resources │ ├── mongo.conf │ └── mongomock.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── mongo │ └── MongoConnectorTest.scala ├── mock-connector └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── mock │ │ ├── testclasses │ │ ├── MockClassChild.scala │ │ ├── MockClassAlone.scala │ │ └── 
MockClassParent.scala │ │ ├── MockConnectorCreator.scala │ │ ├── ConfigurationKeys.scala │ │ └── MockConnector.scala │ └── test │ ├── resources │ └── test │ │ ├── MockClassAlone.avsc │ │ └── MockClassParent.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── mock │ └── MockConnectorSpec.scala ├── multi-connector └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── multi │ │ └── MultiConnectorCreator.scala │ └── test │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── multi │ └── MultiConnectorSpec.scala ├── confluent └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── confluent │ │ ├── HoconToMap.scala │ │ ├── ConfluentConnectorCreator.scala │ │ ├── ConfluentConnectorOptions.scala │ │ └── Main.scala │ └── test │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── confluent │ ├── ConfluentConnectorCreatorSuite.scala │ └── ConfluentConnectorSuite.scala ├── postgres └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── postgres │ │ ├── ConfigurationKeys.scala │ │ ├── PostgresConnectorCreator.scala │ │ ├── PostgresConnection.scala │ │ └── PostgresConnector.scala │ └── test │ ├── resources │ ├── postgres.properties │ └── postgresmock.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── postgres │ ├── Postgres2Mock.scala │ ├── PostgresMock.scala │ └── PostgresConnectorSuite.scala ├── mock-application └── src │ └── test │ ├── resources │ ├── application.conf │ ├── OneField.avsc │ ├── test │ │ ├── MockClassAlone.avsc │ │ └── MockClassParent.avsc │ └── MyNestedClass.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── app │ └── mock │ ├── classes │ ├── NotToBeRegisteredClass.scala │ ├── NewClass.scala │ ├── OneField.scala │ ├── MyTrait.scala │ ├── MyClass.scala │ ├── MyNestedAbstractClass.scala │ └── MyNestedClass.scala │ ├── TwoConnectorsSpec.scala │ ├── ManagerUtilsSuite.scala │ ├── LazyApplicationSuite.scala │ ├── CachedLazyApplicationSuite.scala │ └── CachedEagerApplicationSuite.scala ├── make.sh ├── publish.sh ├── common └── src │ ├── main │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ ├── manager │ │ ├── exception │ │ │ ├── DarwinException.scala │ │ │ └── ConnectorNotFoundException.scala │ │ ├── util │ │ │ ├── ConfigurationKeys.scala │ │ │ ├── ConfigUtil.scala │ │ │ └── ByteArrayUtils.scala │ │ └── SchemaPayloadPair.java │ │ └── common │ │ ├── Logging.scala │ │ ├── package.scala │ │ ├── ConnectorCreator.scala │ │ ├── JavaVersion.scala │ │ ├── DarwinConcurrentHashMap.scala │ │ ├── ConnectorFactory.scala │ │ ├── SchemaReader.scala │ │ └── compat │ │ └── package.scala │ └── test │ ├── resources │ └── test │ │ ├── MockClassAlone.avsc │ │ └── MockClassParent.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ ├── manager │ └── util │ │ ├── BigEndianAvroSingleObjectEncodingUtilsSpec.scala │ │ └── LittleEndianAvroSingleObjectEncodingUtilsSpec.scala │ └── common │ ├── CompatSpec.scala │ ├── DarwinConcurrentHashMapRunner.scala │ ├── DarwinTrieConcurrentHashMapSpec.scala │ ├── DarwinJava8ConcurrentHashMapSpec.scala │ └── 
DarwinConcurrentHashMapSpec.scala ├── rest-server └── src │ └── main │ ├── resources │ └── reference.conf │ ├── scala │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── server │ │ └── rest │ │ ├── Service.scala │ │ ├── Main.scala │ │ ├── JsonSupport.scala │ │ ├── DarwinService.scala │ │ └── HttpApp.scala │ └── postman │ └── darwinrest.postman_collection.json ├── .scalafmt.conf ├── .github └── workflows │ └── ci.yml ├── bump-version.sh ├── .gitignore └── CONTRIBUTING.md /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.9.8 2 | -------------------------------------------------------------------------------- /hbase/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | isSecure: false -------------------------------------------------------------------------------- /core/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | createTable: false 2 | -------------------------------------------------------------------------------- /spark-application/src/dist/conf/application.conf: -------------------------------------------------------------------------------- 1 | spark.yarn.maxAppAttempts: 1 -------------------------------------------------------------------------------- /.jvmopts: -------------------------------------------------------------------------------- 1 | -Dfile.encoding=UTF-8 2 | -Xms1024m 3 | -Xmx1024m 4 | -Xss4M 5 | -XX:ReservedCodeCacheSize=128m -------------------------------------------------------------------------------- /docs/img/logo/darwin-icon.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/HEAD/docs/img/logo/darwin-icon.ai -------------------------------------------------------------------------------- /docs/img/darwin_interaction.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/HEAD/docs/img/darwin_interaction.jpg -------------------------------------------------------------------------------- /docs/img/darwin_lazy_schema.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/HEAD/docs/img/darwin_lazy_schema.jpg -------------------------------------------------------------------------------- /docs/img/logo/darwin-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/HEAD/docs/img/logo/darwin-icon.png -------------------------------------------------------------------------------- /docs/img/darwin_lazy_cached_schema.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/HEAD/docs/img/darwin_lazy_cached_schema.jpg -------------------------------------------------------------------------------- /docs/img/darwin_eager_cached_schema.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/HEAD/docs/img/darwin_eager_cached_schema.jpg -------------------------------------------------------------------------------- /rest/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: 
-------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.rest.RestConnectorCreator 2 | -------------------------------------------------------------------------------- /hbase1/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.hbase.HBaseConnectorCreator 2 | -------------------------------------------------------------------------------- /hbase2/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.hbase.HBaseConnectorCreator 2 | -------------------------------------------------------------------------------- /mongo/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.mongo.MongoConnectorCreator 2 | -------------------------------------------------------------------------------- /mock-connector/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.mock.MockConnectorCreator 2 | -------------------------------------------------------------------------------- /multi-connector/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.multi.MultiConnectorCreator -------------------------------------------------------------------------------- /confluent/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.confluent.ConfluentConnectorCreator 2 | -------------------------------------------------------------------------------- /core/src/main/java/it/agilelab/darwin/annotations/AvroSerde.java: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.annotations; 2 | 3 | public @interface AvroSerde { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /postgres/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.postgres.PostgresConnectorCreator 2 | -------------------------------------------------------------------------------- /project/plugin.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") 2 | addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.5.12") 3 | -------------------------------------------------------------------------------- /mock-application/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | type: cached_eager 2 | connector: "mock" 3 | resources: ["test/MockClassParent.avsc", "test/MockClassAlone.avsc"] -------------------------------------------------------------------------------- /mock-application/src/test/resources/OneField.avsc: 
-------------------------------------------------------------------------------- 1 | {"type":"record","name":"OneField","namespace":"it.agilelab.darwin.app.mock.classes","fields":[{"name":"one","type":"int"}]} -------------------------------------------------------------------------------- /hbase/src/test/scala/it/agilelab/darwin/connector/hbase/HBase2Mock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | case class HBase2Mock(one: Boolean, two: Long) 4 | -------------------------------------------------------------------------------- /postgres/src/test/resources/postgres.properties: -------------------------------------------------------------------------------- 1 | host = localhost:5432 2 | db = postgres 3 | username = postgres 4 | password = mysecretpassword 5 | table = schema_registry 6 | -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/classes/Ignored.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark.classes 2 | 3 | case class Ignored(name: String) 4 | -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sbt -v clean scalastyle +test +doc darwin-hbase2-connector/clean darwin-hbase2-connector/scalastyle +darwin-hbase2-connector/test +darwin-hbase2-connector/doc 3 | -------------------------------------------------------------------------------- /mongo/src/test/resources/mongo.conf: -------------------------------------------------------------------------------- 1 | username = "mongo" 2 | password = "mongo" 3 | host = ["localhost:12345"] 4 | database = "test" 5 | collection = "collection_test" 6 | timeout = 5000 7 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/NotToBeRegisteredClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | case class NotToBeRegisteredClass() 4 | -------------------------------------------------------------------------------- /postgres/src/test/scala/it/agilelab/darwin/connector/postgres/Postgres2Mock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | case class Postgres2Mock(one: Boolean, two: Long) 4 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/NewClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | case class NewClass(one: Long, two: String, three: Int) 4 | -------------------------------------------------------------------------------- /hbase/src/test/scala/it/agilelab/darwin/connector/hbase/HBaseMock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | case class HBaseMock(one: Int, two: String, three: Long, four: HBase2Mock) 4 | -------------------------------------------------------------------------------- /publish.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CI_RELEASE='+publishSigned;+darwin-hbase2-connector/publishSigned' 3 | export CI_SNAPSHOT_RELEASE='+publish;+darwin-hbase2-connector/publish' 4 | sbt -v ci-release 5 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/exception/DarwinException.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.exception 2 | 3 | class DarwinException(message: String) extends RuntimeException(message) 4 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/testclasses/MockClassChild.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock.testclasses 2 | 3 | case class MockClassChild(twoOne: Long, twoTwo: String) 4 | -------------------------------------------------------------------------------- /postgres/src/test/scala/it/agilelab/darwin/connector/postgres/PostgresMock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | case class PostgresMock(one: Int, two: String, three: Long, four: Postgres2Mock) 4 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/OneField.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class OneField(one: Int) 7 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/testclasses/MockClassAlone.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock.testclasses 2 | 3 | case class MockClassAlone(fry: String, bender: Long, leela: Int, zoidberg: Boolean) 4 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyTrait.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | trait MyTrait { 7 | def value: Int 8 | } 9 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/testclasses/MockClassParent.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock.testclasses 2 | 3 | case class MockClassParent(one: Int, two: String, three: Long, four: MockClassChild) 4 | -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/classes/Food.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class Food(name: String, allergen: Boolean) 7 | 
-------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/classes/Menu.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class Menu(name: String, items: Seq[MenuItem]) 7 | -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/classes/Price.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class Price(amount: Float, discount: Float) 7 | -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/classes/Order.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class Order(entries: Seq[(MenuItem, Int)], table: String) 7 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class MyClass(override val value: Int, otherVale: Long) extends MyTrait 7 | -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/classes/MenuItem.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class MenuItem(name: String, price: Price, components: Seq[Food]) 7 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/Logging.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import org.slf4j.{ Logger, LoggerFactory } 4 | 5 | trait Logging { 6 | private lazy val _log = LoggerFactory.getLogger(getClass.getName) 7 | 8 | def log: Logger = _log 9 | } 10 | -------------------------------------------------------------------------------- /common/src/test/resources/test/MockClassAlone.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassAlone","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"fry","type":"string"},{"name":"bender","type":"long"},{"name":"leela","type":"int"},{"name":"zoidberg","type":"boolean"}]} 2 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyNestedAbstractClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | abstract class MyNestedAbstractClass[T <: 
MyTrait](id: Int, myClass: T) 7 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/manager/util/BigEndianAvroSingleObjectEncodingUtilsSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.nio.ByteOrder 4 | 5 | class BigEndianAvroSingleObjectEncodingUtilsSpec extends AvroSingleObjectEncodingUtilsSpec(ByteOrder.BIG_ENDIAN) 6 | -------------------------------------------------------------------------------- /mock-application/src/test/resources/test/MockClassAlone.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassAlone","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"fry","type":"string"},{"name":"bender","type":"long"},{"name":"leela","type":"int"},{"name":"zoidberg","type":"boolean"}]} 2 | -------------------------------------------------------------------------------- /mock-connector/src/test/resources/test/MockClassAlone.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassAlone","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"fry","type":"string"},{"name":"bender","type":"long"},{"name":"leela","type":"int"},{"name":"zoidberg","type":"boolean"}]} 2 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/manager/util/LittleEndianAvroSingleObjectEncodingUtilsSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.nio.ByteOrder 4 | 5 | class LittleEndianAvroSingleObjectEncodingUtilsSpec extends AvroSingleObjectEncodingUtilsSpec(ByteOrder.LITTLE_ENDIAN) 6 | -------------------------------------------------------------------------------- /project/Versions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * @author andreaL 3 | */ 4 | object Versions { 5 | val scala_211 = "2.11.12" 6 | val scala_210 = "2.10.7" 7 | val scala = "2.12.13" 8 | val scala_213 = "2.13.5" 9 | val crossScalaVersions = Seq(scala_210, scala_211, scala, scala_213) 10 | } 11 | -------------------------------------------------------------------------------- /rest-server/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | akka { 2 | loggers = ["akka.event.slf4j.Slf4jLogger"] 3 | loglevel = "DEBUG" 4 | logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" 5 | } 6 | 7 | darwin { 8 | type = "lazy" 9 | connector = "mock" 10 | } 11 | 12 | darwin-rest{ 13 | interface = "localhost" 14 | port = 8080 15 | } -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyNestedClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class MyNestedClass(id: Int, myClass: MyClass, my2Class: Map[String, MyClass]) 7 | extends MyNestedAbstractClass[MyClass](id, myClass) 8 | -------------------------------------------------------------------------------- /mock-application/src/test/resources/MyNestedClass.avsc: 
-------------------------------------------------------------------------------- 1 | {"type":"record","name":"MyNestedClass","namespace":"it.agilelab.darwin.app.mock.classes","fields":[{"name":"id","type":"int"},{"name":"myClass","type":{"type":"record","name":"MyClass","fields":[{"name":"value","type":"int"},{"name":"otherVale","type":"long"}]}},{"name":"my2Class","type":{"type":"map","values":"MyClass"}}]} -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/package.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin 2 | 3 | package object common { 4 | 5 | def using[A <: AutoCloseable, B](closeable: A)(f: A => B): B = { 6 | try { 7 | f(closeable) 8 | } finally { 9 | closeable.close() 10 | } 11 | } 12 | 13 | final val LONG_SIZE = 8 14 | final val INT_SIZE = 4 15 | 16 | } 17 | -------------------------------------------------------------------------------- /common/src/test/resources/test/MockClassParent.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassParent","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"one","type":"int"},{"name":"two","type":"string"},{"name":"three","type":"long"},{"name":"four","type":{"type":"record","name":"MockClassChild","fields":[{"name":"twoOne","type":"long"},{"name":"twoTwo","type":"string"}]}}]} 2 | -------------------------------------------------------------------------------- /hbase/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=WARN, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1} - %m%n 9 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/Service.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.actor.ActorSystem 4 | import akka.http.scaladsl.server.Route 5 | import akka.stream.ActorMaterializer 6 | 7 | trait Service { 8 | implicit val system: ActorSystem 9 | implicit val materializer: ActorMaterializer 10 | 11 | def route: Route 12 | } 13 | -------------------------------------------------------------------------------- /mock-application/src/test/resources/test/MockClassParent.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassParent","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"one","type":"int"},{"name":"two","type":"string"},{"name":"three","type":"long"},{"name":"four","type":{"type":"record","name":"MockClassChild","fields":[{"name":"twoOne","type":"long"},{"name":"twoTwo","type":"string"}]}}]} 2 | -------------------------------------------------------------------------------- /mock-connector/src/test/resources/test/MockClassParent.avsc: -------------------------------------------------------------------------------- 1 | 
{"type":"record","name":"MockClassParent","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"one","type":"int"},{"name":"two","type":"string"},{"name":"three","type":"long"},{"name":"four","type":{"type":"record","name":"MockClassChild","fields":[{"name":"twoOne","type":"long"},{"name":"twoTwo","type":"string"}]}}]} 2 | -------------------------------------------------------------------------------- /postgres/src/main/scala/it/agilelab/darwin/connector/postgres/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | object ConfigurationKeys { 4 | val TABLE: String = "table" 5 | val HOST: String = "host" 6 | val DATABASE: String = "db" 7 | val USER: String = "username" 8 | val PASSWORD: String = "password" 9 | val MODE: String = "mode" 10 | } 11 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/exception/ConnectorNotFoundException.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.exception 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.manager.util.ConfigUtil 5 | 6 | class ConnectorNotFoundException(val config: Config) extends RuntimeException(s"Cannot find Darwin connector") { 7 | 8 | def confAsString(): String = ConfigUtil.printConfig(config) 9 | 10 | } 11 | -------------------------------------------------------------------------------- /hbase/src/main/scala/it/agilelab/darwin/connector/hbase/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | object ConfigurationKeys { 4 | val TABLE: String = "table" 5 | val NAMESPACE: String = "namespace" 6 | val HBASE_SITE: String = "hbaseSite" 7 | val CORE_SITE: String = "coreSite" 8 | val IS_SECURE: String = "isSecure" 9 | val PRINCIPAL: String = "principal" 10 | val KEYTAB_PATH: String = "keytabPath" 11 | } 12 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/util/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | object ConfigurationKeys { 4 | 5 | val CREATE_TABLE = "createTable" 6 | 7 | val CONNECTOR = "connector" 8 | 9 | val MANAGER_TYPE: String = "type" 10 | val ENDIANNESS: String = "endianness" 11 | val CACHED_EAGER: String = "cached_eager" 12 | val CACHED_LAZY: String = "cached_lazy" 13 | val LAZY: String = "lazy" 14 | } 15 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "2.7.2" 2 | project.git = true 3 | encoding = "UTF-8" 4 | maxColumn = 120 5 | align.preset = most 6 | continuationIndent.defnSite = 2 7 | assumeStandardLibraryStripMargin = true 8 | docstrings = ScalaDoc 9 | lineEndings = unix 10 | includeCurlyBraceInSelectChains = false 11 | danglingParentheses.preset = true 12 | spaces { 13 | inImportCurlyBraces = true 14 | } 15 | optIn.annotationNewlines = true 16 | 17 | rewrite.rules = [SortImports, SortModifiers] -------------------------------------------------------------------------------- /hbase1/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseUtils.scala: 
-------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import org.apache.hadoop.hbase.{ HColumnDescriptor, HTableDescriptor, TableName } 4 | import org.apache.hadoop.hbase.client.Admin 5 | 6 | object HBaseUtils { 7 | def createTable(admin: Admin, tableName: TableName, columnFamily: Array[Byte]): Unit = { 8 | admin.createTable(new HTableDescriptor(tableName).addFamily(new HColumnDescriptor(columnFamily))) 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/MockConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator } 5 | 6 | class MockConnectorCreator extends ConnectorCreator { 7 | override def create(config: Config): Connector = new MockConnector(config) 8 | 9 | /** 10 | * @return the name of the Connector 11 | */ 12 | override def name(): String = "mock" 13 | } 14 | -------------------------------------------------------------------------------- /postgres/src/main/scala/it/agilelab/darwin/connector/postgres/PostgresConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator } 5 | 6 | class PostgresConnectorCreator extends ConnectorCreator { 7 | override def create(config: Config): Connector = new PostgresConnector(config) 8 | 9 | /** 10 | * @return the name of the Connector 11 | */ 12 | override def name(): String = "postgresql" 13 | } 14 | -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/SparkConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark 2 | 3 | trait SparkConfigurationKeys { 4 | val SPARK_APP_NAME = "spark.app.name" 5 | val SPARK_CORES = "spark.executor.cores" 6 | val PARALLELISM: String = "parallelism" 7 | val SPARK_DRIVER_CORES = "spark.driver.cores" 8 | val SPARK_EXECUTOR_INSTANCES = "spark.executor.instances" 9 | val SPARK_DEFAULT_PARALLELISM = "spark.default.parallelism" 10 | } 11 | 12 | object SparkConfigurationKeys extends SparkConfigurationKeys 13 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | object ConfigurationKeys { 4 | 5 | val USERNAME: String = "username" // the username 6 | val PASSWORD: String = "password" // the password 7 | val HOST: String = "host" // the hostname where you want to connect 8 | val DATABASE: String = "database" // the name of the database in which the user is defined 9 | val COLLECTION: String = "collection" // the collection name 10 | val TIMEOUT: String = "timeout" // the timeout max to wait the results 11 | 12 | } 13 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/CompatSpec.scala: 
-------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import org.scalatest.flatspec.AnyFlatSpec 4 | import org.scalatest.matchers.should.Matchers 5 | import compat._ 6 | 7 | class CompatSpec extends AnyFlatSpec with Matchers { 8 | 9 | "RightBiasedEither" should "map correctly on left side" in { 10 | Left[Int, String](3).rightMap { 11 | "Hello" + _ 12 | } shouldBe Left[Int, String](3) 13 | } 14 | 15 | it should "map correctly on right side" in { 16 | Right[Int, String]("Darwin").rightMap { 17 | "Hello " + _ 18 | } shouldBe Right[Int, String]("Hello Darwin") 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /hbase2/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseUtils.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import org.apache.hadoop.hbase.TableName 4 | import org.apache.hadoop.hbase.client.{ Admin, ColumnFamilyDescriptorBuilder, TableDescriptorBuilder } 5 | 6 | object HBaseUtils { 7 | def createTable(admin: Admin, tableName: TableName, columnFamily: Array[Byte]): Unit = { 8 | admin.createTable( 9 | TableDescriptorBuilder 10 | .newBuilder(tableName) 11 | .setColumnFamily( 12 | ColumnFamilyDescriptorBuilder.newBuilder(columnFamily).build() 13 | ) 14 | .build() 15 | ) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /core/src/main/java/it/agilelab/darwin/manager/IdSchemaPair.java: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager; 2 | 3 | import org.apache.avro.Schema; 4 | 5 | public class IdSchemaPair { 6 | private final long id; 7 | private final Schema schema; 8 | 9 | private IdSchemaPair(long id, Schema schema) { 10 | this.id = id; 11 | this.schema = schema; 12 | } 13 | 14 | public long getId() { 15 | return id; 16 | } 17 | 18 | public Schema getSchema() { 19 | return schema; 20 | } 21 | 22 | public static IdSchemaPair create(long id, Schema schema) { 23 | return new IdSchemaPair(id, schema); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /mongo/src/test/resources/mongomock.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type" : "record", 3 | "name" : "PostgresMock", 4 | "namespace" : "it.agilelab.darwin.connector.postgres", 5 | "fields" : [ { 6 | "name" : "one", 7 | "type" : "int" 8 | }, { 9 | "name" : "two", 10 | "type" : "string" 11 | }, { 12 | "name" : "three", 13 | "type" : "long" 14 | }, { 15 | "name" : "four", 16 | "type" : { 17 | "type" : "record", 18 | "name" : "Postgres2Mock", 19 | "fields" : [ { 20 | "name" : "one", 21 | "type" : "boolean" 22 | }, { 23 | "name" : "two", 24 | "type" : "long" 25 | } ] 26 | } 27 | } ] 28 | } 29 | -------------------------------------------------------------------------------- /postgres/src/test/resources/postgresmock.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type" : "record", 3 | "name" : "PostgresMock", 4 | "namespace" : "it.agilelab.darwin.connector.postgres", 5 | "fields" : [ { 6 | "name" : "one", 7 | "type" : "int" 8 | }, { 9 | "name" : "two", 10 | "type" : "string" 11 | }, { 12 | "name" : "three", 13 | "type" : "long" 14 | }, { 15 | "name" : "four", 16 | "type" : { 17 | "type" : "record", 18 | "name" : "Postgres2Mock", 19 | "fields" : [ { 20 
| "name" : "one", 21 | "type" : "boolean" 22 | }, { 23 | "name" : "two", 24 | "type" : "long" 25 | } ] 26 | } 27 | } ] 28 | } 29 | -------------------------------------------------------------------------------- /hbase/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, Logging } 5 | 6 | class HBaseConnectorCreator extends ConnectorCreator with Logging { 7 | override def create(config: Config): Connector = { 8 | log.debug("creating the HBase connector") 9 | val connector: Connector = HBaseConnector.instance(config) 10 | log.debug("HBase connector created") 11 | connector 12 | } 13 | 14 | /** 15 | * @return the name of the Connector 16 | */ 17 | override def name(): String = "hbase" 18 | } 19 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/CachedEagerAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | 5 | import it.agilelab.darwin.common.Connector 6 | import org.apache.avro.Schema 7 | 8 | /** 9 | * Implementation of CachedAvroSchemaManager that loads all the schemas into the cache at startup and doesn't 10 | * perform any other accesses to the storage: every retrieval is served from the cache. 11 | */ 12 | class CachedEagerAvroSchemaManager(connector: Connector, endianness: ByteOrder) 13 | extends CachedAvroSchemaManager(connector, endianness) { 14 | override def getSchema(id: Long): Option[Schema] = cache.getSchema(id) 15 | } 16 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/ConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import com.typesafe.config.Config 4 | 5 | /** 6 | * A generic interface used to create the [[Connector]] found in the classpath. 7 | */ 8 | trait ConnectorCreator { 9 | 10 | /** 11 | * @return the name of the Connector 12 | */ 13 | def name(): String 14 | 15 | /** 16 | * This method should be overridden in each connector module returning its implementation.
17 | * 18 | * @param config configuration that will be used to create the correct implementation of [[Connector]] 19 | * @return the specific instance of [[Connector]] 20 | */ 21 | def create(config: Config): Connector 22 | } 23 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/SchemaPayloadPair.java: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager; 2 | 3 | import org.apache.avro.Schema; 4 | 5 | public class SchemaPayloadPair { 6 | private final Schema schema; 7 | private final byte[] payload; 8 | 9 | private SchemaPayloadPair(Schema schema, byte[] payload) { 10 | this.schema = schema; 11 | this.payload = payload; 12 | } 13 | 14 | public Schema getSchema() { 15 | return schema; 16 | } 17 | 18 | public byte[] getPayload() { 19 | return payload; 20 | } 21 | 22 | public static SchemaPayloadPair create(Schema schema, byte[] payload) { 23 | return new SchemaPayloadPair(schema, payload); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | tags: 7 | - '*' 8 | pull_request: {} 9 | jobs: 10 | ci: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | - uses: coursier/cache-action@v6 17 | - uses: coursier/setup-action@v1 18 | with: 19 | jvm: zulu:8.0.402 20 | - run: ./make.sh && ./publish.sh 21 | env: 22 | SBT_NATIVE_CLIENT: false 23 | PGP_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }} 24 | PGP_SECRET: ${{ secrets.PGP_SECRET }} 25 | SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} 26 | SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }} 27 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | object ConfigurationKeys { 4 | val FILES = "files" 5 | val RESOURCES = "resources" 6 | val MODE = "mode" 7 | val STRICT = "strict" 8 | val PERMISSIVE = "permissive" 9 | 10 | sealed trait Mode 11 | 12 | object Mode { 13 | def parse(string: String): Mode = { 14 | string.toLowerCase match { 15 | case STRICT => Strict 16 | case PERMISSIVE => Permissive 17 | case other: String => throw new IllegalArgumentException(s"Unknown mode: $other") 18 | } 19 | } 20 | } 21 | 22 | case object Strict extends Mode 23 | 24 | case object Permissive extends Mode 25 | 26 | } 27 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/RestConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, Logging } 5 | 6 | class RestConnectorCreator extends ConnectorCreator with Logging { 7 | 8 | override def create(config: Config): Connector = { 9 | log.debug("creating rest connector") 10 | 11 | val restOptions = RestConnectorOptions.fromConfig(config) 12 | log.info("rest options are {}", restOptions) 13 | 14 | val rest = new RestConnector(restOptions, config) 15 | log.debug("created rest 
connector") 16 | rest 17 | } 18 | 19 | /** 20 | * @return the name of the Connector 21 | */ 22 | override def name(): String = "rest" 23 | } 24 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/ConfigurationMongoModels.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | import scala.concurrent.duration.Duration 4 | 5 | object ConfigurationMongoModels { 6 | 7 | sealed trait BaseMongoConfig { 8 | def database: String 9 | def collection: String 10 | def timeout: Duration 11 | } 12 | 13 | case class MongoConfig( 14 | database: String, 15 | collection: String, 16 | timeout: Duration 17 | ) extends BaseMongoConfig 18 | 19 | case class MongoConnectorConfig( 20 | username: String, 21 | password: String, 22 | database: String, 23 | collection: String, 24 | hosts: Seq[String], 25 | timeout: Duration 26 | ) extends BaseMongoConfig 27 | 28 | val DEFAULT_DURATION = 5000 29 | 30 | } 31 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/Main.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.actor.ActorSystem 4 | import akka.stream.ActorMaterializer 5 | import com.typesafe.config.ConfigFactory 6 | import it.agilelab.darwin.manager.AvroSchemaManagerFactory 7 | 8 | object Main { 9 | 10 | def main(args: Array[String]): Unit = { 11 | 12 | implicit val actorSystem: ActorSystem = ActorSystem() 13 | implicit val materializer: ActorMaterializer = ActorMaterializer() 14 | 15 | val config = ConfigFactory.load() 16 | val schemaManagerConfig = config.getConfig("darwin") 17 | val restConfig = config.getConfig("darwin-rest") 18 | val schemaManager = AvroSchemaManagerFactory.initialize(schemaManagerConfig) 19 | 20 | HttpApp(restConfig, DarwinService(schemaManager)).run() 21 | 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/RestConnectorOptions.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import java.net.URI 4 | 5 | import com.typesafe.config.Config 6 | 7 | case class RestConnectorOptions(protocol: String, host: String, port: Int, basePath: String) { 8 | def endpoint(path: String): String = 9 | URI.create(s"$protocol://$host:$port").resolve(basePath).resolve(path).toString 10 | } 11 | 12 | object RestConnectorOptions { 13 | 14 | private val PROTOCOL = "protocol" 15 | private val HOST = "host" 16 | private val PORT = "port" 17 | private val BASE_PATH = "basePath" 18 | 19 | def fromConfig(config: Config): RestConnectorOptions = 20 | RestConnectorOptions( 21 | config.getString(PROTOCOL), 22 | config.getString(HOST), 23 | config.getInt(PORT), 24 | config.getString(BASE_PATH) 25 | ) 26 | } 27 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/util/ConfigUtil.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.nio.ByteOrder 4 | 5 | import com.typesafe.config.{ Config, ConfigRenderOptions } 6 | 7 | object ConfigUtil { 8 | def printConfig(conf: Config): String = { 9 | 
conf.root().render(ConfigRenderOptions.defaults().setComments(false).setOriginComments(false)) 10 | } 11 | 12 | def printSmallConfig(conf: Config): String = { 13 | conf.root().render(ConfigRenderOptions.defaults().setComments(false).setOriginComments(false)) 14 | } 15 | 16 | def stringToEndianness(string: String): ByteOrder = { 17 | string.toUpperCase match { 18 | case "BIG_ENDIAN" => ByteOrder.BIG_ENDIAN 19 | case "LITTLE_ENDIAN" => ByteOrder.LITTLE_ENDIAN 20 | case _ => throw new IllegalArgumentException(s"Unknown endianness: $string") 21 | } 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/JavaVersion.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | object JavaVersion { 4 | 5 | /** 6 | * @return the JVM version in use, as an Integer indicating the major version 7 | */ 8 | def current(): Int = { 9 | val propertyValue = System.getProperty("java.version") 10 | parseJavaVersion(propertyValue) 11 | } 12 | 13 | /** 14 | * @return the JVM version represented by the input string, as an Integer indicating the major version 15 | */ 16 | def parseJavaVersion(propertyValue: String): Int = { 17 | val splits = propertyValue.split("\\.") 18 | if (propertyValue.startsWith("1.")) { 19 | splits(1).takeWhile(isDigit).toInt 20 | } else { 21 | splits(0).takeWhile(isDigit).toInt 22 | } 23 | } 24 | private val digits = ('0' to '9').toSet 25 | private def isDigit(c: Char): Boolean = { 26 | digits.contains(c) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/LazyAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | 5 | import it.agilelab.darwin.common.Connector 6 | import org.apache.avro.Schema 7 | 8 | /** 9 | * Implementation of AvroSchemaManager that performs all the operations directly on the storage (retrievals and 10 | * insertions).
11 | */ 12 | class LazyAvroSchemaManager(connector: Connector, endianness: ByteOrder) 13 | extends AvroSchemaManager(connector, endianness) { 14 | 15 | override def getSchema(id: Long): Option[Schema] = connector.findSchema(id) 16 | 17 | override def registerAll(schemas: Seq[Schema]): Seq[(Long, Schema)] = { 18 | val schemasWithIds = schemas.map(s => getId(s) -> s) 19 | connector.insert(schemasWithIds) 20 | schemasWithIds 21 | } 22 | 23 | override def reload(): AvroSchemaManager = this 24 | 25 | override def getAll: Seq[(Long, Schema)] = connector.fullLoad() 26 | } 27 | -------------------------------------------------------------------------------- /postgres/src/main/scala/it/agilelab/darwin/connector/postgres/PostgresConnection.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | import java.sql.{ Connection, DriverManager } 4 | 5 | import com.typesafe.config.Config 6 | 7 | trait PostgresConnection { 8 | 9 | private var connectionUrl: String = "" 10 | private val driverName: String = "org.postgresql.Driver" 11 | 12 | protected def setConnectionConfig(config: Config) = { 13 | val db = config.getString(ConfigurationKeys.DATABASE) 14 | val host = config.getString(ConfigurationKeys.HOST) 15 | val user = config.getString(ConfigurationKeys.USER) 16 | val password = config.getString(ConfigurationKeys.PASSWORD) 17 | connectionUrl = s"jdbc:postgresql://$host/$db?user=$user&password=$password" 18 | } 19 | 20 | protected def getConnection: Connection = { 21 | Class.forName(driverName) 22 | val connection: Connection = DriverManager.getConnection(connectionUrl) 23 | connection 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/CachedLazyAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | 5 | import it.agilelab.darwin.common.Connector 6 | import org.apache.avro.Schema 7 | 8 | /** 9 | * Implementation of CachedAvroSchemaManager that loads all the schemas into the cache at startup and serves 10 | * every retrieval from the cache; an access to the storage is performed only if there is a cache miss.
11 | */ 12 | class CachedLazyAvroSchemaManager(connector: Connector, endianness: ByteOrder) 13 | extends CachedAvroSchemaManager(connector, endianness) { 14 | 15 | override def getSchema(id: Long): Option[Schema] = { 16 | cache.getSchema(id).orElse { 17 | val schema: Option[Schema] = connector.findSchema(id) 18 | schema.foreach(s => _cache.set(Some(cache.insert(Seq(id -> s))))) 19 | schema 20 | } 21 | } 22 | 23 | override def getAll: Seq[(Long, Schema)] = { 24 | _cache.set(Some(cache.insert(connector.fullLoad()))) 25 | cache.getAll 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinConcurrentHashMapRunner.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import it.agilelab.darwin.common.DarwinConcurrentHashMap.{ DarwinJava8ConcurrentHashMap, DarwinTrieConcurrentHashMap } 4 | import org.scalatest.flatspec.AnyFlatSpec 5 | import org.scalatest.matchers.should.Matchers 6 | import org.scalatest.{ BeforeAndAfter, BeforeAndAfterAll } 7 | 8 | sealed private[common] class DarwinConcurrentHashMapRunner[K, V](sut: () => DarwinConcurrentHashMap[K, V]) 9 | extends AnyFlatSpec 10 | with Matchers 11 | with BeforeAndAfterAll 12 | with BeforeAndAfter { 13 | 14 | protected class DefaultException extends Exception("Side effect evaluated!") 15 | 16 | protected def anEmptySut: DarwinConcurrentHashMap[K, V] = sut() 17 | 18 | } 19 | 20 | abstract class DarwinJava8ConcurrentHashMapRunner[K, V] 21 | extends DarwinConcurrentHashMapRunner[K, V](() => new DarwinJava8ConcurrentHashMap) 22 | abstract class DarwinJava7ConcurrentHashMapRunner[K, V] 23 | extends DarwinConcurrentHashMapRunner[K, V](() => new DarwinTrieConcurrentHashMap) 24 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/HoconToMap.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.{ ConfigObject, ConfigValue } 4 | import it.agilelab.darwin.common.compat.{ JMapConverter, SetConverter } 5 | 6 | import scala.collection.mutable 7 | 8 | private[confluent] object HoconToMap { 9 | 10 | private def walk(root: ConfigValue): Map[String, AnyRef] = { 11 | val result = mutable.HashMap.empty[String, AnyRef] 12 | 13 | def doWalk(path: String, r: ConfigValue): Unit = { 14 | 15 | r match { 16 | case o: ConfigObject => 17 | o.keySet().toScala().foreach { key => 18 | val nextPath = if (path.isEmpty) key else path + "." 
+ key 19 | doWalk(nextPath, o.get(key)) 20 | } 21 | case _ => 22 | result += path -> r.unwrapped() 23 | } 24 | } 25 | 26 | doWalk("", root) 27 | 28 | result.toMap 29 | } 30 | 31 | def convert(configValue: ConfigValue): java.util.Map[String, AnyRef] = { 32 | walk(configValue).toJava() 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.Config 4 | import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient 5 | import it.agilelab.darwin.common.compat._ 6 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, Logging } 7 | 8 | class ConfluentConnectorCreator extends ConnectorCreator with Logging { 9 | 10 | override def create(config: Config): Connector = { 11 | log.debug("creating confluent connector") 12 | 13 | val confluentOptions = ConfluentConnectorOptions.fromConfig(config) 14 | log.info("confluent options are {}", confluentOptions) 15 | 16 | val client = new CachedSchemaRegistryClient( 17 | confluentOptions.endpoints.toJavaList(), 18 | confluentOptions.maxCachedSchemas, 19 | confluentOptions.config 20 | ) 21 | 22 | val rest = new ConfluentConnector(confluentOptions, client) 23 | log.debug("created confluent connector") 24 | rest 25 | } 26 | 27 | /** 28 | * @return the name of the Connector 29 | */ 30 | override def name(): String = "confluent" 31 | } 32 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/AvroSchemaCacheFingerprint.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import it.agilelab.darwin.common.Logging 4 | import org.apache.avro.Schema 5 | 6 | /** 7 | * Implementation of [[AvroSchemaCache]] that uses Fingerprint64 as IDs. 
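 *
 * The second constructor argument is the fingerprint function used to compute the IDs; for reference, the
 * cached managers build this cache from a connector's full load and its fingerprint function (sketch):
 * {{{ AvroSchemaCacheFingerprint(connector.fullLoad(), connector.fingerprint) }}}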
8 | * 9 | * @param schemas a sequence of (ID, schema) used to initialize the cache values 10 | */ 11 | case class AvroSchemaCacheFingerprint(schemas: Seq[(Long, Schema)], fingerPrinter: Schema => Long) 12 | extends AvroSchemaCache(schemas) 13 | with Logging { 14 | log.debug(s"initialization of the cache with ${schemas.size} schemas") 15 | private val _table: Map[Long, Schema] = schemas.toMap 16 | log.debug("cache initialized") 17 | 18 | override def getSchema(id: Long): Option[Schema] = _table.get(id) 19 | 20 | override def contains(schema: Schema): (Boolean, Long) = { 21 | val id = fingerPrinter(schema) 22 | _table.contains(id) -> id 23 | } 24 | 25 | override def insert(values: Seq[(Long, Schema)]): AvroSchemaCache = 26 | AvroSchemaCacheFingerprint(_table.toSeq ++ values, fingerPrinter) 27 | 28 | /** 29 | * Retrieves all registered schemas 30 | * 31 | * @return A Sequence of (ID, Schema) 32 | */ 33 | override def getAll: Seq[(Long, Schema)] = _table.toSeq 34 | } 35 | -------------------------------------------------------------------------------- /bump-version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 2 ]; then 4 | echo "Illegal number of parameters, you need to pass two parameters" 5 | exit 1 6 | fi 7 | 8 | unameOut="$(uname -s)" 9 | case "${unameOut}" in 10 | Linux*) machine=Linux;; 11 | Darwin*) machine=Mac;; 12 | *) machine=UNKNOWN 13 | esac 14 | 15 | if [ "$machine" = 'UNKNOWN' ]; then 16 | echo "Unknown os... aborting" 17 | exit 2 18 | fi 19 | 20 | echo "Running on $machine.." 21 | 22 | OLD_VERSION=$1 23 | NEW_VERSION=$2 24 | FILES_TO_CHANGE=$(git grep -l "$OLD_VERSION" | grep -v ".*\.ai\|.*\.svg\|.*\.xml") # there is an ai file that always matches... 25 | 26 | if [ -z "$FILES_TO_CHANGE" ]; then 27 | echo "No files to change..." 
28 | exit 0 29 | fi 30 | 31 | echo "Bumping from version $OLD_VERSION to version $NEW_VERSION" 32 | echo "Editing the following files:" 33 | echo "" 34 | echo "$FILES_TO_CHANGE" 35 | echo "----------------------------" 36 | 37 | while IFS= read -r line; do 38 | case "${machine}" in 39 | Linux*) sed -i "s/${OLD_VERSION}/${NEW_VERSION}/g" $line;; 40 | Mac*) sed -i '' -e "s/${OLD_VERSION}/${NEW_VERSION}/g" $line;; 41 | esac 42 | git add $line 43 | done <<< "$FILES_TO_CHANGE" 44 | 45 | 46 | echo "Press enter to commit:" 47 | read 48 | 49 | git commit -e -m "Bump version to $NEW_VERSION" 50 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorOptions.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | 6 | case class ConfluentConnectorOptions( 7 | endpoints: List[String], 8 | config: java.util.Map[String, AnyRef], 9 | maxCachedSchemas: Int 10 | ) 11 | 12 | object ConfluentConnectorOptions { 13 | 14 | val ENDPOINTS_CONFIG_KEY = "endpoints" 15 | val MAX_CACHED_SCHEMA_KEYS = "max-cached-schemas" 16 | 17 | def fromConfig(config: Config): ConfluentConnectorOptions = { 18 | 19 | if (!config.hasPath(ENDPOINTS_CONFIG_KEY)) { 20 | throw new IllegalArgumentException( 21 | s"Missing [${ENDPOINTS_CONFIG_KEY}] configuration key for ${classOf[ConfluentConnector].getName}" 22 | ) 23 | } 24 | 25 | if (!config.hasPath(MAX_CACHED_SCHEMA_KEYS)) { 26 | throw new IllegalArgumentException( 27 | s"Missing [${MAX_CACHED_SCHEMA_KEYS}] configuration key for ${classOf[ConfluentConnector].getName}" 28 | ) 29 | } 30 | 31 | val endpoints = config.getStringList(ENDPOINTS_CONFIG_KEY).toScala().toList 32 | val maxCachedSchemas = config.getInt(MAX_CACHED_SCHEMA_KEYS) 33 | val other = config.root() 34 | 35 | ConfluentConnectorOptions(endpoints, HoconToMap.convert(other), maxCachedSchemas) 36 | 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/JsonProtocol.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import java.io.InputStream 4 | 5 | import org.apache.avro.Schema 6 | import org.codehaus.jackson.map.ObjectMapper 7 | import org.codehaus.jackson.node.JsonNodeFactory 8 | import it.agilelab.darwin.common.compat._ 9 | 10 | trait JsonProtocol { 11 | val objectMapper = new ObjectMapper() 12 | 13 | def toJson(schemas: Seq[(Long, Schema)]): String = { 14 | 15 | val data = schemas.map { case (_, schema) => 16 | objectMapper.readTree(schema.toString) 17 | }.foldLeft(JsonNodeFactory.instance.arrayNode()) { case (array, node) => 18 | array.add(node) 19 | array 20 | } 21 | 22 | objectMapper.writeValueAsString(data) 23 | } 24 | 25 | def toSeqOfIdSchema(in: InputStream): Seq[(Long, Schema)] = { 26 | val node = objectMapper.readTree(in) 27 | 28 | node.getElements 29 | .toScala() 30 | .map { node => 31 | val id = node.get("id").asText().toLong 32 | val schemaNode = node.get("schema") 33 | 34 | val schemaToString = objectMapper.writeValueAsString(schemaNode) 35 | 36 | val parser = new Schema.Parser() 37 | 38 | val schema = parser.parse(schemaToString) 39 | 40 | (id, schema) 41 | } 42 | .toVector 43 | } 44 | 45 | def toSchema(in: InputStream): Schema = { 46 | val 
parser = new Schema.Parser() 47 | parser.parse(in) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/Main.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import java.util.Collections 4 | 5 | import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient 6 | import it.agilelab.darwin.common.compat._ 7 | import org.apache.avro.{ Schema, SchemaBuilder } 8 | 9 | object Main { 10 | def main(args: Array[String]): Unit = { 11 | // to run this main https://github.com/confluentinc/cp-all-in-one/blob/6.0.0-post/cp-all-in-one/docker-compose.yml 12 | // TODO make this main meaningful 13 | val maxSchemas = 1000 14 | val options = ConfluentConnectorOptions(List("http://localhost:8081"), Collections.emptyMap(), maxSchemas) 15 | 16 | val client = new CachedSchemaRegistryClient( 17 | options.endpoints.toJavaList(), 18 | options.maxCachedSchemas, 19 | options.config 20 | ) 21 | val connector = new ConfluentConnector(options, client) 22 | 23 | connector.fullLoad().foreach(println) 24 | 25 | val expected: Schema = SchemaBuilder 26 | .record("myrecord") 27 | .namespace("it.agilelab.record") 28 | .fields() 29 | .requiredString("myfield") 30 | .optionalString("ciccio") 31 | .endRecord() 32 | 33 | expected.addProp("x-darwin-subject", "prova2-value": AnyRef) 34 | 35 | val id = connector.fingerprint(expected) 36 | 37 | connector.insert(Seq((id, expected))) 38 | 39 | connector.fullLoad().foreach(println) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/TwoConnectorsSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import it.agilelab.darwin.common.ConnectorFactory 5 | import it.agilelab.darwin.connector.hbase.HBaseConnectorCreator 6 | import it.agilelab.darwin.connector.mock.MockConnectorCreator 7 | import it.agilelab.darwin.connector.postgres.PostgresConnectorCreator 8 | import it.agilelab.darwin.manager.util.ConfigurationKeys 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | import org.scalatest.matchers.should.Matchers 11 | 12 | class TwoConnectorsSpec extends AnyFlatSpec with Matchers { 13 | it should "have both HBase and Postgresql available" in { 14 | ConnectorFactory.creators().map(_.getClass) should contain theSameElementsAs ( 15 | classOf[HBaseConnectorCreator] :: classOf[PostgresConnectorCreator] :: classOf[MockConnectorCreator] :: Nil 16 | ) 17 | } 18 | 19 | it should "choose HBase connector over Postgresql one" in { 20 | val config = ConfigFactory.parseString(s"""${ConfigurationKeys.CONNECTOR}: hbase""") 21 | ConnectorFactory.creator(config).map(_.getClass) should be(Some(classOf[HBaseConnectorCreator])) 22 | } 23 | 24 | it should "choose Postgresql connector over HBase one" in { 25 | val config = ConfigFactory.parseString(s"""${ConfigurationKeys.CONNECTOR}: postgresql""") 26 | ConnectorFactory.creator(config).map(_.getClass) should be(Some(classOf[PostgresConnectorCreator])) 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinTrieConcurrentHashMapSpec.scala: 
-------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import it.agilelab.darwin.common.DarwinConcurrentHashMap.DarwinTrieConcurrentHashMap 4 | 5 | class DarwinTrieConcurrentHashMapSpec extends DarwinJava7ConcurrentHashMapRunner[String, Int] { 6 | 7 | private def defaultWithSideEffect: Int = throw new DefaultException 8 | 9 | private val aKey = "aKey" 10 | private val aValue = 1 11 | 12 | it should "not evaluate the default param when key found - getOrElse" in { 13 | val sut = anEmptySut 14 | 15 | sut.getOrElseUpdate(aKey, aValue) 16 | 17 | lazy val res = sut.getOrElse(aKey, defaultWithSideEffect) 18 | 19 | sut shouldBe a[DarwinTrieConcurrentHashMap[_, _]] 20 | noException should be thrownBy res 21 | res shouldBe aValue 22 | } 23 | 24 | it should "evaluate the default param when key NOT found - getOrElse" in { 25 | val sut = anEmptySut 26 | sut.getOrElseUpdate(aKey, aValue) 27 | 28 | lazy val res = sut.getOrElse("anotherKey", defaultWithSideEffect) 29 | 30 | sut shouldBe a[DarwinTrieConcurrentHashMap[_, _]] 31 | an[DefaultException] should be thrownBy res 32 | } 33 | 34 | it should "evaluate the default param when key is null - getOrElse" in { 35 | val sut = anEmptySut 36 | 37 | lazy val res = sut.getOrElse(null, defaultWithSideEffect) 38 | 39 | sut shouldBe a[DarwinTrieConcurrentHashMap[_, _]] 40 | an[DefaultException] should be thrownBy res 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinJava8ConcurrentHashMapSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import it.agilelab.darwin.common.DarwinConcurrentHashMap.DarwinJava8ConcurrentHashMap 4 | 5 | class DarwinJava8ConcurrentHashMapSpec extends DarwinJava8ConcurrentHashMapRunner[String, Int] { 6 | 7 | private def defaultWithSideEffect: Int = throw new DefaultException 8 | private val aKey = "aKey" 9 | private val aValue = 1 10 | 11 | it should "not evaluate the default param when key found - getOrElse" in { 12 | val sut = anEmptySut 13 | sut.getOrElseUpdate(aKey, aValue) 14 | 15 | lazy val res = sut.getOrElse(aKey, defaultWithSideEffect) 16 | 17 | sut shouldBe a[DarwinJava8ConcurrentHashMap[_, _]] 18 | noException should be thrownBy res 19 | res shouldBe aValue 20 | } 21 | 22 | it should "evaluate the default param when key NOT found - getOrElse" in { 23 | val sut = anEmptySut 24 | 25 | sut.getOrElseUpdate(aKey, aValue) 26 | 27 | lazy val res = sut.getOrElse("anotherKey", defaultWithSideEffect) 28 | 29 | sut shouldBe a[DarwinJava8ConcurrentHashMap[_, _]] 30 | an[DefaultException] should be thrownBy res 31 | } 32 | 33 | it should "not evaluate the default param when key is null - getOrElse" in { 34 | val sut = anEmptySut 35 | 36 | lazy val res = sut.getOrElse(null, defaultWithSideEffect) 37 | 38 | sut shouldBe a[DarwinJava8ConcurrentHashMap[_, _]] 39 | an[NullPointerException] should be thrownBy res 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/AvroSchemaCache.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import org.apache.avro.Schema 4 | 5 | /** 6 | * Generic definition of the cache used by the manager to store the data loaded from the external storage. 
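 *
 * Note that `insert` does not mutate the cache: it returns a new [[AvroSchemaCache]] containing the extra
 * entries. A hedged sketch (`id` and `schema` are illustrative):
 * {{{
 *   val updated = cache.insert(Seq(id -> schema))
 *   updated.getSchema(id) // Some(schema)
 * }}}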
7 | * @param schemas a sequence of (ID, schema) used to initialize the cache values 8 | */ 9 | abstract class AvroSchemaCache(schemas: Seq[(Long, Schema)]) { 10 | 11 | /** 12 | * Retrieves a registered schema for the input ID. 13 | * 14 | * @param id the Long ID of the schema 15 | * @return the Schema associated to the input ID 16 | */ 17 | def getSchema(id: Long): Option[Schema] 18 | 19 | /** 20 | * Tests if the input schema is contained inside the cache. 21 | * 22 | * @param schema a Schema that the cache could contain 23 | * @return a pair containing: a boolean that is true if the schema is contained in the cache and the ID of the 24 | * schema in any case 25 | */ 26 | def contains(schema: Schema): (Boolean, Long) 27 | 28 | /** 29 | * Creates a new instance of [[AvroSchemaCache]] with the original values plus the input ones. 30 | * 31 | * @param values new pair (ID, schema) to insert inside the cache 32 | * @return a new instance of [[AvroSchemaCache]] containing the new values in addition to the original ones. 33 | */ 34 | def insert(values: Seq[(Long, Schema)]): AvroSchemaCache 35 | 36 | /** 37 | * Retrieves all registered schemas 38 | * 39 | * @return A Sequence of (ID, Schema) 40 | */ 41 | def getAll: Seq[(Long, Schema)] 42 | } 43 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/RestConnector.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.Connector 5 | import org.apache.avro.Schema 6 | import scalaj.http.Http 7 | 8 | class RestConnector(options: RestConnectorOptions, config: Config) extends Connector with JsonProtocol { 9 | 10 | override def fullLoad(): Seq[(Long, Schema)] = { 11 | Http(options.endpoint("schemas/")).execute(toSeqOfIdSchema).body 12 | } 13 | 14 | override def insert(schemas: Seq[(Long, Schema)]): Unit = { 15 | 16 | val response = Http(options.endpoint("schemas/")) 17 | .header("Content-Type", "application/json") 18 | .postData(toJson(schemas)) 19 | .asString 20 | 21 | if (response.isError) { 22 | throw new Exception(response.body) 23 | } 24 | 25 | } 26 | 27 | override def createTable(): Unit = {} 28 | 29 | override def tableExists(): Boolean = true 30 | 31 | override def tableCreationHint(): String = "" 32 | 33 | override def findSchema(id: Long): Option[Schema] = { 34 | 35 | val response = Http(options.endpoint(s"schemas/$id")).execute(toSchema) 36 | 37 | if (response.code == 404) { 38 | None 39 | } else { 40 | Some(response.body) 41 | } 42 | } 43 | 44 | /** 45 | * Retrieves the latest schema for a given string identifier (not to be confused with the fingerprint id). 
46 | * This API might not be implemented by all connectors, which should return None 47 | */ 48 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = None 49 | } 50 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/JsonSupport.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport 4 | import org.apache.avro.Schema 5 | import spray.json.{ DefaultJsonProtocol, JsObject, JsString, JsValue, JsonParser, PrettyPrinter, RootJsonFormat } 6 | 7 | trait JsonSupport extends SprayJsonSupport with DefaultJsonProtocol { 8 | implicit val printer: PrettyPrinter.type = PrettyPrinter 9 | 10 | implicit val schemaFormat: RootJsonFormat[Schema] = new RootJsonFormat[Schema] { 11 | 12 | override def write(obj: Schema): JsValue = JsonParser(obj.toString(true)) 13 | 14 | override def read(json: JsValue): Schema = new Schema.Parser().parse(json.prettyPrint) 15 | } 16 | 17 | implicit val schemaWithIdFormat: RootJsonFormat[(Long, Schema)] = new RootJsonFormat[(Long, Schema)] { 18 | 19 | override def write(obj: (Long, Schema)): JsValue = JsObject( 20 | Map( 21 | "id" -> JsString(obj._1.toString), 22 | "schema" -> schemaFormat.write(obj._2) 23 | ) 24 | ) 25 | 26 | override def read(json: JsValue): (Long, Schema) = json match { 27 | case JsObject(fields) => 28 | val id = fields.get("id") match { 29 | case Some(JsString(number)) => number 30 | case _ => throw new Exception("Id field should be a long") 31 | } 32 | 33 | val schema = fields.get("schema") match { 34 | case Some(x @ JsObject(_)) => x 35 | case _ => throw new Exception("schema should be an object") 36 | } 37 | 38 | (id.toLong, schemaFormat.read(schema)) 39 | case _ => throw new Exception("should be an object") 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/DarwinService.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.actor.ActorSystem 4 | import akka.http.scaladsl.model.{ HttpResponse, StatusCodes } 5 | import akka.http.scaladsl.server.directives.DebuggingDirectives 6 | import akka.http.scaladsl.server.{ Directives, Route } 7 | import akka.stream.ActorMaterializer 8 | import akka.stream.Attributes.LogLevels 9 | import it.agilelab.darwin.manager.AvroSchemaManager 10 | import org.apache.avro.Schema 11 | 12 | trait DarwinService extends Service with Directives with DebuggingDirectives with JsonSupport { 13 | 14 | val manager: AvroSchemaManager 15 | 16 | override def route: Route = logRequestResult(("darwin", LogLevels.Debug)) { 17 | get { 18 | path("schemas" / LongNumber.?) 
{ 19 | case Some(id) => 20 | manager.getSchema(id) match { 21 | case Some(schema) => complete(schema) 22 | case None => 23 | complete { 24 | HttpResponse(StatusCodes.NotFound) 25 | } 26 | } 27 | case None => complete(manager.getAll) 28 | } 29 | } ~ post { 30 | path("schemas" / PathEnd) { 31 | entity(as[Seq[Schema]]) { schemas => 32 | complete { 33 | manager.registerAll(schemas).map(_._1) 34 | } 35 | } 36 | } 37 | } 38 | } 39 | } 40 | 41 | object DarwinService { 42 | def apply(asm: AvroSchemaManager)(implicit s: ActorSystem, m: ActorMaterializer): DarwinService = new DarwinService { 43 | implicit override val materializer: ActorMaterializer = m 44 | implicit override val system: ActorSystem = s 45 | override val manager: AvroSchemaManager = asm 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /rest-server/src/main/postman/darwinrest.postman_collection.json: -------------------------------------------------------------------------------- 1 | { 2 | "info": { 3 | "_postman_id": "dcaadeb7-ecb8-4bc9-9d4d-47fe6a2857df", 4 | "name": "darwinrest", 5 | "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json" 6 | }, 7 | "item": [ 8 | { 9 | "name": "get-all-schemas", 10 | "request": { 11 | "method": "GET", 12 | "header": [], 13 | "body": { 14 | "mode": "raw", 15 | "raw": "" 16 | }, 17 | "url": { 18 | "raw": "localhost:8080/schemas/", 19 | "host": [ 20 | "localhost" 21 | ], 22 | "port": "8080", 23 | "path": [ 24 | "schemas", 25 | "" 26 | ] 27 | } 28 | }, 29 | "response": [] 30 | }, 31 | { 32 | "name": "get-one-schema", 33 | "request": { 34 | "method": "GET", 35 | "header": [], 36 | "body": { 37 | "mode": "raw", 38 | "raw": "" 39 | }, 40 | "url": { 41 | "raw": "localhost:8080/schemas/1", 42 | "host": [ 43 | "localhost" 44 | ], 45 | "port": "8080", 46 | "path": [ 47 | "schemas", 48 | "1" 49 | ] 50 | } 51 | }, 52 | "response": [] 53 | }, 54 | { 55 | "name": "post-a-schema", 56 | "request": { 57 | "method": "POST", 58 | "header": [ 59 | { 60 | "key": "Content-Type", 61 | "name": "Content-Type", 62 | "value": "application/json", 63 | "type": "text" 64 | } 65 | ], 66 | "body": { 67 | "mode": "raw", 68 | "raw": "[{\n\t\"type\": \"array\",\n\t\"items\": \"string\"\n}]" 69 | }, 70 | "url": { 71 | "raw": "localhost:8080/schemas/", 72 | "host": [ 73 | "localhost" 74 | ], 75 | "port": "8080", 76 | "path": [ 77 | "schemas", 78 | "" 79 | ] 80 | } 81 | }, 82 | "response": [] 83 | } 84 | ] 85 | } -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinConcurrentHashMapSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util.concurrent.atomic.AtomicInteger 4 | 5 | import org.scalatest.BeforeAndAfter 6 | import org.scalatest.flatspec.AnyFlatSpec 7 | import org.scalatest.matchers.should.Matchers 8 | 9 | class DarwinConcurrentHashMapSpec extends AnyFlatSpec with Matchers with BeforeAndAfter { 10 | private val realJavaVersion = System.getProperty("java.version") 11 | 12 | after { 13 | System.setProperty("java.version", realJavaVersion) 14 | } 15 | 16 | def test(): Unit = { 17 | val threadNumber = 1000 18 | val map = DarwinConcurrentHashMap.empty[String, Int] 19 | var counter = 0 20 | val threadCounter = new AtomicInteger(0) 21 | val runnables = for (_ <- 1 to threadNumber) yield { 22 | new Runnable { 23 | override def run(): Unit = { 24 | threadCounter.incrementAndGet() 25 
| val res = map.getOrElseUpdate( 26 | "A", { 27 | counter += 1 28 | counter 29 | } 30 | ) 31 | res should be(1) 32 | } 33 | } 34 | } 35 | val threads = for (r <- runnables) yield { 36 | val t = new Thread(r) 37 | t 38 | } 39 | for (t <- threads) { 40 | t.start() 41 | } 42 | for (t <- threads) { 43 | t.join() 44 | } 45 | threadCounter.get() should be(threadNumber) 46 | } 47 | 48 | it should "not evaluate the value if the key is present JAVA 8" in { 49 | test() 50 | } 51 | 52 | it should "not evaluate the value if the key is present JAVA 7" in { 53 | if (JavaVersion.parseJavaVersion(realJavaVersion) >= 8) { 54 | System.setProperty("java.version", "1.7") 55 | test() 56 | } else { 57 | assert(true) 58 | } 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /confluent/src/test/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorCreatorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory } 4 | import org.scalatest.flatspec.AnyFlatSpec 5 | import org.scalatest.matchers.should.Matchers 6 | 7 | class ConfluentConnectorCreatorSuite extends AnyFlatSpec with Matchers { 8 | 9 | "connector" should "create an instance" in { 10 | 11 | val conf: Config = ConfigFactory.parseString(""" 12 | | endpoints: ["endpoint-one", "endpoint-two"] 13 | | max-cached-schemas: 1000 14 | | 15 | | kafka.schemaregistry.other: 1 16 | | kafka.schemaregistry: { 17 | | other2: "stringa" 18 | | } 19 | |""".stripMargin) 20 | 21 | val connector = new ConfluentConnectorCreator() 22 | 23 | val options = ConfluentConnectorOptions.fromConfig(conf) 24 | 25 | val result = connector.create(conf) 26 | 27 | assert(result != null) 28 | 29 | val endpoints = options.config.get("endpoints").asInstanceOf[java.util.List[String]] 30 | 31 | endpoints.get(0) should be("endpoint-one") 32 | endpoints.get(1) should be("endpoint-two") 33 | 34 | options.config.get("kafka.schemaregistry.other").asInstanceOf[Int] should be(1) 35 | options.config.get("kafka.schemaregistry.other2").asInstanceOf[String] should be("stringa") 36 | 37 | val maxCached = 1000 38 | options.config.get("max-cached-schemas").asInstanceOf[Int] should be(maxCached) 39 | 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/DarwinConcurrentHashMap.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util.function.{ Function => JFunction } 4 | 5 | import scala.collection.concurrent.TrieMap 6 | 7 | /** 8 | * A thread safe lock-free concurrent map that exposes only getOrElseUpdate and getOrElse methods 9 | * It is backed by either a scala.collection.concurrent.TrieMap or java.util.concurrent.ConcurrentHashMap 10 | * depending on the JVM that executes Darwin. 11 | * JVM 8 or later use java's ConcurrentHashMap while earlier versions use scala's TrieMap 12 | * 13 | * Obtain the "correct" instance using {{{DarwinConcurrentHashMap.empty}}} factory method. 
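 *
 * A hedged usage sketch (keys and values are illustrative):
 * {{{
 *   val map = DarwinConcurrentHashMap.empty[String, Int]
 *   map.getOrElseUpdate("a", 1)  // inserts 1 and returns it; the by-name value is evaluated only on a miss
 *   map.getOrElse("missing", 0)  // returns 0 without inserting anything
 * }}}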
14 | */ 15 | trait DarwinConcurrentHashMap[K, V] { 16 | def getOrElseUpdate(k: K, newValue: => V): V 17 | 18 | def getOrElse(k: K, default: => V): V 19 | } 20 | 21 | object DarwinConcurrentHashMap { 22 | 23 | private[common] class DarwinJava8ConcurrentHashMap[K, V] extends DarwinConcurrentHashMap[K, V] { 24 | private val innerMap = new java.util.concurrent.ConcurrentHashMap[K, V]() 25 | 26 | override def getOrElseUpdate(k: K, newValue: => V): V = { 27 | innerMap.computeIfAbsent( 28 | k, 29 | new JFunction[K, V]() { 30 | override def apply(t: K): V = newValue 31 | } 32 | ) 33 | } 34 | 35 | override def getOrElse(k: K, default: => V): V = 36 | Option(innerMap.get(k)).getOrElse(default) 37 | } 38 | 39 | private[common] class DarwinTrieConcurrentHashMap[K, V] extends DarwinConcurrentHashMap[K, V] { 40 | private val innerMap = TrieMap.empty[K, V] 41 | 42 | override def getOrElseUpdate(k: K, newValue: => V): V = innerMap.getOrElseUpdate(k, newValue) 43 | 44 | override def getOrElse(k: K, default: => V): V = innerMap.getOrElse(k, default) 45 | } 46 | 47 | private val isJavaAtLeast8 = JavaVersion.current() >= 8 48 | 49 | def empty[K, V]: DarwinConcurrentHashMap[K, V] = { 50 | if (isJavaAtLeast8) { 51 | new DarwinJava8ConcurrentHashMap() 52 | } else { 53 | new DarwinTrieConcurrentHashMap() 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/HttpApp.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import java.util.concurrent.Executor 4 | 5 | import akka.actor.ActorSystem 6 | import akka.http.scaladsl.Http 7 | import akka.http.scaladsl.server.RouteConcatenation 8 | import akka.stream.ActorMaterializer 9 | import com.typesafe.config.Config 10 | import it.agilelab.darwin.common.Logging 11 | 12 | import scala.concurrent.duration.Duration 13 | import scala.concurrent.{ Await, ExecutionContext, ExecutionContextExecutor } 14 | 15 | class HttpApp(config: Config, services: Service*)(implicit system: ActorSystem, materializer: ActorMaterializer) 16 | extends Logging { 17 | def run(): Unit = { 18 | val interface = config.getString("interface") 19 | val port = config.getInt("port") 20 | 21 | val route = RouteConcatenation.concat(services.map(_.route): _*) 22 | 23 | log.info("Starting http server on {}:{}", interface, port) 24 | val eventuallyBinding = Http().bindAndHandle(route, interface, port) 25 | val binding = Await.result(eventuallyBinding, Duration.Inf) 26 | log.info("Started http server on {}:{}", interface, port) 27 | 28 | val shutdownThread = new Thread(new Runnable { 29 | override def run(): Unit = { 30 | implicit val ec: ExecutionContext = newSameThreadExecutor 31 | log.info("Received shutdown hook") 32 | 33 | val termination = for { 34 | _ <- binding.unbind() 35 | terminated <- system.terminate() 36 | } yield terminated 37 | 38 | Await.ready(termination, Duration.Inf) 39 | log.info("Shutdown") 40 | } 41 | }) 42 | 43 | shutdownThread.setName("shutdown") 44 | 45 | Runtime.getRuntime.addShutdownHook(shutdownThread) 46 | 47 | log.info("registered shutdown hook") 48 | } 49 | 50 | private def newSameThreadExecutor: ExecutionContextExecutor = ExecutionContext.fromExecutor(new Executor { 51 | override def execute(command: Runnable): Unit = command.run() 52 | }) 53 | } 54 | 55 | object HttpApp { 56 | def apply(config: Config, services: Service*)(implicit 57 | system: ActorSystem, 58 | materializer: 
ActorMaterializer 59 | ): HttpApp = 60 | new HttpApp(config, services: _*) 61 | } 62 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/ConnectorFactory.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util.ServiceLoader 4 | 5 | import com.typesafe.config.Config 6 | import it.agilelab.darwin.manager.exception.ConnectorNotFoundException 7 | import it.agilelab.darwin.manager.util.ConfigurationKeys 8 | 9 | import it.agilelab.darwin.common.compat._ 10 | 11 | /** 12 | * Used to obtain the correct implementation of [[Connector]] found on the classpath using the [[ConnectorCreator]] 13 | */ 14 | object ConnectorFactory extends Logging { 15 | 16 | /** 17 | * Retrieves all the registered [[ConnectorCreator]] in the classpath. 18 | * 19 | * @return a sequence of all the loaded [[ConnectorCreator]] 20 | */ 21 | def creators(): Seq[ConnectorCreator] = { 22 | val creators = ServiceLoader.load(classOf[ConnectorCreator]).toScala().toSeq 23 | log.debug(s"${creators.size} available connector creators found") 24 | creators 25 | } 26 | 27 | /** 28 | * @return the first ConnectorCreator, use ONLY if you are sure that just one is available in the classpath 29 | */ 30 | def creator(): Option[ConnectorCreator] = creators().headOption 31 | 32 | /** 33 | * @return the ConnectorCreator identified by the name given as input 34 | */ 35 | def creator(name: String): Option[ConnectorCreator] = { 36 | creators().find(_.name() == name) 37 | } 38 | 39 | /** 40 | * @return the ConnectorCreator identified by the name given as input 41 | */ 42 | def creator(conf: Config): Option[ConnectorCreator] = { 43 | if (conf.hasPath(ConfigurationKeys.CONNECTOR)) { 44 | creator(conf.getString(ConfigurationKeys.CONNECTOR)) 45 | } else { 46 | creator() 47 | } 48 | } 49 | 50 | def connector(config: Config): Connector = { 51 | val cnt = creator(config) 52 | .map(_.create(config)) 53 | .getOrElse(throw new ConnectorNotFoundException(config)) 54 | if (config.hasPath(ConfigurationKeys.CREATE_TABLE) && config.getBoolean(ConfigurationKeys.CREATE_TABLE)) { 55 | cnt.createTable() 56 | } else if (!cnt.tableExists()) { 57 | log.warn(s"Darwin table does not exists and has not been created (${ConfigurationKeys.CREATE_TABLE} was false)") 58 | log.warn(cnt.tableCreationHint()) 59 | } 60 | cnt 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/ManagerUtilsSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import java.nio.{ ByteBuffer, ByteOrder } 4 | 5 | import com.typesafe.config.ConfigFactory 6 | import it.agilelab.darwin.common.SchemaReader 7 | import it.agilelab.darwin.manager.AvroSchemaManagerFactory 8 | import it.agilelab.darwin.manager.util.{ AvroSingleObjectEncodingUtils, ConfigurationKeys } 9 | import it.agilelab.darwin.manager.util.ByteArrayUtils._ 10 | 11 | import scala.util.Random 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | class BigEndianManagerUtilsSuite extends ManagerUtilsSuite(ByteOrder.BIG_ENDIAN) 16 | 17 | class LittleEndianManagerUtilsSuite extends ManagerUtilsSuite(ByteOrder.LITTLE_ENDIAN) 18 | 19 | abstract class ManagerUtilsSuite(endianness: ByteOrder) extends AnyFlatSpec with Matchers { 20 | 
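  // The first test registers OneField.avsc with a cached-eager manager, wraps a random payload in Avro
  // single-object encoding for the configured endianness, and checks that retrieveSchemaAndAvroPayload
  // returns the original schema and payload unchanged.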
21 | "AvroSchemaManager utilities" should "create a Single-Object encoded byte array" in { 22 | val ORIGINAL_LENGTH: Int = 10 23 | val originalSchema = SchemaReader.readFromResources("OneField.avsc") 24 | val config = 25 | ConfigFactory 26 | .parseMap(new java.util.HashMap[String, String]() { 27 | { 28 | put(ConfigurationKeys.MANAGER_TYPE, ConfigurationKeys.CACHED_EAGER) 29 | put(ConfigurationKeys.ENDIANNESS, endianness.toString) 30 | } 31 | }) 32 | .withFallback(ConfigFactory.load()) 33 | .resolve() 34 | val manager = AvroSchemaManagerFactory.initialize(config) 35 | manager.registerAll(Seq(originalSchema)) 36 | val originalPayload = new Array[Byte](ORIGINAL_LENGTH) 37 | Random.nextBytes(originalPayload) 38 | val data: Array[Byte] = manager.generateAvroSingleObjectEncoded(originalPayload, originalSchema) 39 | assert(AvroSingleObjectEncodingUtils.isAvroSingleObjectEncoded(data)) 40 | val (schema, payload) = manager.retrieveSchemaAndAvroPayload(data) 41 | assert(schema == originalSchema) 42 | assert(originalPayload sameElements payload) 43 | } 44 | 45 | it should "convert a long to byte array and back" in { 46 | val longs = (1 to 10).map(_ => Random.nextLong()) 47 | 48 | assert( 49 | longs == longs.map(x => 50 | AvroSingleObjectEncodingUtils 51 | .readLong(ByteBuffer.wrap(x.longToByteArray(endianness)), endianness) 52 | ) 53 | ) 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/CachedAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | import java.util.concurrent.atomic.AtomicReference 5 | 6 | import it.agilelab.darwin.common.Connector 7 | import org.apache.avro.Schema 8 | 9 | /** 10 | * Implementation of AvroSchemaManager that defines a cache where the storage data is loaded, in order to reduce the 11 | * number of accesses to the storage. 12 | */ 13 | abstract class CachedAvroSchemaManager(connector: Connector, endianness: ByteOrder) 14 | extends AvroSchemaManager(connector, endianness) { 15 | protected val _cache: AtomicReference[Option[AvroSchemaCache]] = new AtomicReference[Option[AvroSchemaCache]](None) 16 | 17 | def cache: AvroSchemaCache = _cache.get 18 | .getOrElse( 19 | throw new IllegalAccessException( 20 | "Cache not loaded: accesses are allowed only if the cache has been " + 21 | "loaded" 22 | ) 23 | ) 24 | 25 | initialize() 26 | 27 | private def initialize(): Unit = { 28 | log.debug("cache initialization...") 29 | _cache.compareAndSet(None, Some(AvroSchemaCacheFingerprint(connector.fullLoad(), connector.fingerprint))) 30 | log.debug("cache initialized") 31 | } 32 | 33 | /** 34 | * Reloads all the schemas from the previously configured storage. 35 | * Throws an exception if the cache wasn't already loaded (the getInstance method must always be used to 36 | * initialize the cache using the required configuration). 
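 *
 * Illustrative call (sketch only): {{{ manager.reload() // swaps in a fresh cache built from connector.fullLoad() }}}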
37 | */ 38 | override def reload(): AvroSchemaManager = { 39 | log.debug("reloading cache...") 40 | _cache.set(Some(AvroSchemaCacheFingerprint(connector.fullLoad(), connector.fingerprint))) 41 | log.debug("cache reloaded") 42 | this 43 | } 44 | 45 | override def registerAll(schemas: Seq[Schema]): Seq[(Long, Schema)] = { 46 | log.debug(s"registering ${schemas.size} schemas...") 47 | val (alreadyInCache, notInCache) = schemas.map(s => (cache.contains(s), s)).partition(_._1._1) 48 | val inserted = notInCache.map(e => e._1._2 -> e._2) 49 | connector.insert(inserted) 50 | val allSchemas = alreadyInCache.map(e => e._1._2 -> e._2) ++ inserted 51 | _cache.set(Some(cache.insert(inserted))) //TODO review 52 | log.debug(s"${allSchemas.size} schemas registered") 53 | allSchemas 54 | } 55 | 56 | /** 57 | * Retrieves all registered schemas 58 | * 59 | * @return A Sequence of (ID, Schema) 60 | */ 61 | override def getAll: Seq[(Long, Schema)] = cache.getAll 62 | } 63 | -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/SchemaManagerSparkApp.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark 2 | 3 | import java.nio.ByteOrder 4 | 5 | import com.typesafe.config.{ Config, ConfigFactory } 6 | import it.agilelab.darwin.app.spark.classes._ 7 | import it.agilelab.darwin.manager.AvroSchemaManagerFactory 8 | import org.apache.avro.reflect.ReflectData 9 | import org.apache.hadoop.fs.FileSystem 10 | import org.apache.spark.sql.SparkSession 11 | import org.slf4j.{ Logger, LoggerFactory } 12 | 13 | object SchemaManagerSparkApp extends GenericMainClass with SparkManager { 14 | 15 | val mainLogger: Logger = LoggerFactory.getLogger("SchemaManagerSparkApp") 16 | 17 | val endianness: ByteOrder = ByteOrder.BIG_ENDIAN 18 | 19 | override protected def runJob(settings: Config)(implicit fs: FileSystem, sparkSession: SparkSession): Int = { 20 | import sparkSession.implicits._ 21 | 22 | val ds = sparkSession.createDataset(sparkSession.sparkContext.parallelize(1 to 1000, 20)) 23 | mainLogger.info("Registering schemas") 24 | // val reflections = new Reflections("it.agilelab.darwin.app.spark.classes") 25 | // val annotationClass: Class[AvroSerde] = classOf[AvroSerde] 26 | // val classes = reflections.getTypesAnnotatedWith(annotationClass).asScala.toSeq 27 | // .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) 28 | // val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName))) 29 | val schemas = Seq( 30 | ReflectData.get().getSchema(classOf[Menu]), 31 | ReflectData.get().getSchema(classOf[MenuItem]), 32 | ReflectData.get().getSchema(classOf[Food]), 33 | ReflectData.get().getSchema(classOf[Order]), 34 | ReflectData.get().getSchema(classOf[Price]) 35 | ) 36 | val conf = ConfigFactory.load() 37 | val manager = AvroSchemaManagerFactory.initialize(conf) 38 | val registeredIDs: Seq[Long] = manager.registerAll(schemas).map(_._1) 39 | mainLogger.info("Schemas registered") 40 | 41 | mainLogger.info("Getting ID for a schema") 42 | manager.getId(ReflectData.get().getSchema(classOf[Menu])) 43 | mainLogger.info("ID retrieved for the schema") 44 | 45 | mainLogger.info("Get Schema from ID") 46 | val d2 = ds.map { x => 47 | AvroSchemaManagerFactory.initialize(conf).getSchema(registeredIDs(x % registeredIDs.size)) 48 | x 49 | } 50 | d2.count() 51 | mainLogger.info("All schemas obtained") 52 | 10 53 | } 54 | 55 | override protected def 
handleException(exception: Throwable, applicationSettings: Config): Unit = { 56 | mainLogger.error(exception.getMessage) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/SparkManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.hadoop.hbase.HBaseConfiguration 5 | import org.apache.spark.SparkConf 6 | import org.apache.spark.sql.SparkSession 7 | import org.slf4j.{ Logger, LoggerFactory } 8 | 9 | import scala.collection.JavaConverters._ 10 | 11 | trait SparkManager { 12 | 13 | val sparkManagerLogger: Logger = LoggerFactory.getLogger("SparkManager") 14 | 15 | /** 16 | * @return a SparkConf given the settings 17 | */ 18 | protected def createSparkConf(settings: Config): SparkConf = { 19 | // Add conf file configurations 20 | val sparkSettings = 21 | if (settings.hasPath("spark")) { 22 | settings.getConfig("spark").entrySet().asScala.map(e => ("spark." + e.getKey, e.getValue.unwrapped().toString)) 23 | } else { 24 | Seq() 25 | } 26 | // Add hbase related hadoop confs 27 | val hconfs = HBaseConfiguration.create().asScala.map { entry => 28 | "spark.hadoop." + entry.getKey -> entry.getValue 29 | } 30 | new SparkConf() 31 | // Use spark.app.name to set the spark app name 32 | .setAll(hconfs) 33 | .setAll(sparkSettings) 34 | } 35 | 36 | private def withSparkConf(settings: Config)(f: SparkConf => SparkSession): SparkSession = f( 37 | { 38 | createSparkConf(settings) 39 | } 40 | ) 41 | 42 | /** 43 | * @return a SparkSession given the settings 44 | */ 45 | protected def makeSparkSession(settings: Config): SparkSession = withSparkConf(settings) { conf => 46 | SparkSession 47 | .builder() 48 | .config(conf) 49 | .getOrCreate() 50 | } 51 | 52 | /** 53 | * @return the default Spark parallelism given the sparkSession and the config. 
54 | * It tries to infer it from the SparkSession, if it is not possible, it gathers it from the Config 55 | */ 56 | protected def defaultParallelism(implicit sparkSession: SparkSession, config: Config): Int = { 57 | sparkSession.conf.getOption(SparkConfigurationKeys.SPARK_EXECUTOR_INSTANCES) match { 58 | case Some(instances) => 59 | sparkSession.conf.getOption(SparkConfigurationKeys.SPARK_CORES).getOrElse("1").toInt * instances.toInt 60 | case None => 61 | sparkManagerLogger.info( 62 | "Spark is configured with dynamic allocation, default parallelism will be gathered from app " + 63 | "conf: " + 64 | "next.process.parallelism" 65 | ) 66 | if (config.hasPath(SparkConfigurationKeys.PARALLELISM)) { 67 | config.getInt(SparkConfigurationKeys.PARALLELISM) 68 | } else { 69 | sparkManagerLogger.info("next.process.parallelism was not set fallback to sparkSession.defaultParallelism") 70 | sparkSession.sparkContext.defaultParallelism 71 | } 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /multi-connector/src/main/scala/it/agilelab/darwin/connector/multi/MultiConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.multi 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, ConnectorFactory } 6 | import it.agilelab.darwin.manager.exception.DarwinException 7 | 8 | object MultiConnectorCreator { 9 | val REGISTRATOR = "registrar" 10 | val CONFLUENT_SINGLE_OBJECT_ENCODING = "confluent-single-object-encoding" 11 | val STANDARD_SINGLE_OBJECT_ENCODING = "standard-single-object-encoding" 12 | } 13 | class MultiConnectorCreator extends ConnectorCreator { 14 | 15 | /** 16 | * @return the name of the Connector 17 | */ 18 | override def name(): String = "multi" 19 | 20 | private def mergeConf(conf: Config, path: String): Config = { 21 | 22 | conf 23 | .getConfig(path) 24 | .entrySet() 25 | .toScala() 26 | .map(_.getKey) 27 | .foldLeft(conf)((z, x) => z.withValue(x, conf.getValue(path + "." 
+ x))) 28 | } 29 | 30 | override def create(config: Config): Connector = { 31 | val registrarName = 32 | config.getString(MultiConnectorCreator.REGISTRATOR) 33 | 34 | val confluentConnectorType = 35 | if (config.hasPath(MultiConnectorCreator.CONFLUENT_SINGLE_OBJECT_ENCODING)) { 36 | Some(config.getString(MultiConnectorCreator.CONFLUENT_SINGLE_OBJECT_ENCODING)) 37 | } else { 38 | None 39 | } 40 | 41 | val standardConnectorTypes = config 42 | .getStringList(MultiConnectorCreator.STANDARD_SINGLE_OBJECT_ENCODING) 43 | .toScala() 44 | 45 | val registrar = createAndMergeConfigs(config, registrarName) 46 | 47 | val confluentConnector = 48 | confluentConnectorType.map { cName => 49 | createIfNotRegistrar(registrarName, registrar, cName, config) 50 | } 51 | 52 | val singleObjectConnectors = standardConnectorTypes.map { cName => 53 | createIfNotRegistrar(registrarName, registrar, cName, config) 54 | }.toList 55 | 56 | new MultiConnector( 57 | registrar, 58 | confluentConnector, 59 | singleObjectConnectors 60 | ) 61 | } 62 | 63 | private def createAndMergeConfigs(config: Config, registrarName: String) = { 64 | ConnectorFactory 65 | .creator(registrarName) 66 | .map(creator => creator.create(mergeConf(config, registrarName))) 67 | .getOrElse(throw new DarwinException("No connector creator for name " + registrarName)) 68 | } 69 | 70 | private def createIfNotRegistrar(registrarName: String, registrar: Connector, cName: String, config: Config) = { 71 | if (cName == registrarName) { 72 | registrar 73 | } else { 74 | ConnectorFactory 75 | .creator(cName) 76 | .map(creator => creator.create(mergeConf(config, cName))) 77 | .getOrElse(throw new DarwinException("No connector creator for name " + cName)) 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/SchemaReader.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.io.{ File, IOException, InputStream } 4 | 5 | import org.apache.avro.{ Schema, SchemaParseException } 6 | 7 | object SchemaReader { 8 | 9 | def readFromResources(p: String): Schema = { 10 | using(getClass.getClassLoader.getResourceAsStream(p)) { stream => 11 | read(stream) 12 | } 13 | } 14 | 15 | def read(f: File): Schema = { 16 | val parser = new Schema.Parser() 17 | parser.parse(f) 18 | } 19 | 20 | def read(s: String): Schema = { 21 | val parser = new Schema.Parser() 22 | parser.parse(s) 23 | } 24 | 25 | /** 26 | * Does not close the InputStream 27 | */ 28 | def read(is: InputStream): Schema = { 29 | val parser = new Schema.Parser() 30 | parser.parse(is) 31 | } 32 | 33 | def safeReadFromResources(p: String): Either[SchemaReaderError, Schema] = { 34 | Option(getClass.getClassLoader.getResourceAsStream(p)).fold[Either[SchemaReaderError, Schema]]( 35 | Left(ResourceNotFoundError(s"Cannot find resource: $p")) 36 | ) { stream => 37 | try { 38 | safeRead(stream) 39 | } catch { 40 | case e: SchemaParseException => Left(SchemaParserError(e)) 41 | case e: IOException => Left(IOError(e)) 42 | case e: Throwable => Left(UnknownError(e)) 43 | } finally { 44 | stream.close() 45 | } 46 | } 47 | } 48 | 49 | def safeRead(f: File): Either[SchemaReaderError, Schema] = { 50 | try { 51 | Right(new Schema.Parser().parse(f)) 52 | } catch { 53 | case e: SchemaParseException => Left(SchemaParserError(e)) 54 | case e: IOException => Left(IOError(e)) 55 | case e: Throwable => Left(UnknownError(e)) 56 | } 57 | } 58 | 59 | def 
safeRead(s: String): Either[SchemaReaderError, Schema] = { 60 | try { 61 | Right(new Schema.Parser().parse(s)) 62 | } catch { 63 | case e: SchemaParseException => Left(SchemaParserError(e)) 64 | case e: IOException => Left(IOError(e)) 65 | case e: Throwable => Left(UnknownError(e)) 66 | } 67 | } 68 | 69 | /** 70 | * Does not close the InputStream 71 | */ 72 | def safeRead(is: InputStream): Either[SchemaReaderError, Schema] = { 73 | try { 74 | Right(new Schema.Parser().parse(is)) 75 | } catch { 76 | case e: SchemaParseException => Left(SchemaParserError(e)) 77 | case e: IOException => Left(IOError(e)) 78 | case e: Throwable => Left(UnknownError(e)) 79 | } 80 | } 81 | 82 | sealed trait SchemaReaderError 83 | 84 | case class SchemaParserError(exception: SchemaParseException) extends SchemaReaderError 85 | 86 | case class IOError(exception: IOException) extends SchemaReaderError 87 | 88 | case class ResourceNotFoundError(msg: String) extends SchemaReaderError 89 | 90 | case class UnknownError(t: Throwable) extends SchemaReaderError 91 | 92 | } 93 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/MongoConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | import com.mongodb.Block 4 | import com.typesafe.config.Config 5 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator } 6 | import it.agilelab.darwin.connector.mongo.ConfigurationMongoModels.MongoConnectorConfig 7 | import org.mongodb.scala.connection.ClusterSettings 8 | import org.mongodb.scala.{ MongoClient, MongoClientSettings, MongoCredential, ServerAddress } 9 | import it.agilelab.darwin.common.compat._ 10 | import scala.concurrent.duration.Duration 11 | 12 | class MongoConnectorCreator extends ConnectorCreator { 13 | 14 | override def create(config: Config): Connector = { 15 | 16 | val mongoConfig: MongoConnectorConfig = createConfig(config) 17 | new MongoConnector(createConnection(mongoConfig), mongoConfig) 18 | } 19 | 20 | /** 21 | * @return the name of the Connector 22 | */ 23 | override def name(): String = "mongo" 24 | 25 | /** 26 | * return the MongoClient 27 | * @param mongoConf : config to create a connection to MongoDB 28 | * @return MongoClient 29 | */ 30 | private def createConnection(mongoConf: MongoConnectorConfig): MongoClient = { 31 | 32 | val credential: MongoCredential = 33 | MongoCredential.createCredential(mongoConf.username, mongoConf.database, mongoConf.password.toCharArray) 34 | 35 | val hosts: Seq[ServerAddress] = mongoConf.hosts.map(host => new ServerAddress(host)) 36 | 37 | val settings: MongoClientSettings = MongoClientSettings 38 | .builder() 39 | .credential(credential) 40 | .applyToClusterSettings(new Block[ClusterSettings.Builder] { 41 | override def apply(builder: ClusterSettings.Builder): Unit = 42 | builder.hosts(java.util.Arrays.asList(hosts: _*)) 43 | }) 44 | .build() 45 | 46 | MongoClient(settings) 47 | } 48 | 49 | /** 50 | * create MongoConnectorConfig started from a configuration file 51 | * @param config: configurations parsed from the file 52 | * @return MongoConnectorConfig 53 | */ 54 | def createConfig(config: Config): MongoConnectorConfig = { 55 | require(config.hasPath(ConfigurationKeys.USERNAME)) 56 | require(config.hasPath(ConfigurationKeys.PASSWORD)) 57 | require(config.hasPath(ConfigurationKeys.HOST)) 58 | require(config.hasPath(ConfigurationKeys.DATABASE)) 59 | 
require(config.hasPath(ConfigurationKeys.COLLECTION)) 60 | 61 | MongoConnectorConfig( 62 | config.getString(ConfigurationKeys.USERNAME), 63 | config.getString(ConfigurationKeys.PASSWORD), 64 | config.getString(ConfigurationKeys.DATABASE), 65 | config.getString(ConfigurationKeys.COLLECTION), 66 | config.getStringList(ConfigurationKeys.HOST).toScala().toSeq, 67 | if (config.hasPath(ConfigurationKeys.TIMEOUT)) { 68 | Duration.create(config.getInt(ConfigurationKeys.TIMEOUT), "millis") 69 | } else { 70 | Duration.create(ConfigurationMongoModels.DEFAULT_DURATION, "millis") 71 | } 72 | ) 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /postgres/src/test/scala/it/agilelab/darwin/connector/postgres/PostgresConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory, ConfigValueFactory } 4 | import it.agilelab.darwin.common.Connector 5 | import org.apache.avro.{ Schema, SchemaNormalization } 6 | import org.scalatest.BeforeAndAfterAll 7 | import ru.yandex.qatools.embed.postgresql.EmbeddedPostgres 8 | import ru.yandex.qatools.embed.postgresql.distribution.Version 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | import org.scalatest.matchers.should.Matchers 11 | 12 | class PostgresConnectorSuite extends AnyFlatSpec with Matchers with BeforeAndAfterAll { 13 | val embeddedPostgres: EmbeddedPostgres = new EmbeddedPostgres(Version.V9_6_11) 14 | 15 | override protected def beforeAll(): Unit = { 16 | super.beforeAll() 17 | val port = 5432 18 | val host = "localhost" 19 | val dbname = "postgres" 20 | val username = "postgres" 21 | val password = "mysecretpassword" 22 | embeddedPostgres.start(host, port, dbname, username, password) 23 | val config: Config = ConfigFactory.load("postgres.properties") 24 | val connector: Connector = new PostgresConnectorCreator().create(config) 25 | connector.createTable() 26 | } 27 | 28 | override protected def afterAll(): Unit = { 29 | super.afterAll() 30 | embeddedPostgres.stop() 31 | } 32 | 33 | it should "multiple insert and retrieve [No conf - OneTransaction]" in { 34 | val config: Config = ConfigFactory.load("postgres.properties") 35 | val connector: Connector = new PostgresConnectorCreator().create(config) 36 | test(connector) 37 | } 38 | 39 | it should "multiple insert and retrieve [OneTransaction]" in { 40 | val config: Config = ConfigFactory 41 | .load("postgres.properties") 42 | .withValue(ConfigurationKeys.MODE, ConfigValueFactory.fromAnyRef(OneTransaction.value)) 43 | val connector: Connector = new PostgresConnectorCreator().create(config) 44 | test(connector) 45 | } 46 | 47 | it should "multiple insert and retrieve [ExceptionDriven]" in { 48 | val config: Config = ConfigFactory 49 | .load("postgres.properties") 50 | .withValue(ConfigurationKeys.MODE, ConfigValueFactory.fromAnyRef(ExceptionDriven.value)) 51 | val connector: Connector = new PostgresConnectorCreator().create(config) 52 | test(connector) 53 | } 54 | 55 | private def test(connector: Connector) = { 56 | val outerSchema = new Schema.Parser().parse(getClass.getClassLoader.getResourceAsStream("postgresmock.avsc")) 57 | val innerSchema = outerSchema.getField("four").schema() 58 | val schemas = Seq(innerSchema, outerSchema) 59 | .map(s => SchemaNormalization.parsingFingerprint64(s) -> s) 60 | connector.insert(schemas) 61 | connector.insert(schemas) 62 | connector.insert(schemas) 63 | 
connector.insert(schemas) 64 | val loaded: Seq[(Long, Schema)] = connector.fullLoad() 65 | assert(loaded.size == schemas.size) 66 | assert(loaded.forall(schemas.contains)) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/AvroSchemaManagerFactory.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ ConnectorFactory, DarwinConcurrentHashMap, Logging } 5 | import it.agilelab.darwin.manager.exception.ConnectorNotFoundException 6 | import it.agilelab.darwin.manager.util.{ ConfigUtil, ConfigurationKeys } 7 | 8 | /** 9 | * Factory used to obtain the desired implementation of AvroSchemaManager. 10 | * First of all the initialize method should be called passing the configuration (it will return an instance of 11 | * AvroSchemaManager). Then, the same instance can be retrieved using the getInstance method with the same 12 | * configuration. 13 | */ 14 | object AvroSchemaManagerFactory extends Logging { 15 | 16 | private val _instancePool: DarwinConcurrentHashMap[String, AvroSchemaManager] = 17 | DarwinConcurrentHashMap.empty[String, AvroSchemaManager] 18 | 19 | private def configKey(c: Config): String = { 20 | ConfigUtil.printConfig(c) 21 | } 22 | 23 | /** 24 | * Returns an instance of AvroSchemaManager that can be used to register and retrieve schemas. 25 | * 26 | * @param config the Config that is passed to the connector 27 | * @return an instance of AvroSchemaManager 28 | */ 29 | @throws[ConnectorNotFoundException] 30 | def initialize(config: Config): AvroSchemaManager = { 31 | val key = configKey(config) 32 | lazy val mappingFunc = { 33 | log.debug("creating instance of AvroSchemaManager") 34 | val endianness = ConfigUtil.stringToEndianness(config.getString(ConfigurationKeys.ENDIANNESS)) 35 | val result = config.getString(ConfigurationKeys.MANAGER_TYPE) match { 36 | case ConfigurationKeys.CACHED_EAGER => 37 | new CachedEagerAvroSchemaManager(ConnectorFactory.connector(config), endianness) 38 | case ConfigurationKeys.CACHED_LAZY => 39 | new CachedLazyAvroSchemaManager(ConnectorFactory.connector(config), endianness) 40 | case ConfigurationKeys.LAZY => 41 | new LazyAvroSchemaManager(ConnectorFactory.connector(config), endianness) 42 | case _ => 43 | throw new IllegalArgumentException( 44 | s"No valid manager can be created for" + 45 | s" ${ConfigurationKeys.MANAGER_TYPE} key ${config.getString(ConfigurationKeys.MANAGER_TYPE)}" 46 | ) 47 | } 48 | log.debug("AvroSchemaManager instance created") 49 | result 50 | } 51 | _instancePool.getOrElseUpdate(key, mappingFunc) 52 | } 53 | 54 | /** 55 | * Returns the initialized instance of AvroSchemaManager that can be used to register and retrieve schemas. 56 | * The instance must be created once using the initialize method passing a configuration before calling this method.
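 *
 * A hedged sketch of the intended call sequence (the configuration contents are illustrative, not prescriptive):
 * {{{
 *   val config  = ConfigFactory.load()
 *   val manager = AvroSchemaManagerFactory.initialize(config)
 *   // later, anywhere else in the same JVM, with the same configuration:
 *   val same    = AvroSchemaManagerFactory.getInstance(config)
 * }}}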
57 | * 58 | * @return the initialized instance of AvroSchemaManager 59 | */ 60 | def getInstance(config: Config): AvroSchemaManager = { 61 | _instancePool.getOrElse( 62 | configKey(config), 63 | throw new IllegalArgumentException( 64 | s"No valid manager can be found for" + 65 | s" ${ConfigurationKeys.MANAGER_TYPE} key ${config.getString(ConfigurationKeys.MANAGER_TYPE)}" 66 | ) 67 | ) 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /hbase/src/test/scala/it/agilelab/darwin/connector/hbase/HBaseConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import java.nio.file.Files 4 | import java.util.UUID 5 | 6 | import com.typesafe.config.{ ConfigFactory, ConfigValueFactory } 7 | import it.agilelab.darwin.common.Connector 8 | import org.apache.avro.reflect.ReflectData 9 | import org.apache.avro.{ Schema, SchemaNormalization } 10 | import org.apache.hadoop.hbase.{ HBaseConfiguration, HBaseTestingUtility, MiniHBaseCluster } 11 | import org.scalatest.BeforeAndAfterAll 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | class HBaseConnectorSuite extends AnyFlatSpec with Matchers with BeforeAndAfterAll { 16 | 17 | private var connector: Connector = _ 18 | private var minicluster: MiniHBaseCluster = _ 19 | 20 | "HBaseConnector" should "load all existing schemas" in { 21 | connector.fullLoad() 22 | } 23 | 24 | it should "insert and retrieve" in { 25 | val schemas = Seq(ReflectData.get().getSchema(classOf[HBaseMock]), ReflectData.get().getSchema(classOf[HBase2Mock])) 26 | .map(s => SchemaNormalization.parsingFingerprint64(s) -> s) 27 | connector.insert(schemas) 28 | val loaded: Seq[(Long, Schema)] = connector.fullLoad() 29 | assert(loaded.size == schemas.size) 30 | assert(loaded.forall(schemas.contains)) 31 | val schema = connector.findSchema(loaded.head._1) 32 | assert(schema.isDefined) 33 | assert(schema.get == loaded.head._2) 34 | val noSchema = connector.findSchema(-1L) 35 | assert(noSchema.isEmpty) 36 | } 37 | 38 | "connector.tableCreationHint" should "print the correct hint for table creation" in { 39 | connector.tableCreationHint() should be("""To create namespace and table from an HBase shell issue: 40 | | create_namespace 'AVRO' 41 | | create 'AVRO:SCHEMA_REPOSITORY', '0'""".stripMargin) 42 | } 43 | 44 | "connector.tableExists" should "return true with existent table" in { 45 | connector.tableExists() should be(true) 46 | } 47 | 48 | override def beforeAll(): Unit = { 49 | val testUUID = UUID.randomUUID().toString 50 | val hConf = HBaseConfiguration.create() 51 | hConf.set("test.build.data.basedirectory", s"./target/hbase-test-data-$testUUID") 52 | val util = new HBaseTestingUtility(hConf) 53 | minicluster = util.startMiniCluster(1, true) 54 | val confFile = Files.createTempFile(testUUID, ".xml") 55 | // Hbase connector can only load configurations from a file path so we need to render the hadoop conf 56 | val stream = Files.newOutputStream(confFile) 57 | // mc.getConfiguration.writeXml(System.out) 58 | minicluster.getConfiguration.writeXml(stream) 59 | stream.flush() 60 | stream.close() 61 | // HbaseConnector will only load conf if hbase-site and core-site are given, 62 | // we give the same file to each. 
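// A JVM shutdown hook is also registered below so that the mini-cluster is stopped even if afterAll never runs.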
63 | sys.addShutdownHook(minicluster.shutdown()) 64 | val config = ConfigFactory 65 | .load() 66 | .withValue(ConfigurationKeys.HBASE_SITE, ConfigValueFactory.fromAnyRef(confFile.toAbsolutePath.toString)) 67 | .withValue(ConfigurationKeys.CORE_SITE, ConfigValueFactory.fromAnyRef(confFile.toAbsolutePath.toString)) 68 | connector = new HBaseConnectorCreator().create(config) 69 | connector.createTable() 70 | } 71 | 72 | override def afterAll(): Unit = { 73 | minicluster.shutdown() 74 | minicluster.waitUntilShutDown() 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/compat/package.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util 4 | 5 | /** 6 | * Converters java <-> scala that works between 2.10, 2.11, 2.12, 2.13 7 | */ 8 | package object compat { 9 | def toScala[A](jIterable: java.lang.Iterable[A]): scala.collection.Iterable[A] = { 10 | new Iterable[A] { 11 | def iterator: scala.collection.Iterator[A] = toScala(jIterable.iterator()) 12 | } 13 | } 14 | 15 | def toScala[A](jIterator: java.util.Iterator[A]): scala.collection.Iterator[A] = { 16 | new scala.collection.Iterator[A] { 17 | def next() = jIterator.next() 18 | def hasNext = jIterator.hasNext() 19 | } 20 | } 21 | 22 | def toScala[A, B](jIterator: java.util.Map[A, B]): scala.collection.Map[A, B] = { 23 | toScala(jIterator.entrySet().iterator()).map(x => (x.getKey, x.getValue)).toMap 24 | } 25 | 26 | def toScala[A](jSet: java.util.Set[A]): scala.collection.Set[A] = { 27 | val iterator = jSet.iterator() 28 | val builder = Set.newBuilder[A] 29 | while (iterator.hasNext) { 30 | builder += iterator.next() 31 | } 32 | builder.result() 33 | } 34 | 35 | def toJava[A](iterable: scala.collection.Iterable[A]): java.lang.Iterable[A] = new java.lang.Iterable[A] { 36 | override def iterator(): util.Iterator[A] = new util.Iterator[A] { 37 | private val it = iterable.iterator 38 | override def hasNext: Boolean = it.hasNext 39 | override def next(): A = it.next() 40 | } 41 | } 42 | 43 | def toJava[A](list: List[A]): java.util.List[A] = { 44 | val arraylist = new util.ArrayList[A]() 45 | list.foreach(arraylist.add) 46 | arraylist 47 | } 48 | 49 | implicit class IterableConverter[A](jIterable: java.lang.Iterable[A]) { 50 | def toScala(): scala.collection.Iterable[A] = { 51 | compat.toScala(jIterable) 52 | } 53 | } 54 | 55 | implicit class SetConverter[A](jSet: java.util.Set[A]) { 56 | def toScala(): scala.collection.Set[A] = { 57 | compat.toScala(jSet) 58 | } 59 | } 60 | 61 | implicit class JIterableConverter[A](iterable: scala.collection.Iterable[A]) { 62 | def toJava(): java.lang.Iterable[A] = { 63 | compat.toJava(iterable) 64 | } 65 | 66 | def toJavaList(): java.util.List[A] = { 67 | compat.toJava(iterable.toList) 68 | } 69 | } 70 | 71 | implicit class JMapConverter[A, B](map: scala.collection.Map[A, B]) { 72 | def toJava(): java.util.Map[A, B] = { 73 | val hashmap: util.Map[A, B] = new util.HashMap[A, B]() 74 | map.foreach { case (k, v) => 75 | hashmap.put(k, v) 76 | } 77 | hashmap 78 | } 79 | 80 | } 81 | 82 | implicit class IteratorConverter[A](jIterator: java.util.Iterator[A]) { 83 | def toScala(): scala.collection.Iterator[A] = { 84 | compat.toScala(jIterator) 85 | } 86 | } 87 | 88 | implicit class MapConverter[A, B](jmap: java.util.Map[A, B]) { 89 | def toScala(): collection.Map[A, B] = { 90 | compat.toScala(jmap) 91 | } 92 | } 93 | 94 | implicit 
class RightBiasedEither[+L, +R](val self: Either[L, R]) extends AnyVal {
95 | def rightMap[R1](f: R => R1): Either[L, R1] = {
96 | self match {
97 | case Right(v) => Right(f(v))
98 | case _ => self.asInstanceOf[Either[L, R1]]
99 | }
100 | }
101 |
102 | def rightFlatMap[L1 >: L, R1](f: R => Either[L1, R1]): Either[L1, R1] = {
103 | self match {
104 | case Right(v) => f(v)
105 | case _ => self.asInstanceOf[Either[L1, R1]]
106 | }
107 | }
108 | }
109 | }
110 |
--------------------------------------------------------------------------------
/common/src/main/scala/it/agilelab/darwin/manager/util/ByteArrayUtils.scala:
--------------------------------------------------------------------------------
1 | package it.agilelab.darwin.manager.util
2 |
3 | import java.io.OutputStream
4 | import java.nio.{ ByteBuffer, ByteOrder }
5 |
6 | import it.agilelab.darwin.common.{ INT_SIZE, LONG_SIZE }
7 |
8 | private[darwin] object ByteArrayUtils {
9 |
10 | implicit class EnrichedLong(val l: Long) extends AnyVal {
11 |
12 | /**
13 | * Converts Long to Array[Byte] honoring the input endianness
14 | */
15 | def longToByteArray(endianness: ByteOrder): Array[Byte] = {
16 | ByteBuffer
17 | .allocate(LONG_SIZE)
18 | .order(endianness)
19 | .putLong(l)
20 | .array()
21 | }
22 |
23 | def truncateIntToByteArray(endianness: ByteOrder): Array[Byte] = {
24 | ByteBuffer
25 | .allocate(INT_SIZE)
26 | .order(endianness)
27 | .putInt(l.toInt)
28 | .array()
29 | }
30 |
31 | /**
32 | * Writes to the stream the enriched long honoring the input endianness
33 | */
34 | def writeToStream(os: OutputStream, endianness: ByteOrder): Unit = {
35 | endianness match {
36 | case ByteOrder.BIG_ENDIAN =>
37 | os.write((l >>> 56).asInstanceOf[Int])
38 | os.write((l >>> 48).asInstanceOf[Int])
39 | os.write((l >>> 40).asInstanceOf[Int])
40 | os.write((l >>> 32).asInstanceOf[Int])
41 | os.write((l >>> 24).asInstanceOf[Int])
42 | os.write((l >>> 16).asInstanceOf[Int])
43 | os.write((l >>> 8).asInstanceOf[Int])
44 | os.write((l >>> 0).asInstanceOf[Int])
45 | case ByteOrder.LITTLE_ENDIAN =>
46 | os.write((l >>> 0).asInstanceOf[Int])
47 | os.write((l >>> 8).asInstanceOf[Int])
48 | os.write((l >>> 16).asInstanceOf[Int])
49 | os.write((l >>> 24).asInstanceOf[Int])
50 | os.write((l >>> 32).asInstanceOf[Int])
51 | os.write((l >>> 40).asInstanceOf[Int])
52 | os.write((l >>> 48).asInstanceOf[Int])
53 | os.write((l >>> 56).asInstanceOf[Int])
54 | case other: Any =>
55 | throw new IllegalArgumentException("Unknown ByteOrder: " + other)
56 | }
57 | }
58 | }
59 |
60 | implicit class EnrichedInt(val l: Int) extends AnyVal {
61 |
62 | def intToByteArray(endianness: ByteOrder): Array[Byte] = {
63 | ByteBuffer
64 | .allocate(INT_SIZE)
65 | .order(endianness)
66 | .putInt(l)
67 | .array()
68 | }
69 |
70 | /**
71 | * Writes to the stream the enriched int honoring the input endianness
72 | */
73 | def writeIntToStream(os: OutputStream, endianness: ByteOrder): Unit = {
74 | endianness match {
75 | case ByteOrder.BIG_ENDIAN =>
76 | os.write((l >>> 24))
77 | os.write((l >>> 16))
78 | os.write((l >>> 8))
79 | os.write((l >>> 0))
80 | case ByteOrder.LITTLE_ENDIAN =>
81 | os.write((l >>> 0))
82 | os.write((l >>> 8))
83 | os.write((l >>> 16))
84 | os.write((l >>> 24))
85 | case other: Any =>
86 | throw new IllegalArgumentException("Unknown ByteOrder: " + other)
87 | }
88 | }
89 | }
90 |
91 | def arrayEquals(b1: Array[Byte], b2: Array[Byte], start1: Int, start2: Int, length: Int): Boolean = {
92 | require(length > 0, "length must be positive")
93 | var i = start1
94 | var
j = start2 95 | var areEqual = true 96 | while (areEqual && i < start1 + length) { 97 | if (b1(i) != b2(j)) { 98 | areEqual = false 99 | } 100 | i += 1 101 | j += 1 102 | } 103 | areEqual 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /rest/src/test/scala/it/agilelab/darwin/connector/rest/RestConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import com.github.tomakehurst.wiremock.WireMockServer 4 | import com.github.tomakehurst.wiremock.client.WireMock._ 5 | import com.github.tomakehurst.wiremock.core.WireMockConfiguration 6 | import com.typesafe.config.ConfigFactory 7 | import org.apache.avro.{ Schema, SchemaBuilder } 8 | import org.scalatest.{ BeforeAndAfterEach, OptionValues } 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | 11 | class RestConnectorSuite extends AnyFlatSpec with BeforeAndAfterEach with OptionValues { 12 | 13 | private val wireMockServer = new WireMockServer(WireMockConfiguration.wireMockConfig().dynamicPort()) 14 | 15 | private def config(port: Int) = ConfigFactory.parseString(s""" 16 | | protocol: "http" 17 | | host: "localhost" 18 | | port: ${wireMockServer.port()} 19 | | basePath: "/" 20 | """.stripMargin) 21 | 22 | override def beforeEach(): Unit = { 23 | wireMockServer.start() 24 | } 25 | 26 | override def afterEach(): Unit = { 27 | wireMockServer.stop() 28 | } 29 | 30 | "rest connector" should "get all schemas" in { 31 | 32 | val connector = new RestConnectorCreator().create(config(wireMockServer.port())) 33 | 34 | val schemaId1 = -3577210133426481249L 35 | val schemaId2 = 5920968314789803198L 36 | 37 | wireMockServer.stubFor { 38 | get(urlPathEqualTo("/schemas/")).willReturn { 39 | aResponse().withBody { 40 | s""" 41 | |[{ 42 | | "id": "$schemaId1", 43 | | "schema": { 44 | | "items": "string", 45 | | "type": "array" 46 | | } 47 | | }, { 48 | | "id": "$schemaId2", 49 | | "schema": { 50 | | "items": "int", 51 | | "type": "array" 52 | | } 53 | | }] 54 | """.stripMargin 55 | } 56 | } 57 | } 58 | 59 | val result = connector.fullLoad() 60 | 61 | assert(result.contains((schemaId1, SchemaBuilder.array().items(Schema.create(Schema.Type.STRING))))) 62 | assert(result.contains((schemaId2, SchemaBuilder.array().items(Schema.create(Schema.Type.INT))))) 63 | assert(result.size == 2) 64 | 65 | wireMockServer.verify { 66 | getRequestedFor(urlPathEqualTo("/schemas/")) 67 | } 68 | 69 | } 70 | 71 | "rest connector" should "get one schemas" in { 72 | 73 | val schemaId = -3577210133426481249L 74 | val connector = new RestConnectorCreator().create(config(wireMockServer.port())) 75 | 76 | wireMockServer.stubFor { 77 | get(urlPathEqualTo(s"/schemas/$schemaId")).willReturn { 78 | aResponse().withBody { 79 | """ 80 | | { 81 | | "items": "string", 82 | | "type": "array" 83 | | } 84 | """.stripMargin 85 | } 86 | } 87 | } 88 | 89 | val result = connector.findSchema(schemaId).value 90 | 91 | val expected = SchemaBuilder.array().items(Schema.create(Schema.Type.STRING)) 92 | 93 | assert(result == expected) 94 | 95 | wireMockServer.verify { 96 | getRequestedFor(urlPathEqualTo(s"/schemas/$schemaId")) 97 | } 98 | 99 | } 100 | 101 | "rest connector" should "post schemas" in { 102 | val connector = new RestConnectorCreator().create(config(wireMockServer.port())) 103 | 104 | val schema = SchemaBuilder.array().items(Schema.create(Schema.Type.INT)) 105 | 106 | wireMockServer.stubFor { 107 | 
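// Stub accepting the connector's POST of new schemas; the request body is verified below
// to be a JSON array containing the Avro schema bodies.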
post(urlEqualTo("/schemas/")).withHeader("Content-Type", equalTo("application/json")) 108 | } 109 | 110 | connector.insert(Seq((0, schema))) 111 | 112 | val request = """[{"type":"array","items":"int"}]""" 113 | 114 | wireMockServer.verify { 115 | postRequestedFor(urlEqualTo("/schemas/")).withRequestBody(equalTo(request)) 116 | } 117 | 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/MockConnector.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | import it.agilelab.darwin.common.{ Connector, Logging, SchemaReader } 6 | import org.apache.avro.{ Schema, SchemaNormalization } 7 | 8 | import scala.collection.mutable 9 | 10 | class MockConnectorException(msg: String, t: Option[Throwable]) extends RuntimeException(msg) { 11 | def this(msg: String) = this(msg, None) 12 | 13 | def this(t: Throwable) = this(t.getMessage, Some(t)) 14 | 15 | override def getCause: Throwable = t match { 16 | case Some(value) => value 17 | case None => super.getCause 18 | } 19 | } 20 | 21 | class MockConnector(config: Config) extends Connector with Logging { 22 | 23 | private[this] var loaded: Boolean = false 24 | 25 | val mode: ConfigurationKeys.Mode = if (config.hasPath(ConfigurationKeys.MODE)) { 26 | ConfigurationKeys.Mode.parse(config.getString(ConfigurationKeys.MODE)) 27 | } else { 28 | ConfigurationKeys.Strict 29 | } 30 | 31 | private def files = if (config.hasPath(ConfigurationKeys.FILES)) { 32 | config.getStringList(ConfigurationKeys.FILES).toScala().map { s => 33 | try { 34 | SchemaReader.safeRead(new java.io.File(s)) 35 | } catch { 36 | case t: Throwable => Left(SchemaReader.UnknownError(t)) 37 | } 38 | } 39 | } else { 40 | Nil 41 | } 42 | 43 | private def resources = if (config.hasPath(ConfigurationKeys.RESOURCES)) { 44 | config.getStringList(ConfigurationKeys.RESOURCES).toScala().map { s => 45 | try { 46 | SchemaReader.safeReadFromResources(s) 47 | } catch { 48 | case t: Throwable => Left(SchemaReader.UnknownError(t)) 49 | } 50 | } 51 | } else { 52 | Nil 53 | } 54 | 55 | private def handleError(error: SchemaReader.SchemaReaderError): Unit = { 56 | mode match { 57 | case ConfigurationKeys.Strict => 58 | error match { 59 | case SchemaReader.SchemaParserError(exception) => 60 | throw new MockConnectorException(exception) 61 | case SchemaReader.IOError(exception) => throw new MockConnectorException(exception) 62 | case SchemaReader.ResourceNotFoundError(msg) => throw new MockConnectorException(msg) 63 | case SchemaReader.UnknownError(t) => throw new MockConnectorException(t) 64 | } 65 | case ConfigurationKeys.Permissive => 66 | error match { 67 | case SchemaReader.SchemaParserError(exception) => log.warn(exception.getMessage, exception) 68 | case SchemaReader.IOError(exception) => log.warn(exception.getMessage, exception) 69 | case SchemaReader.ResourceNotFoundError(msg) => log.warn(msg) 70 | case SchemaReader.UnknownError(t) => log.warn(t.getMessage, t) 71 | } 72 | } 73 | } 74 | 75 | private val table: mutable.Map[Long, Schema] = mutable.Map.empty[Long, Schema] 76 | 77 | override def fullLoad(): Seq[(Long, Schema)] = { 78 | (resources ++ files).foreach { 79 | case Left(error) => handleError(error) 80 | case Right(schema) => table(SchemaNormalization.parsingFingerprint64(schema)) = schema 81 | } 82 | table.toSeq 83 | } 84 
|
85 | override def insert(schemas: Seq[(Long, Schema)]): Unit = {
86 | schemas.foreach { case (id, schema) =>
87 | table(id) = schema
88 | }
89 | }
90 |
91 | override def findSchema(id: Long): Option[Schema] = {
92 | if (!loaded) {
93 | this.synchronized {
94 | if (!loaded) {
95 | fullLoad()
96 | loaded = true
97 | }
98 | }
99 | }
100 | table.get(id)
101 | }
102 |
103 | override def createTable(): Unit = ()
104 |
105 | override def tableExists(): Boolean = true
106 |
107 | override def tableCreationHint(): String = "No table needs to be created since this is a mock connector"
108 |
109 | /**
110 | * Retrieves the latest schema for a given string identifier (not to be confused with the fingerprint id).
111 | * This API might not be implemented by all connectors; those that do not implement it should return None
112 | */
113 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] =
114 | table.find(_._2.getFullName == identifier)
115 | }
116 |
--------------------------------------------------------------------------------
/mock-connector/src/test/scala/it/agilelab/darwin/connector/mock/MockConnectorSpec.scala:
--------------------------------------------------------------------------------
1 | package it.agilelab.darwin.connector.mock
2 |
3 | import com.typesafe.config.ConfigFactory
4 | import org.apache.avro.Schema
5 | import org.apache.avro.Schema.Type
6 | import org.scalatest.flatspec.AnyFlatSpec
7 | import org.scalatest.matchers.should.Matchers
8 |
9 | import java.nio.file.Paths
10 | import java.util
11 |
12 | class MockConnectorSpec extends AnyFlatSpec with Matchers {
13 |
14 | private val p = Paths
15 | .get(".")
16 | .resolve("mock-connector")
17 | .resolve("src")
18 | .resolve("test")
19 | .resolve("resources")
20 | .resolve("test")
21 |
22 | it should "load the schema manually inserted" in {
23 | val connector = new MockConnectorCreator().create(ConfigFactory.empty())
24 | connector.insert((3L, Schema.create(Type.BYTES)) :: Nil)
25 | connector.fullLoad() should have size 1
26 | }
27 |
28 | it should "load the schema automatically from resources" in {
29 | val connector = new MockConnectorCreator().create(ConfigFactory.parseMap {
30 | new java.util.HashMap[String, Object] {
31 | put(ConfigurationKeys.RESOURCES, util.Arrays.asList("test/MockClassAlone.avsc", "test/MockClassParent.avsc"))
32 | }
33 | })
34 | connector.fullLoad() should have size 2
35 | }
36 |
37 | it should "load the schema automatically from files" in {
38 | val connector = new MockConnectorCreator().create(ConfigFactory.parseMap {
39 | new java.util.HashMap[String, Object] {
40 | put(
41 | ConfigurationKeys.FILES,
42 | util.Arrays.asList(p.resolve("MockClassAlone.avsc").toString, p.resolve("MockClassParent.avsc").toString)
43 | )
44 | }
45 | })
46 | connector.fullLoad() should have size 2
47 | }
48 |
49 | it should "not throw any exception in case of missing file in permissive mode" in {
50 | val connector = new MockConnectorCreator().create(ConfigFactory.parseMap {
51 | new java.util.HashMap[String, Object] {
52 | put(
53 | ConfigurationKeys.FILES,
54 | util.Arrays.asList(
55 | p.resolve("DoesNotExists.avsc").toString,
56 | p.resolve("MockClassAlone.avsc").toString,
57 | p.resolve("MockClassParent.avsc").toString
58 | )
59 | )
60 | put(ConfigurationKeys.MODE, "permissive")
61 | }
62 | })
63 | connector.fullLoad() should have size 2
64 | }
65 |
66 | it should "throw an exception in case of missing file in strict mode" in {
67 | intercept[MockConnectorException] {
68 | new MockConnectorCreator()
69 |
.create(ConfigFactory.parseMap { 70 | new java.util.HashMap[String, Object] { 71 | put( 72 | ConfigurationKeys.FILES, 73 | util.Arrays.asList( 74 | p.resolve("DoesNotExists.avsc").toString, 75 | p.resolve("MockClassAlone.avsc").toString, 76 | p.resolve("MockClassParent.avsc").toString 77 | ) 78 | ) 79 | } 80 | }) 81 | .fullLoad() 82 | } 83 | } 84 | 85 | it should "return Some schema if asked for the latest schema" in { 86 | val connector = 87 | new MockConnectorCreator() 88 | .create(ConfigFactory.parseMap { 89 | new java.util.HashMap[String, Object] { 90 | put( 91 | ConfigurationKeys.FILES, 92 | util.Arrays.asList( 93 | p.resolve("MockClassAlone.avsc").toString, 94 | p.resolve("MockClassParent.avsc").toString 95 | ) 96 | ) 97 | } 98 | }) 99 | val all = connector.fullLoad() 100 | connector.retrieveLatestSchema("it.agilelab.darwin.connector.mock.testclasses.MockClassAlone") shouldBe all.find( 101 | _._2.getName == "MockClassAlone" 102 | ) 103 | 104 | } 105 | 106 | it should "return None schema if asked for the latest schema" in { 107 | val connector = 108 | new MockConnectorCreator() 109 | .create(ConfigFactory.parseMap { 110 | new java.util.HashMap[String, Object] { 111 | put( 112 | ConfigurationKeys.FILES, 113 | util.Arrays.asList( 114 | p.resolve("MockClassAlone.avsc").toString, 115 | p.resolve("MockClassParent.avsc").toString 116 | ) 117 | ) 118 | } 119 | }) 120 | connector.fullLoad() 121 | connector.retrieveLatestSchema("DoesNotExists") shouldBe None 122 | 123 | } 124 | 125 | } 126 | -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/GenericMainClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark 2 | 3 | import java.text.SimpleDateFormat 4 | import java.util.Date 5 | 6 | import com.typesafe.config.{ Config, ConfigFactory } 7 | import org.apache.hadoop.fs.FileSystem 8 | import org.apache.spark.sql.SparkSession 9 | import org.slf4j.{ Logger, LoggerFactory } 10 | import scala.collection.JavaConverters._ 11 | 12 | trait GenericMainClass { 13 | self: SparkManager => 14 | 15 | val genericMainClassLogger: Logger = LoggerFactory.getLogger("SparkManager") 16 | 17 | private def makeFileSystem(session: SparkSession): FileSystem = { 18 | if (session.sparkContext.isLocal) { 19 | FileSystem.getLocal(session.sparkContext.hadoopConfiguration) 20 | } else { 21 | FileSystem.get(session.sparkContext.hadoopConfiguration) 22 | } 23 | } 24 | 25 | /** 26 | * @param settings configuration loaded from multiple ".conf" files: the default ones as per typesafe Config and 27 | * another ".conf" file that has the same name as the application 28 | * @param fs the default file system of the application executed context 29 | * @param sparkSession the sparkSession that has been created and will be used in the application 30 | * @return true if the application ends successfully false otherwise 31 | */ 32 | protected def runJob(settings: Config)(implicit fs: FileSystem, sparkSession: SparkSession): Int 33 | 34 | /** 35 | * Override in order to handle specific exceptions 36 | */ 37 | protected def handleException(exception: Throwable, applicationSettings: Config) 38 | 39 | /** 40 | * It executes the following ordered steps: 41 | *