├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .jvmopts ├── .scalafmt.conf ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── build.sbt ├── bump-version.sh ├── common └── src │ ├── main │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ ├── common │ │ ├── Connector.scala │ │ ├── ConnectorCreator.scala │ │ ├── ConnectorFactory.scala │ │ ├── DarwinConcurrentHashMap.scala │ │ ├── JavaVersion.scala │ │ ├── Logging.scala │ │ ├── SchemaReader.scala │ │ ├── compat │ │ │ └── package.scala │ │ └── package.scala │ │ └── manager │ │ ├── SchemaPayloadPair.java │ │ ├── exception │ │ ├── ConnectorNotFoundException.scala │ │ └── DarwinException.scala │ │ └── util │ │ ├── AvroSingleObjectEncodingUtils.scala │ │ ├── ByteArrayUtils.scala │ │ ├── ConfigUtil.scala │ │ ├── ConfigurationKeys.scala │ │ └── ConfluentSingleObjectEncoding.scala │ └── test │ ├── resources │ └── test │ │ ├── MockClassAlone.avsc │ │ └── MockClassParent.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ ├── common │ ├── CompatSpec.scala │ ├── DarwinConcurrentHashMapRunner.scala │ ├── DarwinConcurrentHashMapSpec.scala │ ├── DarwinJava8ConcurrentHashMapSpec.scala │ └── DarwinTrieConcurrentHashMapSpec.scala │ └── manager │ └── util │ ├── AvroSingleObjectEncodingUtilsSpec.scala │ ├── BigEndianAvroSingleObjectEncodingUtilsSpec.scala │ ├── ConfluentAvroSingleObjectEncodingSpec.scala │ └── LittleEndianAvroSingleObjectEncodingUtilsSpec.scala ├── confluent └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── confluent │ │ ├── ConfluentConnector.scala │ │ ├── ConfluentConnectorCreator.scala │ │ ├── ConfluentConnectorOptions.scala │ │ ├── HoconToMap.scala │ │ └── Main.scala │ └── test │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── confluent │ ├── ConfluentConnectorCreatorSuite.scala │ └── ConfluentConnectorSuite.scala ├── core └── src │ └── main │ ├── java │ └── it │ │ └── agilelab │ │ └── darwin │ │ ├── annotations │ │ └── AvroSerde.java │ │ └── manager │ │ └── IdSchemaPair.java │ ├── resources │ └── reference.conf │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── manager │ ├── AvroSchemaCache.scala │ ├── AvroSchemaCacheFingerprint.scala │ ├── AvroSchemaManager.scala │ ├── AvroSchemaManagerFactory.scala │ ├── CachedAvroSchemaManager.scala │ ├── CachedEagerAvroSchemaManager.scala │ ├── CachedLazyAvroSchemaManager.scala │ └── LazyAvroSchemaManager.scala ├── docs ├── img │ ├── darwin_eager_cached_schema.jpg │ ├── darwin_interaction.jpg │ ├── darwin_lazy_cached_schema.jpg │ ├── darwin_lazy_schema.jpg │ └── logo │ │ ├── darwin-icon.ai │ │ ├── darwin-icon.png │ │ └── darwin-icon.svg └── src │ ├── darwin_eager_cached_schema.xml │ ├── darwin_interaction.xml │ ├── darwin_lazy_cached_schema.xml │ └── darwin_lazy_schema.xml ├── hbase └── src │ ├── main │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── hbase │ │ ├── ConfigurationKeys.scala │ │ ├── HBaseConnector.scala │ │ └── HBaseConnectorCreator.scala │ └── test │ ├── resources │ ├── application.conf │ └── log4j.properties │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── hbase │ ├── HBase2Mock.scala │ ├── HBaseConnectorSuite.scala │ └── HBaseMock.scala ├── hbase1 └── src │ └── main │ ├── resources │ └── META-INF │ │ └── services │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── 
hbase │ └── HBaseUtils.scala ├── hbase2 └── src │ └── main │ ├── resources │ └── META-INF │ │ └── services │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── hbase │ └── HBaseUtils.scala ├── make.sh ├── mock-application └── src │ └── test │ ├── resources │ ├── MyNestedClass.avsc │ ├── OneField.avsc │ ├── application.conf │ └── test │ │ ├── MockClassAlone.avsc │ │ └── MockClassParent.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── app │ └── mock │ ├── CachedEagerApplicationSuite.scala │ ├── CachedLazyApplicationSuite.scala │ ├── LazyApplicationSuite.scala │ ├── ManagerUtilsSuite.scala │ ├── TwoConnectorsSpec.scala │ └── classes │ ├── MyClass.scala │ ├── MyNestedAbstractClass.scala │ ├── MyNestedClass.scala │ ├── MyTrait.scala │ ├── NewClass.scala │ ├── NotToBeRegisteredClass.scala │ └── OneField.scala ├── mock-connector └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── mock │ │ ├── ConfigurationKeys.scala │ │ ├── MockConnector.scala │ │ ├── MockConnectorCreator.scala │ │ └── testclasses │ │ ├── MockClassAlone.scala │ │ ├── MockClassChild.scala │ │ └── MockClassParent.scala │ └── test │ ├── resources │ └── test │ │ ├── MockClassAlone.avsc │ │ └── MockClassParent.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── mock │ └── MockConnectorSpec.scala ├── mongo └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── mongo │ │ ├── ConfigurationKeys.scala │ │ ├── ConfigurationMongoModels.scala │ │ ├── MongoConnector.scala │ │ └── MongoConnectorCreator.scala │ └── test │ ├── resources │ ├── mongo.conf │ └── mongomock.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── mongo │ └── MongoConnectorTest.scala ├── multi-connector └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── multi │ │ ├── MultiConnector.scala │ │ └── MultiConnectorCreator.scala │ └── test │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── multi │ └── MultiConnectorSpec.scala ├── postgres └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── postgres │ │ ├── ConfigurationKeys.scala │ │ ├── PostgresConnection.scala │ │ ├── PostgresConnector.scala │ │ └── PostgresConnectorCreator.scala │ └── test │ ├── resources │ ├── postgres.properties │ └── postgresmock.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── postgres │ ├── Postgres2Mock.scala │ ├── PostgresConnectorSuite.scala │ └── PostgresMock.scala ├── project ├── Dependencies.scala ├── Settings.scala ├── Versions.scala ├── build.properties └── plugin.sbt ├── publish.sh ├── rest-server └── src │ └── main │ ├── postman │ └── darwinrest.postman_collection.json │ ├── resources │ └── reference.conf │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── server │ └── rest │ ├── DarwinService.scala │ ├── HttpApp.scala │ ├── JsonSupport.scala │ ├── Main.scala │ └── Service.scala ├── rest └── src │ ├── main │ ├── java │ │ 
└── scalaj │ │ │ └── http │ │ │ └── Base64.java │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ ├── it │ │ └── agilelab │ │ │ └── darwin │ │ │ └── connector │ │ │ └── rest │ │ │ ├── JsonProtocol.scala │ │ │ ├── RestConnector.scala │ │ │ ├── RestConnectorCreator.scala │ │ │ └── RestConnectorOptions.scala │ │ └── scalaj │ │ └── http │ │ ├── DigestAuth.scala │ │ ├── Http.scala │ │ └── OAuth.scala │ └── test │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── rest │ └── RestConnectorSuite.scala ├── scalastyle-config.xml └── spark-application └── src ├── dist └── conf │ └── application.conf └── main └── scala └── it └── agilelab └── darwin └── app └── spark ├── GenericMainClass.scala ├── SchemaManagerSparkApp.scala ├── SparkConfigurationKeys.scala ├── SparkManager.scala └── classes ├── Food.scala ├── Ignored.scala ├── Menu.scala ├── MenuItem.scala ├── Order.scala └── Price.scala /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | tags: 7 | - '*' 8 | pull_request: {} 9 | jobs: 10 | ci: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | - uses: coursier/cache-action@v6 17 | - uses: coursier/setup-action@v1 18 | with: 19 | jvm: zulu:8.0.402 20 | - run: ./make.sh && ./publish.sh 21 | env: 22 | SBT_NATIVE_CLIENT: false 23 | PGP_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }} 24 | PGP_SECRET: ${{ secrets.PGP_SECRET }} 25 | SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} 26 | SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }} 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/osx,sbt,java,linux,maven,scala,windows,intellij 3 | 4 | ### Intellij ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/codeStyles/* 14 | .idea/markdown-navigator/* 15 | .idea/**/shelf 16 | 17 | # Generated files 18 | .idea/**/contentModel.xml 19 | 20 | # Sensitive or high-churn files 21 | .idea/**/dataSources/ 22 | .idea/**/dataSources.ids 23 | .idea/**/dataSources.local.xml 24 | .idea/**/sqlDataSources.xml 25 | .idea/**/dynamic.xml 26 | .idea/**/uiDesigner.xml 27 | .idea/**/dbnavigator.xml 28 | 29 | # Gradle 30 | .idea/**/gradle.xml 31 | .idea/**/libraries 32 | 33 | # Gradle and Maven with auto-import 34 | # When using Gradle or Maven with auto-import, you should exclude module files, 35 | # since they will be recreated, and may cause churn. Uncomment if using 36 | # auto-import. 
37 | .idea/modules.xml 38 | .idea/*.iml 39 | .idea/modules 40 | .idea/*.xml 41 | # CMake 42 | cmake-build-*/ 43 | 44 | # Mongo Explorer plugin 45 | .idea/**/mongoSettings.xml 46 | 47 | # File-based project format 48 | *.iws 49 | 50 | # IntelliJ 51 | out/ 52 | 53 | # mpeltonen/sbt-idea plugin 54 | .idea_modules/ 55 | 56 | # JIRA plugin 57 | atlassian-ide-plugin.xml 58 | 59 | # Cursive Clojure plugin 60 | .idea/replstate.xml 61 | 62 | # Crashlytics plugin (for Android Studio and IntelliJ) 63 | com_crashlytics_export_strings.xml 64 | crashlytics.properties 65 | crashlytics-build.properties 66 | fabric.properties 67 | 68 | # Editor-based Rest Client 69 | .idea/httpRequests 70 | 71 | # Android studio 3.1+ serialized cache file 72 | .idea/caches/build_file_checksums.ser 73 | 74 | ### Intellij Patch ### 75 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 76 | 77 | # *.iml 78 | # modules.xml 79 | # .idea/misc.xml 80 | # *.ipr 81 | 82 | # Sonarlint plugin 83 | .idea/sonarlint 84 | 85 | ### Java ### 86 | # Compiled class file 87 | *.class 88 | 89 | # Log file 90 | *.log 91 | 92 | # BlueJ files 93 | *.ctxt 94 | 95 | # Mobile Tools for Java (J2ME) 96 | .mtj.tmp/ 97 | 98 | # Package Files # 99 | *.jar 100 | *.war 101 | *.nar 102 | *.ear 103 | *.zip 104 | *.tar.gz 105 | *.rar 106 | 107 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 108 | hs_err_pid* 109 | 110 | ### Linux ### 111 | *~ 112 | 113 | # temporary files which can be created if a process still has a handle open of a deleted file 114 | .fuse_hidden* 115 | 116 | # KDE directory preferences 117 | .directory 118 | 119 | # Linux trash folder which might appear on any partition or disk 120 | .Trash-* 121 | 122 | # .nfs files are created when an open file is removed but is still being accessed 123 | .nfs* 124 | 125 | ### Maven ### 126 | target/ 127 | pom.xml.tag 128 | pom.xml.releaseBackup 129 | pom.xml.versionsBackup 130 | pom.xml.next 131 | release.properties 132 | dependency-reduced-pom.xml 133 | buildNumber.properties 134 | .mvn/timing.properties 135 | .mvn/wrapper/maven-wrapper.jar 136 | 137 | ### OSX ### 138 | # General 139 | .DS_Store 140 | .AppleDouble 141 | .LSOverride 142 | 143 | # Icon must end with two \r 144 | Icon 145 | 146 | # Thumbnails 147 | ._* 148 | 149 | # Files that might appear in the root of a volume 150 | .DocumentRevisions-V100 151 | .fseventsd 152 | .Spotlight-V100 153 | .TemporaryItems 154 | .Trashes 155 | .VolumeIcon.icns 156 | .com.apple.timemachine.donotpresent 157 | 158 | # Directories potentially created on remote AFP share 159 | .AppleDB 160 | .AppleDesktop 161 | Network Trash Folder 162 | Temporary Items 163 | .apdisk 164 | 165 | ### SBT ### 166 | # Simple Build Tool 167 | # http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control 168 | 169 | dist/* 170 | lib_managed/ 171 | src_managed/ 172 | project/boot/ 173 | project/plugins/project/ 174 | .history 175 | .cache 176 | .lib/ 177 | 178 | ### Scala ### 179 | 180 | ### Windows ### 181 | # Windows thumbnail cache files 182 | Thumbs.db 183 | ehthumbs.db 184 | ehthumbs_vista.db 185 | 186 | # Dump file 187 | *.stackdump 188 | 189 | # Folder config file 190 | [Dd]esktop.ini 191 | 192 | # Recycle Bin used on file shares 193 | $RECYCLE.BIN/ 194 | 195 | # Windows Installer files 196 | *.cab 197 | *.msi 198 | *.msix 199 | *.msm 200 | *.msp 201 | 202 | # Windows shortcuts 203 | *.lnk 204 | 205 | metals.sbt 206 | .bloop/* 207 | .metals/* 208 | 
project/.bloop/* 209 | # End of https://www.gitignore.io/api/osx,sbt,java,linux,maven,scala,windows,intellij 210 | .bsp/ 211 | .vscode/settings.json 212 | project/project/.bloop/bloop.settings.json 213 | .gitignore 214 | project/project/.bloop/darwin-build-build.json 215 | -------------------------------------------------------------------------------- /.jvmopts: -------------------------------------------------------------------------------- 1 | -Dfile.encoding=UTF-8 2 | -Xms1024m 3 | -Xmx1024m 4 | -Xss4M 5 | -XX:ReservedCodeCacheSize=128m -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "2.7.2" 2 | project.git = true 3 | encoding = "UTF-8" 4 | maxColumn = 120 5 | align.preset = most 6 | continuationIndent.defnSite = 2 7 | assumeStandardLibraryStripMargin = true 8 | docstrings = ScalaDoc 9 | lineEndings = unix 10 | includeCurlyBraceInSelectChains = false 11 | danglingParentheses.preset = true 12 | spaces { 13 | inImportCurlyBraces = true 14 | } 15 | optIn.annotationNewlines = true 16 | 17 | rewrite.rules = [SortImports, SortModifiers] -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # HOW TO CONTRIBUTE 2 | 3 | We are always very happy to have contributions, whether for trivial cleanups or big new features. 4 | 5 | If you don't know Java or Scala you can still contribute to the project. 6 | Code is not the only way to contribute to the project. We strongly value documentation and gladly accept improvements to the documentation. 7 | 8 | # REPORTING AN ISSUE 9 | 10 | Reporting potential issues as Github issues is more than welcome as a significant contribution to the project. But please be aware that Github issues should not be used for FAQs: 11 | if you have a question or are simply not sure if it is really an issue or not, please contact us ([through gitter](https://gitter.im/agile-lab-darwin/community)) first before you create a new issue. 12 | 13 | # CONTRIBUTING A CODE CHANGE 14 | 15 | To submit a change for inclusion, please do the following: 16 | - If the change is non-trivial please include some unit tests that cover the new functionality. 17 | - If you are introducing a completely new feature or API it is a good idea to start a markdown description in the Github issue itself and get consensus on the basic design first. 18 | - Make sure you have observed the recommendations in the style guide. (scalastyle:check should pass with no errors or warnings) 19 | - Follow the detailed instructions in Contributing Code Changes. 20 | 21 | ## Contributing code changes 22 | 23 | ### Overview 24 | 25 | Generally, Darwin uses: 26 | - Github issues to track logical issues, including bugs and improvements 27 | - Github pull requests to manage the review and merge of specific code changes 28 | 29 | ### Github issues 30 | 31 | - Find the existing Github issue that the change pertains to. 32 | - Do not create a new issue if creating a change to address an existing issue in Github; add to the existing discussion and work instead. 33 | - To avoid conflicts, assign the Github issue to yourself if you plan to work on it. 34 | - Look for existing pull requests that are linked to the issue, to understand if someone is already working on it. 
35 | - If required, create a new issue (below shows some critical fields to fill in): 36 | - Provide a descriptive Title. "Update web UI" or "Problem in scheduler" is not sufficient. "Support NiFi SchemaRegistry interface and add meta-connector" is good. 37 | - Write a detailed Description. For bug reports, this should ideally include a short reproduction of the problem. For new features, it may include a design document. 38 | - To avoid conflicts, assign the issue to yourself if you plan to work on it. Leave it unassigned otherwise. 39 | - Do not include a patch file; pull requests are used to propose the actual change. 40 | - If the change is a large change, consider inviting discussion on the issue on gitter first before proceeding to implement the change. 41 | 42 | 43 | ### Pull Request 44 | 45 | - Fork the Github repository if you haven't already. 46 | - Clone your fork, create a new branch, push commits to the branch. 47 | - Consider whether documentation or tests need to be added or updated as part of the change, and add them as needed (doc changes should be submitted along with code change in the same PR). 48 | - Run all tests using the `make.sh` script. 49 | - Open a pull request against the develop branch. 50 | - The PR title should usually be of the form [#issue-number]: Title, where [#issue-number] is the relevant Github issue number and Title may be the issue title or a more specific title describing the PR itself. 51 | - If the pull request is still a work in progress, and so is not ready to be merged, but needs to be pushed to Github to facilitate review, use the draft mode of Github PR. 52 | - Consider identifying committers or other contributors who have worked on the code being changed. The easiest way is to simply follow GitHub's automatic suggestions. You can add @username in the PR description to ping them immediately. 53 | - Once ready, the PR `checks` box will be updated. 54 | - Investigate and fix failures caused by the pull request 55 | - Fixes can simply be pushed to the same branch from which you opened your pull request. 56 | - Please address feedback via additional commits instead of amending existing commits. This makes it easier for the reviewers to know what has changed since the last review. All commits will be squashed into a single one by the committer via GitHub's squash button or by a script as part of the merge process. 57 | - CI will automatically re-test when new commits are pushed. 58 | - Despite our efforts, Darwin may have flaky tests at any given point, which may cause a build to fail. You need to ping committers to trigger a new build. If the failure is unrelated to your pull request and you have been able to run the tests locally successfully, please mention it in the pull request. 59 | 60 | ### The Review Process 61 | 62 | - Other reviewers, including committers, may comment on the changes and suggest modifications. Changes can be added by simply pushing more commits to the same branch. 63 | - Please add a comment and "@" the reviewer in the PR if you have addressed reviewers' comments. Even though GitHub sends notifications when new commits are pushed, it is helpful to know that the PR is ready for review once again. 64 | - Lively, polite, rapid technical debate is encouraged from everyone in the community. The outcome may be a rejection of the entire change. 65 | - Reviewers can indicate that a change looks suitable for merging by approving it via GitHub's review interface.
This indicates the strongest level of technical sign-off on a patch and it means: "I've looked at this thoroughly and take as much ownership as if I wrote the patch myself". If you approve a pull request, you will be expected to help with bugs or follow-up issues on the patch. Consistent, judicious use of pull request approvals is a great way to gain credibility as a reviewer with the broader community. Darwin reviewers will typically include the acronym LGTM in their approval comment. This was the convention used to approve pull requests before the "approve" feature was introduced by GitHub. 66 | - Sometimes, other changes will be merged which conflict with your pull request's changes. The PR can't be merged until the conflict is resolved. This can be resolved with "git fetch origin" followed by "git merge origin/develop" and resolving the conflicts by hand, then pushing the result to your branch. 67 | - Try to be responsive to the discussion rather than let days pass between replies. 68 | 69 | ### Closing Your Pull Request / issue 70 | 71 | - If a change is accepted, it will be merged and the pull request will automatically be closed, along with the associated issue if any 72 | - If your pull request is ultimately rejected, please close it. 73 | - If a pull request has gotten little or no attention, consider improving the description or the change itself and ping likely reviewers again after a few days. Consider proposing a change that's easier to include, like a smaller and/or less invasive change. 74 | - If a pull request is closed because it is deemed not the right approach to resolve an issue, then leave the issue open. However if the review makes it clear that the issue identified in the issue is not going to be resolved by any pull request (not a problem, won't fix) then also resolve the issue. 75 | 76 | *This document is heavily inspired by Kafka/Apache contribution guidelines.* 77 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import sbt.Keys.baseDirectory 2 | 3 | /* 4 | * Main build definition. 5 | * 6 | * See project/Settings.scala for the settings definitions. 7 | * See project/Dependencies.scala for the dependencies definitions. 8 | * See project/Versions.scala for the versions definitions. 
9 | */ 10 | ThisBuild / dynverVTagPrefix := false 11 | 12 | lazy val root = Project("darwin", file(".")) 13 | .settings(Settings.commonSettings: _*) 14 | .settings(libraryDependencies ++= Dependencies.core_deps) 15 | .settings(Settings.notPublishSettings) 16 | .aggregate( 17 | core, 18 | coreCommon, 19 | hbaseConnector, 20 | postgresConnector, 21 | mockConnector, 22 | mockApplication, 23 | restConnector, 24 | mongoConnector, 25 | confluentConnector, 26 | multiConnector 27 | ) 28 | 29 | lazy val core = Project("darwin-core", file("core")) 30 | .settings(Settings.commonSettings: _*) 31 | .dependsOn(coreCommon) 32 | .settings(libraryDependencies ++= Dependencies.core_deps) 33 | .settings(crossScalaVersions := Versions.crossScalaVersions) 34 | 35 | lazy val coreCommon = Project("darwin-core-common", file("common")) 36 | .settings(Settings.commonSettings: _*) 37 | .settings(libraryDependencies ++= Dependencies.core_deps) 38 | .settings(crossScalaVersions := Versions.crossScalaVersions) 39 | 40 | lazy val hbaseConnector = Project("darwin-hbase-connector", file("hbase1")) 41 | .settings(Settings.commonSettings: _*) 42 | .dependsOn(coreCommon) 43 | .settings(libraryDependencies ++= Dependencies.hbase_conn_dep) 44 | .settings(crossScalaVersions := Versions.crossScalaVersions) 45 | .settings(Compile / unmanagedSourceDirectories += baseDirectory.value / ".." / "hbase" / "src" / "main" / "scala") 46 | .settings(Test / unmanagedSourceDirectories += baseDirectory.value / ".." / "hbase" / "src" / "test" / "scala") 47 | .settings(Test / unmanagedResourceDirectories += baseDirectory.value / ".." / "hbase" / "src" / "test" / "resources") 48 | .settings(Settings.hbaseTestSettings) 49 | 50 | lazy val hbaseConnector2 = Project("darwin-hbase2-connector", file("hbase2")) 51 | .settings(Settings.commonSettings: _*) 52 | .dependsOn(coreCommon) 53 | .settings(libraryDependencies ++= Dependencies.hbase2_conn_dep) 54 | .settings(crossScalaVersions := Versions.crossScalaVersions) 55 | .settings(Compile / unmanagedSourceDirectories += baseDirectory.value / ".." / "hbase" / "src" / "main" / "scala") 56 | .settings(Test / unmanagedSourceDirectories += baseDirectory.value / ".." / "hbase" / "src" / "test" / "scala") 57 | .settings(Test / unmanagedResourceDirectories += baseDirectory.value / ".." 
/ "hbase" / "src" / "test" / "resources") 58 | .settings(Settings.hbase2TestSettings) 59 | 60 | lazy val postgresConnector = Project("darwin-postgres-connector", file("postgres")) 61 | .settings(Settings.commonSettings: _*) 62 | .dependsOn(coreCommon) 63 | .settings(libraryDependencies ++= Dependencies.postgres_conn_dep) 64 | .settings(crossScalaVersions := Versions.crossScalaVersions) 65 | 66 | lazy val restConnector = Project("darwin-rest-connector", file("rest")) 67 | .settings(Settings.commonSettings: _*) 68 | .dependsOn(coreCommon) 69 | .settings( 70 | libraryDependencies ++= Dependencies.core_deps ++ Dependencies.wireMock :+ Dependencies.scalatest 71 | ) 72 | .settings(crossScalaVersions := Seq(Versions.scala, Versions.scala_211, Versions.scala_213)) 73 | 74 | lazy val confluentConnector = Project("darwin-confluent-connector", file("confluent")) 75 | .settings(Settings.commonSettings: _*) 76 | .dependsOn(coreCommon) 77 | .settings( 78 | libraryDependencies ++= Dependencies.core_deps ++ 79 | Dependencies.wireMock ++ 80 | Dependencies.confluentSchemaRegistryDependencies :+ Dependencies.scalatest 81 | ) 82 | .settings(crossScalaVersions := Versions.crossScalaVersions) 83 | 84 | lazy val restServer = Project("darwin-rest-server", file("rest-server")) 85 | .settings(Settings.commonSettings: _*) 86 | .dependsOn(coreCommon, mockConnector) 87 | .settings(libraryDependencies ++= Dependencies.restServer) 88 | .settings(crossScalaVersions := Seq(Versions.scala, Versions.scala_211, Versions.scala_213)) 89 | .dependsOn(core, hbaseConnector, postgresConnector, mockConnector) 90 | 91 | lazy val mongoConnector = Project("darwin-mongo-connector", file("mongo")) 92 | .settings(Settings.commonSettings: _*) 93 | .dependsOn(coreCommon) 94 | .settings(libraryDependencies ++= Dependencies.mongo_conn) 95 | .settings(crossScalaVersions := Seq(Versions.scala, Versions.scala_211, Versions.scala_213)) 96 | 97 | lazy val mockConnector = Project("darwin-mock-connector", file("mock-connector")) 98 | .settings(Settings.commonSettings: _*) 99 | .dependsOn(coreCommon) 100 | .settings(libraryDependencies ++= Dependencies.mock_conn) 101 | .settings(crossScalaVersions := Versions.crossScalaVersions) 102 | 103 | lazy val mockApplication = Project("darwin-mock-application", file("mock-application")) 104 | .settings(Settings.commonSettings: _*) 105 | .dependsOn(core, mockConnector, postgresConnector, hbaseConnector) 106 | .settings(libraryDependencies ++= Dependencies.mock_app_dep) 107 | .settings(crossScalaVersions := Versions.crossScalaVersions) 108 | .settings(Settings.notPublishSettings) 109 | 110 | lazy val sparkApplication = Project("darwin-spark-application", file("spark-application")) 111 | .settings(Settings.commonSettings: _*) 112 | .dependsOn(core, hbaseConnector, postgresConnector) 113 | .settings(libraryDependencies ++= Dependencies.spark_app) 114 | .settings(crossScalaVersions := Seq(Versions.scala, Versions.scala_211)) 115 | .settings(Settings.notPublishSettings) 116 | 117 | lazy val multiConnector = Project("darwin-multi-connector", file("multi-connector")) 118 | .settings(Settings.commonSettings: _*) 119 | .dependsOn(coreCommon) 120 | .dependsOn(core) 121 | .dependsOn(mockConnector % Test) 122 | .dependsOn(confluentConnector % Test) 123 | .settings(crossScalaVersions := Versions.crossScalaVersions) 124 | .settings(libraryDependencies += Dependencies.scalatest) 125 | -------------------------------------------------------------------------------- /bump-version.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 2 ]; then 4 | echo "Illegal number of parameters, you need to pass two parameters" 5 | exit 1 6 | fi 7 | 8 | unameOut="$(uname -s)" 9 | case "${unameOut}" in 10 | Linux*) machine=Linux;; 11 | Darwin*) machine=Mac;; 12 | *) machine=UNKNOWN 13 | esac 14 | 15 | if [ "$machine" = 'UNKNOWN' ]; then 16 | echo "Unknown os... aborting" 17 | exit 2 18 | fi 19 | 20 | echo "Running on $machine.." 21 | 22 | OLD_VERSION=$1 23 | NEW_VERSION=$2 24 | FILES_TO_CHANGE=$(git grep -l "$OLD_VERSION" | grep -v ".*\.ai\|.*\.svg\|.*\.xml") # there is an ai file that always matches... 25 | 26 | if [ -z "$FILES_TO_CHANGE" ]; then 27 | echo "No files to change..." 28 | exit 0 29 | fi 30 | 31 | echo "Bumping from version $OLD_VERSION to version $NEW_VERSION" 32 | echo "Editing the following files:" 33 | echo "" 34 | echo "$FILES_TO_CHANGE" 35 | echo "----------------------------" 36 | 37 | while IFS= read -r line; do 38 | case "${machine}" in 39 | Linux*) sed -i "s/${OLD_VERSION}/${NEW_VERSION}/g" $line;; 40 | Mac*) sed -i '' -e "s/${OLD_VERSION}/${NEW_VERSION}/g" $line;; 41 | esac 42 | git add $line 43 | done <<< "$FILES_TO_CHANGE" 44 | 45 | 46 | echo "Press enter to commit:" 47 | read 48 | 49 | git commit -e -m "Bump version to $NEW_VERSION" 50 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/ConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import com.typesafe.config.Config 4 | 5 | /** 6 | * A generic interface used to create the [[Connector]] found in the classpath. 7 | */ 8 | trait ConnectorCreator { 9 | 10 | /** 11 | * @return the name of the Connector 12 | */ 13 | def name(): String 14 | 15 | /** 16 | * This method should be overridden in each connector module returning its implementation. 17 | * 18 | * @param config configuration that will be used to create the correct implementation of [[Connector]] 19 | * @return the specific instance of [[Connector]] 20 | */ 21 | def create(config: Config): Connector 22 | } 23 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/ConnectorFactory.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util.ServiceLoader 4 | 5 | import com.typesafe.config.Config 6 | import it.agilelab.darwin.manager.exception.ConnectorNotFoundException 7 | import it.agilelab.darwin.manager.util.ConfigurationKeys 8 | 9 | import it.agilelab.darwin.common.compat._ 10 | 11 | /** 12 | * Used to obtain the correct implementation of [[Connector]] found on the classpath using the [[ConnectorCreator]] 13 | */ 14 | object ConnectorFactory extends Logging { 15 | 16 | /** 17 | * Retrieves all the registered [[ConnectorCreator]] in the classpath. 
18 | * 19 | * @return a sequence of all the loaded [[ConnectorCreator]] 20 | */ 21 | def creators(): Seq[ConnectorCreator] = { 22 | val creators = ServiceLoader.load(classOf[ConnectorCreator]).toScala().toSeq 23 | log.debug(s"${creators.size} available connector creators found") 24 | creators 25 | } 26 | 27 | /** 28 | * @return the first ConnectorCreator, use ONLY if you are sure that just one is available in the classpath 29 | */ 30 | def creator(): Option[ConnectorCreator] = creators().headOption 31 | 32 | /** 33 | * @return the ConnectorCreator identified by the name given as input 34 | */ 35 | def creator(name: String): Option[ConnectorCreator] = { 36 | creators().find(_.name() == name) 37 | } 38 | 39 | /** 40 | * @return the ConnectorCreator identified by the name given as input 41 | */ 42 | def creator(conf: Config): Option[ConnectorCreator] = { 43 | if (conf.hasPath(ConfigurationKeys.CONNECTOR)) { 44 | creator(conf.getString(ConfigurationKeys.CONNECTOR)) 45 | } else { 46 | creator() 47 | } 48 | } 49 | 50 | def connector(config: Config): Connector = { 51 | val cnt = creator(config) 52 | .map(_.create(config)) 53 | .getOrElse(throw new ConnectorNotFoundException(config)) 54 | if (config.hasPath(ConfigurationKeys.CREATE_TABLE) && config.getBoolean(ConfigurationKeys.CREATE_TABLE)) { 55 | cnt.createTable() 56 | } else if (!cnt.tableExists()) { 57 | log.warn(s"Darwin table does not exists and has not been created (${ConfigurationKeys.CREATE_TABLE} was false)") 58 | log.warn(cnt.tableCreationHint()) 59 | } 60 | cnt 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/DarwinConcurrentHashMap.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util.function.{ Function => JFunction } 4 | 5 | import scala.collection.concurrent.TrieMap 6 | 7 | /** 8 | * A thread safe lock-free concurrent map that exposes only getOrElseUpdate and getOrElse methods 9 | * It is backed by either a scala.collection.concurrent.TrieMap or java.util.concurrent.ConcurrentHashMap 10 | * depending on the JVM that executes Darwin. 11 | * JVM 8 or later use java's ConcurrentHashMap while earlier versions use scala's TrieMap 12 | * 13 | * Obtain the "correct" instance using {{{DarwinConcurrentHashMap.empty}}} factory method. 
14 | */ 15 | trait DarwinConcurrentHashMap[K, V] { 16 | def getOrElseUpdate(k: K, newValue: => V): V 17 | 18 | def getOrElse(k: K, default: => V): V 19 | } 20 | 21 | object DarwinConcurrentHashMap { 22 | 23 | private[common] class DarwinJava8ConcurrentHashMap[K, V] extends DarwinConcurrentHashMap[K, V] { 24 | private val innerMap = new java.util.concurrent.ConcurrentHashMap[K, V]() 25 | 26 | override def getOrElseUpdate(k: K, newValue: => V): V = { 27 | innerMap.computeIfAbsent( 28 | k, 29 | new JFunction[K, V]() { 30 | override def apply(t: K): V = newValue 31 | } 32 | ) 33 | } 34 | 35 | override def getOrElse(k: K, default: => V): V = 36 | Option(innerMap.get(k)).getOrElse(default) 37 | } 38 | 39 | private[common] class DarwinTrieConcurrentHashMap[K, V] extends DarwinConcurrentHashMap[K, V] { 40 | private val innerMap = TrieMap.empty[K, V] 41 | 42 | override def getOrElseUpdate(k: K, newValue: => V): V = innerMap.getOrElseUpdate(k, newValue) 43 | 44 | override def getOrElse(k: K, default: => V): V = innerMap.getOrElse(k, default) 45 | } 46 | 47 | private val isJavaAtLeast8 = JavaVersion.current() >= 8 48 | 49 | def empty[K, V]: DarwinConcurrentHashMap[K, V] = { 50 | if (isJavaAtLeast8) { 51 | new DarwinJava8ConcurrentHashMap() 52 | } else { 53 | new DarwinTrieConcurrentHashMap() 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/JavaVersion.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | object JavaVersion { 4 | 5 | /** 6 | * @return the JVM version in use, It returns an Integer indicating the major version i 7 | */ 8 | def current(): Int = { 9 | val propertyValue = System.getProperty("java.version") 10 | parseJavaVersion(propertyValue) 11 | } 12 | 13 | /** 14 | * @return the JVM version represented by the input string, It returns an Integer indicating the major version i 15 | */ 16 | def parseJavaVersion(propertyValue: String): Int = { 17 | val splits = propertyValue.split("\\.") 18 | if (propertyValue.startsWith("1.")) { 19 | splits(1).takeWhile(isDigit).toInt 20 | } else { 21 | splits(0).takeWhile(isDigit).toInt 22 | } 23 | } 24 | private val digits = ('0' to '9').toSet 25 | private def isDigit(c: Char): Boolean = { 26 | digits.contains(c) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/Logging.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import org.slf4j.{ Logger, LoggerFactory } 4 | 5 | trait Logging { 6 | private lazy val _log = LoggerFactory.getLogger(getClass.getName) 7 | 8 | def log: Logger = _log 9 | } 10 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/SchemaReader.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.io.{ File, IOException, InputStream } 4 | 5 | import org.apache.avro.{ Schema, SchemaParseException } 6 | 7 | object SchemaReader { 8 | 9 | def readFromResources(p: String): Schema = { 10 | using(getClass.getClassLoader.getResourceAsStream(p)) { stream => 11 | read(stream) 12 | } 13 | } 14 | 15 | def read(f: File): Schema = { 16 | val parser = new Schema.Parser() 17 | parser.parse(f) 18 | } 19 | 20 | def read(s: 
String): Schema = { 21 | val parser = new Schema.Parser() 22 | parser.parse(s) 23 | } 24 | 25 | /** 26 | * Does not close the InputStream 27 | */ 28 | def read(is: InputStream): Schema = { 29 | val parser = new Schema.Parser() 30 | parser.parse(is) 31 | } 32 | 33 | def safeReadFromResources(p: String): Either[SchemaReaderError, Schema] = { 34 | Option(getClass.getClassLoader.getResourceAsStream(p)).fold[Either[SchemaReaderError, Schema]]( 35 | Left(ResourceNotFoundError(s"Cannot find resource: $p")) 36 | ) { stream => 37 | try { 38 | safeRead(stream) 39 | } catch { 40 | case e: SchemaParseException => Left(SchemaParserError(e)) 41 | case e: IOException => Left(IOError(e)) 42 | case e: Throwable => Left(UnknownError(e)) 43 | } finally { 44 | stream.close() 45 | } 46 | } 47 | } 48 | 49 | def safeRead(f: File): Either[SchemaReaderError, Schema] = { 50 | try { 51 | Right(new Schema.Parser().parse(f)) 52 | } catch { 53 | case e: SchemaParseException => Left(SchemaParserError(e)) 54 | case e: IOException => Left(IOError(e)) 55 | case e: Throwable => Left(UnknownError(e)) 56 | } 57 | } 58 | 59 | def safeRead(s: String): Either[SchemaReaderError, Schema] = { 60 | try { 61 | Right(new Schema.Parser().parse(s)) 62 | } catch { 63 | case e: SchemaParseException => Left(SchemaParserError(e)) 64 | case e: IOException => Left(IOError(e)) 65 | case e: Throwable => Left(UnknownError(e)) 66 | } 67 | } 68 | 69 | /** 70 | * Does not close the InputStream 71 | */ 72 | def safeRead(is: InputStream): Either[SchemaReaderError, Schema] = { 73 | try { 74 | Right(new Schema.Parser().parse(is)) 75 | } catch { 76 | case e: SchemaParseException => Left(SchemaParserError(e)) 77 | case e: IOException => Left(IOError(e)) 78 | case e: Throwable => Left(UnknownError(e)) 79 | } 80 | } 81 | 82 | sealed trait SchemaReaderError 83 | 84 | case class SchemaParserError(exception: SchemaParseException) extends SchemaReaderError 85 | 86 | case class IOError(exception: IOException) extends SchemaReaderError 87 | 88 | case class ResourceNotFoundError(msg: String) extends SchemaReaderError 89 | 90 | case class UnknownError(t: Throwable) extends SchemaReaderError 91 | 92 | } 93 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/compat/package.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util 4 | 5 | /** 6 | * Converters java <-> scala that works between 2.10, 2.11, 2.12, 2.13 7 | */ 8 | package object compat { 9 | def toScala[A](jIterable: java.lang.Iterable[A]): scala.collection.Iterable[A] = { 10 | new Iterable[A] { 11 | def iterator: scala.collection.Iterator[A] = toScala(jIterable.iterator()) 12 | } 13 | } 14 | 15 | def toScala[A](jIterator: java.util.Iterator[A]): scala.collection.Iterator[A] = { 16 | new scala.collection.Iterator[A] { 17 | def next() = jIterator.next() 18 | def hasNext = jIterator.hasNext() 19 | } 20 | } 21 | 22 | def toScala[A, B](jIterator: java.util.Map[A, B]): scala.collection.Map[A, B] = { 23 | toScala(jIterator.entrySet().iterator()).map(x => (x.getKey, x.getValue)).toMap 24 | } 25 | 26 | def toScala[A](jSet: java.util.Set[A]): scala.collection.Set[A] = { 27 | val iterator = jSet.iterator() 28 | val builder = Set.newBuilder[A] 29 | while (iterator.hasNext) { 30 | builder += iterator.next() 31 | } 32 | builder.result() 33 | } 34 | 35 | def toJava[A](iterable: scala.collection.Iterable[A]): java.lang.Iterable[A] = new 
java.lang.Iterable[A] { 36 | override def iterator(): util.Iterator[A] = new util.Iterator[A] { 37 | private val it = iterable.iterator 38 | override def hasNext: Boolean = it.hasNext 39 | override def next(): A = it.next() 40 | } 41 | } 42 | 43 | def toJava[A](list: List[A]): java.util.List[A] = { 44 | val arraylist = new util.ArrayList[A]() 45 | list.foreach(arraylist.add) 46 | arraylist 47 | } 48 | 49 | implicit class IterableConverter[A](jIterable: java.lang.Iterable[A]) { 50 | def toScala(): scala.collection.Iterable[A] = { 51 | compat.toScala(jIterable) 52 | } 53 | } 54 | 55 | implicit class SetConverter[A](jSet: java.util.Set[A]) { 56 | def toScala(): scala.collection.Set[A] = { 57 | compat.toScala(jSet) 58 | } 59 | } 60 | 61 | implicit class JIterableConverter[A](iterable: scala.collection.Iterable[A]) { 62 | def toJava(): java.lang.Iterable[A] = { 63 | compat.toJava(iterable) 64 | } 65 | 66 | def toJavaList(): java.util.List[A] = { 67 | compat.toJava(iterable.toList) 68 | } 69 | } 70 | 71 | implicit class JMapConverter[A, B](map: scala.collection.Map[A, B]) { 72 | def toJava(): java.util.Map[A, B] = { 73 | val hashmap: util.Map[A, B] = new util.HashMap[A, B]() 74 | map.foreach { case (k, v) => 75 | hashmap.put(k, v) 76 | } 77 | hashmap 78 | } 79 | 80 | } 81 | 82 | implicit class IteratorConverter[A](jIterator: java.util.Iterator[A]) { 83 | def toScala(): scala.collection.Iterator[A] = { 84 | compat.toScala(jIterator) 85 | } 86 | } 87 | 88 | implicit class MapConverter[A, B](jmap: java.util.Map[A, B]) { 89 | def toScala(): collection.Map[A, B] = { 90 | compat.toScala(jmap) 91 | } 92 | } 93 | 94 | implicit class RightBiasedEither[+L, +R](val self: Either[L, R]) extends AnyVal { 95 | def rightMap[R1](f: R => R1): Either[L, R1] = { 96 | self match { 97 | case Right(v) => Right(f(v)) 98 | case _ => self.asInstanceOf[Either[L, R1]] 99 | } 100 | } 101 | 102 | def rightFlatMap[L1 >: L, R1](f: R => Either[L1, R1]): Either[L1, R1] = { 103 | self match { 104 | case Right(v) => f(v) 105 | case _ => self.asInstanceOf[Either[L1, R1]] 106 | } 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/package.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin 2 | 3 | package object common { 4 | 5 | def using[A <: AutoCloseable, B](closeable: A)(f: A => B): B = { 6 | try { 7 | f(closeable) 8 | } finally { 9 | closeable.close() 10 | } 11 | } 12 | 13 | final val LONG_SIZE = 8 14 | final val INT_SIZE = 4 15 | 16 | } 17 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/SchemaPayloadPair.java: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager; 2 | 3 | import org.apache.avro.Schema; 4 | 5 | public class SchemaPayloadPair { 6 | private final Schema schema; 7 | private final byte[] payload; 8 | 9 | private SchemaPayloadPair(Schema schema, byte[] payload) { 10 | this.schema = schema; 11 | this.payload = payload; 12 | } 13 | 14 | public Schema getSchema() { 15 | return schema; 16 | } 17 | 18 | public byte[] getPayload() { 19 | return payload; 20 | } 21 | 22 | public static SchemaPayloadPair create(Schema schema, byte[] payload) { 23 | return new SchemaPayloadPair(schema, payload); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- 
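
The helpers above (SchemaReader's safe parsing, the java/scala converters in the compat package, and the `using` loan pattern from the common package object) are meant to be combined by callers. The following is a minimal usage sketch, not a file from the repository: the object name, the schema literal and the printed values are illustrative assumptions.

```scala
// Hypothetical usage sketch (not part of the Darwin sources).
import java.io.ByteArrayInputStream
import it.agilelab.darwin.common.{ SchemaReader, using }
import it.agilelab.darwin.common.compat._

object SchemaReaderUsageSketch {
  def main(args: Array[String]): Unit = {
    // Illustrative schema literal, not taken from the repository.
    val schemaJson =
      """{"type":"record","name":"Example","fields":[{"name":"id","type":"long"}]}"""

    // safeRead returns parse failures as Left values instead of throwing;
    // rightMap comes from the RightBiasedEither syntax in the compat package.
    val fullName = SchemaReader.safeRead(schemaJson).rightMap(_.getFullName)
    println(fullName) // Right(Example)

    // The `using` loan pattern from the common package object closes the stream;
    // SchemaReader.read itself never closes the InputStream it is given.
    val schema = using(new ByteArrayInputStream(schemaJson.getBytes("UTF-8"))) { stream =>
      SchemaReader.read(stream)
    }
    println(schema.getName) // Example
  }
}
```
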
/common/src/main/scala/it/agilelab/darwin/manager/exception/ConnectorNotFoundException.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.exception 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.manager.util.ConfigUtil 5 | 6 | class ConnectorNotFoundException(val config: Config) extends RuntimeException(s"Cannot find Darwin connector") { 7 | 8 | def confAsString(): String = ConfigUtil.printConfig(config) 9 | 10 | } 11 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/exception/DarwinException.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.exception 2 | 3 | class DarwinException(message: String) extends RuntimeException(message) 4 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/util/ByteArrayUtils.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.io.OutputStream 4 | import java.nio.{ ByteBuffer, ByteOrder } 5 | 6 | import it.agilelab.darwin.common.{ INT_SIZE, LONG_SIZE } 7 | 8 | private[darwin] object ByteArrayUtils { 9 | 10 | implicit class EnrichedLong(val l: Long) extends AnyVal { 11 | 12 | /** 13 | * Converts Long to Array[Byte] honoring the input endianness 14 | */ 15 | def longToByteArray(endianness: ByteOrder): Array[Byte] = { 16 | ByteBuffer 17 | .allocate(LONG_SIZE) 18 | .order(endianness) 19 | .putLong(l) 20 | .array() 21 | } 22 | 23 | def truncateIntToByteArray(endianess: ByteOrder): Array[Byte] = { 24 | ByteBuffer 25 | .allocate(INT_SIZE) 26 | .order(endianess) 27 | .putInt(l.toInt) 28 | .array() 29 | } 30 | 31 | /** 32 | * Writes to the stream the enriched long honoring the input endianness 33 | */ 34 | def writeToStream(os: OutputStream, endianness: ByteOrder): Unit = { 35 | endianness match { 36 | case ByteOrder.BIG_ENDIAN => 37 | os.write((l >>> 56).asInstanceOf[Int]) 38 | os.write((l >>> 48).asInstanceOf[Int]) 39 | os.write((l >>> 40).asInstanceOf[Int]) 40 | os.write((l >>> 32).asInstanceOf[Int]) 41 | os.write((l >>> 24).asInstanceOf[Int]) 42 | os.write((l >>> 16).asInstanceOf[Int]) 43 | os.write((l >>> 8).asInstanceOf[Int]) 44 | os.write((l >>> 0).asInstanceOf[Int]) 45 | case ByteOrder.LITTLE_ENDIAN => 46 | os.write((l >>> 0).asInstanceOf[Int]) 47 | os.write((l >>> 8).asInstanceOf[Int]) 48 | os.write((l >>> 16).asInstanceOf[Int]) 49 | os.write((l >>> 24).asInstanceOf[Int]) 50 | os.write((l >>> 32).asInstanceOf[Int]) 51 | os.write((l >>> 40).asInstanceOf[Int]) 52 | os.write((l >>> 48).asInstanceOf[Int]) 53 | os.write((l >>> 56).asInstanceOf[Int]) 54 | case other: Any => 55 | throw new IllegalArgumentException("Unknown ByteOrder: " + other) 56 | } 57 | } 58 | } 59 | 60 | implicit class EnrichedInt(val l: Int) extends AnyVal { 61 | 62 | def intToByteArray(endianess: ByteOrder): Array[Byte] = { 63 | ByteBuffer 64 | .allocate(INT_SIZE) 65 | .order(endianess) 66 | .putInt(l.toInt) 67 | .array() 68 | } 69 | 70 | /** 71 | * Writes to the stream the enriched long honoring the input endianness 72 | */ 73 | def writeIntToStream(os: OutputStream, endianness: ByteOrder): Unit = { 74 | endianness match { 75 | case ByteOrder.BIG_ENDIAN => 76 | os.write((l >>> 24)) 77 | os.write((l >>> 16)) 78 | os.write((l >>> 8)) 79 | os.write((l >>> 0)) 80 | 
case ByteOrder.LITTLE_ENDIAN => 81 | os.write((l >>> 0)) 82 | os.write((l >>> 8)) 83 | os.write((l >>> 16)) 84 | os.write((l >>> 24)) 85 | case other: Any => 86 | throw new IllegalArgumentException("Unknown ByteOrder: " + other) 87 | } 88 | } 89 | } 90 | 91 | def arrayEquals(b1: Array[Byte], b2: Array[Byte], start1: Int, start2: Int, length: Int): Boolean = { 92 | require(length > 0, "length must be positive") 93 | var i = start1 94 | var j = start2 95 | var areEqual = true 96 | while (areEqual && i < start1 + length) { 97 | if (b1(i) != b2(j)) { 98 | areEqual = false 99 | } 100 | i += 1 101 | j += 1 102 | } 103 | areEqual 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/util/ConfigUtil.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.nio.ByteOrder 4 | 5 | import com.typesafe.config.{ Config, ConfigRenderOptions } 6 | 7 | object ConfigUtil { 8 | def printConfig(conf: Config): String = { 9 | conf.root().render(ConfigRenderOptions.defaults().setComments(false).setOriginComments(false)) 10 | } 11 | 12 | def printSmallConfig(conf: Config): String = { 13 | conf.root().render(ConfigRenderOptions.defaults().setComments(false).setOriginComments(false)) 14 | } 15 | 16 | def stringToEndianness(string: String): ByteOrder = { 17 | string.toUpperCase match { 18 | case "BIG_ENDIAN" => ByteOrder.BIG_ENDIAN 19 | case "LITTLE_ENDIAN" => ByteOrder.LITTLE_ENDIAN 20 | case _ => throw new IllegalArgumentException(s"Unknown endianness: $string") 21 | } 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/util/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | object ConfigurationKeys { 4 | 5 | val CREATE_TABLE = "createTable" 6 | 7 | val CONNECTOR = "connector" 8 | 9 | val MANAGER_TYPE: String = "type" 10 | val ENDIANNESS: String = "endianness" 11 | val CACHED_EAGER: String = "cached_eager" 12 | val CACHED_LAZY: String = "cached_lazy" 13 | val LAZY: String = "lazy" 14 | } 15 | -------------------------------------------------------------------------------- /common/src/test/resources/test/MockClassAlone.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassAlone","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"fry","type":"string"},{"name":"bender","type":"long"},{"name":"leela","type":"int"},{"name":"zoidberg","type":"boolean"}]} 2 | -------------------------------------------------------------------------------- /common/src/test/resources/test/MockClassParent.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassParent","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"one","type":"int"},{"name":"two","type":"string"},{"name":"three","type":"long"},{"name":"four","type":{"type":"record","name":"MockClassChild","fields":[{"name":"twoOne","type":"long"},{"name":"twoTwo","type":"string"}]}}]} 2 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/CompatSpec.scala: 
-------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import org.scalatest.flatspec.AnyFlatSpec 4 | import org.scalatest.matchers.should.Matchers 5 | import compat._ 6 | 7 | class CompatSpec extends AnyFlatSpec with Matchers { 8 | 9 | "RightBiasedEither" should "map correctly on left side" in { 10 | Left[Int, String](3).rightMap { 11 | "Hello" + _ 12 | } shouldBe Left[Int, String](3) 13 | } 14 | 15 | it should "map correctly on right side" in { 16 | Right[Int, String]("Darwin").rightMap { 17 | "Hello " + _ 18 | } shouldBe Right[Int, String]("Hello Darwin") 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinConcurrentHashMapRunner.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import it.agilelab.darwin.common.DarwinConcurrentHashMap.{ DarwinJava8ConcurrentHashMap, DarwinTrieConcurrentHashMap } 4 | import org.scalatest.flatspec.AnyFlatSpec 5 | import org.scalatest.matchers.should.Matchers 6 | import org.scalatest.{ BeforeAndAfter, BeforeAndAfterAll } 7 | 8 | sealed private[common] class DarwinConcurrentHashMapRunner[K, V](sut: () => DarwinConcurrentHashMap[K, V]) 9 | extends AnyFlatSpec 10 | with Matchers 11 | with BeforeAndAfterAll 12 | with BeforeAndAfter { 13 | 14 | protected class DefaultException extends Exception("Side effect evaluated!") 15 | 16 | protected def anEmptySut: DarwinConcurrentHashMap[K, V] = sut() 17 | 18 | } 19 | 20 | abstract class DarwinJava8ConcurrentHashMapRunner[K, V] 21 | extends DarwinConcurrentHashMapRunner[K, V](() => new DarwinJava8ConcurrentHashMap) 22 | abstract class DarwinJava7ConcurrentHashMapRunner[K, V] 23 | extends DarwinConcurrentHashMapRunner[K, V](() => new DarwinTrieConcurrentHashMap) 24 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinConcurrentHashMapSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util.concurrent.atomic.AtomicInteger 4 | 5 | import org.scalatest.BeforeAndAfter 6 | import org.scalatest.flatspec.AnyFlatSpec 7 | import org.scalatest.matchers.should.Matchers 8 | 9 | class DarwinConcurrentHashMapSpec extends AnyFlatSpec with Matchers with BeforeAndAfter { 10 | private val realJavaVersion = System.getProperty("java.version") 11 | 12 | after { 13 | System.setProperty("java.version", realJavaVersion) 14 | } 15 | 16 | def test(): Unit = { 17 | val threadNumber = 1000 18 | val map = DarwinConcurrentHashMap.empty[String, Int] 19 | var counter = 0 20 | val threadCounter = new AtomicInteger(0) 21 | val runnables = for (_ <- 1 to threadNumber) yield { 22 | new Runnable { 23 | override def run(): Unit = { 24 | threadCounter.incrementAndGet() 25 | val res = map.getOrElseUpdate( 26 | "A", { 27 | counter += 1 28 | counter 29 | } 30 | ) 31 | res should be(1) 32 | } 33 | } 34 | } 35 | val threads = for (r <- runnables) yield { 36 | val t = new Thread(r) 37 | t 38 | } 39 | for (t <- threads) { 40 | t.start() 41 | } 42 | for (t <- threads) { 43 | t.join() 44 | } 45 | threadCounter.get() should be(threadNumber) 46 | } 47 | 48 | it should "not evaluate the value if the key is present JAVA 8" in { 49 | test() 50 | } 51 | 52 | it should "not evaluate the value if the key is present JAVA 7" in { 53 | if 
(JavaVersion.parseJavaVersion(realJavaVersion) >= 8) { 54 | System.setProperty("java.version", "1.7") 55 | test() 56 | } else { 57 | assert(true) 58 | } 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinJava8ConcurrentHashMapSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import it.agilelab.darwin.common.DarwinConcurrentHashMap.DarwinJava8ConcurrentHashMap 4 | 5 | class DarwinJava8ConcurrentHashMapSpec extends DarwinJava8ConcurrentHashMapRunner[String, Int] { 6 | 7 | private def defaultWithSideEffect: Int = throw new DefaultException 8 | private val aKey = "aKey" 9 | private val aValue = 1 10 | 11 | it should "not evaluate the default param when key found - getOrElse" in { 12 | val sut = anEmptySut 13 | sut.getOrElseUpdate(aKey, aValue) 14 | 15 | lazy val res = sut.getOrElse(aKey, defaultWithSideEffect) 16 | 17 | sut shouldBe a[DarwinJava8ConcurrentHashMap[_, _]] 18 | noException should be thrownBy res 19 | res shouldBe aValue 20 | } 21 | 22 | it should "evaluate the default param when key NOT found - getOrElse" in { 23 | val sut = anEmptySut 24 | 25 | sut.getOrElseUpdate(aKey, aValue) 26 | 27 | lazy val res = sut.getOrElse("anotherKey", defaultWithSideEffect) 28 | 29 | sut shouldBe a[DarwinJava8ConcurrentHashMap[_, _]] 30 | an[DefaultException] should be thrownBy res 31 | } 32 | 33 | it should "not evaluate the default param when key is null - getOrElse" in { 34 | val sut = anEmptySut 35 | 36 | lazy val res = sut.getOrElse(null, defaultWithSideEffect) 37 | 38 | sut shouldBe a[DarwinJava8ConcurrentHashMap[_, _]] 39 | an[NullPointerException] should be thrownBy res 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinTrieConcurrentHashMapSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import it.agilelab.darwin.common.DarwinConcurrentHashMap.DarwinTrieConcurrentHashMap 4 | 5 | class DarwinTrieConcurrentHashMapSpec extends DarwinJava7ConcurrentHashMapRunner[String, Int] { 6 | 7 | private def defaultWithSideEffect: Int = throw new DefaultException 8 | 9 | private val aKey = "aKey" 10 | private val aValue = 1 11 | 12 | it should "not evaluate the default param when key found - getOrElse" in { 13 | val sut = anEmptySut 14 | 15 | sut.getOrElseUpdate(aKey, aValue) 16 | 17 | lazy val res = sut.getOrElse(aKey, defaultWithSideEffect) 18 | 19 | sut shouldBe a[DarwinTrieConcurrentHashMap[_, _]] 20 | noException should be thrownBy res 21 | res shouldBe aValue 22 | } 23 | 24 | it should "evaluate the default param when key NOT found - getOrElse" in { 25 | val sut = anEmptySut 26 | sut.getOrElseUpdate(aKey, aValue) 27 | 28 | lazy val res = sut.getOrElse("anotherKey", defaultWithSideEffect) 29 | 30 | sut shouldBe a[DarwinTrieConcurrentHashMap[_, _]] 31 | an[DefaultException] should be thrownBy res 32 | } 33 | 34 | it should "evaluate the default param when key is null - getOrElse" in { 35 | val sut = anEmptySut 36 | 37 | lazy val res = sut.getOrElse(null, defaultWithSideEffect) 38 | 39 | sut shouldBe a[DarwinTrieConcurrentHashMap[_, _]] 40 | an[DefaultException] should be thrownBy res 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- 
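
The DarwinConcurrentHashMap factory described above picks the backing map based on the JVM version, and the specs verify that the by-name value passed to getOrElseUpdate is evaluated only once per key. A small usage sketch follows; it is hypothetical code (not a repository file), and the object name, key and placeholder value are assumptions made for illustration.

```scala
// Hypothetical usage sketch (not part of the Darwin sources).
import it.agilelab.darwin.common.DarwinConcurrentHashMap

object DarwinConcurrentHashMapUsageSketch {
  def main(args: Array[String]): Unit = {
    // empty picks the JVM-appropriate backing map (ConcurrentHashMap on Java 8+, TrieMap otherwise).
    val cache = DarwinConcurrentHashMap.empty[Long, String]
    var evaluations = 0

    def expensiveLookup(): String = {
      evaluations += 1
      "schema-for-id-42" // placeholder value
    }

    // The second call finds the key already present, so expensiveLookup() is not evaluated again.
    cache.getOrElseUpdate(42L, expensiveLookup())
    cache.getOrElseUpdate(42L, expensiveLookup())
    println(evaluations) // 1

    // getOrElse never inserts; its default is evaluated only when the key is missing.
    println(cache.getOrElse(43L, "missing")) // missing
  }
}
```
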
/common/src/test/scala/it/agilelab/darwin/manager/util/BigEndianAvroSingleObjectEncodingUtilsSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.nio.ByteOrder 4 | 5 | class BigEndianAvroSingleObjectEncodingUtilsSpec extends AvroSingleObjectEncodingUtilsSpec(ByteOrder.BIG_ENDIAN) 6 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/manager/util/LittleEndianAvroSingleObjectEncodingUtilsSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.nio.ByteOrder 4 | 5 | class LittleEndianAvroSingleObjectEncodingUtilsSpec extends AvroSingleObjectEncodingUtilsSpec(ByteOrder.LITTLE_ENDIAN) 6 | -------------------------------------------------------------------------------- /confluent/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.confluent.ConfluentConnectorCreator 2 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.Config 4 | import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient 5 | import it.agilelab.darwin.common.compat._ 6 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, Logging } 7 | 8 | class ConfluentConnectorCreator extends ConnectorCreator with Logging { 9 | 10 | override def create(config: Config): Connector = { 11 | log.debug("creating confluent connector") 12 | 13 | val confluentOptions = ConfluentConnectorOptions.fromConfig(config) 14 | log.info("confluent options are {}", confluentOptions) 15 | 16 | val client = new CachedSchemaRegistryClient( 17 | confluentOptions.endpoints.toJavaList(), 18 | confluentOptions.maxCachedSchemas, 19 | confluentOptions.config 20 | ) 21 | 22 | val rest = new ConfluentConnector(confluentOptions, client) 23 | log.debug("created confluent connector") 24 | rest 25 | } 26 | 27 | /** 28 | * @return the name of the Connector 29 | */ 30 | override def name(): String = "confluent" 31 | } 32 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorOptions.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | 6 | case class ConfluentConnectorOptions( 7 | endpoints: List[String], 8 | config: java.util.Map[String, AnyRef], 9 | maxCachedSchemas: Int 10 | ) 11 | 12 | object ConfluentConnectorOptions { 13 | 14 | val ENDPOINTS_CONFIG_KEY = "endpoints" 15 | val MAX_CACHED_SCHEMA_KEYS = "max-cached-schemas" 16 | 17 | def fromConfig(config: Config): ConfluentConnectorOptions = { 18 | 19 | if (!config.hasPath(ENDPOINTS_CONFIG_KEY)) { 20 | throw new IllegalArgumentException( 21 | s"Missing [${ENDPOINTS_CONFIG_KEY}] configuration key for ${classOf[ConfluentConnector].getName}" 22 | ) 23 | } 24 | 25 | if (!config.hasPath(MAX_CACHED_SCHEMA_KEYS)) { 26 | 
throw new IllegalArgumentException( 27 | s"Missing [${MAX_CACHED_SCHEMA_KEYS}] configuration key for ${classOf[ConfluentConnector].getName}" 28 | ) 29 | } 30 | 31 | val endpoints = config.getStringList(ENDPOINTS_CONFIG_KEY).toScala().toList 32 | val maxCachedSchemas = config.getInt(MAX_CACHED_SCHEMA_KEYS) 33 | val other = config.root() 34 | 35 | ConfluentConnectorOptions(endpoints, HoconToMap.convert(other), maxCachedSchemas) 36 | 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/HoconToMap.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.{ ConfigObject, ConfigValue } 4 | import it.agilelab.darwin.common.compat.{ JMapConverter, SetConverter } 5 | 6 | import scala.collection.mutable 7 | 8 | private[confluent] object HoconToMap { 9 | 10 | private def walk(root: ConfigValue): Map[String, AnyRef] = { 11 | val result = mutable.HashMap.empty[String, AnyRef] 12 | 13 | def doWalk(path: String, r: ConfigValue): Unit = { 14 | 15 | r match { 16 | case o: ConfigObject => 17 | o.keySet().toScala().foreach { key => 18 | val nextPath = if (path.isEmpty) key else path + "." + key 19 | doWalk(nextPath, o.get(key)) 20 | } 21 | case _ => 22 | result += path -> r.unwrapped() 23 | } 24 | } 25 | 26 | doWalk("", root) 27 | 28 | result.toMap 29 | } 30 | 31 | def convert(configValue: ConfigValue): java.util.Map[String, AnyRef] = { 32 | walk(configValue).toJava() 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/Main.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import java.util.Collections 4 | 5 | import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient 6 | import it.agilelab.darwin.common.compat._ 7 | import org.apache.avro.{ Schema, SchemaBuilder } 8 | 9 | object Main { 10 | def main(args: Array[String]): Unit = { 11 | // to run this main https://github.com/confluentinc/cp-all-in-one/blob/6.0.0-post/cp-all-in-one/docker-compose.yml 12 | // TODO make this main meaningful 13 | val maxSchemas = 1000 14 | val options = ConfluentConnectorOptions(List("http://localhost:8081"), Collections.emptyMap(), maxSchemas) 15 | 16 | val client = new CachedSchemaRegistryClient( 17 | options.endpoints.toJavaList(), 18 | options.maxCachedSchemas, 19 | options.config 20 | ) 21 | val connector = new ConfluentConnector(options, client) 22 | 23 | connector.fullLoad().foreach(println) 24 | 25 | val expected: Schema = SchemaBuilder 26 | .record("myrecord") 27 | .namespace("it.agilelab.record") 28 | .fields() 29 | .requiredString("myfield") 30 | .optionalString("ciccio") 31 | .endRecord() 32 | 33 | expected.addProp("x-darwin-subject", "prova2-value": AnyRef) 34 | 35 | val id = connector.fingerprint(expected) 36 | 37 | connector.insert(Seq((id, expected))) 38 | 39 | connector.fullLoad().foreach(println) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /confluent/src/test/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorCreatorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.{ 
Config, ConfigFactory } 4 | import org.scalatest.flatspec.AnyFlatSpec 5 | import org.scalatest.matchers.should.Matchers 6 | 7 | class ConfluentConnectorCreatorSuite extends AnyFlatSpec with Matchers { 8 | 9 | "connector" should "create an instance" in { 10 | 11 | val conf: Config = ConfigFactory.parseString(""" 12 | | endpoints: ["endpoint-one", "endpoint-two"] 13 | | max-cached-schemas: 1000 14 | | 15 | | kafka.schemaregistry.other: 1 16 | | kafka.schemaregistry: { 17 | | other2: "stringa" 18 | | } 19 | |""".stripMargin) 20 | 21 | val connector = new ConfluentConnectorCreator() 22 | 23 | val options = ConfluentConnectorOptions.fromConfig(conf) 24 | 25 | val result = connector.create(conf) 26 | 27 | assert(result != null) 28 | 29 | val endpoints = options.config.get("endpoints").asInstanceOf[java.util.List[String]] 30 | 31 | endpoints.get(0) should be("endpoint-one") 32 | endpoints.get(1) should be("endpoint-two") 33 | 34 | options.config.get("kafka.schemaregistry.other").asInstanceOf[Int] should be(1) 35 | options.config.get("kafka.schemaregistry.other2").asInstanceOf[String] should be("stringa") 36 | 37 | val maxCached = 1000 38 | options.config.get("max-cached-schemas").asInstanceOf[Int] should be(maxCached) 39 | 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /confluent/src/test/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import java.util.Collections 4 | 5 | import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient 6 | import org.apache.avro.{ Schema, SchemaBuilder } 7 | import org.scalatest.flatspec.AnyFlatSpec 8 | import org.scalatest.matchers.should.Matchers 9 | import org.scalatest.{ BeforeAndAfterEach, OptionValues } 10 | 11 | class ConfluentConnectorSuite extends AnyFlatSpec with BeforeAndAfterEach with OptionValues with Matchers { 12 | 13 | "confluent connector" should "insert schemas and generate ids" in { 14 | 15 | val mockRegistryClient = new MockSchemaRegistryClient() 16 | val maxCachedSchemas = 1000 17 | val connector = new ConfluentConnector( 18 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), maxCachedSchemas), 19 | client = mockRegistryClient 20 | ) 21 | 22 | val expected = SchemaBuilder.array().items(Schema.create(Schema.Type.STRING)) 23 | expected.addProp("x-darwin-subject", "prova": AnyRef) 24 | 25 | val id = connector.fingerprint(expected) 26 | 27 | connector.insert(Seq((id, expected))) 28 | 29 | connector.findSchema(id).value shouldBe expected 30 | 31 | } 32 | 33 | "confluent connector" should "be able to preload schemas" in { 34 | 35 | val expected = SchemaBuilder.array().items(Schema.create(Schema.Type.STRING)) 36 | expected.addProp("x-darwin-subject", "prova": AnyRef) 37 | val expected2 = SchemaBuilder.array().items(Schema.create(Schema.Type.INT)) 38 | expected2.addProp("x-darwin-subject", "prova2": AnyRef) 39 | 40 | val mockRegistryClient = new MockSchemaRegistryClient() 41 | 42 | mockRegistryClient.register("prova", expected) 43 | mockRegistryClient.register("prova2", expected2) 44 | 45 | val maxCachedSchemas = 1000 46 | 47 | val connector = new ConfluentConnector( 48 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), maxCachedSchemas), 49 | client = mockRegistryClient 50 | ) 51 | 52 | val fullLoaded = connector.fullLoad() 53 | 54 | fullLoaded should contain 
theSameElementsAs Seq((1, expected), (2, expected2)) 55 | 56 | } 57 | 58 | "confluent connector" should "be able to fetch latest schema for subject" in { 59 | 60 | val expected = SchemaBuilder 61 | .record("record") 62 | .fields() 63 | .requiredString("stringField") 64 | .endRecord() 65 | 66 | val expected2 = SchemaBuilder 67 | .record("record") 68 | .fields() 69 | .requiredString("stringField") 70 | .nullableString("stringField2", "default-for-nullable") 71 | .endRecord() 72 | 73 | expected.addProp("x-darwin-subject", "prova": AnyRef) 74 | expected2.addProp("x-darwin-subject", "prova": AnyRef) 75 | 76 | val mockRegistryClient = new MockSchemaRegistryClient() 77 | 78 | mockRegistryClient.register("prova", expected) 79 | mockRegistryClient.register("prova", expected2) 80 | 81 | val maxCachedSchemas = 1000 82 | 83 | val connector = new ConfluentConnector( 84 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), maxCachedSchemas), 85 | client = mockRegistryClient 86 | ) 87 | 88 | val fullLoaded = connector.fullLoad() 89 | 90 | fullLoaded should contain theSameElementsAs Seq((1, expected), (2, expected2)) 91 | 92 | val latestResult = connector.findIdForSubjectLatestVersion("prova") 93 | 94 | val allVersions = connector.findVersionsForSubject("prova") 95 | 96 | val parser = (schema: String) => new Schema.Parser().parse(schema) 97 | 98 | val versionsByVersionId = allVersions 99 | .map(x => connector.findIdForSubjectVersion("prova", x)) 100 | .map(x => x.getId -> parser(x.getSchema)) 101 | 102 | latestResult.getId should be(2) 103 | 104 | versionsByVersionId should contain theSameElementsAs Seq((1, expected), (2, expected2)) 105 | 106 | } 107 | 108 | "confluent connector" should "detect a missing x-darwin-subject" in { 109 | val expected = SchemaBuilder.array().items(Schema.create(Schema.Type.STRING)) 110 | 111 | val mockRegistryClient = new MockSchemaRegistryClient() 112 | 113 | mockRegistryClient.register("prova", expected) 114 | 115 | val maxCachedSchemas = 1000 116 | 117 | val connector = new ConfluentConnector( 118 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), maxCachedSchemas), 119 | client = mockRegistryClient 120 | ) 121 | 122 | val exception = intercept[IllegalArgumentException] { 123 | connector.insert(Seq(expected).map(schema => connector.fingerprint(schema) -> schema)) 124 | } 125 | 126 | exception.getMessage should be("Schema does not contain the [x-darwin-subject] extension") 127 | 128 | } 129 | 130 | it should "return None if fetching latest schema of non-existing subject" in { 131 | 132 | val mockRegistryClient = new MockSchemaRegistryClient() 133 | 134 | val maxCachedSchemas = 1000 135 | 136 | val connector = new ConfluentConnector( 137 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), maxCachedSchemas), 138 | client = mockRegistryClient 139 | ) 140 | 141 | connector.retrieveLatestSchema("pippo") shouldBe None 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /core/src/main/java/it/agilelab/darwin/annotations/AvroSerde.java: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.annotations; 2 | 3 | public @interface AvroSerde { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /core/src/main/java/it/agilelab/darwin/manager/IdSchemaPair.java: -------------------------------------------------------------------------------- 1 | package 
it.agilelab.darwin.manager; 2 | 3 | import org.apache.avro.Schema; 4 | 5 | public class IdSchemaPair { 6 | private final long id; 7 | private final Schema schema; 8 | 9 | private IdSchemaPair(long id, Schema schema) { 10 | this.id = id; 11 | this.schema = schema; 12 | } 13 | 14 | public long getId() { 15 | return id; 16 | } 17 | 18 | public Schema getSchema() { 19 | return schema; 20 | } 21 | 22 | public static IdSchemaPair create(long id, Schema schema) { 23 | return new IdSchemaPair(id, schema); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /core/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | createTable: false 2 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/AvroSchemaCache.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import org.apache.avro.Schema 4 | 5 | /** 6 | * Generic definition of the cache used by the manager to store the data loaded from the external storage. 7 | * @param schemas a sequence of (ID, schema) used to initialize the cache values 8 | */ 9 | abstract class AvroSchemaCache(schemas: Seq[(Long, Schema)]) { 10 | 11 | /** 12 | * Retrieves a registered schema for the input ID. 13 | * 14 | * @param id the Long ID of the schema 15 | * @return the Schema associated to the input ID 16 | */ 17 | def getSchema(id: Long): Option[Schema] 18 | 19 | /** 20 | * Tests if the input schema is contained inside the cache. 21 | * 22 | * @param schema a Schema that the cache could contain 23 | * @return a pair containing: a boolean that is true if the schema is contained in the cache and the ID of the 24 | * schema in any case 25 | */ 26 | def contains(schema: Schema): (Boolean, Long) 27 | 28 | /** 29 | * Creates a new instance of [[AvroSchemaCache]] with the original values plus the input ones. 30 | * 31 | * @param values new pair (ID, schema) to insert inside the cache 32 | * @return a new instance of [[AvroSchemaCache]] containing the new values in addition to the original ones. 33 | */ 34 | def insert(values: Seq[(Long, Schema)]): AvroSchemaCache 35 | 36 | /** 37 | * Retrieves all registered schemas 38 | * 39 | * @return A Sequence of (ID, Schema) 40 | */ 41 | def getAll: Seq[(Long, Schema)] 42 | } 43 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/AvroSchemaCacheFingerprint.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import it.agilelab.darwin.common.Logging 4 | import org.apache.avro.Schema 5 | 6 | /** 7 | * Implementation of [[AvroSchemaCache]] that uses Fingerprint64 as IDs. 
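 * A usage sketch (assuming Avro's SchemaNormalization.parsingFingerprint64 as the fingerprinting function; names are illustrative):
 * {{{
 * val schema: Schema = SchemaBuilder.builder().stringType()
 * val cache          = AvroSchemaCacheFingerprint(Seq.empty, SchemaNormalization.parsingFingerprint64)
 * val (found, id)    = cache.contains(schema)                          // the id is computed even on a miss
 * val updated        = if (found) cache else cache.insert(Seq(id -> schema))
 * updated.getSchema(id)                                                // Some(schema)
 * }}}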
8 | * 9 | * @param schemas a sequence of (ID, schema) used to initialize the cache values 10 | */ 11 | case class AvroSchemaCacheFingerprint(schemas: Seq[(Long, Schema)], fingerPrinter: Schema => Long) 12 | extends AvroSchemaCache(schemas) 13 | with Logging { 14 | log.debug(s"initialization of the cache with ${schemas.size} schemas") 15 | private val _table: Map[Long, Schema] = schemas.toMap 16 | log.debug("cache initialized") 17 | 18 | override def getSchema(id: Long): Option[Schema] = _table.get(id) 19 | 20 | override def contains(schema: Schema): (Boolean, Long) = { 21 | val id = fingerPrinter(schema) 22 | _table.contains(id) -> id 23 | } 24 | 25 | override def insert(values: Seq[(Long, Schema)]): AvroSchemaCache = 26 | AvroSchemaCacheFingerprint(_table.toSeq ++ values, fingerPrinter) 27 | 28 | /** 29 | * Retrieves all registered schemas 30 | * 31 | * @return A Sequence of (ID, Schema) 32 | */ 33 | override def getAll: Seq[(Long, Schema)] = _table.toSeq 34 | } 35 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/AvroSchemaManagerFactory.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ ConnectorFactory, DarwinConcurrentHashMap, Logging } 5 | import it.agilelab.darwin.manager.exception.ConnectorNotFoundException 6 | import it.agilelab.darwin.manager.util.{ ConfigUtil, ConfigurationKeys } 7 | 8 | /** 9 | * Factory used to obtain the desired implementation of AvroSchemaManager. 10 | * First of all the initialize method should be called passing the configuration (it will return an instance of 11 | * AvroSchemaManager. Then, the same instance can be retrieved using the getInstance method without passing the 12 | * configuration anymore. 13 | */ 14 | object AvroSchemaManagerFactory extends Logging { 15 | 16 | private val _instancePool: DarwinConcurrentHashMap[String, AvroSchemaManager] = 17 | DarwinConcurrentHashMap.empty[String, AvroSchemaManager] 18 | 19 | private def configKey(c: Config): String = { 20 | ConfigUtil.printConfig(c) 21 | } 22 | 23 | /** 24 | * Returns an instance of AvroSchemaManager that can be used to register and retrieve schemas. 
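 * A usage sketch (assuming the loaded Config carries the darwin keys: type, connector and endianness):
 * {{{
 * val config  = ConfigFactory.load()
 * val manager = AvroSchemaManagerFactory.initialize(config)   // creates (or reuses) the manager for this config
 * val same    = AvroSchemaManagerFactory.getInstance(config)  // later lookups return the initialized instance
 * }}}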
25 | * 26 | * @param config the Config that is passed to the connector 27 | * @return an instance of AvroSchemaManager 28 | */ 29 | @throws[ConnectorNotFoundException] 30 | def initialize(config: Config): AvroSchemaManager = { 31 | val key = configKey(config) 32 | lazy val mappingFunc = { 33 | log.debug("creating instance of AvroSchemaManager") 34 | val endianness = ConfigUtil.stringToEndianness(config.getString(ConfigurationKeys.ENDIANNESS)) 35 | val result = config.getString(ConfigurationKeys.MANAGER_TYPE) match { 36 | case ConfigurationKeys.CACHED_EAGER => 37 | new CachedEagerAvroSchemaManager(ConnectorFactory.connector(config), endianness) 38 | case ConfigurationKeys.CACHED_LAZY => 39 | new CachedLazyAvroSchemaManager(ConnectorFactory.connector(config), endianness) 40 | case ConfigurationKeys.LAZY => 41 | new LazyAvroSchemaManager(ConnectorFactory.connector(config), endianness) 42 | case _ => 43 | throw new IllegalArgumentException( 44 | s"No valid manager can be created for" + 45 | s" ${ConfigurationKeys.MANAGER_TYPE} key ${config.getString(ConfigurationKeys.MANAGER_TYPE)}" 46 | ) 47 | } 48 | log.debug("AvroSchemaManager instance created") 49 | result 50 | } 51 | _instancePool.getOrElseUpdate(key, mappingFunc) 52 | } 53 | 54 | /** 55 | * Returns the initialized instance of AvroSchemaManager that can be used to register and retrieve schemas. 56 | * The instance must be created once using the initialize method passing a configuration before calling this method. 57 | * 58 | * @return the initialized instance of AvroSchemaManager 59 | */ 60 | def getInstance(config: Config): AvroSchemaManager = { 61 | _instancePool.getOrElse( 62 | configKey(config), 63 | throw new IllegalArgumentException( 64 | s"No valid manager can be found for" + 65 | s" ${ConfigurationKeys.MANAGER_TYPE} key ${config.getString(ConfigurationKeys.MANAGER_TYPE)}" 66 | ) 67 | ) 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/CachedAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | import java.util.concurrent.atomic.AtomicReference 5 | 6 | import it.agilelab.darwin.common.Connector 7 | import org.apache.avro.Schema 8 | 9 | /** 10 | * Implementation of AvroSchemaManager that defines a cache where the storage data is loaded, in order to reduce the 11 | * number of accesses to the storage. 12 | */ 13 | abstract class CachedAvroSchemaManager(connector: Connector, endianness: ByteOrder) 14 | extends AvroSchemaManager(connector, endianness) { 15 | protected val _cache: AtomicReference[Option[AvroSchemaCache]] = new AtomicReference[Option[AvroSchemaCache]](None) 16 | 17 | def cache: AvroSchemaCache = _cache.get 18 | .getOrElse( 19 | throw new IllegalAccessException( 20 | "Cache not loaded: accesses are allowed only if the cache has been " + 21 | "loaded" 22 | ) 23 | ) 24 | 25 | initialize() 26 | 27 | private def initialize(): Unit = { 28 | log.debug("cache initialization...") 29 | _cache.compareAndSet(None, Some(AvroSchemaCacheFingerprint(connector.fullLoad(), connector.fingerprint))) 30 | log.debug("cache initialized") 31 | } 32 | 33 | /** 34 | * Reloads all the schemas from the previously configured storage. 35 | * Throws an exception if the cache wasn't already loaded (the getInstance method must always be used to 36 | * initialize the cache using the required configuration). 
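 * A sketch of the intended use (names are illustrative; the schema is assumed to be written to the storage outside this manager instance):
 * {{{
 * connector.insert(Seq(newId -> newSchema))   // registered directly on the storage
 * manager.getSchema(newId)                    // an eagerly cached manager still misses it
 * manager.reload()                            // re-reads the storage content into the cache
 * manager.getSchema(newId)                    // now Some(newSchema)
 * }}}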
37 | */ 38 | override def reload(): AvroSchemaManager = { 39 | log.debug("reloading cache...") 40 | _cache.set(Some(AvroSchemaCacheFingerprint(connector.fullLoad(), connector.fingerprint))) 41 | log.debug("cache reloaded") 42 | this 43 | } 44 | 45 | override def registerAll(schemas: Seq[Schema]): Seq[(Long, Schema)] = { 46 | log.debug(s"registering ${schemas.size} schemas...") 47 | val (alreadyInCache, notInCache) = schemas.map(s => (cache.contains(s), s)).partition(_._1._1) 48 | val inserted = notInCache.map(e => e._1._2 -> e._2) 49 | connector.insert(inserted) 50 | val allSchemas = alreadyInCache.map(e => e._1._2 -> e._2) ++ inserted 51 | _cache.set(Some(cache.insert(inserted))) //TODO review 52 | log.debug(s"${allSchemas.size} schemas registered") 53 | allSchemas 54 | } 55 | 56 | /** 57 | * Retrieves all registered schemas 58 | * 59 | * @return A Sequence of (ID, Schema) 60 | */ 61 | override def getAll: Seq[(Long, Schema)] = cache.getAll 62 | } 63 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/CachedEagerAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | 5 | import it.agilelab.darwin.common.Connector 6 | import org.apache.avro.Schema 7 | 8 | /** 9 | * Implementation of CachedAvroSchemaManager that loads all the schemas into the cache at startup and doesn't 10 | * perform any other accesses to the storage: each retrieve is performed onto the cache. 11 | */ 12 | class CachedEagerAvroSchemaManager(connector: Connector, endianness: ByteOrder) 13 | extends CachedAvroSchemaManager(connector, endianness) { 14 | override def getSchema(id: Long): Option[Schema] = cache.getSchema(id) 15 | } 16 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/CachedLazyAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | 5 | import it.agilelab.darwin.common.Connector 6 | import org.apache.avro.Schema 7 | 8 | /** 9 | * Implementation of CachedAvroSchemaManager that loads all the schemas into the cache at startup and perform 10 | * all the retrieves onto the cache; an access to the storage is performed only if there is a cache miss. 
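 * A sketch of the resulting behaviour (id is an illustrative fingerprint known to the connector but not yet cached):
 * {{{
 * manager.getSchema(id)   // cache miss: falls back to connector.findSchema(id) and caches the result
 * manager.getSchema(id)   // served from the cache, no further storage access
 * }}}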
11 | */ 12 | class CachedLazyAvroSchemaManager(connector: Connector, endianness: ByteOrder) 13 | extends CachedAvroSchemaManager(connector, endianness) { 14 | 15 | override def getSchema(id: Long): Option[Schema] = { 16 | cache.getSchema(id).orElse { 17 | val schema: Option[Schema] = connector.findSchema(id) 18 | schema.foreach(s => _cache.set(Some(cache.insert(Seq(id -> s))))) 19 | schema 20 | } 21 | } 22 | 23 | override def getAll: Seq[(Long, Schema)] = { 24 | _cache.set(Some(cache.insert(connector.fullLoad()))) 25 | cache.getAll 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/LazyAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | 5 | import it.agilelab.darwin.common.Connector 6 | import org.apache.avro.Schema 7 | 8 | /** 9 | * Implementation of AvroSchemaManager that performs all the operations directly on the storage (retrievals and 10 | * insertions). 11 | */ 12 | class LazyAvroSchemaManager(connector: Connector, endianness: ByteOrder) 13 | extends AvroSchemaManager(connector, endianness) { 14 | 15 | override def getSchema(id: Long): Option[Schema] = connector.findSchema(id) 16 | 17 | override def registerAll(schemas: Seq[Schema]): Seq[(Long, Schema)] = { 18 | val schemasWithIds = schemas.map(s => getId(s) -> s) 19 | connector.insert(schemasWithIds) 20 | schemasWithIds 21 | } 22 | 23 | override def reload(): AvroSchemaManager = this 24 | 25 | override def getAll: Seq[(Long, Schema)] = connector.fullLoad() 26 | } 27 | -------------------------------------------------------------------------------- /docs/img/darwin_eager_cached_schema.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/darwin_eager_cached_schema.jpg -------------------------------------------------------------------------------- /docs/img/darwin_interaction.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/darwin_interaction.jpg -------------------------------------------------------------------------------- /docs/img/darwin_lazy_cached_schema.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/darwin_lazy_cached_schema.jpg -------------------------------------------------------------------------------- /docs/img/darwin_lazy_schema.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/darwin_lazy_schema.jpg -------------------------------------------------------------------------------- /docs/img/logo/darwin-icon.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/logo/darwin-icon.ai -------------------------------------------------------------------------------- /docs/img/logo/darwin-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/logo/darwin-icon.png -------------------------------------------------------------------------------- /hbase/src/main/scala/it/agilelab/darwin/connector/hbase/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | object ConfigurationKeys { 4 | val TABLE: String = "table" 5 | val NAMESPACE: String = "namespace" 6 | val HBASE_SITE: String = "hbaseSite" 7 | val CORE_SITE: String = "coreSite" 8 | val IS_SECURE: String = "isSecure" 9 | val PRINCIPAL: String = "principal" 10 | val KEYTAB_PATH: String = "keytabPath" 11 | } 12 | -------------------------------------------------------------------------------- /hbase/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseConnector.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | import it.agilelab.darwin.common.{ using, Connector, Logging } 6 | import org.apache.avro.Schema 7 | import org.apache.avro.Schema.Parser 8 | import org.apache.commons.io.IOUtils 9 | import org.apache.hadoop.conf.Configuration 10 | import org.apache.hadoop.fs.Path 11 | import org.apache.hadoop.hbase._ 12 | import org.apache.hadoop.hbase.client._ 13 | import org.apache.hadoop.hbase.security.User 14 | import org.apache.hadoop.hbase.util.Bytes 15 | import org.apache.hadoop.security.UserGroupInformation 16 | 17 | object HBaseConnector extends Logging { 18 | 19 | private var _instance: HBaseConnector = _ 20 | 21 | def instance(hbaseConfig: Config): HBaseConnector = { 22 | synchronized { 23 | if (_instance == null) { 24 | log.debug("Initialization of HBase connector") 25 | _instance = HBaseConnector(hbaseConfig) 26 | log.debug("HBase connector initialized") 27 | } 28 | } 29 | _instance 30 | } 31 | } 32 | 33 | case class HBaseConnector(config: Config) extends Connector with Logging { 34 | 35 | val DEFAULT_NAMESPACE: String = "AVRO" 36 | val DEFAULT_TABLENAME: String = "SCHEMA_REPOSITORY" 37 | 38 | val TABLE_NAME_STRING: String = if (config.hasPath(ConfigurationKeys.TABLE)) { 39 | config.getString(ConfigurationKeys.TABLE) 40 | } else { 41 | DEFAULT_TABLENAME 42 | } 43 | 44 | val NAMESPACE_STRING: String = if (config.hasPath(ConfigurationKeys.NAMESPACE)) { 45 | config.getString(ConfigurationKeys.NAMESPACE) 46 | } else { 47 | DEFAULT_NAMESPACE 48 | } 49 | 50 | lazy val TABLE_NAME: TableName = TableName.valueOf(Bytes.toBytes(NAMESPACE_STRING), Bytes.toBytes(TABLE_NAME_STRING)) 51 | 52 | val CF_STRING = "0" 53 | val CF: Array[Byte] = Bytes.toBytes(CF_STRING) 54 | val QUALIFIER_SCHEMA: Array[Byte] = Bytes.toBytes("schema") 55 | val QUALIFIER_NAME: Array[Byte] = Bytes.toBytes("name") 56 | val QUALIFIER_NAMESPACE: Array[Byte] = Bytes.toBytes("namespace") 57 | 58 | log.debug("Creating default HBaseConfiguration") 59 | val configuration: Configuration = HBaseConfiguration.create() 60 | log.debug("Created default HBaseConfiguration") 61 | 62 | if (config.hasPath(ConfigurationKeys.CORE_SITE) && config.hasPath(ConfigurationKeys.HBASE_SITE)) { 63 | log.debug(addResourceMessage(config.getString(ConfigurationKeys.CORE_SITE))) 64 | configuration.addResource(new Path(config.getString(ConfigurationKeys.CORE_SITE))) 65 | log.debug(addResourceMessage(config.getString(ConfigurationKeys.HBASE_SITE))) 66 | 
configuration.addResource(new Path(config.getString(ConfigurationKeys.HBASE_SITE))) 67 | } 68 | 69 | private def addResourceMessage(s: String) = { 70 | val ADDING_RESOURCE = "Adding resource: " 71 | ADDING_RESOURCE + s 72 | } 73 | 74 | val connection: Connection = if (config.getBoolean(ConfigurationKeys.IS_SECURE)) { 75 | log.debug(s"Calling UserGroupInformation.setConfiguration()") 76 | UserGroupInformation.setConfiguration(configuration) 77 | 78 | log.debug( 79 | s"Calling UserGroupInformation.loginUserFromKeytab(${config.getString(ConfigurationKeys.PRINCIPAL)}, " + 80 | s"${config.getString(ConfigurationKeys.KEYTAB_PATH)})" 81 | ) 82 | val ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI( 83 | config.getString(ConfigurationKeys.PRINCIPAL), 84 | config.getString(ConfigurationKeys.KEYTAB_PATH) 85 | ) 86 | UserGroupInformation.setLoginUser(ugi) 87 | val user = User.create(ugi) 88 | log.trace( 89 | s"initialization of HBase connection with configuration:\n " + 90 | s"${configuration.iterator().toScala().map { entry => entry.getKey -> entry.getValue }.mkString("\n")}" 91 | ) 92 | ConnectionFactory.createConnection(configuration, user) 93 | } else { 94 | log.trace( 95 | s"initialization of HBase connection with configuration:\n " + 96 | s"${configuration.iterator().toScala().map { entry => entry.getKey -> entry.getValue }.mkString("\n")}" 97 | ) 98 | ConnectionFactory.createConnection(configuration) 99 | } 100 | 101 | log.debug("HBase connection initialized") 102 | sys.addShutdownHook { 103 | // log.info(s"closing HBase connection pool") 104 | IOUtils.closeQuietly(connection) 105 | } 106 | 107 | //TODO this must be a def (a new Parser is created each time) because if the same Parser is used, it fails if you 108 | //TODO parse a class A and after it a class B that has a field of type A => ERROR: Can't redefine type A. 
109 | //TODO Sadly the Schema.parse() method that would solve this problem is now deprecated 110 | private def parser: Parser = new Parser() 111 | 112 | override def fullLoad(): Seq[(Long, Schema)] = { 113 | log.debug(s"loading all schemas from table $NAMESPACE_STRING:$TABLE_NAME_STRING") 114 | val scanner: Iterable[Result] = connection.getTable(TABLE_NAME).getScanner(CF, QUALIFIER_SCHEMA).toScala() 115 | val schemas = scanner.map { result => 116 | val key = Bytes.toLong(result.getRow) 117 | val value = Bytes.toString(result.getValue(CF, QUALIFIER_SCHEMA)) 118 | key -> parser.parse(value) 119 | }.toSeq 120 | log.debug(s"${schemas.size} loaded from HBase") 121 | schemas 122 | } 123 | 124 | override def insert(schemas: Seq[(Long, Schema)]): Unit = { 125 | if (schemas.nonEmpty) { 126 | 127 | log.debug(s"inserting ${schemas.size} schemas in HBase table $NAMESPACE_STRING:$TABLE_NAME_STRING") 128 | using(connection.getBufferedMutator(TABLE_NAME)) { mutator => 129 | schemas.map { case (id, schema) => 130 | val put = new Put(Bytes.toBytes(id)) 131 | put.addColumn(CF, QUALIFIER_SCHEMA, Bytes.toBytes(schema.toString)) 132 | put.addColumn(CF, QUALIFIER_NAME, Bytes.toBytes(schema.getName)) 133 | put.addColumn(CF, QUALIFIER_NAMESPACE, Bytes.toBytes(schema.getNamespace)) 134 | put 135 | }.foreach(mutator.mutate) 136 | mutator.flush() 137 | log.debug(s"insertion of schemas into $NAMESPACE_STRING:$TABLE_NAME_STRING successful") 138 | } 139 | } 140 | 141 | } 142 | 143 | override def createTable(): Unit = { 144 | using(connection.getAdmin) { admin => 145 | if (!admin.listNamespaceDescriptors().exists(_.getName == NAMESPACE_STRING)) { 146 | log.info(s"Namespace $NAMESPACE_STRING does not exists, creating it") 147 | admin.createNamespace(NamespaceDescriptor.create(NAMESPACE_STRING).build()) 148 | } 149 | if (!tableExists()) { 150 | log.info(s"Table $TABLE_NAME does not exists, creating it") 151 | HBaseUtils.createTable(admin, TABLE_NAME, CF) 152 | } 153 | } 154 | } 155 | 156 | override def tableExists(): Boolean = { 157 | using(connection.getAdmin) { admin => 158 | admin.tableExists(TABLE_NAME) 159 | } 160 | } 161 | 162 | override def tableCreationHint(): String = { 163 | s"""To create namespace and table from an HBase shell issue: 164 | | create_namespace '$NAMESPACE_STRING' 165 | | create '$NAMESPACE_STRING:$TABLE_NAME_STRING', '$CF_STRING'""".stripMargin 166 | } 167 | 168 | override def findSchema(id: Long): Option[Schema] = { 169 | log.debug(s"loading a schema with id = $id from table $NAMESPACE_STRING:$TABLE_NAME_STRING") 170 | val get: Get = new Get(Bytes.toBytes(id)) 171 | get.addColumn(CF, QUALIFIER_SCHEMA) 172 | val result: Result = connection.getTable(TABLE_NAME).get(get) 173 | val value: Option[Array[Byte]] = Option(result.getValue(CF, QUALIFIER_SCHEMA)) 174 | val schema: Option[Schema] = value.map(v => parser.parse(Bytes.toString(v))) 175 | log.debug(s"$schema loaded from HBase for id = $id") 176 | schema 177 | } 178 | 179 | /** 180 | * Retrieves the latest schema for a given string identifier (not to be confused with the fingerprint id). 
181 | * This API might not be implemented by all connectors, which should return None 182 | */ 183 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = None 184 | } 185 | -------------------------------------------------------------------------------- /hbase/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, Logging } 5 | 6 | class HBaseConnectorCreator extends ConnectorCreator with Logging { 7 | override def create(config: Config): Connector = { 8 | log.debug("creating the HBase connector") 9 | val connector: Connector = HBaseConnector.instance(config) 10 | log.debug("HBase connector created") 11 | connector 12 | } 13 | 14 | /** 15 | * @return the name of the Connector 16 | */ 17 | override def name(): String = "hbase" 18 | } 19 | -------------------------------------------------------------------------------- /hbase/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | isSecure: false -------------------------------------------------------------------------------- /hbase/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=WARN, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1} - %m%n 9 | -------------------------------------------------------------------------------- /hbase/src/test/scala/it/agilelab/darwin/connector/hbase/HBase2Mock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | case class HBase2Mock(one: Boolean, two: Long) 4 | -------------------------------------------------------------------------------- /hbase/src/test/scala/it/agilelab/darwin/connector/hbase/HBaseConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import java.nio.file.Files 4 | import java.util.UUID 5 | 6 | import com.typesafe.config.{ ConfigFactory, ConfigValueFactory } 7 | import it.agilelab.darwin.common.Connector 8 | import org.apache.avro.reflect.ReflectData 9 | import org.apache.avro.{ Schema, SchemaNormalization } 10 | import org.apache.hadoop.hbase.{ HBaseConfiguration, HBaseTestingUtility, MiniHBaseCluster } 11 | import org.scalatest.BeforeAndAfterAll 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | class HBaseConnectorSuite extends AnyFlatSpec with Matchers with BeforeAndAfterAll { 16 | 17 | private var connector: Connector = _ 18 | private var minicluster: MiniHBaseCluster = _ 19 | 20 | "HBaseConnector" should "load all existing schemas" in { 21 | connector.fullLoad() 22 | } 23 | 24 | it should "insert and retrieve" in { 25 | val schemas = Seq(ReflectData.get().getSchema(classOf[HBaseMock]), ReflectData.get().getSchema(classOf[HBase2Mock])) 26 | .map(s => SchemaNormalization.parsingFingerprint64(s) -> s) 27 | connector.insert(schemas) 
28 | val loaded: Seq[(Long, Schema)] = connector.fullLoad() 29 | assert(loaded.size == schemas.size) 30 | assert(loaded.forall(schemas.contains)) 31 | val schema = connector.findSchema(loaded.head._1) 32 | assert(schema.isDefined) 33 | assert(schema.get == loaded.head._2) 34 | val noSchema = connector.findSchema(-1L) 35 | assert(noSchema.isEmpty) 36 | } 37 | 38 | "connector.tableCreationHint" should "print the correct hint for table creation" in { 39 | connector.tableCreationHint() should be("""To create namespace and table from an HBase shell issue: 40 | | create_namespace 'AVRO' 41 | | create 'AVRO:SCHEMA_REPOSITORY', '0'""".stripMargin) 42 | } 43 | 44 | "connector.tableExists" should "return true with existent table" in { 45 | connector.tableExists() should be(true) 46 | } 47 | 48 | override def beforeAll(): Unit = { 49 | val testUUID = UUID.randomUUID().toString 50 | val hConf = HBaseConfiguration.create() 51 | hConf.set("test.build.data.basedirectory", s"./target/hbase-test-data-$testUUID") 52 | val util = new HBaseTestingUtility(hConf) 53 | minicluster = util.startMiniCluster(1, true) 54 | val confFile = Files.createTempFile(testUUID, ".xml") 55 | // Hbase connector can only load configurations from a file path so we need to render the hadoop conf 56 | val stream = Files.newOutputStream(confFile) 57 | // mc.getConfiguration.writeXml(System.out) 58 | minicluster.getConfiguration.writeXml(stream) 59 | stream.flush() 60 | stream.close() 61 | // HbaseConnector will only load conf if hbase-site and core-site are given, 62 | // we give the same file to each. 63 | sys.addShutdownHook(minicluster.shutdown()) 64 | val config = ConfigFactory 65 | .load() 66 | .withValue(ConfigurationKeys.HBASE_SITE, ConfigValueFactory.fromAnyRef(confFile.toAbsolutePath.toString)) 67 | .withValue(ConfigurationKeys.CORE_SITE, ConfigValueFactory.fromAnyRef(confFile.toAbsolutePath.toString)) 68 | connector = new HBaseConnectorCreator().create(config) 69 | connector.createTable() 70 | } 71 | 72 | override def afterAll(): Unit = { 73 | minicluster.shutdown() 74 | minicluster.waitUntilShutDown() 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /hbase/src/test/scala/it/agilelab/darwin/connector/hbase/HBaseMock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | case class HBaseMock(one: Int, two: String, three: Long, four: HBase2Mock) 4 | -------------------------------------------------------------------------------- /hbase1/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.hbase.HBaseConnectorCreator 2 | -------------------------------------------------------------------------------- /hbase1/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseUtils.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import org.apache.hadoop.hbase.{ HColumnDescriptor, HTableDescriptor, TableName } 4 | import org.apache.hadoop.hbase.client.Admin 5 | 6 | object HBaseUtils { 7 | def createTable(admin: Admin, tableName: TableName, columnFamily: Array[Byte]): Unit = { 8 | admin.createTable(new HTableDescriptor(tableName).addFamily(new HColumnDescriptor(columnFamily))) 9 | } 10 | } 11 | 
-------------------------------------------------------------------------------- /hbase2/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.hbase.HBaseConnectorCreator 2 | -------------------------------------------------------------------------------- /hbase2/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseUtils.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import org.apache.hadoop.hbase.TableName 4 | import org.apache.hadoop.hbase.client.{ Admin, ColumnFamilyDescriptorBuilder, TableDescriptorBuilder } 5 | 6 | object HBaseUtils { 7 | def createTable(admin: Admin, tableName: TableName, columnFamily: Array[Byte]): Unit = { 8 | admin.createTable( 9 | TableDescriptorBuilder 10 | .newBuilder(tableName) 11 | .setColumnFamily( 12 | ColumnFamilyDescriptorBuilder.newBuilder(columnFamily).build() 13 | ) 14 | .build() 15 | ) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sbt -v clean scalastyle +test +doc darwin-hbase2-connector/clean darwin-hbase2-connector/scalastyle +darwin-hbase2-connector/test +darwin-hbase2-connector/doc 3 | -------------------------------------------------------------------------------- /mock-application/src/test/resources/MyNestedClass.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MyNestedClass","namespace":"it.agilelab.darwin.app.mock.classes","fields":[{"name":"id","type":"int"},{"name":"myClass","type":{"type":"record","name":"MyClass","fields":[{"name":"value","type":"int"},{"name":"otherVale","type":"long"}]}},{"name":"my2Class","type":{"type":"map","values":"MyClass"}}]} -------------------------------------------------------------------------------- /mock-application/src/test/resources/OneField.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"OneField","namespace":"it.agilelab.darwin.app.mock.classes","fields":[{"name":"one","type":"int"}]} -------------------------------------------------------------------------------- /mock-application/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | type: cached_eager 2 | connector: "mock" 3 | resources: ["test/MockClassParent.avsc", "test/MockClassAlone.avsc"] -------------------------------------------------------------------------------- /mock-application/src/test/resources/test/MockClassAlone.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassAlone","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"fry","type":"string"},{"name":"bender","type":"long"},{"name":"leela","type":"int"},{"name":"zoidberg","type":"boolean"}]} 2 | -------------------------------------------------------------------------------- /mock-application/src/test/resources/test/MockClassParent.avsc: -------------------------------------------------------------------------------- 1 | 
{"type":"record","name":"MockClassParent","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"one","type":"int"},{"name":"two","type":"string"},{"name":"three","type":"long"},{"name":"four","type":{"type":"record","name":"MockClassChild","fields":[{"name":"twoOne","type":"long"},{"name":"twoTwo","type":"string"}]}}]} 2 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/CachedEagerApplicationSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory } 4 | import it.agilelab.darwin.annotations.AvroSerde 5 | import it.agilelab.darwin.app.mock.classes.{ MyClass, MyNestedClass, NewClass, OneField } 6 | import it.agilelab.darwin.common.compat._ 7 | import it.agilelab.darwin.common.{ Connector, ConnectorFactory, SchemaReader } 8 | import it.agilelab.darwin.manager.{ AvroSchemaManager, CachedEagerAvroSchemaManager } 9 | import org.apache.avro.reflect.ReflectData 10 | import org.apache.avro.{ Schema, SchemaNormalization } 11 | import org.reflections.Reflections 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | import java.lang.reflect.Modifier 16 | import java.nio.ByteOrder 17 | 18 | class BigEndianCachedEagerApplicationSuite extends CachedEagerApplicationSuite(ByteOrder.BIG_ENDIAN) 19 | 20 | class LittleEndianCachedEagerApplicationSuite extends CachedEagerApplicationSuite(ByteOrder.LITTLE_ENDIAN) 21 | 22 | abstract class CachedEagerApplicationSuite(val endianness: ByteOrder) extends AnyFlatSpec with Matchers { 23 | 24 | private val mockClassAloneFingerprint = 6675579114512671233L 25 | private val mockClassParentFingerprint = -6310800772237892477L 26 | 27 | private val config: Config = ConfigFactory.load() 28 | private val connector: Connector = ConnectorFactory.connector(config) 29 | private val manager: AvroSchemaManager = new CachedEagerAvroSchemaManager(connector, endianness) 30 | 31 | "CachedEagerAvroSchemaManager" should "not fail after the initialization" in { 32 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 33 | assert(manager.registerAll(schemas).size == 1) 34 | } 35 | 36 | it should "register a new schema" in { 37 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 38 | manager.registerAll(schemas) 39 | 40 | val id = manager.getId(schemas.head) 41 | assert(manager.getSchema(id).isDefined) 42 | assert(schemas.head == manager.getSchema(id).get) 43 | } 44 | 45 | it should "get all previously registered schemas" in { 46 | val schema: Schema = SchemaReader.readFromResources("MyNestedClass.avsc") 47 | val schema0 = manager.getSchema(mockClassAloneFingerprint) 48 | val schema1 = manager.getSchema(mockClassParentFingerprint) 49 | assert(schema0.isDefined) 50 | assert(schema1.isDefined) 51 | assert(schema0.get != schema1.get) 52 | assert(schema != schema0.get) 53 | assert(schema != schema1.get) 54 | } 55 | 56 | it should "generate all schemas for all the annotated classes with @AvroSerde" in { 57 | val reflections = new Reflections("it.agilelab.darwin.app.mock.classes") 58 | 59 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]).toString 60 | val myNestedSchema = ReflectData.get().getSchema(classOf[MyNestedClass]).toString 61 | val myClassSchema = 
ReflectData.get().getSchema(classOf[MyClass]).toString 62 | 63 | val annotationClass: Class[AvroSerde] = classOf[AvroSerde] 64 | val classes = reflections 65 | .getTypesAnnotatedWith(annotationClass) 66 | .toScala() 67 | .toSeq 68 | .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) 69 | val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName)).toString) 70 | Seq(oneFieldSchema, myClassSchema, myNestedSchema) should contain theSameElementsAs schemas 71 | } 72 | 73 | it should "reload all schemas from the connector" in { 74 | val newSchema = ReflectData.get().getSchema(classOf[NewClass]) 75 | val newId = SchemaNormalization.parsingFingerprint64(newSchema) 76 | assert(manager.getSchema(newId).isEmpty) 77 | 78 | connector.insert(Seq(newId -> newSchema)) 79 | assert(manager.getSchema(newId).isEmpty) 80 | 81 | manager.reload() 82 | assert(manager.getSchema(newId).isDefined) 83 | assert(manager.getSchema(newId).get == newSchema) 84 | } 85 | 86 | it should "not call getId when retrieving a schema out of the cache" in { 87 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]) 88 | var calls = 0 89 | val manager = new CachedEagerAvroSchemaManager( 90 | new Connector { 91 | override def createTable(): Unit = () 92 | override def tableExists(): Boolean = true 93 | override def tableCreationHint(): String = "" 94 | override def fullLoad(): Seq[(Long, Schema)] = Seq.empty 95 | override def insert(schemas: Seq[(Long, Schema)]): Unit = () 96 | override def findSchema(id: Long): Option[Schema] = Some(oneFieldSchema) 97 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = Some(1L -> oneFieldSchema) 98 | }, 99 | endianness 100 | ) { 101 | override def getId(schema: Schema): Long = { 102 | calls += 1 103 | super.getId(schema) 104 | } 105 | } 106 | manager.getSchema(3L) shouldNot be(null) // scalastyle:ignore 107 | calls shouldBe 0 108 | } 109 | 110 | it should "not find the latest schema" in { 111 | manager.retrieveLatestSchema("asdf") shouldBe None 112 | } 113 | 114 | it should "find the latest schema" in { 115 | manager.retrieveLatestSchema("it.agilelab.darwin.connector.mock.testclasses.MockClassParent") shouldBe Some( 116 | mockClassParentFingerprint -> manager.getSchema(mockClassParentFingerprint).get 117 | ) 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/CachedLazyApplicationSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory } 4 | import it.agilelab.darwin.annotations.AvroSerde 5 | import it.agilelab.darwin.app.mock.classes.{ MyClass, MyNestedClass, NewClass, OneField } 6 | import it.agilelab.darwin.common.compat._ 7 | import it.agilelab.darwin.common.{ Connector, ConnectorFactory, SchemaReader } 8 | import it.agilelab.darwin.manager.{ AvroSchemaManager, CachedLazyAvroSchemaManager } 9 | import org.apache.avro.reflect.ReflectData 10 | import org.apache.avro.{ Schema, SchemaNormalization } 11 | import org.reflections.Reflections 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | import java.lang.reflect.Modifier 16 | import java.nio.ByteOrder 17 | 18 | class BigEndianCachedLazyApplicationSuite extends CachedLazyApplicationSuite(ByteOrder.BIG_ENDIAN) 19 | 20 | class LittleEndianCachedLazyApplicationSuite 
extends CachedLazyApplicationSuite(ByteOrder.LITTLE_ENDIAN) 21 | 22 | abstract class CachedLazyApplicationSuite(val endianness: ByteOrder) extends AnyFlatSpec with Matchers { 23 | 24 | private val mockClassAloneFingerprint = 6675579114512671233L 25 | private val mockClassParentFingerprint = -6310800772237892477L 26 | val config: Config = ConfigFactory.load() 27 | val connector: Connector = ConnectorFactory.connector(config) 28 | val manager: AvroSchemaManager = new CachedLazyAvroSchemaManager(connector, endianness) 29 | 30 | "CachedLazyAvroSchemaManager" should "not fail after the initialization" in { 31 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 32 | assert(manager.registerAll(schemas).size == 1) 33 | } 34 | 35 | it should "load all existing schemas and register a new one" in { 36 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 37 | manager.getSchema(0L) 38 | 39 | manager.registerAll(schemas) 40 | 41 | val id = manager.getId(schemas.head) 42 | assert(manager.getSchema(id).isDefined) 43 | assert(schemas.head == manager.getSchema(id).get) 44 | } 45 | 46 | it should "get all previously registered schemas" in { 47 | val schema: Schema = SchemaReader.readFromResources("MyNestedClass.avsc") 48 | val schema0 = manager.getSchema(mockClassAloneFingerprint) 49 | val schema1 = manager.getSchema(mockClassParentFingerprint) 50 | assert(schema0.isDefined) 51 | assert(schema1.isDefined) 52 | assert(schema0.get != schema1.get) 53 | assert(schema != schema0.get) 54 | assert(schema != schema1.get) 55 | } 56 | 57 | it should "generate all schemas for all the annotated classes with @AvroSerde" in { 58 | val reflections = new Reflections("it.agilelab.darwin.app.mock.classes") 59 | 60 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]).toString 61 | val myNestedSchema = ReflectData.get().getSchema(classOf[MyNestedClass]).toString 62 | val myClassSchema = ReflectData.get().getSchema(classOf[MyClass]).toString 63 | 64 | val annotationClass: Class[AvroSerde] = classOf[AvroSerde] 65 | val classes = reflections 66 | .getTypesAnnotatedWith(annotationClass) 67 | .toScala() 68 | .toSeq 69 | .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) 70 | val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName)).toString) 71 | Seq(oneFieldSchema, myClassSchema, myNestedSchema) should contain theSameElementsAs schemas 72 | } 73 | 74 | it should "reload all schemas from the connector" in { 75 | val newSchema = ReflectData.get().getSchema(classOf[NewClass]) 76 | val newId = SchemaNormalization.parsingFingerprint64(newSchema) 77 | assert(manager.getSchema(newId).isEmpty) 78 | 79 | connector.insert(Seq(newId -> newSchema)) 80 | assert(manager.getSchema(newId).isDefined) 81 | assert(manager.getSchema(newId).get == newSchema) 82 | } 83 | 84 | it should "not call getId when retrieving a schema out of the cache" in { 85 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]) 86 | var calls = 0 87 | val manager = new CachedLazyAvroSchemaManager( 88 | new Connector { 89 | override def createTable(): Unit = () 90 | override def tableExists(): Boolean = true 91 | override def tableCreationHint(): String = "" 92 | override def fullLoad(): Seq[(Long, Schema)] = Seq.empty 93 | override def insert(schemas: Seq[(Long, Schema)]): Unit = () 94 | override def findSchema(id: Long): Option[Schema] = Some(oneFieldSchema) 95 | override def retrieveLatestSchema(identifier: String): Option[(Long, 
Schema)] = Some(1L -> oneFieldSchema) 96 | }, 97 | endianness 98 | ) { 99 | override def getId(schema: Schema): Long = { 100 | calls += 1 101 | super.getId(schema) 102 | } 103 | } 104 | manager.getSchema(3L) shouldNot be(null) // scalastyle:ignore 105 | calls shouldBe 0 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/LazyApplicationSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory } 4 | import it.agilelab.darwin.annotations.AvroSerde 5 | import it.agilelab.darwin.app.mock.classes.{ MyClass, MyNestedClass, NewClass, OneField } 6 | import it.agilelab.darwin.common.compat._ 7 | import it.agilelab.darwin.common.{ Connector, ConnectorFactory, SchemaReader } 8 | import it.agilelab.darwin.manager.{ AvroSchemaManager, LazyAvroSchemaManager } 9 | import org.apache.avro.reflect.ReflectData 10 | import org.apache.avro.{ Schema, SchemaNormalization } 11 | import org.reflections.Reflections 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | import java.lang.reflect.Modifier 16 | import java.nio.ByteOrder 17 | 18 | class BigEndianLazyApplicationSuite extends LazyApplicationSuite(ByteOrder.BIG_ENDIAN) 19 | 20 | class LittleEndianLazyApplicationSuite extends LazyApplicationSuite(ByteOrder.LITTLE_ENDIAN) 21 | 22 | abstract class LazyApplicationSuite(endianness: ByteOrder) extends AnyFlatSpec with Matchers { 23 | private val mockClassAloneFingerprint = 6675579114512671233L 24 | private val mockClassParentFingerprint = -6310800772237892477L 25 | val config: Config = ConfigFactory.load() 26 | val connector: Connector = ConnectorFactory.connector(config) 27 | val manager: AvroSchemaManager = new LazyAvroSchemaManager(connector, endianness) 28 | 29 | "LazyAvroSchemaManager" should "not fail after the initialization" in { 30 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 31 | assert(manager.registerAll(schemas).size == 1) 32 | } 33 | 34 | it should "load all existing schemas and register a new one" in { 35 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 36 | manager.getSchema(mockClassAloneFingerprint) 37 | 38 | manager.registerAll(schemas) 39 | 40 | val id = manager.getId(schemas.head) 41 | assert(manager.getSchema(id).isDefined) 42 | assert(schemas.head == manager.getSchema(id).get) 43 | } 44 | 45 | it should "get all previously registered schemas" in { 46 | val schema: Schema = SchemaReader.readFromResources("MyNestedClass.avsc") 47 | val schema0 = manager.getSchema(mockClassAloneFingerprint) 48 | val schema1 = manager.getSchema(mockClassParentFingerprint) 49 | assert(schema0.isDefined) 50 | assert(schema1.isDefined) 51 | assert(schema0.get != schema1.get) 52 | assert(schema != schema0.get) 53 | assert(schema != schema1.get) 54 | } 55 | 56 | it should "generate all schemas for all the annotated classes with @AvroSerde" in { 57 | val reflections = new Reflections("it.agilelab.darwin.app.mock.classes") 58 | 59 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]).toString 60 | val myNestedSchema = ReflectData.get().getSchema(classOf[MyNestedClass]).toString 61 | val myClassSchema = ReflectData.get().getSchema(classOf[MyClass]).toString 62 | 63 | val annotationClass: Class[AvroSerde] = classOf[AvroSerde] 64 | 
val classes = reflections 65 | .getTypesAnnotatedWith(annotationClass) 66 | .toScala() 67 | .toSeq 68 | .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) 69 | val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName)).toString) 70 | Seq(oneFieldSchema, myClassSchema, myNestedSchema) should contain theSameElementsAs schemas 71 | } 72 | 73 | it should "reload all schemas from the connector" in { 74 | val newSchema = ReflectData.get().getSchema(classOf[NewClass]) 75 | val newId = SchemaNormalization.parsingFingerprint64(newSchema) 76 | assert(manager.getSchema(newId).isEmpty) 77 | 78 | connector.insert(Seq(newId -> newSchema)) 79 | assert(manager.getSchema(newId).isDefined) 80 | assert(manager.getSchema(newId).get == newSchema) 81 | } 82 | 83 | it should "not call getId when retrieving a schema out of the cache" in { 84 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]) 85 | var calls = 0 86 | val manager = new LazyAvroSchemaManager( 87 | new Connector { 88 | override def createTable(): Unit = () 89 | override def tableExists(): Boolean = true 90 | override def tableCreationHint(): String = "" 91 | override def fullLoad(): Seq[(Long, Schema)] = Seq.empty 92 | override def insert(schemas: Seq[(Long, Schema)]): Unit = () 93 | override def findSchema(id: Long): Option[Schema] = Some(oneFieldSchema) 94 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = Some(1L -> oneFieldSchema) 95 | }, 96 | endianness 97 | ) { 98 | override def getId(schema: Schema): Long = { 99 | calls += 1 100 | super.getId(schema) 101 | } 102 | } 103 | manager.getSchema(3L) shouldNot be(null) // scalastyle:ignore 104 | calls shouldBe 0 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/ManagerUtilsSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import java.nio.{ ByteBuffer, ByteOrder } 4 | 5 | import com.typesafe.config.ConfigFactory 6 | import it.agilelab.darwin.common.SchemaReader 7 | import it.agilelab.darwin.manager.AvroSchemaManagerFactory 8 | import it.agilelab.darwin.manager.util.{ AvroSingleObjectEncodingUtils, ConfigurationKeys } 9 | import it.agilelab.darwin.manager.util.ByteArrayUtils._ 10 | 11 | import scala.util.Random 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | class BigEndianManagerUtilsSuite extends ManagerUtilsSuite(ByteOrder.BIG_ENDIAN) 16 | 17 | class LittleEndianManagerUtilsSuite extends ManagerUtilsSuite(ByteOrder.LITTLE_ENDIAN) 18 | 19 | abstract class ManagerUtilsSuite(endianness: ByteOrder) extends AnyFlatSpec with Matchers { 20 | 21 | "AvroSchemaManager utilities" should "create a Single-Object encoded byte array" in { 22 | val ORIGINAL_LENGTH: Int = 10 23 | val originalSchema = SchemaReader.readFromResources("OneField.avsc") 24 | val config = 25 | ConfigFactory 26 | .parseMap(new java.util.HashMap[String, String]() { 27 | { 28 | put(ConfigurationKeys.MANAGER_TYPE, ConfigurationKeys.CACHED_EAGER) 29 | put(ConfigurationKeys.ENDIANNESS, endianness.toString) 30 | } 31 | }) 32 | .withFallback(ConfigFactory.load()) 33 | .resolve() 34 | val manager = AvroSchemaManagerFactory.initialize(config) 35 | manager.registerAll(Seq(originalSchema)) 36 | val originalPayload = new Array[Byte](ORIGINAL_LENGTH) 37 | Random.nextBytes(originalPayload) 38 | val 
data: Array[Byte] = manager.generateAvroSingleObjectEncoded(originalPayload, originalSchema) 39 | assert(AvroSingleObjectEncodingUtils.isAvroSingleObjectEncoded(data)) 40 | val (schema, payload) = manager.retrieveSchemaAndAvroPayload(data) 41 | assert(schema == originalSchema) 42 | assert(originalPayload sameElements payload) 43 | } 44 | 45 | it should "convert a long to byte array and back" in { 46 | val longs = (1 to 10).map(_ => Random.nextLong()) 47 | 48 | assert( 49 | longs == longs.map(x => 50 | AvroSingleObjectEncodingUtils 51 | .readLong(ByteBuffer.wrap(x.longToByteArray(endianness)), endianness) 52 | ) 53 | ) 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/TwoConnectorsSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import it.agilelab.darwin.common.ConnectorFactory 5 | import it.agilelab.darwin.connector.hbase.HBaseConnectorCreator 6 | import it.agilelab.darwin.connector.mock.MockConnectorCreator 7 | import it.agilelab.darwin.connector.postgres.PostgresConnectorCreator 8 | import it.agilelab.darwin.manager.util.ConfigurationKeys 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | import org.scalatest.matchers.should.Matchers 11 | 12 | class TwoConnectorsSpec extends AnyFlatSpec with Matchers { 13 | it should "have both HBase and Postgresql available" in { 14 | ConnectorFactory.creators().map(_.getClass) should contain theSameElementsAs ( 15 | classOf[HBaseConnectorCreator] :: classOf[PostgresConnectorCreator] :: classOf[MockConnectorCreator] :: Nil 16 | ) 17 | } 18 | 19 | it should "choose HBase connector over Postgresql one" in { 20 | val config = ConfigFactory.parseString(s"""${ConfigurationKeys.CONNECTOR}: hbase""") 21 | ConnectorFactory.creator(config).map(_.getClass) should be(Some(classOf[HBaseConnectorCreator])) 22 | } 23 | 24 | it should "choose Postgresql connector over HBase one" in { 25 | val config = ConfigFactory.parseString(s"""${ConfigurationKeys.CONNECTOR}: postgresql""") 26 | ConnectorFactory.creator(config).map(_.getClass) should be(Some(classOf[PostgresConnectorCreator])) 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class MyClass(override val value: Int, otherVale: Long) extends MyTrait 7 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyNestedAbstractClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | abstract class MyNestedAbstractClass[T <: MyTrait](id: Int, myClass: T) 7 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyNestedClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import 
it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class MyNestedClass(id: Int, myClass: MyClass, my2Class: Map[String, MyClass]) 7 | extends MyNestedAbstractClass[MyClass](id, myClass) 8 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyTrait.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | trait MyTrait { 7 | def value: Int 8 | } 9 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/NewClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | case class NewClass(one: Long, two: String, three: Int) 4 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/NotToBeRegisteredClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | case class NotToBeRegisteredClass() 4 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/OneField.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class OneField(one: Int) 7 | -------------------------------------------------------------------------------- /mock-connector/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.mock.MockConnectorCreator 2 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | object ConfigurationKeys { 4 | val FILES = "files" 5 | val RESOURCES = "resources" 6 | val MODE = "mode" 7 | val STRICT = "strict" 8 | val PERMISSIVE = "permissive" 9 | 10 | sealed trait Mode 11 | 12 | object Mode { 13 | def parse(string: String): Mode = { 14 | string.toLowerCase match { 15 | case STRICT => Strict 16 | case PERMISSIVE => Permissive 17 | case other: String => throw new IllegalArgumentException(s"Unknown mode: $other") 18 | } 19 | } 20 | } 21 | 22 | case object Strict extends Mode 23 | 24 | case object Permissive extends Mode 25 | 26 | } 27 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/MockConnector.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | import it.agilelab.darwin.common.{ Connector, Logging, SchemaReader } 6 | import org.apache.avro.{ Schema, SchemaNormalization } 7 | 8 | import scala.collection.mutable 9 | 10 | class MockConnectorException(msg: String, t: 
Option[Throwable]) extends RuntimeException(msg) { 11 | def this(msg: String) = this(msg, None) 12 | 13 | def this(t: Throwable) = this(t.getMessage, Some(t)) 14 | 15 | override def getCause: Throwable = t match { 16 | case Some(value) => value 17 | case None => super.getCause 18 | } 19 | } 20 | 21 | class MockConnector(config: Config) extends Connector with Logging { 22 | 23 | private[this] var loaded: Boolean = false 24 | 25 | val mode: ConfigurationKeys.Mode = if (config.hasPath(ConfigurationKeys.MODE)) { 26 | ConfigurationKeys.Mode.parse(config.getString(ConfigurationKeys.MODE)) 27 | } else { 28 | ConfigurationKeys.Strict 29 | } 30 | 31 | private def files = if (config.hasPath(ConfigurationKeys.FILES)) { 32 | config.getStringList(ConfigurationKeys.FILES).toScala().map { s => 33 | try { 34 | SchemaReader.safeRead(new java.io.File(s)) 35 | } catch { 36 | case t: Throwable => Left(SchemaReader.UnknownError(t)) 37 | } 38 | } 39 | } else { 40 | Nil 41 | } 42 | 43 | private def resources = if (config.hasPath(ConfigurationKeys.RESOURCES)) { 44 | config.getStringList(ConfigurationKeys.RESOURCES).toScala().map { s => 45 | try { 46 | SchemaReader.safeReadFromResources(s) 47 | } catch { 48 | case t: Throwable => Left(SchemaReader.UnknownError(t)) 49 | } 50 | } 51 | } else { 52 | Nil 53 | } 54 | 55 | private def handleError(error: SchemaReader.SchemaReaderError): Unit = { 56 | mode match { 57 | case ConfigurationKeys.Strict => 58 | error match { 59 | case SchemaReader.SchemaParserError(exception) => 60 | throw new MockConnectorException(exception) 61 | case SchemaReader.IOError(exception) => throw new MockConnectorException(exception) 62 | case SchemaReader.ResourceNotFoundError(msg) => throw new MockConnectorException(msg) 63 | case SchemaReader.UnknownError(t) => throw new MockConnectorException(t) 64 | } 65 | case ConfigurationKeys.Permissive => 66 | error match { 67 | case SchemaReader.SchemaParserError(exception) => log.warn(exception.getMessage, exception) 68 | case SchemaReader.IOError(exception) => log.warn(exception.getMessage, exception) 69 | case SchemaReader.ResourceNotFoundError(msg) => log.warn(msg) 70 | case SchemaReader.UnknownError(t) => log.warn(t.getMessage, t) 71 | } 72 | } 73 | } 74 | 75 | private val table: mutable.Map[Long, Schema] = mutable.Map.empty[Long, Schema] 76 | 77 | override def fullLoad(): Seq[(Long, Schema)] = { 78 | (resources ++ files).foreach { 79 | case Left(error) => handleError(error) 80 | case Right(schema) => table(SchemaNormalization.parsingFingerprint64(schema)) = schema 81 | } 82 | table.toSeq 83 | } 84 | 85 | override def insert(schemas: Seq[(Long, Schema)]): Unit = { 86 | schemas.foreach { case (id, schema) => 87 | table(id) = schema 88 | } 89 | } 90 | 91 | override def findSchema(id: Long): Option[Schema] = { 92 | if (!loaded) { 93 | this.synchronized { 94 | if (!loaded) { 95 | fullLoad() 96 | loaded = true 97 | } 98 | } 99 | } 100 | table.get(id) 101 | } 102 | 103 | override def createTable(): Unit = () 104 | 105 | override def tableExists(): Boolean = true 106 | 107 | override def tableCreationHint(): String = "No table needs to be created since this is a mock connector" 108 | 109 | /** 110 | * Retrieves the latest schema for a given string identifier (not to be confused with the fingerprint id).
111 | * This API might not be implemented by all connectors, which should return None 112 | */ 113 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = 114 | table.find(_._2.getFullName == identifier) 115 | } 116 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/MockConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator } 5 | 6 | class MockConnectorCreator extends ConnectorCreator { 7 | override def create(config: Config): Connector = new MockConnector(config) 8 | 9 | /** 10 | * @return the name of the Connector 11 | */ 12 | override def name(): String = "mock" 13 | } 14 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/testclasses/MockClassAlone.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock.testclasses 2 | 3 | case class MockClassAlone(fry: String, bender: Long, leela: Int, zoidberg: Boolean) 4 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/testclasses/MockClassChild.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock.testclasses 2 | 3 | case class MockClassChild(twoOne: Long, twoTwo: String) 4 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/testclasses/MockClassParent.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock.testclasses 2 | 3 | case class MockClassParent(one: Int, two: String, three: Long, four: MockClassChild) 4 | -------------------------------------------------------------------------------- /mock-connector/src/test/resources/test/MockClassAlone.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassAlone","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"fry","type":"string"},{"name":"bender","type":"long"},{"name":"leela","type":"int"},{"name":"zoidberg","type":"boolean"}]} 2 | -------------------------------------------------------------------------------- /mock-connector/src/test/resources/test/MockClassParent.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassParent","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"one","type":"int"},{"name":"two","type":"string"},{"name":"three","type":"long"},{"name":"four","type":{"type":"record","name":"MockClassChild","fields":[{"name":"twoOne","type":"long"},{"name":"twoTwo","type":"string"}]}}]} 2 | -------------------------------------------------------------------------------- /mock-connector/src/test/scala/it/agilelab/darwin/connector/mock/MockConnectorSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import 
org.apache.avro.Schema 5 | import org.apache.avro.Schema.Type 6 | import org.scalatest.flatspec.AnyFlatSpec 7 | import org.scalatest.matchers.should.Matchers 8 | 9 | import java.nio.file.Paths 10 | import java.util 11 | 12 | class MockConnectorSpec extends AnyFlatSpec with Matchers { 13 | 14 | private val p = Paths 15 | .get(".") 16 | .resolve("mock-connector") 17 | .resolve("src") 18 | .resolve("test") 19 | .resolve("resources") 20 | .resolve("test") 21 | 22 | it should "load the schema manually inserted" in { 23 | val connector = new MockConnectorCreator().create(ConfigFactory.empty()) 24 | connector.insert((3L, Schema.create(Type.BYTES)) :: Nil) 25 | connector.fullLoad() should have size 1 26 | } 27 | 28 | it should "load the schema automatically from resources" in { 29 | val connector = new MockConnectorCreator().create(ConfigFactory.parseMap { 30 | new java.util.HashMap[String, Object] { 31 | put(ConfigurationKeys.RESOURCES, util.Arrays.asList("test/MockClassAlone.avsc", "test/MockClassParent.avsc")) 32 | } 33 | }) 34 | connector.fullLoad() should have size 2 35 | } 36 | 37 | it should "load the schema automatically from files" in { 38 | val connector = new MockConnectorCreator().create(ConfigFactory.parseMap { 39 | new java.util.HashMap[String, Object] { 40 | put( 41 | ConfigurationKeys.FILES, 42 | util.Arrays.asList(p.resolve("MockClassAlone.avsc").toString, p.resolve("MockClassParent.avsc").toString) 43 | ) 44 | } 45 | }) 46 | connector.fullLoad() should have size 2 47 | } 48 | 49 | it should "not throw any exception in case of missing file in permissive mode" in { 50 | val connector = new MockConnectorCreator().create(ConfigFactory.parseMap { 51 | new java.util.HashMap[String, Object] { 52 | put( 53 | ConfigurationKeys.FILES, 54 | util.Arrays.asList( 55 | p.resolve("DoesNotExists.avsc").toString, 56 | p.resolve("MockClassAlone.avsc").toString, 57 | p.resolve("MockClassParent.avsc").toString 58 | ) 59 | ) 60 | put(ConfigurationKeys.MODE, "permissive") 61 | } 62 | }) 63 | connector.fullLoad() should have size 2 64 | } 65 | 66 | it should "throw an exception in case of missing file in strict mode" in { 67 | intercept[MockConnectorException] { 68 | new MockConnectorCreator() 69 | .create(ConfigFactory.parseMap { 70 | new java.util.HashMap[String, Object] { 71 | put( 72 | ConfigurationKeys.FILES, 73 | util.Arrays.asList( 74 | p.resolve("DoesNotExists.avsc").toString, 75 | p.resolve("MockClassAlone.avsc").toString, 76 | p.resolve("MockClassParent.avsc").toString 77 | ) 78 | ) 79 | } 80 | }) 81 | .fullLoad() 82 | } 83 | } 84 | 85 | it should "return Some schema if asked for the latest schema" in { 86 | val connector = 87 | new MockConnectorCreator() 88 | .create(ConfigFactory.parseMap { 89 | new java.util.HashMap[String, Object] { 90 | put( 91 | ConfigurationKeys.FILES, 92 | util.Arrays.asList( 93 | p.resolve("MockClassAlone.avsc").toString, 94 | p.resolve("MockClassParent.avsc").toString 95 | ) 96 | ) 97 | } 98 | }) 99 | val all = connector.fullLoad() 100 | connector.retrieveLatestSchema("it.agilelab.darwin.connector.mock.testclasses.MockClassAlone") shouldBe all.find( 101 | _._2.getName == "MockClassAlone" 102 | ) 103 | 104 | } 105 | 106 | it should "return None schema if asked for the latest schema" in { 107 | val connector = 108 | new MockConnectorCreator() 109 | .create(ConfigFactory.parseMap { 110 | new java.util.HashMap[String, Object] { 111 | put( 112 | ConfigurationKeys.FILES, 113 | util.Arrays.asList( 114 | p.resolve("MockClassAlone.avsc").toString, 115 | 
p.resolve("MockClassParent.avsc").toString 116 | ) 117 | ) 118 | } 119 | }) 120 | connector.fullLoad() 121 | connector.retrieveLatestSchema("DoesNotExists") shouldBe None 122 | 123 | } 124 | 125 | } 126 | -------------------------------------------------------------------------------- /mongo/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.mongo.MongoConnectorCreator 2 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | object ConfigurationKeys { 4 | 5 | val USERNAME: String = "username" // the username 6 | val PASSWORD: String = "password" // the password 7 | val HOST: String = "host" // the hostname where you want to connect 8 | val DATABASE: String = "database" // the name of the database in which the user is defined 9 | val COLLECTION: String = "collection" // the collection name 10 | val TIMEOUT: String = "timeout" // the timeout max to wait the results 11 | 12 | } 13 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/ConfigurationMongoModels.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | import scala.concurrent.duration.Duration 4 | 5 | object ConfigurationMongoModels { 6 | 7 | sealed trait BaseMongoConfig { 8 | def database: String 9 | def collection: String 10 | def timeout: Duration 11 | } 12 | 13 | case class MongoConfig( 14 | database: String, 15 | collection: String, 16 | timeout: Duration 17 | ) extends BaseMongoConfig 18 | 19 | case class MongoConnectorConfig( 20 | username: String, 21 | password: String, 22 | database: String, 23 | collection: String, 24 | hosts: Seq[String], 25 | timeout: Duration 26 | ) extends BaseMongoConfig 27 | 28 | val DEFAULT_DURATION = 5000 29 | 30 | } 31 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/MongoConnector.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | import com.mongodb.{ BasicDBObject, ErrorCategory } 4 | import it.agilelab.darwin.common.{ Connector, Logging } 5 | import it.agilelab.darwin.connector.mongo.ConfigurationMongoModels.BaseMongoConfig 6 | import org.apache.avro.Schema 7 | import org.apache.avro.Schema.Parser 8 | import org.mongodb.scala.bson.{ BsonDocument, BsonValue } 9 | import org.mongodb.scala.{ bson, Document, MongoClient, MongoCollection, MongoWriteException } 10 | 11 | import scala.concurrent.ExecutionContext.Implicits.global 12 | import scala.concurrent.Await 13 | import scala.util.{ Failure, Try } 14 | 15 | class MongoConnector(mongoClient: MongoClient, mongoConfig: BaseMongoConfig) extends Connector with Logging { 16 | 17 | private def parser: Parser = new Parser() 18 | 19 | override def fullLoad(): Seq[(Long, Schema)] = { 20 | 21 | log.debug(s"loading all schemas from collection ${mongoConfig.collection}") 22 | val collection = 23 | mongoClient 24 | .getDatabase(mongoConfig.database) 25 | .getCollection(mongoConfig.collection) 26 | 27 | val schemas: 
Seq[Try[(Long, Schema)]] = 28 | Await.result( 29 | collection 30 | .find() 31 | .map { document => 32 | for { 33 | key <- extract(document, "_id", _.asInt64().getValue) 34 | schemaStr <- extract(document, "schema", _.asString().getValue) 35 | schema <- Try(parser.parse(schemaStr)) 36 | } yield key -> schema 37 | } 38 | .toFuture(), 39 | mongoConfig.timeout 40 | ) 41 | log.debug(s"${schemas.size} loaded from MongoDB") 42 | // this way the first exception is thrown, but we can change this line 43 | // to support different error handling strategies 44 | schemas.map(_.get) 45 | } 46 | 47 | private def extract[A](d: Document, fieldName: String, f: BsonValue => A): Try[A] = { 48 | d.filterKeys(k => k == fieldName) 49 | .headOption 50 | .fold[Try[A]](Failure(new RuntimeException(s"Cannot find $fieldName field in document"))) { case (_, value) => 51 | Try(f(value)).recoverWith { case t: Throwable => 52 | Failure(new RuntimeException(s"$fieldName was not of expected type", t)) 53 | } 54 | } 55 | } 56 | 57 | override def insert(schemas: Seq[(Long, Schema)]): Unit = { 58 | 59 | log.debug(s"inclusion of new schemas in the collection ${mongoConfig.collection}") 60 | 61 | schemas.foreach { case (id, schema) => 62 | val document = new BsonDocument 63 | document.put("_id", bson.BsonInt64(id)) 64 | document.put("schema", bson.BsonString(schema.toString)) 65 | document.put("name", bson.BsonString(schema.getName)) 66 | document.put("namespace", bson.BsonString(schema.getNamespace)) 67 | 68 | insertIfNotExists(mongoClient.getDatabase(mongoConfig.database).getCollection(mongoConfig.collection), document) 69 | } 70 | } 71 | 72 | private def insertIfNotExists(collection: MongoCollection[Document], document: BsonDocument): Unit = { 73 | try { 74 | Await.result(collection.insertOne(document).toFuture(), mongoConfig.timeout) 75 | } catch { 76 | case ex: MongoWriteException if ex.getError.getCategory == ErrorCategory.DUPLICATE_KEY => 77 | log.info("document already present, doing nothing") 78 | } 79 | () 80 | } 81 | 82 | override def createTable(): Unit = { 83 | log.debug(s"Creating collection ${mongoConfig.collection}") 84 | try { 85 | Await.result( 86 | mongoClient.getDatabase(mongoConfig.database).createCollection(mongoConfig.collection).toFuture(), 87 | mongoConfig.timeout 88 | ) 89 | log.info(s"collection ${mongoConfig.collection} has been correctly created") 90 | } catch { 91 | case e: Exception => log.info(s"collection ${mongoConfig.collection} was not created. 
\n ${e.getMessage}") 92 | } 93 | } 94 | 95 | override def tableExists(): Boolean = { 96 | Await.result( 97 | mongoClient 98 | .getDatabase(mongoConfig.database) 99 | .listCollectionNames() 100 | .filter(x => x == mongoConfig.collection) 101 | .toFuture() 102 | .map(_.size), 103 | mongoConfig.timeout 104 | ) == 1 105 | } 106 | 107 | override def tableCreationHint(): String = { 108 | s"""To create the collection from shell perform the following command: 109 | |db.createCollection(${mongoConfig.collection}) 110 | """.stripMargin 111 | } 112 | 113 | override def findSchema(id: Long): Option[Schema] = { 114 | 115 | val query = new BasicDBObject 116 | query.put("_id", bson.BsonInt64(id)) 117 | 118 | val documents = 119 | mongoClient 120 | .getDatabase(mongoConfig.database) 121 | .getCollection(mongoConfig.collection) 122 | .find(query) 123 | .toFuture() 124 | 125 | val schemaValue: Seq[String] = 126 | for { 127 | document <- Await.result(documents, mongoConfig.timeout) 128 | field <- document 129 | if field._1 == "schema" 130 | } yield field._2.asString().getValue 131 | schemaValue.headOption.map(parser.parse) 132 | } 133 | 134 | /** 135 | * Retrieves the latest schema for a given string identifier (not to be confused with the fingerprint id). 136 | * This API might not be implemented by all connectors, which should return None 137 | */ 138 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = None 139 | } 140 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/MongoConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | import com.mongodb.Block 4 | import com.typesafe.config.Config 5 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator } 6 | import it.agilelab.darwin.connector.mongo.ConfigurationMongoModels.MongoConnectorConfig 7 | import org.mongodb.scala.connection.ClusterSettings 8 | import org.mongodb.scala.{ MongoClient, MongoClientSettings, MongoCredential, ServerAddress } 9 | import it.agilelab.darwin.common.compat._ 10 | import scala.concurrent.duration.Duration 11 | 12 | class MongoConnectorCreator extends ConnectorCreator { 13 | 14 | override def create(config: Config): Connector = { 15 | 16 | val mongoConfig: MongoConnectorConfig = createConfig(config) 17 | new MongoConnector(createConnection(mongoConfig), mongoConfig) 18 | } 19 | 20 | /** 21 | * @return the name of the Connector 22 | */ 23 | override def name(): String = "mongo" 24 | 25 | /** 26 | * return the MongoClient 27 | * @param mongoConf : config to create a connection to MongoDB 28 | * @return MongoClient 29 | */ 30 | private def createConnection(mongoConf: MongoConnectorConfig): MongoClient = { 31 | 32 | val credential: MongoCredential = 33 | MongoCredential.createCredential(mongoConf.username, mongoConf.database, mongoConf.password.toCharArray) 34 | 35 | val hosts: Seq[ServerAddress] = mongoConf.hosts.map(host => new ServerAddress(host)) 36 | 37 | val settings: MongoClientSettings = MongoClientSettings 38 | .builder() 39 | .credential(credential) 40 | .applyToClusterSettings(new Block[ClusterSettings.Builder] { 41 | override def apply(builder: ClusterSettings.Builder): Unit = 42 | builder.hosts(java.util.Arrays.asList(hosts: _*)) 43 | }) 44 | .build() 45 | 46 | MongoClient(settings) 47 | } 48 | 49 | /** 50 | * create MongoConnectorConfig started from a configuration file 51 | * @param config: 
configurations parsed from the file 52 | * @return MongoConnectorConfig 53 | */ 54 | def createConfig(config: Config): MongoConnectorConfig = { 55 | require(config.hasPath(ConfigurationKeys.USERNAME)) 56 | require(config.hasPath(ConfigurationKeys.PASSWORD)) 57 | require(config.hasPath(ConfigurationKeys.HOST)) 58 | require(config.hasPath(ConfigurationKeys.DATABASE)) 59 | require(config.hasPath(ConfigurationKeys.COLLECTION)) 60 | 61 | MongoConnectorConfig( 62 | config.getString(ConfigurationKeys.USERNAME), 63 | config.getString(ConfigurationKeys.PASSWORD), 64 | config.getString(ConfigurationKeys.DATABASE), 65 | config.getString(ConfigurationKeys.COLLECTION), 66 | config.getStringList(ConfigurationKeys.HOST).toScala().toSeq, 67 | if (config.hasPath(ConfigurationKeys.TIMEOUT)) { 68 | Duration.create(config.getInt(ConfigurationKeys.TIMEOUT), "millis") 69 | } else { 70 | Duration.create(ConfigurationMongoModels.DEFAULT_DURATION, "millis") 71 | } 72 | ) 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /mongo/src/test/resources/mongo.conf: -------------------------------------------------------------------------------- 1 | username = "mongo" 2 | password = "mongo" 3 | host = ["localhost:12345"] 4 | database = "test" 5 | collection = "collection_test" 6 | timeout = 5000 7 | -------------------------------------------------------------------------------- /mongo/src/test/resources/mongomock.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type" : "record", 3 | "name" : "PostgresMock", 4 | "namespace" : "it.agilelab.darwin.connector.postgres", 5 | "fields" : [ { 6 | "name" : "one", 7 | "type" : "int" 8 | }, { 9 | "name" : "two", 10 | "type" : "string" 11 | }, { 12 | "name" : "three", 13 | "type" : "long" 14 | }, { 15 | "name" : "four", 16 | "type" : { 17 | "type" : "record", 18 | "name" : "Postgres2Mock", 19 | "fields" : [ { 20 | "name" : "one", 21 | "type" : "boolean" 22 | }, { 23 | "name" : "two", 24 | "type" : "long" 25 | } ] 26 | } 27 | } ] 28 | } 29 | -------------------------------------------------------------------------------- /mongo/src/test/scala/it/agilelab/darwin/connector/mongo/MongoConnectorTest.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory } 4 | import de.flapdoodle.embed.mongo.{ MongodExecutable, MongodProcess, MongodStarter } 5 | import de.flapdoodle.embed.mongo.config.{ IMongodConfig, MongodConfigBuilder, Net } 6 | import de.flapdoodle.embed.mongo.distribution.Version 7 | import de.flapdoodle.embed.process.runtime.Network 8 | import org.scalatest.BeforeAndAfterAll 9 | import org.mongodb.scala.MongoClient 10 | import it.agilelab.darwin.common.Connector 11 | import it.agilelab.darwin.connector.mongo.ConfigurationMongoModels.MongoConfig 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | import org.apache.avro.Schema 15 | import org.apache.avro.Schema.Parser 16 | 17 | import scala.concurrent.Await 18 | import scala.concurrent.duration.Duration 19 | 20 | class MongoConnectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll { 21 | 22 | val port = 12345 23 | val config: Config = ConfigFactory.load("mongo.conf") 24 | val starter: MongodStarter = MongodStarter.getDefaultInstance 25 | val mongodConfig: IMongodConfig = 26 | new MongodConfigBuilder() 27 | 
.version(Version.Main.PRODUCTION) 28 | .net(new Net("localhost", port, Network.localhostIsIPv6)) 29 | .build 30 | val mongoConfig: MongoConfig = MongoConfig( 31 | config.getString(ConfigurationKeys.DATABASE), 32 | config.getString(ConfigurationKeys.COLLECTION), 33 | if (config.hasPath(ConfigurationKeys.TIMEOUT)) { 34 | Duration.create(config.getInt(ConfigurationKeys.TIMEOUT), "millis") 35 | } else { 36 | Duration.create(ConfigurationMongoModels.DEFAULT_DURATION, "millis") 37 | } 38 | ) 39 | val mongodExecutable: MongodExecutable = starter.prepare(mongodConfig) 40 | var mongod: MongodProcess = _ 41 | var mongoClient: MongoClient = _ 42 | var connector: Connector = _ 43 | 44 | override protected def beforeAll(): Unit = { 45 | super.beforeAll() 46 | 47 | mongod = mongodExecutable.start 48 | mongoClient = MongoClient( 49 | s"mongodb://${config.getStringList(ConfigurationKeys.HOST).get(0)}/" + 50 | s"${config.getString(ConfigurationKeys.DATABASE)}" 51 | ) 52 | connector = new MongoConnector(mongoClient, mongoConfig) 53 | connector.createTable() 54 | } 55 | 56 | override protected def afterAll(): Unit = { 57 | mongod.stop() 58 | 59 | super.afterAll() 60 | } 61 | 62 | "Table collection_test" should "be created by connector" in { 63 | connector.createTable() 64 | assert(connector.tableExists()) 65 | } 66 | 67 | "schemas" should "be inserted into collection" in { 68 | val schema: Schema = new Parser().parse(getClass.getClassLoader.getResourceAsStream("mongomock.avsc")) 69 | val schemas = Seq((0L, schema), (1L, schema)) 70 | connector.insert(schemas) 71 | val numberOfDocuments = 72 | Await.result( 73 | mongoClient 74 | .getDatabase(config.getString(ConfigurationKeys.DATABASE)) 75 | .getCollection(config.getString(ConfigurationKeys.COLLECTION)) 76 | .countDocuments() 77 | .toFuture(), 78 | mongoConfig.timeout 79 | ) 80 | assert(numberOfDocuments == 2) 81 | } 82 | 83 | "schema" should "not be inserted into collection because there is already a schema with the same id" in { 84 | val schema: Schema = new Parser().parse(getClass.getClassLoader.getResourceAsStream("mongomock.avsc")) 85 | val schemas = Seq((0L, schema)) 86 | connector.insert(schemas) 87 | val numberOfDocuments = 88 | Await.result( 89 | mongoClient 90 | .getDatabase(config.getString(ConfigurationKeys.DATABASE)) 91 | .getCollection(config.getString(ConfigurationKeys.COLLECTION)) 92 | .countDocuments() 93 | .toFuture(), 94 | mongoConfig.timeout 95 | ) 96 | assert(numberOfDocuments == 2) 97 | } 98 | 99 | "full load" should "return a list of length equal to 2" in { 100 | val schemas: Seq[(Long, Schema)] = connector.fullLoad() 101 | assert(schemas.length == 2) 102 | } 103 | 104 | "find schema" should "return a schema" in { 105 | val schema: Option[Schema] = connector.findSchema(0L) 106 | assert(schema.isDefined) 107 | } 108 | 109 | "find schema" should "return None" in { 110 | val schema: Option[Schema] = connector.findSchema(3L) 111 | assert(schema.isEmpty) 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /multi-connector/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.multi.MultiConnectorCreator -------------------------------------------------------------------------------- /multi-connector/src/main/scala/it/agilelab/darwin/connector/multi/MultiConnectorCreator.scala:
-------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.multi 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, ConnectorFactory } 6 | import it.agilelab.darwin.manager.exception.DarwinException 7 | 8 | object MultiConnectorCreator { 9 | val REGISTRATOR = "registrar" 10 | val CONFLUENT_SINGLE_OBJECT_ENCODING = "confluent-single-object-encoding" 11 | val STANDARD_SINGLE_OBJECT_ENCODING = "standard-single-object-encoding" 12 | } 13 | class MultiConnectorCreator extends ConnectorCreator { 14 | 15 | /** 16 | * @return the name of the Connector 17 | */ 18 | override def name(): String = "multi" 19 | 20 | private def mergeConf(conf: Config, path: String): Config = { 21 | 22 | conf 23 | .getConfig(path) 24 | .entrySet() 25 | .toScala() 26 | .map(_.getKey) 27 | .foldLeft(conf)((z, x) => z.withValue(x, conf.getValue(path + "." + x))) 28 | } 29 | 30 | override def create(config: Config): Connector = { 31 | val registrarName = 32 | config.getString(MultiConnectorCreator.REGISTRATOR) 33 | 34 | val confluentConnectorType = 35 | if (config.hasPath(MultiConnectorCreator.CONFLUENT_SINGLE_OBJECT_ENCODING)) { 36 | Some(config.getString(MultiConnectorCreator.CONFLUENT_SINGLE_OBJECT_ENCODING)) 37 | } else { 38 | None 39 | } 40 | 41 | val standardConnectorTypes = config 42 | .getStringList(MultiConnectorCreator.STANDARD_SINGLE_OBJECT_ENCODING) 43 | .toScala() 44 | 45 | val registrar = createAndMergeConfigs(config, registrarName) 46 | 47 | val confluentConnector = 48 | confluentConnectorType.map { cName => 49 | createIfNotRegistrar(registrarName, registrar, cName, config) 50 | } 51 | 52 | val singleObjectConnectors = standardConnectorTypes.map { cName => 53 | createIfNotRegistrar(registrarName, registrar, cName, config) 54 | }.toList 55 | 56 | new MultiConnector( 57 | registrar, 58 | confluentConnector, 59 | singleObjectConnectors 60 | ) 61 | } 62 | 63 | private def createAndMergeConfigs(config: Config, registrarName: String) = { 64 | ConnectorFactory 65 | .creator(registrarName) 66 | .map(creator => creator.create(mergeConf(config, registrarName))) 67 | .getOrElse(throw new DarwinException("No connector creator for name " + registrarName)) 68 | } 69 | 70 | private def createIfNotRegistrar(registrarName: String, registrar: Connector, cName: String, config: Config) = { 71 | if (cName == registrarName) { 72 | registrar 73 | } else { 74 | ConnectorFactory 75 | .creator(cName) 76 | .map(creator => creator.create(mergeConf(config, cName))) 77 | .getOrElse(throw new DarwinException("No connector creator for name " + cName)) 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /multi-connector/src/test/scala/it/agilelab/darwin/connector/multi/MultiConnectorSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.multi 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient 5 | import it.agilelab.darwin.common.ConnectorFactory 6 | import it.agilelab.darwin.connector.confluent.{ ConfluentConnector, ConfluentConnectorOptions } 7 | import it.agilelab.darwin.connector.mock.{ ConfigurationKeys, MockConnector, MockConnectorCreator } 8 | import it.agilelab.darwin.manager.LazyAvroSchemaManager 9 | import org.apache.avro.SchemaBuilder 10 | import 
org.scalatest.BeforeAndAfterAll 11 | import org.scalatest.flatspec.AnyFlatSpec 12 | import org.scalatest.matchers.should.Matchers 13 | 14 | import java.io.{ ByteArrayInputStream, ByteArrayOutputStream } 15 | import java.nio.file.Paths 16 | import java.nio.{ ByteBuffer, ByteOrder } 17 | import java.util 18 | import java.util.Collections 19 | 20 | class MultiConnectorSpec extends AnyFlatSpec with Matchers with BeforeAndAfterAll { 21 | private val p = Paths 22 | .get(".") 23 | .resolve("mock-connector") 24 | .resolve("src") 25 | .resolve("test") 26 | .resolve("resources") 27 | .resolve("test") 28 | 29 | private def mockConnector() = { 30 | new MockConnectorCreator().create(ConfigFactory.parseMap { 31 | new util.HashMap[String, Object] { 32 | put( 33 | ConfigurationKeys.FILES, 34 | util.Arrays.asList( 35 | p.resolve("DoesNotExists.avsc").toString, 36 | p.resolve("MockClassAlone.avsc").toString, 37 | p.resolve("MockClassParent.avsc").toString 38 | ) 39 | ) 40 | put(ConfigurationKeys.MODE, "permissive") 41 | } 42 | }) 43 | } 44 | 45 | private def mockConfluentConnector() = { 46 | new ConfluentConnector( 47 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), 1000), 48 | client = new MockSchemaRegistryClient() 49 | ) 50 | } 51 | 52 | it should "start with mock and confluent-mock connector" in { 53 | val confluent = mockConfluentConnector() 54 | val mock = mockConnector() 55 | val multiC = new MultiConnector( 56 | confluent, 57 | Some(confluent), 58 | List(mock) 59 | ) 60 | 61 | val initiallyLoaded = multiC.fullLoad() 62 | initiallyLoaded.size shouldBe 2 63 | initiallyLoaded.foreach { case (id, schema) => 64 | multiC.extractId( 65 | mock.generateAvroSingleObjectEncoded(Array.emptyByteArray, schema, ByteOrder.BIG_ENDIAN, mock.fingerprint _), 66 | ByteOrder.BIG_ENDIAN 67 | ) shouldBe id 68 | } 69 | } 70 | 71 | it should "register a schema using the registrar" in { 72 | val confluent = mockConfluentConnector() 73 | val mock = mockConnector() 74 | val multiC = new MultiConnector( 75 | confluent, 76 | Some(confluent), 77 | List(mock) 78 | ) 79 | 80 | val schemaToInsert = SchemaBuilder 81 | .record("Test") 82 | .prop("x-darwin-subject", "test-value") 83 | .fields() 84 | .requiredLong("numero") 85 | .endRecord() 86 | val manager = new LazyAvroSchemaManager(multiC, ByteOrder.BIG_ENDIAN) 87 | val id = manager.registerAll(Seq(schemaToInsert)).head._1 88 | multiC.fullLoad().size shouldBe 3 89 | val parsedId = manager.extractId( 90 | Array(0x00: Byte) ++ 91 | ByteBuffer.wrap(Array.ofDim[Byte](4)).putInt(id.toInt).array() 92 | ) 93 | parsedId shouldBe id 94 | } 95 | 96 | it should "be created with a confluent connector and a mock one" in { 97 | val multiConnectorCreator = ConnectorFactory.creator("multi").get 98 | val connector: MultiConnector = multiConnectorCreator 99 | .create( 100 | ConfigFactory.parseString( 101 | s""" 102 | | type = "eager" 103 | | connector = "multi" 104 | | registrar = "confluent" 105 | | confluent-single-object-encoding: "confluent" 106 | | standard-single-object-encoding: ["mock"] 107 | | confluent { 108 | | endpoints: ["http://schema-registry-00:7777", "http://schema-registry-01:7777"] 109 | | max-cached-schemas: 1000 110 | | } 111 | | mock { 112 | | ${ConfigurationKeys.FILES} = [ 113 | | ${p.resolve("DoesNotExists.avsc").toString}, 114 | | ${p.resolve("MockClassAlone.avsc").toString}, 115 | | ${p.resolve("MockClassParent.avsc").toString} 116 | | ] 117 | | ${ConfigurationKeys.MODE} = "permissive" 118 | | } 119 | |""".stripMargin 120 | ) 121 | ) 122 | 
.asInstanceOf[MultiConnector] 123 | assert(connector.registrar.isInstanceOf[ConfluentConnector]) 124 | assert(connector.confluentConnector.exists(_.isInstanceOf[ConfluentConnector])) 125 | assert(connector.singleObjectEncodingConnectors.forall(_.isInstanceOf[MockConnector])) 126 | } 127 | 128 | it should "be created with only a mock connector" in { 129 | val multiConnectorCreator = ConnectorFactory.creator("multi").get 130 | val connector: MultiConnector = multiConnectorCreator 131 | .create( 132 | ConfigFactory.parseString( 133 | s""" 134 | | type = "eager" 135 | | connector = "multi" 136 | | registrar = "mock" 137 | | standard-single-object-encoding: ["mock"] 138 | | mock { 139 | | ${ConfigurationKeys.FILES} = [ 140 | | ${p.resolve("DoesNotExists.avsc").toString}, 141 | | ${p.resolve("MockClassAlone.avsc").toString}, 142 | | ${p.resolve("MockClassParent.avsc").toString} 143 | | ] 144 | | ${ConfigurationKeys.MODE} = "permissive" 145 | | } 146 | |""".stripMargin 147 | ) 148 | ) 149 | .asInstanceOf[MultiConnector] 150 | connector.confluentConnector shouldBe empty 151 | assert(connector.registrar.isInstanceOf[MockConnector]) 152 | assert(connector.singleObjectEncodingConnectors.forall(_.isInstanceOf[MockConnector])) 153 | } 154 | 155 | it should "extract schema and payload from confluent encoded byte array" in { 156 | val confluent = mockConfluentConnector() 157 | val mock = mockConnector() 158 | val multiC = new MultiConnector( 159 | confluent, 160 | Some(confluent), 161 | List(mock) 162 | ) 163 | val schemaToInsert = SchemaBuilder 164 | .record("Testa") 165 | .prop("x-darwin-subject", "test-value") 166 | .fields() 167 | .requiredLong("numera") 168 | .endRecord() 169 | val manager = new LazyAvroSchemaManager(multiC, ByteOrder.BIG_ENDIAN) 170 | val id = manager.registerAll(Seq(schemaToInsert)).head._1 171 | manager.extractId( 172 | manager.generateAvroSingleObjectEncoded(Array.emptyByteArray, schemaToInsert) 173 | ) shouldBe id 174 | 175 | manager.extractId( 176 | ByteBuffer.wrap(manager.generateAvroSingleObjectEncoded(Array.emptyByteArray, schemaToInsert)) 177 | ) shouldBe id 178 | 179 | val stream = new ByteArrayOutputStream() 180 | manager.generateAvroSingleObjectEncoded(stream, id)(identity) 181 | manager.extractId( 182 | new ByteArrayInputStream(stream.toByteArray) 183 | ) shouldBe Right(id) 184 | 185 | manager.extractSchema(new ByteArrayInputStream(stream.toByteArray)) shouldBe Right(schemaToInsert) 186 | 187 | val soe = ByteBuffer.wrap(manager.generateAvroSingleObjectEncoded(Array.emptyByteArray, schemaToInsert)) 188 | manager.retrieveSchemaAndAvroPayload(soe) shouldBe schemaToInsert 189 | 190 | manager.retrieveSchemaAndAvroPayload( 191 | ByteBuffer.wrap(manager.generateAvroSingleObjectEncoded(Array.emptyByteArray, schemaToInsert)) 192 | ) shouldBe schemaToInsert 193 | 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /postgres/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.postgres.PostgresConnectorCreator 2 | -------------------------------------------------------------------------------- /postgres/src/main/scala/it/agilelab/darwin/connector/postgres/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | object ConfigurationKeys { 4 | val TABLE: String = "table" 5 | val HOST: 
String = "host" 6 | val DATABASE: String = "db" 7 | val USER: String = "username" 8 | val PASSWORD: String = "password" 9 | val MODE: String = "mode" 10 | } 11 | -------------------------------------------------------------------------------- /postgres/src/main/scala/it/agilelab/darwin/connector/postgres/PostgresConnection.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | import java.sql.{ Connection, DriverManager } 4 | 5 | import com.typesafe.config.Config 6 | 7 | trait PostgresConnection { 8 | 9 | private var connectionUrl: String = "" 10 | private val driverName: String = "org.postgresql.Driver" 11 | 12 | protected def setConnectionConfig(config: Config) = { 13 | val db = config.getString(ConfigurationKeys.DATABASE) 14 | val host = config.getString(ConfigurationKeys.HOST) 15 | val user = config.getString(ConfigurationKeys.USER) 16 | val password = config.getString(ConfigurationKeys.PASSWORD) 17 | connectionUrl = s"jdbc:postgresql://$host/$db?user=$user&password=$password" 18 | } 19 | 20 | protected def getConnection: Connection = { 21 | Class.forName(driverName) 22 | val connection: Connection = DriverManager.getConnection(connectionUrl) 23 | connection 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /postgres/src/main/scala/it/agilelab/darwin/connector/postgres/PostgresConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator } 5 | 6 | class PostgresConnectorCreator extends ConnectorCreator { 7 | override def create(config: Config): Connector = new PostgresConnector(config) 8 | 9 | /** 10 | * @return the name of the Connector 11 | */ 12 | override def name(): String = "postgresql" 13 | } 14 | -------------------------------------------------------------------------------- /postgres/src/test/resources/postgres.properties: -------------------------------------------------------------------------------- 1 | host = localhost:5432 2 | db = postgres 3 | username = postgres 4 | password = mysecretpassword 5 | table = schema_registry 6 | -------------------------------------------------------------------------------- /postgres/src/test/resources/postgresmock.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type" : "record", 3 | "name" : "PostgresMock", 4 | "namespace" : "it.agilelab.darwin.connector.postgres", 5 | "fields" : [ { 6 | "name" : "one", 7 | "type" : "int" 8 | }, { 9 | "name" : "two", 10 | "type" : "string" 11 | }, { 12 | "name" : "three", 13 | "type" : "long" 14 | }, { 15 | "name" : "four", 16 | "type" : { 17 | "type" : "record", 18 | "name" : "Postgres2Mock", 19 | "fields" : [ { 20 | "name" : "one", 21 | "type" : "boolean" 22 | }, { 23 | "name" : "two", 24 | "type" : "long" 25 | } ] 26 | } 27 | } ] 28 | } 29 | -------------------------------------------------------------------------------- /postgres/src/test/scala/it/agilelab/darwin/connector/postgres/Postgres2Mock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | case class Postgres2Mock(one: Boolean, two: Long) 4 | -------------------------------------------------------------------------------- 
/postgres/src/test/scala/it/agilelab/darwin/connector/postgres/PostgresConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory, ConfigValueFactory } 4 | import it.agilelab.darwin.common.Connector 5 | import org.apache.avro.{ Schema, SchemaNormalization } 6 | import org.scalatest.BeforeAndAfterAll 7 | import ru.yandex.qatools.embed.postgresql.EmbeddedPostgres 8 | import ru.yandex.qatools.embed.postgresql.distribution.Version 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | import org.scalatest.matchers.should.Matchers 11 | 12 | class PostgresConnectorSuite extends AnyFlatSpec with Matchers with BeforeAndAfterAll { 13 | val embeddedPostgres: EmbeddedPostgres = new EmbeddedPostgres(Version.V9_6_11) 14 | 15 | override protected def beforeAll(): Unit = { 16 | super.beforeAll() 17 | val port = 5432 18 | val host = "localhost" 19 | val dbname = "postgres" 20 | val username = "postgres" 21 | val password = "mysecretpassword" 22 | embeddedPostgres.start(host, port, dbname, username, password) 23 | val config: Config = ConfigFactory.load("postgres.properties") 24 | val connector: Connector = new PostgresConnectorCreator().create(config) 25 | connector.createTable() 26 | } 27 | 28 | override protected def afterAll(): Unit = { 29 | super.afterAll() 30 | embeddedPostgres.stop() 31 | } 32 | 33 | it should "multiple insert and retrieve [No conf - OneTransaction]" in { 34 | val config: Config = ConfigFactory.load("postgres.properties") 35 | val connector: Connector = new PostgresConnectorCreator().create(config) 36 | test(connector) 37 | } 38 | 39 | it should "multiple insert and retrieve [OneTransaction]" in { 40 | val config: Config = ConfigFactory 41 | .load("postgres.properties") 42 | .withValue(ConfigurationKeys.MODE, ConfigValueFactory.fromAnyRef(OneTransaction.value)) 43 | val connector: Connector = new PostgresConnectorCreator().create(config) 44 | test(connector) 45 | } 46 | 47 | it should "multiple insert and retrieve [ExceptionDriven]" in { 48 | val config: Config = ConfigFactory 49 | .load("postgres.properties") 50 | .withValue(ConfigurationKeys.MODE, ConfigValueFactory.fromAnyRef(ExceptionDriven.value)) 51 | val connector: Connector = new PostgresConnectorCreator().create(config) 52 | test(connector) 53 | } 54 | 55 | private def test(connector: Connector) = { 56 | val outerSchema = new Schema.Parser().parse(getClass.getClassLoader.getResourceAsStream("postgresmock.avsc")) 57 | val innerSchema = outerSchema.getField("four").schema() 58 | val schemas = Seq(innerSchema, outerSchema) 59 | .map(s => SchemaNormalization.parsingFingerprint64(s) -> s) 60 | connector.insert(schemas) 61 | connector.insert(schemas) 62 | connector.insert(schemas) 63 | connector.insert(schemas) 64 | val loaded: Seq[(Long, Schema)] = connector.fullLoad() 65 | assert(loaded.size == schemas.size) 66 | assert(loaded.forall(schemas.contains)) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /postgres/src/test/scala/it/agilelab/darwin/connector/postgres/PostgresMock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | case class PostgresMock(one: Int, two: String, three: Long, four: Postgres2Mock) 4 | -------------------------------------------------------------------------------- /project/Dependencies.scala: 
-------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | /** 4 | * @author andreaL 5 | */ 6 | object Dependencies { 7 | 8 | lazy val scalatest = "org.scalatest" %% "scalatest" % "3.1.1" % "test" 9 | lazy val avro = "org.apache.avro" % "avro" % "1.8.2" 10 | lazy val typesafe_config = "com.typesafe" % "config" % "1.3.1" 11 | lazy val avro4s = "com.sksamuel.avro4s" %% "avro4s-core" % "1.8.3" 12 | lazy val hbase_server = "org.apache.hbase" % "hbase-server" % "1.2.2" % "provided" 13 | lazy val hbase_common = "org.apache.hbase" % "hbase-common" % "1.2.2" % "provided" 14 | lazy val hadoop_common = "org.apache.hadoop" % "hadoop-common" % "2.7.7" % "provided" 15 | lazy val hbase2_server = "org.apache.hbase" % "hbase-server" % "2.1.10" % "provided" 16 | lazy val hbase2_common = "org.apache.hbase" % "hbase-common" % "2.1.10" % "provided" 17 | lazy val reflections = "org.reflections" % "reflections" % "0.9.11" % Test 18 | lazy val spark_core = "org.apache.spark" %% "spark-core" % "2.4.5" % "provided" 19 | lazy val spark_sql = "org.apache.spark" %% "spark-sql" % "2.4.5" % "provided" 20 | lazy val postgres_conn = "org.postgresql" % "postgresql" % "9.3-1100-jdbc4" 21 | lazy val junit = "org.junit.jupiter" % "junit-jupiter-api" % "5.3.2" % Test 22 | lazy val mongo = "org.mongodb.scala" %% "mongo-scala-driver" % "2.8.0" % "provided" 23 | lazy val mongoTest = "de.flapdoodle.embed" % "de.flapdoodle.embed.mongo" % "2.2.0" % "test" 24 | 25 | lazy val postgres_embedded = "ru.yandex.qatools.embed" % "postgresql-embedded" % "2.10" % Test 26 | 27 | lazy val akka = Seq( 28 | "com.typesafe.akka" %% "akka-stream" % "2.5.26", 29 | "com.typesafe.akka" %% "akka-slf4j" % "2.5.26", 30 | "com.typesafe.akka" %% "akka-stream-testkit" % "2.5.26" % Test, 31 | "com.typesafe.akka" %% "akka-http" % "10.1.11", 32 | "com.typesafe.akka" %% "akka-http-testkit" % "10.1.11" % Test, 33 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.11" 34 | ) 35 | 36 | lazy val logback = "ch.qos.logback" % "logback-classic" % "1.2.3" 37 | 38 | //we are using hbase testing utilities so we need to resolve the jars containing the tests 39 | //these jars are not resolved with default ivy behavior, also we need to enable in settings 40 | //the resolution of transitive dependencies for jars in test scope 41 | lazy val hbaseTestDependencies = Seq( 42 | ("org.apache.hbase" % "hbase-testing-util" % "1.2.2").classifier("tests") % Test, 43 | ("org.apache.hadoop" % "hadoop-common" % "2.7.7").classifier("tests") % Test, 44 | ("org.apache.hbase" % "hbase-server" % "1.2.2").classifier("tests") % Test, 45 | ("org.apache.hbase" % "hbase" % "1.2.2") % Test, 46 | ("org.apache.hbase" % "hbase-hadoop-compat" % "1.2.2") % Test, 47 | ("org.apache.hbase" % "hbase-hadoop-compat" % "1.2.2").classifier("tests") % Test, 48 | ("org.apache.hbase" % "hbase-hadoop2-compat" % "1.2.2") % Test, 49 | ("org.apache.hbase" % "hbase-hadoop2-compat" % "1.2.2").classifier("tests") % Test, 50 | ("org.apache.hbase" % "hbase-common" % "1.2.2").classifier("tests") % Test, 51 | ("org.apache.hbase" % "hbase" % "1.2.2").classifier("tests") % Test exclude ("org.apache.hbase", "hbase"), 52 | ("org.apache.hadoop" % "hadoop-hdfs" % "2.7.7").classifier("tests") % Test, 53 | ("org.apache.hadoop" % "hadoop-hdfs" % "2.7.7") % Test 54 | ) 55 | 56 | lazy val hbase2TestDependencies = Seq( 57 | ("org.apache.hbase" % "hbase-testing-util" % "2.1.10").classifier("tests") % Test, 58 | ("org.apache.hadoop" % "hadoop-common" % "2.7.7").classifier("tests") % 
Test, 59 | ("org.apache.hbase" % "hbase-server" % "2.1.10").classifier("tests") % Test, 60 | ("org.apache.hbase" % "hbase" % "2.1.10") % Test, 61 | ("org.apache.hbase" % "hbase-hadoop-compat" % "2.1.10") % Test, 62 | ("org.apache.hbase" % "hbase-hadoop-compat" % "2.1.10").classifier("tests") % Test, 63 | ("org.apache.hbase" % "hbase-hadoop2-compat" % "2.1.10") % Test, 64 | ("org.apache.hbase" % "hbase-hadoop2-compat" % "2.1.10").classifier("tests") % Test, 65 | ("org.apache.hbase" % "hbase-metrics" % "2.1.10") % Test, 66 | ("org.apache.hbase" % "hbase-metrics-api" % "2.1.10") % Test, 67 | ("org.apache.hbase" % "hbase-http" % "2.1.10") % Test, 68 | ("org.apache.hbase" % "hbase-common" % "2.1.10").classifier("tests") % Test, 69 | ("org.apache.hbase" % "hbase" % "2.1.10").classifier("tests") % Test exclude ("org.apache.hbase", "hbase"), 70 | ("org.apache.hadoop" % "hadoop-hdfs" % "2.7.7").classifier("tests") % Test, 71 | ("org.apache.hadoop" % "hadoop-hdfs" % "2.7.7") % Test 72 | ) 73 | 74 | lazy val confluentSchemaRegistryDependencies = Seq( 75 | "io.confluent" % "kafka-schema-registry-client" % "4.1.4", //this version is compatible with java7 76 | "org.apache.kafka" % "kafka-clients" % "2.2.2-cp3" % Provided 77 | ) 78 | 79 | lazy val wireMock = 80 | Seq("com.github.tomakehurst" % "wiremock-jre8" % "2.21.0" % Test, "xmlunit" % "xmlunit" % "1.6" % Test) 81 | 82 | lazy val restServer = core_deps ++ Seq(logback) ++ akka 83 | lazy val core_deps = Seq(scalatest, avro, typesafe_config, junit) 84 | lazy val mock_app_dep = core_deps ++ Seq(reflections, hbase_common) 85 | lazy val mock_conn = core_deps ++ Seq(reflections) 86 | lazy val hbase_conn_dep = core_deps ++ Seq(hbase_common, hbase_server, hadoop_common) 87 | lazy val hbase2_conn_dep = core_deps ++ Seq(hbase2_common, hbase2_server, hadoop_common) 88 | lazy val postgres_conn_dep = core_deps :+ postgres_conn :+ postgres_embedded 89 | lazy val spark_app = mock_app_dep ++ Seq(spark_core, spark_sql, hbase_common) 90 | lazy val mongo_conn = core_deps ++ Seq(mongo, mongoTest) 91 | 92 | } 93 | -------------------------------------------------------------------------------- /project/Settings.scala: -------------------------------------------------------------------------------- 1 | import org.scalastyle.sbt.ScalastylePlugin.autoImport._ 2 | import sbt.Keys._ 3 | import sbt.{ Def, _ } 4 | 5 | /** 6 | * @author andreaL 7 | */ 8 | object Settings { 9 | 10 | val SCALA_210 = Some((2L, 10L)) 11 | val SCALA_211 = Some((2L, 11L)) 12 | val SCALA_212 = Some((2L, 12L)) 13 | val SCALA_213 = Some((2L, 13L)) 14 | 15 | def scalacOptionsVersion(scalaVersion: String): Seq[String] = { 16 | Seq( 17 | "-deprecation", 18 | "-feature", 19 | "-unchecked", 20 | "-Xlint", 21 | "-Ywarn-dead-code", 22 | "-encoding", 23 | "UTF-8" 24 | ) ++ { 25 | CrossVersion.partialVersion(scalaVersion) match { 26 | case SCALA_210 => 27 | Seq("-target:jvm-1.7", "-Ywarn-inaccessible") 28 | case SCALA_211 => 29 | Seq("-Xfatal-warnings", "-Ywarn-inaccessible", "-Ywarn-unused-import", "-Ywarn-infer-any", "-target:jvm-1.7") 30 | case SCALA_212 => 31 | Seq("-Xfatal-warnings", "-Ywarn-inaccessible", "-Ywarn-unused-import", "-Ywarn-infer-any", "-target:jvm-1.8") 32 | case SCALA_213 => 33 | Seq("-Xfatal-warnings", "-Xlint:inaccessible", "-Ywarn-unused:imports", "-Xlint:infer-any", "-target:jvm-1.8") 34 | case version: Option[(Long, Long)] => 35 | throw new Exception(s"Unknown scala version: $version") 36 | } 37 | } 38 | } 39 | 40 | def scalaDocOptionsVersion(scalaVersion: String): Seq[String] = { 41 | 
CrossVersion.partialVersion(scalaVersion) match { 42 | case SCALA_210 | SCALA_211 => scalacOptionsVersion(scalaVersion) 43 | case SCALA_212 => scalacOptionsVersion(scalaVersion) ++ Seq("-no-java-comments") 44 | case SCALA_213 => scalacOptionsVersion(scalaVersion) ++ Seq("-no-java-comments") 45 | case version: Option[(Long, Long)] => throw new Exception(s"Unknown scala version: $version") 46 | } 47 | } 48 | 49 | def javacOptionsVersion(scalaVersion: String): Seq[String] = { 50 | CrossVersion.partialVersion(scalaVersion) match { 51 | case SCALA_210 => 52 | Seq("-source", "1.7", "-target", "1.7") 53 | case SCALA_211 => 54 | Seq("-source", "1.7", "-target", "1.7") 55 | case SCALA_212 => 56 | Seq("-source", "1.8", "-target", "1.8") 57 | case SCALA_213 => 58 | Seq("-source", "1.8", "-target", "1.8") 59 | case version: Option[(Long, Long)] => 60 | throw new Exception(s"Unknown scala version: $version") 61 | } 62 | } 63 | 64 | lazy val projectSettings = Seq( 65 | organization := "it.agilelab", 66 | licenses += ("Apache-2.0", url("https://www.apache.org/licenses/LICENSE-2.0.txt")), 67 | homepage := Some(url("https://github.com/agile-lab-dev/darwin")), 68 | description := "Avro Schema Evolution made easy", 69 | javacOptions ++= javacOptionsVersion(scalaVersion.value), 70 | scalacOptions ++= scalacOptionsVersion(scalaVersion.value), 71 | Compile / doc / scalacOptions ++= scalaDocOptionsVersion(scalaVersion.value), 72 | versionScheme := Some("early-semver"), 73 | useCoursier := false, 74 | developers := List( 75 | Developer("amurgia", "Antonio Murgia", "antonio.murgia@agilelab.it", url("https://github.com/tmnd1991")), 76 | Developer("lpirazzini", "Lorenzo Pirazzini", "lorenzo.pirazzini@agilelab.it", url("https://github.com/SpyQuel")), 77 | Developer("rcoluccio", "Roberto Coluccio", "roberto.coluccio@agilelab.it", url("https://github.com/erond")), 78 | Developer("alatella", "Andrea Latella", "andrea.latella@agilelab.it", url("https://github.com/andr3a87")), 79 | Developer("cventrella", "Carlo Ventrella", "carlo.ventrella@agilelab.it", url("https://www.agilelab.it")), 80 | Developer("dicardi", "Davide Icardi", "davide.icardi@agilelab.it", url("https://github.com/davideicardi")), 81 | Developer("nbidotti", "Nicolò Bidotti", "nicolo.bidotti@agilelab.it", url("https://github.com/nicolobidotti")), 82 | Developer("andrea-rockt", "Andrea Fonti", "andrea.fonti@agilelab.it", url("https://github.com/andrea-rockt")) 83 | ) 84 | ) 85 | 86 | val clouderaHadoopReleaseRepo = "cloudera" at "https://repository.cloudera.com/artifactory/cloudera-repos/" 87 | val confluent = "confluent" at "https://packages.confluent.io/maven/" 88 | 89 | lazy val customResolvers = Seq( 90 | clouderaHadoopReleaseRepo, 91 | confluent 92 | ) 93 | 94 | lazy val buildSettings: Seq[SettingsDefinition] = { 95 | //this is an hack to resolve correctly rs-api 96 | // [warn] [FAILED ] javax.ws.rs#javax.ws.rs-api;2.1!javax.ws.rs-api.${packaging.type}: (0ms) 97 | // https://github.com/sbt/sbt/issues/3618 98 | sys.props += "packaging.type" -> "jar" 99 | Seq( 100 | resolvers ++= customResolvers, 101 | scalaVersion := Versions.scala 102 | ) 103 | } 104 | 105 | lazy val commonSettings = projectSettings ++ buildSettings ++ scalastyleSettings 106 | 107 | lazy val hbaseTestSettings: SettingsDefinition = { 108 | //enable resolution of transitive dependencies of jars containing tests 109 | //needed to run tests over hbase minicluster 110 | Test / transitiveClassifiers := Seq(Artifact.TestsClassifier, Artifact.SourceClassifier) 111 | libraryDependencies ++= 
Dependencies.hbaseTestDependencies 112 | } 113 | 114 | lazy val hbase2TestSettings: SettingsDefinition = { 115 | //enable resolution of transitive dependencies of jars containing tests 116 | //needed to run tests over hbase minicluster 117 | Test / transitiveClassifiers := Seq(Artifact.TestsClassifier, Artifact.SourceClassifier) 118 | libraryDependencies ++= Dependencies.hbase2TestDependencies 119 | } 120 | 121 | lazy val notPublishSettings = Seq(publish / skip := true) 122 | 123 | lazy val scalastyleSettings = Seq(scalastyleFailOnWarning := true) 124 | } 125 | -------------------------------------------------------------------------------- /project/Versions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * @author andreaL 3 | */ 4 | object Versions { 5 | val scala_211 = "2.11.12" 6 | val scala_210 = "2.10.7" 7 | val scala = "2.12.13" 8 | val scala_213 = "2.13.5" 9 | val crossScalaVersions = Seq(scala_210, scala_211, scala, scala_213) 10 | } 11 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.9.8 2 | -------------------------------------------------------------------------------- /project/plugin.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") 2 | addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.5.12") 3 | -------------------------------------------------------------------------------- /publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CI_RELEASE='+publishSigned;+darwin-hbase2-connector/publishSigned' 3 | export CI_SNAPSHOT_RELEASE='+publish;+darwin-hbase2-connector/publish' 4 | sbt -v ci-release 5 | -------------------------------------------------------------------------------- /rest-server/src/main/postman/darwinrest.postman_collection.json: -------------------------------------------------------------------------------- 1 | { 2 | "info": { 3 | "_postman_id": "dcaadeb7-ecb8-4bc9-9d4d-47fe6a2857df", 4 | "name": "darwinrest", 5 | "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json" 6 | }, 7 | "item": [ 8 | { 9 | "name": "get-all-schemas", 10 | "request": { 11 | "method": "GET", 12 | "header": [], 13 | "body": { 14 | "mode": "raw", 15 | "raw": "" 16 | }, 17 | "url": { 18 | "raw": "localhost:8080/schemas/", 19 | "host": [ 20 | "localhost" 21 | ], 22 | "port": "8080", 23 | "path": [ 24 | "schemas", 25 | "" 26 | ] 27 | } 28 | }, 29 | "response": [] 30 | }, 31 | { 32 | "name": "get-one-schema", 33 | "request": { 34 | "method": "GET", 35 | "header": [], 36 | "body": { 37 | "mode": "raw", 38 | "raw": "" 39 | }, 40 | "url": { 41 | "raw": "localhost:8080/schemas/1", 42 | "host": [ 43 | "localhost" 44 | ], 45 | "port": "8080", 46 | "path": [ 47 | "schemas", 48 | "1" 49 | ] 50 | } 51 | }, 52 | "response": [] 53 | }, 54 | { 55 | "name": "post-a-schema", 56 | "request": { 57 | "method": "POST", 58 | "header": [ 59 | { 60 | "key": "Content-Type", 61 | "name": "Content-Type", 62 | "value": "application/json", 63 | "type": "text" 64 | } 65 | ], 66 | "body": { 67 | "mode": "raw", 68 | "raw": "[{\n\t\"type\": \"array\",\n\t\"items\": \"string\"\n}]" 69 | }, 70 | "url": { 71 | "raw": "localhost:8080/schemas/", 72 | "host": [ 73 | "localhost" 74 | ], 75 | "port": "8080", 76 | "path": [ 77 | 
"schemas", 78 | "" 79 | ] 80 | } 81 | }, 82 | "response": [] 83 | } 84 | ] 85 | } -------------------------------------------------------------------------------- /rest-server/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | akka { 2 | loggers = ["akka.event.slf4j.Slf4jLogger"] 3 | loglevel = "DEBUG" 4 | logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" 5 | } 6 | 7 | darwin { 8 | type = "lazy" 9 | connector = "mock" 10 | } 11 | 12 | darwin-rest{ 13 | interface = "localhost" 14 | port = 8080 15 | } -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/DarwinService.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.actor.ActorSystem 4 | import akka.http.scaladsl.model.{ HttpResponse, StatusCodes } 5 | import akka.http.scaladsl.server.directives.DebuggingDirectives 6 | import akka.http.scaladsl.server.{ Directives, Route } 7 | import akka.stream.ActorMaterializer 8 | import akka.stream.Attributes.LogLevels 9 | import it.agilelab.darwin.manager.AvroSchemaManager 10 | import org.apache.avro.Schema 11 | 12 | trait DarwinService extends Service with Directives with DebuggingDirectives with JsonSupport { 13 | 14 | val manager: AvroSchemaManager 15 | 16 | override def route: Route = logRequestResult(("darwin", LogLevels.Debug)) { 17 | get { 18 | path("schemas" / LongNumber.?) { 19 | case Some(id) => 20 | manager.getSchema(id) match { 21 | case Some(schema) => complete(schema) 22 | case None => 23 | complete { 24 | HttpResponse(StatusCodes.NotFound) 25 | } 26 | } 27 | case None => complete(manager.getAll) 28 | } 29 | } ~ post { 30 | path("schemas" / PathEnd) { 31 | entity(as[Seq[Schema]]) { schemas => 32 | complete { 33 | manager.registerAll(schemas).map(_._1) 34 | } 35 | } 36 | } 37 | } 38 | } 39 | } 40 | 41 | object DarwinService { 42 | def apply(asm: AvroSchemaManager)(implicit s: ActorSystem, m: ActorMaterializer): DarwinService = new DarwinService { 43 | implicit override val materializer: ActorMaterializer = m 44 | implicit override val system: ActorSystem = s 45 | override val manager: AvroSchemaManager = asm 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/HttpApp.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import java.util.concurrent.Executor 4 | 5 | import akka.actor.ActorSystem 6 | import akka.http.scaladsl.Http 7 | import akka.http.scaladsl.server.RouteConcatenation 8 | import akka.stream.ActorMaterializer 9 | import com.typesafe.config.Config 10 | import it.agilelab.darwin.common.Logging 11 | 12 | import scala.concurrent.duration.Duration 13 | import scala.concurrent.{ Await, ExecutionContext, ExecutionContextExecutor } 14 | 15 | class HttpApp(config: Config, services: Service*)(implicit system: ActorSystem, materializer: ActorMaterializer) 16 | extends Logging { 17 | def run(): Unit = { 18 | val interface = config.getString("interface") 19 | val port = config.getInt("port") 20 | 21 | val route = RouteConcatenation.concat(services.map(_.route): _*) 22 | 23 | log.info("Starting http server on {}:{}", interface, port) 24 | val eventuallyBinding = Http().bindAndHandle(route, interface, port) 25 | val binding = 
Await.result(eventuallyBinding, Duration.Inf) 26 | log.info("Started http server on {}:{}", interface, port) 27 | 28 | val shutdownThread = new Thread(new Runnable { 29 | override def run(): Unit = { 30 | implicit val ec: ExecutionContext = newSameThreadExecutor 31 | log.info("Received shutdown hook") 32 | 33 | val termination = for { 34 | _ <- binding.unbind() 35 | terminated <- system.terminate() 36 | } yield terminated 37 | 38 | Await.ready(termination, Duration.Inf) 39 | log.info("Shutdown") 40 | } 41 | }) 42 | 43 | shutdownThread.setName("shutdown") 44 | 45 | Runtime.getRuntime.addShutdownHook(shutdownThread) 46 | 47 | log.info("registered shutdown hook") 48 | } 49 | 50 | private def newSameThreadExecutor: ExecutionContextExecutor = ExecutionContext.fromExecutor(new Executor { 51 | override def execute(command: Runnable): Unit = command.run() 52 | }) 53 | } 54 | 55 | object HttpApp { 56 | def apply(config: Config, services: Service*)(implicit 57 | system: ActorSystem, 58 | materializer: ActorMaterializer 59 | ): HttpApp = 60 | new HttpApp(config, services: _*) 61 | } 62 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/JsonSupport.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport 4 | import org.apache.avro.Schema 5 | import spray.json.{ DefaultJsonProtocol, JsObject, JsString, JsValue, JsonParser, PrettyPrinter, RootJsonFormat } 6 | 7 | trait JsonSupport extends SprayJsonSupport with DefaultJsonProtocol { 8 | implicit val printer: PrettyPrinter.type = PrettyPrinter 9 | 10 | implicit val schemaFormat: RootJsonFormat[Schema] = new RootJsonFormat[Schema] { 11 | 12 | override def write(obj: Schema): JsValue = JsonParser(obj.toString(true)) 13 | 14 | override def read(json: JsValue): Schema = new Schema.Parser().parse(json.prettyPrint) 15 | } 16 | 17 | implicit val schemaWithIdFormat: RootJsonFormat[(Long, Schema)] = new RootJsonFormat[(Long, Schema)] { 18 | 19 | override def write(obj: (Long, Schema)): JsValue = JsObject( 20 | Map( 21 | "id" -> JsString(obj._1.toString), 22 | "schema" -> schemaFormat.write(obj._2) 23 | ) 24 | ) 25 | 26 | override def read(json: JsValue): (Long, Schema) = json match { 27 | case JsObject(fields) => 28 | val id = fields.get("id") match { 29 | case Some(JsString(number)) => number 30 | case _ => throw new Exception("Id field should be a long") 31 | } 32 | 33 | val schema = fields.get("schema") match { 34 | case Some(x @ JsObject(_)) => x 35 | case _ => throw new Exception("schema should be an object") 36 | } 37 | 38 | (id.toLong, schemaFormat.read(schema)) 39 | case _ => throw new Exception("should be an object") 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/Main.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.actor.ActorSystem 4 | import akka.stream.ActorMaterializer 5 | import com.typesafe.config.ConfigFactory 6 | import it.agilelab.darwin.manager.AvroSchemaManagerFactory 7 | 8 | object Main { 9 | 10 | def main(args: Array[String]): Unit = { 11 | 12 | implicit val actorSystem: ActorSystem = ActorSystem() 13 | implicit val materializer: ActorMaterializer = ActorMaterializer() 14 | 15 | val config = 
ConfigFactory.load() 16 | val schemaManagerConfig = config.getConfig("darwin") 17 | val restConfig = config.getConfig("darwin-rest") 18 | val schemaManager = AvroSchemaManagerFactory.initialize(schemaManagerConfig) 19 | 20 | HttpApp(restConfig, DarwinService(schemaManager)).run() 21 | 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/Service.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.actor.ActorSystem 4 | import akka.http.scaladsl.server.Route 5 | import akka.stream.ActorMaterializer 6 | 7 | trait Service { 8 | implicit val system: ActorSystem 9 | implicit val materializer: ActorMaterializer 10 | 11 | def route: Route 12 | } 13 | -------------------------------------------------------------------------------- /rest/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.rest.RestConnectorCreator 2 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/JsonProtocol.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import java.io.InputStream 4 | 5 | import org.apache.avro.Schema 6 | import org.codehaus.jackson.map.ObjectMapper 7 | import org.codehaus.jackson.node.JsonNodeFactory 8 | import it.agilelab.darwin.common.compat._ 9 | 10 | trait JsonProtocol { 11 | val objectMapper = new ObjectMapper() 12 | 13 | def toJson(schemas: Seq[(Long, Schema)]): String = { 14 | 15 | val data = schemas.map { case (_, schema) => 16 | objectMapper.readTree(schema.toString) 17 | }.foldLeft(JsonNodeFactory.instance.arrayNode()) { case (array, node) => 18 | array.add(node) 19 | array 20 | } 21 | 22 | objectMapper.writeValueAsString(data) 23 | } 24 | 25 | def toSeqOfIdSchema(in: InputStream): Seq[(Long, Schema)] = { 26 | val node = objectMapper.readTree(in) 27 | 28 | node.getElements 29 | .toScala() 30 | .map { node => 31 | val id = node.get("id").asText().toLong 32 | val schemaNode = node.get("schema") 33 | 34 | val schemaToString = objectMapper.writeValueAsString(schemaNode) 35 | 36 | val parser = new Schema.Parser() 37 | 38 | val schema = parser.parse(schemaToString) 39 | 40 | (id, schema) 41 | } 42 | .toVector 43 | } 44 | 45 | def toSchema(in: InputStream): Schema = { 46 | val parser = new Schema.Parser() 47 | parser.parse(in) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/RestConnector.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.Connector 5 | import org.apache.avro.Schema 6 | import scalaj.http.Http 7 | 8 | class RestConnector(options: RestConnectorOptions, config: Config) extends Connector with JsonProtocol { 9 | 10 | override def fullLoad(): Seq[(Long, Schema)] = { 11 | Http(options.endpoint("schemas/")).execute(toSeqOfIdSchema).body 12 | } 13 | 14 | override def insert(schemas: Seq[(Long, Schema)]): Unit = { 15 | 16 | val response = Http(options.endpoint("schemas/")) 17 | .header("Content-Type", 
"application/json") 18 | .postData(toJson(schemas)) 19 | .asString 20 | 21 | if (response.isError) { 22 | throw new Exception(response.body) 23 | } 24 | 25 | } 26 | 27 | override def createTable(): Unit = {} 28 | 29 | override def tableExists(): Boolean = true 30 | 31 | override def tableCreationHint(): String = "" 32 | 33 | override def findSchema(id: Long): Option[Schema] = { 34 | 35 | val response = Http(options.endpoint(s"schemas/$id")).execute(toSchema) 36 | 37 | if (response.code == 404) { 38 | None 39 | } else { 40 | Some(response.body) 41 | } 42 | } 43 | 44 | /** 45 | * Retrieves the latest schema for a given string identifier (not to be confused with the fingerprint id). 46 | * This API might not be implemented by all connectors, which should return None 47 | */ 48 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = None 49 | } 50 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/RestConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, Logging } 5 | 6 | class RestConnectorCreator extends ConnectorCreator with Logging { 7 | 8 | override def create(config: Config): Connector = { 9 | log.debug("creating rest connector") 10 | 11 | val restOptions = RestConnectorOptions.fromConfig(config) 12 | log.info("rest options are {}", restOptions) 13 | 14 | val rest = new RestConnector(restOptions, config) 15 | log.debug("created rest connector") 16 | rest 17 | } 18 | 19 | /** 20 | * @return the name of the Connector 21 | */ 22 | override def name(): String = "rest" 23 | } 24 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/RestConnectorOptions.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import java.net.URI 4 | 5 | import com.typesafe.config.Config 6 | 7 | case class RestConnectorOptions(protocol: String, host: String, port: Int, basePath: String) { 8 | def endpoint(path: String): String = 9 | URI.create(s"$protocol://$host:$port").resolve(basePath).resolve(path).toString 10 | } 11 | 12 | object RestConnectorOptions { 13 | 14 | private val PROTOCOL = "protocol" 15 | private val HOST = "host" 16 | private val PORT = "port" 17 | private val BASE_PATH = "basePath" 18 | 19 | def fromConfig(config: Config): RestConnectorOptions = 20 | RestConnectorOptions( 21 | config.getString(PROTOCOL), 22 | config.getString(HOST), 23 | config.getInt(PORT), 24 | config.getString(BASE_PATH) 25 | ) 26 | } 27 | -------------------------------------------------------------------------------- /rest/src/main/scala/scalaj/http/DigestAuth.scala: -------------------------------------------------------------------------------- 1 | package scalaj.http 2 | // scalastyle:off 3 | import java.nio.charset.StandardCharsets 4 | import java.security.MessageDigest 5 | import java.util.Locale 6 | 7 | import scala.collection.immutable.VectorBuilder 8 | import scala.util.Random 9 | 10 | case class WwwAuthenticate(authType: String, params: Map[String, String]) 11 | object DigestAuth { 12 | 13 | def trimQuotes(str: String): String = { 14 | if (str.length >= 2 && str.charAt(0) == '"' && str.charAt(str.length - 1) == '"') { 15 | 
str.substring(1, str.length - 1) 16 | } else { 17 | str 18 | } 19 | } 20 | 21 | // need to parse one char at a time rather than split on comma because values can be 22 | // quoted comma separated strings 23 | def splitParams(params: String): IndexedSeq[String] = { 24 | val builder = new VectorBuilder[String]() 25 | var start = 0 26 | var i = 0 27 | var quotes = 0 28 | while (i < params.length) { 29 | params.charAt(i) match { 30 | case '\\' => i += 1 31 | case '"' => quotes += 1 32 | case ',' => 33 | if (quotes % 2 == 0) { 34 | val item = params.substring(start, i).trim() 35 | if (item.length > 0) { 36 | builder += item 37 | } 38 | start = i + 1 39 | } 40 | case _ => // nada 41 | } 42 | i += 1 43 | } 44 | builder += params.substring(start).trim() 45 | builder.result() 46 | } 47 | 48 | def getAuthDetails(headerValue: String): Option[WwwAuthenticate] = { 49 | headerValue.indexOf(' ') match { 50 | case indexOfSpace if indexOfSpace > 0 => 51 | val authType = headerValue.substring(0, indexOfSpace) 52 | val params: Map[String, String] = splitParams(headerValue.substring(indexOfSpace + 1)) 53 | .flatMap(param => { 54 | param.split("=", 2) match { 55 | case Array(key, value) => Some(key.trim.toLowerCase(Locale.ENGLISH) -> trimQuotes(value.trim)) 56 | case _ => None 57 | } 58 | }) 59 | .toMap 60 | Some(WwwAuthenticate(authType, params)) 61 | case _ => None 62 | } 63 | } 64 | 65 | val HexArray = "0123456789abcdef".toCharArray() 66 | 67 | def hex(bytes: Array[Byte]): String = { 68 | val hexChars = new Array[Char](bytes.length * 2) 69 | var j = 0 70 | while (j < bytes.length) { 71 | val v = bytes(j) & 0xff 72 | hexChars(j * 2) = HexArray(v >>> 4) 73 | hexChars(j * 2 + 1) = HexArray(v & 0x0f) 74 | j += 1 75 | } 76 | new String(hexChars) 77 | } 78 | 79 | val DigestPrefix = "Digest" 80 | 81 | def createHeaderValue( 82 | username: String, 83 | password: String, 84 | method: String, 85 | uri: String, 86 | content: Array[Byte], 87 | serverParams: Map[String, String], 88 | testClientNonce: Option[String] = None 89 | ): Option[String] = { 90 | val algorithm = serverParams.getOrElse("algorithm", "MD5") 91 | val digester = Option(MessageDigest.getInstance(algorithm)).getOrElse( 92 | throw new Exception("unsupported digest algorithm" + algorithm) 93 | ) 94 | def hexDigest(str: String): String = hex(digester.digest(str.getBytes(StandardCharsets.ISO_8859_1))) 95 | for { 96 | realm <- serverParams.get("realm") 97 | nonce <- serverParams.get("nonce") 98 | } yield { 99 | val qopOpt: Option[String] = serverParams 100 | .get("qop") 101 | .flatMap(serverQop => { 102 | val serverQopValues = serverQop.split(',').map(_.trim) 103 | if (serverQopValues.contains("auth")) Some("auth") 104 | else if (serverQopValues.contains("auth-int")) Some("auth-int") 105 | else None 106 | }) 107 | val a1 = username + ":" + realm + ":" + password 108 | val hashA1: String = hexDigest(a1) 109 | val a2 = method + ":" + uri + { 110 | if (qopOpt.exists(_ == "auth-int")) ":" + hex(digester.digest(content)) else "" 111 | } 112 | val hashA2: String = hexDigest(a2) 113 | 114 | val (nonceCountOpt, clientNonceOpt, a3) = qopOpt match { 115 | case Some(qop) => 116 | val nc = "00000001" 117 | val clientNonce = testClientNonce.getOrElse({ 118 | val bytes = new Array[Byte](16) 119 | Random.nextBytes(bytes) 120 | hex(bytes) 121 | }) 122 | val a3 = hashA1 + ":" + nonce + ":" + nc + ":" + clientNonce + ":" + qop + ":" + hashA2 123 | (Some(nc), Some(clientNonce), a3) 124 | case _ => 125 | (None, None, hashA1 + ":" + nonce + ":" + hashA2) 126 | } 127 | val 
hashA3: String = hexDigest(a3) 128 | val sb = new StringBuilder(DigestPrefix).append(" ") 129 | def appendQuoted(key: String, value: String): StringBuilder = { 130 | sb.append(key + "=\"").append(value).append("\"") 131 | } 132 | appendQuoted("username", username).append(", ") 133 | appendQuoted("realm", realm).append(", ") 134 | appendQuoted("nonce", nonce).append(", ") 135 | serverParams 136 | .get("opaque") 137 | .foreach(opaque => { 138 | appendQuoted("opaque", opaque).append(", ") 139 | }) 140 | appendQuoted("algorithm", algorithm).append(", ") 141 | appendQuoted("uri", uri).append(", ") 142 | for { 143 | qop <- qopOpt 144 | nonceCount <- nonceCountOpt 145 | clientNonce <- clientNonceOpt 146 | } { 147 | appendQuoted("qop", qop).append(", ") 148 | appendQuoted("nc", nonceCount).append(", ") 149 | appendQuoted("cnonce", clientNonce).append(", ") 150 | } 151 | appendQuoted("response", hashA3) 152 | sb.toString() 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /rest/src/main/scala/scalaj/http/OAuth.scala: -------------------------------------------------------------------------------- 1 | package scalaj.http 2 | // scalastyle:off 3 | /** scalaj.http 4 | * Copyright 2010 Jonathan Hoffman 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import java.net.URL 20 | 21 | case class Token(key: String, secret: String) 22 | 23 | /** utility methods used by [[scalaj.http.HttpRequest]] */ 24 | object OAuth { 25 | import java.net.URI 26 | import javax.crypto.Mac 27 | import javax.crypto.spec.SecretKeySpec 28 | val MAC = "HmacSHA1" 29 | 30 | def sign(req: HttpRequest, consumer: Token, token: Option[Token], verifier: Option[String]): HttpRequest = { 31 | req.option(conn => { 32 | val baseParams: Seq[(String, String)] = Seq( 33 | ("oauth_timestamp", (System.currentTimeMillis / 1000).toString), 34 | ("oauth_nonce", System.currentTimeMillis.toString) 35 | ) 36 | 37 | var (oauthParams, signature) = getSig(baseParams, req, consumer, token, verifier) 38 | 39 | oauthParams +:= (("oauth_signature", signature)) 40 | conn.setRequestProperty( 41 | "Authorization", 42 | "OAuth " + oauthParams.map(p => p._1 + "=\"" + percentEncode(p._2) + "\"").mkString(",") 43 | ) 44 | }) 45 | } 46 | 47 | def getSig( 48 | baseParams: Seq[(String, String)], 49 | req: HttpRequest, 50 | consumer: Token, 51 | token: Option[Token], 52 | verifier: Option[String] 53 | ): (Seq[(String, String)], String) = { 54 | var oauthParams = ("oauth_version", "1.0") +: ("oauth_consumer_key", consumer.key) +: ( 55 | "oauth_signature_method", 56 | "HMAC-SHA1" 57 | ) +: baseParams 58 | 59 | token.foreach { t => 60 | oauthParams +:= (("oauth_token", t.key)) 61 | } 62 | 63 | verifier.foreach { v => 64 | oauthParams +:= (("oauth_verifier", v)) 65 | } 66 | // oauth1.0 specifies that only querystring and x-www-form-urlencoded body parameters should be included in signature 67 | // req.params from multi-part requests are included in the multi-part request body and should NOT be included 68 | val allTheParams = if (req.connectFunc.isInstanceOf[MultiPartConnectFunc]) { 69 | oauthParams 70 | } else { 71 | req.params ++ oauthParams 72 | } 73 | 74 | val baseString = Seq(req.method.toUpperCase, normalizeUrl(new URL(req.url)), normalizeParams(allTheParams)) 75 | .map(percentEncode) 76 | .mkString("&") 77 | 78 | val keyString = percentEncode(consumer.secret) + "&" + token.map(t => percentEncode(t.secret)).getOrElse("") 79 | val key = new SecretKeySpec(keyString.getBytes(HttpConstants.utf8), MAC) 80 | val mac = Mac.getInstance(MAC) 81 | mac.init(key) 82 | val text = baseString.getBytes(HttpConstants.utf8) 83 | (oauthParams, HttpConstants.base64(mac.doFinal(text))) 84 | } 85 | 86 | private def normalizeParams(params: Seq[(String, String)]) = { 87 | percentEncode(params).sortWith(_ < _).mkString("&") 88 | } 89 | 90 | private def normalizeUrl(url: URL) = { 91 | val uri = new URI(url.toString) 92 | val scheme = uri.getScheme().toLowerCase() 93 | var authority = uri.getAuthority().toLowerCase() 94 | val dropPort = (scheme.equals("http") && uri.getPort() == 80) || (scheme.equals("https") && uri.getPort() == 443) 95 | if (dropPort) { 96 | // find the last : in the authority 97 | val index = authority.lastIndexOf(":") 98 | if (index >= 0) { 99 | authority = authority.substring(0, index) 100 | } 101 | } 102 | var path = uri.getRawPath() 103 | if (path == null || path.length() <= 0) { 104 | path = "/" // conforms to RFC 2616 section 3.2.2 105 | } 106 | // we know that there is no query and no fragment here. 
107 | scheme + "://" + authority + path 108 | } 109 | 110 | def percentEncode(params: Seq[(String, String)]): Seq[String] = { 111 | params.map(p => percentEncode(p._1) + "=" + percentEncode(p._2)) 112 | } 113 | 114 | def percentEncode(s: String): String = { 115 | if (s == null) "" 116 | else { 117 | HttpConstants.urlEncode(s, HttpConstants.utf8).replace("+", "%20").replace("*", "%2A").replace("%7E", "~") 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /rest/src/test/scala/it/agilelab/darwin/connector/rest/RestConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import com.github.tomakehurst.wiremock.WireMockServer 4 | import com.github.tomakehurst.wiremock.client.WireMock._ 5 | import com.github.tomakehurst.wiremock.core.WireMockConfiguration 6 | import com.typesafe.config.ConfigFactory 7 | import org.apache.avro.{ Schema, SchemaBuilder } 8 | import org.scalatest.{ BeforeAndAfterEach, OptionValues } 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | 11 | class RestConnectorSuite extends AnyFlatSpec with BeforeAndAfterEach with OptionValues { 12 | 13 | private val wireMockServer = new WireMockServer(WireMockConfiguration.wireMockConfig().dynamicPort()) 14 | 15 | private def config(port: Int) = ConfigFactory.parseString(s""" 16 | | protocol: "http" 17 | | host: "localhost" 18 | | port: ${wireMockServer.port()} 19 | | basePath: "/" 20 | """.stripMargin) 21 | 22 | override def beforeEach(): Unit = { 23 | wireMockServer.start() 24 | } 25 | 26 | override def afterEach(): Unit = { 27 | wireMockServer.stop() 28 | } 29 | 30 | "rest connector" should "get all schemas" in { 31 | 32 | val connector = new RestConnectorCreator().create(config(wireMockServer.port())) 33 | 34 | val schemaId1 = -3577210133426481249L 35 | val schemaId2 = 5920968314789803198L 36 | 37 | wireMockServer.stubFor { 38 | get(urlPathEqualTo("/schemas/")).willReturn { 39 | aResponse().withBody { 40 | s""" 41 | |[{ 42 | | "id": "$schemaId1", 43 | | "schema": { 44 | | "items": "string", 45 | | "type": "array" 46 | | } 47 | | }, { 48 | | "id": "$schemaId2", 49 | | "schema": { 50 | | "items": "int", 51 | | "type": "array" 52 | | } 53 | | }] 54 | """.stripMargin 55 | } 56 | } 57 | } 58 | 59 | val result = connector.fullLoad() 60 | 61 | assert(result.contains((schemaId1, SchemaBuilder.array().items(Schema.create(Schema.Type.STRING))))) 62 | assert(result.contains((schemaId2, SchemaBuilder.array().items(Schema.create(Schema.Type.INT))))) 63 | assert(result.size == 2) 64 | 65 | wireMockServer.verify { 66 | getRequestedFor(urlPathEqualTo("/schemas/")) 67 | } 68 | 69 | } 70 | 71 | "rest connector" should "get one schemas" in { 72 | 73 | val schemaId = -3577210133426481249L 74 | val connector = new RestConnectorCreator().create(config(wireMockServer.port())) 75 | 76 | wireMockServer.stubFor { 77 | get(urlPathEqualTo(s"/schemas/$schemaId")).willReturn { 78 | aResponse().withBody { 79 | """ 80 | | { 81 | | "items": "string", 82 | | "type": "array" 83 | | } 84 | """.stripMargin 85 | } 86 | } 87 | } 88 | 89 | val result = connector.findSchema(schemaId).value 90 | 91 | val expected = SchemaBuilder.array().items(Schema.create(Schema.Type.STRING)) 92 | 93 | assert(result == expected) 94 | 95 | wireMockServer.verify { 96 | getRequestedFor(urlPathEqualTo(s"/schemas/$schemaId")) 97 | } 98 | 99 | } 100 | 101 | "rest connector" should "post schemas" in { 102 | val connector = new 
RestConnectorCreator().create(config(wireMockServer.port())) 103 | 104 | val schema = SchemaBuilder.array().items(Schema.create(Schema.Type.INT)) 105 | 106 | wireMockServer.stubFor { 107 | post(urlEqualTo("/schemas/")).withHeader("Content-Type", equalTo("application/json")) 108 | } 109 | 110 | connector.insert(Seq((0, schema))) 111 | 112 | val request = """[{"type":"array","items":"int"}]""" 113 | 114 | wireMockServer.verify { 115 | postRequestedFor(urlEqualTo("/schemas/")).withRequestBody(equalTo(request)) 116 | } 117 | 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /spark-application/src/dist/conf/application.conf: -------------------------------------------------------------------------------- 1 | spark.yarn.maxAppAttempts: 1 -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/GenericMainClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark 2 | 3 | import java.text.SimpleDateFormat 4 | import java.util.Date 5 | 6 | import com.typesafe.config.{ Config, ConfigFactory } 7 | import org.apache.hadoop.fs.FileSystem 8 | import org.apache.spark.sql.SparkSession 9 | import org.slf4j.{ Logger, LoggerFactory } 10 | import scala.collection.JavaConverters._ 11 | 12 | trait GenericMainClass { 13 | self: SparkManager => 14 | 15 | val genericMainClassLogger: Logger = LoggerFactory.getLogger("SparkManager") 16 | 17 | private def makeFileSystem(session: SparkSession): FileSystem = { 18 | if (session.sparkContext.isLocal) { 19 | FileSystem.getLocal(session.sparkContext.hadoopConfiguration) 20 | } else { 21 | FileSystem.get(session.sparkContext.hadoopConfiguration) 22 | } 23 | } 24 | 25 | /** 26 | * @param settings configuration loaded from multiple ".conf" files: the default ones as per typesafe Config and 27 | * another ".conf" file that has the same name as the application 28 | * @param fs the default file system of the application executed context 29 | * @param sparkSession the sparkSession that has been created and will be used in the application 30 | * @return true if the application ends successfully false otherwise 31 | */ 32 | protected def runJob(settings: Config)(implicit fs: FileSystem, sparkSession: SparkSession): Int 33 | 34 | /** 35 | * Override in order to handle specific exceptions 36 | */ 37 | protected def handleException(exception: Throwable, applicationSettings: Config) 38 | 39 | /** 40 | * It executes the following ordered steps: 41 | *