├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .jvmopts ├── .scalafmt.conf ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── build.sbt ├── bump-version.sh ├── common └── src │ ├── main │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ ├── common │ │ ├── Connector.scala │ │ ├── ConnectorCreator.scala │ │ ├── ConnectorFactory.scala │ │ ├── DarwinConcurrentHashMap.scala │ │ ├── JavaVersion.scala │ │ ├── Logging.scala │ │ ├── SchemaReader.scala │ │ ├── compat │ │ │ └── package.scala │ │ └── package.scala │ │ └── manager │ │ ├── SchemaPayloadPair.java │ │ ├── exception │ │ ├── ConnectorNotFoundException.scala │ │ └── DarwinException.scala │ │ └── util │ │ ├── AvroSingleObjectEncodingUtils.scala │ │ ├── ByteArrayUtils.scala │ │ ├── ConfigUtil.scala │ │ ├── ConfigurationKeys.scala │ │ └── ConfluentSingleObjectEncoding.scala │ └── test │ ├── resources │ └── test │ │ ├── MockClassAlone.avsc │ │ └── MockClassParent.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ ├── common │ ├── CompatSpec.scala │ ├── DarwinConcurrentHashMapRunner.scala │ ├── DarwinConcurrentHashMapSpec.scala │ ├── DarwinJava8ConcurrentHashMapSpec.scala │ └── DarwinTrieConcurrentHashMapSpec.scala │ └── manager │ └── util │ ├── AvroSingleObjectEncodingUtilsSpec.scala │ ├── BigEndianAvroSingleObjectEncodingUtilsSpec.scala │ ├── ConfluentAvroSingleObjectEncodingSpec.scala │ └── LittleEndianAvroSingleObjectEncodingUtilsSpec.scala ├── confluent └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── confluent │ │ ├── ConfluentConnector.scala │ │ ├── ConfluentConnectorCreator.scala │ │ ├── ConfluentConnectorOptions.scala │ │ ├── HoconToMap.scala │ │ └── Main.scala │ └── test │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── confluent │ ├── ConfluentConnectorCreatorSuite.scala │ └── ConfluentConnectorSuite.scala ├── core └── src │ └── main │ ├── java │ └── it │ │ └── agilelab │ │ └── darwin │ │ ├── annotations │ │ └── AvroSerde.java │ │ └── manager │ │ └── IdSchemaPair.java │ ├── resources │ └── reference.conf │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── manager │ ├── AvroSchemaCache.scala │ ├── AvroSchemaCacheFingerprint.scala │ ├── AvroSchemaManager.scala │ ├── AvroSchemaManagerFactory.scala │ ├── CachedAvroSchemaManager.scala │ ├── CachedEagerAvroSchemaManager.scala │ ├── CachedLazyAvroSchemaManager.scala │ └── LazyAvroSchemaManager.scala ├── docs ├── img │ ├── darwin_eager_cached_schema.jpg │ ├── darwin_interaction.jpg │ ├── darwin_lazy_cached_schema.jpg │ ├── darwin_lazy_schema.jpg │ └── logo │ │ ├── darwin-icon.ai │ │ ├── darwin-icon.png │ │ └── darwin-icon.svg └── src │ ├── darwin_eager_cached_schema.xml │ ├── darwin_interaction.xml │ ├── darwin_lazy_cached_schema.xml │ └── darwin_lazy_schema.xml ├── hbase └── src │ ├── main │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── hbase │ │ ├── ConfigurationKeys.scala │ │ ├── HBaseConnector.scala │ │ └── HBaseConnectorCreator.scala │ └── test │ ├── resources │ ├── application.conf │ └── log4j.properties │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── hbase │ ├── HBase2Mock.scala │ ├── HBaseConnectorSuite.scala │ └── HBaseMock.scala ├── hbase1 └── src │ └── main │ ├── resources │ └── META-INF │ │ └── services │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── 
hbase │ └── HBaseUtils.scala ├── hbase2 └── src │ └── main │ ├── resources │ └── META-INF │ │ └── services │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── hbase │ └── HBaseUtils.scala ├── make.sh ├── mock-application └── src │ └── test │ ├── resources │ ├── MyNestedClass.avsc │ ├── OneField.avsc │ ├── application.conf │ └── test │ │ ├── MockClassAlone.avsc │ │ └── MockClassParent.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── app │ └── mock │ ├── CachedEagerApplicationSuite.scala │ ├── CachedLazyApplicationSuite.scala │ ├── LazyApplicationSuite.scala │ ├── ManagerUtilsSuite.scala │ ├── TwoConnectorsSpec.scala │ └── classes │ ├── MyClass.scala │ ├── MyNestedAbstractClass.scala │ ├── MyNestedClass.scala │ ├── MyTrait.scala │ ├── NewClass.scala │ ├── NotToBeRegisteredClass.scala │ └── OneField.scala ├── mock-connector └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── mock │ │ ├── ConfigurationKeys.scala │ │ ├── MockConnector.scala │ │ ├── MockConnectorCreator.scala │ │ └── testclasses │ │ ├── MockClassAlone.scala │ │ ├── MockClassChild.scala │ │ └── MockClassParent.scala │ └── test │ ├── resources │ └── test │ │ ├── MockClassAlone.avsc │ │ └── MockClassParent.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── mock │ └── MockConnectorSpec.scala ├── mongo └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── mongo │ │ ├── ConfigurationKeys.scala │ │ ├── ConfigurationMongoModels.scala │ │ ├── MongoConnector.scala │ │ └── MongoConnectorCreator.scala │ └── test │ ├── resources │ ├── mongo.conf │ └── mongomock.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── mongo │ └── MongoConnectorTest.scala ├── multi-connector └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── multi │ │ ├── MultiConnector.scala │ │ └── MultiConnectorCreator.scala │ └── test │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── multi │ └── MultiConnectorSpec.scala ├── postgres └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ └── it │ │ └── agilelab │ │ └── darwin │ │ └── connector │ │ └── postgres │ │ ├── ConfigurationKeys.scala │ │ ├── PostgresConnection.scala │ │ ├── PostgresConnector.scala │ │ └── PostgresConnectorCreator.scala │ └── test │ ├── resources │ ├── postgres.properties │ └── postgresmock.avsc │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── postgres │ ├── Postgres2Mock.scala │ ├── PostgresConnectorSuite.scala │ └── PostgresMock.scala ├── project ├── Dependencies.scala ├── Settings.scala ├── Versions.scala ├── build.properties └── plugin.sbt ├── publish.sh ├── rest-server └── src │ └── main │ ├── postman │ └── darwinrest.postman_collection.json │ ├── resources │ └── reference.conf │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── server │ └── rest │ ├── DarwinService.scala │ ├── HttpApp.scala │ ├── JsonSupport.scala │ ├── Main.scala │ └── Service.scala ├── rest └── src │ ├── main │ ├── java │ │ 
└── scalaj │ │ │ └── http │ │ │ └── Base64.java │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── it.agilelab.darwin.common.ConnectorCreator │ └── scala │ │ ├── it │ │ └── agilelab │ │ │ └── darwin │ │ │ └── connector │ │ │ └── rest │ │ │ ├── JsonProtocol.scala │ │ │ ├── RestConnector.scala │ │ │ ├── RestConnectorCreator.scala │ │ │ └── RestConnectorOptions.scala │ │ └── scalaj │ │ └── http │ │ ├── DigestAuth.scala │ │ ├── Http.scala │ │ └── OAuth.scala │ └── test │ └── scala │ └── it │ └── agilelab │ └── darwin │ └── connector │ └── rest │ └── RestConnectorSuite.scala ├── scalastyle-config.xml └── spark-application └── src ├── dist └── conf │ └── application.conf └── main └── scala └── it └── agilelab └── darwin └── app └── spark ├── GenericMainClass.scala ├── SchemaManagerSparkApp.scala ├── SparkConfigurationKeys.scala ├── SparkManager.scala └── classes ├── Food.scala ├── Ignored.scala ├── Menu.scala ├── MenuItem.scala ├── Order.scala └── Price.scala /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | tags: 7 | - '*' 8 | pull_request: {} 9 | jobs: 10 | ci: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | - uses: coursier/cache-action@v6 17 | - uses: coursier/setup-action@v1 18 | with: 19 | jvm: zulu:8.0.402 20 | - run: ./make.sh && ./publish.sh 21 | env: 22 | SBT_NATIVE_CLIENT: false 23 | PGP_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }} 24 | PGP_SECRET: ${{ secrets.PGP_SECRET }} 25 | SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} 26 | SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }} 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/osx,sbt,java,linux,maven,scala,windows,intellij 3 | 4 | ### Intellij ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/codeStyles/* 14 | .idea/markdown-navigator/* 15 | .idea/**/shelf 16 | 17 | # Generated files 18 | .idea/**/contentModel.xml 19 | 20 | # Sensitive or high-churn files 21 | .idea/**/dataSources/ 22 | .idea/**/dataSources.ids 23 | .idea/**/dataSources.local.xml 24 | .idea/**/sqlDataSources.xml 25 | .idea/**/dynamic.xml 26 | .idea/**/uiDesigner.xml 27 | .idea/**/dbnavigator.xml 28 | 29 | # Gradle 30 | .idea/**/gradle.xml 31 | .idea/**/libraries 32 | 33 | # Gradle and Maven with auto-import 34 | # When using Gradle or Maven with auto-import, you should exclude module files, 35 | # since they will be recreated, and may cause churn. Uncomment if using 36 | # auto-import. 
37 | .idea/modules.xml 38 | .idea/*.iml 39 | .idea/modules 40 | .idea/*.xml 41 | # CMake 42 | cmake-build-*/ 43 | 44 | # Mongo Explorer plugin 45 | .idea/**/mongoSettings.xml 46 | 47 | # File-based project format 48 | *.iws 49 | 50 | # IntelliJ 51 | out/ 52 | 53 | # mpeltonen/sbt-idea plugin 54 | .idea_modules/ 55 | 56 | # JIRA plugin 57 | atlassian-ide-plugin.xml 58 | 59 | # Cursive Clojure plugin 60 | .idea/replstate.xml 61 | 62 | # Crashlytics plugin (for Android Studio and IntelliJ) 63 | com_crashlytics_export_strings.xml 64 | crashlytics.properties 65 | crashlytics-build.properties 66 | fabric.properties 67 | 68 | # Editor-based Rest Client 69 | .idea/httpRequests 70 | 71 | # Android studio 3.1+ serialized cache file 72 | .idea/caches/build_file_checksums.ser 73 | 74 | ### Intellij Patch ### 75 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 76 | 77 | # *.iml 78 | # modules.xml 79 | # .idea/misc.xml 80 | # *.ipr 81 | 82 | # Sonarlint plugin 83 | .idea/sonarlint 84 | 85 | ### Java ### 86 | # Compiled class file 87 | *.class 88 | 89 | # Log file 90 | *.log 91 | 92 | # BlueJ files 93 | *.ctxt 94 | 95 | # Mobile Tools for Java (J2ME) 96 | .mtj.tmp/ 97 | 98 | # Package Files # 99 | *.jar 100 | *.war 101 | *.nar 102 | *.ear 103 | *.zip 104 | *.tar.gz 105 | *.rar 106 | 107 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 108 | hs_err_pid* 109 | 110 | ### Linux ### 111 | *~ 112 | 113 | # temporary files which can be created if a process still has a handle open of a deleted file 114 | .fuse_hidden* 115 | 116 | # KDE directory preferences 117 | .directory 118 | 119 | # Linux trash folder which might appear on any partition or disk 120 | .Trash-* 121 | 122 | # .nfs files are created when an open file is removed but is still being accessed 123 | .nfs* 124 | 125 | ### Maven ### 126 | target/ 127 | pom.xml.tag 128 | pom.xml.releaseBackup 129 | pom.xml.versionsBackup 130 | pom.xml.next 131 | release.properties 132 | dependency-reduced-pom.xml 133 | buildNumber.properties 134 | .mvn/timing.properties 135 | .mvn/wrapper/maven-wrapper.jar 136 | 137 | ### OSX ### 138 | # General 139 | .DS_Store 140 | .AppleDouble 141 | .LSOverride 142 | 143 | # Icon must end with two \r 144 | Icon 145 | 146 | # Thumbnails 147 | ._* 148 | 149 | # Files that might appear in the root of a volume 150 | .DocumentRevisions-V100 151 | .fseventsd 152 | .Spotlight-V100 153 | .TemporaryItems 154 | .Trashes 155 | .VolumeIcon.icns 156 | .com.apple.timemachine.donotpresent 157 | 158 | # Directories potentially created on remote AFP share 159 | .AppleDB 160 | .AppleDesktop 161 | Network Trash Folder 162 | Temporary Items 163 | .apdisk 164 | 165 | ### SBT ### 166 | # Simple Build Tool 167 | # http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control 168 | 169 | dist/* 170 | lib_managed/ 171 | src_managed/ 172 | project/boot/ 173 | project/plugins/project/ 174 | .history 175 | .cache 176 | .lib/ 177 | 178 | ### Scala ### 179 | 180 | ### Windows ### 181 | # Windows thumbnail cache files 182 | Thumbs.db 183 | ehthumbs.db 184 | ehthumbs_vista.db 185 | 186 | # Dump file 187 | *.stackdump 188 | 189 | # Folder config file 190 | [Dd]esktop.ini 191 | 192 | # Recycle Bin used on file shares 193 | $RECYCLE.BIN/ 194 | 195 | # Windows Installer files 196 | *.cab 197 | *.msi 198 | *.msix 199 | *.msm 200 | *.msp 201 | 202 | # Windows shortcuts 203 | *.lnk 204 | 205 | metals.sbt 206 | .bloop/* 207 | .metals/* 208 | 
project/.bloop/* 209 | # End of https://www.gitignore.io/api/osx,sbt,java,linux,maven,scala,windows,intellij 210 | .bsp/ 211 | .vscode/settings.json 212 | project/project/.bloop/bloop.settings.json 213 | .gitignore 214 | project/project/.bloop/darwin-build-build.json 215 | -------------------------------------------------------------------------------- /.jvmopts: -------------------------------------------------------------------------------- 1 | -Dfile.encoding=UTF-8 2 | -Xms1024m 3 | -Xmx1024m 4 | -Xss4M 5 | -XX:ReservedCodeCacheSize=128m -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "2.7.2" 2 | project.git = true 3 | encoding = "UTF-8" 4 | maxColumn = 120 5 | align.preset = most 6 | continuationIndent.defnSite = 2 7 | assumeStandardLibraryStripMargin = true 8 | docstrings = ScalaDoc 9 | lineEndings = unix 10 | includeCurlyBraceInSelectChains = false 11 | danglingParentheses.preset = true 12 | spaces { 13 | inImportCurlyBraces = true 14 | } 15 | optIn.annotationNewlines = true 16 | 17 | rewrite.rules = [SortImports, SortModifiers] -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # HOW TO CONTRIBUTE 2 | 3 | We are always very happy to have contributions, whether for trivial cleanups or big new features. 4 | 5 | If you don't know Java or Scala you can still contribute to the project. 6 | Code is not the only way to contribute to the project. We strongly value documentation and gladly accept improvements to the documentation. 7 | 8 | # REPORTING AN ISSUE 9 | 10 | Reporting potential issues as Github issues is more than welcome as a significant contribution to the project. But please be aware that Github issues should not be used for FAQs: 11 | if you have a question or are simply not sure if it is really an issue or not, please contact us ([through gitter](https://gitter.im/agile-lab-darwin/community)) first before you create a new issue. 12 | 13 | # CONTRIBUTING A CODE CHANGE 14 | 15 | To submit a change for inclusion, please do the following: 16 | - If the change is non-trivial please include some unit tests that cover the new functionality. 17 | - If you are introducing a completely new feature or API it is a good idea to start a markdown description in the Github issue itself and get consensus on the basic design first. 18 | - Make sure you have observed the recommendations in the style guide. (scalastyle:check should pass with no errors or warnings) 19 | - Follow the detailed instructions in Contributing Code Changes. 20 | 21 | ## Contributing code changes 22 | 23 | ### Overview 24 | 25 | Generally, Darwin uses: 26 | - Github issues to track logical issues, including bugs and improvements 27 | - Github pull requests to manage the review and merge of specific code changes 28 | 29 | ### Github issues 30 | 31 | - Find the existing Github issue that the change pertains to. 32 | - Do not create a new issue if creating a change to address an existing issue in Github; add to the existing discussion and work instead. 33 | - To avoid conflicts, assign the Github issue to yourself if you plan to work on it. 34 | - Look for existing pull requests that are linked to the issue, to understand if someone is already working on it. 
35 | - If required, create a new issue (below shows some critical fields to fill in): 36 | - Provide a descriptive Title. "Update web UI" or "Problem in scheduler" is not sufficient. "Support NiFi SchemaRegistry interface and add meta-connector" is good. 37 | - Write a detailed Description. For bug reports, this should ideally include a short reproduction of the problem. For new features, it may include a design document. 38 | - To avoid conflicts, assign the issue to yourself if you plan to work on it. Leave it unassigned otherwise. 39 | - Do not include a patch file; pull requests are used to propose the actual change. 40 | - If the change is a large change, consider inviting discussion on the issue on gitter first before proceeding to implement the change. 41 | 42 | 43 | ### Pull Request 44 | 45 | - Fork the Github repository if you haven't already. 46 | - Clone your fork, create a new branch, push commits to the branch. 47 | - Consider whether documentation or tests need to be added or updated as part of the change, and add them as needed (doc changes should be submitted along with code change in the same PR). 48 | - Run all tests using the `make.sh` script. 49 | - Open a pull request against the develop branch. 50 | - The PR title should usually be of the form [#issue-number]: Title, where [#issue-number] is the relevant Github issue number and Title may be the issue title or a more specific title describing the PR itself. 51 | - If the pull request is still a work in progress, and so is not ready to be merged, but needs to be pushed to Github to facilitate review, use the draft mode of Github PR. 52 | - Consider identifying committers or other contributors who have worked on the code being changed. The easiest way is to simply follow GitHub's automatic suggestions. You can add @username in the PR description to ping them immediately. 53 | - Once ready, the PR `checks` box will be updated. 54 | - Investigate and fix failures caused by the pull request 55 | - Fixes can simply be pushed to the same branch from which you opened your pull request. 56 | - Please address feedback via additional commits instead of amending existing commits. This makes it easier for the reviewers to know what has changed since the last review. All commits will be squashed into a single one by the committer via GitHub's squash button or by a script as part of the merge process. 57 | - CI will automatically re-test when new commits are pushed. 58 | - Despite our efforts, Darwin may have flaky tests at any given point, which may cause a build to fail. You need to ping committers to trigger a new build. If the failure is unrelated to your pull request and you have been able to run the tests locally successfully, please mention it in the pull request. 59 | 60 | ### The Review Process 61 | 62 | - Other reviewers, including committers, may comment on the changes and suggest modifications. Changes can be added by simply pushing more commits to the same branch. 63 | - Please add a comment and "@" the reviewer in the PR if you have addressed reviewers' comments. Even though GitHub sends notifications when new commits are pushed, it is helpful to know that the PR is ready for review once again. 64 | - Lively, polite, rapid technical debate is encouraged from everyone in the community. The outcome may be a rejection of the entire change. 65 | - Reviewers can indicate that a change looks suitable for merging by approving it via GitHub's review interface.
This indicates the strongest level of technical sign-off on a patch and it means: "I've looked at this thoroughly and take as much ownership as if I wrote the patch myself". If you approve a pull request, you will be expected to help with bugs or follow-up issues on the patch. Consistent, judicious use of pull request approvals is a great way to gain credibility as a reviewer with the broader community. Darwin reviewers will typically include the acronym LGTM in their approval comment. This was the convention used to approve pull requests before the "approve" feature was introduced by GitHub. 66 | - Sometimes, other changes will be merged which conflict with your pull request's changes. The PR can't be merged until the conflict is resolved. This can be resolved with "git fetch origin" followed by "git merge origin/develop" and resolving the conflicts by hand, then pushing the result to your branch. 67 | - Try to be responsive to the discussion rather than let days pass between replies. 68 | 69 | ### Closing Your Pull Request / issue 70 | 71 | - If a change is accepted, it will be merged and the pull request will automatically be closed, along with the associated issue if any 72 | - If your pull request is ultimately rejected, please close it. 73 | - If a pull request has gotten little or no attention, consider improving the description or the change itself and ping likely reviewers again after a few days. Consider proposing a change that's easier to include, like a smaller and/or less invasive change. 74 | - If a pull request is closed because it is deemed not the right approach to resolve an issue, then leave the issue open. However if the review makes it clear that the issue identified in the issue is not going to be resolved by any pull request (not a problem, won't fix) then also resolve the issue. 75 | 76 | *This document is heavily inspired by Kafka/Apache contribution guidelines.* 77 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import sbt.Keys.baseDirectory 2 | 3 | /* 4 | * Main build definition. 5 | * 6 | * See project/Settings.scala for the settings definitions. 7 | * See project/Dependencies.scala for the dependencies definitions. 8 | * See project/Versions.scala for the versions definitions. 
9 | */ 10 | ThisBuild / dynverVTagPrefix := false 11 | 12 | lazy val root = Project("darwin", file(".")) 13 | .settings(Settings.commonSettings: _*) 14 | .settings(libraryDependencies ++= Dependencies.core_deps) 15 | .settings(Settings.notPublishSettings) 16 | .aggregate( 17 | core, 18 | coreCommon, 19 | hbaseConnector, 20 | postgresConnector, 21 | mockConnector, 22 | mockApplication, 23 | restConnector, 24 | mongoConnector, 25 | confluentConnector, 26 | multiConnector 27 | ) 28 | 29 | lazy val core = Project("darwin-core", file("core")) 30 | .settings(Settings.commonSettings: _*) 31 | .dependsOn(coreCommon) 32 | .settings(libraryDependencies ++= Dependencies.core_deps) 33 | .settings(crossScalaVersions := Versions.crossScalaVersions) 34 | 35 | lazy val coreCommon = Project("darwin-core-common", file("common")) 36 | .settings(Settings.commonSettings: _*) 37 | .settings(libraryDependencies ++= Dependencies.core_deps) 38 | .settings(crossScalaVersions := Versions.crossScalaVersions) 39 | 40 | lazy val hbaseConnector = Project("darwin-hbase-connector", file("hbase1")) 41 | .settings(Settings.commonSettings: _*) 42 | .dependsOn(coreCommon) 43 | .settings(libraryDependencies ++= Dependencies.hbase_conn_dep) 44 | .settings(crossScalaVersions := Versions.crossScalaVersions) 45 | .settings(Compile / unmanagedSourceDirectories += baseDirectory.value / ".." / "hbase" / "src" / "main" / "scala") 46 | .settings(Test / unmanagedSourceDirectories += baseDirectory.value / ".." / "hbase" / "src" / "test" / "scala") 47 | .settings(Test / unmanagedResourceDirectories += baseDirectory.value / ".." / "hbase" / "src" / "test" / "resources") 48 | .settings(Settings.hbaseTestSettings) 49 | 50 | lazy val hbaseConnector2 = Project("darwin-hbase2-connector", file("hbase2")) 51 | .settings(Settings.commonSettings: _*) 52 | .dependsOn(coreCommon) 53 | .settings(libraryDependencies ++= Dependencies.hbase2_conn_dep) 54 | .settings(crossScalaVersions := Versions.crossScalaVersions) 55 | .settings(Compile / unmanagedSourceDirectories += baseDirectory.value / ".." / "hbase" / "src" / "main" / "scala") 56 | .settings(Test / unmanagedSourceDirectories += baseDirectory.value / ".." / "hbase" / "src" / "test" / "scala") 57 | .settings(Test / unmanagedResourceDirectories += baseDirectory.value / ".." 
/ "hbase" / "src" / "test" / "resources") 58 | .settings(Settings.hbase2TestSettings) 59 | 60 | lazy val postgresConnector = Project("darwin-postgres-connector", file("postgres")) 61 | .settings(Settings.commonSettings: _*) 62 | .dependsOn(coreCommon) 63 | .settings(libraryDependencies ++= Dependencies.postgres_conn_dep) 64 | .settings(crossScalaVersions := Versions.crossScalaVersions) 65 | 66 | lazy val restConnector = Project("darwin-rest-connector", file("rest")) 67 | .settings(Settings.commonSettings: _*) 68 | .dependsOn(coreCommon) 69 | .settings( 70 | libraryDependencies ++= Dependencies.core_deps ++ Dependencies.wireMock :+ Dependencies.scalatest 71 | ) 72 | .settings(crossScalaVersions := Seq(Versions.scala, Versions.scala_211, Versions.scala_213)) 73 | 74 | lazy val confluentConnector = Project("darwin-confluent-connector", file("confluent")) 75 | .settings(Settings.commonSettings: _*) 76 | .dependsOn(coreCommon) 77 | .settings( 78 | libraryDependencies ++= Dependencies.core_deps ++ 79 | Dependencies.wireMock ++ 80 | Dependencies.confluentSchemaRegistryDependencies :+ Dependencies.scalatest 81 | ) 82 | .settings(crossScalaVersions := Versions.crossScalaVersions) 83 | 84 | lazy val restServer = Project("darwin-rest-server", file("rest-server")) 85 | .settings(Settings.commonSettings: _*) 86 | .dependsOn(coreCommon, mockConnector) 87 | .settings(libraryDependencies ++= Dependencies.restServer) 88 | .settings(crossScalaVersions := Seq(Versions.scala, Versions.scala_211, Versions.scala_213)) 89 | .dependsOn(core, hbaseConnector, postgresConnector, mockConnector) 90 | 91 | lazy val mongoConnector = Project("darwin-mongo-connector", file("mongo")) 92 | .settings(Settings.commonSettings: _*) 93 | .dependsOn(coreCommon) 94 | .settings(libraryDependencies ++= Dependencies.mongo_conn) 95 | .settings(crossScalaVersions := Seq(Versions.scala, Versions.scala_211, Versions.scala_213)) 96 | 97 | lazy val mockConnector = Project("darwin-mock-connector", file("mock-connector")) 98 | .settings(Settings.commonSettings: _*) 99 | .dependsOn(coreCommon) 100 | .settings(libraryDependencies ++= Dependencies.mock_conn) 101 | .settings(crossScalaVersions := Versions.crossScalaVersions) 102 | 103 | lazy val mockApplication = Project("darwin-mock-application", file("mock-application")) 104 | .settings(Settings.commonSettings: _*) 105 | .dependsOn(core, mockConnector, postgresConnector, hbaseConnector) 106 | .settings(libraryDependencies ++= Dependencies.mock_app_dep) 107 | .settings(crossScalaVersions := Versions.crossScalaVersions) 108 | .settings(Settings.notPublishSettings) 109 | 110 | lazy val sparkApplication = Project("darwin-spark-application", file("spark-application")) 111 | .settings(Settings.commonSettings: _*) 112 | .dependsOn(core, hbaseConnector, postgresConnector) 113 | .settings(libraryDependencies ++= Dependencies.spark_app) 114 | .settings(crossScalaVersions := Seq(Versions.scala, Versions.scala_211)) 115 | .settings(Settings.notPublishSettings) 116 | 117 | lazy val multiConnector = Project("darwin-multi-connector", file("multi-connector")) 118 | .settings(Settings.commonSettings: _*) 119 | .dependsOn(coreCommon) 120 | .dependsOn(core) 121 | .dependsOn(mockConnector % Test) 122 | .dependsOn(confluentConnector % Test) 123 | .settings(crossScalaVersions := Versions.crossScalaVersions) 124 | .settings(libraryDependencies += Dependencies.scalatest) 125 | -------------------------------------------------------------------------------- /bump-version.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 2 ]; then 4 | echo "Illegal number of parameters, you need to pass two parameters" 5 | exit 1 6 | fi 7 | 8 | unameOut="$(uname -s)" 9 | case "${unameOut}" in 10 | Linux*) machine=Linux;; 11 | Darwin*) machine=Mac;; 12 | *) machine=UNKNOWN 13 | esac 14 | 15 | if [ "$machine" = 'UNKNOWN' ]; then 16 | echo "Unknown os... aborting" 17 | exit 2 18 | fi 19 | 20 | echo "Running on $machine.." 21 | 22 | OLD_VERSION=$1 23 | NEW_VERSION=$2 24 | FILES_TO_CHANGE=$(git grep -l "$OLD_VERSION" | grep -v ".*\.ai\|.*\.svg\|.*\.xml") # there is an ai file that always matches... 25 | 26 | if [ -z "$FILES_TO_CHANGE" ]; then 27 | echo "No files to change..." 28 | exit 0 29 | fi 30 | 31 | echo "Bumping from version $OLD_VERSION to version $NEW_VERSION" 32 | echo "Editing the following files:" 33 | echo "" 34 | echo "$FILES_TO_CHANGE" 35 | echo "----------------------------" 36 | 37 | while IFS= read -r line; do 38 | case "${machine}" in 39 | Linux*) sed -i "s/${OLD_VERSION}/${NEW_VERSION}/g" $line;; 40 | Mac*) sed -i '' -e "s/${OLD_VERSION}/${NEW_VERSION}/g" $line;; 41 | esac 42 | git add $line 43 | done <<< "$FILES_TO_CHANGE" 44 | 45 | 46 | echo "Press enter to commit:" 47 | read 48 | 49 | git commit -e -m "Bump version to $NEW_VERSION" 50 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/ConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import com.typesafe.config.Config 4 | 5 | /** 6 | * A generic interface used to create the [[Connector]] found in the classpath. 7 | */ 8 | trait ConnectorCreator { 9 | 10 | /** 11 | * @return the name of the Connector 12 | */ 13 | def name(): String 14 | 15 | /** 16 | * This method should be overridden in each connector module returning its implementation. 17 | * 18 | * @param config configuration that will be used to create the correct implementation of [[Connector]] 19 | * @return the specific instance of [[Connector]] 20 | */ 21 | def create(config: Config): Connector 22 | } 23 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/ConnectorFactory.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util.ServiceLoader 4 | 5 | import com.typesafe.config.Config 6 | import it.agilelab.darwin.manager.exception.ConnectorNotFoundException 7 | import it.agilelab.darwin.manager.util.ConfigurationKeys 8 | 9 | import it.agilelab.darwin.common.compat._ 10 | 11 | /** 12 | * Used to obtain the correct implementation of [[Connector]] found on the classpath using the [[ConnectorCreator]] 13 | */ 14 | object ConnectorFactory extends Logging { 15 | 16 | /** 17 | * Retrieves all the registered [[ConnectorCreator]] in the classpath. 
18 | * 19 | * @return a sequence of all the loaded [[ConnectorCreator]] 20 | */ 21 | def creators(): Seq[ConnectorCreator] = { 22 | val creators = ServiceLoader.load(classOf[ConnectorCreator]).toScala().toSeq 23 | log.debug(s"${creators.size} available connector creators found") 24 | creators 25 | } 26 | 27 | /** 28 | * @return the first ConnectorCreator, use ONLY if you are sure that just one is available in the classpath 29 | */ 30 | def creator(): Option[ConnectorCreator] = creators().headOption 31 | 32 | /** 33 | * @return the ConnectorCreator identified by the name given as input 34 | */ 35 | def creator(name: String): Option[ConnectorCreator] = { 36 | creators().find(_.name() == name) 37 | } 38 | 39 | /** 40 | * @return the ConnectorCreator identified by the name given as input 41 | */ 42 | def creator(conf: Config): Option[ConnectorCreator] = { 43 | if (conf.hasPath(ConfigurationKeys.CONNECTOR)) { 44 | creator(conf.getString(ConfigurationKeys.CONNECTOR)) 45 | } else { 46 | creator() 47 | } 48 | } 49 | 50 | def connector(config: Config): Connector = { 51 | val cnt = creator(config) 52 | .map(_.create(config)) 53 | .getOrElse(throw new ConnectorNotFoundException(config)) 54 | if (config.hasPath(ConfigurationKeys.CREATE_TABLE) && config.getBoolean(ConfigurationKeys.CREATE_TABLE)) { 55 | cnt.createTable() 56 | } else if (!cnt.tableExists()) { 57 | log.warn(s"Darwin table does not exists and has not been created (${ConfigurationKeys.CREATE_TABLE} was false)") 58 | log.warn(cnt.tableCreationHint()) 59 | } 60 | cnt 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/DarwinConcurrentHashMap.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util.function.{ Function => JFunction } 4 | 5 | import scala.collection.concurrent.TrieMap 6 | 7 | /** 8 | * A thread safe lock-free concurrent map that exposes only getOrElseUpdate and getOrElse methods 9 | * It is backed by either a scala.collection.concurrent.TrieMap or java.util.concurrent.ConcurrentHashMap 10 | * depending on the JVM that executes Darwin. 11 | * JVM 8 or later use java's ConcurrentHashMap while earlier versions use scala's TrieMap 12 | * 13 | * Obtain the "correct" instance using {{{DarwinConcurrentHashMap.empty}}} factory method. 
14 | */ 15 | trait DarwinConcurrentHashMap[K, V] { 16 | def getOrElseUpdate(k: K, newValue: => V): V 17 | 18 | def getOrElse(k: K, default: => V): V 19 | } 20 | 21 | object DarwinConcurrentHashMap { 22 | 23 | private[common] class DarwinJava8ConcurrentHashMap[K, V] extends DarwinConcurrentHashMap[K, V] { 24 | private val innerMap = new java.util.concurrent.ConcurrentHashMap[K, V]() 25 | 26 | override def getOrElseUpdate(k: K, newValue: => V): V = { 27 | innerMap.computeIfAbsent( 28 | k, 29 | new JFunction[K, V]() { 30 | override def apply(t: K): V = newValue 31 | } 32 | ) 33 | } 34 | 35 | override def getOrElse(k: K, default: => V): V = 36 | Option(innerMap.get(k)).getOrElse(default) 37 | } 38 | 39 | private[common] class DarwinTrieConcurrentHashMap[K, V] extends DarwinConcurrentHashMap[K, V] { 40 | private val innerMap = TrieMap.empty[K, V] 41 | 42 | override def getOrElseUpdate(k: K, newValue: => V): V = innerMap.getOrElseUpdate(k, newValue) 43 | 44 | override def getOrElse(k: K, default: => V): V = innerMap.getOrElse(k, default) 45 | } 46 | 47 | private val isJavaAtLeast8 = JavaVersion.current() >= 8 48 | 49 | def empty[K, V]: DarwinConcurrentHashMap[K, V] = { 50 | if (isJavaAtLeast8) { 51 | new DarwinJava8ConcurrentHashMap() 52 | } else { 53 | new DarwinTrieConcurrentHashMap() 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/JavaVersion.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | object JavaVersion { 4 | 5 | /** 6 | * @return the JVM version in use, It returns an Integer indicating the major version i 7 | */ 8 | def current(): Int = { 9 | val propertyValue = System.getProperty("java.version") 10 | parseJavaVersion(propertyValue) 11 | } 12 | 13 | /** 14 | * @return the JVM version represented by the input string, It returns an Integer indicating the major version i 15 | */ 16 | def parseJavaVersion(propertyValue: String): Int = { 17 | val splits = propertyValue.split("\\.") 18 | if (propertyValue.startsWith("1.")) { 19 | splits(1).takeWhile(isDigit).toInt 20 | } else { 21 | splits(0).takeWhile(isDigit).toInt 22 | } 23 | } 24 | private val digits = ('0' to '9').toSet 25 | private def isDigit(c: Char): Boolean = { 26 | digits.contains(c) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/Logging.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import org.slf4j.{ Logger, LoggerFactory } 4 | 5 | trait Logging { 6 | private lazy val _log = LoggerFactory.getLogger(getClass.getName) 7 | 8 | def log: Logger = _log 9 | } 10 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/SchemaReader.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.io.{ File, IOException, InputStream } 4 | 5 | import org.apache.avro.{ Schema, SchemaParseException } 6 | 7 | object SchemaReader { 8 | 9 | def readFromResources(p: String): Schema = { 10 | using(getClass.getClassLoader.getResourceAsStream(p)) { stream => 11 | read(stream) 12 | } 13 | } 14 | 15 | def read(f: File): Schema = { 16 | val parser = new Schema.Parser() 17 | parser.parse(f) 18 | } 19 | 20 | def read(s: 
String): Schema = { 21 | val parser = new Schema.Parser() 22 | parser.parse(s) 23 | } 24 | 25 | /** 26 | * Does not close the InputStream 27 | */ 28 | def read(is: InputStream): Schema = { 29 | val parser = new Schema.Parser() 30 | parser.parse(is) 31 | } 32 | 33 | def safeReadFromResources(p: String): Either[SchemaReaderError, Schema] = { 34 | Option(getClass.getClassLoader.getResourceAsStream(p)).fold[Either[SchemaReaderError, Schema]]( 35 | Left(ResourceNotFoundError(s"Cannot find resource: $p")) 36 | ) { stream => 37 | try { 38 | safeRead(stream) 39 | } catch { 40 | case e: SchemaParseException => Left(SchemaParserError(e)) 41 | case e: IOException => Left(IOError(e)) 42 | case e: Throwable => Left(UnknownError(e)) 43 | } finally { 44 | stream.close() 45 | } 46 | } 47 | } 48 | 49 | def safeRead(f: File): Either[SchemaReaderError, Schema] = { 50 | try { 51 | Right(new Schema.Parser().parse(f)) 52 | } catch { 53 | case e: SchemaParseException => Left(SchemaParserError(e)) 54 | case e: IOException => Left(IOError(e)) 55 | case e: Throwable => Left(UnknownError(e)) 56 | } 57 | } 58 | 59 | def safeRead(s: String): Either[SchemaReaderError, Schema] = { 60 | try { 61 | Right(new Schema.Parser().parse(s)) 62 | } catch { 63 | case e: SchemaParseException => Left(SchemaParserError(e)) 64 | case e: IOException => Left(IOError(e)) 65 | case e: Throwable => Left(UnknownError(e)) 66 | } 67 | } 68 | 69 | /** 70 | * Does not close the InputStream 71 | */ 72 | def safeRead(is: InputStream): Either[SchemaReaderError, Schema] = { 73 | try { 74 | Right(new Schema.Parser().parse(is)) 75 | } catch { 76 | case e: SchemaParseException => Left(SchemaParserError(e)) 77 | case e: IOException => Left(IOError(e)) 78 | case e: Throwable => Left(UnknownError(e)) 79 | } 80 | } 81 | 82 | sealed trait SchemaReaderError 83 | 84 | case class SchemaParserError(exception: SchemaParseException) extends SchemaReaderError 85 | 86 | case class IOError(exception: IOException) extends SchemaReaderError 87 | 88 | case class ResourceNotFoundError(msg: String) extends SchemaReaderError 89 | 90 | case class UnknownError(t: Throwable) extends SchemaReaderError 91 | 92 | } 93 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/compat/package.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util 4 | 5 | /** 6 | * Converters java <-> scala that works between 2.10, 2.11, 2.12, 2.13 7 | */ 8 | package object compat { 9 | def toScala[A](jIterable: java.lang.Iterable[A]): scala.collection.Iterable[A] = { 10 | new Iterable[A] { 11 | def iterator: scala.collection.Iterator[A] = toScala(jIterable.iterator()) 12 | } 13 | } 14 | 15 | def toScala[A](jIterator: java.util.Iterator[A]): scala.collection.Iterator[A] = { 16 | new scala.collection.Iterator[A] { 17 | def next() = jIterator.next() 18 | def hasNext = jIterator.hasNext() 19 | } 20 | } 21 | 22 | def toScala[A, B](jIterator: java.util.Map[A, B]): scala.collection.Map[A, B] = { 23 | toScala(jIterator.entrySet().iterator()).map(x => (x.getKey, x.getValue)).toMap 24 | } 25 | 26 | def toScala[A](jSet: java.util.Set[A]): scala.collection.Set[A] = { 27 | val iterator = jSet.iterator() 28 | val builder = Set.newBuilder[A] 29 | while (iterator.hasNext) { 30 | builder += iterator.next() 31 | } 32 | builder.result() 33 | } 34 | 35 | def toJava[A](iterable: scala.collection.Iterable[A]): java.lang.Iterable[A] = new 
java.lang.Iterable[A] { 36 | override def iterator(): util.Iterator[A] = new util.Iterator[A] { 37 | private val it = iterable.iterator 38 | override def hasNext: Boolean = it.hasNext 39 | override def next(): A = it.next() 40 | } 41 | } 42 | 43 | def toJava[A](list: List[A]): java.util.List[A] = { 44 | val arraylist = new util.ArrayList[A]() 45 | list.foreach(arraylist.add) 46 | arraylist 47 | } 48 | 49 | implicit class IterableConverter[A](jIterable: java.lang.Iterable[A]) { 50 | def toScala(): scala.collection.Iterable[A] = { 51 | compat.toScala(jIterable) 52 | } 53 | } 54 | 55 | implicit class SetConverter[A](jSet: java.util.Set[A]) { 56 | def toScala(): scala.collection.Set[A] = { 57 | compat.toScala(jSet) 58 | } 59 | } 60 | 61 | implicit class JIterableConverter[A](iterable: scala.collection.Iterable[A]) { 62 | def toJava(): java.lang.Iterable[A] = { 63 | compat.toJava(iterable) 64 | } 65 | 66 | def toJavaList(): java.util.List[A] = { 67 | compat.toJava(iterable.toList) 68 | } 69 | } 70 | 71 | implicit class JMapConverter[A, B](map: scala.collection.Map[A, B]) { 72 | def toJava(): java.util.Map[A, B] = { 73 | val hashmap: util.Map[A, B] = new util.HashMap[A, B]() 74 | map.foreach { case (k, v) => 75 | hashmap.put(k, v) 76 | } 77 | hashmap 78 | } 79 | 80 | } 81 | 82 | implicit class IteratorConverter[A](jIterator: java.util.Iterator[A]) { 83 | def toScala(): scala.collection.Iterator[A] = { 84 | compat.toScala(jIterator) 85 | } 86 | } 87 | 88 | implicit class MapConverter[A, B](jmap: java.util.Map[A, B]) { 89 | def toScala(): collection.Map[A, B] = { 90 | compat.toScala(jmap) 91 | } 92 | } 93 | 94 | implicit class RightBiasedEither[+L, +R](val self: Either[L, R]) extends AnyVal { 95 | def rightMap[R1](f: R => R1): Either[L, R1] = { 96 | self match { 97 | case Right(v) => Right(f(v)) 98 | case _ => self.asInstanceOf[Either[L, R1]] 99 | } 100 | } 101 | 102 | def rightFlatMap[L1 >: L, R1](f: R => Either[L1, R1]): Either[L1, R1] = { 103 | self match { 104 | case Right(v) => f(v) 105 | case _ => self.asInstanceOf[Either[L1, R1]] 106 | } 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/common/package.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin 2 | 3 | package object common { 4 | 5 | def using[A <: AutoCloseable, B](closeable: A)(f: A => B): B = { 6 | try { 7 | f(closeable) 8 | } finally { 9 | closeable.close() 10 | } 11 | } 12 | 13 | final val LONG_SIZE = 8 14 | final val INT_SIZE = 4 15 | 16 | } 17 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/SchemaPayloadPair.java: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager; 2 | 3 | import org.apache.avro.Schema; 4 | 5 | public class SchemaPayloadPair { 6 | private final Schema schema; 7 | private final byte[] payload; 8 | 9 | private SchemaPayloadPair(Schema schema, byte[] payload) { 10 | this.schema = schema; 11 | this.payload = payload; 12 | } 13 | 14 | public Schema getSchema() { 15 | return schema; 16 | } 17 | 18 | public byte[] getPayload() { 19 | return payload; 20 | } 21 | 22 | public static SchemaPayloadPair create(Schema schema, byte[] payload) { 23 | return new SchemaPayloadPair(schema, payload); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- 
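
The helpers above (SchemaReader's safe parsing, the java/scala converters in the compat package, and the `using` loan pattern from the common package object) are meant to be combined by callers. The following is a minimal usage sketch, not a file from the repository: the object name, the schema literal and the printed values are illustrative assumptions.

```scala
// Hypothetical usage sketch (not part of the Darwin sources).
import java.io.ByteArrayInputStream
import it.agilelab.darwin.common.{ SchemaReader, using }
import it.agilelab.darwin.common.compat._

object SchemaReaderUsageSketch {
  def main(args: Array[String]): Unit = {
    // Illustrative schema literal, not taken from the repository.
    val schemaJson =
      """{"type":"record","name":"Example","fields":[{"name":"id","type":"long"}]}"""

    // safeRead returns parse failures as Left values instead of throwing;
    // rightMap comes from the RightBiasedEither syntax in the compat package.
    val fullName = SchemaReader.safeRead(schemaJson).rightMap(_.getFullName)
    println(fullName) // Right(Example)

    // The `using` loan pattern from the common package object closes the stream;
    // SchemaReader.read itself never closes the InputStream it is given.
    val schema = using(new ByteArrayInputStream(schemaJson.getBytes("UTF-8"))) { stream =>
      SchemaReader.read(stream)
    }
    println(schema.getName) // Example
  }
}
```
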
/common/src/main/scala/it/agilelab/darwin/manager/exception/ConnectorNotFoundException.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.exception 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.manager.util.ConfigUtil 5 | 6 | class ConnectorNotFoundException(val config: Config) extends RuntimeException(s"Cannot find Darwin connector") { 7 | 8 | def confAsString(): String = ConfigUtil.printConfig(config) 9 | 10 | } 11 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/exception/DarwinException.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.exception 2 | 3 | class DarwinException(message: String) extends RuntimeException(message) 4 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/util/ByteArrayUtils.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.io.OutputStream 4 | import java.nio.{ ByteBuffer, ByteOrder } 5 | 6 | import it.agilelab.darwin.common.{ INT_SIZE, LONG_SIZE } 7 | 8 | private[darwin] object ByteArrayUtils { 9 | 10 | implicit class EnrichedLong(val l: Long) extends AnyVal { 11 | 12 | /** 13 | * Converts Long to Array[Byte] honoring the input endianness 14 | */ 15 | def longToByteArray(endianness: ByteOrder): Array[Byte] = { 16 | ByteBuffer 17 | .allocate(LONG_SIZE) 18 | .order(endianness) 19 | .putLong(l) 20 | .array() 21 | } 22 | 23 | def truncateIntToByteArray(endianess: ByteOrder): Array[Byte] = { 24 | ByteBuffer 25 | .allocate(INT_SIZE) 26 | .order(endianess) 27 | .putInt(l.toInt) 28 | .array() 29 | } 30 | 31 | /** 32 | * Writes to the stream the enriched long honoring the input endianness 33 | */ 34 | def writeToStream(os: OutputStream, endianness: ByteOrder): Unit = { 35 | endianness match { 36 | case ByteOrder.BIG_ENDIAN => 37 | os.write((l >>> 56).asInstanceOf[Int]) 38 | os.write((l >>> 48).asInstanceOf[Int]) 39 | os.write((l >>> 40).asInstanceOf[Int]) 40 | os.write((l >>> 32).asInstanceOf[Int]) 41 | os.write((l >>> 24).asInstanceOf[Int]) 42 | os.write((l >>> 16).asInstanceOf[Int]) 43 | os.write((l >>> 8).asInstanceOf[Int]) 44 | os.write((l >>> 0).asInstanceOf[Int]) 45 | case ByteOrder.LITTLE_ENDIAN => 46 | os.write((l >>> 0).asInstanceOf[Int]) 47 | os.write((l >>> 8).asInstanceOf[Int]) 48 | os.write((l >>> 16).asInstanceOf[Int]) 49 | os.write((l >>> 24).asInstanceOf[Int]) 50 | os.write((l >>> 32).asInstanceOf[Int]) 51 | os.write((l >>> 40).asInstanceOf[Int]) 52 | os.write((l >>> 48).asInstanceOf[Int]) 53 | os.write((l >>> 56).asInstanceOf[Int]) 54 | case other: Any => 55 | throw new IllegalArgumentException("Unknown ByteOrder: " + other) 56 | } 57 | } 58 | } 59 | 60 | implicit class EnrichedInt(val l: Int) extends AnyVal { 61 | 62 | def intToByteArray(endianess: ByteOrder): Array[Byte] = { 63 | ByteBuffer 64 | .allocate(INT_SIZE) 65 | .order(endianess) 66 | .putInt(l.toInt) 67 | .array() 68 | } 69 | 70 | /** 71 | * Writes to the stream the enriched long honoring the input endianness 72 | */ 73 | def writeIntToStream(os: OutputStream, endianness: ByteOrder): Unit = { 74 | endianness match { 75 | case ByteOrder.BIG_ENDIAN => 76 | os.write((l >>> 24)) 77 | os.write((l >>> 16)) 78 | os.write((l >>> 8)) 79 | os.write((l >>> 0)) 80 | 
case ByteOrder.LITTLE_ENDIAN => 81 | os.write((l >>> 0)) 82 | os.write((l >>> 8)) 83 | os.write((l >>> 16)) 84 | os.write((l >>> 24)) 85 | case other: Any => 86 | throw new IllegalArgumentException("Unknown ByteOrder: " + other) 87 | } 88 | } 89 | } 90 | 91 | def arrayEquals(b1: Array[Byte], b2: Array[Byte], start1: Int, start2: Int, length: Int): Boolean = { 92 | require(length > 0, "length must be positive") 93 | var i = start1 94 | var j = start2 95 | var areEqual = true 96 | while (areEqual && i < start1 + length) { 97 | if (b1(i) != b2(j)) { 98 | areEqual = false 99 | } 100 | i += 1 101 | j += 1 102 | } 103 | areEqual 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/util/ConfigUtil.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.nio.ByteOrder 4 | 5 | import com.typesafe.config.{ Config, ConfigRenderOptions } 6 | 7 | object ConfigUtil { 8 | def printConfig(conf: Config): String = { 9 | conf.root().render(ConfigRenderOptions.defaults().setComments(false).setOriginComments(false)) 10 | } 11 | 12 | def printSmallConfig(conf: Config): String = { 13 | conf.root().render(ConfigRenderOptions.defaults().setComments(false).setOriginComments(false)) 14 | } 15 | 16 | def stringToEndianness(string: String): ByteOrder = { 17 | string.toUpperCase match { 18 | case "BIG_ENDIAN" => ByteOrder.BIG_ENDIAN 19 | case "LITTLE_ENDIAN" => ByteOrder.LITTLE_ENDIAN 20 | case _ => throw new IllegalArgumentException(s"Unknown endianness: $string") 21 | } 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /common/src/main/scala/it/agilelab/darwin/manager/util/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | object ConfigurationKeys { 4 | 5 | val CREATE_TABLE = "createTable" 6 | 7 | val CONNECTOR = "connector" 8 | 9 | val MANAGER_TYPE: String = "type" 10 | val ENDIANNESS: String = "endianness" 11 | val CACHED_EAGER: String = "cached_eager" 12 | val CACHED_LAZY: String = "cached_lazy" 13 | val LAZY: String = "lazy" 14 | } 15 | -------------------------------------------------------------------------------- /common/src/test/resources/test/MockClassAlone.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassAlone","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"fry","type":"string"},{"name":"bender","type":"long"},{"name":"leela","type":"int"},{"name":"zoidberg","type":"boolean"}]} 2 | -------------------------------------------------------------------------------- /common/src/test/resources/test/MockClassParent.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassParent","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"one","type":"int"},{"name":"two","type":"string"},{"name":"three","type":"long"},{"name":"four","type":{"type":"record","name":"MockClassChild","fields":[{"name":"twoOne","type":"long"},{"name":"twoTwo","type":"string"}]}}]} 2 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/CompatSpec.scala: 
-------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import org.scalatest.flatspec.AnyFlatSpec 4 | import org.scalatest.matchers.should.Matchers 5 | import compat._ 6 | 7 | class CompatSpec extends AnyFlatSpec with Matchers { 8 | 9 | "RightBiasedEither" should "map correctly on left side" in { 10 | Left[Int, String](3).rightMap { 11 | "Hello" + _ 12 | } shouldBe Left[Int, String](3) 13 | } 14 | 15 | it should "map correctly on right side" in { 16 | Right[Int, String]("Darwin").rightMap { 17 | "Hello " + _ 18 | } shouldBe Right[Int, String]("Hello Darwin") 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinConcurrentHashMapRunner.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import it.agilelab.darwin.common.DarwinConcurrentHashMap.{ DarwinJava8ConcurrentHashMap, DarwinTrieConcurrentHashMap } 4 | import org.scalatest.flatspec.AnyFlatSpec 5 | import org.scalatest.matchers.should.Matchers 6 | import org.scalatest.{ BeforeAndAfter, BeforeAndAfterAll } 7 | 8 | sealed private[common] class DarwinConcurrentHashMapRunner[K, V](sut: () => DarwinConcurrentHashMap[K, V]) 9 | extends AnyFlatSpec 10 | with Matchers 11 | with BeforeAndAfterAll 12 | with BeforeAndAfter { 13 | 14 | protected class DefaultException extends Exception("Side effect evaluated!") 15 | 16 | protected def anEmptySut: DarwinConcurrentHashMap[K, V] = sut() 17 | 18 | } 19 | 20 | abstract class DarwinJava8ConcurrentHashMapRunner[K, V] 21 | extends DarwinConcurrentHashMapRunner[K, V](() => new DarwinJava8ConcurrentHashMap) 22 | abstract class DarwinJava7ConcurrentHashMapRunner[K, V] 23 | extends DarwinConcurrentHashMapRunner[K, V](() => new DarwinTrieConcurrentHashMap) 24 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinConcurrentHashMapSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import java.util.concurrent.atomic.AtomicInteger 4 | 5 | import org.scalatest.BeforeAndAfter 6 | import org.scalatest.flatspec.AnyFlatSpec 7 | import org.scalatest.matchers.should.Matchers 8 | 9 | class DarwinConcurrentHashMapSpec extends AnyFlatSpec with Matchers with BeforeAndAfter { 10 | private val realJavaVersion = System.getProperty("java.version") 11 | 12 | after { 13 | System.setProperty("java.version", realJavaVersion) 14 | } 15 | 16 | def test(): Unit = { 17 | val threadNumber = 1000 18 | val map = DarwinConcurrentHashMap.empty[String, Int] 19 | var counter = 0 20 | val threadCounter = new AtomicInteger(0) 21 | val runnables = for (_ <- 1 to threadNumber) yield { 22 | new Runnable { 23 | override def run(): Unit = { 24 | threadCounter.incrementAndGet() 25 | val res = map.getOrElseUpdate( 26 | "A", { 27 | counter += 1 28 | counter 29 | } 30 | ) 31 | res should be(1) 32 | } 33 | } 34 | } 35 | val threads = for (r <- runnables) yield { 36 | val t = new Thread(r) 37 | t 38 | } 39 | for (t <- threads) { 40 | t.start() 41 | } 42 | for (t <- threads) { 43 | t.join() 44 | } 45 | threadCounter.get() should be(threadNumber) 46 | } 47 | 48 | it should "not evaluate the value if the key is present JAVA 8" in { 49 | test() 50 | } 51 | 52 | it should "not evaluate the value if the key is present JAVA 7" in { 53 | if 
(JavaVersion.parseJavaVersion(realJavaVersion) >= 8) { 54 | System.setProperty("java.version", "1.7") 55 | test() 56 | } else { 57 | assert(true) 58 | } 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinJava8ConcurrentHashMapSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import it.agilelab.darwin.common.DarwinConcurrentHashMap.DarwinJava8ConcurrentHashMap 4 | 5 | class DarwinJava8ConcurrentHashMapSpec extends DarwinJava8ConcurrentHashMapRunner[String, Int] { 6 | 7 | private def defaultWithSideEffect: Int = throw new DefaultException 8 | private val aKey = "aKey" 9 | private val aValue = 1 10 | 11 | it should "not evaluate the default param when key found - getOrElse" in { 12 | val sut = anEmptySut 13 | sut.getOrElseUpdate(aKey, aValue) 14 | 15 | lazy val res = sut.getOrElse(aKey, defaultWithSideEffect) 16 | 17 | sut shouldBe a[DarwinJava8ConcurrentHashMap[_, _]] 18 | noException should be thrownBy res 19 | res shouldBe aValue 20 | } 21 | 22 | it should "evaluate the default param when key NOT found - getOrElse" in { 23 | val sut = anEmptySut 24 | 25 | sut.getOrElseUpdate(aKey, aValue) 26 | 27 | lazy val res = sut.getOrElse("anotherKey", defaultWithSideEffect) 28 | 29 | sut shouldBe a[DarwinJava8ConcurrentHashMap[_, _]] 30 | an[DefaultException] should be thrownBy res 31 | } 32 | 33 | it should "not evaluate the default param when key is null - getOrElse" in { 34 | val sut = anEmptySut 35 | 36 | lazy val res = sut.getOrElse(null, defaultWithSideEffect) 37 | 38 | sut shouldBe a[DarwinJava8ConcurrentHashMap[_, _]] 39 | an[NullPointerException] should be thrownBy res 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/common/DarwinTrieConcurrentHashMapSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.common 2 | 3 | import it.agilelab.darwin.common.DarwinConcurrentHashMap.DarwinTrieConcurrentHashMap 4 | 5 | class DarwinTrieConcurrentHashMapSpec extends DarwinJava7ConcurrentHashMapRunner[String, Int] { 6 | 7 | private def defaultWithSideEffect: Int = throw new DefaultException 8 | 9 | private val aKey = "aKey" 10 | private val aValue = 1 11 | 12 | it should "not evaluate the default param when key found - getOrElse" in { 13 | val sut = anEmptySut 14 | 15 | sut.getOrElseUpdate(aKey, aValue) 16 | 17 | lazy val res = sut.getOrElse(aKey, defaultWithSideEffect) 18 | 19 | sut shouldBe a[DarwinTrieConcurrentHashMap[_, _]] 20 | noException should be thrownBy res 21 | res shouldBe aValue 22 | } 23 | 24 | it should "evaluate the default param when key NOT found - getOrElse" in { 25 | val sut = anEmptySut 26 | sut.getOrElseUpdate(aKey, aValue) 27 | 28 | lazy val res = sut.getOrElse("anotherKey", defaultWithSideEffect) 29 | 30 | sut shouldBe a[DarwinTrieConcurrentHashMap[_, _]] 31 | an[DefaultException] should be thrownBy res 32 | } 33 | 34 | it should "evaluate the default param when key is null - getOrElse" in { 35 | val sut = anEmptySut 36 | 37 | lazy val res = sut.getOrElse(null, defaultWithSideEffect) 38 | 39 | sut shouldBe a[DarwinTrieConcurrentHashMap[_, _]] 40 | an[DefaultException] should be thrownBy res 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- 
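
The DarwinConcurrentHashMap factory described above picks the backing map based on the JVM version, and the specs verify that the by-name value passed to getOrElseUpdate is evaluated only once per key. A small usage sketch follows; it is hypothetical code (not a repository file), and the object name, key and placeholder value are assumptions made for illustration.

```scala
// Hypothetical usage sketch (not part of the Darwin sources).
import it.agilelab.darwin.common.DarwinConcurrentHashMap

object DarwinConcurrentHashMapUsageSketch {
  def main(args: Array[String]): Unit = {
    // empty picks the JVM-appropriate backing map (ConcurrentHashMap on Java 8+, TrieMap otherwise).
    val cache = DarwinConcurrentHashMap.empty[Long, String]
    var evaluations = 0

    def expensiveLookup(): String = {
      evaluations += 1
      "schema-for-id-42" // placeholder value
    }

    // The second call finds the key already present, so expensiveLookup() is not evaluated again.
    cache.getOrElseUpdate(42L, expensiveLookup())
    cache.getOrElseUpdate(42L, expensiveLookup())
    println(evaluations) // 1

    // getOrElse never inserts; its default is evaluated only when the key is missing.
    println(cache.getOrElse(43L, "missing")) // missing
  }
}
```
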
/common/src/test/scala/it/agilelab/darwin/manager/util/BigEndianAvroSingleObjectEncodingUtilsSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.nio.ByteOrder 4 | 5 | class BigEndianAvroSingleObjectEncodingUtilsSpec extends AvroSingleObjectEncodingUtilsSpec(ByteOrder.BIG_ENDIAN) 6 | -------------------------------------------------------------------------------- /common/src/test/scala/it/agilelab/darwin/manager/util/LittleEndianAvroSingleObjectEncodingUtilsSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager.util 2 | 3 | import java.nio.ByteOrder 4 | 5 | class LittleEndianAvroSingleObjectEncodingUtilsSpec extends AvroSingleObjectEncodingUtilsSpec(ByteOrder.LITTLE_ENDIAN) 6 | -------------------------------------------------------------------------------- /confluent/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.confluent.ConfluentConnectorCreator 2 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.Config 4 | import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient 5 | import it.agilelab.darwin.common.compat._ 6 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, Logging } 7 | 8 | class ConfluentConnectorCreator extends ConnectorCreator with Logging { 9 | 10 | override def create(config: Config): Connector = { 11 | log.debug("creating confluent connector") 12 | 13 | val confluentOptions = ConfluentConnectorOptions.fromConfig(config) 14 | log.info("confluent options are {}", confluentOptions) 15 | 16 | val client = new CachedSchemaRegistryClient( 17 | confluentOptions.endpoints.toJavaList(), 18 | confluentOptions.maxCachedSchemas, 19 | confluentOptions.config 20 | ) 21 | 22 | val rest = new ConfluentConnector(confluentOptions, client) 23 | log.debug("created confluent connector") 24 | rest 25 | } 26 | 27 | /** 28 | * @return the name of the Connector 29 | */ 30 | override def name(): String = "confluent" 31 | } 32 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorOptions.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | 6 | case class ConfluentConnectorOptions( 7 | endpoints: List[String], 8 | config: java.util.Map[String, AnyRef], 9 | maxCachedSchemas: Int 10 | ) 11 | 12 | object ConfluentConnectorOptions { 13 | 14 | val ENDPOINTS_CONFIG_KEY = "endpoints" 15 | val MAX_CACHED_SCHEMA_KEYS = "max-cached-schemas" 16 | 17 | def fromConfig(config: Config): ConfluentConnectorOptions = { 18 | 19 | if (!config.hasPath(ENDPOINTS_CONFIG_KEY)) { 20 | throw new IllegalArgumentException( 21 | s"Missing [${ENDPOINTS_CONFIG_KEY}] configuration key for ${classOf[ConfluentConnector].getName}" 22 | ) 23 | } 24 | 25 | if (!config.hasPath(MAX_CACHED_SCHEMA_KEYS)) { 26 | 
throw new IllegalArgumentException( 27 | s"Missing [${MAX_CACHED_SCHEMA_KEYS}] configuration key for ${classOf[ConfluentConnector].getName}" 28 | ) 29 | } 30 | 31 | val endpoints = config.getStringList(ENDPOINTS_CONFIG_KEY).toScala().toList 32 | val maxCachedSchemas = config.getInt(MAX_CACHED_SCHEMA_KEYS) 33 | val other = config.root() 34 | 35 | ConfluentConnectorOptions(endpoints, HoconToMap.convert(other), maxCachedSchemas) 36 | 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/HoconToMap.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.{ ConfigObject, ConfigValue } 4 | import it.agilelab.darwin.common.compat.{ JMapConverter, SetConverter } 5 | 6 | import scala.collection.mutable 7 | 8 | private[confluent] object HoconToMap { 9 | 10 | private def walk(root: ConfigValue): Map[String, AnyRef] = { 11 | val result = mutable.HashMap.empty[String, AnyRef] 12 | 13 | def doWalk(path: String, r: ConfigValue): Unit = { 14 | 15 | r match { 16 | case o: ConfigObject => 17 | o.keySet().toScala().foreach { key => 18 | val nextPath = if (path.isEmpty) key else path + "." + key 19 | doWalk(nextPath, o.get(key)) 20 | } 21 | case _ => 22 | result += path -> r.unwrapped() 23 | } 24 | } 25 | 26 | doWalk("", root) 27 | 28 | result.toMap 29 | } 30 | 31 | def convert(configValue: ConfigValue): java.util.Map[String, AnyRef] = { 32 | walk(configValue).toJava() 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /confluent/src/main/scala/it/agilelab/darwin/connector/confluent/Main.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import java.util.Collections 4 | 5 | import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient 6 | import it.agilelab.darwin.common.compat._ 7 | import org.apache.avro.{ Schema, SchemaBuilder } 8 | 9 | object Main { 10 | def main(args: Array[String]): Unit = { 11 | // to run this main https://github.com/confluentinc/cp-all-in-one/blob/6.0.0-post/cp-all-in-one/docker-compose.yml 12 | // TODO make this main meaningful 13 | val maxSchemas = 1000 14 | val options = ConfluentConnectorOptions(List("http://localhost:8081"), Collections.emptyMap(), maxSchemas) 15 | 16 | val client = new CachedSchemaRegistryClient( 17 | options.endpoints.toJavaList(), 18 | options.maxCachedSchemas, 19 | options.config 20 | ) 21 | val connector = new ConfluentConnector(options, client) 22 | 23 | connector.fullLoad().foreach(println) 24 | 25 | val expected: Schema = SchemaBuilder 26 | .record("myrecord") 27 | .namespace("it.agilelab.record") 28 | .fields() 29 | .requiredString("myfield") 30 | .optionalString("ciccio") 31 | .endRecord() 32 | 33 | expected.addProp("x-darwin-subject", "prova2-value": AnyRef) 34 | 35 | val id = connector.fingerprint(expected) 36 | 37 | connector.insert(Seq((id, expected))) 38 | 39 | connector.fullLoad().foreach(println) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /confluent/src/test/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorCreatorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import com.typesafe.config.{ 
Config, ConfigFactory } 4 | import org.scalatest.flatspec.AnyFlatSpec 5 | import org.scalatest.matchers.should.Matchers 6 | 7 | class ConfluentConnectorCreatorSuite extends AnyFlatSpec with Matchers { 8 | 9 | "connector" should "create an instance" in { 10 | 11 | val conf: Config = ConfigFactory.parseString(""" 12 | | endpoints: ["endpoint-one", "endpoint-two"] 13 | | max-cached-schemas: 1000 14 | | 15 | | kafka.schemaregistry.other: 1 16 | | kafka.schemaregistry: { 17 | | other2: "stringa" 18 | | } 19 | |""".stripMargin) 20 | 21 | val connector = new ConfluentConnectorCreator() 22 | 23 | val options = ConfluentConnectorOptions.fromConfig(conf) 24 | 25 | val result = connector.create(conf) 26 | 27 | assert(result != null) 28 | 29 | val endpoints = options.config.get("endpoints").asInstanceOf[java.util.List[String]] 30 | 31 | endpoints.get(0) should be("endpoint-one") 32 | endpoints.get(1) should be("endpoint-two") 33 | 34 | options.config.get("kafka.schemaregistry.other").asInstanceOf[Int] should be(1) 35 | options.config.get("kafka.schemaregistry.other2").asInstanceOf[String] should be("stringa") 36 | 37 | val maxCached = 1000 38 | options.config.get("max-cached-schemas").asInstanceOf[Int] should be(maxCached) 39 | 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /confluent/src/test/scala/it/agilelab/darwin/connector/confluent/ConfluentConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.confluent 2 | 3 | import java.util.Collections 4 | 5 | import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient 6 | import org.apache.avro.{ Schema, SchemaBuilder } 7 | import org.scalatest.flatspec.AnyFlatSpec 8 | import org.scalatest.matchers.should.Matchers 9 | import org.scalatest.{ BeforeAndAfterEach, OptionValues } 10 | 11 | class ConfluentConnectorSuite extends AnyFlatSpec with BeforeAndAfterEach with OptionValues with Matchers { 12 | 13 | "confluent connector" should "insert schemas and generate ids" in { 14 | 15 | val mockRegistryClient = new MockSchemaRegistryClient() 16 | val maxCachedSchemas = 1000 17 | val connector = new ConfluentConnector( 18 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), maxCachedSchemas), 19 | client = mockRegistryClient 20 | ) 21 | 22 | val expected = SchemaBuilder.array().items(Schema.create(Schema.Type.STRING)) 23 | expected.addProp("x-darwin-subject", "prova": AnyRef) 24 | 25 | val id = connector.fingerprint(expected) 26 | 27 | connector.insert(Seq((id, expected))) 28 | 29 | connector.findSchema(id).value shouldBe expected 30 | 31 | } 32 | 33 | "confluent connector" should "be able to preload schemas" in { 34 | 35 | val expected = SchemaBuilder.array().items(Schema.create(Schema.Type.STRING)) 36 | expected.addProp("x-darwin-subject", "prova": AnyRef) 37 | val expected2 = SchemaBuilder.array().items(Schema.create(Schema.Type.INT)) 38 | expected2.addProp("x-darwin-subject", "prova2": AnyRef) 39 | 40 | val mockRegistryClient = new MockSchemaRegistryClient() 41 | 42 | mockRegistryClient.register("prova", expected) 43 | mockRegistryClient.register("prova2", expected2) 44 | 45 | val maxCachedSchemas = 1000 46 | 47 | val connector = new ConfluentConnector( 48 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), maxCachedSchemas), 49 | client = mockRegistryClient 50 | ) 51 | 52 | val fullLoaded = connector.fullLoad() 53 | 54 | fullLoaded should contain 
theSameElementsAs Seq((1, expected), (2, expected2)) 55 | 56 | } 57 | 58 | "confluent connector" should "be able to fetch latest schema for subject" in { 59 | 60 | val expected = SchemaBuilder 61 | .record("record") 62 | .fields() 63 | .requiredString("stringField") 64 | .endRecord() 65 | 66 | val expected2 = SchemaBuilder 67 | .record("record") 68 | .fields() 69 | .requiredString("stringField") 70 | .nullableString("stringField2", "default-for-nullable") 71 | .endRecord() 72 | 73 | expected.addProp("x-darwin-subject", "prova": AnyRef) 74 | expected2.addProp("x-darwin-subject", "prova": AnyRef) 75 | 76 | val mockRegistryClient = new MockSchemaRegistryClient() 77 | 78 | mockRegistryClient.register("prova", expected) 79 | mockRegistryClient.register("prova", expected2) 80 | 81 | val maxCachedSchemas = 1000 82 | 83 | val connector = new ConfluentConnector( 84 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), maxCachedSchemas), 85 | client = mockRegistryClient 86 | ) 87 | 88 | val fullLoaded = connector.fullLoad() 89 | 90 | fullLoaded should contain theSameElementsAs Seq((1, expected), (2, expected2)) 91 | 92 | val latestResult = connector.findIdForSubjectLatestVersion("prova") 93 | 94 | val allVersions = connector.findVersionsForSubject("prova") 95 | 96 | val parser = (schema: String) => new Schema.Parser().parse(schema) 97 | 98 | val versionsByVersionId = allVersions 99 | .map(x => connector.findIdForSubjectVersion("prova", x)) 100 | .map(x => x.getId -> parser(x.getSchema)) 101 | 102 | latestResult.getId should be(2) 103 | 104 | versionsByVersionId should contain theSameElementsAs Seq((1, expected), (2, expected2)) 105 | 106 | } 107 | 108 | "confluent connector" should "detect a missing x-darwin-subject" in { 109 | val expected = SchemaBuilder.array().items(Schema.create(Schema.Type.STRING)) 110 | 111 | val mockRegistryClient = new MockSchemaRegistryClient() 112 | 113 | mockRegistryClient.register("prova", expected) 114 | 115 | val maxCachedSchemas = 1000 116 | 117 | val connector = new ConfluentConnector( 118 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), maxCachedSchemas), 119 | client = mockRegistryClient 120 | ) 121 | 122 | val exception = intercept[IllegalArgumentException] { 123 | connector.insert(Seq(expected).map(schema => connector.fingerprint(schema) -> schema)) 124 | } 125 | 126 | exception.getMessage should be("Schema does not contain the [x-darwin-subject] extension") 127 | 128 | } 129 | 130 | it should "return None if fetching latest schema of non-existing subject" in { 131 | 132 | val mockRegistryClient = new MockSchemaRegistryClient() 133 | 134 | val maxCachedSchemas = 1000 135 | 136 | val connector = new ConfluentConnector( 137 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), maxCachedSchemas), 138 | client = mockRegistryClient 139 | ) 140 | 141 | connector.retrieveLatestSchema("pippo") shouldBe None 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /core/src/main/java/it/agilelab/darwin/annotations/AvroSerde.java: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.annotations; 2 | 3 | public @interface AvroSerde { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /core/src/main/java/it/agilelab/darwin/manager/IdSchemaPair.java: -------------------------------------------------------------------------------- 1 | package 
it.agilelab.darwin.manager; 2 | 3 | import org.apache.avro.Schema; 4 | 5 | public class IdSchemaPair { 6 | private final long id; 7 | private final Schema schema; 8 | 9 | private IdSchemaPair(long id, Schema schema) { 10 | this.id = id; 11 | this.schema = schema; 12 | } 13 | 14 | public long getId() { 15 | return id; 16 | } 17 | 18 | public Schema getSchema() { 19 | return schema; 20 | } 21 | 22 | public static IdSchemaPair create(long id, Schema schema) { 23 | return new IdSchemaPair(id, schema); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /core/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | createTable: false 2 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/AvroSchemaCache.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import org.apache.avro.Schema 4 | 5 | /** 6 | * Generic definition of the cache used by the manager to store the data loaded from the external storage. 7 | * @param schemas a sequence of (ID, schema) used to initialize the cache values 8 | */ 9 | abstract class AvroSchemaCache(schemas: Seq[(Long, Schema)]) { 10 | 11 | /** 12 | * Retrieves a registered schema for the input ID. 13 | * 14 | * @param id the Long ID of the schema 15 | * @return the Schema associated to the input ID 16 | */ 17 | def getSchema(id: Long): Option[Schema] 18 | 19 | /** 20 | * Tests if the input schema is contained inside the cache. 21 | * 22 | * @param schema a Schema that the cache could contain 23 | * @return a pair containing: a boolean that is true if the schema is contained in the cache and the ID of the 24 | * schema in any case 25 | */ 26 | def contains(schema: Schema): (Boolean, Long) 27 | 28 | /** 29 | * Creates a new instance of [[AvroSchemaCache]] with the original values plus the input ones. 30 | * 31 | * @param values new pair (ID, schema) to insert inside the cache 32 | * @return a new instance of [[AvroSchemaCache]] containing the new values in addition to the original ones. 33 | */ 34 | def insert(values: Seq[(Long, Schema)]): AvroSchemaCache 35 | 36 | /** 37 | * Retrieves all registered schemas 38 | * 39 | * @return A Sequence of (ID, Schema) 40 | */ 41 | def getAll: Seq[(Long, Schema)] 42 | } 43 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/AvroSchemaCacheFingerprint.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import it.agilelab.darwin.common.Logging 4 | import org.apache.avro.Schema 5 | 6 | /** 7 | * Implementation of [[AvroSchemaCache]] that uses Fingerprint64 as IDs. 
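 * A usage sketch (assuming Avro's SchemaNormalization.parsingFingerprint64 as the fingerprinting function; names are illustrative):
 * {{{
 * val schema: Schema = SchemaBuilder.builder().stringType()
 * val cache          = AvroSchemaCacheFingerprint(Seq.empty, SchemaNormalization.parsingFingerprint64)
 * val (found, id)    = cache.contains(schema)                          // the id is computed even on a miss
 * val updated        = if (found) cache else cache.insert(Seq(id -> schema))
 * updated.getSchema(id)                                                // Some(schema)
 * }}}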
8 | * 9 | * @param schemas a sequence of (ID, schema) used to initialize the cache values 10 | */ 11 | case class AvroSchemaCacheFingerprint(schemas: Seq[(Long, Schema)], fingerPrinter: Schema => Long) 12 | extends AvroSchemaCache(schemas) 13 | with Logging { 14 | log.debug(s"initialization of the cache with ${schemas.size} schemas") 15 | private val _table: Map[Long, Schema] = schemas.toMap 16 | log.debug("cache initialized") 17 | 18 | override def getSchema(id: Long): Option[Schema] = _table.get(id) 19 | 20 | override def contains(schema: Schema): (Boolean, Long) = { 21 | val id = fingerPrinter(schema) 22 | _table.contains(id) -> id 23 | } 24 | 25 | override def insert(values: Seq[(Long, Schema)]): AvroSchemaCache = 26 | AvroSchemaCacheFingerprint(_table.toSeq ++ values, fingerPrinter) 27 | 28 | /** 29 | * Retrieves all registered schemas 30 | * 31 | * @return A Sequence of (ID, Schema) 32 | */ 33 | override def getAll: Seq[(Long, Schema)] = _table.toSeq 34 | } 35 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/AvroSchemaManagerFactory.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ ConnectorFactory, DarwinConcurrentHashMap, Logging } 5 | import it.agilelab.darwin.manager.exception.ConnectorNotFoundException 6 | import it.agilelab.darwin.manager.util.{ ConfigUtil, ConfigurationKeys } 7 | 8 | /** 9 | * Factory used to obtain the desired implementation of AvroSchemaManager. 10 | * First of all the initialize method should be called passing the configuration (it will return an instance of 11 | * AvroSchemaManager. Then, the same instance can be retrieved using the getInstance method without passing the 12 | * configuration anymore. 13 | */ 14 | object AvroSchemaManagerFactory extends Logging { 15 | 16 | private val _instancePool: DarwinConcurrentHashMap[String, AvroSchemaManager] = 17 | DarwinConcurrentHashMap.empty[String, AvroSchemaManager] 18 | 19 | private def configKey(c: Config): String = { 20 | ConfigUtil.printConfig(c) 21 | } 22 | 23 | /** 24 | * Returns an instance of AvroSchemaManager that can be used to register and retrieve schemas. 
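 * A usage sketch (assuming the loaded Config carries the darwin keys: type, connector and endianness):
 * {{{
 * val config  = ConfigFactory.load()
 * val manager = AvroSchemaManagerFactory.initialize(config)   // creates (or reuses) the manager for this config
 * val same    = AvroSchemaManagerFactory.getInstance(config)  // later lookups return the initialized instance
 * }}}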
25 | * 26 | * @param config the Config that is passed to the connector 27 | * @return an instance of AvroSchemaManager 28 | */ 29 | @throws[ConnectorNotFoundException] 30 | def initialize(config: Config): AvroSchemaManager = { 31 | val key = configKey(config) 32 | lazy val mappingFunc = { 33 | log.debug("creating instance of AvroSchemaManager") 34 | val endianness = ConfigUtil.stringToEndianness(config.getString(ConfigurationKeys.ENDIANNESS)) 35 | val result = config.getString(ConfigurationKeys.MANAGER_TYPE) match { 36 | case ConfigurationKeys.CACHED_EAGER => 37 | new CachedEagerAvroSchemaManager(ConnectorFactory.connector(config), endianness) 38 | case ConfigurationKeys.CACHED_LAZY => 39 | new CachedLazyAvroSchemaManager(ConnectorFactory.connector(config), endianness) 40 | case ConfigurationKeys.LAZY => 41 | new LazyAvroSchemaManager(ConnectorFactory.connector(config), endianness) 42 | case _ => 43 | throw new IllegalArgumentException( 44 | s"No valid manager can be created for" + 45 | s" ${ConfigurationKeys.MANAGER_TYPE} key ${config.getString(ConfigurationKeys.MANAGER_TYPE)}" 46 | ) 47 | } 48 | log.debug("AvroSchemaManager instance created") 49 | result 50 | } 51 | _instancePool.getOrElseUpdate(key, mappingFunc) 52 | } 53 | 54 | /** 55 | * Returns the initialized instance of AvroSchemaManager that can be used to register and retrieve schemas. 56 | * The instance must be created once using the initialize method passing a configuration before calling this method. 57 | * 58 | * @return the initialized instance of AvroSchemaManager 59 | */ 60 | def getInstance(config: Config): AvroSchemaManager = { 61 | _instancePool.getOrElse( 62 | configKey(config), 63 | throw new IllegalArgumentException( 64 | s"No valid manager can be found for" + 65 | s" ${ConfigurationKeys.MANAGER_TYPE} key ${config.getString(ConfigurationKeys.MANAGER_TYPE)}" 66 | ) 67 | ) 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/CachedAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | import java.util.concurrent.atomic.AtomicReference 5 | 6 | import it.agilelab.darwin.common.Connector 7 | import org.apache.avro.Schema 8 | 9 | /** 10 | * Implementation of AvroSchemaManager that defines a cache where the storage data is loaded, in order to reduce the 11 | * number of accesses to the storage. 12 | */ 13 | abstract class CachedAvroSchemaManager(connector: Connector, endianness: ByteOrder) 14 | extends AvroSchemaManager(connector, endianness) { 15 | protected val _cache: AtomicReference[Option[AvroSchemaCache]] = new AtomicReference[Option[AvroSchemaCache]](None) 16 | 17 | def cache: AvroSchemaCache = _cache.get 18 | .getOrElse( 19 | throw new IllegalAccessException( 20 | "Cache not loaded: accesses are allowed only if the cache has been " + 21 | "loaded" 22 | ) 23 | ) 24 | 25 | initialize() 26 | 27 | private def initialize(): Unit = { 28 | log.debug("cache initialization...") 29 | _cache.compareAndSet(None, Some(AvroSchemaCacheFingerprint(connector.fullLoad(), connector.fingerprint))) 30 | log.debug("cache initialized") 31 | } 32 | 33 | /** 34 | * Reloads all the schemas from the previously configured storage. 35 | * Throws an exception if the cache wasn't already loaded (the getInstance method must always be used to 36 | * initialize the cache using the required configuration). 
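 * A sketch of the intended use (names are illustrative; the schema is assumed to be written to the storage outside this manager instance):
 * {{{
 * connector.insert(Seq(newId -> newSchema))   // registered directly on the storage
 * manager.getSchema(newId)                    // an eagerly cached manager still misses it
 * manager.reload()                            // re-reads the storage content into the cache
 * manager.getSchema(newId)                    // now Some(newSchema)
 * }}}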
37 | */ 38 | override def reload(): AvroSchemaManager = { 39 | log.debug("reloading cache...") 40 | _cache.set(Some(AvroSchemaCacheFingerprint(connector.fullLoad(), connector.fingerprint))) 41 | log.debug("cache reloaded") 42 | this 43 | } 44 | 45 | override def registerAll(schemas: Seq[Schema]): Seq[(Long, Schema)] = { 46 | log.debug(s"registering ${schemas.size} schemas...") 47 | val (alreadyInCache, notInCache) = schemas.map(s => (cache.contains(s), s)).partition(_._1._1) 48 | val inserted = notInCache.map(e => e._1._2 -> e._2) 49 | connector.insert(inserted) 50 | val allSchemas = alreadyInCache.map(e => e._1._2 -> e._2) ++ inserted 51 | _cache.set(Some(cache.insert(inserted))) //TODO review 52 | log.debug(s"${allSchemas.size} schemas registered") 53 | allSchemas 54 | } 55 | 56 | /** 57 | * Retrieves all registered schemas 58 | * 59 | * @return A Sequence of (ID, Schema) 60 | */ 61 | override def getAll: Seq[(Long, Schema)] = cache.getAll 62 | } 63 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/CachedEagerAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | 5 | import it.agilelab.darwin.common.Connector 6 | import org.apache.avro.Schema 7 | 8 | /** 9 | * Implementation of CachedAvroSchemaManager that loads all the schemas into the cache at startup and doesn't 10 | * perform any other accesses to the storage: each retrieve is performed onto the cache. 11 | */ 12 | class CachedEagerAvroSchemaManager(connector: Connector, endianness: ByteOrder) 13 | extends CachedAvroSchemaManager(connector, endianness) { 14 | override def getSchema(id: Long): Option[Schema] = cache.getSchema(id) 15 | } 16 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/CachedLazyAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | 5 | import it.agilelab.darwin.common.Connector 6 | import org.apache.avro.Schema 7 | 8 | /** 9 | * Implementation of CachedAvroSchemaManager that loads all the schemas into the cache at startup and perform 10 | * all the retrieves onto the cache; an access to the storage is performed only if there is a cache miss. 
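 * A sketch of the resulting behaviour (id is an illustrative fingerprint known to the connector but not yet cached):
 * {{{
 * manager.getSchema(id)   // cache miss: falls back to connector.findSchema(id) and caches the result
 * manager.getSchema(id)   // served from the cache, no further storage access
 * }}}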
11 | */ 12 | class CachedLazyAvroSchemaManager(connector: Connector, endianness: ByteOrder) 13 | extends CachedAvroSchemaManager(connector, endianness) { 14 | 15 | override def getSchema(id: Long): Option[Schema] = { 16 | cache.getSchema(id).orElse { 17 | val schema: Option[Schema] = connector.findSchema(id) 18 | schema.foreach(s => _cache.set(Some(cache.insert(Seq(id -> s))))) 19 | schema 20 | } 21 | } 22 | 23 | override def getAll: Seq[(Long, Schema)] = { 24 | _cache.set(Some(cache.insert(connector.fullLoad()))) 25 | cache.getAll 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /core/src/main/scala/it/agilelab/darwin/manager/LazyAvroSchemaManager.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.manager 2 | 3 | import java.nio.ByteOrder 4 | 5 | import it.agilelab.darwin.common.Connector 6 | import org.apache.avro.Schema 7 | 8 | /** 9 | * Implementation of AvroSchemaManager that performs all the operations directly on the storage (retrievals and 10 | * insertions). 11 | */ 12 | class LazyAvroSchemaManager(connector: Connector, endianness: ByteOrder) 13 | extends AvroSchemaManager(connector, endianness) { 14 | 15 | override def getSchema(id: Long): Option[Schema] = connector.findSchema(id) 16 | 17 | override def registerAll(schemas: Seq[Schema]): Seq[(Long, Schema)] = { 18 | val schemasWithIds = schemas.map(s => getId(s) -> s) 19 | connector.insert(schemasWithIds) 20 | schemasWithIds 21 | } 22 | 23 | override def reload(): AvroSchemaManager = this 24 | 25 | override def getAll: Seq[(Long, Schema)] = connector.fullLoad() 26 | } 27 | -------------------------------------------------------------------------------- /docs/img/darwin_eager_cached_schema.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/darwin_eager_cached_schema.jpg -------------------------------------------------------------------------------- /docs/img/darwin_interaction.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/darwin_interaction.jpg -------------------------------------------------------------------------------- /docs/img/darwin_lazy_cached_schema.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/darwin_lazy_cached_schema.jpg -------------------------------------------------------------------------------- /docs/img/darwin_lazy_schema.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/darwin_lazy_schema.jpg -------------------------------------------------------------------------------- /docs/img/logo/darwin-icon.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/logo/darwin-icon.ai -------------------------------------------------------------------------------- /docs/img/logo/darwin-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/agile-lab-dev/darwin/3063185e49489703378a9ec360e9fe999eee7f69/docs/img/logo/darwin-icon.png -------------------------------------------------------------------------------- /hbase/src/main/scala/it/agilelab/darwin/connector/hbase/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | object ConfigurationKeys { 4 | val TABLE: String = "table" 5 | val NAMESPACE: String = "namespace" 6 | val HBASE_SITE: String = "hbaseSite" 7 | val CORE_SITE: String = "coreSite" 8 | val IS_SECURE: String = "isSecure" 9 | val PRINCIPAL: String = "principal" 10 | val KEYTAB_PATH: String = "keytabPath" 11 | } 12 | -------------------------------------------------------------------------------- /hbase/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseConnector.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | import it.agilelab.darwin.common.{ using, Connector, Logging } 6 | import org.apache.avro.Schema 7 | import org.apache.avro.Schema.Parser 8 | import org.apache.commons.io.IOUtils 9 | import org.apache.hadoop.conf.Configuration 10 | import org.apache.hadoop.fs.Path 11 | import org.apache.hadoop.hbase._ 12 | import org.apache.hadoop.hbase.client._ 13 | import org.apache.hadoop.hbase.security.User 14 | import org.apache.hadoop.hbase.util.Bytes 15 | import org.apache.hadoop.security.UserGroupInformation 16 | 17 | object HBaseConnector extends Logging { 18 | 19 | private var _instance: HBaseConnector = _ 20 | 21 | def instance(hbaseConfig: Config): HBaseConnector = { 22 | synchronized { 23 | if (_instance == null) { 24 | log.debug("Initialization of HBase connector") 25 | _instance = HBaseConnector(hbaseConfig) 26 | log.debug("HBase connector initialized") 27 | } 28 | } 29 | _instance 30 | } 31 | } 32 | 33 | case class HBaseConnector(config: Config) extends Connector with Logging { 34 | 35 | val DEFAULT_NAMESPACE: String = "AVRO" 36 | val DEFAULT_TABLENAME: String = "SCHEMA_REPOSITORY" 37 | 38 | val TABLE_NAME_STRING: String = if (config.hasPath(ConfigurationKeys.TABLE)) { 39 | config.getString(ConfigurationKeys.TABLE) 40 | } else { 41 | DEFAULT_TABLENAME 42 | } 43 | 44 | val NAMESPACE_STRING: String = if (config.hasPath(ConfigurationKeys.NAMESPACE)) { 45 | config.getString(ConfigurationKeys.NAMESPACE) 46 | } else { 47 | DEFAULT_NAMESPACE 48 | } 49 | 50 | lazy val TABLE_NAME: TableName = TableName.valueOf(Bytes.toBytes(NAMESPACE_STRING), Bytes.toBytes(TABLE_NAME_STRING)) 51 | 52 | val CF_STRING = "0" 53 | val CF: Array[Byte] = Bytes.toBytes(CF_STRING) 54 | val QUALIFIER_SCHEMA: Array[Byte] = Bytes.toBytes("schema") 55 | val QUALIFIER_NAME: Array[Byte] = Bytes.toBytes("name") 56 | val QUALIFIER_NAMESPACE: Array[Byte] = Bytes.toBytes("namespace") 57 | 58 | log.debug("Creating default HBaseConfiguration") 59 | val configuration: Configuration = HBaseConfiguration.create() 60 | log.debug("Created default HBaseConfiguration") 61 | 62 | if (config.hasPath(ConfigurationKeys.CORE_SITE) && config.hasPath(ConfigurationKeys.HBASE_SITE)) { 63 | log.debug(addResourceMessage(config.getString(ConfigurationKeys.CORE_SITE))) 64 | configuration.addResource(new Path(config.getString(ConfigurationKeys.CORE_SITE))) 65 | log.debug(addResourceMessage(config.getString(ConfigurationKeys.HBASE_SITE))) 66 | 
configuration.addResource(new Path(config.getString(ConfigurationKeys.HBASE_SITE))) 67 | } 68 | 69 | private def addResourceMessage(s: String) = { 70 | val ADDING_RESOURCE = "Adding resource: " 71 | ADDING_RESOURCE + s 72 | } 73 | 74 | val connection: Connection = if (config.getBoolean(ConfigurationKeys.IS_SECURE)) { 75 | log.debug(s"Calling UserGroupInformation.setConfiguration()") 76 | UserGroupInformation.setConfiguration(configuration) 77 | 78 | log.debug( 79 | s"Calling UserGroupInformation.loginUserFromKeytab(${config.getString(ConfigurationKeys.PRINCIPAL)}, " + 80 | s"${config.getString(ConfigurationKeys.KEYTAB_PATH)})" 81 | ) 82 | val ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI( 83 | config.getString(ConfigurationKeys.PRINCIPAL), 84 | config.getString(ConfigurationKeys.KEYTAB_PATH) 85 | ) 86 | UserGroupInformation.setLoginUser(ugi) 87 | val user = User.create(ugi) 88 | log.trace( 89 | s"initialization of HBase connection with configuration:\n " + 90 | s"${configuration.iterator().toScala().map { entry => entry.getKey -> entry.getValue }.mkString("\n")}" 91 | ) 92 | ConnectionFactory.createConnection(configuration, user) 93 | } else { 94 | log.trace( 95 | s"initialization of HBase connection with configuration:\n " + 96 | s"${configuration.iterator().toScala().map { entry => entry.getKey -> entry.getValue }.mkString("\n")}" 97 | ) 98 | ConnectionFactory.createConnection(configuration) 99 | } 100 | 101 | log.debug("HBase connection initialized") 102 | sys.addShutdownHook { 103 | // log.info(s"closing HBase connection pool") 104 | IOUtils.closeQuietly(connection) 105 | } 106 | 107 | //TODO this must be a def (a new Parser is created each time) because if the same Parser is used, it fails if you 108 | //TODO parse a class A and after it a class B that has a field of type A => ERROR: Can't redefine type A. 
109 | //TODO Sadly the Schema.parse() method that would solve this problem is now deprecated 110 | private def parser: Parser = new Parser() 111 | 112 | override def fullLoad(): Seq[(Long, Schema)] = { 113 | log.debug(s"loading all schemas from table $NAMESPACE_STRING:$TABLE_NAME_STRING") 114 | val scanner: Iterable[Result] = connection.getTable(TABLE_NAME).getScanner(CF, QUALIFIER_SCHEMA).toScala() 115 | val schemas = scanner.map { result => 116 | val key = Bytes.toLong(result.getRow) 117 | val value = Bytes.toString(result.getValue(CF, QUALIFIER_SCHEMA)) 118 | key -> parser.parse(value) 119 | }.toSeq 120 | log.debug(s"${schemas.size} loaded from HBase") 121 | schemas 122 | } 123 | 124 | override def insert(schemas: Seq[(Long, Schema)]): Unit = { 125 | if (schemas.nonEmpty) { 126 | 127 | log.debug(s"inserting ${schemas.size} schemas in HBase table $NAMESPACE_STRING:$TABLE_NAME_STRING") 128 | using(connection.getBufferedMutator(TABLE_NAME)) { mutator => 129 | schemas.map { case (id, schema) => 130 | val put = new Put(Bytes.toBytes(id)) 131 | put.addColumn(CF, QUALIFIER_SCHEMA, Bytes.toBytes(schema.toString)) 132 | put.addColumn(CF, QUALIFIER_NAME, Bytes.toBytes(schema.getName)) 133 | put.addColumn(CF, QUALIFIER_NAMESPACE, Bytes.toBytes(schema.getNamespace)) 134 | put 135 | }.foreach(mutator.mutate) 136 | mutator.flush() 137 | log.debug(s"insertion of schemas into $NAMESPACE_STRING:$TABLE_NAME_STRING successful") 138 | } 139 | } 140 | 141 | } 142 | 143 | override def createTable(): Unit = { 144 | using(connection.getAdmin) { admin => 145 | if (!admin.listNamespaceDescriptors().exists(_.getName == NAMESPACE_STRING)) { 146 | log.info(s"Namespace $NAMESPACE_STRING does not exists, creating it") 147 | admin.createNamespace(NamespaceDescriptor.create(NAMESPACE_STRING).build()) 148 | } 149 | if (!tableExists()) { 150 | log.info(s"Table $TABLE_NAME does not exists, creating it") 151 | HBaseUtils.createTable(admin, TABLE_NAME, CF) 152 | } 153 | } 154 | } 155 | 156 | override def tableExists(): Boolean = { 157 | using(connection.getAdmin) { admin => 158 | admin.tableExists(TABLE_NAME) 159 | } 160 | } 161 | 162 | override def tableCreationHint(): String = { 163 | s"""To create namespace and table from an HBase shell issue: 164 | | create_namespace '$NAMESPACE_STRING' 165 | | create '$NAMESPACE_STRING:$TABLE_NAME_STRING', '$CF_STRING'""".stripMargin 166 | } 167 | 168 | override def findSchema(id: Long): Option[Schema] = { 169 | log.debug(s"loading a schema with id = $id from table $NAMESPACE_STRING:$TABLE_NAME_STRING") 170 | val get: Get = new Get(Bytes.toBytes(id)) 171 | get.addColumn(CF, QUALIFIER_SCHEMA) 172 | val result: Result = connection.getTable(TABLE_NAME).get(get) 173 | val value: Option[Array[Byte]] = Option(result.getValue(CF, QUALIFIER_SCHEMA)) 174 | val schema: Option[Schema] = value.map(v => parser.parse(Bytes.toString(v))) 175 | log.debug(s"$schema loaded from HBase for id = $id") 176 | schema 177 | } 178 | 179 | /** 180 | * Retrieves the latest schema for a given string identifier (not to be confused with the fingerprint id). 
181 | * This API might not be implemented by all connectors, which should return None 182 | */ 183 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = None 184 | } 185 | -------------------------------------------------------------------------------- /hbase/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, Logging } 5 | 6 | class HBaseConnectorCreator extends ConnectorCreator with Logging { 7 | override def create(config: Config): Connector = { 8 | log.debug("creating the HBase connector") 9 | val connector: Connector = HBaseConnector.instance(config) 10 | log.debug("HBase connector created") 11 | connector 12 | } 13 | 14 | /** 15 | * @return the name of the Connector 16 | */ 17 | override def name(): String = "hbase" 18 | } 19 | -------------------------------------------------------------------------------- /hbase/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | isSecure: false -------------------------------------------------------------------------------- /hbase/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=WARN, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1} - %m%n 9 | -------------------------------------------------------------------------------- /hbase/src/test/scala/it/agilelab/darwin/connector/hbase/HBase2Mock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | case class HBase2Mock(one: Boolean, two: Long) 4 | -------------------------------------------------------------------------------- /hbase/src/test/scala/it/agilelab/darwin/connector/hbase/HBaseConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import java.nio.file.Files 4 | import java.util.UUID 5 | 6 | import com.typesafe.config.{ ConfigFactory, ConfigValueFactory } 7 | import it.agilelab.darwin.common.Connector 8 | import org.apache.avro.reflect.ReflectData 9 | import org.apache.avro.{ Schema, SchemaNormalization } 10 | import org.apache.hadoop.hbase.{ HBaseConfiguration, HBaseTestingUtility, MiniHBaseCluster } 11 | import org.scalatest.BeforeAndAfterAll 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | class HBaseConnectorSuite extends AnyFlatSpec with Matchers with BeforeAndAfterAll { 16 | 17 | private var connector: Connector = _ 18 | private var minicluster: MiniHBaseCluster = _ 19 | 20 | "HBaseConnector" should "load all existing schemas" in { 21 | connector.fullLoad() 22 | } 23 | 24 | it should "insert and retrieve" in { 25 | val schemas = Seq(ReflectData.get().getSchema(classOf[HBaseMock]), ReflectData.get().getSchema(classOf[HBase2Mock])) 26 | .map(s => SchemaNormalization.parsingFingerprint64(s) -> s) 27 | connector.insert(schemas) 
28 | val loaded: Seq[(Long, Schema)] = connector.fullLoad() 29 | assert(loaded.size == schemas.size) 30 | assert(loaded.forall(schemas.contains)) 31 | val schema = connector.findSchema(loaded.head._1) 32 | assert(schema.isDefined) 33 | assert(schema.get == loaded.head._2) 34 | val noSchema = connector.findSchema(-1L) 35 | assert(noSchema.isEmpty) 36 | } 37 | 38 | "connector.tableCreationHint" should "print the correct hint for table creation" in { 39 | connector.tableCreationHint() should be("""To create namespace and table from an HBase shell issue: 40 | | create_namespace 'AVRO' 41 | | create 'AVRO:SCHEMA_REPOSITORY', '0'""".stripMargin) 42 | } 43 | 44 | "connector.tableExists" should "return true with existent table" in { 45 | connector.tableExists() should be(true) 46 | } 47 | 48 | override def beforeAll(): Unit = { 49 | val testUUID = UUID.randomUUID().toString 50 | val hConf = HBaseConfiguration.create() 51 | hConf.set("test.build.data.basedirectory", s"./target/hbase-test-data-$testUUID") 52 | val util = new HBaseTestingUtility(hConf) 53 | minicluster = util.startMiniCluster(1, true) 54 | val confFile = Files.createTempFile(testUUID, ".xml") 55 | // Hbase connector can only load configurations from a file path so we need to render the hadoop conf 56 | val stream = Files.newOutputStream(confFile) 57 | // mc.getConfiguration.writeXml(System.out) 58 | minicluster.getConfiguration.writeXml(stream) 59 | stream.flush() 60 | stream.close() 61 | // HbaseConnector will only load conf if hbase-site and core-site are given, 62 | // we give the same file to each. 63 | sys.addShutdownHook(minicluster.shutdown()) 64 | val config = ConfigFactory 65 | .load() 66 | .withValue(ConfigurationKeys.HBASE_SITE, ConfigValueFactory.fromAnyRef(confFile.toAbsolutePath.toString)) 67 | .withValue(ConfigurationKeys.CORE_SITE, ConfigValueFactory.fromAnyRef(confFile.toAbsolutePath.toString)) 68 | connector = new HBaseConnectorCreator().create(config) 69 | connector.createTable() 70 | } 71 | 72 | override def afterAll(): Unit = { 73 | minicluster.shutdown() 74 | minicluster.waitUntilShutDown() 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /hbase/src/test/scala/it/agilelab/darwin/connector/hbase/HBaseMock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | case class HBaseMock(one: Int, two: String, three: Long, four: HBase2Mock) 4 | -------------------------------------------------------------------------------- /hbase1/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.hbase.HBaseConnectorCreator 2 | -------------------------------------------------------------------------------- /hbase1/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseUtils.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import org.apache.hadoop.hbase.{ HColumnDescriptor, HTableDescriptor, TableName } 4 | import org.apache.hadoop.hbase.client.Admin 5 | 6 | object HBaseUtils { 7 | def createTable(admin: Admin, tableName: TableName, columnFamily: Array[Byte]): Unit = { 8 | admin.createTable(new HTableDescriptor(tableName).addFamily(new HColumnDescriptor(columnFamily))) 9 | } 10 | } 11 | 
-------------------------------------------------------------------------------- /hbase2/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.hbase.HBaseConnectorCreator 2 | -------------------------------------------------------------------------------- /hbase2/src/main/scala/it/agilelab/darwin/connector/hbase/HBaseUtils.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.hbase 2 | 3 | import org.apache.hadoop.hbase.TableName 4 | import org.apache.hadoop.hbase.client.{ Admin, ColumnFamilyDescriptorBuilder, TableDescriptorBuilder } 5 | 6 | object HBaseUtils { 7 | def createTable(admin: Admin, tableName: TableName, columnFamily: Array[Byte]): Unit = { 8 | admin.createTable( 9 | TableDescriptorBuilder 10 | .newBuilder(tableName) 11 | .setColumnFamily( 12 | ColumnFamilyDescriptorBuilder.newBuilder(columnFamily).build() 13 | ) 14 | .build() 15 | ) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sbt -v clean scalastyle +test +doc darwin-hbase2-connector/clean darwin-hbase2-connector/scalastyle +darwin-hbase2-connector/test +darwin-hbase2-connector/doc 3 | -------------------------------------------------------------------------------- /mock-application/src/test/resources/MyNestedClass.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MyNestedClass","namespace":"it.agilelab.darwin.app.mock.classes","fields":[{"name":"id","type":"int"},{"name":"myClass","type":{"type":"record","name":"MyClass","fields":[{"name":"value","type":"int"},{"name":"otherVale","type":"long"}]}},{"name":"my2Class","type":{"type":"map","values":"MyClass"}}]} -------------------------------------------------------------------------------- /mock-application/src/test/resources/OneField.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"OneField","namespace":"it.agilelab.darwin.app.mock.classes","fields":[{"name":"one","type":"int"}]} -------------------------------------------------------------------------------- /mock-application/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | type: cached_eager 2 | connector: "mock" 3 | resources: ["test/MockClassParent.avsc", "test/MockClassAlone.avsc"] -------------------------------------------------------------------------------- /mock-application/src/test/resources/test/MockClassAlone.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassAlone","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"fry","type":"string"},{"name":"bender","type":"long"},{"name":"leela","type":"int"},{"name":"zoidberg","type":"boolean"}]} 2 | -------------------------------------------------------------------------------- /mock-application/src/test/resources/test/MockClassParent.avsc: -------------------------------------------------------------------------------- 1 | 
{"type":"record","name":"MockClassParent","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"one","type":"int"},{"name":"two","type":"string"},{"name":"three","type":"long"},{"name":"four","type":{"type":"record","name":"MockClassChild","fields":[{"name":"twoOne","type":"long"},{"name":"twoTwo","type":"string"}]}}]} 2 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/CachedEagerApplicationSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory } 4 | import it.agilelab.darwin.annotations.AvroSerde 5 | import it.agilelab.darwin.app.mock.classes.{ MyClass, MyNestedClass, NewClass, OneField } 6 | import it.agilelab.darwin.common.compat._ 7 | import it.agilelab.darwin.common.{ Connector, ConnectorFactory, SchemaReader } 8 | import it.agilelab.darwin.manager.{ AvroSchemaManager, CachedEagerAvroSchemaManager } 9 | import org.apache.avro.reflect.ReflectData 10 | import org.apache.avro.{ Schema, SchemaNormalization } 11 | import org.reflections.Reflections 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | import java.lang.reflect.Modifier 16 | import java.nio.ByteOrder 17 | 18 | class BigEndianCachedEagerApplicationSuite extends CachedEagerApplicationSuite(ByteOrder.BIG_ENDIAN) 19 | 20 | class LittleEndianCachedEagerApplicationSuite extends CachedEagerApplicationSuite(ByteOrder.LITTLE_ENDIAN) 21 | 22 | abstract class CachedEagerApplicationSuite(val endianness: ByteOrder) extends AnyFlatSpec with Matchers { 23 | 24 | private val mockClassAloneFingerprint = 6675579114512671233L 25 | private val mockClassParentFingerprint = -6310800772237892477L 26 | 27 | private val config: Config = ConfigFactory.load() 28 | private val connector: Connector = ConnectorFactory.connector(config) 29 | private val manager: AvroSchemaManager = new CachedEagerAvroSchemaManager(connector, endianness) 30 | 31 | "CachedEagerAvroSchemaManager" should "not fail after the initialization" in { 32 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 33 | assert(manager.registerAll(schemas).size == 1) 34 | } 35 | 36 | it should "register a new schema" in { 37 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 38 | manager.registerAll(schemas) 39 | 40 | val id = manager.getId(schemas.head) 41 | assert(manager.getSchema(id).isDefined) 42 | assert(schemas.head == manager.getSchema(id).get) 43 | } 44 | 45 | it should "get all previously registered schemas" in { 46 | val schema: Schema = SchemaReader.readFromResources("MyNestedClass.avsc") 47 | val schema0 = manager.getSchema(mockClassAloneFingerprint) 48 | val schema1 = manager.getSchema(mockClassParentFingerprint) 49 | assert(schema0.isDefined) 50 | assert(schema1.isDefined) 51 | assert(schema0.get != schema1.get) 52 | assert(schema != schema0.get) 53 | assert(schema != schema1.get) 54 | } 55 | 56 | it should "generate all schemas for all the annotated classes with @AvroSerde" in { 57 | val reflections = new Reflections("it.agilelab.darwin.app.mock.classes") 58 | 59 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]).toString 60 | val myNestedSchema = ReflectData.get().getSchema(classOf[MyNestedClass]).toString 61 | val myClassSchema = 
ReflectData.get().getSchema(classOf[MyClass]).toString 62 | 63 | val annotationClass: Class[AvroSerde] = classOf[AvroSerde] 64 | val classes = reflections 65 | .getTypesAnnotatedWith(annotationClass) 66 | .toScala() 67 | .toSeq 68 | .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) 69 | val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName)).toString) 70 | Seq(oneFieldSchema, myClassSchema, myNestedSchema) should contain theSameElementsAs schemas 71 | } 72 | 73 | it should "reload all schemas from the connector" in { 74 | val newSchema = ReflectData.get().getSchema(classOf[NewClass]) 75 | val newId = SchemaNormalization.parsingFingerprint64(newSchema) 76 | assert(manager.getSchema(newId).isEmpty) 77 | 78 | connector.insert(Seq(newId -> newSchema)) 79 | assert(manager.getSchema(newId).isEmpty) 80 | 81 | manager.reload() 82 | assert(manager.getSchema(newId).isDefined) 83 | assert(manager.getSchema(newId).get == newSchema) 84 | } 85 | 86 | it should "not call getId when retrieving a schema out of the cache" in { 87 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]) 88 | var calls = 0 89 | val manager = new CachedEagerAvroSchemaManager( 90 | new Connector { 91 | override def createTable(): Unit = () 92 | override def tableExists(): Boolean = true 93 | override def tableCreationHint(): String = "" 94 | override def fullLoad(): Seq[(Long, Schema)] = Seq.empty 95 | override def insert(schemas: Seq[(Long, Schema)]): Unit = () 96 | override def findSchema(id: Long): Option[Schema] = Some(oneFieldSchema) 97 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = Some(1L -> oneFieldSchema) 98 | }, 99 | endianness 100 | ) { 101 | override def getId(schema: Schema): Long = { 102 | calls += 1 103 | super.getId(schema) 104 | } 105 | } 106 | manager.getSchema(3L) shouldNot be(null) // scalastyle:ignore 107 | calls shouldBe 0 108 | } 109 | 110 | it should "not find the latest schema" in { 111 | manager.retrieveLatestSchema("asdf") shouldBe None 112 | } 113 | 114 | it should "find the latest schema" in { 115 | manager.retrieveLatestSchema("it.agilelab.darwin.connector.mock.testclasses.MockClassParent") shouldBe Some( 116 | mockClassParentFingerprint -> manager.getSchema(mockClassParentFingerprint).get 117 | ) 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/CachedLazyApplicationSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory } 4 | import it.agilelab.darwin.annotations.AvroSerde 5 | import it.agilelab.darwin.app.mock.classes.{ MyClass, MyNestedClass, NewClass, OneField } 6 | import it.agilelab.darwin.common.compat._ 7 | import it.agilelab.darwin.common.{ Connector, ConnectorFactory, SchemaReader } 8 | import it.agilelab.darwin.manager.{ AvroSchemaManager, CachedLazyAvroSchemaManager } 9 | import org.apache.avro.reflect.ReflectData 10 | import org.apache.avro.{ Schema, SchemaNormalization } 11 | import org.reflections.Reflections 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | import java.lang.reflect.Modifier 16 | import java.nio.ByteOrder 17 | 18 | class BigEndianCachedLazyApplicationSuite extends CachedLazyApplicationSuite(ByteOrder.BIG_ENDIAN) 19 | 20 | class LittleEndianCachedLazyApplicationSuite 
extends CachedLazyApplicationSuite(ByteOrder.LITTLE_ENDIAN) 21 | 22 | abstract class CachedLazyApplicationSuite(val endianness: ByteOrder) extends AnyFlatSpec with Matchers { 23 | 24 | private val mockClassAloneFingerprint = 6675579114512671233L 25 | private val mockClassParentFingerprint = -6310800772237892477L 26 | val config: Config = ConfigFactory.load() 27 | val connector: Connector = ConnectorFactory.connector(config) 28 | val manager: AvroSchemaManager = new CachedLazyAvroSchemaManager(connector, endianness) 29 | 30 | "CachedLazyAvroSchemaManager" should "not fail after the initialization" in { 31 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 32 | assert(manager.registerAll(schemas).size == 1) 33 | } 34 | 35 | it should "load all existing schemas and register a new one" in { 36 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 37 | manager.getSchema(0L) 38 | 39 | manager.registerAll(schemas) 40 | 41 | val id = manager.getId(schemas.head) 42 | assert(manager.getSchema(id).isDefined) 43 | assert(schemas.head == manager.getSchema(id).get) 44 | } 45 | 46 | it should "get all previously registered schemas" in { 47 | val schema: Schema = SchemaReader.readFromResources("MyNestedClass.avsc") 48 | val schema0 = manager.getSchema(mockClassAloneFingerprint) 49 | val schema1 = manager.getSchema(mockClassParentFingerprint) 50 | assert(schema0.isDefined) 51 | assert(schema1.isDefined) 52 | assert(schema0.get != schema1.get) 53 | assert(schema != schema0.get) 54 | assert(schema != schema1.get) 55 | } 56 | 57 | it should "generate all schemas for all the annotated classes with @AvroSerde" in { 58 | val reflections = new Reflections("it.agilelab.darwin.app.mock.classes") 59 | 60 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]).toString 61 | val myNestedSchema = ReflectData.get().getSchema(classOf[MyNestedClass]).toString 62 | val myClassSchema = ReflectData.get().getSchema(classOf[MyClass]).toString 63 | 64 | val annotationClass: Class[AvroSerde] = classOf[AvroSerde] 65 | val classes = reflections 66 | .getTypesAnnotatedWith(annotationClass) 67 | .toScala() 68 | .toSeq 69 | .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) 70 | val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName)).toString) 71 | Seq(oneFieldSchema, myClassSchema, myNestedSchema) should contain theSameElementsAs schemas 72 | } 73 | 74 | it should "reload all schemas from the connector" in { 75 | val newSchema = ReflectData.get().getSchema(classOf[NewClass]) 76 | val newId = SchemaNormalization.parsingFingerprint64(newSchema) 77 | assert(manager.getSchema(newId).isEmpty) 78 | 79 | connector.insert(Seq(newId -> newSchema)) 80 | assert(manager.getSchema(newId).isDefined) 81 | assert(manager.getSchema(newId).get == newSchema) 82 | } 83 | 84 | it should "not call getId when retrieving a schema out of the cache" in { 85 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]) 86 | var calls = 0 87 | val manager = new CachedLazyAvroSchemaManager( 88 | new Connector { 89 | override def createTable(): Unit = () 90 | override def tableExists(): Boolean = true 91 | override def tableCreationHint(): String = "" 92 | override def fullLoad(): Seq[(Long, Schema)] = Seq.empty 93 | override def insert(schemas: Seq[(Long, Schema)]): Unit = () 94 | override def findSchema(id: Long): Option[Schema] = Some(oneFieldSchema) 95 | override def retrieveLatestSchema(identifier: String): Option[(Long, 
Schema)] = Some(1L -> oneFieldSchema) 96 | }, 97 | endianness 98 | ) { 99 | override def getId(schema: Schema): Long = { 100 | calls += 1 101 | super.getId(schema) 102 | } 103 | } 104 | manager.getSchema(3L) shouldNot be(null) // scalastyle:ignore 105 | calls shouldBe 0 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/LazyApplicationSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory } 4 | import it.agilelab.darwin.annotations.AvroSerde 5 | import it.agilelab.darwin.app.mock.classes.{ MyClass, MyNestedClass, NewClass, OneField } 6 | import it.agilelab.darwin.common.compat._ 7 | import it.agilelab.darwin.common.{ Connector, ConnectorFactory, SchemaReader } 8 | import it.agilelab.darwin.manager.{ AvroSchemaManager, LazyAvroSchemaManager } 9 | import org.apache.avro.reflect.ReflectData 10 | import org.apache.avro.{ Schema, SchemaNormalization } 11 | import org.reflections.Reflections 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | import java.lang.reflect.Modifier 16 | import java.nio.ByteOrder 17 | 18 | class BigEndianLazyApplicationSuite extends LazyApplicationSuite(ByteOrder.BIG_ENDIAN) 19 | 20 | class LittleEndianLazyApplicationSuite extends LazyApplicationSuite(ByteOrder.LITTLE_ENDIAN) 21 | 22 | abstract class LazyApplicationSuite(endianness: ByteOrder) extends AnyFlatSpec with Matchers { 23 | private val mockClassAloneFingerprint = 6675579114512671233L 24 | private val mockClassParentFingerprint = -6310800772237892477L 25 | val config: Config = ConfigFactory.load() 26 | val connector: Connector = ConnectorFactory.connector(config) 27 | val manager: AvroSchemaManager = new LazyAvroSchemaManager(connector, endianness) 28 | 29 | "LazyAvroSchemaManager" should "not fail after the initialization" in { 30 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 31 | assert(manager.registerAll(schemas).size == 1) 32 | } 33 | 34 | it should "load all existing schemas and register a new one" in { 35 | val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) 36 | manager.getSchema(mockClassAloneFingerprint) 37 | 38 | manager.registerAll(schemas) 39 | 40 | val id = manager.getId(schemas.head) 41 | assert(manager.getSchema(id).isDefined) 42 | assert(schemas.head == manager.getSchema(id).get) 43 | } 44 | 45 | it should "get all previously registered schemas" in { 46 | val schema: Schema = SchemaReader.readFromResources("MyNestedClass.avsc") 47 | val schema0 = manager.getSchema(mockClassAloneFingerprint) 48 | val schema1 = manager.getSchema(mockClassParentFingerprint) 49 | assert(schema0.isDefined) 50 | assert(schema1.isDefined) 51 | assert(schema0.get != schema1.get) 52 | assert(schema != schema0.get) 53 | assert(schema != schema1.get) 54 | } 55 | 56 | it should "generate all schemas for all the annotated classes with @AvroSerde" in { 57 | val reflections = new Reflections("it.agilelab.darwin.app.mock.classes") 58 | 59 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]).toString 60 | val myNestedSchema = ReflectData.get().getSchema(classOf[MyNestedClass]).toString 61 | val myClassSchema = ReflectData.get().getSchema(classOf[MyClass]).toString 62 | 63 | val annotationClass: Class[AvroSerde] = classOf[AvroSerde] 64 | 
val classes = reflections 65 | .getTypesAnnotatedWith(annotationClass) 66 | .toScala() 67 | .toSeq 68 | .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) 69 | val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName)).toString) 70 | Seq(oneFieldSchema, myClassSchema, myNestedSchema) should contain theSameElementsAs schemas 71 | } 72 | 73 | it should "reload all schemas from the connector" in { 74 | val newSchema = ReflectData.get().getSchema(classOf[NewClass]) 75 | val newId = SchemaNormalization.parsingFingerprint64(newSchema) 76 | assert(manager.getSchema(newId).isEmpty) 77 | 78 | connector.insert(Seq(newId -> newSchema)) 79 | assert(manager.getSchema(newId).isDefined) 80 | assert(manager.getSchema(newId).get == newSchema) 81 | } 82 | 83 | it should "not call getId when retrieving a schema out of the cache" in { 84 | val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]) 85 | var calls = 0 86 | val manager = new LazyAvroSchemaManager( 87 | new Connector { 88 | override def createTable(): Unit = () 89 | override def tableExists(): Boolean = true 90 | override def tableCreationHint(): String = "" 91 | override def fullLoad(): Seq[(Long, Schema)] = Seq.empty 92 | override def insert(schemas: Seq[(Long, Schema)]): Unit = () 93 | override def findSchema(id: Long): Option[Schema] = Some(oneFieldSchema) 94 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = Some(1L -> oneFieldSchema) 95 | }, 96 | endianness 97 | ) { 98 | override def getId(schema: Schema): Long = { 99 | calls += 1 100 | super.getId(schema) 101 | } 102 | } 103 | manager.getSchema(3L) shouldNot be(null) // scalastyle:ignore 104 | calls shouldBe 0 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/ManagerUtilsSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import java.nio.{ ByteBuffer, ByteOrder } 4 | 5 | import com.typesafe.config.ConfigFactory 6 | import it.agilelab.darwin.common.SchemaReader 7 | import it.agilelab.darwin.manager.AvroSchemaManagerFactory 8 | import it.agilelab.darwin.manager.util.{ AvroSingleObjectEncodingUtils, ConfigurationKeys } 9 | import it.agilelab.darwin.manager.util.ByteArrayUtils._ 10 | 11 | import scala.util.Random 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | 15 | class BigEndianManagerUtilsSuite extends ManagerUtilsSuite(ByteOrder.BIG_ENDIAN) 16 | 17 | class LittleEndianManagerUtilsSuite extends ManagerUtilsSuite(ByteOrder.LITTLE_ENDIAN) 18 | 19 | abstract class ManagerUtilsSuite(endianness: ByteOrder) extends AnyFlatSpec with Matchers { 20 | 21 | "AvroSchemaManager utilities" should "create a Single-Object encoded byte array" in { 22 | val ORIGINAL_LENGTH: Int = 10 23 | val originalSchema = SchemaReader.readFromResources("OneField.avsc") 24 | val config = 25 | ConfigFactory 26 | .parseMap(new java.util.HashMap[String, String]() { 27 | { 28 | put(ConfigurationKeys.MANAGER_TYPE, ConfigurationKeys.CACHED_EAGER) 29 | put(ConfigurationKeys.ENDIANNESS, endianness.toString) 30 | } 31 | }) 32 | .withFallback(ConfigFactory.load()) 33 | .resolve() 34 | val manager = AvroSchemaManagerFactory.initialize(config) 35 | manager.registerAll(Seq(originalSchema)) 36 | val originalPayload = new Array[Byte](ORIGINAL_LENGTH) 37 | Random.nextBytes(originalPayload) 38 | val 
data: Array[Byte] = manager.generateAvroSingleObjectEncoded(originalPayload, originalSchema) 39 | assert(AvroSingleObjectEncodingUtils.isAvroSingleObjectEncoded(data)) 40 | val (schema, payload) = manager.retrieveSchemaAndAvroPayload(data) 41 | assert(schema == originalSchema) 42 | assert(originalPayload sameElements payload) 43 | } 44 | 45 | it should "convert a long to byte array and back" in { 46 | val longs = (1 to 10).map(_ => Random.nextLong()) 47 | 48 | assert( 49 | longs == longs.map(x => 50 | AvroSingleObjectEncodingUtils 51 | .readLong(ByteBuffer.wrap(x.longToByteArray(endianness)), endianness) 52 | ) 53 | ) 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/TwoConnectorsSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import it.agilelab.darwin.common.ConnectorFactory 5 | import it.agilelab.darwin.connector.hbase.HBaseConnectorCreator 6 | import it.agilelab.darwin.connector.mock.MockConnectorCreator 7 | import it.agilelab.darwin.connector.postgres.PostgresConnectorCreator 8 | import it.agilelab.darwin.manager.util.ConfigurationKeys 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | import org.scalatest.matchers.should.Matchers 11 | 12 | class TwoConnectorsSpec extends AnyFlatSpec with Matchers { 13 | it should "have both HBase and Postgresql available" in { 14 | ConnectorFactory.creators().map(_.getClass) should contain theSameElementsAs ( 15 | classOf[HBaseConnectorCreator] :: classOf[PostgresConnectorCreator] :: classOf[MockConnectorCreator] :: Nil 16 | ) 17 | } 18 | 19 | it should "choose HBase connector over Postgresql one" in { 20 | val config = ConfigFactory.parseString(s"""${ConfigurationKeys.CONNECTOR}: hbase""") 21 | ConnectorFactory.creator(config).map(_.getClass) should be(Some(classOf[HBaseConnectorCreator])) 22 | } 23 | 24 | it should "choose Postgresql connector over HBase one" in { 25 | val config = ConfigFactory.parseString(s"""${ConfigurationKeys.CONNECTOR}: postgresql""") 26 | ConnectorFactory.creator(config).map(_.getClass) should be(Some(classOf[PostgresConnectorCreator])) 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class MyClass(override val value: Int, otherVale: Long) extends MyTrait 7 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyNestedAbstractClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | abstract class MyNestedAbstractClass[T <: MyTrait](id: Int, myClass: T) 7 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyNestedClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import 
it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class MyNestedClass(id: Int, myClass: MyClass, my2Class: Map[String, MyClass]) 7 | extends MyNestedAbstractClass[MyClass](id, myClass) 8 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/MyTrait.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | trait MyTrait { 7 | def value: Int 8 | } 9 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/NewClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | case class NewClass(one: Long, two: String, three: Int) 4 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/NotToBeRegisteredClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | case class NotToBeRegisteredClass() 4 | -------------------------------------------------------------------------------- /mock-application/src/test/scala/it/agilelab/darwin/app/mock/classes/OneField.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.mock.classes 2 | 3 | import it.agilelab.darwin.annotations.AvroSerde 4 | 5 | @AvroSerde 6 | case class OneField(one: Int) 7 | -------------------------------------------------------------------------------- /mock-connector/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.mock.MockConnectorCreator 2 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | object ConfigurationKeys { 4 | val FILES = "files" 5 | val RESOURCES = "resources" 6 | val MODE = "mode" 7 | val STRICT = "strict" 8 | val PERMISSIVE = "permissive" 9 | 10 | sealed trait Mode 11 | 12 | object Mode { 13 | def parse(string: String): Mode = { 14 | string.toLowerCase match { 15 | case STRICT => Strict 16 | case PERMISSIVE => Permissive 17 | case other: String => throw new IllegalArgumentException(s"Unknown mode: $other") 18 | } 19 | } 20 | } 21 | 22 | case object Strict extends Mode 23 | 24 | case object Permissive extends Mode 25 | 26 | } 27 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/MockConnector.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | import it.agilelab.darwin.common.{ Connector, Logging, SchemaReader } 6 | import org.apache.avro.{ Schema, SchemaNormalization } 7 | 8 | import scala.collection.mutable 9 | 10 | class MockConnectorException(msg: String, t: 
Option[Throwable]) extends RuntimeException(msg) { 11 | def this(msg: String) = this(msg, None) 12 | 13 | def this(t: Throwable) = this(t.getMessage, Some(t)) 14 | 15 | override def getCause: Throwable = t match { 16 | case Some(value) => value 17 | case None => super.getCause 18 | } 19 | } 20 | 21 | class MockConnector(config: Config) extends Connector with Logging { 22 | 23 | private[this] var loaded: Boolean = false 24 | 25 | val mode: ConfigurationKeys.Mode = if (config.hasPath(ConfigurationKeys.MODE)) { 26 | ConfigurationKeys.Mode.parse(config.getString(ConfigurationKeys.MODE)) 27 | } else { 28 | ConfigurationKeys.Strict 29 | } 30 | 31 | private def files = if (config.hasPath(ConfigurationKeys.FILES)) { 32 | config.getStringList(ConfigurationKeys.FILES).toScala().map { s => 33 | try { 34 | SchemaReader.safeRead(new java.io.File(s)) 35 | } catch { 36 | case t: Throwable => Left(SchemaReader.UnknownError(t)) 37 | } 38 | } 39 | } else { 40 | Nil 41 | } 42 | 43 | private def resources = if (config.hasPath(ConfigurationKeys.RESOURCES)) { 44 | config.getStringList(ConfigurationKeys.RESOURCES).toScala().map { s => 45 | try { 46 | SchemaReader.safeReadFromResources(s) 47 | } catch { 48 | case t: Throwable => Left(SchemaReader.UnknownError(t)) 49 | } 50 | } 51 | } else { 52 | Nil 53 | } 54 | 55 | private def handleError(error: SchemaReader.SchemaReaderError): Unit = { 56 | mode match { 57 | case ConfigurationKeys.Strict => 58 | error match { 59 | case SchemaReader.SchemaParserError(exception) => 60 | throw new MockConnectorException(exception) 61 | case SchemaReader.IOError(exception) => throw new MockConnectorException(exception) 62 | case SchemaReader.ResourceNotFoundError(msg) => throw new MockConnectorException(msg) 63 | case SchemaReader.UnknownError(t) => throw new MockConnectorException(t) 64 | } 65 | case ConfigurationKeys.Permissive => 66 | error match { 67 | case SchemaReader.SchemaParserError(exception) => log.warn(exception.getMessage, exception) 68 | case SchemaReader.IOError(exception) => log.warn(exception.getMessage, exception) 69 | case SchemaReader.ResourceNotFoundError(msg) => log.warn(msg) 70 | case SchemaReader.UnknownError(t) => log.warn(t.getMessage, t) 71 | } 72 | } 73 | } 74 | 75 | private val table: mutable.Map[Long, Schema] = mutable.Map.empty[Long, Schema] 76 | 77 | override def fullLoad(): Seq[(Long, Schema)] = { 78 | (resources ++ files).foreach { 79 | case Left(error) => handleError(error) 80 | case Right(schema) => table(SchemaNormalization.parsingFingerprint64(schema)) = schema 81 | } 82 | table.toSeq 83 | } 84 | 85 | override def insert(schemas: Seq[(Long, Schema)]): Unit = { 86 | schemas.foreach { case (id, schema) => 87 | table(id) = schema 88 | } 89 | } 90 | 91 | override def findSchema(id: Long): Option[Schema] = { 92 | if (!loaded) { 93 | this.synchronized { 94 | if (!loaded) { 95 | fullLoad() 96 | loaded = true 97 | } 98 | } 99 | } 100 | table.get(id) 101 | } 102 | 103 | override def createTable(): Unit = () 104 | 105 | override def tableExists(): Boolean = true 106 | 107 | override def tableCreationHint(): String = "No table needs to be created since this is a mock connector" 108 | 109 | /** 110 | * Retrieves the latest schema for a given string identifier (not to be confused with the fingerprint id).
111 | * This API might not be implemented by all connectors, which should return None 112 | */ 113 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = 114 | table.find(_._2.getFullName == identifier) 115 | } 116 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/MockConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator } 5 | 6 | class MockConnectorCreator extends ConnectorCreator { 7 | override def create(config: Config): Connector = new MockConnector(config) 8 | 9 | /** 10 | * @return the name of the Connector 11 | */ 12 | override def name(): String = "mock" 13 | } 14 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/testclasses/MockClassAlone.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock.testclasses 2 | 3 | case class MockClassAlone(fry: String, bender: Long, leela: Int, zoidberg: Boolean) 4 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/testclasses/MockClassChild.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock.testclasses 2 | 3 | case class MockClassChild(twoOne: Long, twoTwo: String) 4 | -------------------------------------------------------------------------------- /mock-connector/src/main/scala/it/agilelab/darwin/connector/mock/testclasses/MockClassParent.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock.testclasses 2 | 3 | case class MockClassParent(one: Int, two: String, three: Long, four: MockClassChild) 4 | -------------------------------------------------------------------------------- /mock-connector/src/test/resources/test/MockClassAlone.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassAlone","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"fry","type":"string"},{"name":"bender","type":"long"},{"name":"leela","type":"int"},{"name":"zoidberg","type":"boolean"}]} 2 | -------------------------------------------------------------------------------- /mock-connector/src/test/resources/test/MockClassParent.avsc: -------------------------------------------------------------------------------- 1 | {"type":"record","name":"MockClassParent","namespace":"it.agilelab.darwin.connector.mock.testclasses","fields":[{"name":"one","type":"int"},{"name":"two","type":"string"},{"name":"three","type":"long"},{"name":"four","type":{"type":"record","name":"MockClassChild","fields":[{"name":"twoOne","type":"long"},{"name":"twoTwo","type":"string"}]}}]} 2 | -------------------------------------------------------------------------------- /mock-connector/src/test/scala/it/agilelab/darwin/connector/mock/MockConnectorSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mock 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import 
org.apache.avro.Schema 5 | import org.apache.avro.Schema.Type 6 | import org.scalatest.flatspec.AnyFlatSpec 7 | import org.scalatest.matchers.should.Matchers 8 | 9 | import java.nio.file.Paths 10 | import java.util 11 | 12 | class MockConnectorSpec extends AnyFlatSpec with Matchers { 13 | 14 | private val p = Paths 15 | .get(".") 16 | .resolve("mock-connector") 17 | .resolve("src") 18 | .resolve("test") 19 | .resolve("resources") 20 | .resolve("test") 21 | 22 | it should "load the schema manually inserted" in { 23 | val connector = new MockConnectorCreator().create(ConfigFactory.empty()) 24 | connector.insert((3L, Schema.create(Type.BYTES)) :: Nil) 25 | connector.fullLoad() should have size 1 26 | } 27 | 28 | it should "load the schema automatically from resources" in { 29 | val connector = new MockConnectorCreator().create(ConfigFactory.parseMap { 30 | new java.util.HashMap[String, Object] { 31 | put(ConfigurationKeys.RESOURCES, util.Arrays.asList("test/MockClassAlone.avsc", "test/MockClassParent.avsc")) 32 | } 33 | }) 34 | connector.fullLoad() should have size 2 35 | } 36 | 37 | it should "load the schema automatically from files" in { 38 | val connector = new MockConnectorCreator().create(ConfigFactory.parseMap { 39 | new java.util.HashMap[String, Object] { 40 | put( 41 | ConfigurationKeys.FILES, 42 | util.Arrays.asList(p.resolve("MockClassAlone.avsc").toString, p.resolve("MockClassParent.avsc").toString) 43 | ) 44 | } 45 | }) 46 | connector.fullLoad() should have size 2 47 | } 48 | 49 | it should "not throw any exception in case of missing file in permissive mode" in { 50 | val connector = new MockConnectorCreator().create(ConfigFactory.parseMap { 51 | new java.util.HashMap[String, Object] { 52 | put( 53 | ConfigurationKeys.FILES, 54 | util.Arrays.asList( 55 | p.resolve("DoesNotExists.avsc").toString, 56 | p.resolve("MockClassAlone.avsc").toString, 57 | p.resolve("MockClassParent.avsc").toString 58 | ) 59 | ) 60 | put(ConfigurationKeys.MODE, "permissive") 61 | } 62 | }) 63 | connector.fullLoad() should have size 2 64 | } 65 | 66 | it should "throw an exception in case of missing file in strict mode" in { 67 | intercept[MockConnectorException] { 68 | new MockConnectorCreator() 69 | .create(ConfigFactory.parseMap { 70 | new java.util.HashMap[String, Object] { 71 | put( 72 | ConfigurationKeys.FILES, 73 | util.Arrays.asList( 74 | p.resolve("DoesNotExists.avsc").toString, 75 | p.resolve("MockClassAlone.avsc").toString, 76 | p.resolve("MockClassParent.avsc").toString 77 | ) 78 | ) 79 | } 80 | }) 81 | .fullLoad() 82 | } 83 | } 84 | 85 | it should "return Some schema if asked for the latest schema" in { 86 | val connector = 87 | new MockConnectorCreator() 88 | .create(ConfigFactory.parseMap { 89 | new java.util.HashMap[String, Object] { 90 | put( 91 | ConfigurationKeys.FILES, 92 | util.Arrays.asList( 93 | p.resolve("MockClassAlone.avsc").toString, 94 | p.resolve("MockClassParent.avsc").toString 95 | ) 96 | ) 97 | } 98 | }) 99 | val all = connector.fullLoad() 100 | connector.retrieveLatestSchema("it.agilelab.darwin.connector.mock.testclasses.MockClassAlone") shouldBe all.find( 101 | _._2.getName == "MockClassAlone" 102 | ) 103 | 104 | } 105 | 106 | it should "return None schema if asked for the latest schema" in { 107 | val connector = 108 | new MockConnectorCreator() 109 | .create(ConfigFactory.parseMap { 110 | new java.util.HashMap[String, Object] { 111 | put( 112 | ConfigurationKeys.FILES, 113 | util.Arrays.asList( 114 | p.resolve("MockClassAlone.avsc").toString, 115 | 
p.resolve("MockClassParent.avsc").toString 116 | ) 117 | ) 118 | } 119 | }) 120 | connector.fullLoad() 121 | connector.retrieveLatestSchema("DoesNotExists") shouldBe None 122 | 123 | } 124 | 125 | } 126 | -------------------------------------------------------------------------------- /mongo/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.mongo.MongoConnectorCreator 2 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | object ConfigurationKeys { 4 | 5 | val USERNAME: String = "username" // the username 6 | val PASSWORD: String = "password" // the password 7 | val HOST: String = "host" // the hostname where you want to connect 8 | val DATABASE: String = "database" // the name of the database in which the user is defined 9 | val COLLECTION: String = "collection" // the collection name 10 | val TIMEOUT: String = "timeout" // the timeout max to wait the results 11 | 12 | } 13 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/ConfigurationMongoModels.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | import scala.concurrent.duration.Duration 4 | 5 | object ConfigurationMongoModels { 6 | 7 | sealed trait BaseMongoConfig { 8 | def database: String 9 | def collection: String 10 | def timeout: Duration 11 | } 12 | 13 | case class MongoConfig( 14 | database: String, 15 | collection: String, 16 | timeout: Duration 17 | ) extends BaseMongoConfig 18 | 19 | case class MongoConnectorConfig( 20 | username: String, 21 | password: String, 22 | database: String, 23 | collection: String, 24 | hosts: Seq[String], 25 | timeout: Duration 26 | ) extends BaseMongoConfig 27 | 28 | val DEFAULT_DURATION = 5000 29 | 30 | } 31 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/MongoConnector.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | import com.mongodb.{ BasicDBObject, ErrorCategory } 4 | import it.agilelab.darwin.common.{ Connector, Logging } 5 | import it.agilelab.darwin.connector.mongo.ConfigurationMongoModels.BaseMongoConfig 6 | import org.apache.avro.Schema 7 | import org.apache.avro.Schema.Parser 8 | import org.mongodb.scala.bson.{ BsonDocument, BsonValue } 9 | import org.mongodb.scala.{ bson, Document, MongoClient, MongoCollection, MongoWriteException } 10 | 11 | import scala.concurrent.ExecutionContext.Implicits.global 12 | import scala.concurrent.Await 13 | import scala.util.{ Failure, Try } 14 | 15 | class MongoConnector(mongoClient: MongoClient, mongoConfig: BaseMongoConfig) extends Connector with Logging { 16 | 17 | private def parser: Parser = new Parser() 18 | 19 | override def fullLoad(): Seq[(Long, Schema)] = { 20 | 21 | log.debug(s"loading all schemas from collection ${mongoConfig.collection}") 22 | val collection = 23 | mongoClient 24 | .getDatabase(mongoConfig.database) 25 | .getCollection(mongoConfig.collection) 26 | 27 | val schemas: 
Seq[Try[(Long, Schema)]] = 28 | Await.result( 29 | collection 30 | .find() 31 | .map { document => 32 | for { 33 | key <- extract(document, "_id", _.asInt64().getValue) 34 | schemaStr <- extract(document, "schema", _.asString().getValue) 35 | schema <- Try(parser.parse(schemaStr)) 36 | } yield key -> schema 37 | } 38 | .toFuture(), 39 | mongoConfig.timeout 40 | ) 41 | log.debug(s"${schemas.size} loaded from MongoDB") 42 | // this way the first exception is thrown, but we can change this line 43 | // to support different error handling strategies 44 | schemas.map(_.get) 45 | } 46 | 47 | private def extract[A](d: Document, fieldName: String, f: BsonValue => A): Try[A] = { 48 | d.filterKeys(k => k == fieldName) 49 | .headOption 50 | .fold[Try[A]](Failure(new RuntimeException(s"Cannot find $fieldName field in document"))) { case (_, value) => 51 | Try(f(value)).recoverWith { case t: Throwable => 52 | Failure(new RuntimeException(s"$fieldName was not of expected type", t)) 53 | } 54 | } 55 | } 56 | 57 | override def insert(schemas: Seq[(Long, Schema)]): Unit = { 58 | 59 | log.debug(s"inclusion of new schemas in the collection ${mongoConfig.collection}") 60 | 61 | schemas.foreach { case (id, schema) => 62 | val document = new BsonDocument 63 | document.put("_id", bson.BsonInt64(id)) 64 | document.put("schema", bson.BsonString(schema.toString)) 65 | document.put("name", bson.BsonString(schema.getName)) 66 | document.put("namespace", bson.BsonString(schema.getNamespace)) 67 | 68 | insertIfNotExists(mongoClient.getDatabase(mongoConfig.database).getCollection(mongoConfig.collection), document) 69 | } 70 | } 71 | 72 | private def insertIfNotExists(collection: MongoCollection[Document], document: BsonDocument): Unit = { 73 | try { 74 | Await.result(collection.insertOne(document).toFuture(), mongoConfig.timeout) 75 | } catch { 76 | case ex: MongoWriteException if ex.getError.getCategory == ErrorCategory.DUPLICATE_KEY => 77 | log.info("document already present, doing nothing") 78 | } 79 | () 80 | } 81 | 82 | override def createTable(): Unit = { 83 | log.debug(s"Creating collection ${mongoConfig.collection}") 84 | try { 85 | Await.result( 86 | mongoClient.getDatabase(mongoConfig.database).createCollection(mongoConfig.collection).toFuture(), 87 | mongoConfig.timeout 88 | ) 89 | log.info(s"collection ${mongoConfig.collection} has been correctly created") 90 | } catch { 91 | case e: Exception => log.info(s"collection ${mongoConfig.collection} was not created. 
\n ${e.getMessage}") 92 | } 93 | } 94 | 95 | override def tableExists(): Boolean = { 96 | Await.result( 97 | mongoClient 98 | .getDatabase(mongoConfig.database) 99 | .listCollectionNames() 100 | .filter(x => x == mongoConfig.collection) 101 | .toFuture() 102 | .map(_.size), 103 | mongoConfig.timeout 104 | ) == 1 105 | } 106 | 107 | override def tableCreationHint(): String = { 108 | s"""To create the collection from shell perform the following command: 109 | |db.createCollection(${mongoConfig.collection}) 110 | """.stripMargin 111 | } 112 | 113 | override def findSchema(id: Long): Option[Schema] = { 114 | 115 | val query = new BasicDBObject 116 | query.put("_id", bson.BsonInt64(id)) 117 | 118 | val documents = 119 | mongoClient 120 | .getDatabase(mongoConfig.database) 121 | .getCollection(mongoConfig.collection) 122 | .find(query) 123 | .toFuture() 124 | 125 | val schemaValue: Seq[String] = 126 | for { 127 | document <- Await.result(documents, mongoConfig.timeout) 128 | field <- document 129 | if field._1 == "schema" 130 | } yield field._2.asString().getValue 131 | schemaValue.headOption.map(parser.parse) 132 | } 133 | 134 | /** 135 | * Retrieves the latest schema for a given string identifier (not to be confused with the fingerprint id). 136 | * This API might not be implemented by all connectors, which should return None 137 | */ 138 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = None 139 | } 140 | -------------------------------------------------------------------------------- /mongo/src/main/scala/it/agilelab/darwin/connector/mongo/MongoConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | import com.mongodb.Block 4 | import com.typesafe.config.Config 5 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator } 6 | import it.agilelab.darwin.connector.mongo.ConfigurationMongoModels.MongoConnectorConfig 7 | import org.mongodb.scala.connection.ClusterSettings 8 | import org.mongodb.scala.{ MongoClient, MongoClientSettings, MongoCredential, ServerAddress } 9 | import it.agilelab.darwin.common.compat._ 10 | import scala.concurrent.duration.Duration 11 | 12 | class MongoConnectorCreator extends ConnectorCreator { 13 | 14 | override def create(config: Config): Connector = { 15 | 16 | val mongoConfig: MongoConnectorConfig = createConfig(config) 17 | new MongoConnector(createConnection(mongoConfig), mongoConfig) 18 | } 19 | 20 | /** 21 | * @return the name of the Connector 22 | */ 23 | override def name(): String = "mongo" 24 | 25 | /** 26 | * return the MongoClient 27 | * @param mongoConf : config to create a connection to MongoDB 28 | * @return MongoClient 29 | */ 30 | private def createConnection(mongoConf: MongoConnectorConfig): MongoClient = { 31 | 32 | val credential: MongoCredential = 33 | MongoCredential.createCredential(mongoConf.username, mongoConf.database, mongoConf.password.toCharArray) 34 | 35 | val hosts: Seq[ServerAddress] = mongoConf.hosts.map(host => new ServerAddress(host)) 36 | 37 | val settings: MongoClientSettings = MongoClientSettings 38 | .builder() 39 | .credential(credential) 40 | .applyToClusterSettings(new Block[ClusterSettings.Builder] { 41 | override def apply(builder: ClusterSettings.Builder): Unit = 42 | builder.hosts(java.util.Arrays.asList(hosts: _*)) 43 | }) 44 | .build() 45 | 46 | MongoClient(settings) 47 | } 48 | 49 | /** 50 | * create MongoConnectorConfig started from a configuration file 51 | * @param config: 
configurations parsed from the file 52 | * @return MongoConnectorConfig 53 | */ 54 | def createConfig(config: Config): MongoConnectorConfig = { 55 | require(config.hasPath(ConfigurationKeys.USERNAME)) 56 | require(config.hasPath(ConfigurationKeys.PASSWORD)) 57 | require(config.hasPath(ConfigurationKeys.HOST)) 58 | require(config.hasPath(ConfigurationKeys.DATABASE)) 59 | require(config.hasPath(ConfigurationKeys.COLLECTION)) 60 | 61 | MongoConnectorConfig( 62 | config.getString(ConfigurationKeys.USERNAME), 63 | config.getString(ConfigurationKeys.PASSWORD), 64 | config.getString(ConfigurationKeys.DATABASE), 65 | config.getString(ConfigurationKeys.COLLECTION), 66 | config.getStringList(ConfigurationKeys.HOST).toScala().toSeq, 67 | if (config.hasPath(ConfigurationKeys.TIMEOUT)) { 68 | Duration.create(config.getInt(ConfigurationKeys.TIMEOUT), "millis") 69 | } else { 70 | Duration.create(ConfigurationMongoModels.DEFAULT_DURATION, "millis") 71 | } 72 | ) 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /mongo/src/test/resources/mongo.conf: -------------------------------------------------------------------------------- 1 | username = "mongo" 2 | password = "mongo" 3 | host = ["localhost:12345"] 4 | database = "test" 5 | collection = "collection_test" 6 | timeout = 5000 7 | -------------------------------------------------------------------------------- /mongo/src/test/resources/mongomock.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type" : "record", 3 | "name" : "PostgresMock", 4 | "namespace" : "it.agilelab.darwin.connector.postgres", 5 | "fields" : [ { 6 | "name" : "one", 7 | "type" : "int" 8 | }, { 9 | "name" : "two", 10 | "type" : "string" 11 | }, { 12 | "name" : "three", 13 | "type" : "long" 14 | }, { 15 | "name" : "four", 16 | "type" : { 17 | "type" : "record", 18 | "name" : "Postgres2Mock", 19 | "fields" : [ { 20 | "name" : "one", 21 | "type" : "boolean" 22 | }, { 23 | "name" : "two", 24 | "type" : "long" 25 | } ] 26 | } 27 | } ] 28 | } 29 | -------------------------------------------------------------------------------- /mongo/src/test/scala/it/agilelab/darwin/connector/mongo/MongoConnectorTest.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.mongo 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory } 4 | import de.flapdoodle.embed.mongo.{ MongodExecutable, MongodProcess, MongodStarter } 5 | import de.flapdoodle.embed.mongo.config.{ IMongodConfig, MongodConfigBuilder, Net } 6 | import de.flapdoodle.embed.mongo.distribution.Version 7 | import de.flapdoodle.embed.process.runtime.Network 8 | import org.scalatest.BeforeAndAfterAll 9 | import org.mongodb.scala.MongoClient 10 | import it.agilelab.darwin.common.Connector 11 | import it.agilelab.darwin.connector.mongo.ConfigurationMongoModels.MongoConfig 12 | import org.scalatest.flatspec.AnyFlatSpec 13 | import org.scalatest.matchers.should.Matchers 14 | import org.apache.avro.Schema 15 | import org.apache.avro.Schema.Parser 16 | 17 | import scala.concurrent.Await 18 | import scala.concurrent.duration.Duration 19 | 20 | class MongoConnectorTest extends AnyFlatSpec with Matchers with BeforeAndAfterAll { 21 | 22 | val port = 12345 23 | val config: Config = ConfigFactory.load("mongo.conf") 24 | val starter: MongodStarter = MongodStarter.getDefaultInstance 25 | val mongodConfig: IMongodConfig = 26 | new MongodConfigBuilder() 27 | 
.version(Version.Main.PRODUCTION) 28 | .net(new Net("localhost", port, Network.localhostIsIPv6)) 29 | .build 30 | val mongoConfig: MongoConfig = MongoConfig( 31 | config.getString(ConfigurationKeys.DATABASE), 32 | config.getString(ConfigurationKeys.COLLECTION), 33 | if (config.hasPath(ConfigurationKeys.TIMEOUT)) { 34 | Duration.create(config.getInt(ConfigurationKeys.TIMEOUT), "millis") 35 | } else { 36 | Duration.create(ConfigurationMongoModels.DEFAULT_DURATION, "millis") 37 | } 38 | ) 39 | val mongodExecutable: MongodExecutable = starter.prepare(mongodConfig) 40 | var mongod: MongodProcess = _ 41 | var mongoClient: MongoClient = _ 42 | var connector: Connector = _ 43 | 44 | override protected def beforeAll(): Unit = { 45 | super.beforeAll() 46 | 47 | mongod = mongodExecutable.start 48 | mongoClient = MongoClient( 49 | s"mongodb://${config.getStringList(ConfigurationKeys.HOST).get(0)}/" + 50 | s"${config.getString(ConfigurationKeys.DATABASE)}" 51 | ) 52 | connector = new MongoConnector(mongoClient, mongoConfig) 53 | connector.createTable() 54 | } 55 | 56 | override protected def afterAll(): Unit = { 57 | mongod.stop() 58 | 59 | super.afterAll() 60 | } 61 | 62 | "Table collection_test" should "be created by connector" in { 63 | connector.createTable() 64 | assert(connector.tableExists()) 65 | } 66 | 67 | "schemas" should "be inserted into collection" in { 68 | val schema: Schema = new Parser().parse(getClass.getClassLoader.getResourceAsStream("mongomock.avsc")) 69 | val schemas = Seq((0L, schema), (1L, schema)) 70 | connector.insert(schemas) 71 | val numberOfDocuments = 72 | Await.result( 73 | mongoClient 74 | .getDatabase(config.getString(ConfigurationKeys.DATABASE)) 75 | .getCollection(config.getString(ConfigurationKeys.COLLECTION)) 76 | .countDocuments() 77 | .toFuture(), 78 | mongoConfig.timeout 79 | ) 80 | assert(numberOfDocuments == 2) 81 | } 82 | 83 | "schema" should "not be inserted into collection because there is already a schema with the same id" in { 84 | val schema: Schema = new Parser().parse(getClass.getClassLoader.getResourceAsStream("mongomock.avsc")) 85 | val schemas = Seq((0L, schema)) 86 | connector.insert(schemas) 87 | val numberOfDocuments = 88 | Await.result( 89 | mongoClient 90 | .getDatabase(config.getString(ConfigurationKeys.DATABASE)) 91 | .getCollection(config.getString(ConfigurationKeys.COLLECTION)) 92 | .countDocuments() 93 | .toFuture(), 94 | mongoConfig.timeout 95 | ) 96 | assert(numberOfDocuments == 2) 97 | } 98 | 99 | "full load" should "return a list of length equal to 2" in { 100 | val schemas: Seq[(Long, Schema)] = connector.fullLoad() 101 | assert(schemas.length == 2) 102 | } 103 | 104 | "find schema" should "return a schema" in { 105 | val schema: Option[Schema] = connector.findSchema(0L) 106 | assert(schema.isDefined) 107 | } 108 | 109 | "find schema" should "return None" in { 110 | val schema: Option[Schema] = connector.findSchema(3L) 111 | assert(schema.isEmpty) 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /multi-connector/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.multi.MultiConnectorCreator -------------------------------------------------------------------------------- /multi-connector/src/main/scala/it/agilelab/darwin/connector/multi/MultiConnectorCreator.scala:
-------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.multi 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.compat._ 5 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, ConnectorFactory } 6 | import it.agilelab.darwin.manager.exception.DarwinException 7 | 8 | object MultiConnectorCreator { 9 | val REGISTRATOR = "registrar" 10 | val CONFLUENT_SINGLE_OBJECT_ENCODING = "confluent-single-object-encoding" 11 | val STANDARD_SINGLE_OBJECT_ENCODING = "standard-single-object-encoding" 12 | } 13 | class MultiConnectorCreator extends ConnectorCreator { 14 | 15 | /** 16 | * @return the name of the Connector 17 | */ 18 | override def name(): String = "multi" 19 | 20 | private def mergeConf(conf: Config, path: String): Config = { 21 | 22 | conf 23 | .getConfig(path) 24 | .entrySet() 25 | .toScala() 26 | .map(_.getKey) 27 | .foldLeft(conf)((z, x) => z.withValue(x, conf.getValue(path + "." + x))) 28 | } 29 | 30 | override def create(config: Config): Connector = { 31 | val registrarName = 32 | config.getString(MultiConnectorCreator.REGISTRATOR) 33 | 34 | val confluentConnectorType = 35 | if (config.hasPath(MultiConnectorCreator.CONFLUENT_SINGLE_OBJECT_ENCODING)) { 36 | Some(config.getString(MultiConnectorCreator.CONFLUENT_SINGLE_OBJECT_ENCODING)) 37 | } else { 38 | None 39 | } 40 | 41 | val standardConnectorTypes = config 42 | .getStringList(MultiConnectorCreator.STANDARD_SINGLE_OBJECT_ENCODING) 43 | .toScala() 44 | 45 | val registrar = createAndMergeConfigs(config, registrarName) 46 | 47 | val confluentConnector = 48 | confluentConnectorType.map { cName => 49 | createIfNotRegistrar(registrarName, registrar, cName, config) 50 | } 51 | 52 | val singleObjectConnectors = standardConnectorTypes.map { cName => 53 | createIfNotRegistrar(registrarName, registrar, cName, config) 54 | }.toList 55 | 56 | new MultiConnector( 57 | registrar, 58 | confluentConnector, 59 | singleObjectConnectors 60 | ) 61 | } 62 | 63 | private def createAndMergeConfigs(config: Config, registrarName: String) = { 64 | ConnectorFactory 65 | .creator(registrarName) 66 | .map(creator => creator.create(mergeConf(config, registrarName))) 67 | .getOrElse(throw new DarwinException("No connector creator for name " + registrarName)) 68 | } 69 | 70 | private def createIfNotRegistrar(registrarName: String, registrar: Connector, cName: String, config: Config) = { 71 | if (cName == registrarName) { 72 | registrar 73 | } else { 74 | ConnectorFactory 75 | .creator(cName) 76 | .map(creator => creator.create(mergeConf(config, cName))) 77 | .getOrElse(throw new DarwinException("No connector creator for name " + cName)) 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /multi-connector/src/test/scala/it/agilelab/darwin/connector/multi/MultiConnectorSpec.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.multi 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient 5 | import it.agilelab.darwin.common.ConnectorFactory 6 | import it.agilelab.darwin.connector.confluent.{ ConfluentConnector, ConfluentConnectorOptions } 7 | import it.agilelab.darwin.connector.mock.{ ConfigurationKeys, MockConnector, MockConnectorCreator } 8 | import it.agilelab.darwin.manager.LazyAvroSchemaManager 9 | import org.apache.avro.SchemaBuilder 10 | import 
org.scalatest.BeforeAndAfterAll 11 | import org.scalatest.flatspec.AnyFlatSpec 12 | import org.scalatest.matchers.should.Matchers 13 | 14 | import java.io.{ ByteArrayInputStream, ByteArrayOutputStream } 15 | import java.nio.file.Paths 16 | import java.nio.{ ByteBuffer, ByteOrder } 17 | import java.util 18 | import java.util.Collections 19 | 20 | class MultiConnectorSpec extends AnyFlatSpec with Matchers with BeforeAndAfterAll { 21 | private val p = Paths 22 | .get(".") 23 | .resolve("mock-connector") 24 | .resolve("src") 25 | .resolve("test") 26 | .resolve("resources") 27 | .resolve("test") 28 | 29 | private def mockConnector() = { 30 | new MockConnectorCreator().create(ConfigFactory.parseMap { 31 | new util.HashMap[String, Object] { 32 | put( 33 | ConfigurationKeys.FILES, 34 | util.Arrays.asList( 35 | p.resolve("DoesNotExists.avsc").toString, 36 | p.resolve("MockClassAlone.avsc").toString, 37 | p.resolve("MockClassParent.avsc").toString 38 | ) 39 | ) 40 | put(ConfigurationKeys.MODE, "permissive") 41 | } 42 | }) 43 | } 44 | 45 | private def mockConfluentConnector() = { 46 | new ConfluentConnector( 47 | options = ConfluentConnectorOptions(List.empty, Collections.emptyMap(), 1000), 48 | client = new MockSchemaRegistryClient() 49 | ) 50 | } 51 | 52 | it should "start with mock and confluent-mock connector" in { 53 | val confluent = mockConfluentConnector() 54 | val mock = mockConnector() 55 | val multiC = new MultiConnector( 56 | confluent, 57 | Some(confluent), 58 | List(mock) 59 | ) 60 | 61 | val initiallyLoaded = multiC.fullLoad() 62 | initiallyLoaded.size shouldBe 2 63 | initiallyLoaded.foreach { case (id, schema) => 64 | multiC.extractId( 65 | mock.generateAvroSingleObjectEncoded(Array.emptyByteArray, schema, ByteOrder.BIG_ENDIAN, mock.fingerprint _), 66 | ByteOrder.BIG_ENDIAN 67 | ) shouldBe id 68 | } 69 | } 70 | 71 | it should "register a schema using the registrar" in { 72 | val confluent = mockConfluentConnector() 73 | val mock = mockConnector() 74 | val multiC = new MultiConnector( 75 | confluent, 76 | Some(confluent), 77 | List(mock) 78 | ) 79 | 80 | val schemaToInsert = SchemaBuilder 81 | .record("Test") 82 | .prop("x-darwin-subject", "test-value") 83 | .fields() 84 | .requiredLong("numero") 85 | .endRecord() 86 | val manager = new LazyAvroSchemaManager(multiC, ByteOrder.BIG_ENDIAN) 87 | val id = manager.registerAll(Seq(schemaToInsert)).head._1 88 | multiC.fullLoad().size shouldBe 3 89 | val parsedId = manager.extractId( 90 | Array(0x00: Byte) ++ 91 | ByteBuffer.wrap(Array.ofDim[Byte](4)).putInt(id.toInt).array() 92 | ) 93 | parsedId shouldBe id 94 | } 95 | 96 | it should "be created with a confluent connector and a mock one" in { 97 | val multiConnectorCreator = ConnectorFactory.creator("multi").get 98 | val connector: MultiConnector = multiConnectorCreator 99 | .create( 100 | ConfigFactory.parseString( 101 | s""" 102 | | type = "eager" 103 | | connector = "multi" 104 | | registrar = "confluent" 105 | | confluent-single-object-encoding: "confluent" 106 | | standard-single-object-encoding: ["mock"] 107 | | confluent { 108 | | endpoints: ["http://schema-registry-00:7777", "http://schema-registry-01:7777"] 109 | | max-cached-schemas: 1000 110 | | } 111 | | mock { 112 | | ${ConfigurationKeys.FILES} = [ 113 | | ${p.resolve("DoesNotExists.avsc").toString}, 114 | | ${p.resolve("MockClassAlone.avsc").toString}, 115 | | ${p.resolve("MockClassParent.avsc").toString} 116 | | ] 117 | | ${ConfigurationKeys.MODE} = "permissive" 118 | | } 119 | |""".stripMargin 120 | ) 121 | ) 122 | 
.asInstanceOf[MultiConnector] 123 | assert(connector.registrar.isInstanceOf[ConfluentConnector]) 124 | assert(connector.confluentConnector.exists(_.isInstanceOf[ConfluentConnector])) 125 | assert(connector.singleObjectEncodingConnectors.forall(_.isInstanceOf[MockConnector])) 126 | } 127 | 128 | it should "be created with only a mock connector" in { 129 | val multiConnectorCreator = ConnectorFactory.creator("multi").get 130 | val connector: MultiConnector = multiConnectorCreator 131 | .create( 132 | ConfigFactory.parseString( 133 | s""" 134 | | type = "eager" 135 | | connector = "multi" 136 | | registrar = "mock" 137 | | standard-single-object-encoding: ["mock"] 138 | | mock { 139 | | ${ConfigurationKeys.FILES} = [ 140 | | ${p.resolve("DoesNotExists.avsc").toString}, 141 | | ${p.resolve("MockClassAlone.avsc").toString}, 142 | | ${p.resolve("MockClassParent.avsc").toString} 143 | | ] 144 | | ${ConfigurationKeys.MODE} = "permissive" 145 | | } 146 | |""".stripMargin 147 | ) 148 | ) 149 | .asInstanceOf[MultiConnector] 150 | connector.confluentConnector shouldBe empty 151 | assert(connector.registrar.isInstanceOf[MockConnector]) 152 | assert(connector.singleObjectEncodingConnectors.forall(_.isInstanceOf[MockConnector])) 153 | } 154 | 155 | it should "extract schema and payload from confluent encoded byte array" in { 156 | val confluent = mockConfluentConnector() 157 | val mock = mockConnector() 158 | val multiC = new MultiConnector( 159 | confluent, 160 | Some(confluent), 161 | List(mock) 162 | ) 163 | val schemaToInsert = SchemaBuilder 164 | .record("Testa") 165 | .prop("x-darwin-subject", "test-value") 166 | .fields() 167 | .requiredLong("numera") 168 | .endRecord() 169 | val manager = new LazyAvroSchemaManager(multiC, ByteOrder.BIG_ENDIAN) 170 | val id = manager.registerAll(Seq(schemaToInsert)).head._1 171 | manager.extractId( 172 | manager.generateAvroSingleObjectEncoded(Array.emptyByteArray, schemaToInsert) 173 | ) shouldBe id 174 | 175 | manager.extractId( 176 | ByteBuffer.wrap(manager.generateAvroSingleObjectEncoded(Array.emptyByteArray, schemaToInsert)) 177 | ) shouldBe id 178 | 179 | val stream = new ByteArrayOutputStream() 180 | manager.generateAvroSingleObjectEncoded(stream, id)(identity) 181 | manager.extractId( 182 | new ByteArrayInputStream(stream.toByteArray) 183 | ) shouldBe Right(id) 184 | 185 | manager.extractSchema(new ByteArrayInputStream(stream.toByteArray)) shouldBe Right(schemaToInsert) 186 | 187 | val soe = ByteBuffer.wrap(manager.generateAvroSingleObjectEncoded(Array.emptyByteArray, schemaToInsert)) 188 | manager.retrieveSchemaAndAvroPayload(soe) shouldBe schemaToInsert 189 | 190 | manager.retrieveSchemaAndAvroPayload( 191 | ByteBuffer.wrap(manager.generateAvroSingleObjectEncoded(Array.emptyByteArray, schemaToInsert)) 192 | ) shouldBe schemaToInsert 193 | 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /postgres/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.postgres.PostgresConnectorCreator 2 | -------------------------------------------------------------------------------- /postgres/src/main/scala/it/agilelab/darwin/connector/postgres/ConfigurationKeys.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | object ConfigurationKeys { 4 | val TABLE: String = "table" 5 | val HOST: 
String = "host" 6 | val DATABASE: String = "db" 7 | val USER: String = "username" 8 | val PASSWORD: String = "password" 9 | val MODE: String = "mode" 10 | } 11 | -------------------------------------------------------------------------------- /postgres/src/main/scala/it/agilelab/darwin/connector/postgres/PostgresConnection.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | import java.sql.{ Connection, DriverManager } 4 | 5 | import com.typesafe.config.Config 6 | 7 | trait PostgresConnection { 8 | 9 | private var connectionUrl: String = "" 10 | private val driverName: String = "org.postgresql.Driver" 11 | 12 | protected def setConnectionConfig(config: Config) = { 13 | val db = config.getString(ConfigurationKeys.DATABASE) 14 | val host = config.getString(ConfigurationKeys.HOST) 15 | val user = config.getString(ConfigurationKeys.USER) 16 | val password = config.getString(ConfigurationKeys.PASSWORD) 17 | connectionUrl = s"jdbc:postgresql://$host/$db?user=$user&password=$password" 18 | } 19 | 20 | protected def getConnection: Connection = { 21 | Class.forName(driverName) 22 | val connection: Connection = DriverManager.getConnection(connectionUrl) 23 | connection 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /postgres/src/main/scala/it/agilelab/darwin/connector/postgres/PostgresConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator } 5 | 6 | class PostgresConnectorCreator extends ConnectorCreator { 7 | override def create(config: Config): Connector = new PostgresConnector(config) 8 | 9 | /** 10 | * @return the name of the Connector 11 | */ 12 | override def name(): String = "postgresql" 13 | } 14 | -------------------------------------------------------------------------------- /postgres/src/test/resources/postgres.properties: -------------------------------------------------------------------------------- 1 | host = localhost:5432 2 | db = postgres 3 | username = postgres 4 | password = mysecretpassword 5 | table = schema_registry 6 | -------------------------------------------------------------------------------- /postgres/src/test/resources/postgresmock.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type" : "record", 3 | "name" : "PostgresMock", 4 | "namespace" : "it.agilelab.darwin.connector.postgres", 5 | "fields" : [ { 6 | "name" : "one", 7 | "type" : "int" 8 | }, { 9 | "name" : "two", 10 | "type" : "string" 11 | }, { 12 | "name" : "three", 13 | "type" : "long" 14 | }, { 15 | "name" : "four", 16 | "type" : { 17 | "type" : "record", 18 | "name" : "Postgres2Mock", 19 | "fields" : [ { 20 | "name" : "one", 21 | "type" : "boolean" 22 | }, { 23 | "name" : "two", 24 | "type" : "long" 25 | } ] 26 | } 27 | } ] 28 | } 29 | -------------------------------------------------------------------------------- /postgres/src/test/scala/it/agilelab/darwin/connector/postgres/Postgres2Mock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | case class Postgres2Mock(one: Boolean, two: Long) 4 | -------------------------------------------------------------------------------- 
/postgres/src/test/scala/it/agilelab/darwin/connector/postgres/PostgresConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory, ConfigValueFactory } 4 | import it.agilelab.darwin.common.Connector 5 | import org.apache.avro.{ Schema, SchemaNormalization } 6 | import org.scalatest.BeforeAndAfterAll 7 | import ru.yandex.qatools.embed.postgresql.EmbeddedPostgres 8 | import ru.yandex.qatools.embed.postgresql.distribution.Version 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | import org.scalatest.matchers.should.Matchers 11 | 12 | class PostgresConnectorSuite extends AnyFlatSpec with Matchers with BeforeAndAfterAll { 13 | val embeddedPostgres: EmbeddedPostgres = new EmbeddedPostgres(Version.V9_6_11) 14 | 15 | override protected def beforeAll(): Unit = { 16 | super.beforeAll() 17 | val port = 5432 18 | val host = "localhost" 19 | val dbname = "postgres" 20 | val username = "postgres" 21 | val password = "mysecretpassword" 22 | embeddedPostgres.start(host, port, dbname, username, password) 23 | val config: Config = ConfigFactory.load("postgres.properties") 24 | val connector: Connector = new PostgresConnectorCreator().create(config) 25 | connector.createTable() 26 | } 27 | 28 | override protected def afterAll(): Unit = { 29 | super.afterAll() 30 | embeddedPostgres.stop() 31 | } 32 | 33 | it should "multiple insert and retrieve [No conf - OneTransaction]" in { 34 | val config: Config = ConfigFactory.load("postgres.properties") 35 | val connector: Connector = new PostgresConnectorCreator().create(config) 36 | test(connector) 37 | } 38 | 39 | it should "multiple insert and retrieve [OneTransaction]" in { 40 | val config: Config = ConfigFactory 41 | .load("postgres.properties") 42 | .withValue(ConfigurationKeys.MODE, ConfigValueFactory.fromAnyRef(OneTransaction.value)) 43 | val connector: Connector = new PostgresConnectorCreator().create(config) 44 | test(connector) 45 | } 46 | 47 | it should "multiple insert and retrieve [ExceptionDriven]" in { 48 | val config: Config = ConfigFactory 49 | .load("postgres.properties") 50 | .withValue(ConfigurationKeys.MODE, ConfigValueFactory.fromAnyRef(ExceptionDriven.value)) 51 | val connector: Connector = new PostgresConnectorCreator().create(config) 52 | test(connector) 53 | } 54 | 55 | private def test(connector: Connector) = { 56 | val outerSchema = new Schema.Parser().parse(getClass.getClassLoader.getResourceAsStream("postgresmock.avsc")) 57 | val innerSchema = outerSchema.getField("four").schema() 58 | val schemas = Seq(innerSchema, outerSchema) 59 | .map(s => SchemaNormalization.parsingFingerprint64(s) -> s) 60 | connector.insert(schemas) 61 | connector.insert(schemas) 62 | connector.insert(schemas) 63 | connector.insert(schemas) 64 | val loaded: Seq[(Long, Schema)] = connector.fullLoad() 65 | assert(loaded.size == schemas.size) 66 | assert(loaded.forall(schemas.contains)) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /postgres/src/test/scala/it/agilelab/darwin/connector/postgres/PostgresMock.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.postgres 2 | 3 | case class PostgresMock(one: Int, two: String, three: Long, four: Postgres2Mock) 4 | -------------------------------------------------------------------------------- /project/Dependencies.scala: 
-------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | /** 4 | * @author andreaL 5 | */ 6 | object Dependencies { 7 | 8 | lazy val scalatest = "org.scalatest" %% "scalatest" % "3.1.1" % "test" 9 | lazy val avro = "org.apache.avro" % "avro" % "1.8.2" 10 | lazy val typesafe_config = "com.typesafe" % "config" % "1.3.1" 11 | lazy val avro4s = "com.sksamuel.avro4s" %% "avro4s-core" % "1.8.3" 12 | lazy val hbase_server = "org.apache.hbase" % "hbase-server" % "1.2.2" % "provided" 13 | lazy val hbase_common = "org.apache.hbase" % "hbase-common" % "1.2.2" % "provided" 14 | lazy val hadoop_common = "org.apache.hadoop" % "hadoop-common" % "2.7.7" % "provided" 15 | lazy val hbase2_server = "org.apache.hbase" % "hbase-server" % "2.1.10" % "provided" 16 | lazy val hbase2_common = "org.apache.hbase" % "hbase-common" % "2.1.10" % "provided" 17 | lazy val reflections = "org.reflections" % "reflections" % "0.9.11" % Test 18 | lazy val spark_core = "org.apache.spark" %% "spark-core" % "2.4.5" % "provided" 19 | lazy val spark_sql = "org.apache.spark" %% "spark-sql" % "2.4.5" % "provided" 20 | lazy val postgres_conn = "org.postgresql" % "postgresql" % "9.3-1100-jdbc4" 21 | lazy val junit = "org.junit.jupiter" % "junit-jupiter-api" % "5.3.2" % Test 22 | lazy val mongo = "org.mongodb.scala" %% "mongo-scala-driver" % "2.8.0" % "provided" 23 | lazy val mongoTest = "de.flapdoodle.embed" % "de.flapdoodle.embed.mongo" % "2.2.0" % "test" 24 | 25 | lazy val postgres_embedded = "ru.yandex.qatools.embed" % "postgresql-embedded" % "2.10" % Test 26 | 27 | lazy val akka = Seq( 28 | "com.typesafe.akka" %% "akka-stream" % "2.5.26", 29 | "com.typesafe.akka" %% "akka-slf4j" % "2.5.26", 30 | "com.typesafe.akka" %% "akka-stream-testkit" % "2.5.26" % Test, 31 | "com.typesafe.akka" %% "akka-http" % "10.1.11", 32 | "com.typesafe.akka" %% "akka-http-testkit" % "10.1.11" % Test, 33 | "com.typesafe.akka" %% "akka-http-spray-json" % "10.1.11" 34 | ) 35 | 36 | lazy val logback = "ch.qos.logback" % "logback-classic" % "1.2.3" 37 | 38 | //we are using hbase testing utilities so we need to resolve the jars containing the tests 39 | //these jars are not resolved with default ivy behavior, also we need to enable in settings 40 | //the resolution of transitive dependencies for jars in test scope 41 | lazy val hbaseTestDependencies = Seq( 42 | ("org.apache.hbase" % "hbase-testing-util" % "1.2.2").classifier("tests") % Test, 43 | ("org.apache.hadoop" % "hadoop-common" % "2.7.7").classifier("tests") % Test, 44 | ("org.apache.hbase" % "hbase-server" % "1.2.2").classifier("tests") % Test, 45 | ("org.apache.hbase" % "hbase" % "1.2.2") % Test, 46 | ("org.apache.hbase" % "hbase-hadoop-compat" % "1.2.2") % Test, 47 | ("org.apache.hbase" % "hbase-hadoop-compat" % "1.2.2").classifier("tests") % Test, 48 | ("org.apache.hbase" % "hbase-hadoop2-compat" % "1.2.2") % Test, 49 | ("org.apache.hbase" % "hbase-hadoop2-compat" % "1.2.2").classifier("tests") % Test, 50 | ("org.apache.hbase" % "hbase-common" % "1.2.2").classifier("tests") % Test, 51 | ("org.apache.hbase" % "hbase" % "1.2.2").classifier("tests") % Test exclude ("org.apache.hbase", "hbase"), 52 | ("org.apache.hadoop" % "hadoop-hdfs" % "2.7.7").classifier("tests") % Test, 53 | ("org.apache.hadoop" % "hadoop-hdfs" % "2.7.7") % Test 54 | ) 55 | 56 | lazy val hbase2TestDependencies = Seq( 57 | ("org.apache.hbase" % "hbase-testing-util" % "2.1.10").classifier("tests") % Test, 58 | ("org.apache.hadoop" % "hadoop-common" % "2.7.7").classifier("tests") % 
Test, 59 | ("org.apache.hbase" % "hbase-server" % "2.1.10").classifier("tests") % Test, 60 | ("org.apache.hbase" % "hbase" % "2.1.10") % Test, 61 | ("org.apache.hbase" % "hbase-hadoop-compat" % "2.1.10") % Test, 62 | ("org.apache.hbase" % "hbase-hadoop-compat" % "2.1.10").classifier("tests") % Test, 63 | ("org.apache.hbase" % "hbase-hadoop2-compat" % "2.1.10") % Test, 64 | ("org.apache.hbase" % "hbase-hadoop2-compat" % "2.1.10").classifier("tests") % Test, 65 | ("org.apache.hbase" % "hbase-metrics" % "2.1.10") % Test, 66 | ("org.apache.hbase" % "hbase-metrics-api" % "2.1.10") % Test, 67 | ("org.apache.hbase" % "hbase-http" % "2.1.10") % Test, 68 | ("org.apache.hbase" % "hbase-common" % "2.1.10").classifier("tests") % Test, 69 | ("org.apache.hbase" % "hbase" % "2.1.10").classifier("tests") % Test exclude ("org.apache.hbase", "hbase"), 70 | ("org.apache.hadoop" % "hadoop-hdfs" % "2.7.7").classifier("tests") % Test, 71 | ("org.apache.hadoop" % "hadoop-hdfs" % "2.7.7") % Test 72 | ) 73 | 74 | lazy val confluentSchemaRegistryDependencies = Seq( 75 | "io.confluent" % "kafka-schema-registry-client" % "4.1.4", //this version is compatible with java7 76 | "org.apache.kafka" % "kafka-clients" % "2.2.2-cp3" % Provided 77 | ) 78 | 79 | lazy val wireMock = 80 | Seq("com.github.tomakehurst" % "wiremock-jre8" % "2.21.0" % Test, "xmlunit" % "xmlunit" % "1.6" % Test) 81 | 82 | lazy val restServer = core_deps ++ Seq(logback) ++ akka 83 | lazy val core_deps = Seq(scalatest, avro, typesafe_config, junit) 84 | lazy val mock_app_dep = core_deps ++ Seq(reflections, hbase_common) 85 | lazy val mock_conn = core_deps ++ Seq(reflections) 86 | lazy val hbase_conn_dep = core_deps ++ Seq(hbase_common, hbase_server, hadoop_common) 87 | lazy val hbase2_conn_dep = core_deps ++ Seq(hbase2_common, hbase2_server, hadoop_common) 88 | lazy val postgres_conn_dep = core_deps :+ postgres_conn :+ postgres_embedded 89 | lazy val spark_app = mock_app_dep ++ Seq(spark_core, spark_sql, hbase_common) 90 | lazy val mongo_conn = core_deps ++ Seq(mongo, mongoTest) 91 | 92 | } 93 | -------------------------------------------------------------------------------- /project/Settings.scala: -------------------------------------------------------------------------------- 1 | import org.scalastyle.sbt.ScalastylePlugin.autoImport._ 2 | import sbt.Keys._ 3 | import sbt.{ Def, _ } 4 | 5 | /** 6 | * @author andreaL 7 | */ 8 | object Settings { 9 | 10 | val SCALA_210 = Some((2L, 10L)) 11 | val SCALA_211 = Some((2L, 11L)) 12 | val SCALA_212 = Some((2L, 12L)) 13 | val SCALA_213 = Some((2L, 13L)) 14 | 15 | def scalacOptionsVersion(scalaVersion: String): Seq[String] = { 16 | Seq( 17 | "-deprecation", 18 | "-feature", 19 | "-unchecked", 20 | "-Xlint", 21 | "-Ywarn-dead-code", 22 | "-encoding", 23 | "UTF-8" 24 | ) ++ { 25 | CrossVersion.partialVersion(scalaVersion) match { 26 | case SCALA_210 => 27 | Seq("-target:jvm-1.7", "-Ywarn-inaccessible") 28 | case SCALA_211 => 29 | Seq("-Xfatal-warnings", "-Ywarn-inaccessible", "-Ywarn-unused-import", "-Ywarn-infer-any", "-target:jvm-1.7") 30 | case SCALA_212 => 31 | Seq("-Xfatal-warnings", "-Ywarn-inaccessible", "-Ywarn-unused-import", "-Ywarn-infer-any", "-target:jvm-1.8") 32 | case SCALA_213 => 33 | Seq("-Xfatal-warnings", "-Xlint:inaccessible", "-Ywarn-unused:imports", "-Xlint:infer-any", "-target:jvm-1.8") 34 | case version: Option[(Long, Long)] => 35 | throw new Exception(s"Unknown scala version: $version") 36 | } 37 | } 38 | } 39 | 40 | def scalaDocOptionsVersion(scalaVersion: String): Seq[String] = { 41 | 
CrossVersion.partialVersion(scalaVersion) match { 42 | case SCALA_210 | SCALA_211 => scalacOptionsVersion(scalaVersion) 43 | case SCALA_212 => scalacOptionsVersion(scalaVersion) ++ Seq("-no-java-comments") 44 | case SCALA_213 => scalacOptionsVersion(scalaVersion) ++ Seq("-no-java-comments") 45 | case version: Option[(Long, Long)] => throw new Exception(s"Unknown scala version: $version") 46 | } 47 | } 48 | 49 | def javacOptionsVersion(scalaVersion: String): Seq[String] = { 50 | CrossVersion.partialVersion(scalaVersion) match { 51 | case SCALA_210 => 52 | Seq("-source", "1.7", "-target", "1.7") 53 | case SCALA_211 => 54 | Seq("-source", "1.7", "-target", "1.7") 55 | case SCALA_212 => 56 | Seq("-source", "1.8", "-target", "1.8") 57 | case SCALA_213 => 58 | Seq("-source", "1.8", "-target", "1.8") 59 | case version: Option[(Long, Long)] => 60 | throw new Exception(s"Unknown scala version: $version") 61 | } 62 | } 63 | 64 | lazy val projectSettings = Seq( 65 | organization := "it.agilelab", 66 | licenses += ("Apache-2.0", url("https://www.apache.org/licenses/LICENSE-2.0.txt")), 67 | homepage := Some(url("https://github.com/agile-lab-dev/darwin")), 68 | description := "Avro Schema Evolution made easy", 69 | javacOptions ++= javacOptionsVersion(scalaVersion.value), 70 | scalacOptions ++= scalacOptionsVersion(scalaVersion.value), 71 | Compile / doc / scalacOptions ++= scalaDocOptionsVersion(scalaVersion.value), 72 | versionScheme := Some("early-semver"), 73 | useCoursier := false, 74 | developers := List( 75 | Developer("amurgia", "Antonio Murgia", "antonio.murgia@agilelab.it", url("https://github.com/tmnd1991")), 76 | Developer("lpirazzini", "Lorenzo Pirazzini", "lorenzo.pirazzini@agilelab.it", url("https://github.com/SpyQuel")), 77 | Developer("rcoluccio", "Roberto Coluccio", "roberto.coluccio@agilelab.it", url("https://github.com/erond")), 78 | Developer("alatella", "Andrea Latella", "andrea.latella@agilelab.it", url("https://github.com/andr3a87")), 79 | Developer("cventrella", "Carlo Ventrella", "carlo.ventrella@agilelab.it", url("https://www.agilelab.it")), 80 | Developer("dicardi", "Davide Icardi", "davide.icardi@agilelab.it", url("https://github.com/davideicardi")), 81 | Developer("nbidotti", "Nicolò Bidotti", "nicolo.bidotti@agilelab.it", url("https://github.com/nicolobidotti")), 82 | Developer("andrea-rockt", "Andrea Fonti", "andrea.fonti@agilelab.it", url("https://github.com/andrea-rockt")) 83 | ) 84 | ) 85 | 86 | val clouderaHadoopReleaseRepo = "cloudera" at "https://repository.cloudera.com/artifactory/cloudera-repos/" 87 | val confluent = "confluent" at "https://packages.confluent.io/maven/" 88 | 89 | lazy val customResolvers = Seq( 90 | clouderaHadoopReleaseRepo, 91 | confluent 92 | ) 93 | 94 | lazy val buildSettings: Seq[SettingsDefinition] = { 95 | //this is an hack to resolve correctly rs-api 96 | // [warn] [FAILED ] javax.ws.rs#javax.ws.rs-api;2.1!javax.ws.rs-api.${packaging.type}: (0ms) 97 | // https://github.com/sbt/sbt/issues/3618 98 | sys.props += "packaging.type" -> "jar" 99 | Seq( 100 | resolvers ++= customResolvers, 101 | scalaVersion := Versions.scala 102 | ) 103 | } 104 | 105 | lazy val commonSettings = projectSettings ++ buildSettings ++ scalastyleSettings 106 | 107 | lazy val hbaseTestSettings: SettingsDefinition = { 108 | //enable resolution of transitive dependencies of jars containing tests 109 | //needed to run tests over hbase minicluster 110 | Test / transitiveClassifiers := Seq(Artifact.TestsClassifier, Artifact.SourceClassifier) 111 | libraryDependencies ++= 
Dependencies.hbaseTestDependencies 112 | } 113 | 114 | lazy val hbase2TestSettings: SettingsDefinition = { 115 | //enable resolution of transitive dependencies of jars containing tests 116 | //needed to run tests over hbase minicluster 117 | Test / transitiveClassifiers := Seq(Artifact.TestsClassifier, Artifact.SourceClassifier) 118 | libraryDependencies ++= Dependencies.hbase2TestDependencies 119 | } 120 | 121 | lazy val notPublishSettings = Seq(publish / skip := true) 122 | 123 | lazy val scalastyleSettings = Seq(scalastyleFailOnWarning := true) 124 | } 125 | -------------------------------------------------------------------------------- /project/Versions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * @author andreaL 3 | */ 4 | object Versions { 5 | val scala_211 = "2.11.12" 6 | val scala_210 = "2.10.7" 7 | val scala = "2.12.13" 8 | val scala_213 = "2.13.5" 9 | val crossScalaVersions = Seq(scala_210, scala_211, scala, scala_213) 10 | } 11 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.9.8 2 | -------------------------------------------------------------------------------- /project/plugin.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") 2 | addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.5.12") 3 | -------------------------------------------------------------------------------- /publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CI_RELEASE='+publishSigned;+darwin-hbase2-connector/publishSigned' 3 | export CI_SNAPSHOT_RELEASE='+publish;+darwin-hbase2-connector/publish' 4 | sbt -v ci-release 5 | -------------------------------------------------------------------------------- /rest-server/src/main/postman/darwinrest.postman_collection.json: -------------------------------------------------------------------------------- 1 | { 2 | "info": { 3 | "_postman_id": "dcaadeb7-ecb8-4bc9-9d4d-47fe6a2857df", 4 | "name": "darwinrest", 5 | "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json" 6 | }, 7 | "item": [ 8 | { 9 | "name": "get-all-schemas", 10 | "request": { 11 | "method": "GET", 12 | "header": [], 13 | "body": { 14 | "mode": "raw", 15 | "raw": "" 16 | }, 17 | "url": { 18 | "raw": "localhost:8080/schemas/", 19 | "host": [ 20 | "localhost" 21 | ], 22 | "port": "8080", 23 | "path": [ 24 | "schemas", 25 | "" 26 | ] 27 | } 28 | }, 29 | "response": [] 30 | }, 31 | { 32 | "name": "get-one-schema", 33 | "request": { 34 | "method": "GET", 35 | "header": [], 36 | "body": { 37 | "mode": "raw", 38 | "raw": "" 39 | }, 40 | "url": { 41 | "raw": "localhost:8080/schemas/1", 42 | "host": [ 43 | "localhost" 44 | ], 45 | "port": "8080", 46 | "path": [ 47 | "schemas", 48 | "1" 49 | ] 50 | } 51 | }, 52 | "response": [] 53 | }, 54 | { 55 | "name": "post-a-schema", 56 | "request": { 57 | "method": "POST", 58 | "header": [ 59 | { 60 | "key": "Content-Type", 61 | "name": "Content-Type", 62 | "value": "application/json", 63 | "type": "text" 64 | } 65 | ], 66 | "body": { 67 | "mode": "raw", 68 | "raw": "[{\n\t\"type\": \"array\",\n\t\"items\": \"string\"\n}]" 69 | }, 70 | "url": { 71 | "raw": "localhost:8080/schemas/", 72 | "host": [ 73 | "localhost" 74 | ], 75 | "port": "8080", 76 | "path": [ 77 | 
"schemas", 78 | "" 79 | ] 80 | } 81 | }, 82 | "response": [] 83 | } 84 | ] 85 | } -------------------------------------------------------------------------------- /rest-server/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | akka { 2 | loggers = ["akka.event.slf4j.Slf4jLogger"] 3 | loglevel = "DEBUG" 4 | logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" 5 | } 6 | 7 | darwin { 8 | type = "lazy" 9 | connector = "mock" 10 | } 11 | 12 | darwin-rest{ 13 | interface = "localhost" 14 | port = 8080 15 | } -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/DarwinService.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.actor.ActorSystem 4 | import akka.http.scaladsl.model.{ HttpResponse, StatusCodes } 5 | import akka.http.scaladsl.server.directives.DebuggingDirectives 6 | import akka.http.scaladsl.server.{ Directives, Route } 7 | import akka.stream.ActorMaterializer 8 | import akka.stream.Attributes.LogLevels 9 | import it.agilelab.darwin.manager.AvroSchemaManager 10 | import org.apache.avro.Schema 11 | 12 | trait DarwinService extends Service with Directives with DebuggingDirectives with JsonSupport { 13 | 14 | val manager: AvroSchemaManager 15 | 16 | override def route: Route = logRequestResult(("darwin", LogLevels.Debug)) { 17 | get { 18 | path("schemas" / LongNumber.?) { 19 | case Some(id) => 20 | manager.getSchema(id) match { 21 | case Some(schema) => complete(schema) 22 | case None => 23 | complete { 24 | HttpResponse(StatusCodes.NotFound) 25 | } 26 | } 27 | case None => complete(manager.getAll) 28 | } 29 | } ~ post { 30 | path("schemas" / PathEnd) { 31 | entity(as[Seq[Schema]]) { schemas => 32 | complete { 33 | manager.registerAll(schemas).map(_._1) 34 | } 35 | } 36 | } 37 | } 38 | } 39 | } 40 | 41 | object DarwinService { 42 | def apply(asm: AvroSchemaManager)(implicit s: ActorSystem, m: ActorMaterializer): DarwinService = new DarwinService { 43 | implicit override val materializer: ActorMaterializer = m 44 | implicit override val system: ActorSystem = s 45 | override val manager: AvroSchemaManager = asm 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/HttpApp.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import java.util.concurrent.Executor 4 | 5 | import akka.actor.ActorSystem 6 | import akka.http.scaladsl.Http 7 | import akka.http.scaladsl.server.RouteConcatenation 8 | import akka.stream.ActorMaterializer 9 | import com.typesafe.config.Config 10 | import it.agilelab.darwin.common.Logging 11 | 12 | import scala.concurrent.duration.Duration 13 | import scala.concurrent.{ Await, ExecutionContext, ExecutionContextExecutor } 14 | 15 | class HttpApp(config: Config, services: Service*)(implicit system: ActorSystem, materializer: ActorMaterializer) 16 | extends Logging { 17 | def run(): Unit = { 18 | val interface = config.getString("interface") 19 | val port = config.getInt("port") 20 | 21 | val route = RouteConcatenation.concat(services.map(_.route): _*) 22 | 23 | log.info("Starting http server on {}:{}", interface, port) 24 | val eventuallyBinding = Http().bindAndHandle(route, interface, port) 25 | val binding = 
Await.result(eventuallyBinding, Duration.Inf) 26 | log.info("Started http server on {}:{}", interface, port) 27 | 28 | val shutdownThread = new Thread(new Runnable { 29 | override def run(): Unit = { 30 | implicit val ec: ExecutionContext = newSameThreadExecutor 31 | log.info("Received shutdown hook") 32 | 33 | val termination = for { 34 | _ <- binding.unbind() 35 | terminated <- system.terminate() 36 | } yield terminated 37 | 38 | Await.ready(termination, Duration.Inf) 39 | log.info("Shutdown") 40 | } 41 | }) 42 | 43 | shutdownThread.setName("shutdown") 44 | 45 | Runtime.getRuntime.addShutdownHook(shutdownThread) 46 | 47 | log.info("registered shutdown hook") 48 | } 49 | 50 | private def newSameThreadExecutor: ExecutionContextExecutor = ExecutionContext.fromExecutor(new Executor { 51 | override def execute(command: Runnable): Unit = command.run() 52 | }) 53 | } 54 | 55 | object HttpApp { 56 | def apply(config: Config, services: Service*)(implicit 57 | system: ActorSystem, 58 | materializer: ActorMaterializer 59 | ): HttpApp = 60 | new HttpApp(config, services: _*) 61 | } 62 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/JsonSupport.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport 4 | import org.apache.avro.Schema 5 | import spray.json.{ DefaultJsonProtocol, JsObject, JsString, JsValue, JsonParser, PrettyPrinter, RootJsonFormat } 6 | 7 | trait JsonSupport extends SprayJsonSupport with DefaultJsonProtocol { 8 | implicit val printer: PrettyPrinter.type = PrettyPrinter 9 | 10 | implicit val schemaFormat: RootJsonFormat[Schema] = new RootJsonFormat[Schema] { 11 | 12 | override def write(obj: Schema): JsValue = JsonParser(obj.toString(true)) 13 | 14 | override def read(json: JsValue): Schema = new Schema.Parser().parse(json.prettyPrint) 15 | } 16 | 17 | implicit val schemaWithIdFormat: RootJsonFormat[(Long, Schema)] = new RootJsonFormat[(Long, Schema)] { 18 | 19 | override def write(obj: (Long, Schema)): JsValue = JsObject( 20 | Map( 21 | "id" -> JsString(obj._1.toString), 22 | "schema" -> schemaFormat.write(obj._2) 23 | ) 24 | ) 25 | 26 | override def read(json: JsValue): (Long, Schema) = json match { 27 | case JsObject(fields) => 28 | val id = fields.get("id") match { 29 | case Some(JsString(number)) => number 30 | case _ => throw new Exception("Id field should be a long") 31 | } 32 | 33 | val schema = fields.get("schema") match { 34 | case Some(x @ JsObject(_)) => x 35 | case _ => throw new Exception("schema should be an object") 36 | } 37 | 38 | (id.toLong, schemaFormat.read(schema)) 39 | case _ => throw new Exception("should be an object") 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/Main.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.actor.ActorSystem 4 | import akka.stream.ActorMaterializer 5 | import com.typesafe.config.ConfigFactory 6 | import it.agilelab.darwin.manager.AvroSchemaManagerFactory 7 | 8 | object Main { 9 | 10 | def main(args: Array[String]): Unit = { 11 | 12 | implicit val actorSystem: ActorSystem = ActorSystem() 13 | implicit val materializer: ActorMaterializer = ActorMaterializer() 14 | 15 | val config = 
ConfigFactory.load() 16 | val schemaManagerConfig = config.getConfig("darwin") 17 | val restConfig = config.getConfig("darwin-rest") 18 | val schemaManager = AvroSchemaManagerFactory.initialize(schemaManagerConfig) 19 | 20 | HttpApp(restConfig, DarwinService(schemaManager)).run() 21 | 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /rest-server/src/main/scala/it/agilelab/darwin/server/rest/Service.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.server.rest 2 | 3 | import akka.actor.ActorSystem 4 | import akka.http.scaladsl.server.Route 5 | import akka.stream.ActorMaterializer 6 | 7 | trait Service { 8 | implicit val system: ActorSystem 9 | implicit val materializer: ActorMaterializer 10 | 11 | def route: Route 12 | } 13 | -------------------------------------------------------------------------------- /rest/src/main/resources/META-INF/services/it.agilelab.darwin.common.ConnectorCreator: -------------------------------------------------------------------------------- 1 | it.agilelab.darwin.connector.rest.RestConnectorCreator 2 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/JsonProtocol.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import java.io.InputStream 4 | 5 | import org.apache.avro.Schema 6 | import org.codehaus.jackson.map.ObjectMapper 7 | import org.codehaus.jackson.node.JsonNodeFactory 8 | import it.agilelab.darwin.common.compat._ 9 | 10 | trait JsonProtocol { 11 | val objectMapper = new ObjectMapper() 12 | 13 | def toJson(schemas: Seq[(Long, Schema)]): String = { 14 | 15 | val data = schemas.map { case (_, schema) => 16 | objectMapper.readTree(schema.toString) 17 | }.foldLeft(JsonNodeFactory.instance.arrayNode()) { case (array, node) => 18 | array.add(node) 19 | array 20 | } 21 | 22 | objectMapper.writeValueAsString(data) 23 | } 24 | 25 | def toSeqOfIdSchema(in: InputStream): Seq[(Long, Schema)] = { 26 | val node = objectMapper.readTree(in) 27 | 28 | node.getElements 29 | .toScala() 30 | .map { node => 31 | val id = node.get("id").asText().toLong 32 | val schemaNode = node.get("schema") 33 | 34 | val schemaToString = objectMapper.writeValueAsString(schemaNode) 35 | 36 | val parser = new Schema.Parser() 37 | 38 | val schema = parser.parse(schemaToString) 39 | 40 | (id, schema) 41 | } 42 | .toVector 43 | } 44 | 45 | def toSchema(in: InputStream): Schema = { 46 | val parser = new Schema.Parser() 47 | parser.parse(in) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/RestConnector.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.Connector 5 | import org.apache.avro.Schema 6 | import scalaj.http.Http 7 | 8 | class RestConnector(options: RestConnectorOptions, config: Config) extends Connector with JsonProtocol { 9 | 10 | override def fullLoad(): Seq[(Long, Schema)] = { 11 | Http(options.endpoint("schemas/")).execute(toSeqOfIdSchema).body 12 | } 13 | 14 | override def insert(schemas: Seq[(Long, Schema)]): Unit = { 15 | 16 | val response = Http(options.endpoint("schemas/")) 17 | .header("Content-Type", 
"application/json") 18 | .postData(toJson(schemas)) 19 | .asString 20 | 21 | if (response.isError) { 22 | throw new Exception(response.body) 23 | } 24 | 25 | } 26 | 27 | override def createTable(): Unit = {} 28 | 29 | override def tableExists(): Boolean = true 30 | 31 | override def tableCreationHint(): String = "" 32 | 33 | override def findSchema(id: Long): Option[Schema] = { 34 | 35 | val response = Http(options.endpoint(s"schemas/$id")).execute(toSchema) 36 | 37 | if (response.code == 404) { 38 | None 39 | } else { 40 | Some(response.body) 41 | } 42 | } 43 | 44 | /** 45 | * Retrieves the latest schema for a given string identifier (not to be confused with the fingerprint id). 46 | * This API might not be implemented by all connectors, which should return None 47 | */ 48 | override def retrieveLatestSchema(identifier: String): Option[(Long, Schema)] = None 49 | } 50 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/RestConnectorCreator.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import com.typesafe.config.Config 4 | import it.agilelab.darwin.common.{ Connector, ConnectorCreator, Logging } 5 | 6 | class RestConnectorCreator extends ConnectorCreator with Logging { 7 | 8 | override def create(config: Config): Connector = { 9 | log.debug("creating rest connector") 10 | 11 | val restOptions = RestConnectorOptions.fromConfig(config) 12 | log.info("rest options are {}", restOptions) 13 | 14 | val rest = new RestConnector(restOptions, config) 15 | log.debug("created rest connector") 16 | rest 17 | } 18 | 19 | /** 20 | * @return the name of the Connector 21 | */ 22 | override def name(): String = "rest" 23 | } 24 | -------------------------------------------------------------------------------- /rest/src/main/scala/it/agilelab/darwin/connector/rest/RestConnectorOptions.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import java.net.URI 4 | 5 | import com.typesafe.config.Config 6 | 7 | case class RestConnectorOptions(protocol: String, host: String, port: Int, basePath: String) { 8 | def endpoint(path: String): String = 9 | URI.create(s"$protocol://$host:$port").resolve(basePath).resolve(path).toString 10 | } 11 | 12 | object RestConnectorOptions { 13 | 14 | private val PROTOCOL = "protocol" 15 | private val HOST = "host" 16 | private val PORT = "port" 17 | private val BASE_PATH = "basePath" 18 | 19 | def fromConfig(config: Config): RestConnectorOptions = 20 | RestConnectorOptions( 21 | config.getString(PROTOCOL), 22 | config.getString(HOST), 23 | config.getInt(PORT), 24 | config.getString(BASE_PATH) 25 | ) 26 | } 27 | -------------------------------------------------------------------------------- /rest/src/main/scala/scalaj/http/DigestAuth.scala: -------------------------------------------------------------------------------- 1 | package scalaj.http 2 | // scalastyle:off 3 | import java.nio.charset.StandardCharsets 4 | import java.security.MessageDigest 5 | import java.util.Locale 6 | 7 | import scala.collection.immutable.VectorBuilder 8 | import scala.util.Random 9 | 10 | case class WwwAuthenticate(authType: String, params: Map[String, String]) 11 | object DigestAuth { 12 | 13 | def trimQuotes(str: String): String = { 14 | if (str.length >= 2 && str.charAt(0) == '"' && str.charAt(str.length - 1) == '"') { 15 | 
str.substring(1, str.length - 1) 16 | } else { 17 | str 18 | } 19 | } 20 | 21 | // need to parse one char at a time rather than split on comma because values can be 22 | // quoted comma separated strings 23 | def splitParams(params: String): IndexedSeq[String] = { 24 | val builder = new VectorBuilder[String]() 25 | var start = 0 26 | var i = 0 27 | var quotes = 0 28 | while (i < params.length) { 29 | params.charAt(i) match { 30 | case '\\' => i += 1 31 | case '"' => quotes += 1 32 | case ',' => 33 | if (quotes % 2 == 0) { 34 | val item = params.substring(start, i).trim() 35 | if (item.length > 0) { 36 | builder += item 37 | } 38 | start = i + 1 39 | } 40 | case _ => // nada 41 | } 42 | i += 1 43 | } 44 | builder += params.substring(start).trim() 45 | builder.result() 46 | } 47 | 48 | def getAuthDetails(headerValue: String): Option[WwwAuthenticate] = { 49 | headerValue.indexOf(' ') match { 50 | case indexOfSpace if indexOfSpace > 0 => 51 | val authType = headerValue.substring(0, indexOfSpace) 52 | val params: Map[String, String] = splitParams(headerValue.substring(indexOfSpace + 1)) 53 | .flatMap(param => { 54 | param.split("=", 2) match { 55 | case Array(key, value) => Some(key.trim.toLowerCase(Locale.ENGLISH) -> trimQuotes(value.trim)) 56 | case _ => None 57 | } 58 | }) 59 | .toMap 60 | Some(WwwAuthenticate(authType, params)) 61 | case _ => None 62 | } 63 | } 64 | 65 | val HexArray = "0123456789abcdef".toCharArray() 66 | 67 | def hex(bytes: Array[Byte]): String = { 68 | val hexChars = new Array[Char](bytes.length * 2) 69 | var j = 0 70 | while (j < bytes.length) { 71 | val v = bytes(j) & 0xff 72 | hexChars(j * 2) = HexArray(v >>> 4) 73 | hexChars(j * 2 + 1) = HexArray(v & 0x0f) 74 | j += 1 75 | } 76 | new String(hexChars) 77 | } 78 | 79 | val DigestPrefix = "Digest" 80 | 81 | def createHeaderValue( 82 | username: String, 83 | password: String, 84 | method: String, 85 | uri: String, 86 | content: Array[Byte], 87 | serverParams: Map[String, String], 88 | testClientNonce: Option[String] = None 89 | ): Option[String] = { 90 | val algorithm = serverParams.getOrElse("algorithm", "MD5") 91 | val digester = Option(MessageDigest.getInstance(algorithm)).getOrElse( 92 | throw new Exception("unsupported digest algorithm" + algorithm) 93 | ) 94 | def hexDigest(str: String): String = hex(digester.digest(str.getBytes(StandardCharsets.ISO_8859_1))) 95 | for { 96 | realm <- serverParams.get("realm") 97 | nonce <- serverParams.get("nonce") 98 | } yield { 99 | val qopOpt: Option[String] = serverParams 100 | .get("qop") 101 | .flatMap(serverQop => { 102 | val serverQopValues = serverQop.split(',').map(_.trim) 103 | if (serverQopValues.contains("auth")) Some("auth") 104 | else if (serverQopValues.contains("auth-int")) Some("auth-int") 105 | else None 106 | }) 107 | val a1 = username + ":" + realm + ":" + password 108 | val hashA1: String = hexDigest(a1) 109 | val a2 = method + ":" + uri + { 110 | if (qopOpt.exists(_ == "auth-int")) ":" + hex(digester.digest(content)) else "" 111 | } 112 | val hashA2: String = hexDigest(a2) 113 | 114 | val (nonceCountOpt, clientNonceOpt, a3) = qopOpt match { 115 | case Some(qop) => 116 | val nc = "00000001" 117 | val clientNonce = testClientNonce.getOrElse({ 118 | val bytes = new Array[Byte](16) 119 | Random.nextBytes(bytes) 120 | hex(bytes) 121 | }) 122 | val a3 = hashA1 + ":" + nonce + ":" + nc + ":" + clientNonce + ":" + qop + ":" + hashA2 123 | (Some(nc), Some(clientNonce), a3) 124 | case _ => 125 | (None, None, hashA1 + ":" + nonce + ":" + hashA2) 126 | } 127 | val 
hashA3: String = hexDigest(a3) 128 | val sb = new StringBuilder(DigestPrefix).append(" ") 129 | def appendQuoted(key: String, value: String): StringBuilder = { 130 | sb.append(key + "=\"").append(value).append("\"") 131 | } 132 | appendQuoted("username", username).append(", ") 133 | appendQuoted("realm", realm).append(", ") 134 | appendQuoted("nonce", nonce).append(", ") 135 | serverParams 136 | .get("opaque") 137 | .foreach(opaque => { 138 | appendQuoted("opaque", opaque).append(", ") 139 | }) 140 | appendQuoted("algorithm", algorithm).append(", ") 141 | appendQuoted("uri", uri).append(", ") 142 | for { 143 | qop <- qopOpt 144 | nonceCount <- nonceCountOpt 145 | clientNonce <- clientNonceOpt 146 | } { 147 | appendQuoted("qop", qop).append(", ") 148 | appendQuoted("nc", nonceCount).append(", ") 149 | appendQuoted("cnonce", clientNonce).append(", ") 150 | } 151 | appendQuoted("response", hashA3) 152 | sb.toString() 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /rest/src/main/scala/scalaj/http/OAuth.scala: -------------------------------------------------------------------------------- 1 | package scalaj.http 2 | // scalastyle:off 3 | /** scalaj.http 4 | * Copyright 2010 Jonathan Hoffman 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import java.net.URL 20 | 21 | case class Token(key: String, secret: String) 22 | 23 | /** utility methods used by [[scalaj.http.HttpRequest]] */ 24 | object OAuth { 25 | import java.net.URI 26 | import javax.crypto.Mac 27 | import javax.crypto.spec.SecretKeySpec 28 | val MAC = "HmacSHA1" 29 | 30 | def sign(req: HttpRequest, consumer: Token, token: Option[Token], verifier: Option[String]): HttpRequest = { 31 | req.option(conn => { 32 | val baseParams: Seq[(String, String)] = Seq( 33 | ("oauth_timestamp", (System.currentTimeMillis / 1000).toString), 34 | ("oauth_nonce", System.currentTimeMillis.toString) 35 | ) 36 | 37 | var (oauthParams, signature) = getSig(baseParams, req, consumer, token, verifier) 38 | 39 | oauthParams +:= (("oauth_signature", signature)) 40 | conn.setRequestProperty( 41 | "Authorization", 42 | "OAuth " + oauthParams.map(p => p._1 + "=\"" + percentEncode(p._2) + "\"").mkString(",") 43 | ) 44 | }) 45 | } 46 | 47 | def getSig( 48 | baseParams: Seq[(String, String)], 49 | req: HttpRequest, 50 | consumer: Token, 51 | token: Option[Token], 52 | verifier: Option[String] 53 | ): (Seq[(String, String)], String) = { 54 | var oauthParams = ("oauth_version", "1.0") +: ("oauth_consumer_key", consumer.key) +: ( 55 | "oauth_signature_method", 56 | "HMAC-SHA1" 57 | ) +: baseParams 58 | 59 | token.foreach { t => 60 | oauthParams +:= (("oauth_token", t.key)) 61 | } 62 | 63 | verifier.foreach { v => 64 | oauthParams +:= (("oauth_verifier", v)) 65 | } 66 | // oauth1.0 specifies that only querystring and x-www-form-urlencoded body parameters should be included in signature 67 | // req.params from multi-part requests are included in the multi-part request body and should NOT be included 68 | val allTheParams = if (req.connectFunc.isInstanceOf[MultiPartConnectFunc]) { 69 | oauthParams 70 | } else { 71 | req.params ++ oauthParams 72 | } 73 | 74 | val baseString = Seq(req.method.toUpperCase, normalizeUrl(new URL(req.url)), normalizeParams(allTheParams)) 75 | .map(percentEncode) 76 | .mkString("&") 77 | 78 | val keyString = percentEncode(consumer.secret) + "&" + token.map(t => percentEncode(t.secret)).getOrElse("") 79 | val key = new SecretKeySpec(keyString.getBytes(HttpConstants.utf8), MAC) 80 | val mac = Mac.getInstance(MAC) 81 | mac.init(key) 82 | val text = baseString.getBytes(HttpConstants.utf8) 83 | (oauthParams, HttpConstants.base64(mac.doFinal(text))) 84 | } 85 | 86 | private def normalizeParams(params: Seq[(String, String)]) = { 87 | percentEncode(params).sortWith(_ < _).mkString("&") 88 | } 89 | 90 | private def normalizeUrl(url: URL) = { 91 | val uri = new URI(url.toString) 92 | val scheme = uri.getScheme().toLowerCase() 93 | var authority = uri.getAuthority().toLowerCase() 94 | val dropPort = (scheme.equals("http") && uri.getPort() == 80) || (scheme.equals("https") && uri.getPort() == 443) 95 | if (dropPort) { 96 | // find the last : in the authority 97 | val index = authority.lastIndexOf(":") 98 | if (index >= 0) { 99 | authority = authority.substring(0, index) 100 | } 101 | } 102 | var path = uri.getRawPath() 103 | if (path == null || path.length() <= 0) { 104 | path = "/" // conforms to RFC 2616 section 3.2.2 105 | } 106 | // we know that there is no query and no fragment here. 
107 | scheme + "://" + authority + path 108 | } 109 | 110 | def percentEncode(params: Seq[(String, String)]): Seq[String] = { 111 | params.map(p => percentEncode(p._1) + "=" + percentEncode(p._2)) 112 | } 113 | 114 | def percentEncode(s: String): String = { 115 | if (s == null) "" 116 | else { 117 | HttpConstants.urlEncode(s, HttpConstants.utf8).replace("+", "%20").replace("*", "%2A").replace("%7E", "~") 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /rest/src/test/scala/it/agilelab/darwin/connector/rest/RestConnectorSuite.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.connector.rest 2 | 3 | import com.github.tomakehurst.wiremock.WireMockServer 4 | import com.github.tomakehurst.wiremock.client.WireMock._ 5 | import com.github.tomakehurst.wiremock.core.WireMockConfiguration 6 | import com.typesafe.config.ConfigFactory 7 | import org.apache.avro.{ Schema, SchemaBuilder } 8 | import org.scalatest.{ BeforeAndAfterEach, OptionValues } 9 | import org.scalatest.flatspec.AnyFlatSpec 10 | 11 | class RestConnectorSuite extends AnyFlatSpec with BeforeAndAfterEach with OptionValues { 12 | 13 | private val wireMockServer = new WireMockServer(WireMockConfiguration.wireMockConfig().dynamicPort()) 14 | 15 | private def config(port: Int) = ConfigFactory.parseString(s""" 16 | | protocol: "http" 17 | | host: "localhost" 18 | | port: ${wireMockServer.port()} 19 | | basePath: "/" 20 | """.stripMargin) 21 | 22 | override def beforeEach(): Unit = { 23 | wireMockServer.start() 24 | } 25 | 26 | override def afterEach(): Unit = { 27 | wireMockServer.stop() 28 | } 29 | 30 | "rest connector" should "get all schemas" in { 31 | 32 | val connector = new RestConnectorCreator().create(config(wireMockServer.port())) 33 | 34 | val schemaId1 = -3577210133426481249L 35 | val schemaId2 = 5920968314789803198L 36 | 37 | wireMockServer.stubFor { 38 | get(urlPathEqualTo("/schemas/")).willReturn { 39 | aResponse().withBody { 40 | s""" 41 | |[{ 42 | | "id": "$schemaId1", 43 | | "schema": { 44 | | "items": "string", 45 | | "type": "array" 46 | | } 47 | | }, { 48 | | "id": "$schemaId2", 49 | | "schema": { 50 | | "items": "int", 51 | | "type": "array" 52 | | } 53 | | }] 54 | """.stripMargin 55 | } 56 | } 57 | } 58 | 59 | val result = connector.fullLoad() 60 | 61 | assert(result.contains((schemaId1, SchemaBuilder.array().items(Schema.create(Schema.Type.STRING))))) 62 | assert(result.contains((schemaId2, SchemaBuilder.array().items(Schema.create(Schema.Type.INT))))) 63 | assert(result.size == 2) 64 | 65 | wireMockServer.verify { 66 | getRequestedFor(urlPathEqualTo("/schemas/")) 67 | } 68 | 69 | } 70 | 71 | "rest connector" should "get one schemas" in { 72 | 73 | val schemaId = -3577210133426481249L 74 | val connector = new RestConnectorCreator().create(config(wireMockServer.port())) 75 | 76 | wireMockServer.stubFor { 77 | get(urlPathEqualTo(s"/schemas/$schemaId")).willReturn { 78 | aResponse().withBody { 79 | """ 80 | | { 81 | | "items": "string", 82 | | "type": "array" 83 | | } 84 | """.stripMargin 85 | } 86 | } 87 | } 88 | 89 | val result = connector.findSchema(schemaId).value 90 | 91 | val expected = SchemaBuilder.array().items(Schema.create(Schema.Type.STRING)) 92 | 93 | assert(result == expected) 94 | 95 | wireMockServer.verify { 96 | getRequestedFor(urlPathEqualTo(s"/schemas/$schemaId")) 97 | } 98 | 99 | } 100 | 101 | "rest connector" should "post schemas" in { 102 | val connector = new 
RestConnectorCreator().create(config(wireMockServer.port())) 103 | 104 | val schema = SchemaBuilder.array().items(Schema.create(Schema.Type.INT)) 105 | 106 | wireMockServer.stubFor { 107 | post(urlEqualTo("/schemas/")).withHeader("Content-Type", equalTo("application/json")) 108 | } 109 | 110 | connector.insert(Seq((0, schema))) 111 | 112 | val request = """[{"type":"array","items":"int"}]""" 113 | 114 | wireMockServer.verify { 115 | postRequestedFor(urlEqualTo("/schemas/")).withRequestBody(equalTo(request)) 116 | } 117 | 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /spark-application/src/dist/conf/application.conf: -------------------------------------------------------------------------------- 1 | spark.yarn.maxAppAttempts: 1 -------------------------------------------------------------------------------- /spark-application/src/main/scala/it/agilelab/darwin/app/spark/GenericMainClass.scala: -------------------------------------------------------------------------------- 1 | package it.agilelab.darwin.app.spark 2 | 3 | import java.text.SimpleDateFormat 4 | import java.util.Date 5 | 6 | import com.typesafe.config.{ Config, ConfigFactory } 7 | import org.apache.hadoop.fs.FileSystem 8 | import org.apache.spark.sql.SparkSession 9 | import org.slf4j.{ Logger, LoggerFactory } 10 | import scala.collection.JavaConverters._ 11 | 12 | trait GenericMainClass { 13 | self: SparkManager => 14 | 15 | val genericMainClassLogger: Logger = LoggerFactory.getLogger("SparkManager") 16 | 17 | private def makeFileSystem(session: SparkSession): FileSystem = { 18 | if (session.sparkContext.isLocal) { 19 | FileSystem.getLocal(session.sparkContext.hadoopConfiguration) 20 | } else { 21 | FileSystem.get(session.sparkContext.hadoopConfiguration) 22 | } 23 | } 24 | 25 | /** 26 | * @param settings configuration loaded from multiple ".conf" files: the default ones as per typesafe Config and 27 | * another ".conf" file that has the same name as the application 28 | * @param fs the default file system of the application executed context 29 | * @param sparkSession the sparkSession that has been created and will be used in the application 30 | * @return true if the application ends successfully false otherwise 31 | */ 32 | protected def runJob(settings: Config)(implicit fs: FileSystem, sparkSession: SparkSession): Int 33 | 34 | /** 35 | * Override in order to handle specific exceptions 36 | */ 37 | protected def handleException(exception: Throwable, applicationSettings: Config) 38 | 39 | /** 40 | * It executes the following ordered steps: 41 | *