├── .gitignore ├── .scalafmt.conf ├── .travis.yml ├── LICENSE ├── README.md ├── build.sbt ├── docker-compose.yml ├── project ├── Dependencies.scala ├── build.properties ├── plugins.sbt └── project │ └── plugins.sbt ├── publish.sh ├── pubring.gpg.enc ├── resources └── images │ ├── 001.png │ ├── 002.png │ ├── 003.png │ ├── 004.png │ ├── 005.png │ ├── 006.png │ ├── 007.png │ ├── 008.png │ ├── schemer-logo-text-wide.png │ ├── schemer-logo-text-wide.svg │ ├── schemer-logo-text.png │ ├── schemer-logo-text.svg │ ├── schemer-logo.png │ └── schemer-logo.svg ├── schemer-core └── src │ ├── main │ └── scala │ │ └── schemer │ │ ├── AvroSchema.scala │ │ ├── CSVSchema.scala │ │ ├── JSONSchema.scala │ │ ├── ParquetSchema.scala │ │ ├── SchemaLike.scala │ │ ├── Schemer.scala │ │ └── utils │ │ ├── JSONUtil.scala │ │ └── JsonSchemaValidationUtil.scala │ └── test │ ├── resources │ ├── test.csv │ ├── test.json │ └── test.tsv │ └── scala │ └── schemer │ ├── AvroSchemaSpec.scala │ ├── CSVSchemaSpec.scala │ ├── Helpers.scala │ ├── JSONSchemaSpec.scala │ └── ParquetSchemaSpec.scala ├── schemer-registry └── src │ └── main │ ├── resources │ ├── application.conf │ ├── aws-core-site.xml │ ├── db │ │ └── migration │ │ │ └── V1__creates_schemas.sql │ └── graphql │ │ └── graphiql.html │ └── scala │ └── schemer │ └── registry │ ├── actors │ └── InferActor.scala │ ├── dao │ └── SchemaDao.scala │ ├── exceptions │ ├── SchemerException.scala │ ├── SchemerInferenceException.scala │ ├── SchemerSchemaCreationException.scala │ └── SchemerSchemaVersionCreationException.scala │ ├── graphql │ ├── CustomGraphQLResolver.scala │ ├── GraphQLService.scala │ └── schema │ │ ├── GraphQLCustomTypes.scala │ │ ├── InferType.scala │ │ ├── MetadataType.scala │ │ ├── MutationType.scala │ │ ├── SchemaDefinition.scala │ │ └── SchemaType.scala │ ├── models │ └── Schema.scala │ ├── package.scala │ ├── routes │ ├── GraphQLRoutes.scala │ ├── HealthRoutes.scala │ ├── Routes.scala │ └── SwaggerRoutes.scala │ ├── server │ ├── ConfigWithDefault.scala │ ├── InferenceConfig.scala │ ├── Main.scala │ ├── Modules.scala │ └── ServerConfig.scala │ ├── sql │ ├── DatabaseConfig.scala │ ├── SqlDatabase.scala │ └── package.scala │ └── utils │ ├── Clock.scala │ └── DateTimeUtils.scala ├── schemer-ui.md ├── secring.gpg.enc └── sonatype.sbt /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | *.log 4 | schemer_db/ 5 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | style = defaultWithAlign 2 | maxColumn = 120 3 | align.openParenCallSite = false 4 | align.openParenDefnSite = false 5 | danglingParentheses = true 6 | 7 | rewrite.rules = [RedundantBraces, RedundantParens, SortImports, PreferCurlyFors] 8 | rewrite.redundantBraces.includeUnitMethods = true 9 | rewrite.redundantBraces.stringInterpolation = true 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - 2.11.11 4 | jdk: 5 | - oraclejdk8 6 | sudo: required 7 | services: 8 | - docker 9 | before_cache: 10 | - find $HOME/.sbt -name "*.lock" | xargs rm 11 | - find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm 12 | cache: 13 | directories: 14 | - "$HOME/.ivy2/cache" 15 | - "$HOME/.sbt/boot/" 16 | before_deploy: 17 | - openssl aes-256-cbc -pass 
pass:$ENCRYPTION_PASSWORD -in secring.gpg.enc -out local.secring.gpg 18 | -d 19 | - openssl aes-256-cbc -pass pass:$ENCRYPTION_PASSWORD -in pubring.gpg.enc -out local.pubring.gpg 20 | -d 21 | deploy: 22 | - provider: script 23 | script: "./publish.sh" 24 | skip_cleanup: true 25 | on: 26 | tags: true 27 | jdk: oraclejdk8 28 | scala: 2.11.11 29 | env: 30 | global: 31 | - BUILD_LABEL=1.0.${TRAVIS_BUILD_NUMBER} 32 | - secure: KUG4LdZBUn11TWMysOvF7jbwkHZzWRUIJqz6HlcXEOIdN4rzt2An0L2KB9L5L+DG6M2e0aHyl6ajeopSF8SYV3OYmPe2RKO2pJifhfn9Z301gpV3nKQKp2maZHTeoOSeWNOHV1APJAd5WyHsmPp1zKGHc7ePLcse9f45KuK0YxOnM+i1APUzWsRdS3PSaDfByP++LYNh4V8Bz4Xl75TgLs/kUtY2xxmFakWZe0PE87qjdadAI42Q7/b6sWHb17WNIzMg/8Vfn//C7XsQjmhXS5dwa2JilyFTZKLJ0N89h2KoMGgF4rJQWS7DLo5mFccqnAvx1iN4gZvzmvyQgZ2JCK4QxAxxC1umcJvJ4Yf7pzZaHqMrthCLz8eNixFSYtoCskJzyrHyow99MhOTnv9NcsOZxdT25j20wI2E2JxoYRDLOZcBd7xSiXCmyzQE3/V+FEaqCUcY2dFHdA8V6GnOmetycgxyWd23S7dCM4IQEB8UAnCM/UgIWGIhTAMbxUtJKEIMfA+VVQwJUPvKNkVB2HjUUK5q+//+B9H9WJrvR16urcyC8MpgWJWex5J7mwlvs1999t1S/EUPUmHlJQX7XQuN5kSOyQ2B+L+r0FW8LyHx4ffxy1qW9x9kFzGsWNKTKJd3PWJONKlcdWLYU/T56EFBzWZwPt74sVCRhtQ0G30= 33 | - secure: 2hLgeG61N9TZ8xNRSMCMlMiVatsoxsTT6Gj1ohbZC2cMTsEmMVPvYxOkBrjqu2VsJBcmyyiKqw8LX1FQBvU3eDIOss+mIQndjTvJIZIw1jx2KK6TNliT4nK0sORyHFDAmXR43AZPjpJVsg28uR80/Nwr6+7OoZ6Cy8kA2w+iO33vW2V/jXhD3H1XQRd+T7NE9RQigcY7OVjq1SqHE+6sH8CVQL94cqzV7fhHVeC5oBoze4bn+KcmK/iRl6zW9cXlyS+kjhPkVrS0NOADcXnCjUkkw+5N1TWSABXSBBO3VxkuE6W311T/NeNYFDP7CrjzrH1XEyTXjf9A3RVyJVRIEaDX4SHadb6eLRWSAgTO1d6tRxl8IcsnDoYghiMw61ggx4tkV6OQa5nF0U+0GJOYC/HLnqSIkv7/YM6LMp9AprWlWD+wB9wQ4l/09Ssed7hz/VoWnL7ymDIj/fJnsFnoOIJebciAo2pLWyftjjBIxEnQbGVyXSRdXVgxKpTyIeQY57xZPBugi4Q2KmBrLl+l2ean49eb827mMhM8FBdyLqbM8FKFln8E7Hokzw835/HKn23pqzLGO8/H/6rHOBkqwmjoL06FbiqHZyV16XCr0Jy/Bh6xoQaCYoE8ZhcTYzeBkTDzJIcfXqWIvNRUQzwHu+EogEXju/3c5G/2nITdz+g= 34 | - secure: yyiWyS++Q/ZUvRcRRi8Md0u7VqwQvA74cn57eSLJuUFEHsr4fDjVqkA9HhwUCWdCJC5UIm72c9gqkuyVjq9Vxb3M+YPF6h+sCsOfH3bDowq8ozWBIC1W8giORNAoANVcaFnDgd+9RkXqkIAarVgyff1r5Cdks5EfQ0lsCnaphdyrO0h1jQx8A5UMCZU6za52NDviyn92TUH5Z+XEBJzWg7/WV18zfr8/5VPK5ErYzcNk6s9Gq8WfLtf3b9aCdOkjBB1HzqB/pqEMOfxtERjSBtw66lB5Fzn6xk0O0cn4jfzyI/DpFfzi9Ecwuwm/kMX+ZSPUOe/riEt5/D5wJKEMio0G2Zt77Ulkq72ed2h801gLzjuNLvN3b+zk38Va0bPgWGARFVGVbAytve1xcpCJxQ8ZG+hgFq95WoTYguWUT2ny3xo+0aZm+r/jqT8o05p/UjbzPsTAeTsL0m8Qxr/+JPwvuLZIekzIg23JBe5PPHaIp5PdgYfCv+s02TdZNGEIYa9K3jo6yUsnZXClJaZov9tmmkvyw95/2bhgZfWn5dDqyNu/YrpsQ9k+sDfC01UmXXHl10jAWBy+wjAYw2F7UTR9/MVXC041j6vZmOrfVLdYvXvJslmVjR2qUq9zBrF6uZIOCaARMoy8YCcJFTruHFemdUJ0ElCOtrfpuZkDnkY= 35 | - secure: vvDoGnUlOIDqvN1+zKjxXUeOCEQ9AFURhXV1y8lUDBqZXHshoEV6MZKe7cL2vkBrCGLgEAOlX7vplW44spIxkL78zLtyxmUCsla4VcCT41iYkrBCM4LOKlzsKM3U0xoo76EP9zYXul6O/vIL9W9RvL78RyiC3C4vC8tUahuoxaWpyYvOFcKEI7sVf1O7/HeAKqaRRitsAuk8nfZDkTMU8FRjvxp2aY7I3hGaQVVMKe3lpns3R7T74FuQvnkfyr7T/O0/GnGKJELl8lBtdQlsWoZOXYQL7ZzaEWVjFsmWTNqtKU7DMv9j5eC/4JWqnQK2A3Eda0bPx0sgbymM79IAqBw5fwauyfUFMOsvmA/bEDoDF8cD/moPJmZBkxa2HC1DKgpsWE46fFJqqTZKaaaDZulkviGfqBUbwmZqGDlm1Xo3vrYBCOm1tXgzkuPRoksoGN3K3u2gUL9IeE0kiR0xx0qp62R/Kh51cycY1/w8yaNNOfB+1YdcMb7wv3SWGWohE0R/ke7x5luOvV5TCyqOJjmQb1OkzT4AeqACYjTBK6JhgcE5XqsveaF4NuKSA6CtfV19jgyFDEKjU8202SWeLzQew/RFFTSUea3xD/PfBQcKkGizfmCHF1tJmEqFgP09u7uqt0X8q3xls3882CGU46MB6GNmkB+nwXu2DCD1WyQ= 36 | - secure: 
loYtA4a1MMNZDyezwFf/2PNkJeli0E9mpmzeHeWQ1oB5+CVHE/vruOhXPHKDBU/RUBqM2Y09d00fA1Jz0N+0KFhj2nAJDZz2UR6LysOo56Xps4E+NSmMd72qntQtwP3qmWa6kqUYjr31y4i4O2zwlG/gRmq1BNBoouco3BIat8CMOram3WKgjyQSnvLIq9jtrq45uRc0po+nCzqnyGwmwG2sZKkajPTQpO4VjTPu+fmW8bbGhSC62UI2JpWnWev/8CNKaERvzs7s4iwf2ksMoS55iQSyixMp5k1L/qKtee6dYkbZli8gSuhdyNYaIIzTfV42YEUT7Jmwdxcw24sdgHcrCcQNtnVAGvW/EyMq+f1lwoJTabekfAcij30W+lwShnbS/69itl7gozm5dcClmihrtMUgQbb50b8Err+HHayy8xGDipCvtpoAku9NrSG+aX5BZA6BXmZw0VfUiPZXMQFMWA/n40cqzZclPb3cKQSbDd0WfukkmL/nWh2RKxyOLZgH0FLgzoDt3U8pUdR2YmDqZidUoVG9Iz4Lt5c7CTzNSoEaUhWiSFC+tytwn8OPzYq1uJtwv8lPaWvqQA/5+c9Tni5G2LsfRrrBlxSkHkCWqPuhVze5ByxLRank7CBOwBjLOJmv9tZt4NJX7YJhdYAs/jV7ya4jJBVmw1zUdhk= 37 | - secure: G0Wv6Odzqh6GrkyxbV6v8Xq1seQD97rX9C+NfuzwTAh2MVUopnPZGh/Oakiwdb/kTAIpb70dKrZaYqv+mTz/1oTQMg4T2e1OAgItZwR/d6SB/cOKjkxtFgI/q0N5/ukw1N5LE7LrTwoBWtoYIdTO4EqoxTcTkbx+VJczzMAlD3VRICzMsvpxXoOw0BkvESTxEU/yrX3AxRA5VziVvFeSb8cpmgvNYAWmlczaujChPedWOWoO0Z2lFhFU8LYBBzsIwBSIOd6f93vlOCOGQtYOCx4FybRkHs7B1Yjihfz68prR047vDvuT4y+evI9xIpvk3AjN2zkV92EXCo26/fqbepooiQ2cVSpQs+RtTT5P2jPasWFwm1K9G6a/5Ir1dFB7CXKl9EmEUxBnRR0+ZWYWssVc6sTduxvSpMx80CBlWWsHkSlbD/by5fWQptgQp4DYG63Pifc+MuPEHjhl3z1PvyhtccWtGbrYsCNoWlGRT497E8QaBQ8BValqg9sgZ/He9RzzHB6UBve8co92W8rgfOdE+JlgJEr+oIkbB5jnnJIGqC6cDW10aWfjCzIotfmk/MACvaaLTkXT4U786iktf0AR06kPOKssFGrvrqSFqui/ugbHsggBYbfgRC0D76VLQdBPFSuJNplgJamGvFGeoQHk6v+v08dNrQ70S/qp894= 38 | - secure: q5a1a4lDIQLaU32xilGkP91zGiKqW+m5KwuzwAhAIirmHZ9sD6cky5zoOPc6oYCeE4UGIowv9gkGJyMPMD+Jl7pom47/FfmvsS/pcMCzasgWueEsjF2Y9TRHdRPyeGFejJzLgOWu78ss8HOobmlHZPrXzl3Zxx+HkLfwFJ23bH7IIyBYmhC54XGCBuIxU/sbkUctCICOUNXxpzLey63dV/O4cGLzYkpXMtS/6CbwwQj/Hj+TKzaaonqeOAMY4hPj9WE0Vc/QnhstGGHHDIAhBYkNdvs1NLNvxRa1R0i+uGGjnQVhWXOfrKnFhn5qWHOYXvk58f+iW2Ey69Tg21iqOA9SBgrjtDUQLRhP9iAQJbu6EevKLFZcXqfeFJtDfDiUTxpycLWj8GD4k+sz40O2DcIGiPRvzI5v1ZN87baeojUGCjP7I3c2Mv4XigylZzhPJ7m9rPV/G5WG8aKaK/qUpJ7ynl4XpFAMcC7QGb+eQXOw6BqFMhDw4sGR2qu3LQ/flxmpzXZPkHyLK6m2oSGXVbk0w2UvO9pnF+XillQ4YOzB1O+OqbqjZtDu+ZmrB4NK/QzvyGyUS6C/7ancDdlgHZhcoLw50jp27msD+JBv5NcR0y1IAvPwwWqiyg/teaOxl/hZ80BT0AwpC0nNc83UbORGYGcLr6o46OoZNP5xlQ4= 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # schemer 2 | [![Build Status](https://travis-ci.org/indix/schemer.svg?branch=master)](https://travis-ci.org/indix/schemer) [![Maven](https://maven-badges.herokuapp.com/maven-central/com.indix/schemer-core_2.11/badge.svg)](http://repo1.maven.org/maven2/com/indix/schemer-core_2.11/) [![Docker Pulls](https://img.shields.io/docker/pulls/indix/schemer-registry.svg)](https://hub.docker.com/r/indix/schemer-registry/) 3 | 4 |

5 | 6 |

7 | 8 | Schema registry with support for CSV, TSV, AVRO, JSON and Parquet. It can also infer schemas from a given data source. 9 | 10 | ## Schemer UI [WIP] 11 | 12 |

13 | 14 |

15 | 16 | Schemer UI is the wizard-based frontend for Schemer. It provides a wizard-driven schema creation and versioning workflow, along with browsing and search capabilities. It is a work in progress. [More screens](schemer-ui.md) 17 | 18 | ## Schemer Core 19 | 20 | `schemer-core` is the core library that implements most of the logic needed to understand the supported schema types, along with schema inference. To use `schemer-core` directly, just add it to your dependencies: 21 | 22 | ``` 23 | libraryDependencies += "com.indix" %% "schemer-core" % "v0.2.3" 24 | ``` 25 | 26 | ## Schemer Registry 27 | 28 | `schemer-registry` is a schema registry for storing metadata about schemas and schema versions. It provides a GraphQL API for adding, viewing and inferring schemas. 29 | 30 | Schemer Registry is available as a [Docker image on Docker Hub](https://hub.docker.com/r/indix/schemer-registry/). 31 | 32 | ### Running Locally 33 | 34 | A local Docker-based PostgreSQL instance can be run as follows: 35 | 36 | ``` 37 | docker run -e POSTGRES_USER=schemer -e POSTGRES_PASSWORD=schemer -e PGDATA=/var/lib/postgresql/data/pgdata -e POSTGRES_DB=schemer -v $(pwd)/schemer_db:/var/lib/postgresql/data/pgdata -p 5432:5432 postgres:9.5.0 38 | ``` 39 | 40 | Remove the `schemer_db` folder to clear all data and start from scratch. 41 | 42 | The registry service can be run using `sbt`: 43 | 44 | ```bash 45 | sbt "project registry" ~reStart 46 | ``` 47 | 48 | 49 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | import com.typesafe.sbt.packager.Keys.{daemonUser, dockerBaseImage, dockerExposedPorts, dockerRepository, packageName} 3 | import spray.revolver.RevolverPlugin 4 | import spray.revolver.RevolverPlugin.autoImport.Revolver 5 | 6 | val libVersion = sys.env.get("TRAVIS_TAG") orElse sys.env.get("BUILD_LABEL") getOrElse s"1.0.0-${System.currentTimeMillis / 1000}-SNAPSHOT" 7 | 8 | lazy val publishSettings = Seq( 9 | publishMavenStyle := true, 10 | pgpSecretRing := file("local.secring.gpg"), 11 | pgpPublicRing := file("local.pubring.gpg"), 12 | pgpPassphrase := Some(sys.env.getOrElse("GPG_PASSPHRASE", "").toCharArray), 13 | credentials += Credentials( 14 | "Sonatype Nexus Repository Manager", 15 | "oss.sonatype.org", 16 | System.getenv("SONATYPE_USERNAME"), 17 | System.getenv("SONATYPE_PASSWORD") 18 | ), 19 | publishTo := { 20 | val nexus = "https://oss.sonatype.org/" 21 | if (isSnapshot.value) 22 | Some("snapshots" at nexus + "content/repositories/snapshots") 23 | else 24 | Some("releases" at nexus + "service/local/staging/deploy/maven2") 25 | }, 26 | publishArtifact in Test := false, 27 | pomIncludeRepository := { _ => 28 | false 29 | }, 30 | pomExtra := 31 | <url>https://github.com/indix/schemer</url> 32 | <licenses> 33 | <license> 34 | <name>Apache License</name> 35 | <url>https://raw.githubusercontent.com/indix/schemer/master/LICENSE</url> 36 | <distribution>repo</distribution> 37 | </license> 38 | </licenses> 39 | <scm> 40 | <url>git@github.com:indix/schemer.git</url> 41 | <connection>scm:git:git@github.com:indix/schemer.git</connection> 42 | </scm> 43 | <developers> 44 | <developer> 45 | <id>indix</id> 46 | <name>Indix</name> 47 | <url>http://www.indix.com</url> 48 | </developer> 49 | </developers> 50 | ) 51 | 52 | lazy val schemer = Project( 53 | id = "schemer", 54 | base = file(".") 55 | ) aggregate (core, registry) 56 | 57 | lazy val core = (project in file("schemer-core")) 58 | .settings( 59 | inThisBuild( 60 | List( 61 | organization := "com.indix", 62 | scalaVersion := "2.11.11", 63 | crossScalaVersions := Seq("2.11.11"), 64 | version := libVersion, 65 | scalafmtOnCompile := true 66 | ) 67 | ), 68 | name
:= "schemer-core", 69 | libraryDependencies ++= sparkStackProvided ++ Seq(jsonSchemaValidator, scalaTest) 70 | ) 71 | .settings(publishSettings: _*) 72 | 73 | lazy val registry = (project in file("schemer-registry")) 74 | .enablePlugins(BuildInfoPlugin) 75 | .enablePlugins(AshScriptPlugin) 76 | .enablePlugins(JavaAppPackaging) 77 | .enablePlugins(DockerPlugin) 78 | .settings( 79 | dockerBaseImage := "anapsix/alpine-java:8u131b11_server-jre_unlimited", 80 | packageName in Docker := "schemer-registry", 81 | dockerExposedPorts := Seq(9000), 82 | version in Docker := libVersion, 83 | daemonUser in Docker := "root", 84 | dockerRepository := Some("indix"), 85 | Revolver.enableDebugging(port = 5005, suspend = false) 86 | ) 87 | .settings( 88 | inThisBuild( 89 | List( 90 | organization := "com.indix", 91 | scalaVersion := "2.11.11", 92 | version := libVersion, 93 | scalafmtOnCompile := true 94 | ) 95 | ), 96 | name := "schemer-registry", 97 | libraryDependencies ++= sparkStack ++ akkaStack ++ loggingStack ++ Seq( 98 | hadoopAws, 99 | sangria, 100 | sangriaSpray, 101 | postgres, 102 | quill, 103 | quillAsyncPostgres, 104 | flyway, 105 | prometheusClient, 106 | prometheusClientCommon, 107 | prometheusHotspot, 108 | scalaTest 109 | ), 110 | excludeDependencies ++= Seq( 111 | ExclusionRule("com.typesafe.scala-logging", "scala-logging-slf4j_2.11") 112 | ) 113 | ) dependsOn core 114 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | postgres: 5 | image: postgres:9.5.0 6 | ports: 7 | - 5432:5432 8 | environment: 9 | - POSTGRES_USER=schemer 10 | - POSTGRES_PASSWORD=schemer 11 | - PGDATA=/var/lib/postgresql/data/pgdata 12 | - POSTGRES_DB=schemer 13 | volumes: 14 | - ./schemer_db:/var/lib/postgresql/data/pgdata 15 | schemer: 16 | image: indix/schemer-registry:latest 17 | restart: always 18 | ports: 19 | - 9000:9000 20 | depends_on: 21 | - postgres 22 | environment: 23 | - POSTGRES_URL=postgresql://postgres:5432/schemer?user=schemer&password=schemer 24 | 25 | -------------------------------------------------------------------------------- /project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt.{ExclusionRule, _} 2 | 3 | object Versions { 4 | val sparkVersion = "2.3.1" 5 | val akkaHttpVersion = "10.0.10" 6 | } 7 | 8 | object Dependencies { 9 | lazy val scalaTest = "org.scalatest" %% "scalatest" % "3.0.3" % Test 10 | lazy val sparkCore = "org.apache.spark" %% "spark-core" % Versions.sparkVersion 11 | lazy val sparkSql = "org.apache.spark" %% "spark-sql" % Versions.sparkVersion 12 | lazy val sparkAvro = "com.databricks" %% "spark-avro" % "4.0.0" 13 | 14 | lazy val sparkStack = Seq(sparkCore, sparkSql, sparkAvro) 15 | lazy val sparkStackProvided = sparkStack.map(_ % Provided) 16 | 17 | lazy val hadoopAws = "org.apache.hadoop" % "hadoop-aws" % "2.6.0" 18 | 19 | lazy val jsonSchemaValidator = "com.github.fge" % "json-schema-validator" % "2.2.6" excludeAll { 20 | ExclusionRule("javax.mail") 21 | } 22 | 23 | lazy val prometheusClient = "io.prometheus" % "simpleclient" % "0.2.0" 24 | lazy val prometheusClientCommon = "io.prometheus" % "simpleclient_common" % "0.2.0" 25 | lazy val prometheusHotspot = "io.prometheus" % "simpleclient_hotspot" % "0.2.0" 26 | 27 | lazy val akkaHttpCore = "com.typesafe.akka" %% "akka-http-core" % Versions.akkaHttpVersion 28 | lazy val akkaHttp = 
"com.typesafe.akka" %% "akka-http" % Versions.akkaHttpVersion 29 | lazy val sprayJsonAkka = "com.typesafe.akka" %% "akka-http-spray-json" % Versions.akkaHttpVersion 30 | lazy val akkaHttpTestkit = "com.typesafe.akka" %% "akka-http-testkit" % Versions.akkaHttpVersion % Test 31 | lazy val akkaStack = 32 | Seq(akkaHttpCore, akkaHttp, sprayJsonAkka, akkaHttpTestkit) 33 | 34 | lazy val sangria = "org.sangria-graphql" %% "sangria" % "1.2.0" 35 | lazy val sangriaSpray = "org.sangria-graphql" %% "sangria-spray-json" % "1.0.0" 36 | 37 | val logbackClassic = "ch.qos.logback" % "logback-classic" % "1.2.3" 38 | val scalaLogging = "com.typesafe.scala-logging" %% "scala-logging" % "3.7.2" 39 | 40 | val loggingStack = Seq(logbackClassic, scalaLogging) 41 | 42 | val postgres = "org.postgresql" % "postgresql" % "9.4.1208" 43 | val quill = "io.getquill" %% "quill-jdbc" % "2.3.1" 44 | val quillAsyncPostgres = "io.getquill" %% "quill-async-postgres" % "2.3.1" 45 | val flyway = "org.flywaydb" % "flyway-core" % "4.1.1" 46 | } 47 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.0.4 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.12") 2 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.7.0") 3 | addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.2") 4 | addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.0") 5 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.0") 6 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.0") 7 | addSbtPlugin("io.spray" % "sbt-revolver" % "0.9.1") 8 | scalafmtOnCompile in ThisBuild := true -------------------------------------------------------------------------------- /project/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.12") -------------------------------------------------------------------------------- /publish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | sbt "project core" +publishSigned 6 | sbt sonatypeReleaseAll 7 | 8 | docker login -u "$DOCKER_USERNAME" -p "$DOCKER_PASSWORD" 9 | sbt docker:publishLocal 10 | docker push indix/schemer-registry:${TRAVIS_TAG} 11 | docker tag indix/schemer-registry:${TRAVIS_TAG} indix/schemer-registry:latest 12 | docker push indix/schemer-registry:latest 13 | -------------------------------------------------------------------------------- /pubring.gpg.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/pubring.gpg.enc -------------------------------------------------------------------------------- /resources/images/001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/001.png -------------------------------------------------------------------------------- /resources/images/002.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/002.png -------------------------------------------------------------------------------- /resources/images/003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/003.png -------------------------------------------------------------------------------- /resources/images/004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/004.png -------------------------------------------------------------------------------- /resources/images/005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/005.png -------------------------------------------------------------------------------- /resources/images/006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/006.png -------------------------------------------------------------------------------- /resources/images/007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/007.png -------------------------------------------------------------------------------- /resources/images/008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/008.png -------------------------------------------------------------------------------- /resources/images/schemer-logo-text-wide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/schemer-logo-text-wide.png -------------------------------------------------------------------------------- /resources/images/schemer-logo-text-wide.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | schemer 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /resources/images/schemer-logo-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/schemer-logo-text.png -------------------------------------------------------------------------------- /resources/images/schemer-logo-text.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | schemer 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /resources/images/schemer-logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/resources/images/schemer-logo.png -------------------------------------------------------------------------------- /resources/images/schemer-logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /schemer-core/src/main/scala/schemer/AvroSchema.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | import java.io.IOException 3 | 4 | import com.databricks.spark.avro.SchemaConverters 5 | import org.apache.avro.Schema.Parser 6 | import org.apache.avro.SchemaBuilder 7 | import org.apache.spark.sql.SparkSession 8 | import org.apache.spark.sql.types.StructType 9 | 10 | import scala.util.Random 11 | 12 | case class AvroSchemaBase() extends SchemaLikeBase[AvroSchema] { 13 | override def infer(paths: String*)(implicit spark: SparkSession) = { 14 | val schema = spark.read.format("com.databricks.spark.avro").load(paths: _*).schema 15 | 16 | AvroSchema(schema) 17 | } 18 | } 19 | 20 | case class AvroSchema(schema: String) extends SchemaLike { 21 | 22 | private def avroSchema() = new Parser().parse(schema) 23 | 24 | override def validate = 25 | try { 26 | sparkSchema() 27 | List.empty 28 | } catch { 29 | case e: IOException => List(s"Error while consuming Avro schema: ${e.getMessage}") 30 | } 31 | 32 | override def sparkSchema() = SchemaConverters.toSqlType(avroSchema()).dataType.asInstanceOf[StructType] 33 | 34 | override def toDf(paths: String*)(implicit spark: SparkSession) = 35 | spark.read.format("com.databricks.spark.avro").load(paths: _*) 36 | } 37 | 38 | object AvroSchema { 39 | def apply(): AvroSchemaBase = AvroSchemaBase() 40 | 41 | def apply(schema: StructType): AvroSchema = 42 | apply(schema, s"SchemerInferred_${Random.alphanumeric take 12 mkString ""}", "schemer") 43 | 44 | def apply(schema: StructType, record: String, namespace: String): AvroSchema = { 45 | val builder = SchemaBuilder.record(record).namespace(namespace) 46 | val avroSchema = SchemaConverters.convertStructToAvro(schema, builder, namespace).toString(true) 47 | new AvroSchema(avroSchema) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /schemer-core/src/main/scala/schemer/CSVSchema.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty 4 | import org.apache.spark.sql.types._ 5 | import org.apache.spark.sql.{DataFrame, SparkSession} 6 | import schemer.utils.JSONUtil 7 | 8 | case class CSVOptions( 9 | header: Boolean = true, 10 | headerBasedParser: Boolean = false, 11 | separator: String = ",", 12 | quoteChar: String = "\"", 13 | escapeChar: String = "\\" 14 | ) 15 | 16 | case class CSVSchemaBase(csvOptions: CSVOptions) extends SchemaLikeBase[CSVSchema] { 17 | override def infer(paths: String*)(implicit @transient spark: SparkSession) = { 18 | val schema = spark.read 19 | .option("header", csvOptions.header.toString) 20 | .option("delimiter", csvOptions.separator) 21 | .option("quote", csvOptions.quoteChar) 22 | .option("escape", csvOptions.escapeChar) 23 | .option("nullValue", null) 24 | .option("inferSchema", "true") 25 | .csv(paths: _*) 26 | .schema 27 | 28 | CSVSchema(schema, csvOptions) 29 | } 30 | } 31 | 
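// Illustrative sketch (not part of the original file): inferring a CSV schema and reading the data
// back with it. Assumes an implicit SparkSession and a hypothetical input path "data/products.csv";
// the calls mirror those exercised in CSVSchemaSpec.
//
//   implicit val spark: SparkSession = SparkSession.builder.master("local[*]").getOrCreate()
//   val inferred = CSVSchema(CSVOptions(header = true)).infer("data/products.csv") // infer field names and types
//   val df       = inferred.toDf("data/products.csv")                              // DataFrame read with the inferred schema
//   val json     = inferred.schema()                                               // JSON representation of fields and options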
32 | case class CSVSchema( 33 | @JsonProperty(required = true) fields: List[CSVField], 34 | options: CSVOptions = CSVOptions() 35 | ) extends SchemaLike { 36 | 37 | override def validate: List[String] = 38 | validateFields ++ validateMetaFields 39 | 40 | override def sparkSchema() = { 41 | val structFields = this.fields.map(field => StructField(field.name, getDataType(field.`type`), field.nullable)) 42 | StructType(structFields) 43 | } 44 | 45 | def toDf(paths: String*)(implicit @transient spark: SparkSession) = { 46 | val csvDF = spark.read 47 | .option("delimiter", options.separator) 48 | .option("quote", options.quoteChar) 49 | .option("escape", options.escapeChar) 50 | .option("nullValue", null) 51 | .csv(paths: _*) 52 | val orderedSchema = reconcileSchemaFieldOrder(sparkSchema(), csvDF) 53 | 54 | spark.read 55 | .option("header", options.header.toString) 56 | .option("delimiter", options.separator) 57 | .option("quote", options.quoteChar) 58 | .option("escape", options.escapeChar) 59 | .option("nullValue", null) 60 | .schema(orderedSchema) 61 | .csv(paths: _*) 62 | } 63 | 64 | private def reconcileSchemaFieldOrder(sparkSchema: StructType, csvDF: DataFrame) = 65 | if (options.headerBasedParser && options.header) { 66 | val actualHeaders = csvDF 67 | .first() 68 | .toSeq 69 | .map(_.toString) 70 | StructType(actualHeaders.map(field => sparkSchema(sparkSchema.fieldIndex(field)))) 71 | } else { 72 | sparkSchema 73 | } 74 | 75 | private def getDataType(csvFieldType: String) = 76 | csvFieldType.toLowerCase match { 77 | case "int" | "integer" => IntegerType 78 | case "long" => LongType 79 | case "double" => DoubleType 80 | case "float" => FloatType 81 | case "string" => StringType 82 | case "datetime" => DateType 83 | case "boolean" => BooleanType 84 | case _ => StringType 85 | } 86 | 87 | private def validateFields = 88 | if (fields.nonEmpty) { 89 | List.empty 90 | } else { 91 | List("fields can't be empty in a CSVSchema") 92 | } 93 | 94 | private def validateMetaFields = 95 | if (options.header && fields.exists(_.position.isEmpty)) { 96 | List("CSVSchema with hasHeader=false should have valid position numbers on all fields") 97 | } else { 98 | List.empty 99 | } 100 | 101 | override def schema() = 102 | JSONUtil.toJson(this) 103 | } 104 | 105 | object CSVSchema { 106 | def apply(schema: String): CSVSchema = 107 | JSONUtil.fromJson[CSVSchema](schema) 108 | 109 | def apply(options: CSVOptions): CSVSchemaBase = 110 | CSVSchemaBase(options) 111 | 112 | def apply(): CSVSchemaBase = 113 | CSVSchemaBase(CSVOptions()) 114 | def apply( 115 | schema: StructType, 116 | options: CSVOptions 117 | ): CSVSchema = { 118 | val fields = schema.fields.zipWithIndex.map { 119 | case (f: StructField, i: Int) => CSVField(f.name, f.nullable, getCsvType(f.dataType), Some(i)) 120 | }.toList 121 | 122 | new CSVSchema(fields, options) 123 | } 124 | 125 | def apply( 126 | schema: StructType, 127 | options: Map[String, String] 128 | ): CSVSchema = { 129 | val fields = schema.fields.zipWithIndex.map { 130 | case (f: StructField, i: Int) => CSVField(f.name, f.nullable, getCsvType(f.dataType), Some(i)) 131 | }.toList 132 | 133 | val csvOptions = CSVOptions( 134 | options.getOrElse("header", "true").toBoolean, 135 | options.getOrElse("headerBasedParser", "true").toBoolean, 136 | options.getOrElse("separator", ","), 137 | options.getOrElse("quoteChar", "\""), 138 | options.getOrElse("escapeChar", "\\") 139 | ) 140 | 141 | new CSVSchema(fields, csvOptions) 142 | } 143 | 144 | private def getCsvType(sparkType: DataType) = 
sparkType match { 145 | case IntegerType => "int" 146 | case LongType => "long" 147 | case DoubleType => "double" 148 | case FloatType => "float" 149 | case StringType => "string" 150 | case DateType => "datetime" 151 | case BooleanType => "boolean" 152 | case _ => "string" 153 | } 154 | } 155 | 156 | case class CSVField( 157 | name: String, 158 | nullable: Boolean, 159 | `type`: String, 160 | position: Option[Int] 161 | ) 162 | -------------------------------------------------------------------------------- /schemer-core/src/main/scala/schemer/JSONSchema.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | import com.fasterxml.jackson.databind.JsonNode 4 | import com.github.fge.jackson.JsonLoader 5 | import com.github.fge.jsonschema.main.JsonSchemaFactory 6 | import org.apache.spark.sql.SparkSession 7 | import org.apache.spark.sql.types._ 8 | import schemer.utils.{JSONUtil, JsonSchemaValidationUtil} 9 | 10 | import scala.annotation.tailrec 11 | import scala.collection.JavaConverters._ 12 | 13 | abstract trait JSONSchemaNode { 14 | def toJSON: String = JSONUtil.toJson(this) 15 | } 16 | 17 | case class ObjectSchema( 18 | `type`: String = "object", 19 | properties: Map[String, JSONSchemaNode], 20 | additionalProperties: Boolean = false, 21 | $schema: Option[String] = None 22 | ) extends JSONSchemaNode 23 | 24 | case class StringSchema( 25 | `type`: String = "string", 26 | format: Option[String] = None, 27 | pattern: Option[String] = None, 28 | minLength: Option[Int] = None, 29 | maxLength: Option[Int] = None 30 | ) extends JSONSchemaNode 31 | 32 | case class IntegerSchema(`type`: String = "integer", minimum: Option[BigInt] = None, maximum: Option[BigInt] = None) 33 | extends JSONSchemaNode 34 | 35 | case class NumberSchema(`type`: String = "number", minimum: Option[Double] = None, maximum: Option[Double] = None) 36 | extends JSONSchemaNode 37 | 38 | case class BooleanSchema(`type`: String = "boolean") extends JSONSchemaNode 39 | 40 | case class ArraySchema(`type`: String = "array", items: JSONSchemaNode) extends JSONSchemaNode 41 | 42 | case class JSONSchemaBase() extends SchemaLikeBase[JSONSchema] { 43 | 44 | @tailrec 45 | private def processStructFields( 46 | fields: List[StructField], 47 | accum: List[(String, JSONSchemaNode)] = Nil 48 | ): List[(String, JSONSchemaNode)] = 49 | fields match { 50 | case x :: xs => 51 | processStructFields(xs, accum ++ List(processField(x))) 52 | case Nil => accum 53 | } 54 | 55 | private def processField(x: StructField) = 56 | (x.name, processDataType(x.dataType)) 57 | 58 | private def processDataType(dataType: DataType): JSONSchemaNode = dataType match { 59 | case StringType => StringSchema() 60 | case LongType | IntegerType => IntegerSchema() 61 | case DoubleType => NumberSchema() 62 | case BooleanType => BooleanSchema() 63 | case f if f.isInstanceOf[StructType] => convertSparkToJsonSchema(dataType.asInstanceOf[StructType]) 64 | case f if f.isInstanceOf[ArrayType] => 65 | ArraySchema(items = processDataType(dataType.asInstanceOf[ArrayType].elementType)) 66 | } 67 | 68 | def convertSparkToJsonSchema(schema: StructType, draft: Option[String] = None) = 69 | ObjectSchema(properties = processStructFields(schema.fields.toList).toMap, $schema = draft) 70 | 71 | override def infer(paths: String*)(implicit spark: SparkSession) = { 72 | val sampleJsonData = spark.read.textFile(paths: _*).limit(1000) 73 | val schema = spark.read.json(sampleJsonData.rdd).schema 74 | val jsonSchema = 
convertSparkToJsonSchema(schema, Some("http://json-schema.org/draft-06/schema#")).toJSON 75 | JSONSchema(jsonSchema) 76 | } 77 | } 78 | 79 | case class JSONSchema(schema: String) extends SchemaLike { 80 | 81 | private val jsonSchema = JsonLoader.fromString(schema) 82 | 83 | override def validate: List[String] = { 84 | val validator = JsonSchemaFactory.byDefault().getSyntaxValidator 85 | val report = validator.validateSchema(jsonSchema) 86 | val syntaxErrors = JsonSchemaValidationUtil.process(report) 87 | if (syntaxErrors.isEmpty) { 88 | try { 89 | sparkSchema() 90 | List.empty 91 | } catch { 92 | case e: UnsupportedOperationException => List(e.getMessage) 93 | } 94 | } else { 95 | syntaxErrors 96 | } 97 | } 98 | 99 | override def sparkSchema(): StructType = jsonToStructType(jsonSchema).asInstanceOf[StructType] 100 | 101 | def toDf(paths: String*)(implicit spark: SparkSession) = 102 | spark.read 103 | .schema(sparkSchema()) 104 | .json(paths: _*) 105 | 106 | private def getRequiredProps(jsonSchema: JsonNode) = 107 | if (jsonSchema.has("required") && jsonSchema.get("required").isArray) { 108 | Some(jsonSchema.get("required").elements().asScala.map(_.asText())) 109 | } else { 110 | None 111 | } 112 | 113 | private def toArrayType(field: JsonNode) = { 114 | val itemsNode = field.get("items") 115 | if (itemsNode != null && itemsNode.isArray) { 116 | ArrayType(jsonToStructType(itemsNode.get(0))) 117 | } else if (itemsNode != null && itemsNode.isObject) { 118 | ArrayType(jsonToStructType(itemsNode)) 119 | } else { 120 | ArrayType(StringType) 121 | } 122 | } 123 | 124 | private def toObjectType(jsonSchema: JsonNode) = { 125 | val requiredFields = getRequiredProps(jsonSchema).getOrElse(List.empty) 126 | if (jsonSchema.has("patternProperties")) { 127 | MapType( 128 | StringType, 129 | jsonToStructType(jsonSchema.get("patternProperties").fields().asScala.toList.head.getValue) 130 | ) 131 | } else { 132 | StructType( 133 | jsonSchema 134 | .get("properties") 135 | .fields() 136 | .asScala 137 | .toList 138 | .map(field => { 139 | val fieldType = jsonToStructType(field.getValue) 140 | StructField(field.getKey, fieldType, nullable = !requiredFields.toList.contains(field.getKey)) 141 | }) 142 | ) 143 | } 144 | } 145 | 146 | private def jsonToStructType(jsonSchema: JsonNode): DataType = 147 | jsonSchema.get("type").asText() match { 148 | case "array" => toArrayType(jsonSchema) 149 | case "object" => toObjectType(jsonSchema) 150 | case "boolean" => BooleanType 151 | case "string" => StringType 152 | case "integer" => LongType 153 | case "number" => DoubleType 154 | case _ => 155 | throw new UnsupportedOperationException( 156 | s"Trying to convert a unsupported type ${jsonSchema.get("type").asText()}. 
Types other than (boolean, string, integer, number, object, array) aren't supported" 157 | ) 158 | 159 | } 160 | } 161 | 162 | object JSONSchema { 163 | def apply(): JSONSchemaBase = JSONSchemaBase() 164 | def apply(schema: StructType): JSONSchema = JSONSchema(JSONSchemaBase().convertSparkToJsonSchema(schema).toJSON) 165 | } 166 | -------------------------------------------------------------------------------- /schemer-core/src/main/scala/schemer/ParquetSchema.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | import org.apache.spark.sql.SparkSession 4 | import org.apache.spark.sql.types.StructType 5 | 6 | import scala.reflect.runtime.universe._ 7 | 8 | sealed trait ParquetSchemaType { 9 | val `type`: String 10 | } 11 | 12 | object ParquetSchemaType { 13 | case object Avro extends ParquetSchemaType { 14 | override val `type`: String = "avro" 15 | } 16 | case object Csv extends ParquetSchemaType { 17 | override val `type`: String = "csv" 18 | } 19 | case object Json extends ParquetSchemaType { 20 | override val `type`: String = "json" 21 | } 22 | 23 | val supportedTypes = List(Avro, Csv, Json).map(_.`type`) 24 | } 25 | 26 | case class ParquetSchemaBase[T <: SchemaLike: TypeTag](override val options: Map[String, String] = Map()) 27 | extends SchemaLikeBase[ParquetSchema] { 28 | override def infer(paths: String*)(implicit spark: SparkSession) = { 29 | val schema = spark.read.parquet(paths: _*).schema 30 | val underlyingSchema = typeOf[T] match { 31 | case t if t =:= typeOf[AvroSchema] => (ParquetSchemaType.Avro, AvroSchema(schema)) 32 | case t if t =:= typeOf[JSONSchema] => (ParquetSchemaType.Json, JSONSchema(schema)) 33 | case t if t =:= typeOf[CSVSchema] => (ParquetSchemaType.Csv, CSVSchema(schema, options)) 34 | } 35 | 36 | ParquetSchema(underlyingSchema._2.schema(), underlyingSchema._1) 37 | } 38 | } 39 | 40 | case class ParquetSchema(schema: String, `type`: ParquetSchemaType) extends SchemaLike { 41 | 42 | val schemaType = `type` match { 43 | case ParquetSchemaType.Avro => AvroSchema(schema) 44 | case ParquetSchemaType.Csv => CSVSchema(schema) 45 | case ParquetSchemaType.Json => JSONSchema(schema) 46 | } 47 | 48 | override def validate = schemaType.validate 49 | 50 | def toDf(paths: String*)(implicit spark: SparkSession) = 51 | spark.read 52 | .schema(sparkSchema()) 53 | .parquet(paths: _*) 54 | 55 | override def sparkSchema(): StructType = schemaType.sparkSchema() 56 | } 57 | 58 | object ParquetSchema { 59 | def apply[T <: SchemaLike: TypeTag]() = ParquetSchemaBase[T]() 60 | def apply(`type`: String) = `type` match { 61 | case ParquetSchemaType.Avro.`type` => apply[AvroSchema]() 62 | case ParquetSchemaType.Csv.`type` => apply[CSVSchema]() 63 | case ParquetSchemaType.Json.`type` => apply[JSONSchema]() 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /schemer-core/src/main/scala/schemer/SchemaLike.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | import org.apache.spark.sql.types.StructType 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | 6 | private[schemer] trait SchemaLikeBase[T <: SchemaLike] { 7 | val options: Map[String, String] = Map() 8 | def infer(paths: String*)(implicit @transient spark: SparkSession): T 9 | } 10 | 11 | private[schemer] trait SchemaLike { 12 | def validate: List[String] 13 | 14 | def sparkSchema(): StructType 15 | 16 | def schema(): String 17 | 18 | def toDf(paths: 
String*)(implicit @transient spark: SparkSession): DataFrame 19 | } 20 | -------------------------------------------------------------------------------- /schemer-core/src/main/scala/schemer/Schemer.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | sealed trait SchemaType { 4 | val `type`: String 5 | } 6 | 7 | object SchemaType { 8 | case object Avro extends SchemaType { 9 | override val `type`: String = "avro" 10 | } 11 | case object Csv extends SchemaType { 12 | override val `type`: String = "csv" 13 | } 14 | case object Json extends SchemaType { 15 | override val `type`: String = "json" 16 | } 17 | case object ParquetAvro extends SchemaType { 18 | override val `type`: String = "parquet_avro" 19 | } 20 | case object ParquetCsv extends SchemaType { 21 | override val `type`: String = "parquet_csv" 22 | } 23 | case object ParquetJson extends SchemaType { 24 | override val `type`: String = "parquet_json" 25 | } 26 | val supportedTypes = List(Avro, Csv, Json, ParquetAvro, ParquetCsv, ParquetJson) 27 | } 28 | 29 | object Schemer { 30 | def from(`type`: String, config: String): SchemaLike = `type` match { 31 | case "avro" => AvroSchema(config) 32 | case "csv" => CSVSchema(config) 33 | case "json" => JSONSchema(config) 34 | case "parquet_avro" => ParquetSchema(config, ParquetSchemaType.Avro) 35 | case "parquet_csv" => ParquetSchema(config, ParquetSchemaType.Csv) 36 | case "parquet_json" => ParquetSchema(config, ParquetSchemaType.Json) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /schemer-core/src/main/scala/schemer/utils/JSONUtil.scala: -------------------------------------------------------------------------------- 1 | package schemer.utils 2 | 3 | import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature} 4 | import com.fasterxml.jackson.module.scala.DefaultScalaModule 5 | import com.fasterxml.jackson.annotation.JsonInclude.Include 6 | 7 | import scala.reflect.ClassTag 8 | 9 | private[schemer] object JSONUtil { 10 | private val mapper = new ObjectMapper() 11 | 12 | mapper.registerModule(DefaultScalaModule) 13 | 14 | mapper.setSerializationInclusion(Include.NON_NULL) 15 | 16 | def toJson(value: Any) = mapper.writeValueAsString(value) 17 | 18 | def prettyJson(value: Any) = mapper.enable(SerializationFeature.INDENT_OUTPUT).writeValueAsString(value) 19 | 20 | def fromJson[T: ClassTag](json: String) = { 21 | val classType = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] 22 | mapper.readValue[T](json, classType) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /schemer-core/src/main/scala/schemer/utils/JsonSchemaValidationUtil.scala: -------------------------------------------------------------------------------- 1 | package schemer.utils 2 | 3 | import com.github.fge.jsonschema.core.report.ProcessingReport 4 | import scala.collection.JavaConverters._ 5 | 6 | object JsonSchemaValidationUtil { 7 | def process(report: ProcessingReport): List[String] = 8 | if (!report.isSuccess) { 9 | getErrorsFromReport(report) 10 | } else { 11 | List.empty 12 | } 13 | 14 | private def getErrorsFromReport(report: ProcessingReport) = { 15 | val errorList = report.iterator.asScala.toList 16 | .map { message => 17 | message.asJson() 18 | } 19 | .filter { json => 20 | json.get("level").asText == "error" 21 | } 22 | .map { json => 23 | json.get("message").asText 24 | } 25 | errorList 26 | } 27 | } 28 | 
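// Illustrative sketch (not part of the original file): validating the syntax of a JSON Schema string
// and collecting error messages. Assumes a schema string `schemaJson`; this is the same validation
// path used by JSONSchema.validate above.
//
//   val node   = com.github.fge.jackson.JsonLoader.fromString(schemaJson)
//   val report = com.github.fge.jsonschema.main.JsonSchemaFactory.byDefault().getSyntaxValidator.validateSchema(node)
//   val errors = JsonSchemaValidationUtil.process(report) // empty list when the schema syntax is valid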
-------------------------------------------------------------------------------- /schemer-core/src/test/resources/test.csv: -------------------------------------------------------------------------------- 1 | title,url,storeId 2 | iphone,http://indix.com/iphone,42 3 | galaxy,http://indix.com/galaxy,43 4 | lumia,http://indix.com/lumia,44 -------------------------------------------------------------------------------- /schemer-core/src/test/resources/test.json: -------------------------------------------------------------------------------- 1 | {"title": "iphone", "url": "http://indix.com/iphone", "imageUrls": ["http://indix.com/iphone.jpg"], "storeId": 42, "price": {"min": 10.0, "max": 100.0 }, "isAvailable": false} -------------------------------------------------------------------------------- /schemer-core/src/test/resources/test.tsv: -------------------------------------------------------------------------------- 1 | title url storeId 2 | iphone http://indix.com/iphone 42 3 | galaxy http://indix.com/galaxy 43 4 | lumia http://indix.com/lumia 44 -------------------------------------------------------------------------------- /schemer-core/src/test/scala/schemer/AvroSchemaSpec.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | import com.databricks.spark.avro._ 4 | import org.apache.spark.SparkConf 5 | import org.apache.spark.sql.types.{IntegerType, StringType} 6 | import org.apache.spark.sql.{SaveMode, SparkSession} 7 | import org.scalatest.{FlatSpec, Matchers} 8 | 9 | class AvroSchemaSpec extends FlatSpec with Matchers { 10 | implicit val spark: SparkSession = SparkSession.builder 11 | .config(new SparkConf()) 12 | .master("local[*]") 13 | .getOrCreate() 14 | 15 | "AvroSchema" should "infer avro schema from given path" in { 16 | import spark.implicits._ 17 | val df = Seq(TestRecord("iphone", "http://indix.com/iphone", 42)).toDF 18 | 19 | try { 20 | df.write.mode(SaveMode.Overwrite).avro("test") 21 | val schema = AvroSchema().infer("test") 22 | schema.schema.replaceAll("SchemerInferred_[^\"]+", "SchemerInferred") should be( 23 | "{\n \"type\" : \"record\",\n \"name\" : \"SchemerInferred\",\n \"namespace\" : \"schemer\",\n \"fields\" : [ {\n \"name\" : \"title\",\n \"type\" : [ \"string\", \"null\" ]\n }, {\n \"name\" : \"url\",\n \"type\" : [ \"string\", \"null\" ]\n }, {\n \"name\" : \"storeId\",\n \"type\" : [ \"int\", \"null\" ]\n } ]\n}" 24 | ) 25 | } finally { 26 | Helpers.cleanOutputPath("test") 27 | } 28 | } 29 | 30 | it should "get spark schema" in { 31 | val schema = AvroSchema( 32 | "{\n \"type\" : \"record\",\n \"name\" : \"SchemerInferred\",\n \"namespace\" : \"schemer\",\n \"fields\" : [ {\n \"name\" : \"title\",\n \"type\" : [ \"string\", \"null\" ]\n }, {\n \"name\" : \"url\",\n \"type\" : [ \"string\", \"null\" ]\n }, {\n \"name\" : \"storeId\",\n \"type\" : [ \"int\", \"null\" ]\n } ]\n}" 33 | ) 34 | val schemaFields = schema.sparkSchema().fields 35 | schemaFields.length should be(3) 36 | 37 | schemaFields(0).name should be("title") 38 | schemaFields(0).dataType should be(StringType) 39 | 40 | schemaFields(1).name should be("url") 41 | schemaFields(1).dataType should be(StringType) 42 | 43 | schemaFields(2).name should be("storeId") 44 | schemaFields(2).dataType should be(IntegerType) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /schemer-core/src/test/scala/schemer/CSVSchemaSpec.scala: 
-------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.sql.SparkSession 5 | import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} 6 | import org.scalatest._ 7 | 8 | import scala.util.Try 9 | 10 | class CSVSchemaSpec extends FlatSpec with Matchers { 11 | implicit val spark: SparkSession = SparkSession.builder 12 | .config(new SparkConf()) 13 | .master("local[*]") 14 | .getOrCreate() 15 | 16 | "CSVSchema" should "infer schema from given path" in { 17 | val path = getClass.getClassLoader.getResource("test.csv").getPath 18 | 19 | val inferredSchema = CSVSchema().infer(path) 20 | val fields = inferredSchema.fields 21 | 22 | fields.length should be(3) 23 | fields(0).name should be("title") 24 | fields(0).`type` should be("string") 25 | 26 | fields(1).name should be("url") 27 | fields(1).`type` should be("string") 28 | 29 | fields(2).name should be("storeId") 30 | fields(2).`type` should be("int") 31 | } 32 | 33 | it should "infer schema without header from file" in { 34 | val path = getClass.getClassLoader.getResource("test.csv").getPath 35 | 36 | val inferredSchema = CSVSchema(CSVOptions(false)).infer(path) 37 | val fields = inferredSchema.fields 38 | 39 | fields.length should be(3) 40 | fields(0).name should be("_c0") 41 | fields(0).`type` should be("string") 42 | 43 | fields(1).name should be("_c1") 44 | fields(1).`type` should be("string") 45 | 46 | fields(2).name should be("_c2") 47 | fields(2).`type` should be("string") 48 | } 49 | 50 | it should "infer schema and read" in { 51 | val path = getClass.getClassLoader.getResource("test.csv").getPath 52 | 53 | val inferredSchema = CSVSchema().infer(path) 54 | import spark.implicits._ 55 | val output = inferredSchema.toDf(path).as[TestRecord].collect() 56 | 57 | output.length should be(3) 58 | output(0).title should be("iphone") 59 | output(0).url should be("http://indix.com/iphone") 60 | output(0).storeId should be(42) 61 | } 62 | 63 | it should "infer schema and read from TSV" in { 64 | val path = getClass.getClassLoader.getResource("test.tsv").getPath 65 | 66 | val inferredSchema = CSVSchema(CSVOptions(headerBasedParser = true, separator = "\t")).infer(path) 67 | import spark.implicits._ 68 | val output = inferredSchema.toDf(path).as[TestRecord].collect() 69 | 70 | output.length should be(3) 71 | output(0).title should be("iphone") 72 | output(0).url should be("http://indix.com/iphone") 73 | output(0).storeId should be(42) 74 | } 75 | 76 | it should "infer schema and get schema json" in { 77 | val path = getClass.getClassLoader.getResource("test.csv").getPath 78 | 79 | val inferredSchema = CSVSchema().infer(path) 80 | 81 | inferredSchema.schema() should be( 82 | "{\"fields\":[{\"name\":\"title\",\"nullable\":true,\"type\":\"string\",\"position\":0},{\"name\":\"url\",\"nullable\":true,\"type\":\"string\",\"position\":1},{\"name\":\"storeId\",\"nullable\":true,\"type\":\"int\",\"position\":2}],\"options\":{\"header\":true,\"headerBasedParser\":false,\"separator\":\",\",\"quoteChar\":\"\\\"\",\"escapeChar\":\"\\\\\"}}" 83 | ) 84 | } 85 | 86 | it should "get schema from json" in { 87 | val schema = CSVSchema( 88 | 
"{\"fields\":[{\"name\":\"title\",\"nullable\":true,\"type\":\"string\",\"position\":0},{\"name\":\"url\",\"nullable\":true,\"type\":\"string\",\"position\":1},{\"name\":\"storeId\",\"nullable\":true,\"type\":\"int\",\"position\":2}],\"options\":{\"header\":true,\"headerBasedParser\":false,\"separator\":\",\",\"quoteChar\":\"\\\"\",\"escapeChar\":\"\\\\\"}}" 89 | ) 90 | 91 | schema.sparkSchema() should be( 92 | StructType( 93 | Seq( 94 | StructField("title", StringType, true), 95 | StructField("url", StringType, true), 96 | StructField("storeId", IntegerType, true) 97 | ) 98 | ) 99 | ) 100 | } 101 | 102 | it should "handle empty fields" in { 103 | val schema = CSVSchema( 104 | "{\"fields\":[], \"options\": {}}" 105 | ) 106 | 107 | schema.sparkSchema() should be( 108 | StructType(List()) 109 | ) 110 | } 111 | 112 | it should "handle error parsing json" in { 113 | Try(CSVSchema("{}")).failed.get.getMessage should startWith("Missing required creator property 'fields'") 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /schemer-core/src/test/scala/schemer/Helpers.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | import java.net.URI 4 | 5 | import org.apache.hadoop.conf.Configuration 6 | import org.apache.hadoop.fs.{FileSystem, Path} 7 | 8 | case class TestRecord(title: String, url: String, storeId: Int) 9 | 10 | object Helpers { 11 | 12 | def cleanOutputPath(output: String) { 13 | val outputPath = new Path(output) 14 | if (fileExists(output)) 15 | outputPath.getFileSystem(new Configuration()).delete(outputPath, true) 16 | } 17 | 18 | def fileExists(fileLocation: String) = { 19 | val fs = FileSystem.get(new URI(fileLocation), new Configuration()) 20 | fs.exists(new Path(fileLocation)) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /schemer-core/src/test/scala/schemer/JSONSchemaSpec.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.sql.SparkSession 5 | import org.apache.spark.sql.types._ 6 | import org.scalatest.{FlatSpec, Matchers} 7 | 8 | class JSONSchemaSpec extends FlatSpec with Matchers { 9 | implicit val spark: SparkSession = SparkSession.builder 10 | .config(new SparkConf()) 11 | .master("local[*]") 12 | .getOrCreate() 13 | 14 | "JSONSchema" should "infer json schema" in { 15 | val path = getClass.getClassLoader.getResource("test.json").getPath 16 | 17 | val inferredSchema = JSONSchema().infer(path) 18 | inferredSchema.schema should be( 19 | "{\"type\":\"object\",\"properties\":{\"imageUrls\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"url\":{\"type\":\"string\"},\"price\":{\"type\":\"object\",\"properties\":{\"max\":{\"type\":\"number\"},\"min\":{\"type\":\"number\"}},\"additionalProperties\":false},\"storeId\":{\"type\":\"integer\"},\"isAvailable\":{\"type\":\"boolean\"},\"title\":{\"type\":\"string\"}},\"additionalProperties\":false}" 20 | ) 21 | 22 | val fields = inferredSchema.sparkSchema().fields 23 | fields.length should be(6) 24 | fields.map(f => (f.name, f.dataType)) should contain allElementsOf List( 25 | ("title", StringType), 26 | ("url", StringType), 27 | ("storeId", LongType), 28 | ("price", StructType(Seq(StructField("max", DoubleType), StructField("min", DoubleType)))), 29 | ("isAvailable", BooleanType), 30 | ("imageUrls", ArrayType(StringType)) 31 | ) 32 | } 33 | } 
34 | -------------------------------------------------------------------------------- /schemer-core/src/test/scala/schemer/ParquetSchemaSpec.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.sql.{SaveMode, SparkSession} 5 | import org.scalatest.{FlatSpec, Matchers} 6 | 7 | class ParquetSchemaSpec extends FlatSpec with Matchers { 8 | implicit val spark: SparkSession = SparkSession.builder 9 | .config(new SparkConf()) 10 | .master("local[*]") 11 | .getOrCreate() 12 | 13 | "ParquetSchema" should "infer avro schema" in { 14 | import spark.implicits._ 15 | val df = Seq(TestRecord("iphone", "http://indix.com/iphone", 42)).toDF 16 | 17 | val dataDir = "test_parquet_avro" 18 | 19 | try { 20 | df.write.mode(SaveMode.Overwrite).parquet(dataDir) 21 | val schema = ParquetSchema[AvroSchema]().infer(dataDir) 22 | schema.schema.replaceAll("SchemerInferred_[^\"]+", "SchemerInferred") should be( 23 | "{\n \"type\" : \"record\",\n \"name\" : \"SchemerInferred\",\n \"namespace\" : \"schemer\",\n \"fields\" : [ {\n \"name\" : \"title\",\n \"type\" : [ \"string\", \"null\" ]\n }, {\n \"name\" : \"url\",\n \"type\" : [ \"string\", \"null\" ]\n }, {\n \"name\" : \"storeId\",\n \"type\" : [ \"int\", \"null\" ]\n } ]\n}" 24 | ) 25 | } finally { 26 | Helpers.cleanOutputPath(dataDir) 27 | } 28 | } 29 | 30 | it should "infer json schema" in { 31 | import spark.implicits._ 32 | val df = Seq(TestRecord("iphone", "http://indix.com/iphone", 42)).toDF 33 | 34 | val dataDir = "test_parquet_json" 35 | 36 | try { 37 | df.write.mode(SaveMode.Overwrite).parquet(dataDir) 38 | val schema = ParquetSchema[JSONSchema]().infer(dataDir) 39 | schema.schema.replaceAll("SchemerInferred_[^\"]+", "SchemerInferred") should be( 40 | "{\"type\":\"object\",\"properties\":{\"title\":{\"type\":\"string\"},\"url\":{\"type\":\"string\"},\"storeId\":{\"type\":\"integer\"}},\"additionalProperties\":false}" 41 | ) 42 | } finally { 43 | Helpers.cleanOutputPath(dataDir) 44 | } 45 | } 46 | 47 | it should "infer csv schema" in { 48 | import spark.implicits._ 49 | val df = Seq(TestRecord("iphone", "http://indix.com/iphone", 42)).toDF 50 | 51 | val dataDir = "test_parquet_csv" 52 | 53 | try { 54 | df.write.mode(SaveMode.Overwrite).parquet(dataDir) 55 | val schema = ParquetSchema[CSVSchema]().infer(dataDir) 56 | schema.schema.replaceAll("SchemerInferred_[^\"]+", "SchemerInferred") should be( 57 | "{\"fields\":[{\"name\":\"title\",\"nullable\":true,\"type\":\"string\",\"position\":0},{\"name\":\"url\",\"nullable\":true,\"type\":\"string\",\"position\":1},{\"name\":\"storeId\",\"nullable\":true,\"type\":\"int\",\"position\":2}],\"options\":{\"header\":true,\"headerBasedParser\":true,\"separator\":\",\",\"quoteChar\":\"\\\"\",\"escapeChar\":\"\\\\\"}}" 58 | ) 59 | } finally { 60 | Helpers.cleanOutputPath(dataDir) 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /schemer-registry/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | akka { 2 | http { 3 | server { 4 | request-timeout = 90s 5 | idle-timeout = 120s 6 | } 7 | } 8 | } 9 | 10 | registry { 11 | server { 12 | host = "0.0.0.0" 13 | port = 9000 14 | port = ${?SCHEMER_REGISTRY_PORT} 15 | } 16 | inference { 17 | timeout = 60s 18 | } 19 | h2 { 20 | dataSourceClassName = "org.h2.jdbcx.JdbcDataSource" 21 | dataSource { 22 | url = "jdbc:h2:mem:registry" 23 | } 
24 | } 25 | postgres { 26 | url = "postgresql://localhost:5432/schemer?user=schemer&password=schemer" 27 | url = ${?POSTGRES_URL} 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /schemer-registry/src/main/resources/aws-core-site.xml: -------------------------------------------------------------------------------- [XML markup stripped in this dump; the file is a Hadoop core-site configuration that maps fs.s3.impl and fs.s3n.impl to org.apache.hadoop.fs.s3a.S3AFileSystem, and fs.AbstractFileSystem.s3.impl and fs.AbstractFileSystem.s3n.impl to org.apache.hadoop.fs.s3a.S3A] -------------------------------------------------------------------------------- /schemer-registry/src/main/resources/db/migration/V1__creates_schemas.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; 2 | 3 | CREATE TABLE "namespaces"( 4 | "id" UUID NOT NULL DEFAULT uuid_generate_v4(), 5 | "name" VARCHAR NOT NULL 6 | ); 7 | 8 | ALTER TABLE "namespaces" ADD CONSTRAINT "namespaces_id" PRIMARY KEY("id"); 9 | CREATE UNIQUE INDEX "namespaces_name" ON "namespaces"("name"); 10 | 11 | INSERT INTO "namespaces"("name") VALUES('default'); 12 | 13 | CREATE TABLE "schemas"( 14 | "id" UUID NOT NULL DEFAULT uuid_generate_v4(), 15 | "name" VARCHAR NOT NULL, 16 | "namespace" VARCHAR NOT NULL, 17 | "type" VARCHAR NOT NULL, 18 | "created_on" TIMESTAMP WITH TIME ZONE NOT NULL, 19 | "created_by" VARCHAR NOT NULL 20 | 21 | ); 22 | 23 | ALTER TABLE "schemas" ADD CONSTRAINT "schemas_id" PRIMARY KEY("id"); 24 | CREATE UNIQUE INDEX "schemas_name_namespace" ON "schemas"("name","namespace"); 25 | ALTER TABLE "schemas" ADD CONSTRAINT "schemas_namespace_fk" FOREIGN KEY("namespace") REFERENCES "namespaces"("name"); 26 | 27 | CREATE TABLE "schema_versions" ( 28 | "id" UUID NOT NULL DEFAULT uuid_generate_v4(), 29 | "schema_id" UUID NOT NULL, 30 | "version" VARCHAR NOT NULL, 31 | "schema" VARCHAR NOT NULL, 32 | "created_on" TIMESTAMP WITH TIME ZONE NOT NULL, 33 | "created_by" VARCHAR NOT NULL 34 | ); 35 | ALTER TABLE "schema_versions" ADD CONSTRAINT "schema_versions_id" PRIMARY KEY("id"); 36 | CREATE UNIQUE INDEX "schema_versions_version" ON "schema_versions"("schema_id", "version"); 37 | ALTER TABLE "schema_versions" ADD CONSTRAINT "schema_versions_schema_fk" FOREIGN KEY("schema_id") REFERENCES "schemas"("id"); -------------------------------------------------------------------------------- /schemer-registry/src/main/resources/graphql/graphiql.html: -------------------------------------------------------------------------------- [HTML markup stripped in this dump; the file is the static GraphiQL explorer page served on GET /graphql by GraphQLRoutes]
-------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/actors/InferActor.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.actors 2 | 3 | import akka.actor.{Actor, ActorRef, ActorSystem, Status} 4 | import akka.event.Logging 5 | import akka.util.Timeout 6 | import org.apache.spark.sql.SparkSession 7 | import schemer._ 8 | import schemer.registry.exceptions.SchemerInferenceException 9 | 10 | import scala.concurrent.Future 11 | import scala.util.{Failure, Random, Success} 12 | 13 | case class JSONSchemaInferenceRequest(paths: Seq[String]) 14 | case class AvroSchemaInferenceRequest(paths: Seq[String]) 15 | case class ParquetSchemaInferenceRequest(`type`: String, paths: Seq[String]) 16 | case class CSVSchemaInferenceRequest(options: CSVOptions, paths: Seq[String]) 17 | 18 | class InferActor( 19 | implicit val spark: SparkSession, 20 | implicit val system: ActorSystem, 21 | implicit val inferTimeout: Timeout 22 | ) extends Actor { 23 | import context.dispatcher 24 | val logger = Logging(context.system, this) 25 | 26 | def receive = { 27 | case JSONSchemaInferenceRequest(paths) => 28 | inferSchema(sender()) { 29 | JSONSchema().infer(paths: _*) 30 | } 31 | case AvroSchemaInferenceRequest(paths) => 32 | inferSchema(sender()) { 33 | AvroSchema().infer(paths: _*) 34 | } 35 | case CSVSchemaInferenceRequest(options, paths) => 36 | inferSchema(sender()) { 37 | CSVSchema(options).infer(paths: _*) 38 | } 39 | case ParquetSchemaInferenceRequest(t, paths) => 40 | inferSchema(sender()) { 41 | ParquetSchema(t).infer(paths: _*) 42 | } 43 | case _ => logger.info("Unsupported infer request") 44 | } 45 | 46 | def inferSchema(sender: ActorRef)(block: => Any) = { 47 | val jobGroup = Random.alphanumeric take 12 mkString "" 48 | logger.info(s"Starting inference for jobGroup $jobGroup") 49 | 50 | val inferFuture = Future { 51 | spark.sparkContext.setJobGroup(jobGroup, jobGroup, true) 52 | block 53 | } recoverWith { 54 | case ex => 55 | logger.info(s"Inference for jobGroup $jobGroup failed - ${ex.getMessage}") 56 | Future.failed(SchemerInferenceException(ex.getMessage)) 57 | } 58 | 59 | inferFuture onComplete { 60 | case Success(r) => 61 | logger.info(s"Completing inference for jobGroup $jobGroup") 62 | sender ! r 63 | case Failure(f) => 64 | sender ! 
Status.Failure(f) 65 | } 66 | 67 | system.scheduler.scheduleOnce(inferTimeout.duration) { 68 | logger.info(s"Cancelling jobGroup $jobGroup") 69 | spark.sparkContext.cancelJobGroup(jobGroup) 70 | } 71 | 72 | } 73 | 74 | override def preStart(): Unit = 75 | logger.info(s"Starting infer actor") 76 | } 77 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/dao/SchemaDao.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.dao 2 | 3 | import java.util.UUID 4 | 5 | import org.joda.time.DateTime 6 | import schemer.registry.models.{Schema, SchemaVersion} 7 | import schemer.registry.sql.SqlDatabase 8 | 9 | import scala.concurrent.{ExecutionContext, Future} 10 | 11 | case class PaginatedFilter( 12 | id: Option[UUID], 13 | first: Option[Int], 14 | after: Option[DateTime], 15 | last: Option[Int], 16 | before: Option[DateTime] 17 | ) { 18 | def take = (last orElse first).filter(_ <= 10).getOrElse(10) + 1 19 | } 20 | 21 | class SchemaDao(val db: SqlDatabase)(implicit val ec: ExecutionContext) { 22 | import db.ctx._ 23 | 24 | val schemas = quote(querySchema[Schema]("schemas")) 25 | def find(id: UUID) = run(schemas.filter(c => c.id == lift(id))).map(_.headOption) 26 | def create(schema: Schema): Future[UUID] = run(schemas.insert(lift(schema)).returning(_.id)) 27 | def all() = run(schemas) 28 | 29 | val schemaVersions = quote(querySchema[SchemaVersion]("schema_versions")) 30 | 31 | def createVersion(schemaVersion: SchemaVersion): Future[UUID] = 32 | run(schemaVersions.insert(lift(schemaVersion)).returning(_.id)) 33 | 34 | def findFirstVersions(filter: PaginatedFilter) = { 35 | val query = quote { 36 | applyCursors(lift(filter)).sortBy(_.createdOn)(Ord.descNullsLast).take(lift(filter.take)) 37 | } 38 | 39 | run(query) 40 | } 41 | 42 | def findLastVersions(filter: PaginatedFilter) = { 43 | val query = quote { 44 | applyCursors(lift(filter)).sortBy(_.createdOn)(Ord.ascNullsLast).take(lift(filter.take)) 45 | } 46 | 47 | run(query) 48 | } 49 | 50 | private def applyCursors = 51 | quote { (filter: PaginatedFilter) => 52 | schemaVersions 53 | .filter( 54 | (version: SchemaVersion) => 55 | filter.id.forall(_ == version.schemaId) 56 | && filter.after > version.createdOn 57 | && filter.before < version.createdOn 58 | ) 59 | } 60 | 61 | def findLatestVersion(id: UUID) = { 62 | val query = quote { 63 | schemaVersions 64 | .filter(_.schemaId == lift(id)) 65 | .filter { v1 => 66 | schemaVersions 67 | .filter(_.schemaId == lift(id)) 68 | .filter { v2 => 69 | v1.id != v2.id && v1.createdOn < v2.createdOn 70 | } 71 | .isEmpty 72 | } 73 | } 74 | 75 | run(query).map(_.headOption) 76 | } 77 | 78 | def findVersion(id: UUID, version: String) = { 79 | val query = quote { 80 | schemaVersions.filter(_.version == lift(version)).filter(_.schemaId == lift(id)) 81 | } 82 | 83 | run(query).map(_.headOption) 84 | } 85 | 86 | def findVersion(id: UUID) = run(schemaVersions.filter(c => c.id == lift(id))).map(_.headOption) 87 | } 88 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/exceptions/SchemerException.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.exceptions 2 | 3 | class SchemerException(message: String) extends Exception(message) 4 | -------------------------------------------------------------------------------- 
/schemer-registry/src/main/scala/schemer/registry/exceptions/SchemerInferenceException.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.exceptions 2 | 3 | case class SchemerInferenceException(message: String) 4 | extends SchemerException(s"Error while trying to infer schema - $message") 5 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/exceptions/SchemerSchemaCreationException.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.exceptions 2 | 3 | case class SchemerSchemaCreationException(message: String) 4 | extends SchemerException(s"Error while trying to create new schema - $message") 5 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/exceptions/SchemerSchemaVersionCreationException.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.exceptions 2 | 3 | case class SchemerSchemaVersionCreationException(message: String) 4 | extends SchemerException(s"Error while trying to create new schema version - $message") 5 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/graphql/CustomGraphQLResolver.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.graphql 2 | 3 | import java.util.UUID 4 | 5 | import sangria.execution.deferred.{Deferred, DeferredResolver} 6 | import schemer._ 7 | import schemer.registry.models.{SchemaSchemaVersionConnection, SchemaVersion} 8 | 9 | import scala.concurrent.ExecutionContext 10 | 11 | case class InferCSVSchemaDeferred(options: CSVOptions, paths: Seq[String]) extends Deferred[CSVSchema] 12 | case class InferJSONSchemaDeferred(paths: Seq[String]) extends Deferred[JSONSchema] 13 | case class InferParquetSchemaDeferred(`type`: String, paths: Seq[String]) extends Deferred[ParquetSchema] 14 | case class InferAvroSchemaDeferred(paths: Seq[String]) extends Deferred[AvroSchema] 15 | 16 | case class SchemaVersionsDeferred( 17 | id: UUID, 18 | first: Option[Int], 19 | after: Option[String], 20 | last: Option[Int], 21 | before: Option[String] 22 | ) extends Deferred[Seq[SchemaSchemaVersionConnection]] 23 | case class SchemaVersionLatestDeferred(id: UUID) extends Deferred[Option[SchemaVersion]] 24 | 25 | class CustomGraphQLResolver extends DeferredResolver[GraphQLService] { 26 | override def resolve(deferred: Vector[Deferred[Any]], ctx: GraphQLService, queryState: Any)( 27 | implicit ec: ExecutionContext 28 | ) = { 29 | val defMap = deferred.collect { 30 | case InferCSVSchemaDeferred(options, paths) => "csvSchemaInference" -> ctx.inferCSVSchema(options, paths) 31 | case InferJSONSchemaDeferred(paths) => "jsonSchemaInference" -> ctx.inferJSONSchema(paths) 32 | case InferParquetSchemaDeferred(t, paths) => "parquetSchemaInference" -> ctx.inferParquetSchema(t, paths) 33 | case InferAvroSchemaDeferred(paths) => "avroSchemaInference" -> ctx.inferAvroSchema(paths) 34 | case SchemaVersionsDeferred(id, first, after, last, before) => 35 | "schemaVersions" -> ctx.schemaVersions(id, first, after, last, before) 36 | case SchemaVersionLatestDeferred(id) => "schemaVersionLatest" -> ctx.latestSchemaVersion(id) 37 | } 38 | 39 | deferred flatMap { 40 | case InferCSVSchemaDeferred(_, _) => 
defMap.filter(_._1 == "csvSchemaInference").map(_._2) 41 | case InferJSONSchemaDeferred(_) => defMap.filter(_._1 == "jsonSchemaInference").map(_._2) 42 | case InferParquetSchemaDeferred(_, _) => defMap.filter(_._1 == "parquetSchemaInference").map(_._2) 43 | case InferAvroSchemaDeferred(_) => defMap.filter(_._1 == "avroSchemaInference").map(_._2) 44 | case SchemaVersionsDeferred(_, _, _, _, _) => defMap.filter(_._1 == "schemaVersions").map(_._2) 45 | case SchemaVersionLatestDeferred(_) => defMap.filter(_._1 == "schemaVersionLatest").map(_._2) 46 | } 47 | } 48 | } 49 | 50 | object CustomGraphQLResolver { 51 | val deferredResolver: DeferredResolver[GraphQLService] = 52 | DeferredResolver.fetchersWithFallback( 53 | new CustomGraphQLResolver 54 | ) 55 | } 56 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/graphql/GraphQLService.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.graphql 2 | 3 | import java.util.UUID 4 | 5 | import akka.actor.{ActorRef, ActorSystem} 6 | import akka.pattern.{ask, AskTimeoutException} 7 | import akka.util.Timeout 8 | import com.github.mauricio.async.db.postgresql.exceptions.GenericDatabaseException 9 | import org.apache.spark.sql.SparkSession 10 | import sangria.macros.derive.GraphQLField 11 | import schemer._ 12 | import schemer.registry.Cursor 13 | import schemer.registry.actors._ 14 | import schemer.registry.dao.{PaginatedFilter, SchemaDao} 15 | import schemer.registry.exceptions.{ 16 | SchemerException, 17 | SchemerInferenceException, 18 | SchemerSchemaCreationException, 19 | SchemerSchemaVersionCreationException 20 | } 21 | import schemer.registry.models._ 22 | import schemer.registry.utils.Clock 23 | 24 | import scala.concurrent.{ExecutionContext, Future} 25 | import scala.language.postfixOps 26 | 27 | class GraphQLService( 28 | schemaDao: SchemaDao, 29 | inferActor: ActorRef 30 | )( 31 | implicit val spark: SparkSession, 32 | implicit val clock: Clock, 33 | implicit val ec: ExecutionContext, 34 | implicit val system: ActorSystem, 35 | implicit val inferActorTimeout: Timeout 36 | ) { 37 | 38 | def inferCSVSchema(options: CSVOptions, paths: Seq[String]) = 39 | inferWithActor(CSVSchemaInferenceRequest(options, paths)) 40 | 41 | def inferJSONSchema(paths: Seq[String]) = 42 | inferWithActor(JSONSchemaInferenceRequest(paths)) 43 | 44 | def inferParquetSchema(`type`: String, paths: Seq[String]) = 45 | inferWithActor(ParquetSchemaInferenceRequest(`type`, paths)) 46 | 47 | def inferAvroSchema(paths: Seq[String]) = 48 | inferWithActor(AvroSchemaInferenceRequest(paths)) 49 | 50 | @GraphQLField 51 | def addSchema(name: String, namespace: String, `type`: SchemaType, user: String) = 52 | schemaDao.create(Schema(name, namespace, `type`.`type`, clock.nowUtc, user)).recoverWith { 53 | case ex: GenericDatabaseException => 54 | Future.failed(SchemerSchemaCreationException(ex.asInstanceOf[GenericDatabaseException].errorMessage.message)) 55 | case ex => 56 | Future.failed(SchemerSchemaCreationException(ex.getMessage)) 57 | } 58 | 59 | @GraphQLField 60 | def addSchemaVersion(schemaId: UUID, version: String, schemaConfig: String, user: String) = 61 | schemaDao 62 | .find(schemaId) 63 | .flatMap { 64 | case Some(schema) => 65 | val errors = Schemer.from(schema.`type`, schemaConfig).validate 66 | if (errors.isEmpty) { 67 | schemaDao.createVersion(SchemaVersion(null, schema.id, version, schemaConfig, clock.nowUtc, user)) 68 | } else { 
69 | Future.failed( 70 | SchemerSchemaVersionCreationException( 71 | s"Error(s) validating schema config - ${errors.mkString("[", ", ", "]")}" 72 | ) 73 | ) 74 | } 75 | case None => Future.failed(SchemerSchemaVersionCreationException(s"Schema with id $schemaId not found")) 76 | } 77 | .recoverWith { 78 | case ex: GenericDatabaseException => 79 | Future.failed( 80 | SchemerSchemaVersionCreationException(ex.asInstanceOf[GenericDatabaseException].errorMessage.message) 81 | ) 82 | case ex => 83 | Future.failed(SchemerSchemaVersionCreationException(ex.getMessage)) 84 | } 85 | 86 | def allSchemas = schemaDao.all() 87 | 88 | def schema(id: UUID) = schemaDao.find(id) 89 | 90 | def schemaVersion(id: UUID) = schemaDao.findVersion(id) 91 | 92 | def schemaVersions(id: UUID, first: Option[Int], after: Option[Cursor], last: Option[Int], before: Option[Cursor]) = 93 | if (first.nonEmpty && last.nonEmpty) { 94 | Future.failed(new SchemerException("Both first and last cannot be specified")) 95 | } else { 96 | import schemer.registry.utils.DateTimeUtils._ 97 | val filter = 98 | PaginatedFilter( 99 | Some(id), 100 | first, 101 | after.map(_.toDateTime), 102 | last, 103 | before.map(_.toDateTime) 104 | ) 105 | 106 | last 107 | .fold(schemaDao.findFirstVersions(filter))(_ => schemaDao.findLastVersions(filter)) 108 | .map { versions => 109 | val pageInfo: PageInfo = buildPageInfo(first, last, versions.length) 110 | val finalVersions = Option(pageInfo.hasMore).filter(identity).fold(versions)(_ => versions.dropRight(1)) 111 | SchemaSchemaVersionConnection( 112 | pageInfo, 113 | finalVersions.map { version => 114 | SchemaSchemaVersionEdge(version.createdOn.toCursor, version) 115 | } 116 | ) 117 | } 118 | } 119 | 120 | private def buildPageInfo(first: Option[Int], last: Option[Int], count: Int) = 121 | PageInfo(first.exists(count > _), last.exists(count > _)) 122 | 123 | def latestSchemaVersion(id: UUID) = schemaDao.findLatestVersion(id) 124 | 125 | def inferWithActor(message: Any) = 126 | (inferActor ? 
message).recoverWith { 127 | case ex: SchemerInferenceException => 128 | Future.failed(ex) 129 | case _: AskTimeoutException => 130 | Future.failed(SchemerInferenceException("Timeout while trying to infer schema")) 131 | case ex => 132 | Future.failed(SchemerInferenceException(ex.getMessage)) 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/graphql/schema/GraphQLCustomTypes.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.graphql.schema 2 | 3 | import java.util.UUID 4 | 5 | import org.joda.time.format.ISODateTimeFormat 6 | import org.joda.time.{DateTime, DateTimeZone} 7 | import sangria.ast 8 | import sangria.schema.ScalarType 9 | import sangria.validation.ValueCoercionViolation 10 | 11 | import scala.util.{Failure, Success, Try} 12 | 13 | trait GraphQLCustomTypes { 14 | case object DateCoercionViolation extends ValueCoercionViolation("Date value expected") 15 | 16 | def parseDate(s: String) = Try(new DateTime(s, DateTimeZone.UTC)) match { 17 | case Success(date) => Right(date) 18 | case Failure(_) => Left(DateCoercionViolation) 19 | } 20 | 21 | def parseUUID(s: String) = Try(UUID.fromString(s)) match { 22 | case Success(uuid) => Right(uuid) 23 | case Failure(_) => Left(DateCoercionViolation) 24 | } 25 | 26 | implicit val DateTimeType = ScalarType[DateTime]( 27 | "DateTime", 28 | coerceOutput = (date: DateTime, _) => ast.StringValue(ISODateTimeFormat.dateTime().print(date)), 29 | coerceUserInput = { 30 | case s: String => parseDate(s) 31 | case _ => Left(DateCoercionViolation) 32 | }, 33 | coerceInput = { 34 | case ast.StringValue(s, _, _) => parseDate(s) 35 | case _ => Left(DateCoercionViolation) 36 | } 37 | ) 38 | 39 | implicit val UUIDType = ScalarType[UUID]( 40 | "UUID", 41 | coerceOutput = (uuid: UUID, _) => ast.StringValue(uuid.toString), 42 | coerceUserInput = { 43 | case s: String => parseUUID(s) 44 | case _ => Left(DateCoercionViolation) 45 | }, 46 | coerceInput = { 47 | case ast.StringValue(s, _, _) => parseUUID(s) 48 | case _ => Left(DateCoercionViolation) 49 | } 50 | ) 51 | } 52 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/graphql/schema/InferType.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.graphql.schema 2 | 3 | import sangria.schema._ 4 | import schemer.registry.graphql.schema.SchemaDefinition.constantComplexity 5 | import sangria.macros.derive.{deriveInputObjectType, deriveObjectType, InputObjectTypeName} 6 | import schemer.registry.graphql._ 7 | import spray.json.DefaultJsonProtocol 8 | import sangria.marshalling.sprayJson._ 9 | import schemer._ 10 | 11 | trait JSONSchemaType { 12 | implicit val JSONSchemaType = ObjectType( 13 | "JSONSchema", 14 | "JSON Schema", 15 | fields[Unit, JSONSchema]( 16 | Field( 17 | "schema", 18 | StringType, 19 | description = Some("CSV Schema as JSON string"), 20 | complexity = constantComplexity(10), 21 | resolve = ctx => ctx.value.schema 22 | ), 23 | Field( 24 | "sparkSchema", 25 | StringType, 26 | description = Some("Spark Schema as JSON string"), 27 | complexity = constantComplexity(100), 28 | resolve = ctx => ctx.value.sparkSchema().prettyJson 29 | ) 30 | ) 31 | ) 32 | } 33 | 34 | trait InferType extends JSONSchemaType with DefaultJsonProtocol { 35 | lazy implicit val TypeArg = Argument("type", 
ParquetSchemaUnderlyingType) 36 | lazy implicit val PathsArg = Argument("paths", ListInputType(StringType)) 37 | implicit val CSVOptionsFormat = jsonFormat5(CSVOptions.apply) 38 | lazy implicit val CSVOptionsInputType = deriveInputObjectType[CSVOptions](InputObjectTypeName("CSVOptionsInput")) 39 | lazy implicit val CSVOptionsArg = Argument("csvOptions", OptionInputType(CSVOptionsInputType), CSVOptions()) 40 | 41 | lazy implicit val CSVFieldType = deriveObjectType[Unit, CSVField]() 42 | lazy implicit val CSVOptionsType = deriveObjectType[Unit, CSVOptions]() 43 | lazy val CSVSchemaType = ObjectType( 44 | "CSVSchema", 45 | "CSV Schema", 46 | fields[Unit, CSVSchema]( 47 | Field( 48 | "fields", 49 | ListType(CSVFieldType), 50 | description = Some("Fields of the CSV Schema"), 51 | complexity = constantComplexity(1), 52 | resolve = ctx => ctx.value.fields 53 | ), 54 | Field( 55 | "options", 56 | CSVOptionsType, 57 | description = Some("Options of the CSV Schema"), 58 | complexity = constantComplexity(1), 59 | resolve = ctx => ctx.value.options 60 | ), 61 | Field( 62 | "schema", 63 | StringType, 64 | description = Some("CSV Schema as JSON string"), 65 | complexity = constantComplexity(100), 66 | resolve = ctx => ctx.value.schema() 67 | ), 68 | Field( 69 | "sparkSchema", 70 | StringType, 71 | description = Some("Spark Schema as JSON string"), 72 | complexity = constantComplexity(100), 73 | resolve = ctx => ctx.value.sparkSchema().prettyJson 74 | ) 75 | ) 76 | ) 77 | 78 | lazy val ParquetSchemaUnderlyingType = EnumType( 79 | "ParquetSchemaType", 80 | Some("Supported schema types for Parquet"), 81 | List( 82 | EnumValue("Avro", value = schemer.ParquetSchemaType.Avro.`type`), 83 | EnumValue("Csv", value = schemer.ParquetSchemaType.Csv.`type`), 84 | EnumValue("Json", value = schemer.ParquetSchemaType.Json.`type`) 85 | ) 86 | ) 87 | 88 | lazy val ParquetSchemaType = ObjectType( 89 | "ParquetSchema", 90 | "Parquet Schema", 91 | fields[Unit, ParquetSchema]( 92 | Field( 93 | "type", 94 | ParquetSchemaUnderlyingType, 95 | description = Some("Parquet Schema type"), 96 | complexity = constantComplexity(10), 97 | resolve = ctx => ctx.value.`type`.`type` 98 | ), 99 | Field( 100 | "schema", 101 | StringType, 102 | description = Some("Parquet Schema as JSON string"), 103 | complexity = constantComplexity(10), 104 | resolve = ctx => ctx.value.schema 105 | ), 106 | Field( 107 | "sparkSchema", 108 | StringType, 109 | description = Some("Spark Schema as JSON string"), 110 | complexity = constantComplexity(100), 111 | resolve = ctx => ctx.value.sparkSchema().prettyJson 112 | ) 113 | ) 114 | ) 115 | 116 | lazy val AvroSchemaType = ObjectType( 117 | "AvroSchema", 118 | "Avro Schema", 119 | fields[Unit, AvroSchema]( 120 | Field( 121 | "schema", 122 | StringType, 123 | description = Some("Avro Schema as string"), 124 | complexity = constantComplexity(10), 125 | resolve = ctx => ctx.value.schema 126 | ), 127 | Field( 128 | "sparkSchema", 129 | StringType, 130 | description = Some("Spark Schema as JSON string"), 131 | complexity = constantComplexity(100), 132 | resolve = ctx => ctx.value.sparkSchema().prettyJson 133 | ) 134 | ) 135 | ) 136 | 137 | lazy val InferType = ObjectType( 138 | "Inference", 139 | "Schema Inference", 140 | fields[GraphQLService, Unit]( 141 | Field( 142 | "csv", 143 | CSVSchemaType, 144 | description = Some("CSV Schema inference"), 145 | complexity = constantComplexity(500), 146 | resolve = ctx => InferCSVSchemaDeferred(ctx arg CSVOptionsArg, ctx arg PathsArg), 147 | arguments = List(CSVOptionsArg, 
PathsArg) 148 | ), 149 | Field( 150 | "json", 151 | JSONSchemaType, 152 | description = Some("JSON Schema inference"), 153 | complexity = constantComplexity(500), 154 | resolve = ctx => InferJSONSchemaDeferred(ctx arg PathsArg), 155 | arguments = List(PathsArg) 156 | ), 157 | Field( 158 | "parquet", 159 | ParquetSchemaType, 160 | description = Some("Parquet Schema inference"), 161 | complexity = constantComplexity(500), 162 | resolve = ctx => InferParquetSchemaDeferred(ctx arg TypeArg, ctx arg PathsArg), 163 | arguments = List(TypeArg, PathsArg) 164 | ), 165 | Field( 166 | "avro", 167 | AvroSchemaType, 168 | description = Some("Avro Schema inference"), 169 | complexity = constantComplexity(500), 170 | resolve = ctx => InferAvroSchemaDeferred(ctx arg PathsArg), 171 | arguments = List(PathsArg) 172 | ) 173 | ) 174 | ) 175 | } 176 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/graphql/schema/MetadataType.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.graphql.schema 2 | 3 | import buildinfo.BuildInfo 4 | import sangria.macros.derive.deriveObjectType 5 | import sangria.schema.ObjectType 6 | 7 | case class Metadata(version: String = BuildInfo.version) 8 | 9 | trait MetadataType { 10 | lazy val MetadataType: ObjectType[Unit, Metadata] = deriveObjectType() 11 | } 12 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/graphql/schema/MutationType.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.graphql.schema 2 | 3 | import sangria.macros.derive.deriveContextObjectType 4 | import schemer.registry.graphql.GraphQLService 5 | 6 | trait MutationType extends JSONSchemaType with SchemaType with GraphQLCustomTypes { 7 | val MutationType = deriveContextObjectType[GraphQLService, GraphQLService, Unit](identity) 8 | } 9 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/graphql/schema/SchemaDefinition.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.graphql.schema 2 | 3 | import sangria.schema.{fields, Args, Field, ListType, ObjectType, OptionType, Schema} 4 | import schemer.registry.graphql.GraphQLService 5 | import schemer.registry.models.{Schema => SSchema} 6 | 7 | object SchemaDefinition extends InferType with MetadataType with MutationType with SchemaType with GraphQLCustomTypes { 8 | 9 | def constantComplexity[Ctx](complexity: Double) = 10 | Some((_: Ctx, _: Args, child: Double) => child + complexity) 11 | 12 | val QueryType = ObjectType( 13 | "Query", 14 | "Root", 15 | fields[GraphQLService, Unit]( 16 | Field( 17 | "schema", 18 | OptionType(SchemaType), 19 | description = Some("Schema"), 20 | resolve = ctx => ctx.ctx.schema(ctx arg IdArg), 21 | arguments = List(IdArg) 22 | ), 23 | Field( 24 | "schemas", 25 | ListType(SchemaType), 26 | description = Some("All Schemas"), 27 | resolve = ctx => ctx.ctx.allSchemas 28 | ), 29 | Field( 30 | "schemaVersion", 31 | OptionType(SchemaVersionType), 32 | description = Some("Schema Version"), 33 | resolve = ctx => ctx.ctx.schemaVersion(ctx arg IdArg), 34 | arguments = List(IdArg) 35 | ), 36 | Field( 37 | "infer", 38 | InferType, 39 | description = Some("Schema Inference"), 40 | resolve = _ => () 41 | ), 42 | Field( 43 | 
"metadata", 44 | MetadataType, 45 | description = Some("Metadata"), 46 | complexity = constantComplexity(100), 47 | resolve = _ => Metadata() 48 | ) 49 | ) 50 | ) 51 | val schema = Schema(QueryType, Some(MutationType)) 52 | } 53 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/graphql/schema/SchemaType.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.graphql.schema 2 | 3 | import sangria.macros.derive.deriveObjectType 4 | import sangria.schema.{Field, ObjectType, _} 5 | import schemer.{SchemaType => SSchemaType} 6 | import schemer.registry.graphql.{SchemaVersionLatestDeferred, SchemaVersionsDeferred} 7 | import schemer.registry.graphql.schema.SchemaDefinition.constantComplexity 8 | import schemer.registry.models.{ 9 | PageInfo, 10 | SchemaSchemaVersionConnection, 11 | SchemaSchemaVersionEdge, 12 | SchemaVersion, 13 | Schema => SSchema 14 | } 15 | 16 | trait SchemaType extends GraphQLCustomTypes { 17 | lazy implicit val SchemaTypeType = EnumType[SSchemaType]( 18 | "SchemaType", 19 | Some("Supported schema types"), 20 | List( 21 | EnumValue("Avro", value = SSchemaType.Avro), 22 | EnumValue("Csv", value = SSchemaType.Csv), 23 | EnumValue("Json", value = SSchemaType.Json), 24 | EnumValue("ParquetAvro", value = SSchemaType.ParquetAvro), 25 | EnumValue("ParquetCsv", value = SSchemaType.ParquetCsv), 26 | EnumValue("ParquetJson", value = SSchemaType.ParquetJson) 27 | ) 28 | ) 29 | lazy implicit val IdArg = Argument("id", UUIDType) 30 | lazy implicit val FirstArg = Argument("first", OptionInputType(IntType)) 31 | lazy implicit val AfterArg = Argument("after", OptionInputType(StringType)) 32 | lazy implicit val LastArg = Argument("last", OptionInputType(IntType)) 33 | lazy implicit val BeforeArg = Argument("before", OptionInputType(StringType)) 34 | lazy implicit val PageInfo: ObjectType[Unit, PageInfo] = deriveObjectType() 35 | lazy implicit val SchemaVersionType: ObjectType[Unit, SchemaVersion] = deriveObjectType() 36 | lazy implicit val SchemaSchemaVersionEdgeType: ObjectType[Unit, SchemaSchemaVersionEdge] = deriveObjectType() 37 | lazy implicit val SchemaSchemaVersionConnectionType: ObjectType[Unit, SchemaSchemaVersionConnection] = 38 | deriveObjectType() 39 | 40 | val SchemaType: ObjectType[Unit, SSchema] = ObjectType( 41 | "Schema", 42 | "Schema", 43 | fields[Unit, SSchema]( 44 | Field( 45 | "id", 46 | UUIDType, 47 | resolve = _.value.id 48 | ), 49 | Field( 50 | "name", 51 | StringType, 52 | resolve = _.value.name 53 | ), 54 | Field( 55 | "namespace", 56 | StringType, 57 | resolve = _.value.namespace 58 | ), 59 | Field( 60 | "type", 61 | SchemaTypeType, 62 | resolve = ctx => SSchemaType.supportedTypes.find(_.`type` == ctx.value.`type`).get 63 | ), 64 | Field( 65 | "createdOn", 66 | DateTimeType, 67 | resolve = _.value.createdOn 68 | ), 69 | Field( 70 | "createdBy", 71 | StringType, 72 | resolve = _.value.createdBy 73 | ), 74 | Field( 75 | "versions", 76 | ListType(SchemaSchemaVersionConnectionType), 77 | resolve = ctx => 78 | SchemaVersionsDeferred(ctx.value.id, ctx arg FirstArg, ctx arg AfterArg, ctx arg LastArg, ctx arg BeforeArg), 79 | complexity = constantComplexity(200), 80 | arguments = List(FirstArg, AfterArg, LastArg, BeforeArg) 81 | ), 82 | Field( 83 | "latestVersion", 84 | OptionType(SchemaVersionType), 85 | resolve = ctx => SchemaVersionLatestDeferred(ctx.value.id), 86 | complexity = constantComplexity(200) 87 | ) 88 | ) 89 | ) 90 | 
91 | } 92 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/models/Schema.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.models 2 | 3 | import java.util.UUID 4 | 5 | import org.joda.time.DateTime 6 | 7 | case class Schema( 8 | id: UUID, 9 | name: String, 10 | namespace: String, 11 | `type`: String, 12 | createdOn: DateTime, 13 | createdBy: String 14 | ) 15 | 16 | object Schema { 17 | def apply(name: String, namespace: String, `type`: String, createdOn: DateTime, createdBy: String) = 18 | new Schema(null, name, namespace, `type`, createdOn, createdBy) 19 | } 20 | 21 | case class SchemaVersion( 22 | id: UUID, 23 | schemaId: UUID, 24 | version: String, 25 | schema: String, 26 | createdOn: DateTime, 27 | createdBy: String 28 | ) 29 | case class PageInfo(hasNextPage: Boolean, hasPreviousPage: Boolean) { 30 | def hasMore = hasNextPage || hasPreviousPage 31 | } 32 | case class SchemaSchemaVersionEdge(cursor: String, node: SchemaVersion) 33 | case class SchemaSchemaVersionConnection(pageInfo: PageInfo, edges: List[SchemaSchemaVersionEdge]) 34 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/package.scala: -------------------------------------------------------------------------------- 1 | package schemer 2 | 3 | package object registry { 4 | type Cursor = String 5 | } 6 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/routes/GraphQLRoutes.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.routes 2 | 3 | import akka.http.scaladsl.model.StatusCodes.{BadRequest, InternalServerError, OK} 4 | import akka.http.scaladsl.server.Directives.{as, complete, entity, get, getFromResource, path, post} 5 | import sangria.execution._ 6 | import sangria.parser.QueryParser 7 | import sangria.schema.Schema 8 | import spray.json.{JsObject, JsString, JsValue} 9 | import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ 10 | import akka.http.scaladsl.server.Directives._ 11 | import sangria.marshalling.sprayJson._ 12 | import schemer.registry.exceptions.SchemerException 13 | import schemer.registry.graphql.{CustomGraphQLResolver, GraphQLService} 14 | import schemer.registry.graphql.schema.SchemaDefinition 15 | 16 | import scala.util.{Failure, Success} 17 | import scala.concurrent.ExecutionContext.Implicits.global 18 | 19 | trait GraphQLRoutes { 20 | val graphQLService: GraphQLService 21 | 22 | case object TooComplexQuery extends Exception 23 | val rejectComplexQueries = QueryReducer.rejectComplexQueries( 24 | 1000, 25 | (_: Double, _: GraphQLService) => TooComplexQuery 26 | ) 27 | 28 | val graphQLExceptionHandler: Executor.ExceptionHandler = { 29 | case (_, TooComplexQuery) => HandledException("Too complex query. 
Please reduce the field selection.") 30 | case (_, e: SchemerException) => HandledException(e.getMessage) 31 | } 32 | 33 | def executeGraphQLQuery(schema: Schema[GraphQLService, Unit], requestJson: JsValue) = { 34 | val JsObject(fields) = requestJson 35 | 36 | val JsString(query) = fields("query") 37 | 38 | val operation = fields.get("operationName") collect { 39 | case JsString(op) => op 40 | } 41 | 42 | val vars = fields.get("variables") match { 43 | case Some(obj: JsObject) => obj 44 | case _ => JsObject.empty 45 | } 46 | 47 | QueryParser.parse(query) match { 48 | 49 | case Success(queryDocument) => 50 | complete( 51 | Executor 52 | .execute( 53 | schema, 54 | queryDocument, 55 | graphQLService, 56 | deferredResolver = CustomGraphQLResolver.deferredResolver, 57 | variables = vars, 58 | operationName = operation, 59 | queryReducers = rejectComplexQueries :: Nil, 60 | exceptionHandler = graphQLExceptionHandler 61 | ) 62 | .map(OK -> _) 63 | .recover { 64 | case error: QueryAnalysisError => BadRequest -> error.resolveError 65 | case error: ErrorWithResolver => InternalServerError -> error.resolveError 66 | } 67 | ) 68 | 69 | case Failure(error) => 70 | complete(BadRequest -> JsObject("error" -> JsString(error.getMessage))) 71 | } 72 | } 73 | 74 | val graphQLRoutes = path("graphql") { 75 | post { 76 | entity(as[JsValue]) { requestJson => 77 | executeGraphQLQuery(SchemaDefinition.schema, requestJson) 78 | } 79 | } ~ get { 80 | getFromResource("graphql/graphiql.html") 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/routes/HealthRoutes.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.routes 2 | 3 | import java.io.{StringWriter, Writer} 4 | import java.util 5 | 6 | import akka.http.scaladsl.model.{HttpCharsets, HttpEntity, MediaType} 7 | import akka.http.scaladsl.server.Directives._ 8 | import io.prometheus.client.Collector.MetricFamilySamples 9 | import io.prometheus.client.CollectorRegistry 10 | import io.prometheus.client.exporter.common.TextFormat 11 | import io.prometheus.client.hotspot.DefaultExports 12 | 13 | trait HealthRoutes { 14 | 15 | DefaultExports.initialize() 16 | private val collectorRegistry = CollectorRegistry.defaultRegistry 17 | private val metricsMediaTypeParams = Map("version" -> "0.0.4") 18 | private val metricsMediaType = 19 | MediaType.customWithFixedCharset("text", "plain", HttpCharsets.`UTF-8`, params = metricsMediaTypeParams) 20 | 21 | def toPrometheusTextFormat(e: util.Enumeration[MetricFamilySamples]): String = { 22 | val writer: Writer = new StringWriter() 23 | TextFormat.write004(writer, e) 24 | 25 | writer.toString 26 | } 27 | 28 | val healthRoutes = path("health") { 29 | get { 30 | complete { 31 | "OK" 32 | } 33 | } 34 | } ~ path("metrics") { 35 | get { 36 | complete { 37 | HttpEntity(metricsMediaType, toPrometheusTextFormat(collectorRegistry.metricFamilySamples())) 38 | } 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/routes/Routes.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.routes 2 | 3 | import akka.http.scaladsl.model.{StatusCodes, Uri} 4 | import akka.http.scaladsl.server.Directives._ 5 | import akka.http.scaladsl.server.{ExceptionHandler, RejectionHandler} 6 | import 
com.typesafe.scalalogging.StrictLogging 7 | 8 | trait Routes extends GraphQLRoutes with HealthRoutes with StrictLogging { 9 | private val exceptionHandler = ExceptionHandler { 10 | case e: Exception => 11 | logger.error(s"Exception during client request processing: ${e.getMessage}", e) 12 | _.complete((StatusCodes.InternalServerError, "Internal server error")) 13 | } 14 | val rejectionHandler = RejectionHandler.default 15 | val logBlackListPaths = Seq("health") 16 | private def isBlacklistedPath(uri: Uri) = 17 | logBlackListPaths 18 | .map(s"/" + _) 19 | .exists(uri.toString().contains) 20 | val logDuration = extractRequestContext.flatMap { ctx => 21 | val start = System.currentTimeMillis() 22 | mapResponse { resp => 23 | val d = System.currentTimeMillis() - start 24 | if (!isBlacklistedPath(ctx.request.uri)) { 25 | logger.info(s"[${resp.status.intValue()}] ${ctx.request.method.name} ${ctx.request.uri} took: ${d}ms") 26 | } 27 | resp 28 | } & handleRejections(rejectionHandler) 29 | } 30 | val routes = logDuration { 31 | handleExceptions(exceptionHandler) { 32 | encodeResponse { 33 | graphQLRoutes ~ healthRoutes 34 | } 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/routes/SwaggerRoutes.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.routes 2 | 3 | import akka.http.scaladsl.model.StatusCodes 4 | import akka.http.scaladsl.server.Directives._ 5 | 6 | trait SwaggerRoutes { 7 | 8 | val swaggerRoutes = pathPrefix("swagger") { 9 | pathEnd { 10 | extractUri { uri => 11 | redirect(uri + "/", StatusCodes.TemporaryRedirect) 12 | } 13 | } ~ 14 | pathSingleSlash { 15 | getFromResource("swagger-ui/index.html") 16 | } ~ 17 | getFromResourceDirectory("swagger-ui") 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/server/ConfigWithDefault.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.server 2 | 3 | import java.net.InetAddress 4 | import java.util.concurrent.TimeUnit 5 | 6 | import com.typesafe.config.{Config, ConfigFactory} 7 | 8 | trait ConfigWithDefault { 9 | 10 | def rootConfig: Config 11 | 12 | def getBoolean(path: String, default: Boolean) = ifHasPath(path, default) { _.getBoolean(path) } 13 | def getString(path: String, default: String) = ifHasPath(path, default) { _.getString(path) } 14 | def getInt(path: String, default: Int) = ifHasPath(path, default) { _.getInt(path) } 15 | def getConfig(path: String, default: Config) = ifHasPath(path, default) { _.getConfig(path) } 16 | def getMilliseconds(path: String, default: Long) = ifHasPath(path, default) { 17 | _.getDuration(path, TimeUnit.MILLISECONDS) 18 | } 19 | def getOptionalString(path: String, default: Option[String] = None) = getOptional(path) { _.getString(path) } 20 | 21 | def loadDefault(rootName: String, loadEnvConf: Boolean = true) = 22 | if (loadEnvConf) { 23 | ConfigFactory 24 | .parseResources(s"env-conf/$getHostname.conf") 25 | .withFallback(ConfigFactory.load()) 26 | .getConfig(rootName) 27 | } else { 28 | ConfigFactory.load().getConfig(rootName) 29 | } 30 | 31 | protected def getHostname = InetAddress.getLocalHost.getHostName 32 | 33 | private def ifHasPath[T](path: String, default: T)(get: Config => T): T = 34 | if (rootConfig.hasPath(path)) get(rootConfig) else default 35 | 36 | private def 
getOptional[T](fullPath: String, default: Option[T] = None)(get: Config => T) = 37 | if (rootConfig.hasPath(fullPath)) { 38 | Some(get(rootConfig)) 39 | } else { 40 | default 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/server/InferenceConfig.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.server 2 | 3 | import com.typesafe.config.Config 4 | import java.util.concurrent.TimeUnit.SECONDS 5 | import scala.concurrent.duration._ 6 | 7 | trait InferenceConfig extends ConfigWithDefault { 8 | def rootConfig: Config 9 | lazy val inferenceConfig = rootConfig.getConfig("inference") 10 | lazy val inferTimeout = inferenceConfig.getDuration("timeout", SECONDS).seconds 11 | } 12 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/server/Main.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.server 2 | 3 | import akka.actor.ActorSystem 4 | import akka.http.scaladsl.Http 5 | import akka.http.scaladsl.Http.ServerBinding 6 | import akka.stream.ActorMaterializer 7 | import com.typesafe.scalalogging.StrictLogging 8 | import schemer.registry.routes.Routes 9 | 10 | import scala.concurrent.ExecutionContext.Implicits.global 11 | import scala.concurrent.Future 12 | import scala.util.{Failure, Success} 13 | 14 | class Main() extends StrictLogging { 15 | 16 | def start(): (Future[ServerBinding], Modules) = { 17 | 18 | implicit val _system: ActorSystem = ActorSystem("main") 19 | implicit val _materializer: ActorMaterializer = ActorMaterializer() 20 | 21 | val modules = new Modules with Routes { 22 | implicit lazy val ec = _system.dispatcher 23 | implicit lazy val mat = _materializer 24 | lazy val system = _system 25 | 26 | } 27 | 28 | (Http().bindAndHandle(modules.routes, modules.config.serverHost, modules.config.serverPort), modules) 29 | } 30 | } 31 | 32 | object Main extends App with StrictLogging { 33 | val (startFuture, modules) = new Main().start() 34 | 35 | val host = modules.config.serverHost 36 | val port = modules.config.serverPort 37 | 38 | val system = modules.system 39 | 40 | startFuture.onComplete { 41 | case Success(b) => 42 | logger.info(s"Server started on $host:$port") 43 | sys.addShutdownHook { 44 | b.unbind() 45 | shutdown() 46 | } 47 | case Failure(e) => 48 | logger.error(s"Cannot start server on $host:$port", e) 49 | sys.addShutdownHook { 50 | shutdown() 51 | } 52 | } 53 | 54 | def shutdown() { 55 | modules.system.terminate() 56 | logger.info("Server stopped") 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/server/Modules.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.server 2 | 3 | import akka.actor.{ActorSystem, Props} 4 | import akka.routing.BalancingPool 5 | import akka.stream.Materializer 6 | import akka.util.Timeout 7 | import com.typesafe.config.Config 8 | import org.apache.spark.SparkConf 9 | import org.apache.spark.sql.SparkSession 10 | import schemer.registry.actors.InferActor 11 | import schemer.registry.dao.SchemaDao 12 | import schemer.registry.graphql.GraphQLService 13 | import schemer.registry.sql.{DatabaseConfig, SqlDatabase} 14 | import schemer.registry.utils.RealTimeClock 15 | 16 | import 
scala.concurrent.ExecutionContext 17 | import scala.concurrent.duration._ 18 | 19 | trait Modules { 20 | 21 | implicit def system: ActorSystem 22 | 23 | implicit def ec: ExecutionContext 24 | 25 | implicit def mat: Materializer 26 | 27 | lazy val config = new ServerConfig with DatabaseConfig with InferenceConfig { 28 | override def rootConfig: Config = loadDefault("registry") 29 | } 30 | 31 | implicit lazy val clock = RealTimeClock 32 | 33 | implicit val spark: SparkSession = SparkSession.builder 34 | .config(new SparkConf()) 35 | .master("local[*]") 36 | .getOrCreate() 37 | 38 | val hadoopConf = spark.sparkContext.hadoopConfiguration 39 | 40 | val sqlDatabase = SqlDatabase(config) 41 | sqlDatabase.updateSchema() 42 | 43 | lazy val schemaDao = new SchemaDao(sqlDatabase) 44 | lazy val inferActor = locally { 45 | implicit lazy val inferTimeout = Timeout(config.inferTimeout) 46 | system.actorOf(Props(new InferActor()).withRouter(BalancingPool(nrOfInstances = 10)), name = "InferActor") 47 | } 48 | lazy val graphQLService = locally { 49 | implicit lazy val inferActorTimeout = Timeout(config.inferTimeout + 20.seconds) 50 | new GraphQLService(schemaDao, inferActor) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/server/ServerConfig.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.server 2 | 3 | import com.typesafe.config.Config 4 | 5 | trait ServerConfig extends ConfigWithDefault { 6 | 7 | def rootConfig: Config 8 | 9 | lazy val serverHost: String = rootConfig.getString("server.host") 10 | lazy val serverPort: Int = rootConfig.getInt("server.port") 11 | } 12 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/sql/DatabaseConfig.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.sql 2 | 3 | import com.typesafe.config.Config 4 | import schemer.registry.server.ConfigWithDefault 5 | 6 | trait DatabaseConfig extends ConfigWithDefault { 7 | def rootConfig: Config 8 | 9 | val h2config = rootConfig.getConfig("h2") 10 | val postgresConfig = rootConfig.getConfig("postgres") 11 | } 12 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/sql/SqlDatabase.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.sql 2 | 3 | import io.getquill.{PostgresAsyncContext, SnakeCase} 4 | import org.apache.commons.lang3.StringUtils 5 | import org.flywaydb.core.Flyway 6 | import org.joda.time.DateTime 7 | 8 | trait Quotes { this: PostgresAsyncContext[_] => 9 | implicit class DateTimeQuotes(l: DateTime) { 10 | def >(r: DateTime) = quote(infix"$l > $r".as[Boolean]) 11 | def <(r: DateTime) = quote(infix"$l < $r".as[Boolean]) 12 | } 13 | 14 | implicit class OptDateTimeQuotes(l: Option[DateTime]) { 15 | def >(r: DateTime) = quote(infix"($l::timestamptz is null or $l > $r)".as[Boolean]) 16 | def <(r: DateTime) = quote(infix"($l::timestamptz is null or $l < $r)".as[Boolean]) 17 | } 18 | } 19 | 20 | case class SqlDatabase(config: DatabaseConfig) { 21 | lazy val ctx = new PostgresAsyncContext(SnakeCase, config.postgresConfig) with Quotes 22 | 23 | def updateSchema() = { 24 | val postgresUrl = config.postgresConfig.getString("url") 25 | if (StringUtils.isNotEmpty(postgresUrl)) { 26 
| val flyway = new Flyway() 27 | flyway.setOutOfOrder(true) 28 | flyway.setDataSource(s"jdbc:$postgresUrl", "", "") 29 | flyway.migrate() 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/sql/package.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry 2 | 3 | import io.getquill.{PostgresAsyncContext, SnakeCase} 4 | 5 | package object sql { 6 | type DbContext = PostgresAsyncContext[SnakeCase] 7 | } 8 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/utils/Clock.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.utils 2 | 3 | import org.joda.time.{DateTime, DateTimeZone, Duration} 4 | import org.joda.time.format.PeriodFormatterBuilder 5 | 6 | trait Clock { 7 | def now: DateTime 8 | def nowUtc: DateTime 9 | def nowMillis: Long 10 | } 11 | 12 | object RealTimeClock extends Clock with Serializable { 13 | def now = DateTime.now() 14 | def nowUtc = DateTime.now(DateTimeZone.UTC) 15 | def nowMillis = System.currentTimeMillis() 16 | } 17 | 18 | class FixtureTimeClock(millis: Long) extends Clock with Serializable { 19 | def now = new DateTime(millis) 20 | def nowUtc = new DateTime(millis, DateTimeZone.UTC) 21 | def nowMillis = millis 22 | } 23 | 24 | class FormatDuration() { 25 | def format(time: Duration): String = { 26 | val period = time.toPeriod() 27 | val hms = new PeriodFormatterBuilder() 28 | .printZeroAlways() 29 | .appendHours() 30 | .appendSeparator(" hours ") 31 | .appendMinutes() 32 | .appendSeparator(" minutes ") 33 | .appendSeconds() 34 | .appendSuffix(" seconds") 35 | .toFormatter() 36 | hms.print(period) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /schemer-registry/src/main/scala/schemer/registry/utils/DateTimeUtils.scala: -------------------------------------------------------------------------------- 1 | package schemer.registry.utils 2 | 3 | import java.nio.charset.StandardCharsets 4 | import java.util.Base64 5 | 6 | import org.joda.time.DateTime 7 | import schemer.registry.Cursor 8 | 9 | object DateTimeUtils { 10 | implicit class DateTimeCursor(val dt: DateTime) { 11 | def toCursor: Cursor = Base64.getEncoder.encodeToString(dt.getMillis.toString.getBytes(StandardCharsets.UTF_8)) 12 | } 13 | 14 | implicit class CursorDateTime(val cursor: Cursor) { 15 | def toDateTime: DateTime = 16 | new DateTime(new String(Base64.getDecoder.decode(cursor), StandardCharsets.UTF_8).toLong) 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /schemer-ui.md: -------------------------------------------------------------------------------- 1 | # Schemer UI screens 2 | 3 | ## Browse schemas 4 | 5 |
[screenshot]
6 | 
7 | [screenshot]
8 | 
9 | ## Schema Details
10 | 
11 | [screenshot]
12 | 
13 | [screenshot]
14 | 
15 | ## JSON representation of Schema
16 | 
17 | [screenshot]
18 | 
19 | [screenshot]
20 | 
21 | ## Create Schema
22 | 
23 | [screenshot]
24 | 
25 | [screenshot]
26 | 
27 | ## Create Schema Version
28 | 
29 | [screenshot]
30 | 
31 | [screenshot]
32 | 
33 | ## Field definition Wizard
34 | 
35 | [screenshot]
36 | 
37 | [screenshot]
-------------------------------------------------------------------------------- /secring.gpg.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/indix/schemer/16069813fa20b652f8e7735b64a47519e3ee8f14/secring.gpg.enc -------------------------------------------------------------------------------- /sonatype.sbt: -------------------------------------------------------------------------------- 1 | credentials += Credentials( 2 | "Sonatype Nexus Repository Manager", 3 | "oss.sonatype.org", 4 | System.getenv("SONATYPE_USERNAME"), 5 | System.getenv("SONATYPE_PASSWORD") 6 | ) 7 | --------------------------------------------------------------------------------
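
A minimal standalone sketch (not part of the repository) of the pagination-cursor encoding used by `DateTimeUtils` above: a cursor is simply the Base64-encoded millisecond timestamp of a `DateTime`, so encoding and decoding are symmetric. The object name and the sample timestamp are illustrative only.

```scala
import java.nio.charset.StandardCharsets
import java.util.Base64

import org.joda.time.DateTime

object CursorRoundTrip extends App {
  // Arbitrary test value: 2018-01-01T00:00:00Z in epoch millis.
  val original = new DateTime(1514764800000L)

  // Encode (DateTime -> cursor), mirroring DateTimeCursor.toCursor:
  // Base64 of the decimal millis string.
  val cursor: String =
    Base64.getEncoder.encodeToString(original.getMillis.toString.getBytes(StandardCharsets.UTF_8))

  // Decode (cursor -> DateTime), mirroring CursorDateTime.toDateTime:
  // Base64-decode, parse the millis, rebuild the DateTime.
  val restored =
    new DateTime(new String(Base64.getDecoder.decode(cursor), StandardCharsets.UTF_8).toLong)

  assert(restored.getMillis == original.getMillis)
  println(s"cursor=$cursor restored=$restored") // cursor=MTUxNDc2NDgwMDAwMA==
}
```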