├── .DS_Store
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows
│       ├── ci.yml
│       ├── codeql-analysis.yml
│       └── proto-schema-compatibility.yml
├── .gitignore
├── CNAME
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── _config.yml
├── asset
│   ├── ds_classification.png
│   ├── logo copy.png
│   ├── logo.png
│   └── sample_schema.png
├── document.sh
├── install.sh
├── pom.xml
├── score.sh
├── src
│   ├── main
│   │   ├── java
│   │   │   └── org
│   │   │       └── schemata
│   │   │           ├── SchemataExecutor.java
│   │   │           ├── SchemataMain.java
│   │   │           ├── app
│   │   │           │   ├── DocumentApp.java
│   │   │           │   ├── SchemaScoreApp.java
│   │   │           │   └── SchemaValidatorApp.java
│   │   │           ├── compatibility
│   │   │           │   ├── Result.java
│   │   │           │   ├── SchemaCompatibilityChecker.java
│   │   │           │   └── Summary.java
│   │   │           ├── domain
│   │   │           │   ├── Constraints.java
│   │   │           │   ├── Depends.java
│   │   │           │   ├── EventType.java
│   │   │           │   ├── Field.java
│   │   │           │   ├── Link.java
│   │   │           │   ├── ModelType.java
│   │   │           │   ├── Schema.java
│   │   │           │   ├── SchemaType.java
│   │   │           │   └── Subscribers.java
│   │   │           ├── exception
│   │   │           │   ├── SchemaNotFoundException.java
│   │   │           │   └── SchemaParserException.java
│   │   │           ├── graph
│   │   │           │   ├── SchemaGraph.java
│   │   │           │   └── WeightedSchemaEdge.java
│   │   │           ├── json
│   │   │           │   └── Json.java
│   │   │           ├── printer
│   │   │           │   └── Console.java
│   │   │           ├── provider
│   │   │           │   ├── SchemaParser.java
│   │   │           │   ├── avro
│   │   │           │   │   ├── AvroSchemaCompatibilityChecker.java
│   │   │           │   │   └── AvroSchemaParser.java
│   │   │           │   ├── dbt
│   │   │           │   │   ├── DbtCatalogMetadata.java
│   │   │           │   │   ├── DbtCatalogParser.java
│   │   │           │   │   ├── DbtManifestParser.java
│   │   │           │   │   ├── DbtSchemaCompatibilityChecker.java
│   │   │           │   │   └── DbtSchemaParser.java
│   │   │           │   └── protobuf
│   │   │           │       ├── Loader.java
│   │   │           │       ├── ProtoFileDescriptorSetLoader.java
│   │   │           │       ├── ProtoProcessor.java
│   │   │           │       ├── ProtoSchemaCompatibilityChecker.java
│   │   │           │       └── ProtoSchemaParser.java
│   │   │           └── validate
│   │   │               ├── FieldTrigger.java
│   │   │               ├── FieldValidator.java
│   │   │               ├── Result.java
│   │   │               ├── Rules.java
│   │   │               ├── SchemaTrigger.java
│   │   │               ├── SchemaValidator.java
│   │   │               ├── Status.java
│   │   │               └── Validator.java
│   │   └── resources
│   │       ├── avro
│   │       │   └── brand.avsc
│   │       ├── logback.xml
│   │       └── schema
│   │           ├── brand.proto
│   │           ├── campaign.proto
│   │           ├── category.proto
│   │           ├── product.proto
│   │           └── user.proto
│   ├── opencontract
│   │   └── v1
│   │       └── org
│   │           └── schemata
│   │               └── protobuf
│   │                   ├── constraints.proto
│   │                   ├── schemata.proto
│   │                   └── subscribers.proto
│   └── test
│       ├── java
│       │   └── org
│       │       └── schemata
│       │           ├── ResourceLoader.java
│       │           ├── SchemataExecutorTest.java
│       │           ├── graph
│       │           │   └── SchemaGraphTest.java
│       │           ├── provider
│       │           │   ├── avro
│       │           │   │   └── AvroSchemaParserTest.java
│       │           │   ├── dbt
│       │           │   │   ├── DbtCatalogParserTest.java
│       │           │   │   ├── DbtManifestParserTest.java
│       │           │   │   ├── DbtSchemaCompatibilityCheckerTest.java
│       │           │   │   └── DbtSchemaParserTest.java
│       │           │   └── protobuf
│       │           │       ├── ProtoProcessorTest.java
│       │           │       └── ProtoSchemaCompatibilityCheckerTest.java
│       │           └── validate
│       │               ├── FieldValidatorTest.java
│       │               ├── RulesTest.java
│       │               └── SchemaValidatorTest.java
│       └── resources
│           ├── avro_schema
│           │   └── brand.avsc
│           ├── dbt
│           │   ├── catalog.json
│           │   └── manifest.json
│           ├── dbt_change
│           │   ├── catalog.json
│           │   └── manifest.json
│           ├── descriptors
│           │   ├── changed_model.desc
│           │   └── model.desc
│           └── schema
│               └── entities.proto
└── validate.sh

/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ananthdurai/schemata/21a6b14f78c48355e49471b08c017341b51fd191/.DS_Store
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Instructions: 2 | 3 | The PR has to be tagged with at least one of the following labels (*): 4 | feature 5 | bugfix 6 | performance 7 | ui 8 | backward-incompat 9 | release-notes (**) 10 | Remove these instructions before publishing the PR. 
11 | (*) Other labels to consider: 12 | 13 | testing 14 | dependencies 15 | docker 16 | kubernetes 17 | observability 18 | security 19 | code-style 20 | extension-point 21 | refactor 22 | cleanup 23 | (**) Use release-notes label for scenarios like: 24 | 25 | New configuration options 26 | Deprecation of configurations 27 | Signature changes to public methods/interfaces 28 | New plugins added or old plugins removed 29 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Java CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | 12 | - name: Set up JDK 17 13 | uses: actions/setup-java@v2 14 | with: 15 | java-version: '17' 16 | distribution: 'adopt' 17 | 18 | - name: Build with Maven 19 | run: mvn --batch-mode --update-snapshots verify 20 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ main ] 20 | schedule: 21 | - cron: '45 15 * * 5' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'java' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v3 42 | - name: Set up JDK 17 43 | uses: actions/setup-java@v2 44 | with: 45 | java-version: '17' 46 | distribution: 'adopt' 47 | 48 | # Initializes the CodeQL tools for scanning. 49 | - name: Initialize CodeQL 50 | uses: github/codeql-action/init@v2 51 | with: 52 | languages: ${{ matrix.language }} 53 | # If you wish to specify custom queries, you can do so here or in a config file. 54 | # By default, queries listed here will override any specified in a config file. 55 | # Prefix the list here with "+" to use these queries and those in the config file. 56 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 57 | 58 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 59 | # If this step fails, then you should remove it and run the build manually (see below) 60 | - name: Autobuild with Maven 61 | run: mvn --batch-mode --update-snapshots verify 62 | 63 | # ℹ️ Command-line programs to run using the OS shell. 
64 | # 📚 https://git.io/JvXDl 65 | 66 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 67 | # and modify them (or add more) to build your code if your project 68 | # uses a compiled language 69 | 70 | #- run: | 71 | # make bootstrap 72 | # make release 73 | 74 | - name: Perform CodeQL Analysis 75 | uses: github/codeql-action/analyze@v2 76 | -------------------------------------------------------------------------------- /.github/workflows/proto-schema-compatibility.yml: -------------------------------------------------------------------------------- 1 | name: ProtoBuf Backward Compatibility check 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | backward_compatibility: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Checkout PR branch 12 | uses: actions/checkout@v2 13 | with: 14 | ref: ${{ github.head_ref }} 15 | token: ${{ secrets.GITHUB_TOKEN }} 16 | fetch-depth: 0 17 | 18 | - name: Install protoc using apt-get 19 | run: | 20 | sudo apt-get update 21 | sudo apt-get install -y protobuf-compiler 22 | protoc --version 23 | 24 | - name: Checkout base branch 25 | uses: actions/checkout@v2 26 | with: 27 | ref: ${{ github.base_ref }} 28 | token: ${{ secrets.GITHUB_TOKEN }} 29 | fetch-depth: 0 30 | path: base-branch 31 | 32 | - name: Set up JDK 17 33 | uses: actions/setup-java@v2 34 | with: 35 | distribution: 'temurin' 36 | java-version: '17' 37 | 38 | - name: Run compile command on PR and base branches 39 | run: | 40 | echo "Running compile command on PR branch:" 41 | protoc --proto_path=src/opencontract/v1/org --proto_path=src/main/resources/schema --descriptor_set_out=model.desc --include_imports --include_source_info ./src/main/resources/**/*.proto 42 | echo "Running compile command on base branch:" 43 | cd base-branch 44 | protoc --proto_path=src/opencontract/v1/org --proto_path=src/main/resources/schema --descriptor_set_out=model.desc --include_imports --include_source_info ./src/main/resources/**/*.proto 45 | cd .. 46 | 47 | - name: Download Schemata jar 48 | run: | 49 | curl -L -o schemata.jar https://github.com/ananthdurai/schemata/releases/download/v0.3/schemata.jar 50 | 51 | - name: Run ProtoBuf Backward compatibility check 52 | run: | 53 | # Use an if-condition: the default Actions shell runs bash with -e, so a failing 54 | # bare command would abort the step before the summary could be printed. 55 | if java -jar schemata.jar isBackwardCompatible -s model.desc -b base-branch/model.desc -p protobuf; then 56 | echo "ProtoBuf Backward compatibility check passed" 57 | else 58 | echo "ProtoBuf Backward compatibility check failed" 59 | java -jar schemata.jar compatibilitySummary -s model.desc -b base-branch/model.desc -p protobuf 60 | exit 1 61 | fi 62 | 63 |
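For reference, the check this workflow invokes through the CLI can also be run programmatically against the library API that appears later in this repository (`ProtoSchemaCompatibilityChecker` and `Result`). A minimal sketch; the class name and descriptor paths are placeholders:

```java
import org.schemata.compatibility.Result;
import org.schemata.provider.protobuf.ProtoSchemaCompatibilityChecker;

public class CompatibilityCheckExample {
  public static void main(String[] args) {
    // Compare the current branch's descriptor against the base branch's descriptor,
    // mirroring the `isBackwardCompatible` CLI command used in the workflow above.
    Result result = new ProtoSchemaCompatibilityChecker()
        .check("base-branch/model.desc", "model.desc");
    if (result.isCompatible()) {
      System.out.println("Schema is backward compatible");
    } else {
      // Each Summary entry identifies the file, schema, and field that broke compatibility.
      result.summary().forEach(System.out::println);
      System.exit(1);
    }
  }
}
```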
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | # IntelliJ project files 18 | .idea/ 19 | 20 | # build output 21 | target/ 22 | *.iml 23 | *.desc 24 | -------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | schemata.app -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful.
45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | Twitter:@ananthdurai. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 
123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ### Create an issue for the change 2 | 3 | Create a Schemata issue [here](https://github.com/ananthdurai/schemata/issues) for the change you would like to make. Provide information on why the change is needed and how you plan to address it. Use the conversations on the issue to validate assumptions and agree on the right way to proceed. 4 | 5 | If you have a design document, please link it from your issue. You may even want to create multiple issues depending on the extent of your change. 6 | 7 | Once you are clear about what you want to do, proceed with the next steps listed below. 8 | 9 | ### Create a branch for your change 10 | 11 | ```text 12 | $ cd schemata 13 | # 14 | # ensure you are starting from the latest code base 15 | # the following steps ensure your fork's (origin's) master is up-to-date 16 | # 17 | $ git fetch upstream 18 | $ git checkout master 19 | $ git merge upstream/master 20 | # create a branch for your issue 21 | $ git checkout -b <your-branch-name> 22 | ``` 23 | 24 | Make the necessary changes. If the changes you plan to make are too big, break them down into smaller tasks. 25 | 26 | ### Making the changes 27 | 28 | Follow the recommendations/best practices noted here when you are making changes. 29 | 30 | #### Code documentation 31 | 32 | Please ensure your code is adequately documented. Some things to consider for documentation: 33 | 34 | * Always include class-level Javadoc. At the class level, we are looking for information about what functionality the class provides, what state it maintains, whether there are concurrency/thread-safety concerns, and any exceptional behavior the class might exhibit. 35 | * Document public methods and their parameters. 36 | 37 | #### Logging 38 | 39 | * Ensure there is adequate logging for positive paths as well as exceptional paths. As a corollary to this, ensure logs are not noisy. 40 | * Do not use System.out.println to log messages. Use the `slf4j` loggers, as in the sketch after this list. 41 | * Use logging levels correctly: set the level to `debug` for verbose logs useful only for debugging. 42 | * Do not log stack traces via the exception's `printStackTrace` method; pass the exception to the logger instead. 43 |
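A minimal sketch of the intended pattern (the class name, messages, and file-reading logic are illustrative, not taken from the code base):

```java
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DescriptorReader {
  private static final Logger LOG = LoggerFactory.getLogger(DescriptorReader.class);

  public byte[] read(Path path) throws IOException {
    LOG.debug("Reading descriptor from {}", path); // verbose detail belongs at debug level
    try (InputStream in = Files.newInputStream(path)) {
      byte[] bytes = in.readAllBytes();
      LOG.info("Read {} bytes from {}", bytes.length, path); // positive path, without being noisy
      return bytes;
    } catch (IOException e) {
      // Pass the exception to the logger rather than calling e.printStackTrace().
      LOG.error("Failed to read descriptor from {}", path, e);
      throw e;
    }
  }
}
```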
44 | 45 | #### Exceptions and Exception-Handling 46 | 47 | * Where possible, throw specific exceptions, preferably checked exceptions, so callers can easily determine which error conditions they need to handle. 48 | * Avoid catching broad exceptions (i.e., `catch (Exception e)` blocks), except when this is in the `run()` method of a thread/runnable. 49 | 50 | Current Schemata code does not strictly adhere to this, but we would like to change this over time and adopt best practices around exception handling. 51 | 52 | #### Backward and Forward compatibility changes 53 | 54 | If you are making any changes to persisted state or serialized formats (e.g., schema descriptors), make sure you consider both backward and forward compatibility issues. 55 | 56 | * For backward compatibility, consider cases where one component is using the new version and another is still on the old version. E.g., when a serialized format is updated, consider the resulting behavior when new code reads data written by old code. Will it break? 57 | * For forward compatibility, consider rollback cases. E.g., consider what happens when state persisted by new code is handled by old code. Does the old code skip over new fields? 58 | 59 | #### External libraries 60 | 61 | Be cautious about pulling in external dependencies. You will need to consider multiple things when faced with a need to pull in a new library. 62 | 63 | * What capability does the addition of the library provide you with? Can existing libraries provide this functionality (perhaps with a little extra effort)? 64 | * Is the external library maintained by an active community of contributors? 65 | * What are the licensing terms for the library? Make sure they are compatible with this project's Apache 2.0 license. 66 | 67 | #### Testing your changes 68 | 69 | Automated tests are always recommended for contributions. Make sure you write tests so that: 70 | 71 | 1. You verify the correctness of your contribution. This serves as proof to you as well as the reviewers. 72 | 73 | Identify a list of tests for the changes you have made. Depending on the scope of changes, you may need one or more of the following tests: 74 | 75 | * Unit Tests 76 | 77 | Make sure your code has the necessary class- or method-level unit tests. It is important to write both positive-case and negative-case tests. Document your tests well and add meaningful assertions in the tests; when the assertions fail, ensure that the right messages are logged with information that allows others to debug. 78 | 79 | * Integration Tests 80 | 81 | Add integration tests to cover end-to-end paths without relying on _mocking_ (see note below). You `MUST` add integration tests for REST APIs, and must include tests that cover the different response codes; e.g., 200 OK and the 4xx or 5xx errors that are explicit contracts of the API. 82 | 83 | #### Testing Guidelines 84 | 85 | * **Mocking** 86 | 87 | Use [Mockito](https://site.mockito.org/) to mock classes to control specific behaviors - e.g., simulate various error conditions. A sketch of plain-Mockito usage follows this list. 88 | 89 | **DO NOT** use advanced mock libraries such as [PowerMock](https://github.com/powermock/powermock). They make bytecode-level changes to allow tests for static/private members, but this typically causes other tools, like JaCoCo, to fail. They also promote incorrect implementation choices that make it harder to test additional changes. When faced with a choice to use PowerMock or advanced mocking options, you either need to refactor the code to work better with mocking, or you actually need to write an integration test instead of a unit test. 90 | 91 | * **Validate assumptions in tests** 92 | 93 | Make sure that adequate asserts are added in the tests to verify that the tests are passing for the right reasons. 94 | 95 | * **Write reliable tests** 96 | 97 | Make sure you are writing tests that are reliable. If the tests depend on asynchronous events being fired, do not add `sleep` to your tests. Where possible, use appropriate mocking or condition-based triggers. 98 |
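As referenced in the mocking note above, a minimal Mockito sketch. It assumes Mockito and JUnit 5 are on the test classpath (Mockito is not visible in the build file, so treat that dependency as an assumption); the test class name and descriptor path are made up:

```java
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.List;
import org.junit.jupiter.api.Test;
import org.schemata.app.SchemaValidatorApp;
import org.schemata.domain.Schema;
import org.schemata.provider.SchemaParser;

class SchemaParserMockExampleTest {

  @Test
  void validationSucceedsForAnEmptySchemaList() throws Exception {
    // Mock the parser so the test controls its behavior instead of reading a descriptor from disk.
    SchemaParser parser = mock(SchemaParser.class);
    when(parser.getSchemaList("ignored.desc")).thenReturn(List.of());
    // To simulate an error condition instead, stub the call with thenThrow(...).

    List<Schema> schemaList = parser.getSchemaList("ignored.desc");

    // An empty schema list has nothing to flag, so the validator reports success (exit code 0).
    assertEquals(0, new SchemaValidatorApp(schemaList).call());
  }
}
```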
99 | ### Creating a Pull Request (PR) 100 | 101 | * **Run tests** 102 | 103 | Before you create a review request for the changes, make sure you have run the corresponding unit tests for your changes. You can run individual tests via the IDE or via the Maven command line. Finally, run all tests locally by running `mvn clean install -Pbin-dist`. 104 | * **Push changes and create a PR for review** 105 | 106 | Commit your changes with a meaningful commit message. 107 | 108 | ```text 109 | $ git add <files> 110 | $ git commit -m "Meaningful oneliner for the change" 111 | $ git push origin <your-branch-name> 112 | ``` 113 | 114 | After this, create a Pull Request on GitHub. Include the following information in the description: 115 | 116 | * The changes that are included in the PR. 117 | 118 | * Design document, if any. 119 | 120 | * Information on any implementation choices that were made. 121 | 122 | * Evidence of sufficient testing. You `MUST` indicate the tests done, either manually or automated. 123 | 124 | Once the PR is created, the code base is compiled and all tests are run via CI. Make sure you follow up on any issues flagged by CI and address them. 125 | If you see test failures that are intermittent, please create an issue to track them. 126 | 127 | Once the CI run is clean, request reviews from at least two committers on the project, and be sure to follow up gently with the reviewers. 128 | 129 | * Once you receive comments on GitHub on your changes, be sure to respond to them on GitHub and address the concerns. If any discussions happen offline for the changes in question, make sure to capture the outcome of the discussion, so others can follow along as well. 130 | 131 | It is possible that while your change is being reviewed, other changes were made to the master branch. Be sure to pull the new changes and rebase your change on top of them: 132 | 133 | ```text 134 | # commit your changes 135 | $ git add <files> 136 | $ git commit -m "Meaningful message for the update" 137 | # pull new changes 138 | $ git checkout master 139 | $ git merge upstream/master 140 | $ git checkout <your-branch-name> 141 | $ git rebase master 142 | ``` 143 | 144 | If rebase flags any conflicts, resolve them and follow the instructions provided by the rebase command. 145 | 146 | Run additional tests/validations for the new changes and update the PR by pushing your changes: 147 | 148 | ```text 149 | $ git push origin <your-branch-name> 150 | ``` 151 | 152 | * When you have addressed all comments and have an approved PR, one of the committers can merge your PR. 153 | * After your change is merged, check to see if any documentation needs to be updated. If so, create a PR for documentation. 154 | 155 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity.
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY:check_java 3 | check_java: 4 | ifeq (, $(shell which java)) 5 | $(error "Schemata depends on Java17. JDK17 not found in $(PATH)") 6 | endif 7 | 8 | .PHONY: check_maven 9 | check_maven: 10 | ifeq (, $(shell which mvn)) 11 | $(error "Schemata uses maven as a build tool. Maven not found in $(PATH)") 12 | endif 13 | 14 | .PHONY: compile 15 | compile: check_java check_maven 16 | mvn clean compile 17 | 18 | .PHONY: test 19 | test: check_java check_maven 20 | mvn clean test 21 | 22 | .PHONY: package 23 | package: check_java check_maven 24 | mvn clean package 25 | 26 | .PHONY: proto-gen 27 | proto-gen: 28 | protoc --proto_path=src/opencontract/v1/org --proto_path=src/main/resources/schema --descriptor_set_out=model.desc --include_imports --include_source_info ./src/main/resources/**/*.proto 29 | 30 | .PHONY: build-all 31 | build-all: proto-gen package -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Use this section to tell people about which versions of your project are 6 | currently being supported with security updates. 7 | 8 | | Version | Supported | 9 | | ------- | ------------------ | 10 | | 5.1.x | :white_check_mark: | 11 | | 5.0.x | :x: | 12 | | 4.0.x | :white_check_mark: | 13 | | < 4.0 | :x: | 14 | 15 | ## Reporting a Vulnerability 16 | 17 | Use this section to tell people how to report a vulnerability. 18 | 19 | Tell them where to go, how often they can expect to get an update on a 20 | reported vulnerability, what to expect if the vulnerability is accepted or 21 | declined, etc. 
22 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /asset/ds_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ananthdurai/schemata/21a6b14f78c48355e49471b08c017341b51fd191/asset/ds_classification.png -------------------------------------------------------------------------------- /asset/logo copy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ananthdurai/schemata/21a6b14f78c48355e49471b08c017341b51fd191/asset/logo copy.png -------------------------------------------------------------------------------- /asset/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ananthdurai/schemata/21a6b14f78c48355e49471b08c017341b51fd191/asset/logo.png -------------------------------------------------------------------------------- /asset/sample_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ananthdurai/schemata/21a6b14f78c48355e49471b08c017341b51fd191/asset/sample_schema.png -------------------------------------------------------------------------------- /document.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | java -jar target/schemata-1.0.jar document --source=src/test/resources/descriptors/entities.desc -p=PROTOBUF 3 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if a version parameter is provided; if not default to v1 4 | 5 | if [ "$#" -eq 1 ]; then 6 | version="$1" 7 | else 8 | version="v1" 9 | fi 10 | 11 | # Build Schemata Directory for Check In 12 | 13 | SCHEMATA_DIR="opencontract/${version}/org/schemata/protobuf" 14 | 15 | mkdir -p $SCHEMATA_DIR 16 | 17 | # Download Schemata proto 18 | 19 | curl -L -o $SCHEMATA_DIR/schemata.proto https://raw.githubusercontent.com/ananthdurai/schemata/main/src/opencontract/v1/org/schemata/protobuf/schemata.proto 20 | 21 | echo "Successfully downloaded the folder: $SCHEMATA_DIR" 22 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.schemata 8 | schemata 9 | 1.0 10 | 11 | 12 | 17 13 | true 14 | 3.1.1 15 | 3.21.1 16 | 3.8.0 17 | 3.11.4 18 | 3.11.4 19 | 3.17.3 20 | 5.8.2 21 | 3.12.0 22 | 1.5.1 23 | 4.6.3 24 | 2.13.3 25 | 4.4 26 | 1.2.11 27 | 1.7.36 28 | 1.11.0 29 | 2.9.0 30 | 31 | 32 | 33 | 34 | com.google.protobuf 35 | protobuf-java 36 | ${protobuf.version} 37 | 38 | 39 | com.fasterxml.jackson.core 40 | jackson-databind 41 | ${jackson.version} 42 | 43 | 44 | org.junit.jupiter 45 | junit-jupiter-api 46 | ${junit.version} 47 | test 48 | 49 | 50 | org.junit.jupiter 51 | junit-jupiter-engine 52 | ${junit.version} 53 | test 54 | 55 | 56 | org.apache.commons 57 | commons-lang3 58 | ${commons-lang.version} 59 | 60 | 61 | org.jgrapht 62 | jgrapht-core 63 | ${jgrapht.version} 64 | 65 | 66 | org.apache.commons 67 | commons-collections4 68 | 
${commons-collections4.version} 69 | 70 | 71 | info.picocli 72 | picocli 73 | ${picocli.version} 74 | 75 | 76 | com.github.os72 77 | protoc-jar 78 | ${protoc-jar.version} 79 | 80 | 81 | ch.qos.logback 82 | logback-classic 83 | ${logback.version} 84 | 85 | 86 | ch.qos.logback 87 | logback-core 88 | ${logback.version} 89 | 90 | 91 | 92 | org.slf4j 93 | slf4j-api 94 | ${slf4j.version} 95 | 96 | 97 | 98 | org.apache.avro 99 | avro 100 | ${avro.version} 101 | 102 | 103 | 104 | com.google.code.gson 105 | gson 106 | ${gson.version} 107 | 108 | 109 | 110 | com.github.jsqlparser 111 | jsqlparser 112 | 4.4 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | org.apache.maven.plugins 122 | maven-compiler-plugin 123 | ${maven-compiler-plugin.version} 124 | 125 | ${maven.compiler.release} 126 | 127 | 128 | info.picocli 129 | picocli-codegen 130 | ${picocli.version} 131 | 132 | 133 | 134 | -Aproject=${project.groupId}/${project.artifactId} 135 | 136 | 137 | 138 | 139 | com.github.os72 140 | protoc-jar-maven-plugin 141 | ${protoc-jar-maven-plugin.version} 142 | 143 | 144 | generate-proto-java-source 145 | generate-sources 146 | 147 | run 148 | 149 | 150 | ${protoc.version} 151 | true 152 | 153 | src/opencontract/v1/org/schemata/protobuf 154 | 155 | 156 | 157 | 158 | 159 | generate-proto-test-descriptors 160 | generate-test-sources 161 | 162 | run 163 | 164 | 165 | ${protoc.version} 166 | true 167 | 168 | src/main/resources/schema 169 | 170 | descriptor 171 | src/test/resources/descriptors 172 | 173 | 174 | 175 | 176 | 177 | org.apache.maven.plugins 178 | maven-shade-plugin 179 | ${maven-shade-plugin.version} 180 | 181 | 182 | package 183 | 184 | shade 185 | 186 | 187 | ${shadeSources} 188 | ${project.build.directory}/dependency-reduced-pom.xml 189 | 190 | 191 | 192 | 193 | 194 | true 195 | 196 | 197 | META-INF/LICENSE 198 | target/classes/META-INF/LICENSE 199 | 200 | 201 | org.schemata.SchemataMain 202 | 203 | 204 | false 205 | 206 | 207 | *:* 208 | 209 | META-INF/*.SF 210 | META-INF/*.DSA 211 | META-INF/*.RSA 212 | 213 | 214 | 215 | ${project.artifactId}-${project.version} 216 | 217 | 218 | 219 | 220 | 221 | 222 | org.apache.maven.plugins 223 | maven-surefire-plugin 224 | 2.22.1 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /score.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | java -jar target/schemata-1.0.jar score -s=src/test/resources/descriptors/entities.desc -p=PROTOBUF $1 3 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/SchemataExecutor.java: -------------------------------------------------------------------------------- 1 | package org.schemata; 2 | 3 | import org.schemata.app.DocumentApp; 4 | import org.schemata.app.SchemaScoreApp; 5 | import org.schemata.app.SchemaValidatorApp; 6 | import org.schemata.compatibility.SchemaCompatibilityChecker; 7 | import org.schemata.compatibility.Summary; 8 | import org.schemata.provider.SchemaParser; 9 | import org.schemata.provider.avro.AvroSchemaCompatibilityChecker; 10 | import org.schemata.provider.avro.AvroSchemaParser; 11 | import org.schemata.provider.protobuf.ProtoSchemaCompatibilityChecker; 12 | import org.schemata.provider.protobuf.ProtoSchemaParser; 13 | import picocli.CommandLine.Option; 14 | import picocli.CommandLine.ScopeType; 15 | 16 | import java.util.Set; 17 | 18 | import static picocli.CommandLine.Command; 19 | import static 
picocli.CommandLine.Parameters; 20 | 21 | 22 | @Command(name = "protocol", mixinStandardHelpOptions = true, description = "Schemata commandline tool") 23 | public class SchemataExecutor { 24 | 25 | enum Provider { 26 | PROTOBUF, AVRO 27 | } 28 | 29 | @Option(names = {"-s", "--source"}, description = "Path to schema file", scope = ScopeType.INHERIT) 30 | private String path; 31 | 32 | @Option(names = {"-p", "--provider"}, description = "Valid provider values: ${COMPLETION-CANDIDATES}", scope = 33 | ScopeType.INHERIT) 34 | private Provider provider; 35 | 36 | @Option(names = {"-b", "--base"}, description = "Base Path to schema file", scope = ScopeType.INHERIT) 37 | private String basePath; 38 | 39 | @Command(description = "Validate schema") 40 | public int validate() 41 | throws Exception { 42 | var parser = getSchemaParser(); 43 | return new SchemaValidatorApp(parser.getSchemaList(path)).call(); 44 | } 45 | 46 | @Command(description = "Calculate protocol score") 47 | public int score(@Parameters(paramLabel = "", description = "fully qualified message name") String schema) 48 | throws Exception { 49 | var parser = getSchemaParser(); 50 | return new SchemaScoreApp(parser.getSchemaList(path), schema).call(); 51 | } 52 | 53 | @Command(description = "Document a schema as JSON") 54 | public int document() 55 | throws Exception { 56 | var parser = getSchemaParser(); 57 | return new DocumentApp(parser.getSchemaList(path)).call(); 58 | } 59 | 60 | @Command(description = "Check if schema is backward compatible") 61 | public int isBackwardCompatible() { 62 | var checker = getSchemaCompatibilityChecker(); 63 | return checker.check(basePath, path).isCompatible() ? 0 : 1; 64 | } 65 | 66 | @Command(description = "Print the backward compatibility summary with incompatible fields") 67 | public int compatibilitySummary() { 68 | var checker = getSchemaCompatibilityChecker().check(basePath, path); 69 | if (checker.isCompatible()) { 70 | System.out.println("Schema is backward compatible"); 71 | return 0; 72 | } else { 73 | System.out.println("Incompatible fields:"); 74 | checker.summary().forEach(System.out::println); 75 | return 1; 76 | } 77 | } 78 | 79 | public SchemaParser getSchemaParser() { 80 | return switch (provider) { 81 | case PROTOBUF -> new ProtoSchemaParser(); 82 | case AVRO -> new AvroSchemaParser(); 83 | }; 84 | } 85 | 86 | public SchemaCompatibilityChecker getSchemaCompatibilityChecker() { 87 | return switch (provider) { 88 | case PROTOBUF -> new ProtoSchemaCompatibilityChecker(); 89 | case AVRO -> new AvroSchemaCompatibilityChecker(); 90 | }; 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/SchemataMain.java: -------------------------------------------------------------------------------- 1 | package org.schemata; 2 | 3 | import picocli.CommandLine; 4 | 5 | 6 | public class SchemataMain { 7 | 8 | public static void main(String... 
args) { 9 | var cmd = new CommandLine(new SchemataExecutor()) 10 | .setOptionsCaseInsensitive(true) 11 | .setCaseInsensitiveEnumValuesAllowed(true); 12 | 13 | int exitCode = cmd.execute(args); 14 | System.exit(exitCode); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/app/DocumentApp.java: -------------------------------------------------------------------------------- 1 | package org.schemata.app; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import com.fasterxml.jackson.databind.SerializationFeature; 5 | import org.schemata.domain.Schema; 6 | 7 | import java.util.List; 8 | import java.util.concurrent.Callable; 9 | 10 | public class DocumentApp implements Callable<Integer> { 11 | 12 | private final List<Schema> schemaList; 13 | 14 | public DocumentApp(List<Schema> schemaList) { 15 | this.schemaList = schemaList; 16 | } 17 | 18 | @Override 19 | public Integer call() throws Exception { 20 | var mapper = new ObjectMapper(); 21 | mapper.enable(SerializationFeature.INDENT_OUTPUT); // pretty print 22 | var out = mapper.writeValueAsString(schemaList); 23 | System.out.println(out); 24 | 25 | return 0; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/app/SchemaScoreApp.java: -------------------------------------------------------------------------------- 1 | package org.schemata.app; 2 | 3 | import java.util.List; 4 | import java.util.concurrent.Callable; 5 | import org.apache.commons.lang3.StringUtils; 6 | import org.schemata.domain.Schema; 7 | import org.schemata.exception.SchemaNotFoundException; 8 | import org.schemata.graph.SchemaGraph; 9 | import org.schemata.printer.Console; 10 | 11 | 12 | public class SchemaScoreApp implements Callable<Integer> { 13 | 14 | private List<Schema> schemaList; 15 | private String schemaName; 16 | 17 | public SchemaScoreApp(List<Schema> schemaList, String schemaName) { 18 | this.schemaList = schemaList; 19 | this.schemaName = schemaName; 20 | } 21 | 22 | @Override 23 | public Integer call() throws Exception { 24 | 25 | if (StringUtils.isBlank(schemaName)) { 26 | Console.printError("Invalid schema name: " + schemaName); 27 | return -1; 28 | } 29 | 30 | var graph = new SchemaGraph(this.schemaList); 31 | try { 32 | double value = graph.getSchemataScore(schemaName); 33 | Console.printSuccess("Schemata score for " + schemaName + " : " + value); 34 | } catch (SchemaNotFoundException e) { 35 | Console.printError(e.getMessage()); 36 | return -1; 37 | } 38 | return 0; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/app/SchemaValidatorApp.java: -------------------------------------------------------------------------------- 1 | package org.schemata.app; 2 | 3 | import java.util.List; 4 | import java.util.concurrent.Callable; 5 | import org.schemata.domain.Field; 6 | import org.schemata.domain.Schema; 7 | import org.schemata.printer.Console; 8 | import org.schemata.validate.FieldValidator; 9 | import org.schemata.validate.SchemaValidator; 10 | import org.schemata.validate.Status; 11 | 12 | 13 | public class SchemaValidatorApp implements Callable<Integer> { 14 | 15 | private List<Schema> schemaList; 16 | 17 | public SchemaValidatorApp(List<Schema> schemaList) { 18 | this.schemaList = schemaList; 19 | } 20 | 21 | @Override 22 | public Integer call() 23 | throws Exception { 24 | var schemaValidator = new SchemaValidator(); 25 | var fieldValidator = new FieldValidator(); 26 | for (Schema schema :
schemaList) { 27 | var schemaResult = schemaValidator.apply(schema); 28 | if (schemaResult.status() == Status.ERROR) { 29 | Console.printError("Error parsing Schema " + schema.name() + " Error Message: " + schemaResult.errorMessages()); 30 | return -1; 31 | } 32 | 33 | for (Field field : schema.fieldList()) { 34 | var fieldResult = fieldValidator.apply(field); 35 | if (fieldResult.status() == Status.ERROR) { 36 | Console.printError( 37 | "Error parsing Schema Fields in schema:" + schema.name() + " on field:" + field.name() + " Error Message:" 38 | + fieldResult.errorMessages()); 39 | return -1; 40 | } 41 | } 42 | } 43 | Console.printSuccess("Schema validation success. No errors to report"); 44 | return 0; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/compatibility/Result.java: -------------------------------------------------------------------------------- 1 | package org.schemata.compatibility; 2 | 3 | 4 | import java.util.Set; 5 | 6 | public record Result(Boolean isCompatible, Set<Summary> summary) { 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/compatibility/SchemaCompatibilityChecker.java: -------------------------------------------------------------------------------- 1 | package org.schemata.compatibility; 2 | 3 | 4 | public interface SchemaCompatibilityChecker { 5 | Result check(String baseSchemaPath, String changeSchemaPath); 6 | } 7 | 8 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/compatibility/Summary.java: -------------------------------------------------------------------------------- 1 | package org.schemata.compatibility; 2 | 3 | public record Summary(String filename, String schemaName, String fieldName, String fieldType) { 4 | 5 | private Summary(Builder builder) { 6 | this(builder.filename, builder.schemaName, builder.fieldName, builder.fieldType); 7 | } 8 | 9 | public static class Builder { 10 | protected String filename; 11 | protected String schemaName; 12 | protected String fieldName; 13 | protected String fieldType; 14 | 15 | public Builder filename(String filename) { 16 | this.filename = filename; 17 | return this; 18 | } 19 | 20 | public Builder schemaName(String schemaName) { 21 | this.schemaName = schemaName; 22 | return this; 23 | } 24 | 25 | public Builder fieldName(String fieldName) { 26 | this.fieldName = fieldName; 27 | return this; 28 | } 29 | 30 | public Builder fieldType(String fieldType) { 31 | this.fieldType = fieldType; 32 | return this; 33 | } 34 | 35 | public Summary build() { 36 | return new Summary(filename, schemaName, fieldName, fieldType); 37 | } 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/domain/Constraints.java: -------------------------------------------------------------------------------- 1 | package org.schemata.domain; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | 6 | public record Constraints(String name, String description, Map<String, Constraint> constraintMap) { 7 | 8 | public record Constraint(String key, Object value, DataType dataType, List<Object> listValue) { 9 | 10 | public static Constraint primitiveConstraints(String key, Object value, DataType dataType) { 11 | return new Constraint(key, value, dataType, null); 12 | } 13 | 14 | public static Constraint listConstraints(String key, List<Object> listValue, DataType dataType) { 15 | return new Constraint(key,
null, dataType, listValue); 16 | } 17 | 18 | } 19 | 20 | public enum DataType { 21 | NULL, NUMBER, STRING, BOOLEAN, LIST; // STRUCT Type is not supported 22 | 23 | public static DataType fromString(String dataType) { 24 | return switch (dataType.toUpperCase()) { 25 | case "NULL_VALUE", "NULL" -> NULL; 26 | case "NUMBER_VALUE", "NUMBER" -> NUMBER; 27 | case "STRING_VALUE", "STRING" -> STRING; 28 | case "BOOL_VALUE", "BOOLEAN" -> BOOLEAN; 29 | case "LIST_VALUE", "LIST" -> LIST; 30 | default -> throw new IllegalArgumentException("Invalid data type: " + dataType); 31 | }; 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/domain/Depends.java: -------------------------------------------------------------------------------- 1 | package org.schemata.domain; 2 | 3 | public record Depends(String model, String column) { 4 | } 5 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/domain/EventType.java: -------------------------------------------------------------------------------- 1 | package org.schemata.domain; 2 | 3 | import java.util.Arrays; 4 | 5 | 6 | public enum EventType { 7 | NONE, LIFECYCLE, ACTIVITY, AGGREGATED; 8 | 9 | public static EventType get(String eventType) { 10 | return Arrays.stream(EventType.values()).filter(e -> e.name().equalsIgnoreCase(eventType)).findAny() 11 | .orElse(EventType.NONE); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/domain/Field.java: -------------------------------------------------------------------------------- 1 | package org.schemata.domain; 2 | 3 | import java.util.List; 4 | 5 | 6 | public record Field(String schema, String name, String dataType, boolean isPrimitiveType, String description, 7 | String comment, String seeAlso, String reference, boolean isClassified, String classificationLevel, 8 | boolean isPrimaryKey, String productType, Link link, List depends) { 9 | 10 | private Field(Builder builder) { 11 | this(builder.schema, builder.name, builder.dataType, builder.isPrimitiveType, builder.description, builder.comment, 12 | builder.seeAlso, builder.reference, builder.isClassified, builder.classificationLevel, builder.isPrimaryKey, 13 | builder.productType, builder.link, builder.depends); 14 | } 15 | 16 | public static class Builder { 17 | 18 | String schema; 19 | String name; 20 | String dataType; 21 | boolean isPrimitiveType; 22 | String description; 23 | String comment; 24 | String seeAlso; 25 | String reference; 26 | boolean isClassified; 27 | String classificationLevel; 28 | boolean isPrimaryKey; 29 | String productType; 30 | Link link; 31 | List depends; 32 | 33 | public Builder(String schema, String name, String dataType, boolean isPrimitiveType) { 34 | this.schema = schema; 35 | this.name = name; 36 | this.dataType = dataType; 37 | this.isPrimitiveType = isPrimitiveType; 38 | } 39 | 40 | public Builder description(String description) { 41 | this.description = description; 42 | return this; 43 | } 44 | 45 | public Builder comment(String comment) { 46 | this.comment = comment; 47 | return this; 48 | } 49 | 50 | public Builder seeAlso(String seeAlso) { 51 | this.seeAlso = seeAlso; 52 | return this; 53 | } 54 | 55 | public Builder reference(String reference) { 56 | this.reference = reference; 57 | return this; 58 | } 59 | 60 | public Builder isClassified(boolean classified) { 61 | this.isClassified = classified; 62 | return 
this; 63 | } 64 | 65 | public Builder classificationLevel(String classifiedLevel) { 66 | this.classificationLevel = classifiedLevel; 67 | return this; 68 | } 69 | 70 | public Builder primaryKey(boolean primaryKey) { 71 | this.isPrimaryKey = primaryKey; 72 | return this; 73 | } 74 | 75 | public Builder productType(String productType) { 76 | this.productType = productType; 77 | return this; 78 | } 79 | 80 | public Builder link(Link link) { 81 | this.link = link; 82 | return this; 83 | } 84 | 85 | public Builder depends(List depends) { 86 | this.depends = depends; 87 | return this; 88 | } 89 | 90 | public Field build() { 91 | return new Field(this); 92 | } 93 | } 94 | 95 | public static class Prop { 96 | public static final String DESC = "desc"; 97 | public static final String DESCRIPTION = "description"; 98 | public static final String COMMENT = "comment"; 99 | public static final String SEE_ALSO = "see_also"; 100 | public static final String REFERENCE = "reference"; 101 | public static final String IS_CLASSIFIED = "is_classified"; 102 | public static final String IS_PRIMARY_KEY = "is_primary_key"; 103 | public static final String PRODUCT_TYPE = "product_type"; 104 | public static final String LINK = "link"; 105 | public static final String DEPENDS = "depends"; 106 | public static final String MODEL = "model"; 107 | 108 | public static final String COLUMN = "column"; 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/domain/Link.java: -------------------------------------------------------------------------------- 1 | package org.schemata.domain; 2 | 3 | public record Link(String model, String column) { 4 | } 5 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/domain/ModelType.java: -------------------------------------------------------------------------------- 1 | package org.schemata.domain; 2 | 3 | import java.util.Arrays; 4 | 5 | 6 | public enum ModelType { 7 | DIMENSION, FACT, NONE; 8 | 9 | public static ModelType get(String modelType) { 10 | return Arrays.stream(ModelType.values()) 11 | .filter(e -> e.name().equalsIgnoreCase(modelType)).findAny().orElse(ModelType.NONE); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/domain/Schema.java: -------------------------------------------------------------------------------- 1 | package org.schemata.domain; 2 | 3 | import java.util.List; 4 | 5 | 6 | public record Schema(String name, String description, String comment, String seeAlso, String reference, String owner, 7 | String domain, String status, String type, String eventType, String modelType, String teamChannel, 8 | String alertChannel, String complianceOwner, String complianceChannel, 9 | List downstreamSubscribersList, List upstreamSubscribersList, 10 | List fieldList, List constraintsList) { 11 | 12 | private Schema(Builder builder) { 13 | this(builder.name, builder.description, builder.comment, builder.seeAlso, builder.reference, builder.owner, 14 | builder.domain, builder.status, builder.schemaType.name(), builder.eventType.name(), builder.modelType.name(), 15 | builder.teamChannel, builder.alertChannel, builder.complianceOwner, builder.complianceChannel, 16 | builder.downstreamSubscribersList, builder.upstreamSubscribersList, 17 | builder.fieldList, builder.constraintsList); 18 | } 19 | 20 | public static class Builder { 21 | String name; 22 | String description; 23 
| String comment; 24 | String seeAlso; 25 | String reference; 26 | String owner; 27 | String domain; 28 | String status; 29 | SchemaType schemaType; 30 | EventType eventType; 31 | ModelType modelType; 32 | String teamChannel; 33 | String alertChannel; 34 | String complianceOwner; 35 | String complianceChannel; 36 | List fieldList; 37 | List downstreamSubscribersList; 38 | List upstreamSubscribersList; 39 | List constraintsList; 40 | 41 | public Builder(String name, List fieldList) { 42 | this.name = name; 43 | this.fieldList = fieldList; 44 | this.eventType = EventType.NONE; 45 | this.modelType = ModelType.NONE; 46 | } 47 | 48 | public Builder description(String description) { 49 | this.description = description; 50 | return this; 51 | } 52 | 53 | public Builder comment(String comment) { 54 | this.comment = comment; 55 | return this; 56 | } 57 | 58 | public Builder seeAlso(String seeAlso) { 59 | this.seeAlso = seeAlso; 60 | return this; 61 | } 62 | 63 | public Builder reference(String reference) { 64 | this.reference = reference; 65 | return this; 66 | } 67 | 68 | public Builder owner(String owner) { 69 | this.owner = owner; 70 | return this; 71 | } 72 | 73 | public Builder domain(String domain) { 74 | this.domain = domain; 75 | return this; 76 | } 77 | 78 | public Builder status(String status) { 79 | this.status = status; 80 | return this; 81 | } 82 | 83 | public Builder schemaType(String schemaTypeValue) { 84 | this.schemaType = SchemaType.get(schemaTypeValue); 85 | return this; 86 | } 87 | 88 | public Builder eventType(String eventTypeValue) { 89 | this.eventType = EventType.get(eventTypeValue); 90 | return this; 91 | } 92 | 93 | public Builder modelType(String modelTypeValue) { 94 | this.modelType = ModelType.get(modelTypeValue); 95 | return this; 96 | } 97 | 98 | public Builder teamChannel(String teamChannel) { 99 | this.teamChannel = teamChannel; 100 | return this; 101 | } 102 | 103 | public Builder alertChannel(String alertChannel) { 104 | this.alertChannel = alertChannel; 105 | return this; 106 | } 107 | 108 | public Builder complianceOwner(String complianceOwner) { 109 | this.complianceOwner = complianceOwner; 110 | return this; 111 | } 112 | 113 | public Builder complianceChannel(String complianceChannel) { 114 | this.complianceChannel = complianceChannel; 115 | return this; 116 | } 117 | 118 | public Builder downstreamSubscribersList(List subscribersList) { 119 | this.downstreamSubscribersList = subscribersList; 120 | return this; 121 | } 122 | 123 | public Builder upstreamSubscribersList(List subscribersList) { 124 | this.upstreamSubscribersList = subscribersList; 125 | return this; 126 | } 127 | 128 | public Builder constraintsList(List constraints) { 129 | this.constraintsList = constraints; 130 | return this; 131 | } 132 | 133 | public Schema build() { 134 | return new Schema(this); 135 | } 136 | } 137 | 138 | public static final class Prop { 139 | 140 | public static final String DESC = "desc"; 141 | 142 | public static final String DESCRIPTION = "description"; 143 | public static final String COMMENT = "comment"; 144 | public static final String SEE_ALSO = "see_also"; 145 | public static final String REFERENCE = "reference"; 146 | public static final String OWNER = "owner"; 147 | public static final String DOMAIN = "domain"; 148 | public static final String STATUS = "status"; 149 | public static final String SCHEMA_TYPE = "schema_type"; 150 | public static final String EVENT_TYPE = "event_type"; 151 | public static final String MODEL_TYPE = "model_type"; 152 | public 
static final String TEAM_CHANNEL = "team_channel"; 153 | public static final String ALERT_CHANNEL = "alert_channel"; 154 | public static final String COMPLIANCE_OWNER = "compliance_owner"; 155 | public static final String COMPLIANCE_CHANNEL = "compliance_channel"; 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/domain/SchemaType.java: -------------------------------------------------------------------------------- 1 | package org.schemata.domain; 2 | 3 | import java.util.Arrays; 4 | 5 | 6 | public enum SchemaType { 7 | ENTITY, EVENT, MODEL, UNKNOWN; 8 | 9 | public static SchemaType get(String schemaType) { 10 | return Arrays.stream(SchemaType.values()).filter(e -> e.name().equalsIgnoreCase(schemaType)).findAny() 11 | .orElse(SchemaType.UNKNOWN); 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/domain/Subscribers.java: -------------------------------------------------------------------------------- 1 | package org.schemata.domain; 2 | 3 | public record Subscribers(String name, String usage) { 4 | } 5 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/exception/SchemaNotFoundException.java: -------------------------------------------------------------------------------- 1 | package org.schemata.exception; 2 | 3 | public class SchemaNotFoundException extends RuntimeException { 4 | public SchemaNotFoundException() { 5 | } 6 | 7 | public SchemaNotFoundException(String message) { 8 | super(message); 9 | } 10 | 11 | public SchemaNotFoundException(String message, Throwable cause) { 12 | super(message, cause); 13 | } 14 | 15 | public SchemaNotFoundException(Throwable cause) { 16 | super(cause); 17 | } 18 | 19 | public SchemaNotFoundException(String message, Throwable cause, boolean enableSuppression, 20 | boolean writableStackTrace) { 21 | super(message, cause, enableSuppression, writableStackTrace); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/exception/SchemaParserException.java: -------------------------------------------------------------------------------- 1 | package org.schemata.exception; 2 | 3 | public class SchemaParserException extends RuntimeException { 4 | public SchemaParserException(String message) { 5 | super(message); 6 | } 7 | 8 | public SchemaParserException(String message, Throwable cause) { 9 | super(message, cause); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/graph/SchemaGraph.java: -------------------------------------------------------------------------------- 1 | package org.schemata.graph; 2 | 3 | import java.math.BigDecimal; 4 | import java.math.MathContext; 5 | import java.util.Collection; 6 | import java.util.HashMap; 7 | import java.util.HashSet; 8 | import java.util.List; 9 | import java.util.Map; 10 | import java.util.Optional; 11 | import java.util.Set; 12 | import java.util.stream.Collectors; 13 | import java.util.stream.Stream; 14 | import org.apache.commons.collections4.SetUtils; 15 | import org.apache.commons.lang3.StringUtils; 16 | import org.jgrapht.alg.scoring.PageRank; 17 | import org.jgrapht.graph.DirectedWeightedMultigraph; 18 | import org.jgrapht.util.SupplierUtil; 19 | import org.schemata.domain.Field; 20 | import org.schemata.domain.Schema; 21 | import 
org.schemata.exception.SchemaNotFoundException; 22 | 23 | 24 | public final class SchemaGraph { 25 | 26 | private final DirectedWeightedMultigraph graph = 27 | new DirectedWeightedMultigraph<>(SupplierUtil.createSupplier(Schema.class), 28 | SupplierUtil.createSupplier(WeightedSchemaEdge.class)); 29 | 30 | private final List schemaList; 31 | Map schemaMap; 32 | private final PageRank pageRank; 33 | 34 | public SchemaGraph(List schemaList) { 35 | this.schemaList = schemaList; 36 | this.schemaMap = buildGraph(); 37 | this.buildEdge(); 38 | pageRank = new PageRank<>(graph); 39 | } 40 | 41 | private Map buildGraph() { 42 | Map schemaMap = new HashMap<>(); 43 | for (Schema schema : schemaList) { 44 | schemaMap.put(schema.name(), schema); 45 | this.addVertex(schema); 46 | } 47 | return schemaMap; 48 | } 49 | 50 | private void buildEdge() 51 | throws SchemaNotFoundException { 52 | for (Schema schema : this.schemaList) { 53 | for (Field field : schema.fieldList()) { 54 | if (!field.isPrimitiveType()) { 55 | findVertex(field.dataType()).ifPresentOrElse( 56 | value -> this.addEdge(new WeightedSchemaEdge(schema, value, field)), () -> { 57 | throw new SchemaNotFoundException("DataType " + field.dataType() + " Not found in the graph"); 58 | }); 59 | } 60 | } 61 | } 62 | } 63 | 64 | private void addVertex(Schema schema) { 65 | graph.addVertex(schema); 66 | } 67 | 68 | private void addEdge(WeightedSchemaEdge edge) { 69 | if (edge == null) { 70 | throw new IllegalArgumentException("Edge can't be null"); 71 | } 72 | graph.addEdge(edge.getSource(), edge.getTarget(), edge); 73 | } 74 | 75 | public Set incomingEdgesOf(String vertex) 76 | throws SchemaNotFoundException { 77 | return graph.incomingEdgesOf(getSchema(vertex)); 78 | } 79 | 80 | public Set incomingVertexOf(String vertex) { 81 | Set incomingSchemaSet = new HashSet<>(); 82 | incomingEdgesOf(vertex).forEach(e -> incomingSchemaSet.add(e.getSource())); 83 | return incomingSchemaSet; 84 | } 85 | 86 | public Set outgoingEdgesOf(String vertex) 87 | throws SchemaNotFoundException { 88 | return graph.outgoingEdgesOf(getSchema(vertex)); 89 | } 90 | 91 | public Set outgoingVertexOf(String vertex) { 92 | Set outgoingSchemaSet = new HashSet<>(); 93 | outgoingEdgesOf(vertex).forEach(e -> outgoingSchemaSet.add(e.getTarget())); 94 | return outgoingSchemaSet; 95 | } 96 | 97 | public Set outgoingEntityVertexOf(String vertex) { 98 | return outgoingVertexOf(vertex).stream().filter(f -> "ENTITY".equalsIgnoreCase(f.type())) 99 | .collect(Collectors.toSet()); 100 | } 101 | 102 | public Set getAllEntityVertex() { 103 | return graph.vertexSet().stream().filter(f -> "ENTITY".equalsIgnoreCase(f.type())).collect(Collectors.toSet()); 104 | } 105 | 106 | public Double getVertexPageRankScore(String vertex) { 107 | return pageRank.getVertexScore(getSchema(vertex)); 108 | } 109 | 110 | public Double getSchemataScore(String vertex) { 111 | var schema = getSchema(vertex); 112 | double score = switch (schema.type().toUpperCase()) { 113 | case "ENTITY" -> computeEntityScore(vertex); 114 | case "EVENT" -> computeEventScore(vertex, schema.eventType()); 115 | default -> 0.0; 116 | }; 117 | return roundUp(score); 118 | } 119 | 120 | private double computeEntityScore(String vertex) { 121 | double totalEdges = graph.edgeSet().size(); 122 | if (totalEdges == 0) { 123 | return 0.0; 124 | } 125 | 126 | double referenceEdges = referenceEdges(vertex).size(); 127 | return 1 - ((totalEdges - referenceEdges) / totalEdges); 128 | } 129 | 130 | public Set referenceEdges(String vertex) { 131 | return 
SetUtils.union(incomingEdgesOf(vertex), outgoingEdgesOf(vertex)); 132 | } 133 | 134 | private double computeEventScore(String vertex, String eventType) { 135 | double score = switch (eventType) { 136 | case "LIFECYCLE" -> outgoingEntityVertexOf(vertex).size() > 0 ? 1.0 : 0.0; 137 | case "ACTIVITY", "AGGREGATED" -> computeNonLifecycleScore(vertex); 138 | default -> 0.0; 139 | }; 140 | return score; 141 | } 142 | 143 | private double computeNonLifecycleScore(String vertex) { 144 | double totalVertex = getAllEntityVertex().size(); 145 | if (totalVertex == 0) { 146 | return 0.0; 147 | } 148 | Set referenceVertex = 149 | outgoingEntityVertexOf(vertex).stream().map(v -> outgoingEntityVertexOf(v.name())).flatMap(Collection::stream) 150 | .collect(Collectors.toSet()); 151 | Set outgoingVertex = outgoingEntityVertexOf(vertex); 152 | double vertexCount = SetUtils.union(referenceVertex, outgoingVertex).size(); 153 | 154 | return 1 - ((totalVertex - vertexCount) / totalVertex); 155 | } 156 | 157 | public Schema getSchema(String vertex) 158 | throws SchemaNotFoundException { 159 | return findVertex(vertex).orElseThrow( 160 | () -> new SchemaNotFoundException("Vertex " + vertex + " Not found in the graph")); 161 | } 162 | 163 | public Optional findVertex(String vertex) { 164 | if (StringUtils.isBlank(vertex)) { 165 | return Optional.empty(); 166 | } 167 | if (this.schemaMap.containsKey(vertex)) { 168 | return Optional.of(this.schemaMap.get(vertex)); 169 | } 170 | return Optional.empty(); 171 | } 172 | 173 | private double roundUp(double value) { 174 | return new BigDecimal(value, new MathContext(3)).doubleValue(); 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/graph/WeightedSchemaEdge.java: -------------------------------------------------------------------------------- 1 | package org.schemata.graph; 2 | 3 | import org.apache.commons.lang3.builder.EqualsBuilder; 4 | import org.apache.commons.lang3.builder.HashCodeBuilder; 5 | import org.jgrapht.graph.DefaultEdge; 6 | import org.schemata.domain.Field; 7 | import org.schemata.domain.Schema; 8 | 9 | 10 | public class WeightedSchemaEdge extends DefaultEdge { 11 | 12 | private static final Double DEFAULT_WEIGHT = 1.0; 13 | Schema source; 14 | Schema target; 15 | Field edgeField; 16 | double weight; // Set default weight == 1 17 | 18 | public WeightedSchemaEdge(Schema source, Schema target, Field edgeField) { 19 | this(source, target, edgeField, DEFAULT_WEIGHT); 20 | } 21 | 22 | public WeightedSchemaEdge(Schema source, Schema target, Field edgeField, double weight) { 23 | this.source = source; 24 | this.target = target; 25 | this.edgeField = edgeField; 26 | this.weight = weight; 27 | } 28 | 29 | @Override 30 | public Schema getSource() { 31 | return source; 32 | } 33 | 34 | @Override 35 | public Schema getTarget() { 36 | return target; 37 | } 38 | 39 | public Field getEdgeField() { 40 | return edgeField; 41 | } 42 | 43 | public double getWeight() { 44 | return weight; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return "WeightedSchemaEdge{" + "source=" + source + ", target=" + target + ", edgeField=" + edgeField + ", weight=" 50 | + weight + '}'; 51 | } 52 | 53 | public String summaryPrint() { 54 | return "WeightedSchemaEdge{" + "source=" + source.name() + ", target=" + target.name() + ", edgeField=" 55 | + edgeField.name() + ", weight=" + weight + '}'; 56 | } 57 | 58 | @Override 59 | public boolean equals(Object o) { 60 | if (this == o) { 61 | return true; 
62 | } 63 | 64 | if (o == null || getClass() != o.getClass()) { 65 | return false; 66 | } 67 | 68 | WeightedSchemaEdge that = (WeightedSchemaEdge) o; 69 | 70 | return new EqualsBuilder().append(weight, that.weight).append(source, that.source).append(target, that.target) 71 | .append(edgeField, that.edgeField).isEquals(); 72 | } 73 | 74 | @Override 75 | public int hashCode() { 76 | return new HashCodeBuilder(17, 37).append(source).append(target).append(edgeField).append(weight).toHashCode(); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/json/Json.java: -------------------------------------------------------------------------------- 1 | package org.schemata.json; 2 | 3 | import com.google.gson.JsonArray; 4 | import com.google.gson.JsonObject; 5 | import java.util.Locale; 6 | import java.util.Optional; 7 | import org.apache.commons.lang3.BooleanUtils; 8 | import org.apache.commons.lang3.math.NumberUtils; 9 | 10 | 11 | public final class Json { 12 | 13 | public static Boolean containsField(JsonObject obj, String element) { 14 | return obj.has(element) && !obj.get(element).isJsonNull(); 15 | } 16 | 17 | public static String getAsString(JsonObject obj, String element) { 18 | return getAsStringCaseSensitive(obj, element).toLowerCase(); 19 | } 20 | 21 | public static String getAsStringCaseSensitive(JsonObject obj, String element) { 22 | return containsField(obj, element) ? obj.get(element).getAsString() : ""; 23 | } 24 | 25 | public static Long getAsLong(JsonObject obj, String element) { 26 | return NumberUtils.isParsable(getAsString(obj, element)) ? 27 | obj.get(element).getAsLong() : Long.MIN_VALUE; 28 | } 29 | 30 | public static boolean getAsBoolean(JsonObject obj, String element) { 31 | return BooleanUtils.toBoolean(getAsString(obj, element)); 32 | } 33 | 34 | public static Optional<JsonObject> getAsJsonObject(JsonObject obj, String element) { 35 | return containsField(obj, element) && obj.get(element).isJsonObject() ? 36 | Optional.of(obj.get(element).getAsJsonObject()) : Optional.empty(); 37 | } 38 | 39 | public static Optional<JsonArray> getAsJsonArray(JsonObject obj, String element) { 40 | return containsField(obj, element) ? 
41 | Optional.of(obj.getAsJsonArray(element)) : Optional.empty(); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/printer/Console.java: -------------------------------------------------------------------------------- 1 | package org.schemata.printer; 2 | 3 | public class Console { 4 | 5 | public static final String TEXT_RED = "\033[0;31m"; 6 | public static final String TEXT_GREEN = "\033[0;32m"; 7 | public static final String TEXT_RESET = "\u001B[0m"; 8 | 9 | public static void printSuccess(String message) { 10 | System.out.println(TEXT_GREEN + message + TEXT_RESET); 11 | } 12 | 13 | public static void printError(String message) { 14 | System.out.println(TEXT_RED + message + TEXT_RESET); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/SchemaParser.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider; 2 | 3 | import java.util.List; 4 | import org.schemata.domain.Schema; 5 | 6 | 7 | public interface SchemaParser { 8 | 9 | List getSchemaList(String path); 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/avro/AvroSchemaCompatibilityChecker.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider.avro; 2 | 3 | import org.schemata.compatibility.Result; 4 | import org.schemata.compatibility.SchemaCompatibilityChecker; 5 | 6 | public class AvroSchemaCompatibilityChecker implements SchemaCompatibilityChecker { 7 | 8 | // TODO: Implement this method 9 | 10 | @Override 11 | public Result check(String baseSchemaPath, String changeSchemaPath) { 12 | return null; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/avro/AvroSchemaParser.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider.avro; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.nio.file.Files; 6 | import java.nio.file.Path; 7 | import java.nio.file.Paths; 8 | import java.util.ArrayList; 9 | import java.util.HashMap; 10 | import java.util.List; 11 | import java.util.Map; 12 | import java.util.stream.Stream; 13 | import org.schemata.domain.EventType; 14 | import org.schemata.domain.Field; 15 | import org.schemata.domain.Schema; 16 | import org.schemata.domain.SchemaType; 17 | import org.schemata.exception.SchemaParserException; 18 | import org.schemata.provider.SchemaParser; 19 | 20 | 21 | public class AvroSchemaParser implements SchemaParser { 22 | 23 | static final Map PRIMITIVES = new HashMap<>(); 24 | 25 | static { 26 | PRIMITIVES.put("string", org.apache.avro.Schema.Type.STRING); 27 | PRIMITIVES.put("bytes", org.apache.avro.Schema.Type.BYTES); 28 | PRIMITIVES.put("int", org.apache.avro.Schema.Type.INT); 29 | PRIMITIVES.put("long", org.apache.avro.Schema.Type.LONG); 30 | PRIMITIVES.put("float", org.apache.avro.Schema.Type.FLOAT); 31 | PRIMITIVES.put("double", org.apache.avro.Schema.Type.DOUBLE); 32 | PRIMITIVES.put("boolean", org.apache.avro.Schema.Type.BOOLEAN); 33 | PRIMITIVES.put("null", org.apache.avro.Schema.Type.NULL); 34 | } 35 | 36 | @Override 37 | public List getSchemaList(String path) { 38 | try { 39 | var schemaFileList = listAvroSchemaFiles(path); 40 | return 
schemaFileList.stream().map(s -> { 41 | try { 42 | return buildSchema(s); 43 | } catch (IOException e) { 44 | throw new SchemaParserException("Error while parsing Avro schema", e); 45 | } 46 | }).toList(); 47 | } catch (IOException e) { 48 | throw new SchemaParserException("Error while parsing Avro schema", e); 49 | } 50 | } 51 | 52 | public List listAvroSchemaFiles(String path) 53 | throws IOException { 54 | try (Stream walk = Files.walk(Paths.get(path))) { 55 | return walk 56 | .filter(p -> !Files.isDirectory(p)) 57 | .map(p -> p.toString().toLowerCase()) 58 | .filter(f -> f.endsWith("avsc")) 59 | .toList(); 60 | } 61 | } 62 | 63 | public Schema buildSchema(String path) 64 | throws IOException { 65 | var avroSchema = compileAvroSchema(path); 66 | List fields = new ArrayList<>(); 67 | var avroFields = avroSchema.getFields(); 68 | for (org.apache.avro.Schema.Field avroField : avroFields) { 69 | fields.add(parseField(avroSchema.getFullName(), avroField)); 70 | } 71 | return parseSchema(avroSchema, fields); 72 | } 73 | 74 | public org.apache.avro.Schema compileAvroSchema(String path) 75 | throws IOException { 76 | return new org.apache.avro.Schema.Parser().parse(new File(path)); 77 | } 78 | 79 | public Schema parseSchema(org.apache.avro.Schema schema, List fields) { 80 | Schema.Builder builder = new Schema.Builder(schema.getFullName(), fields); 81 | builder.description(schema.getProp(Schema.Prop.DESC)); 82 | builder.comment(schema.getProp(Schema.Prop.COMMENT)); 83 | builder.seeAlso(schema.getProp(Schema.Prop.SEE_ALSO)); 84 | builder.reference(schema.getProp(Schema.Prop.REFERENCE)); 85 | builder.owner(schema.getProp(Schema.Prop.OWNER)); 86 | builder.domain(schema.getProp(Schema.Prop.DOMAIN)); 87 | builder.status(schema.getProp(Schema.Prop.STATUS)); 88 | builder.schemaType(handleEmptySchemaType(schema)); 89 | builder.eventType(handleEmptyEventType(schema)); 90 | builder.teamChannel(schema.getProp(Schema.Prop.TEAM_CHANNEL)); 91 | builder.alertChannel(schema.getProp(Schema.Prop.ALERT_CHANNEL)); 92 | builder.complianceOwner(schema.getProp(Schema.Prop.COMPLIANCE_OWNER)); 93 | builder.complianceChannel(schema.getProp(Schema.Prop.COMPLIANCE_CHANNEL)); 94 | return builder.build(); 95 | } 96 | 97 | public Field parseField(String schemaName, org.apache.avro.Schema.Field avroField) { 98 | String dataType = avroField.schema().getType().getName(); 99 | var builder = new Field.Builder(schemaName, avroField.name(), dataType, isPrimitiveType(dataType)); 100 | builder.description(avroField.getProp(Field.Prop.DESC)); 101 | builder.comment(avroField.getProp(Field.Prop.COMMENT)); 102 | builder.seeAlso(avroField.getProp(Field.Prop.SEE_ALSO)); 103 | builder.reference(avroField.getProp(Field.Prop.REFERENCE)); 104 | builder.isClassified(Boolean.parseBoolean(avroField.getProp(Field.Prop.IS_CLASSIFIED))); 105 | builder.primaryKey(Boolean.parseBoolean(avroField.getProp(Field.Prop.IS_PRIMARY_KEY))); 106 | builder.productType(avroField.getProp(Field.Prop.PRODUCT_TYPE)); 107 | return builder.build(); 108 | } 109 | 110 | private String handleEmptyEventType(org.apache.avro.Schema schema) { 111 | return schema.getProp(Schema.Prop.EVENT_TYPE) == null ? EventType.NONE.name() 112 | : schema.getProp(Schema.Prop.EVENT_TYPE); 113 | } 114 | 115 | private String handleEmptySchemaType(org.apache.avro.Schema schema) { 116 | return schema.getProp(Schema.Prop.SCHEMA_TYPE) == null ? 
SchemaType.UNKNOWN.name() 117 | : schema.getProp(Schema.Prop.SCHEMA_TYPE); 118 | } 119 | 120 | private boolean isPrimitiveType(String name) { 121 | return PRIMITIVES.containsKey(name); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/dbt/DbtCatalogMetadata.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider.dbt; 2 | 3 | import java.util.List; 4 | 5 | 6 | public interface DbtCatalogMetadata { 7 | record Table(String namespace, String modelName, String dbtModelFullName, String comment, String owner) { 8 | } 9 | 10 | record Column(String name, String dataType, long index, String comment) { 11 | } 12 | 13 | record Catalog(Table table, List column) { 14 | } 15 | 16 | String TARGET_PATH = "target"; 17 | String MANIFEST_FILE = "manifest.json"; 18 | String CATALOG_FILE = "catalog.json"; 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/dbt/DbtCatalogParser.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider.dbt; 2 | 3 | import com.google.gson.JsonElement; 4 | import com.google.gson.JsonObject; 5 | import com.google.gson.JsonParser; 6 | import java.io.IOException; 7 | import java.nio.file.Files; 8 | import java.nio.file.Paths; 9 | import java.util.ArrayList; 10 | import java.util.HashMap; 11 | import java.util.List; 12 | import java.util.Map; 13 | import java.util.stream.Collectors; 14 | import java.util.stream.Stream; 15 | import org.schemata.exception.SchemaParserException; 16 | import org.schemata.json.Json; 17 | 18 | import static org.schemata.provider.dbt.DbtCatalogMetadata.CATALOG_FILE; 19 | 20 | 21 | public class DbtCatalogParser { 22 | 23 | public Map parse(String path) { 24 | Map catalogMap = new HashMap<>(); 25 | 26 | var jsonParser = getCatalogJsonParser(path); 27 | var nodes = getNodes(jsonParser); 28 | nodes.entrySet().forEach(entry -> { 29 | var table = extractTable(entry.getKey(), entry.getValue()); 30 | var columnList = extractColumn(entry.getValue()); 31 | catalogMap.put(entry.getKey(), new DbtCatalogMetadata.Catalog(table, columnList)); 32 | }); 33 | return catalogMap; 34 | } 35 | 36 | public JsonElement getCatalogJsonParser(String path) { 37 | try (Stream lines = Files.lines(Paths.get(path, CATALOG_FILE))) { 38 | String data = lines.collect(Collectors.joining("\n")); 39 | return JsonParser.parseString(data); 40 | } catch (IOException e) { 41 | throw new SchemaParserException("Error while parsing getCatalogJsonParser:", e); 42 | } 43 | } 44 | 45 | public List extractColumn(JsonElement element) { 46 | var columns = getColumnJsonElement(element); 47 | List fieldList = new ArrayList<>(); 48 | columns.entrySet().forEach(column -> { 49 | var columnObj = column.getValue().getAsJsonObject(); 50 | fieldList.add(new DbtCatalogMetadata.Column(getColumnName(column), getDataType(columnObj), 51 | getColumnIndex(columnObj), getColumnComment(columnObj))); 52 | }); 53 | return fieldList; 54 | } 55 | 56 | public JsonObject getColumnJsonElement(JsonElement element) { 57 | return Json.getAsJsonObject(element.getAsJsonObject(), "columns") 58 | .orElseThrow(() -> new SchemaParserException("Error parsing dbt catalog: columns is empty")); 59 | } 60 | 61 | public String getColumnComment(JsonObject columnObj) { 62 | return Json.getAsString(columnObj, "comment"); 63 | } 64 | 65 | public Long 
getColumnIndex(JsonObject columnObj) { 66 | return Json.getAsLong(columnObj, "index"); 67 | } 68 | 69 | public String getDataType(JsonObject columnObj) { 70 | return Json.getAsString(columnObj, "type"); 71 | } 72 | 73 | public String getColumnName(Map.Entry column) { 74 | return column.getKey().toLowerCase(); 75 | } 76 | 77 | public DbtCatalogMetadata.Table extractTable(String modelName, JsonElement element) { 78 | var metadata = getMetadata(element); 79 | return new DbtCatalogMetadata.Table(getNamespace(metadata), getModelName(metadata), modelName, 80 | getComment(metadata), getOwner(metadata)); 81 | } 82 | 83 | public JsonObject getMetadata(JsonElement element) { 84 | return Json.getAsJsonObject(element.getAsJsonObject(), "metadata") 85 | .orElseThrow(() -> new SchemaParserException("Error parsing dbt catalog: Metadata Object is Empty")); 86 | } 87 | 88 | public JsonObject getNodes(JsonElement jsonParser) { 89 | return Json.getAsJsonObject(jsonParser.getAsJsonObject(), "nodes") 90 | .orElseThrow(() -> new SchemaParserException("Error parsing dbt catalog: Nodes is empty")); 91 | } 92 | 93 | public String getOwner(JsonObject metadata) { 94 | return Json.getAsString(metadata, "owner"); 95 | } 96 | 97 | public String getComment(JsonObject metadata) { 98 | return Json.getAsString(metadata, "comment"); 99 | } 100 | 101 | public String getModelName(JsonObject metadata) { 102 | return Json.getAsString(metadata, "name"); 103 | } 104 | 105 | public String getNamespace(JsonObject metadata) { 106 | return Json.getAsString(metadata, "schema"); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/dbt/DbtManifestParser.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider.dbt; 2 | 3 | import com.google.gson.JsonElement; 4 | import com.google.gson.JsonObject; 5 | import com.google.gson.JsonParser; 6 | import java.io.IOException; 7 | import java.nio.file.Files; 8 | import java.nio.file.Paths; 9 | import java.util.ArrayList; 10 | import java.util.Collections; 11 | import java.util.List; 12 | import java.util.Map; 13 | import java.util.Optional; 14 | import java.util.stream.Collectors; 15 | import java.util.stream.Stream; 16 | import org.schemata.domain.Depends; 17 | import org.schemata.domain.Field; 18 | import org.schemata.domain.Link; 19 | import org.schemata.domain.Schema; 20 | import org.schemata.exception.SchemaParserException; 21 | import org.schemata.json.Json; 22 | 23 | import static org.schemata.provider.dbt.DbtCatalogMetadata.MANIFEST_FILE; 24 | 25 | 26 | public class DbtManifestParser { 27 | 28 | public List parse(Map catalog, String path) { 29 | if (catalog == null) { 30 | return Collections.emptyList(); 31 | } 32 | 33 | List schemaList = new ArrayList<>(); 34 | 35 | try { 36 | var jsonParser = getManifestJsonParser(path); 37 | var nodes = getNodes(jsonParser); 38 | 39 | catalog.forEach((key, value) -> { 40 | var node = nodes.get(key); 41 | if (node != null && isModel(node.getAsJsonObject())) { 42 | var nodeObj = node.getAsJsonObject(); 43 | if (getConfig(nodeObj).isPresent()) { 44 | 45 | var fields = extractFields(nodeObj, value); 46 | var schema = new Schema.Builder(key, fields); 47 | 48 | var config = getConfig(nodeObj).get(); 49 | 50 | var builder = schema.domain(Json.getAsString(config, Schema.Prop.DOMAIN)) 51 | .schemaType("model") // We parse only the dbt model type. 
so every dbt schema is typed as a model 52 | .modelType(Json.getAsString(config, Schema.Prop.MODEL_TYPE)) 53 | .description(Json.getAsString(nodeObj, Schema.Prop.DESCRIPTION)) 54 | .reference(Json.getAsString(config, Schema.Prop.REFERENCE)) 55 | .seeAlso(Json.getAsString(config, Schema.Prop.SEE_ALSO)) 56 | .comment(value.table().comment()) 57 | .owner(value.table().owner()) 58 | .status(Json.getAsString(config, Schema.Prop.STATUS)) 59 | .alertChannel(Json.getAsString(config, Schema.Prop.ALERT_CHANNEL)) 60 | .teamChannel(Json.getAsString(config, Schema.Prop.TEAM_CHANNEL)) 61 | .complianceChannel(Json.getAsString(config, Schema.Prop.COMPLIANCE_CHANNEL)) 62 | .complianceOwner(Json.getAsString(config, Schema.Prop.COMPLIANCE_OWNER)); 63 | schemaList.add(builder.build()); 64 | } 65 | } 66 | }); 67 | } catch (IOException e) { 68 | throw new SchemaParserException("Error while parsing DbtManifestParser", e); 69 | } 70 | return schemaList; 71 | } 72 | 73 | public JsonObject getNodes(JsonElement jsonParser) { 74 | return Json.getAsJsonObject(jsonParser.getAsJsonObject(), "nodes") 75 | .orElseThrow(() -> new SchemaParserException("Error parsing DbtManifestParser: Nodes is empty")); 76 | } 77 | 78 | public JsonElement getManifestJsonParser(String path) 79 | throws IOException { 80 | try (Stream<String> lines = Files.lines(Paths.get(path, MANIFEST_FILE))) { // close the stream to avoid a file-handle leak 81 | String data = lines.collect(Collectors.joining("\n")); 82 | return JsonParser.parseString(data); 83 | } 84 | } 85 | public String getResourceType(JsonObject node) { 86 | return Json.getAsString(node, "resource_type"); 87 | } 88 | 89 | public boolean isModel(JsonObject node) { 90 | return getResourceType(node).equalsIgnoreCase("model"); 91 | } 92 | 93 | public Optional<JsonObject> getConfig(JsonObject node) { 94 | return Json.getAsJsonObject(node, "config"); 95 | } 96 | 97 | public List<Field> extractFields(JsonObject nodeObj, DbtCatalogMetadata.Catalog catalog) { 98 | var columns = Json.getAsJsonObject(nodeObj, "columns"); 99 | if (columns.isEmpty()) { 100 | return extractFieldsWithManifestMetadataNotDetected(catalog); 101 | } 102 | return enrichColumnMetadataWithManifest(catalog, columns.get()); 103 | } 104 | 105 | private List<Field> enrichColumnMetadataWithManifest(DbtCatalogMetadata.Catalog catalog, JsonObject columns) { 106 | List<Field> fieldList = new ArrayList<>(); 107 | catalog.column().forEach(column -> { 108 | var columnObj = columns.getAsJsonObject(column.name()); 109 | var builder = 110 | new Field.Builder(catalog.table().modelName(), column.name(), column.dataType(), true); 111 | 112 | if (isMetaObjExist(columnObj)) { 113 | var metaObj = columnObj.getAsJsonObject("meta"); 114 | builder.description(Json.getAsString(metaObj, Field.Prop.DESCRIPTION)) 115 | .link(getFieldLink(metaObj)) 116 | .depends(getDepends(metaObj)) 117 | .primaryKey(Json.getAsBoolean(metaObj, Field.Prop.IS_PRIMARY_KEY)); 118 | } 119 | fieldList.add(builder.build()); 120 | }); 121 | return fieldList; 122 | } 123 | 124 | public boolean isMetaObjExist(JsonObject columnObj) { 125 | return columnObj != null && columnObj.getAsJsonObject("meta") != null; 126 | } 127 | 128 | private Link getFieldLink(JsonObject obj) { 129 | var linkObj = Json.getAsJsonObject(obj, Field.Prop.LINK); 130 | 131 | if (linkObj.isEmpty()) { 132 | return null; 133 | } 134 | var linkPropObj = linkObj.get(); 135 | return new Link(Json.getAsString(linkPropObj, Field.Prop.MODEL), Json.getAsString(linkPropObj, Field.Prop.COLUMN)); 136 | } 137 | 138 | private List<Depends> getDepends(JsonObject metaObj) { 139 | List<Depends> dependsList = new ArrayList<>(); 140 | var obj = 
Json.getAsJsonArray(metaObj, Field.Prop.DEPENDS); 141 | if (obj.isEmpty()) { 142 | return dependsList; 143 | } 144 | for (JsonElement depends : obj.get()) { 145 | dependsList.add(new Depends(Json.getAsString(depends.getAsJsonObject(), Field.Prop.MODEL), 146 | Json.getAsString(depends.getAsJsonObject(), Field.Prop.COLUMN))); 147 | } 148 | return dependsList; 149 | } 150 | 151 | public List<Field> extractFieldsWithManifestMetadataNotDetected(DbtCatalogMetadata.Catalog catalog) { 152 | List<Field> fieldList = new ArrayList<>(); 153 | catalog.column().forEach(column -> { 154 | fieldList.add(new Field.Builder(catalog.table().modelName(), column.name(), column.dataType(), true) 155 | .build()); 156 | }); 157 | return fieldList; 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/dbt/DbtSchemaCompatibilityChecker.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider.dbt; 2 | 3 | import org.schemata.compatibility.Result; 4 | import org.schemata.compatibility.SchemaCompatibilityChecker; 5 | import org.schemata.compatibility.Summary; 6 | import org.schemata.domain.Schema; 7 | 8 | 9 | import java.util.*; 10 | 11 | /** 12 | * Compare the base schema with the change schema and return the incompatible changes. 13 | */ 14 | public class DbtSchemaCompatibilityChecker implements SchemaCompatibilityChecker { 15 | @Override 16 | public Result check(String baseSchemaPath, String changeSchemaPath) { 17 | var baseSchema = new DbtSchemaParser().getSchemaList(baseSchemaPath); 18 | var changeSchema = new DbtSchemaParser().getSchemaList(changeSchemaPath); 19 | var summaries = compare(buildSchemaMap(baseSchema), buildSchemaMap(changeSchema)); 20 | return new Result(summaries.size() == 0, summaries); 21 | } 22 | 23 | private Map<SchemaKey, SchemaValue> buildSchemaMap(List<Schema> schemaList) { 24 | Map<SchemaKey, SchemaValue> schemaMap = new HashMap<>(); 25 | for (var schema : schemaList) { 26 | for (var field : schema.fieldList()) { 27 | schemaMap.put(new SchemaKey(schema.name(), field.name()), new SchemaValue(field.dataType())); 28 | } 29 | } 30 | return schemaMap; 31 | } 32 | 33 | /** 34 | * The current data type validation is a 'strict type' validation. It doesn't support `type boxing`. 35 | * 36 | * @param base base schema value 37 | * @param change change schema value 38 | * @return Summary of incompatible changes 39 | */ 40 | private Set<Summary> compare(Map<SchemaKey, SchemaValue> base, Map<SchemaKey, SchemaValue> change) { 41 | 42 | for (var entry : change.entrySet()) { 43 | if (base.containsKey(entry.getKey()) 44 | && isDataTypeCompatible(base.get(entry.getKey()), entry.getValue())) { 45 | base.remove(entry.getKey()); 46 | } 47 | } 48 | if (base.size() > 0) { 49 | return getIncompatibleSchemaChanges(base); 50 | } 51 | return Set.of(); // return empty set 52 | } 53 | 54 | /** 55 | * The current data type validation is a 'strict type' validation. It doesn't support `type boxing`. 56 | * We intend to enrich this compatibility check in the future. For example, `int32` and `int64` are compatible. 57 | * 58 | * @param baseValue base schema value 59 | * @param changeValue change schema value 60 | * @return true if compatible, false otherwise 61 | */ 62 | private boolean isDataTypeCompatible(SchemaValue baseValue, SchemaValue changeValue) { 63 | return baseValue.type.equalsIgnoreCase(changeValue.type); 64 | } 65 | 66 | /** 67 | * Loop through the base schema map and build a set of incompatible schema changes. 
68 | * 69 | * @param base base schema map 70 | * @return set of incompatible schema changes 71 | */ 72 | private static Set getIncompatibleSchemaChanges(Map base) { 73 | Set summaries = new HashSet<>(); 74 | for (var entry : base.entrySet()) { 75 | var key = entry.getKey(); 76 | var value = entry.getValue(); 77 | summaries.add(new Summary.Builder().fieldName(key.fieldName) 78 | .schemaName(key.table) 79 | .fieldType(value.type).build()); 80 | } 81 | return summaries; 82 | } 83 | 84 | 85 | record SchemaKey(String table, String fieldName) { 86 | } 87 | 88 | record SchemaValue(String type) { 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/dbt/DbtSchemaParser.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider.dbt; 2 | 3 | import java.util.List; 4 | import org.schemata.domain.Schema; 5 | import org.schemata.provider.SchemaParser; 6 | 7 | 8 | /** 9 | * Parse the dbt generated catalog.json and manifest.json to gather metadata 10 | * 11 | * Steps: 12 | * ====== 13 | * 1. Parse the catalog.json and gather the list of models, columns and types & index 14 | * 2. Parse manifest.json for each model and gather additional metadata 15 | */ 16 | public class DbtSchemaParser implements SchemaParser { 17 | 18 | @Override 19 | public List getSchemaList(String path) { 20 | var dbtCatalogParser = new DbtCatalogParser(); 21 | var catalog = dbtCatalogParser.parse(path); 22 | return new DbtManifestParser().parse(catalog, path); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/protobuf/Loader.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider.protobuf; 2 | 3 | import com.google.protobuf.Descriptors; 4 | import com.google.protobuf.Descriptors.Descriptor; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * Abstracts loading of Descriptor objects from various sources 10 | */ 11 | public interface Loader { 12 | List loadDescriptors() throws Descriptors.DescriptorValidationException; 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/protobuf/ProtoFileDescriptorSetLoader.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider.protobuf; 2 | 3 | import com.google.protobuf.DescriptorProtos; 4 | import com.google.protobuf.Descriptors; 5 | import com.google.protobuf.ExtensionRegistry; 6 | import org.jgrapht.graph.DirectedAcyclicGraph; 7 | import org.jgrapht.util.SupplierUtil; 8 | import org.schemata.schema.SchemataBuilder; 9 | import org.schemata.schema.SchemataConstraintsBuilder; 10 | import org.schemata.schema.SchemataSubscribersBuilder; 11 | 12 | import java.io.IOException; 13 | import java.io.InputStream; 14 | import java.util.Collection; 15 | import java.util.HashMap; 16 | import java.util.List; 17 | import java.util.Map; 18 | import java.util.stream.Collectors; 19 | 20 | /** 21 | * Loads message descriptors from DescriptorProtos.FileDescriptorSet provided directly or by InputStream 22 | */ 23 | public class ProtoFileDescriptorSetLoader implements Loader { 24 | 25 | private final DescriptorProtos.FileDescriptorSet descriptorSet; 26 | 27 | public ProtoFileDescriptorSetLoader(InputStream stream) throws IOException { 28 | var registry = ExtensionRegistry.newInstance(); 29 | 
SchemataBuilder.registerAllExtensions(registry); 30 | SchemataSubscribersBuilder.registerAllExtensions(registry); 31 | SchemataConstraintsBuilder.registerAllExtensions(registry); 32 | 33 | this.descriptorSet = DescriptorProtos.FileDescriptorSet.parseFrom(stream, registry); 34 | } 35 | 36 | @Override 37 | public List loadDescriptors() throws Descriptors.DescriptorValidationException {// we need to build a DAG of filenames so that we can build the Descriptors.Descriptor objects 38 | // we need to build a DAG of filenames that import each other, so that we can build the Descriptors.Descriptor 39 | // objects in the correct order, providing each one with a Descriptor for each file it imports 40 | var dependencyFilenames = buildFileDependencyGraph(descriptorSet); 41 | 42 | // we key the basic proto representations of the FileDescriptor by filename for simpler retrieval 43 | var fileDescriptorProtosByName = indexFileDescriptorProtoByFilename(descriptorSet); 44 | 45 | // these be the parsed FileDescriptor objects (again keyed by filename) so that they can be passed back into 46 | // the instantiation of any other files that import them 47 | var descriptors = new HashMap(); 48 | 49 | // forEach for the DAG is executed in topological sort order so that we first create the 50 | // FileDescriptors for the leaves in the dependency graph, working backwards to the roots 51 | for (String filename : dependencyFilenames) { 52 | var file = fileDescriptorProtosByName.get(filename); 53 | var dependenciesForFile = file 54 | .getDependencyList() 55 | .stream() 56 | .map(descriptors::get) 57 | .toArray(Descriptors.FileDescriptor[]::new); 58 | 59 | var descriptor = Descriptors.FileDescriptor.buildFrom(file, dependenciesForFile); 60 | descriptors.put(filename, descriptor); 61 | } 62 | 63 | // lastly, we collect out each of the message Descriptor objects from the FileDescriptors into a flat list 64 | return collectAllMessageDescriptors(descriptors.values()); 65 | } 66 | 67 | private DirectedAcyclicGraph buildFileDependencyGraph(DescriptorProtos.FileDescriptorSet descriptorSet) { 68 | var dependencyFilenames = new DirectedAcyclicGraph<>( 69 | SupplierUtil.createSupplier(String.class), 70 | SupplierUtil.createSupplier(String.class), false); 71 | 72 | // populate the graph 73 | for (var fileDescriptorProto : descriptorSet.getFileList()) { 74 | dependencyFilenames.addVertex(fileDescriptorProto.getName()); 75 | for (String dependency : fileDescriptorProto.getDependencyList()) {// adding a vertex is idempotent 76 | dependencyFilenames.addVertex(dependency); 77 | // dependencies point to the file that imports them rather than the other way round 78 | // to ensure we can traverse the graph in the correct order (depth first) 79 | dependencyFilenames.addEdge(dependency, fileDescriptorProto.getName()); 80 | } 81 | } 82 | return dependencyFilenames; 83 | } 84 | 85 | public Map indexFileDescriptorProtoByFilename 86 | (DescriptorProtos.FileDescriptorSet descriptorSet) { 87 | return descriptorSet 88 | .getFileList() 89 | .stream() 90 | .collect(Collectors.toMap(DescriptorProtos.FileDescriptorProto::getName, file -> file)); 91 | } 92 | 93 | public DescriptorProtos.FileDescriptorSet getDescriptorSet() { 94 | return descriptorSet; 95 | } 96 | 97 | private List collectAllMessageDescriptors 98 | (Collection descriptors) { 99 | return descriptors 100 | .stream() 101 | .flatMap(fileDescriptor -> fileDescriptor.getMessageTypes().stream()) 102 | .toList(); 103 | } 104 | 105 | } 
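A minimal usage sketch (not part of the repository, shown only for illustration): wiring the loader above to ProtoProcessor to turn a compiled descriptor set into Schemata domain objects. The file name "model.desc" and the protoc invocation in the comment are assumptions; any descriptor set built with --include_imports should work the same way.

// Hypothetical example, assuming model.desc was generated with:
//   protoc --descriptor_set_out=model.desc --include_imports <your .proto files>
import com.google.protobuf.Descriptors;
import java.io.FileInputStream;
import java.util.List;
import org.schemata.domain.Schema;
import org.schemata.provider.protobuf.Loader;
import org.schemata.provider.protobuf.ProtoFileDescriptorSetLoader;
import org.schemata.provider.protobuf.ProtoProcessor;

public class LoaderUsageSketch {
  public static void main(String[] args) throws Exception {
    try (FileInputStream stream = new FileInputStream("model.desc")) {
      Loader loader = new ProtoFileDescriptorSetLoader(stream);
      // loadDescriptors() resolves file imports in topological order (see the DAG logic above)
      List<Descriptors.Descriptor> descriptors = loader.loadDescriptors();
      // ProtoProcessor keeps only messages annotated with a known schema_type
      List<Schema> schemaList = new ProtoProcessor().parse(descriptors);
      schemaList.forEach(schema -> System.out.println(schema.name()));
    }
  }
}

ProtoSchemaParser, further below, exposes essentially the same flow behind the SchemaParser interface.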
-------------------------------------------------------------------------------- /src/main/java/org/schemata/provider/protobuf/ProtoProcessor.java: -------------------------------------------------------------------------------- 1 | package org.schemata.provider.protobuf; 2 | 3 | import com.google.protobuf.Descriptors; 4 | import com.google.protobuf.Value; 5 | import org.schemata.domain.Constraints; 6 | import org.schemata.domain.Field; 7 | import org.schemata.domain.Schema; 8 | import org.schemata.domain.Subscribers; 9 | import org.schemata.schema.SchemataBuilder; 10 | import org.schemata.schema.SchemataConstraintsBuilder; 11 | import org.schemata.schema.SchemataSubscribersBuilder; 12 | 13 | 14 | import java.util.*; 15 | import java.util.stream.Collectors; 16 | 17 | 18 | public class ProtoProcessor { 19 | 20 | private static final Set INCLUDED_PRIMITIVE_TYPES = Set.of("google.protobuf.Timestamp"); 21 | 22 | public List parse(List descriptors) { 23 | return descriptors 24 | .stream() 25 | .filter(this::isAnnotated) 26 | .map(this::parseSingleSchema) 27 | .toList(); 28 | } 29 | 30 | public Schema parseSingleSchema(Descriptors.Descriptor descriptor) { 31 | String schemaName = descriptor.getFullName(); 32 | // Extract all the metadata for the fieldList 33 | var fieldList = extractFields(descriptor.getFields(), schemaName); 34 | return extractSchema(descriptor, descriptor.getFullName(), fieldList); 35 | } 36 | 37 | public Schema extractSchema(Descriptors.Descriptor descriptorType, String schema, List fieldList) { 38 | Schema.Builder builder = new Schema.Builder(schema, fieldList); 39 | for (Map.Entry entry : descriptorType.getOptions().getAllFields().entrySet()) { 40 | switch (entry.getKey().getName()) { 41 | case "message_core" -> { 42 | SchemataBuilder.CoreMetadata coreMetadata = (SchemataBuilder.CoreMetadata) entry.getValue(); 43 | builder.description(coreMetadata.getDescription()); 44 | builder.comment(coreMetadata.getComment()); 45 | builder.seeAlso(coreMetadata.getSeeAlso()); 46 | builder.reference(coreMetadata.getReference()); 47 | } 48 | case "owner" -> builder.owner(Objects.toString(entry.getValue(), "")); 49 | case "domain" -> builder.domain(Objects.toString(entry.getValue(), "")); 50 | case "schema_type" -> builder.schemaType(entry.getValue().toString()); 51 | case "event_type" -> builder.eventType(entry.getValue().toString()); 52 | case "status" -> builder.status(Objects.toString(entry.getValue(), "")); 53 | case "team_channel" -> builder.teamChannel(Objects.toString(entry.getValue(), "")); 54 | case "alert_channel" -> builder.alertChannel(Objects.toString(entry.getValue(), "")); 55 | case "compliance_owner" -> builder.complianceOwner(Objects.toString(entry.getValue(), "")); 56 | case "compliance_channel" -> builder.complianceChannel(Objects.toString(entry.getValue(), "")); 57 | case "downstream" -> builder.downstreamSubscribersList(extractDownstreamConsumers(entry)); 58 | case "upstream" -> builder.upstreamSubscribersList(extractUpstreamConsumers(entry)); 59 | case "constraints" -> builder.constraintsList(extractConstraintsList(entry)); 60 | } 61 | } 62 | return builder.build(); 63 | } 64 | 65 | private static List extractConstraintsList(Map.Entry entry) { 66 | SchemataConstraintsBuilder.Constraints constraints = (SchemataConstraintsBuilder.Constraints) entry.getValue(); 67 | List constraintsList = new ArrayList<>(); 68 | for (var constraint : constraints.getConstraintList()) { 69 | Map constraintMap = processConstraintConfig(constraint); 70 | constraintsList.add(new 
Constraints(constraint.getName(), constraint.getDescription(), constraintMap)); 71 | } 72 | return constraintsList; 73 | } 74 | 75 | private static Map processConstraintConfig(SchemataConstraintsBuilder.Constraint constraint) { 76 | Map constraintMap = new HashMap<>(); 77 | for (String key : constraint.getConfigMap().keySet()) { 78 | var value = constraint.getConfigMap().get(key); 79 | if (value.hasListValue()) { 80 | var listValueConstraints = value.getListValue().getValuesList().stream() 81 | .map(Value::getStringValue) 82 | .collect(Collectors.toList()); 83 | constraintMap.put(key, (Constraints.Constraint.listConstraints(key, listValueConstraints, 84 | Constraints.DataType.LIST))); 85 | } else { 86 | var dataType = Constraints.DataType.fromString(value.getKindCase().name()); 87 | constraintMap.put(key, (Constraints.Constraint.primitiveConstraints(key, getConstraintValue(value), 88 | dataType))); 89 | } 90 | } 91 | return constraintMap; 92 | } 93 | 94 | private static Object getConstraintValue(Value value) { 95 | return switch (value.getKindCase()) { 96 | case NUMBER_VALUE -> value.getNumberValue(); 97 | case STRING_VALUE -> value.getStringValue(); 98 | case BOOL_VALUE -> value.getBoolValue(); 99 | default -> null; 100 | }; 101 | } 102 | 103 | 104 | private static List extractUpstreamConsumers(Map.Entry entry) { 105 | SchemataSubscribersBuilder.Upstream upstream = (SchemataSubscribersBuilder.Upstream) entry.getValue(); 106 | return upstream 107 | .getSubscribersList() 108 | .stream() 109 | .map(subscribe -> new Subscribers(subscribe.getName(), subscribe.getUsage())) 110 | .collect(Collectors.toList()); 111 | } 112 | 113 | private static List extractDownstreamConsumers(Map.Entry entry) { 114 | SchemataSubscribersBuilder.Downstream downstream = (SchemataSubscribersBuilder.Downstream) entry.getValue(); 115 | return downstream 116 | .getSubscribersList() 117 | .stream() 118 | .map(subscribe -> new Subscribers(subscribe.getName(), subscribe.getUsage())) 119 | .collect(Collectors.toList()); 120 | } 121 | 122 | public List extractFields(List fieldDescriptorList, String schema) { 123 | List fields = new ArrayList<>(); 124 | 125 | for (Descriptors.FieldDescriptor entry : fieldDescriptorList) { 126 | String type = entry.getType() == Descriptors.FieldDescriptor.Type.MESSAGE ? 
--------------------------------------------------------------------------------
/src/main/java/org/schemata/provider/protobuf/ProtoSchemaCompatibilityChecker.java:
--------------------------------------------------------------------------------
package org.schemata.provider.protobuf;


import com.google.protobuf.DescriptorProtos;
import org.schemata.compatibility.Result;
import org.schemata.compatibility.SchemaCompatibilityChecker;
import org.schemata.compatibility.Summary;

import java.io.File;
import java.io.FileInputStream;
import java.util.*;


public class ProtoSchemaCompatibilityChecker implements SchemaCompatibilityChecker {

  static Set<String> EXCLUDED_FILES = Set.of("protobuf/schemata.proto",
      "protobuf/constraints.proto", "protobuf/struct.proto", "protobuf/timestamp.proto",
      "protobuf/descriptor.proto", "protobuf/subscribers.proto", "protobuf/empty.proto");

  // A base field type may be widened to its mapped type without breaking compatibility.
  static Map<String, String> COMPATIBLE_TYPES = Map.of("TYPE_INT32", "TYPE_INT64", "TYPE_UINT32", "TYPE_UINT64");

  @Override
  public Result check(String baseSchemaPath, String changeSchemaPath) {
    var baseSchemaMap = getSchemaMap(baseSchemaPath);
    var changeSchemaMap = getSchemaMap(changeSchemaPath);
    var summaries = compare(baseSchemaMap, changeSchemaMap);
    return new Result(summaries.isEmpty(), summaries);
  }

  private Set<Summary> compare(Map<SchemaKey, SchemaValue> base, Map<SchemaKey, SchemaValue> change) {
    // Remove every base entry that is still present, unchanged or compatibly widened, in the new schema.
    for (var changeEntry : change.entrySet()) {
      if (base.containsKey(changeEntry.getKey())
          && isDataTypeCompatible(base.get(changeEntry.getKey()), changeEntry.getValue())) {
        base.remove(changeEntry.getKey());
      }
    }
    if (!base.isEmpty()) {
      return getIncompatibleSchemaChanges(base);
    }
    return Set.of(); // return empty set
  }

  private static Set<Summary> getIncompatibleSchemaChanges(Map<SchemaKey, SchemaValue> base) {
    Set<Summary> summaries = new HashSet<>();
    for (var entry : base.entrySet()) {
      var key = entry.getKey();
      var value = entry.getValue();
      summaries.add(new Summary.Builder().filename(key.filename())
          .schemaName(key.messageName())
          .fieldName(key.fieldName())
          .fieldType(value.type())
          .build());
    }
    return summaries;
  }

  private static boolean isDataTypeCompatible(SchemaValue baseValue, SchemaValue changeValue) {
    return baseValue.type().equalsIgnoreCase(changeValue.type())
        || changeValue.type().equalsIgnoreCase(COMPATIBLE_TYPES.get(baseValue.type().toUpperCase()));
  }

  private Map<SchemaKey, SchemaValue> getSchemaMap(String path) {
    Map<SchemaKey, SchemaValue> schemaValueMap = new HashMap<>();

    try (FileInputStream stream = new FileInputStream(path)) {
      ProtoFileDescriptorSetLoader loader = new ProtoFileDescriptorSetLoader(stream);
      var fileDescriptorProtoMap = loader.indexFileDescriptorProtoByFilename(loader.getDescriptorSet());

      for (var entry : fileDescriptorProtoMap.entrySet()) {
        var descriptor = entry.getValue();
        String filename = getLeafDirectoryAndFileName(descriptor.getName());
        if (EXCLUDED_FILES.contains(filename)) {
          continue;
        }
        processDescriptorMessages(schemaValueMap, descriptor, filename);
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }

    return schemaValueMap;
  }

  private static void processDescriptorMessages(Map<SchemaKey, SchemaValue> schemaValueMap,
      DescriptorProtos.FileDescriptorProto descriptor, String filename) {
    for (var message : descriptor.getMessageTypeList()) {
      for (var field : message.getFieldList()) {
        var key = new SchemaKey(filename, message.getName(), field.getNumber(), field.getName());
        var value = new SchemaValue(field.getType().name());
        schemaValueMap.put(key, value);
      }
    }
  }

  private static String getLeafDirectoryAndFileName(String filePath) {
    File file = new File(filePath);
    String parent = file.getParent();
    String fileName = file.getName();
    if (parent != null && !parent.isBlank()) {
      var dirSplit = parent.split("[\\\\/]");
      if (dirSplit.length > 0) {
        return dirSplit[dirSplit.length - 1] + File.separator + fileName;
      }
    }
    return fileName;
  }


  record SchemaKey(String filename, String messageName, int fieldNumber, String fieldName) {
  }

  record SchemaValue(String type) {
  }
}
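A minimal usage sketch of the checker, mirroring the compatibility tests further down in this dump; the descriptor-set paths and the example class name are placeholders:

import org.schemata.provider.protobuf.ProtoSchemaCompatibilityChecker;

public class CompatibilityGateExample {
  public static void main(String[] args) {
    var checker = new ProtoSchemaCompatibilityChecker();
    // Placeholder paths: the published descriptor set vs. the proposed change.
    var result = checker.check("base_model.desc", "changed_model.desc");
    if (!result.isCompatible()) {
      // Every Summary pinpoints a field that was removed or re-typed incompatibly.
      result.summary().forEach(summary -> System.out.println("breaking change on field: " + summary.fieldName()));
    }
  }
}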
--------------------------------------------------------------------------------
/src/main/java/org/schemata/provider/protobuf/ProtoSchemaParser.java:
--------------------------------------------------------------------------------
package org.schemata.provider.protobuf;

import com.google.protobuf.Descriptors;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;
import org.schemata.domain.Schema;
import org.schemata.exception.SchemaParserException;
import org.schemata.provider.SchemaParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Parse a .desc proto descriptor file
 */
public class ProtoSchemaParser implements SchemaParser {

  private static final Logger logger = LoggerFactory.getLogger(ProtoSchemaParser.class);

  @Override
  public List<Schema> getSchemaList(String path)
      throws SchemaParserException {
    try (var stream = new FileInputStream(path)) {
      var loader = new ProtoFileDescriptorSetLoader(stream);
      var descriptors = loader.loadDescriptors();
      return new ProtoProcessor().parse(descriptors);
    } catch (IOException | Descriptors.DescriptorValidationException e) {
      logger.error("Error parsing descriptor file:", e);
      throw new SchemaParserException("Error parsing Proto Schema", e);
    }
  }
}
--------------------------------------------------------------------------------
/src/main/java/org/schemata/validate/FieldTrigger.java:
--------------------------------------------------------------------------------
package org.schemata.validate;

import java.util.function.Predicate;
import org.apache.commons.lang3.StringUtils;
import org.schemata.domain.Field;


public interface FieldTrigger extends Predicate<Field> {

  FieldTrigger isDescriptionEmpty = field -> StringUtils.isBlank(field.description());
  FieldTrigger isClassificationLevelEmpty =
      field -> field.isClassified() && StringUtils.isBlank(field.classificationLevel());
}
--------------------------------------------------------------------------------
/src/main/java/org/schemata/validate/FieldValidator.java:
--------------------------------------------------------------------------------
package org.schemata.validate;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import org.schemata.domain.Field;

import static org.schemata.validate.FieldTrigger.isClassificationLevelEmpty;
import static org.schemata.validate.FieldTrigger.isDescriptionEmpty;


public class FieldValidator implements Function<Field, Result>, Validator<Field> {
  @Override
  public Result apply(Field field) {

    List<String> errors = new ArrayList<>();
    for (Map.Entry<Rules, FieldTrigger> ruleTrigger : fieldValidatorMap().entrySet()) {
      var result = test(ruleTrigger.getKey(), ruleTrigger.getValue(), field);
      result.ifPresent(errors::add);
    }
    return errors.isEmpty() ? new Result(Status.SUCCESS, errors) : new Result(Status.ERROR, errors);
  }

  private Map<Rules, FieldTrigger> fieldValidatorMap() {
    return Map.of(Rules.FIELD_DESCRIPTION_EMPTY, isDescriptionEmpty, Rules.FIELD_CLASSIFICATION_EMPTY,
        isClassificationLevelEmpty);
  }
}
--------------------------------------------------------------------------------
/src/main/java/org/schemata/validate/Result.java:
--------------------------------------------------------------------------------
package org.schemata.validate;

import java.util.List;


public record Result(Status status, List<String> errorMessages) {
}
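To see the field rules fire, here is a hedged sketch that validates a classified field missing its classification level; the builder calls follow the style used in FieldValidatorTest below, and the example class name and field values are illustrative:

import org.schemata.domain.Field;
import org.schemata.validate.FieldValidator;
import org.schemata.validate.Status;

public class FieldValidationExample {
  public static void main(String[] args) {
    Field.Builder builder = new Field.Builder("org.example.User", "email", "STRING", true);
    builder.description("email id for the user");
    builder.isClassified(true); // classified, but no classification level set
    var result = new FieldValidator().apply(builder.build());
    // FIELD_CLASSIFICATION_EMPTY should fire, so the status is ERROR.
    System.out.println(result.status() == Status.ERROR ? result.errorMessages() : "field metadata is complete");
  }
}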
--------------------------------------------------------------------------------
/src/main/java/org/schemata/validate/Rules.java:
--------------------------------------------------------------------------------
package org.schemata.validate;

public enum Rules {
  SCHEMA_DESCRIPTION_EMPTY("Schema description metadata is null or empty"),
  SCHEMA_DOMAIN_EMPTY("Schema domain metadata is null or empty"),
  SCHEMA_OWNER_EMPTY("Schema owner metadata is null or empty"),
  SCHEMA_UNKNOWN_TYPE("UNKNOWN is not a valid type. It should be either ENTITY or EVENT"),
  FIELD_DESCRIPTION_EMPTY("Field description metadata is null or empty"),
  FIELD_CLASSIFICATION_EMPTY("The field is marked as classified, but the classification level is missing");

  public final String errorMessage;

  Rules(String errorMessage) {
    this.errorMessage = errorMessage;
  }
}
--------------------------------------------------------------------------------
/src/main/java/org/schemata/validate/SchemaTrigger.java:
--------------------------------------------------------------------------------
package org.schemata.validate;

import java.util.function.Predicate;
import org.apache.commons.lang3.StringUtils;
import org.schemata.domain.Schema;
import org.schemata.domain.SchemaType;


public interface SchemaTrigger extends Predicate<Schema> {

  SchemaTrigger isDescriptionEmpty = schema -> StringUtils.isBlank(schema.description());

  SchemaTrigger isOwnerEmpty = schema -> StringUtils.isBlank(schema.owner());

  SchemaTrigger isDomainEmpty = schema -> StringUtils.isBlank(schema.domain());

  SchemaTrigger isInvalidType = schema -> SchemaType.UNKNOWN.name().equalsIgnoreCase(schema.type());
}
--------------------------------------------------------------------------------
/src/main/java/org/schemata/validate/SchemaValidator.java:
--------------------------------------------------------------------------------
package org.schemata.validate;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import org.schemata.domain.Schema;

import static org.schemata.validate.SchemaTrigger.*;


public class SchemaValidator implements Function<Schema, Result>, Validator<Schema> {

  @Override
  public Result apply(Schema schema) {

    List<String> errors = new ArrayList<>();
    for (Map.Entry<Rules, SchemaTrigger> ruleTrigger : schemaValidatorMap().entrySet()) {
      var result = test(ruleTrigger.getKey(), ruleTrigger.getValue(), schema);
      result.ifPresent(errors::add);
    }

    return errors.isEmpty() ? new Result(Status.SUCCESS, errors) : new Result(Status.ERROR, errors);
  }

  private Map<Rules, SchemaTrigger> schemaValidatorMap() {
    return Map.of(Rules.SCHEMA_DESCRIPTION_EMPTY, isDescriptionEmpty, Rules.SCHEMA_OWNER_EMPTY, isOwnerEmpty,
        Rules.SCHEMA_DOMAIN_EMPTY, isDomainEmpty, Rules.SCHEMA_UNKNOWN_TYPE, isInvalidType);
  }
}
--------------------------------------------------------------------------------
/src/main/java/org/schemata/validate/Status.java:
--------------------------------------------------------------------------------
package org.schemata.validate;

public enum Status {
  SUCCESS, ERROR
}
--------------------------------------------------------------------------------
/src/main/java/org/schemata/validate/Validator.java:
--------------------------------------------------------------------------------
package org.schemata.validate;

import java.util.Optional;
import java.util.function.Predicate;


public interface Validator<T> {

  default Optional<String> test(Rules rule, Predicate<T> predicate, T t) {
    return predicate.test(t) ? Optional.of(rule.errorMessage) : Optional.empty();
  }
}
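And the schema-level counterpart, a sketch built from the same builder calls SchemaValidatorTest uses; the schema name and values are illustrative:

import java.util.List;
import org.schemata.domain.Field;
import org.schemata.domain.Schema;
import org.schemata.validate.SchemaValidator;

public class SchemaValidationExample {
  public static void main(String[] args) {
    Field.Builder fieldBuilder = new Field.Builder("org.example.Brand", "id", "INT32", true);
    fieldBuilder.description("Unique identifier for Brand");
    Schema.Builder builder = new Schema.Builder("org.example.Brand", List.of(fieldBuilder.build()));
    builder.description("Brand entity");
    builder.owner("Platform");
    builder.domain("Core");
    builder.schemaType("ENTITY");
    // All four schema triggers are evaluated; an empty owner or domain would flip this to ERROR.
    var result = new SchemaValidator().apply(builder.build());
    System.out.println(result.status() + " " + result.errorMessages());
  }
}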
--------------------------------------------------------------------------------
/src/main/resources/avro/brand.avsc:
--------------------------------------------------------------------------------
{"namespace": "org.schemata.schema",
 "type": "record",
 "name": "Brand",
 "fields": [
     {"name": "id", "type": "int"},
     {"name": "name", "type": ["string", "null"]},
     {"name": "is_active", "type": "boolean"}
 ]
}
--------------------------------------------------------------------------------
/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
<configuration>

  <!-- Console appender -->
  <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
    <encoder>
      <!-- Pattern of log message for console appender -->
      <pattern>%d{yyyy-MM-dd HH:mm:ss} %-5p %m%n</pattern>
    </encoder>
  </appender>

  <!-- File appender -->

  <root level="info">
    <appender-ref ref="STDOUT"/>
  </root>

</configuration>
--------------------------------------------------------------------------------
/src/main/resources/schema/brand.proto:
--------------------------------------------------------------------------------
syntax = "proto3";
package org.schemata.schema;

import "schemata/protobuf/schemata.proto";

option java_package = "org.protocol.schema";
option java_outer_classname = "BrandBuilder";


message Brand {

  option(message_core).description = "This is the description of the Brand table";
  option(message_core).comment = "The comment added after thought";
  option(message_core).see_also = "db.brand MySQL table";
  option(owner) = "Platform";
  option(domain) = "Core";
  option(schema_type) = ENTITY;
  option(team_channel) = "#team-platform";
  option(alert_channel) = "#alerts-platform";

  int32 id = 1
  [(field_core).description = "Unique identifier for Brand", (is_primary_key) = true];

  string name = 2
  [(field_core).description = "Name of the Brand"];

  bool is_active = 3
  [(field_core).description = "define the active status of the Brand. `true` == active; `false` == inactive", (field_core).comment = "should refactor to non-binary status"];

}

message BrandEvent {
  option(message_core).description = "This is the description of the brand activity table";
  option(owner) = "Platform";
  option(domain) = "Core";
  option(schema_type) = EVENT;
  option(event_type) = LIFECYCLE;
  option(team_channel) = "#team-platform";
  option(alert_channel) = "#alerts-platform";

  Brand previous_brand_state = 1
  [(field_core).description = "Previous version of the Brand entity before the mutation"];

  Brand current_brand_state = 2
  [(field_core).description = "Current version of the Brand entity after the mutation"];

  ActivityType activity_type = 3
  [(field_core).description = "Lifecycle event type for the Brand table"];
}
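brand.avsc above mirrors the Brand entity on the Avro side; a hedged sketch of feeding an Avro schema through AvroSchemaParser (method names taken from AvroSchemaParserTest later in this dump; the file path and example class name are assumptions):

import org.schemata.provider.avro.AvroSchemaParser;

public class AvroSchemaExample {
  public static void main(String[] args) throws Exception {
    var parser = new AvroSchemaParser();
    // Placeholder path; buildSchema maps the Avro record onto the Schemata domain model.
    var schema = parser.buildSchema("src/main/resources/avro/brand.avsc");
    System.out.println(schema.name() + " has " + schema.fieldList().size() + " fields");
  }
}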
--------------------------------------------------------------------------------
/src/main/resources/schema/campaign.proto:
--------------------------------------------------------------------------------
syntax = "proto3";
package org.schemata.schema;

import "schemata/protobuf/schemata.proto";
import "product.proto";
import "google/protobuf/timestamp.proto";
import "category.proto";

option java_package = "org.protocol.schema";
option java_outer_classname = "CampaignBuilder";


enum CampaignOrigin {
  EMAIL = 0;
  SOCIAL_MEDIA = 1;
}

message Campaign {

  option(message_core).description = "This is the description of the Campaign table";
  option(message_core).comment = "The comment added after thought";
  option(message_core).see_also = "db.campaign MySQL table";
  option(owner) = "Marketing";
  option(domain) = "Growth";
  option(schema_type) = ENTITY;
  option(team_channel) = "#team-growth";
  option(alert_channel) = "#alerts-growth";

  int32 id = 1
  [(field_core).description = "Unique identifier for Campaign", (is_primary_key) = true];

  string name = 2
  [(field_core).description = "Name of the Campaign"];

  bool is_active = 3
  [(field_core).description = "define the active status of the Campaign. `true` == active; `false` == inactive", (field_core).comment = "should refactor to non-binary status"];

}
message CampaignEvent {
  option(message_core).description = "This is the description of the Campaign activity table";
  option(owner) = "Marketing";
  option(domain) = "Growth";
  option(schema_type) = EVENT;
  option(event_type) = LIFECYCLE;
  option(team_channel) = "#team-growth";
  option(alert_channel) = "#alerts-growth";

  Campaign previous_campaign_state = 1
  [(field_core).description = "Previous version of the Campaign entity before the mutation"];

  Campaign current_campaign_state = 2
  [(field_core).description = "Current version of the Campaign entity after the mutation"];

  ActivityType activity_type = 3
  [(field_core).description = "Lifecycle event type for the Campaign table"];
}

message CampaignCategoryTrackerEvent {
  option(message_core).description = "This is the description of the Campaign activity table";
  option(owner) = "Marketing";
  option(domain) = "Growth";
  option(schema_type) = EVENT;
  option(event_type) = ACTIVITY;
  option(team_channel) = "#team-growth";
  option(alert_channel) = "#alerts-growth";

  Campaign campaign = 1 [(field_core).description = "Campaign entity"];
  Category category = 2 [(field_core).description = "Category of the targeted campaign"];
  CampaignOrigin origin = 3 [(field_core).description = "origin source of the campaign"];
  google.protobuf.Timestamp timestamp = 4 [(field_core).description = "Timestamp of the activity"];

}

message CampaignProductTrackerEvent {

  option(message_core).description = "This is the description of the Campaign activity table";
  option(owner) = "Marketing";
  option(domain) = "Growth";
  option(schema_type) = EVENT;
  option(event_type) = ACTIVITY;
  option(team_channel) = "#team-growth";
  option(alert_channel) = "#alerts-growth";

  Campaign campaign = 1 [(field_core).description = "Campaign entity"];
  Product product = 2 [(field_core).description = "Product of the targeted campaign"];
  CampaignOrigin origin = 3 [(field_core).description = "origin source of the campaign"];
  google.protobuf.Timestamp timestamp = 4 [(field_core).description = "Timestamp of the activity"];

}
--------------------------------------------------------------------------------
/src/main/resources/schema/category.proto:
--------------------------------------------------------------------------------
syntax = "proto3";
package org.schemata.schema;

import "schemata/protobuf/schemata.proto";

option java_package = "org.protocol.schema";
option java_outer_classname = "CategoryBuilder";


message Category {

  option(message_core).description = "This is the description of the Category table";
  option(message_core).comment = "The comment added after thought";
  option(message_core).see_also = "db.category MySQL table";
  option(owner) = "Platform";
  option(domain) = "Core";
  option(schema_type) = ENTITY;
  option(team_channel) = "#team-platform";
  option(alert_channel) = "#alerts-platform";

  int32 id = 1
  [(field_core).description = "Unique identifier for Category", (is_primary_key) = true];

  string name = 2
  [(field_core).description = "Name of the Category"];

  bool is_active = 3
  [(field_core).description = "define the active status of the Category. `true` == active; `false` == inactive", (field_core).comment = "should refactor to non-binary status"];

}
message CategoryEvent {
  option(message_core).description = "This is the description of the Category activity table";
  option(owner) = "Platform";
  option(domain) = "Core";
  option(schema_type) = EVENT;
  option(event_type) = LIFECYCLE;
  option(team_channel) = "#team-platform";
  option(alert_channel) = "#alerts-platform";

  Category previous_category_state = 1
  [(field_core).description = "Previous version of the Category entity before the mutation"];

  Category current_category_state = 2
  [(field_core).description = "Current version of the Category entity after the mutation"];

  ActivityType activity_type = 3
  [(field_core).description = "Lifecycle event type for the Category table"];
}
--------------------------------------------------------------------------------
/src/main/resources/schema/product.proto:
--------------------------------------------------------------------------------
syntax = "proto3";
package org.schemata.schema;

import "schemata/protobuf/schemata.proto";
import "category.proto";
import "brand.proto";

option java_package = "org.protocol.schema";
option java_outer_classname = "ProductBuilder";


message Product {

  option(message_core).description = "This is the description of the Product table";
  option(message_core).comment = "The comment added after thought";
  option(message_core).see_also = "db.product MySQL table";
  option(owner) = "Platform";
  option(domain) = "Core";
  option(schema_type) = ENTITY;
  option(team_channel) = "#team-platform";
  option(alert_channel) = "#alerts-platform";

  int32 id = 1
  [(field_core).description = "Unique identifier for Product", (is_primary_key) = true];

  string name = 2
  [(field_core).description = "Name of the Product"];

  Category category = 3
  [(field_core).description = "Category name of the product"];

  Brand brand = 4
  [(field_core).description = "Brand name of the product"];

  bool is_active = 5
  [(field_core).description = "define the active status of the Product. `true` == active; `false` == inactive", (field_core).comment = "should refactor to non-binary status"];

}
message ProductEvent {
  option(message_core).description = "This is the description of the Product activity table";
  option(owner) = "Platform";
  option(domain) = "Core";
  option(schema_type) = EVENT;
  option(event_type) = LIFECYCLE;
  option(team_channel) = "#team-platform";
  option(alert_channel) = "#alerts-platform";

  Product previous_product_state = 1
  [(field_core).description = "Previous version of the Product entity before the mutation"];

  Product current_product_state = 2
  [(field_core).description = "Current version of the Product entity after the mutation"];

  ActivityType activity_type = 3
  [(field_core).description = "Lifecycle event type for the Product table"];
}
--------------------------------------------------------------------------------
/src/main/resources/schema/user.proto:
--------------------------------------------------------------------------------
syntax = "proto3";
package org.schemata.schema;

import "schemata/protobuf/schemata.proto";
import "schemata/protobuf/subscribers.proto";
import "schemata/protobuf/constraints.proto";
import "product.proto";
import "google/protobuf/timestamp.proto";


option java_package = "org.protocol.schema";
option java_outer_classname = "UserBuilder";

enum UserActivityType {
  VIEW = 0;
  READ_REVIEW = 1;
  VIEW_DESCRIPTION = 2;
}

message User {

  option(message_core).description = "This is the description of the users table";
  option(message_core).comment = "The comment added after thought";
  option(message_core).see_also = "db.user MySQL table";
  option(owner) = "Platform";
  option(domain) = "Core";
  option(schema_type) = ENTITY;
  option(team_channel) = "#team-platform";
  option(alert_channel) = "#alerts-platform";

  option (upstream) = {
    subscribers: {name: "Upstream Team A" usage: "Upstream Usage A"}
    subscribers: {name: "Upstream Team B" usage: "Upstream Usage B"}
  };

  option (downstream) = {
    subscribers: {name: "Downstream Team A" usage: "Downstream Usage A"}
    subscribers: {name: "Downstream Team B" usage: "Downstream Usage B"}
  };

  option (constraints) = {

    constraint: [{
      name: "age range",
      description: "age range constraint",
      config: {
        key: "column",
        value: {string_value: "age"}
      },
      config: {
        key: "class_name",
        value: {string_value: "great_expectations.core.validation_operators.expect_column_values_to_be_between"}
      },
      config: {
        key: "min_value",
        value: {number_value: 18}
      },
      config: {
        key: "max_value",
        value: {number_value: 150}
      },
      config: {
        key: "mostly",
        value: {number_value: 0.95}
      }
    }],
    constraint: [{
      name: "Timezone Constraint",
      description: "Timezone should be either EST or PST",
      config: {
        key: "column",
        value: {string_value: "timezone"}
      },
      config: {
        key: "class_name",
        value: {string_value: "expect_column_values_to_be_in_set"}
      },
      config: {
        key: "value_set",
        value: {
          list_value: {
            values: {string_value: "EST"}
            values: {string_value: "PST"}
          }
        }
      }
    }]
  };

  int32 id = 1
  [(field_core).description = "Unique identifier for User", (is_primary_key) = true];

  string name = 2
  [(field_core).description = "Name of the user"];

  string email = 3
  [(field_core).description = "email id for the user", (product_type) = "username", (is_classified) = true, (classification_level) = "LEVEL1"];

  bool is_active = 4
  [(field_core).description = "define the active status of the user. `true` == active; `false` == inactive", (field_core).comment = "should refactor to non-binary status"];

  string timezone = 5
  [(field_core).description = "preferred time zone for the user"];

  string age = 6
  [(field_core).description = "Age of the user"];
}

message UserEvent {
  option(message_core).description = "This is the description of the users table";
  option(owner) = "Platform";
  option(domain) = "Core";
  option(schema_type) = EVENT;
  option(event_type) = LIFECYCLE;
  option(team_channel) = "#team-platform";
  option(alert_channel) = "#alerts-platform";

  User previous_user_state = 1
  [(field_core).description = "Previous version of the user entity before the mutation"];

  User current_user_state = 2
  [(field_core).description = "Current version of the user entity after the mutation"];

  ActivityType activity_type = 3
  [(field_core).description = "Lifecycle event type for the Users table"];

  google.protobuf.Timestamp timestamp = 4 [(field_core).description = "Timestamp of the activity"];
}

message UserActivityEvent {
  option(message_core).description = "This is the description of the users table";
  option(owner) = "Product";
  option(domain) = "Growth";
  option(schema_type) = EVENT;
  option(event_type) = ACTIVITY;
  option(team_channel) = "#team-growth";
  option(alert_channel) = "#alerts-growth";
  User user = 1 [(field_core).description = "User entity reference"];
  Product product = 2 [(field_core).description = "Product entity reference"];
  UserActivityType activity_type = 3 [(field_core).description = "Type of the user activity"];
  google.protobuf.Timestamp timestamp = 4 [(field_core).description = "Timestamp of the activity"];
}

message UserActivityAggregate {

  option(message_core).description = "This is the aggregated user activity view count. The event is aggregated by user & product";
  option(owner) = "Product";
  option(domain) = "Growth";
  option(schema_type) = EVENT;
  option(event_type) = AGGREGATED;
  option(team_channel) = "#team-growth";
  option(alert_channel) = "#alerts-growth";

  User user = 1 [(field_core).description = "User entity reference"];
  Product product = 2 [(field_core).description = "Product entity reference"];
  int64 count = 3 [(field_core).description = "Aggregated count of the user activity per product", (product_type) = "activity_count"];
  int32 window_time = 4 [(field_core).description = "Max window time for the aggregation"];
  TimeUnit window_time_unit = 5 [(field_core).description = "TimeUnit of window for the aggregation"];
  google.protobuf.Timestamp timestamp = 6 [(field_core).description = "Timestamp of the activity"];

}
--------------------------------------------------------------------------------
/src/opencontract/v1/org/schemata/protobuf/constraints.proto:
--------------------------------------------------------------------------------
syntax = "proto3";

package org.schemata.schema;

import "google/protobuf/descriptor.proto";
import "google/protobuf/struct.proto";

option java_package = "org.schemata.schema";
option java_outer_classname = "SchemataConstraintsBuilder";


// A Constraint is a single rule that is applied to a message or a field.
message Constraint {
  string name = 1;
  string description = 2;
  // Arbitrary configuration for the rule, keyed by config name.
  map<string, google.protobuf.Value> config = 3;
}

// You can define N number of constraints for a message or a field.
message Constraints {
  // The list of constraints that are applied to the message.
  repeated Constraint constraint = 1;
}

// Add the Constraints rules to the MessageOptions.
extend google.protobuf.MessageOptions {
  Constraints constraints = 800001;
}
--------------------------------------------------------------------------------
/src/opencontract/v1/org/schemata/protobuf/subscribers.proto:
--------------------------------------------------------------------------------
syntax = "proto3";

package org.schemata.schema;

import "google/protobuf/descriptor.proto";

option java_package = "org.schemata.schema";
option java_outer_classname = "SchemataSubscribersBuilder";

// Consumers subscribe to a stream by providing the name of the consumer & the usage of the stream.
message Subscribers {
  string name = 1;
  string usage = 2;
}

// The Upstream message is used to send the list of upstream subscribers to the publisher.
message Upstream {
  repeated Subscribers subscribers = 2;
}

// The Downstream message is used to send the list of downstream subscribers to the publisher.
message Downstream {
  repeated Subscribers subscribers = 1;
}

// Add the Upstream & Downstream Subscribers to the MessageOptions.
extend google.protobuf.MessageOptions {
  Downstream downstream = 700001;
  Upstream upstream = 700002;
}
--------------------------------------------------------------------------------
/src/test/java/org/schemata/ResourceLoader.java:
--------------------------------------------------------------------------------
package org.schemata;

import java.nio.file.Paths;


public class ResourceLoader {

  private static final String TEST_RESOURCES =
      Paths.get("src", "test", "resources").toFile().getAbsolutePath();

  public static String getDescriptorsPath() {
    return TEST_RESOURCES + "/descriptors/model.desc";
  }

  public static String getChangedDescriptorsPath() {
    return TEST_RESOURCES + "/descriptors/changed_model.desc";
  }

  public static String getProtoEntitiesPath() {
    return TEST_RESOURCES + "/schema/entities.proto";
  }

  public static String getAvroSchemaPath() {
    return TEST_RESOURCES;
  }

  public static String getBrandSchemaPath() {
    return TEST_RESOURCES + "/avro_schema/brand.avsc";
  }

  public static String getInvalidBrandSchemaPath() {
    return TEST_RESOURCES + "/avro_schema/brand_dummy.avsc";
  }

  public static String getDbtBasePath() {
    return TEST_RESOURCES + "/dbt";
  }

  public static String getChangedDbtBasePath() {
    return TEST_RESOURCES + "/dbt_change";
  }

  public static String getInvalidDbtBasePath() {
    // Intentionally points to a directory that does not contain dbt artifacts.
    return Paths.get("src", "main", "dbt").toFile().getAbsolutePath();
  }
}
--------------------------------------------------------------------------------
/src/test/java/org/schemata/SchemataExecutorTest.java:
--------------------------------------------------------------------------------
package org.schemata;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import picocli.CommandLine;

import static org.junit.jupiter.api.Assertions.assertEquals;


public class SchemataExecutorTest {

  static CommandLine cmd;

  @BeforeAll
  static void setup() {
    var executor = new SchemataExecutor();
    cmd = new CommandLine(executor);
  }

  @Test
  @DisplayName("Run schema validate function to run all the schema and column validation rules")
  public void testSchemaValidateCmd() {
    int exitCode = cmd.execute("validate", "-s=" + ResourceLoader.getDescriptorsPath(), "-p=PROTOBUF");
    assertEquals(0, exitCode);
  }
schema name") 30 | public void testScoreWithInvalidSchema() { 31 | int exitCode = cmd.execute("score", "-s=" + ResourceLoader.getDescriptorsPath(), "User", "-p=PROTOBUF"); 32 | assertEquals(-1, exitCode); 33 | } 34 | 35 | @Test 36 | @Description("Test Schema score with an valid schema name") 37 | public void testScoreWithValidSchema() { 38 | int exitCode = cmd.execute("score", "-s=" + ResourceLoader.getDescriptorsPath(), "-p=PROTOBUF", 39 | "org.schemata.schema.CampaignCategoryTrackerEvent"); 40 | assertEquals(0, exitCode); 41 | } 42 | 43 | @Test 44 | @Description("Test Schema documentation") 45 | public void testSchemaDocumentationCmd() { 46 | int exitCode = cmd.execute("document", "-s=" + ResourceLoader.getDescriptorsPath(), "-p=PROTOBUF"); 47 | assertEquals(0, exitCode); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/org/schemata/graph/SchemaGraphTest.java: -------------------------------------------------------------------------------- 1 | package org.schemata.graph; 2 | 3 | import com.google.protobuf.Descriptors; 4 | import java.io.FileInputStream; 5 | import java.io.IOException; 6 | import java.util.Set; 7 | import java.util.stream.Collectors; 8 | 9 | import org.apache.commons.collections4.SetUtils; 10 | import org.junit.jupiter.api.Assertions; 11 | import org.junit.jupiter.api.BeforeAll; 12 | import org.junit.jupiter.api.Test; 13 | 14 | import org.schemata.ResourceLoader; 15 | import org.schemata.domain.Field; 16 | import org.schemata.domain.Schema; 17 | import org.schemata.exception.SchemaNotFoundException; 18 | import org.schemata.provider.protobuf.ProtoFileDescriptorSetLoader; 19 | import org.schemata.provider.protobuf.ProtoProcessor; 20 | 21 | import static org.junit.jupiter.api.Assertions.assertEquals; 22 | import static org.junit.jupiter.api.Assertions.assertTrue; 23 | 24 | 25 | public class SchemaGraphTest { 26 | 27 | static SchemaGraph graph; 28 | 29 | @BeforeAll 30 | static void setUp() 31 | throws IOException, Descriptors.DescriptorValidationException { 32 | var stream = new FileInputStream(ResourceLoader.getDescriptorsPath()); 33 | var protoFileDescriptorLoader = new ProtoFileDescriptorSetLoader(stream); 34 | var parser = new ProtoProcessor(); 35 | var schemaList = parser.parse(protoFileDescriptorLoader.loadDescriptors()); 36 | graph = new SchemaGraph(schemaList); 37 | } 38 | 39 | @Test 40 | public void testWithInvalidSchema() { 41 | Assertions.assertThrows(SchemaNotFoundException.class, () -> graph.getSchema("User"), 42 | "Schema not found was expected"); 43 | } 44 | 45 | @Test 46 | public void testWithValidSchema() { 47 | assertEquals("org.schemata.schema.UserEvent", graph.getSchema("org.schemata.schema.UserEvent").name()); 48 | } 49 | 50 | @Test 51 | public void testIncomingEdges() { 52 | var incomingEdges = graph.incomingEdgesOf("org.schemata.schema.User"); 53 | var expectedEdges = Set.of(newUserEdge("org.schemata.schema.UserEvent", "previous_user_state"), 54 | newUserEdge("org.schemata.schema.UserEvent", "current_user_state"), 55 | newUserEdge("org.schemata.schema.UserActivityEvent", "user"), 56 | newUserEdge("org.schemata.schema.UserActivityAggregate", "user")); 57 | assertEquals(4, incomingEdges.size()); 58 | var actualEdges = incomingEdges.stream().map(WeightedSchemaEdge::summaryPrint).collect(Collectors.toSet()); 59 | assertTrue(SetUtils.isEqualSet(expectedEdges, actualEdges)); 60 | } 61 | 62 | @Test 63 | public void testIncomingVertex() { 64 | var incomingSchemaSet = 
--------------------------------------------------------------------------------
/src/test/java/org/schemata/provider/avro/AvroSchemaParserTest.java:
--------------------------------------------------------------------------------
package org.schemata.provider.avro;

import java.io.IOException;
import org.junit.jupiter.api.Test;
import org.schemata.ResourceLoader;
import org.schemata.domain.EventType;
import org.schemata.domain.SchemaType;

import static org.junit.jupiter.api.Assertions.assertAll;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;


public class AvroSchemaParserTest {

  @Test
  public void testListAvroSchemaFilesWithInvalidPath() {
    assertThrows(IOException.class, () -> {
      String avroSchemaPath = ResourceLoader.getAvroSchemaPath() + "dummy_path";
      var parser = new AvroSchemaParser();
      parser.listAvroSchemaFiles(avroSchemaPath);
    });
  }

  @Test
  public void testListAvroSchemaFiles()
      throws IOException {
    String avroSchemaPath = ResourceLoader.getAvroSchemaPath();
    var parser = new AvroSchemaParser();
    var schemaFileList = parser.listAvroSchemaFiles(avroSchemaPath);
    assertAll("Assert list of schema files",
        () -> assertEquals(1, schemaFileList.size()));
  }

  @Test
  public void testCompileAvroSchemaWithInvalidFile() {
    assertThrows(IOException.class, () -> {
      var parser = new AvroSchemaParser();
      parser.compileAvroSchema(ResourceLoader.getInvalidBrandSchemaPath());
    });
  }

  @Test
  public void testCompileAvroSchema()
      throws IOException {
    var parser = new AvroSchemaParser();
    var schema = parser.compileAvroSchema(ResourceLoader.getBrandSchemaPath());
    assertEquals("org.schemata.schema.Brand", schema.getFullName());
  }

  @Test
  public void testBuildSchema()
      throws IOException {
    var parser = new AvroSchemaParser();
    var schema = parser.buildSchema(ResourceLoader.getBrandSchemaPath());
    assertAll("Assert schema properties",
        () -> assertEquals("org.schemata.schema.Brand", schema.name()),
        () -> assertEquals("This is the description of the Brand table", schema.description()),
        () -> assertEquals(SchemaType.ENTITY.name(), schema.type()),
        () -> assertEquals(EventType.NONE.name(), schema.eventType()),
        () -> assertEquals(3, schema.fieldList().size())
    );
  }
}
--------------------------------------------------------------------------------
/src/test/java/org/schemata/provider/dbt/DbtCatalogParserTest.java:
--------------------------------------------------------------------------------
package org.schemata.provider.dbt;

import com.google.gson.JsonElement;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.schemata.ResourceLoader;
import org.schemata.exception.SchemaParserException;

import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;


public class DbtCatalogParserTest {

  JsonElement element;
  DbtCatalogParser parser;

  @BeforeEach
  public void init() {
    parser = new DbtCatalogParser();
    element = parser.getCatalogJsonParser(ResourceLoader.getDbtBasePath());
  }

  @Test
  public void testGetCatalogParserWithInvalidPath() {
    assertThrows(SchemaParserException.class,
        () -> new DbtCatalogParser().getCatalogJsonParser(ResourceLoader.getInvalidDbtBasePath()));
  }

  @Test
  public void testGetCatalogParser() {
    assertDoesNotThrow(() -> new DbtCatalogParser().getCatalogJsonParser(ResourceLoader.getDbtBasePath()));
  }

  @Test
  public void testGetNodes() {
    assertTrue(parser.getNodes(element).isJsonObject());
  }
  @Test
  public void testExtractTable() {
    var nodes = parser.getNodes(element);
    String modelName = "model.dbtlearn.fct_reviews";
    var expected = new DbtCatalogMetadata.Table("dev", "fct_reviews",
        "model.dbtlearn.fct_reviews", "", "transform");
    assertEquals(expected, parser.extractTable(modelName, nodes.get(modelName)));
  }

  @Test
  public void testExtractColumn() {
    var nodes = parser.getNodes(element);
    String modelName = "model.dbtlearn.fct_reviews";

    var node = nodes.get(modelName);

    var expected = List.of(new DbtCatalogMetadata.Column("listing_id", "number", 1, ""),
        new DbtCatalogMetadata.Column("review_date", "timestamp_ntz", 2, ""),
        new DbtCatalogMetadata.Column("reviewer_name", "text", 3, ""),
        new DbtCatalogMetadata.Column("review_text", "text", 4, ""),
        new DbtCatalogMetadata.Column("review_sentiment", "text", 5, "")
    );

    assertEquals(expected, parser.extractColumn(node));
  }
}
--------------------------------------------------------------------------------
/src/test/java/org/schemata/provider/dbt/DbtManifestParserTest.java:
--------------------------------------------------------------------------------
package org.schemata.provider.dbt;

import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.schemata.ResourceLoader;
import org.schemata.domain.Depends;
import org.schemata.domain.EventType;
import org.schemata.domain.Link;
import org.schemata.domain.ModelType;
import org.schemata.domain.SchemaType;

import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;


public class DbtManifestParserTest {
  Map catalog;
  DbtManifestParser manifestParser;

  @BeforeEach
  public void init() {
    catalog = new DbtCatalogParser().parse(ResourceLoader.getDbtBasePath());
    manifestParser = new DbtManifestParser();
  }

  @Test
  public void testParse() {
    assertDoesNotThrow(() -> manifestParser.parse(catalog, ResourceLoader.getDbtBasePath()));
  }

  @Test
  public void testSchemaTable() {
    var schemaList = manifestParser.parse(catalog, ResourceLoader.getDbtBasePath());
    assertEquals(7, schemaList.size());
  }

  @Test
  public void testReviewsSchema() {
    var schemaList = manifestParser.parse(catalog, ResourceLoader.getDbtBasePath());
    var schema = schemaList.stream().filter(f -> f.name().equals("model.dbtlearn.src_reviews")).toList();
    assertEquals(1, schema.size());
    var reviewsSchema = schema.get(0);
    assertEquals("core", reviewsSchema.domain());
    assertEquals(ModelType.DIMENSION.name(), reviewsSchema.modelType());
    assertEquals(EventType.NONE.name(), reviewsSchema.eventType());
    assertEquals(SchemaType.MODEL.name(), reviewsSchema.type());
  }

  @Test
  public void testReviewColumns() {
    var schemaList = manifestParser.parse(catalog, ResourceLoader.getDbtBasePath());
    var schema = schemaList.stream().filter(f -> f.name().equals("model.dbtlearn.src_reviews")).toList().get(0);
    var fieldList = schema.fieldList();
    assertEquals(5, fieldList.size());
    var field = fieldList.stream().filter(f -> f.name().equals("listing_id")).toList().get(0);
    assertTrue(field.isPrimaryKey());
    assertEquals(new Link("src_listings", "id"), field.link());
    List<Depends> dependsList = List.of(new Depends("listings", "id"));
    assertEquals(dependsList, field.depends());
  }
}
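The dbt provider works off the catalog.json / manifest.json pair; a sketch of the two-step flow exercised by the tests above, with a placeholder base path and an illustrative class name:

import org.schemata.provider.dbt.DbtCatalogParser;
import org.schemata.provider.dbt.DbtManifestParser;

public class DbtParsingExample {
  public static void main(String[] args) {
    String basePath = "/path/to/dbt/target"; // placeholder: directory holding catalog.json and manifest.json
    var catalog = new DbtCatalogParser().parse(basePath);
    // The manifest pass enriches catalog tables with domain, model type and column lineage.
    var schemaList = new DbtManifestParser().parse(catalog, basePath);
    schemaList.forEach(schema -> System.out.println(schema.name()));
  }
}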
--------------------------------------------------------------------------------
/src/test/java/org/schemata/provider/dbt/DbtSchemaCompatibilityCheckerTest.java:
--------------------------------------------------------------------------------
package org.schemata.provider.dbt;

import org.junit.jupiter.api.Test;
import org.schemata.ResourceLoader;

import static org.junit.jupiter.api.Assertions.*;

public class DbtSchemaCompatibilityCheckerTest {

  @Test
  public void testValidSchemaChangesCheck() {
    var checker = new DbtSchemaCompatibilityChecker();
    var result = checker.check(ResourceLoader.getDbtBasePath(), ResourceLoader.getDbtBasePath());
    assertTrue(result.isCompatible());
  }

  @Test
  public void testInvalidSchemaChangesCheck() {
    var checker = new DbtSchemaCompatibilityChecker();
    var result = checker.check(ResourceLoader.getDbtBasePath(), ResourceLoader.getChangedDbtBasePath());
    assertFalse(result.isCompatible());
  }
}
--------------------------------------------------------------------------------
/src/test/java/org/schemata/provider/dbt/DbtSchemaParserTest.java:
--------------------------------------------------------------------------------
package org.schemata.provider.dbt;

import org.junit.jupiter.api.Test;
import org.schemata.ResourceLoader;

import static org.junit.jupiter.api.Assertions.assertFalse;


public class DbtSchemaParserTest {

  @Test
  public void testGetSchemaList() {
    DbtSchemaParser parser = new DbtSchemaParser();
    var schemaList = parser.getSchemaList(ResourceLoader.getDbtBasePath());
    assertFalse(schemaList.isEmpty());
  }
}
--------------------------------------------------------------------------------
/src/test/java/org/schemata/provider/protobuf/ProtoProcessorTest.java:
--------------------------------------------------------------------------------
package org.schemata.provider.protobuf;

import com.google.protobuf.Descriptors;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.schemata.ResourceLoader;
import org.schemata.domain.Constraints;
import org.schemata.domain.Schema;

import static org.junit.jupiter.api.Assertions.assertAll;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;


public class ProtoProcessorTest {

  private static Schema userSchema;

  @BeforeAll
  static void setUp()
      throws IOException, Descriptors.DescriptorValidationException {

    var stream = new FileInputStream(ResourceLoader.getDescriptorsPath());
    var protoFileDescriptorLoader = new ProtoFileDescriptorSetLoader(stream);
    var parser = new ProtoProcessor();
    var schemaList = parser.parse(protoFileDescriptorLoader.loadDescriptors());
    assertAll("User Schema Sanity Check", () -> assertNotNull(schemaList), () -> assertEquals(14, schemaList.size()));
    userSchema = schemaList.stream().filter(s -> s.name().equalsIgnoreCase("org.schemata.schema.User")).toList().get(0);
    assertNotNull(userSchema);
  }
  @Test
  @DisplayName("Test User Schema metadata")
  public void checkSchema() {
    assertAll("User Schema properties", () -> assertNotNull(userSchema),
        () -> assertEquals("org.schemata.schema.User", userSchema.name()));
  }

  @Test
  @DisplayName("Test User Fields metadata")
  public void checkFields() {
    assertAll("User Schema Fields Sanity Check", () -> assertNotNull(userSchema.fieldList()),
        () -> assertTrue(userSchema.fieldList().size() > 1));
    var fieldList = userSchema.fieldList();
    assertEquals(6, fieldList.size());
  }

  @Test
  @DisplayName("Test Downstream Subscribers List metadata")
  public void checkDownstreamSubscribersList() {
    assertAll("User Schema Downstream Subscribers Sanity Check", () -> assertNotNull(userSchema.downstreamSubscribersList()),
        () -> assertTrue(userSchema.downstreamSubscribersList().size() > 1));
    var subscribersList = userSchema.downstreamSubscribersList();
    assertEquals(2, subscribersList.size());
  }

  @Test
  @DisplayName("Test Upstream Subscribers List metadata")
  public void checkUpstreamSubscribersList() {
    assertAll("User Schema Upstream Subscribers Sanity Check", () -> assertNotNull(userSchema.upstreamSubscribersList()),
        () -> assertTrue(userSchema.upstreamSubscribersList().size() > 1));
    var subscribersList = userSchema.upstreamSubscribersList();
    assertEquals(2, subscribersList.size());
  }

  @Test
  public void checkConstraintsList() {
    assertAll("User Schema Constraints Sanity Check", () -> assertNotNull(userSchema.constraintsList()),
        () -> assertTrue(userSchema.constraintsList().size() > 1));
    var constraintsList = userSchema.constraintsList();
    assertEquals(2, constraintsList.size());
  }

  @Test
  public void checkPrimitiveConstraints() {
    var constraints = userSchema.constraintsList().stream().filter(v -> v.name().equals("age range")).toList();
    assertEquals(1, constraints.size());
    var constraint = constraints.get(0);
    assertEquals(18.0, constraint.constraintMap().get("min_value").value());
  }

  @Test
  public void checkListConstraints() {
    var constraints = userSchema.constraintsList().stream().filter(v -> v.name().equals("Timezone Constraint")).toList();
    assertEquals(1, constraints.size());
    var constraint = constraints.get(0);
    List<String> expected = List.of("EST", "PST");
    assertEquals(expected, constraint.constraintMap().get("value_set").listValue());
  }
}
--------------------------------------------------------------------------------
/src/test/java/org/schemata/provider/protobuf/ProtoSchemaCompatibilityCheckerTest.java:
--------------------------------------------------------------------------------
package org.schemata.provider.protobuf;


import org.junit.jupiter.api.Test;
import org.schemata.ResourceLoader;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;

public class ProtoSchemaCompatibilityCheckerTest {


  @Test
  public void testCheck() {
    var checker = new ProtoSchemaCompatibilityChecker();
    var result = checker.check(ResourceLoader.getDescriptorsPath(), ResourceLoader.getChangedDescriptorsPath());
    assertFalse(result.isCompatible());
    assertEquals(2, result.summary().size());
summary.fieldName().equals("name")).toList().size()); 20 | } 21 | } 22 |
-------------------------------------------------------------------------------- /src/test/java/org/schemata/validate/FieldValidatorTest.java: -------------------------------------------------------------------------------- 1 | package org.schemata.validate; 2 | 3 | import org.junit.jupiter.api.Test; 4 | import org.schemata.domain.Field; 5 | 6 | import static org.junit.jupiter.api.Assertions.assertEquals; 7 | 8 | 9 | public class FieldValidatorTest { 10 | 11 | @Test 12 | public void testWithEmptyDescription() { 13 | Field.Builder builder = new Field.Builder("TestSchema", "TestField", "STRING", true); 14 | builder.isClassified(true); 15 | builder.classificationLevel("LEVEL3"); 16 | var result = new FieldValidator().apply(builder.build()); 17 | assertEquals(Status.ERROR, result.status()); 18 | } 19 | 20 | @Test 21 | public void testWithEmptyClassificationLevel() { 22 | Field.Builder builder = new Field.Builder("TestSchema", "TestField", "STRING", true); 23 | builder.isClassified(true); 24 | builder.description("Field Description"); 25 | var result = new FieldValidator().apply(builder.build()); 26 | assertEquals(Status.ERROR, result.status()); 27 | } 28 | 29 | @Test 30 | public void testSuccessStatus() { 31 | Field.Builder builder = new Field.Builder("TestSchema", "TestField", "STRING", true); 32 | builder.isClassified(true); 33 | builder.description("Field Description"); 34 | builder.classificationLevel("LEVEL3"); 35 | var result = new FieldValidator().apply(builder.build()); 36 | assertEquals(Status.SUCCESS, result.status()); 37 | } 38 | 39 | @Test 40 | public void testSuccessStatusForNonClassifiedField() { 41 | Field.Builder builder = new Field.Builder("TestSchema", "TestField", "STRING", true); 42 | builder.description("Field Description"); 43 | var result = new FieldValidator().apply(builder.build()); 44 | assertEquals(Status.SUCCESS, result.status()); 45 | } 46 | } 47 |
-------------------------------------------------------------------------------- /src/test/java/org/schemata/validate/RulesTest.java: -------------------------------------------------------------------------------- 1 | package org.schemata.validate; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import static org.junit.jupiter.api.Assertions.assertEquals; 6 | 7 | 8 | public class RulesTest { 9 | 10 | @Test 11 | public void testDescription() { 12 | assertEquals("Schema domain metadata is null or empty", Rules.SCHEMA_DOMAIN_EMPTY.errorMessage); 13 | } 14 | } 15 |
-------------------------------------------------------------------------------- /src/test/java/org/schemata/validate/SchemaValidatorTest.java: -------------------------------------------------------------------------------- 1 | package org.schemata.validate; 2 | 3 | import java.util.List; 4 | import org.junit.jupiter.api.Test; 5 | import org.schemata.domain.Field; 6 | import org.schemata.domain.Schema; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertEquals; 9 | 10 | 11 | public class SchemaValidatorTest { 12 | 13 | 14 | @Test 15 | public void testEntityWithValidPrimaryKey() { 16 | Field.Builder fieldBuilder = new Field.Builder("SchemaName", "FieldName", "STRING", true); 17 | fieldBuilder.primaryKey(true); 18 | Schema.Builder builder = new Schema.Builder("SchemaName", List.of(fieldBuilder.build())); 19 | builder.description("Schema Description"); 20 | builder.owner("Growth"); 21 | builder.domain("Core"); 22 | builder.schemaType("ENTITY"); 23 | assertEquals(Status.SUCCESS, new
SchemaValidator().apply(builder.build()).status()); 24 | } 25 | 26 | @Test 27 | public void testValidEvent() { 28 | Field.Builder fieldBuilder = new Field.Builder("SchemaName", "FieldName", "STRING", true); 29 | Schema.Builder builder = new Schema.Builder("SchemaName", List.of(fieldBuilder.build())); 30 | builder.description("Schema Description"); 31 | builder.owner("Growth"); 32 | builder.domain("Core"); 33 | builder.schemaType("EVENT"); 34 | assertEquals(Status.SUCCESS, new SchemaValidator().apply(builder.build()).status()); 35 | } 36 | 37 | @Test 38 | public void testWithEmptyDescription() { 39 | Field.Builder fieldBuilder = new Field.Builder("SchemaName", "FieldName", "STRING", true); 40 | Schema.Builder builder = new Schema.Builder("SchemaName", List.of(fieldBuilder.build())); 41 | builder.owner("Growth"); 42 | builder.domain("Core"); 43 | builder.schemaType("EVENT"); 44 | assertEquals(Status.ERROR, new SchemaValidator().apply(builder.build()).status()); 45 | } 46 | 47 | @Test 48 | public void testWithEmptyOwner() { 49 | Field.Builder fieldBuilder = new Field.Builder("SchemaName", "FieldName", "STRING", true); 50 | Schema.Builder builder = new Schema.Builder("SchemaName", List.of(fieldBuilder.build())); 51 | builder.description("Schema Description"); 52 | builder.domain("Core"); 53 | builder.schemaType("EVENT"); 54 | assertEquals(Status.ERROR, new SchemaValidator().apply(builder.build()).status()); 55 | } 56 | 57 | @Test 58 | public void testWithEmptyDomain() { 59 | Field.Builder fieldBuilder = new Field.Builder("SchemaName", "FieldName", "STRING", true); 60 | Schema.Builder builder = new Schema.Builder("SchemaName", List.of(fieldBuilder.build())); 61 | builder.description("Schema Description"); 62 | builder.owner("Growth"); 63 | builder.schemaType("EVENT"); 64 | assertEquals(Status.ERROR, new SchemaValidator().apply(builder.build()).status()); 65 | } 66 | } 67 |
-------------------------------------------------------------------------------- /src/test/resources/avro_schema/brand.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "org.schemata.schema", 3 | "type": "record", 4 | "name": "Brand", 5 | "desc": "This is the description of the Brand table", 6 | "comment": "The comment added after thought", 7 | "see_also": "db.brand MySQL table", 8 | "owner": "Platform", 9 | "domain": "Core", 10 | "schema_type": "ENTITY", 11 | "team_channel": "#team-platform", 12 | "alert_channel": "#alerts-platform", 13 | "fields": [ 14 | { 15 | "name": "id", 16 | "type": "int", 17 | "is_primary_key": "true", 18 | "desc": "Unique identifier for Brand" 19 | }, 20 | { 21 | "name": "name", 22 | "type": [ 23 | "string", 24 | "null" 25 | ], 26 | "desc": "Name of the Brand" 27 | }, 28 | { 29 | "name": "is_active", 30 | "type": "boolean", 31 | "desc": "define the active status of the Brand.
`true` == active; `false` = inactive`", 32 | "comment": "should refactor to non-binary status" 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /src/test/resources/dbt/catalog.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "dbt_schema_version": "https://schemas.getdbt.com/dbt/catalog/v1.json", 4 | "dbt_version": "1.0.5", 5 | "generated_at": "2022-04-29T04:29:35.188250Z", 6 | "invocation_id": "6f5bd62d-50bb-4d99-b92b-933c06fe7759", 7 | "env": {} 8 | }, 9 | "nodes": { 10 | "model.dbtlearn.dim_listings_cleansed": { 11 | "metadata": { 12 | "type": "VIEW", 13 | "schema": "DEV", 14 | "name": "DIM_LISTINGS_CLEANSED", 15 | "database": "AIRBNB", 16 | "comment": null, 17 | "owner": "TRANSFORM" 18 | }, 19 | "columns": { 20 | "LISTING_ID": { 21 | "type": "NUMBER", 22 | "index": 1, 23 | "name": "LISTING_ID", 24 | "comment": null 25 | }, 26 | "LISTING_NAME": { 27 | "type": "TEXT", 28 | "index": 2, 29 | "name": "LISTING_NAME", 30 | "comment": null 31 | }, 32 | "ROOM_TYPE": { 33 | "type": "TEXT", 34 | "index": 3, 35 | "name": "ROOM_TYPE", 36 | "comment": null 37 | }, 38 | "MINIMUM_NIGHTS": { 39 | "type": "NUMBER", 40 | "index": 4, 41 | "name": "MINIMUM_NIGHTS", 42 | "comment": null 43 | }, 44 | "HOST_ID": { 45 | "type": "NUMBER", 46 | "index": 5, 47 | "name": "HOST_ID", 48 | "comment": null 49 | }, 50 | "PRICE": { 51 | "type": "NUMBER", 52 | "index": 6, 53 | "name": "PRICE", 54 | "comment": null 55 | }, 56 | "CREATED_AT": { 57 | "type": "TIMESTAMP_NTZ", 58 | "index": 7, 59 | "name": "CREATED_AT", 60 | "comment": null 61 | }, 62 | "UPDATED_AT": { 63 | "type": "TIMESTAMP_NTZ", 64 | "index": 8, 65 | "name": "UPDATED_AT", 66 | "comment": null 67 | } 68 | }, 69 | "stats": { 70 | "has_stats": { 71 | "id": "has_stats", 72 | "label": "Has Stats?", 73 | "value": false, 74 | "include": false, 75 | "description": "Indicates whether there are statistics for this table" 76 | } 77 | }, 78 | "unique_id": "model.dbtlearn.dim_listings_cleansed" 79 | }, 80 | "model.dbtlearn.src_hosts": { 81 | "metadata": { 82 | "type": "VIEW", 83 | "schema": "DEV", 84 | "name": "SRC_HOSTS", 85 | "database": "AIRBNB", 86 | "comment": null, 87 | "owner": "TRANSFORM" 88 | }, 89 | "columns": { 90 | "HOST_ID": { 91 | "type": "NUMBER", 92 | "index": 1, 93 | "name": "HOST_ID", 94 | "comment": null 95 | }, 96 | "HOST_NAME": { 97 | "type": "TEXT", 98 | "index": 2, 99 | "name": "HOST_NAME", 100 | "comment": null 101 | }, 102 | "IS_SUPERHOST": { 103 | "type": "TEXT", 104 | "index": 3, 105 | "name": "IS_SUPERHOST", 106 | "comment": null 107 | }, 108 | "CREATED_AT": { 109 | "type": "TIMESTAMP_NTZ", 110 | "index": 4, 111 | "name": "CREATED_AT", 112 | "comment": null 113 | }, 114 | "UPDATED_AT": { 115 | "type": "TIMESTAMP_NTZ", 116 | "index": 5, 117 | "name": "UPDATED_AT", 118 | "comment": null 119 | } 120 | }, 121 | "stats": { 122 | "has_stats": { 123 | "id": "has_stats", 124 | "label": "Has Stats?", 125 | "value": false, 126 | "include": false, 127 | "description": "Indicates whether there are statistics for this table" 128 | } 129 | }, 130 | "unique_id": "model.dbtlearn.src_hosts" 131 | }, 132 | "model.dbtlearn.src_reviews": { 133 | "metadata": { 134 | "type": "VIEW", 135 | "schema": "DEV", 136 | "name": "SRC_REVIEWS", 137 | "database": "AIRBNB", 138 | "comment": null, 139 | "owner": "TRANSFORM" 140 | }, 141 | "columns": { 142 | "LISTING_ID": { 143 | "type": "NUMBER", 144 | "index": 1, 145 | "name": "LISTING_ID", 146 | 
"comment": null 147 | }, 148 | "REVIEW_DATE": { 149 | "type": "TIMESTAMP_NTZ", 150 | "index": 2, 151 | "name": "REVIEW_DATE", 152 | "comment": null 153 | }, 154 | "REVIEWER_NAME": { 155 | "type": "TEXT", 156 | "index": 3, 157 | "name": "REVIEWER_NAME", 158 | "comment": null 159 | }, 160 | "REVIEW_TEXT": { 161 | "type": "TEXT", 162 | "index": 4, 163 | "name": "REVIEW_TEXT", 164 | "comment": null 165 | }, 166 | "REVIEW_SENTIMENT": { 167 | "type": "TEXT", 168 | "index": 5, 169 | "name": "REVIEW_SENTIMENT", 170 | "comment": null 171 | } 172 | }, 173 | "stats": { 174 | "has_stats": { 175 | "id": "has_stats", 176 | "label": "Has Stats?", 177 | "value": false, 178 | "include": false, 179 | "description": "Indicates whether there are statistics for this table" 180 | } 181 | }, 182 | "unique_id": "model.dbtlearn.src_reviews" 183 | }, 184 | "model.dbtlearn.dim_hosts_cleansed": { 185 | "metadata": { 186 | "type": "VIEW", 187 | "schema": "DEV", 188 | "name": "DIM_HOSTS_CLEANSED", 189 | "database": "AIRBNB", 190 | "comment": null, 191 | "owner": "TRANSFORM" 192 | }, 193 | "columns": { 194 | "HOST_ID": { 195 | "type": "NUMBER", 196 | "index": 1, 197 | "name": "HOST_ID", 198 | "comment": null 199 | }, 200 | "HOST_NAME": { 201 | "type": "TEXT", 202 | "index": 2, 203 | "name": "HOST_NAME", 204 | "comment": null 205 | }, 206 | "IS_SUPERHOST": { 207 | "type": "TEXT", 208 | "index": 3, 209 | "name": "IS_SUPERHOST", 210 | "comment": null 211 | }, 212 | "CREATED_AT": { 213 | "type": "TIMESTAMP_NTZ", 214 | "index": 4, 215 | "name": "CREATED_AT", 216 | "comment": null 217 | }, 218 | "UPDATED_AT": { 219 | "type": "TIMESTAMP_NTZ", 220 | "index": 5, 221 | "name": "UPDATED_AT", 222 | "comment": null 223 | } 224 | }, 225 | "stats": { 226 | "has_stats": { 227 | "id": "has_stats", 228 | "label": "Has Stats?", 229 | "value": false, 230 | "include": false, 231 | "description": "Indicates whether there are statistics for this table" 232 | } 233 | }, 234 | "unique_id": "model.dbtlearn.dim_hosts_cleansed" 235 | }, 236 | "model.dbtlearn.dim_listings_w_hosts": { 237 | "metadata": { 238 | "type": "VIEW", 239 | "schema": "DEV", 240 | "name": "DIM_LISTINGS_W_HOSTS", 241 | "database": "AIRBNB", 242 | "comment": null, 243 | "owner": "TRANSFORM" 244 | }, 245 | "columns": { 246 | "LISTING_ID": { 247 | "type": "NUMBER", 248 | "index": 1, 249 | "name": "LISTING_ID", 250 | "comment": null 251 | }, 252 | "LISTING_NAME": { 253 | "type": "TEXT", 254 | "index": 2, 255 | "name": "LISTING_NAME", 256 | "comment": null 257 | }, 258 | "ROOM_TYPE": { 259 | "type": "TEXT", 260 | "index": 3, 261 | "name": "ROOM_TYPE", 262 | "comment": null 263 | }, 264 | "MINIMUM_NIGHTS": { 265 | "type": "NUMBER", 266 | "index": 4, 267 | "name": "MINIMUM_NIGHTS", 268 | "comment": null 269 | }, 270 | "PRICE": { 271 | "type": "NUMBER", 272 | "index": 5, 273 | "name": "PRICE", 274 | "comment": null 275 | }, 276 | "HOST_ID": { 277 | "type": "NUMBER", 278 | "index": 6, 279 | "name": "HOST_ID", 280 | "comment": null 281 | }, 282 | "HOST_NAME": { 283 | "type": "TEXT", 284 | "index": 7, 285 | "name": "HOST_NAME", 286 | "comment": null 287 | }, 288 | "HOST_IS_SUPERHOST": { 289 | "type": "TEXT", 290 | "index": 8, 291 | "name": "HOST_IS_SUPERHOST", 292 | "comment": null 293 | }, 294 | "CREATED_AT": { 295 | "type": "TIMESTAMP_NTZ", 296 | "index": 9, 297 | "name": "CREATED_AT", 298 | "comment": null 299 | }, 300 | "UPDATED_AT": { 301 | "type": "TIMESTAMP_NTZ", 302 | "index": 10, 303 | "name": "UPDATED_AT", 304 | "comment": null 305 | } 306 | }, 307 | "stats": { 308 | 
"has_stats": { 309 | "id": "has_stats", 310 | "label": "Has Stats?", 311 | "value": false, 312 | "include": false, 313 | "description": "Indicates whether there are statistics for this table" 314 | } 315 | }, 316 | "unique_id": "model.dbtlearn.dim_listings_w_hosts" 317 | }, 318 | "model.dbtlearn.fct_reviews": { 319 | "metadata": { 320 | "type": "BASE TABLE", 321 | "schema": "DEV", 322 | "name": "FCT_REVIEWS", 323 | "database": "AIRBNB", 324 | "comment": null, 325 | "owner": "TRANSFORM" 326 | }, 327 | "columns": { 328 | "LISTING_ID": { 329 | "type": "NUMBER", 330 | "index": 1, 331 | "name": "LISTING_ID", 332 | "comment": null 333 | }, 334 | "REVIEW_DATE": { 335 | "type": "TIMESTAMP_NTZ", 336 | "index": 2, 337 | "name": "REVIEW_DATE", 338 | "comment": null 339 | }, 340 | "REVIEWER_NAME": { 341 | "type": "TEXT", 342 | "index": 3, 343 | "name": "REVIEWER_NAME", 344 | "comment": null 345 | }, 346 | "REVIEW_TEXT": { 347 | "type": "TEXT", 348 | "index": 4, 349 | "name": "REVIEW_TEXT", 350 | "comment": null 351 | }, 352 | "REVIEW_SENTIMENT": { 353 | "type": "TEXT", 354 | "index": 5, 355 | "name": "REVIEW_SENTIMENT", 356 | "comment": null 357 | } 358 | }, 359 | "stats": { 360 | "bytes": { 361 | "id": "bytes", 362 | "label": "Approximate Size", 363 | "value": 42548736.0, 364 | "include": true, 365 | "description": "Approximate size of the table as reported by Snowflake" 366 | }, 367 | "row_count": { 368 | "id": "row_count", 369 | "label": "Row Count", 370 | "value": 409697.0, 371 | "include": true, 372 | "description": "An approximate count of rows in this table" 373 | }, 374 | "last_modified": { 375 | "id": "last_modified", 376 | "label": "Last Modified", 377 | "value": "2022-04-05 20:47UTC", 378 | "include": true, 379 | "description": "The timestamp for last update/change" 380 | }, 381 | "has_stats": { 382 | "id": "has_stats", 383 | "label": "Has Stats?", 384 | "value": true, 385 | "include": false, 386 | "description": "Indicates whether there are statistics for this table" 387 | } 388 | }, 389 | "unique_id": "model.dbtlearn.fct_reviews" 390 | }, 391 | "model.dbtlearn.src_listings": { 392 | "metadata": { 393 | "type": "VIEW", 394 | "schema": "DEV", 395 | "name": "SRC_LISTINGS", 396 | "database": "AIRBNB", 397 | "comment": null, 398 | "owner": "TRANSFORM" 399 | }, 400 | "columns": { 401 | "LISTING_ID": { 402 | "type": "NUMBER", 403 | "index": 1, 404 | "name": "LISTING_ID", 405 | "comment": null 406 | }, 407 | "LISTING_NAME": { 408 | "type": "TEXT", 409 | "index": 2, 410 | "name": "LISTING_NAME", 411 | "comment": null 412 | }, 413 | "LISTING_URL": { 414 | "type": "TEXT", 415 | "index": 3, 416 | "name": "LISTING_URL", 417 | "comment": null 418 | }, 419 | "ROOM_TYPE": { 420 | "type": "TEXT", 421 | "index": 4, 422 | "name": "ROOM_TYPE", 423 | "comment": null 424 | }, 425 | "MINIMUM_NIGHTS": { 426 | "type": "NUMBER", 427 | "index": 5, 428 | "name": "MINIMUM_NIGHTS", 429 | "comment": null 430 | }, 431 | "HOST_ID": { 432 | "type": "NUMBER", 433 | "index": 6, 434 | "name": "HOST_ID", 435 | "comment": null 436 | }, 437 | "PRICE_STR": { 438 | "type": "TEXT", 439 | "index": 7, 440 | "name": "PRICE_STR", 441 | "comment": null 442 | }, 443 | "CREATED_AT": { 444 | "type": "TIMESTAMP_NTZ", 445 | "index": 8, 446 | "name": "CREATED_AT", 447 | "comment": null 448 | }, 449 | "UPDATED_AT": { 450 | "type": "TIMESTAMP_NTZ", 451 | "index": 9, 452 | "name": "UPDATED_AT", 453 | "comment": null 454 | } 455 | }, 456 | "stats": { 457 | "has_stats": { 458 | "id": "has_stats", 459 | "label": "Has Stats?", 460 | "value": false, 
461 | "include": false, 462 | "description": "Indicates whether there are statistics for this table" 463 | } 464 | }, 465 | "unique_id": "model.dbtlearn.src_listings" 466 | } 467 | }, 468 | "sources": {}, 469 | "errors": null 470 | } -------------------------------------------------------------------------------- /src/test/resources/dbt_change/catalog.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "dbt_schema_version": "https://schemas.getdbt.com/dbt/catalog/v1.json", 4 | "dbt_version": "1.0.5", 5 | "generated_at": "2022-04-29T04:29:35.188250Z", 6 | "invocation_id": "6f5bd62d-50bb-4d99-b92b-933c06fe7759", 7 | "env": {} 8 | }, 9 | "nodes": { 10 | "model.dbtlearn.dim_listings_cleansed_changed": { 11 | "metadata": { 12 | "type": "VIEW", 13 | "schema": "DEV", 14 | "name": "DIM_LISTINGS_CLEANSED_CHANGED", 15 | "database": "AIRBNB", 16 | "comment": null, 17 | "owner": "TRANSFORM" 18 | }, 19 | "columns": { 20 | "LISTING_ID": { 21 | "type": "NUMBER", 22 | "index": 1, 23 | "name": "LISTING_ID", 24 | "comment": null 25 | }, 26 | "LISTING_NAME": { 27 | "type": "TEXT", 28 | "index": 2, 29 | "name": "LISTING_NAME", 30 | "comment": null 31 | }, 32 | "ROOM_TYPE": { 33 | "type": "TEXT", 34 | "index": 3, 35 | "name": "ROOM_TYPE", 36 | "comment": null 37 | }, 38 | "MINIMUM_NIGHTS": { 39 | "type": "NUMBER", 40 | "index": 4, 41 | "name": "MINIMUM_NIGHTS", 42 | "comment": null 43 | }, 44 | "HOST_ID": { 45 | "type": "NUMBER", 46 | "index": 5, 47 | "name": "HOST_ID", 48 | "comment": null 49 | }, 50 | "PRICE": { 51 | "type": "NUMBER", 52 | "index": 6, 53 | "name": "PRICE", 54 | "comment": null 55 | }, 56 | "CREATED_AT": { 57 | "type": "TIMESTAMP_NTZ", 58 | "index": 7, 59 | "name": "CREATED_AT", 60 | "comment": null 61 | }, 62 | "UPDATED_AT": { 63 | "type": "TIMESTAMP_NTZ", 64 | "index": 8, 65 | "name": "UPDATED_AT", 66 | "comment": null 67 | } 68 | }, 69 | "stats": { 70 | "has_stats": { 71 | "id": "has_stats", 72 | "label": "Has Stats?", 73 | "value": false, 74 | "include": false, 75 | "description": "Indicates whether there are statistics for this table" 76 | } 77 | }, 78 | "unique_id": "model.dbtlearn.dim_listings_cleansed_changed" 79 | }, 80 | "model.dbtlearn.src_hosts": { 81 | "metadata": { 82 | "type": "VIEW", 83 | "schema": "DEV", 84 | "name": "SRC_HOSTS", 85 | "database": "AIRBNB", 86 | "comment": null, 87 | "owner": "TRANSFORM" 88 | }, 89 | "columns": { 90 | "HOST_ID": { 91 | "type": "NUMBER", 92 | "index": 1, 93 | "name": "HOST_ID", 94 | "comment": null 95 | }, 96 | "HOST_NAME": { 97 | "type": "TEXT", 98 | "index": 2, 99 | "name": "HOST_NAME", 100 | "comment": null 101 | }, 102 | "IS_SUPERHOST": { 103 | "type": "TEXT", 104 | "index": 3, 105 | "name": "IS_SUPERHOST", 106 | "comment": null 107 | }, 108 | "CREATED_AT": { 109 | "type": "TIMESTAMP_NTZ", 110 | "index": 4, 111 | "name": "CREATED_AT", 112 | "comment": null 113 | }, 114 | "UPDATED_AT": { 115 | "type": "TIMESTAMP_NTZ", 116 | "index": 5, 117 | "name": "UPDATED_AT", 118 | "comment": null 119 | } 120 | }, 121 | "stats": { 122 | "has_stats": { 123 | "id": "has_stats", 124 | "label": "Has Stats?", 125 | "value": false, 126 | "include": false, 127 | "description": "Indicates whether there are statistics for this table" 128 | } 129 | }, 130 | "unique_id": "model.dbtlearn.src_hosts" 131 | }, 132 | "model.dbtlearn.src_reviews": { 133 | "metadata": { 134 | "type": "VIEW", 135 | "schema": "DEV", 136 | "name": "SRC_REVIEWS", 137 | "database": "AIRBNB", 138 | "comment": null, 139 | "owner": 
"TRANSFORM" 140 | }, 141 | "columns": { 142 | "LISTING_ID": { 143 | "type": "NUMBER", 144 | "index": 1, 145 | "name": "LISTING_ID", 146 | "comment": null 147 | }, 148 | "REVIEW_DATE": { 149 | "type": "TIMESTAMP_NTZ", 150 | "index": 2, 151 | "name": "REVIEW_DATE", 152 | "comment": null 153 | }, 154 | "REVIEWER_NAME": { 155 | "type": "TEXT", 156 | "index": 3, 157 | "name": "REVIEWER_NAME", 158 | "comment": null 159 | }, 160 | "REVIEW_TEXT": { 161 | "type": "TEXT", 162 | "index": 4, 163 | "name": "REVIEW_TEXT", 164 | "comment": null 165 | }, 166 | "REVIEW_SENTIMENT": { 167 | "type": "TEXT", 168 | "index": 5, 169 | "name": "REVIEW_SENTIMENT", 170 | "comment": null 171 | } 172 | }, 173 | "stats": { 174 | "has_stats": { 175 | "id": "has_stats", 176 | "label": "Has Stats?", 177 | "value": false, 178 | "include": false, 179 | "description": "Indicates whether there are statistics for this table" 180 | } 181 | }, 182 | "unique_id": "model.dbtlearn.src_reviews" 183 | }, 184 | "model.dbtlearn.dim_hosts_cleansed": { 185 | "metadata": { 186 | "type": "VIEW", 187 | "schema": "DEV", 188 | "name": "DIM_HOSTS_CLEANSED", 189 | "database": "AIRBNB", 190 | "comment": null, 191 | "owner": "TRANSFORM" 192 | }, 193 | "columns": { 194 | "HOST_ID": { 195 | "type": "NUMBER", 196 | "index": 1, 197 | "name": "HOST_ID", 198 | "comment": null 199 | }, 200 | "HOST_NAME": { 201 | "type": "TEXT", 202 | "index": 2, 203 | "name": "HOST_NAME", 204 | "comment": null 205 | }, 206 | "IS_SUPERHOST": { 207 | "type": "TEXT", 208 | "index": 3, 209 | "name": "IS_SUPERHOST", 210 | "comment": null 211 | }, 212 | "CREATED_AT": { 213 | "type": "TIMESTAMP_NTZ", 214 | "index": 4, 215 | "name": "CREATED_AT", 216 | "comment": null 217 | }, 218 | "UPDATED_AT": { 219 | "type": "TIMESTAMP_NTZ", 220 | "index": 5, 221 | "name": "UPDATED_AT", 222 | "comment": null 223 | } 224 | }, 225 | "stats": { 226 | "has_stats": { 227 | "id": "has_stats", 228 | "label": "Has Stats?", 229 | "value": false, 230 | "include": false, 231 | "description": "Indicates whether there are statistics for this table" 232 | } 233 | }, 234 | "unique_id": "model.dbtlearn.dim_hosts_cleansed" 235 | }, 236 | "model.dbtlearn.dim_listings_w_hosts": { 237 | "metadata": { 238 | "type": "VIEW", 239 | "schema": "DEV", 240 | "name": "DIM_LISTINGS_W_HOSTS", 241 | "database": "AIRBNB", 242 | "comment": null, 243 | "owner": "TRANSFORM" 244 | }, 245 | "columns": { 246 | "LISTING_ID": { 247 | "type": "NUMBER", 248 | "index": 1, 249 | "name": "LISTING_ID", 250 | "comment": null 251 | }, 252 | "LISTING_NAME": { 253 | "type": "TEXT", 254 | "index": 2, 255 | "name": "LISTING_NAME", 256 | "comment": null 257 | }, 258 | "ROOM_TYPE": { 259 | "type": "TEXT", 260 | "index": 3, 261 | "name": "ROOM_TYPE", 262 | "comment": null 263 | }, 264 | "MINIMUM_NIGHTS": { 265 | "type": "NUMBER", 266 | "index": 4, 267 | "name": "MINIMUM_NIGHTS", 268 | "comment": null 269 | }, 270 | "PRICE": { 271 | "type": "NUMBER", 272 | "index": 5, 273 | "name": "PRICE", 274 | "comment": null 275 | }, 276 | "HOST_ID": { 277 | "type": "NUMBER", 278 | "index": 6, 279 | "name": "HOST_ID", 280 | "comment": null 281 | }, 282 | "HOST_NAME": { 283 | "type": "TEXT", 284 | "index": 7, 285 | "name": "HOST_NAME", 286 | "comment": null 287 | }, 288 | "HOST_IS_SUPERHOST": { 289 | "type": "TEXT", 290 | "index": 8, 291 | "name": "HOST_IS_SUPERHOST", 292 | "comment": null 293 | }, 294 | "CREATED_AT": { 295 | "type": "TIMESTAMP_NTZ", 296 | "index": 9, 297 | "name": "CREATED_AT", 298 | "comment": null 299 | }, 300 | "UPDATED_AT": { 301 | 
"type": "TIMESTAMP_NTZ", 302 | "index": 10, 303 | "name": "UPDATED_AT", 304 | "comment": null 305 | } 306 | }, 307 | "stats": { 308 | "has_stats": { 309 | "id": "has_stats", 310 | "label": "Has Stats?", 311 | "value": false, 312 | "include": false, 313 | "description": "Indicates whether there are statistics for this table" 314 | } 315 | }, 316 | "unique_id": "model.dbtlearn.dim_listings_w_hosts" 317 | }, 318 | "model.dbtlearn.fct_reviews": { 319 | "metadata": { 320 | "type": "BASE TABLE", 321 | "schema": "DEV", 322 | "name": "FCT_REVIEWS", 323 | "database": "AIRBNB", 324 | "comment": null, 325 | "owner": "TRANSFORM" 326 | }, 327 | "columns": { 328 | "LISTING_ID": { 329 | "type": "NUMBER", 330 | "index": 1, 331 | "name": "LISTING_ID", 332 | "comment": null 333 | }, 334 | "REVIEW_DATE": { 335 | "type": "TIMESTAMP_NTZ", 336 | "index": 2, 337 | "name": "REVIEW_DATE", 338 | "comment": null 339 | }, 340 | "REVIEWER_NAME": { 341 | "type": "TEXT", 342 | "index": 3, 343 | "name": "REVIEWER_NAME", 344 | "comment": null 345 | }, 346 | "REVIEW_TEXT": { 347 | "type": "TEXT", 348 | "index": 4, 349 | "name": "REVIEW_TEXT", 350 | "comment": null 351 | }, 352 | "REVIEW_SENTIMENT": { 353 | "type": "TEXT", 354 | "index": 5, 355 | "name": "REVIEW_SENTIMENT", 356 | "comment": null 357 | } 358 | }, 359 | "stats": { 360 | "bytes": { 361 | "id": "bytes", 362 | "label": "Approximate Size", 363 | "value": 42548736.0, 364 | "include": true, 365 | "description": "Approximate size of the table as reported by Snowflake" 366 | }, 367 | "row_count": { 368 | "id": "row_count", 369 | "label": "Row Count", 370 | "value": 409697.0, 371 | "include": true, 372 | "description": "An approximate count of rows in this table" 373 | }, 374 | "last_modified": { 375 | "id": "last_modified", 376 | "label": "Last Modified", 377 | "value": "2022-04-05 20:47UTC", 378 | "include": true, 379 | "description": "The timestamp for last update/change" 380 | }, 381 | "has_stats": { 382 | "id": "has_stats", 383 | "label": "Has Stats?", 384 | "value": true, 385 | "include": false, 386 | "description": "Indicates whether there are statistics for this table" 387 | } 388 | }, 389 | "unique_id": "model.dbtlearn.fct_reviews" 390 | }, 391 | "model.dbtlearn.src_listings": { 392 | "metadata": { 393 | "type": "VIEW", 394 | "schema": "DEV", 395 | "name": "SRC_LISTINGS", 396 | "database": "AIRBNB", 397 | "comment": null, 398 | "owner": "TRANSFORM" 399 | }, 400 | "columns": { 401 | "LISTING_ID": { 402 | "type": "NUMBER", 403 | "index": 1, 404 | "name": "LISTING_ID", 405 | "comment": null 406 | }, 407 | "LISTING_NAME": { 408 | "type": "TEXT", 409 | "index": 2, 410 | "name": "LISTING_NAME", 411 | "comment": null 412 | }, 413 | "LISTING_URL": { 414 | "type": "TEXT", 415 | "index": 3, 416 | "name": "LISTING_URL", 417 | "comment": null 418 | }, 419 | "ROOM_TYPE": { 420 | "type": "TEXT", 421 | "index": 4, 422 | "name": "ROOM_TYPE", 423 | "comment": null 424 | }, 425 | "MINIMUM_NIGHTS": { 426 | "type": "NUMBER", 427 | "index": 5, 428 | "name": "MINIMUM_NIGHTS", 429 | "comment": null 430 | }, 431 | "HOST_ID": { 432 | "type": "NUMBER", 433 | "index": 6, 434 | "name": "HOST_ID", 435 | "comment": null 436 | }, 437 | "PRICE_STR": { 438 | "type": "TEXT", 439 | "index": 7, 440 | "name": "PRICE_STR", 441 | "comment": null 442 | }, 443 | "CREATED_AT": { 444 | "type": "TIMESTAMP_NTZ", 445 | "index": 8, 446 | "name": "CREATED_AT", 447 | "comment": null 448 | }, 449 | "UPDATED_AT": { 450 | "type": "TIMESTAMP_NTZ", 451 | "index": 9, 452 | "name": "UPDATED_AT", 453 | "comment": 
null 454 | } 455 | }, 456 | "stats": { 457 | "has_stats": { 458 | "id": "has_stats", 459 | "label": "Has Stats?", 460 | "value": false, 461 | "include": false, 462 | "description": "Indicates whether there are statistics for this table" 463 | } 464 | }, 465 | "unique_id": "model.dbtlearn.src_listings" 466 | } 467 | }, 468 | "sources": {}, 469 | "errors": null 470 | } -------------------------------------------------------------------------------- /src/test/resources/descriptors/changed_model.desc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ananthdurai/schemata/21a6b14f78c48355e49471b08c017341b51fd191/src/test/resources/descriptors/changed_model.desc -------------------------------------------------------------------------------- /src/test/resources/descriptors/model.desc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ananthdurai/schemata/21a6b14f78c48355e49471b08c017341b51fd191/src/test/resources/descriptors/model.desc -------------------------------------------------------------------------------- /src/test/resources/schema/entities.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package org.entities; 4 | 5 | import "google/protobuf/descriptor.proto"; 6 | import "protocol.proto"; 7 | 8 | option java_package = "org.entities.proto"; 9 | option java_outer_classname = "EntityBuilder"; 10 | 11 | message Department { 12 | option(org.schemata.schema.type) = ENTITY; 13 | int32 id = 1; 14 | string name = 2; 15 | } 16 | 17 | message Person { 18 | option(org.schemata.schema.message_core).description = "This is the description of the users table"; 19 | option(org.schemata.schema.message_core).comment = "The comment added after thought"; 20 | option(org.schemata.schema.owner) = "Growth"; 21 | option(org.schemata.schema.domain) = "Core"; 22 | option(org.schemata.schema.type) = ENTITY; 23 | 24 | string name = 1 25 | [(org.schemata.schema.field_core).description = "person name"]; 26 | 27 | int32 id = 2 28 | [(org.schemata.schema.field_core).description = "unique identifier for a given person", (org.schemata.schema.is_primary_key) = true]; 29 | 30 | string email = 3 31 | [(org.schemata.schema.field_core).description = "official email address", (org.schemata.schema.is_classified) = true, (org.schemata.schema.classification_level) = "HIGH", (org.schemata.schema.product_type) = "email"]; 32 | 33 | Department dept = 4 34 | [(org.schemata.schema.field_core).description = "department name of the person"] ; 35 | } 36 | -------------------------------------------------------------------------------- /validate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | java -jar target/schemata-1.0.jar validate -s=src/test/resources/descriptors/entities.desc -p=PROTOBUF 3 | --------------------------------------------------------------------------------
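
Note: the test classes above double as the most concrete usage documentation for the library's public surface. The sketch below (not a file in this repository) wires together the same calls the tests exercise: building a Schema with the mandatory metadata, running SchemaValidator over it, and diffing two compiled descriptor sets with ProtoSchemaCompatibilityChecker. It is a minimal illustration under assumptions: the package and class name are hypothetical, the descriptor paths are placeholders (descriptor sets are typically produced with protoc --descriptor_set_out, which is presumably how the .desc fixtures above were generated), and the String-path signature of check() is assumed from ProtoSchemaCompatibilityCheckerTest.

package org.schemata.example; // hypothetical package, for illustration only

import java.util.List;

import org.schemata.domain.Field;
import org.schemata.domain.Schema;
import org.schemata.provider.protobuf.ProtoSchemaCompatibilityChecker;
import org.schemata.validate.SchemaValidator;

public class ValidationSketch {

  public static void main(String[] args) {
    // Build a minimal ENTITY schema the way SchemaValidatorTest does: description,
    // owner, domain, schema type, and a primary-key field are the properties the
    // validator checks for.
    Field.Builder fieldBuilder = new Field.Builder("SchemaName", "FieldName", "STRING", true);
    fieldBuilder.primaryKey(true);
    Schema.Builder builder = new Schema.Builder("SchemaName", List.of(fieldBuilder.build()));
    builder.description("Schema Description");
    builder.owner("Growth");
    builder.domain("Core");
    builder.schemaType("ENTITY");

    // With all mandatory metadata present, the validator reports SUCCESS.
    var validation = new SchemaValidator().apply(builder.build());
    System.out.println("validation status: " + validation.status());

    // Diff two compiled descriptor sets, mirroring ProtoSchemaCompatibilityCheckerTest.
    // The paths below are hypothetical placeholders for protoc-generated .desc files.
    var checker = new ProtoSchemaCompatibilityChecker();
    var compat = checker.check("old/model.desc", "new/model.desc");
    System.out.println("compatible: " + compat.isCompatible());
  }
}

Run end to end, this is the same flow validate.sh drives from the command line: build the jar (presumably via the Maven build declared in pom.xml), compile the .proto sources into a descriptor set, and point the validate command at it.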