├── .gitignore ├── exercises ├── exercise6.odcs.xlsx ├── exercise7-data-product.md ├── exercise5-data-contract-customization.md ├── exercise4-data-contract-evolution.md ├── exercise6-dataconsumer.md ├── exercise3-consumer-driven-data-contracts.md ├── exercise2-iterate-your-data-contract.md ├── exercise1-put-your-data-under-contract.md └── exercise6-dataprovider.md ├── odcs ├── excel-template.odcs.xlsx ├── examples │ ├── datacontract-example.odcs.xlsx │ └── datacontract-example.odcs.yaml ├── ~$excel-template.odcs.xlsx ├── template.odcs.yaml └── jsonschema │ └── odcs-json-schema-v3.0.2.json ├── scripts ├── dockerinstall.sh ├── install.sh └── check ├── links.md ├── odps ├── template.odps.yaml ├── examples │ └── dataproduct-example.odps.yaml └── jsonschema │ └── odps-json-schema-v1.0.0.json └── order-service └── order-created-events-v1.odcs.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | task1.sh 3 | catalog 4 | solutions 5 | .idea 6 | .claude 7 | -------------------------------------------------------------------------------- /exercises/exercise6.odcs.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/odcs-workshop-data2day/main/exercises/exercise6.odcs.xlsx -------------------------------------------------------------------------------- /odcs/excel-template.odcs.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/odcs-workshop-data2day/main/odcs/excel-template.odcs.xlsx -------------------------------------------------------------------------------- /scripts/dockerinstall.sh: -------------------------------------------------------------------------------- 1 | docker pull datacontract/cli:latest 2 | docker run --rm -v ${PWD}:/home/datacontract datacontract/cli --version 3 | 
-------------------------------------------------------------------------------- /odcs/examples/datacontract-example.odcs.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/odcs-workshop-data2day/main/odcs/examples/datacontract-example.odcs.xlsx -------------------------------------------------------------------------------- /odcs/~$excel-template.odcs.xlsx: -------------------------------------------------------------------------------- 1 | Microsoft Office User Microsoft Office User -------------------------------------------------------------------------------- /scripts/install.sh: -------------------------------------------------------------------------------- 1 | uv tool install --python python3.11 'datacontract-cli[all]' 2 | uv tool update --python python3.11 'datacontract-cli[all]' 3 | uv tool update-shell 4 | which datacontract 5 | datacontract --version 6 | -------------------------------------------------------------------------------- /links.md: -------------------------------------------------------------------------------- 1 | ### Links 2 | - [ODCS Docs](https://bitol-io.github.io/open-data-contract-standard/) 3 | - [ODCS Source (GitHub)](https://github.com/bitol-io/open-data-contract-standard) 4 | - [Data Contract CLI (GitHub)](https://github.com/datacontract/datacontract-cli) 5 | -------------------------------------------------------------------------------- /exercises/exercise7-data-product.md: -------------------------------------------------------------------------------- 1 | # Define The Data Product 2 | 3 | - Copy “odps/template.odps.yaml” to “orders.odps.yaml” 4 | - Open “orders.odps.yaml” and fill in the following details 5 | - Define id, name, and status 6 | - Add an input port, linking to the data contract “order-service/order-created-events-v1.odcs.yaml” from the order application 7 | - Add two output ports, linking to the existing data contracts in v1 and v2: 
"orders-v1.odcs.yaml" and "orders-v2.odcs.yaml". As the version use only the major version. 8 | -------------------------------------------------------------------------------- /exercises/exercise5-data-contract-customization.md: -------------------------------------------------------------------------------- 1 | # Data Contract Customization 2 | 3 | - Create your custom JSON schema from `odcs/jsonschema/odcs-json-schema-v3.0.2.json` and name it `odcs-json-schema-v3.0.2.data2day.json` 4 | - Add a regex pattern for the ID field so it always ends with “-v” followed by a number (use pattern with “.*-v\d+”) 5 | - Allow only “draft”, “active”, “deprecated”, “retired” as status (use enum) 6 | - Validate your data contracts with the new schema 7 | ``` 8 | datacontract lint --schema odcs-json-schema-v3.0.2.data2day.json 9 | ``` 10 | - Fix the data contracts 11 | -------------------------------------------------------------------------------- /exercises/exercise4-data-contract-evolution.md: -------------------------------------------------------------------------------- 1 | # Data Contract Evolution 2 | 3 | - Introduce a new major version where the column “quantity” is introduced in the “line_items” table. It defaults to 1, but is still a breaking change for the data consumers. 4 | - New file `...-v2` 5 | - New ID `...-v2` 6 | - New db-schema `...-v2` 7 | - Set status to `draft` 8 | - Add property `quantity` 9 | - Play the migration path manually. 
10 | - deprecate v1 11 | - phase in v2 12 | - retire v1 13 | 14 | Bonus: specify a lifecycle-policy.md in natural language with constraints and rules for when one is allowed to switch to “active” 15 | -------------------------------------------------------------------------------- /exercises/exercise6-dataconsumer.md: -------------------------------------------------------------------------------- 1 | # Task 2: Data Consumer - Instructions 2 | 3 | You want to create a dashboard to see the outliers of shipments to track how well your carrier is delivering the shipments. 4 | 5 | ## Data Requirements 6 | 7 | You need data about **delivered shipments only**. 8 | 9 | Required fields: 10 | - Tracking number 11 | - Carrier (e.g., DHL Express) 12 | - Shipment Duration (to identify outliers) 13 | 14 | ## Service Level Agreements (SLAs) 15 | 16 | Your requirements for the shipment data: 17 | 18 | - **Data Retention:** 1 year (10 years preferred) 19 | - **Data Freshness:** At least daily updates (hourly preferred) 20 | - **Business Hours & Support:** Data access required during working hours (preferably starting at 7 in the morning, as the report is typically opened at 8:00 by the COO) 21 | -------------------------------------------------------------------------------- /exercises/exercise3-consumer-driven-data-contracts.md: -------------------------------------------------------------------------------- 1 | # Consumer-driven Data Contracts 2 | 3 | - Copy the provider-driven data contract “orders-v1.odcs.yaml” as a starting point for the consumer-driven one to “orders-v1.consumer-controlling.odcs.yaml” 4 | - Consumer is only interested in order_id and order_total, not in line_items. Remove unused schema and unused properties 5 | - Consumer requires orders to have an order_total of more than 0. Add the additional quality check. 
6 | - Make sure your consumer-driven data contract works: 7 | ``` 8 | datacontract test orders-v1.consumer-controlling.odcs.yaml 9 | ``` 10 | 11 | Bonus: define an SQL view in “consumer-view.sql” that does the projection from the provider-driven contract, so you are sure to only work on that subset using export to SQL Query. Use `datacontract export --format sql-query` as a helper. 12 | -------------------------------------------------------------------------------- /scripts/check: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check script for validating ODCS and ODPS templates 4 | # 5 | # Prerequisites: 6 | # - datacontract-cli: pip install datacontract-cli 7 | # - ajv-cli: npm install -g ajv-cli 8 | # - ajv-formats: npm install -g ajv-formats 9 | # - yq: brew install yq (macOS) or see https://github.com/mikefarah/yq 10 | # 11 | # Usage: 12 | # ./scripts/check 13 | 14 | set -e 15 | 16 | echo "Checking ODCS files..." 17 | for file in $(find . -name "*.odcs.yaml" -type f); do 18 | echo "" 19 | echo " Checking $file with datacontract lint..." 20 | datacontract lint "$file" 21 | echo " Validating $file with ajv..." 22 | ajv validate --spec=draft2019 --all-errors -c ajv-formats -s odcs/jsonschema/odcs-json-schema-v3.0.2.json -d "$file" 23 | done 24 | 25 | echo "" 26 | echo "Checking ODPS files with ajv..." 27 | for file in $(find . -name "*.odps.yaml" -type f); do 28 | echo " Validating $file..." 29 | ajv validate --spec=draft2019 --all-errors -c ajv-formats -s odps/jsonschema/odps-json-schema-v1.0.0.json -d "$file" 30 | done 31 | 32 | echo "" 33 | echo "All checks passed!" 
34 | -------------------------------------------------------------------------------- /exercises/exercise2-iterate-your-data-contract.md: -------------------------------------------------------------------------------- 1 | # Iterate your data contract 2 | 3 | - set version to "1.0.0" 4 | - set status to "active" 5 | - Add examples for customer email address based on `data/*.json` via `examples: [...]` 6 | - Make customer email address a required field `required: true` and test again 7 | - Add a SQL quality check to ensure that the customer email address is valid (count the rows where it is `NOT LIKE '%@%'`; the count must be 0) and test again 8 | - BONUS: add more constraints 9 | - for every order_id in the line_items table, there must exist an order_id in the orders table 10 | - order_total must be greater than or equal to 0 11 | - think of additional constraints of your own or other things you want to add 12 | 13 | Documentation: https://bitol-io.github.io/open-data-contract-standard/latest/#sql 14 | 15 | Reference for quality checks: 16 | 17 | ``` 18 | properties: 19 | - name: field 20 | quality: 21 | - type: text 22 | description: Ensure that ... 23 | - type: sql 24 | description: Ensure that ... 25 | query: SELECT COUNT(*) FROM ... WHERE ...; 26 | mustBe: 0 27 | # mustBeGreaterThan: 0 28 | ``` 29 | -------------------------------------------------------------------------------- /exercises/exercise1-put-your-data-under-contract.md: -------------------------------------------------------------------------------- 1 | # Put your data under contract 2 | 3 | 1. Set up environment variables for a connection to Databricks using the environment variables provided by the trainers via https://yopad.eu/p/data2day-2025-11-03. 4 | ``` 5 | export DATACONTRACT_DATABRICKS_TOKEN=dapi... 6 | export DATACONTRACT_DATABRICKS_SERVER_HOSTNAME=adb-...x.azuredatabricks.net 7 | export DATACONTRACT_DATABRICKS_HTTP_PATH=/sql/1.0/warehouses/xxx 8 | ``` 9 | 10 | 2. Use the **import** command to put the data available on Databricks under contract. 
11 | - Tables in the format catalog.schema.table: 12 | - data2day.orders-v1.orders 13 | - data2day.orders-v1.line_items 14 | - Command: 15 | ``` 16 | datacontract import \ 17 | --format unity \ 18 | --unity-table-full-name data2day.orders-v1.orders \ 19 | --unity-table-full-name data2day.orders-v1.line_items \ 20 | --spec odcs \ 21 | --output orders-v1.odcs.yaml 22 | ``` 23 | 24 | 3. Use the **test** command to check whether the data on Databricks conforms to the Data Contract 25 | 26 | ``` 27 | datacontract test orders-v1.odcs.yaml 28 | ``` 29 | 30 | 4. Change physicalType in customerEmailAddress to integer, or remove a column, and run the tests again to see that they can fail as well 31 | 32 | ``` 33 | datacontract test orders-v1.odcs.yaml 34 | ``` 35 | 36 | 5. Use the **export** command to create an HTML documentation of the data contract. 37 | 38 | ``` 39 | datacontract export --format html orders-v1.odcs.yaml > orders-v1.odcs.html 40 | ``` 41 | 42 | 6. Export to SQL DDL [exports](https://cli.datacontract.com/#export) 43 | 44 | ``` 45 | datacontract export --format sql orders-v1.odcs.yaml 46 | ``` 47 | 48 | 7. Use the **catalog** command to create a data contract catalog. 49 | - Command: `datacontract catalog` 50 | -------------------------------------------------------------------------------- /odps/template.odps.yaml: -------------------------------------------------------------------------------- 1 | # REQUIRED Version of the standard used to build data product. 2 | apiVersion: v1.0.0 3 | 4 | # REQUIRED The kind of file this is. Valid value is `DataProduct`. 5 | kind: DataProduct 6 | 7 | # REQUIRED A unique identifier. 8 | # Example: dp-customer 9 | id: 10 | 11 | # REQUIRED Current status of the data product (e.g., draft, in development, proposed, active, deprecated, retired). 12 | # Example: active 13 | status: 14 | 15 | # Name of the data product. 16 | # Example: Customer 17 | name: 18 | 19 | # Business domain. 
20 | # Example: seller 21 | domain: 22 | 23 | # Organization identifier. 24 | # Example: data2day 25 | tenant: 26 | 27 | # List of objects describing an input port. You need at least one as a data product needs to get data somewhere. 28 | inputPorts: 29 | # Name of the input port. 30 | # Example: payments 31 | - name: 32 | # Version of the input port. 33 | # Example: 1.0.0 34 | version: 35 | # Contract ID for the input port. 36 | # Example: dbb7b1eb-7628-436e-8914-2a00638ba6db 37 | contractId: 38 | 39 | # List of objects describing an output port. You need at least one, as a data product without output is useless. 40 | outputPorts: 41 | # Name of the output port. 42 | # Example: rawtransactions 43 | - name: 44 | # Human-readable short description of the output port. 45 | # Example: Raw Transactions 46 | description: 47 | # There can be different types of output ports, each automated and handled differently. 48 | # Example: snowflake 49 | type: 50 | # For each version, a different instance of the output port is listed. The combination of the name and version is the key. 51 | # Example: 1 52 | version: 53 | # Contract ID for the output port. 54 | # Example: c2798941-1b7e-4b03-9e0d-955b1a872b33 55 | contractId: 56 | -------------------------------------------------------------------------------- /exercises/exercise6-dataprovider.md: -------------------------------------------------------------------------------- 1 | # Task 2: Data Provider - Instructions 2 | 3 | You are the product owner of the team shipment (SHIP). Your team owns the shipments bounded context and the operational shipment system. 4 | 5 | ## Shipment Data Model 6 | 7 | This data model describes the structure of shipment data available in the operational shipment system. 
8 | 9 | | Field Name | Type | Description | 10 | |---------------------|----------------|---------------------------------------------------------------| 11 | | shipment_id | string | Unique identifier for the shipment | 12 | | tracking_number | string | Carrier-provided tracking number | 13 | | status | string | Current status (e.g., 'in_transit', 'delivered', 'exception') | 14 | | origin | string | Origin location | 15 | | destination | string | Destination location | 16 | | carrier | string | Name of the carrier handling the shipment (e.g., DHL, Hermes) | 17 | | carrier_service | string | Service level (e.g., 'express', 'standard') | 18 | | created_at | datetime | Timestamp when the shipment was created | 19 | | shipped_at | datetime | Timestamp when the shipment was shipped | 20 | | expected_delivery_at | datetime | Expected delivery timestamp | 21 | | delivered_at | datetime/null | Actual delivery timestamp (null if not delivered) | 22 | | last_update_at | datetime | Timestamp of the last status/location update | 23 | | current_location | string | Most recent known location | 24 | | location_history | array | List of past locations and timestamps | 25 | 26 | ## Example Shipments (JSON) 27 | 28 | ```json 29 | { 30 | "shipment_id": "SHIP654321", 31 | "tracking_number": "1Z999BB20234567890", 32 | "status": "delivered", 33 | "origin": "Hamburg Port", 34 | "destination": "Frankfurt Office", 35 | "carrier": "FedEx", 36 | "carrier_service": "standard", 37 | "created_at": "2025-05-28T09:00:00Z", 38 | "shipped_at": "2025-05-28T12:00:00Z", 39 | "expected_delivery_at": "2025-05-30T17:00:00Z", 40 | "delivered_at": "2025-05-30T16:45:00Z", 41 | "last_update_at": "2025-05-30T16:45:00Z", 42 | "current_location": "Frankfurt Office", 43 | "location_history": [ 44 | { "location": "Hamburg Port", "timestamp": "2025-05-28T12:00:00Z" }, 45 | { "location": "Kassel Facility", "timestamp": "2025-05-29T08:30:00Z" }, 46 | { "location": "Frankfurt Office", "timestamp": 
"2025-05-30T16:45:00Z" } 47 | ] 48 | } 49 | ``` 50 | 51 | --- 52 | 53 | ## Service Level Agreements (SLAs) 54 | 55 | The operational shipment system has the following behaviour: 56 | 57 | - **Data Retention:** 180 days for delivered shipments; deleted afterwards 58 | - **Data Freshness:** Status updates every 5 minutes, location updates every 15 minutes 59 | - **Business Hours & Support:** 24/7 data access, support available Mon-Fri 9-5 CEST 60 | -------------------------------------------------------------------------------- /odps/examples/dataproduct-example.odps.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1.0.0 2 | kind: DataProduct 3 | 4 | name: Customer Data Product 5 | id: fbe8d147-28db-4f1d-bedf-a3fe9f458427 6 | domain: seller 7 | status: draft 8 | tenant: Entropy Data 9 | 10 | description: 11 | purpose: Enterprise view of a customer. 12 | limitations: No known limitations. 13 | usage: Check the various artefacts for their own description. 
14 | 15 | tags: ['customer'] 16 | 17 | inputPorts: 18 | - name: payments 19 | version: 1.0.0 20 | contractId: dbb7b1eb-7628-436e-8914-2a00638ba6db 21 | - name: payments 22 | version: 2.0.0 23 | contractId: dbb7b1eb-7628-436e-8914-2a00638ba6da 24 | - name: onlinetransactions 25 | version: 1.0.0 26 | contractId: ec2a112d-5cfe-49f3-8760-f9cfb4597544 27 | - name: onlinetransactions 28 | version: 1.1.0 29 | contractId: ec2a112d-5cfe-49f3-8760-f9cfb4597547 30 | tags: ["transactions"] 31 | customProperties: 32 | - property: "transactions_version" 33 | value: "1.1.0" 34 | authoritativeDefinitions: 35 | - type: "data_dictionary" 36 | url: "https://mydata/dictionary" 37 | 38 | outputPorts: 39 | - name: rawtransactions 40 | description: "Raw Transactions" 41 | type: tables 42 | version: 1.0.0 43 | contractId: c2798941-1b7e-4b03-9e0d-955b1a872b32 44 | - name: rawtransactions 45 | description: "Raw Transactions" 46 | type: tables 47 | version: 2.0.0 48 | contractId: c2798941-1b7e-4b03-9e0d-955b1a872b33 49 | tags: ["transactions"] 50 | customProperties: 51 | - property: "transactions_version" 52 | value: "2.0.0" 53 | authoritativeDefinitions: 54 | - type: "data_dictionary" 55 | url: "https://mydata/dictionary" 56 | sbom: 57 | - type: "external" 58 | url: "https://mysbomserver/mysbom" 59 | inputContracts: 60 | - id: dbb7b1eb-7628-436e-8914-2a00638ba6db 61 | version: 2.0.0 62 | - id: ec2a112d-5cfe-49f3-8760-f9cfb4597544 63 | version: 1.0.0 64 | 65 | - name: consolidatedtransactions 66 | description: "Consolidated transactions" 67 | type: tables 68 | version: 1.0.0 69 | contractId: a44978be-1fe0-4226-b840-1b715bc25c63 70 | 71 | - name: fulltransactionswithreturns 72 | description: "Full transactions with returns" 73 | type: tables 74 | version: 0.3.0 75 | contractId: ef769969-0cbe-4188-876f-bb00abadaee4 76 | 77 | managementPorts: 78 | - content: dictionary 79 | type: topic 80 | name: tpc-dict-update 81 | description: Kafka topic for dictionary updates 82 | tags: ["kafka"] 83 | 
customProperties: 84 | - property: "kafka_topic" 85 | value: "true" 86 | authoritativeDefinitions: 87 | - type: "kafka_topic" 88 | url: "https://mykafka/topic" 89 | 90 | support: 91 | - channel: "Data Team Slack" 92 | url: "https://retailcorp.slack.com/archives/C1234567890" 93 | description: "Primary support channel for data product questions" 94 | tool: "slack" 95 | scope: "interactive" 96 | - channel: "Email Support" 97 | url: "mailto:simon.harrer@entropy-data.com" 98 | description: "Email support for urgent issues" 99 | tool: "email" 100 | scope: "issues" 101 | 102 | team: 103 | name: "Data Team" 104 | description: "The Data Team is responsible for the data product." 105 | tags: ["data", "team"] 106 | customProperties: 107 | - property: "data_team" 108 | value: "true" 109 | members: 110 | - username: "simon.harrer@entropy-data.com" 111 | name: "John Doe" 112 | description: "Data Product Owner" 113 | role: "owner" 114 | dateIn: "2023-01-15" 115 | - username: "simon.harrer@entropy-data.com" 116 | name: "Jane Smith" 117 | description: "Data Steward" 118 | role: "data steward" 119 | dateIn: "2023-02-01" 120 | 121 | productCreatedTs: "2023-01-15T10:30:00Z" 122 | -------------------------------------------------------------------------------- /order-service/order-created-events-v1.odcs.yaml: -------------------------------------------------------------------------------- 1 | kind: DataContract 2 | apiVersion: v3.0.2 3 | 4 | id: order-created-events-v1 5 | name: Order Created Events 6 | status: active 7 | version: 1.0.5 8 | domain: sales 9 | tenant: Entropy Data 10 | 11 | description: 12 | purpose: Stream of order creation events for downstream processing 13 | limitations: Only contains order creation events, not updates or cancellations 14 | usage: Real-time order processing and analytics 15 | 16 | servers: 17 | - server: production 18 | type: kafka 19 | host: localhost:9092 20 | format: json 21 | environment: production 22 | 23 | schema: 24 | - name: 
order-created-events 25 | physicalName: order-created-events 26 | physicalType: topic 27 | businessName: Order Created Events 28 | description: Kafka topic containing order creation events with nested line items 29 | tags: ['orders', 'events', 'streaming'] 30 | properties: 31 | - name: order_id 32 | primaryKey: true 33 | primaryKeyPosition: 1 34 | businessName: Order ID 35 | logicalType: string 36 | physicalType: string 37 | required: true 38 | description: Unique identifier for the order 39 | classification: public 40 | examples: 41 | - "a8c38fec-2acd-4b55-883b-4b48572d4a26" 42 | 43 | - name: order_timestamp 44 | businessName: Order Timestamp 45 | logicalType: date 46 | physicalType: string 47 | required: true 48 | description: Date and time when the order was created 49 | classification: public 50 | examples: 51 | - "2020-01-01T00:18:00Z" 52 | 53 | - name: order_total 54 | businessName: Order Total 55 | logicalType: integer 56 | physicalType: integer 57 | required: true 58 | description: Total amount of the order in cents 59 | classification: public 60 | examples: 61 | - 29747 62 | 63 | - name: customer_id 64 | businessName: Customer ID 65 | logicalType: string 66 | physicalType: string 67 | required: true 68 | description: Unique identifier for the customer placing the order 69 | examples: 70 | - "6GSHKOZIEN" 71 | 72 | - name: customer_email_address 73 | businessName: Customer Email Address 74 | logicalType: string 75 | physicalType: string 76 | required: true 77 | description: Email address of the customer 78 | classification: restricted 79 | encryptedName: customer_email_address_encrypted 80 | examples: 81 | - "test394@example.org" 82 | 83 | - name: line_items 84 | businessName: Order Line Items 85 | logicalType: array 86 | physicalType: array 87 | required: true 88 | description: List of line items in the order 89 | classification: public 90 | items: 91 | logicalType: object 92 | physicalType: object 93 | properties: 94 | - name: lines_item_id 95 | 
businessName: Line Item ID 96 | logicalType: string 97 | physicalType: string 98 | required: true 99 | description: Unique identifier for the line item 100 | classification: public 101 | examples: 102 | - "94aa82c8-50ba-47fb-994a-9b041b4127af" 103 | 104 | - name: order_id 105 | businessName: Order ID 106 | logicalType: string 107 | physicalType: string 108 | required: true 109 | description: Reference to the parent order 110 | classification: public 111 | examples: 112 | - "a8c38fec-2acd-4b55-883b-4b48572d4a26" 113 | 114 | - name: sku 115 | businessName: SKU 116 | logicalType: string 117 | physicalType: string 118 | required: true 119 | description: Stock Keeping Unit identifier for the product 120 | classification: public 121 | examples: 122 | - "D3KT74L5EV46T" 123 | customProperties: 124 | - property: temporalModel 125 | value: asdf 126 | 127 | team: 128 | - username: simon.harrer@entropy-data.com 129 | role: Owner 130 | 131 | support: 132 | - channel: '#checkout-team' 133 | tool: slack 134 | url: https://entropydata.slack.com/archives/checkout-team 135 | 136 | tags: 137 | - orders 138 | - events 139 | - kafka 140 | - streaming 141 | 142 | contractCreatedTs: "2024-11-02T10:00:00+00:00" 143 | -------------------------------------------------------------------------------- /odcs/examples/datacontract-example.odcs.yaml: -------------------------------------------------------------------------------- 1 | kind: DataContract 2 | apiVersion: v3.0.2 3 | 4 | # What's this data contract about? 5 | domain: seller # Domain 6 | dataProduct: my quantum # Data product name 7 | version: 1.1.0 # Version (follows semantic versioning) 8 | status: active 9 | id: 53581432-6c55-4ba2-a65f-72344a91553a 10 | name: Example Data Contract 11 | 12 | tenant: Entropy Data 13 | 14 | # Lots of information 15 | description: 16 | purpose: Views built on top of the seller tables. 
17 | limitations: Data based on seller perspective, no buyer information 18 | usage: Predict sales over time 19 | authoritativeDefinitions: 20 | - type: privacy-statement 21 | url: https://example.com/gdpr.pdf 22 | 23 | # Infrastructure & servers 24 | servers: 25 | - server: my-postgres 26 | type: postgres 27 | host: localhost 28 | port: 5432 29 | database: pypl-edw 30 | schema: pp_access_views 31 | 32 | # Dataset, schema and quality 33 | schema: 34 | - name: tbl 35 | physicalName: tbl_1 36 | physicalType: table 37 | businessName: Core Payment Metrics 38 | description: Provides core payment metrics 39 | authoritativeDefinitions: 40 | - url: https://catalog.data.gov/dataset/air-quality 41 | type: businessDefinition 42 | - url: https://youtu.be/jbY1BKFj9ec 43 | type: videoTutorial 44 | tags: [ 'finance', 'payments'] 45 | dataGranularityDescription: Aggregation on columns txn_ref_dt, pmt_txn_id 46 | properties: 47 | - name: transaction_reference_date 48 | physicalName: txn_ref_dt 49 | primaryKey: false 50 | primaryKeyPosition: -1 51 | businessName: transaction reference date 52 | logicalType: date 53 | physicalType: date 54 | required: false 55 | description: Reference date for transaction 56 | partitioned: true 57 | partitionKeyPosition: 1 58 | criticalDataElement: false 59 | tags: [ ] 60 | classification: public 61 | transformSourceObjects: 62 | - table_name_1 63 | - table_name_2 64 | - table_name_3 65 | transformLogic: sel t1.txn_dt as txn_ref_dt from table_name_1 as t1, table_name_2 as t2, table_name_3 as t3 where t1.txn_dt=date-3 66 | transformDescription: defines the logic in business terms; logic for dummies 67 | examples: 68 | - "2022-10-03" 69 | - "2020-01-28" 70 | customProperties: 71 | - property: anonymizationStrategy 72 | value: none 73 | - name: rcvr_id 74 | primaryKey: true 75 | primaryKeyPosition: 1 76 | businessName: receiver id 77 | logicalType: string 78 | physicalType: varchar(18) 79 | required: false 80 | description: A description for column 
rcvr_id. 81 | partitioned: false 82 | partitionKeyPosition: -1 83 | criticalDataElement: false 84 | tags: [ 'uid' ] 85 | classification: restricted 86 | - name: rcvr_cntry_code 87 | primaryKey: false 88 | primaryKeyPosition: -1 89 | businessName: receiver country code 90 | logicalType: string 91 | physicalType: varchar(2) 92 | required: false 93 | description: Country code 94 | partitioned: false 95 | partitionKeyPosition: -1 96 | criticalDataElement: false 97 | tags: [ ] 98 | classification: public 99 | authoritativeDefinitions: 100 | - url: https://collibra.com/asset/742b358f-71a5-4ab1-bda4-dcdba9418c25 101 | type: businessDefinition 102 | - url: https://github.com/myorg/myrepo 103 | type: transformationImplementation 104 | - url: jdbc:postgresql://localhost:5432/adventureworks/tbl_1/rcvr_cntry_code 105 | type: implementation 106 | encryptedName: rcvr_cntry_code_encrypted 107 | quality: 108 | - rule: nullCheck 109 | description: column should not contain null values 110 | dimension: completeness # dropdown 7 values 111 | type: library 112 | severity: error 113 | businessImpact: operational 114 | schedule: 0 20 * * * 115 | scheduler: cron 116 | customProperties: 117 | - property: FIELD_NAME 118 | value: 119 | - property: COMPARE_TO 120 | value: 121 | - property: COMPARISON_TYPE 122 | value: Greater than 123 | quality: 124 | - rule: countCheck 125 | type: library 126 | description: Ensure row count is within expected volume range 127 | dimension: completeness 128 | method: reconciliation 129 | severity: error 130 | businessImpact: operational 131 | schedule: 0 20 * * * 132 | scheduler: cron 133 | customProperties: 134 | - property: business-key 135 | value: 136 | - txn_ref_dt 137 | - rcvr_id 138 | 139 | 140 | # Pricing 141 | price: 142 | priceAmount: 9.95 143 | priceCurrency: USD 144 | priceUnit: megabyte 145 | 146 | 147 | # Team 148 | team: 149 | - username: ceastwood 150 | role: Data Scientist 151 | dateIn: "2022-08-02" 152 | dateOut: "2022-10-01" 153 | 
replacedByUsername: mhopper 154 | - username: mhopper 155 | role: Data Scientist 156 | dateIn: "2022-10-01" 157 | - username: daustin 158 | role: Owner 159 | description: Keeper of the grail 160 | dateIn: "2022-10-01" 161 | 162 | 163 | # Roles 164 | roles: 165 | - role: microstrategy_user_opr 166 | access: read 167 | firstLevelApprovers: Reporting Manager 168 | secondLevelApprovers: 'mandolorian' 169 | - role: bq_queryman_user_opr 170 | access: read 171 | firstLevelApprovers: Reporting Manager 172 | secondLevelApprovers: na 173 | - role: risk_data_access_opr 174 | access: read 175 | firstLevelApprovers: Reporting Manager 176 | secondLevelApprovers: 'dathvador' 177 | - role: bq_unica_user_opr 178 | access: write 179 | firstLevelApprovers: Reporting Manager 180 | secondLevelApprovers: 'mickey' 181 | 182 | # SLA 183 | slaDefaultElement: tab1.txn_ref_dt 184 | slaProperties: 185 | - property: latency # Property, see list of values in DP QoS 186 | value: 4 187 | unit: d # d, day, days for days; y, yr, years for years 188 | element: tab1.txn_ref_dt # This would not be needed as it is the same table.column as the default one 189 | - property: generalAvailability 190 | value: "2022-05-12T09:30:10-08:00" 191 | - property: endOfSupport 192 | value: "2032-05-12T09:30:10-08:00" 193 | - property: endOfLife 194 | value: "2042-05-12T09:30:10-08:00" 195 | - property: retention 196 | value: 3 197 | unit: y 198 | element: tab1.txn_ref_dt 199 | - property: frequency 200 | value: 1 201 | valueExt: 1 202 | unit: d 203 | element: tab1.txn_ref_dt 204 | - property: timeOfAvailability 205 | value: 09:00-08:00 206 | element: tab1.txn_ref_dt 207 | driver: regulatory # Describes the importance of the SLA: [regulatory|analytics|operational|...] 
208 | - property: timeOfAvailability 209 | value: 08:00-08:00 210 | element: tab1.txn_ref_dt 211 | driver: analytics 212 | 213 | 214 | # Support 215 | support: 216 | - channel: '#product-help' # Simple Slack communication channel 217 | tool: slack 218 | url: https://aidaug.slack.com/archives/C05UZRSBKLY 219 | - channel: datacontract-ann # Simple distribution list 220 | tool: email 221 | url: mailto:simon.harrer@entropy-data.com 222 | - channel: Feedback # Product Feedback 223 | description: General Product Feedback (Public) 224 | url: https://product-feedback.com 225 | 226 | # Tags 227 | tags: 228 | - transactions 229 | 230 | 231 | # Custom properties 232 | customProperties: 233 | - property: refRulesetName 234 | value: gcsc.ruleset.name 235 | - property: somePropertyName 236 | value: property.value 237 | - property: dataprocClusterName # Used for specific applications like Elevate 238 | value: [ cluster name ] 239 | 240 | contractCreatedTs: "2022-11-15T02:59:43+00:00" 241 | -------------------------------------------------------------------------------- /odcs/template.odcs.yaml: -------------------------------------------------------------------------------- 1 | # REQUIRED The kind of file this is. Valid value is `DataContract`. 2 | kind: DataContract 3 | 4 | # REQUIRED Version of the standard used to build data contract. 5 | apiVersion: v3.0.2 6 | 7 | # REQUIRED A unique identifier to reduce risk of dataset name collisions, such as a UUID. 8 | # Example: 53581432-6c55-4ba2-a65f-72344a91553a 9 | id: 10 | 11 | # REQUIRED Current status of the dataset (e.g., proposed, draft, active, deprecated, retired). 12 | # Example: active 13 | status: 14 | 15 | # Current version of the data contract (follows semantic versioning). 16 | # Example: 1.1.0 17 | version: 18 | 19 | # 20 | # Name of the logical data domain. 21 | # Example: seller 22 | #domain: 23 | 24 | # 25 | # The name of the data product. 
26 | # Example: my quantum 27 | #dataProduct: 28 | 29 | # 30 | # Indicates the property the data is primarily associated with. 31 | # Example: ClimateQuantumInc 32 | tenant: Entropy Data 33 | 34 | # 35 | # High level description of the dataset. 36 | #description: 37 | # 38 | # Purpose of the dataset. 39 | # Example: Views built on top of the seller tables. 40 | # purpose: 41 | # 42 | # Limitations of the dataset. 43 | # Example: Data based on seller perspective, no buyer information 44 | # limitations: 45 | # 46 | # Intended usage of the dataset. 47 | # Example: Predict sales over time 48 | # usage: 49 | # 50 | # List of links to sources that provide more details on the dataset. 51 | # authoritativeDefinitions: 52 | # 53 | # Type of definition for authority (e.g., businessDefinition, videoTutorial, implementation). 54 | # Example: privacy-statement 55 | # - type: 56 | # 57 | # URL to the authority. 58 | # Example: https://example.com/gdpr.pdf 59 | # url: 60 | 61 | # 62 | # List of servers where the datasets reside. 63 | #servers: 64 | # 65 | # Identifier of the server. 66 | # Example: my-postgres 67 | # - server: 68 | # 69 | # Type of the server (e.g., postgres, snowflake, bigquery, s3, kafka, etc.). 70 | # Example: postgres 71 | # type: 72 | # 73 | # The host to the server. 74 | # Example: localhost 75 | # host: 76 | # 77 | # The port to the server. 78 | # Example: 5432 79 | # port: 80 | # 81 | # The name of the database. 82 | # Example: pypl-edw 83 | # database: 84 | # 85 | # The name of the schema in the database. 86 | # Example: pp_access_views 87 | # schema: 88 | 89 | # 90 | # A list of elements within the schema to be cataloged. 91 | #schema: 92 | # 93 | # Name of the element. 94 | # Example: tbl 95 | # - name: 96 | # 97 | # Physical name. 98 | # Example: tbl_1 99 | # physicalName: 100 | # 101 | # The physical element data type in the data source (e.g., table, view, topic, file). 
102 | # Example: table 103 | # physicalType: 104 | # 105 | # The business name of the element. 106 | # Example: Core Payment Metrics 107 | # businessName: 108 | # 109 | # Description of the element. 110 | # Example: Provides core payment metrics 111 | # description: 112 | # 113 | # List of links to sources that provide more details on the dataset. 114 | # authoritativeDefinitions: 115 | # 116 | # Example: https://catalog.data.gov/dataset/air-quality 117 | # - url: 118 | # 119 | # Example: businessDefinition 120 | # type: 121 | # 122 | # A list of tags to better categorize the element (e.g., finance, sensitive). 123 | # Example: ['finance', 'payments'] 124 | # tags: [, ] 125 | # 126 | # Granular level of the data in the object (e.g., Aggregation by country). 127 | # Example: Aggregation on columns txn_ref_dt, pmt_txn_id 128 | # dataGranularityDescription: 129 | # 130 | # A list of properties for the object. 131 | # properties: 132 | # 133 | # Name of the element. 134 | # Example: transaction_reference_date 135 | # - name: 136 | # 137 | # Physical name. 138 | # Example: txn_ref_dt 139 | # physicalName: 140 | # 141 | # Boolean value specifying whether the element is a primary key or not. 142 | # Example: false 143 | # primaryKey: 144 | # 145 | # If element is a primary key, the position of the primary key element (starts from 1); -1 if the element is not part of the primary key. 146 | # Example: -1 147 | # primaryKeyPosition: 148 | # 149 | # The business name of the element. 150 | # Example: transaction reference date 151 | # businessName: 152 | # 153 | # The logical element data type (e.g., string, date, number, integer, object, array, boolean). 154 | # Example: date 155 | # logicalType: 156 | # 157 | # The physical element data type in the data source (e.g., VARCHAR(2), DOUBLE, INT). 158 | # Example: date 159 | # physicalType: 160 | # 161 | # Indicates whether the element is required (i.e., must not contain Null values); possible values are true and false. 162 | # Example: false 163 | # required: 164 | # 165 | # Description of the element. 
166 | # Example: Reference date for transaction 167 | # description: 168 | # 169 | # Indicates if the element is partitioned; possible values are true and false. 170 | # Example: true 171 | # partitioned: 172 | # 173 | # If element is used for partitioning, the position of the partition element (starts from 1); -1 if the element is not used for partitioning. 174 | # Example: 1 175 | # partitionKeyPosition: 176 | # 177 | # True or false indicator of whether the element is considered a critical data element (CDE). 178 | # Example: false 179 | # criticalDataElement: 180 | # 181 | # A list of tags to better categorize the element. 182 | # Example: [] 183 | # tags: [, ] 184 | # 185 | # Data classification (e.g., confidential, restricted, public). 186 | # Example: public 187 | # classification: 188 | # 189 | # List of objects in the data source used in the transformation. 190 | # Example: table_name_1 191 | # transformSourceObjects: 192 | # - 193 | # 194 | # Logic used in the element transformation. 195 | # Example: sel t1.txn_dt as txn_ref_dt from table_name_1 as t1 where t1.txn_dt=date-3 196 | # transformLogic: 197 | # 198 | # Describes the transform logic in very simple terms. 199 | # Example: defines the logic in business terms; logic for dummies 200 | # transformDescription: 201 | # 202 | # List of sample element values. 203 | # Example: "2022-10-03" 204 | # examples: 205 | # - 206 | # 207 | # A list of key/value pairs for custom properties. 208 | # customProperties: 209 | # 210 | # The name of the key (in camel case). 211 | # Example: anonymizationStrategy 212 | # - property: 213 | # 214 | # The value of the key. 
215 | # Example: none 216 | # value: 217 | 218 | # 219 | # Example: rcvr_id 220 | # - name: 221 | # 222 | # Example: true 223 | # primaryKey: 224 | # 225 | # Example: 1 226 | # primaryKeyPosition: 227 | # 228 | # Example: receiver id 229 | # businessName: 230 | # 231 | # Example: string 232 | # logicalType: 233 | # 234 | # Example: varchar(18) 235 | # physicalType: 236 | # 237 | # Example: false 238 | # required: 239 | # 240 | # Example: A description for column rcvr_id. 241 | # description: 242 | # 243 | # Example: false 244 | # partitioned: 245 | # 246 | # Example: -1 247 | # partitionKeyPosition: 248 | # 249 | # Example: false 250 | # criticalDataElement: 251 | # 252 | # Example: ['uid'] 253 | # tags: [] 254 | # 255 | # Example: restricted 256 | # classification: 257 | # 258 | # The element name within the dataset that contains the encrypted element value. 259 | # Example: rcvr_id_encrypted 260 | # encryptedName: 261 | # 262 | # Data quality rules with all the relevant information for rule setup and execution. 263 | # quality: 264 | # 265 | # Define a data quality check based on the predefined rules as per ODCS. 266 | # Example: nullCheck 267 | # - rule: 268 | # 269 | # Describe the quality check to be completed. 270 | # Example: column should not contain null values 271 | # description: 272 | # 273 | # The key performance indicator (KPI) or dimension for data quality. 274 | # Example: completeness 275 | # dimension: 276 | # 277 | # The type of quality check (text, library, sql, custom). 278 | # Example: library 279 | # type: 280 | # 281 | # The severity of the quality rule (e.g., info, warning, error). 282 | # Example: error 283 | # severity: 284 | # 285 | # Consequences of the rule failure (e.g., operational, regulatory). 286 | # Example: operational 287 | # businessImpact: 288 | # 289 | # Rule execution schedule details. 290 | # Example: 0 20 * * * 291 | # schedule: 292 | # 293 | # The name or type of scheduler used to start the data quality check. 
294 | # Example: cron 295 | # scheduler: 296 | 297 | # 298 | # Data quality rules at the table level. 299 | # quality: 300 | # 301 | # Example: countCheck 302 | # - rule: 303 | # 304 | # Example: library 305 | # type: 306 | # 307 | # Example: Ensure row count is within expected volume range 308 | # description: 309 | # 310 | # Example: completeness 311 | # dimension: 312 | # 313 | # Method used for quality check. 314 | # Example: reconciliation 315 | # method: 316 | # 317 | # Example: error 318 | # severity: 319 | # 320 | # Example: operational 321 | # businessImpact: 322 | # 323 | # Example: 0 20 * * * 324 | # schedule: 325 | # 326 | # Example: cron 327 | # scheduler: 328 | 329 | # 330 | # customProperties: 331 | # 332 | # Example: business-key 333 | # - property: 334 | # 335 | # Example: [txn_ref_dt, rcvr_id] 336 | # value: 337 | # - 338 | # - 339 | 340 | # 341 | # Pricing information for the dataset. 342 | #price: 343 | # 344 | # Subscription price per unit of measure. 345 | # Example: 9.95 346 | # priceAmount: 347 | # 348 | # Currency of the subscription price. 349 | # Example: USD 350 | # priceCurrency: 351 | # 352 | # The unit of measure for calculating cost (e.g., megabyte, gigabyte). 353 | # Example: megabyte 354 | # priceUnit: 355 | 356 | # 357 | # Team members associated with the dataset. 358 | #team: 359 | # 360 | # The user's username or email. 361 | # Example: ceastwood 362 | # - username: 363 | # 364 | # The user's job role (e.g., owner, data steward). 365 | # Example: Data Scientist 366 | # role: 367 | # 368 | # The date when the user joined the team. 369 | # Example: 2022-08-02 370 | # dateIn: 371 | # 372 | # Example: daustin 373 | # - username: 374 | # 375 | # Example: Owner 376 | # role: 377 | # 378 | # The user's description. 379 | # Example: Keeper of the grail 380 | # description: 381 | # 382 | # Example: 2022-10-01 383 | # dateIn: 384 | 385 | # 386 | # A list of roles that will provide user access to the dataset. 
387 | #roles: 388 | # 389 | # Name of the IAM role that provides access to the dataset. 390 | # Example: microstrategy_user_opr 391 | # - role: 392 | # 393 | # The type of access provided by the IAM role. 394 | # Example: read 395 | # access: 396 | # 397 | # The name(s) of the first-level approver(s) of the role. 398 | # Example: Reporting Manager 399 | # firstLevelApprovers: 400 | # 401 | # The name(s) of the second-level approver(s) of the role. 402 | # Example: mandolorian 403 | # secondLevelApprovers: 404 | 405 | # 406 | # Element (using the element path notation) to do the checks on. 407 | # Example: tab1.txn_ref_dt 408 | #slaDefaultElement: 409 | 410 | # 411 | # A list of key/value pairs for SLA specific properties. 412 | #slaProperties: 413 | # 414 | # Specific property in SLA. 415 | # Example: latency 416 | # - property: 417 | # 418 | # Agreement value. 419 | # Example: 4 420 | # value: 421 | # 422 | # Units (d/day/days for days; y/yr/years for years, etc.). 423 | # Example: d 424 | # unit: 425 | # 426 | # Element(s) to check on. 427 | # Example: tab1.txn_ref_dt 428 | # element: 429 | # 430 | # Example: generalAvailability 431 | # - property: 432 | # 433 | # Example: 2022-05-12T09:30:10-08:00 434 | # value: 435 | 436 | # 437 | # Support and communication channels. 438 | #support: 439 | # 440 | # Channel name or identifier. 441 | # Example: #product-help 442 | # - channel: 443 | # 444 | # Name of the tool (e.g., email, slack, teams, discord, ticket, other). 445 | # Example: slack 446 | # tool: 447 | # 448 | # Access URL using normal URL scheme (https, mailto, etc.). 449 | # Example: https://aidaug.slack.com/archives/C05UZRSBKLY 450 | # url: 451 | 452 | # 453 | # A list of tags to better categorize the dataset. 454 | # Example: transactions 455 | #tags: 456 | # - 457 | 458 | # 459 | # A list of key/value pairs for custom properties. 
460 | #customProperties: 461 | # 462 | # Example: refRulesetName 463 | # - property: 464 | # 465 | # Example: gcsc.ruleset.name 466 | # value: 467 | 468 | # 469 | # Timestamp in UTC of when the data contract was created. 470 | # Example: 2022-11-15T02:59:43+00:00 471 | #contractCreatedTs: 472 | -------------------------------------------------------------------------------- /odps/jsonschema/odps-json-schema-v1.0.0.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2019-09/schema", 3 | "title": "Open Data Product Standard (ODPS)", 4 | "description": "An open data product standard descriptor to enable defining data products.", 5 | "type": "object", 6 | "required": ["apiVersion", "kind", "id", "status"], 7 | "additionalProperties": false, 8 | "properties": { 9 | "apiVersion": { 10 | "type": "string", 11 | "default": "v1.0.0", 12 | "description": "Version of the standard used to build data product. Default value is v1.0.0.", 13 | "enum": ["v0.9.0", "v1.0.0"] 14 | }, 15 | "kind": { 16 | "type": "string", 17 | "default": "DataProduct", 18 | "description": "The kind of file this is. Valid value is `DataProduct`.", 19 | "enum": ["DataProduct"] 20 | }, 21 | "id": { 22 | "type": "string", 23 | "description": "A unique identifier used to reduce the risk of dataset name collisions, such as a UUID." 24 | }, 25 | "name": { 26 | "type": "string", 27 | "description": "Name of the data product." 28 | }, 29 | "version": { 30 | "type": "string", 31 | "description": "Current version of the data product. Not required, but highly recommended." 
32 | }, 33 | "status": { 34 | "type": "string", 35 | "description": "Current status of the data product.", 36 | "examples": ["proposed", "draft", "active", "deprecated", "retired"] 37 | }, 38 | "domain": { 39 | "type": "string", 40 | "description": "Business domain" 41 | }, 42 | "tenant": { 43 | "type": "string", 44 | "description": "Organization identifier" 45 | }, 46 | "authoritativeDefinitions": { 47 | "type": "array", 48 | "description": "List of links to sources that provide more details on the data product.", 49 | "items": { 50 | "$ref": "#/$defs/AuthoritativeDefinition" 51 | } 52 | }, 53 | "description": { 54 | "$ref": "#/$defs/Description" 55 | }, 56 | "customProperties": { 57 | "type": "array", 58 | "description": "A list of key/value pairs for custom properties.", 59 | "items": { 60 | "$ref": "#/$defs/CustomProperty" 61 | } 62 | }, 63 | "tags": { 64 | "$ref": "#/$defs/Tags" 65 | }, 66 | "inputPorts": { 67 | "type": "array", 68 | "description": "List of objects describing an input port. You need at least one, as a data product needs to get data from somewhere.", 69 | "items": { 70 | "$ref": "#/$defs/InputPort" 71 | } 72 | }, 73 | "outputPorts": { 74 | "type": "array", 75 | "description": "List of objects describing an output port. You need at least one, as a data product without output is useless.", 76 | "items": { 77 | "$ref": "#/$defs/OutputPort" 78 | } 79 | }, 80 | "managementPorts": { 81 | "type": "array", 82 | "description": "Management ports define access points for managing the data product.", 83 | "items": { 84 | "$ref": "#/$defs/ManagementPort" 85 | } 86 | }, 87 | "support": { 88 | "type": "array", 89 | "description": "Support and communication channels.", 90 | "items": { 91 | "$ref": "#/$defs/Support" 92 | } 93 | }, 94 | "team": { 95 | "$ref": "#/$defs/Team" 96 | }, 97 | "productCreatedTs": { 98 | "type": "string", 99 | "format": "date-time", 100 | "description": "Timestamp in UTC of when the data product was created, using ISO 8601." 
101 | } 102 | }, 103 | "$defs": { 104 | "Tags": { 105 | "type": "array", 106 | "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. Tags may be used to better categorize an element. For example, `finance`, `sensitive`, `employee_record`.", 107 | "examples": ["finance", "sensitive", "employee_record"], 108 | "items": { 109 | "type": "string" 110 | } 111 | }, 112 | "Description": { 113 | "type": "object", 114 | "description": "Object containing the descriptions.", 115 | "additionalProperties": false, 116 | "properties": { 117 | "purpose": { 118 | "type": "string", 119 | "description": "Intended purpose for the provided data." 120 | }, 121 | "limitations": { 122 | "type": "string", 123 | "description": "Technical, compliance, and legal limitations for data use." 124 | }, 125 | "usage": { 126 | "type": "string", 127 | "description": "Recommended usage of the data." 128 | }, 129 | "authoritativeDefinitions": { 130 | "type": "array", 131 | "description": "List of links to sources that provide more details on the data contract.", 132 | "items": { 133 | "$ref": "#/$defs/AuthoritativeDefinition" 134 | } 135 | }, 136 | "customProperties": { 137 | "type": "array", 138 | "description": "A list of key/value pairs for custom properties.", 139 | "items": { 140 | "$ref": "#/$defs/CustomProperty" 141 | } 142 | } 143 | } 144 | }, 145 | "CustomProperty": { 146 | "type": "object", 147 | "description": "A key/value pair for custom properties.", 148 | "additionalProperties": false, 149 | "properties": { 150 | "property": { 151 | "type": "string", 152 | "description": "The name of the key. Names should be in camel case, the same as if they were permanent properties in the contract." 153 | }, 154 | "value": { 155 | "description": "The value of the key." 156 | }, 157 | "description": { 158 | "type": "string", 159 | "description": "Optional description." 
160 | } 161 | }, 162 | "required": ["property", "value"] 163 | }, 164 | "AuthoritativeDefinition": { 165 | "type": "object", 166 | "description": "A type/link pair for authoritative definitions.", 167 | "additionalProperties": false, 168 | "properties": { 169 | "type": { 170 | "type": "string", 171 | "description": "Type of definition for authority.", 172 | "examples": ["businessDefinition", "transformationImplementation", "videoTutorial", "tutorial", "implementation"] 173 | }, 174 | "url": { 175 | "type": "string", 176 | "format": "uri", 177 | "description": "URL to the authority." 178 | }, 179 | "description": { 180 | "type": "string", 181 | "description": "Optional description." 182 | } 183 | }, 184 | "required": ["type", "url"] 185 | }, 186 | "InputPort": { 187 | "type": "object", 188 | "description": "An input port describing expectations.", 189 | "additionalProperties": false, 190 | "properties": { 191 | "name": { 192 | "type": "string", 193 | "description": "Name of the input port." 194 | }, 195 | "version": { 196 | "type": "string", 197 | "description": "Version of the input port." 198 | }, 199 | "contractId": { 200 | "type": "string", 201 | "description": "Contract ID for the input port." 202 | }, 203 | "tags": { 204 | "$ref": "#/$defs/Tags" 205 | }, 206 | "customProperties": { 207 | "type": "array", 208 | "description": "Custom properties block.", 209 | "items": { 210 | "$ref": "#/$defs/CustomProperty" 211 | } 212 | }, 213 | "authoritativeDefinitions": { 214 | "type": "array", 215 | "description": "Authoritative definitions block.", 216 | "items": { 217 | "$ref": "#/$defs/AuthoritativeDefinition" 218 | } 219 | } 220 | }, 221 | "required": ["name", "version", "contractId"] 222 | }, 223 | "OutputPort": { 224 | "type": "object", 225 | "description": "An output port describing promises.", 226 | "additionalProperties": false, 227 | "properties": { 228 | "name": { 229 | "type": "string", 230 | "description": "Name of the output port." 
231 | }, 232 | "description": { 233 | "type": "string", 234 | "description": "Human readable short description of the output port." 235 | }, 236 | "type": { 237 | "type": "string", 238 | "description": "There can be different types of output ports, each automated and handled differently. Here you can indicate the type." 239 | }, 240 | "version": { 241 | "type": "string", 242 | "description": "For each version, a different instance of the output port is listed. The combination of the name and version is the key." 243 | }, 244 | "contractId": { 245 | "type": "string", 246 | "description": "Contract ID for the output port." 247 | }, 248 | "sbom": { 249 | "type": "array", 250 | "description": "The SBOM can/should be at the version level.", 251 | "items": { 252 | "$ref": "#/$defs/SBOM" 253 | } 254 | }, 255 | "inputContracts": { 256 | "type": "array", 257 | "description": "Dependencies or input contracts.", 258 | "items": { 259 | "$ref": "#/$defs/InputContract" 260 | } 261 | }, 262 | "tags": { 263 | "$ref": "#/$defs/Tags" 264 | }, 265 | "customProperties": { 266 | "type": "array", 267 | "description": "Custom properties block.", 268 | "items": { 269 | "$ref": "#/$defs/CustomProperty" 270 | } 271 | }, 272 | "authoritativeDefinitions": { 273 | "type": "array", 274 | "description": "Authoritative definitions block.", 275 | "items": { 276 | "$ref": "#/$defs/AuthoritativeDefinition" 277 | } 278 | } 279 | }, 280 | "required": ["name", "version"] 281 | }, 282 | "SBOM": { 283 | "type": "object", 284 | "description": "Software Bill of Materials.", 285 | "additionalProperties": false, 286 | "properties": { 287 | "type": { 288 | "type": "string", 289 | "default": "external", 290 | "description": "Type of SBOM." 291 | }, 292 | "url": { 293 | "type": "string", 294 | "format": "uri", 295 | "description": "URL to the SBOM." 
296 | } 297 | }, 298 | "required": ["url"] 299 | }, 300 | "InputContract": { 301 | "type": "object", 302 | "description": "Input contract dependency.", 303 | "additionalProperties": false, 304 | "properties": { 305 | "id": { 306 | "type": "string", 307 | "description": "Contract ID or contractId." 308 | }, 309 | "version": { 310 | "type": "string", 311 | "description": "Version of the input contract." 312 | } 313 | }, 314 | "required": ["id", "version"] 315 | }, 316 | "ManagementPort": { 317 | "type": "object", 318 | "description": "Management port for managing the data product.", 319 | "additionalProperties": false, 320 | "properties": { 321 | "name": { 322 | "type": "string", 323 | "description": "Endpoint identifier or unique name." 324 | }, 325 | "content": { 326 | "type": "string", 327 | "description": "Content type.", 328 | "examples": ["discoverability", "observability", "control", "dictionary"] 329 | }, 330 | "type": { 331 | "type": "string", 332 | "default": "rest", 333 | "description": "Type: can be `rest` or `topic`. Default is `rest`.", 334 | "examples": ["rest", "topic"] 335 | }, 336 | "url": { 337 | "type": "string", 338 | "format": "uri", 339 | "description": "URL to access the endpoint." 340 | }, 341 | "channel": { 342 | "type": "string", 343 | "description": "Channel to communicate with the data product." 344 | }, 345 | "description": { 346 | "type": "string", 347 | "description": "Purpose and usage." 
348 | }, 349 | "tags": { 350 | "$ref": "#/$defs/Tags" 351 | }, 352 | "customProperties": { 353 | "type": "array", 354 | "description": "Custom properties block.", 355 | "items": { 356 | "$ref": "#/$defs/CustomProperty" 357 | } 358 | }, 359 | "authoritativeDefinitions": { 360 | "type": "array", 361 | "description": "Authoritative definitions block.", 362 | "items": { 363 | "$ref": "#/$defs/AuthoritativeDefinition" 364 | } 365 | } 366 | }, 367 | "required": ["name", "content"] 368 | }, 369 | "Support": { 370 | "type": "object", 371 | "description": "Support channel.", 372 | "additionalProperties": false, 373 | "properties": { 374 | "channel": { 375 | "type": "string", 376 | "description": "Channel name or identifier." 377 | }, 378 | "url": { 379 | "type": "string", 380 | "format": "uri", 381 | "description": "Access URL using normal URL scheme (https, mailto, etc.)." 382 | }, 383 | "description": { 384 | "type": "string", 385 | "description": "Description of the channel, free text." 386 | }, 387 | "tool": { 388 | "type": "string", 389 | "description": "Name of the tool.", 390 | "examples": ["email", "slack", "teams", "discord", "ticket", "other"] 391 | }, 392 | "scope": { 393 | "type": "string", 394 | "description": "Scope can be: `interactive`, `announcements`, `issues`.", 395 | "examples": ["interactive", "announcements", "issues"] 396 | }, 397 | "invitationUrl": { 398 | "type": "string", 399 | "format": "uri", 400 | "description": "Some tools use an invitation URL for requesting or subscribing. Follows the URL scheme." 
401 | }, 402 | "tags": { 403 | "$ref": "#/$defs/Tags" 404 | }, 405 | "customProperties": { 406 | "type": "array", 407 | "description": "Custom properties block.", 408 | "items": { 409 | "$ref": "#/$defs/CustomProperty" 410 | } 411 | }, 412 | "authoritativeDefinitions": { 413 | "type": "array", 414 | "description": "Authoritative definitions block.", 415 | "items": { 416 | "$ref": "#/$defs/AuthoritativeDefinition" 417 | } 418 | } 419 | }, 420 | "required": ["channel", "url"] 421 | }, 422 | "TeamMember": { 423 | "type": "object", 424 | "description": "Team member information.", 425 | "additionalProperties": false, 426 | "properties": { 427 | "username": { 428 | "type": "string", 429 | "description": "The user's username or email." 430 | }, 431 | "name": { 432 | "type": "string", 433 | "description": "The user's name." 434 | }, 435 | "description": { 436 | "type": "string", 437 | "description": "The user's description." 438 | }, 439 | "role": { 440 | "type": "string", 441 | "description": "The user's job role; Examples might be owner, data steward. There is no limit on the role." 442 | }, 443 | "dateIn": { 444 | "type": "string", 445 | "format": "date", 446 | "description": "The date when the user joined the team." 447 | }, 448 | "dateOut": { 449 | "type": "string", 450 | "format": "date", 451 | "description": "The date when the user ceased to be part of the team." 452 | }, 453 | "replacedByUsername": { 454 | "type": "string", 455 | "description": "The username of the user who replaced the previous user." 
456 | }, 457 | "tags": { 458 | "$ref": "#/$defs/Tags" 459 | }, 460 | "customProperties": { 461 | "type": "array", 462 | "description": "Custom properties block.", 463 | "items": { 464 | "$ref": "#/$defs/CustomProperty" 465 | } 466 | }, 467 | "authoritativeDefinitions": { 468 | "type": "array", 469 | "description": "Authoritative definitions block.", 470 | "items": { 471 | "$ref": "#/$defs/AuthoritativeDefinition" 472 | } 473 | } 474 | }, 475 | "required": ["username"] 476 | }, 477 | "Team": { 478 | "type": "object", 479 | "description": "Team information.", 480 | "additionalProperties": false, 481 | "properties": { 482 | "name": { 483 | "type": "string", 484 | "description": "Team name." 485 | }, 486 | "description": { 487 | "type": "string", 488 | "description": "Team description." 489 | }, 490 | "members": { 491 | "type": "array", 492 | "description": "List of members.", 493 | "items": { 494 | "$ref": "#/$defs/TeamMember" 495 | } 496 | }, 497 | "tags": { 498 | "$ref": "#/$defs/Tags" 499 | }, 500 | "customProperties": { 501 | "type": "array", 502 | "description": "Custom properties block.", 503 | "items": { 504 | "$ref": "#/$defs/CustomProperty" 505 | } 506 | }, 507 | "authoritativeDefinitions": { 508 | "type": "array", 509 | "description": "Authoritative definitions block.", 510 | "items": { 511 | "$ref": "#/$defs/AuthoritativeDefinition" 512 | } 513 | } 514 | } 515 | } 516 | } 517 | } -------------------------------------------------------------------------------- /odcs/jsonschema/odcs-json-schema-v3.0.2.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2019-09/schema", 3 | "title": "Open Data Contract Standard (ODCS)", 4 | "description": "An open data contract specification to establish agreement between data producers and consumers.", 5 | "type": "object", 6 | "properties": { 7 | "version": { 8 | "type": "string", 9 | "description": "Current version of the data contract." 
10 | }, 11 | "kind": { 12 | "type": "string", 13 | "default": "DataContract", 14 | "description": "The kind of file this is. Valid value is `DataContract`.", 15 | "enum": ["DataContract"] 16 | }, 17 | "apiVersion": { 18 | "type": "string", 19 | "default": "v3.0.2", 20 | "description": "Version of the standard used to build data contract. Default value is v3.0.2.", 21 | "enum": ["v3.0.2","v3.0.1", "v3.0.0", "v2.2.2", "v2.2.1", "v2.2.0"] 22 | }, 23 | "id": { 24 | "type": "string", 25 | "description": "A unique identifier used to reduce the risk of dataset name collisions, such as a UUID." 26 | }, 27 | "name": { 28 | "type": "string", 29 | "description": "Name of the data contract." 30 | }, 31 | "tenant": { 32 | "type": "string", 33 | "description": "Indicates the property the data is primarily associated with. Value is case insensitive." 34 | }, 35 | "tags": { 36 | "$ref": "#/$defs/Tags" 37 | }, 38 | "status": { 39 | "type": "string", 40 | "description": "Current status of the dataset.", 41 | "examples": [ 42 | "proposed", "draft", "active", "deprecated", "retired" 43 | ] 44 | }, 45 | "servers": { 46 | "type": "array", 47 | "description": "List of servers where the datasets reside.", 48 | "items": { 49 | "$ref": "#/$defs/Server" 50 | } 51 | }, 52 | "dataProduct": { 53 | "type": "string", 54 | "description": "The name of the data product." 55 | }, 56 | "description": { 57 | "type": "object", 58 | "description": "High level description of the dataset.", 59 | "properties": { 60 | "usage": { 61 | "type": "string", 62 | "description": "Intended usage of the dataset." 63 | }, 64 | "purpose": { 65 | "type": "string", 66 | "description": "Purpose of the dataset." 67 | }, 68 | "limitations": { 69 | "type": "string", 70 | "description": "Limitations of the dataset." 
71 | }, 72 | "authoritativeDefinitions": { 73 | "$ref": "#/$defs/AuthoritativeDefinitions" 74 | }, 75 | "customProperties": { 76 | "$ref": "#/$defs/CustomProperties" 77 | } 78 | } 79 | }, 80 | "domain": { 81 | "type": "string", 82 | "description": "Name of the logical data domain.", 83 | "examples": ["imdb_ds_aggregate", "receiver_profile_out", "transaction_profile_out"] 84 | }, 85 | "schema": { 86 | "type": "array", 87 | "description": "A list of elements within the schema to be cataloged.", 88 | "items": { 89 | "$ref": "#/$defs/SchemaObject" 90 | } 91 | }, 92 | "support": { 93 | "$ref": "#/$defs/Support" 94 | }, 95 | "price": { 96 | "$ref": "#/$defs/Pricing" 97 | }, 98 | "team": { 99 | "type": "array", 100 | "items": { 101 | "$ref": "#/$defs/Team" 102 | } 103 | }, 104 | "roles": { 105 | "type": "array", 106 | "description": "A list of roles that will provide user access to the dataset.", 107 | "items": { 108 | "$ref": "#/$defs/Role" 109 | } 110 | }, 111 | "slaDefaultElement": { 112 | "type": "string", 113 | "description": "Element (using the element path notation) to do the checks on." 114 | }, 115 | "slaProperties": { 116 | "type": "array", 117 | "description": "A list of key/value pairs for SLA specific properties. There is no limit on the type of properties (more details to come).", 118 | "items": { 119 | "$ref": "#/$defs/ServiceLevelAgreementProperty" 120 | } 121 | }, 122 | "authoritativeDefinitions": { 123 | "$ref": "#/$defs/AuthoritativeDefinitions" 124 | }, 125 | "customProperties": { 126 | "$ref": "#/$defs/CustomProperties" 127 | }, 128 | "contractCreatedTs": { 129 | "type": "string", 130 | "format": "date-time", 131 | "description": "Timestamp in UTC of when the data contract was created." 
132 | } 133 | }, 134 | "required": ["version", "apiVersion", "kind", "id", "status"], 135 | "additionalProperties": false, 136 | "$defs": { 137 | "Server": { 138 | "type": "object", 139 | "description": "Data source details of where data is physically stored.", 140 | "properties": { 141 | "server": { 142 | "type": "string", 143 | "description": "Identifier of the server." 144 | }, 145 | "type": { 146 | "type": "string", 147 | "description": "Type of the server.", 148 | "enum": [ 149 | "api", "athena", "azure", "bigquery", "clickhouse", "databricks", "denodo", "dremio", 150 | "duckdb", "glue", "cloudsql", "db2", "informix", "kafka", "kinesis", "local", 151 | "mysql", "oracle", "postgresql", "postgres", "presto", "pubsub", 152 | "redshift", "s3", "sftp", "snowflake", "sqlserver", "synapse", "trino", "vertica", "custom" 153 | ] 154 | }, 155 | "description": { 156 | "type": "string", 157 | "description": "Description of the server." 158 | }, 159 | "environment": { 160 | "type": "string", 161 | "description": "Environment of the server.", 162 | "examples": ["prod", "preprod", "dev", "uat"] 163 | }, 164 | "roles": { 165 | "type": "array", 166 | "description": "List of roles that have access to the server.", 167 | "items": { 168 | "$ref": "#/$defs/Role" 169 | } 170 | }, 171 | "customProperties": { 172 | "$ref": "#/$defs/CustomProperties" 173 | } 174 | }, 175 | "allOf": [ 176 | { 177 | "if": { 178 | "properties": { 179 | "type": { 180 | "const": "api" 181 | } 182 | }, 183 | "required": ["type"] 184 | }, 185 | "then": { 186 | "$ref": "#/$defs/ServerSource/ApiServer" 187 | } 188 | }, 189 | { 190 | "if": { 191 | "properties": { 192 | "type": { 193 | "const": "athena" 194 | } 195 | }, 196 | "required": ["type"] 197 | }, 198 | "then": { 199 | "$ref": "#/$defs/ServerSource/AthenaServer" 200 | } 201 | }, 202 | { 203 | "if": { 204 | "properties": { 205 | "type": { 206 | "const": "azure" 207 | } 208 | }, 209 | "required": ["type"] 210 | }, 211 | "then": { 212 | "$ref": 
"#/$defs/ServerSource/AzureServer" 213 | } 214 | }, 215 | { 216 | "if": { 217 | "properties": { 218 | "type": { 219 | "const": "bigquery" 220 | } 221 | }, 222 | "required": ["type"] 223 | }, 224 | "then": { 225 | "$ref": "#/$defs/ServerSource/BigQueryServer" 226 | } 227 | }, 228 | { 229 | "if": { 230 | "properties": { 231 | "type": { 232 | "const": "clickhouse" 233 | } 234 | }, 235 | "required": ["type"] 236 | }, 237 | "then": { 238 | "$ref": "#/$defs/ServerSource/ClickHouseServer" 239 | } 240 | }, 241 | { 242 | "if": { 243 | "properties": { 244 | "type": { 245 | "const": "databricks" 246 | } 247 | }, 248 | "required": ["type"] 249 | }, 250 | "then": { 251 | "$ref": "#/$defs/ServerSource/DatabricksServer" 252 | } 253 | }, 254 | { 255 | "if": { 256 | "properties": { 257 | "type": { 258 | "const": "denodo" 259 | } 260 | }, 261 | "required": ["type"] 262 | }, 263 | "then": { 264 | "$ref": "#/$defs/ServerSource/DenodoServer" 265 | } 266 | }, 267 | { 268 | "if": { 269 | "properties": { 270 | "type": { 271 | "const": "dremio" 272 | } 273 | }, 274 | "required": ["type"] 275 | }, 276 | "then": { 277 | "$ref": "#/$defs/ServerSource/DremioServer" 278 | } 279 | }, 280 | { 281 | "if": { 282 | "properties": { 283 | "type": { 284 | "const": "duckdb" 285 | } 286 | }, 287 | "required": ["type"] 288 | }, 289 | "then": { 290 | "$ref": "#/$defs/ServerSource/DuckdbServer" 291 | } 292 | }, 293 | { 294 | "if": { 295 | "properties": { 296 | "type": { 297 | "const": "glue" 298 | } 299 | }, 300 | "required": ["type"] 301 | }, 302 | "then": { 303 | "$ref": "#/$defs/ServerSource/GlueServer" 304 | } 305 | }, 306 | { 307 | "if": { 308 | "properties": { 309 | "type": { 310 | "const": "cloudsql" 311 | } 312 | }, 313 | "required": ["type"] 314 | }, 315 | "then": { 316 | "$ref": "#/$defs/ServerSource/GoogleCloudSqlServer" 317 | } 318 | }, 319 | { 320 | "if": { 321 | "properties": { 322 | "type": { 323 | "const": "db2" 324 | } 325 | }, 326 | "required": ["type"] 327 | }, 328 | "then": { 329 | 
"$ref": "#/$defs/ServerSource/IBMDB2Server" 330 | } 331 | }, 332 | { 333 | "if": { 334 | "properties": { 335 | "type": { 336 | "const": "informix" 337 | } 338 | }, 339 | "required": ["type"] 340 | }, 341 | "then": { 342 | "$ref": "#/$defs/ServerSource/InformixServer" 343 | } 344 | }, 345 | 346 | { 347 | "if": { 348 | "properties": { 349 | "type": { 350 | "const": "custom" 351 | } 352 | }, 353 | "required": ["type"] 354 | }, 355 | "then": { 356 | "$ref": "#/$defs/ServerSource/CustomServer" 357 | } 358 | }, 359 | { 360 | "if": { 361 | "properties": { 362 | "type": { 363 | "const": "kafka" 364 | } 365 | }, 366 | "required": ["type"] 367 | }, 368 | "then": { 369 | "$ref": "#/$defs/ServerSource/KafkaServer" 370 | } 371 | }, 372 | { 373 | "if": { 374 | "properties": { 375 | "type": { 376 | "const": "kinesis" 377 | } 378 | }, 379 | "required": ["type"] 380 | }, 381 | "then": { 382 | "$ref": "#/$defs/ServerSource/KinesisServer" 383 | } 384 | }, 385 | { 386 | "if": { 387 | "properties": { 388 | "type": { 389 | "const": "local" 390 | } 391 | }, 392 | "required": ["type"] 393 | }, 394 | "then": { 395 | "$ref": "#/$defs/ServerSource/LocalServer" 396 | } 397 | }, 398 | { 399 | "if": { 400 | "properties": { 401 | "type": { 402 | "const": "mysql" 403 | } 404 | }, 405 | "required": ["type"] 406 | }, 407 | "then": { 408 | "$ref": "#/$defs/ServerSource/MySqlServer" 409 | } 410 | }, 411 | { 412 | "if": { 413 | "properties": { 414 | "type": { 415 | "const": "oracle" 416 | } 417 | }, 418 | "required": ["type"] 419 | }, 420 | "then": { 421 | "$ref": "#/$defs/ServerSource/OracleServer" 422 | } 423 | }, 424 | { 425 | "if": { 426 | "properties": { 427 | "type": { 428 | "const": "postgresql" 429 | } 430 | }, 431 | "required": ["type"] 432 | }, 433 | "then": { 434 | "$ref": "#/$defs/ServerSource/PostgresServer" 435 | } 436 | }, 437 | { 438 | "if": { 439 | "properties": { 440 | "type": { 441 | "const": "postgres" 442 | } 443 | }, 444 | "required": ["type"] 445 | }, 446 | "then": { 447 | 
"$ref": "#/$defs/ServerSource/PostgresServer" 448 | } 449 | }, 450 | { 451 | "if": { 452 | "properties": { 453 | "type": { 454 | "const": "presto" 455 | } 456 | }, 457 | "required": ["type"] 458 | }, 459 | "then": { 460 | "$ref": "#/$defs/ServerSource/PrestoServer" 461 | } 462 | }, 463 | { 464 | "if": { 465 | "properties": { 466 | "type": { 467 | "const": "pubsub" 468 | } 469 | }, 470 | "required": ["type"] 471 | }, 472 | "then": { 473 | "$ref": "#/$defs/ServerSource/PubSubServer" 474 | } 475 | }, 476 | { 477 | "if": { 478 | "properties": { 479 | "type": { 480 | "const": "redshift" 481 | } 482 | }, 483 | "required": ["type"] 484 | }, 485 | "then": { 486 | "$ref": "#/$defs/ServerSource/RedshiftServer" 487 | } 488 | }, 489 | { 490 | "if": { 491 | "properties": { 492 | "type": { 493 | "const": "s3" 494 | } 495 | }, 496 | "required": ["type"] 497 | }, 498 | "then": { 499 | "$ref": "#/$defs/ServerSource/S3Server" 500 | } 501 | }, 502 | { 503 | "if": { 504 | "properties": { 505 | "type": { 506 | "const": "sftp" 507 | } 508 | }, 509 | "required": ["type"] 510 | }, 511 | "then": { 512 | "$ref": "#/$defs/ServerSource/SftpServer" 513 | } 514 | }, 515 | { 516 | "if": { 517 | "properties": { 518 | "type": { 519 | "const": "snowflake" 520 | } 521 | }, 522 | "required": ["type"] 523 | }, 524 | "then": { 525 | "$ref": "#/$defs/ServerSource/SnowflakeServer" 526 | } 527 | }, 528 | { 529 | "if": { 530 | "properties": { 531 | "type": { 532 | "const": "sqlserver" 533 | } 534 | }, 535 | "required": ["type"] 536 | }, 537 | "then": { 538 | "$ref": "#/$defs/ServerSource/SqlserverServer" 539 | } 540 | }, 541 | { 542 | "if": { 543 | "properties": { 544 | "type": { 545 | "const": "synapse" 546 | } 547 | }, 548 | "required": ["type"] 549 | }, 550 | "then": { 551 | "$ref": "#/$defs/ServerSource/SynapseServer" 552 | } 553 | }, 554 | { 555 | "if": { 556 | "properties": { 557 | "type": { 558 | "const": "trino" 559 | } 560 | }, 561 | "required": ["type"] 562 | }, 563 | "then": { 564 | "$ref": 
"#/$defs/ServerSource/TrinoServer" 565 | } 566 | }, 567 | { 568 | "if": { 569 | "properties": { 570 | "type": { 571 | "const": "vertica" 572 | } 573 | }, 574 | "required": ["type"] 575 | }, 576 | "then": { 577 | "$ref": "#/$defs/ServerSource/VerticaServer" 578 | } 579 | } 580 | ], 581 | "required": ["server", "type"] 582 | }, 583 | "ServerSource": { 584 | "ApiServer": { 585 | "type": "object", 586 | "title": "ApiServer", 587 | "properties": { 588 | "location": { 589 | "type": "string", 590 | "format": "uri", 591 | "description": "The URL of the API.", 592 | "examples": [ 593 | "https://api.example.com/v1" 594 | ] 595 | } 596 | }, 597 | "required": [ 598 | "location" 599 | ] 600 | }, 601 | "AthenaServer": { 602 | "type": "object", 603 | "title": "AthenaServer", 604 | "properties": { 605 | "stagingDir": { 606 | "type": "string", 607 | "format": "uri", 608 | "description": "Amazon Athena automatically stores query results and metadata information for each query that runs in a query result location that you can specify in Amazon S3.", 609 | "examples": [ 610 | "s3://my_storage_account_name/my_container/path" 611 | ] 612 | }, 613 | "schema": { 614 | "type": "string", 615 | "description": "Identify the schema in the data source in which your tables exist."
616 | }, 617 | "catalog": { 618 | "type": "string", 619 | "description": "Identify the name of the Data Source, also referred to as a Catalog.", 620 | "default": "awsdatacatalog" 621 | }, 622 | "regionName": { 623 | "type": "string", 624 | "description": "The region your AWS account uses.", 625 | "examples": ["eu-west-1"] 626 | } 627 | }, 628 | "required": [ 629 | "stagingDir", 630 | "schema" 631 | ] 632 | }, 633 | "AzureServer": { 634 | "type": "object", 635 | "title": "AzureServer", 636 | "properties": { 637 | "location": { 638 | "type": "string", 639 | "format": "uri", 640 | "description": "Fully qualified path to Azure Blob Storage or Azure Data Lake Storage (ADLS), supports globs.", 641 | "examples": [ 642 | "az://my_storage_account_name.blob.core.windows.net/my_container/path/*.parquet", 643 | "abfss://my_storage_account_name.dfs.core.windows.net/my_container_name/path/*.parquet" 644 | ] 645 | }, 646 | "format": { 647 | "type": "string", 648 | "enum": [ 649 | "parquet", 650 | "delta", 651 | "json", 652 | "csv" 653 | ], 654 | "description": "File format." 655 | }, 656 | "delimiter": { 657 | "type": "string", 658 | "enum": [ 659 | "new_line", 660 | "array" 661 | ], 662 | "description": "Only for format = json. How multiple json documents are delimited within one file" 663 | } 664 | }, 665 | "required": [ 666 | "location", 667 | "format" 668 | ] 669 | }, 670 | "BigQueryServer": { 671 | "type": "object", 672 | "title": "BigQueryServer", 673 | "properties": { 674 | "project": { 675 | "type": "string", 676 | "description": "The GCP project name." 677 | }, 678 | "dataset": { 679 | "type": "string", 680 | "description": "The GCP dataset name." 681 | } 682 | }, 683 | "required": [ 684 | "project", 685 | "dataset" 686 | ] 687 | }, 688 | "ClickHouseServer": { 689 | "type": "object", 690 | "title": "ClickHouseServer", 691 | "properties": { 692 | "host": { 693 | "type": "string", 694 | "description": "The host of the ClickHouse server." 
695 | }, 696 | "port": { 697 | "type": "integer", 698 | "description": "The port to the ClickHouse server." 699 | }, 700 | "database": { 701 | "type": "string", 702 | "description": "The name of the database." 703 | } 704 | }, 705 | "required": [ 706 | "host", 707 | "port", 708 | "database" 709 | ] 710 | }, 711 | "DatabricksServer": { 712 | "type": "object", 713 | "title": "DatabricksServer", 714 | "properties": { 715 | "host": { 716 | "type": "string", 717 | "description": "The Databricks host", 718 | "examples": [ 719 | "dbc-abcdefgh-1234.cloud.databricks.com" 720 | ] 721 | }, 722 | "catalog": { 723 | "type": "string", 724 | "description": "The name of the Hive or Unity catalog" 725 | }, 726 | "schema": { 727 | "type": "string", 728 | "description": "The schema name in the catalog" 729 | } 730 | }, 731 | "required": [ 732 | "catalog", 733 | "schema" 734 | ] 735 | }, 736 | "DenodoServer": { 737 | "type": "object", 738 | "title": "DenodoServer", 739 | "properties": { 740 | "host": { 741 | "type": "string", 742 | "description": "The host of the Denodo server." 743 | }, 744 | "port": { 745 | "type": "integer", 746 | "description": "The port of the Denodo server." 747 | }, 748 | "database": { 749 | "type": "string", 750 | "description": "The name of the database." 751 | } 752 | }, 753 | "required": [ 754 | "host", 755 | "port" 756 | ] 757 | }, 758 | "DremioServer": { 759 | "type": "object", 760 | "title": "DremioServer", 761 | "properties": { 762 | "host": { 763 | "type": "string", 764 | "description": "The host of the Dremio server." 765 | }, 766 | "port": { 767 | "type": "integer", 768 | "description": "The port of the Dremio server." 769 | }, 770 | "schema": { 771 | "type": "string", 772 | "description": "The name of the schema." 
773 | } 774 | }, 775 | "required": [ 776 | "host", 777 | "port" 778 | ] 779 | }, 780 | "DuckdbServer": { 781 | "type": "object", 782 | "title": "DuckdbServer", 783 | "properties": { 784 | "database": { 785 | "type": "string", 786 | "description": "Path to duckdb database file." 787 | }, 788 | "schema": { 789 | "type": "integer", 790 | "description": "The name of the schema." 791 | } 792 | }, 793 | "required": [ 794 | "database" 795 | ] 796 | }, 797 | "GlueServer": { 798 | "type": "object", 799 | "title": "GlueServer", 800 | "properties": { 801 | "account": { 802 | "type": "string", 803 | "description": "The AWS Glue account", 804 | "examples": [ 805 | "1234-5678-9012" 806 | ] 807 | }, 808 | "database": { 809 | "type": "string", 810 | "description": "The AWS Glue database name", 811 | "examples": [ 812 | "my_database" 813 | ] 814 | }, 815 | "location": { 816 | "type": "string", 817 | "format": "uri", 818 | "description": "The AWS S3 path. Must be in the form of a URL.", 819 | "examples": [ 820 | "s3://datacontract-example-orders-latest/data/{model}" 821 | ] 822 | }, 823 | "format": { 824 | "type": "string", 825 | "description": "The format of the files", 826 | "examples": [ 827 | "parquet", 828 | "csv", 829 | "json", 830 | "delta" 831 | ] 832 | } 833 | }, 834 | "required": [ 835 | "account", 836 | "database" 837 | ] 838 | }, 839 | "GoogleCloudSqlServer": { 840 | "type": "object", 841 | "title": "GoogleCloudSqlServer", 842 | "properties": { 843 | "host": { 844 | "type": "string", 845 | "description": "The host of the Google Cloud Sql server." 846 | }, 847 | "port": { 848 | "type": "integer", 849 | "description": "The port of the Google Cloud Sql server." 850 | }, 851 | "database": { 852 | "type": "string", 853 | "description": "The name of the database." 854 | }, 855 | "schema": { 856 | "type": "string", 857 | "description": "The name of the schema." 
858 | } 859 | }, 860 | "required": [ 861 | "host", 862 | "port", 863 | "database", 864 | "schema" 865 | ] 866 | }, 867 | "IBMDB2Server": { 868 | "type": "object", 869 | "title": "IBMDB2Server", 870 | "properties": { 871 | "host": { 872 | "type": "string", 873 | "description": "The host of the IBM DB2 server." 874 | }, 875 | "port": { 876 | "type": "integer", 877 | "description": "The port of the IBM DB2 server." 878 | }, 879 | "database": { 880 | "type": "string", 881 | "description": "The name of the database." 882 | }, 883 | "schema": { 884 | "type": "string", 885 | "description": "The name of the schema." 886 | } 887 | }, 888 | "required": [ 889 | "host", 890 | "port", 891 | "database" 892 | ] 893 | }, 894 | "InformixServer": { 895 | "type": "object", 896 | "title": "InformixServer", 897 | "properties": { 898 | "host": { 899 | "type": "string", 900 | "description": "The host to the Informix server. " 901 | }, 902 | "port": { 903 | "type": "integer", 904 | "description": "The port to the Informix server. Defaults to 9088." 905 | }, 906 | "database": { 907 | "type": "string", 908 | "description": "The name of the database." 909 | } 910 | }, 911 | "required": [ 912 | "host", 913 | "database" 914 | ] 915 | }, 916 | "CustomServer": { 917 | "type": "object", 918 | "title": "CustomServer", 919 | "properties": { 920 | "account": { 921 | "type": "string", 922 | "description": "Account used by the server." 923 | }, 924 | "catalog": { 925 | "type": "string", 926 | "description": "Name of the catalog." 927 | }, 928 | "database": { 929 | "type": "string", 930 | "description": "Name of the database." 931 | }, 932 | "dataset": { 933 | "type": "string", 934 | "description": "Name of the dataset." 935 | }, 936 | "delimiter": { 937 | "type": "string", 938 | "description": "Delimiter." 
939 | }, 940 | "endpointUrl": { 941 | "type": "string", 942 | "description": "Server endpoint.", 943 | "format": "uri" 944 | }, 945 | "format": { 946 | "type": "string", 947 | "description": "File format." 948 | }, 949 | "host": { 950 | "type": "string", 951 | "description": "Host name or IP address." 952 | }, 953 | "location": { 954 | "type": "string", 955 | "description": "A URL to a location.", 956 | "format": "uri" 957 | }, 958 | "path": { 959 | "type": "string", 960 | "description": "Relative or absolute path to the data file(s)." 961 | }, 962 | "port": { 963 | "type": "integer", 964 | "description": "Port to the server. No default value is assumed for custom servers." 965 | }, 966 | "project": { 967 | "type": "string", 968 | "description": "Project name." 969 | }, 970 | "region": { 971 | "type": "string", 972 | "description": "Cloud region." 973 | }, 974 | "regionName": { 975 | "type": "string", 976 | "description": "Region name." 977 | }, 978 | "schema": { 979 | "type": "string", 980 | "description": "Name of the schema." 981 | }, 982 | "serviceName": { 983 | "type": "string", 984 | "description": "Name of the service." 985 | }, 986 | "stagingDir": { 987 | "type": "string", 988 | "description": "Staging directory." 989 | }, 990 | "warehouse": { 991 | "type": "string", 992 | "description": "Name of the cluster or warehouse." 993 | } 994 | } 995 | }, 996 | "KafkaServer": { 997 | "type": "object", 998 | "title": "KafkaServer", 999 | "description": "Kafka Server", 1000 | "properties": { 1001 | "host": { 1002 | "type": "string", 1003 | "description": "The bootstrap server of the kafka cluster." 
1004 | }, 1005 | "format": { 1006 | "type": "string", 1007 | "description": "The format of the messages.", 1008 | "examples": ["json", "avro", "protobuf", "xml"], 1009 | "default": "json" 1010 | } 1011 | }, 1012 | "required": [ 1013 | "host" 1014 | ] 1015 | }, 1016 | "KinesisServer": { 1017 | "type": "object", 1018 | "title": "KinesisDataStreamsServer", 1019 | "description": "Kinesis Data Streams Server", 1020 | "properties": { 1021 | "region": { 1022 | "type": "string", 1023 | "description": "AWS region.", 1024 | "examples": [ 1025 | "eu-west-1" 1026 | ] 1027 | }, 1028 | "format": { 1029 | "type": "string", 1030 | "description": "The format of the record", 1031 | "examples": [ 1032 | "json", 1033 | "avro", 1034 | "protobuf" 1035 | ] 1036 | } 1037 | } 1038 | }, 1039 | "LocalServer": { 1040 | "type": "object", 1041 | "title": "LocalServer", 1042 | "properties": { 1043 | "path": { 1044 | "type": "string", 1045 | "description": "The relative or absolute path to the data file(s).", 1046 | "examples": [ 1047 | "./folder/data.parquet", 1048 | "./folder/*.parquet" 1049 | ] 1050 | }, 1051 | "format": { 1052 | "type": "string", 1053 | "description": "The format of the file(s)", 1054 | "examples": [ 1055 | "json", 1056 | "parquet", 1057 | "delta", 1058 | "csv" 1059 | ] 1060 | } 1061 | }, 1062 | "required": [ 1063 | "path", 1064 | "format" 1065 | ] 1066 | }, 1067 | "MySqlServer": { 1068 | "type": "object", 1069 | "title": "MySqlServer", 1070 | "properties": { 1071 | "host": { 1072 | "type": "string", 1073 | "description": "The host of the MySql server." 1074 | }, 1075 | "port": { 1076 | "type": "integer", 1077 | "description": "The port of the MySql server." 1078 | }, 1079 | "database": { 1080 | "type": "string", 1081 | "description": "The name of the database." 
1082 | } 1083 | }, 1084 | "required": [ 1085 | "host", 1086 | "port", 1087 | "database" 1088 | ] 1089 | }, 1090 | "OracleServer": { 1091 | "type": "object", 1092 | "title": "OracleServer", 1093 | "properties": { 1094 | "host": { 1095 | "type": "string", 1096 | "description": "The host to the oracle server", 1097 | "examples": [ 1098 | "localhost" 1099 | ] 1100 | }, 1101 | "port": { 1102 | "type": "integer", 1103 | "description": "The port to the oracle server.", 1104 | "examples": [ 1105 | 1523 1106 | ] 1107 | }, 1108 | "serviceName": { 1109 | "type": "string", 1110 | "description": "The name of the service.", 1111 | "examples": [ 1112 | "service" 1113 | ] 1114 | } 1115 | }, 1116 | "required": [ 1117 | "host", 1118 | "port", 1119 | "serviceName" 1120 | ] 1121 | }, 1122 | "PostgresServer": { 1123 | "type": "object", 1124 | "title": "PostgresServer", 1125 | "properties": { 1126 | "host": { 1127 | "type": "string", 1128 | "description": "The host to the Postgres server" 1129 | }, 1130 | "port": { 1131 | "type": "integer", 1132 | "description": "The port to the Postgres server." 1133 | }, 1134 | "database": { 1135 | "type": "string", 1136 | "description": "The name of the database." 1137 | }, 1138 | "schema": { 1139 | "type": "string", 1140 | "description": "The name of the schema in the database." 
1141 | } 1142 | }, 1143 | "required": [ 1144 | "host", 1145 | "port", 1146 | "database", 1147 | "schema" 1148 | ] 1149 | }, 1150 | "PrestoServer": { 1151 | "type": "object", 1152 | "title": "PrestoServer", 1153 | "properties": { 1154 | "host": { 1155 | "type": "string", 1156 | "description": "The host to the Presto server", 1157 | "examples": [ 1158 | "localhost:8080" 1159 | ] 1160 | }, 1161 | "catalog": { 1162 | "type": "string", 1163 | "description": "The name of the catalog.", 1164 | "examples": [ 1165 | "postgres" 1166 | ] 1167 | }, 1168 | "schema": { 1169 | "type": "string", 1170 | "description": "The name of the schema.", 1171 | "examples": [ 1172 | "public" 1173 | ] 1174 | } 1175 | }, 1176 | "required": [ 1177 | "host" 1178 | ] 1179 | }, 1180 | "PubSubServer": { 1181 | "type": "object", 1182 | "title": "PubSubServer", 1183 | "properties": { 1184 | "project": { 1185 | "type": "string", 1186 | "description": "The GCP project name." 1187 | } 1188 | }, 1189 | "required": [ 1190 | "project" 1191 | ] 1192 | }, 1193 | "RedshiftServer": { 1194 | "type": "object", 1195 | "title": "RedshiftServer", 1196 | "properties": { 1197 | "host": { 1198 | "type": "string", 1199 | "description": "The host of the Redshift server." 1200 | }, 1201 | "database": { 1202 | "type": "string", 1203 | "description": "The name of the database." 1204 | }, 1205 | "schema": { 1206 | "type": "string", 1207 | "description": "The name of the schema." 1208 | }, 1209 | "region": { 1210 | "type": "string", 1211 | "description": "AWS region of Redshift server.", 1212 | "examples": ["us-east-1"] 1213 | }, 1214 | "account": { 1215 | "type": "string", 1216 | "description": "The account used by the server."
1217 | } 1218 | }, 1219 | "required": [ 1220 | "database", 1221 | "schema" 1222 | ] 1223 | }, 1224 | "S3Server": { 1225 | "type": "object", 1226 | "title": "S3Server", 1227 | "properties": { 1228 | "location": { 1229 | "type": "string", 1230 | "format": "uri", 1231 | "description": "S3 URL, starting with `s3://`", 1232 | "examples": [ 1233 | "s3://datacontract-example-orders-latest/data/{model}/*.json" 1234 | ] 1235 | }, 1236 | "endpointUrl": { 1237 | "type": "string", 1238 | "format": "uri", 1239 | "description": "The server endpoint for S3-compatible servers.", 1240 | "examples": ["https://minio.example.com"] 1241 | }, 1242 | "format": { 1243 | "type": "string", 1244 | "enum": [ 1245 | "parquet", 1246 | "delta", 1247 | "json", 1248 | "csv" 1249 | ], 1250 | "description": "File format." 1251 | }, 1252 | "delimiter": { 1253 | "type": "string", 1254 | "enum": [ 1255 | "new_line", 1256 | "array" 1257 | ], 1258 | "description": "Only for format = json. How multiple json documents are delimited within one file" 1259 | } 1260 | }, 1261 | "required": [ 1262 | "location" 1263 | ] 1264 | }, 1265 | "SftpServer": { 1266 | "type": "object", 1267 | "title": "SftpServer", 1268 | "properties": { 1269 | "location": { 1270 | "type": "string", 1271 | "format": "uri", 1272 | "pattern": "^sftp://.*", 1273 | "description": "SFTP URL, starting with `sftp://`", 1274 | "examples": [ 1275 | "sftp://123.123.12.123/{model}/*.json" 1276 | ] 1277 | }, 1278 | "format": { 1279 | "type": "string", 1280 | "enum": [ 1281 | "parquet", 1282 | "delta", 1283 | "json", 1284 | "csv" 1285 | ], 1286 | "description": "File format." 1287 | }, 1288 | "delimiter": { 1289 | "type": "string", 1290 | "enum": [ 1291 | "new_line", 1292 | "array" 1293 | ], 1294 | "description": "Only for format = json. 
How multiple json documents are delimited within one file" 1295 | } 1296 | }, 1297 | "required": [ 1298 | "location" 1299 | ] 1300 | }, 1301 | "SnowflakeServer": { 1302 | "type": "object", 1303 | "title": "SnowflakeServer", 1304 | "properties": { 1305 | "host": { 1306 | "type": "string", 1307 | "description": "The host to the Snowflake server" 1308 | }, 1309 | "port": { 1310 | "type": "integer", 1311 | "description": "The port to the Snowflake server." 1312 | }, 1313 | "account": { 1314 | "type": "string", 1315 | "description": "The Snowflake account used by the server." 1316 | }, 1317 | "database": { 1318 | "type": "string", 1319 | "description": "The name of the database." 1320 | }, 1321 | "schema": { 1322 | "type": "string", 1323 | "description": "The name of the schema." 1324 | }, 1325 | "warehouse": { 1326 | "type": "string", 1327 | "description": "The name of the cluster of resources that is a Snowflake virtual warehouse." 1328 | } 1329 | }, 1330 | "required": [ 1331 | "account", 1332 | "database", 1333 | "schema" 1334 | ] 1335 | }, 1336 | "SqlserverServer": { 1337 | "type": "object", 1338 | "title": "SqlserverServer", 1339 | "properties": { 1340 | "host": { 1341 | "type": "string", 1342 | "description": "The host to the database server", 1343 | "examples": [ 1344 | "localhost" 1345 | ] 1346 | }, 1347 | "port": { 1348 | "type": "integer", 1349 | "description": "The port to the database server.", 1350 | "default": 1433, 1351 | "examples": [ 1352 | 1433 1353 | ] 1354 | }, 1355 | "database": { 1356 | "type": "string", 1357 | "description": "The name of the database.", 1358 | "examples": [ 1359 | "database" 1360 | ] 1361 | }, 1362 | "schema": { 1363 | "type": "string", 1364 | "description": "The name of the schema in the database.", 1365 | "examples": [ 1366 | "dbo" 1367 | ] 1368 | } 1369 | }, 1370 | "required": [ 1371 | "host", 1372 | "database", 1373 | "schema" 1374 | ] 1375 | }, 1376 | "SynapseServer": { 1377 | "type": "object", 1378 | "title": 
"SynapseServer", 1379 | "properties": { 1380 | "host": { 1381 | "type": "string", 1382 | "description": "The host of the Synapse server." 1383 | }, 1384 | "port": { 1385 | "type": "integer", 1386 | "description": "The port of the Synapse server." 1387 | }, 1388 | "database": { 1389 | "type": "string", 1390 | "description": "The name of the database." 1391 | } 1392 | }, 1393 | "required": [ 1394 | "host", 1395 | "port", 1396 | "database" 1397 | ] 1398 | }, 1399 | "TrinoServer": { 1400 | "type": "object", 1401 | "title": "TrinoServer", 1402 | "properties": { 1403 | "host": { 1404 | "type": "string", 1405 | "description": "The Trino host URL.", 1406 | "examples": [ 1407 | "localhost" 1408 | ] 1409 | }, 1410 | "port": { 1411 | "type": "integer", 1412 | "description": "The Trino port." 1413 | }, 1414 | "catalog": { 1415 | "type": "string", 1416 | "description": "The name of the catalog.", 1417 | "examples": [ 1418 | "hive" 1419 | ] 1420 | }, 1421 | "schema": { 1422 | "type": "string", 1423 | "description": "The name of the schema in the database.", 1424 | "examples": [ 1425 | "my_schema" 1426 | ] 1427 | } 1428 | }, 1429 | "required": [ 1430 | "host", 1431 | "port", 1432 | "catalog", 1433 | "schema" 1434 | ] 1435 | }, 1436 | "VerticaServer": { 1437 | "type": "object", 1438 | "title": "VerticaServer", 1439 | "properties": { 1440 | "host": { 1441 | "type": "string", 1442 | "description": "The host of the Vertica server." 1443 | }, 1444 | "port": { 1445 | "type": "integer", 1446 | "description": "The port of the Vertica server." 1447 | }, 1448 | "database": { 1449 | "type": "string", 1450 | "description": "The name of the database." 1451 | }, 1452 | "schema": { 1453 | "type": "string", 1454 | "description": "The name of the schema." 
1455 | } 1456 | }, 1457 | "required": [ 1458 | "host", 1459 | "port", 1460 | "database", 1461 | "schema" 1462 | ] 1463 | } 1464 | }, 1465 | "SchemaElement": { 1466 | "type": "object", 1467 | "properties": { 1468 | "name": { 1469 | "type": "string", 1470 | "description": "Name of the element." 1471 | }, 1472 | "physicalType": { 1473 | "type": "string", 1474 | "description": "The physical element data type in the data source.", 1475 | "examples": ["table", "view", "topic", "file"] 1476 | }, 1477 | "description": { 1478 | "type": "string", 1479 | "description": "Description of the element." 1480 | }, 1481 | "businessName": { 1482 | "type": "string", 1483 | "description": "The business name of the element." 1484 | }, 1485 | "authoritativeDefinitions": { 1486 | "$ref": "#/$defs/AuthoritativeDefinitions" 1487 | }, 1488 | "tags": { 1489 | "$ref": "#/$defs/Tags" 1490 | }, 1491 | "customProperties": { 1492 | "$ref": "#/$defs/CustomProperties" 1493 | } 1494 | } 1495 | }, 1496 | "SchemaObject": { 1497 | "type": "object", 1498 | "properties": { 1499 | "logicalType": { 1500 | "type": "string", 1501 | "description": "The logical element data type.", 1502 | "enum": ["object"] 1503 | }, 1504 | "physicalName": { 1505 | "type": "string", 1506 | "description": "Physical name.", 1507 | "examples": ["table_1_2_0"] 1508 | }, 1509 | "dataGranularityDescription": { 1510 | "type": "string", 1511 | "description": "Granular level of the data in the object.", 1512 | "examples": ["Aggregation by country"] 1513 | }, 1514 | "properties": { 1515 | "type": "array", 1516 | "description": "A list of properties for the object.", 1517 | "items": { 1518 | "$ref": "#/$defs/SchemaProperty" 1519 | } 1520 | }, 1521 | "quality": { 1522 | "$ref": "#/$defs/DataQualityChecks" 1523 | } 1524 | }, 1525 | "allOf": [ 1526 | { 1527 | "$ref": "#/$defs/SchemaElement" 1528 | } 1529 | ], 1530 | "required": ["name"], 1531 | "unevaluatedProperties": false 1532 | }, 1533 | "SchemaBaseProperty": { 1534 | "type": "object", 
1535 | "properties": { 1536 | "primaryKey": { 1537 | "type": "boolean", 1538 | "description": "Boolean value specifying whether the element is a primary key or not. Default is false." 1539 | }, 1540 | "primaryKeyPosition": { 1541 | "type": "integer", 1542 | "default": -1, 1543 | "description": "If element is a primary key, the position of the primary key element. Starts from 1. Example of `account_id, name` being primary key columns, `account_id` has primaryKeyPosition 1 and `name` primaryKeyPosition 2. Defaults to -1." 1544 | }, 1545 | "logicalType": { 1546 | "type": "string", 1547 | "description": "The logical element data type.", 1548 | "enum": ["string", "date", "number", "integer", "object", "array", "boolean"] 1549 | }, 1550 | "logicalTypeOptions": { 1551 | "type": "object", 1552 | "description": "Additional optional metadata to describe the logical type." 1553 | }, 1554 | "physicalType": { 1555 | "type": "string", 1556 | "description": "The physical element data type in the data source. For example, VARCHAR(2), DOUBLE, INT." 1557 | }, 1558 | "physicalName": { 1559 | "type": "string", 1560 | "description": "Physical name.", 1561 | "examples": ["col_str_a"] 1562 | }, 1563 | "required": { 1564 | "type": "boolean", 1565 | "default": false, 1566 | "description": "Indicates if the element is required (true means it must not contain Null values); possible values are true and false. Default is false." 1567 | }, 1568 | "unique": { 1569 | "type": "boolean", 1570 | "default": false, 1571 | "description": "Indicates if the element contains unique values; possible values are true and false. Default is false." 1572 | }, 1573 | "partitioned": { 1574 | "type": "boolean", 1575 | "default": false, 1576 | "description": "Indicates if the element is partitioned; possible values are true and false." 1577 | }, 1578 | "partitionKeyPosition": { 1579 | "type": "integer", 1580 | "default": -1, 1581 | "description": "If element is used for partitioning, the position of the partition element. Starts from 1.
Example of `country, year` being partition columns, `country` has partitionKeyPosition 1 and `year` partitionKeyPosition 2. Defaults to -1." 1582 | }, 1583 | "classification": { 1584 | "type": "string", 1585 | "description": "Can be anything, like confidential, restricted, and public to more advanced categorization. Some companies, like PayPal, use data classification indicating the class of data in the element; expected values are 1, 2, 3, 4, or 5.", 1586 | "examples": ["confidential", "restricted", "public"] 1587 | }, 1588 | "encryptedName": { 1589 | "type": "string", 1590 | "description": "The element name within the dataset that contains the encrypted element value. For example, unencrypted element `email_address` might have an encryptedName of `email_address_encrypt`." 1591 | }, 1592 | "transformSourceObjects": { 1593 | "type": "array", 1594 | "description": "List of objects in the data source used in the transformation.", 1595 | "items": { 1596 | "type": "string" 1597 | } 1598 | }, 1599 | "transformLogic": { 1600 | "type": "string", 1601 | "description": "Logic used in the element transformation." 1602 | }, 1603 | "transformDescription": { 1604 | "type": "string", 1605 | "description": "Describes the transform logic in very simple terms." 1606 | }, 1607 | "examples": { 1608 | "type": "array", 1609 | "description": "List of sample element values.", 1610 | "items": { 1611 | "$ref": "#/$defs/AnyType" 1612 | } 1613 | }, 1614 | "criticalDataElement": { 1615 | "type": "boolean", 1616 | "default": false, 1617 | "description": "True or false indicator; If element is considered a critical data element (CDE) then true else false."
1618 | }, 1619 | "quality": { 1620 | "$ref": "#/$defs/DataQualityChecks" 1621 | } 1622 | }, 1623 | "allOf": [ 1624 | { 1625 | "$ref": "#/$defs/SchemaElement" 1626 | }, 1627 | { 1628 | "if": { 1629 | "properties": { 1630 | "logicalType": { 1631 | "const": "string" 1632 | } 1633 | } 1634 | }, 1635 | "then": { 1636 | "properties": { 1637 | "logicalTypeOptions": { 1638 | "type": "object", 1639 | "properties": { 1640 | "minLength": { 1641 | "type": "integer", 1642 | "minimum": 0, 1643 | "description": "Minimum length of the string." 1644 | }, 1645 | "maxLength": { 1646 | "type": "integer", 1647 | "minimum": 0, 1648 | "description": "Maximum length of the string." 1649 | }, 1650 | "pattern": { 1651 | "type": "string", 1652 | "description": "Regular expression pattern to define valid value. Follows regular expression syntax from ECMA-262 (https://262.ecma-international.org/5.1/#sec-15.10.1)." 1653 | }, 1654 | "format": { 1655 | "type": "string", 1656 | "examples": ["password", "byte", "binary", "email", "uuid", "uri", "hostname", "ipv4", "ipv6"], 1657 | "description": "Provides extra context about what format the string follows." 1658 | } 1659 | }, 1660 | "additionalProperties": false 1661 | } 1662 | } 1663 | } 1664 | }, 1665 | { 1666 | "if": { 1667 | "properties": { 1668 | "logicalType": { 1669 | "const": "date" 1670 | } 1671 | } 1672 | }, 1673 | "then": { 1674 | "properties": { 1675 | "logicalTypeOptions": { 1676 | "type": "object", 1677 | "properties": { 1678 | "format": { 1679 | "type": "string", 1680 | "examples": ["yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "HH:mm:ss"], 1681 | "description": "Format of the date. Follows the format as prescribed by [JDK DateTimeFormatter](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html). For example, format 'yyyy-MM-dd'." 
1682 | }, 1683 | "exclusiveMaximum": { 1684 | "type": "boolean", 1685 | "default": false, 1686 | "description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)." 1687 | }, 1688 | "maximum": { 1689 | "type": "string", 1690 | "description": "All date values are less than or equal to this value (values <= maximum)." 1691 | }, 1692 | "exclusiveMinimum": { 1693 | "type": "boolean", 1694 | "default": false, 1695 | "description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)." 1696 | }, 1697 | "minimum": { 1698 | "type": "string", 1699 | "description": "All date values are greater than or equal to this value (values >= minimum)." 1700 | } 1701 | }, 1702 | "additionalProperties": false 1703 | } 1704 | } 1705 | } 1706 | }, 1707 | { 1708 | "if": { 1709 | "anyOf": [ 1710 | { 1711 | "properties": { 1712 | "logicalType": { 1713 | "const": "integer" 1714 | } 1715 | } 1716 | } 1717 | ] 1718 | }, 1719 | "then": { 1720 | "properties": { 1721 | "logicalTypeOptions": { 1722 | "type": "object", 1723 | "properties": { 1724 | "multipleOf": { 1725 | "type": "number", 1726 | "exclusiveMinimum": 0, 1727 | "description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5." 1728 | }, 1729 | "maximum": { 1730 | "type": "number", 1731 | "description": "All values are less than or equal to this value (values <= maximum)." 1732 | }, 1733 | "exclusiveMaximum": { 1734 | "type": "boolean", 1735 | "default": false, 1736 | "description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)." 
1737 | }, 1738 | "minimum": { 1739 | "type": "number", 1740 | "description": "All values are greater than or equal to this value (values >= minimum)." 1741 | }, 1742 | "exclusiveMinimum": { 1743 | "type": "boolean", 1744 | "default": false, 1745 | "description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)." 1746 | }, 1747 | "format": { 1748 | "type": "string", 1749 | "default": "i32", 1750 | "description": "Format of the value in terms of how many bits of space it can use and whether it is signed or unsigned (follows the Rust integer types).", 1751 | "enum": ["i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32", "u64", "u128"] 1752 | } 1753 | }, 1754 | "additionalProperties": false 1755 | } 1756 | } 1757 | } 1758 | }, 1759 | { 1760 | "if": { 1761 | "anyOf": [ 1762 | { 1763 | "properties": { 1764 | "logicalType": { 1765 | "const": "number" 1766 | } 1767 | } 1768 | } 1769 | ] 1770 | }, 1771 | "then": { 1772 | "properties": { 1773 | "logicalTypeOptions": { 1774 | "type": "object", 1775 | "properties": { 1776 | "multipleOf": { 1777 | "type": "number", 1778 | "exclusiveMinimum": 0, 1779 | "description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5." 1780 | }, 1781 | "maximum": { 1782 | "type": "number", 1783 | "description": "All values are less than or equal to this value (values <= maximum)." 1784 | }, 1785 | "exclusiveMaximum": { 1786 | "type": "boolean", 1787 | "default": false, 1788 | "description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)." 1789 | }, 1790 | "minimum": { 1791 | "type": "number", 1792 | "description": "All values are greater than or equal to this value (values >= minimum)." 
1793 | }, 1794 | "exclusiveMinimum": { 1795 | "type": "boolean", 1796 | "default": false, 1797 | "description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)." 1798 | }, 1799 | "format": { 1800 | "type": "string", 1801 | "default": "i32", 1802 | "description": "Format of the value in terms of how many bits of space it can use (follows the Rust float types).", 1803 | "enum": ["f32", "f64"] 1804 | } 1805 | }, 1806 | "additionalProperties": false 1807 | } 1808 | } 1809 | } 1810 | }, 1811 | { 1812 | "if": { 1813 | "properties": { 1814 | "logicalType": { 1815 | "const": "object" 1816 | } 1817 | } 1818 | }, 1819 | "then": { 1820 | "properties": { 1821 | "logicalTypeOptions": { 1822 | "type": "object", 1823 | "properties": { 1824 | "maxProperties": { 1825 | "type": "integer", 1826 | "minimum": 0, 1827 | "description": "Maximum number of properties." 1828 | }, 1829 | "minProperties": { 1830 | "type": "integer", 1831 | "minimum": 0, 1832 | "default": 0, 1833 | "description": "Minimum number of properties." 1834 | }, 1835 | "required": { 1836 | "type": "array", 1837 | "items": { 1838 | "type": "string" 1839 | }, 1840 | "minItems": 1, 1841 | "uniqueItems": true, 1842 | "description": "Property names that are required to exist in the object." 
1843 | } 1844 | }, 1845 | "additionalProperties": false 1846 | }, 1847 | "properties": { 1848 | "type": "array", 1849 | "description": "A list of properties for the object.", 1850 | "items": { 1851 | "$ref": "#/$defs/SchemaProperty" 1852 | } 1853 | } 1854 | } 1855 | } 1856 | }, 1857 | { 1858 | "if": { 1859 | "properties": { 1860 | "logicalType": { 1861 | "const": "array" 1862 | } 1863 | } 1864 | }, 1865 | "then": { 1866 | "properties": { 1867 | "logicalTypeOptions": { 1868 | "type": "object", 1869 | "properties": { 1870 | "maxItems": { 1871 | "type": "integer", 1872 | "minimum": 0, 1873 | "description": "Maximum number of items." 1874 | }, 1875 | "minItems": { 1876 | "type": "integer", 1877 | "minimum": 0, 1878 | "default": 0, 1879 | "description": "Minimum number of items" 1880 | }, 1881 | "uniqueItems": { 1882 | "type": "boolean", 1883 | "default": false, 1884 | "description": "If set to true, all items in the array are unique." 1885 | } 1886 | }, 1887 | "additionalProperties": false 1888 | }, 1889 | "items": { 1890 | "$ref": "#/$defs/SchemaItemProperty", 1891 | "description": "List of items in an array (only applicable when `logicalType: array`)." 1892 | } 1893 | } 1894 | } 1895 | } 1896 | ] 1897 | }, 1898 | "SchemaProperty": { 1899 | "type": "object", 1900 | "$ref": "#/$defs/SchemaBaseProperty", 1901 | "required": ["name"], 1902 | "unevaluatedProperties": false 1903 | }, 1904 | "SchemaItemProperty": { 1905 | "type": "object", 1906 | "$ref": "#/$defs/SchemaBaseProperty", 1907 | "properties": { 1908 | "properties": { 1909 | "type": "array", 1910 | "description": "A list of properties for the object.", 1911 | "items": { 1912 | "$ref": "#/$defs/SchemaProperty" 1913 | } 1914 | } 1915 | }, 1916 | "unevaluatedProperties": false 1917 | }, 1918 | "Tags": { 1919 | "type": "array", 1920 | "description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. Tags may be used to better categorize an element. 
For example, `finance`, `sensitive`, `employee_record`.", 1921 | "examples": ["finance", "sensitive", "employee_record"], 1922 | "items": { 1923 | "type": "string" 1924 | } 1925 | }, 1926 | "DataQuality": { 1927 | "type": "object", 1928 | "properties": { 1929 | "authoritativeDefinitions": { 1930 | "$ref": "#/$defs/AuthoritativeDefinitions" 1931 | }, 1932 | "businessImpact": { 1933 | "type": "string", 1934 | "description": "Consequences of the rule failure.", 1935 | "examples": ["operational", "regulatory"] 1936 | }, 1937 | "customProperties": { 1938 | "type": "array", 1939 | "description": "Additional properties required for rule execution.", 1940 | "items": { 1941 | "$ref": "#/$defs/CustomProperty" 1942 | } 1943 | }, 1944 | "description": { 1945 | "type": "string", 1946 | "description": "Describe the quality check to be completed." 1947 | }, 1948 | "dimension": { 1949 | "type": "string", 1950 | "description": "The key performance indicator (KPI) or dimension for data quality.", 1951 | "enum": ["accuracy", "completeness", "conformity", "consistency", "coverage", "timeliness", "uniqueness"] 1952 | }, 1953 | "method": { 1954 | "type": "string", 1955 | "examples": ["reconciliation"] 1956 | }, 1957 | "name": { 1958 | "type": "string", 1959 | "description": "Name of the data quality check." 1960 | }, 1961 | "schedule": { 1962 | "type": "string", 1963 | "description": "Rule execution schedule details.", 1964 | "examples": ["0 20 * * *"] 1965 | }, 1966 | "scheduler": { 1967 | "type": "string", 1968 | "description": "The name or type of scheduler used to start the data quality check.", 1969 | "examples": ["cron"] 1970 | }, 1971 | "severity": { 1972 | "type": "string", 1973 | "description": "The severity of the quality rule.", 1974 | "examples": ["info", "warning", "error"] 1975 | }, 1976 | "tags": { 1977 | "$ref": "#/$defs/Tags" 1978 | }, 1979 | "type": { 1980 | "type": "string", 1981 | "description": "The type of quality check. 
'text' is human-readable text that describes the quality of the data. 'library' is a set of maintained predefined quality attributes such as row count or unique. 'sql' is an individual SQL query that returns a value that can be compared. 'custom' is quality attributes that are vendor-specific, such as Soda or Great Expectations.", 1982 | "enum": ["text", "library", "sql", "custom"], 1983 | "default": "library" 1984 | }, 1985 | "unit": { 1986 | "type": "string", 1987 | "description": "Unit the rule is using, popular values are `rows` or `percent`, but any value is allowed.", 1988 | "examples": ["rows", "percent"] 1989 | } 1990 | }, 1991 | "allOf": [ 1992 | { 1993 | "if": { 1994 | "properties": { 1995 | "type": { 1996 | "const": "library" 1997 | } 1998 | } 1999 | }, 2000 | "then": { 2001 | "$ref": "#/$defs/DataQualityLibrary" 2002 | } 2003 | }, 2004 | { 2005 | "if": { 2006 | "properties": { 2007 | "type": { 2008 | "const": "sql" 2009 | } 2010 | }, 2011 | "required": ["type"] 2012 | }, 2013 | "then": { 2014 | "$ref": "#/$defs/DataQualitySql" 2015 | } 2016 | }, 2017 | { 2018 | "if": { 2019 | "properties": { 2020 | "type": { 2021 | "const": "custom" 2022 | } 2023 | }, 2024 | "required": ["type"] 2025 | }, 2026 | "then": { 2027 | "$ref": "#/$defs/DataQualityCustom" 2028 | } 2029 | } 2030 | ] 2031 | }, 2032 | "DataQualityChecks": { 2033 | "type": "array", 2034 | "description": "Data quality rules with all the relevant information for rule setup and execution.", 2035 | "items": { 2036 | "$ref": "#/$defs/DataQuality" 2037 | } 2038 | }, 2039 | "DataQualityLibrary": { 2040 | "type": "object", 2041 | "properties": { 2042 | "rule": { 2043 | "type": "string", 2044 | "description": "Define a data quality check based on the predefined rules as per ODCS.", 2045 | "examples": ["duplicateCount", "validValues", "rowCount"] 2046 | }, 2047 | "mustBe": { 2048 | "description": "Must be equal to the value to be valid. When using numbers, it is equivalent to '='." 
2049 | }, 2050 | "mustNotBe": { 2051 | "description": "Must not be equal to the value to be valid. When using numbers, it is equivalent to '!='." 2052 | }, 2053 | "mustBeGreaterThan": { 2054 | "type": "number", 2055 | "description": "Must be greater than the value to be valid. It is equivalent to '>'." 2056 | }, 2057 | "mustBeGreaterOrEqualTo": { 2058 | "type": "number", 2059 | "description": "Must be greater than or equal to the value to be valid. It is equivalent to '>='." 2060 | }, 2061 | "mustBeLessThan": { 2062 | "type": "number", 2063 | "description": "Must be less than the value to be valid. It is equivalent to '<'." 2064 | }, 2065 | "mustBeLessOrEqualTo": { 2066 | "type": "number", 2067 | "description": "Must be less than or equal to the value to be valid. It is equivalent to '<='." 2068 | }, 2069 | "mustBeBetween": { 2070 | "type": "array", 2071 | "description": "Must be between the two numbers to be valid. Smallest number first in the array.", 2072 | "minItems": 2, 2073 | "maxItems": 2, 2074 | "uniqueItems": true, 2075 | "items": { 2076 | "type": "number" 2077 | } 2078 | }, 2079 | "mustNotBeBetween": { 2080 | "type": "array", 2081 | "description": "Must not be between the two numbers to be valid. 
Smallest number first in the array.", 2082 | "minItems": 2, 2083 | "maxItems": 2, 2084 | "uniqueItems": true, 2085 | "items": { 2086 | "type": "number" 2087 | } 2088 | } 2089 | }, 2090 | "required": ["rule"] 2091 | }, 2092 | "DataQualitySql": { 2093 | "type": "object", 2094 | "properties": { 2095 | "query": { 2096 | "type": "string", 2097 | "description": "Query string that adheres to the dialect of the provided server.", 2098 | "examples": ["SELECT COUNT(*) FROM ${table} WHERE ${column} IS NOT NULL"] 2099 | } 2100 | }, 2101 | "required": ["query"] 2102 | }, 2103 | "DataQualityCustom": { 2104 | "type": "object", 2105 | "properties": { 2106 | "engine": { 2107 | "type": "string", 2108 | "description": "Name of the engine which executes the data quality checks.", 2109 | "examples": ["soda", "great-expectations", "monte-carlo", "dbt"] 2110 | }, 2111 | "implementation": { 2112 | "oneOf": [ 2113 | { 2114 | "type": "string" 2115 | }, 2116 | { 2117 | "type": "object" 2118 | } 2119 | ] 2120 | } 2121 | }, 2122 | "required": ["engine", "implementation"] 2123 | }, 2124 | "AuthoritativeDefinitions": { 2125 | "type": "array", 2126 | "description": "List of links to sources that provide more details on the dataset; examples would be a link to an external definition, a training video, a git repo, data catalog, or another tool. Authoritative definitions follow the same structure in the standard.", 2127 | "items": { 2128 | "type": "object", 2129 | "properties": { 2130 | "url": { 2131 | "type": "string", 2132 | "description": "URL to the authority." 
2133 | }, 2134 | "type": { 2135 | "type": "string", 2136 | "description": "Type of definition for authority: v2.3 adds standard values: `businessDefinition`, `transformationImplementation`, `videoTutorial`, `tutorial`, and `implementation`.", 2137 | "examples": ["businessDefinition", "transformationImplementation", "videoTutorial", "tutorial", "implementation"] 2138 | } 2139 | }, 2140 | "required": ["url", "type"] 2141 | } 2142 | }, 2143 | "Support": { 2144 | "type": "array", 2145 | "description": "Top level for support channels.", 2146 | "items": { 2147 | "$ref": "#/$defs/SupportItem" 2148 | } 2149 | }, 2150 | "SupportItem": { 2151 | "type": "object", 2152 | "properties": { 2153 | "channel": { 2154 | "type": "string", 2155 | "description": "Channel name or identifier." 2156 | }, 2157 | "url": { 2158 | "type": "string", 2159 | "description": "Access URL using normal [URL scheme](https://en.wikipedia.org/wiki/URL#Syntax) (https, mailto, etc.)." 2160 | }, 2161 | "description": { 2162 | "type": "string", 2163 | "description": "Description of the channel, free text." 2164 | }, 2165 | "tool": { 2166 | "type": "string", 2167 | "description": "Name of the tool, value can be `email`, `slack`, `teams`, `discord`, `ticket`, or `other`.", 2168 | "examples": ["email", "slack", "teams", "discord", "ticket", "other"] 2169 | }, 2170 | "scope": { 2171 | "type": "string", 2172 | "description": "Scope can be: `interactive`, `announcements`, `issues`.", 2173 | "examples": ["interactive", "announcements", "issues"] 2174 | }, 2175 | "invitationUrl": { 2176 | "type": "string", 2177 | "description": "Some tools use an invitation URL for requesting or subscribing. Follows the [URL scheme](https://en.wikipedia.org/wiki/URL#Syntax)." 2178 | } 2179 | }, 2180 | "required": ["channel", "url"] 2181 | }, 2182 | "Pricing": { 2183 | "type": "object", 2184 | "properties": { 2185 | "priceAmount": { 2186 | "type": "number", 2187 | "description": "Subscription price per unit of measure in `priceUnit`." 
2188 | }, 2189 | "priceCurrency": { 2190 | "type": "string", 2191 | "description": "Currency of the subscription price in `price.priceAmount`." 2192 | }, 2193 | "priceUnit": { 2194 | "type": "string", 2195 | "description": "The unit of measure for calculating cost. Examples megabyte, gigabyte." 2196 | } 2197 | } 2198 | }, 2199 | "Team": { 2200 | "type": "object", 2201 | "properties": { 2202 | "username": { 2203 | "type": "string", 2204 | "description": "The user's username or email.", 2205 | "examples": [ 2206 | "mail@example.com", 2207 | "uid12345678" 2208 | ] 2209 | }, 2210 | "name": { 2211 | "type": "string", 2212 | "description": "The user's name.", 2213 | "examples": [ 2214 | "Jane Doe" 2215 | ] 2216 | }, 2217 | "description": { 2218 | "type": "string", 2219 | "description": "The user's description." 2220 | }, 2221 | "role": { 2222 | "type": "string", 2223 | "description": "The user's job role; Examples might be owner, data steward. There is no limit on the role." 2224 | }, 2225 | "dateIn": { 2226 | "type": "string", 2227 | "format": "date", 2228 | "description": "The date when the user joined the team." 2229 | }, 2230 | "dateOut": { 2231 | "type": "string", 2232 | "format": "date", 2233 | "description": "The date when the user ceased to be part of the team." 2234 | }, 2235 | "replacedByUsername": { 2236 | "type": "string", 2237 | "description": "The username of the user who replaced the previous user." 2238 | } 2239 | } 2240 | }, 2241 | "Role": { 2242 | "type": "object", 2243 | "properties": { 2244 | "role": { 2245 | "type": "string", 2246 | "description": "Name of the IAM role that provides access to the dataset." 2247 | }, 2248 | "description": { 2249 | "type": "string", 2250 | "description": "Description of the IAM role and its permissions." 2251 | }, 2252 | "access": { 2253 | "type": "string", 2254 | "description": "The type of access provided by the IAM role." 
2255 | }, 2256 | "firstLevelApprovers": { 2257 | "type": "string", 2258 | "description": "The name(s) of the first-level approver(s) of the role." 2259 | }, 2260 | "secondLevelApprovers": { 2261 | "type": "string", 2262 | "description": "The name(s) of the second-level approver(s) of the role." 2263 | }, 2264 | "customProperties": { 2265 | "$ref": "#/$defs/CustomProperties" 2266 | } 2267 | }, 2268 | "required": ["role"] 2269 | }, 2270 | "ServiceLevelAgreementProperty": { 2271 | "type": "object", 2272 | "properties": { 2273 | "property": { 2274 | "type": "string", 2275 | "description": "Specific property in SLA, check the periodic table. May require units (more details to come)." 2276 | }, 2277 | "value": { 2278 | "anyOf": [ 2279 | { 2280 | "type": "string" 2281 | }, 2282 | { 2283 | "type": "number" 2284 | }, 2285 | { 2286 | "type": "integer" 2287 | }, 2288 | { 2289 | "type": "boolean" 2290 | }, 2291 | { 2292 | "type": "null" 2293 | } 2294 | ], 2295 | "description": "Agreement value. The label will change based on the property itself." 2296 | }, 2297 | "valueExt": { 2298 | "$ref": "#/$defs/AnyNonCollectionType", 2299 | "description": "Extended agreement value. The label will change based on the property itself." 2300 | }, 2301 | "unit": { 2302 | "type": "string", 2303 | "description": "**d**, day, days for days; **y**, yr, years for years, etc. Units use the ISO standard." 2304 | }, 2305 | "element": { 2306 | "type": "string", 2307 | "description": "Element(s) to check on. Multiple elements should be extremely rare and, if so, separated by commas." 
2308 | }, 2309 | "driver": { 2310 | "type": "string", 2311 | "description": "Describes the importance of the SLA from the list of: `regulatory`, `analytics`, or `operational`.", 2312 | "examples": ["regulatory", "analytics", "operational"] 2313 | } 2314 | }, 2315 | "required": ["property", "value"] 2316 | }, 2317 | "CustomProperties": { 2318 | "type": "array", 2319 | "description": "A list of key/value pairs for custom properties.", 2320 | "items": { 2321 | "$ref": "#/$defs/CustomProperty" 2322 | } 2323 | }, 2324 | "CustomProperty": { 2325 | "type": "object", 2326 | "properties": { 2327 | "property": { 2328 | "type": "string", 2329 | "description": "The name of the key. Names should be in camel case–the same as if they were permanent properties in the contract." 2330 | }, 2331 | "value": { 2332 | "$ref": "#/$defs/AnyType", 2333 | "description": "The value of the key." 2334 | } 2335 | } 2336 | }, 2337 | "AnyType": { 2338 | "anyOf": [ 2339 | { 2340 | "type": "string" 2341 | }, 2342 | { 2343 | "type": "number" 2344 | }, 2345 | { 2346 | "type": "integer" 2347 | }, 2348 | { 2349 | "type": "boolean" 2350 | }, 2351 | { 2352 | "type": "null" 2353 | }, 2354 | { 2355 | "type": "array" 2356 | }, 2357 | { 2358 | "type": "object" 2359 | } 2360 | ] 2361 | }, 2362 | "AnyNonCollectionType": { 2363 | "anyOf": [ 2364 | { 2365 | "type": "string" 2366 | }, 2367 | { 2368 | "type": "number" 2369 | }, 2370 | { 2371 | "type": "integer" 2372 | }, 2373 | { 2374 | "type": "boolean" 2375 | }, 2376 | { 2377 | "type": "null" 2378 | } 2379 | ] 2380 | } 2381 | } 2382 | } 2383 | --------------------------------------------------------------------------------