├── .editorconfig ├── .github ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── ci.yaml │ └── release.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .pre-commit-hooks.yaml ├── API.md ├── CHANGELOG.md ├── CNAME ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── _config.yml ├── _layouts └── default.html ├── datacontract ├── __init__.py ├── api.py ├── breaking │ ├── breaking.py │ ├── breaking_change.py │ └── breaking_rules.py ├── catalog │ └── catalog.py ├── cli.py ├── data_contract.py ├── engines │ ├── __init__.py │ ├── data_contract_checks.py │ ├── data_contract_test.py │ ├── datacontract │ │ ├── check_that_datacontract_contains_valid_servers_configuration.py │ │ └── check_that_datacontract_file_exists.py │ ├── fastjsonschema │ │ ├── check_jsonschema.py │ │ └── s3 │ │ │ └── s3_read_files.py │ └── soda │ │ ├── __init__.py │ │ ├── check_soda_execute.py │ │ └── connections │ │ ├── bigquery.py │ │ ├── databricks.py │ │ ├── duckdb_connection.py │ │ ├── kafka.py │ │ ├── postgres.py │ │ ├── snowflake.py │ │ ├── sqlserver.py │ │ └── trino.py ├── export │ ├── __init__.py │ ├── avro_converter.py │ ├── avro_idl_converter.py │ ├── bigquery_converter.py │ ├── custom_converter.py │ ├── data_caterer_converter.py │ ├── dbml_converter.py │ ├── dbt_converter.py │ ├── dcs_exporter.py │ ├── duckdb_type_converter.py │ ├── exporter.py │ ├── exporter_factory.py │ ├── go_converter.py │ ├── great_expectations_converter.py │ ├── html_exporter.py │ ├── iceberg_converter.py │ ├── jsonschema_converter.py │ ├── markdown_converter.py │ ├── mermaid_exporter.py │ ├── odcs_v3_exporter.py │ ├── pandas_type_converter.py │ ├── protobuf_converter.py │ ├── pydantic_converter.py │ ├── rdf_converter.py │ ├── sodacl_converter.py │ ├── spark_converter.py │ ├── sql_converter.py │ ├── sql_type_converter.py │ ├── sqlalchemy_converter.py │ └── terraform_converter.py ├── imports │ ├── avro_importer.py │ ├── bigquery_importer.py │ ├── csv_importer.py │ ├── dbml_importer.py │ ├── 
dbt_importer.py │ ├── excel_importer.py │ ├── glue_importer.py │ ├── iceberg_importer.py │ ├── importer.py │ ├── importer_factory.py │ ├── jsonschema_importer.py │ ├── odcs_importer.py │ ├── odcs_v3_importer.py │ ├── parquet_importer.py │ ├── protobuf_importer.py │ ├── spark_importer.py │ ├── sql_importer.py │ └── unity_importer.py ├── init │ └── init_template.py ├── integration │ └── datamesh_manager.py ├── lint │ ├── files.py │ ├── lint.py │ ├── linters │ │ ├── __init__.py │ │ ├── description_linter.py │ │ ├── field_pattern_linter.py │ │ ├── field_reference_linter.py │ │ ├── notice_period_linter.py │ │ └── valid_constraints_linter.py │ ├── resolve.py │ ├── resources.py │ ├── schema.py │ └── urls.py ├── model │ ├── data_contract_specification │ │ └── __init__.py │ ├── exceptions.py │ ├── odcs.py │ └── run.py ├── output │ ├── __init__.py │ ├── junit_test_results.py │ ├── output_format.py │ └── test_results_writer.py ├── py.typed ├── schemas │ ├── datacontract-1.1.0.init.yaml │ ├── datacontract-1.1.0.schema.json │ ├── download │ └── odcs-3.0.1.schema.json └── templates │ ├── datacontract.html │ ├── datacontract_odcs.html │ ├── index.html │ ├── partials │ ├── datacontract_information.html │ ├── datacontract_servicelevels.html │ ├── datacontract_terms.html │ ├── definition.html │ ├── example.html │ ├── model_field.html │ ├── quality.html │ └── server.html │ └── style │ ├── generate-style │ ├── input.css │ ├── output.css │ └── tailwind.config.js ├── datacontractcli.png ├── favicon.png ├── pyproject.toml ├── release ├── tests ├── __init__.py ├── conftest.py ├── fixtures │ ├── avro │ │ ├── data │ │ │ ├── arrays.avsc │ │ │ ├── logical_types.avsc │ │ │ ├── nested.avsc │ │ │ ├── nested_with_arrays.avsc │ │ │ └── orders.avsc │ │ └── export │ │ │ ├── datacontract.yaml │ │ │ ├── datacontract_decimal.avsc │ │ │ ├── datacontract_decimal.yaml │ │ │ ├── datacontract_enum.avsc │ │ │ ├── datacontract_enum.yaml │ │ │ ├── datacontract_logicalType.avsc │ │ │ ├── 
datacontract_logicalType.yaml │ │ │ ├── datacontract_test_field_float.avsc │ │ │ ├── datacontract_test_field_float.yaml │ │ │ ├── datacontract_test_field_map.avsc │ │ │ ├── datacontract_test_field_map.yaml │ │ │ ├── datacontract_test_field_namespace.avsc │ │ │ ├── datacontract_test_field_namespace.yaml │ │ │ ├── datacontract_test_logical_type.yaml │ │ │ ├── datacontract_test_required.avsc │ │ │ ├── datacontract_test_required.yaml │ │ │ └── orders_with_datefields.avsc │ ├── azure-delta-remote │ │ └── datacontract.yaml │ ├── azure-json-remote │ │ └── datacontract.yaml │ ├── azure-parquet-remote │ │ └── datacontract.yaml │ ├── bigquery │ │ ├── datacontract.yaml │ │ ├── datacontract_complex.yaml │ │ ├── export │ │ │ ├── bq.txt │ │ │ ├── bq_table_schema.json │ │ │ └── datacontract.yaml │ │ └── import │ │ │ ├── complete_table_schema.json │ │ │ ├── datacontract.yaml │ │ │ ├── datacontract_multi_import.yaml │ │ │ ├── multi_import_external_table.json │ │ │ ├── multi_import_materialized_view.json │ │ │ ├── multi_import_snapshot.json │ │ │ ├── multi_import_table.json │ │ │ └── multi_import_view.json │ ├── breaking │ │ ├── datacontract-definitions-v1.yaml │ │ ├── datacontract-definitions-v2.yaml │ │ ├── datacontract-definitions-v3.yaml │ │ ├── datacontract-fields-array-v1.yaml │ │ ├── datacontract-fields-array-v2.yaml │ │ ├── datacontract-fields-v1.yaml │ │ ├── datacontract-fields-v2.yaml │ │ ├── datacontract-fields-v3.yaml │ │ ├── datacontract-info-v1.yaml │ │ ├── datacontract-info-v2.yaml │ │ ├── datacontract-info-v3.yaml │ │ ├── datacontract-models-v1.yaml │ │ ├── datacontract-models-v2.yaml │ │ ├── datacontract-models-v3.yaml │ │ ├── datacontract-quality-v1.yaml │ │ ├── datacontract-quality-v2.yaml │ │ ├── datacontract-quality-v3.yaml │ │ ├── datacontract-terms-v1.yaml │ │ ├── datacontract-terms-v2.yaml │ │ └── datacontract-terms-v3.yaml │ ├── catalog │ │ ├── datacontract-1.yaml │ │ └── datacontract-2.yaml │ ├── csv │ │ └── data │ │ │ ├── datacontract.yaml │ │ │ ├── 
sample_data.csv │ │ │ └── sample_data_5_column.csv │ ├── custom │ │ └── export │ │ │ ├── datacontract.yaml │ │ │ ├── expected.sql │ │ │ └── template.sql │ ├── data-caterer │ │ └── export │ │ │ └── datacontract_nested.yaml │ ├── databricks-sql │ │ └── datacontract.yaml │ ├── databricks-unity │ │ └── import │ │ │ ├── datacontract.yaml │ │ │ ├── datacontract_complex_types.yaml │ │ │ ├── unity_table_schema.json │ │ │ └── unity_table_schema_complex_types.json │ ├── dataframe │ │ └── datacontract.yaml │ ├── dbml │ │ ├── datacontract.yaml │ │ └── import │ │ │ ├── datacontract.yaml │ │ │ ├── datacontract_schema_filtered.yaml │ │ │ ├── datacontract_table_filtered.yaml │ │ │ └── dbml.txt │ ├── dbt │ │ ├── export │ │ │ └── datacontract.yaml │ │ └── import │ │ │ ├── manifest_empty_columns.json │ │ │ ├── manifest_jaffle_bigquery.json │ │ │ └── manifest_jaffle_duckdb.json │ ├── excel │ │ ├── shipments-odcs.xlsx │ │ └── shipments-odcs.yaml │ ├── export │ │ ├── datacontract.html │ │ ├── datacontract.yaml │ │ ├── datacontract_nested.yaml │ │ ├── datacontract_no_model_type.yaml │ │ ├── datacontract_s3.yaml │ │ └── rdf │ │ │ ├── datacontract-complex.yaml │ │ │ └── datacontract.yaml │ ├── gcs-json-remote │ │ ├── data │ │ │ ├── README.md │ │ │ └── inventory │ │ │ │ └── year=2022 │ │ │ │ ├── month=04 │ │ │ │ └── day=20 │ │ │ │ │ └── hour=00 │ │ │ │ │ ├── inventory+0+0001327496.json │ │ │ │ │ ├── inventory+0+0001328496.json │ │ │ │ │ ├── inventory+0+0001329496.json │ │ │ │ │ └── inventory+0+0001330496.json │ │ │ │ └── month=05 │ │ │ │ └── day=04 │ │ │ │ └── hour=00 │ │ │ │ ├── inventory+0+0002657902.json │ │ │ │ ├── inventory+0+0002658902.json │ │ │ │ └── inventory+0+0002659902.json │ │ └── datacontract.yaml │ ├── glue │ │ ├── datacontract-empty-model.yaml │ │ └── datacontract.yaml │ ├── great-expectations │ │ ├── datacontract.yaml │ │ ├── datacontract_missing_quality_file.yaml │ │ ├── datacontract_quality_column.yaml │ │ ├── datacontract_quality_file.yaml │ │ ├── 
datacontract_quality_yaml.yaml │ │ └── quality.json │ ├── iceberg │ │ ├── invalid_schema.json │ │ ├── nested_schema.json │ │ └── simple_schema.json │ ├── import │ │ ├── football-datacontract.yml │ │ ├── football.json │ │ ├── football_deeply_nested_no_required.json │ │ ├── football_deeply_nested_no_required_datacontract.yml │ │ ├── orders.json │ │ ├── orders_union-types.json │ │ └── orders_union-types_datacontract.yml │ ├── junit │ │ ├── data │ │ │ └── somedata.csv │ │ └── datacontract.yaml │ ├── kafka-avro-remote │ │ └── datacontract.yaml │ ├── kafka-json-remote │ │ └── datacontract.yaml │ ├── kafka │ │ ├── data │ │ │ └── messages.json │ │ └── datacontract.yaml │ ├── lint │ │ ├── custom_datacontract.schema.json │ │ ├── custom_datacontract.yaml │ │ ├── datacontract_csv_lint_base.yaml │ │ ├── datacontract_quality_schema.yaml │ │ ├── datacontract_unknown_model.yaml │ │ ├── invalid_datacontract.yaml │ │ ├── valid_datacontract.yaml │ │ ├── valid_datacontract_ref.yaml │ │ └── valid_datacontract_references.yaml │ ├── local-delta │ │ ├── data │ │ │ ├── line_items │ │ │ │ ├── 0-7b7ac87a-16b4-43be-b019-de661a3180cf-0.parquet │ │ │ │ └── _delta_log │ │ │ │ │ └── 00000000000000000000.json │ │ │ └── orders │ │ │ │ ├── 0-5014bd96-6666-482e-bec9-d02a43a78cfb-0.parquet │ │ │ │ └── _delta_log │ │ │ │ └── 00000000000000000000.json │ │ ├── datacontract.yaml │ │ └── helper │ │ │ └── create_delta_files.py │ ├── local-json-complex │ │ ├── data │ │ │ └── sts_data.json │ │ └── datacontract.yaml │ ├── local-json │ │ ├── data │ │ │ ├── nested_types.json │ │ │ └── verbraucherpreisindex.json │ │ ├── datacontract.json │ │ └── datacontract.yaml │ ├── markdown │ │ └── export │ │ │ ├── datacontract.yaml │ │ │ └── expected.md │ ├── odcs_v3 │ │ ├── adventureworks.datacontract.yml │ │ ├── adventureworks.odcs.yaml │ │ ├── full-example.datacontract.yml │ │ └── full-example.odcs.yaml │ ├── parquet │ │ ├── data │ │ │ ├── array.parquet │ │ │ ├── bigint.parquet │ │ │ ├── blob.parquet │ │ │ ├── 
boolean.parquet │ │ │ ├── combined.parquet │ │ │ ├── combined_no_time.parquet │ │ │ ├── date.parquet │ │ │ ├── decimal.parquet │ │ │ ├── double.parquet │ │ │ ├── float.parquet │ │ │ ├── integer.parquet │ │ │ ├── list.parquet │ │ │ ├── map.parquet │ │ │ ├── string.parquet │ │ │ ├── struct.parquet │ │ │ ├── time.parquet │ │ │ ├── timestamp.parquet │ │ │ └── timestamp_ntz.parquet │ │ ├── datacontract.yaml │ │ ├── datacontract_array.yaml │ │ ├── datacontract_bigint.yaml │ │ ├── datacontract_binary.yaml │ │ ├── datacontract_boolean.yaml │ │ ├── datacontract_date.yaml │ │ ├── datacontract_decimal.yaml │ │ ├── datacontract_double.yaml │ │ ├── datacontract_float.yaml │ │ ├── datacontract_integer.yaml │ │ ├── datacontract_invalid.yaml │ │ ├── datacontract_map.yaml │ │ ├── datacontract_string.yaml │ │ ├── datacontract_struct.yaml │ │ ├── datacontract_timestamp.yaml │ │ ├── datacontract_timestamp_ntz.yaml │ │ └── helper │ │ │ └── create_parquet_files.py │ ├── postgres-export │ │ ├── data │ │ │ └── data.sql │ │ └── datacontract.yaml │ ├── postgres │ │ ├── data │ │ │ ├── data.sql │ │ │ ├── data_case_sensitive.sql │ │ │ └── data_constraints.sql │ │ ├── datacontract.yaml │ │ ├── datacontract_case_sensitive.yaml │ │ ├── datacontract_servicelevels.yaml │ │ └── odcs.yaml │ ├── protobuf │ │ ├── data │ │ │ └── sample_data.proto3.data │ │ └── datacontract.yaml │ ├── quality │ │ ├── data │ │ │ ├── data.invalid.sql │ │ │ └── data.valid.sql │ │ └── datacontract.yaml │ ├── s3-csv │ │ ├── data │ │ │ └── sample_data.csv │ │ └── datacontract.yaml │ ├── s3-delta │ │ ├── data │ │ │ └── orders.delta │ │ │ │ ├── 0-66aaa7ef-36e3-4985-9359-72874e273705-0.parquet │ │ │ │ └── _delta_log │ │ │ │ └── 00000000000000000000.json │ │ ├── datacontract.yaml │ │ └── helper │ │ │ └── create_delta_files.py │ ├── s3-json-complex │ │ ├── data │ │ │ └── feed.json │ │ └── datacontract.yaml │ ├── s3-json-multiple-models │ │ ├── data │ │ │ ├── line_items │ │ │ │ └── line_items-1.json │ │ │ └── orders │ │ │ │ └── 
orders-1.json │ │ ├── datacontract.yaml │ │ └── v2 │ │ │ ├── line_items │ │ │ └── line_items-1.json │ │ │ └── orders │ │ │ └── orders-1.json │ ├── s3-json-remote │ │ └── datacontract.yaml │ ├── s3-json │ │ ├── data │ │ │ └── inventory │ │ │ │ └── year=2022 │ │ │ │ ├── month=04 │ │ │ │ └── day=20 │ │ │ │ │ └── hour=00 │ │ │ │ │ ├── inventory+0+0001327496.json │ │ │ │ │ ├── inventory+0+0001328496.json │ │ │ │ │ ├── inventory+0+0001329496.json │ │ │ │ │ └── inventory+0+0001330496.json │ │ │ │ └── month=05 │ │ │ │ └── day=04 │ │ │ │ └── hour=00 │ │ │ │ ├── inventory+0+0002657902.json │ │ │ │ ├── inventory+0+0002658902.json │ │ │ │ └── inventory+0+0002659902.json │ │ └── datacontract.yaml │ ├── snowflake │ │ └── datacontract.yaml │ ├── sodacl │ │ ├── checks.yaml │ │ └── datacontract.yaml │ ├── spark │ │ ├── export │ │ │ └── datacontract.yaml │ │ └── import │ │ │ ├── users_datacontract_desc.yml │ │ │ └── users_datacontract_no_desc.yml │ ├── spec │ │ ├── datacontract_aliases.yaml │ │ └── datacontract_fields_field.yaml │ ├── sqlserver │ │ ├── data │ │ │ └── data.sql │ │ ├── datacontract.yaml │ │ └── import │ │ │ └── ddl.sql │ └── trino │ │ ├── data │ │ ├── data.sql │ │ └── table.sql │ │ └── datacontract.yaml ├── test_api.py ├── test_breaking.py ├── test_catalog.py ├── test_changelog.py ├── test_cli.py ├── test_data_contract_checks.py ├── test_data_contract_specification.py ├── test_description_linter.py ├── test_documentation_linter.py ├── test_download_datacontract_file.py ├── test_duckdb_json.py ├── test_export_avro.py ├── test_export_avro_idl.py ├── test_export_bigquery.py ├── test_export_complex_data_contract.py ├── test_export_custom.py ├── test_export_custom_exporter.py ├── test_export_data_caterer.py ├── test_export_dbml.py ├── test_export_dbt_models.py ├── test_export_dbt_sources.py ├── test_export_dbt_staging_sql.py ├── test_export_go.py ├── test_export_great_expectations.py ├── test_export_html.py ├── test_export_iceberg.py ├── test_export_jsonschema.py ├── 
test_export_markdown.py ├── test_export_mermaid.py ├── test_export_odcs_v3.py ├── test_export_protobuf.py ├── test_export_pydantic.py ├── test_export_rdf.py ├── test_export_sodacl.py ├── test_export_spark.py ├── test_export_sql.py ├── test_export_sql_query.py ├── test_export_sqlalchemy.py ├── test_export_terraform.py ├── test_field_constraint_linter.py ├── test_field_pattern_linter.py ├── test_field_reference_linter.py ├── test_import_avro.py ├── test_import_bigquery.py ├── test_import_csv.py ├── test_import_dbml.py ├── test_import_dbt.py ├── test_import_excel.py ├── test_import_glue.py ├── test_import_iceberg.py ├── test_import_jsonschema.py ├── test_import_odcs_v3.py ├── test_import_parquet.py ├── test_import_protobuf.py ├── test_import_spark.py ├── test_import_sql_postgres.py ├── test_import_sql_sqlserver.py ├── test_import_unity_file.py ├── test_integration_datameshmanager.py ├── test_lint.py ├── test_notice_period_linter.py ├── test_resolve.py ├── test_roundtrip_jsonschema.py ├── test_spec_fields_field.py ├── test_spec_ref.py ├── test_test_azure_remote.py ├── test_test_bigquery.py ├── test_test_databricks.py ├── test_test_dataframe.py ├── test_test_delta.py ├── test_test_gcs_json_remote.py ├── test_test_kafka.py ├── test_test_kafka_remote.py ├── test_test_local_json.py ├── test_test_output_junit.py ├── test_test_parquet.py ├── test_test_postgres.py ├── test_test_quality.py ├── test_test_s3_csv.py ├── test_test_s3_delta.py ├── test_test_s3_json.py ├── test_test_s3_json_complex.py ├── test_test_s3_json_multiple_models.py ├── test_test_s3_json_remote.py ├── test_test_snowflake.py ├── test_test_sqlserver.py └── test_test_trino.py └── update_help.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_size = 2 7 | indent_style = space 8 | insert_final_newline = false 9 | max_line_length = 100 10 | tab_width = 2 11 | 12 | [{*.py,*.pyw}] 13 | 
indent_size = 4 14 | max_line_length = 120 15 | tab_width = 4 16 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | - [ ] Tests pass 2 | - [ ] ruff format 3 | - [ ] README.md updated (if relevant) 4 | - [ ] CHANGELOG.md entry added 5 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | # Ruff version. 4 | rev: v0.4.7 5 | hooks: 6 | # Run the linter. 7 | - id: ruff 8 | args: [ --fix ] 9 | # Run the formatter. 10 | - id: ruff-format -------------------------------------------------------------------------------- /.pre-commit-hooks.yaml: -------------------------------------------------------------------------------- 1 | - id: datacontract-lint 2 | name: Data Contract Linter 3 | description: This hook lint the data contract. 
4 | entry: datacontract lint 5 | files: "datacontract*.yaml" 6 | language: python 7 | additional_dependencies: ['.[all]'] 8 | types: [yaml] 9 | 10 | - id: datacontract-test 11 | name: Data Contract Tester 12 | description: This hook test the data contract. 13 | entry: datacontract test 14 | files: "datacontract*.yaml" 15 | language: python 16 | additional_dependencies: ['.[all]'] 17 | types: [yaml] 18 | -------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | cli.datacontract.com -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-bullseye 2 | 3 | # Setting PYTHONUNBUFFERED to a non-empty value different from 0 ensures that the python output i.e. 4 | # the stdout and stderr streams are sent straight to terminal (e.g. your container log) without 5 | # being first buffered and that you can see the output of your application in real time. 6 | ENV PYTHONUNBUFFERED=1 7 | 8 | # Compiling Python source files to bytecode is typically desirable for production images as it tends 9 | # to improve startup time (at the cost of increased installation time). 10 | ENV UV_COMPILE_BYTECODE=1 11 | 12 | # install uv 13 | COPY --from=ghcr.io/astral-sh/uv:0.6.9 /uv /uvx /bin/ 14 | 15 | # copy resources 16 | COPY pyproject.toml /app/. 17 | COPY MANIFEST.in /app/. 
18 | COPY datacontract/ /app/datacontract/ 19 | 20 | # install requirements 21 | RUN cd /app && uv pip --no-cache-dir install --system ".[all]" 22 | 23 | RUN mkdir -p /home/datacontract 24 | WORKDIR /home/datacontract 25 | 26 | ENTRYPOINT ["datacontract"] 27 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include datacontract/templates/style/output.css 2 | recursive-include datacontract/templates/ **/*.html 3 | recursive-include datacontract/schemas/ **/*.json **/*.yaml -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | - jekyll-sitemap 3 | markdown: kramdown 4 | name: "Data Contract CLI" 5 | title: null 6 | -------------------------------------------------------------------------------- /datacontract/__init__.py: -------------------------------------------------------------------------------- 1 | # Configuration so that yaml.safe_dump dumps strings with line breaks with yaml literal | 2 | import yaml 3 | 4 | yaml.SafeDumper.org_represent_str = yaml.SafeDumper.represent_str 5 | 6 | 7 | def repr_str(dumper, data): 8 | if "\n" in data: 9 | return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") 10 | return dumper.org_represent_str(data) 11 | 12 | 13 | yaml.add_representer(str, repr_str, Dumper=yaml.SafeDumper) 14 | -------------------------------------------------------------------------------- /datacontract/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/engines/__init__.py -------------------------------------------------------------------------------- 
# /datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py:
# --------------------------------------------------------------------------------
from datacontract.model.data_contract_specification import DataContractSpecification
from datacontract.model.exceptions import DataContractException


def check_that_datacontract_contains_valid_server_configuration(
    data_contract: DataContractSpecification, server_name: str | None
):
    """Validate that a server to test against can be selected from the contract.

    Args:
        data_contract: Specification whose ``servers`` block is inspected.
        server_name: Name of the server to test, or ``None`` to rely on the
            contract declaring exactly one server.

    Raises:
        DataContractException: With ``result="warning"`` when the servers block
            is missing or empty, when several servers are declared but none was
            named, or when ``server_name`` is not among the declared servers.
    """
    servers = data_contract.servers
    if not servers:
        raise DataContractException(
            type="lint",
            name="Check that data contract contains valid server configuration",
            result="warning",
            reason="Servers block is missing. Skip executing tests.",
            engine="datacontract",
        )
    if len(servers) > 1 and server_name is None:
        raise DataContractException(
            type="lint",
            name="Check that data contract contains valid server configuration",
            result="warning",
            reason="Data contract contains multiple server configurations. Specify the server you want to test. Skip executing tests.",
            engine="datacontract",
        )
    if server_name is not None and server_name not in servers:
        raise DataContractException(
            type="lint",
            name="Check that data contract contains valid servers configuration",
            result="warning",
            reason=f"Cannot find server '{server_name}' in the data contract servers configuration. Skip executing tests.",
            engine="datacontract",
        )


# TODO check for server.type, if all required fields are present
# --------------------------------------------------------------------------------
# /datacontract/engines/datacontract/check_that_datacontract_file_exists.py:
# --------------------------------------------------------------------------------
import os

from datacontract.model.run import Check, Run


def check_that_datacontract_file_exists(run: Run, file_path: str | None):
    """Fail the run when a local data contract file is missing.

    Nothing is checked when ``file_path`` is ``None`` or starts with
    ``http://`` / ``https://``; only local paths are tested for existence.

    Args:
        run: Run whose ``checks`` list receives a failed check on error.
        file_path: Local path or HTTP(S) location of the data contract.

    Raises:
        Exception: When ``file_path`` is a local path that does not exist.
    """
    if file_path is None:
        return
    # str.startswith accepts a tuple, so both schemes are tested in one call.
    if file_path.startswith(("http://", "https://")):
        return
    if not os.path.exists(file_path):
        # Record the failure on the run before aborting.
        run.checks.append(
            Check(
                type="lint",
                name="Check that data contract file exists",
                result="failed",
                reason=f"The file '{file_path}' does not exist.",
                engine="datacontract-cli",
            )
        )
        raise Exception(f"The file '{file_path}' does not exist.")
# --------------------------------------------------------------------------------
# /datacontract/engines/fastjsonschema/s3/s3_read_files.py:
# --------------------------------------------------------------------------------
import logging
import os

from datacontract.model.exceptions import DataContractException
from datacontract.model.run import ResultEnum


def yield_s3_files(s3_endpoint_url, s3_location):
    """Yield the content of every object matching ``s3_location``.

    Args:
        s3_endpoint_url: Endpoint URL handed to the S3 filesystem client.
        s3_location: Glob pattern passed to the filesystem's ``glob``.

    Yields:
        Whatever ``read()`` returns for each matching file, one file at a time.
    """
    fs = s3_fs(s3_endpoint_url)
    for file in fs.glob(s3_location):
        with fs.open(file) as f:
            logging.info(f"Downloading file {file}")
            yield f.read()


def s3_fs(s3_endpoint_url):
    """Create an ``s3fs.S3FileSystem`` configured from the environment.

    Credentials are read from ``DATACONTRACT_S3_ACCESS_KEY_ID``,
    ``DATACONTRACT_S3_SECRET_ACCESS_KEY`` and ``DATACONTRACT_S3_SESSION_TOKEN``.
    Anonymous access is requested when no access key id is set.

    Raises:
        DataContractException: When the optional ``s3fs`` package is missing.
    """
    try:
        # Imported lazily because s3fs is only available with the s3 extra.
        import s3fs
    except ImportError as e:
        # Chain explicitly so the traceback shows the import failure as the cause.
        raise DataContractException(
            type="schema",
            result=ResultEnum.failed,
            name="s3 extra missing",
            reason="Install the extra s3 to use s3",
            engine="datacontract",
            original_exception=e,
        ) from e

    aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
    aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
    aws_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
    return s3fs.S3FileSystem(
        key=aws_access_key_id,
        secret=aws_secret_access_key,
        token=aws_session_token,
        anon=aws_access_key_id is None,
        client_kwargs={"endpoint_url": s3_endpoint_url},
    )
# --------------------------------------------------------------------------------
# /datacontract/engines/soda/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/engines/soda/__init__.py
# --------------------------------------------------------------------------------
# /datacontract/engines/soda/connections/bigquery.py:
# --------------------------------------------------------------------------------
import os

import yaml


# https://docs.soda.io/soda/connect-bigquery.html#authentication-methods
def to_bigquery_soda_configuration(server):
    """Serialize a BigQuery server entry to a Soda configuration YAML string.

    The service account key file path is taken from
    ``DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH`` and, when that variable is
    unset, from ``GOOGLE_APPLICATION_CREDENTIALS``.

    Args:
        server: Object providing ``type``, ``project`` and ``dataset``.

    Returns:
        The YAML document produced by ``yaml.dump``.
    """
    # with service account key, using an external json file

    # check for our own environment variable first
    account_info = os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH")
    if account_info is None:
        # but as a fallback look for the default google one
        account_info = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")

    soda_configuration = {
        f"data_source {server.type}": {
            "type": "bigquery",
            "account_info_json_path": account_info,
            "auth_scopes": ["https://www.googleapis.com/auth/bigquery"],
            "project_id": server.project,
            "dataset": server.dataset,
        }
    }

    return yaml.dump(soda_configuration)
# --------------------------------------------------------------------------------
# /datacontract/engines/soda/connections/databricks.py:
# --------------------------------------------------------------------------------
import os

import yaml


def to_databricks_soda_configuration(server):
    """Serialize a Databricks server entry to a Soda configuration YAML string.

    The token comes from ``DATACONTRACT_DATABRICKS_TOKEN`` and the HTTP path
    from ``DATACONTRACT_DATABRICKS_HTTP_PATH``. The host is ``server.host``,
    falling back to ``DATACONTRACT_DATABRICKS_SERVER_HOSTNAME``.

    Raises:
        ValueError: When the token is unset, or when neither ``server.host``
            nor the hostname environment variable provides a host.
    """
    access_token = os.getenv("DATACONTRACT_DATABRICKS_TOKEN")
    if access_token is None:
        raise ValueError("DATACONTRACT_DATABRICKS_TOKEN environment variable is not set")

    sql_http_path = os.getenv("DATACONTRACT_DATABRICKS_HTTP_PATH")

    hostname = server.host
    if hostname is None:
        # The contract did not name a host; fall back to the environment.
        hostname = os.getenv("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME")
    if hostname is None:
        raise ValueError("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME environment variable is not set")

    data_source_key = f"data_source {server.type}"
    data_source = {
        "type": "spark",
        "method": "databricks",
        "host": hostname,
        "catalog": server.catalog,
        "schema": server.schema_,
        "http_path": sql_http_path,
        "token": access_token,
    }
    return yaml.dump({data_source_key: data_source})
# --------------------------------------------------------------------------------
# /datacontract/engines/soda/connections/postgres.py:
# --------------------------------------------------------------------------------
import os

import yaml


def to_postgres_soda_configuration(server):
    """Serialize a Postgres server entry to a Soda configuration YAML string.

    Username and password are read from ``DATACONTRACT_POSTGRES_USERNAME`` and
    ``DATACONTRACT_POSTGRES_PASSWORD``; host, port, database and schema come
    from ``server``. The port is emitted as a string.
    """
    data_source_key = f"data_source {server.type}"
    data_source = {
        "type": "postgres",
        "host": server.host,
        "port": str(server.port),
        "username": os.getenv("DATACONTRACT_POSTGRES_USERNAME"),
        "password": os.getenv("DATACONTRACT_POSTGRES_PASSWORD"),
        "database": server.database,
        "schema": server.schema_,
    }
    return yaml.dump({data_source_key: data_source})
# --------------------------------------------------------------------------------
# /datacontract/engines/soda/connections/snowflake.py:
# --------------------------------------------------------------------------------
import os

import yaml


def to_snowflake_soda_configuration(server):
    """Serialize a Snowflake server entry to a Soda configuration YAML string.

    Every environment variable starting with ``DATACONTRACT_SNOWFLAKE_`` is
    passed through as a connection parameter: the prefix is stripped and the
    remainder lower-cased. These parameters override the values taken from
    ``server`` when the names collide.
    """
    prefix = "DATACONTRACT_SNOWFLAKE_"
    # Slice instead of str.replace so only the leading prefix is removed, even
    # if the prefix text happens to occur again later in the variable name.
    snowflake_soda_params = {
        name[len(prefix):].lower(): value for name, value in os.environ.items() if name.startswith(prefix)
    }

    # backward compatibility
    snowflake_soda_params.setdefault("connection_timeout", "5")  # minutes

    soda_configuration = {
        f"data_source {server.type}": {
            "type": "snowflake",
            "account": server.account,
            "database": server.database,
            "schema": server.schema_,
            **snowflake_soda_params,
        }
    }
    return yaml.dump(soda_configuration)
# --------------------------------------------------------------------------------
# /datacontract/engines/soda/connections/sqlserver.py:
# --------------------------------------------------------------------------------
import os

import yaml

from datacontract.model.data_contract_specification import Server

_TRUE_VALUES = frozenset({"1", "true", "yes", "y", "on"})


def _env_flag(name: str, default: bool) -> bool:
    """Return the boolean value of environment variable ``name``.

    ``default`` is returned when the variable is unset. Otherwise the value is
    true only if, ignoring case and surrounding whitespace, it is one of
    ``1``, ``true``, ``yes``, ``y`` or ``on``.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in _TRUE_VALUES


def to_sqlserver_soda_configuration(server: Server) -> str:
    """Serialize server config to soda configuration.


    ### Example:
        type: sqlserver
        host: host
        port: '1433'
        username: simple
        password: simple_pass
        database: database
        schema: dbo
        trusted_connection: false
        encrypt: false
        trust_server_certificate: false
        driver: ODBC Driver 18 for SQL Server

    Credentials are read from ``DATACONTRACT_SQLSERVER_USERNAME`` and
    ``DATACONTRACT_SQLSERVER_PASSWORD`` (empty string when unset). The flags
    ``DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION`` (default ``False``),
    ``DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE`` (default ``False``) and
    ``DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION`` (default ``True``) are
    parsed into real booleans.
    """
    soda_configuration = {
        f"data_source {server.type}": {
            "type": "sqlserver",
            "host": server.host,
            "port": str(server.port),
            "username": os.getenv("DATACONTRACT_SQLSERVER_USERNAME", ""),
            "password": os.getenv("DATACONTRACT_SQLSERVER_PASSWORD", ""),
            "database": server.database,
            "schema": server.schema_,
            # os.getenv returns strings, so a value such as "false" would be
            # dumped as a non-empty string instead of a YAML boolean.
            "trusted_connection": _env_flag("DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION", False),
            "trust_server_certificate": _env_flag("DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE", False),
            "encrypt": _env_flag("DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION", True),
            "driver": server.driver,
        }
    }

    return yaml.dump(soda_configuration)
# --------------------------------------------------------------------------------
# /datacontract/engines/soda/connections/trino.py:
# --------------------------------------------------------------------------------
import os

import yaml


def to_trino_soda_configuration(server):
    """Serialize a Trino server entry to a Soda configuration YAML string.

    Credentials are read from ``DATACONTRACT_TRINO_USERNAME`` and
    ``DATACONTRACT_TRINO_PASSWORD``. When the password is unset or empty,
    ``auth_type`` is set to ``NoAuthentication``.
    """
    password = os.getenv("DATACONTRACT_TRINO_PASSWORD")
    username = os.getenv("DATACONTRACT_TRINO_USERNAME")

    data_source = {
        "type": "trino",
        "host": server.host,
        "port": str(server.port),
        "username": username,
        "password": password,
        "catalog": server.catalog,
        "schema": server.schema_,
    }

    if not password:
        data_source["auth_type"] = "NoAuthentication"  # default is BasicAuthentication

    return yaml.dump({f"data_source {server.type}": data_source})
# --------------------------------------------------------------------------------
# /datacontract/export/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/export/__init__.py
# --------------------------------------------------------------------------------
# /datacontract/export/custom_converter.py:
# --------------------------------------------------------------------------------
from pathlib import Path

from jinja2 import Environment, FileSystemLoader

from datacontract.export.exporter import Exporter
from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    Model,
)


class CustomExporter(Exporter):
    """Exporter that renders a data contract with a user-supplied Jinja template."""

    def export(
        self,
        data_contract: DataContractSpecification,
        model: Model,
        server: str,
        sql_server_type: str,
        export_args: dict,
    ) -> str:
        """Exports a data contract to custom format with Jinja.

        Only ``data_contract`` and ``export_args["template"]`` are used here.

        Raises:
            RuntimeError: When ``export_args`` has no ``template`` entry.
        """
        template = export_args.get("template")
        if template is None:
            raise RuntimeError("Export to custom requires template argument.")

        return to_custom(data_contract, template)


def to_custom(data_contract: DataContractSpecification, template_path: Path) -> str:
    """Render the template at ``template_path`` with ``data_contract`` in scope.

    The specification is exposed to the template as ``data_contract``.
    """
    template = get_template(template_path)
    return template.render(data_contract=data_contract)


def get_template(path: Path):
    """Load the Jinja template stored at ``path``.

    ``path`` may be a ``Path`` or a plain string. The template's directory is
    used as the loader's search path.
    """
    absolute_path = Path(path).resolve()
    env = Environment(loader=FileSystemLoader(str(absolute_path.parent)))
    # Take the name from the resolved Path: the raw argument may be a str,
    # which has no .name attribute.
    return env.get_template(absolute_path.name)
# --------------------------------------------------------------------------------
/datacontract/export/dcs_exporter.py: -------------------------------------------------------------------------------- 1 | from datacontract.export.exporter import Exporter 2 | 3 | 4 | class DcsExporter(Exporter): 5 | def export(self, data_contract, model, server, sql_server_type, export_args) -> dict: 6 | return data_contract.to_yaml() 7 | -------------------------------------------------------------------------------- /datacontract/export/pandas_type_converter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for converting data contract field types to corresponding pandas data types. 3 | """ 4 | 5 | from datacontract.model.data_contract_specification import Field 6 | 7 | 8 | def convert_to_pandas_type(field: Field) -> str: 9 | """ 10 | Convert a data contract field type to the equivalent pandas data type. 11 | 12 | Parameters: 13 | ---------- 14 | field : Field 15 | A Field object containing metadata about the data type of the field. 16 | 17 | Returns: 18 | ------- 19 | str 20 | The corresponding pandas data type as a string. 
21 | """ 22 | field_type = field.type 23 | 24 | if field_type in ["string", "varchar", "text"]: 25 | return "str" 26 | if field_type in ["integer", "int"]: 27 | return "int32" 28 | if field_type == "long": 29 | return "int64" 30 | if field_type == "float": 31 | return "float32" 32 | if field_type in ["number", "decimal", "numeric", "double"]: 33 | return "float64" 34 | if field_type == "boolean": 35 | return "bool" 36 | if field_type in ["timestamp", "timestamp_tz", "timestamp_ntz", "date"]: 37 | return "datetime64[ns]" 38 | if field_type == "bytes": 39 | return "object" 40 | return "object" 41 | -------------------------------------------------------------------------------- /datacontract/export/sodacl_converter.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | from datacontract.engines.data_contract_checks import create_checks 4 | from datacontract.export.exporter import Exporter 5 | from datacontract.model.data_contract_specification import DataContractSpecification, Server 6 | from datacontract.model.run import Run 7 | 8 | 9 | class SodaExporter(Exporter): 10 | def export(self, data_contract, model, server, sql_server_type, export_args) -> str: 11 | run = Run.create_run() 12 | server = get_server(data_contract, server) 13 | run.checks.extend(create_checks(data_contract, server)) 14 | return to_sodacl_yaml(run) 15 | 16 | 17 | def to_sodacl_yaml(run: Run) -> str: 18 | sodacl_dict = {} 19 | for run_check in run.checks: 20 | if run_check.engine != "soda" or run_check.language != "sodacl": 21 | continue 22 | check_yaml_str = run_check.implementation 23 | check_yaml_dict = yaml.safe_load(check_yaml_str) 24 | for key, value in check_yaml_dict.items(): 25 | if key in sodacl_dict: 26 | if isinstance(sodacl_dict[key], list) and isinstance(value, list): 27 | sodacl_dict[key].extend(value) 28 | else: 29 | sodacl_dict[key].update(value) 30 | else: 31 | sodacl_dict[key] = value 32 | return yaml.dump(sodacl_dict) 33 | 
34 | 35 | def get_server(data_contract_specification: DataContractSpecification, server_name: str = None) -> Server | None: 36 | if server_name is None: 37 | return None 38 | return data_contract_specification.servers.get(server_name) 39 | -------------------------------------------------------------------------------- /datacontract/imports/importer.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from enum import Enum 3 | 4 | from datacontract_specification.model import DataContractSpecification 5 | from open_data_contract_standard.model import OpenDataContractStandard 6 | 7 | 8 | class Importer(ABC): 9 | def __init__(self, import_format) -> None: 10 | self.import_format = import_format 11 | 12 | @abstractmethod 13 | def import_source( 14 | self, 15 | data_contract_specification: DataContractSpecification | OpenDataContractStandard, 16 | source: str, 17 | import_args: dict, 18 | ) -> DataContractSpecification | OpenDataContractStandard: 19 | pass 20 | 21 | 22 | class ImportFormat(str, Enum): 23 | sql = "sql" 24 | avro = "avro" 25 | dbt = "dbt" 26 | dbml = "dbml" 27 | glue = "glue" 28 | jsonschema = "jsonschema" 29 | bigquery = "bigquery" 30 | odcs = "odcs" 31 | unity = "unity" 32 | spark = "spark" 33 | iceberg = "iceberg" 34 | parquet = "parquet" 35 | csv = "csv" 36 | protobuf = "protobuf" 37 | excel = "excel" 38 | 39 | @classmethod 40 | def get_supported_formats(cls): 41 | return list(map(lambda c: c.value, cls)) 42 | 43 | 44 | class Spec(str, Enum): 45 | datacontract_specification = "datacontract_specification" 46 | odcs = "odcs" 47 | 48 | @classmethod 49 | def get_supported_types(cls): 50 | return list(map(lambda c: c.value, cls)) 51 | -------------------------------------------------------------------------------- /datacontract/init/init_template.py: -------------------------------------------------------------------------------- 1 | import importlib.resources as resources 2 | 
import logging 3 | 4 | import requests 5 | 6 | DEFAULT_DATA_CONTRACT_INIT_TEMPLATE = "datacontract-1.1.0.init.yaml" 7 | 8 | 9 | def get_init_template(location: str = None) -> str: 10 | if location is None: 11 | logging.info("Use default bundled template " + DEFAULT_DATA_CONTRACT_INIT_TEMPLATE) 12 | schemas = resources.files("datacontract") 13 | template = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_INIT_TEMPLATE) 14 | with template.open("r") as file: 15 | return file.read() 16 | elif location.startswith("http://") or location.startswith("https://"): 17 | return requests.get(location).text 18 | else: 19 | with open(location, "r") as file: 20 | return file.read() 21 | -------------------------------------------------------------------------------- /datacontract/lint/files.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from datacontract.model.exceptions import DataContractException 4 | 5 | 6 | def read_file(path): 7 | if not os.path.exists(path): 8 | raise DataContractException( 9 | type="lint", 10 | name=f"Reading data contract from {path}", 11 | reason=f"The file '{path}' does not exist.", 12 | engine="datacontract", 13 | result="error", 14 | ) 15 | with open(path, "r") as file: 16 | file_content = file.read() 17 | return file_content 18 | -------------------------------------------------------------------------------- /datacontract/lint/linters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/lint/linters/__init__.py -------------------------------------------------------------------------------- /datacontract/lint/linters/description_linter.py: -------------------------------------------------------------------------------- 1 | from datacontract.model.data_contract_specification import DataContractSpecification 2 | 3 | from ..lint import 
Linter, LinterResult 4 | 5 | 6 | class DescriptionLinter(Linter): 7 | """Check for a description on contracts, models, model fields, definitions and examples.""" 8 | 9 | @property 10 | def name(self) -> str: 11 | return "Objects have descriptions" 12 | 13 | @property 14 | def id(self) -> str: 15 | return "description" 16 | 17 | def lint_implementation(self, contract: DataContractSpecification) -> LinterResult: 18 | result = LinterResult() 19 | if not contract.info or not contract.info.description: 20 | result = result.with_error("Contract has empty description.") 21 | for model_name, model in contract.models.items(): 22 | if not model.description: 23 | result = result.with_error(f"Model '{model_name}' has empty description.") 24 | for field_name, field in model.fields.items(): 25 | if not field.description: 26 | result = result.with_error(f"Field '{field_name}' in model '{model_name}' has empty description.") 27 | for definition_name, definition in contract.definitions.items(): 28 | if not definition.description: 29 | result = result.with_error(f"Definition '{definition_name}' has empty description.") 30 | for index, example in enumerate(contract.examples): 31 | if not example.description: 32 | result = result.with_error(f"Example {index + 1} has empty description.") 33 | return result 34 | -------------------------------------------------------------------------------- /datacontract/lint/linters/field_pattern_linter.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from datacontract.model.data_contract_specification import DataContractSpecification 4 | 5 | from ..lint import Linter, LinterResult 6 | 7 | 8 | class FieldPatternLinter(Linter): 9 | """Checks that all patterns defined for fields are correct Python regex 10 | syntax. 
11 | 12 | """ 13 | 14 | @property 15 | def name(self): 16 | return "Field pattern is correct regex" 17 | 18 | @property 19 | def id(self) -> str: 20 | return "field-pattern" 21 | 22 | def lint_implementation(self, contract: DataContractSpecification) -> LinterResult: 23 | result = LinterResult() 24 | for model_name, model in contract.models.items(): 25 | for field_name, field in model.fields.items(): 26 | if field.pattern: 27 | try: 28 | re.compile(field.pattern) 29 | except re.error as e: 30 | result = result.with_error( 31 | f"Failed to compile pattern regex '{field.pattern}' for " 32 | f"field '{field_name}' in model '{model_name}': {e.msg}" 33 | ) 34 | return result 35 | -------------------------------------------------------------------------------- /datacontract/lint/resources.py: -------------------------------------------------------------------------------- 1 | from datacontract.lint.files import read_file 2 | from datacontract.lint.urls import fetch_resource 3 | 4 | 5 | def read_resource(location: str) -> str: 6 | """ 7 | Read a resource from a given location. 8 | 9 | If the location is a URL, fetch the resource from the web. API-Keys are supported. 10 | Otherwise, read the resource from a local file. 11 | 12 | Args: 13 | location (str): The location of the resource, either a URL or a file path. 14 | 15 | Returns: 16 | str: The content of the resource. 
17 | """ 18 | if location.startswith("http://") or location.startswith("https://"): 19 | return fetch_resource(location) 20 | else: 21 | return read_file(location) 22 | -------------------------------------------------------------------------------- /datacontract/model/data_contract_specification/__init__.py: -------------------------------------------------------------------------------- 1 | from datacontract_specification.model import * 2 | -------------------------------------------------------------------------------- /datacontract/model/exceptions.py: -------------------------------------------------------------------------------- 1 | from datacontract.model.run import ResultEnum 2 | 3 | 4 | class DataContractException(Exception): 5 | """Exception raised for errors in the execution of a run. 6 | 7 | Attributes: 8 | type (str): The type of the error. 9 | name (str): The name associated with the error. 10 | model (str): The model involved in the error. 11 | reason (str): Explanation of the error. 12 | engine (str): The engine where the error occurred. 13 | original_exception (Exception, optional): Original exception that led to this error. 14 | message (str): General message for the error. 
15 | """ 16 | 17 | def __init__( 18 | self, 19 | type, 20 | name, 21 | reason, 22 | engine="datacontract", 23 | model=None, 24 | original_exception=None, 25 | result: ResultEnum = ResultEnum.failed, 26 | message="Run operation failed", 27 | ): 28 | self.type = type 29 | self.name = name 30 | self.model = model 31 | self.reason = reason 32 | self.result = result 33 | self.engine = engine 34 | self.original_exception = original_exception 35 | self.message = message 36 | super().__init__( 37 | f"{self.message}: [{self.type}] {self.name} - {self.model} - {self.result} - {self.reason} - {self.engine}" 38 | ) 39 | -------------------------------------------------------------------------------- /datacontract/model/odcs.py: -------------------------------------------------------------------------------- 1 | def is_open_data_contract_standard(odcs: dict) -> bool: 2 | """ 3 | Check if the given dictionary is an OpenDataContractStandard. 4 | 5 | Args: 6 | odcs (dict): The dictionary to check. 7 | 8 | Returns: 9 | bool: True if the dictionary is an OpenDataContractStandard, False otherwise. 
10 | """ 11 | return odcs.get("kind") == "DataContract" and odcs.get("apiVersion", "").startswith("v3") 12 | -------------------------------------------------------------------------------- /datacontract/output/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/output/__init__.py -------------------------------------------------------------------------------- /datacontract/output/output_format.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class OutputFormat(str, Enum): 5 | # json = "json" # coming soon 6 | junit = "junit" 7 | 8 | @classmethod 9 | def get_supported_formats(cls): 10 | return list(map(lambda c: c.value, cls)) 11 | -------------------------------------------------------------------------------- /datacontract/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/py.typed -------------------------------------------------------------------------------- /datacontract/schemas/download: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | curl -o datacontract-1.1.0.init.yaml https://datacontract.com/datacontract.init.yaml 5 | curl -o datacontract-1.1.0.schema.json https://datacontract.com/datacontract.schema.json 6 | curl -o odcs-3.0.1.schema.json https://raw.githubusercontent.com/bitol-io/open-data-contract-standard/refs/heads/main/schema/odcs-json-schema-v3.0.1.json 7 | 8 | -------------------------------------------------------------------------------- /datacontract/templates/partials/definition.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
5 | 6 | 7 | 8 | 9 | 13 | 14 | 15 | 16 | 17 | {{ render_partial('partials/model_field.html', nested = False, field_name=definition_name, 18 | field = definition, level = 0) }} 19 | 20 | 21 |
10 | {{ definition_name }} 11 |
{{ definition.description }}
12 |
22 |
23 |
24 |
25 |
-------------------------------------------------------------------------------- /datacontract/templates/partials/example.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
5 | 6 | 7 | 8 | 9 | 14 | 15 | 16 | 17 | 18 | 21 | 22 | 23 |
10 | {{ example.model }} 11 | {{ example.type }} 12 |
{{ example.description }}
13 |
19 |
{{ example.data }}
20 |
24 |
25 |
26 |
27 |
-------------------------------------------------------------------------------- /datacontract/templates/style/generate-style: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # npm install --global tailwindcss 4 | tailwindcss --input input.css --output output.css 5 | -------------------------------------------------------------------------------- /datacontract/templates/style/input.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; -------------------------------------------------------------------------------- /datacontract/templates/style/tailwind.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | content: [ 3 | "../datacontract.html", 4 | "../datacontract_odcs.html", 5 | "../index.html", 6 | "../partials/model_field.html", 7 | "../partials/server.html", 8 | "../partials/definition.html", 9 | "../partials/datacontract_information.html", 10 | "../partials/datacontract_servicelevels.html", 11 | "../partials/datacontract_terms.html", 12 | "../partials/example.html", 13 | "../partials/quality.html", 14 | ], 15 | theme: { }, 16 | plugins: [], 17 | } -------------------------------------------------------------------------------- /datacontractcli.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontractcli.png -------------------------------------------------------------------------------- /favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/favicon.png -------------------------------------------------------------------------------- /release: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Release steps: 5 | # 1. Update release version in pyproject.toml 6 | # 2. Update CHANGELOG.md header 7 | # 3. Run ./release 8 | # 4. Update release notes in Github 9 | 10 | # pip install toml-cli 11 | VERSION=$(uvx --from toml-cli toml get --toml-path pyproject.toml project.version) 12 | TAG_VERSION=v$VERSION 13 | 14 | echo "Checking that everything is committed" 15 | git diff --exit-code 16 | echo "Tagging $TAG_VERSION" 17 | git tag $TAG_VERSION 18 | echo "Pushing $TAG_VERSION" 19 | git push origin $TAG_VERSION 20 | echo "Pushed $TAG_VERSION" 21 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.fixture(autouse=True) 5 | def change_test_dir(request, monkeypatch): 6 | monkeypatch.chdir(request.fspath.dirname) 7 | -------------------------------------------------------------------------------- /tests/fixtures/avro/data/arrays.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "fields": [ 3 | { 4 | "name": "orderid", 5 | "type": "int" 6 | }, 7 | { 8 | "name": "addresses", 9 | "doc": "Addresses of a customer", 10 | "type": { 11 | "type": "array", 12 | "items": { 13 | "name": "address", 14 | "type": "record", 15 | "fields": [ 16 | { 17 | "name": "city", 18 | "type": "string" 19 | }, 20 | { 21 | "name": "state", 22 | "type": "string" 23 | }, 24 | { 25 | "name": "zipcode", 26 | "type": "long" 27 | } 28 | ] 29 | } 30 | } 31 | }, 32 | { 33 | 
"name": "nestedArrays", 34 | "doc": "Example schema for an array of arrays", 35 | "type": { 36 | "type": "array", 37 | "items": { 38 | "type": "array", 39 | "items": "int" 40 | } 41 | } 42 | }, 43 | { 44 | "name": "nationalities", 45 | "type": [ 46 | "null", 47 | { 48 | "type": "array", 49 | "items": { 50 | "type": "string", 51 | "connect.parameters": { 52 | "avro.java.string": "String" 53 | }, 54 | "avro.java.string": "String" 55 | } 56 | } 57 | ], 58 | "default": null 59 | } 60 | ], 61 | "name": "orders", 62 | "doc": "My Model", 63 | "type": "record" 64 | } -------------------------------------------------------------------------------- /tests/fixtures/avro/data/logical_types.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "Test", 4 | "namespace": "mynamespace.com", 5 | "fields": [ 6 | { 7 | "name": "test_id", 8 | "type": "string", 9 | "doc": "id documentation test" 10 | }, 11 | { 12 | "name": "device_id", 13 | "type": "int" 14 | }, 15 | { 16 | "name": "test_value", 17 | "type": "double" 18 | }, 19 | { 20 | "name": "num_items", 21 | "type": "int" 22 | }, 23 | { 24 | "name": "processed_timestamp", 25 | "type": "long", 26 | "doc": "The date the event was processed: for more info https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29", 27 | "logicalType": "local-timestamp-micros" 28 | }, 29 | { 30 | "name": "description", 31 | "type": "string" 32 | }, 33 | { 34 | "name": "is_processed", 35 | "type": "boolean", 36 | "default": false 37 | }, 38 | { 39 | "name": "some_bytes_decimal", 40 | "type": { 41 | "type": "bytes", 42 | "logicalType": "decimal", 43 | "precision": 25, 44 | "scale": 2 45 | } 46 | } 47 | ] 48 | } -------------------------------------------------------------------------------- /tests/fixtures/avro/data/nested.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "fields": [ 3 | { 4 | "default": 
null, 5 | "name": "fieldA", 6 | "type": [ 7 | "null", 8 | "long" 9 | ] 10 | }, 11 | { 12 | "default": null, 13 | "name": "fieldB", 14 | "type": [ 15 | "null", 16 | { 17 | "fields": [ 18 | { 19 | "default": null, 20 | "name": "fieldC", 21 | "type": [ 22 | "null", 23 | { 24 | "avro.java.string": "String", 25 | "type": "string" 26 | } 27 | ] 28 | } 29 | ], 30 | "name": "ObjectB", 31 | "type": "record" 32 | } 33 | ] 34 | } 35 | ], 36 | "name": "Doc", 37 | "namespace": "com.xxx", 38 | "type": "record" 39 | } 40 | -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders 3 | info: 4 | title: Orders 5 | version: 0.0.1 6 | description: Order messages as generated by Confluent Datagen Source Adapter 7 | servers: 8 | production: 9 | type: kafka 10 | host: pkc-7xoy1.eu-central-1.aws.confluent.cloud:9092 11 | topic: orders.avro.v1 12 | format: avro 13 | models: 14 | orders: 15 | type: table 16 | description: My Model 17 | namespace: com.example.checkout 18 | fields: 19 | orderdate: 20 | type: date 21 | description: My Field 22 | order_timestamp: 23 | type: timestamp 24 | delivery_timestamp: 25 | type: timestamp_ntz 26 | orderid: 27 | type: int 28 | itemid: 29 | type: string 30 | orderunits: 31 | type: double 32 | tags: 33 | type: array 34 | items: 35 | type: string 36 | address: 37 | type: object 38 | fields: 39 | city: 40 | type: string 41 | state: 42 | type: string 43 | zipcode: 44 | type: long 45 | quality: 46 | type: SodaCL 47 | specification: 48 | checks for orders: 49 | - row_count >= 5000 50 | 51 | -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_decimal.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "MySchema", 4 | 
"fields": [ 5 | { 6 | "name": "price", 7 | "type": { 8 | "type": "bytes", 9 | "logicalType": "decimal" 10 | } 11 | }, 12 | { 13 | "name": "dewey_decimal", 14 | "type": { 15 | "type": "bytes", 16 | "logicalType": "decimal", 17 | "scale": 2, 18 | "precision": 4 19 | } 20 | }, 21 | { 22 | "name": "reading_level", 23 | "type": [ 24 | "null", 25 | { 26 | "type": "bytes", 27 | "logicalType": "decimal" 28 | } 29 | ] 30 | }, 31 | { 32 | "name": "age", 33 | "type": [ 34 | "null", 35 | { 36 | "type": "bytes", 37 | "logicalType": "decimal", 38 | "precision": 3 39 | } 40 | ] 41 | } 42 | ] 43 | } 44 | -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_decimal.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | MySchema: 8 | fields: 9 | price: 10 | type: decimal 11 | required: true 12 | dewey_decimal: 13 | type: decimal 14 | required: true 15 | precision: 4 16 | scale: 2 17 | reading_level: 18 | type: decimal 19 | required: false 20 | age: 21 | type: decimal 22 | required: false 23 | precision: 3 -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_enum.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "MySchema", 4 | "fields": [ 5 | { 6 | "name": "color", 7 | "type": { 8 | "type": "enum", 9 | "name": "Color", 10 | "symbols": [ 11 | "RED", 12 | "GREEN", 13 | "BLUE", 14 | "UNKNOWN" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_enum.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | 
info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | MySchema: 8 | fields: 9 | color: 10 | type: string 11 | title: Color 12 | enum: 13 | - RED 14 | - GREEN 15 | - BLUE 16 | - UNKNOWN 17 | config: 18 | avroType: enum 19 | -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_logicalType.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "Test", 4 | "namespace": "mynamespace.com", 5 | "fields": [ 6 | {"name": "test_id", "type": "string", "doc": "id documentation test"}, 7 | {"name": "device_id", "type": "int"}, 8 | {"name": "test_value", "type": "double"}, 9 | {"name": "num_items", "type": "int"}, 10 | {"name": "processed_timestamp", 11 | "type": { 12 | "type": "long", 13 | "logicalType": "local-timestamp-micros" 14 | }, 15 | "doc": "The date the event was processed: for more info https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29" 16 | }, 17 | {"name": "description", "type": "string"}, 18 | {"name": "is_processed", "type": "boolean", 19 | "default": false} 20 | ] 21 | } -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_logicalType.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | Test: 8 | namespace: mynamespace.com 9 | fields: 10 | test_id: 11 | type: string 12 | required: true 13 | description: id documentation test 14 | device_id: 15 | type: int 16 | required: true 17 | test_value: 18 | type: double 19 | required: true 20 | num_items: 21 | type: int 22 | required: true 23 | processed_timestamp: 24 | type: long 25 | required: true 26 | description: 'The date the event was processed: for more info 
https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29' 27 | config: 28 | avroType: long 29 | avroLogicalType: local-timestamp-micros 30 | description: 31 | type: string 32 | required: true 33 | is_processed: 34 | type: boolean 35 | required: true 36 | config: 37 | avroDefault: false -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_test_field_float.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "row", 4 | "namespace": "com.example", 5 | "fields": [ 6 | { 7 | "name": "field_name", 8 | "type": "float" 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_test_field_float.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: pageviews 3 | info: 4 | title: Pageviews 5 | version: 0.0.1 6 | models: 7 | row: 8 | type: table 9 | namespace: com.example 10 | fields: 11 | field_name: 12 | type: float 13 | -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_test_field_map.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "row", 4 | "namespace": "com.example", 5 | "fields": [ 6 | { 7 | "name": "field_name", 8 | "type": { 9 | "type": "map", 10 | "values":["string", 11 | "long" 12 | ] 13 | } 14 | } 15 | ] 16 | } -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_test_field_map.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: pageviews 3 | info: 4 | title: Pageviews 5 | version: 0.0.1 6 | models: 7 | row: 8 | type: table 9 | namespace: 
com.example 10 | fields: 11 | field_name: 12 | type: map 13 | config: 14 | values: ["string", "long"] 15 | -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_test_field_namespace.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "row", 4 | "namespace": "com.example", 5 | "fields": [ 6 | { 7 | "name": "field_name", 8 | "type": 9 | { 10 | "type": "record", 11 | "name": "field_name", 12 | "namespace": "com.example", 13 | "fields": [ 14 | { 15 | "name": "field", 16 | "type": "string" 17 | 18 | } 19 | ] 20 | } 21 | 22 | } 23 | ] 24 | } -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_test_field_namespace.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: pageviews 3 | info: 4 | title: Pageviews 5 | version: 0.0.1 6 | models: 7 | row: 8 | type: table 9 | namespace: com.example 10 | fields: 11 | field_name: 12 | type: record 13 | config: 14 | namespace: com.example 15 | fields: 16 | field: 17 | type: string 18 | -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_test_logical_type.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | alert: 8 | fields: 9 | currentSelectionLiability: 10 | type: decimal 11 | required: true 12 | precision: 25 13 | scale: 2 14 | raised: 15 | type: timestamp_tz 16 | required: true 17 | selectionSettledTime: 18 | type: timestamp_tz 19 | required: false -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_test_required.avsc: 
-------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "pageviews", 4 | "namespace": "com.example.activity", 5 | "fields": [ 6 | { 7 | "name": "event_ts", 8 | "type": { 9 | "type": "long", 10 | "logicalType": "local-timestamp-millis" 11 | } 12 | }, 13 | { 14 | "name": "correlation_id", 15 | "type": "int" 16 | }, 17 | { 18 | "name": "user_guid", 19 | "type": ["null", "string"] 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /tests/fixtures/avro/export/datacontract_test_required.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: pageviews 3 | info: 4 | title: Pageviews 5 | version: 0.0.1 6 | models: 7 | pageviews: 8 | type: table 9 | namespace: com.example.activity 10 | fields: 11 | event_ts: 12 | type: timestamp_ntz 13 | correlation_id: 14 | type: int 15 | required: true 16 | user_guid: 17 | type: string 18 | required: false 19 | 20 | -------------------------------------------------------------------------------- /tests/fixtures/avro/export/orders_with_datefields.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "orders", 4 | "doc": "My Model", 5 | "namespace": "com.example.checkout", 6 | "fields": [ 7 | { 8 | "name": "orderdate", 9 | "doc": "My Field", 10 | "type": { 11 | "type": "int", 12 | "logicalType": "date" 13 | } 14 | }, 15 | { 16 | "name": "order_timestamp", 17 | "type": { 18 | "type": "long", 19 | "logicalType": "timestamp-millis" 20 | } 21 | }, 22 | { 23 | "name": "delivery_timestamp", 24 | "type": { 25 | "type": "long", 26 | "logicalType": "local-timestamp-millis" 27 | } 28 | }, 29 | { 30 | "name": "orderid", 31 | "type": "int" 32 | }, 33 | { 34 | "name": "itemid", 35 | "type": "string" 36 | }, 37 | { 38 | "name": "orderunits", 39 | "type": "double" 40 | }, 41 | { 42 | "name": 
"tags", 43 | "type": { 44 | "type": "array", 45 | "items": "string" 46 | } 47 | }, 48 | { 49 | "name": "address", 50 | "type": { 51 | "type": "record", 52 | "name": "address", 53 | "fields": [ 54 | { 55 | "name": "city", 56 | "type": "string" 57 | }, 58 | { 59 | "name": "state", 60 | "type": "string" 61 | }, 62 | { 63 | "name": "zipcode", 64 | "type": "long" 65 | } 66 | ] 67 | } 68 | } 69 | ] 70 | } -------------------------------------------------------------------------------- /tests/fixtures/azure-delta-remote/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders-unit-test 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: azure 9 | storageAccount: datameshdatabricksdemo 10 | location: abfss://dataproducts/orders_delta/orders.delta 11 | format: delta 12 | models: 13 | orders: 14 | fields: 15 | order_id: 16 | type: varchar 17 | unique: true 18 | required: true 19 | order_timestamp: 20 | required: true 21 | order_total: 22 | type: bigint 23 | required: true 24 | -------------------------------------------------------------------------------- /tests/fixtures/azure-json-remote/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders-unit-test 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: azure 9 | location: abfss://datameshdatabricksdemo.dfs.core.windows.net/topics/inventory/year=2022/month=07/day=13/*/inventory+0+000000*.json 10 | format: json 11 | delimiter: new_line 12 | models: 13 | orders: 14 | fields: 15 | updated_at: 16 | type: varchar 17 | available: 18 | type: integer 19 | location: 20 | type: varchar 21 | minLength: 2 22 | maxLength: 2 23 | sku: 24 | type: varchar 25 | quality: 26 | type: SodaCL 27 | specification: 28 | checks for orders: 29 | - row_count >= 5000 
-------------------------------------------------------------------------------- /tests/fixtures/azure-parquet-remote/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders-unit-test 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: azure 9 | storageAccount: datameshdatabricksdemo 10 | location: abfss://dataproducts/inventory_events/*.parquet 11 | format: parquet 12 | models: 13 | orders: 14 | fields: 15 | updated_at: 16 | type: varchar 17 | available: 18 | type: varchar # for historic reasons 19 | location: 20 | type: varchar 21 | minLength: 2 22 | maxLength: 2 23 | sku: 24 | type: varchar 25 | -------------------------------------------------------------------------------- /tests/fixtures/bigquery/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: bigquery 3 | info: 4 | title: bigquery 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | my-dataproduct/bigquery: 9 | type: bigquery 10 | project: datameshexample-product 11 | dataset: datacontract_cli_test_dataset 12 | dataProductId: my-dataproduct 13 | outputPortId: bigquery 14 | models: 15 | datacontract_cli_test_table: 16 | type: table 17 | fields: 18 | field_one: 19 | type: varchar 20 | required: true 21 | unique: true 22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$" 23 | field_two: 24 | type: int 25 | minimum: 10 26 | field_three: 27 | type: timestamp 28 | -------------------------------------------------------------------------------- /tests/fixtures/bigquery/datacontract_complex.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: bigquery 3 | info: 4 | title: bigquery 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | my-dataproduct/bigquery: 9 | type: bigquery 10 | project: 
datameshexample-product 11 | dataset: datacontract_cli 12 | models: 13 | complex_table: 14 | type: table 15 | fields: 16 | some_string: 17 | type: string 18 | some_record: 19 | type: record 20 | fields: 21 | some_field_1: 22 | type: string 23 | some_field_2: 24 | type: string 25 | some_array_of_strings: 26 | type: array 27 | items: 28 | type: string 29 | some_array_of_records: 30 | type: array 31 | items: 32 | type: record 33 | fields: 34 | some_other_field_1: 35 | type: string 36 | some_other_field_2: 37 | type: string 38 | some_json: 39 | type: text 40 | config: 41 | bigqueryType: json 42 | some_range_of_timestamp: 43 | type: record 44 | config: 45 | bigqueryType: RANGE 46 | -------------------------------------------------------------------------------- /tests/fixtures/bigquery/import/datacontract_multi_import.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | BQ_Table: 8 | description: This is a test table 9 | type: table 10 | fields: 11 | String_field: 12 | type: string 13 | required: false 14 | description: A simple String field 15 | BQ_External_Table: 16 | description: This is a test table 17 | type: table 18 | fields: 19 | String_field: 20 | type: string 21 | required: false 22 | description: A simple String field 23 | BQ_Snapshot: 24 | description: This is a test table 25 | type: table 26 | fields: 27 | String_field: 28 | type: string 29 | required: false 30 | description: A simple String field 31 | BQ_View: 32 | description: This is a test table 33 | type: view 34 | fields: 35 | String_field: 36 | type: string 37 | required: false 38 | description: A simple String field 39 | BQ_Materialized_View: 40 | description: This is a test table 41 | type: view 42 | fields: 43 | String_field: 44 | type: string 45 | required: false 46 | description: A simple String field 
-------------------------------------------------------------------------------- /tests/fixtures/bigquery/import/multi_import_external_table.json: -------------------------------------------------------------------------------- 1 | { 2 | "creationTime": "1715608399201", 3 | "description": "This is a test table", 4 | "etag": "vv0Ksh3XakMcCTFmhM0FOA==", 5 | "expirationTime": "1720792399201", 6 | "id": "bigquery-test-423213:test_dataset.BQ Example Table", 7 | "kind": "bigquery#table", 8 | "lastModifiedTime": "1715610311747", 9 | "location": "europe-west3", 10 | "numActiveLogicalBytes": "0", 11 | "numBytes": "0", 12 | "numLongTermBytes": "0", 13 | "numLongTermLogicalBytes": "0", 14 | "numRows": "0", 15 | "numTotalLogicalBytes": "0", 16 | "schema": { 17 | "fields": [ 18 | { 19 | "description": "A simple String field", 20 | "mode": "NULLABLE", 21 | "name": "String_field", 22 | "type": "STRING" 23 | } 24 | ] 25 | }, 26 | "selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/bigquery-test-423213/datasets/test_dataset/tables/BQ Example Table", 27 | "tableReference": { 28 | "datasetId": "test_dataset", 29 | "projectId": "bigquery-test-423213", 30 | "tableId": "BQ_External_Table" 31 | }, 32 | "type": "EXTERNAL" 33 | } -------------------------------------------------------------------------------- /tests/fixtures/bigquery/import/multi_import_materialized_view.json: -------------------------------------------------------------------------------- 1 | { 2 | "creationTime": "1715608399201", 3 | "description": "This is a test table", 4 | "etag": "vv0Ksh3XakMcCTFmhM0FOA==", 5 | "expirationTime": "1720792399201", 6 | "id": "bigquery-test-423213:test_dataset.BQ Example Table", 7 | "kind": "bigquery#table", 8 | "lastModifiedTime": "1715610311747", 9 | "location": "europe-west3", 10 | "numActiveLogicalBytes": "0", 11 | "numBytes": "0", 12 | "numLongTermBytes": "0", 13 | "numLongTermLogicalBytes": "0", 14 | "numRows": "0", 15 | "numTotalLogicalBytes": "0", 16 | "schema": { 
17 | "fields": [ 18 | { 19 | "description": "A simple String field", 20 | "mode": "NULLABLE", 21 | "name": "String_field", 22 | "type": "STRING" 23 | } 24 | ] 25 | }, 26 | "selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/bigquery-test-423213/datasets/test_dataset/tables/BQ Example Table", 27 | "tableReference": { 28 | "datasetId": "test_dataset", 29 | "projectId": "bigquery-test-423213", 30 | "tableId": "BQ_Materialized_View" 31 | }, 32 | "type": "MATERIALIZED_VIEW" 33 | } -------------------------------------------------------------------------------- /tests/fixtures/bigquery/import/multi_import_snapshot.json: -------------------------------------------------------------------------------- 1 | { 2 | "creationTime": "1715608399201", 3 | "description": "This is a test table", 4 | "etag": "vv0Ksh3XakMcCTFmhM0FOA==", 5 | "expirationTime": "1720792399201", 6 | "id": "bigquery-test-423213:test_dataset.BQ Example Table", 7 | "kind": "bigquery#table", 8 | "lastModifiedTime": "1715610311747", 9 | "location": "europe-west3", 10 | "numActiveLogicalBytes": "0", 11 | "numBytes": "0", 12 | "numLongTermBytes": "0", 13 | "numLongTermLogicalBytes": "0", 14 | "numRows": "0", 15 | "numTotalLogicalBytes": "0", 16 | "schema": { 17 | "fields": [ 18 | { 19 | "description": "A simple String field", 20 | "mode": "NULLABLE", 21 | "name": "String_field", 22 | "type": "STRING" 23 | } 24 | ] 25 | }, 26 | "selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/bigquery-test-423213/datasets/test_dataset/tables/BQ Example Table", 27 | "tableReference": { 28 | "datasetId": "test_dataset", 29 | "projectId": "bigquery-test-423213", 30 | "tableId": "BQ_Snapshot" 31 | }, 32 | "type": "SNAPSHOT" 33 | } -------------------------------------------------------------------------------- /tests/fixtures/bigquery/import/multi_import_table.json: -------------------------------------------------------------------------------- 1 | { 2 | "creationTime": "1715608399201", 3 | 
"description": "This is a test table", 4 | "etag": "vv0Ksh3XakMcCTFmhM0FOA==", 5 | "expirationTime": "1720792399201", 6 | "id": "bigquery-test-423213:test_dataset.BQ Example Table", 7 | "kind": "bigquery#table", 8 | "lastModifiedTime": "1715610311747", 9 | "location": "europe-west3", 10 | "numActiveLogicalBytes": "0", 11 | "numBytes": "0", 12 | "numLongTermBytes": "0", 13 | "numLongTermLogicalBytes": "0", 14 | "numRows": "0", 15 | "numTotalLogicalBytes": "0", 16 | "schema": { 17 | "fields": [ 18 | { 19 | "description": "A simple String field", 20 | "mode": "NULLABLE", 21 | "name": "String_field", 22 | "type": "STRING" 23 | } 24 | ] 25 | }, 26 | "selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/bigquery-test-423213/datasets/test_dataset/tables/BQ Example Table", 27 | "tableReference": { 28 | "datasetId": "test_dataset", 29 | "projectId": "bigquery-test-423213", 30 | "tableId": "BQ_Table" 31 | }, 32 | "type": "TABLE" 33 | } -------------------------------------------------------------------------------- /tests/fixtures/bigquery/import/multi_import_view.json: -------------------------------------------------------------------------------- 1 | { 2 | "creationTime": "1715608399201", 3 | "description": "This is a test table", 4 | "etag": "vv0Ksh3XakMcCTFmhM0FOA==", 5 | "expirationTime": "1720792399201", 6 | "id": "bigquery-test-423213:test_dataset.BQ Example Table", 7 | "kind": "bigquery#table", 8 | "lastModifiedTime": "1715610311747", 9 | "location": "europe-west3", 10 | "numActiveLogicalBytes": "0", 11 | "numBytes": "0", 12 | "numLongTermBytes": "0", 13 | "numLongTermLogicalBytes": "0", 14 | "numRows": "0", 15 | "numTotalLogicalBytes": "0", 16 | "schema": { 17 | "fields": [ 18 | { 19 | "description": "A simple String field", 20 | "mode": "NULLABLE", 21 | "name": "String_field", 22 | "type": "STRING" 23 | } 24 | ] 25 | }, 26 | "selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/bigquery-test-423213/datasets/test_dataset/tables/BQ Example 
Table", 27 | "tableReference": { 28 | "datasetId": "test_dataset", 29 | "projectId": "bigquery-test-423213", 30 | "tableId": "BQ_View" 31 | }, 32 | "type": "VIEW" 33 | } -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-definitions-v1.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | my-custom-required-field: hello 7 | 8 | models: 9 | my_table: 10 | type: table 11 | fields: 12 | my_field: 13 | required: false 14 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-definitions-v2.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | my-custom-required-field: hello 7 | 8 | models: 9 | my_table: 10 | type: table 11 | fields: 12 | my_field: 13 | $ref: '#/definitions/my_definition' 14 | 15 | definitions: 16 | my_definition: 17 | name: my_definition 18 | domain: global 19 | title: my_title 20 | description: My Description 21 | type: string 22 | enum: [my_enum] 23 | format: uuid 24 | minLength: 8 25 | maxLength: 14 26 | pattern: .* 27 | minimum: 8 28 | exclusiveMaximum: 8 29 | maximum: 14 30 | exclusiveMinimum: 14 31 | example: my_example 32 | pii: false 33 | classification: internal 34 | tags: [my_tags] 35 | 36 | 37 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-definitions-v3.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | my-custom-required-field: hello 7 | 8 | models: 9 | my_table: 
10 | type: table 11 | fields: 12 | my_field: 13 | $ref: '#/definitions/my_definition_2' 14 | 15 | definitions: 16 | my_definition_2: 17 | name: my_definition_2 18 | domain: global 19 | title: my_title_2 20 | description: My Description 2 21 | type: integer 22 | enum: [my_enum_2] 23 | format: url 24 | minLength: 10 25 | maxLength: 20 26 | pattern: .*.* 27 | minimum: 10 28 | exclusiveMaximum: 20 29 | maximum: 20 30 | exclusiveMinimum: 10 31 | example: my_example_2 32 | pii: true 33 | classification: sensitive 34 | tags: [my_tags_2] 35 | 36 | 37 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-fields-v1.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | my-custom-required-field: hello 7 | 8 | models: 9 | my_table: 10 | type: table 11 | fields: 12 | field_type: 13 | description: My Description 14 | field_format: 15 | type: string 16 | field_required: 17 | type: string 18 | field_primaryKey: 19 | type: string 20 | field_references: 21 | type: string 22 | field_unique: 23 | type: string 24 | field_description: 25 | type: string 26 | field_pii: 27 | type: string 28 | field_classification: 29 | type: string 30 | field_pattern: 31 | type: string 32 | field_minLength: 33 | type: string 34 | field_maxLength: 35 | type: string 36 | field_minimum: 37 | type: string 38 | field_exclusiveMinimum: 39 | type: string 40 | field_maximum: 41 | type: string 42 | field_exclusiveMaximum: 43 | type: string 44 | field_enum: 45 | type: string 46 | field_tags: 47 | type: string 48 | field_ref: 49 | type: string 50 | field_fields: 51 | fields: 52 | nested_field_1: 53 | type: string 54 | field_custom_key: 55 | type: string 56 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-info-v1.yaml: 
-------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.2 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | orders: 8 | fields: 9 | column_1: 10 | type: string 11 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-info-v2.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.2 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | owner: Data Team 7 | some-other-key: some information 8 | contact: 9 | email: datateam@work.com 10 | models: 11 | orders: 12 | fields: 13 | column_1: 14 | type: string 15 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-info-v3.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.2 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | owner: Another Team 7 | some-other-key: new information 8 | contact: 9 | email: anotherteam@work.com 10 | models: 11 | orders: 12 | fields: 13 | column_1: 14 | type: string 15 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-models-v1.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | my-custom-required-field: hello 7 | 8 | models: 9 | my_table: 10 | fields: 11 | my_field: 12 | description: My Description -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-models-v2.yaml: -------------------------------------------------------------------------------- 1 | 
dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | my-custom-required-field: hello 7 | 8 | models: 9 | my_table: 10 | type: table 11 | description: My Model Description 12 | fields: 13 | my_field: 14 | description: My Description 15 | another-key: original value 16 | my_table_2: 17 | fields: 18 | my_field_2: 19 | description: My Description 2 20 | some-other-key: some value -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-models-v3.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | my-custom-required-field: hello 7 | 8 | models: 9 | my_table: 10 | type: object 11 | description: My Updated Model Description 12 | fields: 13 | my_field: 14 | description: My Description 15 | another-key: updated value 16 | my_table_2: 17 | fields: 18 | my_field_2: 19 | description: My Description 2 -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-quality-v1.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | orders: 8 | fields: 9 | column_1: 10 | type: string 11 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-quality-v2.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | orders: 8 | fields: 9 | column_1: 10 | type: string 11 | quality: 12 | type: SodaCL 13 | specification: |- 14 | checks for orders: 15 | - 
freshness(column_1) < 1d 16 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-quality-v3.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | orders: 8 | fields: 9 | column_1: 10 | type: string 11 | quality: 12 | type: custom 13 | specification: |- 14 | checks for orders: 15 | - freshness(column_1) < 2d 16 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-terms-v1.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.2 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | orders: 8 | fields: 9 | column_1: 10 | type: string 11 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-terms-v2.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.2 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | terms: 7 | usage: | 8 | Data can be used for reports, analytics and machine learning use cases. 9 | Order may be linked and joined by other tables 10 | limitations: | 11 | Not suitable for real-time use cases. 12 | Data may not be used to identify individual customers. 
13 | Max data processing per day: 10 TiB 14 | billing: 5000 USD per month 15 | noticePeriod: P3M 16 | models: 17 | orders: 18 | fields: 19 | column_1: 20 | type: string 21 | -------------------------------------------------------------------------------- /tests/fixtures/breaking/datacontract-terms-v3.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.2 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | terms: 7 | usage: Data can be used for anything 8 | billing: 1000000 GBP per month 9 | noticePeriod: P1Y 10 | someOtherTerms: must abide by policies 11 | models: 12 | orders: 13 | fields: 14 | column_1: 15 | type: string 16 | -------------------------------------------------------------------------------- /tests/fixtures/catalog/datacontract-1.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders-unit-test 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | owner: checkout 7 | description: The orders data contract 8 | contact: 9 | email: team-orders@example.com 10 | url: https://wiki.example.com/teams/checkout 11 | terms: 12 | usage: This data contract serves to demo datacontract CLI export. 
13 | limitations: Not intended to use in production 14 | billing: free 15 | noticePeriod: P3M 16 | servers: 17 | production: 18 | type: snowflake 19 | account: my-account 20 | database: my-database 21 | schema: my-schema 22 | models: 23 | orders: 24 | description: The orders model 25 | fields: 26 | order_id: 27 | type: varchar 28 | unique: true 29 | required: true 30 | minLength: 8 31 | maxLength: 10 32 | pii: true 33 | classification: sensitive 34 | tags: 35 | - order_id 36 | pattern: ^B[0-9]+$ 37 | order_total: 38 | type: bigint 39 | required: true 40 | description: The order_total field 41 | minimum: 0 42 | maximum: 1000000 43 | order_status: 44 | type: text 45 | required: true 46 | enum: 47 | - pending 48 | - shipped 49 | - delivered -------------------------------------------------------------------------------- /tests/fixtures/catalog/datacontract-2.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: urn:datacontract:test 3 | info: 4 | title: Test datacontract 5 | version: 0.0.1 6 | description: Test datacontract 7 | models: 8 | model_test: 9 | description: A test model 10 | type: table 11 | fields: 12 | user_id: 13 | $ref: '#/definitions/user_id' 14 | required: true 15 | unique: true 16 | primaryKey: true 17 | allowed_actions: 18 | $ref: '#/definitions/allowed_actions' 19 | required: false 20 | definitions: 21 | user_id: 22 | title: User ID 23 | type: int 24 | description: An internal, autoincremental ID that identifies an user ID in the metricool app. 
25 | examples: 26 | - 883749 27 | allowed_actions: 28 | type: array 29 | description: Allowed user actions 30 | items: 31 | type: string 32 | enum: 33 | - "Add" 34 | - "View" 35 | - "Report" -------------------------------------------------------------------------------- /tests/fixtures/csv/data/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.3 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | servers: 7 | production: 8 | type: local 9 | format: csv 10 | path: ./tests/fixtures/csv/data/sample_data.csv 11 | delimiter: ',' 12 | models: 13 | sample_data: 14 | description: Csv file with encoding ascii 15 | type: table 16 | fields: 17 | field_one: 18 | type: string 19 | field_two: 20 | type: integer 21 | field_three: 22 | type: string 23 | 24 | -------------------------------------------------------------------------------- /tests/fixtures/csv/data/sample_data.csv: -------------------------------------------------------------------------------- 1 | field_one,field_two,field_three 2 | CX-263-DU,50,2023-06-16 13:12:56 3 | IK-894-MN,47,2023-10-08 22:40:57 4 | ER-399-JY,22,2023-05-16 01:08:22 5 | MT-939-FH,63,2023-03-15 05:15:21 6 | LV-849-MI,33,2023-09-08 20:08:43 7 | VS-079-OH,85,2023-04-15 00:50:32 8 | DN-297-XY,79,2023-11-08 12:55:42 9 | ZE-172-FP,14,2023-12-03 18:38:38 10 | ID-840-EG,89,2023-10-02 17:17:58 11 | FK-230-KZ,64,2023-11-27 15:21:48 12 | -------------------------------------------------------------------------------- /tests/fixtures/csv/data/sample_data_5_column.csv: -------------------------------------------------------------------------------- 1 | field_one,field_two,field_three,field_four,field_five,field_six 2 | CX-263-DU,50,2023-06-16 13:12:56,,true,test1@gmail.com 3 | IK-894-MN,47,2023-10-08 22:40:57,,true,test1@gmail.com 4 | ER-399-JY,22,2023-05-16 01:08:22,,true,test1@gmail.com 5 | MT-939-FH,47,2023-03-15 
05:15:21,,false,test1@gmail.com 6 | LV-849-MI,50,2023-09-08 20:08:43,,false,test1@gmail.com 7 | VS-079-OH,22,2023-04-15 00:50:32,,false,test1@gmail.com 8 | DN-297-XY,50,2023-11-08 12:55:42,,false,test1@gmail.com 9 | ZE-172-FP,14,,,true,test1@gmail.com 10 | ID-840-EG,89,2023-10-02 17:17:58,,true, 11 | FK-230-KZ,64,2023-11-27 15:21:48,,true,test1@gmail.com 12 | -------------------------------------------------------------------------------- /tests/fixtures/custom/export/expected.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | order_id AS order_id, 3 | DATETIME(order_timestamp, "Asia/Tokyo") AS order_timestamp, 4 | order_total AS order_total, 5 | customer_id AS customer_id, 6 | customer_email_address AS customer_email_address, 7 | DATETIME(processed_timestamp, "Asia/Tokyo") AS processed_timestamp, 8 | FROM 9 | {{ ref('orders') }} 10 | -------------------------------------------------------------------------------- /tests/fixtures/custom/export/template.sql: -------------------------------------------------------------------------------- 1 | {%- for model_name, model in data_contract.models.items() %} 2 | {#- Export only the first model #} 3 | {%- if loop.first -%} 4 | SELECT 5 | {%- for field_name, field in model.fields.items() %} 6 | {%- if field.type == "timestamp" %} 7 | DATETIME({{ field_name }}, "Asia/Tokyo") AS {{ field_name }}, 8 | {%- else %} 9 | {{ field_name }} AS {{ field_name }}, 10 | {%- endif %} 11 | {%- endfor %} 12 | FROM 13 | {{ "{{" }} ref('{{ model_name }}') {{ "}}" }} 14 | {%- endif %} 15 | {%- endfor %} 16 | 17 | -------------------------------------------------------------------------------- /tests/fixtures/databricks-unity/import/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | test_table: 8 | description: 
string 9 | type: table 10 | title: test_table 11 | fields: 12 | id: 13 | type: int 14 | required: true 15 | config: 16 | databricksType: int 17 | name: 18 | type: string 19 | required: false 20 | config: 21 | databricksType: varchar(255) 22 | age: 23 | type: int 24 | required: false 25 | config: 26 | databricksType: smallint 27 | salary: 28 | type: decimal 29 | required: false 30 | config: 31 | databricksType: decimal(10,2) 32 | join_date: 33 | type: date 34 | required: false 35 | config: 36 | databricksType: date 37 | updated_at: 38 | type: timestamp_ntz 39 | required: false 40 | config: 41 | databricksType: timestamp 42 | is_active: 43 | type: boolean 44 | required: false 45 | config: 46 | databricksType: boolean 47 | servers: 48 | myserver: 49 | type: databricks 50 | catalog: mycatalog 51 | schema: myschema 52 | -------------------------------------------------------------------------------- /tests/fixtures/dataframe/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: dataframetest 3 | info: 4 | title: dataframetest 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | unittest: 9 | type: dataframe 10 | models: 11 | my_table: 12 | type: table 13 | fields: 14 | field_one: 15 | type: varchar 16 | required: true 17 | unique: true 18 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$" 19 | field_two: 20 | type: int 21 | minimum: 10 22 | field_three: 23 | type: timestamp 24 | field_array_of_strings: 25 | type: array 26 | items: 27 | type: string 28 | field_array_of_structs: 29 | type: array 30 | items: 31 | type: struct 32 | fields: 33 | inner_field_string: 34 | type: varchar 35 | inner_field_int: 36 | type: int 37 | -------------------------------------------------------------------------------- /tests/fixtures/dbml/import/datacontract_schema_filtered.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 
2 | id: my-data-contract-id 3 | info: 4 | title: Orders Latest 5 | version: 0.0.1 6 | models: 7 | line_items: 8 | description: A single article that is part of an order. 9 | namespace: orders 10 | fields: 11 | lines_item_id: 12 | type: string 13 | required: true 14 | primaryKey: true 15 | unique: true 16 | description: Primary key of the lines_item_id table 17 | order_id: 18 | type: string 19 | required: false 20 | primaryKey: false 21 | unique: false 22 | references: orders.order_id 23 | description: An internal ID that identifies an order in the online shop. 24 | sku: 25 | type: string 26 | required: false 27 | primaryKey: false 28 | unique: false 29 | description: The purchased article number -------------------------------------------------------------------------------- /tests/fixtures/dbml/import/datacontract_table_filtered.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: Orders Latest 5 | version: 0.0.1 6 | models: 7 | orders: 8 | description: One record per order. Includes cancelled and deleted orders. 9 | namespace: test 10 | fields: 11 | order_id: 12 | type: string 13 | required: true 14 | primaryKey: true 15 | unique: true 16 | description: An internal ID that identifies an order in the online shop. 17 | order_timestamp: 18 | type: timestamp_ntz 19 | required: true 20 | primaryKey: false 21 | unique: false 22 | description: The business timestamp in UTC when the order was successfully 23 | registered in the source system and the payment was successful. 24 | order_total: 25 | type: variant 26 | required: true 27 | primaryKey: false 28 | unique: false 29 | description: Total amount the smallest monetary unit (e.g., cents). 30 | customer_id: 31 | type: string 32 | required: false 33 | primaryKey: false 34 | unique: false 35 | description: Unique identifier for the customer. 
36 | customer_email_address: 37 | type: string 38 | required: true 39 | primaryKey: false 40 | unique: false 41 | description: The email address, as entered by the customer. The email address 42 | was not verified. 43 | processed_timestamp: 44 | type: timestamp_ntz 45 | required: true 46 | primaryKey: false 47 | unique: false 48 | description: The timestamp when the record was processed by the data platform. -------------------------------------------------------------------------------- /tests/fixtures/dbml/import/dbml.txt: -------------------------------------------------------------------------------- 1 | Project "Orders Latest" { 2 | Note: '''Successful customer orders in the webshop. 3 | All orders since 2020-01-01. 4 | Orders with their line items are in their current state (no history included). 5 | ''' 6 | } 7 | 8 | Table test.orders { 9 | "order_id" "text" [pk,unique,not null,Note: "An internal ID that identifies an order in the online shop."] 10 | "order_timestamp" "timestamp" [not null,Note: "The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful."] 11 | "order_total" "record" [not null,Note: "Total amount the smallest monetary unit (e.g., cents)."] 12 | "customer_id" "text" [null,Note: "Unique identifier for the customer."] 13 | "customer_email_address" "text" [not null,Note: "The email address, as entered by the customer. The email address was not verified."] 14 | "processed_timestamp" "timestamp" [not null,Note: "The timestamp when the record was processed by the data platform."] 15 | Note: "One record per order. Includes cancelled and deleted orders." 
16 | } 17 | 18 | 19 | Table orders.line_items { 20 | "lines_item_id" "text" [pk,unique,not null,Note: "Primary key of the lines_item_id table"] 21 | "order_id" "text" [null,Note: "An internal ID that identifies an order in the online shop."] 22 | "sku" "text" [null,Note: "The purchased article number"] 23 | Note: "A single article that is part of an order." 24 | } 25 | 26 | Ref: orders.line_items.order_id > test.orders.order_id -------------------------------------------------------------------------------- /tests/fixtures/excel/shipments-odcs.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/excel/shipments-odcs.xlsx -------------------------------------------------------------------------------- /tests/fixtures/export/datacontract_nested.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders-unit-test 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | owner: checkout 7 | description: The orders data contract 8 | contact: 9 | email: team-orders@example.com 10 | url: https://wiki.example.com/teams/checkout 11 | terms: 12 | usage: This data contract serves to demo datacontract CLI export. 
13 | limitations: Not intended to use in production 14 | billing: free 15 | noticePeriod: P3M 16 | servers: 17 | production: 18 | type: snowflake 19 | account: my-account 20 | database: my-database 21 | schema: my-schema 22 | models: 23 | orders: 24 | description: The orders model 25 | fields: 26 | order_id: 27 | type: varchar 28 | unique: true 29 | required: true 30 | minLength: 8 31 | maxLength: 10 32 | pii: true 33 | classification: sensitive 34 | tags: 35 | - order_id 36 | pattern: ^B[0-9]+$ 37 | order_total: 38 | type: bigint 39 | required: true 40 | description: The order_total field 41 | minimum: 0 42 | maximum: 1000000 43 | order_status: 44 | type: text 45 | required: true 46 | enum: 47 | - pending 48 | - shipped 49 | - delivered 50 | address: 51 | type: record 52 | fields: 53 | street: 54 | type: string 55 | city: 56 | type: string -------------------------------------------------------------------------------- /tests/fixtures/export/datacontract_s3.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders-unit-test 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | owner: checkout 7 | description: The orders data contract 8 | contact: 9 | email: team-orders@example.com 10 | url: https://wiki.example.com/teams/checkout 11 | terms: 12 | usage: This data contract serves to demo datacontract CLI export. 
13 | limitations: Not intended to use in production 14 | billing: free 15 | noticePeriod: P3M 16 | servers: 17 | production: 18 | type: s3 19 | location: s3://datacontract-example-orders-latest/data/{model}/*.json 20 | format: json 21 | delimiter: new_line 22 | dataProductId: orders 23 | models: 24 | orders: 25 | description: The orders model 26 | fields: 27 | order_id: 28 | type: varchar 29 | unique: true 30 | required: true 31 | minLength: 8 32 | maxLength: 10 33 | pii: true 34 | classification: sensitive 35 | tags: 36 | - order_id 37 | pattern: ^B[0-9]+$ 38 | order_total: 39 | type: bigint 40 | required: true 41 | description: The order_total field 42 | minimum: 0 43 | maximum: 1000000 44 | order_status: 45 | type: text 46 | required: true 47 | enum: 48 | - pending 49 | - shipped 50 | - delivered -------------------------------------------------------------------------------- /tests/fixtures/export/rdf/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders-unit-test 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | owner: checkout 7 | contact: 8 | email: team-orders@example.com 9 | url: https://wiki.example.com/teams/checkout 10 | terms: 11 | usage: This data contract serves to demo datacontract CLI export. 
12 | limitations: Not intended to use in production 13 | billing: free 14 | noticePeriod: P3M 15 | models: 16 | orders: 17 | description: The orders model 18 | fields: 19 | order_id: 20 | type: varchar 21 | unique: true 22 | required: true 23 | minLength: 8 24 | maxLength: 10 25 | pii: true 26 | classification: sensitive 27 | tags: 28 | - order_id 29 | pattern: ^B[0-9]+$ 30 | order_total: 31 | type: bigint 32 | required: true 33 | description: The order_total field 34 | minimum: 0 35 | maximum: 1000000 36 | order_status: 37 | type: text 38 | required: true 39 | enum: 40 | - pending 41 | - shipped 42 | - delivered -------------------------------------------------------------------------------- /tests/fixtures/gcs-json-remote/data/README.md: -------------------------------------------------------------------------------- 1 | This folder is uploaded to a GCS bucket. -------------------------------------------------------------------------------- /tests/fixtures/gcs-json-remote/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: inventory-events 3 | info: 4 | title: Inventory Events 5 | version: 0.0.1 6 | owner: my-domain-team 7 | contact: 8 | email: jochen.christ@innoq.com 9 | servers: 10 | gcs-url: 11 | type: gcs 12 | location: gs://datacontract-test-inventory/inventory/*/*/*/*/*.json 13 | delimiter: new_line 14 | format: json 15 | s3-style: 16 | type: s3 17 | endpointUrl: https://storage.googleapis.com 18 | location: s3://datacontract-test-inventory/inventory/*/*/*/*/*.json 19 | delimiter: new_line 20 | format: json 21 | models: 22 | inventory: 23 | type: table 24 | fields: 25 | updated_at: 26 | type: string 27 | available: 28 | type: numeric 29 | location: 30 | type: string 31 | sku: 32 | type: string 33 | -------------------------------------------------------------------------------- /tests/fixtures/glue/datacontract-empty-model.yaml: 
-------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | servers: 7 | production: 8 | account: '123456789012' 9 | database: test_database 10 | location: s3://test_bucket/testdb 11 | type: glue 12 | models: 13 | table_1: 14 | type: table 15 | -------------------------------------------------------------------------------- /tests/fixtures/great-expectations/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.1 2 | id: my-data-contract-id 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | owner: checkout 7 | description: The orders data contract 8 | contact: 9 | email: team-orders@example.com 10 | url: https://wiki.example.com/teams/checkout 11 | models: 12 | orders: 13 | description: test 14 | fields: 15 | order_id: 16 | type: string 17 | required: true 18 | processed_timestamp: 19 | type: timestamp 20 | required: true 21 | quality: 22 | type: great-expectations 23 | specification: 24 | orders: |- 25 | [ 26 | { 27 | "expectation_type": "expect_table_row_count_to_be_between", 28 | "kwargs": { 29 | "min_value": 10 30 | }, 31 | "meta": { 32 | 33 | } 34 | } 35 | ] -------------------------------------------------------------------------------- /tests/fixtures/great-expectations/datacontract_missing_quality_file.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.1 2 | id: my-data-contract-id 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | owner: checkout 7 | description: The orders data contract 8 | contact: 9 | email: team-orders@example.com 10 | url: https://wiki.example.com/teams/checkout 11 | models: 12 | orders: 13 | description: test 14 | fields: 15 | order_id: 16 | type: string 17 | required: true 18 | processed_timestamp: 19 | type: timestamp 
20 | required: true 21 | quality: 22 | type: great-expectations 23 | specification: 24 | orders: 25 | $ref: ./fixtures/great-expectations/missing.json 26 | -------------------------------------------------------------------------------- /tests/fixtures/great-expectations/datacontract_quality_column.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: Orders Unit Test 5 | version: 1.1.1 6 | owner: checkout 7 | description: The orders data contract 8 | contact: 9 | email: team-orders@example.com 10 | url: https://wiki.example.com/teams/checkout 11 | models: 12 | orders: 13 | description: test 14 | fields: 15 | id: 16 | description: Unique identifier for each alert. 17 | type: string 18 | required: true 19 | primaryKey: true 20 | unique: true 21 | type: 22 | description: The type of alert that has fired. 23 | type: string 24 | required: true 25 | enum: [ "A", "B", "C", "D", "E" ] 26 | quality: 27 | - type: custom 28 | engine: great-expectations 29 | description: "Accepted Values for type" 30 | implementation: 31 | expectation_type: expect_column_value_lengths_to_equal 32 | kwargs: 33 | value: 1 34 | meta: 35 | notes: "Ensures that column length is 1." 
-------------------------------------------------------------------------------- /tests/fixtures/great-expectations/datacontract_quality_file.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.1 2 | id: my-data-contract-id 3 | 4 | info: 5 | title: Orders Unit Test 6 | version: 1.0.0 7 | owner: checkout 8 | description: The orders data contract 9 | contact: 10 | email: team-orders@example.com 11 | url: https://wiki.example.com/teams/checkout 12 | models: 13 | orders: 14 | description: test 15 | fields: 16 | order_id: 17 | type: string 18 | required: true 19 | processed_timestamp: 20 | type: timestamp 21 | required: true 22 | quality: 23 | type: great-expectations 24 | specification: 25 | orders: 26 | $ref: ./fixtures/great-expectations/quality.json 27 | -------------------------------------------------------------------------------- /tests/fixtures/great-expectations/datacontract_quality_yaml.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.1 2 | id: my-data-contract-id 3 | 4 | info: 5 | title: Orders Unit Test 6 | version: 1.0.0 7 | owner: checkout 8 | description: The orders data contract 9 | contact: 10 | email: team-orders@example.com 11 | url: https://wiki.example.com/teams/checkout 12 | models: 13 | orders: 14 | description: test 15 | fields: 16 | order_id: 17 | type: string 18 | required: true 19 | quality: 20 | - type: custom 21 | engine: great-expectations 22 | implementation: 23 | expectation_type: expect_table_row_count_to_be_between 24 | kwargs: 25 | min_value: 10 26 | meta: {} 27 | -------------------------------------------------------------------------------- /tests/fixtures/great-expectations/quality.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "expectation_type": "expect_table_row_count_to_be_between", 4 | "kwargs": { 5 | "min_value": 10 6 | }, 7 | 
"meta": { 8 | } 9 | } 10 | ] -------------------------------------------------------------------------------- /tests/fixtures/iceberg/invalid_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "fields": "not a list" 3 | } -------------------------------------------------------------------------------- /tests/fixtures/iceberg/simple_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | "fields": [ 4 | { 5 | "id": 1, 6 | "name": "foo", 7 | "type": "int", 8 | "required": true 9 | } 10 | ], 11 | "schema-id": 1, 12 | "identifier-field-ids": [ 13 | 1 14 | ] 15 | } -------------------------------------------------------------------------------- /tests/fixtures/import/football-datacontract.yml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | FootballSchema: 8 | description: Schema for football team and person details 9 | type: object 10 | title: FootballSchema 11 | definitions: 12 | person: 13 | name: person 14 | type: object 15 | fields: 16 | first_name: 17 | type: string 18 | required: true 19 | last_name: 20 | type: string 21 | required: true 22 | age: 23 | type: integer 24 | required: true 25 | football_team: 26 | name: football_team 27 | type: object 28 | fields: 29 | name: 30 | type: string 31 | required: true 32 | league: 33 | type: string 34 | required: true 35 | year_founded: 36 | type: integer 37 | required: false 38 | 39 | -------------------------------------------------------------------------------- /tests/fixtures/import/football.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "FootballSchema", 4 | "description": "Schema for football team and person 
details", 5 | "type": "object", 6 | "definitions": { 7 | "person": { 8 | "type": "object", 9 | "required": ["first_name", "last_name", "age"], 10 | "properties": { 11 | "first_name": {"type": "string"}, 12 | "last_name": {"type": "string"}, 13 | "age": {"type": "integer"} 14 | } 15 | }, 16 | "football_team": { 17 | "type": "object", 18 | "required": ["name", "league"], 19 | "properties": { 20 | "name": {"type": "string"}, 21 | "league": {"type": "string"}, 22 | "year_founded": {"type": "integer"} 23 | } 24 | } 25 | }, 26 | "allOf": [ 27 | {"$ref": "#/definitions/person"}, 28 | {"$ref": "#/definitions/football_team"} 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /tests/fixtures/import/football_deeply_nested_no_required.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "FootballSchema", 4 | "description": "Schema for football team and person details, where team is nested under the person", 5 | "type": "object", 6 | "properties": { 7 | "person": { 8 | "type": "object", 9 | "properties": { 10 | "first_name": { "type": "string" }, 11 | "last_name": { "type": "string" }, 12 | "age": { "type": "integer" }, 13 | "football_team": { 14 | "type": "object", 15 | "properties": { 16 | "name": { "type": "string" }, 17 | "league": { "type": "string" }, 18 | "year_founded": { "type": "integer" } 19 | } 20 | } 21 | }, 22 | "required": ["first_name", "last_name", "age"] 23 | } 24 | }, 25 | "required": ["person"] 26 | } 27 | -------------------------------------------------------------------------------- /tests/fixtures/import/football_deeply_nested_no_required_datacontract.yml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | FootballSchema: 8 | 
description: Schema for football team and person details, where team is nested under the person 9 | type: object 10 | title: FootballSchema 11 | fields: 12 | person: 13 | type: object 14 | required: true 15 | fields: 16 | first_name: 17 | type: string 18 | required: true 19 | last_name: 20 | type: string 21 | required: true 22 | age: 23 | type: integer 24 | required: true 25 | football_team: 26 | type: object 27 | required: false 28 | fields: 29 | name: 30 | type: string 31 | required: false 32 | league: 33 | type: string 34 | required: false 35 | year_founded: 36 | type: integer 37 | required: false 38 | 39 | -------------------------------------------------------------------------------- /tests/fixtures/import/orders.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "OrderSchema", 4 | "description": "Schema for order details", 5 | "type": "object", 6 | "properties": { 7 | "order_id": { 8 | "type": "string", 9 | "title": "Order ID", 10 | "description": "Unique identifier for the order" 11 | }, 12 | "order_timestamp": { 13 | "type": "string", 14 | "format": "date-time", 15 | "title": "Order Timestamp", 16 | "description": "Timestamp when the order was placed" 17 | }, 18 | "order_total": { 19 | "type": "integer", 20 | "title": "Order Total", 21 | "description": "Total amount of the order" 22 | }, 23 | "line_items": { 24 | "type": "array", 25 | "title": "Line Items", 26 | "items": { 27 | "type" : ["integer", "null"] 28 | } 29 | }, 30 | "customer_id": { 31 | "type": [ 32 | "string", 33 | "null" 34 | ], 35 | "minLength": 10, 36 | "maxLength": 20, 37 | "title": "Customer ID", 38 | "description": "Unique identifier for the customer" 39 | }, 40 | "customer_email_address": { 41 | "type": "string", 42 | "format": "email", 43 | "title": "Customer Email Address", 44 | "description": "Email address of the customer" 45 | }, 46 | "processed_timestamp": { 47 | "type": 
"string", 48 | "format": "date-time", 49 | "title": "Processed Timestamp", 50 | "description": "Timestamp when the order was processed" 51 | } 52 | }, 53 | "required": [ 54 | "order_id", 55 | "order_timestamp", 56 | "order_total", 57 | "line_items", 58 | "customer_email_address", 59 | "processed_timestamp" 60 | ] 61 | } -------------------------------------------------------------------------------- /tests/fixtures/import/orders_union-types.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "OrderSchema", 4 | "description": "Schema for order details", 5 | "type": "object", 6 | "properties": { 7 | "order_id": { 8 | "type": "string", 9 | "title": "Order ID", 10 | "description": "Unique identifier for the order" 11 | }, 12 | "order_timestamp": { 13 | "type": "string", 14 | "format": "date-time", 15 | "title": "Order Timestamp", 16 | "description": "Timestamp when the order was placed" 17 | }, 18 | "order_total": { 19 | "type": "integer", 20 | "title": "Order Total", 21 | "description": "Total amount of the order" 22 | }, 23 | "line_items": { 24 | "type": "array", 25 | "title": "Line Items", 26 | "items": { 27 | "type" : "integer" 28 | } 29 | }, 30 | "vouchers": { 31 | "type": "array", 32 | "title": "List of used vouchers", 33 | "items": [ 34 | { 35 | "type": "integer" 36 | } 37 | ] 38 | }, 39 | "customer_id": { 40 | "type": [ 41 | "string", 42 | "null" 43 | ], 44 | "minLength": 10, 45 | "maxLength": 20, 46 | "title": "Customer ID", 47 | "description": "Unique identifier for the customer" 48 | }, 49 | "customer_email_address": { 50 | "type": "string", 51 | "format": "email", 52 | "title": "Customer Email Address", 53 | "description": "Email address of the customer" 54 | }, 55 | "processed_timestamp": { 56 | "type": "string", 57 | "format": "date-time", 58 | "title": "Processed Timestamp", 59 | "description": "Timestamp when the order was processed" 60 | } 61 
| }, 62 | "required": [ 63 | "order_id", 64 | "order_timestamp", 65 | "order_total", 66 | "customer_email_address", 67 | "processed_timestamp" 68 | ] 69 | } -------------------------------------------------------------------------------- /tests/fixtures/import/orders_union-types_datacontract.yml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | OrderSchema: 8 | description: Schema for order details 9 | type: object 10 | title: OrderSchema 11 | fields: 12 | order_id: 13 | title: Order ID 14 | type: string 15 | required: true 16 | description: Unique identifier for the order 17 | order_timestamp: 18 | title: Order Timestamp 19 | type: string 20 | format: date-time 21 | required: true 22 | description: Timestamp when the order was placed 23 | order_total: 24 | title: Order Total 25 | type: integer 26 | required: true 27 | description: Total amount of the order 28 | line_items: 29 | title: Line Items 30 | type: array 31 | required: false 32 | items: 33 | type: integer 34 | vouchers: 35 | title: List of used vouchers 36 | type: array 37 | required: false 38 | items: 39 | type: integer 40 | customer_id: 41 | title: Customer ID 42 | type: string 43 | required: false 44 | description: Unique identifier for the customer 45 | minLength: 10 46 | maxLength: 20 47 | customer_email_address: 48 | title: Customer Email Address 49 | type: string 50 | format: email 51 | required: true 52 | description: Email address of the customer 53 | processed_timestamp: 54 | title: Processed Timestamp 55 | type: string 56 | format: date-time 57 | required: true 58 | description: Timestamp when the order was processed -------------------------------------------------------------------------------- /tests/fixtures/junit/data/somedata.csv: -------------------------------------------------------------------------------- 1 | 1,abc 
-------------------------------------------------------------------------------- /tests/fixtures/junit/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: junit_test 3 | info: 4 | title: Sample contract to demonstrate the JUnit output format 5 | version: 1.0.0 6 | owner: my-domain-team 7 | servers: 8 | local: 9 | type: local 10 | path: ./fixtures/junit/data/somedata.csv 11 | format: csv 12 | models: 13 | my_object: 14 | fields: 15 | field_ok: 16 | description: This field is OK 17 | type: integer 18 | required: true 19 | minimum: 0 20 | field_nok: 21 | description: This check should fail 22 | type: string 23 | required: true 24 | minLength: 4 25 | -------------------------------------------------------------------------------- /tests/fixtures/kafka-avro-remote/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders 3 | info: 4 | title: Orders 5 | version: 0.0.1 6 | description: Order messages as generated by Confluent Datagen Source Adapter 7 | servers: 8 | production: 9 | type: kafka 10 | host: pkc-7xoy1.eu-central-1.aws.confluent.cloud:9092 11 | topic: orders.avro.v1 12 | format: avro 13 | models: 14 | orders: 15 | type: table 16 | description: My Model 17 | namespace: com.example.checkout 18 | fields: 19 | ordertime: 20 | type: bigint 21 | description: My Field 22 | orderid: 23 | type: int 24 | itemid: 25 | type: string 26 | orderunits: 27 | type: double 28 | address: 29 | type: object 30 | fields: 31 | city: 32 | type: string 33 | state: 34 | type: string 35 | zipcode: 36 | type: long 37 | quality: 38 | type: SodaCL 39 | specification: 40 | checks for orders: 41 | - row_count >= 5000 42 | 43 | -------------------------------------------------------------------------------- /tests/fixtures/kafka-json-remote/datacontract.yaml: 
-------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders 3 | info: 4 | title: Orders 5 | version: 0.0.1 6 | description: Order messages as generated by Confluent Datagen Source Adapter 7 | servers: 8 | production: 9 | type: kafka 10 | host: pkc-7xoy1.eu-central-1.aws.confluent.cloud:9092 11 | topic: datamesh.orders.v1 12 | format: json 13 | models: 14 | orders: 15 | type: table 16 | fields: 17 | ordertime: 18 | type: bigint 19 | required: true 20 | orderid: 21 | type: int 22 | itemid: 23 | type: string 24 | orderunits: 25 | type: double 26 | address: 27 | type: object 28 | fields: 29 | city: 30 | type: string 31 | state: 32 | type: string 33 | zipcode: 34 | type: string 35 | quality: 36 | type: SodaCL 37 | specification: 38 | checks for orders: 39 | - row_count >= 5000 40 | 41 | -------------------------------------------------------------------------------- /tests/fixtures/kafka/data/messages.json: -------------------------------------------------------------------------------- 1 | {"updated_at":"2022-04-20T13:50:34.228811Z","available":17,"location":"18","sku":"9521582929054"} 2 | {"updated_at":"2022-04-20T13:50:34.589142Z","available":16,"location":"18","sku":"9521582929054"} 3 | {"updated_at":"2022-04-20T13:50:34.589501Z","available":15,"location":"18","sku":"9521582929054"} 4 | {"updated_at":"2022-04-20T13:50:34.589771Z","available":14,"location":"18","sku":"9521582929054"} 5 | {"updated_at":"2022-04-20T13:50:34.590008Z","available":13,"location":"18","sku":"9521582929054"} 6 | {"updated_at":"2022-04-20T13:50:34.590261Z","available":12,"location":"18","sku":"9521582929054"} 7 | {"updated_at":"2022-04-20T13:50:34.590559Z","available":11,"location":"18","sku":"9521582929054"} 8 | {"updated_at":"2022-04-20T13:50:34.590831Z","available":12,"location":"18","sku":"9521582929054"} 9 | {"updated_at":"2022-04-20T13:50:34.591076Z","available":11,"location":"18","sku":"9521582929054"} 10 | 
{"updated_at":"2022-04-20T13:50:34.591308Z","available":10,"location":"18","sku":"9521582929054"} -------------------------------------------------------------------------------- /tests/fixtures/kafka/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: inventory-events 3 | info: 4 | title: Inventory Events 5 | version: 0.0.1 6 | servers: 7 | production: 8 | type: kafka 9 | topic: inventory-events 10 | host: __KAFKA_HOST__ 11 | format: json 12 | dataProductId: inventory 13 | outputPortId: s3 14 | models: 15 | inventory: 16 | type: table 17 | fields: 18 | updated_at: 19 | type: string 20 | available: 21 | type: int 22 | location: 23 | type: string 24 | sku: 25 | type: string 26 | quality: 27 | type: SodaCL 28 | specification: 29 | checks for inventory: 30 | - row_count >= 10 31 | -------------------------------------------------------------------------------- /tests/fixtures/lint/custom_datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 0.9.2 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | my-custom-required-field: hello 7 | description: Custom data contract description. 
8 | -------------------------------------------------------------------------------- /tests/fixtures/lint/datacontract_csv_lint_base.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | orders: 8 | fields: 9 | column_1: 10 | type: string 11 | column_2: 12 | type: string 13 | examples: 14 | - type: csv 15 | model: orders 16 | data: |- 17 | column_1, column_2 18 | value_1, value_2 19 | -------------------------------------------------------------------------------- /tests/fixtures/lint/datacontract_quality_schema.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | models: 7 | orders: 8 | fields: 9 | column_1: 10 | type: string 11 | column_2: 12 | type: string 13 | quality: 14 | type: SodaCL 15 | specification: |- 16 | checks for orders: 17 | - freshness(column_1) < 1d 18 | -------------------------------------------------------------------------------- /tests/fixtures/lint/datacontract_unknown_model.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | examples: 7 | - type: csv 8 | model: orders 9 | data: |- 10 | column_1, column_2 11 | value_1, value_2 12 | -------------------------------------------------------------------------------- /tests/fixtures/lint/invalid_datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | #id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | -------------------------------------------------------------------------------- 
/tests/fixtures/lint/valid_datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | description: An empty data contract 7 | -------------------------------------------------------------------------------- /tests/fixtures/lint/valid_datacontract_ref.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: urn:datacontract:checkout:orders-latest 3 | info: 4 | title: Orders Latest 5 | version: 1.0.0 6 | description: | 7 | Successful customer orders in the webshop. 8 | All orders since 2020-01-01. 9 | Orders with their line items are in their current state (no history included). 10 | models: 11 | orders: 12 | description: One record per order. Includes cancelled and deleted orders. 13 | type: table 14 | fields: 15 | order_id: 16 | $ref: '#/definitions/order_id' 17 | required: true 18 | unique: true 19 | primaryKey: true 20 | definitions: 21 | order_id: 22 | domain: checkout 23 | name: order_id 24 | title: Order ID 25 | type: text 26 | format: uuid 27 | description: An internal ID that identifies an order in the online shop. 28 | example: 243c25e5-a081-43a9-aeab-6d5d5b6cb5e2 29 | pii: true 30 | classification: restricted 31 | -------------------------------------------------------------------------------- /tests/fixtures/lint/valid_datacontract_references.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: urn:datacontract:checkout:orders-latest 3 | info: 4 | title: Orders Latest 5 | version: 1.0.0 6 | description: Data contract for orders 7 | models: 8 | orders: 9 | description: One record per order. 10 | type: table 11 | fields: 12 | order_id: 13 | type: string 14 | primaryKey: true 15 | description: Unique identifier for the order. 
16 | line_items: 17 | description: One record per line item in an order. 18 | type: table 19 | fields: 20 | order_id: 21 | type: string 22 | references: orders.order_id 23 | description: Reference to a field in the orders table. 24 | -------------------------------------------------------------------------------- /tests/fixtures/local-delta/data/line_items/0-7b7ac87a-16b4-43be-b019-de661a3180cf-0.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/local-delta/data/line_items/0-7b7ac87a-16b4-43be-b019-de661a3180cf-0.parquet -------------------------------------------------------------------------------- /tests/fixtures/local-delta/data/line_items/_delta_log/00000000000000000000.json: -------------------------------------------------------------------------------- 1 | {"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 2 | {"metaData":{"id":"4df5ab31-bc35-478a-a175-bf27fc05d3a4","name":null,"description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"line_item_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"order_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sku\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1721891765448,"configuration":{}}} 3 | {"add":{"path":"0-7b7ac87a-16b4-43be-b019-de661a3180cf-0.parquet","partitionValues":{},"size":1414,"modificationTime":1721891765441,"dataChange":true,"stats":"{\"numRecords\": 12, \"minValues\": {\"line_item_id\": \"LI-001\", \"order_id\": 1001, \"sku\": \"SKU-12345\"}, \"maxValues\": {\"line_item_id\": \"LI-012\", \"order_id\": 1008, \"sku\": \"SKU-12356\"}, \"nullCount\": {\"line_item_id\": 0, \"order_id\": 0, \"sku\": 
0}}","tags":null,"deletionVector":null,"baseRowId":null,"defaultRowCommitVersion":null,"clusteringProvider":null}} 4 | {"commitInfo":{"timestamp":1721891765448,"operation":"CREATE TABLE","operationParameters":{"mode":"ErrorIfExists","metadata":"{\"configuration\":{},\"createdTime\":1721891765448,\"description\":null,\"format\":{\"options\":{},\"provider\":\"parquet\"},\"id\":\"4df5ab31-bc35-478a-a175-bf27fc05d3a4\",\"name\":null,\"partitionColumns\":[],\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"line_item_id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"order_id\\\",\\\"type\\\":\\\"long\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"sku\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\"}","location":"file:///C:/Users/harsh/OneDrive/Desktop/New%2520folder/data/line_items","protocol":"{\"minReaderVersion\":1,\"minWriterVersion\":2}"},"clientVersion":"delta-rs.0.18.1"}} -------------------------------------------------------------------------------- /tests/fixtures/local-delta/data/orders/0-5014bd96-6666-482e-bec9-d02a43a78cfb-0.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/local-delta/data/orders/0-5014bd96-6666-482e-bec9-d02a43a78cfb-0.parquet -------------------------------------------------------------------------------- /tests/fixtures/local-delta/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders-unit-test 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/local-delta/data/orders 10 | format: delta 11 | dataProductId: orders 12 | models: 13 | orders: 14 | fields: 15 | order_id: 16 | type: varchar 17 | 
unique: true 18 | required: true 19 | order_timestamp: 20 | required: true 21 | order_total: 22 | type: bigint 23 | required: true 24 | -------------------------------------------------------------------------------- /tests/fixtures/local-json-complex/data/sts_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "array_test_string": ["test1", "test2"], 3 | "array_test_object": [ 4 | { 5 | "key": "key1", 6 | "value": "value1" 7 | }, 8 | { 9 | "key": "key2", 10 | "value": "value2" 11 | } 12 | ], 13 | "id": "11111111", 14 | "sts_data": { 15 | "connection_test": "SUCCESS", 16 | "key_list": { 17 | "0": { 18 | "key": "12345678" 19 | }, 20 | "1": { 21 | "key": "23456789" 22 | } 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /tests/fixtures/local-json/data/nested_types.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 1, 4 | "tags": [ 5 | { 6 | "foo": "bar", 7 | "arr": [ 1, 2, 3 ] 8 | }, 9 | { 10 | "foo": "lap", 11 | "arr": [ 4 ] 12 | } 13 | ], 14 | "name": { 15 | "first": "John", 16 | "last": "Doe" 17 | } 18 | }, 19 | { 20 | "id": 2, 21 | "tags": [ 22 | { 23 | "foo": "zap", 24 | "arr": [ ] 25 | } 26 | ] 27 | } 28 | ] -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/array.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/array.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/bigint.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/bigint.parquet 
-------------------------------------------------------------------------------- /tests/fixtures/parquet/data/blob.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/blob.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/boolean.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/boolean.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/combined.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/combined.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/combined_no_time.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/combined_no_time.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/date.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/date.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/decimal.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/decimal.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/double.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/double.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/float.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/float.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/integer.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/integer.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/list.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/list.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/map.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/map.parquet 
-------------------------------------------------------------------------------- /tests/fixtures/parquet/data/string.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/string.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/struct.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/struct.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/time.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/time.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/timestamp.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/timestamp.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/data/timestamp_ntz.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/timestamp_ntz.parquet -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_array.yaml: 
-------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: array-unit-test 3 | info: 4 | title: Array Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/array.parquet 10 | format: parquet 11 | models: 12 | example: 13 | description: Test data with an array 14 | type: table 15 | fields: 16 | array_field: 17 | type: array 18 | items: 19 | type: integer 20 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_bigint.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: bigint-unit-test 3 | info: 4 | title: BigInt Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/bigint.parquet 10 | format: parquet 11 | models: 12 | example: 13 | fields: 14 | bigint_field: 15 | type: bigint 16 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_binary.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: blob-unit-test 3 | info: 4 | title: Blob Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/blob.parquet 10 | format: parquet 11 | models: 12 | blob: 13 | description: Test data with binary field 14 | type: table 15 | fields: 16 | blob_field: 17 | type: bytes 18 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_boolean.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: boolean-unit-test 3 | info: 4 | title: Boolean Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: 
./fixtures/parquet/data/boolean.parquet 10 | format: parquet 11 | models: 12 | example: 13 | fields: 14 | boolean_field: 15 | type: boolean 16 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_date.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: date-unit-test 3 | info: 4 | title: Date Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/date.parquet 10 | format: parquet 11 | models: 12 | example: 13 | fields: 14 | date_field: 15 | type: date 16 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_decimal.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders-unit-test 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/decimal.parquet 10 | format: parquet 11 | dataProductId: orders 12 | outputPortId: parquet 13 | models: 14 | orders: 15 | fields: 16 | decimal_field: 17 | type: decimal 18 | precision: 10 19 | scale: 2 20 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_double.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: double-unit-test 3 | info: 4 | title: Double Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/double.parquet 10 | format: parquet 11 | models: 12 | example: 13 | fields: 14 | double_field: 15 | type: double 16 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_float.yaml: 
-------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: float-unit-test 3 | info: 4 | title: Float Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/float.parquet 10 | format: parquet 11 | models: 12 | example: 13 | fields: 14 | float_field: 15 | type: float 16 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_integer.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: integer-unit-test 3 | info: 4 | title: Integer Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/integer.parquet 10 | format: parquet 11 | models: 12 | example: 13 | fields: 14 | integer_field: 15 | type: integer 16 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_invalid.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: orders-unit-test 3 | info: 4 | title: Orders Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/orders.parquet 10 | format: parquet 11 | dataProductId: orders 12 | outputPortId: parquet 13 | models: 14 | orders: 15 | fields: 16 | order_id: 17 | type: date # this is not true 18 | unique: true 19 | required: true 20 | some_extra_field: #does not exist 21 | type: long 22 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_map.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: map-unit-test 3 | info: 4 | title: Map Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: 
./fixtures/parquet/data/map.parquet 10 | format: parquet 11 | models: 12 | example: 13 | fields: 14 | map_field: 15 | type: map 16 | keys: 17 | type: string 18 | values: 19 | type: string 20 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_string.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: string-unit-test 3 | info: 4 | title: String Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/string.parquet 10 | format: parquet 11 | models: 12 | example: 13 | fields: 14 | string_field: 15 | type: varchar 16 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_struct.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: struct-unit-test 3 | info: 4 | title: Struct Unit Test 5 | version: 1.0.0 6 | servers: 7 | production: 8 | type: local 9 | path: ./fixtures/parquet/data/struct.parquet 10 | format: parquet 11 | models: 12 | example: 13 | fields: 14 | struct_field: 15 | type: struct 16 | fields: 17 | a: 18 | type: integer 19 | b: 20 | type: varchar 21 | 22 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_timestamp.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: iceberg-ingestion 3 | info: 4 | title: ingestion to s3/iceberg 5 | version: 0.0.1 6 | description: The ingestion of parquet files from s3 into iceberg table format 7 | servers: 8 | test: 9 | type: local 10 | path: "./fixtures/parquet/data/timestamp.parquet" 11 | format: parquet 12 | models: 13 | example: 14 | type: table 15 | fields: 16 | timestamp_field: 17 | type: timestamp_tz 18 | description: CREATEDDATE 19 | 
required: true 20 | -------------------------------------------------------------------------------- /tests/fixtures/parquet/datacontract_timestamp_ntz.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: parquet-timestamp-ntz-test 3 | info: 4 | title: Parquet Timestamp w/o timezone test 5 | version: 0.0.1 6 | description: Parquet Timestamp w/o timezone test 7 | servers: 8 | test: 9 | type: local 10 | path: "./fixtures/parquet/data/timestamp_ntz.parquet" 11 | format: parquet 12 | models: 13 | example: 14 | type: table 15 | fields: 16 | timestamp: 17 | type: timestamp_ntz 18 | description: CREATEDDATE 19 | required: true 20 | -------------------------------------------------------------------------------- /tests/fixtures/postgres-export/data/data.sql: -------------------------------------------------------------------------------- 1 | -- Create the table 2 | CREATE TABLE public.my_table ( 3 | field_one VARCHAR(10) primary key, 4 | field_two INT not null, 5 | field_three TIMESTAMP 6 | ); 7 | 8 | -- Insert the data 9 | INSERT INTO public.my_table (field_one, field_two, field_three) VALUES 10 | ('CX-263-DU', 50, '2023-06-16 13:12:56'), 11 | ('IK-894-MN', 47, '2023-10-08 22:40:57'), 12 | ('ER-399-JY', 22, '2023-05-16 01:08:22'), 13 | ('MT-939-FH', 63, '2023-03-15 05:15:21'), 14 | ('LV-849-MI', 33, '2023-09-08 20:08:43'), 15 | ('VS-079-OH', 85, '2023-04-15 00:50:32'), 16 | ('DN-297-XY', 79, '2023-11-08 12:55:42'), 17 | ('ZE-172-FP', 14, '2023-12-03 18:38:38'), 18 | ('ID-840-EG', 89, '2023-10-02 17:17:58'), 19 | ('FK-230-KZ', 64, '2023-11-27 15:21:48'); 20 | -------------------------------------------------------------------------------- /tests/fixtures/postgres-export/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: postgres 3 | info: 4 | title: postgres 5 | version: 0.0.1 6 | owner: my-domain-team 
7 | servers: 8 | production: 9 | type: postgres 10 | host: localhost 11 | port: 4567 12 | database: test 13 | schema: public 14 | staging: 15 | type: postgres 16 | host: localhost 17 | port: 4567 18 | database: test 19 | schema: public 20 | models: 21 | my_table: 22 | type: table 23 | fields: 24 | field_one: 25 | type: varchar 26 | required: true 27 | unique: true 28 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$" 29 | field_two: 30 | type: integer 31 | minimum: 10 32 | field_three: 33 | type: timestamp 34 | -------------------------------------------------------------------------------- /tests/fixtures/postgres/data/data.sql: -------------------------------------------------------------------------------- 1 | -- Create the table 2 | CREATE TABLE public.my_table ( 3 | field_one VARCHAR(10) primary key, 4 | field_two INT not null, 5 | field_three TIMESTAMPTZ 6 | ); 7 | 8 | -- Insert the data 9 | INSERT INTO public.my_table (field_one, field_two, field_three) VALUES 10 | ('CX-263-DU', 50, '2023-06-16 13:12:56'), 11 | ('IK-894-MN', 47, '2023-10-08 22:40:57'), 12 | ('ER-399-JY', 22, '2023-05-16 01:08:22'), 13 | ('MT-939-FH', 63, '2023-03-15 05:15:21'), 14 | ('LV-849-MI', 33, '2023-09-08 20:08:43'), 15 | ('VS-079-OH', 85, '2023-04-15 00:50:32'), 16 | ('DN-297-XY', 79, '2023-11-08 12:55:42'), 17 | ('ZE-172-FP', 14, '2023-12-03 18:38:38'), 18 | ('ID-840-EG', 89, '2023-10-02 17:17:58'), 19 | ('FK-230-KZ', 64, '2023-11-27 15:21:48'); 20 | -------------------------------------------------------------------------------- /tests/fixtures/postgres/data/data_case_sensitive.sql: -------------------------------------------------------------------------------- 1 | -- Create the table 2 | CREATE TABLE public."My_Table2" ( 3 | "Field_one" VARCHAR(10) primary key, 4 | "Field_two" INT not null, 5 | "Field_three" TIMESTAMPTZ 6 | ); 7 | 8 | -- Insert the data 9 | INSERT INTO public."My_Table2" ("Field_one", "Field_two", "Field_three") VALUES 10 | ('CX-263-DU', 50, '2023-06-16 13:12:56'), 11 
| ('IK-894-MN', 47, '2023-10-08 22:40:57'), 12 | ('ER-399-JY', 22, '2023-05-16 01:08:22'), 13 | ('MT-939-FH', 63, '2023-03-15 05:15:21'), 14 | ('LV-849-MI', 33, '2023-09-08 20:08:43'), 15 | ('VS-079-OH', 85, '2023-04-15 00:50:32'), 16 | ('DN-297-XY', 79, '2023-11-08 12:55:42'), 17 | ('ZE-172-FP', 14, '2023-12-03 18:38:38'), 18 | ('ID-840-EG', 89, '2023-10-02 17:17:58'), 19 | ('FK-230-KZ', 64, '2023-11-27 15:21:48'); 20 | -------------------------------------------------------------------------------- /tests/fixtures/postgres/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: postgres 3 | info: 4 | title: postgres 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | my-dataproduct/postgres: 9 | type: postgres 10 | host: localhost 11 | port: 5432 12 | database: test 13 | schema: public 14 | models: 15 | my_table_old_name: 16 | type: table 17 | fields: 18 | field_one: 19 | type: varchar 20 | required: true 21 | unique: true 22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$" 23 | field_two: 24 | type: integer 25 | minimum: 10 26 | field_three: 27 | type: timestamp 28 | config: 29 | postgresTable: my_table -------------------------------------------------------------------------------- /tests/fixtures/postgres/datacontract_case_sensitive.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: postgres 3 | info: 4 | title: postgres 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | my-dataproduct/postgres: 9 | type: postgres 10 | host: localhost 11 | port: 5432 12 | database: test 13 | schema: public 14 | models: 15 | My_Table2: 16 | type: table 17 | fields: 18 | Field_one: 19 | type: varchar 20 | required: true 21 | unique: true 22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$" 23 | Field_two: 24 | type: integer 25 | minimum: 10 26 | Field_three: 27 | type: timestamp 28 | 
-------------------------------------------------------------------------------- /tests/fixtures/postgres/datacontract_servicelevels.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: postgres 3 | info: 4 | title: postgres 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | my-dataproduct/postgres: 9 | type: postgres 10 | host: localhost 11 | port: 5432 12 | database: test 13 | schema: public 14 | models: 15 | my_table: 16 | type: table 17 | fields: 18 | field_one: 19 | type: varchar 20 | required: true 21 | unique: true 22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$" 23 | field_two: 24 | type: integer 25 | minimum: 10 26 | field_three: 27 | type: timestamp 28 | servicelevels: 29 | freshness: 30 | description: This is expected to fail 31 | threshold: PT1H 32 | timestampField: my_table.field_three 33 | -------------------------------------------------------------------------------- /tests/fixtures/postgres/odcs.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v3.0.0 2 | kind: DataContract 3 | id: postgres 4 | name: postgres 5 | version: 0.0.1 6 | domain: my-domain-team 7 | status: null 8 | schema: 9 | - name: my_table 10 | physicalName: my_table 11 | logicalType: object 12 | physicalType: table 13 | properties: 14 | - name: field_one 15 | logicalType: string 16 | physicalType: varchar 17 | isNullable: false 18 | isUnique: true 19 | logicalTypeOptions: 20 | pattern: '[A-Za-z]{2}-\d{3}-[A-Za-z]{2}$' 21 | - name: field_two 22 | logicalType: integer 23 | physicalType: integer 24 | isNullable: true 25 | isUnique: false 26 | logicalTypeOptions: 27 | minimum: 10 28 | quality: 29 | - type: sql 30 | description: Less than 5% of null values 31 | query: | 32 | SELECT (COUNT(*) FILTER (WHERE field_two IS NULL) * 100.0 / COUNT(*)) AS null_percentage 33 | FROM my_table 34 | mustBeLessThan: 5 35 | - name: field_three 36 | logicalType: 
date 37 | physicalType: timestamptz 38 | isNullable: true 39 | isUnique: false 40 | quality: 41 | - type: sql 42 | query: | 43 | SELECT COUNT(*) FROM my_table WHERE field_two IS NOT NULL 44 | mustBeLessThan: 3600 45 | servers: 46 | - server: postgres 47 | type: postgres 48 | database: test 49 | schema: public 50 | host: localhost 51 | port: 5432 -------------------------------------------------------------------------------- /tests/fixtures/protobuf/data/sample_data.proto3.data: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package example; 4 | 5 | // Enum for product category 6 | enum Category { 7 | CATEGORY_UNKNOWN = 0; 8 | CATEGORY_ELECTRONICS = 1; 9 | CATEGORY_CLOTHING = 2; 10 | CATEGORY_HOME_APPLIANCES = 3; 11 | } 12 | 13 | // Message representing product attributes 14 | message Product { 15 | string id = 1; 16 | string name = 2; 17 | double price = 3; 18 | Category category = 4; 19 | repeated string tags = 5; 20 | repeated Review reviews = 6; 21 | } 22 | 23 | // Message representing a review 24 | message Review { 25 | string user = 1; 26 | int32 rating = 2; 27 | string comment = 3; 28 | } 29 | -------------------------------------------------------------------------------- /tests/fixtures/quality/data/data.invalid.sql: -------------------------------------------------------------------------------- 1 | -- Create the table 2 | CREATE TABLE public.my_table ( 3 | field_one VARCHAR(10) primary key, 4 | field_two INT not null, 5 | field_three TIMESTAMPTZ 6 | ); 7 | 8 | -- Insert the data 9 | INSERT INTO public.my_table (field_one, field_two, field_three) VALUES 10 | ('CX-263-DU', 50, '2023-06-16 13:12:56'), 11 | ('IK-894-MN', 47, '2023-10-08 22:40:57'), 12 | ('ER-399-JY', 22, '2023-05-16 01:08:22'), 13 | ('MT-939-FH', 63, '2023-03-15 05:15:21'), 14 | ('LV-849-MI', 33, '2023-09-08 20:08:43'), 15 | ('VS-079-OH', 85, '2023-04-15 00:50:32'), 16 | ('DN-297-XY', 79, '2023-11-08 12:55:42'), 17 | 
('ZE-172-FP', 14, '2023-12-03 18:38:38'), 18 | ('ID-840-EG', 89, '2023-10-02 17:17:58'), 19 | ('FK-230-KZ', 64, '2023-11-27 15:21:48'); 20 | -------------------------------------------------------------------------------- /tests/fixtures/quality/data/data.valid.sql: -------------------------------------------------------------------------------- 1 | -- Create the table 2 | CREATE TABLE public.my_table ( 3 | field_one VARCHAR(10) primary key, 4 | field_two INT, 5 | field_three TIMESTAMPTZ 6 | ); 7 | 8 | -- Insert the data 9 | INSERT INTO public.my_table (field_one, field_two, field_three) VALUES 10 | ('CX-263-DU', 5000, '2023-01-01 00:00:00'), 11 | ('IK-894-MN', 4700, '2023-01-01 00:59:00'), 12 | ('ER-399-JY', 2200, '2023-01-01 01:58:00'), 13 | ('MT-939-FH', 6300, '2023-01-01 02:00:00'), 14 | ('LV-849-MI', 3300, '2023-01-01 02:30:00'), 15 | ('VS-079-OH', 8500, '2023-01-01 03:00:00'), 16 | ('DN-297-XY', 7900, '2023-01-01 03:30:00'), 17 | ('ZE-172-FP', 1400, '2023-01-01 04:00:00'), 18 | ('ID-840-EG', 8900, '2023-01-01 04:50:00'), 19 | ('FK-230-KZ', 10, '2023-01-01 04:50:00'); 20 | -------------------------------------------------------------------------------- /tests/fixtures/quality/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: postgres 3 | info: 4 | title: postgres 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | my-dataproduct/postgres: 9 | type: postgres 10 | host: localhost 11 | port: 5432 12 | database: test 13 | schema: public 14 | models: 15 | my_table: 16 | type: table 17 | fields: 18 | field_one: 19 | type: varchar 20 | required: true 21 | unique: true 22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$" 23 | field_two: 24 | type: integer 25 | minimum: 10 26 | quality: 27 | # field level quality checks 28 | - type: sql 29 | description: 95% of all order total values are expected to be between 10 and 499 EUR. 
30 | dialect: postgres 31 | query: SELECT percentile_cont(0.95) WITHIN GROUP (ORDER BY field_two) AS percentile_95 FROM my_table 32 | mustBeBetween: [ 1000, 49900 ] 33 | field_three: 34 | type: timestamp 35 | # model level quality checks 36 | quality: 37 | - type: sql 38 | description: The maximum duration between two orders should be less than 3600 seconds 39 | dialect: postgres 40 | query: | 41 | SELECT MAX(duration) AS max_duration 42 | FROM ( 43 | SELECT EXTRACT(EPOCH FROM (field_three - LAG(field_three) OVER (ORDER BY field_three))) AS duration 44 | FROM my_table 45 | ) subquery; 46 | mustBeLessThan: 3600 47 | - type: sql 48 | description: Row Count 49 | query: | 50 | SELECT count(*) as row_count 51 | FROM {model} 52 | mustBeGreaterThan: 5 53 | -------------------------------------------------------------------------------- /tests/fixtures/s3-csv/data/sample_data.csv: -------------------------------------------------------------------------------- 1 | field_one,field_two,field_three 2 | CX-263-DU,50,2023-06-16 13:12:56 3 | IK-894-MN,47,2023-10-08 22:40:57 4 | ER-399-JY,22,2023-05-16 01:08:22 5 | MT-939-FH,63,2023-03-15 05:15:21 6 | LV-849-MI,33,2023-09-08 20:08:43 7 | VS-079-OH,85,2023-04-15 00:50:32 8 | DN-297-XY,79,2023-11-08 12:55:42 9 | ZE-172-FP,14,2023-12-03 18:38:38 10 | ID-840-EG,89,2023-10-02 17:17:58 11 | FK-230-KZ,64,2023-11-27 15:21:48 12 | -------------------------------------------------------------------------------- /tests/fixtures/s3-csv/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: s3-csv 3 | info: 4 | title: s30-csv 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | my-dataproduct/s3: 9 | type: s3 10 | endpointUrl: __S3_ENDPOINT_URL__ 11 | location: s3://test-bucket/fixtures/s3-csv/data/sample_data.csv 12 | format: csv 13 | dataProductId: my-dataproduct 14 | outputPortId: s3 15 | models: 16 | my_table: 17 | type: table 18 | fields: 
19 | field_one: 20 | type: varchar 21 | required: true 22 | unique: true 23 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$" 24 | field_two: 25 | type: bigint 26 | minimum: 10 27 | field_three: 28 | type: timestamp 29 | -------------------------------------------------------------------------------- /tests/fixtures/s3-delta/data/orders.delta/0-66aaa7ef-36e3-4985-9359-72874e273705-0.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/s3-delta/data/orders.delta/0-66aaa7ef-36e3-4985-9359-72874e273705-0.parquet -------------------------------------------------------------------------------- /tests/fixtures/s3-delta/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: s3-delta-orders 3 | info: 4 | title: S3 Delta Table Test 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | orders/s3: 9 | type: s3 10 | endpointUrl: __S3_ENDPOINT_URL__ 11 | location: s3://test-bucket/fixtures/s3-delta/data/orders.delta 12 | format: delta 13 | dataProductId: orders 14 | outputPortId: s3 15 | models: 16 | orders: 17 | type: table 18 | fields: 19 | order_id: 20 | type: varchar 21 | unique: true 22 | required: true 23 | -------------------------------------------------------------------------------- /tests/fixtures/s3-delta/helper/create_delta_files.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | from deltalake.writer import write_deltalake 5 | 6 | # Ensure the required directory exists 7 | output_dir = "../data" 8 | if not os.path.exists(output_dir): 9 | os.makedirs(output_dir) 10 | 11 | # Sample data for Orders table 12 | orders_data = { 13 | "order_id": ["1001", "1002", "1003", "1004", "1005", "1006", "1007", "1008"], 14 | "order_timestamp": [ 15 | 
"2024-01-01T10:00:00.000Z", 16 | "2024-01-01T11:30:00.000Z", 17 | "2024-01-01T12:45:00.000Z", 18 | "2024-01-02T08:20:00.000Z", 19 | "2024-01-02T09:15:00.000Z", 20 | "2024-01-02T10:05:00.000Z", 21 | "2024-01-02T10:45:00.000Z", 22 | "2024-01-02T11:30:00.000Z", 23 | ], 24 | "order_total": [5000, 7500, 3000, 2000, 6500, 12000, 4500, 8000], 25 | } 26 | 27 | orders_df = pd.DataFrame(orders_data) 28 | orders_df["order_timestamp"] = pd.to_datetime(orders_df["order_timestamp"], format="%Y-%m-%dT%H:%M:%S.%fZ") 29 | 30 | # Write to Delta table files 31 | write_deltalake(os.path.join(output_dir, "orders.delta"), orders_df) 32 | -------------------------------------------------------------------------------- /tests/fixtures/s3-json-complex/data/feed.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "specversion" : "1.0", 3 | "type" : "org.http-feeds.example.inventory", 4 | "source" : "https://example.http-feeds.org/inventory", 5 | "id" : "1c6b8c6e-d8d0-4a91-b51c-1f56bd04c758", 6 | "time" : "2021-01-01T00:00:01Z", 7 | "subject" : "9521234567899", 8 | "data" : { 9 | "sku": "9521234567899", 10 | "updated": "2022-01-01T00:00:01Z", 11 | "quantity": 5 12 | } 13 | },{ 14 | "specversion" : "1.0", 15 | "type" : "org.http-feeds.example.inventory", 16 | "source" : "https://example.http-feeds.org/inventory", 17 | "id" : "292042fb-ab04-4653-af90-19a24032bffe", 18 | "time" : "2021-12-01T00:00:15Z", 19 | "subject" : "9521234512349", 20 | "data" : { 21 | "sku": "9521234512349", 22 | "updated": "2022-01-01T00:00:12Z", 23 | "quantity": 0 24 | } 25 | },{ 26 | "specversion" : "1.0", 27 | "type" : "org.http-feeds.example.inventory", 28 | "source" : "https://example.http-feeds.org/inventory", 29 | "id" : "fa3e2a22-398c-4d02-ad08-9415e43178e6", 30 | "time" : "2021-01-01T00:00:22Z", 31 | "subject" : "9521234567899", 32 | "data" : { 33 | "sku": "9521234567899", 34 | "updated": "2022-01-01T00:00:21Z", 35 | "quantity": 4 36 | } 37 | }] 
-------------------------------------------------------------------------------- /tests/fixtures/s3-json-complex/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: inventory-events 3 | info: 4 | title: Inventory Events Feed 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | feed/s3: 9 | type: s3 10 | endpointUrl: __S3_ENDPOINT_URL__ 11 | location: s3://feed-bucket/fixtures/s3-json-complex/data/*.json 12 | delimiter: array 13 | format: json 14 | dataProductId: feed 15 | outputPortId: s3 16 | models: 17 | inventory: 18 | type: object 19 | fields: 20 | specversion: 21 | type: string 22 | const: "1.0" 23 | required: true 24 | type: 25 | type: string 26 | const: "org.http-feeds.example.inventory" 27 | required: true 28 | source: 29 | type: string 30 | format: uri 31 | const: "https://example.http-feeds.org/inventory" 32 | required: true 33 | id: 34 | type: string 35 | required: true 36 | time: 37 | type: string 38 | format: date-time 39 | required: true 40 | subject: 41 | type: string 42 | data: 43 | type: object 44 | fields: 45 | sku: 46 | type: string 47 | required: true 48 | updated: 49 | type: string 50 | format: date-time 51 | required: true 52 | quantity: 53 | type: integer 54 | required: true 55 | -------------------------------------------------------------------------------- /tests/fixtures/s3-json/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: inventory-events 3 | info: 4 | title: Inventory Events 5 | version: 0.0.1 6 | owner: my-domain-team 7 | contact: 8 | email: jochen.christ@innoq.com 9 | servers: 10 | inventory/s3: 11 | type: s3 12 | endpointUrl: __S3_ENDPOINT_URL__ 13 | # location: s3://test-bucket/topics/inventory/*/*/*/*/*.json 14 | location: s3://test-bucket/fixtures/s3-json/data/{model}/year=2022/month=04/day=20/hour=00/inventory+0+0001327496.json 15 
| delimiter: new_line 16 | format: json 17 | dataProductId: inventory 18 | outputPortId: s3 19 | models: 20 | inventory: 21 | type: table 22 | fields: 23 | updated_at: 24 | type: string 25 | available: 26 | type: numeric 27 | location: 28 | type: string 29 | sku: 30 | type: string 31 | -------------------------------------------------------------------------------- /tests/fixtures/sodacl/checks.yaml: -------------------------------------------------------------------------------- 1 | checks for orders: 2 | - freshness(processed_timestamp) < 1d 3 | - row_count > 10 4 | checks for line_items: 5 | - row_count > 10: 6 | name: Have at lease 10 line items -------------------------------------------------------------------------------- /tests/fixtures/sodacl/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: urn:datacontract:checkout:orders-latest 3 | info: 4 | title: Orders Latest 5 | version: 1.0.0 6 | description: | 7 | Successful customer orders in the webshop. 8 | All orders since 2020-01-01. 9 | Orders with their line items are in their current state (no history included). 
10 | owner: Checkout Team 11 | contact: 12 | name: John Doe (Data Product Owner) 13 | url: https://teams.microsoft.com/l/channel/example/checkout 14 | models: 15 | orders: 16 | description: test 17 | fields: 18 | order_id: 19 | type: string 20 | required: true 21 | processed_timestamp: 22 | type: timestamp 23 | required: true 24 | quality: 25 | type: SodaCL 26 | specification: 27 | $ref: "./fixtures/sodacl/checks.yaml" -------------------------------------------------------------------------------- /tests/fixtures/spark/import/users_datacontract_desc.yml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | version: 0.0.1 6 | servers: 7 | local: 8 | type: dataframe 9 | models: 10 | users: 11 | description: description 12 | fields: 13 | id: 14 | type: string 15 | required: false 16 | name: 17 | type: string 18 | required: false 19 | address: 20 | type: struct 21 | required: false 22 | fields: 23 | number: 24 | type: integer 25 | required: false 26 | street: 27 | type: string 28 | required: false 29 | city: 30 | type: string 31 | required: false 32 | tags: 33 | type: array 34 | required: false 35 | items: 36 | type: string 37 | required: false 38 | metadata: 39 | type: map 40 | required: false 41 | keys: 42 | type: string 43 | required: true 44 | values: 45 | type: struct 46 | required: false 47 | fields: 48 | value: 49 | type: string 50 | required: false 51 | type: 52 | type: string 53 | required: false 54 | timestamp: 55 | type: long 56 | required: false 57 | source: 58 | type: string 59 | required: false -------------------------------------------------------------------------------- /tests/fixtures/spark/import/users_datacontract_no_desc.yml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: my-data-contract-id 3 | info: 4 | title: My Data Contract 5 | 
version: 0.0.1 6 | servers: 7 | local: 8 | type: dataframe 9 | models: 10 | users: 11 | fields: 12 | id: 13 | type: string 14 | required: false 15 | name: 16 | type: string 17 | required: false 18 | address: 19 | type: struct 20 | required: false 21 | fields: 22 | number: 23 | type: integer 24 | required: false 25 | street: 26 | type: string 27 | required: false 28 | city: 29 | type: string 30 | required: false 31 | tags: 32 | type: array 33 | required: false 34 | items: 35 | type: string 36 | required: false 37 | metadata: 38 | type: map 39 | required: false 40 | keys: 41 | type: string 42 | required: true 43 | values: 44 | type: struct 45 | required: false 46 | fields: 47 | value: 48 | type: string 49 | required: false 50 | type: 51 | type: string 52 | required: false 53 | timestamp: 54 | type: long 55 | required: false 56 | source: 57 | type: string 58 | required: false -------------------------------------------------------------------------------- /tests/fixtures/spec/datacontract_aliases.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: "123" 3 | info: 4 | title: "Test" 5 | version: 1.0.0 6 | owner: my-domain-team 7 | models: 8 | sample_model: 9 | description: Sample Model 10 | type: table 11 | fields: 12 | id: 13 | type: text 14 | title: ID 15 | description: A unique identifier 16 | $ref: '#/definitions/test' 17 | definitions: 18 | test: 19 | description: Test definition reference 20 | name: refdef 21 | type: text 22 | -------------------------------------------------------------------------------- /tests/fixtures/spec/datacontract_fields_field.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: "123" 3 | info: 4 | title: "Test" 5 | version: 1.0.0 6 | owner: my-domain-team 7 | models: 8 | sample_model: 9 | description: Sample Model 10 | type: table 11 | fields: 12 | id: 13 | type: text 14 | title: 
ID 15 | description: A unique identifier 16 | $ref: '#/definitions/def' 17 | definitions: 18 | def: 19 | description: Test definition reference 20 | type: object 21 | name: refdef 22 | fields: 23 | id: 24 | type: text 25 | title: MyField 26 | $ref: '#/definitions/other' 27 | other: 28 | description: Another Def 29 | type: string 30 | name: fieldname 31 | -------------------------------------------------------------------------------- /tests/fixtures/sqlserver/data/data.sql: -------------------------------------------------------------------------------- 1 | -- Create the table 2 | CREATE TABLE [dbo].[my_table] ( 3 | field_one VARCHAR(10) PRIMARY KEY, 4 | field_two INT NOT NULL, 5 | field_three DATETIME2 6 | ); 7 | 8 | -- Insert the data 9 | INSERT INTO [dbo].[my_table] (field_one, field_two, field_three) VALUES 10 | ('CX-263-DU', 50, '2023-06-16 13:12:56'), 11 | ('IK-894-MN', 47, '2023-10-08 22:40:57'), 12 | ('ER-399-JY', 22, '2023-05-16 01:08:22 '), 13 | ('MT-939-FH', 63, '2023-03-15 05:15:21 '), 14 | ('LV-849-MI', 33, '2023-09-08 20:08:43 '), 15 | ('VS-079-OH', 85, '2023-04-15 00:50:32 '), 16 | ('DN-297-XY', 79, '2023-11-08 12:55:42 '), 17 | ('ZE-172-FP', 14, '2023-12-03 18:38:38 '), 18 | ('ID-840-EG', 89, '2023-10-02 17:17:58 '), 19 | ('FK-230-KZ', 64, '2023-11-27 15:21:48 '); 20 | -------------------------------------------------------------------------------- /tests/fixtures/sqlserver/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: sqlserver 3 | info: 4 | title: sqlserver 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | my-dataproduct/sqlserver: 9 | type: sqlserver 10 | host: localhost 11 | port: __PORT__ 12 | database: tempdb 13 | schema: dbo 14 | driver: ODBC Driver 18 for SQL Server 15 | models: 16 | my_table: 17 | type: table 18 | fields: 19 | field_one: 20 | type: varchar 21 | required: true 22 | unique: true 23 | field_two: 24 | type: int 25 | 
minimum: 10 26 | field_three: 27 | type: timestamp 28 | config: 29 | sqlserverType: DATETIME2 30 | -------------------------------------------------------------------------------- /tests/fixtures/trino/data/data.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO my_schema.my_table (field_one, field_two, field_three) 2 | VALUES ('CX-263-DU', 50, TIMESTAMP '2023-06-16 13:12:56'), 3 | ('IK-894-MN', 47, TIMESTAMP '2023-10-08 22:40:57'), 4 | ('ER-399-JY', 22, TIMESTAMP '2023-05-16 01:08:22'), 5 | ('MT-939-FH', 63, TIMESTAMP '2023-03-15 05:15:21'), 6 | ('LV-849-MI', 33, TIMESTAMP '2023-09-08 20:08:43'), 7 | ('VS-079-OH', 85, TIMESTAMP '2023-04-15 00:50:32'), 8 | ('DN-297-XY', 79, TIMESTAMP '2023-11-08 12:55:42'), 9 | ('ZE-172-FP', 14, TIMESTAMP '2023-12-03 18:38:38'), 10 | ('ID-840-EG', 89, TIMESTAMP '2023-10-02 17:17:58'), 11 | ('FK-230-KZ', 64, TIMESTAMP '2023-11-27 15:21:48') 12 | -------------------------------------------------------------------------------- /tests/fixtures/trino/data/table.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE my_schema.my_table 2 | ( 3 | field_one VARCHAR, 4 | field_two INT, 5 | field_three TIMESTAMP WITH TIME ZONE 6 | ) -------------------------------------------------------------------------------- /tests/fixtures/trino/datacontract.yaml: -------------------------------------------------------------------------------- 1 | dataContractSpecification: 1.1.0 2 | id: trino 3 | info: 4 | title: trino 5 | version: 0.0.1 6 | owner: my-domain-team 7 | servers: 8 | my-dataproduct/trino: 9 | type: trino 10 | host: http://localhost 11 | port: __PORT__ 12 | catalog: memory 13 | schema: my_schema 14 | models: 15 | my_table: 16 | type: table 17 | fields: 18 | field_one: 19 | type: varchar 20 | required: true 21 | unique: true 22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$" 23 | field_two: 24 | type: integer 25 | minimum: 10 26 | field_three: 27 | 
type: timestamp 28 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | from datacontract.api import app 4 | 5 | client = TestClient(app) 6 | 7 | 8 | def test_lint(): 9 | with open("fixtures/lint/valid_datacontract.yaml", "r") as f: 10 | data_contract_str = f.read() 11 | 12 | response = client.post( 13 | url="/lint", 14 | json=data_contract_str, 15 | ) 16 | assert response.status_code == 200 17 | print(response.json()) 18 | assert response.json()["result"] == "passed" 19 | assert len(response.json()["checks"]) == 6 20 | assert all([check["result"] == "passed" for check in response.json()["checks"]]) 21 | 22 | 23 | def test_export_jsonschema(): 24 | with open("fixtures/local-json/datacontract.yaml", "r") as f: 25 | data_contract_str = f.read() 26 | response = client.post( 27 | url="/export?format=jsonschema", 28 | json=data_contract_str, 29 | ) 30 | assert response.status_code == 200 31 | print(response.text) 32 | with open("fixtures/local-json/datacontract.json") as file: 33 | expected_json_schema = file.read() 34 | print(expected_json_schema) 35 | assert response.text == expected_json_schema 36 | -------------------------------------------------------------------------------- /tests/test_catalog.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import PosixPath 3 | 4 | from typer.testing import CliRunner 5 | 6 | from datacontract.cli import app 7 | 8 | # logging.basicConfig(level=logging.DEBUG, force=True) 9 | 10 | 11 | def test_cli(tmp_path: PosixPath): 12 | runner = CliRunner() 13 | result = runner.invoke(app, ["catalog", "--files", "fixtures/catalog/*.yaml", "--output", tmp_path]) 14 | assert result.exit_code == 0 15 | assert os.path.exists(tmp_path / "index.html") 16 | assert os.path.exists(tmp_path / 
"fixtures/catalog/datacontract-1.html") 17 | assert os.path.exists(tmp_path / "fixtures/catalog/datacontract-2.html") 18 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | 3 | from datacontract.cli import app 4 | 5 | runner = CliRunner() 6 | 7 | # logging.basicConfig(level=logging.DEBUG, force=True) 8 | 9 | 10 | def test_test_help(): 11 | result = runner.invoke(app, ["test", "--help"]) 12 | assert result.exit_code == 0 13 | 14 | 15 | def test_file_does_not_exist(): 16 | result = runner.invoke(app, ["test", "unknown.yaml"]) 17 | assert result.exit_code == 1 18 | assert "The file 'unknown.yaml' does not \nexist." in result.stdout 19 | -------------------------------------------------------------------------------- /tests/test_data_contract_checks.py: -------------------------------------------------------------------------------- 1 | from datacontract.engines.data_contract_checks import period_to_seconds 2 | 3 | 4 | def test_period_to_seconds(): 5 | assert period_to_seconds("P1Y") == 31536000 6 | assert period_to_seconds("P1D") == 86400 7 | assert period_to_seconds("PT24H") == 86400 8 | assert period_to_seconds("1d") == 86400 9 | assert period_to_seconds("24h") == 86400 10 | assert period_to_seconds("60m") == 3600 11 | -------------------------------------------------------------------------------- /tests/test_data_contract_specification.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | 3 | import pytest 4 | 5 | from datacontract.model.data_contract_specification import DataContractSpecification 6 | 7 | 8 | def test_from_file_raises_exception_if_file_does_not_exist(): 9 | with pytest.raises(FileNotFoundError): 10 | DataContractSpecification.from_file(f"{uuid4().hex}.yaml") 11 | 
-------------------------------------------------------------------------------- /tests/test_description_linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/test_description_linter.py -------------------------------------------------------------------------------- /tests/test_export_bigquery.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from typer.testing import CliRunner 4 | 5 | from datacontract.cli import app 6 | from datacontract.data_contract import DataContract 7 | 8 | # logging.basicConfig(level=logging.DEBUG, force=True) 9 | 10 | 11 | def test_cli(): 12 | runner = CliRunner() 13 | result = runner.invoke( 14 | app, 15 | [ 16 | "export", 17 | "--format", 18 | "bigquery", 19 | "--server", 20 | "bigquery", 21 | "fixtures/bigquery/export/datacontract.yaml", 22 | ], 23 | ) 24 | assert result.exit_code == 0 25 | 26 | 27 | def test_exports_bigquery_schema(): 28 | data_contract_file: str = "fixtures/bigquery/export/datacontract.yaml" 29 | with open(data_contract_file) as file: 30 | file_content = file.read() 31 | data_contract = DataContract(data_contract_str=file_content, server="bigquery") 32 | assert data_contract.lint(enabled_linters="none").has_passed() 33 | result = data_contract.export("bigquery") 34 | 35 | print("Result:\n", result) 36 | with open("fixtures/bigquery/export/bq_table_schema.json") as file: 37 | expected = file.read() 38 | assert json.loads(result) == json.loads(expected) 39 | -------------------------------------------------------------------------------- /tests/test_export_custom.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from typer.testing import CliRunner 4 | 5 | from datacontract.cli import app 6 | from datacontract.export.custom_converter import 
to_custom 7 | from datacontract.model.data_contract_specification import DataContractSpecification 8 | 9 | # logging.basicConfig(level=logging.DEBUG, force=True) 10 | 11 | 12 | def test_cli(): 13 | runner = CliRunner() 14 | result = runner.invoke( 15 | app, 16 | [ 17 | "export", 18 | "./fixtures/custom/export/datacontract.yaml", 19 | "--format", 20 | "custom", 21 | "--template", 22 | "./fixtures/custom/export/template.sql", 23 | ], 24 | ) 25 | assert result.exit_code == 0 26 | 27 | 28 | def test_to_custom(): 29 | data_contract = DataContractSpecification.from_file("fixtures/custom/export/datacontract.yaml") 30 | template = Path("fixtures/custom/export/template.sql") 31 | result = to_custom(data_contract, template) 32 | 33 | with open("fixtures/custom/export/expected.sql", "r") as file: 34 | assert result == file.read() 35 | -------------------------------------------------------------------------------- /tests/test_export_custom_exporter.py: -------------------------------------------------------------------------------- 1 | from datacontract.data_contract import DataContract 2 | from datacontract.export.exporter import Exporter 3 | from datacontract.export.exporter_factory import exporter_factory 4 | 5 | # logging.basicConfig(level=logging.DEBUG, force=True) 6 | 7 | 8 | class CustomExporter(Exporter): 9 | def export(self, data_contract, model, server, sql_server_type, export_args) -> str: 10 | result = { 11 | "data_contract_servers": data_contract.servers, 12 | "model": model, 13 | "server": server, 14 | "sql_server_type": sql_server_type, 15 | "export_args": export_args, 16 | "custom_args": export_args.get("custom_arg", ""), 17 | } 18 | return str(result) 19 | 20 | 21 | exporter_factory.register_exporter("custom_exporter", CustomExporter) 22 | 23 | 24 | def test_custom_exporter(): 25 | expected_custom = """{'data_contract_servers': {'production': Server(type='snowflake', description=None, environment='production', format=None, project=None, dataset=None, 
path=None, delimiter=None, endpointUrl=None, location=None, account='my-account', database='my-database', schema_='my-schema', host=None, port=None, catalog=None, topic=None, http_path=None, token=None, dataProductId=None, outputPortId=None, driver=None, storageAccount=None, roles=[ServerRole(name='analyst_us', description='Access to the data for US region')])}, 'model': 'orders', 'server': 'production', 'sql_server_type': 'auto', 'export_args': {'server': 'production', 'custom_arg': 'my_custom_arg'}, 'custom_args': 'my_custom_arg'}""" 26 | result = DataContract(data_contract_file="./fixtures/export/datacontract.yaml", server="production").export( 27 | export_format="custom_exporter", model="orders", server="production", custom_arg="my_custom_arg" 28 | ) 29 | # TODO use json comparison instead of string comparison 30 | assert result.strip() == expected_custom.strip() 31 | -------------------------------------------------------------------------------- /tests/test_export_dbt_staging_sql.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from typer.testing import CliRunner 3 | 4 | from datacontract.cli import app 5 | from datacontract.export.dbt_converter import to_dbt_staging_sql 6 | from datacontract.model.data_contract_specification import DataContractSpecification 7 | 8 | # logging.basicConfig(level=logging.DEBUG, force=True) 9 | 10 | 11 | def test_cli(): 12 | runner = CliRunner() 13 | result = runner.invoke( 14 | app, 15 | [ 16 | "export", 17 | "./fixtures/dbt/export/datacontract.yaml", 18 | "--format", 19 | "dbt-staging-sql", 20 | "--model", 21 | "orders", 22 | ], 23 | ) 24 | print(result.stdout) 25 | assert result.exit_code == 0 26 | 27 | 28 | def test_to_dbt_staging(): 29 | data_contract = DataContractSpecification.from_file("fixtures/dbt/export/datacontract.yaml") 30 | expected = """ 31 | select 32 | order_id, 33 | order_total, 34 | order_status, 35 | user_id 36 | from {{ source('orders-unit-test', 
'orders') }} 37 | """ 38 | 39 | result = to_dbt_staging_sql(data_contract, "orders", data_contract.models.get("orders")) 40 | 41 | assert yaml.safe_load(result) == yaml.safe_load(expected) 42 | -------------------------------------------------------------------------------- /tests/test_export_go.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | 3 | from datacontract.cli import app 4 | from datacontract.data_contract import DataContract 5 | 6 | # logging.basicConfig(level=logging.DEBUG, force=True) 7 | 8 | 9 | def test_cli(): 10 | runner = CliRunner() 11 | result = runner.invoke(app, ["export", "./fixtures/export/datacontract.yaml", "--format", "go"]) 12 | assert result.exit_code == 0 13 | 14 | 15 | def test_to_go_types(): 16 | actual = DataContract(data_contract_file="fixtures/export/datacontract.yaml").export("go") 17 | expected = """ 18 | package main 19 | 20 | 21 | type Orders struct { 22 | OrderId varchar `json:"order_id" avro:"order_id"` // None 23 | OrderTotal bigint `json:"order_total" avro:"order_total"` // The order_total field 24 | OrderStatus string `json:"order_status" avro:"order_status"` // None 25 | } 26 | 27 | """ 28 | assert actual.strip() == expected.strip() 29 | -------------------------------------------------------------------------------- /tests/test_export_html.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from typer.testing import CliRunner 5 | 6 | from datacontract.cli import app 7 | 8 | # logging.basicConfig(level=logging.DEBUG, force=True) 9 | 10 | 11 | def test_cli(): 12 | runner = CliRunner() 13 | result = runner.invoke(app, ["export", "./fixtures/export/datacontract.yaml", "--format", "html"]) 14 | assert result.exit_code == 0 15 | 16 | 17 | def test_cli_with_output(tmp_path: Path): 18 | runner = CliRunner() 19 | result = runner.invoke( 20 | app, 21 | [ 22 | "export", 
23 | "./fixtures/export/datacontract.yaml", 24 | "--format", 25 | "html", 26 | "--output", 27 | tmp_path / "datacontract.html", 28 | ], 29 | ) 30 | assert result.exit_code == 0 31 | assert os.path.exists(tmp_path / "datacontract.html") 32 | -------------------------------------------------------------------------------- /tests/test_export_markdown.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | 3 | from datacontract.cli import app 4 | from datacontract.export.markdown_converter import to_markdown 5 | from datacontract.model.data_contract_specification import DataContractSpecification 6 | 7 | # logging.basicConfig(level=logging.DEBUG, force=True) 8 | 9 | 10 | def test_cli(): 11 | runner = CliRunner() 12 | result = runner.invoke( 13 | app, 14 | [ 15 | "export", 16 | "./fixtures/markdown/export/datacontract.yaml", 17 | "--format", 18 | "markdown", 19 | ], 20 | ) 21 | assert result.exit_code == 0 22 | assert result.output.startswith("# urn:datacontract:checkout:orders-latest") 23 | 24 | 25 | def test_to_markdown(): 26 | data_contract = DataContractSpecification.from_file("fixtures/markdown/export/datacontract.yaml") 27 | result = to_markdown(data_contract) 28 | 29 | with open("fixtures/markdown/export/expected.md", "r") as file: 30 | assert result == file.read() 31 | -------------------------------------------------------------------------------- /tests/test_export_mermaid.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from typer.testing import CliRunner 5 | 6 | from datacontract.cli import app 7 | 8 | 9 | def test_cli(): 10 | runner = CliRunner() 11 | result = runner.invoke(app, ["export", "./fixtures/export/datacontract.yaml", "--format", "mermaid"]) 12 | assert result.exit_code == 0 13 | 14 | 15 | def test_cli_with_output(tmp_path: Path): 16 | runner = CliRunner() 17 | result = runner.invoke( 18 | 
app, 19 | [ 20 | "export", 21 | "./fixtures/export/datacontract.yaml", 22 | "--format", 23 | "mermaid", 24 | "--output", 25 | tmp_path / "datacontract.mermaid", 26 | ], 27 | ) 28 | assert result.exit_code == 0 29 | assert os.path.exists(tmp_path / "datacontract.mermaid") 30 | 31 | 32 | def test_mermaid_structure(tmp_path: Path): 33 | datacontract_file = "fixtures/export/datacontract.yaml" 34 | runner = CliRunner() 35 | result = runner.invoke( 36 | app, 37 | [ 38 | "export", 39 | datacontract_file, 40 | "--format", 41 | "mermaid", 42 | "--output", 43 | tmp_path / "datacontract.mermaid", 44 | ], 45 | ) 46 | assert result.exit_code == 0 47 | 48 | with open(tmp_path / "datacontract.mermaid") as file: 49 | content = file.read() 50 | 51 | # Check structure 52 | assert "erDiagram" in content 53 | assert "orders" in content 54 | assert "order_id" in content 55 | assert "order_total" in content 56 | assert "order_status" in content 57 | -------------------------------------------------------------------------------- /tests/test_export_protobuf.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | 3 | from datacontract.cli import app 4 | from datacontract.export.protobuf_converter import to_protobuf 5 | from datacontract.model.data_contract_specification import DataContractSpecification 6 | 7 | # logging.basicConfig(level=logging.DEBUG, force=True) 8 | 9 | 10 | def test_cli(): 11 | runner = CliRunner() 12 | result = runner.invoke(app, ["export", "./fixtures/protobuf/datacontract.yaml", "--format", "protobuf"]) 13 | assert result.exit_code == 0 14 | 15 | 16 | def test_to_protobuf(): 17 | data_contract = DataContractSpecification.from_file("fixtures/protobuf/datacontract.yaml") 18 | expected_protobuf = """ 19 | syntax = "proto3"; 20 | 21 | package example; 22 | 23 | // Enum for Category 24 | enum Category { 25 | CATEGORY_UNKNOWN = 0; 26 | CATEGORY_ELECTRONICS = 1; 27 | CATEGORY_CLOTHING = 2; 28 | 
CATEGORY_HOME_APPLIANCES = 3; 29 | } 30 | 31 | // Details of Product. 32 | message Product { 33 | // Enum field category 34 | Category category = 1; 35 | // Field id 36 | string id = 2; 37 | // Field name 38 | string name = 3; 39 | // Field price 40 | double price = 4; 41 | // List of Review 42 | repeated string reviews = 5; 43 | // Field tags 44 | string tags = 6; 45 | } 46 | 47 | // Details of Review. 48 | message Review { 49 | // Field comment 50 | string comment = 1; 51 | // Field rating 52 | int32 rating = 2; 53 | // Field user 54 | string user = 3; 55 | } 56 | 57 | """.strip() 58 | 59 | result = to_protobuf(data_contract).strip() 60 | 61 | assert result == expected_protobuf 62 | -------------------------------------------------------------------------------- /tests/test_export_sql_query.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | 3 | from datacontract.cli import app 4 | from datacontract.data_contract import DataContract 5 | 6 | # logging.basicConfig(level=logging.DEBUG, force=True) 7 | 8 | 9 | def test_cli(): 10 | runner = CliRunner() 11 | result = runner.invoke(app, ["export", "./fixtures/postgres-export/datacontract.yaml", "--format", "sql-query"]) 12 | assert result.exit_code == 0 13 | 14 | 15 | def test_to_sql_query_postgres(): 16 | actual = DataContract(data_contract_file="fixtures/postgres-export/datacontract.yaml").export("sql-query") 17 | expected = """ 18 | -- Data Contract: postgres 19 | -- SQL Dialect: postgres 20 | select 21 | field_one, 22 | field_two, 23 | field_three 24 | from my_table 25 | """ 26 | assert actual.strip() == expected.strip() 27 | 28 | 29 | def test_to_sql_query_snowflake(): 30 | actual = DataContract(data_contract_file="fixtures/snowflake/datacontract.yaml").export("sql-query", model="orders") 31 | expected = """ 32 | -- Data Contract: urn:datacontract:checkout:snowflake_orders_pii_v2 33 | -- SQL Dialect: snowflake 34 | select 35 | ORDER_ID, 36 
| ORDER_TIMESTAMP, 37 | ORDER_TOTAL, 38 | CUSTOMER_ID, 39 | CUSTOMER_EMAIL_ADDRESS, 40 | PROCESSING_TIMESTAMP 41 | from orders 42 | """ 43 | assert actual.strip() == expected.strip() 44 | -------------------------------------------------------------------------------- /tests/test_export_terraform.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | 3 | from datacontract.cli import app 4 | from datacontract.export.terraform_converter import to_terraform 5 | from datacontract.model.data_contract_specification import DataContractSpecification 6 | 7 | # logging.basicConfig(level=logging.DEBUG, force=True) 8 | 9 | 10 | def test_cli(): 11 | runner = CliRunner() 12 | result = runner.invoke(app, ["export", "./fixtures/export/datacontract_s3.yaml", "--format", "terraform"]) 13 | assert result.exit_code == 0 14 | 15 | 16 | def test_to_terraform(): 17 | data_contract = DataContractSpecification.from_file("fixtures/export/datacontract_s3.yaml") 18 | expected_terraform_file = """ 19 | resource "aws_s3_bucket" "orders-unit-test_production" { 20 | bucket = "datacontract-example-orders-latest" 21 | 22 | tags = { 23 | Name = "Orders Unit Test" 24 | DataContract = "orders-unit-test" 25 | Server = "production" 26 | DataProduct = "orders" 27 | } 28 | } 29 | """.strip() 30 | 31 | result = to_terraform(data_contract) 32 | 33 | assert result == expected_terraform_file 34 | -------------------------------------------------------------------------------- /tests/test_field_pattern_linter.py: -------------------------------------------------------------------------------- 1 | import datacontract.model.data_contract_specification as spec 2 | from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter 3 | from datacontract.model.run import Check 4 | 5 | 6 | def construct_error_check(msg: str) -> Check: 7 | return Check( 8 | type="lint", 9 | name="Linter 'Field pattern is correct regex'", 10 | 
result="warning", 11 | engine="datacontract", 12 | reason=msg, 13 | ) 14 | 15 | 16 | success_check = Check( 17 | type="lint", name="Linter 'Field pattern is correct regex'", result="passed", engine="datacontract" 18 | ) 19 | 20 | linter = FieldPatternLinter() 21 | 22 | 23 | def test_correct_regex_pattern(): 24 | specification = spec.DataContractSpecification( 25 | models={"test_model": spec.Model(fields={"test_field": spec.Field(pattern=".")})} 26 | ) 27 | result = linter.lint(specification) 28 | assert result == [success_check] 29 | 30 | 31 | def test_incorrect_regex_pattern(): 32 | specification = spec.DataContractSpecification( 33 | models={"test_model": spec.Model(fields={"test_field": spec.Field(pattern="\\")})} 34 | ) 35 | result = linter.lint(specification) 36 | assert result == [ 37 | construct_error_check( 38 | "Failed to compile pattern regex '\\' for field" 39 | " 'test_field' in model 'test_model': " 40 | "bad escape (end of pattern)" 41 | ) 42 | ] 43 | -------------------------------------------------------------------------------- /tests/test_import_excel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import yaml 5 | from typer.testing import CliRunner 6 | 7 | from datacontract.cli import app 8 | from datacontract.imports.excel_importer import import_excel_as_odcs 9 | 10 | # logging.basicConfig(level=logging.DEBUG, force=True) 11 | 12 | 13 | def test_cli(): 14 | runner = CliRunner() 15 | result = runner.invoke( 16 | app, 17 | [ 18 | "import", 19 | "--format", 20 | "excel", 21 | "--source", 22 | "./fixtures/excel/shipments-odcs.xlsx", 23 | ], 24 | ) 25 | assert result.exit_code == 0 26 | 27 | 28 | def test_import_excel_odcs(): 29 | result = import_excel_as_odcs("./fixtures/excel/shipments-odcs.xlsx") 30 | expected_datacontract = read_file("fixtures/excel/shipments-odcs.yaml") 31 | assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected_datacontract) 32 | 33 | 34 | def 
read_file(file): 35 | if not os.path.exists(file): 36 | print(f"The file '{file}' does not exist.") 37 | sys.exit(1) 38 | with open(file, "r") as file: 39 | file_content = file.read() 40 | return file_content 41 | -------------------------------------------------------------------------------- /tests/test_import_odcs_v3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import yaml 5 | from typer.testing import CliRunner 6 | 7 | from datacontract.cli import app 8 | from datacontract.data_contract import DataContract 9 | 10 | # logging.basicConfig(level=logging.DEBUG, force=True) 11 | 12 | 13 | def test_cli(): 14 | runner = CliRunner() 15 | result = runner.invoke( 16 | app, 17 | [ 18 | "import", 19 | "--format", 20 | "odcs", 21 | "--source", 22 | "./fixtures/odcs_v3/full-example.odcs.yaml", 23 | ], 24 | ) 25 | assert result.exit_code == 0 26 | 27 | 28 | def test_import_full_odcs(): 29 | result = DataContract().import_from_source("odcs", "./fixtures/odcs_v3/full-example.odcs.yaml") 30 | expected_datacontract = read_file("fixtures/odcs_v3/full-example.datacontract.yml") 31 | assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected_datacontract) 32 | assert DataContract(data_contract_str=expected_datacontract).lint(enabled_linters="none").has_passed() 33 | 34 | 35 | def test_import_complex_odcs(): 36 | result = DataContract().import_from_source("odcs", "./fixtures/odcs_v3/adventureworks.odcs.yaml") 37 | expected_datacontract = read_file("fixtures/odcs_v3/adventureworks.datacontract.yml") 38 | assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected_datacontract) 39 | assert DataContract(data_contract_str=expected_datacontract).lint(enabled_linters="none").has_passed() 40 | 41 | 42 | def read_file(file): 43 | if not os.path.exists(file): 44 | print(f"The file '{file}' does not exist.") 45 | sys.exit(1) 46 | with open(file, "r") as file: 47 | file_content = file.read() 48 | return 
file_content 49 | -------------------------------------------------------------------------------- /tests/test_import_parquet.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | 3 | from datacontract.cli import app 4 | from datacontract.data_contract import DataContract 5 | 6 | parquet_file_path = "fixtures/parquet/data/combined_no_time.parquet" 7 | 8 | 9 | def test_cli(): 10 | runner = CliRunner() 11 | result = runner.invoke( 12 | app, 13 | [ 14 | "import", 15 | "--format", 16 | "parquet", 17 | "--source", 18 | parquet_file_path, 19 | ], 20 | ) 21 | assert result.exit_code == 0 22 | 23 | 24 | def test_import_parquet(): 25 | result = DataContract().import_from_source(format="parquet", source=parquet_file_path) 26 | 27 | expected = """dataContractSpecification: 1.1.0 28 | id: my-data-contract-id 29 | info: 30 | title: My Data Contract 31 | version: 0.0.1 32 | models: 33 | combined_no_time: 34 | fields: 35 | string_field: 36 | type: string 37 | blob_field: 38 | type: bytes 39 | boolean_field: 40 | type: boolean 41 | decimal_field: 42 | type: decimal 43 | precision: 10 44 | scale: 2 45 | float_field: 46 | type: float 47 | double_field: 48 | type: double 49 | integer_field: 50 | type: int 51 | bigint_field: 52 | type: long 53 | struct_field: 54 | type: struct 55 | array_field: 56 | type: array 57 | list_field: 58 | type: array 59 | map_field: 60 | type: map 61 | date_field: 62 | type: date 63 | timestamp_field: 64 | type: timestamp 65 | """ 66 | 67 | assert result.to_yaml() == expected 68 | assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed() 69 | -------------------------------------------------------------------------------- /tests/test_integration_datameshmanager.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dotenv import load_dotenv 5 | from typer.testing import CliRunner 
6 | 7 | from datacontract.data_contract import DataContract 8 | 9 | runner = CliRunner() 10 | load_dotenv(override=True) 11 | 12 | 13 | @pytest.mark.skipif( 14 | os.environ.get("DATAMESH_MANAGER_API_KEY") is None, reason="Requires DATAMESH_MANAGER_API_KEY to be set" 15 | ) 16 | def test_remote_data_contract(): 17 | data_contract = DataContract( 18 | data_contract_file="https://app.datamesh-manager.com/checker1/datacontracts/verbraucherpreisindex-61111-0002zzz", 19 | publish_url="https://api.datamesh-manager.com/api/test-results", 20 | ) 21 | 22 | run = data_contract.test() 23 | 24 | print(run) 25 | assert run.result == "passed" 26 | assert len(run.checks) == 4 27 | assert all(check.result == "passed" for check in run.checks) 28 | -------------------------------------------------------------------------------- /tests/test_notice_period_linter.py: -------------------------------------------------------------------------------- 1 | import datacontract.model.data_contract_specification as spec 2 | from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter 3 | from datacontract.model.run import Check 4 | 5 | 6 | def construct_error_check(msg: str) -> Check: 7 | return Check( 8 | type="lint", 9 | name="Linter 'noticePeriod in ISO8601 format'", 10 | result="warning", 11 | engine="datacontract", 12 | reason=msg, 13 | ) 14 | 15 | 16 | success_check = Check( 17 | type="lint", name="Linter 'noticePeriod in ISO8601 format'", result="passed", engine="datacontract" 18 | ) 19 | 20 | 21 | def test_lint_correct_period(): 22 | specification = spec.DataContractSpecification() 23 | specification.terms = spec.Terms(noticePeriod="P1M") 24 | result = NoticePeriodLinter().lint(specification) 25 | assert result == [success_check] 26 | 27 | 28 | def test_lint_empty_period(): 29 | # This returns a warning that's currently ignored. 30 | # If warnings are treated differently, change this spec. 
31 | specification = spec.DataContractSpecification(terms=spec.Terms()) 32 | result = NoticePeriodLinter().lint(specification) 33 | assert result == [success_check] 34 | 35 | 36 | def test_lint_incorrect_period(): 37 | # This returns a warning that's currently ignored. 38 | # If warnings are treated differently, change this spec. 39 | specification = spec.DataContractSpecification(terms=spec.Terms(noticePeriod="P0")) 40 | result = NoticePeriodLinter().lint(specification) 41 | assert result == [construct_error_check("Notice period 'P0' is not a valid ISO8601 duration.")] 42 | 43 | 44 | def test_lint_correct_datetime_period(): 45 | specification = spec.DataContractSpecification(terms=spec.Terms(noticePeriod="P00000001T000001")) 46 | result = NoticePeriodLinter().lint(specification) 47 | assert result == [success_check] 48 | -------------------------------------------------------------------------------- /tests/test_roundtrip_jsonschema.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from typer.testing import CliRunner 4 | 5 | from datacontract.cli import app 6 | from datacontract.data_contract import DataContract 7 | from datacontract.export.jsonschema_converter import to_jsonschemas 8 | 9 | # logging.basicConfig(level=logging.DEBUG, force=True) 10 | 11 | 12 | def test_import_cli(): 13 | runner = CliRunner() 14 | result = runner.invoke( 15 | app, 16 | [ 17 | "import", 18 | "--format", 19 | "jsonschema", 20 | "--source", 21 | "fixtures/import/orders.json", 22 | ], 23 | ) 24 | assert result.exit_code == 0 25 | 26 | 27 | def test_export_cli(): 28 | runner = CliRunner() 29 | result = runner.invoke(app, ["export", "./fixtures/local-json/datacontract.yaml", "--format", "jsonschema"]) 30 | assert result.exit_code == 0 31 | 32 | 33 | def test_roundtrip_json_schema_orders(): 34 | # Import the data contract from the JSON schema source 35 | result_import = DataContract().import_from_source("jsonschema", 
"fixtures/import/orders.json") 36 | 37 | # Create a data contract specification with inline definitions 38 | data_contract = DataContract( 39 | data_contract_str=result_import.to_yaml(), inline_definitions=True 40 | ).get_data_contract_specification() 41 | 42 | # Load the expected result from the JSON file 43 | with open("fixtures/import/orders.json", "r") as f: 44 | expected_result = json.load(f) 45 | 46 | # Export the data contract to JSON schema 47 | exported_jsonschema = to_jsonschemas(data_contract) 48 | 49 | # Compare the exported JSON schema with the expected result 50 | assert exported_jsonschema["OrderSchema"] == expected_result 51 | -------------------------------------------------------------------------------- /tests/test_spec_fields_field.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | 3 | from datacontract.data_contract import DataContract 4 | from datacontract.model.data_contract_specification import Field 5 | 6 | runner = CliRunner() 7 | 8 | # logging.basicConfig(level=logging.DEBUG, force=True) 9 | 10 | 11 | def test_aliases(): 12 | data_contract = DataContract(data_contract_file="fixtures/spec/datacontract_fields_field.yaml") 13 | spec = data_contract.get_data_contract_specification() 14 | model_field = spec.models["sample_model"].fields["id"] 15 | definition_field = model_field.fields["id"] 16 | assert isinstance(model_field, Field) 17 | assert isinstance(definition_field, Field) 18 | -------------------------------------------------------------------------------- /tests/test_spec_ref.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | 3 | from datacontract.data_contract import DataContract 4 | 5 | runner = CliRunner() 6 | 7 | # logging.basicConfig(level=logging.DEBUG, force=True) 8 | 9 | 10 | def test_aliases(): 11 | data_contract = 
DataContract(data_contract_file="fixtures/spec/datacontract_aliases.yaml") 12 | spec = data_contract.get_data_contract_specification() 13 | yaml = spec.to_yaml() 14 | print(yaml) 15 | assert "$ref" in yaml 16 | -------------------------------------------------------------------------------- /tests/test_test_bigquery.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dotenv import load_dotenv 5 | 6 | from datacontract.data_contract import DataContract 7 | 8 | # logging.basicConfig(level=logging.INFO, force=True) 9 | 10 | datacontract = "fixtures/bigquery/datacontract.yaml" 11 | 12 | load_dotenv(override=True) 13 | 14 | 15 | # Deactivated because the test requires special setup on a non-free BigQuery account. 16 | # Can activate for testing locally, using a custom account_info file. 17 | # For the provided datacontract.yaml the data file from s3-csv should be imported in the target BigQuery table. 18 | @pytest.mark.skipif( 19 | os.environ.get("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH") is None, 20 | reason="Requires DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH to be set", 21 | ) 22 | def _test_test_bigquery(): 23 | data_contract = DataContract(data_contract_file=datacontract) 24 | 25 | run = data_contract.test() 26 | 27 | print(run) 28 | assert run.result == "passed" 29 | assert all(check.result == "passed" for check in run.checks) 30 | 31 | 32 | @pytest.mark.skipif( 33 | os.environ.get("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH") is None, 34 | reason="Requires DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH to be set", 35 | ) 36 | def test_test_bigquery_complex_tables(): 37 | data_contract = DataContract(data_contract_file="fixtures/bigquery/datacontract_complex.yaml") 38 | 39 | run = data_contract.test() 40 | 41 | print(run.pretty()) 42 | assert run.result == "passed" 43 | assert all(check.result == "passed" for check in run.checks) 44 | 
-------------------------------------------------------------------------------- /tests/test_test_databricks.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dotenv import load_dotenv 5 | 6 | from datacontract.data_contract import DataContract 7 | 8 | # logging.basicConfig(level=logging.DEBUG, force=True) 9 | 10 | datacontract = "fixtures/databricks-sql/datacontract.yaml" 11 | 12 | load_dotenv(override=True) 13 | 14 | 15 | @pytest.mark.skipif( 16 | os.environ.get("DATACONTRACT_DATABRICKS_TOKEN") is None, reason="Requires DATACONTRACT_DATABRICKS_TOKEN to be set" 17 | ) 18 | def _test_test_databricks_sql(): 19 | # os.environ['DATACONTRACT_DATABRICKS_TOKEN'] = "xxx" 20 | # os.environ['DATACONTRACT_DATABRICKS_HTTP_PATH'] = "/sql/1.0/warehouses/b053a326fa014fb3" 21 | data_contract = DataContract(data_contract_file=datacontract) 22 | 23 | run = data_contract.test() 24 | 25 | print(run) 26 | assert run.result == "passed" 27 | assert all(check.result == "passed" for check in run.checks) 28 | -------------------------------------------------------------------------------- /tests/test_test_delta.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from typer.testing import CliRunner 4 | 5 | from datacontract.cli import app 6 | from datacontract.data_contract import DataContract 7 | 8 | runner = CliRunner() 9 | 10 | 11 | def test_valid_cli(): 12 | current_file_path = os.path.abspath(__file__) 13 | print("DEBUG Current file path:" + current_file_path) 14 | 15 | result = runner.invoke(app, ["test", "./fixtures/local-delta/datacontract.yaml"]) 16 | assert result.exit_code == 0 17 | assert "Testing ./fixtures/local-delta/datacontract.yaml" in result.stdout 18 | 19 | 20 | def test_valid(): 21 | data_contract = DataContract( 22 | data_contract_file="fixtures/local-delta/datacontract.yaml", 23 | # publish=True, 24 | ) 25 | run = data_contract.test() 
26 | print(run.pretty()) 27 | assert run.result == "passed" 28 | assert len(run.checks) == 9 29 | assert all(check.result == "passed" for check in run.checks) 30 | -------------------------------------------------------------------------------- /tests/test_test_gcs_json_remote.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dotenv import load_dotenv 5 | 6 | from datacontract.data_contract import DataContract 7 | 8 | datacontract = "fixtures/gcs-json-remote/datacontract.yaml" 9 | load_dotenv(override=True) 10 | 11 | 12 | @pytest.mark.skipif( 13 | os.environ.get("DATACONTRACT_GCS_KEY_ID") is None or os.environ.get("DATACONTRACT_GCS_SECRET") is None, 14 | reason="Requires DATACONTRACT_GCS_KEY_ID, and DATACONTRACT_GCS_SECRET to be set", 15 | ) 16 | def test_test_gcs_json_remote_gcs_url(): 17 | """ 18 | server.type "gcs" and gs:// locations work with DuckDB, but are not yet supported for json schema testing 19 | """ 20 | data_contract = DataContract( 21 | data_contract_file=datacontract, 22 | server="gcs-url", 23 | ) 24 | 25 | run = data_contract.test() 26 | 27 | print(run) 28 | assert run.result == "passed" 29 | 30 | 31 | @pytest.mark.skipif( 32 | os.environ.get("DATACONTRACT_GCS_KEY_ID") is None or os.environ.get("DATACONTRACT_GCS_SECRET") is None, 33 | reason="Requires DATACONTRACT_GCS_KEY_ID, and DATACONTRACT_GCS_SECRET to be set", 34 | ) 35 | def test_test_gcs_json_remote_s3_style(monkeypatch): 36 | monkeypatch.setenv("DATACONTRACT_S3_ACCESS_KEY_ID", os.environ.get("DATACONTRACT_GCS_KEY_ID")) 37 | monkeypatch.setenv("DATACONTRACT_S3_SECRET_ACCESS_KEY", os.environ.get("DATACONTRACT_GCS_SECRET")) 38 | 39 | data_contract = DataContract( 40 | data_contract_file=datacontract, 41 | server="s3-style", 42 | ) 43 | 44 | run = data_contract.test() 45 | 46 | print(run) 47 | assert run.result == "passed" 48 | assert all(check.result == "passed" for check in run.checks) 49 | 
-------------------------------------------------------------------------------- /tests/test_test_kafka.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import six 4 | 5 | # Fix for Python 3.12 6 | if sys.version_info >= (3, 12, 1): 7 | sys.modules["kafka.vendor.six.moves"] = six.moves 8 | 9 | 10 | from kafka import KafkaProducer 11 | from testcontainers.kafka import KafkaContainer 12 | 13 | from datacontract.data_contract import DataContract 14 | 15 | datacontract = "fixtures/kafka/datacontract.yaml" 16 | 17 | 18 | def test_test_kafka(monkeypatch): 19 | monkeypatch.delenv("DATACONTRACT_KAFKA_SASL_USERNAME", raising=False) 20 | 21 | with KafkaContainer("confluentinc/cp-kafka:7.7.0").with_kraft() as kafka: 22 | send_messages_to_topic(kafka, "fixtures/kafka/data/messages.json", "inventory-events") 23 | data_contract_str = _setup_datacontract(kafka) 24 | data_contract = DataContract(data_contract_str=data_contract_str) 25 | run = data_contract.test() 26 | 27 | print(run.pretty()) 28 | assert run.result == "passed" 29 | 30 | 31 | def send_messages_to_topic(kafka: KafkaContainer, messages_file_path: str, topic_name: str): 32 | print(f"Sending messages from {messages_file_path} to Kafka topic {topic_name}") 33 | 34 | producer = KafkaProducer( 35 | bootstrap_servers=kafka.get_bootstrap_server(), value_serializer=lambda v: v.encode("utf-8") 36 | ) 37 | messages_sent = 0 38 | 39 | with open(messages_file_path) as messages_file: 40 | for line in messages_file: 41 | message = line 42 | producer.send(topic=topic_name, value=message) 43 | messages_sent += 1 44 | producer.flush() 45 | 46 | print(f"Sent {messages_sent} messages from {messages_file_path} to Kafka topic {topic_name}") 47 | 48 | 49 | def _setup_datacontract(kafka: KafkaContainer): 50 | with open(datacontract) as data_contract_file: 51 | data_contract_str = data_contract_file.read() 52 | host = kafka.get_bootstrap_server() 53 | return 
data_contract_str.replace("__KAFKA_HOST__", host) 54 | -------------------------------------------------------------------------------- /tests/test_test_kafka_remote.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import pytest 5 | import six 6 | 7 | # Fix for Python 3.12 8 | if sys.version_info >= (3, 12, 1): 9 | sys.modules["kafka.vendor.six.moves"] = six.moves 10 | 11 | 12 | from dotenv import load_dotenv 13 | 14 | from datacontract.data_contract import DataContract 15 | 16 | # logging.basicConfig(level=logging.INFO, force=True) 17 | 18 | 19 | @pytest.mark.skipif( 20 | os.environ.get("DATACONTRACT_KAFKA_SASL_USERNAME") is None, 21 | reason="Requires DATACONTRACT_KAFKA_SASL_USERNAME to be set", 22 | ) 23 | def _test_test_kafka_json_remote(): 24 | load_dotenv(override=True) 25 | # os.environ['DATACONTRACT_KAFKA_SASL_USERNAME'] = "xxx" 26 | # os.environ['DATACONTRACT_KAFKA_SASL_PASSWORD'] = "xxx" 27 | data_contract = DataContract(data_contract_file="fixtures/kafka-json-remote/datacontract.yaml") 28 | 29 | run = data_contract.test() 30 | 31 | print(run) 32 | assert run.result == "passed" 33 | 34 | 35 | @pytest.mark.skipif( 36 | os.environ.get("DATACONTRACT_KAFKA_SASL_USERNAME") is None, 37 | reason="Requires DATACONTRACT_KAFKA_SASL_USERNAME to be set", 38 | ) 39 | def _test_test_kafka_avro_remote(): 40 | load_dotenv(override=True) 41 | # os.environ['DATACONTRACT_KAFKA_SASL_USERNAME'] = "xxx" 42 | # os.environ['DATACONTRACT_KAFKA_SASL_PASSWORD'] = "xxx" 43 | data_contract = DataContract(data_contract_file="fixtures/kafka-avro-remote/datacontract.yaml") 44 | 45 | run = data_contract.test() 46 | 47 | print(run) 48 | assert run.result == "passed" 49 | -------------------------------------------------------------------------------- /tests/test_test_local_json.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from typer.testing import 
CliRunner 3 | 4 | from datacontract.cli import app 5 | from datacontract.data_contract import DataContract 6 | 7 | runner = CliRunner() 8 | 9 | 10 | @pytest.mark.skip(reason="https://github.com/sodadata/soda-core/issues/1992") 11 | def _test_cli(): 12 | result = runner.invoke(app, ["test", "./fixtures/local-json/datacontract.yaml"]) 13 | assert result.exit_code == 0 14 | 15 | 16 | @pytest.mark.skip(reason="https://github.com/sodadata/soda-core/issues/1992") 17 | def _test_local_json(): 18 | data_contract = DataContract(data_contract_file="fixtures/local-json/datacontract.yaml") 19 | run = data_contract.test() 20 | print(run) 21 | assert run.result == "passed" 22 | -------------------------------------------------------------------------------- /tests/test_test_output_junit.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from typer.testing import CliRunner 4 | 5 | from datacontract.cli import app 6 | 7 | runner = CliRunner() 8 | 9 | 10 | def test_output_junit_test_result(tmp_path): 11 | runner.invoke( 12 | app, 13 | [ 14 | "test", 15 | "--output", 16 | tmp_path / "TEST-datacontract.xml", 17 | "--output-format", 18 | "junit", 19 | "./fixtures/junit/datacontract.yaml", 20 | ], 21 | ) 22 | assert os.path.exists(tmp_path / "TEST-datacontract.xml"), "Should write a JUnit test result file" 23 | -------------------------------------------------------------------------------- /tests/test_test_s3_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from testcontainers.minio import MinioContainer 5 | 6 | from datacontract.data_contract import DataContract 7 | 8 | # logging.basicConfig(level=logging.DEBUG, force=True) 9 | 10 | datacontract = "fixtures/s3-csv/datacontract.yaml" 11 | file_name = "fixtures/s3-csv/data/sample_data.csv" 12 | bucket_name = "test-bucket" 13 | s3_access_key = "test-access" 14 | s3_secret_access_key = "test-secret" 15 | 
16 | 17 | @pytest.fixture(scope="session") 18 | def minio_container(): 19 | with MinioContainer( 20 | image="quay.io/minio/minio", access_key=s3_access_key, secret_key=s3_secret_access_key 21 | ) as minio_container: 22 | yield minio_container 23 | 24 | 25 | def test_test_s3_csv(minio_container, monkeypatch): 26 | monkeypatch.setenv("DATACONTRACT_S3_ACCESS_KEY_ID", s3_access_key) 27 | monkeypatch.setenv("DATACONTRACT_S3_SECRET_ACCESS_KEY", s3_secret_access_key) 28 | data_contract_str = _prepare_s3_files(minio_container) 29 | data_contract = DataContract(data_contract_str=data_contract_str) 30 | 31 | run = data_contract.test() 32 | 33 | print(run) 34 | assert run.result == "passed" 35 | assert all(check.result == "passed" for check in run.checks) 36 | 37 | 38 | def _prepare_s3_files(minio_container): 39 | s3_endpoint_url = f"http://{minio_container.get_container_host_ip()}:{minio_container.get_exposed_port(9000)}" 40 | minio_client = minio_container.get_client() 41 | minio_client.make_bucket(bucket_name) 42 | with open(file_name, "rb") as file: 43 | minio_client.put_object(bucket_name, file_name, file, os.path.getsize(file_name)) 44 | with open(datacontract) as data_contract_file: 45 | data_contract_str = data_contract_file.read() 46 | data_contract_str = data_contract_str.replace("__S3_ENDPOINT_URL__", s3_endpoint_url) 47 | return data_contract_str 48 | -------------------------------------------------------------------------------- /tests/test_test_s3_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from testcontainers.minio import MinioContainer 5 | 6 | from datacontract.data_contract import DataContract 7 | 8 | datacontract = "fixtures/s3-json/datacontract.yaml" 9 | file_name = "fixtures/s3-json/data/inventory/year=2022/month=04/day=20/hour=00/inventory+0+0001327496.json" 10 | bucket_name = "test-bucket" 11 | s3_access_key = "test-access" 12 | s3_secret_access_key = "test-secret" 13 
| 14 | 15 | @pytest.fixture(scope="session") 16 | def minio_container(): 17 | with MinioContainer( 18 | image="quay.io/minio/minio", access_key=s3_access_key, secret_key=s3_secret_access_key 19 | ) as minio_container: 20 | yield minio_container 21 | 22 | 23 | def test_test_s3_json(minio_container, monkeypatch): 24 | monkeypatch.setenv("DATACONTRACT_S3_ACCESS_KEY_ID", "test-access") 25 | monkeypatch.setenv("DATACONTRACT_S3_SECRET_ACCESS_KEY", "test-secret") 26 | data_contract_str = _prepare_s3_files(minio_container) 27 | data_contract = DataContract(data_contract_str=data_contract_str) 28 | 29 | run = data_contract.test() 30 | 31 | print(run) 32 | assert run.result == "passed" 33 | assert all(check.result == "passed" for check in run.checks) 34 | 35 | 36 | def _prepare_s3_files(minio_container): 37 | s3_endpoint_url = f"http://{minio_container.get_container_host_ip()}:{minio_container.get_exposed_port(9000)}" 38 | minio_client = minio_container.get_client() 39 | minio_client.make_bucket(bucket_name) 40 | with open(file_name, "rb") as file: 41 | minio_client.put_object(bucket_name, file_name, file, os.path.getsize(file_name)) 42 | with open(datacontract) as data_contract_file: 43 | data_contract_str = data_contract_file.read() 44 | data_contract_str = data_contract_str.replace("__S3_ENDPOINT_URL__", s3_endpoint_url) 45 | return data_contract_str 46 | -------------------------------------------------------------------------------- /tests/test_test_s3_json_complex.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from testcontainers.minio import MinioContainer 5 | 6 | from datacontract.data_contract import DataContract 7 | 8 | datacontract = "./fixtures/s3-json-complex/datacontract.yaml" 9 | file_name = "fixtures/s3-json-complex/data/feed.json" 10 | bucket_name = "feed-bucket" 11 | s3_access_key = "test-access" 12 | s3_secret_access_key = "test-secret" 13 | 14 | 15 | 
@pytest.fixture(scope="session") 16 | def minio_container(): 17 | with MinioContainer( 18 | image="quay.io/minio/minio", access_key=s3_access_key, secret_key=s3_secret_access_key 19 | ) as minio_container: 20 | yield minio_container 21 | 22 | 23 | def test_test_s3_json(minio_container, monkeypatch): 24 | monkeypatch.setenv("DATACONTRACT_S3_ACCESS_KEY_ID", "test-access") 25 | monkeypatch.setenv("DATACONTRACT_S3_SECRET_ACCESS_KEY", "test-secret") 26 | 27 | data_contract_str = _prepare_s3_files(minio_container) 28 | 29 | data_contract = DataContract(data_contract_str=data_contract_str) 30 | 31 | run = data_contract.test() 32 | 33 | print(run.pretty()) 34 | assert run.result == "passed" 35 | assert all(check.result == "passed" for check in run.checks) 36 | 37 | 38 | def _prepare_s3_files(minio_container): 39 | s3_endpoint_url = f"http://{minio_container.get_container_host_ip()}:{minio_container.get_exposed_port(9000)}" 40 | minio_client = minio_container.get_client() 41 | minio_client.make_bucket(bucket_name) 42 | with open(file_name, "rb") as file: 43 | minio_client.put_object(bucket_name, file_name, file, os.path.getsize(file_name)) 44 | with open(datacontract) as data_contract_file: 45 | data_contract_str = data_contract_file.read() 46 | data_contract_str = data_contract_str.replace("__S3_ENDPOINT_URL__", s3_endpoint_url) 47 | return data_contract_str 48 | -------------------------------------------------------------------------------- /tests/test_test_s3_json_remote.py: -------------------------------------------------------------------------------- 1 | from datacontract.data_contract import DataContract 2 | 3 | # logging.basicConfig(level=logging.INFO, force=True) 4 | 5 | datacontract = "fixtures/s3-json-remote/datacontract.yaml" 6 | 7 | 8 | def test_test_s3_json(monkeypatch): 9 | monkeypatch.delenv("AWS_ACCESS_KEY_ID", raising=False) 10 | monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False) 11 | monkeypatch.delenv("DATACONTRACT_S3_ACCESS_KEY_ID", 
raising=False) 12 | monkeypatch.delenv("DATACONTRACT_S3_SECRET_ACCESS_KEY", raising=False) 13 | 14 | data_contract = DataContract(data_contract_file=datacontract) 15 | 16 | run = data_contract.test() 17 | 18 | print(run.pretty()) 19 | assert run.result == "passed" 20 | assert all(check.result == "passed" for check in run.checks) 21 | -------------------------------------------------------------------------------- /tests/test_test_snowflake.py: -------------------------------------------------------------------------------- 1 | # logging.basicConfig(level=logging.INFO, force=True) 2 | 3 | datacontract = "fixtures/snowflake/datacontract.yaml" 4 | 5 | 6 | # @pytest.mark.skipif(os.environ.get("DATACONTRACT_SNOWFLAKE_USERNAME") is None, reason="Requires DATACONTRACT_SNOWFLAKE_USERNAME to be set") 7 | # def test_test_snowflake(): 8 | # load_dotenv(override=True) 9 | # # os.environ['DATACONTRACT_SNOWFLAKE_USERNAME'] = "xxx" 10 | # # os.environ['DATACONTRACT_SNOWFLAKE_PASSWORD'] = "xxx" 11 | # # os.environ['DATACONTRACT_SNOWFLAKE_ROLE'] = "xxx" 12 | # # os.environ['DATACONTRACT_SNOWFLAKE_WAREHOUSE'] = "COMPUTE_WH" 13 | # data_contract = DataContract(data_contract_file=datacontract) 14 | # 15 | # run = data_contract.test() 16 | # 17 | # print(run) 18 | # assert run.result == "passed" 19 | # assert all(check.result == "passed" for check in run.checks) 20 | --------------------------------------------------------------------------------