├── .editorconfig
├── .github
├── dependabot.yml
├── pull_request_template.md
└── workflows
│ ├── ci.yaml
│ └── release.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .pre-commit-hooks.yaml
├── API.md
├── CHANGELOG.md
├── CNAME
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── _config.yml
├── _layouts
└── default.html
├── datacontract
├── __init__.py
├── api.py
├── breaking
│ ├── breaking.py
│ ├── breaking_change.py
│ └── breaking_rules.py
├── catalog
│ └── catalog.py
├── cli.py
├── data_contract.py
├── engines
│ ├── __init__.py
│ ├── data_contract_checks.py
│ ├── data_contract_test.py
│ ├── datacontract
│ │ ├── check_that_datacontract_contains_valid_servers_configuration.py
│ │ └── check_that_datacontract_file_exists.py
│ ├── fastjsonschema
│ │ ├── check_jsonschema.py
│ │ └── s3
│ │ │ └── s3_read_files.py
│ └── soda
│ │ ├── __init__.py
│ │ ├── check_soda_execute.py
│ │ └── connections
│ │ ├── bigquery.py
│ │ ├── databricks.py
│ │ ├── duckdb_connection.py
│ │ ├── kafka.py
│ │ ├── postgres.py
│ │ ├── snowflake.py
│ │ ├── sqlserver.py
│ │ └── trino.py
├── export
│ ├── __init__.py
│ ├── avro_converter.py
│ ├── avro_idl_converter.py
│ ├── bigquery_converter.py
│ ├── custom_converter.py
│ ├── data_caterer_converter.py
│ ├── dbml_converter.py
│ ├── dbt_converter.py
│ ├── dcs_exporter.py
│ ├── duckdb_type_converter.py
│ ├── exporter.py
│ ├── exporter_factory.py
│ ├── go_converter.py
│ ├── great_expectations_converter.py
│ ├── html_exporter.py
│ ├── iceberg_converter.py
│ ├── jsonschema_converter.py
│ ├── markdown_converter.py
│ ├── mermaid_exporter.py
│ ├── odcs_v3_exporter.py
│ ├── pandas_type_converter.py
│ ├── protobuf_converter.py
│ ├── pydantic_converter.py
│ ├── rdf_converter.py
│ ├── sodacl_converter.py
│ ├── spark_converter.py
│ ├── sql_converter.py
│ ├── sql_type_converter.py
│ ├── sqlalchemy_converter.py
│ └── terraform_converter.py
├── imports
│ ├── avro_importer.py
│ ├── bigquery_importer.py
│ ├── csv_importer.py
│ ├── dbml_importer.py
│ ├── dbt_importer.py
│ ├── excel_importer.py
│ ├── glue_importer.py
│ ├── iceberg_importer.py
│ ├── importer.py
│ ├── importer_factory.py
│ ├── jsonschema_importer.py
│ ├── odcs_importer.py
│ ├── odcs_v3_importer.py
│ ├── parquet_importer.py
│ ├── protobuf_importer.py
│ ├── spark_importer.py
│ ├── sql_importer.py
│ └── unity_importer.py
├── init
│ └── init_template.py
├── integration
│ └── datamesh_manager.py
├── lint
│ ├── files.py
│ ├── lint.py
│ ├── linters
│ │ ├── __init__.py
│ │ ├── description_linter.py
│ │ ├── field_pattern_linter.py
│ │ ├── field_reference_linter.py
│ │ ├── notice_period_linter.py
│ │ └── valid_constraints_linter.py
│ ├── resolve.py
│ ├── resources.py
│ ├── schema.py
│ └── urls.py
├── model
│ ├── data_contract_specification
│ │ └── __init__.py
│ ├── exceptions.py
│ ├── odcs.py
│ └── run.py
├── output
│ ├── __init__.py
│ ├── junit_test_results.py
│ ├── output_format.py
│ └── test_results_writer.py
├── py.typed
├── schemas
│ ├── datacontract-1.1.0.init.yaml
│ ├── datacontract-1.1.0.schema.json
│ ├── download
│ └── odcs-3.0.1.schema.json
└── templates
│ ├── datacontract.html
│ ├── datacontract_odcs.html
│ ├── index.html
│ ├── partials
│ ├── datacontract_information.html
│ ├── datacontract_servicelevels.html
│ ├── datacontract_terms.html
│ ├── definition.html
│ ├── example.html
│ ├── model_field.html
│ ├── quality.html
│ └── server.html
│ └── style
│ ├── generate-style
│ ├── input.css
│ ├── output.css
│ └── tailwind.config.js
├── datacontractcli.png
├── favicon.png
├── pyproject.toml
├── release
├── tests
├── __init__.py
├── conftest.py
├── fixtures
│ ├── avro
│ │ ├── data
│ │ │ ├── arrays.avsc
│ │ │ ├── logical_types.avsc
│ │ │ ├── nested.avsc
│ │ │ ├── nested_with_arrays.avsc
│ │ │ └── orders.avsc
│ │ └── export
│ │ │ ├── datacontract.yaml
│ │ │ ├── datacontract_decimal.avsc
│ │ │ ├── datacontract_decimal.yaml
│ │ │ ├── datacontract_enum.avsc
│ │ │ ├── datacontract_enum.yaml
│ │ │ ├── datacontract_logicalType.avsc
│ │ │ ├── datacontract_logicalType.yaml
│ │ │ ├── datacontract_test_field_float.avsc
│ │ │ ├── datacontract_test_field_float.yaml
│ │ │ ├── datacontract_test_field_map.avsc
│ │ │ ├── datacontract_test_field_map.yaml
│ │ │ ├── datacontract_test_field_namespace.avsc
│ │ │ ├── datacontract_test_field_namespace.yaml
│ │ │ ├── datacontract_test_logical_type.yaml
│ │ │ ├── datacontract_test_required.avsc
│ │ │ ├── datacontract_test_required.yaml
│ │ │ └── orders_with_datefields.avsc
│ ├── azure-delta-remote
│ │ └── datacontract.yaml
│ ├── azure-json-remote
│ │ └── datacontract.yaml
│ ├── azure-parquet-remote
│ │ └── datacontract.yaml
│ ├── bigquery
│ │ ├── datacontract.yaml
│ │ ├── datacontract_complex.yaml
│ │ ├── export
│ │ │ ├── bq.txt
│ │ │ ├── bq_table_schema.json
│ │ │ └── datacontract.yaml
│ │ └── import
│ │ │ ├── complete_table_schema.json
│ │ │ ├── datacontract.yaml
│ │ │ ├── datacontract_multi_import.yaml
│ │ │ ├── multi_import_external_table.json
│ │ │ ├── multi_import_materialized_view.json
│ │ │ ├── multi_import_snapshot.json
│ │ │ ├── multi_import_table.json
│ │ │ └── multi_import_view.json
│ ├── breaking
│ │ ├── datacontract-definitions-v1.yaml
│ │ ├── datacontract-definitions-v2.yaml
│ │ ├── datacontract-definitions-v3.yaml
│ │ ├── datacontract-fields-array-v1.yaml
│ │ ├── datacontract-fields-array-v2.yaml
│ │ ├── datacontract-fields-v1.yaml
│ │ ├── datacontract-fields-v2.yaml
│ │ ├── datacontract-fields-v3.yaml
│ │ ├── datacontract-info-v1.yaml
│ │ ├── datacontract-info-v2.yaml
│ │ ├── datacontract-info-v3.yaml
│ │ ├── datacontract-models-v1.yaml
│ │ ├── datacontract-models-v2.yaml
│ │ ├── datacontract-models-v3.yaml
│ │ ├── datacontract-quality-v1.yaml
│ │ ├── datacontract-quality-v2.yaml
│ │ ├── datacontract-quality-v3.yaml
│ │ ├── datacontract-terms-v1.yaml
│ │ ├── datacontract-terms-v2.yaml
│ │ └── datacontract-terms-v3.yaml
│ ├── catalog
│ │ ├── datacontract-1.yaml
│ │ └── datacontract-2.yaml
│ ├── csv
│ │ └── data
│ │ │ ├── datacontract.yaml
│ │ │ ├── sample_data.csv
│ │ │ └── sample_data_5_column.csv
│ ├── custom
│ │ └── export
│ │ │ ├── datacontract.yaml
│ │ │ ├── expected.sql
│ │ │ └── template.sql
│ ├── data-caterer
│ │ └── export
│ │ │ └── datacontract_nested.yaml
│ ├── databricks-sql
│ │ └── datacontract.yaml
│ ├── databricks-unity
│ │ └── import
│ │ │ ├── datacontract.yaml
│ │ │ ├── datacontract_complex_types.yaml
│ │ │ ├── unity_table_schema.json
│ │ │ └── unity_table_schema_complex_types.json
│ ├── dataframe
│ │ └── datacontract.yaml
│ ├── dbml
│ │ ├── datacontract.yaml
│ │ └── import
│ │ │ ├── datacontract.yaml
│ │ │ ├── datacontract_schema_filtered.yaml
│ │ │ ├── datacontract_table_filtered.yaml
│ │ │ └── dbml.txt
│ ├── dbt
│ │ ├── export
│ │ │ └── datacontract.yaml
│ │ └── import
│ │ │ ├── manifest_empty_columns.json
│ │ │ ├── manifest_jaffle_bigquery.json
│ │ │ └── manifest_jaffle_duckdb.json
│ ├── excel
│ │ ├── shipments-odcs.xlsx
│ │ └── shipments-odcs.yaml
│ ├── export
│ │ ├── datacontract.html
│ │ ├── datacontract.yaml
│ │ ├── datacontract_nested.yaml
│ │ ├── datacontract_no_model_type.yaml
│ │ ├── datacontract_s3.yaml
│ │ └── rdf
│ │ │ ├── datacontract-complex.yaml
│ │ │ └── datacontract.yaml
│ ├── gcs-json-remote
│ │ ├── data
│ │ │ ├── README.md
│ │ │ └── inventory
│ │ │ │ └── year=2022
│ │ │ │ ├── month=04
│ │ │ │ └── day=20
│ │ │ │ │ └── hour=00
│ │ │ │ │ ├── inventory+0+0001327496.json
│ │ │ │ │ ├── inventory+0+0001328496.json
│ │ │ │ │ ├── inventory+0+0001329496.json
│ │ │ │ │ └── inventory+0+0001330496.json
│ │ │ │ └── month=05
│ │ │ │ └── day=04
│ │ │ │ └── hour=00
│ │ │ │ ├── inventory+0+0002657902.json
│ │ │ │ ├── inventory+0+0002658902.json
│ │ │ │ └── inventory+0+0002659902.json
│ │ └── datacontract.yaml
│ ├── glue
│ │ ├── datacontract-empty-model.yaml
│ │ └── datacontract.yaml
│ ├── great-expectations
│ │ ├── datacontract.yaml
│ │ ├── datacontract_missing_quality_file.yaml
│ │ ├── datacontract_quality_column.yaml
│ │ ├── datacontract_quality_file.yaml
│ │ ├── datacontract_quality_yaml.yaml
│ │ └── quality.json
│ ├── iceberg
│ │ ├── invalid_schema.json
│ │ ├── nested_schema.json
│ │ └── simple_schema.json
│ ├── import
│ │ ├── football-datacontract.yml
│ │ ├── football.json
│ │ ├── football_deeply_nested_no_required.json
│ │ ├── football_deeply_nested_no_required_datacontract.yml
│ │ ├── orders.json
│ │ ├── orders_union-types.json
│ │ └── orders_union-types_datacontract.yml
│ ├── junit
│ │ ├── data
│ │ │ └── somedata.csv
│ │ └── datacontract.yaml
│ ├── kafka-avro-remote
│ │ └── datacontract.yaml
│ ├── kafka-json-remote
│ │ └── datacontract.yaml
│ ├── kafka
│ │ ├── data
│ │ │ └── messages.json
│ │ └── datacontract.yaml
│ ├── lint
│ │ ├── custom_datacontract.schema.json
│ │ ├── custom_datacontract.yaml
│ │ ├── datacontract_csv_lint_base.yaml
│ │ ├── datacontract_quality_schema.yaml
│ │ ├── datacontract_unknown_model.yaml
│ │ ├── invalid_datacontract.yaml
│ │ ├── valid_datacontract.yaml
│ │ ├── valid_datacontract_ref.yaml
│ │ └── valid_datacontract_references.yaml
│ ├── local-delta
│ │ ├── data
│ │ │ ├── line_items
│ │ │ │ ├── 0-7b7ac87a-16b4-43be-b019-de661a3180cf-0.parquet
│ │ │ │ └── _delta_log
│ │ │ │ │ └── 00000000000000000000.json
│ │ │ └── orders
│ │ │ │ ├── 0-5014bd96-6666-482e-bec9-d02a43a78cfb-0.parquet
│ │ │ │ └── _delta_log
│ │ │ │ └── 00000000000000000000.json
│ │ ├── datacontract.yaml
│ │ └── helper
│ │ │ └── create_delta_files.py
│ ├── local-json-complex
│ │ ├── data
│ │ │ └── sts_data.json
│ │ └── datacontract.yaml
│ ├── local-json
│ │ ├── data
│ │ │ ├── nested_types.json
│ │ │ └── verbraucherpreisindex.json
│ │ ├── datacontract.json
│ │ └── datacontract.yaml
│ ├── markdown
│ │ └── export
│ │ │ ├── datacontract.yaml
│ │ │ └── expected.md
│ ├── odcs_v3
│ │ ├── adventureworks.datacontract.yml
│ │ ├── adventureworks.odcs.yaml
│ │ ├── full-example.datacontract.yml
│ │ └── full-example.odcs.yaml
│ ├── parquet
│ │ ├── data
│ │ │ ├── array.parquet
│ │ │ ├── bigint.parquet
│ │ │ ├── blob.parquet
│ │ │ ├── boolean.parquet
│ │ │ ├── combined.parquet
│ │ │ ├── combined_no_time.parquet
│ │ │ ├── date.parquet
│ │ │ ├── decimal.parquet
│ │ │ ├── double.parquet
│ │ │ ├── float.parquet
│ │ │ ├── integer.parquet
│ │ │ ├── list.parquet
│ │ │ ├── map.parquet
│ │ │ ├── string.parquet
│ │ │ ├── struct.parquet
│ │ │ ├── time.parquet
│ │ │ ├── timestamp.parquet
│ │ │ └── timestamp_ntz.parquet
│ │ ├── datacontract.yaml
│ │ ├── datacontract_array.yaml
│ │ ├── datacontract_bigint.yaml
│ │ ├── datacontract_binary.yaml
│ │ ├── datacontract_boolean.yaml
│ │ ├── datacontract_date.yaml
│ │ ├── datacontract_decimal.yaml
│ │ ├── datacontract_double.yaml
│ │ ├── datacontract_float.yaml
│ │ ├── datacontract_integer.yaml
│ │ ├── datacontract_invalid.yaml
│ │ ├── datacontract_map.yaml
│ │ ├── datacontract_string.yaml
│ │ ├── datacontract_struct.yaml
│ │ ├── datacontract_timestamp.yaml
│ │ ├── datacontract_timestamp_ntz.yaml
│ │ └── helper
│ │ │ └── create_parquet_files.py
│ ├── postgres-export
│ │ ├── data
│ │ │ └── data.sql
│ │ └── datacontract.yaml
│ ├── postgres
│ │ ├── data
│ │ │ ├── data.sql
│ │ │ ├── data_case_sensitive.sql
│ │ │ └── data_constraints.sql
│ │ ├── datacontract.yaml
│ │ ├── datacontract_case_sensitive.yaml
│ │ ├── datacontract_servicelevels.yaml
│ │ └── odcs.yaml
│ ├── protobuf
│ │ ├── data
│ │ │ └── sample_data.proto3.data
│ │ └── datacontract.yaml
│ ├── quality
│ │ ├── data
│ │ │ ├── data.invalid.sql
│ │ │ └── data.valid.sql
│ │ └── datacontract.yaml
│ ├── s3-csv
│ │ ├── data
│ │ │ └── sample_data.csv
│ │ └── datacontract.yaml
│ ├── s3-delta
│ │ ├── data
│ │ │ └── orders.delta
│ │ │ │ ├── 0-66aaa7ef-36e3-4985-9359-72874e273705-0.parquet
│ │ │ │ └── _delta_log
│ │ │ │ └── 00000000000000000000.json
│ │ ├── datacontract.yaml
│ │ └── helper
│ │ │ └── create_delta_files.py
│ ├── s3-json-complex
│ │ ├── data
│ │ │ └── feed.json
│ │ └── datacontract.yaml
│ ├── s3-json-multiple-models
│ │ ├── data
│ │ │ ├── line_items
│ │ │ │ └── line_items-1.json
│ │ │ └── orders
│ │ │ │ └── orders-1.json
│ │ ├── datacontract.yaml
│ │ └── v2
│ │ │ ├── line_items
│ │ │ └── line_items-1.json
│ │ │ └── orders
│ │ │ └── orders-1.json
│ ├── s3-json-remote
│ │ └── datacontract.yaml
│ ├── s3-json
│ │ ├── data
│ │ │ └── inventory
│ │ │ │ └── year=2022
│ │ │ │ ├── month=04
│ │ │ │ └── day=20
│ │ │ │ │ └── hour=00
│ │ │ │ │ ├── inventory+0+0001327496.json
│ │ │ │ │ ├── inventory+0+0001328496.json
│ │ │ │ │ ├── inventory+0+0001329496.json
│ │ │ │ │ └── inventory+0+0001330496.json
│ │ │ │ └── month=05
│ │ │ │ └── day=04
│ │ │ │ └── hour=00
│ │ │ │ ├── inventory+0+0002657902.json
│ │ │ │ ├── inventory+0+0002658902.json
│ │ │ │ └── inventory+0+0002659902.json
│ │ └── datacontract.yaml
│ ├── snowflake
│ │ └── datacontract.yaml
│ ├── sodacl
│ │ ├── checks.yaml
│ │ └── datacontract.yaml
│ ├── spark
│ │ ├── export
│ │ │ └── datacontract.yaml
│ │ └── import
│ │ │ ├── users_datacontract_desc.yml
│ │ │ └── users_datacontract_no_desc.yml
│ ├── spec
│ │ ├── datacontract_aliases.yaml
│ │ └── datacontract_fields_field.yaml
│ ├── sqlserver
│ │ ├── data
│ │ │ └── data.sql
│ │ ├── datacontract.yaml
│ │ └── import
│ │ │ └── ddl.sql
│ └── trino
│ │ ├── data
│ │ ├── data.sql
│ │ └── table.sql
│ │ └── datacontract.yaml
├── test_api.py
├── test_breaking.py
├── test_catalog.py
├── test_changelog.py
├── test_cli.py
├── test_data_contract_checks.py
├── test_data_contract_specification.py
├── test_description_linter.py
├── test_documentation_linter.py
├── test_download_datacontract_file.py
├── test_duckdb_json.py
├── test_export_avro.py
├── test_export_avro_idl.py
├── test_export_bigquery.py
├── test_export_complex_data_contract.py
├── test_export_custom.py
├── test_export_custom_exporter.py
├── test_export_data_caterer.py
├── test_export_dbml.py
├── test_export_dbt_models.py
├── test_export_dbt_sources.py
├── test_export_dbt_staging_sql.py
├── test_export_go.py
├── test_export_great_expectations.py
├── test_export_html.py
├── test_export_iceberg.py
├── test_export_jsonschema.py
├── test_export_markdown.py
├── test_export_mermaid.py
├── test_export_odcs_v3.py
├── test_export_protobuf.py
├── test_export_pydantic.py
├── test_export_rdf.py
├── test_export_sodacl.py
├── test_export_spark.py
├── test_export_sql.py
├── test_export_sql_query.py
├── test_export_sqlalchemy.py
├── test_export_terraform.py
├── test_field_constraint_linter.py
├── test_field_pattern_linter.py
├── test_field_reference_linter.py
├── test_import_avro.py
├── test_import_bigquery.py
├── test_import_csv.py
├── test_import_dbml.py
├── test_import_dbt.py
├── test_import_excel.py
├── test_import_glue.py
├── test_import_iceberg.py
├── test_import_jsonschema.py
├── test_import_odcs_v3.py
├── test_import_parquet.py
├── test_import_protobuf.py
├── test_import_spark.py
├── test_import_sql_postgres.py
├── test_import_sql_sqlserver.py
├── test_import_unity_file.py
├── test_integration_datameshmanager.py
├── test_lint.py
├── test_notice_period_linter.py
├── test_resolve.py
├── test_roundtrip_jsonschema.py
├── test_spec_fields_field.py
├── test_spec_ref.py
├── test_test_azure_remote.py
├── test_test_bigquery.py
├── test_test_databricks.py
├── test_test_dataframe.py
├── test_test_delta.py
├── test_test_gcs_json_remote.py
├── test_test_kafka.py
├── test_test_kafka_remote.py
├── test_test_local_json.py
├── test_test_output_junit.py
├── test_test_parquet.py
├── test_test_postgres.py
├── test_test_quality.py
├── test_test_s3_csv.py
├── test_test_s3_delta.py
├── test_test_s3_json.py
├── test_test_s3_json_complex.py
├── test_test_s3_json_multiple_models.py
├── test_test_s3_json_remote.py
├── test_test_snowflake.py
├── test_test_sqlserver.py
└── test_test_trino.py
└── update_help.py
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | charset = utf-8
5 | end_of_line = lf
6 | indent_size = 2
7 | indent_style = space
8 | insert_final_newline = false
9 | max_line_length = 100
10 | tab_width = 2
11 |
12 | [{*.py,*.pyw}]
13 | indent_size = 4
14 | max_line_length = 120
15 | tab_width = 4
16 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "pip" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "weekly"
12 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | - [ ] Tests pass
2 | - [ ] ruff format
3 | - [ ] README.md updated (if relevant)
4 | - [ ] CHANGELOG.md entry added
5 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/astral-sh/ruff-pre-commit
3 | # Ruff version.
4 | rev: v0.4.7
5 | hooks:
6 | # Run the linter.
7 | - id: ruff
8 | args: [ --fix ]
9 | # Run the formatter.
10 | - id: ruff-format
--------------------------------------------------------------------------------
/.pre-commit-hooks.yaml:
--------------------------------------------------------------------------------
1 | - id: datacontract-lint
2 | name: Data Contract Linter
3 | description: This hook lints the data contract.
4 | entry: datacontract lint
5 | files: "datacontract*.yaml"
6 | language: python
7 | additional_dependencies: ['.[all]']
8 | types: [yaml]
9 |
10 | - id: datacontract-test
11 | name: Data Contract Tester
12 | description: This hook tests the data contract.
13 | entry: datacontract test
14 | files: "datacontract*.yaml"
15 | language: python
16 | additional_dependencies: ['.[all]']
17 | types: [yaml]
18 |
--------------------------------------------------------------------------------
/CNAME:
--------------------------------------------------------------------------------
1 | cli.datacontract.com
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.11-bullseye
2 |
3 | # Setting PYTHONUNBUFFERED to a non-empty value different from 0 ensures that the python output i.e.
4 | # the stdout and stderr streams are sent straight to terminal (e.g. your container log) without
5 | # being first buffered and that you can see the output of your application in real time.
6 | ENV PYTHONUNBUFFERED=1
7 |
8 | # Compiling Python source files to bytecode is typically desirable for production images as it tends
9 | # to improve startup time (at the cost of increased installation time).
10 | ENV UV_COMPILE_BYTECODE=1
11 |
12 | # install uv
13 | COPY --from=ghcr.io/astral-sh/uv:0.6.9 /uv /uvx /bin/
14 |
15 | # copy resources
16 | COPY pyproject.toml /app/.
17 | COPY MANIFEST.in /app/.
18 | COPY datacontract/ /app/datacontract/
19 |
20 | # install requirements
21 | RUN cd /app && uv pip --no-cache-dir install --system ".[all]"
22 |
23 | RUN mkdir -p /home/datacontract
24 | WORKDIR /home/datacontract
25 |
26 | ENTRYPOINT ["datacontract"]
27 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include datacontract/templates/style/output.css
2 | recursive-include datacontract/templates/ **/*.html
3 | recursive-include datacontract/schemas/ **/*.json **/*.yaml
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | plugins:
2 | - jekyll-sitemap
3 | markdown: kramdown
4 | name: "Data Contract CLI"
5 | title: null
6 |
--------------------------------------------------------------------------------
/datacontract/__init__.py:
--------------------------------------------------------------------------------
1 | # Configuration so that yaml.safe_dump dumps strings with line breaks with yaml literal |
2 | import yaml
3 |
4 | yaml.SafeDumper.org_represent_str = yaml.SafeDumper.represent_str
5 |
6 |
def repr_str(dumper, data):
    """Represent ``data`` for YAML output, using literal block style for multi-line text.

    Args:
        dumper: Dumper instance; must offer ``represent_scalar`` and the saved
            ``org_represent_str`` fallback.
        data: The string to represent.

    Returns:
        Whatever node the chosen dumper method returns.
    """
    has_line_break = "\n" in data
    if not has_line_break:
        # Single-line strings keep the dumper's original representation.
        return dumper.org_represent_str(data)
    # Strings containing a line break are emitted as a literal block scalar ("|").
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
11 |
12 |
13 | yaml.add_representer(str, repr_str, Dumper=yaml.SafeDumper)
14 |
--------------------------------------------------------------------------------
/datacontract/engines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/engines/__init__.py
--------------------------------------------------------------------------------
/datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py:
--------------------------------------------------------------------------------
1 | from datacontract.model.data_contract_specification import DataContractSpecification
2 | from datacontract.model.exceptions import DataContractException
3 |
4 |
def check_that_datacontract_contains_valid_server_configuration(
    data_contract: DataContractSpecification, server_name: str | None
):
    """Verify that a server can be selected from the contract's servers block.

    Args:
        data_contract: Contract whose ``servers`` attribute is inspected.
        server_name: Name of the server to test, or ``None`` if none was chosen.

    Raises:
        DataContractException: With ``result="warning"`` when the servers block is
            missing or empty, when several servers exist but none was named, or
            when the named server is not part of the servers block.
    """

    def _skip_tests(check_name: str, reason: str) -> DataContractException:
        # Every failure mode shares the same type, result and engine.
        return DataContractException(
            type="lint",
            name=check_name,
            result="warning",
            reason=reason,
            engine="datacontract",
        )

    servers = data_contract.servers

    if servers is None or len(servers) == 0:
        raise _skip_tests(
            "Check that data contract contains valid server configuration",
            "Servers block is missing. Skip executing tests.",
        )

    if server_name is None and len(servers) > 1:
        raise _skip_tests(
            "Check that data contract contains valid server configuration",
            "Data contract contains multiple server configurations. Specify the server you want to test. Skip executing tests.",
        )

    if server_name is not None and server_name not in servers:
        # NOTE(review): this check name says "servers" (plural), unlike the two
        # above; kept verbatim because it is emitted at runtime.
        raise _skip_tests(
            "Check that data contract contains valid servers configuration",
            f"Cannot find server '{server_name}' in the data contract servers configuration. Skip executing tests.",
        )
32 |
33 |
34 | # TODO check for server.type, if all required fields are present
35 |
--------------------------------------------------------------------------------
/datacontract/engines/datacontract/check_that_datacontract_file_exists.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from datacontract.model.run import Check, Run
4 |
5 |
def check_that_datacontract_file_exists(run: Run, file_path: str):
    """Record a failed check on ``run`` and raise if a local contract file is missing.

    Args:
        run: Run whose ``checks`` list receives the failed check.
        file_path: Location of the data contract; ``None`` and HTTP(S) URLs are
            not checked.

    Raises:
        Exception: When ``file_path`` is a local path that does not exist.
    """
    # Nothing to verify without a path, or when the contract is referenced by URL.
    if file_path is None or file_path.startswith(("http://", "https://")):
        return
    if os.path.exists(file_path):
        return

    message = f"The file '{file_path}' does not exist."
    failed_check = Check(
        type="lint",
        name="Check that data contract file exists",
        result="failed",
        reason=message,
        engine="datacontract-cli",
    )
    # Register the failure on the run before aborting.
    run.checks.append(failed_check)
    raise Exception(message)
22 |
--------------------------------------------------------------------------------
/datacontract/engines/fastjsonschema/s3/s3_read_files.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | from datacontract.model.exceptions import DataContractException
5 | from datacontract.model.run import ResultEnum
6 |
7 |
def yield_s3_files(s3_endpoint_url, s3_location):
    """Yield the content of each file matching ``s3_location``, one file at a time.

    Args:
        s3_endpoint_url: Endpoint URL handed to ``s3_fs`` to build the filesystem.
        s3_location: Glob pattern passed to the filesystem's ``glob``.

    Yields:
        The result of ``read()`` on each matching file.
    """
    filesystem = s3_fs(s3_endpoint_url)
    for file in filesystem.glob(s3_location):
        with filesystem.open(file) as handle:
            logging.info(f"Downloading file {file}")
            yield handle.read()
15 |
16 |
def s3_fs(s3_endpoint_url):
    """Create an ``s3fs.S3FileSystem`` configured from environment variables.

    Credentials are read from ``DATACONTRACT_S3_ACCESS_KEY_ID``,
    ``DATACONTRACT_S3_SECRET_ACCESS_KEY`` and ``DATACONTRACT_S3_SESSION_TOKEN``.
    Anonymous access is requested when no access key id is set.

    Args:
        s3_endpoint_url: Endpoint URL passed to the client as ``endpoint_url``.

    Returns:
        The configured ``s3fs.S3FileSystem``.

    Raises:
        DataContractException: When the optional ``s3fs`` package is not installed.
    """
    try:
        # Imported lazily so that s3fs is only required when S3 is actually used.
        import s3fs
    except ImportError as e:
        # Chain explicitly so the traceback shows the import failure as the cause.
        raise DataContractException(
            type="schema",
            result=ResultEnum.failed,
            name="s3 extra missing",
            reason="Install the extra s3 to use s3",
            engine="datacontract",
            original_exception=e,
        ) from e

    aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
    aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
    aws_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
    return s3fs.S3FileSystem(
        key=aws_access_key_id,
        secret=aws_secret_access_key,
        token=aws_session_token,
        # Without an access key id, fall back to anonymous access.
        anon=aws_access_key_id is None,
        client_kwargs={"endpoint_url": s3_endpoint_url},
    )
40 |
--------------------------------------------------------------------------------
/datacontract/engines/soda/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/engines/soda/__init__.py
--------------------------------------------------------------------------------
/datacontract/engines/soda/connections/bigquery.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import yaml
4 |
5 |
6 | # https://docs.soda.io/soda/connect-bigquery.html#authentication-methods
def to_bigquery_soda_configuration(server):
    """Build the Soda data source configuration for a BigQuery server as YAML.

    The service account key file path is taken from
    ``DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH``; if that variable is unset,
    ``GOOGLE_APPLICATION_CREDENTIALS`` is used instead.

    Args:
        server: Server definition providing ``type``, ``project`` and ``dataset``.

    Returns:
        The configuration serialized with ``yaml.dump``.
    """
    source_key = f"data_source {server.type}"

    # Our own variable wins; Google's default one is only the fallback.
    credentials_path = os.environ.get(
        "DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH",
        os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"),
    )

    data_source = {
        "type": "bigquery",
        "account_info_json_path": credentials_path,
        "auth_scopes": ["https://www.googleapis.com/auth/bigquery"],
        "project_id": server.project,
        "dataset": server.dataset,
    }
    return yaml.dump({source_key: data_source})
28 |
--------------------------------------------------------------------------------
/datacontract/engines/soda/connections/databricks.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import yaml
4 |
5 |
def to_databricks_soda_configuration(server):
    """Build the Soda data source configuration for a Databricks server as YAML.

    The access token comes from ``DATACONTRACT_DATABRICKS_TOKEN`` and the HTTP
    path from ``DATACONTRACT_DATABRICKS_HTTP_PATH`` (passed through unchecked).
    The host is taken from ``server.host`` and, when that is ``None``, from
    ``DATACONTRACT_DATABRICKS_SERVER_HOSTNAME``.

    Args:
        server: Server definition providing ``type``, ``host``, ``catalog`` and ``schema_``.

    Returns:
        The configuration serialized with ``yaml.dump``.

    Raises:
        ValueError: When the token is not set, or when no host is available from
            either the server definition or the environment.
    """
    access_token = os.environ.get("DATACONTRACT_DATABRICKS_TOKEN")
    if access_token is None:
        raise ValueError("DATACONTRACT_DATABRICKS_TOKEN environment variable is not set")

    http_path = os.environ.get("DATACONTRACT_DATABRICKS_HTTP_PATH")

    # The contract's own host has priority over the environment variable.
    hostname = server.host
    if hostname is None:
        hostname = os.environ.get("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME")
    if hostname is None:
        raise ValueError("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME environment variable is not set")

    source_key = f"data_source {server.type}"
    data_source = {
        "type": "spark",
        "method": "databricks",
        "host": hostname,
        "catalog": server.catalog,
        "schema": server.schema_,
        "http_path": http_path,
        "token": access_token,
    }
    return yaml.dump({source_key: data_source})
30 |
--------------------------------------------------------------------------------
/datacontract/engines/soda/connections/postgres.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import yaml
4 |
5 |
def to_postgres_soda_configuration(server):
    """Build the Soda data source configuration for a Postgres server as YAML.

    Username and password are read from ``DATACONTRACT_POSTGRES_USERNAME`` and
    ``DATACONTRACT_POSTGRES_PASSWORD``; unset variables yield ``None`` values.

    Args:
        server: Server definition providing ``type``, ``host``, ``port``,
            ``database`` and ``schema_``.

    Returns:
        The configuration serialized with ``yaml.dump``.
    """
    source_key = f"data_source {server.type}"
    data_source = {
        "type": "postgres",
        "host": server.host,
        # The port is emitted as a string.
        "port": str(server.port),
        "username": os.environ.get("DATACONTRACT_POSTGRES_USERNAME"),
        "password": os.environ.get("DATACONTRACT_POSTGRES_PASSWORD"),
        "database": server.database,
        "schema": server.schema_,
    }
    return yaml.dump({source_key: data_source})
22 |
--------------------------------------------------------------------------------
/datacontract/engines/soda/connections/snowflake.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import yaml
4 |
5 |
def to_snowflake_soda_configuration(server):
    """Build the Soda data source configuration for a Snowflake server as YAML.

    Every environment variable starting with ``DATACONTRACT_SNOWFLAKE_`` becomes a
    connection parameter: the prefix is stripped and the remainder lower-cased.
    These parameters are merged last, so they override ``account``, ``database``
    and ``schema`` from the server definition when the names collide.

    Args:
        server: Server definition providing ``type``, ``account``, ``database``
            and ``schema_``.

    Returns:
        The configuration serialized with ``yaml.dump``.
    """
    prefix = "DATACONTRACT_SNOWFLAKE_"
    # Slice off only the leading prefix; str.replace would also remove any later
    # occurrence of the prefix inside the variable name.
    snowflake_soda_params = {
        name[len(prefix):].lower(): value for name, value in os.environ.items() if name.startswith(prefix)
    }

    # backward compatibility
    snowflake_soda_params.setdefault("connection_timeout", "5")  # minutes

    soda_configuration = {
        f"data_source {server.type}": {
            "type": "snowflake",
            "account": server.account,
            "database": server.database,
            "schema": server.schema_,
            **snowflake_soda_params,
        }
    }
    return yaml.dump(soda_configuration)
25 |
--------------------------------------------------------------------------------
/datacontract/engines/soda/connections/sqlserver.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import yaml
4 |
5 | from datacontract.model.data_contract_specification import Server
6 |
7 |
def _env_flag(name: str, default: bool) -> bool:
    """Read environment variable ``name`` as a boolean, or ``default`` if it is unset."""
    raw = os.getenv(name)
    if raw is None:
        return default
    # Only explicit negatives (and the empty string) count as False; any other
    # value keeps the truthy meaning it had as a non-empty string.
    return raw.strip().lower() not in {"", "0", "false", "no", "n", "off"}


def to_sqlserver_soda_configuration(server: Server) -> str:
    """Serialize server config to soda configuration.

    Username and password are read from ``DATACONTRACT_SQLSERVER_USERNAME`` and
    ``DATACONTRACT_SQLSERVER_PASSWORD`` (empty string when unset). The boolean
    options are read from ``DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION``,
    ``DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE`` and
    ``DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION`` and are emitted as real
    booleans, as in the example below; values such as ``false`` or ``0`` turn
    an option off.

    ### Example:
        type: sqlserver
        host: host
        port: '1433'
        username: simple
        password: simple_pass
        database: database
        schema: dbo
        trusted_connection: false
        encrypt: false
        trust_server_certificate: false
        driver: ODBC Driver 18 for SQL Server

    Args:
        server: Server definition providing ``type``, ``host``, ``port``,
            ``database``, ``schema_`` and ``driver``.

    Returns:
        The configuration serialized with ``yaml.dump``.
    """
    soda_configuration = {
        f"data_source {server.type}": {
            "type": "sqlserver",
            "host": server.host,
            "port": str(server.port),
            "username": os.getenv("DATACONTRACT_SQLSERVER_USERNAME", ""),
            "password": os.getenv("DATACONTRACT_SQLSERVER_PASSWORD", ""),
            "database": server.database,
            "schema": server.schema_,
            # Parsed to bool: a raw "false" string would be dumped as a non-empty
            # string. NOTE(review): presumably the consumer treats any non-empty
            # string as enabled; confirm against Soda's SQL Server data source.
            "trusted_connection": _env_flag("DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION", False),
            "trust_server_certificate": _env_flag("DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE", False),
            "encrypt": _env_flag("DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION", True),
            "driver": server.driver,
        }
    }

    soda_configuration_str = yaml.dump(soda_configuration)
    return soda_configuration_str
44 |
--------------------------------------------------------------------------------
/datacontract/engines/soda/connections/trino.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import yaml
4 |
5 |
def to_trino_soda_configuration(server):
    """Build the Soda data source configuration for a Trino server as YAML.

    Credentials are read from ``DATACONTRACT_TRINO_USERNAME`` and
    ``DATACONTRACT_TRINO_PASSWORD``. When no password is set (unset or empty),
    ``auth_type`` is set to ``NoAuthentication``.

    Args:
        server: Server definition providing ``type``, ``host``, ``port``,
            ``catalog`` and ``schema_``.

    Returns:
        The configuration serialized with ``yaml.dump``.
    """
    trino_password = os.environ.get("DATACONTRACT_TRINO_PASSWORD")
    trino_username = os.environ.get("DATACONTRACT_TRINO_USERNAME")

    connection = {
        "type": "trino",
        "host": server.host,
        "port": str(server.port),
        "username": trino_username,
        "password": trino_password,
        "catalog": server.catalog,
        "schema": server.schema_,
    }

    if not trino_password:
        # default is BasicAuthentication
        connection["auth_type"] = "NoAuthentication"

    return yaml.dump({f"data_source {server.type}": connection})
27 |
--------------------------------------------------------------------------------
/datacontract/export/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/export/__init__.py
--------------------------------------------------------------------------------
/datacontract/export/custom_converter.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from jinja2 import Environment, FileSystemLoader
4 |
5 | from datacontract.export.exporter import Exporter
6 | from datacontract.model.data_contract_specification import (
7 | DataContractSpecification,
8 | Model,
9 | )
10 |
11 |
class CustomExporter(Exporter):
    """Exporter that renders a data contract through a user-supplied Jinja template."""

    def export(
        self,
        data_contract: DataContractSpecification,
        model: Model,
        server: str,
        sql_server_type: str,
        export_args: dict,
    ) -> str:
        """Render ``data_contract`` with the template named in ``export_args``.

        Only ``data_contract`` and ``export_args["template"]`` are used; the
        remaining parameters are accepted but not read here.

        Raises:
            RuntimeError: If ``export_args`` has no ``template`` entry or the
                entry is ``None``.
        """
        template_path = export_args.get("template")
        if template_path is not None:
            return to_custom(data_contract, template_path)
        raise RuntimeError("Export to custom requires template argument.")
29 |
30 |
def to_custom(data_contract: DataContractSpecification, template_path: Path) -> str:
    """Render the Jinja template at ``template_path``.

    The data contract is exposed to the template under the name
    ``data_contract``; the rendered text is returned.
    """
    return get_template(template_path).render(data_contract=data_contract)
35 |
36 |
def get_template(path: Path):
    """Load the Jinja template stored at ``path``.

    Args:
        path: Location of the template file. A ``Path`` or a plain string is
            accepted; it is resolved to an absolute path first.

    Returns:
        The template object returned by the Jinja environment, loaded from
        the directory that contains the resolved file.
    """
    absolute_path = Path(path).resolve()
    env = Environment(loader=FileSystemLoader(str(absolute_path.parent)))
    # Take the file name from the resolved path as well: the raw argument may
    # be a string (which has no ``.name``), and after following a symlink the
    # loader directory only contains the resolved file name.
    return env.get_template(absolute_path.name)
41 |
--------------------------------------------------------------------------------
/datacontract/export/dcs_exporter.py:
--------------------------------------------------------------------------------
1 | from datacontract.export.exporter import Exporter
2 |
3 |
class DcsExporter(Exporter):
    """Exporter that serializes the data contract through its own ``to_yaml()`` method."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return ``data_contract.to_yaml()``; the other arguments are not used.

        NOTE(review): the return annotation is ``dict`` although ``to_yaml()``
        presumably yields YAML text - confirm and align the annotation.
        """
        serialized = data_contract.to_yaml()
        return serialized
7 |
--------------------------------------------------------------------------------
/datacontract/export/pandas_type_converter.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for converting data contract field types to corresponding pandas data types.
3 | """
4 |
5 | from datacontract.model.data_contract_specification import Field
6 |
7 |
def convert_to_pandas_type(field: Field) -> str:
    """
    Map the type of a data contract field to a pandas dtype name.

    Parameters:
    ----------
    field : Field
        The field whose ``type`` attribute is translated.

    Returns:
    -------
    str
        The pandas dtype name. ``"object"`` is returned for ``bytes`` and for
        every type that is not listed in the mapping below.
    """
    # Checked top to bottom; the first group containing the type wins.
    dtype_by_contract_types = (
        (("string", "varchar", "text"), "str"),
        (("integer", "int"), "int32"),
        (("long",), "int64"),
        (("float",), "float32"),
        (("number", "decimal", "numeric", "double"), "float64"),
        (("boolean",), "bool"),
        (("timestamp", "timestamp_tz", "timestamp_ntz", "date"), "datetime64[ns]"),
        (("bytes",), "object"),
    )

    contract_type = field.type
    for contract_types, pandas_dtype in dtype_by_contract_types:
        if contract_type in contract_types:
            return pandas_dtype
    return "object"
41 |
--------------------------------------------------------------------------------
/datacontract/export/sodacl_converter.py:
--------------------------------------------------------------------------------
1 | import yaml
2 |
3 | from datacontract.engines.data_contract_checks import create_checks
4 | from datacontract.export.exporter import Exporter
5 | from datacontract.model.data_contract_specification import DataContractSpecification, Server
6 | from datacontract.model.run import Run
7 |
8 |
class SodaExporter(Exporter):
    """Exporter that turns the checks created for a data contract into SodaCL YAML."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
        """Create the checks for ``data_contract`` and return them as SodaCL YAML.

        ``server`` is a server name that is looked up in the contract;
        ``model``, ``sql_server_type`` and ``export_args`` are not used.
        """
        run = Run.create_run()
        resolved_server = get_server(data_contract, server)
        checks = create_checks(data_contract, resolved_server)
        run.checks.extend(checks)
        return to_sodacl_yaml(run)
15 |
16 |
def to_sodacl_yaml(run: Run) -> str:
    """Merge the SodaCL snippets of all Soda checks in ``run`` into one YAML document.

    Checks whose engine is not ``soda`` or whose language is not ``sodacl``
    are ignored. Each remaining implementation is parsed as YAML. A top-level
    key seen for the first time is stored as is; a repeated key is merged by
    extending (when both sides are lists) or by ``update`` otherwise.
    """
    merged = {}
    sodacl_checks = (
        check for check in run.checks if check.engine == "soda" and check.language == "sodacl"
    )
    for check in sodacl_checks:
        snippet = yaml.safe_load(check.implementation)
        for section, content in snippet.items():
            if section not in merged:
                merged[section] = content
                continue
            existing = merged[section]
            if isinstance(existing, list) and isinstance(content, list):
                existing.extend(content)
            else:
                existing.update(content)
    return yaml.dump(merged)
33 |
34 |
def get_server(data_contract_specification: DataContractSpecification, server_name: str = None) -> Server | None:
    """Look up ``server_name`` in the servers of the data contract.

    Returns ``None`` when no name is given or when the contract has no
    server registered under that name.
    """
    return None if server_name is None else data_contract_specification.servers.get(server_name)
39 |
--------------------------------------------------------------------------------
/datacontract/imports/importer.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from enum import Enum
3 |
4 | from datacontract_specification.model import DataContractSpecification
5 | from open_data_contract_standard.model import OpenDataContractStandard
6 |
7 |
class Importer(ABC):
    """Abstract base for importers; subclasses must implement ``import_source``."""

    def __init__(self, import_format) -> None:
        # Stored as given so the instance knows which format it was created for.
        self.import_format = import_format

    @abstractmethod
    def import_source(
        self,
        data_contract_specification: DataContractSpecification | OpenDataContractStandard,
        source: str,
        import_args: dict,
    ) -> DataContractSpecification | OpenDataContractStandard:
        """Import ``source`` and return a data contract object.

        The base implementation does nothing and returns ``None``.
        """
20 |
21 |
class ImportFormat(str, Enum):
    """String-valued identifiers of the import formats."""

    sql = "sql"
    avro = "avro"
    dbt = "dbt"
    dbml = "dbml"
    glue = "glue"
    jsonschema = "jsonschema"
    bigquery = "bigquery"
    odcs = "odcs"
    unity = "unity"
    spark = "spark"
    iceberg = "iceberg"
    parquet = "parquet"
    csv = "csv"
    protobuf = "protobuf"
    excel = "excel"

    @classmethod
    def get_supported_formats(cls):
        """Return the values of all members as a list, in definition order."""
        return [member.value for member in cls]
42 |
43 |
class Spec(str, Enum):
    """String-valued identifiers of the specification kinds."""

    datacontract_specification = "datacontract_specification"
    odcs = "odcs"

    @classmethod
    def get_supported_types(cls):
        """Return the values of all members as a list, in definition order."""
        return [member.value for member in cls]
51 |
--------------------------------------------------------------------------------
/datacontract/init/init_template.py:
--------------------------------------------------------------------------------
1 | import importlib.resources as resources
2 | import logging
3 |
4 | import requests
5 |
6 | DEFAULT_DATA_CONTRACT_INIT_TEMPLATE = "datacontract-1.1.0.init.yaml"
7 |
8 |
def get_init_template(location: str = None) -> str:
    """Return the text of a data contract init template.

    Args:
        location: Where to read the template from. ``None`` selects the
            template bundled under the ``datacontract`` package's ``schemas``
            directory; a value starting with ``http://`` or ``https://`` is
            downloaded; anything else is opened as a local file path.

    Returns:
        The template content as a string.

    Raises:
        requests.RequestException: If the download times out or the server
            answers with an HTTP error status.
        OSError: If the local file cannot be opened.
    """
    if location is None:
        logging.info("Use default bundled template %s", DEFAULT_DATA_CONTRACT_INIT_TEMPLATE)
        schemas = resources.files("datacontract")
        template = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_INIT_TEMPLATE)
        with template.open("r", encoding="utf-8") as file:
            return file.read()

    if location.startswith(("http://", "https://")):
        # A timeout keeps the call from blocking forever on an unresponsive
        # host; raise_for_status stops an HTTP error page from being returned
        # as if it were the template.
        response = requests.get(location, timeout=30)
        response.raise_for_status()
        return response.text

    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding.
    with open(location, "r", encoding="utf-8") as file:
        return file.read()
21 |
--------------------------------------------------------------------------------
/datacontract/lint/files.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from datacontract.model.exceptions import DataContractException
4 |
5 |
def read_file(path):
    """Return the text content of the file at ``path``.

    Raises:
        DataContractException: If ``path`` does not exist.
    """
    if not os.path.exists(path):
        raise DataContractException(
            type="lint",
            name=f"Reading data contract from {path}",
            reason=f"The file '{path}' does not exist.",
            engine="datacontract",
            result="error",
        )
    # Decode as UTF-8 explicitly; relying on the platform default encoding
    # garbles non-ASCII content on systems whose locale is not UTF-8.
    with open(path, "r", encoding="utf-8") as file:
        return file.read()
18 |
--------------------------------------------------------------------------------
/datacontract/lint/linters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/lint/linters/__init__.py
--------------------------------------------------------------------------------
/datacontract/lint/linters/description_linter.py:
--------------------------------------------------------------------------------
1 | from datacontract.model.data_contract_specification import DataContractSpecification
2 |
3 | from ..lint import Linter, LinterResult
4 |
5 |
class DescriptionLinter(Linter):
    """Report contracts, models, model fields, definitions and examples without a description."""

    @property
    def name(self) -> str:
        return "Objects have descriptions"

    @property
    def id(self) -> str:
        return "description"

    @staticmethod
    def _missing_description_messages(contract: DataContractSpecification):
        """Yield one error message per object lacking a description.

        Order: the contract itself, then each model followed by its fields,
        then the definitions, then the examples (numbered from 1).
        """
        if not (contract.info and contract.info.description):
            yield "Contract has empty description."
        for model_name, model in contract.models.items():
            if not model.description:
                yield f"Model '{model_name}' has empty description."
            for field_name, field in model.fields.items():
                if not field.description:
                    yield f"Field '{field_name}' in model '{model_name}' has empty description."
        for definition_name, definition in contract.definitions.items():
            if not definition.description:
                yield f"Definition '{definition_name}' has empty description."
        for position, example in enumerate(contract.examples, start=1):
            if not example.description:
                yield f"Example {position} has empty description."

    def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
        """Collect an error in the result for every missing description."""
        result = LinterResult()
        for message in self._missing_description_messages(contract):
            result = result.with_error(message)
        return result
34 |
--------------------------------------------------------------------------------
/datacontract/lint/linters/field_pattern_linter.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from datacontract.model.data_contract_specification import DataContractSpecification
4 |
5 | from ..lint import Linter, LinterResult
6 |
7 |
class FieldPatternLinter(Linter):
    """Verify that every field pattern compiles as a Python regular expression."""

    @property
    def name(self):
        return "Field pattern is correct regex"

    @property
    def id(self) -> str:
        return "field-pattern"

    def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
        """Add one error to the result for each field whose pattern fails to compile.

        Fields without a pattern (or with an empty one) are skipped.
        """
        result = LinterResult()
        for model_name, model in contract.models.items():
            for field_name, field in model.fields.items():
                if not field.pattern:
                    continue
                try:
                    re.compile(field.pattern)
                except re.error as e:
                    message = (
                        f"Failed to compile pattern regex '{field.pattern}' for "
                        f"field '{field_name}' in model '{model_name}': {e.msg}"
                    )
                    result = result.with_error(message)
        return result
35 |
--------------------------------------------------------------------------------
/datacontract/lint/resources.py:
--------------------------------------------------------------------------------
1 | from datacontract.lint.files import read_file
2 | from datacontract.lint.urls import fetch_resource
3 |
4 |
def read_resource(location: str) -> str:
    """
    Return the content found at ``location``.

    A location starting with ``http://`` or ``https://`` is handed to
    ``fetch_resource``; every other location is treated as a local file path
    and handed to ``read_file``.

    Args:
        location (str): A URL or a file path.

    Returns:
        str: The content of the resource.
    """
    is_remote = location.startswith(("http://", "https://"))
    reader = fetch_resource if is_remote else read_file
    return reader(location)
22 |
--------------------------------------------------------------------------------
/datacontract/model/data_contract_specification/__init__.py:
--------------------------------------------------------------------------------
1 | from datacontract_specification.model import *
2 |
--------------------------------------------------------------------------------
/datacontract/model/exceptions.py:
--------------------------------------------------------------------------------
1 | from datacontract.model.run import ResultEnum
2 |
3 |
class DataContractException(Exception):
    """Error raised when the execution of a run fails.

    Attributes:
        type (str): The type of the error.
        name (str): The name associated with the error.
        model (str): The model involved in the error, if any.
        reason (str): Explanation of the error.
        result: Result recorded for the error; defaults to ``ResultEnum.failed``.
        engine (str): The engine where the error occurred.
        original_exception (Exception, optional): Exception that led to this error.
        message (str): General message that prefixes the exception text.
    """

    def __init__(
        self,
        type,
        name,
        reason,
        engine="datacontract",
        model=None,
        original_exception=None,
        result: ResultEnum = ResultEnum.failed,
        message="Run operation failed",
    ):
        # Set every attribute first: the exception text below is formatted
        # from the attributes.
        self.message = message
        self.type = type
        self.name = name
        self.model = model
        self.result = result
        self.reason = reason
        self.engine = engine
        self.original_exception = original_exception
        summary = f"{self.message}: [{self.type}] {self.name} - {self.model} - {self.result} - {self.reason} - {self.engine}"
        super().__init__(summary)
39 |
--------------------------------------------------------------------------------
/datacontract/model/odcs.py:
--------------------------------------------------------------------------------
def is_open_data_contract_standard(odcs: dict) -> bool:
    """
    Check if the given dictionary is an OpenDataContractStandard.

    The dictionary qualifies when ``kind`` equals ``"DataContract"`` and
    ``apiVersion`` is a string starting with ``"v3"``.

    Args:
        odcs (dict): The dictionary to check.

    Returns:
        bool: True if the dictionary is an OpenDataContractStandard, False otherwise.
    """
    if odcs.get("kind") != "DataContract":
        return False
    api_version = odcs.get("apiVersion")
    # The key may be present with a null or non-string value (e.g. an empty
    # YAML entry); treat that as "not ODCS" instead of failing on .startswith.
    return isinstance(api_version, str) and api_version.startswith("v3")
12 |
--------------------------------------------------------------------------------
/datacontract/output/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/output/__init__.py
--------------------------------------------------------------------------------
/datacontract/output/output_format.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
class OutputFormat(str, Enum):
    """String-valued identifiers of the output formats."""

    # json = "json"  # coming soon
    junit = "junit"

    @classmethod
    def get_supported_formats(cls):
        """Return the values of all members as a list, in definition order."""
        return [member.value for member in cls]
11 |
--------------------------------------------------------------------------------
/datacontract/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontract/py.typed
--------------------------------------------------------------------------------
/datacontract/schemas/download:
--------------------------------------------------------------------------------
#!/bin/bash
set -e

# Download the init template and the JSON schemas into the current directory.
# --fail makes curl exit non-zero on an HTTP error, so `set -e` aborts the
# script instead of saving an error page under a schema file name;
# --location follows redirects.
curl --fail --location -o datacontract-1.1.0.init.yaml https://datacontract.com/datacontract.init.yaml
curl --fail --location -o datacontract-1.1.0.schema.json https://datacontract.com/datacontract.schema.json
curl --fail --location -o odcs-3.0.1.schema.json https://raw.githubusercontent.com/bitol-io/open-data-contract-standard/refs/heads/main/schema/odcs-json-schema-v3.0.1.json
7 |
8 |
--------------------------------------------------------------------------------
/datacontract/templates/partials/definition.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | {{ definition_name }}
11 | {{ definition.description }}
12 | |
13 |
14 |
15 |
16 |
17 | {{ render_partial('partials/model_field.html', nested = False, field_name=definition_name,
18 | field = definition, level = 0) }}
19 |
20 |
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/datacontract/templates/partials/example.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | {{ example.model }}
11 | {{ example.type }}
12 | {{ example.description }}
13 | |
14 |
15 |
16 |
17 |
18 |
19 | {{ example.data }}
20 | |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/datacontract/templates/style/generate-style:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# npm install --global tailwindcss
# input.css and output.css are given as relative paths, so switch to this
# script's directory first; the script then works from any working directory.
cd "$(dirname "$0")" || exit 1
tailwindcss --input input.css --output output.css
5 |
--------------------------------------------------------------------------------
/datacontract/templates/style/input.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
--------------------------------------------------------------------------------
/datacontract/templates/style/tailwind.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | content: [
3 | "../datacontract.html",
4 | "../datacontract_odcs.html",
5 | "../index.html",
6 | "../partials/model_field.html",
7 | "../partials/server.html",
8 | "../partials/definition.html",
9 | "../partials/datacontract_information.html",
10 | "../partials/datacontract_servicelevels.html",
11 | "../partials/datacontract_terms.html",
12 | "../partials/example.html",
13 | "../partials/quality.html",
14 | ],
15 | theme: { },
16 | plugins: [],
17 | }
--------------------------------------------------------------------------------
/datacontractcli.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/datacontractcli.png
--------------------------------------------------------------------------------
/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/favicon.png
--------------------------------------------------------------------------------
/release:
--------------------------------------------------------------------------------
#!/bin/bash
set -e

# Release steps:
# 1. Update release version in pyproject.toml
# 2. Update CHANGELOG.md header
# 3. Run ./release
# 4. Update release notes in Github

# The version is read from pyproject.toml with toml-cli, run through uvx.
VERSION=$(uvx --from toml-cli toml get --toml-path pyproject.toml project.version)
TAG_VERSION="v$VERSION"

echo "Checking that everything is committed"
# `git diff` alone only covers unstaged changes; also check the index so that
# staged-but-uncommitted changes stop the release.
git diff --exit-code
git diff --cached --exit-code
echo "Tagging $TAG_VERSION"
git tag "$TAG_VERSION"
echo "Pushing $TAG_VERSION"
git push origin "$TAG_VERSION"
echo "Pushed $TAG_VERSION"
21 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
@pytest.fixture(autouse=True)
def change_test_dir(request, monkeypatch):
    """Switch the working directory to the directory of the requesting test file.

    Applied automatically to every test (``autouse=True``); the change is made
    through ``monkeypatch``.
    """
    test_file_dir = request.fspath.dirname
    monkeypatch.chdir(test_file_dir)
7 |
--------------------------------------------------------------------------------
/tests/fixtures/avro/data/arrays.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "fields": [
3 | {
4 | "name": "orderid",
5 | "type": "int"
6 | },
7 | {
8 | "name": "addresses",
9 | "doc": "Addresses of a customer",
10 | "type": {
11 | "type": "array",
12 | "items": {
13 | "name": "address",
14 | "type": "record",
15 | "fields": [
16 | {
17 | "name": "city",
18 | "type": "string"
19 | },
20 | {
21 | "name": "state",
22 | "type": "string"
23 | },
24 | {
25 | "name": "zipcode",
26 | "type": "long"
27 | }
28 | ]
29 | }
30 | }
31 | },
32 | {
33 | "name": "nestedArrays",
34 | "doc": "Example schema for an array of arrays",
35 | "type": {
36 | "type": "array",
37 | "items": {
38 | "type": "array",
39 | "items": "int"
40 | }
41 | }
42 | },
43 | {
44 | "name": "nationalities",
45 | "type": [
46 | "null",
47 | {
48 | "type": "array",
49 | "items": {
50 | "type": "string",
51 | "connect.parameters": {
52 | "avro.java.string": "String"
53 | },
54 | "avro.java.string": "String"
55 | }
56 | }
57 | ],
58 | "default": null
59 | }
60 | ],
61 | "name": "orders",
62 | "doc": "My Model",
63 | "type": "record"
64 | }
--------------------------------------------------------------------------------
/tests/fixtures/avro/data/logical_types.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "Test",
4 | "namespace": "mynamespace.com",
5 | "fields": [
6 | {
7 | "name": "test_id",
8 | "type": "string",
9 | "doc": "id documentation test"
10 | },
11 | {
12 | "name": "device_id",
13 | "type": "int"
14 | },
15 | {
16 | "name": "test_value",
17 | "type": "double"
18 | },
19 | {
20 | "name": "num_items",
21 | "type": "int"
22 | },
23 | {
24 | "name": "processed_timestamp",
25 | "type": "long",
26 | "doc": "The date the event was processed: for more info https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29",
27 | "logicalType": "local-timestamp-micros"
28 | },
29 | {
30 | "name": "description",
31 | "type": "string"
32 | },
33 | {
34 | "name": "is_processed",
35 | "type": "boolean",
36 | "default": false
37 | },
38 | {
39 | "name": "some_bytes_decimal",
40 | "type": {
41 | "type": "bytes",
42 | "logicalType": "decimal",
43 | "precision": 25,
44 | "scale": 2
45 | }
46 | }
47 | ]
48 | }
--------------------------------------------------------------------------------
/tests/fixtures/avro/data/nested.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "fields": [
3 | {
4 | "default": null,
5 | "name": "fieldA",
6 | "type": [
7 | "null",
8 | "long"
9 | ]
10 | },
11 | {
12 | "default": null,
13 | "name": "fieldB",
14 | "type": [
15 | "null",
16 | {
17 | "fields": [
18 | {
19 | "default": null,
20 | "name": "fieldC",
21 | "type": [
22 | "null",
23 | {
24 | "avro.java.string": "String",
25 | "type": "string"
26 | }
27 | ]
28 | }
29 | ],
30 | "name": "ObjectB",
31 | "type": "record"
32 | }
33 | ]
34 | }
35 | ],
36 | "name": "Doc",
37 | "namespace": "com.xxx",
38 | "type": "record"
39 | }
40 |
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders
3 | info:
4 | title: Orders
5 | version: 0.0.1
6 | description: Order messages as generated by Confluent Datagen Source Adapter
7 | servers:
8 | production:
9 | type: kafka
10 | host: pkc-7xoy1.eu-central-1.aws.confluent.cloud:9092
11 | topic: orders.avro.v1
12 | format: avro
13 | models:
14 | orders:
15 | type: table
16 | description: My Model
17 | namespace: com.example.checkout
18 | fields:
19 | orderdate:
20 | type: date
21 | description: My Field
22 | order_timestamp:
23 | type: timestamp
24 | delivery_timestamp:
25 | type: timestamp_ntz
26 | orderid:
27 | type: int
28 | itemid:
29 | type: string
30 | orderunits:
31 | type: double
32 | tags:
33 | type: array
34 | items:
35 | type: string
36 | address:
37 | type: object
38 | fields:
39 | city:
40 | type: string
41 | state:
42 | type: string
43 | zipcode:
44 | type: long
45 | quality:
46 | type: SodaCL
47 | specification:
48 | checks for orders:
49 | - row_count >= 5000
50 |
51 |
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_decimal.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "MySchema",
4 | "fields": [
5 | {
6 | "name": "price",
7 | "type": {
8 | "type": "bytes",
9 | "logicalType": "decimal"
10 | }
11 | },
12 | {
13 | "name": "dewey_decimal",
14 | "type": {
15 | "type": "bytes",
16 | "logicalType": "decimal",
17 | "scale": 2,
18 | "precision": 4
19 | }
20 | },
21 | {
22 | "name": "reading_level",
23 | "type": [
24 | "null",
25 | {
26 | "type": "bytes",
27 | "logicalType": "decimal"
28 | }
29 | ]
30 | },
31 | {
32 | "name": "age",
33 | "type": [
34 | "null",
35 | {
36 | "type": "bytes",
37 | "logicalType": "decimal",
38 | "precision": 3
39 | }
40 | ]
41 | }
42 | ]
43 | }
44 |
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_decimal.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | MySchema:
8 | fields:
9 | price:
10 | type: decimal
11 | required: true
12 | dewey_decimal:
13 | type: decimal
14 | required: true
15 | precision: 4
16 | scale: 2
17 | reading_level:
18 | type: decimal
19 | required: false
20 | age:
21 | type: decimal
22 | required: false
23 | precision: 3
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_enum.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "MySchema",
4 | "fields": [
5 | {
6 | "name": "color",
7 | "type": {
8 | "type": "enum",
9 | "name": "Color",
10 | "symbols": [
11 | "RED",
12 | "GREEN",
13 | "BLUE",
14 | "UNKNOWN"
15 | ]
16 | }
17 | }
18 | ]
19 | }
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_enum.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | MySchema:
8 | fields:
9 | color:
10 | type: string
11 | title: Color
12 | enum:
13 | - RED
14 | - GREEN
15 | - BLUE
16 | - UNKNOWN
17 | config:
18 | avroType: enum
19 |
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_logicalType.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "Test",
4 | "namespace": "mynamespace.com",
5 | "fields": [
6 | {"name": "test_id", "type": "string", "doc": "id documentation test"},
7 | {"name": "device_id", "type": "int"},
8 | {"name": "test_value", "type": "double"},
9 | {"name": "num_items", "type": "int"},
10 | {"name": "processed_timestamp",
11 | "type": {
12 | "type": "long",
13 | "logicalType": "local-timestamp-micros"
14 | },
15 | "doc": "The date the event was processed: for more info https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29"
16 | },
17 | {"name": "description", "type": "string"},
18 | {"name": "is_processed", "type": "boolean",
19 | "default": false}
20 | ]
21 | }
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_logicalType.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | Test:
8 | namespace: mynamespace.com
9 | fields:
10 | test_id:
11 | type: string
12 | required: true
13 | description: id documentation test
14 | device_id:
15 | type: int
16 | required: true
17 | test_value:
18 | type: double
19 | required: true
20 | num_items:
21 | type: int
22 | required: true
23 | processed_timestamp:
24 | type: long
25 | required: true
26 | description: 'The date the event was processed: for more info https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29'
27 | config:
28 | avroType: long
29 | avroLogicalType: local-timestamp-micros
30 | description:
31 | type: string
32 | required: true
33 | is_processed:
34 | type: boolean
35 | required: true
36 | config:
37 | avroDefault: false
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_test_field_float.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "row",
4 | "namespace": "com.example",
5 | "fields": [
6 | {
7 | "name": "field_name",
8 | "type": "float"
9 | }
10 | ]
11 | }
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_test_field_float.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: pageviews
3 | info:
4 | title: Pageviews
5 | version: 0.0.1
6 | models:
7 | row:
8 | type: table
9 | namespace: com.example
10 | fields:
11 | field_name:
12 | type: float
13 |
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_test_field_map.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "row",
4 | "namespace": "com.example",
5 | "fields": [
6 | {
7 | "name": "field_name",
8 | "type": {
9 | "type": "map",
10 | "values":["string",
11 | "long"
12 | ]
13 | }
14 | }
15 | ]
16 | }
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_test_field_map.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: pageviews
3 | info:
4 | title: Pageviews
5 | version: 0.0.1
6 | models:
7 | row:
8 | type: table
9 | namespace: com.example
10 | fields:
11 | field_name:
12 | type: map
13 | config:
14 | values: ["string", "long"]
15 |
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_test_field_namespace.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "row",
4 | "namespace": "com.example",
5 | "fields": [
6 | {
7 | "name": "field_name",
8 | "type":
9 | {
10 | "type": "record",
11 | "name": "field_name",
12 | "namespace": "com.example",
13 | "fields": [
14 | {
15 | "name": "field",
16 | "type": "string"
17 |
18 | }
19 | ]
20 | }
21 |
22 | }
23 | ]
24 | }
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_test_field_namespace.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: pageviews
3 | info:
4 | title: Pageviews
5 | version: 0.0.1
6 | models:
7 | row:
8 | type: table
9 | namespace: com.example
10 | fields:
11 | field_name:
12 | type: record
13 | config:
14 | namespace: com.example
15 | fields:
16 | field:
17 | type: string
18 |
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_test_logical_type.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | alert:
8 | fields:
9 | currentSelectionLiability:
10 | type: decimal
11 | required: true
12 | precision: 25
13 | scale: 2
14 | raised:
15 | type: timestamp_tz
16 | required: true
17 | selectionSettledTime:
18 | type: timestamp_tz
19 | required: false
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_test_required.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "pageviews",
4 | "namespace": "com.example.activity",
5 | "fields": [
6 | {
7 | "name": "event_ts",
8 | "type": {
9 | "type": "long",
10 | "logicalType": "local-timestamp-millis"
11 | }
12 | },
13 | {
14 | "name": "correlation_id",
15 | "type": "int"
16 | },
17 | {
18 | "name": "user_guid",
19 | "type": ["null", "string"]
20 | }
21 | ]
22 | }
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/datacontract_test_required.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: pageviews
3 | info:
4 | title: Pageviews
5 | version: 0.0.1
6 | models:
7 | pageviews:
8 | type: table
9 | namespace: com.example.activity
10 | fields:
11 | event_ts:
12 | type: timestamp_ntz
13 | correlation_id:
14 | type: int
15 | required: true
16 | user_guid:
17 | type: string
18 | required: false
19 |
20 |
--------------------------------------------------------------------------------
/tests/fixtures/avro/export/orders_with_datefields.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "type": "record",
3 | "name": "orders",
4 | "doc": "My Model",
5 | "namespace": "com.example.checkout",
6 | "fields": [
7 | {
8 | "name": "orderdate",
9 | "doc": "My Field",
10 | "type": {
11 | "type": "int",
12 | "logicalType": "date"
13 | }
14 | },
15 | {
16 | "name": "order_timestamp",
17 | "type": {
18 | "type": "long",
19 | "logicalType": "timestamp-millis"
20 | }
21 | },
22 | {
23 | "name": "delivery_timestamp",
24 | "type": {
25 | "type": "long",
26 | "logicalType": "local-timestamp-millis"
27 | }
28 | },
29 | {
30 | "name": "orderid",
31 | "type": "int"
32 | },
33 | {
34 | "name": "itemid",
35 | "type": "string"
36 | },
37 | {
38 | "name": "orderunits",
39 | "type": "double"
40 | },
41 | {
42 | "name": "tags",
43 | "type": {
44 | "type": "array",
45 | "items": "string"
46 | }
47 | },
48 | {
49 | "name": "address",
50 | "type": {
51 | "type": "record",
52 | "name": "address",
53 | "fields": [
54 | {
55 | "name": "city",
56 | "type": "string"
57 | },
58 | {
59 | "name": "state",
60 | "type": "string"
61 | },
62 | {
63 | "name": "zipcode",
64 | "type": "long"
65 | }
66 | ]
67 | }
68 | }
69 | ]
70 | }
--------------------------------------------------------------------------------
/tests/fixtures/azure-delta-remote/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders-unit-test
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: azure
9 | storageAccount: datameshdatabricksdemo
10 | location: abfss://dataproducts/orders_delta/orders.delta
11 | format: delta
12 | models:
13 | orders:
14 | fields:
15 | order_id:
16 | type: varchar
17 | unique: true
18 | required: true
19 | order_timestamp:
20 | required: true
21 | order_total:
22 | type: bigint
23 | required: true
24 |
--------------------------------------------------------------------------------
/tests/fixtures/azure-json-remote/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders-unit-test
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: azure
9 | location: abfss://datameshdatabricksdemo.dfs.core.windows.net/topics/inventory/year=2022/month=07/day=13/*/inventory+0+000000*.json
10 | format: json
11 | delimiter: new_line
12 | models:
13 | orders:
14 | fields:
15 | updated_at:
16 | type: varchar
17 | available:
18 | type: integer
19 | location:
20 | type: varchar
21 | minLength: 2
22 | maxLength: 2
23 | sku:
24 | type: varchar
25 | quality:
26 | type: SodaCL
27 | specification:
28 | checks for orders:
29 | - row_count >= 5000
--------------------------------------------------------------------------------
/tests/fixtures/azure-parquet-remote/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders-unit-test
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: azure
9 | storageAccount: datameshdatabricksdemo
10 | location: abfss://dataproducts/inventory_events/*.parquet
11 | format: parquet
12 | models:
13 | orders:
14 | fields:
15 | updated_at:
16 | type: varchar
17 | available:
18 | type: varchar # for historic reasons
19 | location:
20 | type: varchar
21 | minLength: 2
22 | maxLength: 2
23 | sku:
24 | type: varchar
25 |
--------------------------------------------------------------------------------
/tests/fixtures/bigquery/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: bigquery
3 | info:
4 | title: bigquery
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | my-dataproduct/bigquery:
9 | type: bigquery
10 | project: datameshexample-product
11 | dataset: datacontract_cli_test_dataset
12 | dataProductId: my-dataproduct
13 | outputPortId: bigquery
14 | models:
15 | datacontract_cli_test_table:
16 | type: table
17 | fields:
18 | field_one:
19 | type: varchar
20 | required: true
21 | unique: true
22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
23 | field_two:
24 | type: int
25 | minimum: 10
26 | field_three:
27 | type: timestamp
28 |
--------------------------------------------------------------------------------
/tests/fixtures/bigquery/datacontract_complex.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: bigquery
3 | info:
4 | title: bigquery
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | my-dataproduct/bigquery:
9 | type: bigquery
10 | project: datameshexample-product
11 | dataset: datacontract_cli
12 | models:
13 | complex_table:
14 | type: table
15 | fields:
16 | some_string:
17 | type: string
18 | some_record:
19 | type: record
20 | fields:
21 | some_field_1:
22 | type: string
23 | some_field_2:
24 | type: string
25 | some_array_of_strings:
26 | type: array
27 | items:
28 | type: string
29 | some_array_of_records:
30 | type: array
31 | items:
32 | type: record
33 | fields:
34 | some_other_field_1:
35 | type: string
36 | some_other_field_2:
37 | type: string
38 | some_json:
39 | type: text
40 | config:
41 | bigqueryType: json
42 | some_range_of_timestamp:
43 | type: record
44 | config:
45 | bigqueryType: RANGE
46 |
--------------------------------------------------------------------------------
/tests/fixtures/bigquery/import/datacontract_multi_import.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | BQ_Table:
8 | description: This is a test table
9 | type: table
10 | fields:
11 | String_field:
12 | type: string
13 | required: false
14 | description: A simple String field
15 | BQ_External_Table:
16 | description: This is a test table
17 | type: table
18 | fields:
19 | String_field:
20 | type: string
21 | required: false
22 | description: A simple String field
23 | BQ_Snapshot:
24 | description: This is a test table
25 | type: table
26 | fields:
27 | String_field:
28 | type: string
29 | required: false
30 | description: A simple String field
31 | BQ_View:
32 | description: This is a test table
33 | type: view
34 | fields:
35 | String_field:
36 | type: string
37 | required: false
38 | description: A simple String field
39 | BQ_Materialized_View:
40 | description: This is a test table
41 | type: view
42 | fields:
43 | String_field:
44 | type: string
45 | required: false
46 | description: A simple String field
--------------------------------------------------------------------------------
/tests/fixtures/bigquery/import/multi_import_external_table.json:
--------------------------------------------------------------------------------
1 | {
2 | "creationTime": "1715608399201",
3 | "description": "This is a test table",
4 | "etag": "vv0Ksh3XakMcCTFmhM0FOA==",
5 | "expirationTime": "1720792399201",
6 | "id": "bigquery-test-423213:test_dataset.BQ Example Table",
7 | "kind": "bigquery#table",
8 | "lastModifiedTime": "1715610311747",
9 | "location": "europe-west3",
10 | "numActiveLogicalBytes": "0",
11 | "numBytes": "0",
12 | "numLongTermBytes": "0",
13 | "numLongTermLogicalBytes": "0",
14 | "numRows": "0",
15 | "numTotalLogicalBytes": "0",
16 | "schema": {
17 | "fields": [
18 | {
19 | "description": "A simple String field",
20 | "mode": "NULLABLE",
21 | "name": "String_field",
22 | "type": "STRING"
23 | }
24 | ]
25 | },
26 | "selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/bigquery-test-423213/datasets/test_dataset/tables/BQ Example Table",
27 | "tableReference": {
28 | "datasetId": "test_dataset",
29 | "projectId": "bigquery-test-423213",
30 | "tableId": "BQ_External_Table"
31 | },
32 | "type": "EXTERNAL"
33 | }
--------------------------------------------------------------------------------
/tests/fixtures/bigquery/import/multi_import_materialized_view.json:
--------------------------------------------------------------------------------
1 | {
2 | "creationTime": "1715608399201",
3 | "description": "This is a test table",
4 | "etag": "vv0Ksh3XakMcCTFmhM0FOA==",
5 | "expirationTime": "1720792399201",
6 | "id": "bigquery-test-423213:test_dataset.BQ Example Table",
7 | "kind": "bigquery#table",
8 | "lastModifiedTime": "1715610311747",
9 | "location": "europe-west3",
10 | "numActiveLogicalBytes": "0",
11 | "numBytes": "0",
12 | "numLongTermBytes": "0",
13 | "numLongTermLogicalBytes": "0",
14 | "numRows": "0",
15 | "numTotalLogicalBytes": "0",
16 | "schema": {
17 | "fields": [
18 | {
19 | "description": "A simple String field",
20 | "mode": "NULLABLE",
21 | "name": "String_field",
22 | "type": "STRING"
23 | }
24 | ]
25 | },
26 | "selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/bigquery-test-423213/datasets/test_dataset/tables/BQ Example Table",
27 | "tableReference": {
28 | "datasetId": "test_dataset",
29 | "projectId": "bigquery-test-423213",
30 | "tableId": "BQ_Materialized_View"
31 | },
32 | "type": "MATERIALIZED_VIEW"
33 | }
--------------------------------------------------------------------------------
/tests/fixtures/bigquery/import/multi_import_snapshot.json:
--------------------------------------------------------------------------------
1 | {
2 | "creationTime": "1715608399201",
3 | "description": "This is a test table",
4 | "etag": "vv0Ksh3XakMcCTFmhM0FOA==",
5 | "expirationTime": "1720792399201",
6 | "id": "bigquery-test-423213:test_dataset.BQ Example Table",
7 | "kind": "bigquery#table",
8 | "lastModifiedTime": "1715610311747",
9 | "location": "europe-west3",
10 | "numActiveLogicalBytes": "0",
11 | "numBytes": "0",
12 | "numLongTermBytes": "0",
13 | "numLongTermLogicalBytes": "0",
14 | "numRows": "0",
15 | "numTotalLogicalBytes": "0",
16 | "schema": {
17 | "fields": [
18 | {
19 | "description": "A simple String field",
20 | "mode": "NULLABLE",
21 | "name": "String_field",
22 | "type": "STRING"
23 | }
24 | ]
25 | },
26 | "selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/bigquery-test-423213/datasets/test_dataset/tables/BQ Example Table",
27 | "tableReference": {
28 | "datasetId": "test_dataset",
29 | "projectId": "bigquery-test-423213",
30 | "tableId": "BQ_Snapshot"
31 | },
32 | "type": "SNAPSHOT"
33 | }
--------------------------------------------------------------------------------
/tests/fixtures/bigquery/import/multi_import_table.json:
--------------------------------------------------------------------------------
1 | {
2 | "creationTime": "1715608399201",
3 | "description": "This is a test table",
4 | "etag": "vv0Ksh3XakMcCTFmhM0FOA==",
5 | "expirationTime": "1720792399201",
6 | "id": "bigquery-test-423213:test_dataset.BQ Example Table",
7 | "kind": "bigquery#table",
8 | "lastModifiedTime": "1715610311747",
9 | "location": "europe-west3",
10 | "numActiveLogicalBytes": "0",
11 | "numBytes": "0",
12 | "numLongTermBytes": "0",
13 | "numLongTermLogicalBytes": "0",
14 | "numRows": "0",
15 | "numTotalLogicalBytes": "0",
16 | "schema": {
17 | "fields": [
18 | {
19 | "description": "A simple String field",
20 | "mode": "NULLABLE",
21 | "name": "String_field",
22 | "type": "STRING"
23 | }
24 | ]
25 | },
26 | "selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/bigquery-test-423213/datasets/test_dataset/tables/BQ Example Table",
27 | "tableReference": {
28 | "datasetId": "test_dataset",
29 | "projectId": "bigquery-test-423213",
30 | "tableId": "BQ_Table"
31 | },
32 | "type": "TABLE"
33 | }
--------------------------------------------------------------------------------
/tests/fixtures/bigquery/import/multi_import_view.json:
--------------------------------------------------------------------------------
1 | {
2 | "creationTime": "1715608399201",
3 | "description": "This is a test table",
4 | "etag": "vv0Ksh3XakMcCTFmhM0FOA==",
5 | "expirationTime": "1720792399201",
6 | "id": "bigquery-test-423213:test_dataset.BQ Example Table",
7 | "kind": "bigquery#table",
8 | "lastModifiedTime": "1715610311747",
9 | "location": "europe-west3",
10 | "numActiveLogicalBytes": "0",
11 | "numBytes": "0",
12 | "numLongTermBytes": "0",
13 | "numLongTermLogicalBytes": "0",
14 | "numRows": "0",
15 | "numTotalLogicalBytes": "0",
16 | "schema": {
17 | "fields": [
18 | {
19 | "description": "A simple String field",
20 | "mode": "NULLABLE",
21 | "name": "String_field",
22 | "type": "STRING"
23 | }
24 | ]
25 | },
26 | "selfLink": "https://bigquery.googleapis.com/bigquery/v2/projects/bigquery-test-423213/datasets/test_dataset/tables/BQ Example Table",
27 | "tableReference": {
28 | "datasetId": "test_dataset",
29 | "projectId": "bigquery-test-423213",
30 | "tableId": "BQ_View"
31 | },
32 | "type": "VIEW"
33 | }
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-definitions-v1.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | my-custom-required-field: hello
7 |
8 | models:
9 | my_table:
10 | type: table
11 | fields:
12 | my_field:
13 | required: false
14 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-definitions-v2.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | my-custom-required-field: hello
7 |
8 | models:
9 | my_table:
10 | type: table
11 | fields:
12 | my_field:
13 | $ref: '#/definitions/my_definition'
14 |
15 | definitions:
16 | my_definition:
17 | name: my_definition
18 | domain: global
19 | title: my_title
20 | description: My Description
21 | type: string
22 | enum: [my_enum]
23 | format: uuid
24 | minLength: 8
25 | maxLength: 14
26 | pattern: .*
27 | minimum: 8
28 | exclusiveMaximum: 8
29 | maximum: 14
30 | exclusiveMinimum: 14
31 | example: my_example
32 | pii: false
33 | classification: internal
34 | tags: [my_tags]
35 |
36 |
37 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-definitions-v3.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | my-custom-required-field: hello
7 |
8 | models:
9 | my_table:
10 | type: table
11 | fields:
12 | my_field:
13 | $ref: '#/definitions/my_definition_2'
14 |
15 | definitions:
16 | my_definition_2:
17 | name: my_definition_2
18 | domain: global
19 | title: my_title_2
20 | description: My Description 2
21 | type: integer
22 | enum: [my_enum_2]
23 | format: url
24 | minLength: 10
25 | maxLength: 20
26 | pattern: .*.*
27 | minimum: 10
28 | exclusiveMaximum: 20
29 | maximum: 20
30 | exclusiveMinimum: 10
31 | example: my_example_2
32 | pii: true
33 | classification: sensitive
34 | tags: [my_tags_2]
35 |
36 |
37 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-fields-v1.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | my-custom-required-field: hello
7 |
8 | models:
9 | my_table:
10 | type: table
11 | fields:
12 | field_type:
13 | description: My Description
14 | field_format:
15 | type: string
16 | field_required:
17 | type: string
18 | field_primaryKey:
19 | type: string
20 | field_references:
21 | type: string
22 | field_unique:
23 | type: string
24 | field_description:
25 | type: string
26 | field_pii:
27 | type: string
28 | field_classification:
29 | type: string
30 | field_pattern:
31 | type: string
32 | field_minLength:
33 | type: string
34 | field_maxLength:
35 | type: string
36 | field_minimum:
37 | type: string
38 | field_exclusiveMinimum:
39 | type: string
40 | field_maximum:
41 | type: string
42 | field_exclusiveMaximum:
43 | type: string
44 | field_enum:
45 | type: string
46 | field_tags:
47 | type: string
48 | field_ref:
49 | type: string
50 | field_fields:
51 | fields:
52 | nested_field_1:
53 | type: string
54 | field_custom_key:
55 | type: string
56 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-info-v1.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.2
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | orders:
8 | fields:
9 | column_1:
10 | type: string
11 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-info-v2.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.2
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | owner: Data Team
7 | some-other-key: some information
8 | contact:
9 | email: datateam@work.com
10 | models:
11 | orders:
12 | fields:
13 | column_1:
14 | type: string
15 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-info-v3.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.2
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | owner: Another Team
7 | some-other-key: new information
8 | contact:
9 | email: anotherteam@work.com
10 | models:
11 | orders:
12 | fields:
13 | column_1:
14 | type: string
15 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-models-v1.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | my-custom-required-field: hello
7 |
8 | models:
9 | my_table:
10 | fields:
11 | my_field:
12 | description: My Description
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-models-v2.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | my-custom-required-field: hello
7 |
8 | models:
9 | my_table:
10 | type: table
11 | description: My Model Description
12 | fields:
13 | my_field:
14 | description: My Description
15 | another-key: original value
16 | my_table_2:
17 | fields:
18 | my_field_2:
19 | description: My Description 2
20 | some-other-key: some value
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-models-v3.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | my-custom-required-field: hello
7 |
8 | models:
9 | my_table:
10 | type: object
11 | description: My Updated Model Description
12 | fields:
13 | my_field:
14 | description: My Description
15 | another-key: updated value
16 | my_table_2:
17 | fields:
18 | my_field_2:
19 | description: My Description 2
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-quality-v1.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | orders:
8 | fields:
9 | column_1:
10 | type: string
11 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-quality-v2.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | orders:
8 | fields:
9 | column_1:
10 | type: string
11 | quality:
12 | type: SodaCL
13 | specification: |-
14 | checks for orders:
15 | - freshness(column_1) < 1d
16 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-quality-v3.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | orders:
8 | fields:
9 | column_1:
10 | type: string
11 | quality:
12 | type: custom
13 | specification: |-
14 | checks for orders:
15 | - freshness(column_1) < 2d
16 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-terms-v1.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.2
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | orders:
8 | fields:
9 | column_1:
10 | type: string
11 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-terms-v2.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.2
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | terms:
7 | usage: |
8 | Data can be used for reports, analytics and machine learning use cases.
9 | Order may be linked and joined by other tables
10 | limitations: |
11 | Not suitable for real-time use cases.
12 | Data may not be used to identify individual customers.
13 | Max data processing per day: 10 TiB
14 | billing: 5000 USD per month
15 | noticePeriod: P3M
16 | models:
17 | orders:
18 | fields:
19 | column_1:
20 | type: string
21 |
--------------------------------------------------------------------------------
/tests/fixtures/breaking/datacontract-terms-v3.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.2
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | terms:
7 | usage: Data can be used for anything
8 | billing: 1000000 GBP per month
9 | noticePeriod: P1Y
10 | someOtherTerms: must abide by policies
11 | models:
12 | orders:
13 | fields:
14 | column_1:
15 | type: string
16 |
--------------------------------------------------------------------------------
/tests/fixtures/catalog/datacontract-1.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders-unit-test
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | owner: checkout
7 | description: The orders data contract
8 | contact:
9 | email: team-orders@example.com
10 | url: https://wiki.example.com/teams/checkout
11 | terms:
12 | usage: This data contract serves to demo datacontract CLI export.
13 | limitations: Not intended to use in production
14 | billing: free
15 | noticePeriod: P3M
16 | servers:
17 | production:
18 | type: snowflake
19 | account: my-account
20 | database: my-database
21 | schema: my-schema
22 | models:
23 | orders:
24 | description: The orders model
25 | fields:
26 | order_id:
27 | type: varchar
28 | unique: true
29 | required: true
30 | minLength: 8
31 | maxLength: 10
32 | pii: true
33 | classification: sensitive
34 | tags:
35 | - order_id
36 | pattern: ^B[0-9]+$
37 | order_total:
38 | type: bigint
39 | required: true
40 | description: The order_total field
41 | minimum: 0
42 | maximum: 1000000
43 | order_status:
44 | type: text
45 | required: true
46 | enum:
47 | - pending
48 | - shipped
49 | - delivered
--------------------------------------------------------------------------------
/tests/fixtures/catalog/datacontract-2.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: urn:datacontract:test
3 | info:
4 | title: Test datacontract
5 | version: 0.0.1
6 | description: Test datacontract
7 | models:
8 | model_test:
9 | description: A test model
10 | type: table
11 | fields:
12 | user_id:
13 | $ref: '#/definitions/user_id'
14 | required: true
15 | unique: true
16 | primaryKey: true
17 | allowed_actions:
18 | $ref: '#/definitions/allowed_actions'
19 | required: false
20 | definitions:
21 | user_id:
22 | title: User ID
23 | type: int
24 | description: An internal, autoincremental ID that identifies an user ID in the metricool app.
25 | examples:
26 | - 883749
27 | allowed_actions:
28 | type: array
29 | description: Allowed user actions
30 | items:
31 | type: string
32 | enum:
33 | - "Add"
34 | - "View"
35 | - "Report"
--------------------------------------------------------------------------------
/tests/fixtures/csv/data/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.3
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | servers:
7 | production:
8 | type: local
9 | format: csv
10 | path: ./tests/fixtures/csv/data/sample_data.csv
11 | delimiter: ','
12 | models:
13 | sample_data:
14 | description: Csv file with encoding ascii
15 | type: table
16 | fields:
17 | field_one:
18 | type: string
19 | field_two:
20 | type: integer
21 | field_three:
22 | type: string
23 |
24 |
--------------------------------------------------------------------------------
/tests/fixtures/csv/data/sample_data.csv:
--------------------------------------------------------------------------------
1 | field_one,field_two,field_three
2 | CX-263-DU,50,2023-06-16 13:12:56
3 | IK-894-MN,47,2023-10-08 22:40:57
4 | ER-399-JY,22,2023-05-16 01:08:22
5 | MT-939-FH,63,2023-03-15 05:15:21
6 | LV-849-MI,33,2023-09-08 20:08:43
7 | VS-079-OH,85,2023-04-15 00:50:32
8 | DN-297-XY,79,2023-11-08 12:55:42
9 | ZE-172-FP,14,2023-12-03 18:38:38
10 | ID-840-EG,89,2023-10-02 17:17:58
11 | FK-230-KZ,64,2023-11-27 15:21:48
12 |
--------------------------------------------------------------------------------
/tests/fixtures/csv/data/sample_data_5_column.csv:
--------------------------------------------------------------------------------
1 | field_one,field_two,field_three,field_four,field_five,field_six
2 | CX-263-DU,50,2023-06-16 13:12:56,,true,test1@gmail.com
3 | IK-894-MN,47,2023-10-08 22:40:57,,true,test1@gmail.com
4 | ER-399-JY,22,2023-05-16 01:08:22,,true,test1@gmail.com
5 | MT-939-FH,47,2023-03-15 05:15:21,,false,test1@gmail.com
6 | LV-849-MI,50,2023-09-08 20:08:43,,false,test1@gmail.com
7 | VS-079-OH,22,2023-04-15 00:50:32,,false,test1@gmail.com
8 | DN-297-XY,50,2023-11-08 12:55:42,,false,test1@gmail.com
9 | ZE-172-FP,14,,,true,test1@gmail.com
10 | ID-840-EG,89,2023-10-02 17:17:58,,true,
11 | FK-230-KZ,64,2023-11-27 15:21:48,,true,test1@gmail.com
12 |
--------------------------------------------------------------------------------
/tests/fixtures/custom/export/expected.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | order_id AS order_id,
3 | DATETIME(order_timestamp, "Asia/Tokyo") AS order_timestamp,
4 | order_total AS order_total,
5 | customer_id AS customer_id,
6 | customer_email_address AS customer_email_address,
7 | DATETIME(processed_timestamp, "Asia/Tokyo") AS processed_timestamp,
8 | FROM
9 | {{ ref('orders') }}
10 |
--------------------------------------------------------------------------------
/tests/fixtures/custom/export/template.sql:
--------------------------------------------------------------------------------
1 | {%- for model_name, model in data_contract.models.items() %}
2 | {#- Export only the first model #}
3 | {%- if loop.first -%}
4 | SELECT
5 | {%- for field_name, field in model.fields.items() %}
6 | {%- if field.type == "timestamp" %}
7 | DATETIME({{ field_name }}, "Asia/Tokyo") AS {{ field_name }},
8 | {%- else %}
9 | {{ field_name }} AS {{ field_name }},
10 | {%- endif %}
11 | {%- endfor %}
12 | FROM
13 | {{ "{{" }} ref('{{ model_name }}') {{ "}}" }}
14 | {%- endif %}
15 | {%- endfor %}
16 |
17 |
--------------------------------------------------------------------------------
/tests/fixtures/databricks-unity/import/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | test_table:
8 | description: string
9 | type: table
10 | title: test_table
11 | fields:
12 | id:
13 | type: int
14 | required: true
15 | config:
16 | databricksType: int
17 | name:
18 | type: string
19 | required: false
20 | config:
21 | databricksType: varchar(255)
22 | age:
23 | type: int
24 | required: false
25 | config:
26 | databricksType: smallint
27 | salary:
28 | type: decimal
29 | required: false
30 | config:
31 | databricksType: decimal(10,2)
32 | join_date:
33 | type: date
34 | required: false
35 | config:
36 | databricksType: date
37 | updated_at:
38 | type: timestamp_ntz
39 | required: false
40 | config:
41 | databricksType: timestamp
42 | is_active:
43 | type: boolean
44 | required: false
45 | config:
46 | databricksType: boolean
47 | servers:
48 | myserver:
49 | type: databricks
50 | catalog: mycatalog
51 | schema: myschema
52 |
--------------------------------------------------------------------------------
/tests/fixtures/dataframe/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: dataframetest
3 | info:
4 | title: dataframetest
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | unittest:
9 | type: dataframe
10 | models:
11 | my_table:
12 | type: table
13 | fields:
14 | field_one:
15 | type: varchar
16 | required: true
17 | unique: true
18 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
19 | field_two:
20 | type: int
21 | minimum: 10
22 | field_three:
23 | type: timestamp
24 | field_array_of_strings:
25 | type: array
26 | items:
27 | type: string
28 | field_array_of_structs:
29 | type: array
30 | items:
31 | type: struct
32 | fields:
33 | inner_field_string:
34 | type: varchar
35 | inner_field_int:
36 | type: int
37 |
--------------------------------------------------------------------------------
/tests/fixtures/dbml/import/datacontract_schema_filtered.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: Orders Latest
5 | version: 0.0.1
6 | models:
7 | line_items:
8 | description: A single article that is part of an order.
9 | namespace: orders
10 | fields:
11 | lines_item_id:
12 | type: string
13 | required: true
14 | primaryKey: true
15 | unique: true
16 | description: Primary key of the lines_item_id table
17 | order_id:
18 | type: string
19 | required: false
20 | primaryKey: false
21 | unique: false
22 | references: orders.order_id
23 | description: An internal ID that identifies an order in the online shop.
24 | sku:
25 | type: string
26 | required: false
27 | primaryKey: false
28 | unique: false
29 | description: The purchased article number
--------------------------------------------------------------------------------
/tests/fixtures/dbml/import/datacontract_table_filtered.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: Orders Latest
5 | version: 0.0.1
6 | models:
7 | orders:
8 | description: One record per order. Includes cancelled and deleted orders.
9 | namespace: test
10 | fields:
11 | order_id:
12 | type: string
13 | required: true
14 | primaryKey: true
15 | unique: true
16 | description: An internal ID that identifies an order in the online shop.
17 | order_timestamp:
18 | type: timestamp_ntz
19 | required: true
20 | primaryKey: false
21 | unique: false
22 | description: The business timestamp in UTC when the order was successfully
23 | registered in the source system and the payment was successful.
24 | order_total:
25 | type: variant
26 | required: true
27 | primaryKey: false
28 | unique: false
29 | description: Total amount the smallest monetary unit (e.g., cents).
30 | customer_id:
31 | type: string
32 | required: false
33 | primaryKey: false
34 | unique: false
35 | description: Unique identifier for the customer.
36 | customer_email_address:
37 | type: string
38 | required: true
39 | primaryKey: false
40 | unique: false
41 | description: The email address, as entered by the customer. The email address
42 | was not verified.
43 | processed_timestamp:
44 | type: timestamp_ntz
45 | required: true
46 | primaryKey: false
47 | unique: false
48 | description: The timestamp when the record was processed by the data platform.
--------------------------------------------------------------------------------
/tests/fixtures/dbml/import/dbml.txt:
--------------------------------------------------------------------------------
1 | Project "Orders Latest" {
2 | Note: '''Successful customer orders in the webshop.
3 | All orders since 2020-01-01.
4 | Orders with their line items are in their current state (no history included).
5 | '''
6 | }
7 |
8 | Table test.orders {
9 | "order_id" "text" [pk,unique,not null,Note: "An internal ID that identifies an order in the online shop."]
10 | "order_timestamp" "timestamp" [not null,Note: "The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful."]
11 | "order_total" "record" [not null,Note: "Total amount the smallest monetary unit (e.g., cents)."]
12 | "customer_id" "text" [null,Note: "Unique identifier for the customer."]
13 | "customer_email_address" "text" [not null,Note: "The email address, as entered by the customer. The email address was not verified."]
14 | "processed_timestamp" "timestamp" [not null,Note: "The timestamp when the record was processed by the data platform."]
15 | Note: "One record per order. Includes cancelled and deleted orders."
16 | }
17 |
18 |
19 | Table orders.line_items {
20 | "lines_item_id" "text" [pk,unique,not null,Note: "Primary key of the lines_item_id table"]
21 | "order_id" "text" [null,Note: "An internal ID that identifies an order in the online shop."]
22 | "sku" "text" [null,Note: "The purchased article number"]
23 | Note: "A single article that is part of an order."
24 | }
25 |
26 | Ref: orders.line_items.order_id > test.orders.order_id
--------------------------------------------------------------------------------
/tests/fixtures/excel/shipments-odcs.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/excel/shipments-odcs.xlsx
--------------------------------------------------------------------------------
/tests/fixtures/export/datacontract_nested.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders-unit-test
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | owner: checkout
7 | description: The orders data contract
8 | contact:
9 | email: team-orders@example.com
10 | url: https://wiki.example.com/teams/checkout
11 | terms:
12 | usage: This data contract serves to demo datacontract CLI export.
13 | limitations: Not intended to use in production
14 | billing: free
15 | noticePeriod: P3M
16 | servers:
17 | production:
18 | type: snowflake
19 | account: my-account
20 | database: my-database
21 | schema: my-schema
22 | models:
23 | orders:
24 | description: The orders model
25 | fields:
26 | order_id:
27 | type: varchar
28 | unique: true
29 | required: true
30 | minLength: 8
31 | maxLength: 10
32 | pii: true
33 | classification: sensitive
34 | tags:
35 | - order_id
36 | pattern: ^B[0-9]+$
37 | order_total:
38 | type: bigint
39 | required: true
40 | description: The order_total field
41 | minimum: 0
42 | maximum: 1000000
43 | order_status:
44 | type: text
45 | required: true
46 | enum:
47 | - pending
48 | - shipped
49 | - delivered
50 | address:
51 | type: record
52 | fields:
53 | street:
54 | type: string
55 | city:
56 | type: string
--------------------------------------------------------------------------------
/tests/fixtures/export/datacontract_s3.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders-unit-test
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | owner: checkout
7 | description: The orders data contract
8 | contact:
9 | email: team-orders@example.com
10 | url: https://wiki.example.com/teams/checkout
11 | terms:
12 | usage: This data contract serves to demo datacontract CLI export.
13 | limitations: Not intended to use in production
14 | billing: free
15 | noticePeriod: P3M
16 | servers:
17 | production:
18 | type: s3
19 | location: s3://datacontract-example-orders-latest/data/{model}/*.json
20 | format: json
21 | delimiter: new_line
22 | dataProductId: orders
23 | models:
24 | orders:
25 | description: The orders model
26 | fields:
27 | order_id:
28 | type: varchar
29 | unique: true
30 | required: true
31 | minLength: 8
32 | maxLength: 10
33 | pii: true
34 | classification: sensitive
35 | tags:
36 | - order_id
37 | pattern: ^B[0-9]+$
38 | order_total:
39 | type: bigint
40 | required: true
41 | description: The order_total field
42 | minimum: 0
43 | maximum: 1000000
44 | order_status:
45 | type: text
46 | required: true
47 | enum:
48 | - pending
49 | - shipped
50 | - delivered
--------------------------------------------------------------------------------
/tests/fixtures/export/rdf/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders-unit-test
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | owner: checkout
7 | contact:
8 | email: team-orders@example.com
9 | url: https://wiki.example.com/teams/checkout
10 | terms:
11 | usage: This data contract serves to demo datacontract CLI export.
12 | limitations: Not intended to use in production
13 | billing: free
14 | noticePeriod: P3M
15 | models:
16 | orders:
17 | description: The orders model
18 | fields:
19 | order_id:
20 | type: varchar
21 | unique: true
22 | required: true
23 | minLength: 8
24 | maxLength: 10
25 | pii: true
26 | classification: sensitive
27 | tags:
28 | - order_id
29 | pattern: ^B[0-9]+$
30 | order_total:
31 | type: bigint
32 | required: true
33 | description: The order_total field
34 | minimum: 0
35 | maximum: 1000000
36 | order_status:
37 | type: text
38 | required: true
39 | enum:
40 | - pending
41 | - shipped
42 | - delivered
--------------------------------------------------------------------------------
/tests/fixtures/gcs-json-remote/data/README.md:
--------------------------------------------------------------------------------
1 | This folder is uploaded to a GCS bucket.
--------------------------------------------------------------------------------
/tests/fixtures/gcs-json-remote/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: inventory-events
3 | info:
4 | title: Inventory Events
5 | version: 0.0.1
6 | owner: my-domain-team
7 | contact:
8 | email: jochen.christ@innoq.com
9 | servers:
10 | gcs-url:
11 | type: gcs
12 | location: gs://datacontract-test-inventory/inventory/*/*/*/*/*.json
13 | delimiter: new_line
14 | format: json
15 | s3-style:
16 | type: s3
17 | endpointUrl: https://storage.googleapis.com
18 | location: s3://datacontract-test-inventory/inventory/*/*/*/*/*.json
19 | delimiter: new_line
20 | format: json
21 | models:
22 | inventory:
23 | type: table
24 | fields:
25 | updated_at:
26 | type: string
27 | available:
28 | type: numeric
29 | location:
30 | type: string
31 | sku:
32 | type: string
33 |
--------------------------------------------------------------------------------
/tests/fixtures/glue/datacontract-empty-model.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | servers:
7 | production:
8 | account: '123456789012'
9 | database: test_database
10 | location: s3://test_bucket/testdb
11 | type: glue
12 | models:
13 | table_1:
14 | type: table
15 |
--------------------------------------------------------------------------------
/tests/fixtures/great-expectations/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.1
2 | id: my-data-contract-id
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | owner: checkout
7 | description: The orders data contract
8 | contact:
9 | email: team-orders@example.com
10 | url: https://wiki.example.com/teams/checkout
11 | models:
12 | orders:
13 | description: test
14 | fields:
15 | order_id:
16 | type: string
17 | required: true
18 | processed_timestamp:
19 | type: timestamp
20 | required: true
21 | quality:
22 | type: great-expectations
23 | specification:
24 | orders: |-
25 | [
26 | {
27 | "expectation_type": "expect_table_row_count_to_be_between",
28 | "kwargs": {
29 | "min_value": 10
30 | },
31 | "meta": {
32 |
33 | }
34 | }
35 | ]
--------------------------------------------------------------------------------
/tests/fixtures/great-expectations/datacontract_missing_quality_file.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.1
2 | id: my-data-contract-id
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | owner: checkout
7 | description: The orders data contract
8 | contact:
9 | email: team-orders@example.com
10 | url: https://wiki.example.com/teams/checkout
11 | models:
12 | orders:
13 | description: test
14 | fields:
15 | order_id:
16 | type: string
17 | required: true
18 | processed_timestamp:
19 | type: timestamp
20 | required: true
21 | quality:
22 | type: great-expectations
23 | specification:
24 | orders:
25 | $ref: ./fixtures/great-expectations/missing.json
26 |
--------------------------------------------------------------------------------
/tests/fixtures/great-expectations/datacontract_quality_column.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: Orders Unit Test
5 | version: 1.1.1
6 | owner: checkout
7 | description: The orders data contract
8 | contact:
9 | email: team-orders@example.com
10 | url: https://wiki.example.com/teams/checkout
11 | models:
12 | orders:
13 | description: test
14 | fields:
15 | id:
16 | description: Unique identifier for each alert.
17 | type: string
18 | required: true
19 | primaryKey: true
20 | unique: true
21 | type:
22 | description: The type of alert that has fired.
23 | type: string
24 | required: true
25 | enum: [ "A", "B", "C", "D", "E" ]
26 | quality:
27 | - type: custom
28 | engine: great-expectations
29 | description: "Accepted Values for type"
30 | implementation:
31 | expectation_type: expect_column_value_lengths_to_equal
32 | kwargs:
33 | value: 1
34 | meta:
35 | notes: "Ensures that column length is 1."
--------------------------------------------------------------------------------
/tests/fixtures/great-expectations/datacontract_quality_file.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.1
2 | id: my-data-contract-id
3 |
4 | info:
5 | title: Orders Unit Test
6 | version: 1.0.0
7 | owner: checkout
8 | description: The orders data contract
9 | contact:
10 | email: team-orders@example.com
11 | url: https://wiki.example.com/teams/checkout
12 | models:
13 | orders:
14 | description: test
15 | fields:
16 | order_id:
17 | type: string
18 | required: true
19 | processed_timestamp:
20 | type: timestamp
21 | required: true
22 | quality:
23 | type: great-expectations
24 | specification:
25 | orders:
26 | $ref: ./fixtures/great-expectations/quality.json
27 |
--------------------------------------------------------------------------------
/tests/fixtures/great-expectations/datacontract_quality_yaml.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.1
2 | id: my-data-contract-id
3 |
4 | info:
5 | title: Orders Unit Test
6 | version: 1.0.0
7 | owner: checkout
8 | description: The orders data contract
9 | contact:
10 | email: team-orders@example.com
11 | url: https://wiki.example.com/teams/checkout
12 | models:
13 | orders:
14 | description: test
15 | fields:
16 | order_id:
17 | type: string
18 | required: true
19 | quality:
20 | - type: custom
21 | engine: great-expectations
22 | implementation:
23 | expectation_type: expect_table_row_count_to_be_between
24 | kwargs:
25 | min_value: 10
26 | meta: {}
27 |
--------------------------------------------------------------------------------
/tests/fixtures/great-expectations/quality.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "expectation_type": "expect_table_row_count_to_be_between",
4 | "kwargs": {
5 | "min_value": 10
6 | },
7 | "meta": {
8 | }
9 | }
10 | ]
--------------------------------------------------------------------------------
/tests/fixtures/iceberg/invalid_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "fields": "not a list"
3 | }
--------------------------------------------------------------------------------
/tests/fixtures/iceberg/simple_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "struct",
3 | "fields": [
4 | {
5 | "id": 1,
6 | "name": "foo",
7 | "type": "int",
8 | "required": true
9 | }
10 | ],
11 | "schema-id": 1,
12 | "identifier-field-ids": [
13 | 1
14 | ]
15 | }
--------------------------------------------------------------------------------
/tests/fixtures/import/football-datacontract.yml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | FootballSchema:
8 | description: Schema for football team and person details
9 | type: object
10 | title: FootballSchema
11 | definitions:
12 | person:
13 | name: person
14 | type: object
15 | fields:
16 | first_name:
17 | type: string
18 | required: true
19 | last_name:
20 | type: string
21 | required: true
22 | age:
23 | type: integer
24 | required: true
25 | football_team:
26 | name: football_team
27 | type: object
28 | fields:
29 | name:
30 | type: string
31 | required: true
32 | league:
33 | type: string
34 | required: true
35 | year_founded:
36 | type: integer
37 | required: false
38 |
39 |
--------------------------------------------------------------------------------
/tests/fixtures/import/football.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "title": "FootballSchema",
4 | "description": "Schema for football team and person details",
5 | "type": "object",
6 | "definitions": {
7 | "person": {
8 | "type": "object",
9 | "required": ["first_name", "last_name", "age"],
10 | "properties": {
11 | "first_name": {"type": "string"},
12 | "last_name": {"type": "string"},
13 | "age": {"type": "integer"}
14 | }
15 | },
16 | "football_team": {
17 | "type": "object",
18 | "required": ["name", "league"],
19 | "properties": {
20 | "name": {"type": "string"},
21 | "league": {"type": "string"},
22 | "year_founded": {"type": "integer"}
23 | }
24 | }
25 | },
26 | "allOf": [
27 | {"$ref": "#/definitions/person"},
28 | {"$ref": "#/definitions/football_team"}
29 | ]
30 | }
31 |
--------------------------------------------------------------------------------
/tests/fixtures/import/football_deeply_nested_no_required.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "title": "FootballSchema",
4 | "description": "Schema for football team and person details, where team is nested under the person",
5 | "type": "object",
6 | "properties": {
7 | "person": {
8 | "type": "object",
9 | "properties": {
10 | "first_name": { "type": "string" },
11 | "last_name": { "type": "string" },
12 | "age": { "type": "integer" },
13 | "football_team": {
14 | "type": "object",
15 | "properties": {
16 | "name": { "type": "string" },
17 | "league": { "type": "string" },
18 | "year_founded": { "type": "integer" }
19 | }
20 | }
21 | },
22 | "required": ["first_name", "last_name", "age"]
23 | }
24 | },
25 | "required": ["person"]
26 | }
27 |
--------------------------------------------------------------------------------
/tests/fixtures/import/football_deeply_nested_no_required_datacontract.yml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | FootballSchema:
8 | description: Schema for football team and person details, where team is nested under the person
9 | type: object
10 | title: FootballSchema
11 | fields:
12 | person:
13 | type: object
14 | required: true
15 | fields:
16 | first_name:
17 | type: string
18 | required: true
19 | last_name:
20 | type: string
21 | required: true
22 | age:
23 | type: integer
24 | required: true
25 | football_team:
26 | type: object
27 | required: false
28 | fields:
29 | name:
30 | type: string
31 | required: false
32 | league:
33 | type: string
34 | required: false
35 | year_founded:
36 | type: integer
37 | required: false
38 |
39 |
--------------------------------------------------------------------------------
/tests/fixtures/import/orders.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "title": "OrderSchema",
4 | "description": "Schema for order details",
5 | "type": "object",
6 | "properties": {
7 | "order_id": {
8 | "type": "string",
9 | "title": "Order ID",
10 | "description": "Unique identifier for the order"
11 | },
12 | "order_timestamp": {
13 | "type": "string",
14 | "format": "date-time",
15 | "title": "Order Timestamp",
16 | "description": "Timestamp when the order was placed"
17 | },
18 | "order_total": {
19 | "type": "integer",
20 | "title": "Order Total",
21 | "description": "Total amount of the order"
22 | },
23 | "line_items": {
24 | "type": "array",
25 | "title": "Line Items",
26 | "items": {
27 | "type" : ["integer", "null"]
28 | }
29 | },
30 | "customer_id": {
31 | "type": [
32 | "string",
33 | "null"
34 | ],
35 | "minLength": 10,
36 | "maxLength": 20,
37 | "title": "Customer ID",
38 | "description": "Unique identifier for the customer"
39 | },
40 | "customer_email_address": {
41 | "type": "string",
42 | "format": "email",
43 | "title": "Customer Email Address",
44 | "description": "Email address of the customer"
45 | },
46 | "processed_timestamp": {
47 | "type": "string",
48 | "format": "date-time",
49 | "title": "Processed Timestamp",
50 | "description": "Timestamp when the order was processed"
51 | }
52 | },
53 | "required": [
54 | "order_id",
55 | "order_timestamp",
56 | "order_total",
57 | "line_items",
58 | "customer_email_address",
59 | "processed_timestamp"
60 | ]
61 | }
--------------------------------------------------------------------------------
/tests/fixtures/import/orders_union-types.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "title": "OrderSchema",
4 | "description": "Schema for order details",
5 | "type": "object",
6 | "properties": {
7 | "order_id": {
8 | "type": "string",
9 | "title": "Order ID",
10 | "description": "Unique identifier for the order"
11 | },
12 | "order_timestamp": {
13 | "type": "string",
14 | "format": "date-time",
15 | "title": "Order Timestamp",
16 | "description": "Timestamp when the order was placed"
17 | },
18 | "order_total": {
19 | "type": "integer",
20 | "title": "Order Total",
21 | "description": "Total amount of the order"
22 | },
23 | "line_items": {
24 | "type": "array",
25 | "title": "Line Items",
26 | "items": {
27 | "type" : "integer"
28 | }
29 | },
30 | "vouchers": {
31 | "type": "array",
32 | "title": "List of used vouchers",
33 | "items": [
34 | {
35 | "type": "integer"
36 | }
37 | ]
38 | },
39 | "customer_id": {
40 | "type": [
41 | "string",
42 | "null"
43 | ],
44 | "minLength": 10,
45 | "maxLength": 20,
46 | "title": "Customer ID",
47 | "description": "Unique identifier for the customer"
48 | },
49 | "customer_email_address": {
50 | "type": "string",
51 | "format": "email",
52 | "title": "Customer Email Address",
53 | "description": "Email address of the customer"
54 | },
55 | "processed_timestamp": {
56 | "type": "string",
57 | "format": "date-time",
58 | "title": "Processed Timestamp",
59 | "description": "Timestamp when the order was processed"
60 | }
61 | },
62 | "required": [
63 | "order_id",
64 | "order_timestamp",
65 | "order_total",
66 | "customer_email_address",
67 | "processed_timestamp"
68 | ]
69 | }
--------------------------------------------------------------------------------
/tests/fixtures/import/orders_union-types_datacontract.yml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | OrderSchema:
8 | description: Schema for order details
9 | type: object
10 | title: OrderSchema
11 | fields:
12 | order_id:
13 | title: Order ID
14 | type: string
15 | required: true
16 | description: Unique identifier for the order
17 | order_timestamp:
18 | title: Order Timestamp
19 | type: string
20 | format: date-time
21 | required: true
22 | description: Timestamp when the order was placed
23 | order_total:
24 | title: Order Total
25 | type: integer
26 | required: true
27 | description: Total amount of the order
28 | line_items:
29 | title: Line Items
30 | type: array
31 | required: false
32 | items:
33 | type: integer
34 | vouchers:
35 | title: List of used vouchers
36 | type: array
37 | required: false
38 | items:
39 | type: integer
40 | customer_id:
41 | title: Customer ID
42 | type: string
43 | required: false
44 | description: Unique identifier for the customer
45 | minLength: 10
46 | maxLength: 20
47 | customer_email_address:
48 | title: Customer Email Address
49 | type: string
50 | format: email
51 | required: true
52 | description: Email address of the customer
53 | processed_timestamp:
54 | title: Processed Timestamp
55 | type: string
56 | format: date-time
57 | required: true
58 | description: Timestamp when the order was processed
--------------------------------------------------------------------------------
/tests/fixtures/junit/data/somedata.csv:
--------------------------------------------------------------------------------
1 | 1,abc
--------------------------------------------------------------------------------
/tests/fixtures/junit/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: junit_test
3 | info:
4 | title: Sample contract to demonstrate the JUnit output format
5 | version: 1.0.0
6 | owner: my-domain-team
7 | servers:
8 | local:
9 | type: local
10 | path: ./fixtures/junit/data/somedata.csv
11 | format: csv
12 | models:
13 | my_object:
14 | fields:
15 | field_ok:
16 | description: This field is OK
17 | type: integer
18 | required: true
19 | minimum: 0
20 | field_nok:
21 | description: This check should fail
22 | type: string
23 | required: true
24 | minLength: 4
25 |
--------------------------------------------------------------------------------
/tests/fixtures/kafka-avro-remote/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders
3 | info:
4 | title: Orders
5 | version: 0.0.1
6 | description: Order messages as generated by Confluent Datagen Source Adapter
7 | servers:
8 | production:
9 | type: kafka
10 | host: pkc-7xoy1.eu-central-1.aws.confluent.cloud:9092
11 | topic: orders.avro.v1
12 | format: avro
13 | models:
14 | orders:
15 | type: table
16 | description: My Model
17 | namespace: com.example.checkout
18 | fields:
19 | ordertime:
20 | type: bigint
21 | description: My Field
22 | orderid:
23 | type: int
24 | itemid:
25 | type: string
26 | orderunits:
27 | type: double
28 | address:
29 | type: object
30 | fields:
31 | city:
32 | type: string
33 | state:
34 | type: string
35 | zipcode:
36 | type: long
37 | quality:
38 | type: SodaCL
39 | specification:
40 | checks for orders:
41 | - row_count >= 5000
42 |
43 |
--------------------------------------------------------------------------------
/tests/fixtures/kafka-json-remote/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders
3 | info:
4 | title: Orders
5 | version: 0.0.1
6 | description: Order messages as generated by Confluent Datagen Source Adapter
7 | servers:
8 | production:
9 | type: kafka
10 | host: pkc-7xoy1.eu-central-1.aws.confluent.cloud:9092
11 | topic: datamesh.orders.v1
12 | format: json
13 | models:
14 | orders:
15 | type: table
16 | fields:
17 | ordertime:
18 | type: bigint
19 | required: true
20 | orderid:
21 | type: int
22 | itemid:
23 | type: string
24 | orderunits:
25 | type: double
26 | address:
27 | type: object
28 | fields:
29 | city:
30 | type: string
31 | state:
32 | type: string
33 | zipcode:
34 | type: string
35 | quality:
36 | type: SodaCL
37 | specification:
38 | checks for orders:
39 | - row_count >= 5000
40 |
41 |
--------------------------------------------------------------------------------
/tests/fixtures/kafka/data/messages.json:
--------------------------------------------------------------------------------
1 | {"updated_at":"2022-04-20T13:50:34.228811Z","available":17,"location":"18","sku":"9521582929054"}
2 | {"updated_at":"2022-04-20T13:50:34.589142Z","available":16,"location":"18","sku":"9521582929054"}
3 | {"updated_at":"2022-04-20T13:50:34.589501Z","available":15,"location":"18","sku":"9521582929054"}
4 | {"updated_at":"2022-04-20T13:50:34.589771Z","available":14,"location":"18","sku":"9521582929054"}
5 | {"updated_at":"2022-04-20T13:50:34.590008Z","available":13,"location":"18","sku":"9521582929054"}
6 | {"updated_at":"2022-04-20T13:50:34.590261Z","available":12,"location":"18","sku":"9521582929054"}
7 | {"updated_at":"2022-04-20T13:50:34.590559Z","available":11,"location":"18","sku":"9521582929054"}
8 | {"updated_at":"2022-04-20T13:50:34.590831Z","available":12,"location":"18","sku":"9521582929054"}
9 | {"updated_at":"2022-04-20T13:50:34.591076Z","available":11,"location":"18","sku":"9521582929054"}
10 | {"updated_at":"2022-04-20T13:50:34.591308Z","available":10,"location":"18","sku":"9521582929054"}
--------------------------------------------------------------------------------
/tests/fixtures/kafka/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: inventory-events
3 | info:
4 | title: Inventory Events
5 | version: 0.0.1
6 | servers:
7 | production:
8 | type: kafka
9 | topic: inventory-events
10 | host: __KAFKA_HOST__
11 | format: json
12 | dataProductId: inventory
13 | outputPortId: s3
14 | models:
15 | inventory:
16 | type: table
17 | fields:
18 | updated_at:
19 | type: string
20 | available:
21 | type: int
22 | location:
23 | type: string
24 | sku:
25 | type: string
26 | quality:
27 | type: SodaCL
28 | specification:
29 | checks for inventory:
30 | - row_count >= 10
31 |
--------------------------------------------------------------------------------
/tests/fixtures/lint/custom_datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 0.9.2
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | my-custom-required-field: hello
7 | description: Custom data contract description.
8 |
--------------------------------------------------------------------------------
/tests/fixtures/lint/datacontract_csv_lint_base.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | orders:
8 | fields:
9 | column_1:
10 | type: string
11 | column_2:
12 | type: string
13 | examples:
14 | - type: csv
15 | model: orders
16 | data: |-
17 | column_1, column_2
18 | value_1, value_2
19 |
--------------------------------------------------------------------------------
/tests/fixtures/lint/datacontract_quality_schema.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | models:
7 | orders:
8 | fields:
9 | column_1:
10 | type: string
11 | column_2:
12 | type: string
13 | quality:
14 | type: SodaCL
15 | specification: |-
16 | checks for orders:
17 | - freshness(column_1) < 1d
18 |
--------------------------------------------------------------------------------
/tests/fixtures/lint/datacontract_unknown_model.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | examples:
7 | - type: csv
8 | model: orders
9 | data: |-
10 | column_1, column_2
11 | value_1, value_2
12 |
--------------------------------------------------------------------------------
/tests/fixtures/lint/invalid_datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | #id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 |
--------------------------------------------------------------------------------
/tests/fixtures/lint/valid_datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | description: An empty data contract
7 |
--------------------------------------------------------------------------------
/tests/fixtures/lint/valid_datacontract_ref.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: urn:datacontract:checkout:orders-latest
3 | info:
4 | title: Orders Latest
5 | version: 1.0.0
6 | description: |
7 | Successful customer orders in the webshop.
8 | All orders since 2020-01-01.
9 | Orders with their line items are in their current state (no history included).
10 | models:
11 | orders:
12 | description: One record per order. Includes cancelled and deleted orders.
13 | type: table
14 | fields:
15 | order_id:
16 | $ref: '#/definitions/order_id'
17 | required: true
18 | unique: true
19 | primaryKey: true
20 | definitions:
21 | order_id:
22 | domain: checkout
23 | name: order_id
24 | title: Order ID
25 | type: text
26 | format: uuid
27 | description: An internal ID that identifies an order in the online shop.
28 | example: 243c25e5-a081-43a9-aeab-6d5d5b6cb5e2
29 | pii: true
30 | classification: restricted
31 |
--------------------------------------------------------------------------------
/tests/fixtures/lint/valid_datacontract_references.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: urn:datacontract:checkout:orders-latest
3 | info:
4 | title: Orders Latest
5 | version: 1.0.0
6 | description: Data contract for orders
7 | models:
8 | orders:
9 | description: One record per order.
10 | type: table
11 | fields:
12 | order_id:
13 | type: string
14 | primaryKey: true
15 | description: Unique identifier for the order.
16 | line_items:
17 | description: One record per line item in an order.
18 | type: table
19 | fields:
20 | order_id:
21 | type: string
22 | references: orders.order_id
23 | description: Reference to a field in the orders table.
24 |
--------------------------------------------------------------------------------
/tests/fixtures/local-delta/data/line_items/0-7b7ac87a-16b4-43be-b019-de661a3180cf-0.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/local-delta/data/line_items/0-7b7ac87a-16b4-43be-b019-de661a3180cf-0.parquet
--------------------------------------------------------------------------------
/tests/fixtures/local-delta/data/line_items/_delta_log/00000000000000000000.json:
--------------------------------------------------------------------------------
1 | {"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
2 | {"metaData":{"id":"4df5ab31-bc35-478a-a175-bf27fc05d3a4","name":null,"description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"line_item_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"order_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sku\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1721891765448,"configuration":{}}}
3 | {"add":{"path":"0-7b7ac87a-16b4-43be-b019-de661a3180cf-0.parquet","partitionValues":{},"size":1414,"modificationTime":1721891765441,"dataChange":true,"stats":"{\"numRecords\": 12, \"minValues\": {\"line_item_id\": \"LI-001\", \"order_id\": 1001, \"sku\": \"SKU-12345\"}, \"maxValues\": {\"line_item_id\": \"LI-012\", \"order_id\": 1008, \"sku\": \"SKU-12356\"}, \"nullCount\": {\"line_item_id\": 0, \"order_id\": 0, \"sku\": 0}}","tags":null,"deletionVector":null,"baseRowId":null,"defaultRowCommitVersion":null,"clusteringProvider":null}}
4 | {"commitInfo":{"timestamp":1721891765448,"operation":"CREATE TABLE","operationParameters":{"mode":"ErrorIfExists","metadata":"{\"configuration\":{},\"createdTime\":1721891765448,\"description\":null,\"format\":{\"options\":{},\"provider\":\"parquet\"},\"id\":\"4df5ab31-bc35-478a-a175-bf27fc05d3a4\",\"name\":null,\"partitionColumns\":[],\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"line_item_id\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"order_id\\\",\\\"type\\\":\\\"long\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}},{\\\"name\\\":\\\"sku\\\",\\\"type\\\":\\\"string\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\"}","location":"file:///C:/Users/harsh/OneDrive/Desktop/New%2520folder/data/line_items","protocol":"{\"minReaderVersion\":1,\"minWriterVersion\":2}"},"clientVersion":"delta-rs.0.18.1"}}
--------------------------------------------------------------------------------
/tests/fixtures/local-delta/data/orders/0-5014bd96-6666-482e-bec9-d02a43a78cfb-0.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/local-delta/data/orders/0-5014bd96-6666-482e-bec9-d02a43a78cfb-0.parquet
--------------------------------------------------------------------------------
/tests/fixtures/local-delta/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders-unit-test
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/local-delta/data/orders
10 | format: delta
11 | dataProductId: orders
12 | models:
13 | orders:
14 | fields:
15 | order_id:
16 | type: varchar
17 | unique: true
18 | required: true
19 | order_timestamp:
20 | required: true
21 | order_total:
22 | type: bigint
23 | required: true
24 |
--------------------------------------------------------------------------------
/tests/fixtures/local-json-complex/data/sts_data.json:
--------------------------------------------------------------------------------
1 | {
2 | "array_test_string": ["test1", "test2"],
3 | "array_test_object": [
4 | {
5 | "key": "key1",
6 | "value": "value1"
7 | },
8 | {
9 | "key": "key2",
10 | "value": "value2"
11 | }
12 | ],
13 | "id": "11111111",
14 | "sts_data": {
15 | "connection_test": "SUCCESS",
16 | "key_list": {
17 | "0": {
18 | "key": "12345678"
19 | },
20 | "1": {
21 | "key": "23456789"
22 | }
23 | }
24 | }
25 | }
--------------------------------------------------------------------------------
/tests/fixtures/local-json/data/nested_types.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "id": 1,
4 | "tags": [
5 | {
6 | "foo": "bar",
7 | "arr": [ 1, 2, 3 ]
8 | },
9 | {
10 | "foo": "lap",
11 | "arr": [ 4 ]
12 | }
13 | ],
14 | "name": {
15 | "first": "John",
16 | "last": "Doe"
17 | }
18 | },
19 | {
20 | "id": 2,
21 | "tags": [
22 | {
23 | "foo": "zap",
24 | "arr": [ ]
25 | }
26 | ]
27 | }
28 | ]
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/array.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/array.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/bigint.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/bigint.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/blob.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/blob.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/boolean.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/boolean.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/combined.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/combined.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/combined_no_time.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/combined_no_time.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/date.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/date.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/decimal.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/decimal.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/double.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/double.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/float.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/float.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/integer.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/integer.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/list.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/list.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/map.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/map.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/string.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/string.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/struct.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/struct.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/time.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/time.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/timestamp.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/timestamp.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/data/timestamp_ntz.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/parquet/data/timestamp_ntz.parquet
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_array.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: array-unit-test
3 | info:
4 | title: Array Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/array.parquet
10 | format: parquet
11 | models:
12 | example:
13 | description: Test data with an array
14 | type: table
15 | fields:
16 | array_field:
17 | type: array
18 | items:
19 | type: integer
20 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_bigint.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: bigint-unit-test
3 | info:
4 | title: BigInt Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/bigint.parquet
10 | format: parquet
11 | models:
12 | example:
13 | fields:
14 | bigint_field:
15 | type: bigint
16 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_binary.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: blob-unit-test
3 | info:
4 | title: Blob Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/blob.parquet
10 | format: parquet
11 | models:
12 | blob:
13 | description: Test data with binary field
14 | type: table
15 | fields:
16 | blob_field:
17 | type: bytes
18 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_boolean.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: boolean-unit-test
3 | info:
4 | title: Boolean Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/boolean.parquet
10 | format: parquet
11 | models:
12 | example:
13 | fields:
14 | boolean_field:
15 | type: boolean
16 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_date.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: date-unit-test
3 | info:
4 | title: Date Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/date.parquet
10 | format: parquet
11 | models:
12 | example:
13 | fields:
14 | date_field:
15 | type: date
16 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_decimal.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders-unit-test
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/decimal.parquet
10 | format: parquet
11 | dataProductId: orders
12 | outputPortId: parquet
13 | models:
14 | orders:
15 | fields:
16 | decimal_field:
17 | type: decimal
18 | precision: 10
19 | scale: 2
20 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_double.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: double-unit-test
3 | info:
4 | title: Double Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/double.parquet
10 | format: parquet
11 | models:
12 | example:
13 | fields:
14 | double_field:
15 | type: double
16 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_float.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: float-unit-test
3 | info:
4 | title: Float Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/float.parquet
10 | format: parquet
11 | models:
12 | example:
13 | fields:
14 | float_field:
15 | type: float
16 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_integer.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: integer-unit-test
3 | info:
4 | title: Integer Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/integer.parquet
10 | format: parquet
11 | models:
12 | example:
13 | fields:
14 | integer_field:
15 | type: integer
16 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_invalid.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: orders-unit-test
3 | info:
4 | title: Orders Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/orders.parquet
10 | format: parquet
11 | dataProductId: orders
12 | outputPortId: parquet
13 | models:
14 | orders:
15 | fields:
16 | order_id:
17 | type: date # this is not true
18 | unique: true
19 | required: true
20 | some_extra_field: #does not exist
21 | type: long
22 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_map.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: map-unit-test
3 | info:
4 | title: Map Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/map.parquet
10 | format: parquet
11 | models:
12 | example:
13 | fields:
14 | map_field:
15 | type: map
16 | keys:
17 | type: string
18 | values:
19 | type: string
20 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_string.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: string-unit-test
3 | info:
4 | title: String Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/string.parquet
10 | format: parquet
11 | models:
12 | example:
13 | fields:
14 | string_field:
15 | type: varchar
16 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_struct.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: struct-unit-test
3 | info:
4 | title: Struct Unit Test
5 | version: 1.0.0
6 | servers:
7 | production:
8 | type: local
9 | path: ./fixtures/parquet/data/struct.parquet
10 | format: parquet
11 | models:
12 | example:
13 | fields:
14 | struct_field:
15 | type: struct
16 | fields:
17 | a:
18 | type: integer
19 | b:
20 | type: varchar
21 |
22 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_timestamp.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: iceberg-ingestion
3 | info:
4 | title: ingestion to s3/iceberg
5 | version: 0.0.1
6 | description: The ingestion of parquet files from s3 into iceberg table format
7 | servers:
8 | test:
9 | type: local
10 | path: "./fixtures/parquet/data/timestamp.parquet"
11 | format: parquet
12 | models:
13 | example:
14 | type: table
15 | fields:
16 | timestamp_field:
17 | type: timestamp_tz
18 | description: CREATEDDATE
19 | required: true
20 |
--------------------------------------------------------------------------------
/tests/fixtures/parquet/datacontract_timestamp_ntz.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: parquet-timestamp-ntz-test
3 | info:
4 | title: Parquet Timestamp w/o timezone test
5 | version: 0.0.1
6 | description: Parquet Timestamp w/o timezone test
7 | servers:
8 | test:
9 | type: local
10 | path: "./fixtures/parquet/data/timestamp_ntz.parquet"
11 | format: parquet
12 | models:
13 | example:
14 | type: table
15 | fields:
16 | timestamp:
17 | type: timestamp_ntz
18 | description: CREATEDDATE
19 | required: true
20 |
--------------------------------------------------------------------------------
/tests/fixtures/postgres-export/data/data.sql:
--------------------------------------------------------------------------------
1 | -- Create the table
2 | CREATE TABLE public.my_table (
3 | field_one VARCHAR(10) primary key,
4 | field_two INT not null,
5 | field_three TIMESTAMP
6 | );
7 |
8 | -- Insert the data
9 | INSERT INTO public.my_table (field_one, field_two, field_three) VALUES
10 | ('CX-263-DU', 50, '2023-06-16 13:12:56'),
11 | ('IK-894-MN', 47, '2023-10-08 22:40:57'),
12 | ('ER-399-JY', 22, '2023-05-16 01:08:22'),
13 | ('MT-939-FH', 63, '2023-03-15 05:15:21'),
14 | ('LV-849-MI', 33, '2023-09-08 20:08:43'),
15 | ('VS-079-OH', 85, '2023-04-15 00:50:32'),
16 | ('DN-297-XY', 79, '2023-11-08 12:55:42'),
17 | ('ZE-172-FP', 14, '2023-12-03 18:38:38'),
18 | ('ID-840-EG', 89, '2023-10-02 17:17:58'),
19 | ('FK-230-KZ', 64, '2023-11-27 15:21:48');
20 |
--------------------------------------------------------------------------------
/tests/fixtures/postgres-export/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: postgres
3 | info:
4 | title: postgres
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | production:
9 | type: postgres
10 | host: localhost
11 | port: 4567
12 | database: test
13 | schema: public
14 | staging:
15 | type: postgres
16 | host: localhost
17 | port: 4567
18 | database: test
19 | schema: public
20 | models:
21 | my_table:
22 | type: table
23 | fields:
24 | field_one:
25 | type: varchar
26 | required: true
27 | unique: true
28 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
29 | field_two:
30 | type: integer
31 | minimum: 10
32 | field_three:
33 | type: timestamp
34 |
--------------------------------------------------------------------------------
/tests/fixtures/postgres/data/data.sql:
--------------------------------------------------------------------------------
1 | -- Create the table
2 | CREATE TABLE public.my_table (
3 | field_one VARCHAR(10) primary key,
4 | field_two INT not null,
5 | field_three TIMESTAMPTZ
6 | );
7 |
8 | -- Insert the data
9 | INSERT INTO public.my_table (field_one, field_two, field_three) VALUES
10 | ('CX-263-DU', 50, '2023-06-16 13:12:56'),
11 | ('IK-894-MN', 47, '2023-10-08 22:40:57'),
12 | ('ER-399-JY', 22, '2023-05-16 01:08:22'),
13 | ('MT-939-FH', 63, '2023-03-15 05:15:21'),
14 | ('LV-849-MI', 33, '2023-09-08 20:08:43'),
15 | ('VS-079-OH', 85, '2023-04-15 00:50:32'),
16 | ('DN-297-XY', 79, '2023-11-08 12:55:42'),
17 | ('ZE-172-FP', 14, '2023-12-03 18:38:38'),
18 | ('ID-840-EG', 89, '2023-10-02 17:17:58'),
19 | ('FK-230-KZ', 64, '2023-11-27 15:21:48');
20 |
--------------------------------------------------------------------------------
/tests/fixtures/postgres/data/data_case_sensitive.sql:
--------------------------------------------------------------------------------
1 | -- Create the table
2 | CREATE TABLE public."My_Table2" (
3 | "Field_one" VARCHAR(10) primary key,
4 | "Field_two" INT not null,
5 | "Field_three" TIMESTAMPTZ
6 | );
7 |
8 | -- Insert the data
9 | INSERT INTO public."My_Table2" ("Field_one", "Field_two", "Field_three") VALUES
10 | ('CX-263-DU', 50, '2023-06-16 13:12:56'),
11 | ('IK-894-MN', 47, '2023-10-08 22:40:57'),
12 | ('ER-399-JY', 22, '2023-05-16 01:08:22'),
13 | ('MT-939-FH', 63, '2023-03-15 05:15:21'),
14 | ('LV-849-MI', 33, '2023-09-08 20:08:43'),
15 | ('VS-079-OH', 85, '2023-04-15 00:50:32'),
16 | ('DN-297-XY', 79, '2023-11-08 12:55:42'),
17 | ('ZE-172-FP', 14, '2023-12-03 18:38:38'),
18 | ('ID-840-EG', 89, '2023-10-02 17:17:58'),
19 | ('FK-230-KZ', 64, '2023-11-27 15:21:48');
20 |
--------------------------------------------------------------------------------
/tests/fixtures/postgres/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: postgres
3 | info:
4 | title: postgres
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | my-dataproduct/postgres:
9 | type: postgres
10 | host: localhost
11 | port: 5432
12 | database: test
13 | schema: public
14 | models:
15 | my_table_old_name:
16 | type: table
17 | fields:
18 | field_one:
19 | type: varchar
20 | required: true
21 | unique: true
22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
23 | field_two:
24 | type: integer
25 | minimum: 10
26 | field_three:
27 | type: timestamp
28 | config:
29 | postgresTable: my_table
--------------------------------------------------------------------------------
/tests/fixtures/postgres/datacontract_case_sensitive.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: postgres
3 | info:
4 | title: postgres
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | my-dataproduct/postgres:
9 | type: postgres
10 | host: localhost
11 | port: 5432
12 | database: test
13 | schema: public
14 | models:
15 | My_Table2:
16 | type: table
17 | fields:
18 | Field_one:
19 | type: varchar
20 | required: true
21 | unique: true
22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
23 | Field_two:
24 | type: integer
25 | minimum: 10
26 | Field_three:
27 | type: timestamp
28 |
--------------------------------------------------------------------------------
/tests/fixtures/postgres/datacontract_servicelevels.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: postgres
3 | info:
4 | title: postgres
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | my-dataproduct/postgres:
9 | type: postgres
10 | host: localhost
11 | port: 5432
12 | database: test
13 | schema: public
14 | models:
15 | my_table:
16 | type: table
17 | fields:
18 | field_one:
19 | type: varchar
20 | required: true
21 | unique: true
22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
23 | field_two:
24 | type: integer
25 | minimum: 10
26 | field_three:
27 | type: timestamp
28 | servicelevels:
29 | freshness:
30 | description: This is expected to fail
31 | threshold: PT1H
32 | timestampField: my_table.field_three
33 |
--------------------------------------------------------------------------------
/tests/fixtures/postgres/odcs.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v3.0.0
2 | kind: DataContract
3 | id: postgres
4 | name: postgres
5 | version: 0.0.1
6 | domain: my-domain-team
7 | status: null
8 | schema:
9 | - name: my_table
10 | physicalName: my_table
11 | logicalType: object
12 | physicalType: table
13 | properties:
14 | - name: field_one
15 | logicalType: string
16 | physicalType: varchar
17 | isNullable: false
18 | isUnique: true
19 | logicalTypeOptions:
20 | pattern: '[A-Za-z]{2}-\d{3}-[A-Za-z]{2}$'
21 | - name: field_two
22 | logicalType: integer
23 | physicalType: integer
24 | isNullable: true
25 | isUnique: false
26 | logicalTypeOptions:
27 | minimum: 10
28 | quality:
29 | - type: sql
30 | description: Less than 5% of null values
31 | query: |
32 | SELECT (COUNT(*) FILTER (WHERE field_two IS NULL) * 100.0 / COUNT(*)) AS null_percentage
33 | FROM my_table
34 | mustBeLessThan: 5
35 | - name: field_three
36 | logicalType: date
37 | physicalType: timestamptz
38 | isNullable: true
39 | isUnique: false
40 | quality:
41 | - type: sql
42 | query: |
43 | SELECT COUNT(*) FROM my_table WHERE field_two IS NOT NULL
44 | mustBeLessThan: 3600
45 | servers:
46 | - server: postgres
47 | type: postgres
48 | database: test
49 | schema: public
50 | host: localhost
51 | port: 5432
--------------------------------------------------------------------------------
/tests/fixtures/protobuf/data/sample_data.proto3.data:
--------------------------------------------------------------------------------
1 | syntax = "proto3";
2 |
3 | package example;
4 |
5 | // Enum for product category
6 | enum Category {
7 | CATEGORY_UNKNOWN = 0;
8 | CATEGORY_ELECTRONICS = 1;
9 | CATEGORY_CLOTHING = 2;
10 | CATEGORY_HOME_APPLIANCES = 3;
11 | }
12 |
13 | // Message representing product attributes
14 | message Product {
15 | string id = 1;
16 | string name = 2;
17 | double price = 3;
18 | Category category = 4;
19 | repeated string tags = 5;
20 | repeated Review reviews = 6;
21 | }
22 |
23 | // Message representing a review
24 | message Review {
25 | string user = 1;
26 | int32 rating = 2;
27 | string comment = 3;
28 | }
29 |
--------------------------------------------------------------------------------
/tests/fixtures/quality/data/data.invalid.sql:
--------------------------------------------------------------------------------
1 | -- Create the table
2 | CREATE TABLE public.my_table (
3 | field_one VARCHAR(10) primary key,
4 | field_two INT not null,
5 | field_three TIMESTAMPTZ
6 | );
7 |
8 | -- Insert the data
9 | INSERT INTO public.my_table (field_one, field_two, field_three) VALUES
10 | ('CX-263-DU', 50, '2023-06-16 13:12:56'),
11 | ('IK-894-MN', 47, '2023-10-08 22:40:57'),
12 | ('ER-399-JY', 22, '2023-05-16 01:08:22'),
13 | ('MT-939-FH', 63, '2023-03-15 05:15:21'),
14 | ('LV-849-MI', 33, '2023-09-08 20:08:43'),
15 | ('VS-079-OH', 85, '2023-04-15 00:50:32'),
16 | ('DN-297-XY', 79, '2023-11-08 12:55:42'),
17 | ('ZE-172-FP', 14, '2023-12-03 18:38:38'),
18 | ('ID-840-EG', 89, '2023-10-02 17:17:58'),
19 | ('FK-230-KZ', 64, '2023-11-27 15:21:48');
20 |
--------------------------------------------------------------------------------
/tests/fixtures/quality/data/data.valid.sql:
--------------------------------------------------------------------------------
1 | -- Create the table
2 | CREATE TABLE public.my_table (
3 | field_one VARCHAR(10) primary key,
4 | field_two INT,
5 | field_three TIMESTAMPTZ
6 | );
7 |
8 | -- Insert the data
9 | INSERT INTO public.my_table (field_one, field_two, field_three) VALUES
10 | ('CX-263-DU', 5000, '2023-01-01 00:00:00'),
11 | ('IK-894-MN', 4700, '2023-01-01 00:59:00'),
12 | ('ER-399-JY', 2200, '2023-01-01 01:58:00'),
13 | ('MT-939-FH', 6300, '2023-01-01 02:00:00'),
14 | ('LV-849-MI', 3300, '2023-01-01 02:30:00'),
15 | ('VS-079-OH', 8500, '2023-01-01 03:00:00'),
16 | ('DN-297-XY', 7900, '2023-01-01 03:30:00'),
17 | ('ZE-172-FP', 1400, '2023-01-01 04:00:00'),
18 | ('ID-840-EG', 8900, '2023-01-01 04:50:00'),
19 | ('FK-230-KZ', 10, '2023-01-01 04:50:00');
20 |
--------------------------------------------------------------------------------
/tests/fixtures/quality/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: postgres
3 | info:
4 | title: postgres
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | my-dataproduct/postgres:
9 | type: postgres
10 | host: localhost
11 | port: 5432
12 | database: test
13 | schema: public
14 | models:
15 | my_table:
16 | type: table
17 | fields:
18 | field_one:
19 | type: varchar
20 | required: true
21 | unique: true
22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
23 | field_two:
24 | type: integer
25 | minimum: 10
26 | quality:
27 | # field level quality checks
28 | - type: sql
29 | description: 95% of all order total values are expected to be between 10 and 499 EUR.
30 | dialect: postgres
31 | query: SELECT percentile_cont(0.95) WITHIN GROUP (ORDER BY field_two) AS percentile_95 FROM my_table
32 | mustBeBetween: [ 1000, 49900 ]
33 | field_three:
34 | type: timestamp
35 | # model level quality checks
36 | quality:
37 | - type: sql
38 | description: The maximum duration between two orders should be less than 3600 seconds
39 | dialect: postgres
40 | query: |
41 | SELECT MAX(duration) AS max_duration
42 | FROM (
43 | SELECT EXTRACT(EPOCH FROM (field_three - LAG(field_three) OVER (ORDER BY field_three))) AS duration
44 | FROM my_table
45 | ) subquery;
46 | mustBeLessThan: 3600
47 | - type: sql
48 | description: Row Count
49 | query: |
50 | SELECT count(*) as row_count
51 | FROM {model}
52 | mustBeGreaterThan: 5
53 |
--------------------------------------------------------------------------------
/tests/fixtures/s3-csv/data/sample_data.csv:
--------------------------------------------------------------------------------
1 | field_one,field_two,field_three
2 | CX-263-DU,50,2023-06-16 13:12:56
3 | IK-894-MN,47,2023-10-08 22:40:57
4 | ER-399-JY,22,2023-05-16 01:08:22
5 | MT-939-FH,63,2023-03-15 05:15:21
6 | LV-849-MI,33,2023-09-08 20:08:43
7 | VS-079-OH,85,2023-04-15 00:50:32
8 | DN-297-XY,79,2023-11-08 12:55:42
9 | ZE-172-FP,14,2023-12-03 18:38:38
10 | ID-840-EG,89,2023-10-02 17:17:58
11 | FK-230-KZ,64,2023-11-27 15:21:48
12 |
--------------------------------------------------------------------------------
/tests/fixtures/s3-csv/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: s3-csv
3 | info:
4 | title: s3-csv
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | my-dataproduct/s3:
9 | type: s3
10 | endpointUrl: __S3_ENDPOINT_URL__
11 | location: s3://test-bucket/fixtures/s3-csv/data/sample_data.csv
12 | format: csv
13 | dataProductId: my-dataproduct
14 | outputPortId: s3
15 | models:
16 | my_table:
17 | type: table
18 | fields:
19 | field_one:
20 | type: varchar
21 | required: true
22 | unique: true
23 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
24 | field_two:
25 | type: bigint
26 | minimum: 10
27 | field_three:
28 | type: timestamp
29 |
--------------------------------------------------------------------------------
/tests/fixtures/s3-delta/data/orders.delta/0-66aaa7ef-36e3-4985-9359-72874e273705-0.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/fixtures/s3-delta/data/orders.delta/0-66aaa7ef-36e3-4985-9359-72874e273705-0.parquet
--------------------------------------------------------------------------------
/tests/fixtures/s3-delta/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: s3-delta-orders
3 | info:
4 | title: S3 Delta Table Test
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | orders/s3:
9 | type: s3
10 | endpointUrl: __S3_ENDPOINT_URL__
11 | location: s3://test-bucket/fixtures/s3-delta/data/orders.delta
12 | format: delta
13 | dataProductId: orders
14 | outputPortId: s3
15 | models:
16 | orders:
17 | type: table
18 | fields:
19 | order_id:
20 | type: varchar
21 | unique: true
22 | required: true
23 |
--------------------------------------------------------------------------------
/tests/fixtures/s3-delta/helper/create_delta_files.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pandas as pd
4 | from deltalake.writer import write_deltalake
5 |
# Directory that receives the generated Delta table; the relative path is
# resolved against the current working directory.
output_dir = "../data"
# exist_ok=True keeps the script idempotent and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

# Sample data for Orders table
orders_data = {
    "order_id": ["1001", "1002", "1003", "1004", "1005", "1006", "1007", "1008"],
    "order_timestamp": [
        "2024-01-01T10:00:00.000Z",
        "2024-01-01T11:30:00.000Z",
        "2024-01-01T12:45:00.000Z",
        "2024-01-02T08:20:00.000Z",
        "2024-01-02T09:15:00.000Z",
        "2024-01-02T10:05:00.000Z",
        "2024-01-02T10:45:00.000Z",
        "2024-01-02T11:30:00.000Z",
    ],
    "order_total": [5000, 7500, 3000, 2000, 6500, 12000, 4500, 8000],
}

orders_df = pd.DataFrame(orders_data)
# Convert the timestamp strings into datetime values using the explicit format.
orders_df["order_timestamp"] = pd.to_datetime(orders_df["order_timestamp"], format="%Y-%m-%dT%H:%M:%S.%fZ")

# Write to Delta table files
write_deltalake(os.path.join(output_dir, "orders.delta"), orders_df)
32 |
--------------------------------------------------------------------------------
/tests/fixtures/s3-json-complex/data/feed.json:
--------------------------------------------------------------------------------
1 | [{
2 | "specversion" : "1.0",
3 | "type" : "org.http-feeds.example.inventory",
4 | "source" : "https://example.http-feeds.org/inventory",
5 | "id" : "1c6b8c6e-d8d0-4a91-b51c-1f56bd04c758",
6 | "time" : "2021-01-01T00:00:01Z",
7 | "subject" : "9521234567899",
8 | "data" : {
9 | "sku": "9521234567899",
10 | "updated": "2022-01-01T00:00:01Z",
11 | "quantity": 5
12 | }
13 | },{
14 | "specversion" : "1.0",
15 | "type" : "org.http-feeds.example.inventory",
16 | "source" : "https://example.http-feeds.org/inventory",
17 | "id" : "292042fb-ab04-4653-af90-19a24032bffe",
18 | "time" : "2021-12-01T00:00:15Z",
19 | "subject" : "9521234512349",
20 | "data" : {
21 | "sku": "9521234512349",
22 | "updated": "2022-01-01T00:00:12Z",
23 | "quantity": 0
24 | }
25 | },{
26 | "specversion" : "1.0",
27 | "type" : "org.http-feeds.example.inventory",
28 | "source" : "https://example.http-feeds.org/inventory",
29 | "id" : "fa3e2a22-398c-4d02-ad08-9415e43178e6",
30 | "time" : "2021-01-01T00:00:22Z",
31 | "subject" : "9521234567899",
32 | "data" : {
33 | "sku": "9521234567899",
34 | "updated": "2022-01-01T00:00:21Z",
35 | "quantity": 4
36 | }
37 | }]
--------------------------------------------------------------------------------
/tests/fixtures/s3-json-complex/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: inventory-events
3 | info:
4 | title: Inventory Events Feed
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | feed/s3:
9 | type: s3
10 | endpointUrl: __S3_ENDPOINT_URL__
11 | location: s3://feed-bucket/fixtures/s3-json-complex/data/*.json
12 | delimiter: array
13 | format: json
14 | dataProductId: feed
15 | outputPortId: s3
16 | models:
17 | inventory:
18 | type: object
19 | fields:
20 | specversion:
21 | type: string
22 | const: "1.0"
23 | required: true
24 | type:
25 | type: string
26 | const: "org.http-feeds.example.inventory"
27 | required: true
28 | source:
29 | type: string
30 | format: uri
31 | const: "https://example.http-feeds.org/inventory"
32 | required: true
33 | id:
34 | type: string
35 | required: true
36 | time:
37 | type: string
38 | format: date-time
39 | required: true
40 | subject:
41 | type: string
42 | data:
43 | type: object
44 | fields:
45 | sku:
46 | type: string
47 | required: true
48 | updated:
49 | type: string
50 | format: date-time
51 | required: true
52 | quantity:
53 | type: integer
54 | required: true
55 |
--------------------------------------------------------------------------------
/tests/fixtures/s3-json/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: inventory-events
3 | info:
4 | title: Inventory Events
5 | version: 0.0.1
6 | owner: my-domain-team
7 | contact:
8 | email: jochen.christ@innoq.com
9 | servers:
10 | inventory/s3:
11 | type: s3
12 | endpointUrl: __S3_ENDPOINT_URL__
13 | # location: s3://test-bucket/topics/inventory/*/*/*/*/*.json
14 | location: s3://test-bucket/fixtures/s3-json/data/{model}/year=2022/month=04/day=20/hour=00/inventory+0+0001327496.json
15 | delimiter: new_line
16 | format: json
17 | dataProductId: inventory
18 | outputPortId: s3
19 | models:
20 | inventory:
21 | type: table
22 | fields:
23 | updated_at:
24 | type: string
25 | available:
26 | type: numeric
27 | location:
28 | type: string
29 | sku:
30 | type: string
31 |
--------------------------------------------------------------------------------
/tests/fixtures/sodacl/checks.yaml:
--------------------------------------------------------------------------------
1 | checks for orders:
2 | - freshness(processed_timestamp) < 1d
3 | - row_count > 10
4 | checks for line_items:
5 | - row_count > 10:
6 | name: Have at least 10 line items
--------------------------------------------------------------------------------
/tests/fixtures/sodacl/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: urn:datacontract:checkout:orders-latest
3 | info:
4 | title: Orders Latest
5 | version: 1.0.0
6 | description: |
7 | Successful customer orders in the webshop.
8 | All orders since 2020-01-01.
9 | Orders with their line items are in their current state (no history included).
10 | owner: Checkout Team
11 | contact:
12 | name: John Doe (Data Product Owner)
13 | url: https://teams.microsoft.com/l/channel/example/checkout
14 | models:
15 | orders:
16 | description: test
17 | fields:
18 | order_id:
19 | type: string
20 | required: true
21 | processed_timestamp:
22 | type: timestamp
23 | required: true
24 | quality:
25 | type: SodaCL
26 | specification:
27 | $ref: "./fixtures/sodacl/checks.yaml"
--------------------------------------------------------------------------------
/tests/fixtures/spark/import/users_datacontract_desc.yml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | servers:
7 | local:
8 | type: dataframe
9 | models:
10 | users:
11 | description: description
12 | fields:
13 | id:
14 | type: string
15 | required: false
16 | name:
17 | type: string
18 | required: false
19 | address:
20 | type: struct
21 | required: false
22 | fields:
23 | number:
24 | type: integer
25 | required: false
26 | street:
27 | type: string
28 | required: false
29 | city:
30 | type: string
31 | required: false
32 | tags:
33 | type: array
34 | required: false
35 | items:
36 | type: string
37 | required: false
38 | metadata:
39 | type: map
40 | required: false
41 | keys:
42 | type: string
43 | required: true
44 | values:
45 | type: struct
46 | required: false
47 | fields:
48 | value:
49 | type: string
50 | required: false
51 | type:
52 | type: string
53 | required: false
54 | timestamp:
55 | type: long
56 | required: false
57 | source:
58 | type: string
59 | required: false
--------------------------------------------------------------------------------
/tests/fixtures/spark/import/users_datacontract_no_desc.yml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: my-data-contract-id
3 | info:
4 | title: My Data Contract
5 | version: 0.0.1
6 | servers:
7 | local:
8 | type: dataframe
9 | models:
10 | users:
11 | fields:
12 | id:
13 | type: string
14 | required: false
15 | name:
16 | type: string
17 | required: false
18 | address:
19 | type: struct
20 | required: false
21 | fields:
22 | number:
23 | type: integer
24 | required: false
25 | street:
26 | type: string
27 | required: false
28 | city:
29 | type: string
30 | required: false
31 | tags:
32 | type: array
33 | required: false
34 | items:
35 | type: string
36 | required: false
37 | metadata:
38 | type: map
39 | required: false
40 | keys:
41 | type: string
42 | required: true
43 | values:
44 | type: struct
45 | required: false
46 | fields:
47 | value:
48 | type: string
49 | required: false
50 | type:
51 | type: string
52 | required: false
53 | timestamp:
54 | type: long
55 | required: false
56 | source:
57 | type: string
58 | required: false
--------------------------------------------------------------------------------
/tests/fixtures/spec/datacontract_aliases.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: "123"
3 | info:
4 | title: "Test"
5 | version: 1.0.0
6 | owner: my-domain-team
7 | models:
8 | sample_model:
9 | description: Sample Model
10 | type: table
11 | fields:
12 | id:
13 | type: text
14 | title: ID
15 | description: A unique identifier
16 | $ref: '#/definitions/test'
17 | definitions:
18 | test:
19 | description: Test definition reference
20 | name: refdef
21 | type: text
22 |
--------------------------------------------------------------------------------
/tests/fixtures/spec/datacontract_fields_field.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: "123"
3 | info:
4 | title: "Test"
5 | version: 1.0.0
6 | owner: my-domain-team
7 | models:
8 | sample_model:
9 | description: Sample Model
10 | type: table
11 | fields:
12 | id:
13 | type: text
14 | title: ID
15 | description: A unique identifier
16 | $ref: '#/definitions/def'
17 | definitions:
18 | def:
19 | description: Test definition reference
20 | type: object
21 | name: refdef
22 | fields:
23 | id:
24 | type: text
25 | title: MyField
26 | $ref: '#/definitions/other'
27 | other:
28 | description: Another Def
29 | type: string
30 | name: fieldname
31 |
--------------------------------------------------------------------------------
/tests/fixtures/sqlserver/data/data.sql:
--------------------------------------------------------------------------------
1 | -- Create the table
2 | CREATE TABLE [dbo].[my_table] (
3 | field_one VARCHAR(10) PRIMARY KEY,
4 | field_two INT NOT NULL,
5 | field_three DATETIME2
6 | );
7 |
8 | -- Insert the data
9 | INSERT INTO [dbo].[my_table] (field_one, field_two, field_three) VALUES
10 | ('CX-263-DU', 50, '2023-06-16 13:12:56'),
11 | ('IK-894-MN', 47, '2023-10-08 22:40:57'),
12 | ('ER-399-JY', 22, '2023-05-16 01:08:22 '),
13 | ('MT-939-FH', 63, '2023-03-15 05:15:21 '),
14 | ('LV-849-MI', 33, '2023-09-08 20:08:43 '),
15 | ('VS-079-OH', 85, '2023-04-15 00:50:32 '),
16 | ('DN-297-XY', 79, '2023-11-08 12:55:42 '),
17 | ('ZE-172-FP', 14, '2023-12-03 18:38:38 '),
18 | ('ID-840-EG', 89, '2023-10-02 17:17:58 '),
19 | ('FK-230-KZ', 64, '2023-11-27 15:21:48 ');
20 |
--------------------------------------------------------------------------------
/tests/fixtures/sqlserver/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: sqlserver
3 | info:
4 | title: sqlserver
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | my-dataproduct/sqlserver:
9 | type: sqlserver
10 | host: localhost
11 | port: __PORT__
12 | database: tempdb
13 | schema: dbo
14 | driver: ODBC Driver 18 for SQL Server
15 | models:
16 | my_table:
17 | type: table
18 | fields:
19 | field_one:
20 | type: varchar
21 | required: true
22 | unique: true
23 | field_two:
24 | type: int
25 | minimum: 10
26 | field_three:
27 | type: timestamp
28 | config:
29 | sqlserverType: DATETIME2
30 |
--------------------------------------------------------------------------------
/tests/fixtures/trino/data/data.sql:
--------------------------------------------------------------------------------
1 | INSERT INTO my_schema.my_table (field_one, field_two, field_three)
2 | VALUES ('CX-263-DU', 50, TIMESTAMP '2023-06-16 13:12:56'),
3 | ('IK-894-MN', 47, TIMESTAMP '2023-10-08 22:40:57'),
4 | ('ER-399-JY', 22, TIMESTAMP '2023-05-16 01:08:22'),
5 | ('MT-939-FH', 63, TIMESTAMP '2023-03-15 05:15:21'),
6 | ('LV-849-MI', 33, TIMESTAMP '2023-09-08 20:08:43'),
7 | ('VS-079-OH', 85, TIMESTAMP '2023-04-15 00:50:32'),
8 | ('DN-297-XY', 79, TIMESTAMP '2023-11-08 12:55:42'),
9 | ('ZE-172-FP', 14, TIMESTAMP '2023-12-03 18:38:38'),
10 | ('ID-840-EG', 89, TIMESTAMP '2023-10-02 17:17:58'),
11 | ('FK-230-KZ', 64, TIMESTAMP '2023-11-27 15:21:48')
12 |
--------------------------------------------------------------------------------
/tests/fixtures/trino/data/table.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE my_schema.my_table
2 | (
3 | field_one VARCHAR,
4 | field_two INT,
5 | field_three TIMESTAMP WITH TIME ZONE
6 | )
--------------------------------------------------------------------------------
/tests/fixtures/trino/datacontract.yaml:
--------------------------------------------------------------------------------
1 | dataContractSpecification: 1.1.0
2 | id: trino
3 | info:
4 | title: trino
5 | version: 0.0.1
6 | owner: my-domain-team
7 | servers:
8 | my-dataproduct/trino:
9 | type: trino
10 | host: http://localhost
11 | port: __PORT__
12 | catalog: memory
13 | schema: my_schema
14 | models:
15 | my_table:
16 | type: table
17 | fields:
18 | field_one:
19 | type: varchar
20 | required: true
21 | unique: true
22 | pattern: "[A-Za-z]{2}-\\d{3}-[A-Za-z]{2}$"
23 | field_two:
24 | type: integer
25 | minimum: 10
26 | field_three:
27 | type: timestamp
28 |
--------------------------------------------------------------------------------
/tests/test_api.py:
--------------------------------------------------------------------------------
1 | from fastapi.testclient import TestClient
2 |
3 | from datacontract.api import app
4 |
5 | client = TestClient(app)
6 |
7 |
def test_lint():
    """POST the valid lint fixture to ``/lint`` and expect six checks, all passed."""
    with open("fixtures/lint/valid_datacontract.yaml", "r") as contract_file:
        contract_yaml = contract_file.read()

    response = client.post(url="/lint", json=contract_yaml)

    assert response.status_code == 200
    payload = response.json()
    print(payload)
    assert payload["result"] == "passed"
    checks = payload["checks"]
    assert len(checks) == 6
    assert all(check["result"] == "passed" for check in checks)
21 |
22 |
def test_export_jsonschema():
    """Export the local-json contract as JSON Schema via the API and compare with the stored file."""
    with open("fixtures/local-json/datacontract.yaml", "r") as contract_file:
        contract_yaml = contract_file.read()

    response = client.post(url="/export?format=jsonschema", json=contract_yaml)
    assert response.status_code == 200
    actual_json_schema = response.text
    print(actual_json_schema)

    with open("fixtures/local-json/datacontract.json") as schema_file:
        expected_json_schema = schema_file.read()
    print(expected_json_schema)

    # Plain text comparison: the response body must match the fixture character for character.
    assert actual_json_schema == expected_json_schema
36 |
--------------------------------------------------------------------------------
/tests/test_catalog.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import PosixPath
3 |
4 | from typer.testing import CliRunner
5 |
6 | from datacontract.cli import app
7 |
8 | # logging.basicConfig(level=logging.DEBUG, force=True)
9 |
10 |
def test_cli(tmp_path: PosixPath):
    """Build the HTML catalog for the catalog fixtures and check the expected pages exist.

    NOTE(review): pytest documents ``tmp_path`` as a ``pathlib.Path``; the
    ``PosixPath`` annotation is kept because that is the name this module imports.
    """
    runner = CliRunner()
    # Command-line arguments are text, so hand the output directory over as a
    # string instead of relying on the argument parser tolerating a Path object.
    result = runner.invoke(app, ["catalog", "--files", "fixtures/catalog/*.yaml", "--output", str(tmp_path)])
    assert result.exit_code == 0
    expected_pages = (
        "index.html",
        "fixtures/catalog/datacontract-1.html",
        "fixtures/catalog/datacontract-2.html",
    )
    for page in expected_pages:
        assert os.path.exists(tmp_path / page)
18 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 |
3 | from datacontract.cli import app
4 |
5 | runner = CliRunner()
6 |
7 | # logging.basicConfig(level=logging.DEBUG, force=True)
8 |
9 |
def test_test_help():
    """``test --help`` must exit with status code 0."""
    help_args = ["test", "--help"]
    help_result = runner.invoke(app, help_args)
    assert help_result.exit_code == 0
13 |
14 |
def test_file_does_not_exist():
    """Running ``test`` on a missing file exits with code 1 and reports the missing file."""
    outcome = runner.invoke(app, ["test", "unknown.yaml"])
    assert outcome.exit_code == 1
    # The expected text contains a line break — presumably where the CLI output
    # wraps; verify if the message or console width changes.
    expected_message = "The file 'unknown.yaml' does not \nexist."
    assert expected_message in outcome.stdout
19 |
--------------------------------------------------------------------------------
/tests/test_data_contract_checks.py:
--------------------------------------------------------------------------------
1 | from datacontract.engines.data_contract_checks import period_to_seconds
2 |
3 |
def test_period_to_seconds():
    """Check that each period string converts to the expected number of seconds."""
    expected_seconds = {
        "P1Y": 31536000,
        "P1D": 86400,
        "PT24H": 86400,
        "1d": 86400,
        "24h": 86400,
        "60m": 3600,
    }
    for period, seconds in expected_seconds.items():
        assert period_to_seconds(period) == seconds
11 |
--------------------------------------------------------------------------------
/tests/test_data_contract_specification.py:
--------------------------------------------------------------------------------
1 | from uuid import uuid4
2 |
3 | import pytest
4 |
5 | from datacontract.model.data_contract_specification import DataContractSpecification
6 |
7 |
def test_from_file_raises_exception_if_file_does_not_exist():
    """``DataContractSpecification.from_file`` raises ``FileNotFoundError`` for a missing file."""
    # A random hex name makes a clash with an existing file practically impossible.
    missing_file = f"{uuid4().hex}.yaml"
    with pytest.raises(FileNotFoundError):
        DataContractSpecification.from_file(missing_file)
11 |
--------------------------------------------------------------------------------
/tests/test_description_linter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datacontract/datacontract-cli/0dc8b6177a4697c18f4aa71fbc4d7bfbde59989b/tests/test_description_linter.py
--------------------------------------------------------------------------------
/tests/test_export_bigquery.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from typer.testing import CliRunner
4 |
5 | from datacontract.cli import app
6 | from datacontract.data_contract import DataContract
7 |
8 | # logging.basicConfig(level=logging.DEBUG, force=True)
9 |
10 |
def test_cli():
    """``export --format bigquery --server bigquery`` succeeds for the BigQuery export fixture."""
    cli_args = [
        "export",
        "--format",
        "bigquery",
        "--server",
        "bigquery",
        "fixtures/bigquery/export/datacontract.yaml",
    ]
    result = CliRunner().invoke(app, cli_args)
    assert result.exit_code == 0
25 |
26 |
def test_exports_bigquery_schema():
    """Export the BigQuery fixture contract and compare it, as parsed JSON, with the stored schema."""
    data_contract_file: str = "fixtures/bigquery/export/datacontract.yaml"
    with open(data_contract_file) as contract_file:
        contract_yaml = contract_file.read()

    data_contract = DataContract(data_contract_str=contract_yaml, server="bigquery")
    lint_result = data_contract.lint(enabled_linters="none")
    assert lint_result.has_passed()

    result = data_contract.export("bigquery")
    print("Result:\n", result)

    with open("fixtures/bigquery/export/bq_table_schema.json") as schema_file:
        expected = schema_file.read()
    # Compare parsed documents so formatting differences do not matter.
    assert json.loads(result) == json.loads(expected)
39 |
--------------------------------------------------------------------------------
/tests/test_export_custom.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from typer.testing import CliRunner
4 |
5 | from datacontract.cli import app
6 | from datacontract.export.custom_converter import to_custom
7 | from datacontract.model.data_contract_specification import DataContractSpecification
8 |
9 | # logging.basicConfig(level=logging.DEBUG, force=True)
10 |
11 |
def test_cli():
    """``export --format custom`` with a template file exits successfully."""
    cli_args = [
        "export",
        "./fixtures/custom/export/datacontract.yaml",
        "--format",
        "custom",
        "--template",
        "./fixtures/custom/export/template.sql",
    ]
    result = CliRunner().invoke(app, cli_args)
    assert result.exit_code == 0
26 |
27 |
def test_to_custom():
    """Render the custom template for the fixture contract and compare with ``expected.sql``."""
    template = Path("fixtures/custom/export/template.sql")
    data_contract = DataContractSpecification.from_file("fixtures/custom/export/datacontract.yaml")
    rendered = to_custom(data_contract, template)

    with open("fixtures/custom/export/expected.sql", "r") as expected_file:
        expected_sql = expected_file.read()
    assert rendered == expected_sql
35 |
--------------------------------------------------------------------------------
/tests/test_export_custom_exporter.py:
--------------------------------------------------------------------------------
1 | from datacontract.data_contract import DataContract
2 | from datacontract.export.exporter import Exporter
3 | from datacontract.export.exporter_factory import exporter_factory
4 |
5 | # logging.basicConfig(level=logging.DEBUG, force=True)
6 |
7 |
class CustomExporter(Exporter):
    """Exporter that echoes the arguments it receives, rendered as the ``str()`` of a dict."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
        """Return the string form of a dict holding the call arguments.

        ``custom_args`` is taken from ``export_args["custom_arg"]`` and falls
        back to an empty string when that key is absent.
        """
        custom_arg = export_args.get("custom_arg", "")
        return str(
            {
                "data_contract_servers": data_contract.servers,
                "model": model,
                "server": server,
                "sql_server_type": sql_server_type,
                "export_args": export_args,
                "custom_args": custom_arg,
            }
        )
19 |
20 |
21 | exporter_factory.register_exporter("custom_exporter", CustomExporter)
22 |
23 |
def test_custom_exporter():
    """Export through the registered ``custom_exporter`` and compare with the expected text."""
    contract = DataContract(data_contract_file="./fixtures/export/datacontract.yaml", server="production")
    exported = contract.export(
        export_format="custom_exporter", model="orders", server="production", custom_arg="my_custom_arg"
    )
    expected_custom = """{'data_contract_servers': {'production': Server(type='snowflake', description=None, environment='production', format=None, project=None, dataset=None, path=None, delimiter=None, endpointUrl=None, location=None, account='my-account', database='my-database', schema_='my-schema', host=None, port=None, catalog=None, topic=None, http_path=None, token=None, dataProductId=None, outputPortId=None, driver=None, storageAccount=None, roles=[ServerRole(name='analyst_us', description='Access to the data for US region')])}, 'model': 'orders', 'server': 'production', 'sql_server_type': 'auto', 'export_args': {'server': 'production', 'custom_arg': 'my_custom_arg'}, 'custom_args': 'my_custom_arg'}"""
    # TODO use json comparison instead of string comparison
    assert exported.strip() == expected_custom.strip()
31 |
--------------------------------------------------------------------------------
/tests/test_export_dbt_staging_sql.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | from typer.testing import CliRunner
3 |
4 | from datacontract.cli import app
5 | from datacontract.export.dbt_converter import to_dbt_staging_sql
6 | from datacontract.model.data_contract_specification import DataContractSpecification
7 |
8 | # logging.basicConfig(level=logging.DEBUG, force=True)
9 |
10 |
def test_cli():
    """``export --format dbt-staging-sql --model orders`` exits successfully."""
    cli_args = [
        "export",
        "./fixtures/dbt/export/datacontract.yaml",
        "--format",
        "dbt-staging-sql",
        "--model",
        "orders",
    ]
    result = CliRunner().invoke(app, cli_args)
    print(result.stdout)
    assert result.exit_code == 0
26 |
27 |
def test_to_dbt_staging():
    """Generate the dbt staging SQL for the ``orders`` model and compare with the expected query.

    Both texts are passed through ``yaml.safe_load`` before the comparison.
    """
    expected = """
select
order_id,
order_total,
order_status,
user_id
from {{ source('orders-unit-test', 'orders') }}
"""
    data_contract = DataContractSpecification.from_file("fixtures/dbt/export/datacontract.yaml")
    orders_model = data_contract.models.get("orders")

    result = to_dbt_staging_sql(data_contract, "orders", orders_model)

    assert yaml.safe_load(result) == yaml.safe_load(expected)
42 |
--------------------------------------------------------------------------------
/tests/test_export_go.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 |
3 | from datacontract.cli import app
4 | from datacontract.data_contract import DataContract
5 |
6 | # logging.basicConfig(level=logging.DEBUG, force=True)
7 |
8 |
def test_cli():
    """``export --format go`` exits successfully for the shared export fixture."""
    cli_args = ["export", "./fixtures/export/datacontract.yaml", "--format", "go"]
    result = CliRunner().invoke(app, cli_args)
    assert result.exit_code == 0
13 |
14 |
def test_to_go_types():
    """Export the shared export fixture as Go and compare with the expected struct source.

    Leading and trailing whitespace is stripped from both sides before comparing.
    """
    expected = """
package main


type Orders struct {
OrderId varchar `json:"order_id" avro:"order_id"` // None
OrderTotal bigint `json:"order_total" avro:"order_total"` // The order_total field
OrderStatus string `json:"order_status" avro:"order_status"` // None
}

"""
    contract = DataContract(data_contract_file="fixtures/export/datacontract.yaml")
    actual = contract.export("go")
    assert actual.strip() == expected.strip()
29 |
--------------------------------------------------------------------------------
/tests/test_export_html.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 |
4 | from typer.testing import CliRunner
5 |
6 | from datacontract.cli import app
7 |
8 | # logging.basicConfig(level=logging.DEBUG, force=True)
9 |
10 |
def test_cli():
    """``export --format html`` exits successfully for the shared export fixture."""
    cli_args = ["export", "./fixtures/export/datacontract.yaml", "--format", "html"]
    result = CliRunner().invoke(app, cli_args)
    assert result.exit_code == 0
15 |
16 |
def test_cli_with_output(tmp_path: Path):
    """Export the contract as HTML into ``tmp_path`` and check that the file was written."""
    output_file = tmp_path / "datacontract.html"
    runner = CliRunner()
    result = runner.invoke(
        app,
        [
            "export",
            "./fixtures/export/datacontract.yaml",
            "--format",
            "html",
            "--output",
            # Command-line arguments are text, so pass the target as a string
            # instead of relying on the argument parser tolerating a Path object.
            str(output_file),
        ],
    )
    assert result.exit_code == 0
    assert os.path.exists(output_file)
32 |
--------------------------------------------------------------------------------
/tests/test_export_markdown.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 |
3 | from datacontract.cli import app
4 | from datacontract.export.markdown_converter import to_markdown
5 | from datacontract.model.data_contract_specification import DataContractSpecification
6 |
7 | # logging.basicConfig(level=logging.DEBUG, force=True)
8 |
9 |
def test_cli():
    """The markdown export succeeds and its output starts with the contract heading."""
    cli_args = ["export", "./fixtures/markdown/export/datacontract.yaml", "--format", "markdown"]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0
    assert outcome.output.startswith("# urn:datacontract:checkout:orders-latest")
23 |
24 |
def test_to_markdown():
    """``to_markdown`` reproduces the stored ``expected.md`` fixture exactly."""
    specification = DataContractSpecification.from_file("fixtures/markdown/export/datacontract.yaml")
    rendered = to_markdown(specification)

    with open("fixtures/markdown/export/expected.md", "r") as expected_file:
        expected_markdown = expected_file.read()

    assert rendered == expected_markdown
31 |
--------------------------------------------------------------------------------
/tests/test_export_mermaid.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 |
4 | from typer.testing import CliRunner
5 |
6 | from datacontract.cli import app
7 |
8 |
def test_cli():
    """Exporting the fixture contract with ``--format mermaid`` exits with code 0."""
    cli_args = ["export", "./fixtures/export/datacontract.yaml", "--format", "mermaid"]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0
13 |
14 |
def test_cli_with_output(tmp_path: Path):
    """Exporting as mermaid with ``--output`` exits with code 0 and creates the file.

    Args:
        tmp_path: pytest-provided temporary directory that receives the export.
    """
    output_file = tmp_path / "datacontract.mermaid"
    runner = CliRunner()
    result = runner.invoke(
        app,
        [
            "export",
            "./fixtures/export/datacontract.yaml",
            "--format",
            "mermaid",
            "--output",
            # Command-line arguments are strings; pass the path as text rather
            # than relying on the parser tolerating a Path object.
            str(output_file),
        ],
    )
    assert result.exit_code == 0
    assert os.path.exists(output_file)
30 |
31 |
def test_mermaid_structure(tmp_path: Path):
    """The exported mermaid file contains the diagram keyword, model and field names.

    Args:
        tmp_path: pytest-provided temporary directory that receives the export.
    """
    datacontract_file = "fixtures/export/datacontract.yaml"
    output_file = tmp_path / "datacontract.mermaid"
    runner = CliRunner()
    result = runner.invoke(
        app,
        [
            "export",
            datacontract_file,
            "--format",
            "mermaid",
            "--output",
            # Command-line arguments are strings; pass the path as text rather
            # than relying on the parser tolerating a Path object.
            str(output_file),
        ],
    )
    assert result.exit_code == 0

    with open(output_file) as file:
        content = file.read()

    # Check structure
    assert "erDiagram" in content
    assert "orders" in content
    assert "order_id" in content
    assert "order_total" in content
    assert "order_status" in content
57 |
--------------------------------------------------------------------------------
/tests/test_export_protobuf.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 |
3 | from datacontract.cli import app
4 | from datacontract.export.protobuf_converter import to_protobuf
5 | from datacontract.model.data_contract_specification import DataContractSpecification
6 |
7 | # logging.basicConfig(level=logging.DEBUG, force=True)
8 |
9 |
def test_cli():
    """Exporting the protobuf fixture with ``--format protobuf`` exits with code 0."""
    cli_args = ["export", "./fixtures/protobuf/datacontract.yaml", "--format", "protobuf"]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0
14 |
15 |
16 | def test_to_protobuf():
17 | data_contract = DataContractSpecification.from_file("fixtures/protobuf/datacontract.yaml")
18 | expected_protobuf = """
19 | syntax = "proto3";
20 |
21 | package example;
22 |
23 | // Enum for Category
24 | enum Category {
25 | CATEGORY_UNKNOWN = 0;
26 | CATEGORY_ELECTRONICS = 1;
27 | CATEGORY_CLOTHING = 2;
28 | CATEGORY_HOME_APPLIANCES = 3;
29 | }
30 |
31 | // Details of Product.
32 | message Product {
33 | // Enum field category
34 | Category category = 1;
35 | // Field id
36 | string id = 2;
37 | // Field name
38 | string name = 3;
39 | // Field price
40 | double price = 4;
41 | // List of Review
42 | repeated string reviews = 5;
43 | // Field tags
44 | string tags = 6;
45 | }
46 |
47 | // Details of Review.
48 | message Review {
49 | // Field comment
50 | string comment = 1;
51 | // Field rating
52 | int32 rating = 2;
53 | // Field user
54 | string user = 3;
55 | }
56 |
57 | """.strip()
58 |
59 | result = to_protobuf(data_contract).strip()
60 |
61 | assert result == expected_protobuf
62 |
--------------------------------------------------------------------------------
/tests/test_export_sql_query.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 |
3 | from datacontract.cli import app
4 | from datacontract.data_contract import DataContract
5 |
6 | # logging.basicConfig(level=logging.DEBUG, force=True)
7 |
8 |
def test_cli():
    """Exporting the postgres fixture with ``--format sql-query`` exits with code 0."""
    cli_args = ["export", "./fixtures/postgres-export/datacontract.yaml", "--format", "sql-query"]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0
13 |
14 |
15 | def test_to_sql_query_postgres():
16 | actual = DataContract(data_contract_file="fixtures/postgres-export/datacontract.yaml").export("sql-query")
17 | expected = """
18 | -- Data Contract: postgres
19 | -- SQL Dialect: postgres
20 | select
21 | field_one,
22 | field_two,
23 | field_three
24 | from my_table
25 | """
26 | assert actual.strip() == expected.strip()
27 |
28 |
29 | def test_to_sql_query_snowflake():
30 | actual = DataContract(data_contract_file="fixtures/snowflake/datacontract.yaml").export("sql-query", model="orders")
31 | expected = """
32 | -- Data Contract: urn:datacontract:checkout:snowflake_orders_pii_v2
33 | -- SQL Dialect: snowflake
34 | select
35 | ORDER_ID,
36 | ORDER_TIMESTAMP,
37 | ORDER_TOTAL,
38 | CUSTOMER_ID,
39 | CUSTOMER_EMAIL_ADDRESS,
40 | PROCESSING_TIMESTAMP
41 | from orders
42 | """
43 | assert actual.strip() == expected.strip()
44 |
--------------------------------------------------------------------------------
/tests/test_export_terraform.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 |
3 | from datacontract.cli import app
4 | from datacontract.export.terraform_converter import to_terraform
5 | from datacontract.model.data_contract_specification import DataContractSpecification
6 |
7 | # logging.basicConfig(level=logging.DEBUG, force=True)
8 |
9 |
def test_cli():
    """Exporting the S3 fixture with ``--format terraform`` exits with code 0."""
    cli_args = ["export", "./fixtures/export/datacontract_s3.yaml", "--format", "terraform"]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0
14 |
15 |
16 | def test_to_terraform():
17 | data_contract = DataContractSpecification.from_file("fixtures/export/datacontract_s3.yaml")
18 | expected_terraform_file = """
19 | resource "aws_s3_bucket" "orders-unit-test_production" {
20 | bucket = "datacontract-example-orders-latest"
21 |
22 | tags = {
23 | Name = "Orders Unit Test"
24 | DataContract = "orders-unit-test"
25 | Server = "production"
26 | DataProduct = "orders"
27 | }
28 | }
29 | """.strip()
30 |
31 | result = to_terraform(data_contract)
32 |
33 | assert result == expected_terraform_file
34 |
--------------------------------------------------------------------------------
/tests/test_field_pattern_linter.py:
--------------------------------------------------------------------------------
1 | import datacontract.model.data_contract_specification as spec
2 | from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
3 | from datacontract.model.run import Check
4 |
5 |
def construct_error_check(msg: str) -> Check:
    """Build the warning ``Check`` of the field-pattern linter with ``msg`` as its reason."""
    warning_check = Check(
        type="lint",
        name="Linter 'Field pattern is correct regex'",
        result="warning",
        engine="datacontract",
        reason=msg,
    )
    return warning_check
14 |
15 |
# Check compared against the linter result in the passing case.
success_check = Check(
    type="lint",
    name="Linter 'Field pattern is correct regex'",
    result="passed",
    engine="datacontract",
)

# Module-level linter instance shared by the tests in this module.
linter = FieldPatternLinter()
21 |
22 |
def test_correct_regex_pattern():
    """A field whose pattern is ``"."`` yields only the success check."""
    field = spec.Field(pattern=".")
    model = spec.Model(fields={"test_field": field})
    specification = spec.DataContractSpecification(models={"test_model": model})

    assert linter.lint(specification) == [success_check]
29 |
30 |
def test_incorrect_regex_pattern():
    """A field whose pattern is a lone backslash yields a single warning check."""
    field = spec.Field(pattern="\\")
    model = spec.Model(fields={"test_field": field})
    specification = spec.DataContractSpecification(models={"test_model": model})

    expected_reason = (
        "Failed to compile pattern regex '\\' for field"
        " 'test_field' in model 'test_model': "
        "bad escape (end of pattern)"
    )

    assert linter.lint(specification) == [construct_error_check(expected_reason)]
43 |
--------------------------------------------------------------------------------
/tests/test_import_excel.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import yaml
5 | from typer.testing import CliRunner
6 |
7 | from datacontract.cli import app
8 | from datacontract.imports.excel_importer import import_excel_as_odcs
9 |
10 | # logging.basicConfig(level=logging.DEBUG, force=True)
11 |
12 |
def test_cli():
    """Importing the Excel fixture through the CLI exits with code 0."""
    cli_args = ["import", "--format", "excel", "--source", "./fixtures/excel/shipments-odcs.xlsx"]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0
26 |
27 |
def test_import_excel_odcs():
    """The imported Excel workbook serializes to the same YAML data as the stored fixture."""
    imported = import_excel_as_odcs("./fixtures/excel/shipments-odcs.xlsx")
    expected_yaml = read_file("fixtures/excel/shipments-odcs.yaml")

    actual_data = yaml.safe_load(imported.to_yaml())
    expected_data = yaml.safe_load(expected_yaml)
    assert actual_data == expected_data
32 |
33 |
def read_file(file):
    """Return the text content of ``file``, exiting the process if it is missing.

    Args:
        file: Path of the file to read.

    Returns:
        The complete file content as a string, decoded as UTF-8.

    Raises:
        SystemExit: With code 1 (after printing a message) when ``file`` does not exist.
    """
    if not os.path.exists(file):
        print(f"The file '{file}' does not exist.")
        sys.exit(1)
    # Use a distinct name for the handle so the ``file`` argument is not shadowed,
    # and decode explicitly so the result does not depend on the platform locale.
    with open(file, "r", encoding="utf-8") as handle:
        return handle.read()
41 |
--------------------------------------------------------------------------------
/tests/test_import_odcs_v3.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import yaml
5 | from typer.testing import CliRunner
6 |
7 | from datacontract.cli import app
8 | from datacontract.data_contract import DataContract
9 |
10 | # logging.basicConfig(level=logging.DEBUG, force=True)
11 |
12 |
def test_cli():
    """Importing the full ODCS v3 example through the CLI exits with code 0."""
    cli_args = ["import", "--format", "odcs", "--source", "./fixtures/odcs_v3/full-example.odcs.yaml"]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0
26 |
27 |
def test_import_full_odcs():
    """The full ODCS example imports to the stored data contract, which also lints cleanly."""
    imported = DataContract().import_from_source("odcs", "./fixtures/odcs_v3/full-example.odcs.yaml")
    expected_yaml = read_file("fixtures/odcs_v3/full-example.datacontract.yml")

    actual_data = yaml.safe_load(imported.to_yaml())
    expected_data = yaml.safe_load(expected_yaml)
    assert actual_data == expected_data

    lint_run = DataContract(data_contract_str=expected_yaml).lint(enabled_linters="none")
    assert lint_run.has_passed()
33 |
34 |
def test_import_complex_odcs():
    """The adventureworks ODCS file imports to the stored data contract, which also lints cleanly."""
    imported = DataContract().import_from_source("odcs", "./fixtures/odcs_v3/adventureworks.odcs.yaml")
    expected_yaml = read_file("fixtures/odcs_v3/adventureworks.datacontract.yml")

    actual_data = yaml.safe_load(imported.to_yaml())
    expected_data = yaml.safe_load(expected_yaml)
    assert actual_data == expected_data

    lint_run = DataContract(data_contract_str=expected_yaml).lint(enabled_linters="none")
    assert lint_run.has_passed()
40 |
41 |
def read_file(file):
    """Return the text content of ``file``, exiting the process if it is missing.

    Args:
        file: Path of the file to read.

    Returns:
        The complete file content as a string, decoded as UTF-8.

    Raises:
        SystemExit: With code 1 (after printing a message) when ``file`` does not exist.
    """
    if not os.path.exists(file):
        print(f"The file '{file}' does not exist.")
        sys.exit(1)
    # Use a distinct name for the handle so the ``file`` argument is not shadowed,
    # and decode explicitly so the result does not depend on the platform locale.
    with open(file, "r", encoding="utf-8") as handle:
        return handle.read()
49 |
--------------------------------------------------------------------------------
/tests/test_import_parquet.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 |
3 | from datacontract.cli import app
4 | from datacontract.data_contract import DataContract
5 |
6 | parquet_file_path = "fixtures/parquet/data/combined_no_time.parquet"
7 |
8 |
def test_cli():
    """Importing the parquet fixture through the CLI exits with code 0."""
    cli_args = ["import", "--format", "parquet", "--source", parquet_file_path]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0
22 |
23 |
24 | def test_import_parquet():
25 | result = DataContract().import_from_source(format="parquet", source=parquet_file_path)
26 |
27 | expected = """dataContractSpecification: 1.1.0
28 | id: my-data-contract-id
29 | info:
30 | title: My Data Contract
31 | version: 0.0.1
32 | models:
33 | combined_no_time:
34 | fields:
35 | string_field:
36 | type: string
37 | blob_field:
38 | type: bytes
39 | boolean_field:
40 | type: boolean
41 | decimal_field:
42 | type: decimal
43 | precision: 10
44 | scale: 2
45 | float_field:
46 | type: float
47 | double_field:
48 | type: double
49 | integer_field:
50 | type: int
51 | bigint_field:
52 | type: long
53 | struct_field:
54 | type: struct
55 | array_field:
56 | type: array
57 | list_field:
58 | type: array
59 | map_field:
60 | type: map
61 | date_field:
62 | type: date
63 | timestamp_field:
64 | type: timestamp
65 | """
66 |
67 | assert result.to_yaml() == expected
68 | assert DataContract(data_contract_str=expected).lint(enabled_linters=set()).has_passed()
69 |
--------------------------------------------------------------------------------
/tests/test_integration_datameshmanager.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from dotenv import load_dotenv
5 | from typer.testing import CliRunner
6 |
7 | from datacontract.data_contract import DataContract
8 |
9 | runner = CliRunner()
10 | load_dotenv(override=True)
11 |
12 |
@pytest.mark.skipif(
    "DATAMESH_MANAGER_API_KEY" not in os.environ, reason="Requires DATAMESH_MANAGER_API_KEY to be set"
)
def test_remote_data_contract():
    """Test a contract loaded from a remote URL and expect four passing checks.

    Skipped unless ``DATAMESH_MANAGER_API_KEY`` is present in the environment.
    """
    contract_url = "https://app.datamesh-manager.com/checker1/datacontracts/verbraucherpreisindex-61111-0002zzz"
    results_url = "https://api.datamesh-manager.com/api/test-results"
    data_contract = DataContract(data_contract_file=contract_url, publish_url=results_url)

    run = data_contract.test()

    print(run)
    assert run.result == "passed"
    assert len(run.checks) == 4
    for check in run.checks:
        assert check.result == "passed"
28 |
--------------------------------------------------------------------------------
/tests/test_notice_period_linter.py:
--------------------------------------------------------------------------------
1 | import datacontract.model.data_contract_specification as spec
2 | from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
3 | from datacontract.model.run import Check
4 |
5 |
def construct_error_check(msg: str) -> Check:
    """Build the warning ``Check`` of the notice-period linter with ``msg`` as its reason."""
    warning_check = Check(
        type="lint",
        name="Linter 'noticePeriod in ISO8601 format'",
        result="warning",
        engine="datacontract",
        reason=msg,
    )
    return warning_check
14 |
15 |
# Check compared against the linter result in the passing cases.
success_check = Check(
    type="lint",
    name="Linter 'noticePeriod in ISO8601 format'",
    result="passed",
    engine="datacontract",
)
19 |
20 |
def test_lint_correct_period():
    """A notice period of ``P1M`` assigned after construction yields only the success check."""
    specification = spec.DataContractSpecification()
    specification.terms = spec.Terms(noticePeriod="P1M")

    checks = NoticePeriodLinter().lint(specification)

    assert checks == [success_check]
26 |
27 |
def test_lint_empty_period():
    """Terms without a notice period are expected to produce only the success check."""
    # This returns a warning that's currently ignored.
    # If warnings are treated differently, change this spec.
    terms_without_period = spec.Terms()
    specification = spec.DataContractSpecification(terms=terms_without_period)

    checks = NoticePeriodLinter().lint(specification)

    assert checks == [success_check]
34 |
35 |
def test_lint_incorrect_period():
    """A notice period of ``P0`` is expected to produce a single warning check."""
    # If warnings are treated differently, change this spec.
    specification = spec.DataContractSpecification(terms=spec.Terms(noticePeriod="P0"))

    checks = NoticePeriodLinter().lint(specification)

    expected_warning = construct_error_check("Notice period 'P0' is not a valid ISO8601 duration.")
    assert checks == [expected_warning]
42 |
43 |
def test_lint_correct_datetime_period():
    """A notice period of ``P00000001T000001`` yields only the success check."""
    terms = spec.Terms(noticePeriod="P00000001T000001")
    specification = spec.DataContractSpecification(terms=terms)

    checks = NoticePeriodLinter().lint(specification)

    assert checks == [success_check]
48 |
--------------------------------------------------------------------------------
/tests/test_roundtrip_jsonschema.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from typer.testing import CliRunner
4 |
5 | from datacontract.cli import app
6 | from datacontract.data_contract import DataContract
7 | from datacontract.export.jsonschema_converter import to_jsonschemas
8 |
9 | # logging.basicConfig(level=logging.DEBUG, force=True)
10 |
11 |
def test_import_cli():
    """Importing the orders JSON schema through the CLI exits with code 0."""
    cli_args = ["import", "--format", "jsonschema", "--source", "fixtures/import/orders.json"]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0
25 |
26 |
def test_export_cli():
    """Exporting the local-json fixture with ``--format jsonschema`` exits with code 0."""
    cli_args = ["export", "./fixtures/local-json/datacontract.yaml", "--format", "jsonschema"]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0
31 |
32 |
def test_roundtrip_json_schema_orders():
    """Importing ``orders.json`` and exporting it again reproduces the source schema."""
    schema_path = "fixtures/import/orders.json"

    # Import: JSON schema -> data contract.
    imported = DataContract().import_from_source("jsonschema", schema_path)

    # Reload the imported contract from its YAML form with definitions inlined.
    reloaded = DataContract(data_contract_str=imported.to_yaml(), inline_definitions=True)
    specification = reloaded.get_data_contract_specification()

    # The source schema itself is the expected export result.
    with open(schema_path, "r") as schema_file:
        source_schema = json.load(schema_file)

    # Export: data contract -> JSON schemas, then compare the "OrderSchema" entry.
    exported = to_jsonschemas(specification)
    assert exported["OrderSchema"] == source_schema
51 |
--------------------------------------------------------------------------------
/tests/test_spec_fields_field.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 |
3 | from datacontract.data_contract import DataContract
4 | from datacontract.model.data_contract_specification import Field
5 |
6 | runner = CliRunner()
7 |
8 | # logging.basicConfig(level=logging.DEBUG, force=True)
9 |
10 |
def test_aliases():
    """A field named ``id`` and its nested ``id`` entry under ``fields`` are both ``Field`` objects."""
    contract = DataContract(data_contract_file="fixtures/spec/datacontract_fields_field.yaml")
    specification = contract.get_data_contract_specification()

    outer_field = specification.models["sample_model"].fields["id"]
    nested_field = outer_field.fields["id"]

    assert isinstance(outer_field, Field)
    assert isinstance(nested_field, Field)
18 |
--------------------------------------------------------------------------------
/tests/test_spec_ref.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 |
3 | from datacontract.data_contract import DataContract
4 |
5 | runner = CliRunner()
6 |
7 | # logging.basicConfig(level=logging.DEBUG, force=True)
8 |
9 |
def test_aliases():
    """The YAML serialization of the aliases fixture contains the ``$ref`` key."""
    contract = DataContract(data_contract_file="fixtures/spec/datacontract_aliases.yaml")
    serialized = contract.get_data_contract_specification().to_yaml()
    print(serialized)
    assert "$ref" in serialized
16 |
--------------------------------------------------------------------------------
/tests/test_test_bigquery.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from dotenv import load_dotenv
5 |
6 | from datacontract.data_contract import DataContract
7 |
8 | # logging.basicConfig(level=logging.INFO, force=True)
9 |
10 | datacontract = "fixtures/bigquery/datacontract.yaml"
11 |
12 | load_dotenv(override=True)
13 |
14 |
# Deactivated because the test requires special setup on a non-free BigQuery account.
# Can activate for testing locally, using a custom account_info file.
# For the provided datacontract.yaml the data file from s3-csv should be imported in the target BigQuery table.
@pytest.mark.skipif(
    "DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH" not in os.environ,
    reason="Requires DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH to be set",
)
def _test_test_bigquery():
    """Run the BigQuery contract test and expect the run and every check to pass."""
    run = DataContract(data_contract_file=datacontract).test()

    print(run)
    assert run.result == "passed"
    for check in run.checks:
        assert check.result == "passed"
30 |
31 |
@pytest.mark.skipif(
    "DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH" not in os.environ,
    reason="Requires DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH to be set",
)
def test_test_bigquery_complex_tables():
    """Run the complex-tables BigQuery contract test and expect the run and every check to pass."""
    run = DataContract(data_contract_file="fixtures/bigquery/datacontract_complex.yaml").test()

    print(run.pretty())
    assert run.result == "passed"
    for check in run.checks:
        assert check.result == "passed"
44 |
--------------------------------------------------------------------------------
/tests/test_test_databricks.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from dotenv import load_dotenv
5 |
6 | from datacontract.data_contract import DataContract
7 |
8 | # logging.basicConfig(level=logging.DEBUG, force=True)
9 |
10 | datacontract = "fixtures/databricks-sql/datacontract.yaml"
11 |
12 | load_dotenv(override=True)
13 |
14 |
@pytest.mark.skipif(
    "DATACONTRACT_DATABRICKS_TOKEN" not in os.environ, reason="Requires DATACONTRACT_DATABRICKS_TOKEN to be set"
)
def _test_test_databricks_sql():
    """Run the Databricks SQL contract test and expect the run and every check to pass."""
    # os.environ['DATACONTRACT_DATABRICKS_TOKEN'] = "xxx"
    # os.environ['DATACONTRACT_DATABRICKS_HTTP_PATH'] = "/sql/1.0/warehouses/b053a326fa014fb3"
    run = DataContract(data_contract_file=datacontract).test()

    print(run)
    assert run.result == "passed"
    for check in run.checks:
        assert check.result == "passed"
28 |
--------------------------------------------------------------------------------
/tests/test_test_delta.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from typer.testing import CliRunner
4 |
5 | from datacontract.cli import app
6 | from datacontract.data_contract import DataContract
7 |
8 | runner = CliRunner()
9 |
10 |
def test_valid_cli():
    """The ``test`` command succeeds on the local-delta fixture and reports which file it tests."""
    this_file = os.path.abspath(__file__)
    print(f"DEBUG Current file path:{this_file}")

    outcome = runner.invoke(app, ["test", "./fixtures/local-delta/datacontract.yaml"])

    assert outcome.exit_code == 0
    assert "Testing ./fixtures/local-delta/datacontract.yaml" in outcome.stdout
18 |
19 |
def test_valid():
    """Testing the local-delta contract yields nine checks, all of which pass."""
    contract = DataContract(
        data_contract_file="fixtures/local-delta/datacontract.yaml",
        # publish=True,
    )

    run = contract.test()

    print(run.pretty())
    assert run.result == "passed"
    assert len(run.checks) == 9
    for check in run.checks:
        assert check.result == "passed"
30 |
--------------------------------------------------------------------------------
/tests/test_test_gcs_json_remote.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from dotenv import load_dotenv
5 |
6 | from datacontract.data_contract import DataContract
7 |
8 | datacontract = "fixtures/gcs-json-remote/datacontract.yaml"
9 | load_dotenv(override=True)
10 |
11 |
@pytest.mark.skipif(
    "DATACONTRACT_GCS_KEY_ID" not in os.environ or "DATACONTRACT_GCS_SECRET" not in os.environ,
    reason="Requires DATACONTRACT_GCS_KEY_ID, and DATACONTRACT_GCS_SECRET to be set",
)
def test_test_gcs_json_remote_gcs_url():
    """
    Run the contract test against the "gcs-url" server and expect the run to pass.

    server.type "gcs" and gs:// locations work with DuckDB, but are not yet supported for json schema testing
    """
    contract = DataContract(data_contract_file=datacontract, server="gcs-url")

    run = contract.test()

    print(run)
    assert run.result == "passed"
29 |
30 |
@pytest.mark.skipif(
    "DATACONTRACT_GCS_KEY_ID" not in os.environ or "DATACONTRACT_GCS_SECRET" not in os.environ,
    reason="Requires DATACONTRACT_GCS_KEY_ID, and DATACONTRACT_GCS_SECRET to be set",
)
def test_test_gcs_json_remote_s3_style(monkeypatch):
    """Run the contract test against the "s3-style" server, reusing the GCS credentials as S3 ones."""
    # Copy the GCS key pair into the S3 credential variables for the duration of the test.
    gcs_key_id = os.environ.get("DATACONTRACT_GCS_KEY_ID")
    gcs_secret = os.environ.get("DATACONTRACT_GCS_SECRET")
    monkeypatch.setenv("DATACONTRACT_S3_ACCESS_KEY_ID", gcs_key_id)
    monkeypatch.setenv("DATACONTRACT_S3_SECRET_ACCESS_KEY", gcs_secret)

    contract = DataContract(data_contract_file=datacontract, server="s3-style")

    run = contract.test()

    print(run)
    assert run.result == "passed"
    for check in run.checks:
        assert check.result == "passed"
49 |
--------------------------------------------------------------------------------
/tests/test_test_kafka.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | import six
4 |
# Fix for Python 3.12: on interpreters >= 3.12.1, register the standalone
# ``six.moves`` module under the name ``kafka.vendor.six.moves``. This has to run
# before ``kafka`` is imported further down in this module.
# NOTE(review): presumably kafka's vendored copy of six fails to import on these versions — confirm.
if sys.version_info >= (3, 12, 1):
    sys.modules["kafka.vendor.six.moves"] = six.moves
8 |
9 |
10 | from kafka import KafkaProducer
11 | from testcontainers.kafka import KafkaContainer
12 |
13 | from datacontract.data_contract import DataContract
14 |
15 | datacontract = "fixtures/kafka/datacontract.yaml"
16 |
17 |
def test_test_kafka(monkeypatch):
    """Publish the fixture messages to a Kafka container and expect the contract test to pass."""
    # Remove any SASL username from the environment for this test.
    monkeypatch.delenv("DATACONTRACT_KAFKA_SASL_USERNAME", raising=False)

    with KafkaContainer("confluentinc/cp-kafka:7.7.0").with_kraft() as broker:
        send_messages_to_topic(broker, "fixtures/kafka/data/messages.json", "inventory-events")
        contract = DataContract(data_contract_str=_setup_datacontract(broker))
        run = contract.test()

    print(run.pretty())
    assert run.result == "passed"
29 |
30 |
def send_messages_to_topic(kafka: KafkaContainer, messages_file_path: str, topic_name: str):
    """Send every line of a file as one message to a Kafka topic.

    Args:
        kafka: Container whose bootstrap server the producer connects to.
        messages_file_path: Path of the text file; each line becomes one message value.
        topic_name: Name of the topic the messages are sent to.
    """
    print(f"Sending messages from {messages_file_path} to Kafka topic {topic_name}")

    producer = KafkaProducer(
        bootstrap_servers=kafka.get_bootstrap_server(), value_serializer=lambda v: v.encode("utf-8")
    )
    messages_sent = 0

    try:
        with open(messages_file_path) as messages_file:
            for message in messages_file:
                producer.send(topic=topic_name, value=message)
                messages_sent += 1
        # Block until everything buffered so far has been handed to the broker.
        producer.flush()
    finally:
        # Release the producer's connections even if reading or sending fails.
        producer.close()

    print(f"Sent {messages_sent} messages from {messages_file_path} to Kafka topic {topic_name}")
47 |
48 |
def _setup_datacontract(kafka: KafkaContainer):
    """Return the contract fixture text with ``__KAFKA_HOST__`` replaced by the container's bootstrap server."""
    with open(datacontract) as contract_file:
        template = contract_file.read()
    bootstrap_server = kafka.get_bootstrap_server()
    return template.replace("__KAFKA_HOST__", bootstrap_server)
54 |
--------------------------------------------------------------------------------
/tests/test_test_kafka_remote.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import pytest
5 | import six
6 |
# Fix for Python 3.12: on interpreters >= 3.12.1, register the standalone
# ``six.moves`` module under the name ``kafka.vendor.six.moves`` before the
# remaining imports of this module run.
# NOTE(review): presumably kafka's vendored copy of six fails to import on these versions — confirm.
if sys.version_info >= (3, 12, 1):
    sys.modules["kafka.vendor.six.moves"] = six.moves
10 |
11 |
12 | from dotenv import load_dotenv
13 |
14 | from datacontract.data_contract import DataContract
15 |
16 | # logging.basicConfig(level=logging.INFO, force=True)
17 |
18 |
@pytest.mark.skipif(
    "DATACONTRACT_KAFKA_SASL_USERNAME" not in os.environ,
    reason="Requires DATACONTRACT_KAFKA_SASL_USERNAME to be set",
)
def _test_test_kafka_json_remote():
    """Run the remote Kafka JSON contract test and expect the run to pass.

    NOTE(review): the skip condition reads ``os.environ`` when the decorator is
    evaluated, while ``load_dotenv`` only runs inside the body — confirm that
    values coming from a ``.env`` file are not meant to enable this test.
    """
    load_dotenv(override=True)
    # os.environ['DATACONTRACT_KAFKA_SASL_USERNAME'] = "xxx"
    # os.environ['DATACONTRACT_KAFKA_SASL_PASSWORD'] = "xxx"
    run = DataContract(data_contract_file="fixtures/kafka-json-remote/datacontract.yaml").test()

    print(run)
    assert run.result == "passed"
33 |
34 |
@pytest.mark.skipif(
    "DATACONTRACT_KAFKA_SASL_USERNAME" not in os.environ,
    reason="Requires DATACONTRACT_KAFKA_SASL_USERNAME to be set",
)
def _test_test_kafka_avro_remote():
    """Run the remote Kafka Avro contract test and expect the run to pass.

    NOTE(review): the skip condition reads ``os.environ`` when the decorator is
    evaluated, while ``load_dotenv`` only runs inside the body — confirm that
    values coming from a ``.env`` file are not meant to enable this test.
    """
    load_dotenv(override=True)
    # os.environ['DATACONTRACT_KAFKA_SASL_USERNAME'] = "xxx"
    # os.environ['DATACONTRACT_KAFKA_SASL_PASSWORD'] = "xxx"
    run = DataContract(data_contract_file="fixtures/kafka-avro-remote/datacontract.yaml").test()

    print(run)
    assert run.result == "passed"
49 |
--------------------------------------------------------------------------------
/tests/test_test_local_json.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from typer.testing import CliRunner
3 |
4 | from datacontract.cli import app
5 | from datacontract.data_contract import DataContract
6 |
7 | runner = CliRunner()
8 |
9 |
@pytest.mark.skip(reason="https://github.com/sodadata/soda-core/issues/1992")
def _test_cli():
    """The ``test`` command on the local-json fixture exits with code 0."""
    cli_args = ["test", "./fixtures/local-json/datacontract.yaml"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 0
14 |
15 |
@pytest.mark.skip(reason="https://github.com/sodadata/soda-core/issues/1992")
def _test_local_json():
    """Testing the local-json contract through the Python API yields a passing run."""
    contract = DataContract(data_contract_file="fixtures/local-json/datacontract.yaml")
    run = contract.test()
    print(run)
    assert run.result == "passed"
22 |
--------------------------------------------------------------------------------
/tests/test_test_output_junit.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from typer.testing import CliRunner
4 |
5 | from datacontract.cli import app
6 |
7 | runner = CliRunner()
8 |
9 |
def test_output_junit_test_result(tmp_path):
    """Running ``test`` with ``--output-format junit`` writes the requested result file.

    Only the existence of the file is checked; the exit code of the command is
    not asserted.

    Args:
        tmp_path: pytest-provided temporary directory that receives the report.
    """
    report_file = tmp_path / "TEST-datacontract.xml"
    runner.invoke(
        app,
        [
            "test",
            "--output",
            # Command-line arguments are strings; pass the path as text rather
            # than relying on the parser tolerating a Path object.
            str(report_file),
            "--output-format",
            "junit",
            "./fixtures/junit/datacontract.yaml",
        ],
    )
    assert os.path.exists(report_file), "Should write a JUnit test result file"
23 |
--------------------------------------------------------------------------------
/tests/test_test_s3_csv.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from testcontainers.minio import MinioContainer
5 |
6 | from datacontract.data_contract import DataContract
7 |
8 | # logging.basicConfig(level=logging.DEBUG, force=True)
9 |
10 | datacontract = "fixtures/s3-csv/datacontract.yaml"
11 | file_name = "fixtures/s3-csv/data/sample_data.csv"
12 | bucket_name = "test-bucket"
13 | s3_access_key = "test-access"
14 | s3_secret_access_key = "test-secret"
15 |
16 |
@pytest.fixture(scope="session")
def minio_container():
    """Session-scoped fixture yielding a MinIO container started with the test credentials."""
    container = MinioContainer(
        image="quay.io/minio/minio", access_key=s3_access_key, secret_key=s3_secret_access_key
    )
    # Entering the context manager yields its result; leaving it runs the container's exit handler.
    with container as running_container:
        yield running_container
23 |
24 |
def test_test_s3_csv(minio_container, monkeypatch):
    """Test the CSV data contract against data uploaded to the MinIO container."""
    credentials = {
        "DATACONTRACT_S3_ACCESS_KEY_ID": s3_access_key,
        "DATACONTRACT_S3_SECRET_ACCESS_KEY": s3_secret_access_key,
    }
    for env_name, env_value in credentials.items():
        monkeypatch.setenv(env_name, env_value)

    contract_yaml = _prepare_s3_files(minio_container)
    run = DataContract(data_contract_str=contract_yaml).test()

    print(run)
    assert run.result == "passed"
    # Every individual check must have passed, not just the overall run.
    for check in run.checks:
        assert check.result == "passed"
36 |
37 |
def _prepare_s3_files(minio_container):
    """Upload the sample CSV to MinIO and return the data contract text.

    Creates ``bucket_name`` in the container, uploads ``file_name`` under an
    object key equal to its local path, then reads the data contract fixture
    and substitutes the ``__S3_ENDPOINT_URL__`` placeholder with the
    container's HTTP endpoint.

    Args:
        minio_container: MinIO container object providing host, port and client.

    Returns:
        The data contract YAML as a string, pointing at the container endpoint.
    """
    host = minio_container.get_container_host_ip()
    port = minio_container.get_exposed_port(9000)
    s3_endpoint_url = f"http://{host}:{port}"

    client = minio_container.get_client()
    client.make_bucket(bucket_name)
    with open(file_name, "rb") as data_file:
        client.put_object(bucket_name, file_name, data_file, os.path.getsize(file_name))

    # Explicit encoding so the fixture is decoded the same way on every platform.
    with open(datacontract, encoding="utf-8") as data_contract_file:
        contract_text = data_contract_file.read()
    return contract_text.replace("__S3_ENDPOINT_URL__", s3_endpoint_url)
48 |
--------------------------------------------------------------------------------
/tests/test_test_s3_json.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from testcontainers.minio import MinioContainer
5 |
6 | from datacontract.data_contract import DataContract
7 |
# Data contract fixture; _prepare_s3_files replaces its __S3_ENDPOINT_URL__ placeholder.
datacontract = "fixtures/s3-json/datacontract.yaml"
# Local sample file; its path is also used as the object key when it is uploaded.
file_name = "fixtures/s3-json/data/inventory/year=2022/month=04/day=20/hour=00/inventory+0+0001327496.json"
# Bucket created in the MinIO container by _prepare_s3_files.
bucket_name = "test-bucket"
# Credentials passed to the MinIO container fixture.
s3_access_key = "test-access"
s3_secret_access_key = "test-secret"
13 |
14 |
@pytest.fixture(scope="session")
def minio_container():
    """Provide a MinIO container for the test session, started with the module credentials."""
    minio = MinioContainer(
        image="quay.io/minio/minio",
        access_key=s3_access_key,
        secret_key=s3_secret_access_key,
    )
    # Entering the context manager yields the container; leaving it happens at teardown.
    with minio as started:
        yield started
21 |
22 |
def test_test_s3_json(minio_container, monkeypatch):
    """Test the JSON data contract against data uploaded to the MinIO container.

    Args:
        minio_container: MinIO container fixture.
        monkeypatch: pytest fixture used to set the S3 credential environment variables.
    """
    # Reuse the module-level credentials the container is started with, so the
    # environment and the container configuration cannot drift apart.
    monkeypatch.setenv("DATACONTRACT_S3_ACCESS_KEY_ID", s3_access_key)
    monkeypatch.setenv("DATACONTRACT_S3_SECRET_ACCESS_KEY", s3_secret_access_key)
    data_contract_str = _prepare_s3_files(minio_container)
    data_contract = DataContract(data_contract_str=data_contract_str)

    run = data_contract.test()

    print(run)
    assert run.result == "passed"
    assert all(check.result == "passed" for check in run.checks)
34 |
35 |
def _prepare_s3_files(minio_container):
    """Upload the sample JSON file to MinIO and return the data contract text.

    Creates ``bucket_name`` in the container, uploads ``file_name`` under an
    object key equal to its local path, then reads the data contract fixture
    and substitutes the ``__S3_ENDPOINT_URL__`` placeholder with the
    container's HTTP endpoint.

    Args:
        minio_container: MinIO container object providing host, port and client.

    Returns:
        The data contract YAML as a string, pointing at the container endpoint.
    """
    s3_endpoint_url = f"http://{minio_container.get_container_host_ip()}:{minio_container.get_exposed_port(9000)}"

    storage = minio_container.get_client()
    storage.make_bucket(bucket_name)
    with open(file_name, "rb") as payload:
        storage.put_object(bucket_name, file_name, payload, os.path.getsize(file_name))

    # Explicit encoding so the fixture is decoded the same way on every platform.
    with open(datacontract, encoding="utf-8") as data_contract_file:
        return data_contract_file.read().replace("__S3_ENDPOINT_URL__", s3_endpoint_url)
46 |
--------------------------------------------------------------------------------
/tests/test_test_s3_json_complex.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from testcontainers.minio import MinioContainer
5 |
6 | from datacontract.data_contract import DataContract
7 |
# Data contract fixture; _prepare_s3_files replaces its __S3_ENDPOINT_URL__ placeholder.
datacontract = "./fixtures/s3-json-complex/datacontract.yaml"
# Local sample file; its path is also used as the object key when it is uploaded.
file_name = "fixtures/s3-json-complex/data/feed.json"
# Bucket created in the MinIO container by _prepare_s3_files.
bucket_name = "feed-bucket"
# Credentials passed to the MinIO container fixture.
s3_access_key = "test-access"
s3_secret_access_key = "test-secret"
13 |
14 |
@pytest.fixture(scope="session")
def minio_container():
    """Session fixture yielding a MinIO container created with the module credentials."""
    container_config = {
        "image": "quay.io/minio/minio",
        "access_key": s3_access_key,
        "secret_key": s3_secret_access_key,
    }
    with MinioContainer(**container_config) as live_container:
        yield live_container
21 |
22 |
def test_test_s3_json(minio_container, monkeypatch):
    """Test the complex JSON data contract against data uploaded to the MinIO container.

    Args:
        minio_container: MinIO container fixture.
        monkeypatch: pytest fixture used to set the S3 credential environment variables.
    """
    # Reuse the module-level credentials the container is started with, so the
    # environment and the container configuration cannot drift apart.
    monkeypatch.setenv("DATACONTRACT_S3_ACCESS_KEY_ID", s3_access_key)
    monkeypatch.setenv("DATACONTRACT_S3_SECRET_ACCESS_KEY", s3_secret_access_key)

    data_contract_str = _prepare_s3_files(minio_container)

    data_contract = DataContract(data_contract_str=data_contract_str)

    run = data_contract.test()

    print(run.pretty())
    assert run.result == "passed"
    assert all(check.result == "passed" for check in run.checks)
36 |
37 |
def _prepare_s3_files(minio_container):
    """Upload the sample feed file to MinIO and return the data contract text.

    Creates ``bucket_name`` in the container, uploads ``file_name`` under an
    object key equal to its local path, then reads the data contract fixture
    and substitutes the ``__S3_ENDPOINT_URL__`` placeholder with the
    container's HTTP endpoint.

    Args:
        minio_container: MinIO container object providing host, port and client.

    Returns:
        The data contract YAML as a string, pointing at the container endpoint.
    """
    endpoint_host = minio_container.get_container_host_ip()
    endpoint_port = minio_container.get_exposed_port(9000)
    s3_endpoint_url = f"http://{endpoint_host}:{endpoint_port}"

    minio_client = minio_container.get_client()
    minio_client.make_bucket(bucket_name)
    with open(file_name, "rb") as feed_file:
        minio_client.put_object(bucket_name, file_name, feed_file, os.path.getsize(file_name))

    # Explicit encoding so the fixture is decoded the same way on every platform.
    with open(datacontract, encoding="utf-8") as data_contract_file:
        template = data_contract_file.read()
    return template.replace("__S3_ENDPOINT_URL__", s3_endpoint_url)
48 |
--------------------------------------------------------------------------------
/tests/test_test_s3_json_remote.py:
--------------------------------------------------------------------------------
1 | from datacontract.data_contract import DataContract
2 |
3 | # logging.basicConfig(level=logging.INFO, force=True)
4 |
# Data contract fixture loaded by the test below.
datacontract = "fixtures/s3-json-remote/datacontract.yaml"
6 |
7 |
def test_test_s3_json(monkeypatch):
    """Test the remote S3 JSON data contract with all S3 credential variables unset."""
    credential_vars = (
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "DATACONTRACT_S3_ACCESS_KEY_ID",
        "DATACONTRACT_S3_SECRET_ACCESS_KEY",
    )
    # Remove any credentials from the environment; a missing variable is not an error.
    for var_name in credential_vars:
        monkeypatch.delenv(var_name, raising=False)

    run = DataContract(data_contract_file=datacontract).test()

    print(run.pretty())
    assert run.result == "passed"
    for check in run.checks:
        assert check.result == "passed"
21 |
--------------------------------------------------------------------------------
/tests/test_test_snowflake.py:
--------------------------------------------------------------------------------
1 | # logging.basicConfig(level=logging.INFO, force=True)
2 |
# Data contract fixture for the Snowflake test, which is currently commented out below.
datacontract = "fixtures/snowflake/datacontract.yaml"
4 |
5 |
6 | # @pytest.mark.skipif(os.environ.get("DATACONTRACT_SNOWFLAKE_USERNAME") is None, reason="Requires DATACONTRACT_SNOWFLAKE_USERNAME to be set")
7 | # def test_test_snowflake():
8 | # load_dotenv(override=True)
9 | # # os.environ['DATACONTRACT_SNOWFLAKE_USERNAME'] = "xxx"
10 | # # os.environ['DATACONTRACT_SNOWFLAKE_PASSWORD'] = "xxx"
11 | # # os.environ['DATACONTRACT_SNOWFLAKE_ROLE'] = "xxx"
12 | # # os.environ['DATACONTRACT_SNOWFLAKE_WAREHOUSE'] = "COMPUTE_WH"
13 | # data_contract = DataContract(data_contract_file=datacontract)
14 | #
15 | # run = data_contract.test()
16 | #
17 | # print(run)
18 | # assert run.result == "passed"
19 | # assert all(check.result == "passed" for check in run.checks)
20 |
--------------------------------------------------------------------------------