├── .all-contributorsrc ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature-or-warehouse-request.md ├── scripts │ ├── report_release_failure.py │ ├── report_release_success.py │ ├── report_test_failure.py │ ├── report_test_success.py │ ├── requirements.in │ ├── requirements.txt │ └── utils │ │ ├── reporter.py │ │ ├── slack.py │ │ └── utils.py └── workflows │ └── build.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── core ├── README.md ├── setup.py └── sodasql │ ├── __main__.py │ ├── __version__.py │ ├── cli │ ├── __init__.py │ ├── cli.py │ ├── indenting_yaml_dumper.py │ └── ingest.py │ ├── common │ ├── config_helper.py │ ├── json_helper.py │ ├── logging_helper.py │ └── yaml_helper.py │ ├── dataset_analyzer.py │ ├── exceptions │ └── exceptions.py │ ├── scan │ ├── column_metadata.py │ ├── db.py │ ├── dialect.py │ ├── dialect_parser.py │ ├── env_vars.py │ ├── failed_rows_processor.py │ ├── file_system.py │ ├── group_value.py │ ├── historic_metric_yml.py │ ├── measurement.py │ ├── metric.py │ ├── missing.py │ ├── parser.py │ ├── sampler.py │ ├── samples_yml.py │ ├── scan.py │ ├── scan_builder.py │ ├── scan_column.py │ ├── scan_error.py │ ├── scan_result.py │ ├── scan_yml.py │ ├── scan_yml_column.py │ ├── scan_yml_parser.py │ ├── sql_metric_yml.py │ ├── test.py │ ├── test_result.py │ ├── validity │ │ ├── __init__.py │ │ └── money_patterns.py │ ├── warehouse.py │ ├── warehouse_yml.py │ └── warehouse_yml_parser.py │ ├── soda_server_client │ ├── monitor_measurement.py │ ├── monitor_metric.py │ ├── monitor_metric_parser.py │ └── soda_server_client.py │ └── telemetry │ ├── memory_span_exporter.py │ ├── soda_exporter.py │ ├── soda_telemetry.py │ └── soda_tracer.py ├── dev-requirements.in ├── dev-requirements.txt ├── docker-compose-arm.yml ├── docs ├── README.md ├── assets │ └── images │ │ ├── cloud-tutorial-results.png │ │ ├── column-metrics.png │ │ ├── configure-yaml.png │ │ 
├── dataset-metadata.png │ │ ├── failed-row-message.png │ │ ├── failed-rows.png │ │ ├── monitor-results.png │ │ ├── named-dataset1.png │ │ ├── named-dataset2.png │ │ ├── named-dataset3.png │ │ ├── orchestrate.png │ │ ├── sample-data.png │ │ ├── scan-anatomy.png │ │ ├── scan-failed.png │ │ ├── scan-with-cloud-sql.png │ │ ├── soda-cloud-logo.png │ │ ├── soda-operation.png │ │ ├── soda-sql-logo.png │ │ └── table-metrics.png ├── release notes │ ├── soda-spark-launch.md │ ├── soda-spark-v0.1.1.md │ ├── soda-spark-v0.2.0.md │ ├── soda-spark-v0.2.1.md │ ├── soda-spark-v0.2.3.md │ ├── soda-spark-v0.3.0.md │ ├── soda-sql-v2.1.0.md │ ├── soda-sql-v2.1.0b017.md │ ├── soda-sql-v2.1.0b021.md │ ├── soda-sql-v2.1.0b16.md │ ├── soda-sql-v2.1.0b18.md │ ├── soda-sql-v2.1.0b19.md │ ├── soda-sql-v2.1.0b20.md │ ├── soda-sql-v2.1.0b22.md │ ├── soda-sql-v2.1.1.md │ ├── soda-sql-v2.1.2.md │ ├── soda-sql-v2.1.3.md │ ├── soda-sql-v2.1.4.md │ ├── soda-sql-v2.1.5.md │ └── soda-sql-v2.1.6.md ├── soda-spark │ └── install-and-use.md └── soda-sql │ ├── cli.md │ ├── concepts.md │ ├── configure.md │ ├── connect_to_cloud.md │ ├── custom-metric-templates.md │ ├── example-test-missing.md │ ├── example-test-unique.md │ ├── example-test-valid.md │ ├── examples-by-metric.md │ ├── filtering.md │ ├── global-configuration.md │ ├── installation.md │ ├── metrics.md │ ├── orchestrate_scans.md │ ├── overview.md │ ├── programmatic_scan.md │ ├── quick-start-soda-sql.md │ ├── samples.md │ ├── scan-different-datasets.md │ ├── scan-yaml.md │ ├── scan.md │ ├── send-failed-rows.md │ ├── sql_metrics.md │ ├── supported-data-types.md │ ├── tests.md │ ├── troubleshoot.md │ ├── warehouse.md │ └── warehouse_types.md ├── examples ├── airflow │ ├── airflow_bash.py │ ├── airflow_bash_venv.py │ ├── airflow_python_op.py │ └── airflow_python_venv_op.py ├── aws-lambda │ └── lambda-zip │ │ ├── product.yml │ │ └── soda_lambda.py └── spark │ └── warehouse.yml ├── packages ├── athena │ ├── setup.py │ └── sodasql │ │ ├── dialects │ │ 
└── athena_dialect.py │ │ └── scan │ │ └── aws_credentials.py ├── bigquery │ ├── setup.py │ └── sodasql │ │ └── dialects │ │ └── bigquery_dialect.py ├── dbt │ ├── setup.py │ └── sodasql │ │ └── dbt.py ├── denodo │ ├── setup.py │ └── sodasql │ │ └── dialects │ │ └── denodo_dialect.py ├── hive │ ├── setup.py │ └── sodasql │ │ └── dialects │ │ └── hive_dialect.py ├── mysql │ ├── setup.py │ └── sodasql │ │ └── dialects │ │ └── mysql_dialect.py ├── postgresql │ ├── setup.py │ └── sodasql │ │ └── dialects │ │ └── postgres_dialect.py ├── redshift │ ├── setup.py │ └── sodasql │ │ ├── dialects │ │ └── redshift_dialect.py │ │ └── scan │ │ └── aws_credentials.py ├── snowflake │ ├── setup.py │ └── sodasql │ │ └── dialects │ │ └── snowflake_dialect.py ├── spark │ ├── setup.py │ └── sodasql │ │ └── dialects │ │ └── spark_dialect.py ├── sqlserver │ ├── setup.py │ └── sodasql │ │ └── dialects │ │ └── sqlserver_dialect.py └── trino │ ├── setup.py │ └── sodasql │ └── dialects │ └── trino_dialect.py ├── pytest.ini ├── reports └── index.html ├── requirements.txt ├── scripts ├── build_for_arm_full.sh ├── clean-install.sh ├── demo.sh ├── install_soda_cli.sh ├── publish_package.sh ├── recreate_venv.sh ├── run_scan_on_arm.sh ├── run_tests.sh ├── show_release_tags.sh ├── start_postgres_container.sh └── start_spark_container.sh ├── tbump.toml ├── tests ├── cli │ ├── run_cli.py │ ├── test_cli_commands.py │ ├── test_ingest.py │ └── test_soda_server_interaction.py ├── common │ ├── boto3_helper.py │ ├── mock_dialect.py │ ├── mock_soda_server_client.py │ ├── sql_test_case.py │ ├── sql_test_suite.py │ ├── telemetry_helper.py │ ├── validity_test_suite.py │ ├── warehouse_fixture.py │ ├── warehouse_test_helper.py │ └── yaml_helper_test.py ├── conftest.py ├── db │ └── test_quotes_basic.py ├── dbt │ ├── data │ │ ├── manifest.json │ │ ├── run_results.json │ │ └── run_results_null_failures.json │ └── test_dbt.py ├── demo │ ├── Dockerfile │ ├── demodata.sql │ ├── print_demodata_sql_script.py │ └── 
run_demo_scans.py ├── example_userhome_env_vars.yml ├── local │ ├── independent │ │ ├── test_date_parser.py │ │ ├── test_json_helper.py │ │ ├── test_measurement_str.py │ │ ├── test_scan_configuration_validation.py │ │ ├── test_soda_server_client_configuration.py │ │ ├── test_sql_expressions.py │ │ └── test_telemetry.py │ └── warehouse │ │ ├── cli │ │ └── test_cli.py │ │ ├── cloud │ │ └── test_soda_server_client.py │ │ ├── metrics │ │ ├── test_all_table_metrics.py │ │ ├── test_default_metrics.py │ │ ├── test_distinct_and_uniqueness.py │ │ ├── test_filter_and_group_by.py │ │ ├── test_frequent_values.py │ │ ├── test_histogram_numeric.py │ │ ├── test_min_max_length.py │ │ ├── test_mins_maxs.py │ │ ├── test_missing_and_invalid_customizations.py │ │ ├── test_missing_and_invalid_metric_configurations.py │ │ ├── test_schema.py │ │ ├── test_statistical_metrics.py │ │ ├── test_valid_values.py │ │ └── test_validity.py │ │ ├── samples │ │ ├── test_samples_and_failed_rows.py │ │ └── test_tests_sql_metric_failed_rows.py │ │ ├── scans │ │ ├── test_columns_exclusion.py │ │ └── test_scan_result.py │ │ ├── tests │ │ ├── test_tests_column_metric.py │ │ ├── test_tests_complex_expressions.py │ │ ├── test_tests_sql_metric.py │ │ ├── test_tests_sql_metric_multi.py │ │ ├── test_tests_table_metric.py │ │ ├── test_tests_with_filtering.py │ │ └── test_tests_with_variables.py │ │ └── validity │ │ ├── test_date_and_time_validity_formats.py │ │ ├── test_network_validity_formats.py │ │ ├── test_number_validity_formats.py │ │ ├── test_numeric_data.py │ │ └── test_user_info_validity_formats.py ├── mysql_container │ └── docker-compose.yml ├── postgres_container │ └── docker-compose.yml ├── spark_container │ ├── docker-compose.yml │ └── hive-site.xml ├── sqlserver_container │ └── docker-compose.yml ├── trino_contaner │ └── docker-compose.yml └── warehouses │ ├── athena_cfg.yml │ ├── athena_fixture.py │ ├── athena_suite.py │ ├── bigquery_cfg.yml │ ├── bigquery_fixture.py │ ├── bigquery_suite.py │ ├── 
denodo_cfg.yml │ ├── denodo_fixture.py │ ├── denodo_suite.py │ ├── hive_cfg.yml │ ├── hive_fixture.py │ ├── hive_suite.py │ ├── mysql_cfg.yml │ ├── mysql_fixture.py │ ├── mysql_suite.py │ ├── postgres_cfg.yml │ ├── postgres_fixture.py │ ├── postgres_suite.py │ ├── redshift_cfg.yml │ ├── redshift_fixture.py │ ├── redshift_suite.py │ ├── snowflake_cfg.yml │ ├── snowflake_fixture.py │ ├── snowflake_suite.py │ ├── spark_cfg.yml │ ├── spark_fixture.py │ ├── spark_suite.py │ ├── sqlserver_cfg.yml │ ├── sqlserver_fixture.py │ ├── sqlserver_suite.py │ ├── trino_cfg.yml │ ├── trino_fixture.py │ └── trino_suite.py └── tox.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | # Editor configuration, see https://editorconfig.org 2 | [*] 3 | charset = utf-8 4 | indent_style = space 5 | indent_size = 4 6 | insert_final_newline = true 7 | trim_trailing_whitespace = true 8 | quote_type = double 9 | 10 | [*.yml] 11 | charset = utf-8 12 | indent_style = space 13 | indent_size = 2 14 | 15 | [*.md] 16 | max_line_length = off 17 | trim_trailing_whitespace = false 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve soda-sql 4 | title: '' 5 | labels: bug, soda-sql 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | __A clear and concise description of what the bug is and what you expected instead.__ 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Create a new test in `scan.yml` 16 | 2. Run `soda scan ...` 17 | 3 ... 
18 | 19 | **Context** 20 | __Include your scan.yml or warehouse.yml when relevant__ 21 | 22 | **OS**: 23 | **Python Version**: 24 | **Soda SQL Version**: 25 | **Warehouse Type**: 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-or-warehouse-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature or warehouse request 3 | about: Suggest an idea, enhancement or new warehouse for soda-sql 4 | title: '' 5 | labels: feature request, soda-sql 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. E.g. I'm always frustrated when [...] 12 | 13 | _If you're requesting a new warehouse, please provide us with as much information as possible. We support warehouses provided and maintained by the community, so feel free to code an initial suggestion/implementation yourself._ 14 | 15 | **Describe the solution you'd like** 16 | A clear and concise description of what you want to happen. 17 | 18 | **Additional context** 19 | Add any other context or screenshots about the feature request here.
20 | 21 | **Soda SQL Version**: 22 | -------------------------------------------------------------------------------- /.github/scripts/report_release_failure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from utils.reporter import Reporter 4 | 5 | 6 | if __name__ == '__main__': 7 | reporter = Reporter() 8 | reporter.report_release_failure() 9 | -------------------------------------------------------------------------------- /.github/scripts/report_release_success.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from utils.reporter import Reporter 4 | 5 | 6 | if __name__ == '__main__': 7 | reporter = Reporter() 8 | reporter.report_release_success() 9 | -------------------------------------------------------------------------------- /.github/scripts/report_test_failure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from utils.reporter import Reporter 4 | 5 | 6 | if __name__ == '__main__': 7 | reporter = Reporter() 8 | reporter.report_test_failure() 9 | -------------------------------------------------------------------------------- /.github/scripts/report_test_success.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from utils.reporter import Reporter 4 | 5 | 6 | if __name__ == '__main__': 7 | reporter = Reporter() 8 | reporter.report_test_success() 9 | -------------------------------------------------------------------------------- /.github/scripts/requirements.in: -------------------------------------------------------------------------------- 1 | requests==2.25.1 2 | urllib3>=1.26.5 3 | 4 | -------------------------------------------------------------------------------- /.github/scripts/requirements.txt: 
class SlackMessageSender:
    """Post CI notifications to Slack through an incoming-webhook URL.

    A message is only delivered when the build runs on the ``master`` or
    ``main`` branch, on a tag, or when the ``FORCE_SEND`` environment
    variable is ``"true"``. On any other ref the message is reported on
    stdout and dropped.
    """

    slack_webhook_url: str
    branch_or_tag: str
    ctx: ssl.SSLContext

    # Upper bound (seconds) for the webhook call so that an unresponsive
    # endpoint cannot block the CI job indefinitely.
    _REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        """Read the webhook URL, the git ref and the force flag from the environment.

        Raises:
            Exception: propagated from ``get_env`` when ``SLACK_WEBHOOK_URL``
                or ``GITHUB_REF`` is not set.
        """
        self.slack_webhook_url = get_env('SLACK_WEBHOOK_URL')
        self.force_send = os.environ.get('FORCE_SEND', 'false')
        # NOTE(review): this context is built but never handed to `requests`
        # in this class; kept because it is a public attribute.
        self.ctx = SlackMessageSender._create_non_verifying_context()
        self.branch_or_tag = get_env('GITHUB_REF')

    def _should_send(self) -> bool:
        """Return True when the current ref (or the force flag) allows sending."""
        return (
            self.branch_or_tag in ("refs/heads/master", "refs/heads/main")
            or self.branch_or_tag.startswith("refs/tags/")
            or self.force_send == "true"
        )

    def send_slack_message(self, msg: str):
        """Send ``msg`` to the Slack webhook, or report that it was skipped.

        A non-200 response is logged as an error and not raised.
        """
        payload = {"text": msg}
        if self._should_send():
            response = requests.post(
                self.slack_webhook_url,
                data=json.dumps(payload),
                headers={'Content-Type': 'application/json'},
                timeout=self._REQUEST_TIMEOUT_SECONDS,
            )
            if response.status_code != 200:
                logging.error(f'Request to slack returned an error {response.status_code}, '
                              f'the response is:\n{response.text}')
        else:
            # The whole process environment used to be printed here; that
            # exposes tokens and webhook URLs in CI logs, so only the
            # decision itself is reported.
            print(f"Ignoring message '{msg}' since on branch {self.branch_or_tag}")

    @staticmethod
    def _create_non_verifying_context() -> ssl.SSLContext:
        """Build an SSL context with hostname checking and certificate verification disabled."""
        context = ssl.create_default_context()
        # check_hostname must be switched off before verify_mode can be CERT_NONE.
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE
        return context
40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | reports/* 138 | !reports/index.html 139 | 140 | # Spark 141 | .hive-metastore/ 142 | .spark-warehouse/ 143 | 144 | #trino 145 | .trino 146 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7 2 | 3 | RUN apt-get update && apt-get -y install gcc libsasl2-dev python-dev unixodbc-dev 4 | 5 | RUN mkdir /app 6 | 7 | WORKDIR /app 8 | 9 | RUN pip install --upgrade pip 10 | 11 | COPY . . 
#!/usr/bin/env python
"""Packaging script for the ``soda-sql-core`` distribution."""
import sys
import pathlib
from setuptools import setup, find_namespace_packages

# Fail early with a readable message instead of an obscure install error.
if sys.version_info < (3, 7):
    print("Error: Soda SQL requires at least Python 3.7")
    print("Error: Please upgrade your Python version to 3.7 or later")
    sys.exit(1)

package_name = "soda-sql-core"
# Managed by tbump - don't change manually
# And we can't have nice semver pre-release suffixes
# like "-alpha-1" as long as this is open >> https://github.com/pypa/setuptools/issues/2181
package_version = '2.2.2'
description = "Soda SQL Core"

# Read the README explicitly as UTF-8: the default is the locale encoding,
# which can fail (e.g. cp1252 on Windows) on non-ASCII characters.
long_description = (pathlib.Path(__file__).parent / "README.md").read_text(encoding="utf-8")

requires = [

    "markupsafe==2.0.1",
    "Jinja2>=2.11.3, <4.0",
    "click>=8.0, <9.0",
    "pyyaml>=5.4.1, <6.0",
    "requests>=2.23.0, <3.0",
    "Deprecated>=1.2.13, <1.3",
    "opentelemetry-api~=1.11.0",
    "opentelemetry-exporter-otlp-proto-http~=1.11.0",
    "protobuf~=3.19.0"
]
# TODO Fix the params
# TODO Add a warning that installing core doesn't give any warehouse functionality
setup(
    name=package_name,
    version=package_version,
    author="Tom Baeyens",
    author_email="tom@soda.io",
    description="Soda SQL library & CLI",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_namespace_packages(include=["sodasql*"]),
    install_requires=requires,
    # Installs the `soda` command, which dispatches to sodasql.__main__:main.
    entry_points={"console_scripts": ["soda=sodasql.__main__:main"]},
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: Microsoft :: Windows",
        "Operating System :: MacOS :: MacOS X",
        "Operating System :: POSIX :: Linux",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
    ],
    python_requires=">=3.7",
)
class IndentingDumper(yaml.Dumper):
    """
    YAML dumper that never emits "indentless" sequences, so list items nested
    under a mapping key are indented instead of aligned with the key.
    see also https://stackoverflow.com/questions/25108581/python-yaml-dump-bad-indentation
    """

    def increase_indent(self, flow=False, indentless=False):
        # The caller's `indentless` argument is ignored on purpose: forcing it
        # to False is what produces the indentation.
        parent = super()
        return parent.increase_indent(flow, False)
class JsonHelper:
    """Helpers for turning Python values into JSON text or JSON-compatible structures."""

    @staticmethod
    def to_json(o):
        """Serialize ``o`` to a compact JSON string."""
        return json.dumps(o)

    @staticmethod
    def to_json_pretty(o):
        """Serialize ``o`` to a JSON string indented by two spaces."""
        return json.dumps(o, indent=2)

    @staticmethod
    def to_jsonnable(o):
        """Convert ``o`` into a structure that ``json.dumps`` accepts.

        ``None``, ``str``, ``int``, ``float`` and ``bool`` are returned as is.
        Dicts and lists are converted recursively **in place** and the same
        object is returned; non-string dict keys are replaced by ``str(key)``.
        ``Decimal`` becomes ``float``, ``datetime`` becomes its ISO-8601
        string, ``date`` becomes ``YYYY-MM-DD`` and ``time`` ``HH:MM:SS``.

        Raises:
            RuntimeError: for any other type.
        """
        if o is None or isinstance(o, (str, int, float, bool)):
            return o
        if isinstance(o, dict):
            # Iterate over a snapshot: keys are deleted and re-inserted below,
            # which raises RuntimeError when done while iterating the live view.
            for key, value in list(o.items()):
                jsonnable_value = JsonHelper.to_jsonnable(value)
                if not isinstance(key, str):
                    del o[key]
                    o[str(key)] = jsonnable_value
                elif value is not jsonnable_value:
                    o[key] = jsonnable_value
            return o
        if isinstance(o, list):
            for i, element in enumerate(o):
                jsonnable_element = JsonHelper.to_jsonnable(element)
                if element is not jsonnable_element:
                    o[i] = jsonnable_element
            return o
        if isinstance(o, Decimal):
            return float(o)
        # datetime must be tested before date: datetime is a subclass of date.
        if isinstance(o, datetime.datetime):
            return o.isoformat()
        if isinstance(o, datetime.date):
            return o.strftime('%Y-%m-%d')
        if isinstance(o, datetime.time):
            return o.strftime('%H:%M:%S')
        raise RuntimeError(f"Don't know how to jsonize {o} ({type(o)})")
class LoggingHelper:
    """Configure the root logger to write to stdout, for CLI runs and for tests."""

    log_format = " | %(message)s"

    @classmethod
    def configure_for_cli(cls):
        """Configure logging for command-line use; delegates to ``configure``."""
        cls.configure()

    @classmethod
    def configure(cls):
        """Set up root logging to stdout.

        The level comes from the ``SODA_LOGGING_LEVEL`` environment variable
        and falls back to ``DEBUG`` when it is not set.
        """
        level = os.getenv("SODA_LOGGING_LEVEL", logging.DEBUG)
        stdout_handler = logging.StreamHandler(sys.stdout)
        # https://docs.python.org/3/library/logging.html#logrecord-attributes
        # %(name)s
        logging.basicConfig(level=level, format=cls.log_format, handlers=[stdout_handler])

    @classmethod
    def configure_for_test(cls):
        """Quieten chatty third-party loggers and force DEBUG logging to stdout."""
        library_levels = (
            ('urllib3', logging.WARNING),
            ('botocore', logging.WARNING),
            ('pyathena', logging.WARNING),
            ('faker', logging.ERROR),
            ('snowflake', logging.WARNING),
        )
        for logger_name, logger_level in library_levels:
            logging.getLogger(logger_name).setLevel(logger_level)

        stdout_handler = logging.StreamHandler(sys.stdout)
        # https://docs.python.org/3/library/logging.html#logrecord-attributes
        # %(name)s
        logging.basicConfig(
            level=logging.DEBUG,
            force=True,  # Override any previously set handlers.
            format=cls.log_format,
            handlers=[stdout_handler],
        )
logger = logging.getLogger(__name__)


class YamlHelper:
    """Helpers for parsing YAML text and validating values read from it."""

    @staticmethod
    def parse_yaml(yaml_str: str, description: str = None):
        """Parse ``yaml_str`` with the safe loader.

        Args:
            yaml_str: the YAML document text.
            description: optional label used in the error log instead of the
                full YAML text.

        Returns:
            The parsed object, or ``None`` when parsing fails (the failure is
            logged, not raised).
        """
        try:
            return yaml.load(yaml_str, Loader=yaml.SafeLoader)
        except Exception as e:
            logger.error(f'Parsing YAML failed: {str(e)}: ({description if description else yaml_str})')
            return None

    @staticmethod
    def validate_numeric_value(column_name, key, value):
        """Return ``value`` when it is an int or float.

        ``None`` is tolerated: it is logged at info level and ``None`` is
        returned. Booleans are rejected even though ``bool`` subclasses ``int``.

        Raises:
            Exception: when ``value`` is not numeric.
        """
        if value is None:
            logger.info(f'There is no value specified for {key} for column {column_name}')
            return None
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            message = f'{column_name} could not be parsed: {key}-{value} is not of a numeric type.'
            logger.error(message)
            raise Exception(message)
        return value

    @staticmethod
    def validate_list_value(column_name, key, value):
        """Return ``value`` when it is a list.

        ``None`` is tolerated: it is logged at info level and ``None`` is
        returned.

        Raises:
            Exception: when ``value`` is not a list.
        """
        if value is None:
            logger.info(f'There is no value specified for {key} for column {column_name}')
            return None
        if not isinstance(value, list):
            message = f'{column_name} could not be parsed: {key}-{value} is not of a list type.'
            logger.error(message)
            raise Exception(message)
        return value
# Machine-readable codes exposed on each error as `error_code`.
ERROR_CODE_GENERIC = 'generic_error'
ERROR_CODE_CONNECTION_FAILED = 'connection_failed'
ERROR_CODE_AUTHENTICATION_FAILED = 'authentication_failed'
ERROR_CODE_TEST_FAILED = 'test_failed'


class SodaSqlError(Exception):
    """Base error: wraps an underlying exception and appends its text to the message."""

    def __init__(self, msg, original_exception):
        full_message = f"{msg}: {str(original_exception)}"
        super().__init__(full_message)
        self.error_code = ERROR_CODE_GENERIC
        self.original_exception = original_exception


class WarehouseAuthenticationError(SodaSqlError):
    """Raised when authenticating to a warehouse fails."""

    def __init__(self, warehouse_type, original_exception):
        description = f"Soda-sql encountered a problem while trying to authenticate to {warehouse_type}"
        super().__init__(description, original_exception)
        self.error_code = ERROR_CODE_AUTHENTICATION_FAILED
        self.warehouse_type = warehouse_type


class WarehouseConnectionError(SodaSqlError):
    """Raised when connecting to a warehouse fails."""

    def __init__(self, warehouse_type, original_exception):
        description = f"Soda-sql encountered a problem while trying to connect to {warehouse_type}"
        super().__init__(description, original_exception)
        self.error_code = ERROR_CODE_CONNECTION_FAILED
        self.warehouse_type = warehouse_type


class TestFailureError(SodaSqlError):
    """Raised when tests failed with errors; keeps the count in `errors_number`."""

    def __init__(self, original_exception, errors_count):
        # Plural wording only when more than one test errored.
        msg = (
            f"{errors_count} soda-sql tests failed with errors"
            if errors_count > 1
            else "Soda-sql test failed with error"
        )
        super().__init__(msg, original_exception)
        self.error_code = ERROR_CODE_TEST_FAILED
        self.errors_number = errors_count


class InvalidWarehouseYaml(Exception):
    """Raised for an invalid warehouse YAML; not part of the SodaSqlError hierarchy."""

    def __init__(self, exception_detail):
        super().__init__(f"Invalid Warehouse YAML. {exception_detail}")
from deprecated import deprecated


class ColumnMetadata:
    """Name, data type, logical type and nullability of a single column."""

    def __init__(self, name: str, data_type: str = None, logical_type: str = None, nullable: bool = None):
        self.name = name
        self.data_type = data_type
        self.nullable = nullable
        self.logical_type = logical_type

    def __str__(self):
        """Return the name, followed by a space and the data type when one is set."""
        # The attribute is ``data_type``; this class never defines ``type``,
        # so reading ``self.type`` here raised AttributeError.
        return self.name + (' ' + self.data_type if self.data_type else '')

    def to_dict(self):
        """Return the metadata as a dict, including the legacy duplicate keys."""
        return {
            'name': self.name,
            # TODO kept backward compatibility, remove after https://github.com/sodadata/soda/issues/2385 is fixed
            'type': self.data_type,
            'dataType': self.data_type,
            'nullable': self.nullable,
            'logicalType': self.logical_type,
            # deprecated, use logicalType
            'semanticType': self.logical_type,
        }

    @deprecated(version='2.1.0b19', reason='This function is deprecated, please use to_dict')
    def to_json(self):
        """Deprecated alias of :meth:`to_dict`."""
        return self.to_dict()
# --------------------------------------------------------------------------------
# /core/sodasql/scan/db.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from datetime import datetime
from typing import List

logger = logging.getLogger(__name__)


def sql_fetchone(connection, sql: str) -> tuple:
    """
    Run the query and return only the row from cursor.fetchone()
    """
    row, _description = sql_fetchone_description(connection, sql)
    return row


def sql_fetchone_description(connection, sql: str) -> tuple:
    """
    Run the query and return a 2-tuple:
    1) the row obtained by cursor.fetchone()
    2) the cursor.description
    """
    cur = connection.cursor()
    try:
        logger.debug(f'Executing SQL query: \n{sql}')
        started_at = datetime.now()
        cur.execute(sql)
        first_row = cur.fetchone()
        columns = cur.description
        elapsed = datetime.now() - started_at
        logger.debug(f'SQL took {str(elapsed)}')
        return first_row, columns
    finally:
        # The cursor is closed whether or not the query succeeded.
        cur.close()


def sql_fetchall(connection, sql: str) -> List[tuple]:
    """
    Run the query and return only the rows from cursor.fetchall()
    """
    all_rows, _description = sql_fetchall_description(connection, sql)
    return all_rows


def sql_fetchall_description(connection, sql: str) -> tuple:
    """
    Run the query and return a 2-tuple:
    1) the rows obtained by cursor.fetchall()
    2) the cursor.description
    """
    cur = connection.cursor()
    try:
        logger.debug(f'Executing SQL query: \n{sql}')
        started_at = datetime.now()
        cur.execute(sql)
        all_rows = cur.fetchall()
        elapsed = datetime.now() - started_at
        logger.debug(f'SQL took {str(elapsed)}')
        return all_rows, cur.description
    finally:
        # The cursor is closed whether or not the query succeeded.
        cur.close()


def sql_update(connection, sql: str):
    """
    Execute a single statement without fetching any result
    """
    cur = connection.cursor()
    try:
        logger.debug(f'Executing SQL update: \n{sql}')
        started_at = datetime.now()
        cur.execute(sql)
        elapsed = datetime.now() - started_at
        logger.debug(f'SQL took {str(elapsed)}')
    finally:
        cur.close()


def sql_updates(connection, sqls: List[str]):
    """
    Execute each statement in order through sql_update
    """
    for statement in sqls:
        sql_update(connection, statement)
# --------------------------------------------------------------------------------
# /core/sodasql/scan/dialect_parser.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sodasql.scan.dialect import Dialect
from sodasql.scan.parser import Parser


class DialectParser(Parser):
    """Parser that pushes the connection dict as context and creates a Dialect from itself."""

    def __init__(self, warehouse_connection_dict: dict):
        super().__init__('warehouse')
        # The context must be pushed before Dialect.create(self) is called.
        self._push_context(warehouse_connection_dict, 'connection')
        self.dialect = Dialect.create(self)
# --------------------------------------------------------------------------------
# /core/sodasql/scan/env_vars.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import yaml
from sodasql.scan.file_system import FileSystemSingleton


class EnvVars:

    @classmethod
    def load_env_vars(cls, project_name: str):
        """Load the environment variables in ~/.soda/env_vars.yml under the project name key.

        String values are written to ``os.environ``; a ``None`` value removes
        the variable from ``os.environ`` when it is set. Values of any other
        type are ignored, as is a missing file or a non-dict structure.
        """
        file_system = FileSystemSingleton.INSTANCE
        env_vars_path = f'{file_system.user_home_dir()}/.soda/env_vars.yml'
        if not file_system.is_file(env_vars_path):
            return

        all_projects = yaml.load(file_system.file_read_as_str(env_vars_path), Loader=yaml.SafeLoader)
        if not isinstance(all_projects, dict):
            return

        project_vars = all_projects.get(project_name)
        if not isinstance(project_vars, dict):
            return

        for var_name, var_value in project_vars.items():
            if isinstance(var_value, str):
                os.environ[var_name] = var_value
            elif var_value is None:
                # Removes the variable only when it is currently set.
                os.environ.pop(var_name, None)
# --------------------------------------------------------------------------------
# /core/sodasql/scan/failed_rows_processor.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class FailedRowsProcessor:
    """Extension point for custom handling of failed rows."""

    def process(self, context: dict):
        """
        Override this method in a subclass in your program to process the failed rows.

        :param context: dict with the following keys:

            column_name : Name of the column
            sample_name : Generated name of the sample
            sample_columns: Columns of the selected samples
            sample_rows: List of rows
            sample_description: Auto-generated description of the samples
            total_row_count: Total count of the failed rows

        :return: dict with message and count keys which will be sent to soda cloud instead of failed rows
                 e.g. {'message': 'Failed Rows are saved in S3 bucket s3:///test_failed_rows',
                       'count': 23}
                 This default implementation does nothing and returns None.
        """
        return None
# --------------------------------------------------------------------------------
# /core/sodasql/scan/group_value.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import List

from sodasql.common.json_helper import JsonHelper

from deprecated import deprecated


@dataclass
class GroupValue:
    """Pairs a ``group`` dict with the ``value`` recorded for that group."""
    group: dict
    value: object

    @classmethod
    def from_json(cls, json: dict):
        """Build a GroupValue from a dict with 'group' and 'value' keys; None maps to None."""
        if json is None:
            return None
        assert isinstance(json, dict)
        group, value = json.get('group'), json.get('value')
        return GroupValue(group=group, value=value)

    @classmethod
    def from_json_list(cls, json_list: List):
        """Convert a list of dicts into GroupValue objects; None maps to None.

        Entries for which from_json yields a falsy result (e.g. None entries)
        are left out of the returned list.
        """
        if json_list is None:
            return None
        assert isinstance(json_list, list)
        parsed = (cls.from_json(entry) for entry in json_list)
        return [group_value for group_value in parsed if group_value]

    def to_dict(self):
        """Return a dict with 'group' and 'value', each passed through JsonHelper.to_jsonnable."""
        jsonnable_group = JsonHelper.to_jsonnable(self.group)
        jsonnable_value = JsonHelper.to_jsonnable(self.value)
        return {'group': jsonnable_group, 'value': jsonnable_value}

    @deprecated(version='2.1.0b19', reason='This function is deprecated, please use to_dict')
    def to_json(self):
        """Deprecated alias of to_dict."""
        return self.to_dict()
# --------------------------------------------------------------------------------
# /core/sodasql/scan/historic_metric_yml.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import Optional


@dataclass
class HistoricMetricYml:
    """Data holder with a name, a type, a metric and a count that defaults to 1."""
    name: str
    type: str
    metric: str
    count: int = 1
# --------------------------------------------------------------------------------
# /core/sodasql/scan/measurement.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import Optional, List
from deprecated import deprecated
from sodasql.common.json_helper import JsonHelper
from sodasql.scan.group_value import GroupValue


@dataclass
class Measurement:
    """A metric with either a single value or a list of per-group values."""
    metric: str
    column_name: Optional[str] = None
    value: object = None
    group_values: Optional[List[GroupValue]] = None

    def __str__(self):
        """Render as 'metric(column) = value', or one line per group when grouped."""
        column_part = f'({self.column_name})' if self.column_name else ''
        label = f'{self.metric}{column_part}'

        # Ungrouped measurement: single value.
        if self.group_values is None:
            return f'{label} = {self.value}'

        # Grouped measurement without any group.
        if len(self.group_values) == 0:
            return f'{label}: no groups'

        group_lines = [
            f'group{JsonHelper.to_jsonnable(entry.group)} = {entry.value}'
            for entry in self.group_values
        ]
        joined = '\n '.join(group_lines)
        return f'{label}: \n {joined}'

    def to_dict(self) -> dict:
        """Return a dict with 'metric', then 'groupValues' or 'value', then optional 'columnName'."""
        result = {'metric': self.metric}

        if self.group_values is not None:
            result['groupValues'] = [entry.to_dict() for entry in self.group_values]
        else:
            result['value'] = JsonHelper.to_jsonnable(self.value)

        if self.column_name is not None:
            result['columnName'] = self.column_name

        return result

    @deprecated(version='2.1.0b19', reason='This function is deprecated, please use to_dict')
    def to_json(self):
        """Deprecated alias of to_dict."""
        return self.to_dict()
# --------------------------------------------------------------------------------
# /core/sodasql/scan/missing.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Optional


class Missing:
    """Holds the optional values, format and regex of a 'missing' configuration."""

    # more regexes: https://regexr.com/ https://digitalfortress.tech/tricks/top-15-commonly-used-regex/ http://regexlib.com/
    FORMATS = dict(
        empty=r'^$',
        whitespace=r'^\s*$',
    )

    def __init__(self):
        # All three settings start out unset.
        self.regex: Optional[str] = None
        self.format: Optional[str] = None
        self.values: Optional[List[str]] = None
# --------------------------------------------------------------------------------
# /core/sodasql/scan/samples_yml.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import Optional


@dataclass
class SamplesYml:
    """Limit / tablesample settings for table, failed and passed samples.

    Every field may be ``None`` (unset); :meth:`with_defaults` fills unset
    fields from another instance.
    """
    table_limit: Optional[int]
    table_tablesample: Optional[str]
    failed_limit: Optional[int]
    failed_tablesample: Optional[str]
    passed_limit: Optional[int]
    # Annotated as str like the other tablesample fields (was ``int``).
    passed_tablesample: Optional[str]

    def with_defaults(self, default_samples_yml):
        """Return a new SamplesYml where unset (None) fields come from ``default_samples_yml``.

        :param default_samples_yml: object providing the fallback attributes;
            it is only read for fields that are None on this instance.
        :return: a new SamplesYml; neither input is modified.
        """
        def resolve(field_name):
            own_value = getattr(self, field_name)
            if own_value is not None:
                return own_value
            # Lazy lookup: defaults are only touched when actually needed.
            return getattr(default_samples_yml, field_name)

        field_names = (
            'table_limit',
            'table_tablesample',
            'failed_limit',
            'failed_tablesample',
            'passed_limit',
            'passed_tablesample',
        )
        return SamplesYml(**{field_name: resolve(field_name) for field_name in field_names})

    def is_failed_enabled(self):
        """Return a truthy value when a failed limit or failed tablesample is set."""
        return self.failed_limit or self.failed_tablesample

    def is_passed_enabled(self):
        """Return a truthy value when a passed limit or passed tablesample is set."""
        return self.passed_limit or self.passed_tablesample

    def is_table_enabled(self):
        """Return True when a table limit or table tablesample is not None."""
        return self.table_limit is not None or self.table_tablesample is not None
# --------------------------------------------------------------------------------
# /core/sodasql/scan/scan_error.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass
from deprecated import deprecated
from sodasql.scan.test import Test


@dataclass
class ScanError:
    """An error message with an optional exception; subclasses override the type label."""
    message: str
    exception: Exception = None

    def get_type(self) -> str:
        """Return the type label of this error."""
        return 'error'

    def get_message(self) -> str:
        """Return the error message."""
        return self.message

    def __str__(self) -> str:
        type_label = self.get_type()
        text = self.get_message()
        return f'[{type_label}] {text}'

    def to_dict(self) -> dict:
        """Return type and message, plus exception text and error code when available."""
        result = {'type': self.get_type(), 'message': self.get_message()}
        cause = self.exception
        if cause is not None:
            result['exception'] = str(cause)
        # Only exceptions that expose an error_code attribute contribute 'errorCode'.
        if hasattr(cause, "error_code"):
            result['errorCode'] = cause.error_code
        return result

    @deprecated(version='2.1.0b19', reason='This function is deprecated, please use to_dict')
    def to_json(self):
        """Deprecated alias of to_dict."""
        return self.to_dict()


@dataclass
class TestExecutionScanError(ScanError):
    """ScanError variant that additionally carries the related Test."""
    test: Test = None

    def get_type(self) -> str:
        return 'test_execution_error'


@dataclass
class SodaCloudScanError(ScanError):
    """ScanError variant labelled 'soda_cloud_error'."""

    def get_type(self) -> str:
        return 'soda_cloud_error'


@dataclass
class WarehouseAuthenticationScanError(ScanError):
    """ScanError variant labelled 'warehouse_authentication_error'."""

    def get_type(self) -> str:
        return 'warehouse_authentication_error'


@dataclass
class WarehouseConnectionScanError(ScanError):
    """ScanError variant labelled 'warehouse_connection_error'."""

    def get_type(self) -> str:
        return 'warehouse_connection_error'
# --------------------------------------------------------------------------------
# /core/sodasql/scan/scan_yml_column.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import Set, List, Optional

from sodasql.scan.missing import Missing
from sodasql.scan.samples_yml import SamplesYml
from sodasql.scan.sql_metric_yml import SqlMetricYml
from sodasql.scan.historic_metric_yml import HistoricMetricYml
from sodasql.scan.test import Test
from sodasql.scan.validity import Validity


@dataclass
class ScanYmlColumn:
    """Data holder grouping the configuration objects attached to one column.

    All fields are required positional dataclass fields (none has a default).
    NOTE(review): presumably populated by the scan YAML parser - confirm
    against the code that constructs this class.
    """

    # Set of metric names.
    metrics: Set[str]
    # SQL metric definitions.
    sql_metric_ymls: List[SqlMetricYml]
    # 'Missing' configuration object.
    missing: Missing
    # 'Validity' configuration object.
    validity: Validity
    # Test definitions.
    tests: List[Test]
    # Samples configuration; the only field annotated as optional.
    samples_yml: Optional[SamplesYml]
    # Historic metric definitions.
    historic_metrics: List[HistoricMetricYml]
# --------------------------------------------------------------------------------
# /core/sodasql/scan/sql_metric_yml.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import List, Optional

from sodasql.scan.test import Test


@dataclass
class SqlMetricYml:
    """Data holder describing one SQL metric definition.

    The first six fields are required; the remaining ones default to None.
    """

    type: str
    name: Optional[str]
    title: Optional[str]
    # The SQL text of the metric.
    sql: str
    # NOTE(review): presumably the position of this metric within its list - confirm with the parser.
    index: int
    column_name: Optional[str]
    failed_limit: Optional[int] = None

    # TODO move these next members into a subclass NumericSqlMetricYml
    # These default to None rather than to an empty list.
    metric_names: Optional[List[str]] = None
    group_fields: Optional[List[str]] = None
    tests: Optional[List[Test]] = None
# --------------------------------------------------------------------------------
# /core/sodasql/scan/test_result.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from dataclasses import dataclass
from deprecated import deprecated
from typing import Optional

from sodasql.common.json_helper import JsonHelper
from sodasql.scan.test import Test


@dataclass
class TestResult:
    """Outcome of one Test: passed/skipped flags, measured values and an optional error."""
    test: Test
    passed: bool
    skipped: bool
    values: Optional[dict] = None
    error: Optional[Exception] = None
    group_values: Optional[dict] = None

    def __str__(self):
        # 'passed' takes precedence over 'skipped'; anything else is a failure.
        if self.passed:
            outcome = "passed"
        elif self.skipped:
            outcome = "skipped"
        else:
            outcome = "failed"

        pieces = [f"Test {self.test.title} {outcome}"]
        if self.group_values:
            pieces.append(f" with group values {self.group_values}")
        pieces.append(f" with measurements {json.dumps(JsonHelper.to_jsonnable(self.values))}")
        return "".join(pieces)

    def to_dict(self) -> dict:
        """Return the result as a dict.

        A result without a test, or whose test has no expression, yields
        ``{"error": "Invalid test result"}``. When an error is set, the dict
        carries "error" instead of "passed" / "skipped" / "values".
        """
        test = self.test
        if not (test and test.expression):
            return {"error": "Invalid test result"}

        result = {
            "id": test.id,
            "title": test.title,
            "description": test.title,  # for backwards compatibility
            "expression": test.expression,
        }

        if test.column:
            result["columnName"] = test.column

        # Falls back to "soda-sql" when the test has no source.
        result["source"] = test.source if test.source else "soda-sql"

        if self.error:
            result["error"] = str(self.error)
        else:
            result["passed"] = self.passed
            result["skipped"] = self.skipped
            result["values"] = JsonHelper.to_jsonnable(self.values)

        if self.group_values:
            result["groupValues"] = JsonHelper.to_jsonnable(self.group_values)

        return result

    @deprecated(version="2.1.0b19", reason="This function is deprecated, please use to_dict")
    def to_json(self):
        """Deprecated alias of to_dict."""
        return self.to_dict()
# --------------------------------------------------------------------------------
# /core/sodasql/scan/validity/money_patterns.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List


class MoneyPatternHelper:
    """Builds regex fragments for money amounts; arguments are inserted into the regex as given."""

    @staticmethod
    def currency_number_pattern(thousands_separator, decimal_separator):
        """Regex for digit groups with a thousands separator and an optional decimal part."""
        grouped_digits = rf"([0-9]+[{thousands_separator}])*"
        last_digits = r"([0-9]+)"
        decimals = rf"({decimal_separator}[0-9]+)?"
        return grouped_digits + last_digits + decimals

    @staticmethod
    def currency_prefix_pattern(currency_symbol):
        """Regex for an optional symbol, an optional space and an optional minus sign."""
        return rf"({currency_symbol})?" + r" ?(\-)?"

    @staticmethod
    def currency_suffix_pattern(currency_symbol, currency_name):
        """Regex for an optional space, then optionally the symbol or the lower/upper-cased name."""
        alternatives = "|".join((currency_symbol, currency_name.lower(), currency_name.upper()))
        return rf" ?({alternatives})?"

    @classmethod
    def money_pattern(cls, thousands_separator, decimal_separator, currency_symbol, currency_name):
        """Concatenate the prefix, number and suffix patterns, in that order."""
        parts = (
            cls.currency_prefix_pattern(currency_symbol),
            cls.currency_number_pattern(thousands_separator, decimal_separator),
            cls.currency_suffix_pattern(currency_symbol, currency_name),
        )
        return "".join(parts)

    @staticmethod
    def or_patterns(patterns: List[str]):
        """Wrap each pattern in parentheses and join them into one alternation group."""
        wrapped = (f"({pattern})" for pattern in patterns)
        return '(' + '|'.join(wrapped) + ')'

    @staticmethod
    def enclose_pattern(pattern):
        """Anchor the pattern with ^ and $."""
        return f'^{pattern}$'


MONEY_USD_PATTERN = MoneyPatternHelper.money_pattern(r',', r'\.', r'\$', r'usd')
MONEY_EUR_PATTERN = MoneyPatternHelper.money_pattern(r' ', r',', r'€', r'eur')
MONEY_GBP_PATTERN = MoneyPatternHelper.money_pattern(r',', r'\.', r'£', r'gbp')
MONEY_RMB_PATTERN = MoneyPatternHelper.money_pattern(r',', r'\.', r'¥', r'rmb')
MONEY_CHF_PATTERN = MoneyPatternHelper.money_pattern(r"''", r'\.', r'CHf', r'chf')
# --------------------------------------------------------------------------------
# /core/sodasql/scan/warehouse.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import List

from sodasql.scan.db import sql_fetchone, sql_fetchall, sql_fetchone_description, sql_fetchall_description
from sodasql.scan.dialect import Dialect
from sodasql.scan.warehouse_yml import WarehouseYml
from sodasql.telemetry.soda_telemetry import SodaTelemetry

logger = logging.getLogger(__name__)
soda_telemetry = SodaTelemetry.get_instance()


class Warehouse:
    """Owns a connection created by the warehouse dialect and runs SQL through it."""

    def __init__(self, warehouse_yml: WarehouseYml):
        self.name = warehouse_yml.name
        self.dialect: Dialect = warehouse_yml.dialect
        # The connection is opened right away, before telemetry attributes are set.
        self.connection = self.dialect.create_connection()
        datasource_hash = soda_telemetry.obtain_datasource_hash(self.dialect)
        soda_telemetry.set_attribute('datasource_type', self.dialect.type)
        soda_telemetry.set_attribute('datasource_id', datasource_hash)

    def sql_fetchone(self, sql) -> tuple:
        """Delegate to the module-level sql_fetchone with this warehouse's connection."""
        return sql_fetchone(self.connection, sql)

    def sql_fetchone_description(self, sql) -> tuple:
        """Delegate to the module-level sql_fetchone_description with this warehouse's connection."""
        return sql_fetchone_description(self.connection, sql)

    def sql_fetchall(self, sql) -> List[tuple]:
        """Delegate to the module-level sql_fetchall with this warehouse's connection."""
        return sql_fetchall(self.connection, sql)

    def sql_fetchall_description(self, sql) -> tuple:
        """Delegate to the module-level sql_fetchall_description with this warehouse's connection."""
        return sql_fetchall_description(self.connection, sql)

    def create_scan(self, *args, **kwargs):
        """Ask the dialect to create a scan for this warehouse, forwarding all arguments."""
        return self.dialect.create_scan(self, *args, **kwargs)

    def close(self):
        """Close the connection if there is one; a failure to close is only logged at debug level."""
        if not self.connection:
            return
        try:
            self.connection.close()
        except Exception as e:
            logger.debug(f'Closing connection failed: {str(e)}')
# --------------------------------------------------------------------------------
# /core/sodasql/scan/warehouse_yml.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import Optional

from sodasql.scan.dialect import Dialect


@dataclass
class WarehouseYml:
    """Data holder for a warehouse configuration; every field defaults to None.

    NOTE(review): the soda_* fields presumably describe the Soda server
    endpoint and API credentials - confirm against the warehouse YAML parser.
    """
    # Dialect object of the warehouse.
    dialect: Optional[Dialect] = None
    # Name of the warehouse.
    name: Optional[str] = None
    soda_host: Optional[str] = None
    soda_port: Optional[int] = None
    soda_protocol: Optional[str] = None
    soda_api_key_id: Optional[str] = None
    soda_api_key_secret: Optional[str] = None
# --------------------------------------------------------------------------------
# /core/sodasql/soda_server_client/monitor_measurement.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@dataclass
class MonitorMeasurement(Measurement):
    """A ``Measurement`` extended with monitor-specific fields.

    Adds the id of the metric, the SQL text and the query duration in
    milliseconds to the fields inherited from ``Measurement``.
    """
    metric_id: str = None
    sql: str = None
    query_milliseconds: int = None

    @classmethod
    def from_dict(cls, dictionary: dict) -> 'MonitorMeasurement':
        """Build an instance from a dict with camelCase keys.

        Reads the keys ``metricId``, ``metricType``, ``sql``, ``columnName``,
        ``value``, ``groupValues`` and ``queryMilliseconds``; missing keys
        yield ``None`` for the corresponding field.

        :param dictionary: the dict to read from; must be a ``dict``.
        :return: the populated measurement.
        """
        assert isinstance(dictionary, dict)
        # Use cls so subclasses get instances of their own type.
        return cls(
            metric_id=dictionary.get('metricId'),
            metric=dictionary.get('metricType'),
            sql=dictionary.get('sql'),
            column_name=dictionary.get('columnName'),
            value=dictionary.get('value'),
            group_values=GroupValue.from_json_list(dictionary.get('groupValues')),
            query_milliseconds=dictionary.get('queryMilliseconds'))

    @classmethod
    @deprecated(version='2.1.0b19', reason='This function is deprecated, please use from_dict')
    def from_json(cls, dictionary: dict) -> 'MonitorMeasurement':
        """Deprecated alias of :meth:`from_dict`."""
        # The classmethod already binds cls, so it must not be passed again;
        # the result has to be returned to the caller.
        return cls.from_dict(dictionary)

    def to_dict(self) -> dict:
        """Return the parent's dict extended with the monitor-specific keys."""
        dictionary = super().to_dict()
        dictionary['metricId'] = self.metric_id
        dictionary['sql'] = self.sql
        dictionary['queryMilliseconds'] = self.query_milliseconds
        return dictionary

    @deprecated(version='2.1.0b19', reason='This function is deprecated, please use to_dict')
    def to_json(self):
        """Deprecated alias of :meth:`to_dict`."""
        return self.to_dict()
class MonitorMetricParser(Parser):
    """Parse one monitor metric dict and build the SQL of the resulting metric.

    After construction the parsed metric is available as ``monitor_metric``.
    """

    def __init__(self, monitor_metric_dict: dict, scan: Scan):
        """Parse ``monitor_metric_dict`` in the context of ``scan``.

        :param monitor_metric_dict: dict holding the ``type`` and ``id`` keys
            (required) and optionally ``columnName``, ``groupByColumnNames``
            and ``filter``.
        :param scan: the scan providing the dialect, scan time and qualified
            table name used to build the SQL.
        """
        super().__init__('Monitor SQL metric')
        self.dialect: Dialect = scan.dialect
        self._push_context(monitor_metric_dict)
        try:
            metric_type = self.get_str_required(KEY_TYPE)
            metric_id = self.get_str_required(KEY_ID)
            column_name = self.get_str_optional(KEY_COLUMN_NAME)
            group_by_column_names = self.get_list_optional(KEY_GROUP_BY_COLUMN_NAMES)

            self.monitor_metric = MonitorMetric(
                scan=scan,
                metric_id=metric_id,
                metric_type=metric_type,
                column_name=column_name,
                group_by_column_names=group_by_column_names
            )

            filter_expression_dict = self.get_dict_optional(KEY_FILTER)

            # Qualify the group-by columns through the dialect; stays empty
            # when no group-by columns were configured.
            qualified_group_column_names = []
            if group_by_column_names:
                qualified_group_column_names = [self.dialect.qualify_column_name(group_field)
                                                for group_field in group_by_column_names]
            filter_condition = self.dialect.sql_expression(expression_dict=filter_expression_dict,
                                                           scan_time=scan.time)

            self.monitor_metric.build_sql(
                qualified_group_column_names,
                filter_condition,
                scan.qualified_table_name)

        finally:
            # Always restore the parser context, even when parsing fails.
            self._pop_context()


class MemorySpanExporter(SpanExporter):
    """Implementation of :class:`SpanExporter` that saves spans in memory.

    This class can be used for diagnostic purposes, multi-threaded scenarios etc.

    The class is a singleton: obtain it through :meth:`get_instance`.
    Constructing a second instance raises an ``Exception``.
    """

    __instance = None

    @staticmethod
    def get_instance():
        """Return the single instance, creating it on first use."""
        if MemorySpanExporter.__instance is None:
            MemorySpanExporter()
        return MemorySpanExporter.__instance

    def __init__(self):
        if MemorySpanExporter.__instance is not None:
            raise Exception("This class is a singleton!")
        # Keep the span list on the instance rather than on the class, so
        # reset() really discards the collected spans instead of merely
        # shadowing a shared class-level list.
        self.__spans: List[ReadableSpan] = []
        MemorySpanExporter.__instance = self

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Store ``spans`` in memory and report success."""
        self.__spans.extend(spans)
        return SpanExportResult.SUCCESS

    def reset(self):
        """Forget all spans collected so far."""
        self.__spans = []

    @property
    def spans(self) -> List[ReadableSpan]:
        """The spans collected since creation or the last :meth:`reset`."""
        return self.__spans

    @property
    def span_dicts(self) -> List[Dict]:
        """The collected spans, each converted to a dict via its JSON form."""
        return [json.loads(span.to_json()) for span in self.spans]
def get_soda_spans(spans: Sequence[ReadableSpan]) -> Sequence[ReadableSpan]:
    """Return only the spans whose name starts with ``"soda"``.

    Every other span is dropped and a debug message is logged for it.
    """
    soda_spans = []
    for candidate in spans:
        if not candidate.name.startswith("soda"):
            logger.debug(f"Open Telemetry: Skipping non-soda span '{candidate.name}'.")
            continue
        soda_spans.append(candidate)

    return soda_spans


class SodaConsoleSpanExporter(ConsoleSpanExporter):
    """Console exporter restricted to soda spans.

    Non-soda spans are never exported, for security and privacy reasons.
    """

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Export the soda spans among ``spans`` through the parent exporter."""
        soda_only = get_soda_spans(spans)
        return super().export(soda_only)


class SodaOTLPSpanExporter(OTLPSpanExporter):
    """OTLP exporter restricted to soda spans.

    Non-soda spans are never exported, for security and privacy reasons.
    """

    def __init__(
        self,
        endpoint: Optional[str] = None,
        certificate_file: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        timeout: Optional[int] = None,
        compression: Optional[Compression] = None,
    ):
        """Forward all settings, positionally and in order, to the parent exporter."""
        super().__init__(endpoint, certificate_file, headers, timeout, compression)

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Export the soda spans among ``spans`` through the parent exporter."""
        soda_only = get_soda_spans(spans)
        return super().export(soda_only)
pygments==2.10.0 16 | readme-renderer==29.0 17 | -------------------------------------------------------------------------------- /docker-compose-arm.yml: -------------------------------------------------------------------------------- 1 | # docker-compose.yml 2 | version: "3.8" 3 | services: 4 | soda: 5 | build: 6 | context: . 7 | platform: "linux/amd64" 8 | volumes: 9 | - .:/app 10 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Soda SQL and Soda Spark have been deprecated. 3 | 4 | This archived documentation for Soda SQL and Soda Spark open-source projects exists here for reference only. It is not maintained. 5 | 6 | Use Soda's new OSS tool, Soda Core, to write checks for data quality. Access the Soda Core documentation at docs.soda.io. -------------------------------------------------------------------------------- /docs/assets/images/cloud-tutorial-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/cloud-tutorial-results.png -------------------------------------------------------------------------------- /docs/assets/images/column-metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/column-metrics.png -------------------------------------------------------------------------------- /docs/assets/images/configure-yaml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/configure-yaml.png -------------------------------------------------------------------------------- 
/docs/assets/images/dataset-metadata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/dataset-metadata.png -------------------------------------------------------------------------------- /docs/assets/images/failed-row-message.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/failed-row-message.png -------------------------------------------------------------------------------- /docs/assets/images/failed-rows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/failed-rows.png -------------------------------------------------------------------------------- /docs/assets/images/monitor-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/monitor-results.png -------------------------------------------------------------------------------- /docs/assets/images/named-dataset1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/named-dataset1.png -------------------------------------------------------------------------------- /docs/assets/images/named-dataset2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/named-dataset2.png 
-------------------------------------------------------------------------------- /docs/assets/images/named-dataset3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/named-dataset3.png -------------------------------------------------------------------------------- /docs/assets/images/orchestrate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/orchestrate.png -------------------------------------------------------------------------------- /docs/assets/images/sample-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/sample-data.png -------------------------------------------------------------------------------- /docs/assets/images/scan-anatomy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/scan-anatomy.png -------------------------------------------------------------------------------- /docs/assets/images/scan-failed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/scan-failed.png -------------------------------------------------------------------------------- /docs/assets/images/scan-with-cloud-sql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/scan-with-cloud-sql.png 
-------------------------------------------------------------------------------- /docs/assets/images/soda-cloud-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/soda-cloud-logo.png -------------------------------------------------------------------------------- /docs/assets/images/soda-operation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/soda-operation.png -------------------------------------------------------------------------------- /docs/assets/images/soda-sql-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/soda-sql-logo.png -------------------------------------------------------------------------------- /docs/assets/images/table-metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/docs/assets/images/table-metrics.png -------------------------------------------------------------------------------- /docs/release notes/soda-spark-launch.md: -------------------------------------------------------------------------------- 1 | # Soda Spark 0.1.0 2 | 3 | 2021-09-06 4 | 5 | 6 | Soft-launch release of Soda Spark, an extension of Soda SQL functionality. 7 | 8 | Refer to the Soda Spark Changelog on GitHub for details. 
-------------------------------------------------------------------------------- /docs/release notes/soda-spark-v0.1.1.md: -------------------------------------------------------------------------------- 1 | # Soda Spark 0.1.1 2 | 2021-09-07 3 | 4 | 5 | * Update documentation: 6 | * Add example for executing a scan with file. 7 | * Explain how the soda-spark works. 8 | * Add change log. 9 | 10 | 11 | Refer to the Soda Spark Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-spark-v0.2.0.md: -------------------------------------------------------------------------------- 1 | # Soda Spark 0.2.0 2 | 2021-09-09 3 | 4 | 5 | - Add option to connect with Soda Cloud. 6 | 7 | Refer to the Soda Spark Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-spark-v0.2.1.md: -------------------------------------------------------------------------------- 1 | # Soda Spark v0.2.1 2 | 2021-09-23 3 | 4 | - Test and document sql metrics. 5 | 6 | Refer to the Soda Spark Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-spark-v0.2.3.md: -------------------------------------------------------------------------------- 1 | # Soda Spark 0.2.3 2 | 2021-11-18 3 | 4 | * Fix samples not working. 5 | 6 | 7 | Refer to the Soda Spark Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-spark-v0.3.0.md: -------------------------------------------------------------------------------- 1 | # Soda Spark 0.3.0 2 | 2021-12-22 3 | 4 | 5 | * Added an optional flag for `as_dataframe`s to the Soda scan function which allows you to receive the scan output as Spark DataFrames. 
6 | * `measurements` 7 | * `test_results` 8 | * `scan_errors` 9 | * Added ability to use version range for `soda-sql-spark` dependency. 10 | * Added `host` and `port` attributes to `_SparkDialect`. 11 | 12 | 13 | Refer to the Soda Spark Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.0.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.0: Faramir 2 | 2021-12-02 3 | 4 | 5 | - Enable Open Telemetry so that Soda SQL can collect anonymous usage statistics. 6 | 7 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.0b017.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.0b17: Denethor II 2 | 2021-09-21 3 | 4 | 5 | - Core: Fix test connection method. 6 | - Spark: Add support for pyodbc/Databricks. 7 | - Spark: Allow spark dialect to work without database specified. 8 | 9 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.0b021.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.0b21: Éomer 2 | 2021-11-23 3 | 4 | 5 | * Core: Use abstract method instead of exceptions. (#566) 6 | * Core: Save scan results to a json file. (#569) 7 | * Core: Introduce Open Telemetry tracing. (opt-out by default) (#563) 8 | * Core: Support pathlib.Purepath for yaml files. (#573) 9 | * dbt: Add dbt package to parse manifest and run_results. (#572) 10 | 11 | Refer to the Soda SQL Changelog for details. 
-------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.0b16.md: -------------------------------------------------------------------------------- 1 | # Soda SQL v2.1.0b16: Celeborn 2 | 2021-09-07 3 | 4 | 5 | - Core: Fix time option as it's always set to "now" by default 6 | - Core: Update dev requirements 7 | - Core: Update readme with dialect status (#477) 8 | - Core: Update Tox in dev requirements to prevent version deadlock (#474) 9 | - BigQuery: Fix NoneType issue when credentials are not sufficient for BigQuery (#472) 10 | - BigQuery: Update bigquery dependency version (#470) 11 | - MySQL: Fix MySQL dialect issues (#475) 12 | 13 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.0b18.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.0b18: Elladan 2 | 2021-10-05 3 | 4 | 5 | - Core: Fix timeout validation. 6 | - Spark: Filter columns in spark dialect. 7 | 8 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.0b19.md: -------------------------------------------------------------------------------- 1 | # Soda SQL v2.1.0b19: Elrohir 2 | 2021-11-09 3 | 4 | Introducing the preview release of historic metrics, a type of metric enabling you to use Soda SQL to access the historic measurements in the Cloud Metric Store and write tests that use those historic measurements. 5 | 6 | For other changes included in this Soda SQL release you can refer to the Soda SQL Changelog.
-------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.0b20.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.0b20: Elrond 2 | 2021-11-09 3 | 4 | * Core: Fix redshift CI test details 5 | * Core: Fix typo in command help 6 | 7 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.0b22.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.0b22: Éowyn 2 | 2021-11-23 3 | 4 | 5 | - dbt: Add dbt package to releases. 6 | 7 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.1.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.1: Galadriel 2 | 2021-12-15 3 | 4 | * Core: Docker x86 workaround for running soda-sql on ARM-based machines (#590) 5 | * Soda Cloud: Add database name and schema to Cloud scanStart command (#584) 6 | * dbt: Adds ingest dbt tests 7 | * sqlserver: Support for SQLServer Dialect to make it more production ready (#564) 8 | * sqlserver: Fix column quoting and analyze issues (#595) 9 | * trino: Added experimental Trino dialect (#596) 10 | 11 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.2.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.2: Samwise Gamgee 2 | 2021-12-28 3 | 4 | - Core: add main module, so you can run Soda using `python -m sodasql`. 5 | - Core: move cryptography dependency to Snowflake. 6 | - BigQuery: introduce `use_context_auth` setting. 7 | - Soda Cloud: make sure that title is sent to Soda Cloud for sql metrics. 
8 | - dbt: add ingestion of source nodes. 9 | - dbt: ingest test artifacts from dbt Cloud. 10 | - SQLserver: fix column names quoting. 11 | - Misc: update classifiers on PyPi. 12 | 13 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.3.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.3: Gamling 2 | 2022-01-14 3 | 4 | 5 | * Core: Change invalid keys message to a warning instead of error (#656) 6 | * Core: submit a utc timestamp when creating scan (#651) 7 | * Core: Remove explicit permission setting on yml files created (#642) 8 | * Core: Fix SodaOTLPExporter constructor. Fixes #627 (#639) 9 | * Core: Do not export non-soda spans in console and OTLP exporters. Fixes #627 (#632) 10 | * Core: Fix exit code when running scans (#623) 11 | * dbt: Add telemetry for ingest command (#655) 12 | * dbt: Raise error if parsed results contain only null failures (#654) 13 | * dbt: Resolve key access error when sources are not present in manifest (#646) 14 | * dbt: Get artifacts from dbt Cloud via job_id (#647) 15 | * BigQuery: Fix the create command (#653) 16 | * SQLserver: Add encrypt, and trust_server_certificate options (#643) 17 | 18 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.4.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.4: Gandalf 2 | 2022-02-24 3 | 4 | 5 | - Core: Update the telemetry attribute (#672) 6 | - Core: Pin markupsafe (#677) 7 | - Snowflake: Collation removal for REGEX functions (#673) 8 | - Hive: Update connection parameters (#662) 9 | 10 | Refer to the Soda SQL Changelog for details. 
-------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.5.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.5: Ghân-buri-Ghân 2 | 2022-03-01 3 | 4 | 5 | - Spark: Try using describe table instead of iterating over columns (#678) 6 | 7 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/release notes/soda-sql-v2.1.6.md: -------------------------------------------------------------------------------- 1 | # Soda SQL 2.1.6: Gimli 2 | 2022-03-07 3 | 4 | * Core: Fix exception handling in telemetry and cli (#685) 5 | * dbt: Add logging for ignored dbt tests (#682) 6 | * dbt: Cleanup json dump debug statements 7 | * Snowflake: Quote columns in queries, fixes #679. (#680) 8 | 9 | Refer to the Soda SQL Changelog for details. -------------------------------------------------------------------------------- /docs/soda-sql/cli.md: -------------------------------------------------------------------------------- 1 | # Soda SQL CLI commands 2 | 3 | 4 | | Command | Description | 5 | | --------------------- | ----------- | 6 | | `soda analyze` | Analyzes the contents of your data source and automatically prepares a scan YAML file for each dataset. Soda SQL puts the YAML files in the `/tables` directory inside the warehouse directory. | 7 | | `soda create yourdatawarehouse` | Creates a new `warehouse.yml` file and prepares credentials in your `~/.soda/env_vars.yml`. Soda SQL does not overwrite or remove any existing environment variables; it only adds new ones. | 8 | | `soda ingest` | Ingests test result details from other tools, such as dbt. | 9 | | `soda scan` | Uses the configurations in your scan YAML file to prepare, then run SQL queries against the data in your data source.
| 10 | 11 | ## List of commands 12 | 13 | To see a list of Soda SQL command-line interface (CLI) commands, use the `soda` command. 14 | 15 | Command: 16 | ```shell 17 | $ soda 18 | ``` 19 | 20 | Output: 21 | ```shell 22 | Usage: soda [OPTIONS] COMMAND [ARGS]... 23 | 24 | Soda CLI version 2.x.xxx 25 | 26 | Options: 27 | --help Show this message and exit. 28 | 29 | Commands: 30 | analyze Analyze tables and scaffold SCAN YAML 31 | create Create a template warehouse.yml file 32 | ingest Ingest test information from different tools 33 | scan Compute metrics and run tests for a given table 34 | ``` 35 | 36 | ## List of options 37 | 38 | To see a list of configurable options for each command, use the command-line help. 39 | ```shell 40 | $ soda create --help 41 | $ soda analyze --help 42 | $ soda ingest --help 43 | $ soda scan --help 44 | ``` 45 | 46 | Refer to Run a Soda SQL scan for details and examples. 47 | 48 | 49 | -------------------------------------------------------------------------------- /docs/soda-sql/connect_to_cloud.md: -------------------------------------------------------------------------------- 1 | # Connect to Soda Cloud 2 | 3 | To use all the features and functionality that **Soda Cloud** and **Soda SQL** have to offer, you can install and configure the Soda SQL command-line tool, then connect it to your Soda Cloud account. 4 | 5 | Soda SQL uses an API to connect to Soda Cloud. To use the API, you must generate API keys in your Soda Cloud account, then add them to the [warehouse YAML](/docs/soda-sql/warehouse.md) file that Soda SQL created. Note that the API keys you create do not expire. 6 | 7 | 8 | 1. If you have not already done so, create a Soda Cloud account at cloud.soda.io. 9 | 2. Use the instructions in [Install Soda SQL](/docs/soda-sql/installation.md) to install Soda SQL. 10 | 3. 
Follow steps in the [Quick start tutorial](/docs/soda-sql/quick-start-soda-sql.md) to create your warehouse YAML file, connect to your data source, analyze your datasets, and run a scan on the data. 11 | 4. Open the `warehouse.yml` file in a text editor, then add the following to the file: 12 | ```yaml 13 | soda_account: 14 | host: cloud.soda.io 15 | api_key_id: env_var(API_PUBLIC) 16 | api_key_secret: env_var(API_PRIVATE) 17 | ``` 18 | 5. Save the `warehouse.yml` file. 19 | 6. Open your `~/.soda/env_vars.yml` file in a text editor, then add `API_PUBLIC:` and `API_PRIVATE` as per the following: 20 | ```yaml 21 | soda_sql_tutorial: 22 | POSTGRES_USERNAME: sodasql 23 | POSTGRES_PASSWORD: Eg abc123 24 | API_PUBLIC: 25 | API_PRIVATE: 26 | ``` 27 | 7. In Soda Cloud, navigate to **your avatar** > **Profile** > **API Keys**, then click the plus icon to generate new API keys. 28 | * Copy the **API Key ID**, then paste it into the `env_vars.yml` file as the value for `API_PUBLIC`. 29 | * Copy the **API Key Secret**, then paste it into the `env_vars.yml` file as the value for `API_PRIVATE`. 30 | 8. Save the changes to the `env_vars.yml` file. Close the **Create API Key** dialog box in your Soda Cloud account. 31 | 9. From the command-line, use Soda SQL to scan the datasets in your data source again. 32 | ```shell 33 | $ soda scan warehouse.yml tables/datasetname.yml 34 | ``` 35 | 10. Navigate to your Soda Cloud account in your browser and refresh the page. Review the results of your scan in **Monitor Results**. 36 | -------------------------------------------------------------------------------- /docs/soda-sql/overview.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | ![soda-sql-logo](/docs/assets/images/soda-sql-logo.png) 4 |
5 | 6 | ✔ Open-source software
7 | 8 | ✔ [Install](/docs/soda-sql/installation.md) from the command-line
9 | 10 | ✔ Compatible with Snowflake, Amazon Redshift, BigQuery, [and more](/docs/soda-sql/installation.md#compatibility)
11 | 12 | ✔ [Write tests](/docs/soda-sql/tests.md) in a YAML file
13 | 14 | ✔ Deploy in an [Airflow environment](/docs/soda-sql/orchestrate_scans.md)
15 |
16 | 17 | #### Example scan YAML file 18 | ```yaml 19 | table_name: breakdowns 20 | metrics: 21 | - row_count 22 | - missing_count 23 | - missing_percentage 24 | ... 25 | # Validates that a table has rows 26 | tests: 27 | - row_count > 0 28 | 29 | # Tests that numbers in the column are entered in a valid format as whole numbers 30 | columns: 31 | incident_number: 32 | valid_format: number_whole 33 | tests: 34 | - invalid_percentage == 0 35 | 36 | # Tests that no values in the column are missing 37 | school_year: 38 | tests: 39 | - missing_count == 0 40 | 41 | # Tests for duplicates in a column 42 | bus_no: 43 | tests: 44 | - duplicate_count == 0 45 | 46 | # Compares row count between datasets 47 | sql_metric: 48 | sql: | 49 | SELECT COUNT(*) as other_row_count 50 | FROM other_table 51 | tests: 52 | - row_count == other_row_count 53 | ``` 54 | 55 | ## Get started 56 | * Soda SQL playground in GitHub 57 | * [Install Soda SQL](/docs/soda-sql/installation.md) 58 | * [Quick start for Soda SQL and Soda Cloud](/docs/soda-sql/quick-start-soda-sql.md) 59 | -------------------------------------------------------------------------------- /docs/soda-sql/scan-different-datasets.md: -------------------------------------------------------------------------------- 1 | # Scan multiple data sources or datasets 2 | 3 | You can run a single scan against different data sources in your environments. For example, you can run the same scan against data in a development environment and data in a production environment. 4 | 5 | You can also run a single scan against different datasets in your data source using custom metrics. 6 | 7 | ## Run a basic scan 8 | 9 | When you run a scan, Soda SQL uses the configurations in your [scan YAML file](/docs/soda-sql/scan-yaml.md) and Soda Cloud monitors to prepare, then run SQL queries against data in your data source. 
The default tests and metrics Soda SQL configured when it created the YAML file focus on finding missing, invalid, or unexpected data in your datasets. 10 | 11 | Each scan requires the following as input: 12 | - a warehouse YAML file, which represents a connection to your data source 13 | - a scan YAML file, including its filepath, which contains the metric and test instructions that Soda SQL uses to scan datasets in your data source 14 | 15 | #### Example command 16 | ```shell 17 | $ soda scan warehouse.yml tables/demodata.yml 18 | ``` 19 | 20 | ## Scan multiple data sources 21 | 22 | To run the same scan against different data sources, proceed as follows. 23 | 24 | 1. Prepare one [warehouse YAML file](/docs/soda-sql/warehouse.md) for each data source you wish to scan. For example: 25 | * `warehouse_postgres_dev.yml` 26 | ```yaml 27 | name: my_postgres_datawarehouse_dev 28 | connection: 29 | type: postgres 30 | host: localhost 31 | port: '5432' 32 | username: env_var(POSTGRES_USERNAME) 33 | password: env_var(POSTGRES_PASSWORD) 34 | database: dev 35 | schema: public 36 | ``` 37 | * `warehouse_postgres_prod.yml` 38 | ```yaml 39 | name: my_postgres_datawarehouse_prod 40 | connection: 41 | type: postgres 42 | host: dbhost.example.com 43 | port: '5432' 44 | username: env_var(POSTGRES_USERNAME) 45 | password: env_var(POSTGRES_PASSWORD) 46 | database: prod 47 | schema: public 48 | ``` 49 | 2. Prepare a [scan YAML file](/docs/soda-sql/scan-yaml.md) to define all the tests you wish to run against your data sources. See [Define tests](/docs/soda-sql/tests.md) for details. 50 | 3. Run separate Soda SQL scans against each data source by specifying which warehouse YAML to scan and using the same scan YAML file. 
For example: 51 | ```shell 52 | soda scan warehouse_postgres_dev.yml tables/my_dataset_scan.yml 53 | soda scan warehouse_postgres_prod.yml tables/my_dataset_scan.yml 54 | ``` 55 | 56 | ## Scan multiple datasets 57 | 58 | Use a single scan YAML file to run tests on different datasets in your data source. 59 | 60 | Prepare one [scan YAML file](/docs/soda-sql/scan-yaml.md) to define the tests you wish to apply against multiple datasets. Use custom metrics to write SQL queries and subqueries that run against multiple datasets. When you run a scan, Soda SQL uses your SQL queries to query data in the datasets you specified in your scan YAML file. 61 | -------------------------------------------------------------------------------- /examples/airflow/airflow_bash.py: -------------------------------------------------------------------------------- 1 | # Soda scan using BashOperator 2 | # The simplest way is to install Soda SQL in the same environment 3 | # as your Airflow and invoke `soda scan` using Airflow BashOperator. 4 | # 5 | # When there are test failures in soda scan, the exit code will be non-zero. 6 | # 7 | # In this DAG, the `soda_sql_scan_demodata` task will fail when the tests you 8 | # defined for the `demodata` table fail. This will prevent the `publish_data_op` from 9 | # running. You can further customize the bash command to use different soda scan command 10 | # options, for example passing variables to `soda scan` command. 
11 | 12 | from airflow import DAG 13 | from airflow.models.variable import Variable 14 | from airflow.operators.bash import BashOperator 15 | from airflow.operators.dummy import DummyOperator 16 | from airflow.utils.dates import days_ago 17 | from datetime import timedelta 18 | 19 | # Use the same variable name that you used in airflow variable creation 20 | soda_sql_project_path = Variable.get('soda_sql_project_path') 21 | 22 | default_args = { 23 | 'owner': 'soda_sql', 24 | 'retries': 1, 25 | 'retry_delay': timedelta(minutes=5), 26 | } 27 | 28 | dag = DAG( 29 | 'soda_sql_scan', 30 | default_args=default_args, 31 | description='A simple Soda SQL scan DAG', 32 | schedule_interval=timedelta(days=1), 33 | start_date=days_ago(1), 34 | ) 35 | # A dummy operator to simulate data ingestion 36 | ingest_data_op = DummyOperator( 37 | task_id='ingest_data' 38 | ) 39 | 40 | # Soda SQL Scan which will run the appropriate table scan for the ingestion 41 | soda_sql_scan_op = BashOperator( 42 | task_id='soda_sql_scan_demodata', 43 | bash_command=f'cd {soda_sql_project_path} && soda scan warehouse.yml tables/demodata.yml', 44 | dag=dag 45 | ) 46 | 47 | # A dummy operator to simulate data publication when the Soda SQL Scan task is successful 48 | publish_data_op = DummyOperator( 49 | task_id='publish_data' 50 | ) 51 | 52 | ingest_data_op >> soda_sql_scan_op >> publish_data_op 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /examples/airflow/airflow_bash_venv.py: -------------------------------------------------------------------------------- 1 | # Using your favorite virtualenv management tool create a venv, we will use 2 | # `virtualenv` command as example. Please review soda-sql installation 3 | # requirements to use tested python version for creating the virtualenv. 
4 | # 5 | # create a new virtualenv in a convenient location 6 | # `virtualenv .sodavenv && .sodavenv/bin/pip install soda-sql` 7 | # 8 | # Now you can modify the BashOperator in the above DAG as follows: 9 | # 10 | 11 | from airflow import DAG 12 | from airflow.models.variable import Variable 13 | from airflow.operators.bash import BashOperator 14 | from airflow.operators.dummy import DummyOperator 15 | from airflow.utils.dates import days_ago 16 | from datetime import timedelta 17 | 18 | # Use the same variable name that you used in airflow variable creation 19 | soda_sql_project_path = Variable.get('soda_sql_project_path') 20 | 21 | default_args = { 22 | 'owner': 'soda_sql', 23 | 'retries': 1, 24 | 'retry_delay': timedelta(minutes=5), 25 | } 26 | 27 | dag = DAG( 28 | 'soda_sql_scan', 29 | default_args=default_args, 30 | description='A simple Soda SQL scan DAG', 31 | schedule_interval=timedelta(days=1), 32 | start_date=days_ago(1), 33 | ) 34 | # A dummy operator to simulate data ingestion 35 | ingest_data_op = DummyOperator( 36 | task_id='ingest_data' 37 | ) 38 | 39 | # Soda SQL Scan which will run the appropriate table scan for the ingestion 40 | soda_sql_scan_op = BashOperator( 41 | task_id='soda_sql_scan_demodata', 42 | bash_command=f'cd {soda_sql_project_path} && .sodavenv/bin/soda scan warehouse.yml tables/demodata.yml', 43 | dag=dag 44 | ) 45 | 46 | # A dummy operator to simulate data publication when the Soda SQL Scan task is successful 47 | publish_data_op = DummyOperator( 48 | task_id='publish_data' 49 | ) 50 | 51 | ingest_data_op >> soda_sql_scan_op >> publish_data_op 52 | -------------------------------------------------------------------------------- /examples/airflow/airflow_python_op.py: -------------------------------------------------------------------------------- 1 | # 2 | # If you installed Soda SQL in your python environment you can use 3 | # PythonOperator to invoke Soda Scan. 
The following shows a sample Airflow DAG 4 | # using PythonOperator that you can use as a starting point. 5 | # 6 | 7 | from airflow import DAG 8 | from airflow.models.variable import Variable 9 | from airflow.operators.python import PythonOperator 10 | from airflow.operators.dummy import DummyOperator 11 | from airflow.utils.dates import days_ago 12 | from datetime import timedelta 13 | from sodasql.scan.scan_builder import ScanBuilder 14 | from airflow.exceptions import AirflowFailException 15 | 16 | # Make sure that these variables are set in your Airflow 17 | warehouse_yml = Variable.get('soda_sql_warehouse_yml_path') 18 | scan_yml = Variable.get('soda_sql_scan_yml_path') 19 | 20 | default_args = { 21 | 'owner': 'soda_sql', 22 | 'retries': 1, 23 | 'retry_delay': timedelta(minutes=5), 24 | } 25 | 26 | 27 | def run_soda_scan(warehouse_yml_file, scan_yml_file): 28 | scan_builder = ScanBuilder() 29 | scan_builder.warehouse_yml_file = warehouse_yml_file 30 | scan_builder.scan_yml_file = scan_yml_file 31 | scan = scan_builder.build() 32 | scan_result = scan.execute() 33 | if scan_result.has_test_failures(): 34 | failures = scan_result.get_test_failures_count() 35 | raise AirflowFailException(f"Soda Scan found {failures} errors in your data!") 36 | 37 | 38 | dag = DAG( 39 | 'soda_sql_python_op', 40 | default_args=default_args, 41 | description='A simple Soda SQL scan DAG', 42 | schedule_interval=timedelta(days=1), 43 | start_date=days_ago(1), 44 | ) 45 | 46 | ingest_data_op = DummyOperator( 47 | task_id='ingest_data' 48 | ) 49 | 50 | soda_sql_scan_op = PythonOperator( 51 | task_id='soda_sql_scan_demodata', 52 | python_callable=run_soda_scan, 53 | op_kwargs={'warehouse_yml_file': warehouse_yml, 54 | 'scan_yml_file': scan_yml}, 55 | dag=dag 56 | ) 57 | 58 | publish_data_op = DummyOperator( 59 | task_id='publish_data' 60 | ) 61 | 62 | ingest_data_op >> soda_sql_scan_op >> publish_data_op 63 | 64 | 
-------------------------------------------------------------------------------- /examples/airflow/airflow_python_venv_op.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.models.variable import Variable 3 | from airflow.operators.python import PythonVirtualenvOperator 4 | from airflow.operators.dummy import DummyOperator 5 | from airflow.utils.dates import days_ago 6 | from datetime import timedelta 7 | 8 | 9 | # Make sure that these variables are set in your Airflow 10 | warehouse_yml = Variable.get('soda_sql_warehouse_yml_path') 11 | scan_yml = Variable.get('soda_sql_scan_yml_path') 12 | 13 | default_args = { 14 | 'owner': 'soda_sql', 15 | 'retries': 1, 16 | 'retry_delay': timedelta(minutes=5), 17 | } 18 | 19 | 20 | def run_soda_scan(warehouse_yml_file, scan_yml_file): 21 | from sodasql.scan.scan_builder import ScanBuilder 22 | scan_builder = ScanBuilder() 23 | # Optionally you can directly build the Warehouse dict from Airflow secrets/variables 24 | # and set scan_builder.warehouse_yml_dict with values. 
25 | scan_builder.warehouse_yml_file = warehouse_yml_file 26 | scan_builder.scan_yml_file = scan_yml_file 27 | scan = scan_builder.build() 28 | scan_result = scan.execute() 29 | if scan_result.has_test_failures(): 30 | failures = scan_result.get_test_failures_count() 31 | raise ValueError(f"Soda Scan found {failures} errors in your data!") 32 | 33 | 34 | dag = DAG( 35 | 'soda_sql_python_venv_op', 36 | default_args=default_args, 37 | description='A simple Soda SQL scan DAG', 38 | schedule_interval=timedelta(days=1), 39 | start_date=days_ago(1), 40 | ) 41 | 42 | ingest_data_op = DummyOperator( 43 | task_id='ingest_data' 44 | ) 45 | 46 | soda_sql_scan_op = PythonVirtualenvOperator( 47 | task_id='soda_sql_scan_demodata', 48 | python_callable=run_soda_scan, 49 | requirements=["soda-sql==2.0.0b10"], 50 | system_site_packages=False, 51 | op_kwargs={'warehouse_yml_file': warehouse_yml, 52 | 'scan_yml_file': scan_yml}, 53 | dag=dag 54 | ) 55 | 56 | publish_data_op = DummyOperator( 57 | task_id='publish_data' 58 | ) 59 | 60 | ingest_data_op >> soda_sql_scan_op >> publish_data_op 61 | -------------------------------------------------------------------------------- /examples/aws-lambda/lambda-zip/product.yml: -------------------------------------------------------------------------------- 1 | table_name: product 2 | metrics: 3 | - row_count 4 | tests: 5 | - row_count > 0 6 | -------------------------------------------------------------------------------- /examples/aws-lambda/lambda-zip/soda_lambda.py: -------------------------------------------------------------------------------- 1 | from sodasql.scan.scan_builder import ScanBuilder 2 | from sodasql.__version__ import SODA_SQL_VERSION 3 | 4 | 5 | def lambda_handler(event, context): 6 | print(f'Lambda Handler: Soda SQL Version: {SODA_SQL_VERSION}') 7 | scan_builder = ScanBuilder() 8 | scan_builder.warehouse_yml_dict = { 9 | 'name': 'lambda-demo', 10 | 'connection': { 11 | 'type': 'postgres', 12 | 'host': 
'env_var(POSTGRES_URL)', 13 | 'port': '5432', 14 | 'username': 'env_var(POSTGRES_USERNAME)', 15 | 'password': 'env_var(POSTGRES_PASSWORD)', 16 | 'database': 'postgres', 17 | 'schema': 'public' 18 | }, 19 | 'soda_account': { 20 | 'host': 'cloud.soda.io', 21 | 'api_key_id': 'env_var(API_PUBLIC)', 22 | 'api_key_secret': 'env_var(API_PRIVATE)', 23 | } 24 | } 25 | 26 | scan_builder.scan_yml_file = 'product.yml' 27 | scan = scan_builder.build() 28 | scan_result = scan.execute() 29 | 30 | print("Finished: Soda Scan") 31 | print(scan_result.to_dict()) 32 | -------------------------------------------------------------------------------- /examples/spark/warehouse.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sodadata/soda-sql/3cecde90a1111213c692cc48e2c99b4dd07d6b90/examples/spark/warehouse.yml -------------------------------------------------------------------------------- /packages/athena/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-athena" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL Amazon Athena" 14 | 15 | requires = [ 16 | f'soda-sql-core=={package_version}', 17 | 'PyAthena>=2.2.0, <3.0' 18 | ] 19 | # TODO Fix the params 20 | # TODO Add a warning that installing core doesn't give any warehouse functionality 21 | setup( 22 | name=package_name, 23 | version=package_version, 24 | install_requires=requires, 25 | packages=find_namespace_packages(include=["sodasql*"]), 26 | classifiers=[ 27 | "Development Status :: 5 - Production/Stable", 28 | "License :: OSI Approved :: Apache Software License", 
29 | "Operating System :: Microsoft :: Windows", 30 | "Operating System :: MacOS :: MacOS X", 31 | "Operating System :: POSIX :: Linux", 32 | "Programming Language :: Python :: 3.7", 33 | "Programming Language :: Python :: 3.8", 34 | "Programming Language :: Python :: 3.9", 35 | ] 36 | ) 37 | -------------------------------------------------------------------------------- /packages/bigquery/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-bigquery" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL BigQuery" 14 | 15 | requires = [ 16 | f'soda-sql-core=={package_version}', 17 | 'google-cloud-bigquery>=2.25.0, <3.0' 18 | ] 19 | # TODO Fix the params 20 | # TODO Add a warning that installing core doesn't give any warehouse functionality 21 | setup( 22 | name=package_name, 23 | version=package_version, 24 | install_requires=requires, 25 | packages=find_namespace_packages(include=["sodasql*"]), 26 | classifiers=[ 27 | "Development Status :: 5 - Production/Stable", 28 | "License :: OSI Approved :: Apache Software License", 29 | "Operating System :: Microsoft :: Windows", 30 | "Operating System :: MacOS :: MacOS X", 31 | "Operating System :: POSIX :: Linux", 32 | "Programming Language :: Python :: 3.7", 33 | "Programming Language :: Python :: 3.8", 34 | "Programming Language :: Python :: 3.9", 35 | ] 36 | ) 37 | -------------------------------------------------------------------------------- /packages/dbt/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, 
find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print("Error: Soda SQL requires at least Python 3.7") 7 | print("Error: Please upgrade your Python version to 3.7 or later") 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-dbt" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL DBT" 14 | 15 | requires = [ 16 | f"soda-sql-core=={package_version}", 17 | "dbt-core~=1.0.0", 18 | ] 19 | # TODO Fix the params 20 | # TODO Add a warning that installing core doesn't give any warehouse functionality 21 | setup( 22 | name=package_name, 23 | version=package_version, 24 | install_requires=requires, 25 | packages=find_namespace_packages(include=["sodasql*"]), 26 | classifiers=[ 27 | "Development Status :: 5 - Production/Stable", 28 | "License :: OSI Approved :: Apache Software License", 29 | "Operating System :: Microsoft :: Windows", 30 | "Operating System :: MacOS :: MacOS X", 31 | "Operating System :: POSIX :: Linux", 32 | "Programming Language :: Python :: 3.7", 33 | "Programming Language :: Python :: 3.8", 34 | "Programming Language :: Python :: 3.9", 35 | ] 36 | ) 37 | -------------------------------------------------------------------------------- /packages/denodo/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-denodo" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL Denodo" 14 | 15 | requires = [ 16 | f'soda-sql-core=={package_version}', 17 | 'psycopg2-binary>=2.8.5, <3.0' 18 | ] 19 | # TODO Fix the params 20 | # TODO Add a warning that installing core doesn't give any warehouse functionality 21 | setup( 22 | 
name=package_name, 23 | version=package_version, 24 | install_requires=requires, 25 | packages=find_namespace_packages(include=["sodasql*"]), 26 | classifiers=[ 27 | "Development Status :: 5 - Production/Stable", 28 | "License :: OSI Approved :: Apache Software License", 29 | "Operating System :: Microsoft :: Windows", 30 | "Operating System :: MacOS :: MacOS X", 31 | "Operating System :: POSIX :: Linux", 32 | "Programming Language :: Python :: 3.7", 33 | "Programming Language :: Python :: 3.8", 34 | "Programming Language :: Python :: 3.9", 35 | ] 36 | ) 37 | -------------------------------------------------------------------------------- /packages/hive/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-hive" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL Apache Hive" 14 | 15 | requires = [ 16 | f'soda-sql-core=={package_version}', 17 | 'PyHive>=0.6.3, <1.0', 18 | 'thrift>=0.13.0, <1.0' 19 | 20 | ] 21 | # TODO Fix the params 22 | # TODO Add a warning that installing core doesn't give any warehouse functionality 23 | setup( 24 | name=package_name, 25 | version=package_version, 26 | install_requires=requires, 27 | packages=find_namespace_packages(include=["sodasql*"]), 28 | classifiers=[ 29 | "Development Status :: 4 - Beta", 30 | "License :: OSI Approved :: Apache Software License", 31 | "Operating System :: Microsoft :: Windows", 32 | "Operating System :: MacOS :: MacOS X", 33 | "Operating System :: POSIX :: Linux", 34 | "Programming Language :: Python :: 3.7", 35 | "Programming Language :: Python :: 3.8", 36 | "Programming Language :: Python :: 3.9", 37 | ] 38 | ) 39 
| -------------------------------------------------------------------------------- /packages/mysql/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-mysql" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL MySQL" 14 | 15 | requires = [ 16 | f'soda-sql-core=={package_version}', 17 | 'mysql-connector-python>=8.0.26, <9.0' 18 | ] 19 | # TODO Fix the params 20 | setup( 21 | name=package_name, 22 | version=package_version, 23 | install_requires=requires, 24 | packages=find_namespace_packages(include=["sodasql*"]), 25 | classifiers=[ 26 | "Development Status :: 4 - Beta", 27 | "License :: OSI Approved :: Apache Software License", 28 | "Operating System :: Microsoft :: Windows", 29 | "Operating System :: MacOS :: MacOS X", 30 | "Operating System :: POSIX :: Linux", 31 | "Programming Language :: Python :: 3.7", 32 | "Programming Language :: Python :: 3.8", 33 | "Programming Language :: Python :: 3.9", 34 | ] 35 | ) 36 | -------------------------------------------------------------------------------- /packages/postgresql/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-postgresql" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL PostgreSQL" 14 | 15 | requires = [ 16 | 
f'soda-sql-core=={package_version}', 17 | 'psycopg2-binary>=2.8.5, <3.0' 18 | ] 19 | # TODO Fix the params 20 | # TODO Add a warning that installing core doesn't give any warehouse functionality 21 | setup( 22 | name=package_name, 23 | version=package_version, 24 | install_requires=requires, 25 | packages=find_namespace_packages(include=["sodasql*"]), 26 | classifiers=[ 27 | "Development Status :: 5 - Production/Stable", 28 | "License :: OSI Approved :: Apache Software License", 29 | "Operating System :: Microsoft :: Windows", 30 | "Operating System :: MacOS :: MacOS X", 31 | "Operating System :: POSIX :: Linux", 32 | "Programming Language :: Python :: 3.7", 33 | "Programming Language :: Python :: 3.8", 34 | "Programming Language :: Python :: 3.9", 35 | ] 36 | ) 37 | -------------------------------------------------------------------------------- /packages/redshift/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-redshift" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL Redshift" 14 | 15 | requires = [ 16 | f'soda-sql-core=={package_version}', 17 | f'soda-sql-postgresql=={package_version}', 18 | 'boto3>=1.15.18, <2.0' 19 | ] 20 | # TODO Fix the params 21 | setup( 22 | name=package_name, 23 | version=package_version, 24 | install_requires=requires, 25 | packages=find_namespace_packages(include=["sodasql*"]), 26 | classifiers=[ 27 | "Development Status :: 5 - Production/Stable", 28 | "License :: OSI Approved :: Apache Software License", 29 | "Operating System :: Microsoft :: Windows", 30 | "Operating System :: MacOS :: MacOS X", 31 | "Operating System :: POSIX :: 
Linux", 32 | "Programming Language :: Python :: 3.7", 33 | "Programming Language :: Python :: 3.8", 34 | "Programming Language :: Python :: 3.9", 35 | ] 36 | ) 37 | -------------------------------------------------------------------------------- /packages/snowflake/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-snowflake" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL Snowflake" 14 | 15 | requires = [ 16 | f'soda-sql-core=={package_version}', 17 | 'snowflake-connector-python~=2.7', 18 | ] 19 | # TODO Fix the params 20 | setup( 21 | name=package_name, 22 | version=package_version, 23 | install_requires=requires, 24 | packages=find_namespace_packages(include=["sodasql*"]), 25 | classifiers=[ 26 | "Development Status :: 5 - Production/Stable", 27 | "License :: OSI Approved :: Apache Software License", 28 | "Operating System :: Microsoft :: Windows", 29 | "Operating System :: MacOS :: MacOS X", 30 | "Operating System :: POSIX :: Linux", 31 | "Programming Language :: Python :: 3.7", 32 | "Programming Language :: Python :: 3.8", 33 | "Programming Language :: Python :: 3.9", 34 | ] 35 | ) 36 | -------------------------------------------------------------------------------- /packages/spark/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | 
package_name = "soda-sql-spark" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL Apache Spark" 14 | 15 | requires = [ 16 | f'soda-sql-core=={package_version}', 17 | 'pyodbc>=4.0,<5.0', 18 | 'PyHive>=0.6.3, <1.0', 19 | 'thrift>=0.13.0, <1.0', 20 | 'sasl>=0.3.1, <1.0', 21 | 'thrift-sasl>=0.4.3, <1.0', 22 | ] 23 | # TODO Fix the params 24 | # TODO Add a warning that installing core doesn't give any warehouse functionality 25 | setup( 26 | name=package_name, 27 | version=package_version, 28 | install_requires=requires, 29 | packages=find_namespace_packages(include=["sodasql*"]), 30 | classifiers=[ 31 | "Development Status :: 5 - Production/Stable", 32 | "License :: OSI Approved :: Apache Software License", 33 | "Operating System :: Microsoft :: Windows", 34 | "Operating System :: MacOS :: MacOS X", 35 | "Operating System :: POSIX :: Linux", 36 | "Programming Language :: Python :: 3.7", 37 | "Programming Language :: Python :: 3.8", 38 | "Programming Language :: Python :: 3.9", 39 | ] 40 | ) 41 | -------------------------------------------------------------------------------- /packages/sqlserver/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-sqlserver" 11 | package_version = '2.2.2' 12 | # TODO Add proper description 13 | description = "Soda SQL Microsoft SQLServer" 14 | 15 | requires = [ 16 | f'soda-sql-core=={package_version}', 17 | 'pyodbc>=4.0.30, <5.0' 18 | ] 19 | # TODO Fix the params 20 | setup( 21 | name=package_name, 22 | version=package_version, 23 | install_requires=requires, 24 | packages=find_namespace_packages(include=["sodasql*"]), 25 | classifiers=[ 26 | 
"Development Status :: 4 - Beta", 27 | "License :: OSI Approved :: Apache Software License", 28 | "Operating System :: Microsoft :: Windows", 29 | "Operating System :: MacOS :: MacOS X", 30 | "Operating System :: POSIX :: Linux", 31 | "Programming Language :: Python :: 3.7", 32 | "Programming Language :: Python :: 3.8", 33 | "Programming Language :: Python :: 3.9", 34 | ] 35 | ) 36 | -------------------------------------------------------------------------------- /packages/trino/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from setuptools import setup, find_namespace_packages 4 | 5 | if sys.version_info < (3, 7): 6 | print('Error: Soda SQL requires at least Python 3.7') 7 | print('Error: Please upgrade your Python version to 3.7 or later') 8 | sys.exit(1) 9 | 10 | package_name = "soda-sql-trino" 11 | package_version = '2.2.2' 12 | description = "Soda SQL Trino" 13 | 14 | requires = [ 15 | f'soda-sql-core=={package_version}', 16 | 'trino>=0.305.0' 17 | ] 18 | 19 | setup( 20 | name=package_name, 21 | version=package_version, 22 | install_requires=requires, 23 | packages=find_namespace_packages(include=["sodasql*"]), 24 | classifiers=[ 25 | "Development Status :: 4 - Beta", 26 | "License :: OSI Approved :: Apache Software License", 27 | "Operating System :: Microsoft :: Windows", 28 | "Operating System :: MacOS :: MacOS X", 29 | "Operating System :: POSIX :: Linux", 30 | "Programming Language :: Python :: 3.7", 31 | "Programming Language :: Python :: 3.8", 32 | "Programming Language :: Python :: 3.9", 33 | ] 34 | ) 35 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | junit_family=xunit2 3 | python_files = *_suite.py test_*.py 4 | python_classes = Suite Test 5 | log_level = DEBUG 6 | addopts = -v 7 | norecursedirs = examples/* build/* 8 | 
9 | -------------------------------------------------------------------------------- /reports/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Reports 5 | 6 | 7 | 8 |

Reports

9 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ./core 2 | ./packages/athena 3 | ./packages/dbt 4 | ./packages/denodo 5 | ./packages/bigquery 6 | ./packages/hive 7 | ./packages/postgresql 8 | ./packages/mysql 9 | ./packages/redshift 10 | ./packages/snowflake 11 | ./packages/sqlserver 12 | ./packages/spark 13 | ./packages/trino 14 | -------------------------------------------------------------------------------- /scripts/build_for_arm_full.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker compose -f docker-compose-arm.yml -p soda-sql build soda 3 | -------------------------------------------------------------------------------- /scripts/clean-install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | rm -fr build 3 | rm -fr dist 4 | rm -fr soda_sql_core.egg-info 5 | pip uninstall -y soda-sql-core 6 | python setup.py sdist 7 | pip install . 8 | -------------------------------------------------------------------------------- /scripts/demo.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Soda 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # Unless required by applicable law or agreed to in writing, software 7 | # distributed under the License is distributed on an "AS IS" BASIS, 8 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | # See the License for the specific language governing permissions and 10 | # limitations under the License. 11 | 12 | . 
#!/usr/bin/env bash
# A hacky script to publish all packages to testpypi.
#
# Run it from the repository root: it enters ./core and then every directory
# under ./packages, builds an sdist there and uploads it with twine.
#
# Build/upload failures are deliberately NOT fatal (no `set -e`): one package
# failing to upload must not stop the remaining packages from being attempted.
# Failing to enter a directory IS handled, because carrying on would run
# `rm -fr dist` and the build in the wrong place.

# Build a source distribution in the current directory and upload it.
publish_current_dir() {
  rm -fr dist
  python setup.py sdist
  twine upload --repository testpypi dist/*
}

pushd core || exit 1
echo "Publishing core"
publish_current_dir
popd || exit 1

cd packages || exit 1
for pack in *
do
  # Only directories are packages; skip stray files such as READMEs.
  [ -d "${pack}" ] || continue
  echo "Publishing ${pack}"
  # Quoted so that directory names with spaces or glob characters survive.
  pushd "${pack}" || continue
  publish_current_dir
  popd || exit 1
done
.venv/bin/activate 6 | 7 | python -m pytest tests/local "$@" 8 | -------------------------------------------------------------------------------- /scripts/show_release_tags.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | git tag -n 4 | -------------------------------------------------------------------------------- /scripts/start_postgres_container.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Run this from the root project dir with scripts/start_postgres_container.sh 4 | 5 | # POSTGRES_DATA_DIR=./tests/postgres_container/.postgres 6 | # if [ -d "$POSTGRES_DATA_DIR" ]; then 7 | # rm -rf "$POSTGRES_DATA_DIR" 8 | # fi 9 | 10 | ( cd tests/postgres_container ; docker-compose up ) 11 | -------------------------------------------------------------------------------- /scripts/start_spark_container.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # run this from the root project dir with scripts/start_spark_container.sh 4 | 5 | ( cd tests/spark_container ; docker-compose up ) 6 | -------------------------------------------------------------------------------- /tbump.toml: -------------------------------------------------------------------------------- 1 | [version] 2 | current = "2.2.2" 3 | 4 | regex = ''' 5 | (?P\d+)\.(?P\d+)\.(?P\d+)((?P[a-z]+)(?P\d+))? 
6 | ''' 7 | 8 | [git] 9 | message_template = "Bump to {new_version}" 10 | tag_template = "v{new_version}" 11 | 12 | [[file]] 13 | src = "core/setup.py" 14 | search = "package_version = '{current_version}'" 15 | 16 | [[file]] 17 | src = "core/sodasql/__version__.py" 18 | search = "SODA_SQL_VERSION = '{current_version}'" 19 | 20 | [[file]] 21 | src = "packages/athena/setup.py" 22 | search = "package_version = '{current_version}'" 23 | 24 | [[file]] 25 | src = "packages/bigquery/setup.py" 26 | search = "package_version = '{current_version}'" 27 | 28 | [[file]] 29 | src = "packages/hive/setup.py" 30 | search = "package_version = '{current_version}'" 31 | 32 | [[file]] 33 | src = "packages/postgresql/setup.py" 34 | search = "package_version = '{current_version}'" 35 | 36 | [[file]] 37 | src = "packages/redshift/setup.py" 38 | search = "package_version = '{current_version}'" 39 | 40 | [[file]] 41 | src = "packages/snowflake/setup.py" 42 | search = "package_version = '{current_version}'" 43 | 44 | [[file]] 45 | src = "packages/sqlserver/setup.py" 46 | search = "package_version = '{current_version}'" 47 | 48 | [[file]] 49 | src = "packages/mysql/setup.py" 50 | search = "package_version = '{current_version}'" 51 | 52 | [[file]] 53 | src = "packages/spark/setup.py" 54 | search = "package_version = '{current_version}'" 55 | 56 | [[file]] 57 | src = "packages/dbt/setup.py" 58 | search = "package_version = '{current_version}'" 59 | 60 | [[file]] 61 | src = "packages/trino/setup.py" 62 | search = "package_version = '{current_version}'" 63 | 64 | [[file]] 65 | src = "packages/denodo/setup.py" 66 | search = "package_version = '{current_version}'" 67 | -------------------------------------------------------------------------------- /tests/cli/run_cli.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Soda 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance 
if __name__ == "__main__":
    # Developer convenience entry point: runs `soda scan` through click's
    # CliRunner against a demo warehouse/scan YAML pair and prints the
    # captured console output and exit code.
    demo_dir = '~/Downloads/sql_snowflake_demo'
    cli_args = ['scan', f'{demo_dir}/warehouse.yml', f'{demo_dir}/orders.yml']

    run_result = None
    try:
        cli_runner = CliRunner()
        cli_runner.file = sys.stdout
        run_result = cli_runner.invoke(main, cli_args)
    except Exception:
        # Show the failure but still fall through to the reporting below.
        traceback.print_exc()

    if run_result:
        print('\nConsole:')
        print(run_result.output)
        print(f'\nExit code {run_result.exit_code}')
class TestCLICommands(unittest.TestCase):
    """Smoke tests for the ``create`` CLI command, one per warehouse type.

    Every test invokes ``create`` through a click ``CliRunner`` and checks
    that the invocation raised no exception and exited with code 0.
    """

    def setUp(self):
        super().setUp()
        self.runner = CliRunner()

    def tearDown(self):
        # Best-effort cleanup of 'warehouse.yml' in the working directory
        # (presumably written by the create command); a missing file is fine.
        try:
            os.remove('warehouse.yml')
        except OSError:
            pass

    def _assert_create_succeeds(self, warehouse_type):
        """Invoke ``create <warehouse_type>`` and assert a clean exit."""
        outcome = self.runner.invoke(create, [warehouse_type])
        assert outcome.exception is None
        assert outcome.exit_code == 0

    def test_create_athena(self):
        self._assert_create_succeeds('athena')

    def test_create_bigquery(self):
        self._assert_create_succeeds('bigquery')

    def test_create_hive(self):
        self._assert_create_succeeds('hive')

    def test_create_postgres(self):
        self._assert_create_succeeds('postgres')

    def test_create_redshift(self):
        self._assert_create_succeeds('redshift')

    def test_create_snowflake(self):
        self._assert_create_succeeds('snowflake')

    def test_create_sqlserver(self):
        self._assert_create_succeeds('sqlserver')
class TestSodaServerInteraction(SqlTestCase):
    """Executes a scan that is wired to a real ``SodaServerClient``."""

    def test_soda_server_client(self):
        """Run a scan with a client configured for http://localhost:5000.

        The scan result is not asserted on; the test only requires that the
        scan YAML parses without warnings/errors and that ``scan.execute()``
        does not raise.
        """
        column_declarations = [f"name {self.dialect.data_type_integer}"]
        rows = ["(1)", "(2)", "(3)", "(4)", "(null)"]
        self.sql_recreate_table(column_declarations, rows)

        table_name = self.default_test_table_name
        failed_rows_metric = {
            'name': 'big_names',
            'sql': f'SELECT * from {table_name} WHERE name > 3',
            'type': 'failed_rows'
        }
        name_column = {
            'valid_max': 2,
            'tests': [
                'missing_count < 1',
            ]
        }
        scan_yml_dict = {
            'table_name': table_name,
            # 'samples': {
            #     'table_limit': 10,
            #     'failed_limit': 5
            # },
            'metric_groups': ['missing', 'validity'],
            'tests': ['row_count > 0'],
            'columns': {'name': name_column},
            'sql_metrics': [failed_rows_metric]
        }

        parser = ScanYmlParser(scan_yml_dict, 'test-scan')
        parser.assert_no_warnings_or_errors()

        client = SodaServerClient(
            host='localhost',
            port='5000',
            protocol='http',
            api_key_id='testapikeyid',
            api_key_secret='testapikeysecret'
        )

        scan_time = datetime.now().isoformat(timespec='seconds')
        scan = self.warehouse.create_scan(scan_yml=parser.scan_yml,
                                          soda_server_client=client,
                                          time=scan_time)
        # Keep the warehouse open after the scan (flag read by the scan itself).
        scan.close_warehouse = False
        scan.execute()

        # if scan_result.has_failures():
        #     raise RuntimeError(f'Scan failed: {scan_result.get_failures_message()}')
def telemetry_ensure_no_secrets(*o_args, **o_kwargs):
    """Decorator factory asserting that exported telemetry holds no secrets.

    The wrapped function is run after resetting the module-level
    ``telemetry_exporter``; afterwards every key and value found (recursively)
    in ``telemetry_exporter.span_dicts`` is checked against the forbidden
    lists. An ``AssertionError`` is raised on the first forbidden key or value.

    Keyword options:
        secret_keys: forbidden keys (default ``["secret", "password"]``).
        secret_values: forbidden values
            (default ``["secret", "password", "sodasql"]``).

    Positional arguments are accepted but not used.
    """
    default_secret_keys = ["secret", "password"]
    default_secret_values = ["secret", "password", "sodasql"]
    container_types = (dict, tuple, list)

    def iteritems_recursive(collection: Union[Dict, List, Tuple]):
        """Yield every ``(key, value)`` pair reachable inside *collection*.

        - tuples and lists are walked like dicts keyed by their numeric index
        - for a nested container the pair ``(key, "collection")`` is yielded
          first, so the key itself is still checked, then its contents follow
        """
        if isinstance(collection, dict):
            items = collection
        elif isinstance(collection, (tuple, list)):
            items = dict(enumerate(collection))

        for key, value in items.items():
            if not isinstance(value, container_types):
                yield key, value
                continue
            yield key, "collection"
            yield from iteritems_recursive(value)

    def decorate(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            forbidden_keys = o_kwargs.get("secret_keys", default_secret_keys)
            forbidden_values = o_kwargs.get("secret_values", default_secret_values)

            # Start from a clean exporter so only this call's spans are checked.
            telemetry_exporter.reset()
            result = func(*args, **kwargs)

            for span in telemetry_exporter.span_dicts:
                for key, value in iteritems_recursive(span):
                    error_msg = f"Forbidden telemetry key:value pair '{key}:{value}'."
                    assert key not in forbidden_keys, error_msg
                    assert value not in forbidden_values, error_msg
            return result

        return wrapper

    return decorate
class YamlHelperTest(unittest.TestCase):
    """Checks that YamlHelper validators hand back the value they were given.

    Covers a number, a list, and ``None`` for both validators.
    """

    COLUMN = "column_name"
    KEY = "key"

    def test_valid_numeric_value(self):
        self.assertEqual(
            YamlHelper.validate_numeric_value(self.COLUMN, self.KEY, 2), 2)

    def test_valid_array_value(self):
        numbers = [1, 2, 3]
        self.assertEqual(
            YamlHelper.validate_list_value(self.COLUMN, self.KEY, numbers),
            [1, 2, 3])

    def test_invalid_numeric_value(self):
        # None in -> None out.
        self.assertEqual(
            YamlHelper.validate_numeric_value(self.COLUMN, self.KEY, None), None)

    def test_invalid_array_value(self):
        # None in -> None out.
        self.assertEqual(
            YamlHelper.validate_list_value(self.COLUMN, self.KEY, None), None)


if __name__ == '__main__':
    unittest.main()
# Demo driver: runs one scan per day of the `demodata` table on a postgres
# warehouse and prints a short summary of every scan.

# Scan definition: table-level metrics plus uniqueness metrics and two named
# tests on the ID column. The filter restricts each scan to a single day via
# the `date` variable supplied per scan below.
scan_configuration_dict = {
    'table_name': 'demodata',
    'filter': "date = DATE '{{ date }}'",
    KEY_METRICS: [
        'missing',
        'validity',
        'min',
        'max',
        'avg',
        'sum',
        'min_length',
        'max_length',
        'avg_length',
    ],
    KEY_COLUMNS: {
        'ID': {
            KEY_METRICS: ['distinct', 'uniqueness'],
            'tests': {
                'nomissing': 'missing_percentage < 3.0',
                'noinvalid': 'invalid_count == 0',
            },
        },
    },
}

scan_configuration_parser = ScanYmlParser(scan_configuration_dict, 'demodata-scan')
scan_configuration_parser.assert_no_warnings_or_errors()

dialect = SqlTestCase.create_dialect('postgres')
warehouse_yml = WarehouseYml(dialect=dialect)
warehouse = Warehouse(warehouse_yml)

# Date range covered by the demo data.
row = warehouse.sql_fetchone('SELECT MIN(date), MAX(date) FROM demodata')
min_date, max_date = row[0], row[1]

scan_results = []

# One scan per day, starting at min_date. The loop ends as soon as `date`
# reaches max_date, so max_date itself is not scanned.
date = min_date
while date != max_date:
    # Midnight of the current day, used as the scan time.
    timeslice = datetime(year=date.year, month=date.month, day=date.day).isoformat()
    scan = Scan(warehouse=warehouse,
                scan_yml=scan_configuration_parser.scan_yml,
                variables={'date': date.strftime("%Y-%m-%d")},
                time=timeslice)
    scan_results.append(scan.execute())
    date += timedelta(days=1)

print()
print('Summary:')
for scan_result in scan_results:
    measurement_count = len(scan_result.measurements)
    test_result_count = len(scan_result.test_results)
    print('Scan results:')
    print(f' Measurements: {measurement_count}')
    print(f' Test results: {test_result_count} of which {scan_result.get_test_failures_count()} failed')
class TestDateParser(TestCase):
    """Checks which timestamp strings ``datetime.fromisoformat`` accepts."""

    def test_default_date(self):
        # A timezone-aware "now", truncated to seconds, must round-trip.
        default_date = datetime.now(tz=timezone.utc).isoformat(timespec='seconds')
        self.assertTrue(self.datetime_valid(default_date))

    def test_is_valid_iso_8601_date(self):
        compliant_date = "2021-04-15T09:00:00+02:00"
        self.assertTrue(self.datetime_valid(compliant_date))

        compliant_date_2 = "2021-04-15T09:00:00+00:00"
        self.assertTrue(self.datetime_valid(compliant_date_2))

    def test_is_not_valid_iso_8601_date(self):
        # `datetime_valid` swallows the parse error and returns a bool, so the
        # check has to be on its return value. A bare
        # `self.assertRaises(ValueError)` (no callable, no `with`) only builds
        # a context manager and never fails.
        #
        # The inputs are invalid on every Python version. A colon-less offset
        # such as "+0200" is rejected before Python 3.11 but accepted from
        # 3.11 on, so it cannot serve as the negative sample.
        self.assertFalse(self.datetime_valid("2021-04-15T25:00:00+02:00"))
        self.assertFalse(self.datetime_valid("15/04/2021 09:00"))
        self.assertFalse(self.datetime_valid(""))

    @staticmethod
    def datetime_valid(date: str):
        """Return True if *date* can be parsed by ``datetime.fromisoformat``.

        Returns False instead of raising when the string is malformed
        (``ValueError``) or the argument is not a string (``TypeError``).
        """
        try:
            datetime.fromisoformat(date)
        except (TypeError, ValueError):
            return False
        return True
class TestJsonHelper(TestCase):
    """Checks that ``JsonHelper.to_jsonnable`` renders date/time objects as ISO strings."""

    def test_jsonize_date(self):
        day = datetime.date(2021, 1, 2)
        jsonnable = JsonHelper.to_jsonnable(day)
        self.assertEqual(jsonnable, '2021-01-02')

    def test_jsonize_datetime(self):
        moment = datetime.datetime(2021, 1, 2, 10, 5, 23)
        jsonnable = JsonHelper.to_jsonnable(moment)
        self.assertEqual(jsonnable, '2021-01-02T10:05:23')

    def test_jsonize_time(self):
        time_of_day = datetime.time(10, 5, 23)
        jsonnable = JsonHelper.to_jsonnable(time_of_day)
        self.assertEqual(jsonnable, '10:05:23')
class TestScanConfigurationValidation(TestCase):
    """Checks the first log entry ``ScanYmlParser`` emits for broken scan YAML dicts."""

    def _first_log(self, scan_yml_dict):
        """Parse *scan_yml_dict* as 'Test scan' and return the first parser log."""
        return ScanYmlParser(scan_yml_dict, 'Test scan').logs[0]

    def _assert_log(self, log, level, *message_fragments):
        """Assert the log has *level* and its message contains every fragment."""
        self.assertIn(level, log.level)
        for fragment in message_fragments:
            self.assertIn(fragment, log.message)

    def test_table_name_required(self):
        # An empty scan YAML lacks the mandatory table name.
        log = self._first_log({})
        self._assert_log(log, ERROR, 'table_name', 'does not exist')

    def test_metrics_not_a_list(self):
        # Metrics given as a plain string instead of a list.
        log = self._first_log({
            KEY_TABLE_NAME: 't',
            KEY_METRICS: 'txt'
        })
        self._assert_log(log, ERROR, 'Invalid metrics', 'list', 'str')

    def test_invalid_column_metric(self):
        # 'revenue' is reported as an invalid metrics key (warning only).
        log = self._first_log({
            KEY_TABLE_NAME: 't',
            KEY_METRICS: ['revenue']
        })
        self._assert_log(log, WARNING, 'Invalid key', 'metrics', 'revenue')

    def test_invalid_valid_format(self):
        # 'buzz' is reported as an invalid valid_format (warning only).
        log = self._first_log({
            KEY_TABLE_NAME: 't',
            KEY_COLUMNS: {
                'col': {'valid_format': 'buzz'}
            }
        })
        self._assert_log(log, WARNING, 'Invalid', 'valid_format', 'buzz')
class TestSodaServerClientConfiguration(TestCase):
    """Checks that the soda account section of a warehouse YAML dict reaches the client."""

    def test_soda_server_client_configuration(self):
        """Host and API key id/secret must be copied onto the created client."""
        soda_account = {
            SODA_KEY_HOST: 'mycloud.soda.io',
            SODA_KEY_API_KEY_ID: 'mykeyid',
            SODA_KEY_API_KEY_SECRET: 'mykeysecret'
        }

        scan_builder = ScanBuilder()
        scan_builder.warehouse_yml_dict = {
            KEY_NAME: 'Test warehouse',
            KEY_CONNECTION: {},
            KEY_SODA_ACCOUNT: soda_account
        }
        scan_builder.scan_yml_dict = {KEY_TABLE_NAME: 't'}

        # Only the two build steps needed for the client are run.
        scan_builder._build_warehouse_yml()
        scan_builder._create_soda_server_client()

        client = scan_builder.soda_server_client
        self.assertIsNotNone(client)
        self.assertEqual(scan_builder.soda_server_client.host, 'mycloud.soda.io')
        self.assertEqual(scan_builder.soda_server_client.api_key_id, 'mykeyid')
        self.assertEqual(scan_builder.soda_server_client.api_key_secret, 'mykeysecret')
class TestDefaultMetrics(SqlTestCase):
    """Checks that a metric which was not requested is absent from the scan result."""

    def test_default_generated_metrics(self):
        """Request many metrics but not SUM; fetching SUM must raise AssertionError."""
        columns = [
            f"score {self.dialect.data_type_varchar_255}",
            f"score_int {self.dialect.data_type_integer}",
        ]
        rows = [
            "('1', 1)",
            "('2', 2)",
            "('2', 3)",
            "('3', 4)",
            "('3', 5)",
            "('3', 6)",
            "('3', 6)",
            "('3', 7)",
            "('4', 8)",
            "('4', 9)",
            "('5', 1)",
            "(null, null)",
        ]
        self.sql_recreate_table(columns, rows)

        requested_metrics = [
            Metric.ROW_COUNT,
            Metric.MISSING_COUNT,
            Metric.MISSING_PERCENTAGE,
            Metric.VALUES_COUNT,
            Metric.VALUES_PERCENTAGE,
            Metric.INVALID_COUNT,
            Metric.INVALID_PERCENTAGE,
            Metric.VALID_COUNT,
            Metric.VALID_PERCENTAGE,
            Metric.AVG_LENGTH,
            Metric.MAX_LENGTH,
            Metric.MIN_LENGTH,
            Metric.AVG,
            Metric.MAX,
            Metric.MIN,
            Metric.STDDEV,
            Metric.VARIANCE,
        ]
        scan_result = self.scan({KEY_METRICS: requested_metrics})

        # SUM is not in the requested list, for either column.
        for column_name in ('score', 'score_int'):
            with self.assertRaises(AssertionError):
                scan_result.get(Metric.SUM, column_name)
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from sodasql.scan.metric import Metric
from sodasql.scan.scan_yml_parser import KEY_METRICS
from tests.common.sql_test_case import SqlTestCase


class TestDistinctAndUniqueness(SqlTestCase):
    """Checks the measurements produced when the 'distinct' metric is requested."""

    def test_distinct(self):
        """Scan a varchar column with repeated values and verify the four derived numbers."""
        column_definitions = [f"score {self.dialect.data_type_varchar_255}"]
        table_rows = [
            "('1')",
            "('2')",
            "('2')",
            "('3')",
            "('3')",
            "('3')",
            "('3')",
            "('3')",
            "('4')",
            "('4')",
            "('5')",
            "(null)",
        ]
        self.sql_recreate_table(column_definitions, table_rows)

        scan_result = self.scan({KEY_METRICS: ['distinct']})

        expectations = [
            # five different non-null values: 1, 2, 3, 4, 5
            (Metric.DISTINCT, 5),
            # only 1 and 5 occur exactly once
            (Metric.UNIQUE_COUNT, 2),
            # (5 distinct - 1) * 100 / (11 non-null values - 1)
            (Metric.UNIQUENESS, 40),
            # values 2, 3 and 4 occur multiple times -> 3 duplicates
            (Metric.DUPLICATE_COUNT, 3),
        ]
        for metric, expected in expectations:
            self.assertEqual(scan_result.get(metric, 'score'), expected)
# --------------------------------------------------------------------------------
# /tests/local/warehouse/metrics/test_frequent_values.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the
"License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # Unless required by applicable law or agreed to in writing, software 7 | # distributed under the License is distributed on an "AS IS" BASIS, 8 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | # See the License for the specific language governing permissions and 10 | # limitations under the License. 11 | 12 | from sodasql.scan.metric import Metric 13 | from sodasql.scan.scan_yml_parser import KEY_METRICS 14 | from tests.common.sql_test_case import SqlTestCase 15 | 16 | 17 | class TestFrequentValues(SqlTestCase): 18 | 19 | def test_scan_mins_maxs(self): 20 | self.sql_recreate_table( 21 | [f"name {self.dialect.data_type_integer}"], 22 | ["(1)", 23 | "(2)", 24 | "(2)", 25 | "(3)", 26 | "(3)", 27 | "(3)", 28 | "(null)"]) 29 | 30 | scan_result = self.scan({ 31 | KEY_METRICS: [ 32 | Metric.FREQUENT_VALUES 33 | ] 34 | }) 35 | 36 | self.assertEqual(scan_result.get(Metric.FREQUENT_VALUES, 'name'), 37 | [{'frequency': 3, 'value': 3}, 38 | {'frequency': 2, 'value': 2}, 39 | {'frequency': 1, 'value': 1}]) 40 | -------------------------------------------------------------------------------- /tests/local/warehouse/metrics/test_histogram_numeric.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Soda 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # Unless required by applicable law or agreed to in writing, software 7 | # distributed under the License is distributed on an "AS IS" BASIS, 8 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

from sodasql.scan.metric import Metric
from sodasql.scan.scan_yml_parser import KEY_METRICS
from tests.common.sql_test_case import SqlTestCase


class TestHistogramNumeric(SqlTestCase):
    """Checks the histogram measurement of an integer column."""

    table_name = 'test_table'

    def test_scan_histogram_numeric(self):
        """Expect 20 buckets between the smallest (1) and largest (20) value."""
        column_definitions = [f"size {self.dialect.data_type_integer}"]
        table_rows = [
            "(1)",
            "(11)",
            "(11)",
            "(11)",
            "(11)",
            "(16)",
            "(17)",
            "(18)",
            "(20)",
            "(20)",
            "(null)",
        ]
        self.sql_recreate_table(column_definitions, table_rows)

        scan_result = self.scan({KEY_METRICS: [Metric.HISTOGRAM]})
        histogram = scan_result.get(Metric.HISTOGRAM, 'size')

        expected_frequencies = [
            1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            4, 0, 0, 0, 0, 1, 1, 1, 0, 2,
        ]
        self.assertEqual(histogram['frequencies'], expected_frequencies)

        # 21 boundaries delimit the 20 buckets above.
        expected_boundaries = [
            1.0, 1.95, 2.9, 3.85, 4.8, 5.75, 6.7, 7.65, 8.6, 9.55, 10.5,
            11.45, 12.4, 13.35, 14.3, 15.25, 16.2, 17.15, 18.1, 19.05, 20.0,
        ]
        self.assertEqual(histogram['boundaries'], expected_boundaries)
# --------------------------------------------------------------------------------
# /tests/local/warehouse/metrics/test_min_max_length.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from sodasql.scan.metric import Metric
from sodasql.scan.scan_yml_parser import KEY_METRICS
from tests.common.sql_test_case import SqlTestCase


class TestMinMaxLength(SqlTestCase):
    """Checks min_length / max_length on a text column and their absence on an integer one."""

    table_name = 'test_table'

    def test_scan_min_max_length(self):
        """Lengths are measured for 'name' while no length measurement exists for 'size'."""
        column_definitions = [
            f"name {self.dialect.data_type_varchar_255}",
            f"size {self.dialect.data_type_integer}",
        ]
        table_rows = [
            "('one', 1)",
            "('two', 2)",
            "('three', 3)",
            "(null, null)",
        ]
        self.sql_recreate_table(column_definitions, table_rows)

        scan_result = self.scan({KEY_METRICS: ['min_length', 'max_length']})

        # 'one'/'two' are the shortest values (3), 'three' the longest (5).
        self.assertEqual(scan_result.get(Metric.MIN_LENGTH, 'name'), 3)
        self.assertEqual(scan_result.get(Metric.MAX_LENGTH, 'name'), 5)

        for length_metric in (Metric.MIN_LENGTH, Metric.MAX_LENGTH):
            self.assertIsNone(scan_result.find_measurement(length_metric, 'size'))

# --------------------------------------------------------------------------------
# /tests/local/warehouse/metrics/test_statistical_metrics.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sodasql.scan.metric import Metric
from sodasql.scan.scan_yml_parser import KEY_METRICS, KEY_COLUMNS, COLUMN_KEY_VALID_FORMAT
from tests.common.sql_test_case import SqlTestCase


class TestStatisticalMetrics(SqlTestCase):
    """Checks statistical metrics on text columns with and without a numeric valid format."""

    def test_scan_statistical_metrics(self):
        """A varchar column declared as 'number_whole' gets numeric statistics."""
        column_definitions = [f"score {self.dialect.data_type_varchar_255}"]
        table_rows = ["('1')", "('2')", "('5')", "('12')", "(null)"]
        self.sql_recreate_table(column_definitions, table_rows)

        requested_metrics = [
            Metric.MIN,
            Metric.MAX,
            Metric.AVG,
            Metric.SUM,
            Metric.VARIANCE,
            Metric.STDDEV,
        ]
        score_configuration = {COLUMN_KEY_VALID_FORMAT: 'number_whole'}
        scan_result = self.scan({
            KEY_METRICS: requested_metrics,
            KEY_COLUMNS: {'score': score_configuration},
        })

        # Values 1, 2, 5 and 12: min 1, max 12, average 5, sum 20.
        expectations = [
            (Metric.MIN, 1),
            (Metric.MAX, 12),
            (Metric.AVG, 5),
            (Metric.SUM, 20),
        ]
        for metric, expected in expectations:
            self.assertEqual(scan_result.get(metric, 'score'), expected)

        # Variance and standard deviation are only checked for presence.
        self.assertMeasurementsPresent(scan_result, 'score', [
            Metric.VARIANCE,
            Metric.STDDEV,
        ])

    def test_no_minmax_for_non_numeric_strings(self):
        """Without a valid format, a text column gets no MIN / MAX measurement."""
        column_definitions = [f"txt {self.dialect.data_type_varchar_255}"]
        table_rows = ["('a')", "('b')", "('c')", "('d')", "(null)"]
        self.sql_recreate_table(column_definitions, table_rows)

        requested_metrics = [
            Metric.MIN,
            Metric.MAX,
            Metric.MINS,
            Metric.MAXS,
            Metric.FREQUENT_VALUES,
        ]
        scan_result = self.scan({KEY_METRICS: requested_metrics})

        self.assertMeasurementsAbsent(scan_result, 'txt', [
            Metric.MIN,
            Metric.MAX,
        ])
# --------------------------------------------------------------------------------
# /tests/local/warehouse/metrics/test_valid_values.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from sodasql.scan.metric import Metric
from sodasql.scan.scan_yml_parser import KEY_METRICS
from tests.common.sql_test_case import SqlTestCase


class TestValidValues(SqlTestCase):
    """Checks validity counts driven by 'valid_values' and 'valid_min' column settings."""

    def test_valid_values(self):
        """Count invalid names against a value list and valid sizes against a minimum."""
        column_definitions = [
            f"name {self.dialect.data_type_varchar_255}",
            f"size {self.dialect.data_type_integer}",
        ]
        table_rows = [
            "('one', -1)",
            "('two', 0)",
            "('a', 1)",
            "('b', 2)",
            "('c', null)",
        ]
        self.sql_recreate_table(column_definitions, table_rows)

        column_configurations = {
            'name': {'valid_values': ['one', 'two']},
            'size': {'valid_min': 0},
        }
        scan_result = self.scan({
            KEY_METRICS: [Metric.INVALID_COUNT],
            'columns': column_configurations,
        })

        # 'a', 'b' and 'c' are not in the list of valid names.
        self.assertEqual(scan_result.get(Metric.INVALID_COUNT, 'name'), 3)
        # 0, 1 and 2 satisfy valid_min; -1 does not and null is not a value.
        self.assertEqual(scan_result.get(Metric.VALID_COUNT, 'size'), 3)
# --------------------------------------------------------------------------------
# /tests/local/warehouse/samples/test_tests_sql_metric_failed_rows.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sodasql.scan.metric import Metric
from tests.common.sql_test_case import SqlTestCase


class TestTestsSqlMetric(SqlTestCase):
    """Checks that a 'failed_rows' SQL metric returning rows marks the scan as failed."""

    def setUp(self) -> None:
        """Switch to the mock Soda server client and create the country/size table."""
        super().setUp()
        self.use_mock_soda_server_client()

        column_definitions = [
            f"country {self.dialect.data_type_varchar_255}",
            f"size {self.dialect.data_type_integer}",
        ]
        table_rows = [
            "('one', 2)",
            "('two', 3)",
            "('one', 4) ",
            "('one', 5)",
            "('two', 6)",
        ]
        self.sql_recreate_table(column_definitions, table_rows)

        dialect = self.warehouse.dialect
        self.qualified_table_name = dialect.qualify_table_name(self.default_test_table_name)

    def test_sql_metric_failed_rows(self):
        """The query selects the ('one', 4) and ('one', 5) rows, so test failures are expected."""
        failed_rows_sql = (
            "SELECT * \n"
            f"FROM {self.qualified_table_name} \n"
            "WHERE country = 'one' and size > 2"
        )
        failed_rows_metric = {
            'type': 'failed_rows',
            'name': 'large_ones',
            'sql': failed_rows_sql,
        }

        scan_result = self.scan(scan_yml_dict={'sql_metrics': [failed_rows_metric]})
        self.assertTrue(scan_result.has_test_failures())

# --------------------------------------------------------------------------------
# /tests/local/warehouse/scans/test_columns_exclusion.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from sodasql.scan.scan_yml_parser import KEY_METRICS, KEY_EXCLUDED_COLUMNS
from tests.common.sql_test_case import SqlTestCase


class TestColumnsExclusion(SqlTestCase):
    """Checks that columns listed as excluded produce no measurements."""

    def setUp(self) -> None:
        """Create a table with two varchar columns named 'one' and 'two'."""
        super().setUp()
        varchar_type = self.dialect.data_type_varchar_255
        column_definitions = [f"one {varchar_type}", f"two {varchar_type}"]
        table_rows = [
            "('a', 'alpha')",
            "('b', 'beta')",
            "('c', 'gamma') ",
            "('d', 'delta')",
            "(null, null)",
        ]
        self.sql_recreate_table(column_definitions, table_rows)

    def test_scan_result_with_test_error(self):
        """Column 'one' is measured; looking up excluded column 'two' raises."""
        scan_result = self.scan({
            KEY_EXCLUDED_COLUMNS: ["two"],
            KEY_METRICS: ['row_count', 'missing_count'],
        })

        measured = scan_result.get_measurement('missing_count', 'one')
        self.assertIsNotNone(measured)

        with self.assertRaises(AssertionError):
            scan_result.get_measurement('missing_count', 'two')
# --------------------------------------------------------------------------------
# /tests/local/warehouse/scans/test_scan_result.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sodasql.exceptions.exceptions import ERROR_CODE_TEST_FAILED
from sodasql.scan.scan_yml_parser import KEY_METRICS, KEY_TESTS
from tests.common.sql_test_case import SqlTestCase


class TestScanResult(SqlTestCase):
    """Checks how test expressions that cannot be evaluated show up in the scan result."""

    def setUp(self) -> None:
        """Create a single varchar column table with four values and one null."""
        super().setUp()
        self.sql_recreate_table(
            [f"name {self.dialect.data_type_varchar_255}"],
            ["('one')",
             "('two')",
             "('three') ",
             "('no value')",
             "(null)"])

    def test_scan_result_with_test_error(self):
        """One broken test expression yields one 'test_execution_error' entry."""
        scan_yml_dict = {
            KEY_METRICS: [
                'row_count'
            ],
            KEY_TESTS: [
                # 'error' is not a metric requested by this scan.
                '10 < error < 20'
            ]
        }
        scan_result = self.scan(scan_yml_dict)
        self.assertTrue(scan_result.has_test_failures())
        self.assertEqual(len(scan_result.errors), 1)
        errors = scan_result.errors[0].to_dict()
        self.assertEqual(errors['type'], 'test_execution_error')
        self.assertIsNotNone(errors['message'])

    def test_scan_result_with_test_errors(self):
        """Two broken expressions yield two errors, each message quoting its expression."""
        scan_yml_dict = {
            KEY_METRICS: [
                'row_count'
            ],
            KEY_TESTS: [
                '10 < error < 20',
                'lskdfj(lkj)',
            ]
        }
        scan_result = self.scan(scan_yml_dict)
        self.assertTrue(scan_result.has_test_failures())
        self.assertEqual(len(scan_result.errors), 2)

        errors = scan_result.errors[0].to_dict()
        self.assertEqual(errors['type'], 'test_execution_error')
        self.assertIn('10 < error < 20', errors['message'])

        errors = scan_result.errors[1].to_dict()
        self.assertEqual(errors['type'], 'test_execution_error')
        self.assertIn('lskdfj(lkj)', errors['message'])
# --------------------------------------------------------------------------------
# /tests/local/warehouse/tests/test_tests_column_metric.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sodasql.scan.metric import Metric
from sodasql.scan.scan_yml_parser import KEY_METRICS, KEY_COLUMNS, COLUMN_KEY_TESTS, KEY_METRIC_GROUPS
from tests.common.sql_test_case import SqlTestCase


class TestColumnMetricTests(SqlTestCase):
    """Checks column-level tests that refer to column metrics."""

    def test_column_metric_test(self):
        """The same scan passes with 'missing_count < 2' and fails with 'missing_count == 0'."""
        self.sql_recreate_table(
            [f"name {self.dialect.data_type_varchar_255}"],
            ["('one')",
             "('two')",
             "('three') ",
             "('no value')",
             "(null)"])

        scan_yml_dict = {
            KEY_METRIC_GROUPS: [
                Metric.METRIC_GROUP_MISSING
            ],
            KEY_COLUMNS: {
                'name': {
                    COLUMN_KEY_TESTS: [
                        'missing_count < 2'
                    ]
                }
            }
        }
        scan_result = self.scan(scan_yml_dict)
        self.assertFalse(scan_result.has_test_failures())

        # Replace the test through the same constant it was stored under, rather than
        # a hard-coded 'tests' literal that would break if the key ever changed.
        scan_yml_dict[KEY_COLUMNS]['name'][COLUMN_KEY_TESTS][0] = 'missing_count == 0'

        scan_result = self.scan(scan_yml_dict)
        self.assertTrue(scan_result.has_test_failures())

    def test_column_metric_metric_calculation_test(self):
        """Arithmetic between metrics is evaluated; 'max - min < 5' fails for 12 - 3."""
        self.sql_recreate_table(
            [f"size {self.dialect.data_type_integer}"],
            ["(3)",
             "(3)",
             "(4) ",
             "(12)",
             "(11)"])

        scan_result = self.scan({
            'metrics': [
                'min',
                'max'
            ],
            'columns': {
                'size': {
                    'tests': [
                        'max > 0',
                        'min < 20',
                        'max - min < 10',
                        'max - min < 5'
                    ]
                }
            }
        })
        self.assertTrue(scan_result.has_test_failures())
# --------------------------------------------------------------------------------
# /tests/local/warehouse/tests/test_tests_complex_expressions.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sodasql.scan.scan_yml_parser import KEY_METRICS, KEY_TESTS
from tests.common.sql_test_case import SqlTestCase


class TestTestsComplexExpressions(SqlTestCase):
    """Checks the values recorded for a test whose left-hand side is an arithmetic expression."""

    def setUp(self) -> None:
        """Create a three-row varchar table so that row_count is 3."""
        super().setUp()
        self.sql_recreate_table(
            [f"name {self.dialect.data_type_varchar_255}"],
            ["('one')",
             "('two')",
             "('three') "])

    def test_complex_expression_result(self):
        """'10 - row_count + 1' evaluates to 8 and is stored first as 'expression_result'."""
        test_name = 'my_test'
        scan_yml_dict = {
            KEY_METRICS: [
                'row_count'
            ],
            KEY_TESTS: {
                test_name: '10 - row_count + 1 >= 5'
            }
        }

        scan_result = self.scan(scan_yml_dict)
        self.assertFalse(scan_result.has_test_failures())

        # unittest assertions instead of bare `assert`: they are not stripped under
        # `python -O` and they report both operands on failure.
        result_values = scan_result.test_results[0].values
        self.assertIn('expression_result', result_values)
        self.assertEqual(list(result_values.keys())[0], 'expression_result')
        self.assertEqual(result_values['expression_result'], 8)
# --------------------------------------------------------------------------------
# /tests/local/warehouse/tests/test_tests_sql_metric_multi.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sodasql.scan.scan_yml_parser import KEY_SQL_METRICS, SQL_METRIC_KEY_TESTS, SQL_METRIC_KEY_SQL, \
    COLUMN_KEY_SQL_METRICS, SQL_METRIC_KEY_GROUP_FIELDS, COLUMN_KEY_METRICS
from tests.common.sql_test_case import SqlTestCase


class TestTestsSqlMetric(SqlTestCase):
    """Checks a grouped SQL metric with several group fields and a scan variable."""

    def setUp(self) -> None:
        """Create the 'xxx_view' table that the SQL metric below queries by name."""
        super().setUp()
        self.sql_recreate_table(
            table_name='xxx_view',
            columns=[f"r_id {self.dialect.data_type_varchar_255}",
                     f"s_id {self.dialect.data_type_varchar_255}",
                     f"p_id {self.dialect.data_type_varchar_255}",
                     f"week {self.dialect.data_type_integer}",
                     f"price {self.dialect.data_type_integer}"],
            rows=["('r1', 's1', 'p1', 1, 1)",
                  "('r1', 's1', 'p1', 1, 2)",
                  "('r1', 's1', 'p1', 1, 3) ",
                  "('r1', 's2', 'p2', 2, 10)",
                  "('r1', 's2', 'p2', 2, 20)",
                  "('r3', 's3', 'p3', 3, 50)"])
        self.qualified_table_name = self.warehouse.dialect.qualify_table_name(self.default_test_table_name)

    def test_sql_metric_groups_multi(self):
        """Every (r_id, week, s_id, p_id) group for R_ID = 'r1' has at least one price."""
        scan_yml_dict = {
            'filter': "r_id = '{{ R_ID }}'",
            KEY_SQL_METRICS: [{
                # '{{ R_ID }}' is left in the SQL text; the value is supplied through
                # the `variables` argument of scan() below.
                SQL_METRIC_KEY_SQL: (
                    "SELECT r_id, week, s_id, p_id, count(price) as prices_found \n"
                    "FROM xxx_view \n"
                    "WHERE r_id = '{{ R_ID }}' \n"
                    "GROUP BY r_id, week, s_id, p_id \n"
                    "ORDER BY r_id, week, s_id, p_id"
                ),
                SQL_METRIC_KEY_GROUP_FIELDS: ['r_id', 'week', 's_id', 'p_id'],
                # The literal used to contain SQL_METRIC_KEY_TESTS twice; the first entry
                # ("total_size_per_country < 20") was silently overwritten by this one
                # and has been removed, so the effective dict is unchanged.
                SQL_METRIC_KEY_TESTS: [
                    "prices_found > 0"
                ]
            }]
        }

        scan_result = self.scan(scan_yml_dict=scan_yml_dict, variables={'R_ID': 'r1'})
        self.assertFalse(scan_result.has_test_failures())
# --------------------------------------------------------------------------------
# /tests/local/warehouse/tests/test_tests_table_metric.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed
# under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sodasql.scan.scan_yml_parser import KEY_METRICS, KEY_TESTS
from tests.common.sql_test_case import SqlTestCase


class TestTestsTableMetric(SqlTestCase):
    """Checks table-level tests on row_count, given as a list and as a named mapping."""

    def setUp(self) -> None:
        """Create a five-row varchar table so that row_count is 5."""
        super().setUp()
        self.sql_recreate_table(
            [f"name {self.dialect.data_type_varchar_255}"],
            ["('one')",
             "('two')",
             "('three') ",
             "('no value')",
             "(null)"])

    def test_tests(self):
        """An unnamed test passes for '2 < row_count < 20' and fails for '10 < row_count < 20'."""
        scan_yml_dict = {
            KEY_METRICS: [
                'row_count'
            ],
            'tests': ['2 < row_count < 20']
        }
        scan_result = self.scan(scan_yml_dict)
        self.assertFalse(scan_result.has_test_failures())

        scan_yml_dict['tests'][0] = '10 < row_count < 20'
        scan_result = self.scan(scan_yml_dict)
        self.assertTrue(scan_result.has_test_failures())

    def test_named_tests(self):
        """A named test passes, then fails once its expression is tightened."""
        test_name = 'my_test'
        scan_yml_dict = {
            KEY_METRICS: [
                'row_count'
            ],
            KEY_TESTS: {
                test_name: '2 < row_count < 20'
            }
        }
        scan_result = self.scan(scan_yml_dict)
        self.assertFalse(scan_result.has_test_failures())

        # Index with the constant the mapping was stored under instead of a
        # hard-coded 'tests' literal.
        scan_yml_dict[KEY_TESTS][test_name] = '10 < row_count < 20'
        scan_result = self.scan(scan_yml_dict)
        self.assertTrue(scan_result.has_test_failures())
# --------------------------------------------------------------------------------
# /tests/local/warehouse/tests/test_tests_with_filtering.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sodasql.scan.scan_yml_parser import KEY_METRICS, KEY_TESTS
from tests.common.sql_test_case import SqlTestCase


class TestTestsWithFiltering(SqlTestCase):
    """Checks that a scan-level 'filter' works with and without a template variable."""

    def setUp(self) -> None:
        """Create a date column holding three identical dates."""
        super().setUp()
        column_definitions = [f"start_date {self.dialect.data_type_date}"]
        table_rows = ["('2022-01-01')"] * 3
        self.sql_recreate_table(column_definitions, table_rows)

    @staticmethod
    def _scan_yml_with_filter(filter_expression):
        """Return a scan dict that filters rows and tests that at least one row remains."""
        test_name = 'my_test'
        return {
            'filter': filter_expression,
            KEY_METRICS: ['row_count'],
            KEY_TESTS: {test_name: 'row_count > 0'},
        }

    def test_dynamic_filtering(self):
        """The date in the filter comes from the START_DATE scan variable."""
        scan_yml_dict = self._scan_yml_with_filter("DATE(start_date) = '{{ START_DATE }}'")

        scan_variables = {'START_DATE': '2022-01-01'}
        scan_result = self.scan(scan_yml_dict, variables=scan_variables)
        self.assertFalse(scan_result.has_test_failures())

    def test_static_filtering(self):
        """The date is written literally in the filter; no variables are passed."""
        scan_yml_dict = self._scan_yml_with_filter("DATE(start_date) = '2022-01-01'")

        scan_result = self.scan(scan_yml_dict)
        self.assertFalse(scan_result.has_test_failures())
# --------------------------------------------------------------------------------
# /tests/local/warehouse/tests/test_tests_with_variables.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sodasql.scan.scan_yml_parser import KEY_METRICS, KEY_TESTS
from tests.common.sql_test_case import SqlTestCase


class TestTestsWithVariables(SqlTestCase):
    """Checks that scan variables can be used inside a test expression."""

    def setUp(self) -> None:
        """Create a five-row varchar table so that row_count is 5."""
        super().setUp()
        column_definitions = [f"name {self.dialect.data_type_varchar_255}"]
        table_rows = [
            "('one')",
            "('two')",
            "('three') ",
            "('no value')",
            "(null)",
        ]
        self.sql_recreate_table(column_definitions, table_rows)

    def test_tests_with_variables(self):
        """The row count bounds are supplied as variables 1 and 20, so the test passes."""
        bounded_row_count_test = (
            '{{ row_count_variable_min }} < row_count < {{ row_count_variable_max }}'
        )
        scan_yml_dict = {
            KEY_METRICS: ['row_count'],
            'tests': [bounded_row_count_test],
        }
        scan_variables = {
            'row_count_variable_min': 1,
            'row_count_variable_max': 20,
        }

        scan_result = self.scan(scan_yml_dict, variables=scan_variables)
        self.assertFalse(scan_result.has_test_failures())
# --------------------------------------------------------------------------------
# /tests/local/warehouse/validity/test_numeric_data.py:
# --------------------------------------------------------------------------------
# Copyright 2021 Soda
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # Unless required by applicable law or agreed to in writing, software 7 | # distributed under the License is distributed on an "AS IS" BASIS, 8 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | # See the License for the specific language governing permissions and 10 | # limitations under the License. 11 | from sodasql.scan.metric import Metric 12 | from sodasql.scan.scan_yml_parser import KEY_METRICS, KEY_COLUMNS 13 | from tests.common.sql_test_case import SqlTestCase 14 | import pytest 15 | 16 | 17 | class TestNumericData(SqlTestCase): 18 | 19 | def test_overflow(self): 20 | self.sql_recreate_table( 21 | [f"name {self.dialect.data_type_bigint}"], 22 | ["(9223372036854775807)", 23 | "(9223372036854775807)"]) 24 | 25 | self.scan({ 26 | KEY_METRICS: [ 27 | Metric.SUM, 28 | Metric.AVG 29 | ] 30 | }) 31 | 32 | @pytest.mark.skip(reason="no easy way to do this for all warehouses") 33 | def test_numeric_parsing(self): 34 | self.sql_recreate_table( 35 | [f"name {self.dialect.data_type_varchar_255}"], 36 | ["('1%')", 37 | "('2.0%')", 38 | "('3,0%')"]) 39 | 40 | scan_result = self.scan({ 41 | KEY_METRICS: [ 42 | Metric.INVALID_COUNT, 43 | Metric.INVALID_PERCENTAGE, 44 | Metric.VALID_COUNT, 45 | Metric.VALID_PERCENTAGE, 46 | Metric.HISTOGRAM, 47 | Metric.MIN, 48 | Metric.MAX 49 | ], 50 | KEY_COLUMNS: { 51 | 'name': { 52 | 'valid_format': 'number_percentage' 53 | } 54 | } 55 | }) 56 | 57 | self.assertEqual(scan_result.get(Metric.VALUES_COUNT, 'name'), 3) 58 | self.assertEqual(scan_result.get(Metric.INVALID_COUNT, 'name'), 0) 59 | self.assertEqual(scan_result.get(Metric.INVALID_PERCENTAGE, 'name'), 0.0) 60 | self.assertEqual(scan_result.get(Metric.VALID_COUNT, 'name'), 3) 61 | self.assertEqual(scan_result.get(Metric.VALID_PERCENTAGE, 'name'), 100.0) 62 | self.assertEqual(scan_result.get(Metric.MIN, 'name'), 1) 63 | 
self.assertEqual(scan_result.get(Metric.MAX, 'name'), 3) 64 | self.assertAllNumeric(scan_result.get(Metric.HISTOGRAM)['boundaries']) 65 | self.assertAllNumeric(scan_result.get(Metric.HISTOGRAM)['frequencies']) 66 | -------------------------------------------------------------------------------- /tests/mysql_container/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | soda-sql-mysql: 4 | platform: linux/x86_64 5 | image: mysql:8 6 | ports: 7 | - "3306:3306" 8 | volumes: 9 | - ./.mysql/:/var/lib/mysql 10 | environment: 11 | - MYSQL_DATABASE=sodasql 12 | - MYSQL_USER=sodasql 13 | - MYSQL_PASSWORD=sodasql 14 | - MYSQL_ROOT_PASSWORD=sodasql 15 | -------------------------------------------------------------------------------- /tests/postgres_container/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | soda-sql-postgres: 4 | image: postgres:9.6.17-alpine 5 | ports: 6 | - "5432:5432" 7 | volumes: 8 | - ./.postgres/:/var/lib/postgresql/data 9 | environment: 10 | - POSTGRES_USER=sodasql 11 | - POSTGRES_DB=sodasql 12 | - POSTGRES_HOST_AUTH_METHOD=trust 13 | -------------------------------------------------------------------------------- /tests/spark_container/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | spark2-thrift: 4 | image: godatadriven/pyspark:3.1 5 | ports: 6 | - "10000:10000" 7 | - "4040:4040" 8 | depends_on: 9 | - hive-metastore 10 | command: > 11 | --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 12 | --name Thrift JDBC/ODBC Server 13 | volumes: 14 | - /tmp/.spark-warehouse/:/spark-warehouse/ 15 | - ./hive-site.xml:/usr/spark/conf/hive-site.xml 16 | environment: 17 | - WAIT_FOR=hive-metastore:5432 18 | 19 | hive-metastore: 20 | image: postgres:9.6.17-alpine 21 | volumes: 22 | - 
/tmp/.hive-metastore/:/var/lib/postgresql/data 23 | environment: 24 | - POSTGRES_USER=soda 25 | - POSTGRES_PASSWORD=soda 26 | - POSTGRES_DB=metastore 27 | -------------------------------------------------------------------------------- /tests/spark_container/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | 18 | 19 | 20 | 21 | javax.jdo.option.ConnectionURL 22 | jdbc:postgresql://hive-metastore/metastore 23 | 24 | 25 | 26 | javax.jdo.option.ConnectionDriverName 27 | org.postgresql.Driver 28 | 29 | 30 | 31 | javax.jdo.option.ConnectionUserName 32 | soda 33 | 34 | 35 | 36 | javax.jdo.option.ConnectionPassword 37 | soda 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /tests/sqlserver_container/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | soda-sql-sqlserver: 4 | image: mcr.microsoft.com/mssql/server:2017-CU8-ubuntu 5 | ports: 6 | - "1433:1433" 7 | environment: 8 | - ACCEPT_EULA=Y 9 | - SA_PASSWORD=yourStrong@@PasswordyourStrong@@Password 10 | 11 | -------------------------------------------------------------------------------- /tests/trino_contaner/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | soda-sql-trino: 4 | image: trinodb/trino 5 | ports: 6 | - "8080:8080" 7 | volumes: 8 | - ./.trino/:/data/trino 9 | command: sh -c "sleep 15 && /usr/bin/trino --execute='create schema memory.sodasql; use memory.sodasql'" 10 | -------------------------------------------------------------------------------- /tests/warehouses/athena_cfg.yml: -------------------------------------------------------------------------------- 1 | # See ../example_userhome_env_vars.yml for instructions on setting the env vars 2 | type: athena 3 | access_key_id: env_var(SODA_ATHENA_ACCESS_KEY_ID) 4 | 
class AthenaSuite(SqlTestSuite):
    """SqlTestSuite subclass that targets Athena and adds one struct-column test."""

    def setUp(self) -> None:
        # The target is assigned before the inherited setUp runs.
        self.target = TARGET_ATHENA
        super().setUp()

    def test_skip_column_with_unknown_type_struct(self):
        # Columns of a type the scan does not know (here a struct) must be
        # skipped during the scan instead of being measured.
        # NOTE(review): the struct column is declared without field
        # definitions - confirm this DDL is what Athena expects.
        column_definitions = [
            f"name {self.dialect.data_type_varchar_255}",
            "structcolumn struct",
        ]
        rows = [
            "('one', null)",
            "('', null)",
            "(' ', null)",
            "('no value', null)",
            "(null, null)",
        ]
        self.sql_recreate_table(column_definitions, rows)

        scan_configuration = {'columns': {'name': {'metric_groups': ['missing']}}}
        result = self.scan(scan_configuration)

        # Exactly one missing value is expected for the 'name' column.
        missing_count = result.get(Metric.MISSING_COUNT, 'name')
        self.assertEqual(missing_count, 1)

        # The struct column must not have produced a measurement at all.
        struct_measurement = result.find_measurement(Metric.MISSING_COUNT, 'structcolumn')
        self.assertIsNone(struct_measurement)
class BigQueryFixture(WarehouseFixture):
    """Warehouse fixture that creates and drops a dedicated BigQuery dataset."""

    def __init__(self, *args, **kwargs):
        # project_id is defined before the base initializer runs.
        # NOTE(review): presumably WarehouseFixture.__init__ can call
        # create_database(), which assigns it - confirm.
        self.project_id = None
        super().__init__(*args, **kwargs)

    def create_database(self):
        """Create a uniquely named dataset in the "EU" location and point the dialect at it."""
        self.database = self.create_unique_database_name()
        self.warehouse.dialect.database = self.database
        self.warehouse.dialect.dataset_name = self.database
        self.project_id = self.warehouse.dialect.account_info_dict['project_id']
        dataset_id = f"{self.project_id}.{self.database}"
        dataset = bigquery.Dataset(dataset_id)
        dataset.location = "EU"
        self.warehouse.dialect.client.create_dataset(dataset, timeout=30)
        try:
            self.warehouse.dialect.sql_test_connection()
        except Exception as e:
            # Best effort: a failing connection check must not abort fixture
            # setup. Catch Exception rather than everything so that
            # KeyboardInterrupt/SystemExit still propagate, and leave a trace
            # of the failure instead of discarding it silently.
            logging.debug(f'BigQuery connection check failed: {str(e)}')

    def drop_database(self):
        """Delete the dataset and its contents; a missing dataset is not an error."""
        dataset_id = f"{self.project_id}.{self.database}"
        self.warehouse.dialect.client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)

    def tear_down(self):
        """Do nothing on tear-down."""
        pass
class BigQuerySuite(SqlTestSuite):
    """SqlTestSuite subclass that sets TARGET_BIGQUERY as its target."""

    def setUp(self) -> None:
        # The target is assigned before delegating to the inherited setUp.
        # NOTE(review): presumably SqlTestSuite.setUp reads self.target - confirm.
        self.target = TARGET_BIGQUERY
        super().setUp()
class DenodoFixture(WarehouseFixture):
    """Warehouse fixture for Denodo that neither creates nor drops a database."""

    def create_database(self):
        """Do nothing; this fixture creates no database."""

    def drop_database(self):
        """Do nothing; this fixture has no database of its own to drop."""

    def test_warehouse_connection(self):
        """Assert that the dialect's connection test returns a truthy value."""
        dialect = self.warehouse.dialect
        assert dialect.sql_test_connection()
# NOTE(review): this class sets TARGET_DENODO but is named PostgresSuite, which
# looks like a copy-paste leftover. Consider renaming it to DenodoSuite once
# every place that refers to the class by name has been checked.
class PostgresSuite(SqlTestSuite):
    """SqlTestSuite subclass that sets TARGET_DENODO as its target."""

    def setUp(self) -> None:
        # The target is assigned before delegating to the inherited setUp.
        # NOTE(review): presumably SqlTestSuite.setUp reads self.target - confirm.
        self.target = TARGET_DENODO
        super().setUp()
class MySQLFixture(WarehouseFixture):
    """Warehouse fixture for MySQL that neither creates nor drops a database."""

    def create_database(self):
        """Do nothing; this fixture creates no database."""

    def drop_database(self):
        """Do nothing; this fixture has no database of its own to drop."""

    def test_warehouse_connection(self):
        """Assert that the dialect's connection test returns a truthy value."""
        dialect = self.warehouse.dialect
        assert dialect.sql_test_connection()
class MySqlSuite(SqlTestSuite):
    """SqlTestSuite subclass that sets TARGET_MYSQL as its target."""

    def setUp(self) -> None:
        # The target is assigned before delegating to the inherited setUp.
        # NOTE(review): presumably SqlTestSuite.setUp reads self.target - confirm.
        self.target = TARGET_MYSQL
        super().setUp()
class PostgresFixture(WarehouseFixture):
    """Warehouse fixture for Postgres that neither creates nor drops a database."""

    def create_database(self):
        """Do nothing; this fixture creates no database."""

    def drop_database(self):
        """Do nothing; this fixture has no database of its own to drop."""

    def test_warehouse_connection(self):
        """Assert that the dialect's connection test returns a truthy value."""
        dialect = self.warehouse.dialect
        assert dialect.sql_test_connection()
class PostgresSuite(SqlTestSuite):
    """SqlTestSuite subclass that sets TARGET_POSTGRES as its target."""

    def setUp(self) -> None:
        # The target is assigned before delegating to the inherited setUp.
        # NOTE(review): presumably SqlTestSuite.setUp reads self.target - confirm.
        self.target = TARGET_POSTGRES
        super().setUp()
class RedshiftFixture(WarehouseFixture):
    """Warehouse fixture that runs the tests in a freshly created Redshift database."""

    # Dialect and connection the warehouse had before create_database() swapped
    # them; the original connection issues the CREATE/DROP DATABASE statements.
    original_dialect = None
    original_connection = None

    def create_database(self):
        """Create a uniquely named database and reconnect the warehouse to it."""
        warehouse = self.warehouse
        self.database = self.create_unique_database_name()

        # Remember what the warehouse was using so drop_database() can reuse it.
        self.original_connection = warehouse.connection
        self.original_dialect = warehouse.dialect

        # NOTE(review): level 0 is presumably the driver's autocommit mode,
        # needed to run CREATE DATABASE outside a transaction - confirm.
        self.original_connection.set_isolation_level(0)
        create_statement = f'CREATE DATABASE {self.database}'
        sql_update(self.original_connection, create_statement)

        # Switch the warehouse over to the new database.
        warehouse.dialect = self.original_dialect.with_database(self.database)
        warehouse.connection = warehouse.dialect.create_connection()

    def test_warehouse_connection(self):
        """Assert that the dialect's connection test returns a truthy value."""
        dialect = self.warehouse.dialect
        assert dialect.sql_test_connection()

    def drop_database(self):
        """Close the per-test connection, then drop the database via the original one."""
        connection = self.warehouse.connection
        try:
            connection.close()
        except Exception as e:
            # A failed close is only logged; the drop is attempted regardless.
            logging.debug(f'Closing connection failed: {str(e)}')
        drop_statement = f'DROP DATABASE {self.database}'
        sql_update(self.original_connection, drop_statement)
class RedshiftSuite(SqlTestSuite):
    """SqlTestSuite subclass that sets TARGET_REDSHIFT as its target."""

    def setUp(self) -> None:
        # The target is assigned before delegating to the inherited setUp.
        # NOTE(review): presumably SqlTestSuite.setUp reads self.target - confirm.
        self.target = TARGET_REDSHIFT
        super().setUp()
class SnowflakeFixture(WarehouseFixture):
    """Warehouse fixture for Snowflake whose connection check is a no-op."""

    def test_warehouse_connection(self):
        """Do nothing; no connection check is performed for this fixture."""
class SparkSuite(SqlTestSuite):
    """SqlTestSuite subclass that targets Spark and checks the dialect's column metadata."""

    def setUp(self) -> None:
        # The target is assigned before the inherited setUp runs.
        self.target = TARGET_SPARK
        super().setUp()

    def test_sql_columns_metadata(self):
        # One nullable varchar column, described the way the dialect is
        # expected to report it (lower-cased data type).
        varchar_type = self.dialect.data_type_varchar_255.lower()
        expected_metadata = [ColumnMetadata("name", varchar_type, is_nullable="YES")]

        # The first two fields of every entry are joined into the column DDL.
        # NOTE(review): presumably these are the column name and data type - confirm.
        column_definitions = []
        for metadata in expected_metadata:
            column_definitions.append(" ".join(metadata[:2]))
        self.sql_recreate_table(column_definitions)

        actual_metadata = self.dialect.sql_columns_metadata(self.default_test_table_name)

        assert actual_metadata == expected_metadata
class SQLServerFixture(WarehouseFixture):
    """Warehouse fixture for SQL Server that neither creates nor drops a database."""

    def create_database(self):
        """Do nothing; this fixture creates no database."""

    def drop_database(self):
        """Do nothing; this fixture has no database of its own to drop."""
class SQLServerSuite(SqlTestSuite):
    """SqlTestSuite subclass that sets TARGET_SQLSERVER as its target."""

    def setUp(self) -> None:
        # The target is assigned before delegating to the inherited setUp.
        # NOTE(review): presumably SqlTestSuite.setUp reads self.target - confirm.
        self.target = TARGET_SQLSERVER
        super().setUp()
# NOTE(review): this class sets TARGET_TRINO but is named SQLServerSuite, which
# looks like a copy-paste leftover. Consider renaming it to TrinoSuite once
# every place that refers to the class by name has been checked.
class SQLServerSuite(SqlTestSuite):
    """SqlTestSuite subclass that sets TARGET_TRINO as its target."""

    def setUp(self) -> None:
        # The target is assigned before delegating to the inherited setUp.
        # NOTE(review): presumably SqlTestSuite.setUp reads self.target - confirm.
        self.target = TARGET_TRINO
        super().setUp()
= 1 38 | healthcheck_retries = 30 39 | healthcheck_interval = 1 40 | healthcheck_start_period = 1 41 | 42 | 43 | --------------------------------------------------------------------------------